diff --git a/.bazelrc b/.bazelrc index 2efdbad2e5f..bb5f1c03727 100644 --- a/.bazelrc +++ b/.bazelrc @@ -168,6 +168,8 @@ build:cuda_clang --action_env TF_CUDA_CLANG=1 build:dbg --config=opt -c dbg # for now, disable arm_neon. see: https://github.com/tensorflow/tensorflow/issues/33360 build:dbg --cxxopt -DTF_LITE_DISABLE_X86_NEON +# AWS SDK must be compiled in release mode. see: https://github.com/tensorflow/tensorflow/issues/37498 +build:dbg --copt -DDEBUG_BUILD build:tensorrt --action_env TF_NEED_TENSORRT=1 diff --git a/RELEASE.md b/RELEASE.md index b5d088821e4..6c8921cf492 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,147 @@ +# Release 2.2.0 + +TensorFlow 2.2 discontinues support for Python 2, [previously announced](https://groups.google.com/a/tensorflow.org/d/msg/announce/gVwS5RC8mds/dCt1ka2XAAAJ) as following [Python 2's EOL on January 1, 2020](https://www.python.org/dev/peps/pep-0373/#update). + +Coinciding with this change, new releases of [TensorFlow's Docker images](https://hub.docker.com/r/tensorflow/tensorflow/) provide Python 3 exclusively. Because all images now use Python 3, Docker tags containing `-py3` will no longer be provided and existing `-py3` tags like `latest-py3` will not be updated. + +## Major Features and Improvements + +* Replaced the scalar type for string tensors from `std::string` to `tensorflow::tstring` which is now ABI stable. +* A new Profiler for TF 2 for CPU/GPU/TPU. It offers both device and host performance analysis, including input pipeline and TF Ops. Optimization advisory is provided whenever possible. Please see [this tutorial](https://www.tensorflow.org/tensorboard/tensorboard_profiling_keras) and [guide](https://www.tensorflow.org/guide/profiler) for usage guidelines. +* Export C++ functions to Python using `pybind11` as opposed to `SWIG` as a part of our [deprecation of swig efforts](https://github.com/tensorflow/community/blob/master/rfcs/20190208-pybind11.md). 
+* `tf.distribute`: + * Support added for global sync `BatchNormalization` by using the newly added `tf.keras.layers.experimental.SyncBatchNormalization` layer. This layer will sync `BatchNormalization` statistics every step across all replicas taking part in sync training. + * Performance improvements for GPU multi-worker distributed training using `tf.distribute.experimental.MultiWorkerMirroredStrategy` + * Update NVIDIA `NCCL` to `2.5.7-1` for better performance and performance tuning. Please see [nccl developer guide](https://docs.nvidia.com/deeplearning/sdk/nccl-developer-guide/docs/env.html) for more information on this. + * Support gradient `allreduce` in `float16`. See this [example](https://github.com/tensorflow/models/blob/master/official/staging/training/grad_utils.py) usage. + * Experimental support of [all reduce gradient packing](https://www.tensorflow.org/api_docs/python/tf/distribute/experimental/CollectiveHints) to allow overlapping gradient aggregation with backward path computation. + * Deprecated `experimental_run_v2` method for distribution strategies and renamed the method `run` as it is no longer experimental. + * Add CompositeTensor support for DistributedIterators. This should help prevent unnecessary function retracing and memory leaks. +* `tf.keras`: + * `Model.fit` major improvements: + * You can now use custom training logic with `Model.fit` by overriding `Model.train_step`. + * Easily write state-of-the-art training loops without worrying about all of the features `Model.fit` handles for you (distribution strategies, callbacks, data formats, looping logic, etc) + * See the default [`Model.train_step`](https://github.com/tensorflow/tensorflow/blob/1381fc8e15e22402417b98e3881dfd409998daea/tensorflow/python/keras/engine/training.py#L540) for an example of what this function should look like. Same applies for validation and inference via `Model.test_step` and `Model.predict_step`. 
+ * SavedModel uses its own `Model._saved_model_inputs_spec` attr now instead of + relying on `Model.inputs` and `Model.input_names`, which are no longer set for subclass Models. + This attr is set in eager, `tf.function`, and graph modes. This gets rid of the need for users to + manually call `Model._set_inputs` when using Custom Training Loops(CTLs). + * Dynamic shapes are supported for generators by calling the Model on the first batch we "peek" from the generator. + This used to happen implicitly in `Model._standardize_user_data`. Long-term, a solution where the + `DataAdapter` doesn't need to call the Model is probably preferable. + * The SavedModel format now supports all Keras built-in layers (including metrics, preprocessing layers, and stateful RNN layers) + * Update Keras batch normalization layer to use the running mean and average computation in the `fused_batch_norm`. You should see significant performance improvements when using `fused_batch_norm` in Eager mode. + +* `tf.lite`: + * Enable TFLite experimental new converter by default. +* XLA + * XLA now builds and works on windows. All prebuilt packages come with XLA available. + * XLA can be [enabled for a `tf.function`](https://www.tensorflow.org/xla#explicit_compilation_with_tffunction +) with “compile or throw exception” semantics on CPU and GPU. + +## Breaking Changes +* `tf.keras`: + * In `tf.keras.applications` the name of the "top" layer has been standardized to "predictions". This is only a problem if your code relies on the exact name of the layer. + * Huber loss function has been updated to be consistent with other Keras losses. It now computes mean over the last axis of per-sample losses before applying the reduction function. +* AutoGraph no longer converts functions passed to `tf.py_function`, `tf.py_func` and `tf.numpy_function`. +* Deprecating `XLA_CPU` and `XLA_GPU` devices with this release. 
+* Increasing the minimum bazel version to build TF to 2.0.0 to use Bazel's `cc_experimental_shared_library`. +* Keras compile/fit behavior for functional and subclassed models has been unified. Model properties such as `metrics`, `metrics_names` will now be available only after **training/evaluating the model on actual data** for functional models. `metrics` will **now include** model `loss` and output losses. The `loss_functions` property has been removed from the model. This was an undocumented property that was accidentally public and has now been removed. + +## Known Caveats +* The current TensorFlow release now **requires** [gast](https://pypi.org/project/gast/) version 0.3.3. + +## Bug Fixes and Other Changes +* `tf.data`: + * Removed `autotune_algorithm` from experimental optimization options. +* TF Core: + * `tf.constant` always creates CPU tensors irrespective of the current device context. + * Eager `TensorHandles` maintain a list of mirrors for any copies to local or remote devices. This avoids any redundant copies due to op execution. + * For `tf.Tensor` & `tf.Variable`, `.experimental_ref()` is no longer experimental and is available as simply `.ref()`. + * `pfor/vectorized_map`: Added support for vectorizing 56 more ops. Vectorizing `tf.cond` is also supported now. + * Set as much partial shape as we can infer statically within the gradient impl of the gather op. + * Gradient of `tf.while_loop` emits `StatelessWhile` op if `cond` and body functions are stateless. This allows multiple gradients while ops to run in parallel under distribution strategy. + * Speed up `GradientTape` in eager mode by auto-generating list of op inputs/outputs which are unused and hence not cached for gradient functions. + * Support `back_prop=False` in `while_v2` but mark it as deprecated. + * Improve error message when attempting to use `None` in data-dependent control flow. + * Add `RaggedTensor.numpy()`. 
+ * Update `RaggedTensor.__getitem__` to preserve uniform dimensions & allow indexing into uniform dimensions. + * Update `tf.expand_dims` to always insert the new dimension as a non-ragged dimension. + * Update `tf.embedding_lookup` to use `partition_strategy` and `max_norm` when `ids` is ragged. + * Allow `batch_dims==rank(indices)` in `tf.gather`. + * Add support for bfloat16 in `tf.print`. +* `tf.distribute`: + * Support `embedding_column` with variable-length input features for `MultiWorkerMirroredStrategy`. +* `tf.keras`: + * Added `experimental_aggregate_gradients` argument to `tf.keras.optimizers.Optimizer.apply_gradients`. This allows custom gradient aggregation and processing aggregated gradients in custom training loop. + * Allow `pathlib.Path` paths for loading models via Keras API. +* `tf.function`/AutoGraph: + * AutoGraph is now available in `ReplicaContext.merge_call`, `Strategy.extended.update` and `Strategy.extended.update_non_slot`. + * Experimental support for shape invariants has been enabled in `tf.function`. See the API docs for `tf.autograph.experimental.set_loop_options` for additional info. + * AutoGraph error messages now exclude frames corresponding to APIs internal to AutoGraph. + * Improve shape inference for `tf.function` input arguments to unlock more Grappler optimizations in TensorFlow 2.x. + * Improve automatic control dependency management of resources by allowing resource reads to occur in parallel and synchronizing only on writes. + * Fix execution order of multiple stateful calls to `experimental_run_v2` in `tf.function`. + * You can now iterate over `RaggedTensors` using a for loop inside `tf.function`. +* `tf.lite`: + * Migrated the `tf.lite` C inference API out of experimental into lite/c. + * Add an option to disallow `NNAPI` CPU / partial acceleration on Android 10 + * TFLite Android AARs now include the C headers and APIs are required to use TFLite from native code. 
+ * Refactors the delegate and delegate kernel sources to allow usage in the linter. + * Limit delegated ops to actually supported ones if a device name is specified or `NNAPI` CPU Fallback is disabled. + * TFLite now supports `tf.math.reciprocal1` op by lowering to `tf.div op`. + * TFLite's unpack op now supports boolean tensor inputs. + * Microcontroller and embedded code moved from experimental to main TensorFlow Lite folder + * Check for large TFLite tensors. + * Fix GPU delegate crash with C++17. + * Add 5D support to TFLite `strided_slice`. + * Fix error in delegation of `DEPTH_TO_SPACE` to `NNAPI` causing op not to be accelerated. + * Fix segmentation fault when running a model with LSTM nodes using `NNAPI` Delegate + * Fix `NNAPI` delegate failure when an operand for Maximum/Minimum operation is a scalar. + * Fix `NNAPI` delegate failure when Axis input for reduce operation is a scalar. + * Expose option to limit the number of partitions that will be delegated to `NNAPI`. + * If a target accelerator is specified, use its feature level to determine operations to delegate instead of SDK version. +* `tf.random`: + * Various random number generation improvements: + * Add a fast path for default `random_uniform` + * `random_seed` documentation improvement. + * `RandomBinomial` broadcasts and appends the sample shape to the left rather than the right. + * Added `tf.random.stateless_binomial`, `tf.random.stateless_gamma`, `tf.random.stateless_poisson` + * `tf.random.stateless_uniform` now supports unbounded sampling of `int` types. +* Math and Linear Algebra: + * Add `tf.linalg.LinearOperatorTridiag`. + * Add `LinearOperatorBlockLowerTriangular` + * Add broadcasting support to tf.linalg.triangular_solve[#26204](https://github.com/tensorflow/tensorflow/issues/26204), tf.math.invert_permutation. + * Add `tf.math.sobol_sample` op. + * Add `tf.math.xlog1py`. + * Add `tf.math.special.{dawsn,expi,fresnel_cos,fresnel_sin,spence}`. 
+ * Add a Modified Discrete Cosine Transform (MDCT) and its inverse to `tf.signal`. +* TPU Enhancements: + * Refactor `TpuClusterResolver` to move shared logic to a separate pip package. + * Support configuring TPU software version from cloud tpu client. + * Allowed TPU embedding weight decay factor to be multiplied by learning rate. +* XLA Support: + * Add standalone XLA AOT runtime target + relevant .cc sources to pip package. + * Add check for memory alignment to MemoryAllocation::MemoryAllocation() on 32-bit ARM. This ensures a deterministic early exit instead of a hard to debug bus error later. + * `saved_model_cli aot_compile_cpu` allows you to compile saved models to XLA header+object files and include them in your C++ programs. + * Enable `Igamma`, `Igammac` for XLA. +* Deterministic Op Functionality: + * XLA reduction emitter is deterministic when the environment variable `TF_DETERMINISTIC_OPS` is set to "true" or "1". This extends deterministic `tf.nn.bias_add` back-prop functionality (and therefore also deterministic back-prop of bias-addition in Keras layers) to include when XLA JIT compilation is enabled. + * Fix problem, when running on a CUDA GPU and when either environment variable `TF_DETERMINISTIC_OPS` or environment variable `TF_CUDNN_DETERMINISTIC` is set to "true" or "1", in which some layer configurations led to an exception with the message "No algorithm worked!" +* Tracing and Debugging: + * Add source, destination name to `_send` traceme to allow easier debugging. + * Add traceme event to `fastpathexecute`. +* Other: + * Fix an issue with AUC.reset_states for multi-label AUC [#35852](https://github.com/tensorflow/tensorflow/issues/35852) + * Fix the TF upgrade script to not delete files when there is a parsing error and the output mode is `in-place`. + * Move `tensorflow/core:framework/*_pyclif` rules to `tensorflow/core/framework:*_pyclif`. 
+ +## Thanks to our Contributors + +This release contains contributions from many people at Google, as well as: + +372046933, 8bitmp3, aaronhma, Abin Shahab, Aditya Patwardhan, Agoniii, Ahti Kitsik, Alan Yee, Albin Joy, Alex Hoffman, Alexander Grund, Alexandre E. Eichenberger, Amit Kumar Jaiswal, amoitra, Andrew Anderson, Angus-Luo, Anthony Barbier, Anton Kachatkou, Anuj Rawat, archis, Arpan-Dhatt, Arvind Sundararajan, Ashutosh Hathidara, autoih, Bairen Yi, Balint Cristian, Bas Aarts, BashirSbaiti, Basit Ayantunde, Ben Barsdell, Benjamin Gaillard, boron, Brett Koonce, Bryan Cutler, Christian Goll, Christian Sachs, Clayne Robison, comet, Daniel Falbel, Daria Zhuravleva, darsh8200, David Truby, Dayananda-V, deepakm, Denis Khalikov, Devansh Singh, Dheeraj R Reddy, Diederik Van Liere, Diego Caballero, Dominic Jack, dothinking, Douman, Drake Gens, Duncan Riach, Ehsan Toosi, ekuznetsov139, Elena Zhelezina, elzino, Ending2015a, Eric Schweitz, Erik Zettel, Ethan Saadia, Eugene Kuznetsov, Evgeniy Zheltonozhskiy, Ewout Ter Hoeven, exfalso, FAIJUL, Fangjun Kuang, Fei Hu, Frank Laub, Frederic Bastien, Fredrik Knutsson, frreiss, Frédéric Rechtenstein, fsx950223, Gaurav Singh, gbaned, George Grzegorz Pawelczak, George Sterpu, Gian Marco Iodice, Giorgio Arena, Hans Gaiser, Hans Pabst, Haoyu Wu, Harry Slatyer, hsahovic, Hugo, Hugo Sjöberg, IrinaM21, jacco, Jake Tae, Jean-Denis Lesage, Jean-Michel Gorius, Jeff Daily, Jens Elofsson, Jerry Shih, jerryyin, Jin Mingjian, Jinjing Zhou, JKIsaacLee, jojimonv, Jonathan Dekhtiar, Jose Ignacio Gomez, Joseph-Rance, Judd, Julian Gross, Kaixi Hou, Kaustubh Maske Patil, Keunwoo Choi, Kevin Hanselman, Khor Chean Wei, Kilaru Yasaswi Sri Chandra Gandhi, Koan-Sin Tan, Koki Ibukuro, Kristian Holsheimer, kurileo, Lakshay Tokas, Lee Netherton, leike666666, Leslie-Fang-Intel, Li, Guizi, LIUJIAN435, Lukas Geiger, Lyo Nguyen, madisetti, Maher Jendoubi, Mahmoud Abuzaina, Manuel Freiberger, Marcel Koester, Marco Jacopo Ferrarotti, Markus Franke, marload, 
Mbah-Javis, mbhuiyan, Meng Zhang, Michael Liao, MichaelKonobeev, Michal Tarnowski, Milan Straka, minoring, Mohamed Nour Abouelseoud, MoussaMM, Mrinal Jain, mrTsjolder, Måns Nilsson, Namrata Bhave, Nicholas Gao, Niels Ole Salscheider, nikochiko, Niranjan Hasabnis, Nishidha Panpaliya, nmostafa, Noah Trenaman, nuka137, Officium, Owen L - Sfe, Pallavi G, Paul Andrey, Peng Sun, Peng Wu, Phil Pearl, PhilipMay, pingsutw, Pooya Davoodi, PragmaTwice, pshiko, Qwerty71, R Gomathi, Rahul Huilgol, Richard Xiao, Rick Wierenga, Roberto Rosmaninho, ruchit2801, Rushabh Vasani, Sami, Sana Damani, Sarvesh Dubey, Sasan Jafarnejad, Sergii Khomenko, Shane Smiskol, Shaochen Shi, sharkdtu, Shawn Presser, ShengYang1, Shreyash Patodia, Shyam Sundar Dhanabalan, Siju Samuel, Somyajit Chakraborty Sam, Srihari Humbarwadi, srinivasan.narayanamoorthy, Srishti Yadav, Steph-En-M, Stephan Uphoff, Stephen Mugisha, SumanSudhir, Taehun Kim, Tamas Bela Feher, TengLu, Tetragramm, Thierry Herrmann, Tian Jin, tigertang, Tom Carchrae, Tom Forbes, Trent Lo, Victor Peng, vijayphoenix, Vincent Abriou, Vishal Bhola, Vishnuvardhan Janapati, vladbataev, VoVAllen, Wallyss Lima, Wen-Heng (Jack) Chung, wenxizhu, William D. Irons, William Zhang, Xiaoming (Jason) Cui, Xiaoquan Kong, Xinan Jiang, Yasir Modak, Yasuhiro Matsumoto, Yaxun (Sam) Liu, Yong Tang, Ytyt-Yt, yuan, Yuan Mingshuai, Yuan Tang, Yuki Ueda, Yusup, zhangshijin, zhuwenxi + # Release 2.0.1 ## Bug Fixes and Other Changes diff --git a/configure.py b/configure.py index ac9ed0c4d88..945c3036a8d 100644 --- a/configure.py +++ b/configure.py @@ -144,7 +144,7 @@ def write_to_bazelrc(line): def write_action_env_to_bazelrc(var_name, var): - write_to_bazelrc('build --action_env %s="%s"' % (var_name, str(var))) + write_to_bazelrc('build --action_env {}="{}"'.format(var_name, str(var))) def run_shell(cmd, allow_non_zero=False, stderr=None): @@ -205,7 +205,7 @@ def setup_python(environ_cp): # Get PYTHON_BIN_PATH, default is the current running python. 
default_python_bin_path = sys.executable ask_python_bin_path = ('Please specify the location of python. [Default is ' - '%s]: ') % default_python_bin_path + '{}]: ').format(default_python_bin_path) while True: python_bin_path = get_from_env_or_user_or_default(environ_cp, 'PYTHON_BIN_PATH', @@ -215,9 +215,10 @@ def setup_python(environ_cp): if os.path.isfile(python_bin_path) and os.access(python_bin_path, os.X_OK): break elif not os.path.exists(python_bin_path): - print('Invalid python path: %s cannot be found.' % python_bin_path) + print('Invalid python path: {} cannot be found.'.format(python_bin_path)) else: - print('%s is not executable. Is it the python binary?' % python_bin_path) + print('{} is not executable. Is it the python binary?'.format( + python_bin_path)) environ_cp['PYTHON_BIN_PATH'] = '' # Convert python path to Windows style before checking lib and version @@ -236,7 +237,7 @@ def setup_python(environ_cp): default_python_lib_path = python_lib_paths[0] python_lib_path = get_input( 'Please input the desired Python library path to use. 
' - 'Default is [%s]\n' % python_lib_paths[0]) + 'Default is [{}]\n'.format(python_lib_paths[0])) if not python_lib_path: python_lib_path = default_python_lib_path environ_cp['PYTHON_LIB_PATH'] = python_lib_path @@ -252,7 +253,7 @@ def setup_python(environ_cp): # Set-up env variables used by python_configure.bzl write_action_env_to_bazelrc('PYTHON_BIN_PATH', python_bin_path) write_action_env_to_bazelrc('PYTHON_LIB_PATH', python_lib_path) - write_to_bazelrc('build --python_path=\"%s"' % python_bin_path) + write_to_bazelrc('build --python_path=\"{}"'.format(python_bin_path)) environ_cp['PYTHON_BIN_PATH'] = python_bin_path # If choosen python_lib_path is from a path specified in the PYTHONPATH @@ -266,7 +267,7 @@ def setup_python(environ_cp): with open( os.path.join(_TF_WORKSPACE_ROOT, 'tools', 'python_bin_path.sh'), 'w') as f: - f.write('export PYTHON_BIN_PATH="%s"' % python_bin_path) + f.write('export PYTHON_BIN_PATH="{}"'.format(python_bin_path)) def reset_tf_configure_bazelrc(): @@ -320,11 +321,12 @@ def get_var(environ_cp, Raise the error to avoid infinitely looping. """ if not question: - question = 'Do you wish to build TensorFlow with %s support?' % query_item + question = 'Do you wish to build TensorFlow with {} support?'.format( + query_item) if not yes_reply: - yes_reply = '%s support will be enabled for TensorFlow.' % query_item + yes_reply = '{} support will be enabled for TensorFlow.'.format(query_item) if not no_reply: - no_reply = 'No %s' % yes_reply + no_reply = 'No {}'.format(yes_reply) yes_reply += '\n' no_reply += '\n' @@ -368,7 +370,7 @@ def get_var(environ_cp, print(no_reply) var = False else: - print('Invalid selection: %s' % user_input_origin) + print('Invalid selection: {}'.format(user_input_origin)) return var @@ -479,13 +481,13 @@ def check_bazel_version(min_version, max_version): if which('bazel') is None: print('Cannot find bazel. 
Please install bazel.') sys.exit(0) - curr_version = run_shell( - ['bazel', '--batch', '--bazelrc=/dev/null', 'version']) - for line in curr_version.split('\n'): - if 'Build label: ' in line: - curr_version = line.split('Build label: ')[1] - break + stderr = open(os.devnull, 'wb') + curr_version = run_shell(['bazel', '--version'], + allow_non_zero = True, + stderr = stderr) + if curr_version.startswith('bazel '): + curr_version = curr_version.split('bazel ')[1] min_version_int = convert_version_to_int(min_version) curr_version_int = convert_version_to_int(curr_version) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index f2018220a56..ab4316d5ed0 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -517,6 +517,7 @@ package_group( "//perftools/accelerators/xprof/api/...", "//third_party/py/autograph/...", "//third_party/swift/tensorflow/x10/...", + "//third_party/swift/tensorflow_apis/...", "//tensorflow/...", "//tensorflow_estimator/python/estimator/...", "//tensorflow_models/official/...", @@ -529,6 +530,13 @@ package_group(name = "ndarray_tensor_allow_list") # TODO(b/154762408) Remove this package group once it's no longer needed. package_group(name = "composite_tensor_whitelist") +# Packages that use private types symbols, until they are exported. +# TODO(b/154650521) Remove. 
+package_group( + name = "types_whitelist", + packages = ["//learning/deepmind/tensorflow/replicator/..."], +) + filegroup( name = "intel_binary_blob", data = if_mkl_ml( diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index 3d3fc7065a4..d3059df1bef 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -16,7 +16,6 @@ load( "//tensorflow/core/platform:build_config_root.bzl", "tf_cuda_tests_tags", ) -load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") package( licenses = ["notice"], # Apache 2.0 @@ -609,7 +608,6 @@ filegroup( ], exclude = [ "c_api_experimental.cc", - "*c_api_tfrt*", "*test*", "*dlpack*", ], diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 9651a47d6ac..5c01ccb82bb 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -38,7 +38,7 @@ limitations under the License. #include "tensorflow/c/eager/tfe_tensorhandle_internal.h" #include "tensorflow/c/tf_tensor_internal.h" #ifdef PLATFORM_GOOGLE -#include "tensorflow/c/eager/c_api_tfrt.h" +#include "tensorflow/core/tfrt/eager/c_api_tfrt.h" #endif #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/eager/context.h" @@ -924,7 +924,7 @@ extern TFE_ContextDevicePlacementPolicy TFE_ContextGetDevicePlacementPolicy( context->GetDevicePlacementPolicy()); } -TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t, TF_Status* status) { +TFE_TensorHandle* TFE_NewTensorHandle(const TF_Tensor* t, TF_Status* status) { tensorflow::Tensor tensor; status->status = tensorflow::TF_TensorToTensor(t, &tensor); if (!status->status.ok()) return nullptr; diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h index 070b3a9bb60..5afe3047dd7 100644 --- a/tensorflow/c/eager/c_api.h +++ b/tensorflow/c/eager/c_api.h @@ -137,7 +137,7 @@ TF_CAPI_EXPORT extern void TFE_ContextSetServerDef(TFE_Context* ctx, // placed in memory of different devices or remote address spaces. 
typedef struct TFE_TensorHandle TFE_TensorHandle; -TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t, +TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_NewTensorHandle(const TF_Tensor* t, TF_Status* status); // Indicates that the caller will not be using `h` any more. TF_CAPI_EXPORT extern void TFE_DeleteTensorHandle(TFE_TensorHandle* h); diff --git a/tensorflow/c/eager/c_api_cluster_test.cc b/tensorflow/c/eager/c_api_cluster_test.cc index 8f585d6f02c..252a0408758 100644 --- a/tensorflow/c/eager/c_api_cluster_test.cc +++ b/tensorflow/c/eager/c_api_cluster_test.cc @@ -50,6 +50,13 @@ tensorflow::ServerDef GetServerDef(int num_tasks) { return GetServerDef("localhost", num_tasks); } +void ReplaceTaskInServerDef(tensorflow::ServerDef* server_def, int task_index) { + tensorflow::JobDef* job_def = server_def->mutable_cluster()->mutable_job(0); + int port = tensorflow::testing::PickUnusedPortOrDie(); + job_def->mutable_tasks()->at(task_index) = + tensorflow::strings::StrCat("localhost:", port); +} + void CheckTFE_TensorHandleHasFloats(TFE_TensorHandle* handle, const std::vector& expected_values) { std::unique_ptr status( @@ -101,6 +108,22 @@ void CheckRemoteMatMulExecutesOK(TFE_Context* ctx, TF_DeleteStatus(status); } +// Read the value of variable `var` and save it into `out_value`. 
+void ReadVariable(TFE_Context* ctx, TFE_TensorHandle* var, + TFE_TensorHandle** out_value) { + TF_Status* status = TF_NewStatus(); + TFE_Op* op = TFE_NewOp(ctx, "ReadVariableOp", status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_OpSetAttrType(op, "dtype", TF_FLOAT); + TFE_OpAddInput(op, var, status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + int num_retvals = 1; + TFE_Execute(op, out_value, &num_retvals, status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_DeleteOp(op); + TF_DeleteStatus(status); +} + void TestRemoteExecuteChangeServerDef(bool async) { tensorflow::ServerDef server_def = GetServerDef(2); @@ -243,6 +266,102 @@ TEST(CAPI, RemoteExecuteUpdateServerDefAsync) { TestRemoteExecuteUpdateServerDef(true); } +void TestRemoteExecuteUpdateServerDefResourceAccess(bool async) { + tensorflow::ServerDef server_def = GetServerDef(2); + // This server def has the task index set to 0. + string serialized = server_def.SerializeAsString(); + + server_def.set_task_index(1); + std::unique_ptr worker_server; + ASSERT_TRUE(tensorflow::GrpcServer::Create( + server_def, tensorflow::Env::Default(), &worker_server) + .ok()); + ASSERT_TRUE(worker_server->Start().ok()); + + TF_Status* status = TF_NewStatus(); + TFE_ContextOptions* opts = TFE_NewContextOptions(); + TFE_ContextOptionsSetAsync(opts, static_cast(async)); + TFE_ContextOptionsSetDevicePlacementPolicy(opts, TFE_DEVICE_PLACEMENT_SILENT); + TFE_Context* ctx = TFE_NewContext(opts, status); + EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_DeleteContextOptions(opts); + + TFE_ContextSetServerDef(ctx, 0, serialized.data(), serialized.size(), status); + EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + const char dev0_name[] = "/job:localhost/replica:0/task:0/device:CPU:0"; + const char dev1_name[] = "/job:localhost/replica:0/task:1/device:CPU:0"; + + TFE_TensorHandle* var_handle0 = TestVariable(ctx, 1.0, dev0_name); + 
EXPECT_NE(var_handle0, nullptr); + TFE_TensorHandle* var_handle1 = TestVariable(ctx, 2.0, dev1_name); + EXPECT_NE(var_handle1, nullptr); + + TFE_TensorHandle* value_handle = nullptr; + ReadVariable(ctx, var_handle1, &value_handle); + CheckTFE_TensorHandleHasFloats(value_handle, {2}); + TFE_DeleteTensorHandle(value_handle); + + // Start a new worker to replace task:1 + ReplaceTaskInServerDef(&server_def, 1); + server_def.set_task_index(1); + // TODO(b/136478427): Figure out how to correctly shut the server down. + worker_server.release(); + ASSERT_TRUE(tensorflow::GrpcServer::Create( + server_def, tensorflow::Env::Default(), &worker_server) + .ok()); + ASSERT_TRUE(worker_server->Start().ok()); + + // Update server def to replace the remote device with the device info on the + // new worker (different incarnation ID). + server_def.set_task_index(0); + string serialized_update = server_def.SerializeAsString(); + TFE_ContextUpdateServerDef(ctx, 0, serialized_update.data(), + serialized_update.size(), status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + + // The device of var_handle0 is local device which is the same before and + // after cluster update. Remove resource with valid device should succeed. + TFE_Op* op = TFE_NewOp(ctx, "DestroyResourceOp", status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_OpAddInput(op, var_handle0, status); + TFE_OpSetDevice(op, dev0_name, status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + int num_retvals = 0; + TFE_Execute(op, nullptr, &num_retvals, status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_DeleteOp(op); + + // The device of var_handle1 is remote device, which was replaced during + // cluster update. Removing resource with invalid device should fail + // gracefully (i.e., with error status) instead of crashing with segfaults. 
+ op = TFE_NewOp(ctx, "DestroyResourceOp", status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_OpAddInput(op, var_handle1, status); + TFE_OpSetDevice(op, dev1_name, status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + num_retvals = 0; + TFE_Execute(op, nullptr, &num_retvals, status); + EXPECT_NE(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_DeleteOp(op); + + TFE_DeleteTensorHandle(var_handle0); + TFE_DeleteTensorHandle(var_handle1); + + TFE_DeleteContext(ctx); + TF_DeleteStatus(status); + + // TODO(b/136478427): Figure out how to correctly shut the server down. + worker_server.release(); +} + +TEST(CAPI, TestRemoteExecuteUpdateServerDefResourceAccess) { + TestRemoteExecuteUpdateServerDefResourceAccess(false); +} + +TEST(CAPI, TestRemoteExecuteUpdateServerDefResourceAccessAsync) { + TestRemoteExecuteUpdateServerDefResourceAccess(true); +} + void TestRemoteExecuteUpdateServerDefWithFailures(bool async) { // Fail fast on GetStatus requests so we can get errors instead of timeout // when updating cluster with non-exsitent worker @@ -282,6 +401,7 @@ void TestRemoteExecuteUpdateServerDefWithFailures(bool async) { int port = tensorflow::testing::PickUnusedPortOrDie(); job_def->mutable_tasks()->insert( {2, tensorflow::strings::StrCat("localhost:", port)}); + server_def.set_task_index(0); string serialized_update = server_def.SerializeAsString(); TFE_ContextUpdateServerDef(ctx, 0, serialized_update.data(), serialized_update.size(), status); diff --git a/tensorflow/c/eager/c_api_experimental.cc b/tensorflow/c/eager/c_api_experimental.cc index 820650e315f..0d71b11531b 100644 --- a/tensorflow/c/eager/c_api_experimental.cc +++ b/tensorflow/c/eager/c_api_experimental.cc @@ -23,6 +23,7 @@ limitations under the License. 
#include "tensorflow/c/eager/tfe_op_internal.h" #include "tensorflow/c/eager/tfe_tensorhandle_internal.h" #include "tensorflow/c/tf_status_helper.h" +#include "tensorflow/core/common_runtime/composite_device.h" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/eager/eager_operation.h" #include "tensorflow/core/lib/monitoring/counter.h" @@ -638,3 +639,35 @@ TFE_TensorHandle* TFE_NewTensorHandleFromTensor(TFE_Context* ctx, TF_Tensor* t, return tensorflow::wrap( tensorflow::unwrap(ctx)->CreateLocalHandle(t->tensor)); } + +TFE_TensorHandle* TFE_CreatePackedTensorHandle(TFE_Context* ctx, + TFE_TensorHandle** handles, + int* num_handles, + TF_Status* status) { + std::vector tensor_handles; + tensor_handles.reserve(*num_handles); + for (int i = 0; i < *num_handles; ++i) { + tensor_handles.push_back( + tensorflow::TensorHandleFromInterface(tensorflow::unwrap(handles[i]))); + } + tensorflow::EagerContext* context = + tensorflow::ContextFromInterface(tensorflow::unwrap(ctx)); + tensorflow::TensorHandle* handle = nullptr; + status->status = tensorflow::TensorHandle::CreatePackedHandle( + std::move(tensor_handles), context, &handle); + return tensorflow::wrap(handle); +} + +void TFE_ContextSetSoftDevicePlacement(TFE_Context* ctx, unsigned char enable, + TF_Status* status) { + tensorflow::EagerContext* context = + tensorflow::ContextFromInterface(tensorflow::unwrap(ctx)); + context->SetAllowSoftPlacement(enable); +} + +void TFE_ContextSetLogDevicePlacement(TFE_Context* ctx, unsigned char enable, + TF_Status* status) { + tensorflow::EagerContext* context = + tensorflow::ContextFromInterface(tensorflow::unwrap(ctx)); + context->SetLogDevicePlacement(enable); +} diff --git a/tensorflow/c/eager/c_api_experimental.h b/tensorflow/c/eager/c_api_experimental.h index 33adce40da0..1b8efe61ee0 100644 --- a/tensorflow/c/eager/c_api_experimental.h +++ b/tensorflow/c/eager/c_api_experimental.h @@ -541,6 +541,26 @@ TF_CAPI_EXPORT extern TF_Tensor* 
TFE_AllocateHostTensor(TFE_Context* ctx, TF_CAPI_EXPORT TFE_TensorHandle* TFE_NewTensorHandleFromTensor( TFE_Context* ctx, TF_Tensor* t, TF_Status* status); +// Create a packed TensorHandle with the given list of TensorHandles. +// If `handles` are on the same device, assign the same device to the packed +// handle; if `handles` are on different devices, assign a CompositeDevice to +// it. +TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_CreatePackedTensorHandle( + TFE_Context* ctx, TFE_TensorHandle** handles, int* num_handles, + TF_Status* status); + +// Configure soft device placement policy for the eager executor. Note this +// policy is applied to any subsequent op executions. +TF_CAPI_EXPORT void TFE_ContextSetSoftDevicePlacement(TFE_Context* ctx, + unsigned char enable, + TF_Status* status); + +// Configure device placement policy logging for the eager executor. Note this +// policy is applied to any subsequent op executions. +TF_CAPI_EXPORT void TFE_ContextSetLogDevicePlacement(TFE_Context* ctx, + unsigned char enable, + TF_Status* status); + #ifdef __cplusplus } /* end extern "C" */ #endif diff --git a/tensorflow/c/eager/c_api_remote_test.cc b/tensorflow/c/eager/c_api_remote_test.cc index 0f988b1456d..12c63675c87 100644 --- a/tensorflow/c/eager/c_api_remote_test.cc +++ b/tensorflow/c/eager/c_api_remote_test.cc @@ -351,6 +351,192 @@ TEST(CAPI, RemoteExecuteSilentCopiesLocalAsyncFuncOrdering) { /*heavy_load_on_streaming_rpc=*/true); } +// Add the values of three variables on three different tasks. 
+string AddVariablesFunction() { + tensorflow::FunctionDef def; + CHECK(tensorflow::protobuf::TextFormat::ParseFromString( + " signature {" + " name: 'AddVariablesFunction'" + " input_arg {" + " name: 'var'" + " type: DT_RESOURCE" + " }" + " output_arg {" + " name: 'sum'" + " type: DT_FLOAT" + " }" + " }" + " node_def {" + " name: 'read0'" + " op: 'ReadVariableOp'" + " input: 'var'" + " device: '/job:localhost/replica:0/task:0/device:CPU:0'" + " attr {" + " key: 'dtype'" + " value {" + " type: DT_FLOAT" + " }" + " }" + " }" + " node_def {" + " name: 'read1'" + " op: 'ReadVariableOp'" + " input: 'var'" + " device: '/job:localhost/replica:0/task:1/device:CPU:0'" + " attr {" + " key: 'dtype'" + " value {" + " type: DT_FLOAT" + " }" + " }" + " }" + " node_def {" + " name: 'read2'" + " op: 'ReadVariableOp'" + " input: 'var'" + " device: '/job:localhost/replica:0/task:2/device:CPU:0'" + " attr {" + " key: 'dtype'" + " value {" + " type: DT_FLOAT" + " }" + " }" + " }" + " node_def {" + " name: 'add1'" + " op: 'Add'" + " input: 'read0:value:0'" + " input: 'read1:value:0'" + " attr {" + " key: 'T'" + " value {" + " type: DT_FLOAT" + " }" + " }" + " }" + " node_def {" + " name: 'add2'" + " op: 'Add'" + " input: 'add1:z:0'" + " input: 'read2:value:0'" + " attr {" + " key: 'T'" + " value {" + " type: DT_FLOAT" + " }" + " }" + " }" + " ret {" + " key: 'sum'" + " value: 'add2:z:0'" + " }", + &def)); + return def.SerializeAsString(); +} + +TEST(CAPI, TestFunctionWithPackedInput) { + tensorflow::ServerDef server_def = GetServerDef(3); + + // This server def has the task index set to 0. 
+ string serialized = server_def.SerializeAsString(); + + server_def.set_task_index(1); + std::unique_ptr worker_server1; + ASSERT_TRUE(tensorflow::GrpcServer::Create( + server_def, tensorflow::Env::Default(), &worker_server1) + .ok()); + ASSERT_TRUE(worker_server1->Start().ok()); + + server_def.set_task_index(2); + std::unique_ptr worker_server2; + ASSERT_TRUE(tensorflow::GrpcServer::Create( + server_def, tensorflow::Env::Default(), &worker_server2) + .ok()); + ASSERT_TRUE(worker_server2->Start().ok()); + + TF_Status* status = TF_NewStatus(); + TFE_ContextOptions* opts = TFE_NewContextOptions(); + TFE_ContextOptionsSetAsync(opts, static_cast(/*enable=*/true)); + TFE_ContextOptionsSetDevicePlacementPolicy(opts, TFE_DEVICE_PLACEMENT_SILENT); + TFE_Context* ctx = TFE_NewContext(opts, status); + EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + TFE_DeleteContextOptions(opts); + + TFE_ContextSetServerDef(ctx, 0, serialized.data(), serialized.size(), status); + EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + + const char task0_name[] = "/job:localhost/replica:0/task:0/device:CPU:0"; + const char task1_name[] = "/job:localhost/replica:0/task:1/device:CPU:0"; + const char task2_name[] = "/job:localhost/replica:0/task:2/device:CPU:0"; + + // Create one variable per task. + TFE_TensorHandle* h0 = TestVariable(ctx, 1.0, task0_name); + TFE_TensorHandle* h1 = TestVariable(ctx, 2.0, task1_name); + TFE_TensorHandle* h2 = TestVariable(ctx, 3.0, task2_name); + + // Pack 3 variable handles into one TFE_TensorHandle. 
+ int num_replicas = 3; + std::vector handles = {h0, h1, h2}; + TFE_TensorHandle* packed_handle = + TFE_CreatePackedTensorHandle(ctx, handles.data(), &num_replicas, status); + ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + EXPECT_EQ(TFE_TensorHandleDataType(packed_handle), TF_RESOURCE); + EXPECT_EQ(TFE_TensorHandleNumDims(packed_handle, status), 0); + EXPECT_EQ(TFE_TensorHandleNumElements(packed_handle, status), 1); + + const string composite_device_name = + "/job:localhost/replica:0/task:0/device:COMPOSITE:0"; + EXPECT_EQ(TFE_TensorHandleDeviceName(packed_handle, status), + composite_device_name); + EXPECT_EQ(TFE_TensorHandleBackingDeviceName(packed_handle, status), + composite_device_name); + ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + + // Register and run a function which returns the sum of 3 variables. + const string function_def = AddVariablesFunction(); + TFE_ContextAddFunctionDef(ctx, function_def.data(), function_def.size(), + status); + ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + + TFE_Op* func = TFE_NewOp(ctx, "AddVariablesFunction", status); + ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + TFE_OpAddInput(func, packed_handle, status); + ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + + TFE_TensorHandle* retvals[1] = {nullptr}; + int num_retvals = 1; + TFE_Execute(func, &retvals[0], &num_retvals, status); + EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + ASSERT_EQ(1, num_retvals); + TFE_DeleteOp(func); + TFE_DeleteTensorHandle(packed_handle); + TF_Tensor* t = TFE_TensorHandleResolve(retvals[0], status); + ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + TFE_DeleteTensorHandle(retvals[0]); + float sum = 0; + EXPECT_EQ(sizeof(sum), TF_TensorByteSize(t)); + memcpy(&sum, TF_TensorData(t), TF_TensorByteSize(t)); + TF_DeleteTensor(t); + EXPECT_EQ(sum, 6.0); + + TFE_DeleteTensorHandle(h0); + TFE_DeleteTensorHandle(h1); + TFE_DeleteTensorHandle(h2); + + 
TFE_Executor* executor = TFE_ContextGetExecutorForThread(ctx); + TFE_ExecutorWaitForAllPendingNodes(executor, status); + ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + TFE_DeleteExecutor(executor); + TFE_ContextRemoveFunction(ctx, "AddVariablesFunction", status); + TFE_DeleteContext(ctx); + + TF_DeleteStatus(status); + + // TODO(b/136478427): Figure out how to correctly shut the server down. + worker_server1.release(); + worker_server2.release(); +} + void TestRemoteExecuteDeleteContextWithOutstandingRPC(bool async) { tensorflow::ServerDef server_def = GetServerDef(2); diff --git a/tensorflow/c/eager/c_api_test.cc b/tensorflow/c/eager/c_api_test.cc index 3160cb0e585..724176505ba 100644 --- a/tensorflow/c/eager/c_api_test.cc +++ b/tensorflow/c/eager/c_api_test.cc @@ -1132,51 +1132,6 @@ void BM_ExecuteFunction(int iters, int async) { } BENCHMARK(BM_ExecuteFunction)->Arg(0)->Arg(1); -TFE_TensorHandle* CreateVariable(TFE_Context* ctx, float value, - TF_Status* status) { - // Create the variable handle. - TFE_Op* op = TFE_NewOp(ctx, "VarHandleOp", status); - if (TF_GetCode(status) != TF_OK) return nullptr; - TFE_OpSetAttrType(op, "dtype", TF_FLOAT); - TFE_OpSetAttrShape(op, "shape", {}, 0, status); - TFE_OpSetAttrString(op, "container", "", 0); - TFE_OpSetAttrString(op, "shared_name", "", 0); - if (TF_GetCode(status) != TF_OK) return nullptr; - TFE_TensorHandle* var_handle = nullptr; - int num_retvals = 1; - TFE_Execute(op, &var_handle, &num_retvals, status); - TFE_DeleteOp(op); - if (TF_GetCode(status) != TF_OK) return nullptr; - CHECK_EQ(1, num_retvals); - - // Assign 'value' to it. - op = TFE_NewOp(ctx, "AssignVariableOp", status); - if (TF_GetCode(status) != TF_OK) return nullptr; - TFE_OpSetAttrType(op, "dtype", TF_FLOAT); - TFE_OpAddInput(op, var_handle, status); - - // Convert 'value' to a TF_Tensor then a TFE_TensorHandle. 
- std::unique_ptr t( - TF_AllocateTensor(TF_FLOAT, nullptr, 0, sizeof(value)), TF_DeleteTensor); - memcpy(TF_TensorData(t.get()), &value, TF_TensorByteSize(t.get())); - - std::unique_ptr - value_handle(TFE_NewTensorHandle(t.get(), status), - TFE_DeleteTensorHandle); - if (TF_GetCode(status) != TF_OK) return nullptr; - - TFE_OpAddInput(op, value_handle.get(), status); - if (TF_GetCode(status) != TF_OK) return nullptr; - - num_retvals = 0; - TFE_Execute(op, nullptr, &num_retvals, status); - TFE_DeleteOp(op); - if (TF_GetCode(status) != TF_OK) return nullptr; - CHECK_EQ(0, num_retvals); - - return var_handle; -} - TEST(CAPI, Variables) { // Variables use resource handles, so this is really a test for resource // tensor handling. @@ -1186,7 +1141,7 @@ TEST(CAPI, Variables) { ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_DeleteContextOptions(opts); - TFE_TensorHandle* var_handle = CreateVariable(ctx, 12.0, status); + TFE_TensorHandle* var_handle = TestVariable(ctx, 12.0); ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_Op* op = TFE_NewOp(ctx, "ReadVariableOp", status); @@ -1227,7 +1182,7 @@ void BM_ReadVariable(int iters) { CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_DeleteContextOptions(opts); - TFE_TensorHandle* var_handle = CreateVariable(ctx, 5.0, status); + TFE_TensorHandle* var_handle = TestVariable(ctx, 5.0); CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_Op* op = TFE_NewOp(ctx, "ReadVariableOp", status); @@ -1248,6 +1203,8 @@ void BM_ReadVariable(int iters) { CHECK_EQ(0, TFE_TensorHandleNumDims(h, status)); CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); h = nullptr; + TFE_OpAddInput(op, var_handle, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); } tensorflow::testing::StopTiming(); TFE_DeleteOp(op); diff --git a/tensorflow/c/eager/c_api_test_util.cc b/tensorflow/c/eager/c_api_test_util.cc index e67e17963b3..29b624b8537 100644 --- 
a/tensorflow/c/eager/c_api_test_util.cc +++ b/tensorflow/c/eager/c_api_test_util.cc @@ -133,6 +133,58 @@ TFE_TensorHandle* TestMatrixTensorHandle3X2(TFE_Context* ctx) { return th; } +TFE_TensorHandle* TestVariable(TFE_Context* ctx, float value, + const tensorflow::string& device_name) { + TF_Status* status = TF_NewStatus(); + // Create the variable handle. + TFE_Op* op = TFE_NewOp(ctx, "VarHandleOp", status); + if (TF_GetCode(status) != TF_OK) return nullptr; + TFE_OpSetAttrType(op, "dtype", TF_FLOAT); + TFE_OpSetAttrShape(op, "shape", {}, 0, status); + TFE_OpSetAttrString(op, "container", "", 0); + TFE_OpSetAttrString(op, "shared_name", "", 0); + if (!device_name.empty()) { + TFE_OpSetDevice(op, device_name.c_str(), status); + } + if (TF_GetCode(status) != TF_OK) return nullptr; + TFE_TensorHandle* var_handle = nullptr; + int num_retvals = 1; + TFE_Execute(op, &var_handle, &num_retvals, status); + if (TF_GetCode(status) != TF_OK) return nullptr; + TFE_DeleteOp(op); + if (TF_GetCode(status) != TF_OK) return nullptr; + CHECK_EQ(1, num_retvals); + + // Assign 'value' to it. + op = TFE_NewOp(ctx, "AssignVariableOp", status); + if (TF_GetCode(status) != TF_OK) return nullptr; + TFE_OpSetAttrType(op, "dtype", TF_FLOAT); + TFE_OpAddInput(op, var_handle, status); + + // Convert 'value' to a TF_Tensor then a TFE_TensorHandle. 
+ std::unique_ptr t( + TF_AllocateTensor(TF_FLOAT, nullptr, 0, sizeof(value)), TF_DeleteTensor); + memcpy(TF_TensorData(t.get()), &value, TF_TensorByteSize(t.get())); + + std::unique_ptr + value_handle(TFE_NewTensorHandle(t.get(), status), + TFE_DeleteTensorHandle); + if (TF_GetCode(status) != TF_OK) return nullptr; + + TFE_OpAddInput(op, value_handle.get(), status); + if (TF_GetCode(status) != TF_OK) return nullptr; + + num_retvals = 0; + TFE_Execute(op, nullptr, &num_retvals, status); + TFE_DeleteOp(op); + if (TF_GetCode(status) != TF_OK) return nullptr; + CHECK_EQ(0, num_retvals); + + TF_DeleteStatus(status); + + return var_handle; +} + TFE_Op* AddOp(TFE_Context* ctx, TFE_TensorHandle* a, TFE_TensorHandle* b) { TF_Status* status = TF_NewStatus(); diff --git a/tensorflow/c/eager/c_api_test_util.h b/tensorflow/c/eager/c_api_test_util.h index 11ae6d1181b..4c43f8d5833 100644 --- a/tensorflow/c/eager/c_api_test_util.h +++ b/tensorflow/c/eager/c_api_test_util.h @@ -42,6 +42,11 @@ TFE_TensorHandle* DoubleTestMatrixTensorHandle3X2(TFE_Context* ctx); // Return a tensor handle containing a 3x2 matrix of floats TFE_TensorHandle* TestMatrixTensorHandle3X2(TFE_Context* ctx); +// Return a variable handle referring to a variable with the given initial value +// on the given device. +TFE_TensorHandle* TestVariable(TFE_Context* ctx, float value, + const tensorflow::string& device_name = ""); + // Return an add op multiplying `a` by `b`. 
TFE_Op* AddOp(TFE_Context* ctx, TFE_TensorHandle* a, TFE_TensorHandle* b); diff --git a/tensorflow/c/eager/c_api_unified_experimental_test.cc b/tensorflow/c/eager/c_api_unified_experimental_test.cc index 170b82333d8..bd99189852e 100644 --- a/tensorflow/c/eager/c_api_unified_experimental_test.cc +++ b/tensorflow/c/eager/c_api_unified_experimental_test.cc @@ -29,7 +29,7 @@ using tensorflow::string; namespace tensorflow { namespace { -TEST(UnifedCAPI, TestBasicEager) { +TEST(UnifiedCAPI, TestBasicEager) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); TFE_ContextOptions* opts = TFE_NewContextOptions(); @@ -81,7 +81,7 @@ TEST(UnifedCAPI, TestBasicEager) { TF_DeleteExecutionContext(ctx); } -TEST(UnifedCAPI, TestBasicGraph) { +TEST(UnifiedCAPI, TestBasicGraph) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); TF_ExecutionContext* graph_ctx = TF_NewGraphExecutionContext(status.get()); @@ -131,6 +131,7 @@ TEST(UnifedCAPI, TestBasicGraph) { string fn_name = "double"; TF_AbstractFunction* func = TF_ExecutionContextToFunction( graph_ctx, fn_name.c_str(), 1, placeholder_t, 1, output_t, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); TF_DeleteAbstractTensor(placeholder_t); TF_DeleteAbstractTensor(output_t); @@ -184,7 +185,7 @@ TEST(UnifedCAPI, TestBasicGraph) { TF_DeleteExecutionContext(eager_execution_ctx); } -TEST(UnifedCAPI, TF_ExecutionContextToFunctionWithEagerContextRaises) { +TEST(UnifiedCAPI, TF_ExecutionContextToFunctionWithEagerContextRaises) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); TFE_ContextOptions* opts = TFE_NewContextOptions(); @@ -200,7 +201,7 @@ TEST(UnifedCAPI, TF_ExecutionContextToFunctionWithEagerContextRaises) { TF_DeleteExecutionContext(ctx); } -TEST(UnifedCAPI, TF_CallingSetOpTypeAfterFinishingOpBuildingRaises) { +TEST(UnifiedCAPI, TF_CallingSetOpTypeAfterFinishingOpBuildingRaises) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); TF_ExecutionContext* graph_ctx = 
TF_NewGraphExecutionContext(status.get()); @@ -221,7 +222,7 @@ TEST(UnifedCAPI, TF_CallingSetOpTypeAfterFinishingOpBuildingRaises) { TF_DeleteExecutionContext(graph_ctx); } -TEST(UnifedCAPI, TF_CallingSetOpNameAfterFinishingOpBuildingRaises) { +TEST(UnifiedCAPI, TF_CallingSetOpNameAfterFinishingOpBuildingRaises) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); TF_ExecutionContext* graph_ctx = TF_NewGraphExecutionContext(status.get()); @@ -242,7 +243,7 @@ TEST(UnifedCAPI, TF_CallingSetOpNameAfterFinishingOpBuildingRaises) { TF_DeleteExecutionContext(graph_ctx); } -TEST(UnifedCAPI, TestExecutingEagerOpInGraphModeRaises) { +TEST(UnifiedCAPI, TestExecutingEagerOpInGraphModeRaises) { // Build an Eager context. std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); @@ -288,7 +289,7 @@ TEST(UnifedCAPI, TestExecutingEagerOpInGraphModeRaises) { TF_DeleteExecutionContext(graph_ctx); } -TEST(UnifedCAPI, TestExecutingGraphOpInEagerModeRaises) { +TEST(UnifiedCAPI, TestExecutingGraphOpInEagerModeRaises) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); TF_ExecutionContext* graph_ctx = TF_NewGraphExecutionContext(status.get()); diff --git a/tensorflow/c/eager/context_interface.h b/tensorflow/c/eager/context_interface.h index 9377bf0be12..d21ab45e579 100644 --- a/tensorflow/c/eager/context_interface.h +++ b/tensorflow/c/eager/context_interface.h @@ -59,6 +59,20 @@ class AbstractContextInterface { virtual AbstractTensorInterface* CreateTensor( DataType dtype, absl::Span dim_sizes) = 0; + typedef void (*MemoryReleaser)(void* data, size_t len, void* arg); + + // Create a tensor instance from the given data buffer and description. + // `memory_releaser` will be called on destruction, and it's responsible for + // cleaning up the underlying buffer. `convert_string` indicates whether it + // has to handle tstring conversion. Expected to be removed once tstring + // migration is done. 
+ virtual AbstractTensorInterface* CreateTensor(DataType dtype, + const int64_t* dims, + int num_dims, void* data, + size_t len, bool convert_string, + MemoryReleaser memory_releaser, + void* memory_releaser_arg) = 0; + // Create a handle to wrap and manage a Tensor virtual AbstractTensorHandleInterface* CreateLocalHandle( AbstractTensorInterface* t) = 0; diff --git a/tensorflow/c/eager/parallel_device/BUILD b/tensorflow/c/eager/parallel_device/BUILD index f4dbcc6cead..3b2640e14d1 100644 --- a/tensorflow/c/eager/parallel_device/BUILD +++ b/tensorflow/c/eager/parallel_device/BUILD @@ -27,6 +27,7 @@ cc_library( name = "parallel_device", srcs = [":sources"], hdrs = [":headers"], + visibility = ["//tensorflow:internal"], deps = [ "//tensorflow/c:c_api", "//tensorflow/c/eager:c_api", @@ -43,6 +44,7 @@ tf_cc_test( srcs = ["parallel_device_test.cc"], deps = [ ":parallel_device", + ":parallel_device_ops", "//tensorflow/c:c_api", "//tensorflow/c:c_api_experimental", "//tensorflow/c/eager:c_api", @@ -52,3 +54,19 @@ tf_cc_test( "//tensorflow/core:test_main", ], ) + +# Note: ParallelDevice-specific ops are experimental and not currently linked in +# to TensorFlow by default, just used in a few tests. 
+filegroup( + name = "parallel_device_ops_srcs", + srcs = ["parallel_device_ops.cc"], + visibility = ["//tensorflow/python/distribute/parallel_device:__pkg__"], +) + +cc_library( + name = "parallel_device_ops", + srcs = [":parallel_device_ops_srcs"], + visibility = ["//tensorflow:internal"], + deps = ["//tensorflow/core:framework"], + alwayslink = 1, +) diff --git a/tensorflow/c/eager/parallel_device/parallel_device.cc b/tensorflow/c/eager/parallel_device/parallel_device.cc index e6846809fcf..27c2699c4c2 100644 --- a/tensorflow/c/eager/parallel_device/parallel_device.cc +++ b/tensorflow/c/eager/parallel_device/parallel_device.cc @@ -92,6 +92,10 @@ class ParallelDevice { TFE_TensorHandle* tensor, TF_Status* status) const; + // A parallel tensor with scalar integers numbering component devices. + std::unique_ptr DeviceIDs(TFE_Context* context, + TF_Status* status) const; + // Takes a description of a single operation being executed on the // ParallelDevice, and in turn runs one operation per component device with // its corresponding inputs from the input ParallelTensors (or @@ -208,6 +212,46 @@ std::unique_ptr ParallelDevice::CopyToParallelDevice( status); } +std::unique_ptr ParallelDevice::DeviceIDs( + TFE_Context* context, TF_Status* status) const { + // TODO(allenl): We could cache DeviceIDs (keyed by context). + std::vector components; + components.reserve(underlying_devices_.size()); + for (int device_index = 0; device_index < underlying_devices_.size(); + ++device_index) { + int64_t* device_id = new int64_t; + *device_id = device_index; + std::unique_ptr tensor( + TF_NewTensor( + TF_INT64, /*dims=*/nullptr, /*num_dims=*/0, device_id, + sizeof(int64_t), + [](void* data, size_t, void* arg) { + delete reinterpret_cast(data); + }, + nullptr), + TF_DeleteTensor); + // TODO(allenl): Here and when executing regular operations, we could hold + // on to one TFE_Op per device and just call TFE_ResetOp to avoid parsing + // device names repeatedly. 
+ OpPtr const_op(TFE_NewOp(context, "Const", status)); + if (TF_GetCode(status) != TF_OK) return nullptr; + TFE_OpSetDevice(const_op.get(), underlying_devices_[device_index].c_str(), + status); + if (TF_GetCode(status) != TF_OK) return nullptr; + TFE_OpSetAttrTensor(const_op.get(), "value", tensor.get(), status); + if (TF_GetCode(status) != TF_OK) return nullptr; + TFE_OpSetAttrType(const_op.get(), "dtype", TF_INT64); + TFE_TensorHandle* device_handle; + int num_outputs = 1; + TFE_Execute(const_op.get(), &device_handle, &num_outputs, status); + if (TF_GetCode(status) != TF_OK) return nullptr; + components.emplace_back(device_handle); + if (TF_GetCode(status) != TF_OK) return nullptr; + } + return ParallelTensor::FromTensorHandles(*this, std::move(components), + status); +} + absl::optional> ParallelDevice::Execute( TFE_Context* context, std::vector inputs, const char* operation_name, const TFE_OpAttrs* attributes, @@ -282,6 +326,13 @@ absl::optional> ParallelDevice::Execute( } result.emplace(std::move(outputs)); return result; + } else if (operation_name == std::string("DeviceID")) { + std::vector result_content; + result_content.reserve(1); + result_content.push_back(DeviceIDs(context, status)); + if (TF_GetCode(status) != TF_OK) return result; + result.emplace(std::move(result_content)); + return result; } absl::optional>> maybe_parallel_results( diff --git a/tensorflow/c/eager/parallel_device/parallel_device_ops.cc b/tensorflow/c/eager/parallel_device/parallel_device_ops.cc new file mode 100644 index 00000000000..1decffca047 --- /dev/null +++ b/tensorflow/c/eager/parallel_device/parallel_device_ops.cc @@ -0,0 +1,26 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" + +// TODO(allenl): Figure out if we need this op, and if so whether we should move +// it to core TF. Right now the eager C API does some checking of op +// registrations before calling into custom devices, but we may be able to avoid +// that. +REGISTER_OP("DeviceID") + .Output("device_id: int64") + .SetIsStateful() + .SetShapeFn(tensorflow::shape_inference::ScalarShape); diff --git a/tensorflow/c/eager/parallel_device/parallel_device_test.cc b/tensorflow/c/eager/parallel_device/parallel_device_test.cc index 9b0613b0391..fdc140407df 100644 --- a/tensorflow/c/eager/parallel_device/parallel_device_test.cc +++ b/tensorflow/c/eager/parallel_device/parallel_device_test.cc @@ -278,14 +278,15 @@ TensorHandlePtr Multiply(TFE_Context* context, TFE_TensorHandle* first, } // Assert that `handle` is equal to `expected_value`. 
-void AssertScalarFloatEq(TFE_TensorHandle* handle, float expected_value) { +template +void ExpectScalarEq(TFE_TensorHandle* handle, value_type expected_value) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); std::unique_ptr value_zero( TFE_TensorHandleResolve(handle, status.get()), TF_DeleteTensor); ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); - ASSERT_EQ(expected_value, - *static_cast(TF_TensorData(value_zero.get()))); + EXPECT_EQ(expected_value, + *static_cast(TF_TensorData(value_zero.get()))); } template @@ -343,8 +344,8 @@ void BasicTestsForTwoDevices(TFE_Context* context, const char* first_device, ExtractPerDeviceValues(context, read.get(), &components, status.get()); ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); - AssertScalarFloatEq(components[0].get(), 20.); - AssertScalarFloatEq(components[1].get(), 20.); + ExpectScalarEq(components[0].get(), 20.); + ExpectScalarEq(components[1].get(), 20.); std::string first_device = TFE_TensorHandleBackingDeviceName(components[0].get(), status.get()); @@ -373,8 +374,8 @@ void BasicTestsForTwoDevices(TFE_Context* context, const char* first_device, ExtractPerDeviceValues(context, read.get(), &components, status.get()); ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); - AssertScalarFloatEq(components[0].get(), 23.); - AssertScalarFloatEq(components[1].get(), 18.); + ExpectScalarEq(components[0].get(), 23.); + ExpectScalarEq(components[1].get(), 18.); std::string first_device = TFE_TensorHandleBackingDeviceName(components[0].get(), status.get()); @@ -383,6 +384,32 @@ void BasicTestsForTwoDevices(TFE_Context* context, const char* first_device, TFE_TensorHandleBackingDeviceName(components[1].get(), status.get()); ASSERT_EQ(underlying_devices[1], second_device); } + // Compute the device ID twice and verify the result + for (int i = 0; i < 2; ++i) { + std::unique_ptr op( + TFE_NewOp(context, "DeviceID", status.get()), 
TFE_DeleteOp); + ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); + TFE_OpSetDevice(op.get(), device_name, status.get()); + ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); + + TFE_TensorHandle* result_handle; + int num_retvals = 1; + TFE_Execute(op.get(), &result_handle, &num_retvals, status.get()); + ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); + std::array components; + ExtractPerDeviceValues(context, result_handle, &components, status.get()); + TFE_DeleteTensorHandle(result_handle); + ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); + + ExpectScalarEq(components[0].get(), 0); + ExpectScalarEq(components[1].get(), 1); + std::string first_device = + TFE_TensorHandleBackingDeviceName(components[0].get(), status.get()); + ASSERT_EQ(underlying_devices[0], first_device); + std::string second_device = + TFE_TensorHandleBackingDeviceName(components[1].get(), status.get()); + ASSERT_EQ(underlying_devices[1], second_device); + } } TEST(PARALLEL_DEVICE, TestBasicCPU) { @@ -498,8 +525,8 @@ TEST(PARALLEL_DEVICE, TestExplicitCopies) { ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); // The value of the original tensor is replicated on each device. - AssertScalarFloatEq(components[0].get(), 3.); - AssertScalarFloatEq(components[1].get(), 3.); + ExpectScalarEq(components[0].get(), 3.); + ExpectScalarEq(components[1].get(), 3.); // Verify that the mirrors are placed on the component devices. std::string first_device = @@ -630,7 +657,7 @@ TEST(PARALLEL_DEVICE, TestNestedParallelDevices) { &second_components, status.get()); ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); - AssertScalarFloatEq(second_components[1].get(), 9.); + ExpectScalarEq(second_components[1].get(), 9.); // Verify that the mirrors are placed on the component devices. 
std::string first_device = TFE_TensorHandleBackingDeviceName( @@ -644,8 +671,8 @@ TEST(PARALLEL_DEVICE, TestNestedParallelDevices) { std::array first_components; ExtractPerDeviceValues(context.get(), second_components[0].get(), &first_components, status.get()); - AssertScalarFloatEq(first_components[0].get(), 3.); - AssertScalarFloatEq(first_components[1].get(), 6.); + ExpectScalarEq(first_components[0].get(), 3.); + ExpectScalarEq(first_components[1].get(), 6.); first_device = TFE_TensorHandleBackingDeviceName(first_components[0].get(), status.get()); @@ -806,8 +833,8 @@ TEST(PARALLEL_DEVICE, TestCollective) { ExtractPerDeviceValues(context.get(), reduced.get(), &result_components, status.get()); ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); - AssertScalarFloatEq(result_components[0].get(), 3.); - AssertScalarFloatEq(result_components[1].get(), 3.); + ExpectScalarEq(result_components[0].get(), 3.); + ExpectScalarEq(result_components[1].get(), 3.); } void RegisterCollectiveMulFunction(TFE_Context* context, @@ -909,8 +936,8 @@ TEST(PARALLEL_DEVICE, TestFunction) { ExtractPerDeviceValues(context.get(), reduced.get(), &result_components, status.get()); ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); - AssertScalarFloatEq(result_components[0].get(), 7. * 9.); - AssertScalarFloatEq(result_components[1].get(), 7. * 9.); + ExpectScalarEq(result_components[0].get(), 7. * 9.); + ExpectScalarEq(result_components[1].get(), 7. 
* 9.); std::string first_device = TFE_TensorHandleBackingDeviceName( result_components[0].get(), status.get()); diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD index e8cb40f153b..e1fad8e697a 100644 --- a/tensorflow/cc/BUILD +++ b/tensorflow/cc/BUILD @@ -178,7 +178,7 @@ cc_library_with_android_deps( name = "ops", srcs = ["framework/ops.cc"], hdrs = ["framework/ops.h"], - android_deps = ["//tensorflow/core:android_tensorflow_lib"], + android_deps = ["//tensorflow/core:portable_tensorflow_lib"], deps = [ "//tensorflow/core:core_cpu", "//tensorflow/core:framework", @@ -197,7 +197,7 @@ cc_library_with_android_deps( "framework/scope_internal.h", ], hdrs = ["framework/scope.h"], - android_deps = ["//tensorflow/core:android_tensorflow_lib"], + android_deps = ["//tensorflow/core:portable_tensorflow_lib"], common_deps = [ ":ops", ], @@ -237,7 +237,7 @@ cc_library_with_android_deps( name = "client_session", srcs = ["client/client_session.cc"], hdrs = ["client/client_session.h"], - android_deps = ["//tensorflow/core:android_tensorflow_lib"], + android_deps = ["//tensorflow/core:portable_tensorflow_lib"], common_deps = [ ":ops", ":scope", @@ -275,7 +275,7 @@ cc_library_with_android_deps( srcs = ["ops/const_op.cc"], hdrs = ["ops/const_op.h"], android_deps = [ - "//tensorflow/core:android_tensorflow_lib", + "//tensorflow/core:portable_tensorflow_lib", ], common_deps = [ ":ops", @@ -304,7 +304,7 @@ cc_library_with_android_deps( srcs = ["ops/while_loop.cc"], hdrs = ["ops/while_loop.h"], android_deps = [ - "//tensorflow/core:android_tensorflow_lib", + "//tensorflow/core:portable_tensorflow_lib", ], common_deps = [ ":cc_ops", diff --git a/tensorflow/cc/experimental/base/public/BUILD b/tensorflow/cc/experimental/base/public/BUILD index 4249d7918c8..045d4e6cd97 100644 --- a/tensorflow/cc/experimental/base/public/BUILD +++ b/tensorflow/cc/experimental/base/public/BUILD @@ -57,7 +57,22 @@ cc_library( "tensor.h", ], deps = [ + ":status", "//tensorflow/c:tf_datatype", 
"//tensorflow/c:tf_tensor", ], ) + +cc_library( + name = "tensorhandle", + hdrs = [ + "tensorhandle.h", + ], + deps = [ + ":runtime", + ":status", + ":tensor", + "//tensorflow/c/eager:c_api", + "//tensorflow/c/eager:c_api_experimental", + ], +) diff --git a/tensorflow/cc/experimental/base/public/runtime.h b/tensorflow/cc/experimental/base/public/runtime.h index 47fd8869647..711a38c233a 100644 --- a/tensorflow/cc/experimental/base/public/runtime.h +++ b/tensorflow/cc/experimental/base/public/runtime.h @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/c/eager/c_api_experimental.h" namespace tensorflow { +namespace experimental { namespace cc { // Runtime represents an opaque instance of a Tensorflow runtime, with its own @@ -40,6 +41,7 @@ class Runtime { private: friend class RuntimeBuilder; friend class SavedModelAPI; + friend class TensorHandle; // Wraps a TFE_Context. Takes ownership of ctx. explicit Runtime(TFE_Context* ctx) : ctx_(ctx) {} @@ -63,6 +65,7 @@ class Runtime { }; } // namespace cc +} // namespace experimental } // namespace tensorflow #endif // TENSORFLOW_CC_EXPERIMENTAL_BASE_PUBLIC_RUNTIME_H_ diff --git a/tensorflow/cc/experimental/base/public/runtime_builder.h b/tensorflow/cc/experimental/base/public/runtime_builder.h index ed3c93ae135..737e06cb2c6 100644 --- a/tensorflow/cc/experimental/base/public/runtime_builder.h +++ b/tensorflow/cc/experimental/base/public/runtime_builder.h @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/cc/experimental/base/public/status.h" namespace tensorflow { +namespace experimental { namespace cc { // RuntimeBuilder is a builder used to construct a tensorflow::cc::Runtime. 
@@ -79,6 +80,7 @@ inline std::unique_ptr RuntimeBuilder::Build(Status* status) { } } // namespace cc +} // namespace experimental } // namespace tensorflow #endif // TENSORFLOW_CC_EXPERIMENTAL_BASE_PUBLIC_RUNTIME_BUILDER_H_ diff --git a/tensorflow/cc/experimental/base/public/status.h b/tensorflow/cc/experimental/base/public/status.h index f91f2caccd8..98c8cf6ced2 100644 --- a/tensorflow/cc/experimental/base/public/status.h +++ b/tensorflow/cc/experimental/base/public/status.h @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/c/tf_status.h" namespace tensorflow { +namespace experimental { namespace cc { // Status is a wrapper around an error code and an optional error message. @@ -57,6 +58,7 @@ class Status { friend class RuntimeBuilder; friend class Runtime; friend class SavedModelAPI; + friend class TensorHandle; // Wraps a TF_Status*, and takes ownership of it. explicit Status(TF_Status* status) : status_(status) {} @@ -88,6 +90,7 @@ inline void Status::SetStatus(TF_Code code, const std::string& msg) { } } // namespace cc +} // namespace experimental } // namespace tensorflow #endif // TENSORFLOW_CC_EXPERIMENTAL_BASE_PUBLIC_STATUS_H_ diff --git a/tensorflow/cc/experimental/base/public/tensor.h b/tensorflow/cc/experimental/base/public/tensor.h index 1afdbcad50c..fc447262ce1 100644 --- a/tensorflow/cc/experimental/base/public/tensor.h +++ b/tensorflow/cc/experimental/base/public/tensor.h @@ -19,30 +19,53 @@ limitations under the License. #include #include +#include #include +#include #include "tensorflow/c/tf_datatype.h" #include "tensorflow/c/tf_tensor.h" +#include "tensorflow/cc/experimental/base/public/status.h" namespace tensorflow { +namespace experimental { namespace cc { // Tensor represents an n-dimensional array of values. 
class Tensor { public: - // TODO(bmzhao): Add a factory function that constructs a Tensor from a char - // buffer, with an options struct (to specify the buffer's layout, device?, - // whether to create a TFRT or TF tensor, whether we should take ownership of - // the memory, etc). This requires extending TF_NewTensor with an options - // struct: - // https://github.com/tensorflow/tensorflow/blob/3c520614a3c056d56afdc79b59979b9b0087f8b9/tensorflow/c/tf_tensor.h#L77-L80 + using DeleterCallback = std::function; + + // Constructs a Tensor from user provided buffer. + // + // Params: + // dtype - The dtype of the tensor's data. + // shape - A shape vector, where each element corresponds to the size of + // the tensor's corresponding dimension. + // data - Pointer to a buffer of memory to construct a Tensor out of. + // len - The length (in bytes) of `data` + // deleter - A std::function to be called when the Tensor no longer needs the + // memory in `data`. This can be used to free `data`, or + // perhaps decrement a refcount associated with `data`, etc. + // status - Set to OK on success and an error on failure. + // Returns: + // If an error occurred, status->ok() will be false, and the returned + // Tensor must not be used. + // TODO(bmzhao): Add Runtime as an argument to this function so we can swap to + // a TFRT backed tensor. + // TODO(bmzhao): Add benchmarks on overhead for this function; we can + // consider using int64_t* + length rather than vector. + static Tensor FromBuffer(TF_DataType dtype, const std::vector& shape, + void* data, size_t len, DeleterCallback deleter, + Status* status); // TODO(bmzhao): In the case we construct a tensor from non-owned memory, // we should offer a way to deep copy the tensor into a new tensor, which // owns the underlying memory. This could be a .deepcopy()/clone() method. // TODO(bmzhao): In the future, we want to relax the non-copyability - // constraint. 
To do so, we can add a C API function that acts like CopyFrom: + // constraint. To do so, we can add a C API function that acts like + // CopyFrom: // https://github.com/tensorflow/tensorflow/blob/08931c1e3e9eb2e26230502d678408e66730826c/tensorflow/core/framework/tensor.h#L301-L311 // Tensor is movable, but not copyable @@ -85,6 +108,16 @@ class Tensor { // This object retains ownership of the pointer. TF_Tensor* GetTFTensor() const { return tensor_.get(); } + struct DeleterStruct { + std::function deleter; + }; + + static void DeleterFunction(void* memory, size_t len, void* deleter_struct) { + DeleterStruct* deleter = reinterpret_cast(deleter_struct); + deleter->deleter(memory, len); + delete deleter; + } + struct TFTensorDeleter { void operator()(TF_Tensor* p) const { TF_DeleteTensor(p); } }; @@ -111,7 +144,32 @@ inline size_t Tensor::num_bytes() const { return TF_TensorByteSize(tensor_.get()); } +inline Tensor Tensor::FromBuffer(TF_DataType dtype, + const std::vector& shape, void* data, + size_t len, DeleterCallback deleter, + Status* status) { + // Credit to apassos@ for this technique: + // Despite the fact that our API takes a std::function deleter, we are able + // to maintain ABI stability because: + // 1. Only a function pointer is sent across the C API (&DeleterFunction) + // 2. DeleterFunction is defined in the same build artifact that constructed + // the std::function (so there isn't confusion about std::function ABI). + // Note that 2. is satisfied by the fact that this is a header-only API, where + // the function implementations are inline. 
+ + DeleterStruct* deleter_struct = new DeleterStruct{deleter}; + TF_Tensor* tensor = TF_NewTensor(dtype, shape.data(), shape.size(), data, len, + &DeleterFunction, deleter_struct); + if (tensor == nullptr) { + status->SetStatus(TF_INVALID_ARGUMENT, + "Failed to create tensor for input buffer"); + return Tensor(nullptr); + } + return Tensor(tensor); +} + } // namespace cc +} // namespace experimental } // namespace tensorflow #endif // TENSORFLOW_CC_EXPERIMENTAL_BASE_PUBLIC_TENSOR_H_ diff --git a/tensorflow/cc/experimental/base/public/tensorhandle.h b/tensorflow/cc/experimental/base/public/tensorhandle.h new file mode 100644 index 00000000000..99453ee7ea8 --- /dev/null +++ b/tensorflow/cc/experimental/base/public/tensorhandle.h @@ -0,0 +1,98 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CC_EXPERIMENTAL_BASE_PUBLIC_TENSORHANDLE_H_ +#define TENSORFLOW_CC_EXPERIMENTAL_BASE_PUBLIC_TENSORHANDLE_H_ + +#include +#include + +#include "tensorflow/c/eager/c_api.h" +#include "tensorflow/c/eager/c_api_experimental.h" +#include "tensorflow/cc/experimental/base/public/runtime.h" +#include "tensorflow/cc/experimental/base/public/status.h" +#include "tensorflow/cc/experimental/base/public/tensor.h" + +namespace tensorflow { +namespace experimental { +namespace cc { + +// An opaque representation of a tensor computed/managed by the Tensorflow +// runtime (tensorflow::cc::Runtime). Unlike a tensor, a TensorHandle may refer +// to tensors placed in memory of different devices or remote address spaces. +// Note that tensorflow::cc::Runtime MUST outlive all TensorHandles created +// from it. +class TensorHandle { + public: + // Unwraps a Tensor from the given TensorHandle. If an error occurred, + // status->ok() will be false, and the returned Tensor must not be used. + Tensor Resolve(Status* status); + + // Constructs a TensorHandle from a Tensor. If an error occurred, + // status->ok() will be false, and the returned TensorHandle must not be used. + static TensorHandle FromTensor(const Tensor& tensor, const Runtime& runtime, + Status* status); + + // TensorHandle is movable, and not copyable + TensorHandle(TensorHandle&&) = default; + TensorHandle& operator=(TensorHandle&&) = default; + + private: + // Wraps a TFE_TensorHandle. Takes ownership of handle. + explicit TensorHandle(TFE_TensorHandle* handle) : handle_(handle) {} + + // TensorHandle is not copyable + TensorHandle(const TensorHandle&) = delete; + TensorHandle& operator=(const TensorHandle&) = delete; + + // Returns the underlying TFE_TensorHandle that this object wraps. + // This object retains ownership of the pointer. 
+ TFE_TensorHandle* GetTFETensorHandle() const { return handle_.get(); } + + // Deletes the currently wrapped TFE_TensorHandle, and swaps it with handle, + // and takes ownership of handle. + void Reset(TFE_TensorHandle* handle) { handle_.reset(handle); } + + struct TFETensorHandleDeleter { + void operator()(TFE_TensorHandle* p) const { TFE_DeleteTensorHandle(p); } + }; + std::unique_ptr handle_; +}; + +inline Tensor TensorHandle::Resolve(Status* status) { + TF_Tensor* tensor = + TFE_TensorHandleResolve(handle_.get(), status->GetTFStatus()); + if (!status->ok()) { + return Tensor(nullptr); + } + return Tensor(tensor); +} + +inline TensorHandle TensorHandle::FromTensor(const Tensor& tensor, + const Runtime& runtime, + Status* status) { + TFE_TensorHandle* tensor_handle = TFE_NewTensorHandleFromTensor( + runtime.GetTFEContext(), tensor.GetTFTensor(), status->GetTFStatus()); + if (!status->ok()) { + return TensorHandle(nullptr); + } + return TensorHandle(tensor_handle); +} + +} // namespace cc +} // namespace experimental +} // namespace tensorflow + +#endif // TENSORFLOW_CC_EXPERIMENTAL_BASE_PUBLIC_TENSORHANDLE_H_ diff --git a/tensorflow/cc/experimental/base/tests/BUILD b/tensorflow/cc/experimental/base/tests/BUILD new file mode 100644 index 00000000000..f449d618f72 --- /dev/null +++ b/tensorflow/cc/experimental/base/tests/BUILD @@ -0,0 +1,50 @@ +# Tests for the C++ header-only base types. 
+load("//tensorflow:tensorflow.bzl", "tf_cc_test") + +package( + licenses = ["notice"], # Apache 2.0 +) + +cc_library( + name = "tensor_types_test_util", + testonly = True, + hdrs = ["tensor_types_test_util.h"], + deps = [ + "//tensorflow/c:tf_datatype", + ], +) + +tf_cc_test( + name = "tensor_test", + srcs = [ + "tensor_test.cc", + ], + deps = [ + ":tensor_types_test_util", + "//tensorflow/c:tf_datatype", + "//tensorflow/cc/experimental/base/public:status", + "//tensorflow/cc/experimental/base/public:tensor", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + +tf_cc_test( + name = "tensorhandle_test", + srcs = [ + "tensorhandle_test.cc", + ], + deps = [ + ":tensor_types_test_util", + "//tensorflow/c:tf_datatype", + "//tensorflow/cc/experimental/base/public:runtime", + "//tensorflow/cc/experimental/base/public:runtime_builder", + "//tensorflow/cc/experimental/base/public:status", + "//tensorflow/cc/experimental/base/public:tensor", + "//tensorflow/cc/experimental/base/public:tensorhandle", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) diff --git a/tensorflow/cc/experimental/base/tests/tensor_test.cc b/tensorflow/cc/experimental/base/tests/tensor_test.cc new file mode 100644 index 00000000000..33f9ab637e8 --- /dev/null +++ b/tensorflow/cc/experimental/base/tests/tensor_test.cc @@ -0,0 +1,163 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/cc/experimental/base/public/tensor.h" + +#include +#include + +#include "tensorflow/c/tf_datatype.h" +#include "tensorflow/cc/experimental/base/tests/tensor_types_test_util.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/test.h" + +namespace { + +using tensorflow::experimental::cc::Status; +using tensorflow::experimental::cc::Tensor; + +using SimpleTypes = ::testing::Types< + tensorflow::FloatType, tensorflow::DoubleType, tensorflow::Int32Type, + tensorflow::UINT8Type, tensorflow::INT8Type, tensorflow::INT64Type, + tensorflow::UINT16Type, tensorflow::UINT32Type, tensorflow::UINT64Type>; + +template +class ConstructScalarTensorTest : public ::testing::Test {}; +TYPED_TEST_SUITE(ConstructScalarTensorTest, SimpleTypes); + +// This test constructs a scalar tensor for each of the types in "SimpleTypes", +// and verifies the expected dimensions, dtype, value, number of bytes, and +// number of elements. 
+TYPED_TEST(ConstructScalarTensorTest, ValidTensorAttributesAfterConstruction) { + Status status; + TF_DataType dtype = TypeParam::kDType; + typename TypeParam::type value = 42; + Tensor tensor = Tensor::FromBuffer(/*dtype=*/dtype, /*shape=*/{}, + /*data=*/&value, + /*len=*/sizeof(value), + /*deleter=*/[](void*, size_t) {}, &status); + ASSERT_TRUE(status.ok()) << status.message(); + + EXPECT_EQ(tensor.dims(), 0); + EXPECT_EQ(tensor.dtype(), dtype); + EXPECT_EQ(*reinterpret_cast(tensor.data()), 42); + EXPECT_EQ(tensor.num_bytes(), sizeof(typename TypeParam::type)); + EXPECT_EQ(tensor.num_elements(), 1); +} + +template +class Construct1DTensorTest : public ::testing::Test {}; +TYPED_TEST_SUITE(Construct1DTensorTest, SimpleTypes); + +// This test constructs a 1D tensor for each of the types in "SimpleTypes", +// and verifies the expected dimensions, dtype, value, number of bytes, and +// number of elements. +TYPED_TEST(Construct1DTensorTest, ValidTensorAttributesAfterConstruction) { + Status status; + TF_DataType dtype = TypeParam::kDType; + // This is our 1D tensor of varying dtype. + std::vector value = {42, 100, 0, 1, 4, 29}; + // Shape is Rank 1 vector. 
+ std::vector shape; + shape.push_back(value.size()); + + Tensor tensor = Tensor::FromBuffer( + /*dtype=*/dtype, /*shape=*/shape, + /*data=*/value.data(), + /*len=*/value.size() * sizeof(typename TypeParam::type), + /*deleter=*/[](void*, size_t) {}, &status); + ASSERT_TRUE(status.ok()) << status.message(); + + EXPECT_EQ(tensor.dims(), 1); + EXPECT_EQ(tensor.dtype(), dtype); + tensorflow::gtl::ArraySlice tensor_view( + reinterpret_cast(tensor.data()), value.size()); + EXPECT_EQ(tensor_view[0], 42); + EXPECT_EQ(tensor_view[1], 100); + EXPECT_EQ(tensor_view[2], 0); + EXPECT_EQ(tensor_view[3], 1); + EXPECT_EQ(tensor_view[4], 4); + EXPECT_EQ(tensor_view[5], 29); + + EXPECT_EQ(tensor.num_bytes(), + value.size() * sizeof(typename TypeParam::type)); + EXPECT_EQ(tensor.num_elements(), value.size()); +} + +template +class Construct2DTensorTest : public ::testing::Test {}; +TYPED_TEST_SUITE(Construct2DTensorTest, SimpleTypes); + +// This test constructs a 2D tensor for each of the types in "SimpleTypes", +// and verifies the expected dimensions, dtype, value, number of bytes, and +// number of elements. +TYPED_TEST(Construct2DTensorTest, ValidTensorAttributesAfterConstruction) { + Status status; + TF_DataType dtype = TypeParam::kDType; + // This is the flat data buffer backing our 2D tensor of varying dtype. + std::vector value = {42, 100, 0, 1, 4, 29}; + // Shape is Rank 2 vector with shape 2 x 3. 
+ std::vector shape({2, 3}); + + Tensor tensor = Tensor::FromBuffer( + /*dtype=*/dtype, /*shape=*/shape, + /*data=*/value.data(), + /*len=*/value.size() * sizeof(typename TypeParam::type), + /*deleter=*/[](void*, size_t) {}, &status); + + ASSERT_TRUE(status.ok()) << status.message(); + + EXPECT_EQ(tensor.dims(), 2); + EXPECT_EQ(tensor.dtype(), dtype); + tensorflow::gtl::ArraySlice tensor_view( + reinterpret_cast(tensor.data()), value.size()); + EXPECT_EQ(tensor_view[0], 42); + EXPECT_EQ(tensor_view[1], 100); + EXPECT_EQ(tensor_view[2], 0); + EXPECT_EQ(tensor_view[3], 1); + EXPECT_EQ(tensor_view[4], 4); + EXPECT_EQ(tensor_view[5], 29); + + EXPECT_EQ(tensor.num_bytes(), + value.size() * sizeof(typename TypeParam::type)); + EXPECT_EQ(tensor.num_elements(), value.size()); +} + +TEST(CPPTensorAPI, ConstructTensorFromBuffer) { + bool done = false; + Status status; + std::vector data_vector({12, 14, 20, 18, 39, 42, 100}); + { + // data_vector is a rank 1 tensor. + std::vector shape; + shape.push_back(data_vector.size()); + + Tensor::DeleterCallback callback = [&done](void* data, size_t len) { + done = true; + }; + + Tensor tensor = + Tensor::FromBuffer(/*dtype=*/TF_INT32, /*shape=*/shape, + /*data=*/data_vector.data(), + /*len=*/data_vector.size() * sizeof(int32_t), + /*deleter=*/callback, &status); + ASSERT_TRUE(status.ok()) << status.message(); + } + // At this point, tensor has been destroyed, and the deleter callback should + // have run. + EXPECT_TRUE(done); +} + +} // namespace diff --git a/tensorflow/cc/experimental/base/tests/tensor_types_test_util.h b/tensorflow/cc/experimental/base/tests/tensor_types_test_util.h new file mode 100644 index 00000000000..af9cad7529b --- /dev/null +++ b/tensorflow/cc/experimental/base/tests/tensor_types_test_util.h @@ -0,0 +1,76 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CC_EXPERIMENTAL_BASE_TEST_TENSOR_TYPES_TEST_UTIL_H_ +#define TENSORFLOW_CC_EXPERIMENTAL_BASE_TEST_TENSOR_TYPES_TEST_UTIL_H_ + +#include + +#include "tensorflow/c/tf_datatype.h" + +namespace tensorflow { + +// Each of the following struct types have two members: a kDType that +// corresponds to a TF_Datatype enum value, and a typedef "type" +// of its corresponding C++ type. These types allow us to write Dtype-agnostic +// tests via GoogleTest's TypedTests: +// https://github.com/google/googletest/blob/e589a337170554c48bc658cc857cf15080c9eacc/googletest/docs/advanced.md#typed-tests +struct FloatType { + using type = float; + static constexpr TF_DataType kDType = TF_FLOAT; +}; + +struct DoubleType { + using type = double; + static constexpr TF_DataType kDType = TF_DOUBLE; +}; + +struct Int32Type { + using type = int32_t; + static constexpr TF_DataType kDType = TF_INT32; +}; + +struct UINT8Type { + using type = uint8_t; + static constexpr TF_DataType kDType = TF_UINT8; +}; + +struct INT8Type { + using type = int8_t; + static constexpr TF_DataType kDType = TF_INT8; +}; + +struct INT64Type { + using type = int64_t; + static constexpr TF_DataType kDType = TF_INT64; +}; + +struct UINT16Type { + using type = uint16_t; + static constexpr TF_DataType kDType = TF_UINT16; +}; + +struct UINT32Type { + using type = uint32_t; + static constexpr TF_DataType kDType = TF_UINT32; +}; + +struct UINT64Type { + using type = uint64_t; + static constexpr TF_DataType kDType = TF_UINT64; +}; + +} 
// namespace tensorflow + +#endif // TENSORFLOW_CC_EXPERIMENTAL_BASE_TEST_TENSOR_TYPES_TEST_UTIL_H_ diff --git a/tensorflow/cc/experimental/base/tests/tensorhandle_test.cc b/tensorflow/cc/experimental/base/tests/tensorhandle_test.cc new file mode 100644 index 00000000000..cfeaba4e392 --- /dev/null +++ b/tensorflow/cc/experimental/base/tests/tensorhandle_test.cc @@ -0,0 +1,184 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/cc/experimental/base/public/tensorhandle.h" + +#include +#include + +#include + +#include "tensorflow/c/tf_datatype.h" +#include "tensorflow/cc/experimental/base/public/runtime.h" +#include "tensorflow/cc/experimental/base/public/runtime_builder.h" +#include "tensorflow/cc/experimental/base/public/tensor.h" +#include "tensorflow/cc/experimental/base/tests/tensor_types_test_util.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace { + +using tensorflow::experimental::cc::Runtime; +using tensorflow::experimental::cc::RuntimeBuilder; +using tensorflow::experimental::cc::Status; +using tensorflow::experimental::cc::Tensor; +using tensorflow::experimental::cc::TensorHandle; + +using SimpleTypes = ::testing::Types< + tensorflow::FloatType, tensorflow::DoubleType, tensorflow::Int32Type, + tensorflow::UINT8Type, 
tensorflow::INT8Type, tensorflow::INT64Type, + tensorflow::UINT16Type, tensorflow::UINT32Type, tensorflow::UINT64Type>; + +template +class ConstructScalarTensorHandleTest : public ::testing::Test {}; +TYPED_TEST_SUITE(ConstructScalarTensorHandleTest, SimpleTypes); + +// This test constructs a scalar tensor for each of the types in "SimpleTypes", +// then wraps it in a TensorHandle. We then unwrap it back into a Tensor, and +// verify the expected dims, dtype, value, num bytes, and num elements. +TYPED_TEST(ConstructScalarTensorHandleTest, + ValidTensorAttributesAfterConstruction) { + Status status; + RuntimeBuilder runtime_builder; + std::unique_ptr runtime = runtime_builder.Build(&status); + ASSERT_TRUE(status.ok()) << status.message(); + + TF_DataType dtype = TypeParam::kDType; + typename TypeParam::type value = 42; + Tensor original_tensor = + Tensor::FromBuffer(/*dtype=*/dtype, /*shape=*/{}, + /*data=*/&value, + /*len=*/sizeof(value), + /*deleter=*/[](void*, size_t) {}, &status); + ASSERT_TRUE(status.ok()) << status.message(); + + TensorHandle handle = + TensorHandle::FromTensor(original_tensor, *runtime, &status); + ASSERT_TRUE(status.ok()) << status.message(); + + Tensor tensor = handle.Resolve(&status); + ASSERT_TRUE(status.ok()) << status.message(); + + EXPECT_EQ(tensor.dims(), 0); + EXPECT_EQ(tensor.dtype(), dtype); + EXPECT_EQ(*reinterpret_cast(tensor.data()), 42); + EXPECT_EQ(tensor.num_bytes(), sizeof(typename TypeParam::type)); + EXPECT_EQ(tensor.num_elements(), 1); +} + +template +class Construct1DTensorHandleTest : public ::testing::Test {}; +TYPED_TEST_SUITE(Construct1DTensorHandleTest, SimpleTypes); + +// This test constructs a 1D tensor for each of the types in "SimpleTypes", +// and verifies the expected dimensions, dtype, value, number of bytes, and +// number of elements. 
+TYPED_TEST(Construct1DTensorHandleTest, + ValidTensorAttributesAfterConstruction) { + Status status; + RuntimeBuilder runtime_builder; + std::unique_ptr runtime = runtime_builder.Build(&status); + ASSERT_TRUE(status.ok()) << status.message(); + + TF_DataType dtype = TypeParam::kDType; + // This is our 1D tensor of varying dtype. + std::vector value = {42, 100, 0, 1, 4, 29}; + // Shape is Rank 1 vector. + std::vector shape; + shape.push_back(value.size()); + + Tensor original_tensor = Tensor::FromBuffer( + /*dtype=*/dtype, /*shape=*/shape, + /*data=*/value.data(), + /*len=*/value.size() * sizeof(typename TypeParam::type), + /*deleter=*/[](void*, size_t) {}, &status); + ASSERT_TRUE(status.ok()) << status.message(); + + TensorHandle handle = + TensorHandle::FromTensor(original_tensor, *runtime, &status); + ASSERT_TRUE(status.ok()) << status.message(); + + Tensor tensor = handle.Resolve(&status); + ASSERT_TRUE(status.ok()) << status.message(); + + EXPECT_EQ(tensor.dims(), 1); + EXPECT_EQ(tensor.dtype(), dtype); + tensorflow::gtl::ArraySlice tensor_view( + reinterpret_cast(tensor.data()), value.size()); + EXPECT_EQ(tensor_view[0], 42); + EXPECT_EQ(tensor_view[1], 100); + EXPECT_EQ(tensor_view[2], 0); + EXPECT_EQ(tensor_view[3], 1); + EXPECT_EQ(tensor_view[4], 4); + EXPECT_EQ(tensor_view[5], 29); + + EXPECT_EQ(tensor.num_bytes(), + value.size() * sizeof(typename TypeParam::type)); + EXPECT_EQ(tensor.num_elements(), value.size()); +} + +template +class Construct2DTensorHandleTest : public ::testing::Test {}; +TYPED_TEST_SUITE(Construct2DTensorHandleTest, SimpleTypes); + +// This test constructs a 2D tensor for each of the types in "SimpleTypes", +// and verifies the expected dimensions, dtype, value, number of bytes, and +// number of elements. 
+TYPED_TEST(Construct2DTensorHandleTest, + ValidTensorAttributesAfterConstruction) { + Status status; + RuntimeBuilder runtime_builder; + std::unique_ptr runtime = runtime_builder.Build(&status); + ASSERT_TRUE(status.ok()) << status.message(); + + TF_DataType dtype = TypeParam::kDType; + // This is the flat data buffer backing our 2D tensor of varying dtype. + std::vector value = {42, 100, 0, 1, 4, 29}; + // Shape is Rank 2 vector with shape 2 x 3. + std::vector shape({2, 3}); + + Tensor original_tensor = Tensor::FromBuffer( + /*dtype=*/dtype, /*shape=*/shape, + /*data=*/value.data(), + /*len=*/value.size() * sizeof(typename TypeParam::type), + /*deleter=*/[](void*, size_t) {}, &status); + ASSERT_TRUE(status.ok()) << status.message(); + + TensorHandle handle = + TensorHandle::FromTensor(original_tensor, *runtime, &status); + ASSERT_TRUE(status.ok()) << status.message(); + + Tensor tensor = handle.Resolve(&status); + ASSERT_TRUE(status.ok()) << status.message(); + + EXPECT_EQ(tensor.dims(), 2); + EXPECT_EQ(tensor.dtype(), dtype); + tensorflow::gtl::ArraySlice tensor_view( + reinterpret_cast(tensor.data()), value.size()); + EXPECT_EQ(tensor_view[0], 42); + EXPECT_EQ(tensor_view[1], 100); + EXPECT_EQ(tensor_view[2], 0); + EXPECT_EQ(tensor_view[3], 1); + EXPECT_EQ(tensor_view[4], 4); + EXPECT_EQ(tensor_view[5], 29); + + EXPECT_EQ(tensor.num_bytes(), + value.size() * sizeof(typename TypeParam::type)); + EXPECT_EQ(tensor.num_elements(), value.size()); +} + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/cc/saved_model/BUILD b/tensorflow/cc/saved_model/BUILD index 882b4032f76..a20cc9c9945 100644 --- a/tensorflow/cc/saved_model/BUILD +++ b/tensorflow/cc/saved_model/BUILD @@ -4,7 +4,6 @@ load( "//tensorflow:tensorflow.bzl", "if_android", - "if_ios", "if_mobile", "if_not_mobile", "tf_cc_test", diff --git a/tensorflow/cc/saved_model/experimental/public/concrete_function.h b/tensorflow/cc/saved_model/experimental/public/concrete_function.h index 
f57ba052f1a..1adaf70b01a 100644 --- 
a/tensorflow/cc/saved_model/experimental/public/concrete_function.h +++ b/tensorflow/cc/saved_model/experimental/public/concrete_function.h @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/cc/saved_model/experimental/public/function_metadata.h" namespace tensorflow { +namespace experimental { namespace cc { // ConcreteFunction is an executable "function" loaded from a SavedModelAPI. @@ -54,6 +55,7 @@ inline const FunctionMetadata* ConcreteFunction::GetFunctionMetadata() { } } // namespace cc +} // namespace experimental } // namespace tensorflow #endif // TENSORFLOW_CC_SAVED_MODEL_EXPERIMENTAL_PUBLIC_CONCRETE_FUNCTION_H_ diff --git a/tensorflow/cc/saved_model/experimental/public/concrete_function_list.h b/tensorflow/cc/saved_model/experimental/public/concrete_function_list.h index bab95278eac..88cb779ef15 100644 --- a/tensorflow/cc/saved_model/experimental/public/concrete_function_list.h +++ b/tensorflow/cc/saved_model/experimental/public/concrete_function_list.h @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/cc/saved_model/experimental/public/concrete_function.h" namespace tensorflow { +namespace experimental { namespace cc { // ConcreteFunctionList helps convert an opaque pointer to an array of @@ -56,6 +57,7 @@ inline std::vector ConcreteFunctionList::ToVector() { } } // namespace cc +} // namespace experimental } // namespace tensorflow #endif // TENSORFLOW_CC_SAVED_MODEL_EXPERIMENTAL_PUBLIC_CONCRETE_FUNCTION_LIST_H_ diff --git a/tensorflow/cc/saved_model/experimental/public/function_metadata.h b/tensorflow/cc/saved_model/experimental/public/function_metadata.h index c3dcc45af0e..11e1a860d84 100644 --- a/tensorflow/cc/saved_model/experimental/public/function_metadata.h +++ b/tensorflow/cc/saved_model/experimental/public/function_metadata.h @@ -21,6 +21,7 @@ limitations under the License. 
#include "tensorflow/c/experimental/saved_model/public/function_metadata.h" namespace tensorflow { +namespace experimental { namespace cc { // FunctionMetadata stores additional function information, including @@ -40,6 +41,7 @@ class FunctionMetadata final { }; } // namespace cc +} // namespace experimental } // namespace tensorflow #endif // TENSORFLOW_CC_SAVED_MODEL_EXPERIMENTAL_PUBLIC_FUNCTION_METADATA_H_ diff --git a/tensorflow/cc/saved_model/experimental/public/saved_model_api.h b/tensorflow/cc/saved_model/experimental/public/saved_model_api.h index 814479de213..04018bf2aab 100644 --- a/tensorflow/cc/saved_model/experimental/public/saved_model_api.h +++ b/tensorflow/cc/saved_model/experimental/public/saved_model_api.h @@ -28,6 +28,7 @@ limitations under the License. #include "tensorflow/cc/saved_model/experimental/public/concrete_function_list.h" namespace tensorflow { +namespace experimental { namespace cc { // SavedModelAPI offers a way to load Tensorflow Saved Models @@ -155,6 +156,7 @@ inline std::vector SavedModelAPI::ListFunctions() { } } // namespace cc +} // namespace experimental } // namespace tensorflow #endif // TENSORFLOW_CC_SAVED_MODEL_EXPERIMENTAL_PUBLIC_SAVED_MODEL_API_H_ diff --git a/tensorflow/cc/saved_model/experimental/tests/saved_model_api_test.cc b/tensorflow/cc/saved_model/experimental/tests/saved_model_api_test.cc index 155c58604bf..7f7f6b09a6d 100644 --- a/tensorflow/cc/saved_model/experimental/tests/saved_model_api_test.cc +++ b/tensorflow/cc/saved_model/experimental/tests/saved_model_api_test.cc @@ -26,10 +26,14 @@ limitations under the License. 
#include "tensorflow/core/platform/stringpiece.h" #include "tensorflow/core/platform/test.h" -namespace tensorflow { namespace { +using tensorflow::experimental::cc::Runtime; +using tensorflow::experimental::cc::RuntimeBuilder; +using tensorflow::experimental::cc::SavedModelAPI; +using tensorflow::experimental::cc::Status; + constexpr char kTestData[] = "cc/saved_model/testdata"; std::string SavedModelPath(tensorflow::StringPiece saved_model_dir) { @@ -43,21 +47,21 @@ std::string SavedModelPath(tensorflow::StringPiece saved_model_dir) { class CPPSavedModelAPITest : public ::testing::TestWithParam {}; TEST_P(CPPSavedModelAPITest, LoadsSavedModelWithTags) { - cc::Status status; - cc::RuntimeBuilder builder; + Status status; + RuntimeBuilder builder; bool use_tfrt = GetParam(); if (use_tfrt) { GTEST_SKIP(); // TODO(chky) : Enable this once TFRT is open sourced. } builder.SetUseTFRT(use_tfrt); - std::unique_ptr runtime = builder.Build(&status); + std::unique_ptr runtime = builder.Build(&status); ASSERT_TRUE(status.ok()) << status.message(); std::string model_dir = SavedModelPath("VarsAndArithmeticObjectGraph"); std::unordered_set tags = {"serve"}; - std::unique_ptr model = - cc::SavedModelAPI::Load(model_dir, *runtime, &status, &tags); + std::unique_ptr model = + SavedModelAPI::Load(model_dir, *runtime, &status, &tags); // TODO(bmzhao): Change this to expect TF_OK when loading is implemented. // That unblocks writing other tests that require a TF_SavedModel*, @@ -67,20 +71,20 @@ TEST_P(CPPSavedModelAPITest, LoadsSavedModelWithTags) { } TEST_P(CPPSavedModelAPITest, LoadsSavedModel) { - cc::Status status; - cc::RuntimeBuilder builder; + Status status; + RuntimeBuilder builder; bool use_tfrt = GetParam(); if (use_tfrt) { GTEST_SKIP(); // TODO(chky) : Enable this once TFRT is open sourced. 
} builder.SetUseTFRT(use_tfrt); - std::unique_ptr runtime = builder.Build(&status); + std::unique_ptr runtime = builder.Build(&status); ASSERT_TRUE(status.ok()) << status.message(); std::string model_dir = SavedModelPath("VarsAndArithmeticObjectGraph"); - std::unique_ptr model = - cc::SavedModelAPI::Load(model_dir, *runtime, &status); + std::unique_ptr model = + SavedModelAPI::Load(model_dir, *runtime, &status); // TODO(bmzhao): Change this to expect TF_OK when loading is implemented. // That unblocks writing other tests that require a TF_SavedModel*, @@ -94,4 +98,3 @@ INSTANTIATE_TEST_SUITE_P(RuntimeAgnosticCPPSavedModelTests, } // namespace -} // namespace tensorflow diff --git a/tensorflow/compiler/aot/codegen.cc b/tensorflow/compiler/aot/codegen.cc index c9a36b88795..e4df3090046 100644 --- a/tensorflow/compiler/aot/codegen.cc +++ b/tensorflow/compiler/aot/codegen.cc @@ -131,6 +131,7 @@ Status AddRewritesForShape(int i, const xla::Shape& shape, TF_RETURN_IF_ERROR(XLATypeToCpp(shape.element_type(), &type)); std::vector dim_vars; string dim_sizes, indices; + int count = 1; if (shape.rank() == 0 || (shape.dimensions_size() == 1 && shape.dimensions(0) == 1)) { dim_sizes = "[1]"; @@ -140,6 +141,7 @@ Status AddRewritesForShape(int i, const xla::Shape& shape, dim_vars.push_back(absl::StrCat("size_t dim", dim)); dim_sizes += absl::StrCat("[", shape.dimensions(dim), "]"); indices += absl::StrCat("[dim", dim, "]"); + count *= shape.dimensions(dim); } } rewrites->push_back({"{{I}}", absl::StrCat(i)}); @@ -147,6 +149,7 @@ Status AddRewritesForShape(int i, const xla::Shape& shape, rewrites->push_back({"{{DIM_VARS}}", absl::StrJoin(dim_vars, ", ")}); rewrites->push_back({"{{DIM_SIZES}}", dim_sizes}); rewrites->push_back({"{{INDICES}}", indices}); + rewrites->push_back({"{{COUNT}}", absl::StrCat(count)}); return Status::OK(); } @@ -199,6 +202,12 @@ Status GenArgMethods(const tf2xla::Config& config, return (*static_cast( arg_data({{I}}))){{INDICES}}; } + int arg{{NAME}}_size() 
const { + return {{COUNT}} * sizeof({{TYPE}}); + } + int arg{{NAME}}_count() const { + return {{COUNT}}; + } )"; *methods += RewriteWithName(absl::StrCat(i), code, rewrites); if (!config.feed(i).name().empty()) { @@ -246,6 +255,12 @@ Status GenResultMethods(const tf2xla::Config& config, return (*static_cast( result_data({{I}}))){{INDICES}}; } + int result{{NAME}}_size() const { + return {{COUNT}} * sizeof({{TYPE}}); + } + int result{{NAME}}_count() const { + return {{COUNT}}; + } )"; *methods += RewriteWithName(absl::StrCat(i), code, rewrites); if (!config.fetch(i).name().empty()) { @@ -281,6 +296,12 @@ Status GenVariableMethods(const tf2xla::Config& config, return (*static_cast( arg_data({{I}}))){{INDICES}}; } + int var_{{NAME}}_size() const { + return {{COUNT}} * sizeof({{TYPE}}); + } + int var_{{NAME}}_count() const { + return {{COUNT}}; + } )"; const tf2xla::Variable& var = config.variable(i - config.feed_size()); rewrites.emplace_back("{{MAYBE_CONST}}", var.readonly() ? "const " : ""); diff --git a/tensorflow/compiler/aot/codegen_test_h.golden b/tensorflow/compiler/aot/codegen_test_h.golden index af58ca233f0..d011279dbb7 100644 --- a/tensorflow/compiler/aot/codegen_test_h.golden +++ b/tensorflow/compiler/aot/codegen_test_h.golden @@ -138,6 +138,12 @@ class MyClass final : public tensorflow::XlaCompiledCpuFunction { return (*static_cast( arg_data(0)))[dim0][dim1]; } + int arg0_size() const { + return 2 * sizeof(float); + } + int arg0_count() const { + return 2; + } void set_arg_myfeed_data(const void* data) { set_arg_data(0, data); @@ -156,6 +162,12 @@ class MyClass final : public tensorflow::XlaCompiledCpuFunction { return (*static_cast( arg_data(0)))[dim0][dim1]; } + int arg_myfeed_size() const { + return 2 * sizeof(float); + } + int arg_myfeed_count() const { + return 2; + } void set_arg1_data(const void* data) { set_arg_data(1, data); @@ -174,6 +186,12 @@ class MyClass final : public tensorflow::XlaCompiledCpuFunction { return (*static_cast( 
arg_data(1)))[dim0][dim1]; } + int arg1_size() const { + return 12 * sizeof(tensorflow::int64); + } + int arg1_count() const { + return 12; + } // Result methods for managing output buffers. Buffers are in row-major order. // Must only be called after a successful Run call. There is a set of methods @@ -204,6 +222,12 @@ class MyClass final : public tensorflow::XlaCompiledCpuFunction { return (*static_cast( result_data(0)))[dim0][dim1]; } + int result0_size() const { + return 30 * sizeof(tensorflow::uint32); + } + int result0_count() const { + return 30; + } tensorflow::uint32* result_myfetch_data() { return static_cast(result_data(0)); @@ -219,6 +243,12 @@ class MyClass final : public tensorflow::XlaCompiledCpuFunction { return (*static_cast( result_data(0)))[dim0][dim1]; } + int result_myfetch_size() const { + return 30 * sizeof(tensorflow::uint32); + } + int result_myfetch_count() const { + return 30; + } // Methods for managing variable buffers. Buffers are in row-major order. // @@ -261,6 +291,12 @@ class MyClass final : public tensorflow::XlaCompiledCpuFunction { return (*static_cast( arg_data(2)))[0]; } + int var_myvar_readonly_size() const { + return 1 * sizeof(float); + } + int var_myvar_readonly_count() const { + return 1; + } void set_var_myvar_data(float* data) { set_arg_data(3, data); @@ -279,6 +315,12 @@ class MyClass final : public tensorflow::XlaCompiledCpuFunction { return (*static_cast( arg_data(3)))[0]; } + int var_myvar_size() const { + return 1 * sizeof(float); + } + int var_myvar_count() const { + return 1; + } void set_var_myvar2_data(tensorflow::int32* data) { set_arg_data(4, data); @@ -297,6 +339,12 @@ class MyClass final : public tensorflow::XlaCompiledCpuFunction { return (*static_cast( arg_data(4)))[dim0]; } + int var_myvar2_size() const { + return 5 * sizeof(tensorflow::int32); + } + int var_myvar2_count() const { + return 5; + } private: // Number of buffers for the compiled computation. 
diff --git a/tensorflow/compiler/jit/xla_device_ops.h b/tensorflow/compiler/jit/xla_device_ops.h index 34ff0c55615..17e4226405a 100644 --- a/tensorflow/compiler/jit/xla_device_ops.h +++ b/tensorflow/compiler/jit/xla_device_ops.h @@ -180,12 +180,10 @@ class XlaAssignVariableOp : public OpKernel { data::MakeIteratorOp); \ REGISTER_KERNEL_BUILDER(Name("AnonymousIterator").Device(DEVICE), \ data::AnonymousIteratorHandleOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("AnonymousIteratorV2").Device(DEVICE).HostMemory("deleter"), \ - data::AnonymousIteratorHandleOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("DeleteIterator").Device(DEVICE).HostMemory("deleter"), \ - data::DeleteIteratorOp); \ + REGISTER_KERNEL_BUILDER(Name("AnonymousIteratorV2").Device(DEVICE), \ + data::AnonymousIteratorHandleOp); \ + REGISTER_KERNEL_BUILDER(Name("DeleteIterator").Device(DEVICE), \ + data::DeleteIteratorOp); \ REGISTER_KERNEL_BUILDER(Name("IteratorGetNext").Device(DEVICE), \ data::IteratorGetNextOp); \ REGISTER_KERNEL_BUILDER(Name("IteratorGetNextAsOptional").Device(DEVICE), \ diff --git a/tensorflow/compiler/mlir/lite/BUILD b/tensorflow/compiler/mlir/lite/BUILD index d907d28b2c7..9b5b0c209e5 100644 --- a/tensorflow/compiler/mlir/lite/BUILD +++ b/tensorflow/compiler/mlir/lite/BUILD @@ -31,7 +31,7 @@ filegroup( "//tensorflow/compiler/mlir/lite/quantization:quantization_td_files", "@llvm-project//mlir:OpBaseTdFiles", "@llvm-project//mlir:include/mlir/Interfaces/LoopLikeInterface.td", - "@llvm-project//mlir:include/mlir/Interfaces/SideEffects.td", + "@llvm-project//mlir:include/mlir/Interfaces/SideEffectInterfaces.td", ], ) @@ -695,9 +695,9 @@ cc_library( "@com_google_absl//absl/strings", "@llvm-project//llvm:support", "@llvm-project//mlir:IR", - "@llvm-project//mlir:LoopOpsTransforms", "@llvm-project//mlir:MlirTranslateMain", "@llvm-project//mlir:QuantOps", + "@llvm-project//mlir:SCFTransforms", "@llvm-project//mlir:StandardOps", "@llvm-project//mlir:Support", "@llvm-project//mlir:Translation", 
diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_export.cc b/tensorflow/compiler/mlir/lite/flatbuffer_export.cc index e9192388070..6a631b1433d 100644 --- a/tensorflow/compiler/mlir/lite/flatbuffer_export.cc +++ b/tensorflow/compiler/mlir/lite/flatbuffer_export.cc @@ -1020,7 +1020,7 @@ Optional> Translator::BuildOperator( if (!inst->getMutableAttrDict().getAttrs().empty()) { os << " {"; bool first = true; - for (auto& named_attr : inst->getMutableAttrDict().getDictionary()) { + for (auto& named_attr : inst->getAttrDictionary()) { os << (!first ? ", " : ""); first = false; named_attr.first.print(os); diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td index d103c07b986..fdf1501dbef 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td @@ -20,7 +20,7 @@ limitations under the License. include "mlir/IR/OpBase.td" include "mlir/Interfaces/LoopLikeInterface.td" -include "mlir/Interfaces/SideEffects.td" +include "mlir/Interfaces/SideEffectInterfaces.td" include "tensorflow/compiler/mlir/lite/ir/tfl_op_interfaces.td" include "tensorflow/compiler/mlir/lite/quantization/quantization.td" @@ -247,7 +247,14 @@ class TFL_TFTypesWithSameBits : Or<[CPred<"getElementTypeOrSelf($_op.getOperand(" # j # ")).isa()">, CPred<"getElementTypeOrSelf($_op.getOperand(" # j # ")).isUnsignedInteger(" # num # ")">]>]>; -class TFL_OperandIsNoneOrHasRankLessThanOrEqualTo : +class TFL_TFOperandTypesWithSameBits : + And<[ + Or<[CPred<"getElementTypeOrSelf($_op.getOperand(" # i # ")).isa()">, + CPred<"getElementTypeOrSelf($_op.getOperand(" # i # ")).isUnsignedInteger(" # num # ")">]>, + Or<[CPred<"getElementTypeOrSelf($_op.getOperand(" # j # ")).isa()">, + CPred<"getElementTypeOrSelf($_op.getOperand(" # j # ")).isUnsignedInteger(" # num # ")">]>]>; + +class TFL_OperandIsNoneOrHasRankAtMost : PredOpTrait<"operand " # n # " is at most " # m # "-D", Or<[ CPred<"$_op.getOperand(" # n # 
").getType().isa()">, @@ -255,13 +262,13 @@ class TFL_OperandIsNoneOrHasRankLessThanOrEqualTo : CPred<"$_op.getOperand(" # n # ").getType().cast().getRank() <= " # m>]>>; -class TFL_OperandHasRankLessThanOrEqualTo : +class TFL_OperandHasRankAtMost : PredOpTrait<"operand " # n # " is at most " # m # "-D", Or<[TFL_OperandIsUnrankedPred, CPred<"$_op.getOperand(" # n # ").getType().cast().getRank() <= " # m>]>>; -class TFL_OperandHasRankGreaterThanOrEqualTo : +class TFL_OperandHasRankAtLeast : PredOpTrait<"operand " # n # " is at least " # m # "-D", Or<[TFL_OperandIsUnrankedPred, CPred<"$_op.getOperand(" # n # @@ -300,6 +307,18 @@ class TFL_TCresVTEtIsSameAsOp : And<[ "quant::QuantizedType::castToStorageType(" "getElementTypeOrSelf($_op.getOperand(" # j # ")))">]>]>]>; +// This is a quantization-aware version of TCresVTEtIsSameAsOp +class TFL_TCopVTEtAreSameAt : Or<[ + TCopVTEtAreSameAt<[i, j]>, + TFL_TFOperandTypesWithSameBits, + And<[ + SubstLeaves<"$_self", "getElementTypeOrSelf($_op.getOperand(" # j # "))", + quant_QuantizedType.predicate>, + CPred<"quant::QuantizedType::castToStorageType(" + "getElementTypeOrSelf($_op.getOperand(" # i # "))) == " + "quant::QuantizedType::castToStorageType(" + "getElementTypeOrSelf($_op.getOperand(" # j # ")))">]>]>; + //===----------------------------------------------------------------------===// // TFL op common constraints. //===----------------------------------------------------------------------===// @@ -963,7 +982,11 @@ def TFL_ScatterNdOp : TFL_Op<"scatter_nd", [ // Same type check of lhs and rhs is handled by the ResultsBroadcastableShape trait. 
def TFL_LessEqualOp : TFL_Op<"less_equal", [ - ResultsBroadcastableShape, NoSideEffect, NoQuantizableResult]> { + ResultsBroadcastableShape, + BinaryOpSameElementTypeConstraint, + TFL_BinaryOperandsHaveSameShapesOrBroadcastableShape<0, 1, 4>, + NoSideEffect, + NoQuantizableResult]> { let summary = "Less_equal operator"; let description = [{ @@ -971,8 +994,8 @@ def TFL_LessEqualOp : TFL_Op<"less_equal", [ }]; let arguments = ( - ins TFL_TensorOf<[F32, I32, I64, I8, QI8, QUI8, TFL_Uint8]>:$lhs, - TFL_TensorOf<[F32, I32, I64, I8, QI8, QUI8, TFL_Uint8]>:$rhs); + ins TFL_TensorOf<[F32, I32, I64, QI8, QUI8]>:$lhs, + TFL_TensorOf<[F32, I32, I64, QI8, QUI8]>:$rhs); let results = (outs TFL_BoolTensor:$output); @@ -985,9 +1008,12 @@ def TFL_LessEqualOp : TFL_Op<"less_equal", [ let hasOptions = 0; } -def TFL_LocalResponseNormalizationOp : TFL_Op<"local_response_normalization", - [NoSideEffect]> { - let summary = "Local Response Normalization."; +def TFL_LocalResponseNormalizationOp : TFL_Op<"local_response_normalization", [ + TFL_OperandHasRank<0, 4>, + SameOperandsAndResultShape, + SameOperandsAndResultType, + NoSideEffect]> { + let summary = "Local Response Normalization."; let description = [{ The 4-D `input` tensor is treated as a 3-D array of 1-D vectors (along the last @@ -1004,7 +1030,7 @@ convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imag }]; let arguments = (ins - TFL_TensorOf<[F32, QI8, QUI8]>:$input, + TFL_FpTensor:$input, I32Attr:$radius, F32Attr:$bias, F32Attr:$alpha, @@ -1012,7 +1038,7 @@ convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imag ); let results = (outs - TFL_TensorOf<[F32, QI8, QUI8]>:$output + TFL_FpTensor:$output ); let hasOptions = 1; @@ -1048,7 +1074,7 @@ def TFL_MatrixDiagOp : TFL_Op<"matrix_diag", [ NoSideEffect, TFL_OperandHasAtleastRank<0, 1>, PredOpTrait<"operand and result must have the same element type", - TCresVTEtIsSameAsOp<0, 0>>]> { + TFL_TCresVTEtIsSameAsOp<0, 0>>]> { let 
summary = [{ Returns a tensor with the provided diagonal and everything else padded with zeros. }]; @@ -1061,17 +1087,21 @@ def TFL_MatrixDiagOp : TFL_Op<"matrix_diag", [ }]; let arguments = (ins - TFL_TensorOf<[F32, I8, I64, I32, TFL_Uint8]>:$diagonal + TFL_TensorOf<[F32, I8, I16, I32, I64, TFL_Uint8, QUI8, QI8, TFL_Quint8]>:$diagonal ); let results = (outs - TFL_TensorOf<[F32, I8, I64, I32, TFL_Uint8]>:$output + TFL_TensorOf<[F32, I8, I16, I32, I64, TFL_Uint8, QUI8, QI8, TFL_Quint8]>:$output ); let hasOptions = 0; } -def TFL_MatrixSetDiagOp : TFL_Op<"matrix_set_diag", [NoSideEffect]> { +def TFL_MatrixSetDiagOp : TFL_Op<"matrix_set_diag", [ + TFL_OperandHasAtleastRank<0, 2>, + PredOpTrait<"input and result must have the same element type", + TFL_TCresVTEtIsSameAsOp<0, 0>>, + NoSideEffect]> { let summary = [{ Returns a batched matrix tensor with new batched diagonal values. }]; @@ -1083,12 +1113,12 @@ innermost matrices. These will be overwritten by the values in `diagonal`. }]; let arguments = (ins - TensorOf<[F32, I32, I64, I8, QI8, QI16, QUI8, TFL_Uint8, TFL_Quint8]>:$input, - TensorOf<[F32, I32, I64, I8, QI8, QI16, QUI8, TFL_Uint8, TFL_Quint8]>:$diagonal + TensorOf<[F32, I8, I16, I32, I64, UI8, QI8, QI16, QUI8, TFL_Quint8]>:$input, + TensorOf<[F32, I8, I16, I32, I64, UI8, QI8, QI16, QUI8, TFL_Quint8]>:$diagonal ); let results = (outs - TensorOf<[F32, I32, I64, I8, QI8, QI16, QUI8, TFL_Uint8, TFL_Quint8]>:$output + TensorOf<[F32, I8, I16, I32, I64, UI8, QI8, QI16, QUI8, TFL_Quint8]>:$result ); let hasOptions = 0; @@ -1206,7 +1236,12 @@ larger than 0. 
} def TFL_NotEqualOp : TFL_Op<"not_equal", [ - ResultsBroadcastableShape, Commutative, NoSideEffect, NoQuantizableResult]> { + TFL_BinaryOperandsHaveSameShapesOrBroadcastableShape<0, 1, 4>, + BinaryOpSameElementTypeConstraint, + ResultsBroadcastableShape, + Commutative, + NoSideEffect, + NoQuantizableResult]> { let summary = "Not_equal operator"; let description = [{ @@ -1214,8 +1249,8 @@ def TFL_NotEqualOp : TFL_Op<"not_equal", [ }]; let arguments = ( - ins AnyTensor:$lhs, - AnyTensor:$rhs); + ins TFL_TensorOf<[I1, F32, I32, I64, QUI8, QI8, TFL_Quint8, TFL_Str]>:$lhs, + TFL_TensorOf<[I1, F32, I32, I64, QUI8, QI8, TFL_Quint8, TFL_Str]>:$rhs); let results = (outs TFL_BoolTensor:$output); @@ -1284,7 +1319,7 @@ def TFL_EmbeddingLookupOp: TFL_Op<"embedding_lookup", PredOpTrait<"value and output must have same element type", TFL_TCresVTEtIsSameAsOp<0, 1>>, TFL_OperandHasRank<0, 1>, - TFL_OperandHasRankGreaterThanOrEqualTo<1, 2> + TFL_OperandHasRankAtLeast<1, 2> ]> { let summary = "Embedding lookup operator"; @@ -1502,7 +1537,11 @@ def TFL_FloorModOp : TFL_Op<"floor_mod", [ } def TFL_GreaterOp : TFL_Op<"greater", [ - ResultsBroadcastableShape, NoSideEffect, NoQuantizableResult]> { + ResultsBroadcastableShape, + BinaryOpSameElementTypeConstraint, + TFL_BinaryOperandsHaveSameShapesOrBroadcastableShape<0, 1, 4>, + NoSideEffect, + NoQuantizableResult]> { let summary = "Greater operator"; let description = [{ @@ -1510,10 +1549,10 @@ def TFL_GreaterOp : TFL_Op<"greater", [ }]; let arguments = ( - ins AnyTensor:$lhs, - AnyTensor:$rhs); + ins TFL_TensorOf<[F32, I32, I64, QUI8, QI8, TFL_Quint8]>:$lhs, + TFL_TensorOf<[F32, I32, I64, QUI8, QI8, TFL_Quint8]>:$rhs); - let results = (outs AnyTensor:$output); + let results = (outs TFL_BoolTensor:$output); let builders = [TFL_ComparisonBinaryBuilder]; @@ -1523,8 +1562,9 @@ def TFL_GreaterOp : TFL_Op<"greater", [ } def TFL_HardSwishOp: TFL_Op<"hard_swish", [NoSideEffect, - SameOperandsAndResultShape, - TFL_GpuTargetOp]> { + 
SameOperandsAndResultShape, + SameOperandsAndResultType, + TFL_GpuTargetOp]> { let summary = "Hardswish activation function."; let description = [{ Computes hard-swish activation function @@ -1563,29 +1603,34 @@ def TFL_L2NormalizationOp : TFL_Op<"l2_normalization", [NoSideEffect, let customOption = "L2NormOptions"; } -def TFL_LeakyReluOp: TFL_Op<"leaky_relu", [NoSideEffect, SameOperandsAndResultType]> { +def TFL_LeakyReluOp: TFL_Op<"leaky_relu", [ + SameOperandsAndResultShape, + NoSideEffect, + SameOperandsAndResultType]> { let summary = "Leaky Relu operator"; - // TODO(jpienaar): Add type restriction. This op is only defined for - // restricted (floating point) types. let description = [{ Element-wise Leaky ReLU operator x -> x >= 0 ? x : (alpha * x) }]; let arguments = ( - ins AnyTensor:$input, + ins TFL_TensorOf<[F32, QUI8, QI8, TFL_Quint8]>:$input, // Slope of the activation function at x < 0. F32Attr:$alpha ); - let results = (outs AnyTensor:$output); + let results = (outs TFL_TensorOf<[F32, QUI8, QI8, TFL_Quint8]>:$output); let hasOptions = 0b1; } def TFL_LessOp : TFL_Op<"less", [ - ResultsBroadcastableShape, NoSideEffect, NoQuantizableResult]> { + ResultsBroadcastableShape, + BinaryOpSameElementTypeConstraint, + TFL_BinaryOperandsHaveSameShapesOrBroadcastableShape<0, 1, 4>, + NoSideEffect, + NoQuantizableResult]> { let summary = "Less operator"; let description = [{ @@ -1593,8 +1638,8 @@ def TFL_LessOp : TFL_Op<"less", [ }]; let arguments = ( - ins AnyTensor:$lhs, - AnyTensor:$rhs); + ins TFL_TensorOf<[F32, I32, I64, QUI8, QI8, TFL_Quint8]>:$lhs, + TFL_TensorOf<[F32, I32, I64, QUI8, QI8, TFL_Quint8]>:$rhs); let results = (outs TFL_BoolTensor:$output); @@ -1655,6 +1700,8 @@ def TFL_LogicalOrOp : TFL_Op<"logical_or", [NoSideEffect]> { def TFL_LogisticOp: TFL_Op<"logistic", [ NoSideEffect, + PredOpTrait<"x and y must have same element type", + TFL_TCresVTEtIsSameAsOp<0, 0>>, SameOperandsAndResultShape, // zero_point = 0 // scale = 1. 
/ (max_value + 1) @@ -1667,9 +1714,9 @@ def TFL_LogisticOp: TFL_Op<"logistic", [ Computes element-wise Sigmoid of input }]; - let arguments = (ins TFL_TensorOf<[F32, QI8, QUI8, QI16, QUI16]>:$x); + let arguments = (ins TFL_TensorOf<[F32, QI8, QUI8, QI16, TFL_Quint8]>:$x); - let results = (outs TFL_TensorOf<[F32, QI8, QUI8, QI16, QUI16]>:$y); + let results = (outs TFL_TensorOf<[F32, QI8, QUI8, QI16, TFL_Quint8]>:$y); } def TFL_LogOp: TFL_Op<"log", [ @@ -1690,10 +1737,10 @@ def TFL_LogOp: TFL_Op<"log", [ let hasFolder = 1; } -// TODO(b/130643170): Adds some constraint for the input/output element types. def TFL_LogSoftmaxOp : TFL_Op<"log_softmax", [ NoSideEffect, SameOperandsAndResultShape, + SameOperandsAndResultType, // zero_point = max_value // scale = -log_softmax_output_min / (max_value + 1) FixedResultScale>, @@ -1706,9 +1753,9 @@ def TFL_LogSoftmaxOp : TFL_Op<"log_softmax", [ input - log(reduce_sum(exp(input), dim)) }]; - let arguments = (ins AnyTensor:$input); + let arguments = (ins TFL_TensorOf<[F32, QUI8, QI8, TFL_Quint8]>:$input); - let results = (outs AnyTensor:$output); + let results = (outs TFL_TensorOf<[F32, QUI8, QI8, TFL_Quint8]>:$output); let hasOptions = 1; } @@ -1727,6 +1774,9 @@ def MaxPoolOperandAndResultConstraints : PredOpTrait<"MaxPool2D operand and " TFL_TCresVTEtIsSameAsOp<0, 0>]>>; def TFL_MaxPool2DOp : TFL_Op<"max_pool_2d", [ + TFL_OperandHasRank<0, 4>, + PredOpTrait<"input and output must have same element type", + TFL_TCresVTEtIsSameAsOp<0, 0>>, NoSideEffect, MaxPoolOperandAndResultConstraints, SameOperandsAndResultsScale, @@ -1741,7 +1791,7 @@ def TFL_MaxPool2DOp : TFL_Op<"max_pool_2d", [ }]; let arguments = ( - ins AnyTensor:$input, + ins TFL_TensorOf<[F32, QUI8, QI8, QI16, TFL_Quint8]>:$input, TFL_PaddingAttr:$padding, I32Attr:$stride_w, I32Attr:$stride_h, @@ -1750,7 +1800,7 @@ def TFL_MaxPool2DOp : TFL_Op<"max_pool_2d", [ TFL_AFAttr:$fused_activation_function ); - let results = (outs AnyTensor:$output); + let results = (outs 
TFL_TensorOf<[F32, QUI8, QI8, QI16, TFL_Quint8]>:$output); let hasOptions = 1; @@ -1782,7 +1832,11 @@ def TFL_MaximumOp : TFL_Op<"maximum", [ let hasOptions = 0; } -def TFL_MeanOp : TFL_Op<"mean", [NoSideEffect, TFL_GpuTargetOp]> { +def TFL_MeanOp : TFL_Op<"mean", [ + PredOpTrait<"input and output must have same element type", + TFL_TCresVTEtIsSameAsOp<0, 0>>, + NoSideEffect, + TFL_GpuTargetOp]> { let summary = "Mean operator"; let description = [{ @@ -1794,13 +1848,13 @@ def TFL_MeanOp : TFL_Op<"mean", [NoSideEffect, TFL_GpuTargetOp]> { }]; let arguments = (ins - TFL_TensorOf<[F32, I8, I32, I64, QI8, QUI8, TFL_Uint8]>:$input, + TFL_TensorOf<[F32, I32, I64, QI8, QUI8, TFL_Uint8]>:$input, TFL_TensorOf<[I32, I64]>:$axis, BoolAttr:$keep_dims ); let results = (outs - TFL_TensorOf<[F32, I32, I64, I8, QI8, QUI8, TFL_Uint8]>:$output); + TFL_TensorOf<[F32, I32, I64, QI8, QUI8, TFL_Uint8]>:$output); let hasOptions = 1; let customOption = "ReducerOptions"; @@ -1821,14 +1875,14 @@ def TFL_OneHotOp : TFL_Op<"one_hot", [NoSideEffect]> { let arguments = (ins TFL_TensorOf<[I32, I64]>:$indices, TFL_I32Tensor:$depth, - TFL_TensorOf<[F32, I32, I64, I1]>:$on_value, - TFL_TensorOf<[F32, I32, I64, I1]>:$off_value, + TFL_TensorOf<[F32, I32, I64, I1, I8, UI8]>:$on_value, + TFL_TensorOf<[F32, I32, I64, I1, I8, UI8]>:$off_value, I32Attr:$axis ); let results = (outs - TFL_TensorOf<[F32, I32, I64, I1]>:$output + TFL_TensorOf<[F32, I32, I64, I1, I8, UI8]>:$output ); let hasOptions = 1; @@ -2032,7 +2086,11 @@ def TFL_NegOp: TFL_Op<"neg", [NoSideEffect, SameOperandsAndResultType]> { let hasFolder = 1; } -def TFL_PackOp : TFL_Op<"pack", [NoSideEffect, SameOperandsAndResultsScale]> { +def TFL_PackOp : TFL_Op<"pack", [ + PredOpTrait<"values and output must have same element type", + TFL_TCresVTEtIsSameAsOp<0, 0>>, + NoSideEffect, + SameOperandsAndResultsScale]> { let summary = "Packs a list of tensors along a dimension into one tensor"; let description = [{ @@ -2063,14 +2121,14 @@ def TFL_PackOp : 
TFL_Op<"pack", [NoSideEffect, SameOperandsAndResultsScale]> { }]; let arguments = (ins - TFL_VariadicTensorOf<[F32, I8, I16, I32, I64, QI8, QUI8, QI16]>:$values, + TFL_VariadicTensorOf<[F32, I8, I16, I32, I64, UI8, QI8, QUI8, QI16, TFL_Quint8]>:$values, - I32Attr:$values_count, + Confined:$values_count, I32Attr:$axis ); let results = (outs - TFL_TensorOf<[F32, I8, I16, I32, I64, QI8, QUI8, QI16]>:$output + TFL_TensorOf<[F32, I8, I16, I32, I64, UI8, QI8, QUI8, QI16, TFL_Quint8]>:$output ); let verifier = [{ return Verify(*this); }]; @@ -2081,8 +2139,11 @@ def TFL_PackOp : TFL_Op<"pack", [NoSideEffect, SameOperandsAndResultsScale]> { } def TFL_PadOp : TFL_Op<"pad", [ + PredOpTrait<"input and output must have same element type", + TFL_TCresVTEtIsSameAsOp<0, 0>>, NoSideEffect, SameOperandsAndResultsScale, + TFL_OperandHasRankAtMost<0, 4>, TFL_OperandHasRank<1, 2>, TFL_OperandRankEquals1DimOfOperand<0, 1>, TFL_GpuTargetOp]> { @@ -2113,22 +2174,25 @@ def TFL_PadOp : TFL_Op<"pad", [ ``` }]; - let arguments = (ins TFL_TensorOf<[F32, I8, I32, I64, QI8, QUI8]>:$input, + let arguments = (ins TFL_TensorOf<[F32, I32, I64, QI8, QUI8, TFL_Quint8]>:$input, TFL_I32OrI64Tensor:$padding); - let results = (outs TFL_TensorOf<[F32, I8, I32, I64, QI8, QUI8]>:$output); + let results = (outs TFL_TensorOf<[F32, I32, I64, QI8, QUI8, TFL_Quint8]>:$output); let hasOptions = 1; } def TFL_PadV2Op : TFL_Op<"padv2", [ + PredOpTrait<"input and output must have same element type", + TFL_TCresVTEtIsSameAsOp<0, 0>>, NoSideEffect, SameOperandsAndResultsScale, + TFL_OperandHasRankAtMost<0, 4>, TFL_OperandHasRank<1, 2>, TFL_OperandHasRank<2, 0>, TFL_OperandRankEquals1DimOfOperand<0, 1>, PredOpTrait<"input and constant value operands must have same element type", - TCopVTEtAreSameAt<[0, 2]>>]> { + TFL_TCopVTEtAreSameAt<0, 2>>]> { let summary = "Padding operator v2"; let description = [{ @@ -2159,11 +2223,11 @@ def TFL_PadV2Op : TFL_Op<"padv2", [ }]; let arguments = ( - ins TFL_TensorOf<[F32, I8, I32, I64, 
QI8, QUI8]>:$input, + ins TFL_TensorOf<[F32, I32, I64, UI8, QI8, QUI8, TFL_Quint8]>:$input, TFL_I32OrI64Tensor:$padding, - TFL_TensorOf<[F32, I8, I32, I64]>:$constant_values); + TFL_TensorOf<[F32, I32, I64, UI8, QI8, QUI8, TFL_Quint8]>:$constant_values); - let results = (outs TFL_TensorOf<[F32, I8, I32, I64, QI8, QUI8]>:$output); + let results = (outs TFL_TensorOf<[F32, I32, I64, UI8, QI8, QUI8, TFL_Quint8]>:$output); let hasOptions = 1; } @@ -2191,9 +2255,21 @@ def TFL_PowOp : TFL_Op<"pow", [ResultsBroadcastableShape, let builders = [TFL_BroadcastableBinaryBuilder]; } -def TFL_PReluOp : TFL_Op<"prelu", [NoSideEffect, - TFL_GpuTargetOp, - SameOperandsAndResultsScale]> { +def TFL_PReluOp : TFL_Op<"prelu", [ + NoSideEffect, + ResultsBroadcastableShape, + TFL_GpuTargetOp, + TFL_OperandHasRankAtMost<0, 4>, + TFL_OperandHasRankAtMost<1, 4>, + BinaryOpSameElementTypeConstraint, + PredOpTrait<"input and output must have the same element type", + TFL_TCresVTEtIsSameAsOp<0, 0>>, + PredOpTrait<"'alpha' should have one less rank than 'input'.", + Or<[TFL_OperandIsUnrankedPred<0>, + TFL_OperandIsUnrankedPred<1>, + CPred<"$_op.getOperand(0).getType().cast().getRank() == " + "$_op.getOperand(1).getType().cast().getRank() " + "+ 1">]>>]> { let summary = "Parameterized Relu operator"; let description = [{ @@ -2206,11 +2282,11 @@ def TFL_PReluOp : TFL_Op<"prelu", [NoSideEffect, }]; let arguments = ( - ins TFL_TensorOf<[F32, QUI8]>:$input, - TFL_TensorOf<[F32, QUI8]>:$alpha + ins TFL_TensorOf<[F32, QI8, QUI8, TFL_Quint8]>:$input, + TFL_TensorOf<[F32, QI8, QUI8, TFL_Quint8]>:$alpha ); - let results = (outs TFL_TensorOf<[F32, QUI8]>:$output); + let results = (outs TFL_TensorOf<[F32, QI8, QUI8, TFL_Quint8]>:$output); let verifier = [{ return Verify(*this); }]; } @@ -2887,7 +2963,7 @@ def TFL_DepthToSpaceOp: TFL_Op<"depth_to_space", [ SameOperandsAndResultsScale, PredOpTrait<"input and output must have same element type", TFL_TCresVTEtIsSameAsOp<0, 0>>, - 
TFL_OperandHasRankLessThanOrEqualTo<0, 4> + TFL_OperandHasRankAtMost<0, 4> ]> { let summary = "DepthToSpace operator"; @@ -3224,7 +3300,7 @@ def TFL_QConstOp : Op:$output); let builders = [OpBuilder< "OpBuilder &, OperationState &state, TypeAttr qtype, Attribute value", @@ -3849,7 +3925,7 @@ def TFL_NumericVerifyOp : Op:$input, + TFL_TensorOf<[QI8, QUI8, QI16, F16, TFL_Quint8]>:$input, TFL_TensorOf<[F32]>:$ref, // Attributes diff --git a/tensorflow/compiler/mlir/lite/python/saved_model_to_tfl_flatbuffer.cc b/tensorflow/compiler/mlir/lite/python/saved_model_to_tfl_flatbuffer.cc index c338b723a4a..51fcbb97360 100644 --- a/tensorflow/compiler/mlir/lite/python/saved_model_to_tfl_flatbuffer.cc +++ b/tensorflow/compiler/mlir/lite/python/saved_model_to_tfl_flatbuffer.cc @@ -146,6 +146,10 @@ Status ConvertSavedModelToTFLiteFlatBuffer( saved_model_exported_names.begin(), saved_model_exported_names.end()); absl::Span exported_names(exported_names_in_vector); + if (exported_names.size() != 1) { + return errors::Unimplemented("Only support a single exported name."); + } + TF_ASSIGN_OR_RETURN(auto module, ImportSavedModel(model_flags.saved_model_dir(), model_flags.saved_model_version(), tags, diff --git a/tensorflow/compiler/mlir/lite/tests/ops.mlir b/tensorflow/compiler/mlir/lite/tests/ops.mlir index b1d1d81af37..f42e06350e5 100644 --- a/tensorflow/compiler/mlir/lite/tests/ops.mlir +++ b/tensorflow/compiler/mlir/lite/tests/ops.mlir @@ -573,7 +573,7 @@ func @testLogistic(tensor<1x2x3x4x5xf32>) -> tensor<1x2x3x4x5xf32> { // test invalid Logistic input func @testLogisticWithWrongInputType(tensor) -> tensor { ^bb0(%arg0: tensor): - // expected-error @+1 {{tfl.logistic' op operand #0 must be tensor of 32-bit float or QI8 type or QUI8 type or QI16 type or QUI16 type values}} + // expected-error @+1 {{'tfl.logistic' op operand #0 must be tensor of 32-bit float or QI8 type or QUI8 type or QI16 type or TFLite quint8 type values, but got 'tensor'}} %0 = "tfl.logistic"(%arg0): (tensor) 
-> tensor return %0#0 : tensor } @@ -1252,10 +1252,10 @@ func @testOneHot(%arg0: tensor<3xi32>, %arg1: tensor, %arg2: tensor, % // ----- -func @testOneHotWithInvalidOutputType(%arg0: tensor<3xi32>, %arg1: tensor, %arg2: tensor, %arg3: tensor) -> tensor<*xi8> { - // expected-error @+1 {{'tfl.one_hot' op result #0 must be tensor of 32-bit float or 32-bit signless integer or 64-bit signless integer or 1-bit signless integer values}} - %0 = "tfl.one_hot"(%arg0, %arg1, %arg2, %arg3) {axis = -1 : i32} : (tensor<3xi32>, tensor, tensor, tensor) -> tensor<*xi8> - return %0 : tensor<*xi8> +func @testOneHotWithInvalidOutputType(%arg0: tensor<3xi32>, %arg1: tensor, %arg2: tensor, %arg3: tensor) -> tensor<*xi16> { + // expected-error @+1 {{'tfl.one_hot' op result #0 must be tensor of 32-bit float or 32-bit signless integer or 64-bit signless integer or 1-bit signless integer or 8-bit signless integer or 8-bit unsigned integer values, but got 'tensor<*xi16>'}} + %0 = "tfl.one_hot"(%arg0, %arg1, %arg2, %arg3) {axis = -1 : i32} : (tensor<3xi32>, tensor, tensor, tensor) -> tensor<*xi16> + return %0 : tensor<*xi16> } // ----- @@ -1489,7 +1489,8 @@ func @testEmbeddingLookupValueAndResultElementTypeTraitFailed(%arg0 : tensor>) -> tensor<1x56x56x192x!quant.uniform> { +func @testWrongQuantizedLocalResponseNormalization(%arg0 : tensor<1x56x56x192x!quant.uniform>) -> tensor<1x56x56x192x!quant.uniform> { + // expected-error @+1 {{'tfl.local_response_normalization' op operand #0 must be tensor of 32-bit float values, but got 'tensor<1x56x56x192x!quant.uniform>'}} %0 = "tfl.local_response_normalization"(%arg0) {alpha = 9.99999974E-5 : f32, beta = 5.000000e-01 : f32, bias = 2.000000e+00 : f32, radius = 5 : i32} : (tensor<1x56x56x192x!quant.uniform>) -> tensor<1x56x56x192x!quant.uniform> return %0 : tensor<1x56x56x192x!quant.uniform> } @@ -1523,32 +1524,32 @@ func @testDepthToSpaceInvalidOutputType(%arg0: tensor<1x1x1x4xf32>) -> tensor<1x // ----- -func @testPReluWrongOutputRank(%arg0: 
tensor<10x10x10x10xf32>, %arg1: tensor<1x1x10xf32>) -> tensor<10x10x10xf32> { - // expected-error @+1 {{'input' and 'output' should have the same rank}} - %0 = "tfl.prelu"(%arg0, %arg1) : (tensor<10x10x10x10xf32>, tensor<1x1x10xf32>) -> tensor<10x10x10xf32> - return %0 : tensor<10x10x10xf32> +func @testPReluWrongOutputRank(%arg0: tensor<10x10x10x10xf32>, %arg1: tensor<10x10x10x10xf32>) -> tensor<10x10xf32> { + // expected-error @+1 {{'tfl.prelu' op result type '10x10' not broadcast compatible with broadcasted operands's shapes '10x10x10x10'}} + %0 = "tfl.prelu"(%arg0, %arg1) : (tensor<10x10x10x10xf32>, tensor<10x10x10x10xf32>) -> tensor<10x10xf32> + return %0 : tensor<10x10xf32> } // ----- func @testPReluWrongOutputShape(%arg0: tensor<1x2x3x4xf32>, %arg1: tensor<2x3x4xf32>) -> tensor<1x2x3x5xf32> { - // expected-error @+1 {{'input' and 'output' should have the same shape}} + // expected-error @+1 {{'tfl.prelu' op result type '1x2x3x5' not broadcast compatible with broadcasted operands's shapes '1x2x3x4'}} %0 = "tfl.prelu"(%arg0, %arg1) : (tensor<1x2x3x4xf32>, tensor<2x3x4xf32>) -> tensor<1x2x3x5xf32> return %0 : tensor<1x2x3x5xf32> } // ----- -func @testPReluWrongAlphaRank(%arg0: tensor<7x3x2x14xf32>, %arg1: tensor<2x7x3x2x14xf32>) -> tensor<7x3x2x14xf32> { +func @testPReluWrongAlphaRank(%arg0: tensor<7x3x2x14xf32>, %arg1: tensor<7x3x2x14xf32>) -> tensor<7x3x2x14xf32> { // expected-error @+1 {{'alpha' should have one less rank than 'input'.}} - %0 = "tfl.prelu"(%arg0, %arg1) : (tensor<7x3x2x14xf32>, tensor<2x7x3x2x14xf32>) -> tensor<7x3x2x14xf32> + %0 = "tfl.prelu"(%arg0, %arg1) : (tensor<7x3x2x14xf32>, tensor<7x3x2x14xf32>) -> tensor<7x3x2x14xf32> return %0 : tensor<7x3x2x14xf32> } // ----- func @testPReluInvalidBroadcast(%arg0: tensor<15x14x2x14xf32>, %arg1: tensor<1x1x3xf32>) -> tensor<15x14x2x14xf32> { - // expected-error @+1 {{'alpha' is not broadcastable at dimension 2.}} + // expected-error @+1 {{'tfl.prelu' op operands don't have broadcast-compatible 
shapes}} %0 = "tfl.prelu"(%arg0, %arg1) : (tensor<15x14x2x14xf32>, tensor<1x1x3xf32>) -> tensor<15x14x2x14xf32> return %0 : tensor<15x14x2x14xf32> } diff --git a/tensorflow/compiler/mlir/lite/tf_tfl_translate.cc b/tensorflow/compiler/mlir/lite/tf_tfl_translate.cc index 4bc9d9e0c2d..fce1333a491 100644 --- a/tensorflow/compiler/mlir/lite/tf_tfl_translate.cc +++ b/tensorflow/compiler/mlir/lite/tf_tfl_translate.cc @@ -160,6 +160,11 @@ int main(int argc, char **argv) { absl::StrSplit(saved_model_exported_names, ',', absl::SkipEmpty()); absl::Span exported_names(exported_names_vector); + if (exported_names.size() != 1) { + llvm::errs() << "There should be only one exported name"; + return kTrFailure; + } + module = tensorflow::ImportSavedModel(input_file_name, saved_model_version, tags, exported_names, &context); } else { diff --git a/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc b/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc index b9ec67736d9..62f64ab63b4 100644 --- a/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc +++ b/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc @@ -174,7 +174,7 @@ StatusOr ImportSavedModel( return module; } else if (saved_model_version == 1) { auto module = tensorflow::SavedModelSignatureDefsToMlirImport( - input_filename, tags, context); + input_filename, tags, exported_names, context); if (!module) return tensorflow::errors::InvalidArgument("fail to open input file"); diff --git a/tensorflow/compiler/mlir/python/BUILD b/tensorflow/compiler/mlir/python/BUILD index 666f89ac72f..1189a926383 100644 --- a/tensorflow/compiler/mlir/python/BUILD +++ b/tensorflow/compiler/mlir/python/BUILD @@ -12,6 +12,22 @@ cc_library( "//tensorflow/c:tf_status_helper", "//tensorflow/compiler/mlir/tensorflow:convert_graphdef", "//tensorflow/compiler/mlir/tensorflow:error_util", + # (yongtang) The graph_optimization_pass_registration needs to be part + # of a shared object that will be loaded whenever `import tensorflow` + # is run. 
The natural place is libtensorflow_framework.so. + # While adding graph_optimization_pass_registration to + # libtensorflow_framework.so is possible with some modification in + # dependency, many tests will fail due to multiple copies of LLVM. + # See https://github.com/tensorflow/tensorflow/pull/39231 for details. + # Alternatively, we place graph_optimization_pass_registration here + # because: + # - tensorflow/python/_pywrap_mlir.so already depends on LLVM anyway + # - tensorflow/python/_pywrap_mlir.so always loaded as part of python + # binding + # TODO: It might still be preferable to place graph_optimization_pass + # as part of the libtensorflow_framework.so, as it is the central + # place for core related components. + "//tensorflow/compiler/mlir/tensorflow:graph_optimization_pass_registration", "//tensorflow/compiler/mlir/tensorflow:import_utils", "@llvm-project//llvm:support", "@llvm-project//mlir:IR", diff --git a/tensorflow/compiler/mlir/python/mlir.cc b/tensorflow/compiler/mlir/python/mlir.cc index d0f6e015922..f22fb519a64 100644 --- a/tensorflow/compiler/mlir/python/mlir.cc +++ b/tensorflow/compiler/mlir/python/mlir.cc @@ -112,7 +112,7 @@ std::string ExperimentalConvertSavedModelV1ToMlir( // Convert the SavedModelBundle to an MLIR module. 
mlir::MLIRContext context; - auto module_or = ConvertSavedModelV1ToMlir(bundle, &context); + auto module_or = ConvertSavedModelV1ToMlir(bundle, {}, &context); if (!module_or.status().ok()) { Set_TF_Status_from_Status(status, module_or.status()); return "// error"; diff --git a/tensorflow/compiler/mlir/python/mlir_wrapper/BUILD b/tensorflow/compiler/mlir/python/mlir_wrapper/BUILD new file mode 100644 index 00000000000..78f4312da46 --- /dev/null +++ b/tensorflow/compiler/mlir/python/mlir_wrapper/BUILD @@ -0,0 +1,41 @@ +load("//tensorflow:tensorflow.bzl", "tf_python_pybind_extension") + +package(licenses = ["notice"]) + +tf_python_pybind_extension( + name = "mlir_wrapper", + srcs = [ + "attrs.cc", + "basic_classes.cc", + "builders.cc", + "mlir_wrapper.cc", + "mlir_wrapper.h", + "ops.cc", + "types.cc", + ], + module_name = "mlir_wrapper", + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/compiler/mlir/tensorflow", + "//tensorflow/compiler/mlir/tensorflow:tensorflow_types", + "//tensorflow/python:pybind11_lib", + "//tensorflow/python:pybind11_status", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:StandardOps", + "@pybind11", + ], +) + +tf_python_pybind_extension( + name = "filecheck_wrapper", + srcs = ["filecheck_wrapper.cc"], + module_name = "filecheck_wrapper", + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/python:pybind11_lib", + "//tensorflow/python:pybind11_status", + "@llvm-project//llvm:support", + "@pybind11", + ], +) diff --git a/tensorflow/compiler/mlir/python/mlir_wrapper/attrs.cc b/tensorflow/compiler/mlir/python/mlir_wrapper/attrs.cc new file mode 100644 index 00000000000..ca7faf2e1d3 --- /dev/null +++ b/tensorflow/compiler/mlir/python/mlir_wrapper/attrs.cc @@ -0,0 +1,25 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/Types.h" // from @llvm-project +#include "tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.h" + +void init_attrs(py::module& m) { + py::class_(m, "Attribute"); + py::class_(m, "IntegerAttr") + .def("get", + py::overload_cast(&mlir::IntegerAttr::get)); +} diff --git a/tensorflow/compiler/mlir/python/mlir_wrapper/basic_classes.cc b/tensorflow/compiler/mlir/python/mlir_wrapper/basic_classes.cc new file mode 100644 index 00000000000..25adb44fe1d --- /dev/null +++ b/tensorflow/compiler/mlir/python/mlir_wrapper/basic_classes.cc @@ -0,0 +1,49 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "llvm/Support/FileCheck.h" +#include "mlir/IR/Block.h" // from @llvm-project +#include "mlir/IR/Location.h" // from @llvm-project +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/IR/Region.h" // from @llvm-project +#include "tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.h" + +void init_basic_classes(py::module& m) { + py::class_(m, "MLIRContext").def(py::init<>()); + + py::class_(m, "Location"); + + py::class_(m, "UnknownLoc") + .def("get", &mlir::UnknownLoc::get); + + py::class_(m, "Region") + .def("back", &mlir::Region::back, py::return_value_policy::reference) + .def("front", &mlir::Region::front, py::return_value_policy::reference) + .def("add_block", [](mlir::Region& r) { r.push_back(new mlir::Block); }) + .def("push_back", &mlir::Region::push_back) + .def("size", [](mlir::Region& r) { return r.getBlocks().size(); }) + .def("front", &mlir::Region::front, py::return_value_policy::reference); + py::class_(m, "Block_Iterator"); + py::class_(m, "Block") + .def("new", ([]() { return new mlir::Block; }), + py::return_value_policy::reference) + .def("end", &mlir::Block::end) + .def("addArgument", &mlir::Block::addArgument); + + py::class_(m, "Value").def("getType", &mlir::Value::getType); + py::class_(m, "OpResult"); + py::class_(m, "BlockArgument"); +} diff --git a/tensorflow/compiler/mlir/python/mlir_wrapper/builders.cc b/tensorflow/compiler/mlir/python/mlir_wrapper/builders.cc new file mode 100644 index 00000000000..338f17ed6df --- /dev/null +++ b/tensorflow/compiler/mlir/python/mlir_wrapper/builders.cc @@ -0,0 +1,51 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mlir/IR/Builders.h" // from @llvm-project + +#include "tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.h" + +void init_builders(py::module& m) { + py::class_(m, "Builder") + .def(py::init()) + .def("getFunctionType", + [](mlir::Builder& b, std::vector inputs, + std::vector outputs) { + return b.getFunctionType(llvm::ArrayRef(inputs), + llvm::ArrayRef(outputs)); + }); + py::class_(m, "OpBuilder") + .def(py::init()) + .def(py::init()) + .def(py::init()) + .def(py::init()) + .def("getUnknownLoc", &mlir::OpBuilder::getUnknownLoc) + .def("setInsertionPoint", + py::overload_cast( + &mlir::OpBuilder::setInsertionPoint)) + .def("saveInsertionPoint", &mlir::OpBuilder::saveInsertionPoint) + .def("restoreInsertionPoint", &mlir::OpBuilder::restoreInsertionPoint) + .def( + "createOperation", + [](mlir::OpBuilder& opb, mlir::OperationState& state) { + return opb.createOperation(state); + }, + py::return_value_policy::reference) + .def("getContext", &mlir::OpBuilder::getContext, + py::return_value_policy::reference); + + py::class_(m, "OpBuilder_InsertionPoint") + .def("getBlock", &mlir::OpBuilder::InsertPoint::getBlock); +} diff --git a/tensorflow/compiler/mlir/python/mlir_wrapper/filecheck_wrapper.cc b/tensorflow/compiler/mlir/python/mlir_wrapper/filecheck_wrapper.cc new file mode 100644 index 00000000000..8a841856b72 --- /dev/null +++ b/tensorflow/compiler/mlir/python/mlir_wrapper/filecheck_wrapper.cc @@ -0,0 +1,36 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "llvm/Support/FileCheck.h" +#include "llvm/Support/SourceMgr.h" +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" +#include "tensorflow/python/lib/core/pybind11_lib.h" +#include "tensorflow/python/lib/core/pybind11_status.h" + +PYBIND11_MODULE(filecheck_wrapper, m) { + m.def("check", [](std::string input, std::string check) { + llvm::FileCheckRequest fcr; + llvm::FileCheck fc(fcr); + llvm::SourceMgr SM = llvm::SourceMgr(); + SM.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(input), + llvm::SMLoc()); + SM.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(check), + llvm::SMLoc()); + llvm::Regex regex = fc.buildCheckPrefixRegex(); + fc.readCheckFile(SM, llvm::StringRef(check), regex); + return fc.checkInput(SM, llvm::StringRef(input)); + }); +} diff --git a/tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.cc b/tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.cc new file mode 100644 index 00000000000..6f468cd4267 --- /dev/null +++ b/tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.cc @@ -0,0 +1,38 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.h" + +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/python/lib/core/pybind11_lib.h" +#include "tensorflow/python/lib/core/pybind11_status.h" + +PYBIND11_MODULE(mlir_wrapper, m) { + m.def("registerDialects", []() { + mlir::registerDialect(); + mlir::registerDialect(); + mlir::registerDialect(); + }); + + init_basic_classes(m); + init_types(m); + init_builders(m); + init_ops(m); + init_attrs(m); +} diff --git a/tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.h b/tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.h new file mode 100644 index 00000000000..562c59b43e1 --- /dev/null +++ b/tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.h @@ -0,0 +1,30 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_MLIR_PYTHON_MLIR_WRAPPER_MLIR_WRAPPER_H +#define TENSORFLOW_COMPILER_MLIR_PYTHON_MLIR_WRAPPER_MLIR_WRAPPER_H + +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" + +namespace py = pybind11; + +void init_basic_classes(py::module& m); +void init_types(py::module& m); +void init_builders(py::module& m); +void init_ops(py::module& m); +void init_attrs(py::module& m); + +#endif // TENSORFLOW_COMPILER_MLIR_PYTHON_MLIR_WRAPPER_MLIR_WRAPPER_H diff --git a/tensorflow/compiler/mlir/python/mlir_wrapper/ops.cc b/tensorflow/compiler/mlir/python/mlir_wrapper/ops.cc new file mode 100644 index 00000000000..4432829653e --- /dev/null +++ b/tensorflow/compiler/mlir/python/mlir_wrapper/ops.cc @@ -0,0 +1,194 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project + +#include "mlir/IR/Function.h" // from @llvm-project +#include "mlir/IR/Operation.h" // from @llvm-project +#include "tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" + +void init_ops(py::module& m) { + py::class_>( + m, "Operation") + .def("getRegion", &mlir::Operation::getRegion, + py::return_value_policy::reference) + .def("getResult", &mlir::Operation::getResult) + .def("dump", &mlir::Operation::dump) + .def("getNumResults", &mlir::Operation::getNumResults); + + py::class_(m, "OperationState") + .def(py::init([](mlir::Location loc, std::string name) { + return mlir::OperationState(loc, llvm::StringRef(name)); + })) + .def("addTypes", + [](mlir::OperationState& state, std::vector tys) { + state.addTypes(mlir::ArrayRef(tys)); + }) + .def("addOperands", + [](mlir::OperationState& os, std::vector ops) { + os.addOperands(mlir::ArrayRef(ops)); + }) + .def("addRegion", py::overload_cast<>(&mlir::OperationState::addRegion), + py::return_value_policy::reference); + + py::class_(m, "ModuleOp") + .def("create", + [](mlir::Location loc) { return mlir::ModuleOp::create(loc); }) + .def("push_back", + [](mlir::ModuleOp& m, mlir::FuncOp f) { m.push_back(f); }) + .def("dump", &mlir::ModuleOp::dump) + .def("getAsStr", [](mlir::ModuleOp& m) { + std::string str; + llvm::raw_string_ostream os(str); + m.print(os); + return os.str(); + }); + + py::class_(m, "FuncOp") + .def("create", + [](mlir::Location location, std::string name, + mlir::FunctionType type) { + auto func = mlir::FuncOp::create(location, name, type); + func.addEntryBlock(); + return func; + }) + .def( + "getBody", + [](mlir::FuncOp& f) -> mlir::Region& { return f.getBody(); }, + py::return_value_policy::reference) + .def("getArguments", + [](mlir::FuncOp& f) { return f.getArguments().vec(); }) + 
.def("getName", [](mlir::FuncOp& f) { return f.getName().str(); }) + .def("getType", &mlir::FuncOp::getType); + + py::class_(m, "ReturnOp") + .def("create", + [](mlir::OpBuilder& opb, mlir::Location loc, + std::vector values) -> mlir::Operation* { + return opb + .create(loc, + mlir::ArrayRef(values)) + .getOperation(); + }); + + // mlir::TF::AddOp + py::class_(m, "Tf_AddV2Op") + .def("create", + [](mlir::OpBuilder& opb, mlir::Location loc, mlir::Value x, + mlir::Value y) -> mlir::Operation* { + return opb.create(loc, x, y).getOperation(); + }); + + py::class_(m, "Tf_AnyOp") + .def("create", + [](mlir::OpBuilder& opb, mlir::Location loc, mlir::Value input, + mlir::Value reduction_indices, + bool keep_dims = false) -> mlir::Operation* { + return opb + .create(loc, opb.getI1Type(), input, + reduction_indices, keep_dims) + .getOperation(); + }); + + // mlir::TF::ConstOp + py::class_(m, "Tf_ConstOp") + .def("create", + [](mlir::OpBuilder& opb, mlir::Location loc, + mlir::Attribute value) -> mlir::Operation* { + return opb.create(loc, value).getOperation(); + }); + + // mlir::TF::EqualOp + py::class_(m, "Tf_EqualOp") + .def("create", + [](mlir::OpBuilder& opb, mlir::Location loc, mlir::Value x, + mlir::Value y) -> mlir::Operation* { + return opb + .create(loc, x, y, opb.getBoolAttr(true)) + .getOperation(); + }); + + // mlir::TF::GreaterEqualOp + py::class_(m, "Tf_GreaterEqualOp") + .def("create", + [](mlir::OpBuilder& opb, mlir::Location loc, mlir::Value x, + mlir::Value y) -> mlir::Operation* { + return opb.create(loc, x, y) + .getOperation(); + }); + + // mlir::TF::GreaterOp + py::class_(m, "Tf_GreaterOp") + .def("create", + [](mlir::OpBuilder& opb, mlir::Location loc, mlir::Value x, + mlir::Value y) -> mlir::Operation* { + return opb.create(loc, x, y).getOperation(); + }); + + // mlir::TF::LegacyCallOp + py::class_(m, "Tf_LegacyCallOp") + .def("create", + [](mlir::OpBuilder& opb, mlir::Location loc, + std::vector output, std::vector args, + std::string f) -> 
mlir::Operation* { + return opb + .create( + loc, mlir::ArrayRef(output), + mlir::ArrayRef(args), mlir::StringRef(f)) + .getOperation(); + }); + + // mlir::TF::LessEqualOp + py::class_(m, "Tf_LessEqualOp") + .def("create", + [](mlir::OpBuilder& opb, mlir::Location loc, mlir::Value x, + mlir::Value y) -> mlir::Operation* { + return opb.create(loc, x, y).getOperation(); + }); + + // mlir::TF::LessOp + py::class_(m, "Tf_LessOp") + .def("create", + [](mlir::OpBuilder& opb, mlir::Location loc, mlir::Value x, + mlir::Value y) -> mlir::Operation* { + return opb.create(loc, x, y).getOperation(); + }); + + // mlir::TF::NegOp + py::class_(m, "Tf_NegOp") + .def("create", + [](mlir::OpBuilder& opb, mlir::Location loc, + mlir::Value x) -> mlir::Operation* { + return opb.create(loc, x).getOperation(); + }); + + py::class_(m, "Tf_NotEqualOp") + .def("create", [](mlir::OpBuilder& opb, mlir::Location loc, mlir::Value x, + mlir::Value y) { + return opb + .create( + loc, x, y, mlir::BoolAttr::get(true, opb.getContext())) + .getOperation(); + }); + + // mlir::TF::SubOp + py::class_(m, "Tf_SubOp") + .def("create", + [](mlir::OpBuilder& opb, mlir::Location loc, mlir::Value x, + mlir::Value y) -> mlir::Operation* { + return opb.create(loc, x, y).getOperation(); + }); +} diff --git a/tensorflow/compiler/mlir/python/mlir_wrapper/types.cc b/tensorflow/compiler/mlir/python/mlir_wrapper/types.cc new file mode 100644 index 00000000000..2be67f8e93e --- /dev/null +++ b/tensorflow/compiler/mlir/python/mlir_wrapper/types.cc @@ -0,0 +1,48 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" + +void init_types(py::module& m) { + // Type + py::class_ Type(m, "Type"); + Type.def("getKind", &mlir::Type::getKind); + + // Type Enums + py::enum_(Type, "StandardTypes_Kind") + .value("BF16", mlir::StandardTypes::BF16); + + // Type Sub-classes + py::class_(m, "FunctionType") + .def("getResults", + [](mlir::FunctionType& ft) { return ft.getResults().vec(); }); + + py::class_(m, "FloatType") + .def("get", &mlir::FloatType::get); + + py::class_(m, "IntegerType") + .def("get", py::overload_cast( + &mlir::IntegerType::get)); + + py::class_(m, "UnrankedTensorType") + .def("get", &mlir::UnrankedTensorType::get); + + py::class_(m, "RankedTensorType") + .def("get", [](std::vector shape, mlir::Type ty) { + return mlir::RankedTensorType::get(mlir::ArrayRef(shape), ty); + }); +} diff --git a/tensorflow/compiler/mlir/runlit.cfg.py b/tensorflow/compiler/mlir/runlit.cfg.py index 6d3131a781c..f1271d0da24 100644 --- a/tensorflow/compiler/mlir/runlit.cfg.py +++ b/tensorflow/compiler/mlir/runlit.cfg.py @@ -70,9 +70,9 @@ tool_dirs = config.mlir_tf_tools_dirs + [ ] tool_names = [ 'mlir-opt', 'mlir-translate', 'tf-opt', 'tf_tfl_translate', - 'flatbuffer_to_string', 'flatbuffer_translate', 'tf-mlir-translate', - 'mlir-tflite-runner', 'tfcompile', 'json_to_flatbuffer', 'xla-gpu-opt', - 'xla-opt' + 'tf_tfjs_translate', 
'flatbuffer_to_string', 'flatbuffer_translate', + 'tf-mlir-translate', 'mlir-tflite-runner', 'tfcompile', + 'json_to_flatbuffer', 'xla-gpu-opt', 'xla-opt' ] tools = [ToolSubst(s, unresolved='ignore') for s in tool_names] llvm_config.add_tool_substitutions(tools, tool_dirs) diff --git a/tensorflow/compiler/mlir/runlit.site.cfg.py b/tensorflow/compiler/mlir/runlit.site.cfg.py index 661e6200df3..3e7596c75d7 100644 --- a/tensorflow/compiler/mlir/runlit.site.cfg.py +++ b/tensorflow/compiler/mlir/runlit.site.cfg.py @@ -44,6 +44,7 @@ mlir_tf_tools_dirs = [ 'tensorflow/compiler/mlir', 'tensorflow/compiler/mlir/lite', 'tensorflow/compiler/mlir/tensorflow', + 'tensorflow/compiler/mlir/tfjs', 'tensorflow/compiler/mlir/xla', 'tensorflow/compiler/aot', 'tensorflow/compiler/xla/service/mlir_gpu', diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index 9099f2be2e1..54b560ed6ce 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -36,7 +36,7 @@ filegroup( "@llvm-project//mlir:OpBaseTdFiles", "@llvm-project//mlir:include/mlir/Interfaces/CallInterfaces.td", "@llvm-project//mlir:include/mlir/Interfaces/InferTypeOpInterface.td", - "@llvm-project//mlir:include/mlir/Interfaces/SideEffects.td", + "@llvm-project//mlir:include/mlir/Interfaces/SideEffectInterfaces.td", ], ) @@ -556,7 +556,7 @@ cc_library( deps = [ ":tensorflow", "@llvm-project//mlir:IR", - "@llvm-project//mlir:LoopOpsTransforms", + "@llvm-project//mlir:SCFTransforms", ], alwayslink = 1, ) @@ -823,6 +823,7 @@ cc_library( ":mangling_util", ":tensorflow_attributes", ":tensorflow_types", + "//tensorflow/compiler/xla:util", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", @@ -1074,7 +1075,7 @@ genrule( srcs = [ "@llvm-project//mlir:include/mlir/Interfaces/CallInterfaces.td", "@llvm-project//mlir:include/mlir/Interfaces/InferTypeOpInterface.td", - 
"@llvm-project//mlir:include/mlir/Interfaces/SideEffects.td", + "@llvm-project//mlir:include/mlir/Interfaces/SideEffectInterfaces.td", "@llvm-project//mlir:include/mlir/IR/OpBase.td", "ir/tf_generated_ops.td", "ir/tf_op_base.td", diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index 2b3dd529c3b..bddf064f5c6 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -192,6 +192,44 @@ retained with length 1. let verifier = [{ return Verify(*this); }]; } +def TF_AllToAllOp : TF_Op<"AllToAll", [NoSideEffect]> { + let summary = "An Op to exchange data across TPU replicas."; + + let description = [{ +On each replica, the input is split into `split_count` blocks along +`split_dimension` and send to the other replicas given group_assignment. After +receiving `split_count` - 1 blocks from other replicas, we concatenate the +blocks along `concat_dimension` as the output. 
+ +For example, suppose there are 2 TPU replicas: +replica 0 receives input: `[[A, B]]` +replica 1 receives input: `[[C, D]]` + +group_assignment=`[[0, 1]]` +concat_dimension=0 +split_dimension=1 +split_count=2 + +replica 0's output: `[[A], [C]]` +replica 1's output: `[[B], [D]]` + }]; + + let arguments = (ins + TensorOf<[BF16, F16, F32, F64, I1, I16, I32, I64, I8, TF_Complex128, TF_Complex64, TF_Qint32, TF_Qint8, TF_Quint8, TF_Uint16, TF_Uint32, TF_Uint64, TF_Uint8]>:$input, + I32Tensor:$group_assignment, + + I64Attr:$concat_dimension, + I64Attr:$split_dimension, + I64Attr:$split_count + ); + + let results = (outs + TensorOf<[BF16, F16, F32, F64, I1, I16, I32, I64, I8, TF_Complex128, TF_Complex64, TF_Qint32, TF_Qint8, TF_Quint8, TF_Uint16, TF_Uint32, TF_Uint64, TF_Uint8]>:$output + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + def TF_AngleOp : TF_Op<"Angle", [NoSideEffect, SameOperandsAndResultShape]> { let summary = "Returns the argument of a complex number."; @@ -1217,7 +1255,7 @@ that are not a number (NaN) or infinity (Inf). Otherwise, passes `tensor` as-is. TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_ClipByValueOp : TF_Op<"ClipByValue", [NoSideEffect, SameOperandsAndResultType]> { +def TF_ClipByValueOp : TF_Op<"ClipByValue", [NoSideEffect]> { let summary = "Clips tensor values to a specified min and max."; let description = [{ @@ -1408,6 +1446,30 @@ tf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j] let hasCanonicalizer = 1; } +def TF_ConjugateTransposeOp : TF_Op<"ConjugateTranspose", [NoSideEffect]> { + let summary = [{ +Shuffle dimensions of x according to a permutation and conjugate the result. + }]; + + let description = [{ +The output `y` has the same rank as `x`. 
The shapes of `x` and `y` satisfy: + `y.shape[i] == x.shape[perm[i]] for i in [0, 1, ..., rank(x) - 1]` + `y[i,j,k,...,s,t,u] == conj(x[perm[i], perm[j], perm[k],...,perm[s], perm[t], perm[u]])` + }]; + + let arguments = (ins + TF_Tensor:$x, + TF_I32OrI64Tensor:$perm + ); + + let results = (outs + TF_Tensor:$y + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; + TF_DerivedOperandTypeAttr Tperm = TF_DerivedOperandTypeAttr<1>; +} + def TF_Conv2DOp : TF_Op<"Conv2D", [NoSideEffect, TF_LayoutSensitiveInterface]> { let summary = [{ Computes a 2-D convolution given 4-D `input` and `filter` tensors. @@ -1682,7 +1744,28 @@ Given an input tensor, this function computes hyperbolic cosine of every TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_CrossReplicaSumOp : TF_Op<"CrossReplicaSum", [AllTypesMatch<["input", "output"]>, NoSideEffect]> { +def TF_CrossOp : TF_Op<"Cross", [NoSideEffect]> { + let summary = "Compute the pairwise cross product."; + + let description = [{ +`a` and `b` must be the same shape; they can either be simple 3-element vectors, +or any shape where the innermost dimension is 3. In the latter case, each pair +of corresponding 3-element vectors is cross-multiplied independently. + }]; + + let arguments = (ins + TF_IntOrFpTensor:$a, + TF_IntOrFpTensor:$b + ); + + let results = (outs + TF_IntOrFpTensor:$product + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + +def TF_CrossReplicaSumOp : TF_Op<"CrossReplicaSum", [NoSideEffect, TF_AllTypesMatch<["input", "output"]>]> { let summary = "An Op to sum inputs across replicated TPU instances."; let description = [{ @@ -1706,7 +1789,7 @@ and `B, D, F, H` as group 1. 
Thus we get the outputs: TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_CumsumOp : TF_Op<"Cumsum", [AllTypesMatch<["x", "out"]>, NoSideEffect]> { +def TF_CumsumOp : TF_Op<"Cumsum", [NoSideEffect, TF_AllTypesMatch<["x", "out"]>]> { let summary = "Compute the cumulative sum of the tensor `x` along `axis`."; let description = [{ @@ -3256,8 +3339,8 @@ Gather slices from `params` axis `axis` according to `indices`. let description = [{ `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). -Produces an output tensor with shape `params.shape[:axis] + indices.shape + -params.shape[axis + 1:]` where: +Produces an output tensor with shape `params.shape[:axis] + +indices.shape[batch_dims:] + params.shape[axis + 1:]` where: ```python # Scalar indices (output is rank(params) - 1). @@ -3542,6 +3625,31 @@ tf.imag(input) ==> [4.75, 5.75] TF_DerivedResultTypeAttr Tout = TF_DerivedResultTypeAttr<0>; } +def TF_InplaceUpdateOp : TF_Op<"InplaceUpdate", [NoSideEffect]> { + let summary = [{ + Create a copy of `x` with the updated specified rows 'i' with values 'v'. + + }]; + + let description = [{ + Creates a copy of tensor 'x' and updates the columns specified in tensor 'i' + with the values 'v'. Originally this function was mutative however for + compilation we make this operation create / operate on a copy. + }]; + + let arguments = (ins + TF_Tensor:$x, + I32Tensor:$i, + TF_Tensor:$v + ); + + let results = (outs + TF_Tensor:$y + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + def TF_InvOp : TF_Op<"Inv", [NoSideEffect, SameOperandsAndResultType]> { let summary = "Computes the reciprocal of x element-wise."; @@ -4242,7 +4350,7 @@ cublas. 
TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_MatrixBandPartOp : TF_Op<"MatrixBandPart", [AllTypesMatch<["input", "band"]>, NoSideEffect]> { +def TF_MatrixBandPartOp : TF_Op<"MatrixBandPart", [NoSideEffect, TF_AllTypesMatch<["input", "band"]>]> { let summary = [{ Copy a tensor setting everything outside a central band in each innermost matrix to zero. }]; diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td index cb17341cefd..dbd8ab0fae2 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td @@ -23,7 +23,7 @@ limitations under the License. #define TF_OP_BASE include "mlir/IR/OpBase.td" -include "mlir/Interfaces/SideEffects.td" +include "mlir/Interfaces/SideEffectInterfaces.td" include "tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.td" //===----------------------------------------------------------------------===// @@ -70,6 +70,16 @@ class TF_OpIsBroadcastableToRes : And<[ "$_op.getOperand(" # opId # ").getType(), " "$_op.getResult(" # resId # ").getType())">]>; + +class TF_AllTypesMatchPred values> : + CPred<"TF::AreCastCompatible(llvm::makeArrayRef({"# StrJoin.result #"}))">; + +class TF_AllTypesMatch names> : + PredOpTrait< + "all of {" # StrJoin.result # "} have dynamically equal types ", + TF_AllTypesMatchPred< + !foreach(n, names, !subst("$_self", "$" # n, "$_self.getType()"))>>; + //===----------------------------------------------------------------------===// // TensorFlow op definitions //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc index 3a4e9e5985e..82ddc80875a 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc @@ -110,47 +110,6 @@ static inline bool HasRankAtMost(Value value, 
int64_t rank) { return !type || type.getRank() <= rank; } -// Returns true if the given pair of TensorFlow types can be cast to one -// another. In other words, a single run-time value is legal for both the types. -// For example, tensor<*xf32> and tensor<3xf32> are cast compatible. -static bool AreCastCompatible(Type a, Type b) { - if (TensorCastOp::areCastCompatible(a, b)) return true; - - // Resource types may optionally contain subtypes information that does not - // match. Check subtypes compatibility when possible, otherwise treat them as - // compatible. - auto a_or_element_type = getElementTypeOrSelf(a); - auto b_or_element_type = getElementTypeOrSelf(b); - - auto a_kind = a_or_element_type.getKind(); - auto b_kind = b_or_element_type.getKind(); - - if (a_kind == TensorFlowTypes::RESOURCE && - b_kind == TensorFlowTypes::RESOURCE) { - auto a_resource_type = a_or_element_type.dyn_cast(); - auto b_resource_type = b_or_element_type.dyn_cast(); - bool a_has_subtype = !a_resource_type.getSubtypes().empty(); - bool b_has_subtype = !b_resource_type.getSubtypes().empty(); - - if (!a_has_subtype || !b_has_subtype) return true; - - assert(a_resource_type.getSubtypes().size() <= 1 && - "Resource type must have at most one subtype"); - assert(b_resource_type.getSubtypes().size() <= 1 && - "Resource type must have at most one subtype"); - - return TensorCastOp::areCastCompatible( - a_resource_type.getSubtypes().front(), - b_resource_type.getSubtypes().front()); - } - - // Variant types may optionally contain subtypes information that need not - // match. It is also not possible to compare subtypes for compatibility as - // their interpretation depends on the ops operating on them. So, accept all - // pairs of variant types. 
- return a_kind == TensorFlowTypes::VARIANT && - b_kind == TensorFlowTypes::VARIANT; -} static bool IsUnknownDimOrRank(int64_t dim_or_rank) { return dim_or_rank == -1; @@ -984,20 +943,17 @@ void ConstOp::build(OpBuilder &builder, OperationState &result, Type type, LogicalResult ConstOp::inferReturnTypes( MLIRContext *context, Optional location, ValueRange operands, - ArrayRef attributes, RegionRange regions, + DictionaryAttr attributes, RegionRange regions, SmallVectorImpl &inferredReturnTypes) { - for (NamedAttribute named_attr : attributes) { - if (named_attr.first.strref() != "value") continue; - auto value = named_attr.second; - if (auto elem_attr = value.dyn_cast()) { - inferredReturnTypes.assign({elem_attr.getType()}); - return success(); - } - return emitOptionalError(location, - "attribute 'value' failed to satisfy constraint: " - "constant vector/tensor"); + auto value = attributes.get("value"); + if (!value) return emitOptionalError(location, "missing attribute 'value'"); + if (auto elem_attr = value.dyn_cast()) { + inferredReturnTypes.assign({elem_attr.getType()}); + return success(); } - return emitOptionalError(location, "missing attribute 'value'"); + return emitOptionalError(location, + "attribute 'value' failed to satisfy constraint: " + "constant vector/tensor"); } //===----------------------------------------------------------------------===// @@ -1416,7 +1372,7 @@ static LogicalResult Verify(DynamicStitchOp op) { auto expected_out_ty = RankedTensorType::get(expected_shape, out_ty.getElementType()); - if (!AreCastCompatible(out_ty, expected_out_ty)) { + if (!AreCastCompatible({out_ty, expected_out_ty})) { return op.emitOpError() << "has invalid output type; should be " "compatible with inferred type " << expected_out_ty; @@ -1817,14 +1773,14 @@ static LogicalResult Verify(IfOp op) { for (unsigned i = 0; i < expectedNumInputs; ++i) { auto operandType = op.getOperand(i + 1).getType().cast(); auto thenInputType = thenFuncType.getInput(i).cast(); - if 
(!AreCastCompatible(operandType, thenInputType)) + if (!AreCastCompatible({operandType, thenInputType})) return op.emitError( llvm::formatv("then branch input type {0} is incompatible with " "operand type {1} at index {2}", thenInputType, operandType, i)); auto elseInputType = elseFuncType.getInput(i).cast(); - if (!AreCastCompatible(operandType, elseInputType)) + if (!AreCastCompatible({operandType, elseInputType})) return op.emitError( llvm::formatv("else branch input type {0} is incompatible with " "operand type {1} at index {2}", @@ -1832,7 +1788,7 @@ static LogicalResult Verify(IfOp op) { // If branches have incompatible input types that means that no tensor can // serve as input to both the functions. Hence, the op is invalid. - if (!AreCastCompatible(thenInputType, elseInputType)) + if (!AreCastCompatible({thenInputType, elseInputType})) return op.emitError(llvm::formatv( "branches inputs have incompatible types {0} and {1} at index {2}", thenInputType, elseInputType, i)); @@ -1848,14 +1804,14 @@ static LogicalResult Verify(IfOp op) { for (unsigned i = 0; i < expectedNumResults; ++i) { auto resultType = op.getResult(i).getType().cast(); auto thenResultType = thenFuncType.getResult(i).cast(); - if (!AreCastCompatible(thenResultType, resultType)) + if (!AreCastCompatible({thenResultType, resultType})) return op.emitError( llvm::formatv("then branch result type {0} is incompatible with op " "result type {1} at index {2}", thenResultType, resultType, i)); auto elseResultType = elseFuncType.getResult(i).cast(); - if (!AreCastCompatible(elseResultType, resultType)) + if (!AreCastCompatible({elseResultType, resultType})) return op.emitError( llvm::formatv("else branch result type {0} is incompatible with op " "result type {1} at index {2}", @@ -3792,7 +3748,7 @@ static LogicalResult Verify(WhileOp op) { auto aType = a.second[idx]; auto bType = b.second[idx]; - if (!AreCastCompatible(aType, bType)) + if (!AreCastCompatible({aType, bType})) return 
op.emitError(llvm::formatv( "{0} type {1} is incompatible with {2} type {3} at index {4}", a.first, aType, b.first, bType, idx)); diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_types.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_types.cc index 6c3cd7fac92..d312e5e409b 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_types.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_types.cc @@ -28,6 +28,134 @@ llvm::Optional> GetShape(mlir::Value value) { if (shaped_type.hasRank()) return shaped_type.getShape(); return llvm::None; } + +// Merges cast compatible shapes and returns a more refined shape. The two +// shapes are cast compatible if they have the same rank and at each dimension, +// either both have same size or one of them is dynamic. Returns false if the +// given shapes are not cast compatible. The refined shape is same or more +// precise than the two input shapes. +bool GetCastCompatibleShape(llvm::ArrayRef a_shape, + llvm::ArrayRef b_shape, + llvm::SmallVectorImpl* refined_shape) { + if (a_shape.size() != b_shape.size()) return false; + int64_t rank = a_shape.size(); + refined_shape->reserve(rank); + for (auto dims : llvm::zip(a_shape, b_shape)) { + int64_t dim1 = std::get<0>(dims); + int64_t dim2 = std::get<1>(dims); + + if (mlir::ShapedType::isDynamic(dim1)) { + refined_shape->push_back(dim2); + continue; + } + if (mlir::ShapedType::isDynamic(dim2)) { + refined_shape->push_back(dim1); + continue; + } + if (dim1 == dim2) { + refined_shape->push_back(dim1); + continue; + } + return false; + } + return true; +} + +// Given two types `a` and `b`, returns a refined type which is cast compatible +// with both `a` and `b` and is equal to or more precise than both of them. It +// returns empty Type if the input types are not cast compatible. +// +// The two types are considered cast compatible if they have dynamically equal +// shapes and element type. For element types that do not have subtypes, they +// must be equal. 
However for TensorFlow types such as Resource and Variant, +// that also have subtypes, we recursively check for subtype compatibility for +// Resource types and assume all variant types are cast compatible. If either +// one of `a` or `b` has empty subtypes, they are considered cast compatible. +// +// The returned type is same or more precise than the input types. For example, +// if `a` and `b` are cast compatible types tensor<2x?x?xf32> and +// tensor<?x4x?xf32> respectively, the returned type is tensor<2x4x?xf32>. +// +// Provides option to ignore ref types on 'a'. This is useful for TF ops that +// might allow operands to either be same as result type or be a ref type +// corresponding to it. +mlir::Type GetCastCompatibleType(mlir::Type a, mlir::Type b, + bool may_ignore_ref_type_a) { + // Fast path if everything is equal. + if (a == b) return b; + + auto a_tt = a.dyn_cast(); + auto b_tt = b.dyn_cast(); + + // If only one of a or b is a tensor type, they are incompatible. + if (static_cast(a_tt) ^ static_cast(b_tt)) return nullptr; + + // For non-tensor types, we do not need to worry about shape and can return + // early. + if (!a_tt && !b_tt) { + // Remove ref types. + if (may_ignore_ref_type_a) { + if (auto ref_type = a.dyn_cast()) { + a = ref_type.RemoveRef(); + if (a == b) return a; + } + } + if (a.getKind() != b.getKind()) return nullptr; + + // If either is not a type that contains subtypes then the types are not cast + // compatible. + auto a_wst = a.dyn_cast(); + auto b_wst = b.dyn_cast(); + if (!a_wst || !b_wst) return nullptr; + + // For Variant types we are more permissive right now and accept all pairs + // of Variant types. If we are more constrained and check compatibility of + // subtypes, we might reject valid graphs. + // TODO(prakalps): Variant doesn't have a subtype, we assign it + // one, so we should only assign it one when we know the subtype. Then we + // can be more constrained and check subtypes for cast compatibility as + // well.
+ if (a.isa()) return a; + + // For Resource types, we recursively check the subtypes for cast + // compatibility, if possible. Otherwise treat them as compatible. + auto a_wst_st = a_wst.GetSubtypes(); + auto b_wst_st = b_wst.GetSubtypes(); + if (a_wst_st.empty() || b_wst_st.empty()) return a; + if (a_wst_st.size() != b_wst_st.size()) return nullptr; + llvm::SmallVector refined_subtypes; + for (auto subtypes : llvm::zip(a_wst_st, b_wst_st)) { + mlir::Type refined_st = + GetCastCompatibleType(std::get<0>(subtypes), std::get<1>(subtypes), + /*may_ignore_ref_type_a=*/false); + if (!refined_st) return nullptr; + refined_subtypes.push_back(refined_st.cast()); + } + + return mlir::TF::ResourceType::get(refined_subtypes, a.getContext()); + } + + // For tensor types, check compatibility of both element type and shape. + mlir::Type refined_element_ty = GetCastCompatibleType( + a_tt.getElementType(), b_tt.getElementType(), may_ignore_ref_type_a); + if (!refined_element_ty) return nullptr; + + if (!a_tt.hasRank() && !b_tt.hasRank()) { + return mlir::UnrankedTensorType::get(refined_element_ty); + } + if (!a_tt.hasRank()) { + return mlir::RankedTensorType::get(b_tt.getShape(), refined_element_ty); + } + if (!b_tt.hasRank()) { + return mlir::RankedTensorType::get(a_tt.getShape(), refined_element_ty); + } + + llvm::SmallVector refined_shape; + if (!GetCastCompatibleShape(a_tt.getShape(), b_tt.getShape(), &refined_shape)) + return nullptr; + + return mlir::RankedTensorType::get(refined_shape, refined_element_ty); +} } // namespace namespace mlir { @@ -224,44 +352,16 @@ bool BroadcastCompatible(ArrayRef lhs, ArrayRef rhs) { bool HasCompatibleElementTypes(Type lhs, Type rhs, bool may_ignore_ref_type_lhs) { - // Fast path if everything is equal. - if (lhs == rhs) return true; + return GetCastCompatibleType(lhs, rhs, may_ignore_ref_type_lhs) != nullptr; +} - // In TF all values are tensors. - auto lhs_tt = lhs.cast(); - auto rhs_tt = rhs.cast(); - - // Verify matching element types. 
These should be identical dynamically, - // so this allows for types not yet fully refined. - auto lhs_et = lhs_tt.getElementType(); - auto rhs_et = rhs_tt.getElementType(); - if (lhs_et == rhs_et) return true; - - // Remove ref types. - if (may_ignore_ref_type_lhs) { - if (auto ref_type = lhs_et.dyn_cast()) { - lhs_et = ref_type.RemoveRef(); - if (lhs_et == rhs_et) return true; - } - } - - if (lhs_et.getKind() != rhs_et.getKind()) return false; - - // If either is not type that contain subtypes then the element types don't - // match. - auto lhs_wst = lhs_et.dyn_cast(); - auto rhs_wst = rhs_et.dyn_cast(); - if (!lhs_wst || !rhs_wst) return false; - - // Consider the subtype recursively. - auto lhs_wst_st = lhs_wst.GetSubtypes(); - auto rhs_wst_st = rhs_wst.GetSubtypes(); - if (lhs_wst_st.empty() || rhs_wst_st.empty()) return true; - if (lhs_wst_st.size() != rhs_wst_st.size()) return false; - for (auto subtypes : llvm::zip(lhs_wst_st, rhs_wst_st)) { - if (!HasCompatibleElementTypes(std::get<0>(subtypes), - std::get<1>(subtypes))) - return false; +bool AreCastCompatible(ArrayRef types) { + Type common = types.front(); + for (auto type : types.drop_front()) { + Type refined_type = + GetCastCompatibleType(common, type, /*may_ignore_ref_type_a=*/false); + if (!refined_type) return false; + common = refined_type; } return true; } diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_types.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_types.h index d1e6a74a0c5..4c99aae4706 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_types.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_types.h @@ -313,6 +313,12 @@ bool BroadcastCompatible(ArrayRef lhs, ArrayRef rhs); bool HasCompatibleElementTypes(Type lhs, Type rhs, bool may_ignore_ref_type_lhs = false); +// Returns true if all TensorFlow types can be cast to one +// another. In other words, a single run-time value is legal for both the types. 
+// For example, tensor<*xf32>, tensor<?xf32> and tensor<3xf32> are cast +// compatible. +bool AreCastCompatible(ArrayRef types); + } // end namespace TF } // end namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir index 118ce2e8645..ffa287e0e53 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir @@ -881,20 +881,29 @@ func @testValidMatrixBandPartOpUnranked(%arg0: tensor<*xbf16>, %arg1: tensor, %arg1: tensor, %arg2: tensor) -> tensor<64x64xbf16> { - // expected-error @+1 {{op failed to verify that all of {input, band} have same type}} - %0 = "tf.MatrixBandPart"(%arg0, %arg1, %arg2) : (tensor<64x64x64xbf16>, tensor, tensor) -> tensor<64x64xbf16> - return %0 : tensor<64x64xbf16> +// Test valid tf.MatrixBandPart +// CHECK-LABEL: func @testValidMatrixBandPartOpUnrankedBand +func @testValidMatrixBandPartOpUnrankedBand(%arg0: tensor<64x64x64xbf16>, %arg1: tensor, %arg2: tensor) -> tensor<*xbf16> { + %0 = "tf.MatrixBandPart"(%arg0, %arg1, %arg2) : (tensor<64x64x64xbf16>, tensor, tensor) -> tensor<*xbf16> + return %0 : tensor<*xbf16> +} + +// ----- + +// Test valid tf.MatrixBandPart +// CHECK-LABEL: func @testValidMatrixBandPartOpCompatibleDynamicShapes +func @testValidMatrixBandPartOpCompatibleDynamicShapes(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { + %0 = "tf.MatrixBandPart"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor + return %0 : tensor } // ----- // Test invalid tf.MatrixBandPart -func @testInvalidMatrixBandPartOp(%arg0: tensor<64x64x64xbf16>, %arg1: tensor, %arg2: tensor) -> tensor<*xbf16> { - // expected-error @+1 {{op failed to verify that all of {input, band} have same type}} - %0 = "tf.MatrixBandPart"(%arg0, %arg1, %arg2) : (tensor<64x64x64xbf16>, tensor, tensor) -> tensor<*xbf16> - return %0 : tensor<*xbf16> +func @testInvalidMatrixBandPartOp(%arg0: tensor<64x64x64xbf16>, %arg1:
tensor, %arg2: tensor) -> tensor<64x64xbf16> { + // expected-error @+1 {{op failed to verify that all of {input, band} have dynamically equal types}} + %0 = "tf.MatrixBandPart"(%arg0, %arg1, %arg2) : (tensor<64x64x64xbf16>, tensor, tensor) -> tensor<64x64xbf16> + return %0 : tensor<64x64xbf16> } // ----- diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_head_tail_outside_compilation.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_head_tail_outside_compilation.mlir index 77ca08c089a..eb67bdcc914 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_head_tail_outside_compilation.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_head_tail_outside_compilation.mlir @@ -1,13 +1,17 @@ // RUN: tf-opt %s -split-input-file -verify-diagnostics -tf-tpu-extract-head-tail-outside-compilation | FileCheck %s --dump-input-on-failure -// Tests extraction of a single outside compiled cluster with no input or output dependecies. +// Tests extraction of outside compiled ops at the head of TPU computation.
-// CHECK-LABEL: func @nodep_single_head_outside_compilation -func @nodep_single_head_outside_compilation() -> () { - // CHECK: "tf.A" - // CHECK-NEXT: "tf_device.launch" - "tf_device.launch"() ( { - "tf.A"() {_xla_outside_compilation = "cluster1"} : () -> () +func @single_head_outside_compilation(%arg0 : tensor) -> () { + // CHECK: tf_device.launch + // CHECK: "tf.A" + // CHECK-NEXT: tf_device.return + // + // CHECK: "tf_device.cluster" + // CHECK: "tf.C" + // CHECK-NEXT: tf_device.return + "tf_device.cluster"() ( { + "tf.A"(%arg0) {_xla_outside_compilation = "cluster1"} : (tensor) -> () "tf.B"() : () -> () "tf.C"() : () -> () tf_device.return @@ -15,15 +19,62 @@ func @nodep_single_head_outside_compilation() -> () { return } -// CHECK-LABEL: func @nodep_multiple_head_outside_compilation -func @nodep_multiple_head_outside_compilation() -> () { - // CHECK: "tf.A" - // CHECK-NEXT: "tf.B" - // CHECK-NEXT: "tf_device.launch" - "tf_device.launch"() ( { - "tf.A"() {_xla_outside_compilation = "cluster1"} : () -> () - "tf.B"() {_xla_outside_compilation = "cluster1"} : () -> () - "tf.C"() : () -> () +// CHECK-LABEL: func @multiple_head_outside_compilation +func @multiple_head_outside_compilation(%arg0 : tensor) -> () { + // CHECK: %[[LAUNCH_OUT:.*]] = "tf_device.launch"() + // CHECK: %[[A_OUT:.*]] = "tf.A" + // CHECK: %[[B_OUT:.*]] = "tf.B"(%[[A_OUT]]) + // CHECK: "tf.C" + // CHECK-NEXT: tf_device.return %[[B_OUT]] + // + // CHECK: "tf_device.cluster" + // CHECK: "tf.D"(%[[LAUNCH_OUT]]) + // CHECK-NEXT: tf_device.return + "tf_device.cluster"() ( { + %0 = "tf.A"(%arg0) {_xla_outside_compilation = "cluster1"} : (tensor) -> (tensor) + %1 = "tf.B"(%0) {_xla_outside_compilation = "cluster1"} : (tensor) -> (tensor) + "tf.C"(%1, %arg0) {_xla_outside_compilation = "cluster1"} : (tensor, tensor) -> () + "tf.D"(%1) : (tensor) -> () + tf_device.return + }) {device = "tpu0", launch_attr = "launch_attr"} : () -> () + return +} + +// CHECK-LABEL: func 
@test_do_not_outside_compiled_ops_in_middle +func @test_do_not_outside_compiled_ops_in_middle(%arg0 : tensor) -> () { + // CHECK-NOT: tf_device.launch + // CHECK: "tf_device.cluster" + // CHECK-NEXT: "tf.A" + // CHECK-NEXT: "tf.B" + // CHECK-NEXT: "tf.C" + // CHECK-NEXT: tf_device.return + "tf_device.cluster"() ( { + %0 = "tf.A"(%arg0) {} : (tensor) -> (tensor) + %1 = "tf.B"(%0) {_xla_outside_compilation = "cluster1"}: (tensor) -> (tensor) + "tf.C"(%1) : (tensor) -> () + tf_device.return + }) {device = "tpu0", launch_attr = "launch_attr"} : () -> () + return +} + +// CHECK-LABEL: func @test_ops_with_tpu_operands_not_extracted +func @test_ops_with_tpu_operands_not_extracted(%arg0 : tensor) -> () { + // CHECK: %[[LAUNCH_OUT:.*]] = "tf_device.launch"() + // CHECK: %[[A_OUT:.*]] = "tf.A" + // CHECK: %[[D_OUT:.*]] = "tf.D"(%[[A_OUT]]) + // CHECK-NEXT: tf_device.return %[[D_OUT]] + // + // CHECK: "tf_device.cluster" + // CHECK: "tf.B" + // CHECK: "tf.C" + // CHECK: "tf.E" + // CHECK-NEXT: tf_device.return + "tf_device.cluster"() ( { + %0 = "tf.A"(%arg0) {_xla_outside_compilation = "cluster1"} : (tensor) -> (tensor) + %1 = "tf.B"() {} : () -> (tensor) + %2 = "tf.C"(%arg0, %1) {_xla_outside_compilation = "cluster1"} : (tensor, tensor) -> (tensor) + %3 = "tf.D"(%0) {_xla_outside_compilation = "cluster1"}: (tensor) -> (tensor) + %4 = "tf.E"(%3) {} : (tensor) -> (tensor) tf_device.return }) {device = "tpu0", launch_attr = "launch_attr"} : () -> () return diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_outside_compilation.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_outside_compilation.mlir index b2e8f116827..3cb693ee571 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_outside_compilation.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_outside_compilation.mlir @@ -3,12 +3,12 @@ // Tests that missing `_xla_outside_compilation` attribute value results in an error. 
func @missing_outside_compilation_attribute() -> () { - "tf_device.launch"() ( { + "tf_device.cluster"() ( { "tf.A"() : () -> () // expected-error@+1 {{attribute '_xla_outside_compilation' is empty}} "tf.B"() {_xla_outside_compilation = ""} : () -> () tf_device.return - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> () + }) {cluster_attr = "cluster_attr"} : () -> () return } @@ -18,11 +18,11 @@ func @missing_outside_compilation_attribute() -> () { // CHECK-LABEL: func @no_outside_compilation func @no_outside_compilation() -> tensor { - %0 = "tf_device.launch"() ( { + %0 = "tf_device.cluster"() ( { %1 = "tf.A"() : () -> tensor %2 = "tf.B"(%1) : (tensor) -> tensor tf_device.return %2 : tensor - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> tensor + }) {cluster_attr = "cluster_attr"} : () -> tensor return %0 : tensor } @@ -36,16 +36,15 @@ func @nodep_single_outside_compilation() -> () { // CHECK-NEXT: "tf_device.launch" // CHECK-NEXT: "tf.B" // CHECK-NOT: _xla_outside_compilation - // CHECK: "tf_device.launch" + // CHECK: "tf_device.cluster" // CHECK-NEXT: "tf.A" - // CHECK: device = "tpu0" - // CHECK-SAME: launch_attr = "launch_attr" - "tf_device.launch"() ( { + // CHECK: cluster_attr = "cluster_attr" + "tf_device.cluster"() ( { "tf.A"() : () -> () "tf.B"() {_xla_outside_compilation = "cluster1"} : () -> () "tf.C"() : () -> () tf_device.return - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> () + }) {cluster_attr = "cluster_attr"} : () -> () return } @@ -59,19 +58,18 @@ func @nodep_single_cluster_multiple_ops_outside_compilation() -> () { // CHECK-NEXT: "tf.C" // CHECK-NEXT: "tf.D" // CHECK-NOT: _xla_outside_compilation - // CHECK: "tf_device.launch" + // CHECK: "tf_device.cluster" // CHECK-NEXT: "tf.A" // CHECK-NEXT: "tf.E" - // CHECK: device = "tpu0" - // CHECK-SAME: launch_attr = "launch_attr" - "tf_device.launch"() ( { + // CHECK: cluster_attr = "cluster_attr" + "tf_device.cluster"() ( { "tf.A"() : () -> () "tf.B"() 
{_xla_outside_compilation = "cluster1"} : () -> () "tf.C"() {_xla_outside_compilation = "cluster1"} : () -> () "tf.D"() {_xla_outside_compilation = "cluster1"} : () -> () "tf.E"() : () -> () tf_device.return - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> () + }) {cluster_attr = "cluster_attr"} : () -> () return } @@ -80,15 +78,16 @@ func @nodep_single_cluster_multiple_ops_outside_compilation() -> () { // CHECK-LABEL: func @nodep_multiple_outside_compilation func @nodep_multiple_outside_compilation() -> () { // CHECK: "tf_device.parallel_execute" - // CHECK-COUNT-3: "tf_device.launch" - "tf_device.launch"() ( { + // CHECK-COUNT-2: "tf_device.launch" + // CHECK: "tf_device.cluster" + "tf_device.cluster"() ( { "tf.A"() : () -> () "tf.B"() {_xla_outside_compilation = "cluster1"} : () -> () "tf.C"() : () -> () "tf.D"() {_xla_outside_compilation = "cluster2"} : () -> () "tf.E"() : () -> () tf_device.return - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> () + }) {cluster_attr = "cluster_attr"} : () -> () return } @@ -100,17 +99,17 @@ func @single_tpu_return_single_outside_compilation(%arg0: tensor) -> tens // CHECK: %[[REPLICATE:[0-9]*]]:2 = tf_device.replicate // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]] = "tf_device.parallel_execute" // CHECK-NEXT: "tf_device.launch" - // CHECK: %[[TPU_LAUNCH_OUTPUT:[0-9]*]] = "tf_device.launch" + // CHECK: %[[TPU_CLUSTER_OUTPUT:[0-9]*]] = "tf_device.cluster" // CHECK: tf_device.return - // CHECK: tf_device.return %[[TPU_LAUNCH_OUTPUT]] + // CHECK: tf_device.return %[[TPU_CLUSTER_OUTPUT]] // CHECK: tf_device.return %[[PARALLEL_EXECUTE_OUTPUT]] %1:2 = tf_device.replicate([%0, %arg0] as %ri_0: tensor) {n = 2 : i32} { - %2 = "tf_device.launch"() ( { + %2 = "tf_device.cluster"() ( { "tf.A"() : () -> () "tf.B"() {_xla_outside_compilation = "cluster1"} : () -> () %3 = "tf.C"() : () -> tensor tf_device.return %3 : tensor - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> tensor + }) {cluster_attr = 
"cluster_attr"} : () -> tensor tf_device.return %2 : tensor } @@ -125,17 +124,17 @@ func @multiple_tpu_return_single_outside_compilation(%arg0: tensor) -> te // CHECK: %[[REPLICATE:[0-9]*]]:4 = tf_device.replicate // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]]:2 = "tf_device.parallel_execute" // CHECK-NEXT: "tf_device.launch" - // CHECK: %[[TPU_LAUNCH_OUTPUT:[0-9]*]]:2 = "tf_device.launch" + // CHECK: %[[TPU_CLUSTER_OUTPUT:[0-9]*]]:2 = "tf_device.cluster" // CHECK: tf_device.return - // CHECK: tf_device.return %[[TPU_LAUNCH_OUTPUT]] + // CHECK: tf_device.return %[[TPU_CLUSTER_OUTPUT]] // CHECK: tf_device.return %[[PARALLEL_EXECUTE_OUTPUT]] %1:4 = tf_device.replicate([%0, %arg0] as %ri_0: tensor) {n = 2 : i32} { - %2, %3 = "tf_device.launch"() ( { + %2, %3 = "tf_device.cluster"() ( { %4 = "tf.A"() : () -> tensor "tf.B"() {_xla_outside_compilation = "cluster1"} : () -> () %5 = "tf.C"() : () -> tensor tf_device.return %4, %5 : tensor, tensor - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> (tensor, tensor) + }) {cluster_attr = "cluster_attr"} : () -> (tensor, tensor) tf_device.return %2, %3 : tensor, tensor } diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir index af0119dab8f..b8a48bbb379 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir @@ -1222,6 +1222,41 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- +// Tests simple case of `tf_device.cluster_func` on TPU with replication and parallel_execute. 
+ +module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0", "/job:worker/replica:0/task:0/device:TPU:1"]} { + // CHECK-LABEL: func @replicated_parallel_tpu_cluster_func + func @replicated_parallel_tpu_cluster_func(%arg0: tensor) -> tensor { + // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A" + %0 = "tf.A"(%arg0) : (tensor) -> tensor + // CHECK: %[[REPLICATE:[0-9]*]]:2 = tf_device.replicate + %1:2 = tf_device.replicate([%0, %arg0] as %ri_0: tensor) {n = 2 : i32} { + // CHECK: "tf._TPUCompileMlir" + // CHECK: "tf.TPUCompileSucceededAssert" + // CHECK: "tf_device.parallel_execute" + // CHECK: "tf.TPUExecute" + %3 = "tf_device.parallel_execute"() ( { + "tf.D"() : () -> () + tf_device.return + }, { + %4 = "tf_device.cluster_func"(%ri_0) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor + + tf_device.return %4 : tensor + }) : () -> (tensor) + tf_device.return %3 : tensor + } + %2 = "tf.C"(%1#1) : (tensor) -> tensor + return %2 : tensor + } + + func @tpu0_func(%arg0: tensor) -> tensor { + %0 = "tf.B"(%arg0) : (tensor) -> tensor + return %0 : tensor + } +} + +// ----- + // Tests devices are set properly for non replicated model parallelism. 
module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:localhost/replica:0/task:0/device:CPU:0", "/job:localhost/replica:0/task:0/device:TPU:0", "/job:localhost/replica:0/task:0/device:TPU:1", "/job:localhost/replica:0/task:0/device:TPU_SYSTEM:0"]} { diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h index c1d99c2dee3..0b1ff2beebb 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h @@ -258,7 +258,7 @@ std::unique_ptr> CreateTPUVariableReformattingPass(); // Creates a pass that extracts outside compilation (CPU ops inside TPU cluster) // at head/tail of TPU cluster to run before/after TPU computation. -std::unique_ptr> +std::unique_ptr> CreateTPUExtractHeadTailOutsideCompilationPass(); // Creates a pass that extract outside compilation (CPU ops inside TPU cluster) diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc index 789088bd585..5a2cae38062 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc @@ -66,8 +66,7 @@ using tensorflow::shape_inference::ShapeHandle; namespace mlir { namespace TF { namespace { -Optional> InferShapeForFunctionReturnType( - FuncOp func) { +Optional> InferShapeForFunctionReturnType(FuncOp func) { // Find any return ops. 
SmallVector return_ops; for (Block& block : func) { @@ -137,9 +136,9 @@ void AddCastBackForUnsupportedNonTFUses(Operation* op, Value result, cast_op = b.create(op->getLoc(), old_type, result, /*truncate=*/b.getBoolAttr(false)); } - return mlir::Value(cast_op); + return Value(cast_op); }; - for (OpOperand& use : llvm::make_early_inc_range(result.getUses())) { + for (OpOperand& use : make_early_inc_range(result.getUses())) { if (use.getOwner()->getDialect() != tf_dialect && !IsSupportedNonTFOp(use.getOwner())) use.set(get_cast_op()); @@ -162,7 +161,7 @@ Optional GetShapeFromMlirType(Type t) { bool InferShapeForPassThroughOps(OperandRange pass_through_operands, Operation* op, Dialect* tf_dialect) { bool changed = false; - for (auto entry : llvm::zip(pass_through_operands, op->getResults())) { + for (auto entry : zip(pass_through_operands, op->getResults())) { Type operand_type = std::get<0>(entry).getType(); Value result = std::get<1>(entry); if (result.getType() == operand_type) continue; @@ -204,7 +203,7 @@ bool InferShapeForNonTFDialectOperation(Operation* op, Dialect* tf_dialect) { tf_dialect); } // TODO(b/155227679): Use OpInterface instead of hard-coding for TensorCastOp. - if (auto tensor_cast = dyn_cast(op)) { + if (auto tensor_cast = dyn_cast(op)) { return InferShapeForPassThroughOps( tensor_cast.getOperation()->getOperands(), op, tf_dialect); } @@ -254,7 +253,7 @@ GetSubtypes(Type type) { // match the i-th operand type). Returns true if anything is changed. 
bool PassThroughOperandTypes(OperandRange operands, ResultRange results) { bool changed = false; - for (auto entry : llvm::zip(operands, results)) { + for (auto entry : zip(operands, results)) { Type operand_type = std::get<0>(entry).getType(); Type result_type = std::get<1>(entry).getType(); if (operand_type == result_type) continue; @@ -291,14 +290,13 @@ bool InferShapeForCall(Operation* op) { CallInterfaceCallable callable = call_op.getCallableForCallee(); SymbolRefAttr sym = callable.dyn_cast(); if (!sym) return false; - FuncOp func = - dyn_cast(SymbolTable::lookupNearestSymbolFrom(op, sym)); + FuncOp func = dyn_cast(SymbolTable::lookupNearestSymbolFrom(op, sym)); if (!func) return false; bool changed = false; // Map each of the results of the call to the returned type of the // function. - for (auto result : llvm::zip(op->getResults(), func.getType().getResults())) { + for (auto result : zip(op->getResults(), func.getType().getResults())) { if (std::get<0>(result).getType() == std::get<1>(result)) continue; // Skip already statically shaped results. if (!CanBeRefined(std::get<0>(result).getType())) continue; @@ -323,8 +321,8 @@ bool RefineWithInferTypeOpInterface(InferTypeOpInterface infer_ti, Operation* op = infer_ti.getOperation(); SmallVector inferred; LogicalResult res = infer_ti.inferReturnTypes( - op->getContext(), op->getLoc(), op->getOperands(), op->getAttrs(), - op->getRegions(), inferred); + op->getContext(), op->getLoc(), op->getOperands(), + op->getAttrDictionary(), op->getRegions(), inferred); if (failed(res)) { op->emitOpError("failed to refine type as inference failed"); return false; @@ -335,7 +333,7 @@ bool RefineWithInferTypeOpInterface(InferTypeOpInterface infer_ti, // Map each of the results of the call to the returned type of the // function. 
bool changed = false; - for (auto result : llvm::zip(op->getResults(), inferred)) { + for (auto result : zip(op->getResults(), inferred)) { if (std::get<0>(result).getType() == std::get<1>(result)) continue; // Inserts a cast back to the original type if any user is not in the @@ -356,7 +354,7 @@ bool RefineWithInferTypeOpInterface(InferTypeOpInterface infer_ti, // so for tf.Const -> tensor<10x20xf32>, [0,2,18] would point to a unique output // scalar value). struct ValuePort { - llvm::PointerUnion producer; + PointerUnion producer; SmallVector port; bool operator==(const ValuePort& other) const { @@ -374,39 +372,38 @@ struct ValuePort { port = {0}; } } - ValuePort(llvm::PointerUnion producer, + ValuePort(PointerUnion producer, SmallVector port) : producer(producer), port(port) {} - llvm::raw_ostream& print(llvm::raw_ostream& os) const { + raw_ostream& print(raw_ostream& os) const { if (auto* op = producer.dyn_cast()) os << "op " << op->getName(); if (auto ba = producer.dyn_cast()) os << "block_arg " << ba.getArgNumber(); - os << llvm::formatv(" [{0}]", llvm::make_range(port.begin(), port.end())); + os << formatv(" [{0}]", llvm::make_range(port.begin(), port.end())); return os; } }; struct ValuePortHasher { std::size_t operator()(const ValuePort& other) const { - return llvm::hash_combine( - llvm::hash_value(other.producer.getOpaqueValue()), - llvm::hash_value(ArrayRef(other.port))); + return hash_combine(llvm::hash_value(other.producer.getOpaqueValue()), + hash_value(ArrayRef(other.port))); } }; using ValuePortResultMap = std::unordered_map; -using ComputedQueryFn = llvm::function_ref; -using ValueQueryFn = llvm::function_ref; -using ValuePortInputs = llvm::SmallVectorImpl; +using ComputedQueryFn = function_ref; +using ValueQueryFn = function_ref; +using ValuePortInputs = SmallVectorImpl; -// TODO(jpienaar): InputsRequiredForOutput and ComputeOutputComponent are +// TODO(jpienaar): ComputeInputsRequiredForOutput and ComputeOutputComponent are // intended to be 
switched to op interfaces once more refined. -LogicalResult InputsRequiredForOutput(ValuePort value_port, - ComputedQueryFn has_been_computed, - ValuePortInputs* inputs) { +LogicalResult ComputeInputsRequiredForOutput(ValuePort value_port, + ComputedQueryFn has_been_computed, + ValuePortInputs* inputs) { auto op = value_port.producer.dyn_cast(); auto& port = value_port.port; if (!op) return failure(); @@ -460,26 +457,94 @@ Attribute ComputeOutputComponent(const ValuePort& value_port, return nullptr; } -ShapeHandle ComputeOutputAsShape(OpResult result, InferenceContext* ic) { +// Context used during ShapeInference. This class contains common information +// that is required by the individual shape inference helper functions (e.g., +// TF Graph version, constant values computed, etc.) +class ShapeInference { + public: + ShapeInference(int64_t graph_version, MLIRContext* context); + + LogicalResult ComputeInputsRequiredForOutput(ValuePort value_port, + ValuePortInputs* inputs) { + return ::mlir::TF::ComputeInputsRequiredForOutput( + value_port, + [this](const ValuePort& port) { + return results_.find(port) != results_.end(); + }, + inputs); + } + + Attribute ComputeOutputComponent(const ValuePort& value_port) { + return ::mlir::TF::ComputeOutputComponent( + value_port, [this](const ValuePort& port) { return results_[port]; }); + } + + // Returns ShapeHandle if the op result could be computed as shape. + ShapeHandle ComputeOutputAsShape(OpResult result, InferenceContext* ic); + + void RecordValue(const ValuePort& value_port, Attribute value) { + results_[value_port] = value; + } + + // Performs shape inference on the provided op and return true if the type of + // at least one result has been changed. + // A tf.Cast() is inserted for any uses that isn't in the TensorFlow dialect. + // `graph_version` indicates the current GraphDef compatibility versions + // (the versions field in graph.proto). 
+ bool InferShapeForSingleOperation(Operation* op); + + // Infers shape on the provided region, including nested ones, iterate until + // fix point with a limit of max_iteration. Returns success if fix point is + // reached before max_iteration. + LogicalResult InferShapeUntilFixPoint(Region* region, + int64_t max_iteration = 10); + + // Updates input types and refine shapes inside body of functions that are + // attached to ControlFlow ops (If/While). These functions include Then/Else + // branches of IfOp and Cond/Body functions of WhileOp. These functions share + // following common properties: + // 1) They are never reused, ie. having a single use in module. + // 2) Their input types match those of their parent ops (excluding inputs + // like predicate). + // Returns success if the refinement was applied, failure otherwise. + LogicalResult RefineShapeForControlFlowFunc(FuncOp func, + ArrayRef input_types, + int64_t max_iteration); + + // Propagate the shapes to the functions named. + LogicalResult PropagateShapeToFunctions( + ModuleOp module, Operation::operand_type_range input_types, + ArrayRef func_names, int64_t max_iteration); + + // Shape propagation for call/control flow ops. + LogicalResult PropagateShapeIntoAttachedFunctions(Operation* op, + int64_t max_iteration); + + private: + // Mapping between ValuePort (which corresponds to an OpResult or smaller, + // e.g., first element of OpResult produced) to an Attribute if the ValuePort + // corresponds to a constant value. 
+ ValuePortResultMap results_; + int64_t graph_version_; + MLIRContext* context_; + Dialect* tf_dialect_; +}; + +ShapeInference::ShapeInference(int64_t graph_version, MLIRContext* context) + : graph_version_(graph_version) { + context_ = context; + tf_dialect_ = context->getRegisteredDialect(); +} + +ShapeHandle ShapeInference::ComputeOutputAsShape(OpResult result, + InferenceContext* ic) { LLVM_DEBUG(result.print(llvm::dbgs() << "\nEvaluate partially ")); auto rt = result.getType().dyn_cast(); if (!rt || !rt.hasStaticShape() || rt.getRank() != 1) return {}; int dim_size = rt.getDimSize(0); // Worklist to direct partial evaluation. - llvm::SmallVector worklist; - // The ValuePort evaluated results. - // TODO(jpienaar): This could be cached across invocations (e.g., part of some - // inference context). - ValuePortResultMap evaluated; - // Returns whether a ValuePort has been previously computed. - auto has_been_computed = [&evaluated](const ValuePort& port) { - return evaluated.find(port) != evaluated.end(); - }; - // Returns previously computed ValuePort value. - auto values = [&evaluated](const ValuePort& port) -> Attribute { - return evaluated[port]; - }; + SmallVector worklist; // Simple evaluator that attempts to partially evaluate the input value even // if unable to evaluate the complete output. Below follows a simple stack @@ -498,7 +563,7 @@ ShapeHandle ComputeOutputAsShape(OpResult result, InferenceContext* ic) { LLVM_DEBUG(front.print(llvm::errs() << "\nWorklist front ")); SmallVector inputs; - auto res = InputsRequiredForOutput(front, has_been_computed, &inputs); + auto res = ComputeInputsRequiredForOutput(front, &inputs); if (failed(res)) { // Abort if unable to find which required inputs need to be computed. 
worklist.clear(); @@ -513,16 +578,16 @@ ShapeHandle ComputeOutputAsShape(OpResult result, InferenceContext* ic) { continue; } - auto ret = ComputeOutputComponent(front, values); + auto ret = ComputeOutputComponent(front); if (!ret) continue; - evaluated[front] = ret; + RecordValue(front, ret); LLVM_DEBUG(ret.print(llvm::dbgs() << "\ncomputed result = ")); // If worklist is empty, then this is the root query op. if (worklist.empty()) { LLVM_DEBUG(llvm::dbgs() << "[root node]\n"); - if (auto dea = ret.dyn_cast()) { + if (auto dea = ret.dyn_cast()) { if (dea.getNumElements() != 1) { LLVM_DEBUG(llvm::errs() << "Unexpected number of elements\n"); return {}; @@ -536,9 +601,8 @@ ShapeHandle ComputeOutputAsShape(OpResult result, InferenceContext* ic) { return ic->MakeShape(dims); } -bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, - int64_t graph_version) { - assert(tf_dialect == op->getDialect()); +bool ShapeInference::InferShapeForSingleOperation(Operation* op) { + assert(tf_dialect_ == op->getDialect()); // The shape function of these ops sometimes does not propagate subtypes // (handle shapes) for resource and variant types. We use a simple passthrough // to make sure they are preserved in the output. @@ -550,7 +614,7 @@ bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, // If no result for this op needs shape inference, we have a fast-path return. // But if the type is a resource/variant, we do not skip it because we might // not have the handle shapes. - if (llvm::none_of(op->getResultTypes(), CanBeRefined)) { + if (none_of(op->getResultTypes(), CanBeRefined)) { LLVM_DEBUG(llvm::dbgs() << "Skipping inference for statically shaped op '" << op->getName() << "'.\n"); return false; @@ -565,8 +629,8 @@ bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, // This is necessary to avoid reprocessing the tf.Cast that are inserted at // the end of this function. 
if (isa(op) && - llvm::all_of(op->getResult(0).getUsers(), [&](Operation* user) { - return user->getDialect() != tf_dialect; + all_of(op->getResult(0).getUsers(), [&](Operation* user) { + return user->getDialect() != tf_dialect_; })) { LLVM_DEBUG(llvm::dbgs() << "Skipping inference for tf.Cast with no TF " "dialect operation users '" @@ -646,7 +710,7 @@ bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, // Perform the shape inference using an InferenceContext with the input // shapes. This object is abstracting the information that the ShapeInference // function operates on. - InferenceContext c(graph_version, *node_def, op_reg_data->op_def, + InferenceContext c(graph_version_, *node_def, op_reg_data->op_def, input_shapes, input_tensors, /*input_tensors_as_shapes=*/{}, handle_shapes_and_types); auto status = c.Run(op_reg_data->shape_inference_fn); @@ -659,7 +723,7 @@ bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, // Determine if, during shape computation, the shape functions attempted to // query an input operand as shape where the input was not known/constant. bool requires_inputs = - llvm::any_of(llvm::seq(0, c.num_inputs()), [&](int input) { + any_of(llvm::seq(0, c.num_inputs()), [&](int input) { return c.requested_input_tensor_as_partial_shape(input) && !input_tensors[input]; }); @@ -723,7 +787,7 @@ bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, new_element_type.isa()) { auto handle_shapes_types = c.output_handle_shapes_and_types(output); if (handle_shapes_types) { - llvm::SmallVector subtypes; + SmallVector subtypes; OpBuilder b(op); for (const auto& shape_n_type : *handle_shapes_types) { Type element_type; @@ -743,7 +807,7 @@ bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, if (result.getType() == new_type) continue; // Inserts a cast back to the original type if any user is not in the TF // dialect. 
- AddCastBackForUnsupportedNonTFUses(op, result, tf_dialect, + AddCastBackForUnsupportedNonTFUses(op, result, tf_dialect_, result.getType()); // Finally we inferred the shape and replace the type for this result. result.setType(new_type); @@ -755,23 +819,13 @@ bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, return changed; } -// Updates input types and refine shapes inside body of functions that are -// attached to ControlFlow ops (If/While). These functions include Then/Else -// branches of IfOp and Cond/Body functions of WhileOp. These functions share -// following common properties: -// 1) They are never reused, ie. having a single use in module. -// 2) Their input types match those of their parent ops (excluding inputs like -// predicate). -// Returns a boolean indicating whether any change has been applied. -LogicalResult RefineShapeForControlFlowFunc(FuncOp func, - llvm::ArrayRef input_types, - int64_t graph_version, - int64_t max_iteration) { +LogicalResult ShapeInference::RefineShapeForControlFlowFunc( + FuncOp func, ArrayRef input_types, int64_t max_iteration) { ModuleOp module = func.getParentOfType(); auto func_uses = SymbolTable::getSymbolUses(func, &module.getBodyRegion()); int num_uses = std::distance(func_uses->begin(), func_uses->end()); if (num_uses != 1) { - func.emitWarning(llvm::formatv( + func.emitWarning(formatv( "expected control flow function {0} to have exactly 1 use, found {1}.", func.getName(), num_uses)); return failure(); @@ -785,8 +839,7 @@ LogicalResult RefineShapeForControlFlowFunc(FuncOp func, arg_and_idx.value().setType(input_types[arg_and_idx.index()]); } - auto res = - InferShapeUntilFixPoint(&func.getBody(), graph_version, max_iteration); + auto res = InferShapeUntilFixPoint(&func.getBody(), max_iteration); if (failed(res)) return res; auto new_return_types = InferShapeForFunctionReturnType(func); @@ -798,20 +851,18 @@ LogicalResult RefineShapeForControlFlowFunc(FuncOp func, return success(); } 
-LogicalResult PropagateShapeToFunctions( +LogicalResult ShapeInference::PropagateShapeToFunctions( ModuleOp module, Operation::operand_type_range input_types, - llvm::ArrayRef func_names, int64_t graph_version, - int64_t max_iteration) { - bool success = true; + ArrayRef func_names, int64_t max_iteration) { + bool all_succeeded = true; auto types = llvm::to_vector<4>(input_types); for (auto func_name : func_names) { FuncOp func = module.lookupSymbol(func_name); - if (failed(RefineShapeForControlFlowFunc(func, types, graph_version, - max_iteration))) { - success = false; - } + all_succeeded = + succeeded(RefineShapeForControlFlowFunc(func, types, max_iteration)) && + all_succeeded; } - return mlir::success(success); + return success(all_succeeded); } // If the callee has only one use, propagates any constant operand of call_op to @@ -831,7 +882,7 @@ void PropagateConstantToCallee(CallOpInterface call_op, // the constant inside the function. for (auto arg : func.getArguments()) { auto operand = op->getOperand(arg.getArgNumber()).getDefiningOp(); - if (llvm::isa_and_nonnull(operand)) { + if (isa_and_nonnull(operand)) { arg.replaceAllUsesWith(builder.clone(*operand)->getResult(0)); } } @@ -850,33 +901,31 @@ void PropagateConstantFromCallee(CallOpInterface call_op, for (auto retval : llvm::enumerate(func.front().getTerminator()->getOperands())) { auto retval_op = retval.value().getDefiningOp(); - if (llvm::isa_and_nonnull(retval_op)) { + if (isa_and_nonnull(retval_op)) { op->getResult(retval.index()) .replaceAllUsesWith(builder.clone(*retval_op)->getResult(0)); } } } -LogicalResult PropagateShapeIntoAttachedFunctions(Operation* op, - int64_t graph_version, - int64_t max_iteration) { +LogicalResult ShapeInference::PropagateShapeIntoAttachedFunctions( + Operation* op, int64_t max_iteration) { ModuleOp module = op->getParentOfType(); if (auto if_op = dyn_cast(op)) { return PropagateShapeToFunctions( - module, llvm::drop_begin(if_op.getOperandTypes(), 1), - 
{if_op.then_branch(), if_op.else_branch()}, graph_version, - max_iteration); + module, drop_begin(if_op.getOperandTypes(), 1), + {if_op.then_branch(), if_op.else_branch()}, max_iteration); } else if (auto while_op = dyn_cast(op)) { return PropagateShapeToFunctions(module, while_op.getOperandTypes(), {while_op.cond(), while_op.body()}, - graph_version, max_iteration); + max_iteration); } else if (auto call_op = dyn_cast(op)) { CallInterfaceCallable callable = call_op.getCallableForCallee(); if (SymbolRefAttr sym = callable.dyn_cast()) { PropagateConstantToCallee(call_op, sym, module); if (failed(PropagateShapeToFunctions( module, call_op.getArgOperands().getTypes(), - {sym.getRootReference()}, graph_version, max_iteration))) { + {sym.getRootReference()}, max_iteration))) { return failure(); } PropagateConstantFromCallee(call_op, sym, module); @@ -889,13 +938,10 @@ LogicalResult PropagateShapeIntoAttachedFunctions(Operation* op, return success(); } -LogicalResult InferShapeUntilFixPoint(Region* region, int64_t graph_version, - int64_t max_iteration) { - MLIRContext* ctx = region->getContext(); - Dialect* tf_dialect = ctx->getRegisteredDialect(); - - // An operation folder that is used to attempt folding before inference. - OperationFolder folder(ctx); +LogicalResult ShapeInference::InferShapeUntilFixPoint(Region* region, + int64_t max_iteration) { + // An operation folder that is used to attempt folding before inference. + OperationFolder folder(context_); bool changed = true; // TODO(aminim): we could have a more efficient traversal by guiding the @@ -908,14 +954,14 @@ LogicalResult InferShapeUntilFixPoint(Region* region, int64_t graph_version, << "Shape inference, iteration " << iteration << "\n"); region->walk([&](Operation* op) { if (auto infer_ti = dyn_cast(op)) { - changed |= RefineWithInferTypeOpInterface(infer_ti, tf_dialect); + changed |= RefineWithInferTypeOpInterface(infer_ti, tf_dialect_); // TODO(jpienaar): Debug why we can't just return here. 
We end up with // additional constant due to the propagation of constant into attached // function if we return already. } - if (op->getDialect() != tf_dialect) { - changed |= InferShapeForNonTFDialectOperation(op, tf_dialect); + if (op->getDialect() != tf_dialect_) { + changed |= InferShapeForNonTFDialectOperation(op, tf_dialect_); return; } @@ -924,13 +970,12 @@ LogicalResult InferShapeUntilFixPoint(Region* region, int64_t graph_version, // Best-effort shape inference in attached functions. Do not return // failure even if it doesn't get to fixed point. - if (failed(PropagateShapeIntoAttachedFunctions(op, graph_version, - max_iteration))) { + if (failed(PropagateShapeIntoAttachedFunctions(op, max_iteration))) { op->emitWarning() << "unable to refine shape of attached function " "arguments and bodies"; } - changed |= InferShapeForSingleOperation(op, tf_dialect, graph_version); + changed |= InferShapeForSingleOperation(op); }); } @@ -945,31 +990,43 @@ LogicalResult InferShapeUntilFixPoint(Region* region, int64_t graph_version, LogicalResult InferShapeForFunction(FuncOp func, ArrayRef> arg_shapes, int64_t graph_version) { - mlir::FunctionType func_type = func.getType(); + ShapeInference context(graph_version, func.getContext()); + if (arg_shapes.empty()) { + if (failed(context.InferShapeUntilFixPoint(&func.getBody()))) + return failure(); + // TODO(b/156276510): Verify that it is always fine to refine a function's + // return type, as long as we do not change the argument shapes. + if (auto return_types = InferShapeForFunctionReturnType(func)) { + func.setType(FunctionType::get(func.getType().getInputs(), + return_types.getValue(), + func.getContext())); + } + + return success(); + } + FunctionType func_type = func.getType(); bool needs_refinement = false; - llvm::SmallVector new_arg_types; + SmallVector new_arg_types; new_arg_types.reserve(func_type.getNumInputs()); // Update argument types in-place using the provided arg_shapes. 
for (size_t i = 0; i < func_type.getNumInputs(); ++i) { ArrayRef shape = arg_shapes[i]; - mlir::Type element_type; - if (auto input_ty = - func_type.getInput(i).dyn_cast()) { + Type element_type; + if (auto input_ty = func_type.getInput(i).dyn_cast()) { if (!input_ty || input_ty.getShape().size() != shape.size()) { return failure(); } element_type = input_ty.getElementType(); } else { - auto unranked_input_ty = - func_type.getInput(i).dyn_cast(); + auto unranked_input_ty = func_type.getInput(i).dyn_cast(); if (!unranked_input_ty) { return failure(); } element_type = unranked_input_ty.getElementType(); } - auto new_arg_type = mlir::RankedTensorType::get(shape, element_type); + auto new_arg_type = RankedTensorType::get(shape, element_type); if (new_arg_type != func_type.getInput(i)) { // If the new type is more detailed, trigger shape inference. func.getArgument(i).setType(new_arg_type); @@ -982,28 +1039,17 @@ LogicalResult InferShapeForFunction(FuncOp func, return success(); } - mlir::LogicalResult result = - mlir::TF::InferShapeUntilFixPoint(&func.getBody(), graph_version); + LogicalResult result = context.InferShapeUntilFixPoint(&func.getBody()); if (failed(result)) { return failure(); } auto return_types = InferShapeForFunctionReturnType(func); - func.setType(mlir::FunctionType::get(new_arg_types, - return_types.hasValue() - ? return_types.getValue() - : func.getType().getResults(), - func.getContext())); - - return success(); -} - -LogicalResult InferShapeForFunctionType(FuncOp func) { - if (auto return_types = InferShapeForFunctionReturnType(func)) { - func.setType(mlir::FunctionType::get(func.getType().getInputs(), - return_types.getValue(), - func.getContext())); - } + func.setType(FunctionType::get(new_arg_types, + return_types.hasValue() + ? 
return_types.getValue() + : func.getType().getResults(), + func.getContext())); return success(); } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.h b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.h index 0524ec678ed..e36d8d56d6d 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.h @@ -27,30 +27,13 @@ namespace mlir { namespace TF { -// Performs shape inference on the provided op and return true if the type of -// at least one result has been changed. -// A tf.Cast() is inserted for any uses that isn't in the TensorFlow dialect. -// `graph_version` indicates the current GraphDef compatibility versions -// (the versions field in graph.proto). -bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, - int64_t graph_version); - -// Infers shape on the provided region, including nested ones, iterate until fix -// point with a limit of max_iteration. Returns success if fix point is reached -// before max_iteration. -LogicalResult InferShapeUntilFixPoint(Region* region, int64_t graph_version, - int64_t max_iteration = 10); - // Given a list of refined shapes matching the function arguments of func, runs // shape inference over the function to propagate this updated information. +// If arg_shapes are empty, then argument shapes will be left unchanged. LogicalResult InferShapeForFunction(FuncOp func, ArrayRef> arg_shapes, int64_t graph_version); -// Refines the return type of the given function by folding tf.Cast that -// precedes the return instruction. 
-LogicalResult InferShapeForFunctionType(FuncOp func); - } // namespace TF } // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference_pass.cc index 48e4e77ce0f..acdfc0eb039 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference_pass.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference_pass.cc @@ -58,10 +58,8 @@ struct ShapeInference } int64_t producer = producer_or.ValueOrDie(); for (auto func : module.getOps()) { - InferShapeUntilFixPoint(&func.getBody(), producer); - // TODO(yuanzx): Verify that it is always fine to refine a function's - // return type, as long as we do not change the argument shapes. - InferShapeForFunctionType(func); + if (failed(InferShapeForFunction(func, /*arg_shapes=*/{}, producer))) + return signalPassFailure(); } } }; diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_head_tail_outside_compilation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_head_tail_outside_compilation.cc index 141feeb6b24..b9e214470cd 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_head_tail_outside_compilation.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_head_tail_outside_compilation.cc @@ -14,11 +14,23 @@ limitations under the License. 
==============================================================================*/ #include +#include +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/Block.h" // from @llvm-project +#include "mlir/IR/Builders.h" // from @llvm-project +#include "mlir/IR/Operation.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Pass/PassRegistry.h" // from @llvm-project +#include "mlir/Transforms/RegionUtils.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_structs.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" +#include "tensorflow/compiler/mlir/tensorflow/utils/device_util.h" namespace mlir { namespace TFTPU { @@ -30,30 +42,182 @@ namespace { constexpr char kXlaOutsideCompilationAttr[] = "_xla_outside_compilation"; -struct TPUExtractHeadTailOutsideCompilation - : public PassWrapper { - void runOnFunction() override; -}; +bool HasOutsideCompilationAttribute(Operation* op) { + return op->getAttrOfType(kXlaOutsideCompilationAttr) != nullptr; +} -void TPUExtractHeadTailOutsideCompilation::runOnFunction() { - getFunction().walk([&](tf_device::LaunchOp launch) { - Block& launch_block = launch.GetBody(); - for (auto& op : llvm::make_early_inc_range(launch_block.getOperations())) { - // TODO(b/155115766): Handle outputs that should be inputs to TPU - // LaunchOp. - if (auto attr = - op.getAttrOfType(kXlaOutsideCompilationAttr)) { - op.moveBefore(launch); - } else { +// Returns whether all operands of `op` are from values inside the +// `input_value_set`. 
+bool OpContainsOperandsFromSet(Operation* op, + const llvm::SetVector& input_value_set) { + for (auto operand : op->getOperands()) + if (input_value_set.count(operand) == 0) return false; + + return true; +} + +void RecordOutsideCompiledOpsAndUsages( + Operation* op, llvm::SmallSetVector* outside_compiled_ops, + llvm::SetVector* outside_compiled_op_usages) { + if (HasOutsideCompilationAttribute(op) && + OpContainsOperandsFromSet(op, *outside_compiled_op_usages)) { + outside_compiled_ops->insert(op); + outside_compiled_op_usages->insert(op->getResults().begin(), + op->getResults().end()); + } +} + +// Traverses the MLIR graph and returns a set of ops that +// are connected to inputs of TPU computation and outside compiled. +void ExtractOutsideCompiledOpsConnectedToHead( + Value input_value, llvm::SetVector* values_used_in_host_cluster, + llvm::SmallSetVector* outside_compiled_ops) { + llvm::SmallSetVector parent_outside_compiled_ops_at_head; + for (auto& usage : input_value.getUses()) { + auto head_operation = usage.getOwner(); + RecordOutsideCompiledOpsAndUsages(head_operation, + &parent_outside_compiled_ops_at_head, + values_used_in_host_cluster); + } + + // Traverse the graph and find all outside compiled ops connected from + // the `input_value`. 
+ while (!parent_outside_compiled_ops_at_head.empty()) { + llvm::SmallSetVector connected_outside_compiled_ops; + for (auto head_outside_compiled_op : parent_outside_compiled_ops_at_head) { + auto op_results = head_outside_compiled_op->getOpResults(); + for (auto op_result : op_results) { + for (auto& use : op_result.getUses()) { + auto connected_op = use.getOwner(); + RecordOutsideCompiledOpsAndUsages(connected_op, + &connected_outside_compiled_ops, + values_used_in_host_cluster); + } + } + } + + outside_compiled_ops->insert(parent_outside_compiled_ops_at_head.begin(), + parent_outside_compiled_ops_at_head.end()); + std::swap(parent_outside_compiled_ops_at_head, + connected_outside_compiled_ops); + } +} + +// TODO(hongjunchoi): Also handle ops without inputs that are outside +// compiled. +// +// Returns set of ops that are outside compiled and are directly connected +// to inputs to the TPU computation. +llvm::SmallSetVector IdentifyOutsideCompiledOpsAtHead( + tf_device::ClusterOp tpu_cluster) { + llvm::SmallSetVector outside_compiled_at_head_ops; + llvm::SetVector values_used_in_cluster; + auto& cluster_region = tpu_cluster.body(); + getUsedValuesDefinedAbove(cluster_region, cluster_region, + values_used_in_cluster); + + auto input_value_list = llvm::to_vector<8>(values_used_in_cluster); + for (auto input_value : input_value_list) + ExtractOutsideCompiledOpsConnectedToHead( + input_value, &values_used_in_cluster, &outside_compiled_at_head_ops); + return outside_compiled_at_head_ops; +} + +// Returns output values of extracted outside compiled cluster at head that +// are used by the TPU computation. 
+llvm::SmallVector GetHeadExtractedClusterOutputs( + const llvm::SmallSetVector& head_outside_compiled_ops) { + llvm::SmallVector outputs; + outputs.reserve(head_outside_compiled_ops.size()); + + for (auto op : head_outside_compiled_ops) { + for (Operation* user : op->getUsers()) { + if (!head_outside_compiled_ops.count(user)) { + outputs.append(op->result_begin(), op->result_end()); break; } } + } + + return outputs; +} + +// Creates new tf_device.launch op with outside compiled ops extracted +// from the head of TPU computation. +llvm::Optional IsolateHeadExtractedOpsToLaunchOp( + OpBuilder* builder, tf_device::ClusterOp cluster, + const llvm::SmallSetVector& head_outside_compiled_ops) { + if (head_outside_compiled_ops.empty()) + return llvm::Optional(); + + // Create tf_device.launch op to separate all extracted outside compiled ops + // before the tf_device.cluster. + auto output_values = + GetHeadExtractedClusterOutputs(head_outside_compiled_ops); + + llvm::SmallVector output_return_types; + output_return_types.reserve(output_values.size()); + for (auto output : output_values) + output_return_types.emplace_back(output.getType()); + + builder->setInsertionPoint(cluster); + auto host_launch_op = builder->create( + cluster.getLoc(), builder->getStringAttr(""), output_return_types); + + // Replace all usages of outside compiled ops that are used in TPU + // computation with the results of the above created launch op. + for (auto output_and_index : llvm::enumerate(output_values)) { + auto output_index = output_and_index.index(); + auto output = output_and_index.value(); + for (auto& use : output.getUses()) { + if (!head_outside_compiled_ops.count(use.getOwner())) + use.set(host_launch_op.getResult(output_index)); + } + } + + // Create terminator op for the newly created launch op. 
+ host_launch_op.body().push_back(new Block()); + builder->setInsertionPointToEnd(&host_launch_op.GetBody()); + auto terminator = builder->create( + host_launch_op.getLoc(), output_values); + + // Move all outside compile ops from cluster op to launch op. + for (auto outside_compiled_op : head_outside_compiled_ops) + outside_compiled_op->moveBefore(terminator); + + return host_launch_op; +} + +struct TPUExtractHeadTailOutsideCompilation + : public PassWrapper> { + void runOnOperation() override; +}; + +void TPUExtractHeadTailOutsideCompilation::runOnOperation() { + // Get runtime devices information from the closest parent module. + auto module = getOperation(); + mlir::TF::RuntimeDevices devices; + if (failed(tensorflow::GetDevicesFromOp(module, &devices))) + return signalPassFailure(); + + OpBuilder builder(&getContext()); + module.walk([&](tf_device::ClusterOp cluster) { + auto head_outside_compiled_ops = IdentifyOutsideCompiledOpsAtHead(cluster); + IsolateHeadExtractedOpsToLaunchOp(&builder, cluster, + head_outside_compiled_ops); + + // TODO(b/156030523): Update device attribute of newly created host launch + // op as well as enclosing Replicate op (if TPU computation is replicated) + // with host device names. + + // TODO(b/155115766): Implement tail outside compiled op extraction. 
}); } } // anonymous namespace -std::unique_ptr> +std::unique_ptr> CreateTPUExtractHeadTailOutsideCompilationPass() { return std::make_unique(); } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc index 4e20cd9d64b..4281b85bd7f 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc @@ -34,7 +34,7 @@ constexpr char kXlaOutsideCompilationAttr[] = "_xla_outside_compilation"; constexpr char kDeviceAttr[] = "device"; // Mapping for `_xla_outside_compilation` attribute to ops of a cluster. -using ClusterMap = +using OutsideClusterMap = llvm::SmallDenseMap, 8>; // This pass extracts a CPU computation cluster with `_xla_outside_compilation` @@ -51,7 +51,8 @@ struct TPUExtractOutsideCompilation // Collects and clusters ops in `block` with the same `_xla_outside_compilation` // attribute into `clusters` This returns an error if a // `_xla_outside_compilation` attribute of an op is empty. -LogicalResult CollectAndGroupClusterOps(Block* block, ClusterMap* clusters) { +LogicalResult CollectAndGroupOutsideClusterOps(Block* block, + OutsideClusterMap* clusters) { for (Operation& op : *block) { if (auto attr = op.getAttrOfType(kXlaOutsideCompilationAttr)) { if (attr.getValue().empty()) @@ -67,7 +68,7 @@ LogicalResult CollectAndGroupClusterOps(Block* block, ClusterMap* clusters) { } // Moves `cluster_ops` to associated `launch_op` body. -void MoveClusterOpsToLaunchOp( +void MoveOutsideClusterOpsToLaunchOp( tf_device::LaunchOp launch_op, const llvm::SmallVector& cluster_ops) { MLIRContext* context = launch_op.getContext(); @@ -84,8 +85,8 @@ void MoveClusterOpsToLaunchOp( } // Creates a `tf_device::LaunchOp` to wrap cluster ops. 
-tf_device::LaunchOp CreateLaunchOpForCluster(OpBuilder* builder, - Operation* last_cluster_op) { +tf_device::LaunchOp CreateLaunchOpForOutsideCluster( + OpBuilder* builder, Operation* last_cluster_op) { // TODO(b/154363171): Set the CPU device. // An empty string placeholder is used for the device as that will be later // populated with the device of the associated TPUReplicateMetadata op. @@ -117,14 +118,14 @@ void PropagateParallelExecuteReturnToReplicate( // Creates a `parallel_execute` op in place of launch with 'clusters` and // 'launch` as regions. -void CreateParallelExecuteFromClusters(tf_device::LaunchOp launch, - const ClusterMap& clusters) { - OpBuilder builder(launch); +void CreateParallelExecuteFromOutsideClusters( + tf_device::ClusterOp tpu_cluster, const OutsideClusterMap& clusters) { + OpBuilder builder(tpu_cluster); // Create parallel_execute regions. The original TPU cluster computation // is the extra region. int num_regions = 1 + clusters.size(); auto parallel_execute_op = builder.create( - launch.getLoc(), num_regions, launch.results().getTypes()); + tpu_cluster.getLoc(), num_regions, tpu_cluster.results().getTypes()); // Move outside compilation clusters to parallel_execute regions. for (const auto& cluster : llvm::enumerate(clusters)) { @@ -134,21 +135,23 @@ void CreateParallelExecuteFromClusters(tf_device::LaunchOp launch, parallel_execute_op.GetRegionBlockWithIndex(cluster.index()); builder.setInsertionPointToEnd(&outside_block); tf_device::LaunchOp launch_op = - CreateLaunchOpForCluster(&builder, cluster_ops.back()); - MoveClusterOpsToLaunchOp(launch_op, cluster_ops); + CreateLaunchOpForOutsideCluster(&builder, cluster_ops.back()); + MoveOutsideClusterOpsToLaunchOp(launch_op, cluster_ops); builder.setInsertionPointToEnd(&outside_block); // TODO(b/154363171): Handle returns from OutsideCompiled parallel_execute // regions either through communication with TPU parallel_execute regions // or modifying parallel_execute returns. 
- builder.create(launch.getLoc(), ArrayRef{}); + builder.create(tpu_cluster.getLoc(), + ArrayRef{}); } // Move the launch body to last parallel_execute block. Block& inside_block = parallel_execute_op.GetRegionBlockWithIndex(num_regions - 1); builder.setInsertionPointToEnd(&inside_block); - builder.create(launch.getLoc(), launch.getResults()); - launch.getOperation()->moveBefore(inside_block.getTerminator()); + builder.create(tpu_cluster.getLoc(), + tpu_cluster.getResults()); + tpu_cluster.getOperation()->moveBefore(inside_block.getTerminator()); PropagateParallelExecuteReturnToReplicate(parallel_execute_op); // TODO(b/154363171): Handle returns from OutsideCompiled parallel_execute @@ -157,17 +160,19 @@ void CreateParallelExecuteFromClusters(tf_device::LaunchOp launch, } void TPUExtractOutsideCompilation::runOnFunction() { - auto extract_result = getFunction().walk([&](tf_device::LaunchOp launch) { - ClusterMap clusters; - if (failed(CollectAndGroupClusterOps(&launch.GetBody(), &clusters))) - return WalkResult::interrupt(); + auto extract_result = + getFunction().walk([&](tf_device::ClusterOp tpu_cluster) { + OutsideClusterMap clusters; + if (failed(CollectAndGroupOutsideClusterOps(&tpu_cluster.GetBody(), + &clusters))) + return WalkResult::interrupt(); - if (clusters.empty()) return WalkResult::advance(); + if (clusters.empty()) return WalkResult::advance(); - CreateParallelExecuteFromClusters(launch, clusters); + CreateParallelExecuteFromOutsideClusters(tpu_cluster, clusters); - return WalkResult::advance(); - }); + return WalkResult::advance(); + }); if (extract_result.wasInterrupted()) return signalPassFailure(); } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc index 98ff0de7645..f5e9da915c8 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc @@ -92,7 +92,7 @@ 
constexpr char kBadArrayAttrLengthMsg[] = // // Would become following ops (unimportant attributes, types are omitted): // %1 = "tf.Shape"(%0) -// %2:2 = "tf.MLIRCompileToTPU"(%1) {module = ""} +// %2:2 = "tf._TPUCompileMlir"(%1) {module = ""} // "tf.TPUCompileSucceededAssert"(%2#0) // %3 = "tf.TPUExecute"(%0, %2#1) // %4 = "tf.SomeOp"(%3) @@ -448,19 +448,20 @@ Operation* BuildCompileOp( // core, and all replica devices per core are grouped together. void AssignDevicesToReplicate( tf_device::ReplicateOp replicate, - llvm::ArrayRef> execution_devices, + llvm::ArrayRef> + tpu_devices, OpBuilder* builder) { if (!replicate) return; - const int num_replicas = execution_devices.size(); - const int num_cores_per_replica = execution_devices.front().size(); + const int num_replicas = tpu_devices.size(); + const int num_cores_per_replica = tpu_devices.front().size(); llvm::SmallVector device_attrs; for (int core = 0; core < num_cores_per_replica; ++core) { llvm::SmallVector devices_by_core; devices_by_core.reserve(num_replicas); for (int replica = 0; replica < num_replicas; ++replica) - devices_by_core.push_back(execution_devices[replica][core]); + devices_by_core.push_back(tpu_devices[replica][core].device); device_attrs.push_back( builder->getNamedAttr(tensorflow::GetDeviceAliasForLogicalCore(core), @@ -492,11 +493,12 @@ LogicalResult BuildExecuteOp( // Creates a tf_device.parallel_execute op that wraps TPUExecute op to // represent execution of TPU program in multiple logical cores. 
LogicalResult BuildParallelExecuteOp( - llvm::ArrayRef> execution_devices, + llvm::ArrayRef> + tpu_devices, llvm::ArrayRef output_sharding_config, Operation* compile_op, tf_device::ClusterFuncOp cluster_func, OpBuilder* builder, tf_device::ParallelExecuteOp* parallel_execute_op) { - const int num_cores_per_replica = execution_devices.front().size(); + const int num_cores_per_replica = tpu_devices.front().size(); // parallel_execute op returns concatenated list of return values of // all its regions. // @@ -528,7 +530,7 @@ LogicalResult BuildParallelExecuteOp( num_cores_per_replica, cluster_func, builder, &input_list); if (failed(result)) return failure(); - const bool replicated = execution_devices.size() != 1; + const bool replicated = tpu_devices.size() != 1; // For each logical core, create a region with TPUExecute op. assert(input_list.size() == num_cores_per_replica); for (int core = 0; core < num_cores_per_replica; ++core) { @@ -553,7 +555,7 @@ LogicalResult BuildParallelExecuteOp( // op. std::string device = replicated ? tensorflow::GetDeviceAliasForLogicalCore(core) - : execution_devices.front()[core]; + : tpu_devices.front()[core].device; auto region_launch_op = WrapOpInLaunch(builder, region.getParent()->getLoc(), execute, device); @@ -566,13 +568,14 @@ LogicalResult BuildParallelExecuteOp( } tf_device::LaunchOp AssignDevicesToReplicatedExecute( - llvm::ArrayRef> execution_devices, + llvm::ArrayRef> + tpu_devices, Operation* execute_op, OpBuilder* builder) { - const bool replicated = execution_devices.size() != 1; + const bool replicated = tpu_devices.size() != 1; // If computation is replicated, use aliased device. Otherwise there is only // one execution device and the device is assigned to the execute op. std::string device = replicated ? 
tensorflow::GetDeviceAliasForLogicalCore(0) - : execution_devices.front().front(); + : tpu_devices.front().front().device; return WrapOpInLaunch(builder, execute_op->getLoc(), execute_op, device); } @@ -687,6 +690,16 @@ LogicalResult Rewrite( // Create compile op. auto& tpu_device_assignment = status_or_tpu_device_assignment.ValueOrDie(); builder->setInsertionPoint(cluster_func); + + // Create the TPUCompileMlir and TPUCompileSucceededAssert outside of + // parallel_execute region if it exists. + if (llvm::isa(cluster_func.getParentOp())) { + // Currently, outside compilation and model parallelism are not supported + // together. + assert(num_cores_per_replica == 1); + builder->setInsertionPoint(cluster_func.getParentOp()); + } + Operation* compile_op = BuildCompileOp( cluster_func, num_replicas, num_cores_per_replica, tpu_device_assignment.compilation_device, @@ -704,7 +717,7 @@ LogicalResult Rewrite( BuildTPUCompileSucceededAssertOp( compile_op, tpu_device_assignment.compilation_device, builder); - AssignDevicesToReplicate(replicate, tpu_device_assignment.execution_devices, + AssignDevicesToReplicate(replicate, tpu_device_assignment.tpu_devices, builder); llvm::SmallVector output_shardings; @@ -712,12 +725,13 @@ LogicalResult Rewrite( num_cores_per_replica, cluster_func, &output_shardings); if (failed(result)) return failure(); + builder->setInsertionPoint(cluster_func); if (num_cores_per_replica > 1) { // For model parallelism, tf_device.parallel_execute is used to express // concurrent device execution across multiple logical devices. 
tf_device::ParallelExecuteOp execute_op; - result = BuildParallelExecuteOp(tpu_device_assignment.execution_devices, + result = BuildParallelExecuteOp(tpu_device_assignment.tpu_devices, output_shardings, compile_op, cluster_func, builder, &execute_op); if (failed(result)) return failure(); @@ -740,7 +754,7 @@ LogicalResult Rewrite( if (failed(result)) return failure(); tf_device::LaunchOp launch_op = AssignDevicesToReplicatedExecute( - tpu_device_assignment.execution_devices, execute_op, builder); + tpu_device_assignment.tpu_devices, execute_op, builder); cluster_func.replaceAllUsesWith(launch_op); } diff --git a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc index 49be3da912a..a613ce1f920 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc @@ -40,6 +40,7 @@ limitations under the License. #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" @@ -57,6 +58,7 @@ limitations under the License. #include "mlir/IR/StandardTypes.h" // from @llvm-project #include "mlir/IR/Types.h" // from @llvm-project #include "mlir/IR/Verifier.h" // from @llvm-project +#include "mlir/Pass/PassManager.h" // from @llvm-project #include "tensorflow/compiler/jit/shape_inference_helpers.h" #include "tensorflow/compiler/mlir/op_or_arg_name_mapper.h" #include "tensorflow/compiler/mlir/tensorflow/ir/control_flow_ops.h" @@ -65,6 +67,7 @@ limitations under the License. 
#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" +#include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" #include "tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.h" #include "tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h" #include "tensorflow/compiler/mlir/tensorflow/utils/convert_type.h" @@ -109,6 +112,7 @@ static inline absl::string_view StringRefToView(llvm::StringRef ref) { } namespace tensorflow { +using mlir::NamedAttrList; using mlir::TensorType; using mlir::TF::VarHandleOp; using mlir::tf_saved_model::GlobalTensorOp; @@ -306,9 +310,9 @@ class ImporterBase { // AttrValue {name : foo, attrs : {k1 : bar, k2 : rfc}}, it will convert it to // a list of MLIR Attributes: [{base_name : foo}, {base_name.k1 : bar}, // {base_name.k2 : rfc}}. - Status ConvertFunctionCallAttribute( - const std::string& base_name, const AttrValue& value, - llvm::SmallVector* attributes); + Status ConvertFunctionCallAttribute(const std::string& base_name, + const AttrValue& value, + NamedAttrList* attributes); // Helper to create either a tf_executor operation or a TF operation wrapped // in an island. 
When convert_to_legacy_call is true, converts the operation @@ -1089,9 +1093,9 @@ StatusOr ImporterBase::ConvertSubtypes( return subtypes; } -Status ImporterBase::ConvertFunctionCallAttribute( - const std::string& base_name, const AttrValue& value, - llvm::SmallVector* attributes) { +Status ImporterBase::ConvertFunctionCallAttribute(const std::string& base_name, + const AttrValue& value, + NamedAttrList* attributes) { TF_ASSIGN_OR_RETURN(auto func_attr, ConvertFunctionCallName(value.func().name())); attributes->push_back(builder_.getNamedAttr(base_name, func_attr)); @@ -2428,8 +2432,8 @@ class SavedModelObjectGraphImporter : public ImporterBase { // Main entry point: converts all functions in the given meta graph to an MLIR // Module. static StatusOr Convert( - SavedModelV2Bundle* saved_model, mlir::MLIRContext* context, - absl::Span exported_names, bool add_default_attributes); + SavedModelV2Bundle* saved_model, absl::Span exported_names, + mlir::MLIRContext* context, bool add_default_attributes); private: explicit SavedModelObjectGraphImporter( @@ -3129,8 +3133,8 @@ Status CreateSavedModelIR( } StatusOr SavedModelObjectGraphImporter::Convert( - SavedModelV2Bundle* saved_model, mlir::MLIRContext* context, - absl::Span exported_names, bool add_default_attributes) { + SavedModelV2Bundle* saved_model, absl::Span exported_names, + mlir::MLIRContext* context, bool add_default_attributes) { GraphDebugInfo dummy_debug_info; const GraphDebugInfo& debug_info = saved_model->debug_info() ? *saved_model->debug_info() : dummy_debug_info; @@ -3207,17 +3211,20 @@ class SavedModelSignatureDefImporter { public: // Main entry point: converts all functions (specified by SignatureDefs) in // the given meta graph to an MLIR Module. 
- static StatusOr Convert(const SavedModelBundle& bundle, - mlir::MLIRContext* context) { - SavedModelSignatureDefImporter importer(bundle, context); + static StatusOr Convert( + const SavedModelBundle& bundle, absl::Span exported_names, + mlir::MLIRContext* context) { + SavedModelSignatureDefImporter importer(bundle, exported_names, context); return importer.ConvertSignatures(); } private: SavedModelSignatureDefImporter(const SavedModelBundle& bundle, + absl::Span exported_names, mlir::MLIRContext* context) : bundle_(bundle), + exported_names_(exported_names), module_(mlir::ModuleOp::create(mlir::UnknownLoc::get(context))) {} // Converts the SavedModel to the SavedModel dialect. Creates an MLIR function @@ -3250,6 +3257,7 @@ class SavedModelSignatureDefImporter { const std::vector>& inputs); const SavedModelBundle& bundle_; + absl::Span exported_names_; mlir::OwningModuleRef module_; }; @@ -3265,6 +3273,9 @@ SavedModelSignatureDefImporter::ConvertSignatures() { GraphDebugInfo debug_info; if (bundle_.debug_info != nullptr) debug_info = *bundle_.debug_info; + llvm::StringSet<> exported_name_set; + exported_name_set.insert(exported_names_.begin(), exported_names_.end()); + for (const auto& key_and_signature_def : signatures) { const std::string& sig_def_key = key_and_signature_def.first; const SignatureDef& signature_def = key_and_signature_def.second; @@ -3274,6 +3285,10 @@ SavedModelSignatureDefImporter::ConvertSignatures() { if (sig_def_key == "__saved_model_init_op") { continue; } + if (!exported_name_set.empty() && + exported_name_set.count(sig_def_key) == 0) { + continue; + } TF_RETURN_IF_ERROR(ConvertSignature(graphdef, sig_def_key, signature_def, debug_info, flib_def)); @@ -3556,12 +3571,14 @@ StatusOr ConvertSavedModelToMlir( SavedModelV2Bundle* saved_model, mlir::MLIRContext* context, absl::Span exported_names, bool add_default_attributes) { return SavedModelObjectGraphImporter::Convert( - saved_model, context, exported_names, add_default_attributes); + 
saved_model, exported_names, context, add_default_attributes); } StatusOr ConvertSavedModelV1ToMlir( - const SavedModelBundle& saved_model, mlir::MLIRContext* context) { - return SavedModelSignatureDefImporter::Convert(saved_model, context); + const SavedModelBundle& saved_model, absl::Span exported_names, + mlir::MLIRContext* context) { + return SavedModelSignatureDefImporter::Convert(saved_model, exported_names, + context); } std::string MlirModuleToString(mlir::ModuleOp module, bool show_debug_info) { diff --git a/tensorflow/compiler/mlir/tensorflow/translate/import_model.h b/tensorflow/compiler/mlir/tensorflow/translate/import_model.h index 8603eadb487..bdb72345201 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/import_model.h +++ b/tensorflow/compiler/mlir/tensorflow/translate/import_model.h @@ -55,6 +55,7 @@ stream_executor::port::StatusOr ConvertSavedModelToMlir( // expressed with tf_executor dialect. stream_executor::port::StatusOr ConvertSavedModelV1ToMlir(const SavedModelBundle& saved_model, + absl::Span exported_names, mlir::MLIRContext* context); // Serialize a MLIR module to a string. diff --git a/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc b/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc index 2c7f84d8268..6ada0fec4e2 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc @@ -141,7 +141,8 @@ mlir::OwningModuleRef SavedModelObjectGraphToMlirImport( mlir::OwningModuleRef SavedModelSignatureDefsToMlirImport( absl::string_view saved_model_dir, - const std::unordered_set& tags, mlir::MLIRContext* context) { + const std::unordered_set& tags, + absl::Span exported_names, mlir::MLIRContext* context) { tensorflow::SavedModelBundle bundle; tensorflow::SessionOptions session_options; // Force saved model states to be restored to CPU. 
@@ -155,7 +156,7 @@ mlir::OwningModuleRef SavedModelSignatureDefsToMlirImport( return nullptr; } - auto module_or = ConvertSavedModelV1ToMlir(bundle, context); + auto module_or = ConvertSavedModelV1ToMlir(bundle, exported_names, context); if (!module_or.status().ok()) { LOG(ERROR) << "SavedModel V1 import failed: " << module_or.status(); return nullptr; diff --git a/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.h b/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.h index f498864c8aa..490b7c7d8f0 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.h +++ b/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.h @@ -64,7 +64,8 @@ mlir::OwningModuleRef SavedModelObjectGraphToMlirImport( // given MLIR `context`. mlir::OwningModuleRef SavedModelSignatureDefsToMlirImport( absl::string_view saved_model_dir, - const std::unordered_set& tags, mlir::MLIRContext* context); + const std::unordered_set& tags, + absl::Span exported_names, mlir::MLIRContext* context); } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc index 2374687c920..e8ca691f961 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc @@ -293,6 +293,12 @@ Status ConvertMLIRToXlaComputation( tf2xla.addPass(mlir::xla_hlo::createLegalizeTfWithTf2XlaPass(device_type)); tf2xla.addNestedPass(mlir::createCanonicalizerPass()); + // Run shape inference pass to propagate shapes through tensor_cast operations + // from static to dynamic shapes. This could be generated if the shape + // inference was originally missing in a TF op but the corresponding HLO op + // had static shape after lowering. 
+ tf2xla.addPass(mlir::TF::CreateTFShapeInferencePass()); + // Run LegalizeTFPass again because the previous legalization passes can // expose more graph pruning and canonicalization opportunities that are // necessary for the second LegalizeTFPass(allow_partial_conversion=false) diff --git a/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.cc b/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.cc index fcfef565952..b28f26b6c3c 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.cc @@ -31,12 +31,14 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" #include "tensorflow/compiler/mlir/tensorflow/utils/convert_type.h" #include "tensorflow/compiler/mlir/tensorflow/utils/mangling_util.h" +#include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/lib/bfloat16/bfloat16.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/tstring.h" #include "tensorflow/stream_executor/lib/statusor.h" @@ -131,13 +133,21 @@ StatusOr ConvertTensor(const Tensor& input_tensor, case DTYPE: \ return ConvertFlatTensor(input_tensor, type); - // TODO(fengliuai): customize the conversions for more types. + // TODO(fengliuai): customize the conversions for quantized and string types. 
switch (input_dtype) { CONVERT_FLAT(DT_BOOL, bool) CONVERT_FLAT(DT_FLOAT, float) CONVERT_FLAT(DT_DOUBLE, double) + CONVERT_FLAT(DT_INT8, int8) + CONVERT_FLAT(DT_INT16, int16) CONVERT_FLAT(DT_INT32, int32) CONVERT_FLAT(DT_INT64, int64) + CONVERT_FLAT(DT_UINT8, uint8) + CONVERT_FLAT(DT_UINT16, uint16) + CONVERT_FLAT(DT_UINT32, uint32) + CONVERT_FLAT(DT_UINT64, uint64) + CONVERT_FLAT(DT_COMPLEX64, std::complex) + CONVERT_FLAT(DT_COMPLEX128, std::complex) // BFLOAT16 is a special case that it needs to be cast to double type to // match its storage type. @@ -207,12 +217,20 @@ mlir::TF::ShapeAttr ConvertTypeToTensorShapeAttr(const mlir::Type& type) { // Converts an MLIR dense string elements attribute to a TensorFlow tensor // proto. -Status ConvertStringElementsAttr(const DenseStringElementsAttr attr, - TensorProto* output_tensor) { - for (const auto& val : attr.getRawStringData()) { - output_tensor->add_string_val(val.data(), val.size()); +void ConvertStringElementsAttr( + const DenseStringElementsAttr attr, + protobuf::RepeatedPtrField* output) { + for (const auto& val : attr.getRawStringData()) + output->Add({val.data(), val.size()}); +} + +template +void ConvertComplexElementsAttr(const mlir::DenseElementsAttr attr, + protobuf::RepeatedField* output) { + for (const auto& val : attr.getValues>()) { + output->Add(val.real()); + output->Add(val.imag()); } - return Status::OK(); } // Converts an MLIR opaque elements attribute to a TensorFlow tensor proto. @@ -226,139 +244,80 @@ Status ConvertOpaqueElementsAttr(const ElementsAttr attr, return InvalidArgument("Unexpected elements attribute type from MLIR."); } -// Converts an MLIR elements attribute to a TensorFlow tensor proto -// with the double_val field updated. 
-Status ConvertDoubleElementsAttr(const ElementsAttr attr, - TensorProto* output_tensor) { - if (auto elts = attr.dyn_cast()) { - if (elts.isSplat()) { - output_tensor->add_double_val(elts.getSplatValue()); - } else { - for (auto value : elts.getValues()) - output_tensor->add_double_val(value); - } - return Status::OK(); - } - return ConvertOpaqueElementsAttr(attr, output_tensor); -} - -// Converts an MLIR elements attribute to a TensorFlow tensor proto -// with the float_val field updated. -Status ConvertFloatElementsAttr(const ElementsAttr attr, - TensorProto* output_tensor) { - if (auto elts = attr.dyn_cast()) { - if (elts.isSplat()) { - output_tensor->add_float_val(elts.getSplatValue()); - } else { - for (auto value : elts.getValues()) - output_tensor->add_float_val(value); - } - return Status::OK(); - } - return ConvertOpaqueElementsAttr(attr, output_tensor); -} - -// Converts an MLIR elements attribute to a TensorFlow tensor proto -// with the half_val field updated. -Status ConvertHalfElementsAttr(const ElementsAttr attr, - TensorProto* output_tensor) { - if (auto elts = attr.dyn_cast()) { - if (elts.isSplat()) { - output_tensor->add_half_val( - (*elts.begin()).bitcastToAPInt().getSExtValue()); - } else { - for (const auto& value : elts.getFloatValues()) - output_tensor->add_half_val(value.bitcastToAPInt().getSExtValue()); - } - return Status::OK(); - } - return ConvertOpaqueElementsAttr(attr, output_tensor); -} - -// Converts an MLIR elements attribute to a TensorFlow tensor proto -// with the int_val field updated. 
-Status ConvertIntElementsAttr(const mlir::ElementsAttr attr, - TensorProto* output_tensor) { - if (auto elts = attr.dyn_cast()) { - if (elts.isSplat()) { - output_tensor->add_int_val((*elts.begin()).getSExtValue()); - } else { - for (const auto& val : elts) - output_tensor->add_int_val(val.getSExtValue()); - } - return Status::OK(); - } - return ConvertOpaqueElementsAttr(attr, output_tensor); -} - -Status ConvertBfloat16ElementsAttr(const mlir::ElementsAttr attr, - TensorProto* output_tensor) { - auto elts = attr.dyn_cast(); - if (!elts) { - return ConvertOpaqueElementsAttr(attr, output_tensor); - } - - // Bfloat16 is internally represented as `double` in MLIR. - if (elts.isSplat()) { - double v = elts.getSplatValue(); - bfloat16 bf16_val = static_cast(v); - output_tensor->add_half_val(absl::bit_cast(bf16_val)); +// Converts an MLIR elements attribute and adds it to specified repeated field. +template +void ConvertElementsAttr(const mlir::DenseElementsAttr attr, + protobuf::RepeatedField* output) { + if (attr.isSplat()) { + output->Add(attr.getSplatValue()); } else { - for (auto v : elts.getValues()) { + for (auto value : attr.getValues()) output->Add(value); + } +} + +// Converts an MLIR elements attribute containing half values and adds it to +// specified repeated field. +void ConvertHalfElementsAttr(const DenseFPElementsAttr attr, + protobuf::RepeatedField* output_tensor) { + if (attr.isSplat()) { + output_tensor->Add((*attr.begin()).bitcastToAPInt().getSExtValue()); + } else { + for (const llvm::APFloat value : attr.getFloatValues()) + output_tensor->Add(value.bitcastToAPInt().getSExtValue()); + } +} + +// Converts an MLIR elements attribute containing int values and adds it to +// specified repeated field. 
+void ConvertIntElementsAttr(const mlir::DenseIntElementsAttr attr, + protobuf::RepeatedField* output) { + if (attr.isSplat()) { + output->Add((*attr.begin()).getSExtValue()); + } else { + for (const llvm::APInt val : attr) output->Add(val.getSExtValue()); + } +} + +void ConvertBfloat16ElementsAttr(const mlir::DenseFPElementsAttr attr, + protobuf::RepeatedField* output) { + // Bfloat16 is internally represented as `double` in MLIR. + if (attr.isSplat()) { + double v = attr.getSplatValue(); + bfloat16 bf16_val = static_cast(v); + output->Add(absl::bit_cast(bf16_val)); + } else { + for (auto v : attr.getValues()) { bfloat16 bf16_val = static_cast(v); - output_tensor->add_half_val(absl::bit_cast(bf16_val)); + output->Add(absl::bit_cast(bf16_val)); } } - - return Status::OK(); } -// Converts an MLIR elements attribute to a TensorFlow tensor proto -// with the int64_val field updated. -Status ConvertInt64ElementsAttr(const mlir::ElementsAttr attr, - TensorProto* output_tensor) { - if (auto elts = attr.dyn_cast()) { - if (elts.isSplat()) { - output_tensor->add_int64_val((*elts.begin()).getSExtValue()); - } else { - for (const auto& val : elts) - output_tensor->add_int64_val(val.getSExtValue()); - } - return Status::OK(); - } - return ConvertOpaqueElementsAttr(attr, output_tensor); -} - -// Converts an MLIR elements attribute to a TensorFlow tensor proto -// with bool_val field updated. 
-Status ConvertBoolElementsAttr(const mlir::ElementsAttr attr, - TensorProto* output_tensor) { - if (auto elts = attr.dyn_cast()) { - for (const auto& val : elts) { - output_tensor->add_bool_val(val.getBoolValue()); - } - return Status::OK(); - } - return ConvertOpaqueElementsAttr(attr, output_tensor); -} - -Status ConvertToTensorProto(const ElementsAttr attr, - TensorProto* output_tensor) { +Status ConvertToTensorProto(const ElementsAttr attr, TensorProto* output) { auto type = attr.getType(); auto shape = type.getShape(); DataType output_dtype; TF_RETURN_IF_ERROR(ConvertToDataType(type, &output_dtype)); - output_tensor->set_dtype(output_dtype); - ConvertToTensorShapeProto(shape, output_tensor->mutable_tensor_shape()); + output->set_dtype(output_dtype); + ConvertToTensorShapeProto(shape, output->mutable_tensor_shape()); + + if (attr.isa()) + return ConvertOpaqueElementsAttr(attr.cast(), output); + + auto dense_attr = attr.dyn_cast(); + if (!dense_attr) return errors::InvalidArgument("Unsupported elements attr"); switch (output_dtype) { case DT_FLOAT: - return ConvertFloatElementsAttr(attr, output_tensor); + ConvertElementsAttr(dense_attr, output->mutable_float_val()); + break; case DT_HALF: - // Handles both DenseFPElementsAttr and OpaqueElementsAttr. 
- return ConvertHalfElementsAttr(attr, output_tensor); + ConvertHalfElementsAttr(dense_attr.cast(), + output->mutable_half_val()); + break; case DT_DOUBLE: - return ConvertDoubleElementsAttr(attr, output_tensor); + ConvertElementsAttr(dense_attr, output->mutable_double_val()); + break; case DT_QUINT8: case DT_UINT8: case DT_INT8: @@ -366,20 +325,40 @@ Status ConvertToTensorProto(const ElementsAttr attr, case DT_UINT16: case DT_INT16: case DT_INT32: - return ConvertIntElementsAttr(attr, output_tensor); + ConvertIntElementsAttr(dense_attr.cast(), + output->mutable_int_val()); + break; + case DT_UINT32: + ConvertElementsAttr(dense_attr, output->mutable_uint32_val()); + break; + case DT_UINT64: + ConvertElementsAttr(dense_attr, output->mutable_uint64_val()); + break; case DT_INT64: - return ConvertInt64ElementsAttr(attr, output_tensor); + ConvertElementsAttr(dense_attr, output->mutable_int64_val()); + break; case DT_BOOL: - return ConvertBoolElementsAttr(attr, output_tensor); + ConvertElementsAttr(dense_attr, output->mutable_bool_val()); + break; case DT_BFLOAT16: - return ConvertBfloat16ElementsAttr(attr, output_tensor); + ConvertBfloat16ElementsAttr(dense_attr.cast(), + output->mutable_half_val()); + break; case DT_STRING: - return ConvertStringElementsAttr(attr.cast(), - output_tensor); + ConvertStringElementsAttr(dense_attr.cast(), + output->mutable_string_val()); + break; + case DT_COMPLEX64: + ConvertComplexElementsAttr(dense_attr, output->mutable_scomplex_val()); + break; + case DT_COMPLEX128: + ConvertComplexElementsAttr(dense_attr, output->mutable_dcomplex_val()); + break; default: - return ConvertOpaqueElementsAttr(attr.cast(), - output_tensor); + return errors::Unimplemented(absl::StrCat("Unimplemented data type ", + DataTypeString(output_dtype))); } + return Status::OK(); } Status ConvertToTensor(const mlir::ElementsAttr attr, Tensor* output_tensor) { diff --git a/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor_test.cc 
b/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor_test.cc index d711c19baae..bf96e3d1df4 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor_test.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor_test.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h" #include +#include #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project @@ -99,48 +100,74 @@ TEST(ConvertTypeToTensorTypeTest, ConvertStringTensor) { EXPECT_EQ(string_values[3], mlir::StringRef("four")); } -TEST(ConvertTypeToTensorTypeTest, Convert16BitFloats) { +class ConvertTensorTest : public ::testing::Test { + protected: + template + void VerifyConversion(std::initializer_list values, DataType dtype, + mlir::Type expected_ty) { + mlir::Builder b(expected_ty.getContext()); + Tensor tensor(dtype, TensorShape({static_cast(values.size())})); + tensor.flat().setValues(values); + + auto value_or = ConvertTensor(tensor, &b); + TF_ASSERT_OK(value_or.status()); + auto attr = value_or.ValueOrDie(); + + EXPECT_EQ(attr.getType().getElementType(), expected_ty); + + Tensor out; + TF_ASSERT_OK(ConvertToTensor(attr, &out)); + + test::ExpectTensorEqual(tensor, out); + } +}; + +TEST_F(ConvertTensorTest, Simple) { RegisterDialects(); + mlir::MLIRContext context; - mlir::Builder b(&context); + ASSERT_NO_FATAL_FAILURE(VerifyConversion( + {Eigen::half(1.0)}, DT_HALF, mlir::FloatType::getF16(&context))); + ASSERT_NO_FATAL_FAILURE( + VerifyConversion({bfloat16(1.0), bfloat16(-1.0)}, DT_BFLOAT16, + mlir::FloatType::getBF16(&context))); + ASSERT_NO_FATAL_FAILURE(VerifyConversion( + {1.0, -1.0}, DT_FLOAT, mlir::FloatType::getF32(&context))); + ASSERT_NO_FATAL_FAILURE(VerifyConversion( + {1.0, -1.0}, DT_DOUBLE, mlir::FloatType::getF64(&context))); - { - // Create the sample tensor to convert. 
- Tensor tensor(DT_HALF, TensorShape({1})); - auto Tt = tensor.flat(); - Tt.setValues({Eigen::half(1.0)}); + ASSERT_NO_FATAL_FAILURE(VerifyConversion( + {1, -1}, DT_INT8, mlir::IntegerType::get(8, &context))); + ASSERT_NO_FATAL_FAILURE(VerifyConversion( + {1, -1}, DT_INT16, mlir::IntegerType::get(16, &context))); + ASSERT_NO_FATAL_FAILURE(VerifyConversion( + {1, -1}, DT_INT32, mlir::IntegerType::get(32, &context))); + ASSERT_NO_FATAL_FAILURE(VerifyConversion( + {1, -1}, DT_INT64, mlir::IntegerType::get(64, &context))); - auto value_or = ConvertTensor(tensor, &b); - TF_EXPECT_OK(value_or.status()); - auto attr = value_or.ValueOrDie(); + ASSERT_NO_FATAL_FAILURE(VerifyConversion( + {1, 2}, DT_UINT8, + mlir::IntegerType::get( + 8, mlir::IntegerType::SignednessSemantics::Unsigned, &context))); + ASSERT_NO_FATAL_FAILURE(VerifyConversion( + {1, 2}, DT_UINT16, + mlir::IntegerType::get( + 16, mlir::IntegerType::SignednessSemantics::Unsigned, &context))); + ASSERT_NO_FATAL_FAILURE(VerifyConversion( + {1, 2}, DT_UINT32, + mlir::IntegerType::get( + 32, mlir::IntegerType::SignednessSemantics::Unsigned, &context))); + ASSERT_NO_FATAL_FAILURE(VerifyConversion( + {1, 2}, DT_UINT64, + mlir::IntegerType::get( + 64, mlir::IntegerType::SignednessSemantics::Unsigned, &context))); - EXPECT_TRUE(attr.isa()); - EXPECT_TRUE(attr.getType().getElementType().isF16()); - - Tensor out; - TF_ASSERT_OK(ConvertToTensor(attr, &out)); - - test::ExpectTensorEqual(tensor, out); - } - - { - // Create the sample tensor to convert. 
- Tensor tensor(DT_BFLOAT16, TensorShape({2})); - auto Tt = tensor.flat(); - Tt.setValues({bfloat16(1.0), bfloat16(-1.0)}); - - auto value_or = ConvertTensor(tensor, &b); - TF_EXPECT_OK(value_or.status()); - auto attr = value_or.ValueOrDie(); - - EXPECT_TRUE(attr.isa()); - EXPECT_TRUE(attr.getType().getElementType().isBF16()); - - Tensor out; - TF_ASSERT_OK(ConvertToTensor(attr, &out)); - - test::ExpectTensorEqual(tensor, out); - } + ASSERT_NO_FATAL_FAILURE(VerifyConversion>( + {{0.0, 1.0}, {1.0, 0.0}}, DT_COMPLEX64, + mlir::ComplexType::get(mlir::FloatType::getF32(&context)))); + ASSERT_NO_FATAL_FAILURE(VerifyConversion>( + {{0.0, 1.0}, {1.0, 0.0}}, DT_COMPLEX128, + mlir::ComplexType::get(mlir::FloatType::getF64(&context)))); } } // namespace diff --git a/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc b/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc index cc795259893..4877cbc4a44 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc @@ -59,6 +59,18 @@ limitations under the License. namespace tensorflow { namespace { +// static TensorFlow op prefix set. +std::set* GlobalOpPrefixes() { + static std::set* global_op_prefixes = [] { + std::set* result = new std::set; + result->insert("tf."); + result->insert("_tf."); + result->insert("tf_executor."); + return result; + }(); + return global_op_prefixes; +} + // Converts a location to the debug information for the node def. Status ConvertLocation(mlir::Location inst_loc, NodeDef::ExperimentalDebugInfo* debug_info) { @@ -268,8 +280,10 @@ StatusOr GetTensorFlowOpName(llvm::StringRef op_name) { // - ".sink" or ".Sink": only the NextIteration operation has this suffix. We // don't need to consider ".source"/".Source" because the nodes with this // suffix are skipped by the caller and will not be added to the graph. 
- if (!op_name.consume_front("_tf.") && !op_name.consume_front("tf.") && - !op_name.consume_front("tf_executor.")) { + auto prefixes = GlobalOpPrefixes(); + if (std::none_of(prefixes->begin(), prefixes->end(), [&](std::string prefix) { + return op_name.consume_front(prefix); + })) { return errors::FailedPrecondition("op node '", op_name.str(), "' was not a TF op!"); } @@ -506,4 +520,9 @@ bool IsLegacyCallInstruction(mlir::Operation* inst) { inst->getName().getStringRef().compare("_tf.LegacyCall") == 0; } +Status AddTensorFlowOpPrefix(std::string prefix) { + GlobalOpPrefixes()->insert(prefix); + return Status::OK(); +} + } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tensorflow/utils/export_utils.h b/tensorflow/compiler/mlir/tensorflow/utils/export_utils.h index 32ed528bd0d..58fe39fa4e8 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/export_utils.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/export_utils.h @@ -34,10 +34,17 @@ limitations under the License. #include "tensorflow/core/lib/core/status.h" #include "tensorflow/stream_executor/lib/statusor.h" +namespace mlir { +class ShapedType; +} // namespace mlir + namespace tensorflow { using stream_executor::port::StatusOr; +// Add custom op prefix for TensorFlow dialects. +Status AddTensorFlowOpPrefix(std::string); + // Maps an MLIR op name in the TensorFlow dialect or the TensorFlow control // dialect back into a TensorFlow valid op name. 
StatusOr GetTensorFlowOpName(llvm::StringRef); diff --git a/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc index 6cf2781e48d..06c10c26835 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc @@ -164,12 +164,19 @@ std::string GetTPUCompilationDevice(Device system_device) { return DeviceNameUtils::ParsedNameToString(system_device); } +// Finds the host CPU device for a given TPU device. +std::string GetCPUHostDeviceForTPUDevice(Device tpu_device) { + tpu_device.type = DEVICE_CPU; + tpu_device.id = 0; + return DeviceNameUtils::ParsedNameToString(tpu_device); +} + // Determines execution devices when topology and device assignment are not // defined. This is a special case where a single core computation is replicated // to every core in the mesh. TPU devices are simply added to // `execution_devices` of one replica. `num_replicas` must be 1 or the total // number of TPU devices available, and `num_cores_per_replica` must be 1. 
-StatusOr GetFullMeshTPUExecutionDeviceAssignment( +StatusOr GetFullMeshTPUExecutionDeviceAssignment( int num_replicas, int num_cores_per_replica, llvm::ArrayRef> tpu_devices) { const int num_tasks = tpu_devices.size(); @@ -185,17 +192,18 @@ StatusOr GetFullMeshTPUExecutionDeviceAssignment( "'num_cores_per_replica' must be equal to 1, got ", num_cores_per_replica); - ExecutionDevices execution_devices; - execution_devices.reserve(num_replicas); + TPUDevicesAndHosts devices_and_hosts; + devices_and_hosts.reserve(num_replicas); for (int i = 0; i < num_replicas; ++i) { const int task = i / num_tpus_per_task; const int device = i % num_tpus_per_task; - execution_devices.push_back( - {tensorflow::DeviceNameUtils::ParsedNameToString( - tpu_devices[task][device])}); + const auto& tpu_device = tpu_devices[task][device]; + devices_and_hosts.push_back({TPUDeviceAndHost( + /*device=*/tensorflow::DeviceNameUtils::ParsedNameToString(tpu_device), + /*host=*/GetCPUHostDeviceForTPUDevice(tpu_device))}); } - return execution_devices; + return devices_and_hosts; } // Helper struct for keeping track of task and device for an associated TPU @@ -326,7 +334,7 @@ StatusOr> ParseTopologyAttr( // - number of device coordinates (in tuple 3) match number 'num_replicas' * // 'num_cores_per_replica' // - a TPU device associated with each device coordinate -StatusOr> +StatusOr> GetGeneralTPUExecutionDeviceAssignment( int num_replicas, int num_cores_per_replica, llvm::ArrayRef> tpu_devices, @@ -361,9 +369,9 @@ GetGeneralTPUExecutionDeviceAssignment( std::vector used_device_ids( location_to_id(bound_x - 1, bound_y - 1, bound_z - 1, bound_core - 1), false); - ExecutionDevices execution_devices( - num_replicas, - llvm::SmallVector(num_cores_per_replica, "")); + TPUDevicesAndHosts devices_and_hosts( + num_replicas, llvm::SmallVector( + num_cores_per_replica, TPUDeviceAndHost())); xla::DeviceAssignment device_assignment(num_replicas, num_cores_per_replica); int pos = 0; for (int replica = 0; replica 
< num_replicas; ++replica) { @@ -393,16 +401,18 @@ GetGeneralTPUExecutionDeviceAssignment( used_device_ids[device_id] = true; device_assignment(replica, logical_core) = device_id; - execution_devices[replica][logical_core] = - DeviceNameUtils::ParsedNameToString(tpu_devices[task][device]); + auto& device_and_host = devices_and_hosts[replica][logical_core]; + const auto& tpu_device = tpu_devices[task][device]; + device_and_host.device = DeviceNameUtils::ParsedNameToString(tpu_device); + device_and_host.host = GetCPUHostDeviceForTPUDevice(tpu_device); } } xla::DeviceAssignmentProto device_assignment_proto; TF_RETURN_IF_ERROR(device_assignment.Serialize(&device_assignment_proto)); - return std::pair( - std::move(execution_devices), std::move(device_assignment_proto)); + return std::pair( + std::move(devices_and_hosts), std::move(device_assignment_proto)); } } // anonymous namespace diff --git a/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.h b/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.h index dd296a13f4b..5fdb6b8768b 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.h @@ -30,29 +30,40 @@ limitations under the License. namespace tensorflow { using stream_executor::port::StatusOr; -// TPU devices to be used for execution (e.g. devices for TPUExecute ops). They -// are ordered by `num_replicas` followed by `num_cores_per_replica`. -using ExecutionDevices = - llvm::SmallVector, 8>; +// A TPU device for execution alongside its associated host CPU device. +struct TPUDeviceAndHost { + TPUDeviceAndHost() {} + TPUDeviceAndHost(llvm::StringRef device, llvm::StringRef host) + : device(device), host(host) {} -// TPU compilation device, execution devices, and optionally execution device -// IDs. Execution device IDs are populated if `topology` and `device_assignment` -// are provided. 
+ std::string device; + std::string host; +}; + +// TPU devices to be used for execution (e.g. devices for TPUExecute ops) and +// their associated host CPU devices (for outside compilation). They are ordered +// by `num_replicas` followed by `num_cores_per_replica`. +using TPUDevicesAndHosts = + llvm::SmallVector, 8>; + +// TPU compilation device, execution and associated host devices, and optionally +// execution device IDs. Execution device IDs are populated if `topology` and +// `device_assignment` are provided. struct TPUDeviceAssignment { TPUDeviceAssignment(llvm::StringRef compilation_device, - ExecutionDevices&& execution_devices) + TPUDevicesAndHosts&& tpu_devices) : compilation_device(compilation_device), - execution_devices(std::move(execution_devices)) {} + tpu_devices(std::move(tpu_devices)) {} TPUDeviceAssignment(llvm::StringRef compilation_device, - ExecutionDevices&& execution_devices, + TPUDevicesAndHosts&& tpu_devices, xla::DeviceAssignmentProto&& xla_device_assignment) : compilation_device(compilation_device), - execution_devices(std::move(execution_devices)), + tpu_devices(std::move(tpu_devices)), xla_device_assignment(std::move(xla_device_assignment)) {} std::string compilation_device; - ExecutionDevices execution_devices; + TPUDevicesAndHosts tpu_devices; llvm::Optional xla_device_assignment; }; diff --git a/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util_test.cc b/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util_test.cc index 87319f2adeb..7ac5635a6e4 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util_test.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util_test.cc @@ -323,30 +323,46 @@ TEST(TPURewriteDeviceUtilTest, ValidFullMeshDeviceAssignment) { TF_ASSERT_OK(status_or.status()); - auto& tpu_device_assignment = status_or.ValueOrDie(); + const auto& tpu_device_assignment = status_or.ValueOrDie(); EXPECT_EQ(tpu_device_assignment.compilation_device, 
"/job:worker/replica:0/task:0/device:CPU:0"); - auto& execution_devices = tpu_device_assignment.execution_devices; - ASSERT_EQ(execution_devices.size(), 8); - for (const auto& replica_execution_device : execution_devices) - ASSERT_EQ(replica_execution_device.size(), 1); + const auto& tpu_devices = tpu_device_assignment.tpu_devices; + ASSERT_EQ(tpu_devices.size(), 8); + for (const auto& replica_tpu_devices : tpu_devices) + ASSERT_EQ(replica_tpu_devices.size(), 1); - EXPECT_EQ(execution_devices[0][0], + EXPECT_EQ(tpu_devices[0][0].device, "/job:worker/replica:0/task:0/device:TPU:0"); - EXPECT_EQ(execution_devices[1][0], + EXPECT_EQ(tpu_devices[0][0].host, + "/job:worker/replica:0/task:0/device:CPU:0"); + EXPECT_EQ(tpu_devices[1][0].device, "/job:worker/replica:0/task:0/device:TPU:1"); - EXPECT_EQ(execution_devices[2][0], + EXPECT_EQ(tpu_devices[1][0].host, + "/job:worker/replica:0/task:0/device:CPU:0"); + EXPECT_EQ(tpu_devices[2][0].device, "/job:worker/replica:0/task:0/device:TPU:2"); - EXPECT_EQ(execution_devices[3][0], + EXPECT_EQ(tpu_devices[2][0].host, + "/job:worker/replica:0/task:0/device:CPU:0"); + EXPECT_EQ(tpu_devices[3][0].device, "/job:worker/replica:0/task:0/device:TPU:3"); - EXPECT_EQ(execution_devices[4][0], + EXPECT_EQ(tpu_devices[3][0].host, + "/job:worker/replica:0/task:0/device:CPU:0"); + EXPECT_EQ(tpu_devices[4][0].device, "/job:worker/replica:0/task:1/device:TPU:0"); - EXPECT_EQ(execution_devices[5][0], + EXPECT_EQ(tpu_devices[4][0].host, + "/job:worker/replica:0/task:1/device:CPU:0"); + EXPECT_EQ(tpu_devices[5][0].device, "/job:worker/replica:0/task:1/device:TPU:1"); - EXPECT_EQ(execution_devices[6][0], + EXPECT_EQ(tpu_devices[5][0].host, + "/job:worker/replica:0/task:1/device:CPU:0"); + EXPECT_EQ(tpu_devices[6][0].device, "/job:worker/replica:0/task:1/device:TPU:2"); - EXPECT_EQ(execution_devices[7][0], + EXPECT_EQ(tpu_devices[6][0].host, + "/job:worker/replica:0/task:1/device:CPU:0"); + EXPECT_EQ(tpu_devices[7][0].device, 
"/job:worker/replica:0/task:1/device:TPU:3"); + EXPECT_EQ(tpu_devices[7][0].host, + "/job:worker/replica:0/task:1/device:CPU:0"); EXPECT_FALSE(tpu_device_assignment.xla_device_assignment.hasValue()); } @@ -410,30 +426,46 @@ TEST(TPURewriteDeviceUtilTest, ValidGeneralDeviceAssignmentMesh2x2x2) { TF_ASSERT_OK(status_or.status()); - auto& tpu_device_assignment = status_or.ValueOrDie(); + const auto& tpu_device_assignment = status_or.ValueOrDie(); EXPECT_EQ(tpu_device_assignment.compilation_device, "/job:worker/replica:0/task:0/device:CPU:0"); - auto& execution_devices = tpu_device_assignment.execution_devices; - ASSERT_EQ(execution_devices.size(), 4); - for (const auto& replica_execution_device : execution_devices) - ASSERT_EQ(replica_execution_device.size(), 2); + const auto& tpu_devices = tpu_device_assignment.tpu_devices; + ASSERT_EQ(tpu_devices.size(), 4); + for (const auto& replica_tpu_devices : tpu_devices) + ASSERT_EQ(replica_tpu_devices.size(), 2); - EXPECT_EQ(execution_devices[0][0], + EXPECT_EQ(tpu_devices[0][0].device, "/job:worker/replica:0/task:0/device:TPU:0"); - EXPECT_EQ(execution_devices[0][1], + EXPECT_EQ(tpu_devices[0][0].host, + "/job:worker/replica:0/task:0/device:CPU:0"); + EXPECT_EQ(tpu_devices[0][1].device, "/job:worker/replica:0/task:1/device:TPU:3"); - EXPECT_EQ(execution_devices[1][0], + EXPECT_EQ(tpu_devices[0][1].host, + "/job:worker/replica:0/task:1/device:CPU:0"); + EXPECT_EQ(tpu_devices[1][0].device, "/job:worker/replica:0/task:0/device:TPU:1"); - EXPECT_EQ(execution_devices[1][1], + EXPECT_EQ(tpu_devices[1][0].host, + "/job:worker/replica:0/task:0/device:CPU:0"); + EXPECT_EQ(tpu_devices[1][1].device, "/job:worker/replica:0/task:1/device:TPU:2"); - EXPECT_EQ(execution_devices[2][0], + EXPECT_EQ(tpu_devices[1][1].host, + "/job:worker/replica:0/task:1/device:CPU:0"); + EXPECT_EQ(tpu_devices[2][0].device, "/job:worker/replica:0/task:0/device:TPU:3"); - EXPECT_EQ(execution_devices[2][1], + EXPECT_EQ(tpu_devices[2][0].host, + 
"/job:worker/replica:0/task:0/device:CPU:0"); + EXPECT_EQ(tpu_devices[2][1].device, "/job:worker/replica:0/task:1/device:TPU:0"); - EXPECT_EQ(execution_devices[3][0], + EXPECT_EQ(tpu_devices[2][1].host, + "/job:worker/replica:0/task:1/device:CPU:0"); + EXPECT_EQ(tpu_devices[3][0].device, "/job:worker/replica:0/task:0/device:TPU:2"); - EXPECT_EQ(execution_devices[3][1], + EXPECT_EQ(tpu_devices[3][0].host, + "/job:worker/replica:0/task:0/device:CPU:0"); + EXPECT_EQ(tpu_devices[3][1].device, "/job:worker/replica:0/task:1/device:TPU:1"); + EXPECT_EQ(tpu_devices[3][1].host, + "/job:worker/replica:0/task:1/device:CPU:0"); auto& xla_device_assignment = tpu_device_assignment.xla_device_assignment; ASSERT_TRUE(xla_device_assignment.hasValue()); @@ -511,23 +543,35 @@ TEST(TPURewriteDeviceUtilTest, ValidGeneralDeviceAssignmentMesh1x2x1x3) { EXPECT_EQ(tpu_device_assignment.compilation_device, "/job:worker/replica:0/task:0/device:CPU:0"); - auto& execution_devices = tpu_device_assignment.execution_devices; - ASSERT_EQ(execution_devices.size(), 2); - for (const auto& replica_execution_device : execution_devices) - ASSERT_EQ(replica_execution_device.size(), 3); + auto& tpu_devices = tpu_device_assignment.tpu_devices; + ASSERT_EQ(tpu_devices.size(), 2); + for (const auto& replica_tpu_devices : tpu_devices) + ASSERT_EQ(replica_tpu_devices.size(), 3); - EXPECT_EQ(execution_devices[0][0], + EXPECT_EQ(tpu_devices[0][0].device, "/job:worker/replica:0/task:1/device:TPU:1"); - EXPECT_EQ(execution_devices[0][1], + EXPECT_EQ(tpu_devices[0][0].host, + "/job:worker/replica:0/task:1/device:CPU:0"); + EXPECT_EQ(tpu_devices[0][1].device, "/job:worker/replica:0/task:1/device:TPU:0"); - EXPECT_EQ(execution_devices[0][2], + EXPECT_EQ(tpu_devices[0][1].host, + "/job:worker/replica:0/task:1/device:CPU:0"); + EXPECT_EQ(tpu_devices[0][2].device, "/job:worker/replica:0/task:2/device:TPU:0"); - EXPECT_EQ(execution_devices[1][0], + EXPECT_EQ(tpu_devices[0][2].host, + 
"/job:worker/replica:0/task:2/device:CPU:0"); + EXPECT_EQ(tpu_devices[1][0].device, "/job:worker/replica:0/task:2/device:TPU:1"); - EXPECT_EQ(execution_devices[1][1], + EXPECT_EQ(tpu_devices[1][0].host, + "/job:worker/replica:0/task:2/device:CPU:0"); + EXPECT_EQ(tpu_devices[1][1].device, "/job:worker/replica:0/task:0/device:TPU:0"); - EXPECT_EQ(execution_devices[1][2], + EXPECT_EQ(tpu_devices[1][1].host, + "/job:worker/replica:0/task:0/device:CPU:0"); + EXPECT_EQ(tpu_devices[1][2].device, "/job:worker/replica:0/task:0/device:TPU:1"); + EXPECT_EQ(tpu_devices[1][2].host, + "/job:worker/replica:0/task:0/device:CPU:0"); auto& xla_device_assignment = tpu_device_assignment.xla_device_assignment; ASSERT_TRUE(xla_device_assignment.hasValue()); diff --git a/tensorflow/compiler/mlir/tf_mlir_translate_main.cc b/tensorflow/compiler/mlir/tf_mlir_translate_main.cc index 62b862f5e21..2e1528e0d60 100644 --- a/tensorflow/compiler/mlir/tf_mlir_translate_main.cc +++ b/tensorflow/compiler/mlir/tf_mlir_translate_main.cc @@ -104,26 +104,24 @@ int main(int argc, char** argv) { return 1; } + std::unordered_set tags = absl::StrSplit(saved_model_tags, ','); + std::vector exported_names_vector = + absl::StrSplit(saved_model_exported_names, ',', absl::SkipEmpty()); + absl::Span exported_names(exported_names_vector); + if (import_saved_model_object_graph) { - std::unordered_set tags = - absl::StrSplit(saved_model_tags, ','); - std::vector exported_names = - absl::StrSplit(saved_model_exported_names, ',', absl::SkipEmpty()); mlir::MLIRContext context; auto module = tensorflow::SavedModelObjectGraphToMlirImport( - input_filename, tags, absl::Span(exported_names), - &context); + input_filename, tags, exported_names, &context); if (!module) return 1; module->print(output->os()); } else if (import_saved_model_signature_defs) { - std::unordered_set tags = - absl::StrSplit(saved_model_tags, ','); mlir::MLIRContext context; auto module = tensorflow::SavedModelSignatureDefsToMlirImport( - 
input_filename, tags, &context); + input_filename, tags, exported_names, &context); if (!module) return 1; module->print(output->os()); diff --git a/tensorflow/compiler/mlir/tfjs/BUILD b/tensorflow/compiler/mlir/tfjs/BUILD index 9b731d2c912..ac629ac4573 100644 --- a/tensorflow/compiler/mlir/tfjs/BUILD +++ b/tensorflow/compiler/mlir/tfjs/BUILD @@ -1,4 +1,5 @@ load("//third_party/mlir:tblgen.bzl", "gentbl") +load("//tensorflow:tensorflow.bzl", "tf_cc_binary") package( default_visibility = ["//visibility:public"], @@ -39,7 +40,7 @@ gentbl( "ir/tfjs_ops.td", "@llvm-project//mlir:OpBaseTdFiles", "@llvm-project//mlir:include/mlir/Interfaces/LoopLikeInterface.td", - "@llvm-project//mlir:include/mlir/Interfaces/SideEffects.td", + "@llvm-project//mlir:include/mlir/Interfaces/SideEffectInterfaces.td", ], ) @@ -131,10 +132,106 @@ cc_library( "//tensorflow/compiler/mlir/tensorflow:tensorflow_dialect_registration", "//tensorflow/compiler/mlir/tensorflow:tensorflow_passes", "//tensorflow/compiler/mlir/tensorflow:tf_graph_optimization_pass", - "//tensorflow/compiler/mlir/tensorflow:translate_lib", - "@llvm-project//mlir:Analysis", "@llvm-project//mlir:IR", "@llvm-project//mlir:Pass", "@llvm-project//mlir:Transforms", ], ) + +cc_library( + name = "json_translate_lib", + srcs = [ + "translate/json_translate.cc", + ], + hdrs = [ + "translate/json_translate.h", + ], + deps = [ + ":tensorflow_js", + ":tensorflow_js_dialect_registration", + "//tensorflow/compiler/mlir/tensorflow", + "//tensorflow/compiler/mlir/tensorflow:convert_graphdef", + "//tensorflow/compiler/mlir/tensorflow:export_utils", + "//tensorflow/compiler/mlir/tensorflow:mlir_roundtrip_flags", + "//tensorflow/compiler/mlir/tensorflow:tensorflow_dialect_registration", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/core:framework", + "//tensorflow/core:graph", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/status", + 
"@llvm-project//mlir:IR", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:Translation", + ], + alwayslink = 1, +) + +cc_library( + name = "tf_to_tfjs_json", + srcs = ["translate/tf_to_tfjs_json.cc"], + hdrs = [ + "translate/tf_to_tfjs_json.h", + ], + deps = [ + ":json_translate_lib", + ":tfjs_optimize", + "//tensorflow/compiler/mlir/tensorflow", + "//tensorflow/compiler/mlir/tensorflow:decode_constant_pass", + "//tensorflow/compiler/mlir/tensorflow:error_util", + "//tensorflow/compiler/mlir/tensorflow:tf_dialect_lib", + "//tensorflow/compiler/mlir/tensorflow:tf_dialect_passes", + "//tensorflow/compiler/mlir/tensorflow:translate_cl_options", + "//tensorflow/compiler/mlir/tensorflow:translate_lib", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/stream_executor/lib", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:span", + "@llvm-project//llvm:support", + "@llvm-project//mlir:AllPassesAndDialects", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Parser", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:Support", + ], + alwayslink = 1, +) + +tf_cc_binary( + name = "json_translate", + deps = [ + ":json_translate_lib", + "@llvm-project//mlir:MlirTranslateMain", + ], +) + +filegroup( + name = "tf_tfjs_translate_main", + srcs = [ + "translate/tf_tfjs_translate.cc", + ], +) + +tf_cc_binary( + name = "tf_tfjs_translate", + srcs = [":tf_tfjs_translate_main"], + deps = [ + ":json_translate_lib", + ":tensorflow_js_passes", + ":tf_to_tfjs_json", + ":tfjs_optimize", + "//tensorflow/compiler/mlir:init_mlir", + "//tensorflow/compiler/mlir/tensorflow:translate_cl_options", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/platform:errors", + "//tensorflow/stream_executor/lib", + "@com_google_absl//absl/strings", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:Support", + ], +) diff --git 
a/tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.h b/tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.h index 318895de79c..545183a052b 100644 --- a/tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.h +++ b/tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.h @@ -28,6 +28,7 @@ limitations under the License. #include "mlir/IR/StandardTypes.h" // from @llvm-project #include "mlir/Interfaces/SideEffects.h" // from @llvm-project #include "mlir/Support/LLVM.h" // from @llvm-project + namespace mlir { namespace tfjs { diff --git a/tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.td b/tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.td index 172347bc0f5..134aa010d8c 100644 --- a/tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.td +++ b/tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.td @@ -23,7 +23,7 @@ limitations under the License. #define TFJS_DIALECT include "mlir/IR/OpBase.td" -include "mlir/Interfaces/SideEffects.td" +include "mlir/Interfaces/SideEffectInterfaces.td" //===----------------------------------------------------------------------===// // TensorFlow.js dialect definitions diff --git a/tensorflow/compiler/mlir/tfjs/tests/e2e/BUILD b/tensorflow/compiler/mlir/tfjs/tests/e2e/BUILD new file mode 100644 index 00000000000..5c8d37da2f0 --- /dev/null +++ b/tensorflow/compiler/mlir/tfjs/tests/e2e/BUILD @@ -0,0 +1,23 @@ +load("//tensorflow/compiler/mlir:glob_lit_test.bzl", "glob_lit_tests") + +licenses(["notice"]) + +glob_lit_tests( + data = [ + ":test_utilities", + ], + driver = "@llvm-project//mlir:run_lit.sh", + test_file_exts = [ + "pbtxt", + ], +) + +# Bundle together all of the test utilities that are used by tests. 
+filegroup( + name = "test_utilities", + testonly = True, + data = [ + "//tensorflow/compiler/mlir/tfjs:tf_tfjs_translate", + "@llvm-project//llvm:FileCheck", + ], +) diff --git a/tensorflow/compiler/mlir/tfjs/tests/e2e/add.pbtxt b/tensorflow/compiler/mlir/tfjs/tests/e2e/add.pbtxt new file mode 100644 index 00000000000..f6a324fdc13 --- /dev/null +++ b/tensorflow/compiler/mlir/tfjs/tests/e2e/add.pbtxt @@ -0,0 +1,78 @@ +# RUN: tf_tfjs_translate %s -tf-input-arrays=input0,input1 -tf-input-data-types=DT_INT32,DT_INT32 -tf-input-shapes=10:10 -tf-output-arrays=Mul -o - | FileCheck %s --dump-input-on-failure +# Add two tensor<4xi32> inputs and return the result + +node { + name: "Add" + op: "Add" + input: "input0" + input: "input1" + attr { + key: "T" + value { + type: DT_INT32 + } + } +} +node { + name: "input0" + op: "Placeholder" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } +} +node { + name: "input1" + op: "Placeholder" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } +} +node { + name: "Mul" + op: "Mul" + input: "Add" + input: "Add" + attr { + key: "T" + value { + type: DT_INT32 + } + } +} +versions { + producer: 27 +} + +# CHECK: "name": "input0" +# CHECK-NEXT: "op": "Placeholder" +# CHECK: "type": "DT_INT32" +# CHECK: "name": "input1", +# CHECK-NEXT: "op": "Placeholder" +# CHECK: "type": "DT_INT32" +# CHECK: "name": "Add" +# CHECK-NEXT: "op": "AddV2" +# CHECK-NEXT: "input": +# CHECK-NEXT: "input0" +# CHECK-NEXT: "input1" +# CHECK: "type": "DT_INT32" +# CHECK: "name": "Mul1" +# CHECK-NEXT: "op": "Mul" +# CHECK-NEXT: "input": +# CHECK-NEXT: "Add" +# CHECK-NEXT: "Add" +# CHECK: "type": "DT_INT32" +# CHECK: "name": "Mul" +# CHECK-NEXT: "op": "_Retval" +# CHECK-NEXT: "input": +# CHECK-NEXT: "Mul1" +# CHECK: "type": "DT_INT32" +# CHECK: "library" +# CHECK: "versions" +# CHECK: "producer": 27 + diff --git a/tensorflow/compiler/mlir/tfjs/tests/e2e/prelu.pbtxt b/tensorflow/compiler/mlir/tfjs/tests/e2e/prelu.pbtxt new file mode 100644 index 
00000000000..810db71f5e0 --- /dev/null +++ b/tensorflow/compiler/mlir/tfjs/tests/e2e/prelu.pbtxt @@ -0,0 +1,175 @@ +# RUN: tf_tfjs_translate %s -tf-input-arrays=input0 -tf-input-data-types=DT_FLOAT -tf-input-shapes=10 -tf-output-arrays=Add -tf-custom-opdefs="name: 'Prelu' input_arg: { name: 'x' type: DT_FLOAT } input_arg: { name: 'alpha' type: DT_FLOAT } output_arg: { name: 'c' type: DT_FLOAT }" -o - | FileCheck %s --dump-input-on-failure +# Check that Relu(x) + alpha * Relu(-x) on a tensor<10xf32> input is fused into a single Prelu op + +node { + name: "input0" + op: "Placeholder" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 10 + } + } + } + } + experimental_debug_info { + } +} +node { + name: "alpha" + op: "Const" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } + experimental_debug_info { + } +} +node { + name: "Relu" + op: "Relu" + input: "input0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + } +} +node { + name: "Neg" + op: "Neg" + input: "input0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + } +} +node { + name: "Relu1" + op: "Relu" + input: "Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + } +} +node { + name: "Mul" + op: "Mul" + input: "alpha" + input: "Relu1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + } +} +node { + name: "Add" + op: "Add" + input: "Relu" + input: "Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + } +} +node { + name: "main" + op: "_Retval" + input: "Add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "index" + value { + i: 0 + } + } +} +library { +} +versions { + producer: 344 +} + +# CHECK: "node": +# CHECK: "name": "input0", +# CHECK-NEXT: "op": "Placeholder", +# 
CHECK-NEXT: "attr": +# CHECK: "type": "DT_FLOAT" +# CHECK: "name": "Add.Relu.Neg.Relu1.Mul", +# CHECK-NEXT: "op": "Const", +# CHECK-NEXT: "attr": +# CHECK: "value": +# CHECK: "tensor": +# CHECK: "dtype": "DT_FLOAT", +# CHECK: "tensorShape": {}, +# CHECK: "floatVal": +# CHECK: -0.5 +# CHECK: "name": "Add.Relu.Neg.Relu1.Mul1", +# CHECK-NEXT: "op": "Prelu", +# CHECK-NEXT: "input": +# CHECK: "input0", +# CHECK: "Add.Relu.Neg.Relu1.Mul" +# CHECK: "attr": +# CHECK: "_output_shapes": +# CHECK: "list": +# CHECK: "shape": +# CHECK: "dim": +# CHECK: "size": "10" +# CHECK: "experimentalDebugInfo": {} +# CHECK: "name": "Add", +# CHECK-NEXT: "op": "_Retval", +# CHECK-NEXT: "input": +# CHECK: "Add.Relu.Neg.Relu1.Mul1" +# CHECK: "attr": +# CHECK: "T": +# CHECK: "type": "DT_FLOAT" +# CHECK: "library": {}, +# CHECK: "versions": +# CHECK: "producer": 344 + diff --git a/tensorflow/compiler/mlir/tfjs/tf_tfjs_passes.cc b/tensorflow/compiler/mlir/tfjs/tf_tfjs_passes.cc index 631bb1ae2af..a445937570e 100644 --- a/tensorflow/compiler/mlir/tfjs/tf_tfjs_passes.cc +++ b/tensorflow/compiler/mlir/tfjs/tf_tfjs_passes.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -20,7 +20,6 @@ limitations under the License. #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Pass/PassManager.h" // from @llvm-project #include "mlir/Transforms/Passes.h" // from @llvm-project -#include "tensorflow/compiler/mlir/tensorflow/transforms/decode_constant.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" #include "tensorflow/compiler/mlir/tfjs/transforms/passes.h" @@ -47,6 +46,11 @@ void AddTFToTFJSConversionPasses(mlir::OpPassManager* pm) { // Canonicalize, CSE etc. 
pm->addNestedPass(mlir::createCanonicalizerPass()); pm->addNestedPass(mlir::createCSEPass()); + + // raise to executor dialect in order to use GraphDef converter + pm->addNestedPass( + mlir::CreateFunctionalToExecutorDialectConversionPass()); + pm->addNestedPass(mlir::CreateBreakUpIslandsPass()); } } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tfjs/translate/json_translate.cc b/tensorflow/compiler/mlir/tfjs/translate/json_translate.cc new file mode 100644 index 00000000000..7f4b8ffae09 --- /dev/null +++ b/tensorflow/compiler/mlir/tfjs/translate/json_translate.cc @@ -0,0 +1,105 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/mlir/tfjs/translate/json_translate.h" + +#include +#include +#include + +#include "absl/container/flat_hash_set.h" +#include "absl/status/status.h" +#include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/Module.h" // from @llvm-project +#include "mlir/Support/LogicalResult.h" // from @llvm-project +#include "mlir/Translation.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.h" +#include "tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.h" +#include "tensorflow/compiler/mlir/tensorflow/utils/export_utils.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/function.pb.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/status.h" + +using mlir::ModuleOp; +using mlir::TranslateFromMLIRRegistration; +using std::string; +using tensorflow::Status; +using xla::StatusOr; + +// Translates the given MLIR module in the TFJS dialect to TFJS JSON +// format. Returns true on success, false on failure. +// +bool tfjs::MlirToJSONTranslateFunction(ModuleOp module, + std::string* serialized_json) { + string json_output; + // Allow TF to treat TFJS ops as TF ops. 
+ if (!tensorflow::AddTensorFlowOpPrefix("tfjs.").ok()) { + LOG(ERROR) << "Failed to add tfjs op prefix."; + return false; + } + tensorflow::GraphExportConfig confs; + confs.export_shapes = true; + confs.export_library = true; + tensorflow::FunctionLibraryDefinition flib_def( + tensorflow::OpRegistry::Global(), tensorflow::FunctionDefLibrary()); + absl::flat_hash_set control_ret_nodes; + auto graph = absl::make_unique(flib_def); + auto status = tensorflow::ConvertMlirToGraph(module, confs, &graph, &flib_def, + &control_ret_nodes); + if (!status.ok()) { + LOG(ERROR) << "Graph export failed: " << status; + return false; + } + auto graphdef = absl::make_unique(); + graph->ToGraphDef(graphdef.get()); + + // Replace the _Arg nodes of the main function with Placeholder op. + auto nodes = graphdef->mutable_node(); + for (const auto& node : llvm::enumerate(*nodes)) { + if (node.value().op() == "_Arg") { + nodes->Mutable(node.index())->set_op("Placeholder"); + } + } + + tensorflow::protobuf::util::JsonPrintOptions json_options; + json_options.add_whitespace = true; + auto jsonStatus = tensorflow::protobuf::util::MessageToJsonString( + *graphdef, &json_output, json_options); + if (!jsonStatus.ok()) { + LOG(ERROR) << "Proto2Json failed: " << jsonStatus.ToString(); + return false; + } + *serialized_json = std::move(json_output); + return true; +} + +static mlir::LogicalResult MlirToJSONFileTranslateFunction( + ModuleOp module, llvm::raw_ostream& output) { + std::string serialized_json; + if (!tfjs::MlirToJSONTranslateFunction(module, &serialized_json)) + return mlir::failure(); + + output << serialized_json; + return mlir::success(); +} + +static TranslateFromMLIRRegistration MLIRToJSONFileTranslate( + "mlir-to-tfjs-json", MlirToJSONFileTranslateFunction); diff --git a/tensorflow/compiler/mlir/tfjs/translate/json_translate.h b/tensorflow/compiler/mlir/tfjs/translate/json_translate.h new file mode 100644 index 00000000000..0a931f770ad --- /dev/null +++ 
b/tensorflow/compiler/mlir/tfjs/translate/json_translate.h @@ -0,0 +1,31 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_COMPILER_MLIR_TFJS_TRANSLATE_JSON_TRANSLATE_H_ +#define TENSORFLOW_COMPILER_MLIR_TFJS_TRANSLATE_JSON_TRANSLATE_H_ + +#include + +#include "mlir/IR/Module.h" // from @llvm-project +#include "tensorflow/core/lib/core/status.h" + +namespace tfjs { + +// Translates the given MLIR `module` into a JSON string. Returns true if +// translation succeeds, otherwise returns false. +bool MlirToJSONTranslateFunction(mlir::ModuleOp module, + std::string* serialized_json); +} // namespace tfjs + +#endif // TENSORFLOW_COMPILER_MLIR_TFJS_TRANSLATE_JSON_TRANSLATE_H_ diff --git a/tensorflow/compiler/mlir/tfjs/translate/tf_tfjs_translate.cc b/tensorflow/compiler/mlir/tfjs/translate/tf_tfjs_translate.cc new file mode 100644 index 00000000000..e735a3c7b8c --- /dev/null +++ b/tensorflow/compiler/mlir/tfjs/translate/tf_tfjs_translate.cc @@ -0,0 +1,173 @@ + +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include + +#include "absl/strings/str_split.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/ToolOutputFile.h" +#include "mlir/IR/Diagnostics.h" // from @llvm-project +#include "mlir/IR/Function.h" // from @llvm-project +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/Module.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "mlir/Support/FileUtilities.h" // from @llvm-project +#include "tensorflow/compiler/mlir/init_mlir.h" +#include "tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate_cl.h" +#include "tensorflow/compiler/mlir/tfjs/tf_tfjs_passes.h" +#include "tensorflow/compiler/mlir/tfjs/transforms/passes.h" +#include "tensorflow/compiler/mlir/tfjs/translate/tf_to_tfjs_json.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/stream_executor/lib/statusor.h" + +using llvm::cl::opt; +using mlir::MLIRContext; +using stream_executor::port::StatusOr; + +// NOLINTNEXTLINE +opt input_file_name(llvm::cl::Positional, + llvm::cl::desc(""), + llvm::cl::init("-")); + +// NOLINTNEXTLINE +opt import_saved_model_object_graph( + "savedmodel-objectgraph-to-mlir", + llvm::cl::desc("Import a saved model to its MLIR representation"), + llvm::cl::value_desc("dir")); + +// NOLINTNEXTLINE +opt 
import_saved_model_signature_defs( + "savedmodel-signaturedefs-to-mlir", + llvm::cl::desc("Import a saved model V1 to its MLIR representation"), + llvm::cl::value_desc("dir")); + +// NOLINTNEXTLINE +opt saved_model_tags( + "tf-savedmodel-tags", + llvm::cl::desc("Tags used to indicate which MetaGraphDef to import, " + "separated by ','"), + llvm::cl::init("serve")); + +// NOLINTNEXTLINE +opt saved_model_exported_names( + "tf-savedmodel-exported-names", + llvm::cl::desc("Names to export from SavedModel, separated by ','. Empty " + "(the default) means export all."), + llvm::cl::init("")); + +// NOLINTNEXTLINE +opt output_file_name("o", llvm::cl::desc(""), + llvm::cl::value_desc("filename"), + llvm::cl::init("-")); +// NOLINTNEXTLINE +opt input_mlir( + "input-mlir", + llvm::cl::desc("Take input TensorFlow model in textual MLIR instead of " + "GraphDef format"), + llvm::cl::init(false), llvm::cl::Hidden); +// NOLINTNEXTLINE +opt output_mlir( + "output-mlir", + llvm::cl::desc("Output MLIR rather than JSON for the generated TFJS model"), + llvm::cl::init(false)); + +// The following approach allows injecting opdefs in addition +// to those that are already part of the global TF registry to be linked in +// prior to importing the graph. The primary goal is for support of custom ops. +// This is not intended to be a general solution for custom ops for the future +// but mainly for supporting older models like mobilenet_ssd. More appropriate +// mechanisms, such as op hints or using functions to represent composable ops +// like https://github.com/tensorflow/community/pull/113 should be encouraged +// going forward. +// NOLINTNEXTLINE +llvm::cl::list custom_opdefs( + "tf-custom-opdefs", llvm::cl::desc("List of custom opdefs when importing " + "graphdef")); + +// Debugging flag to print function mapping in the JSON. 
+// NOLINTNEXTLINE +static opt print_function_result_mapping( + "print-function-result-mapping", + llvm::cl::desc( + "Print the mapping of function result to json output buffer"), + llvm::cl::init(false)); + +enum TranslationStatus { kTrSuccess, kTrFailure }; + +static int PrintFunctionResultMapping(const std::string& result) { + std::cout << result << std::endl; + return kTrSuccess; +} + +int main(int argc, char** argv) { + tensorflow::InitMlir y(&argc, &argv); + + llvm::cl::ParseCommandLineOptions(argc, argv, + "TF GraphDef to TFJS JSON converter\n"); + + MLIRContext context; + llvm::SourceMgr source_mgr; + mlir::SourceMgrDiagnosticHandler sourceMgrHandler(source_mgr, &context); + + StatusOr module; + + if (import_saved_model_object_graph || import_saved_model_signature_defs) { + // Fail fast: the import below would otherwise overwrite this error. + if (input_mlir) { + llvm::errs() << "Importing saved model should not have input_mlir set\n"; + return kTrFailure; + } + module = tensorflow::ImportSavedModel( + import_saved_model_object_graph, import_saved_model_signature_defs, + custom_opdefs, input_file_name, saved_model_tags, + saved_model_exported_names, &context); + } else { + module = tensorflow::LoadFromGraphdefOrMlirSource( + input_file_name, input_mlir, custom_opdefs, debug_info_file, + input_arrays, input_dtypes, input_shapes, output_arrays, + /*prune_unused_nodes=*/true, &source_mgr, &context); + } + + // If errors occur, the library call in the above already logged the error + // message. So we can just return here. 
+ if (!module.ok()) return kTrFailure; + + mlir::PassManager pm(&context); + + tensorflow::AddTFToTFJSConversionPasses(&pm); + + std::string result; + auto status = tensorflow::ConvertTFOpsToTfjsJSON(module.ValueOrDie().get(), + output_mlir, &result, &pm); + if (!status.ok()) return kTrFailure; + + std::string error_msg; + auto output = mlir::openOutputFile(output_file_name, &error_msg); + if (output == nullptr) { + llvm::errs() << error_msg << '\n'; + return kTrFailure; + } + output->os() << result; + output->keep(); + + // Print out debugging info related to function mapping. + if (print_function_result_mapping) return PrintFunctionResultMapping(result); + return kTrSuccess; +} diff --git a/tensorflow/compiler/mlir/tfjs/translate/tf_to_tfjs_json.cc b/tensorflow/compiler/mlir/tfjs/translate/tf_to_tfjs_json.cc new file mode 100644 index 00000000000..7dc9ea049ba --- /dev/null +++ b/tensorflow/compiler/mlir/tfjs/translate/tf_to_tfjs_json.cc @@ -0,0 +1,152 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/mlir/tfjs/translate/tf_to_tfjs_json.h" + +#include +#include +#include +#include +#include + +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "absl/types/span.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/Module.h" // from @llvm-project +#include "mlir/Parser.h" // from @llvm-project +#include "mlir/Pass/PassManager.h" // from @llvm-project +#include "mlir/Support/FileUtilities.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.h" +#include "tensorflow/compiler/mlir/tensorflow/utils/error_util.h" +#include "tensorflow/compiler/mlir/tfjs/translate/json_translate.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_def.pb.h" +#include "tensorflow/core/framework/op_def_builder.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow/stream_executor/lib/statusor.h" + +namespace tensorflow { + +using mlir::MLIRContext; +using mlir::ModuleOp; +using mlir::OwningModuleRef; +using stream_executor::port::StatusOr; + +namespace { +tensorflow::Status RegisterCustomOps( + const std::vector& extra_tf_opdefs) { + for (const auto& tf_opdefs_string : extra_tf_opdefs) { + tensorflow::OpDef opdef; + if (!tensorflow::protobuf::TextFormat::ParseFromString(tf_opdefs_string, + &opdef)) { + LOG(ERROR) << "OpDef parsing failed for: " << tf_opdefs_string; + return errors::InvalidArgument("fail to parse extra OpDef"); + } + // Register extra opdefs. 
+ tensorflow::OpRegistry::Global()->Register( + [opdef](tensorflow::OpRegistrationData* op_reg_data) -> Status { + *op_reg_data = tensorflow::OpRegistrationData(opdef); + return Status::OK(); + }); + } + return Status::OK(); +} +} // namespace + +StatusOr LoadFromGraphdefOrMlirSource( + const std::string& input_filename, bool input_mlir, + const std::vector& extra_tf_opdefs, + absl::string_view debug_info_file, absl::string_view input_arrays, + absl::string_view input_dtypes, absl::string_view input_shapes, + absl::string_view output_arrays, bool prune_unused_nodes, + llvm::SourceMgr* source_mgr, MLIRContext* context) { + // Set up the input file. + std::string error_message; + auto file = mlir::openInputFile(input_filename, &error_message); + if (!file) { + llvm::errs() << error_message << "\n"; + return errors::InvalidArgument("fail to open input file"); + } + + if (input_mlir) { + source_mgr->AddNewSourceBuffer(std::move(file), llvm::SMLoc()); + return OwningModuleRef(mlir::parseSourceFile(*source_mgr, context)); + } + + TF_RETURN_IF_ERROR(RegisterCustomOps(extra_tf_opdefs)); + + return tensorflow::GraphdefToMlirTranslateFunction( + file->getBuffer(), debug_info_file, input_arrays, input_dtypes, + input_shapes, output_arrays, /*control_output_arrays=*/"", + prune_unused_nodes, /*convert_legacy_fed_inputs=*/true, + /*graph_as_function=*/false, /*upgrade_legacy=*/true, + /*enable_shape_inference=*/true, context); +} + +Status ConvertTFOpsToTfjsJSON(mlir::ModuleOp module, bool export_to_mlir, + std::string* result, + mlir::PassManager* pass_manager) { + mlir::StatusScopedDiagnosticHandler statusHandler(module.getContext(), + /*propagate=*/true); + if (failed(pass_manager->run(module))) { + return statusHandler.ConsumeStatus(); + } + + if (export_to_mlir) { + llvm::raw_string_ostream os(*result); + module.print(os); + return Status::OK(); + } + + return tfjs::MlirToJSONTranslateFunction(module, result) + ? 
Status::OK() + : statusHandler.ConsumeStatus(); +} + +StatusOr ImportSavedModel( + bool import_saved_model, bool import_saved_model_v1, + const std::vector& extra_tf_opdefs, + const std::string& input_filename, const std::string& saved_model_tags, + const std::string& saved_model_exported_names, mlir::MLIRContext* context) { + std::unordered_set tags = absl::StrSplit(saved_model_tags, ','); + std::vector exported_names_in_vector = + absl::StrSplit(saved_model_exported_names, ',', absl::SkipEmpty()); + absl::Span exported_names(exported_names_in_vector); + if (import_saved_model) { + auto module = tensorflow::SavedModelObjectGraphToMlirImport( + input_filename, tags, absl::Span(exported_names), context); + if (!module) + return tensorflow::errors::InvalidArgument("fail to open input file"); + TF_RETURN_IF_ERROR(RegisterCustomOps(extra_tf_opdefs)); + return module; + } else if (import_saved_model_v1) { + auto module = tensorflow::SavedModelSignatureDefsToMlirImport( + input_filename, tags, exported_names, context); + + if (!module) + return tensorflow::errors::InvalidArgument("fail to open input file"); + TF_RETURN_IF_ERROR(RegisterCustomOps(extra_tf_opdefs)); + return module; + } else { + return tensorflow::errors::InvalidArgument( + "Should be either saved model v1 or v2"); + } +} + +} // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tfjs/translate/tf_to_tfjs_json.h b/tensorflow/compiler/mlir/tfjs/translate/tf_to_tfjs_json.h new file mode 100644 index 00000000000..d68f0e7d46e --- /dev/null +++ b/tensorflow/compiler/mlir/tfjs/translate/tf_to_tfjs_json.h @@ -0,0 +1,63 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_MLIR_TFJS_TRANSLATE_TF_TO_TFJS_JSON_H_ +#define TENSORFLOW_COMPILER_MLIR_TFJS_TRANSLATE_TF_TO_TFJS_JSON_H_ + +#include +#include + +#include "absl/strings/string_view.h" +#include "llvm/Support/SourceMgr.h" +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/Module.h" // from @llvm-project +#include "mlir/Pass/PassManager.h" // from @llvm-project +#include "tensorflow/core/platform/status.h" +#include "tensorflow/stream_executor/lib/statusor.h" + +namespace tensorflow { + +// Load a TF model from a GraphDef definition or a TF control flow dialect MLIR +// source into a MLIR module. If `input_mlir` is true, load from a MLIR source +// file; otherwise, load from a GraphDef. +// Setting prune_unused_nodes to true, would prune unreachable nodes if +// output_arrays is specified. +stream_executor::port::StatusOr +LoadFromGraphdefOrMlirSource( + const std::string& input_filename, bool input_mlir, + const std::vector& extra_tf_opdefs, + absl::string_view debug_info_file, absl::string_view input_arrays, + absl::string_view input_dtypes, absl::string_view input_shapes, + absl::string_view output_arrays, bool prune_unused_nodes, + llvm::SourceMgr* source_mgr, mlir::MLIRContext* context); + +// Load Saved model (either v1 or v2) into MLIR. 
+stream_executor::port::StatusOr ImportSavedModel( + bool import_saved_model, bool import_saved_model_v1, + const std::vector& extra_tf_opdefs, + const std::string& input_filename, const std::string& saved_model_tags, + const std::string& saved_model_exported_names, mlir::MLIRContext* context); + +// Taking a MLIR module in TF executor dialect and a set of parameters, +// applies a set of passes to convert the module to TFJS dialect and +// serializes the result to JSON string. +// If `export_to_mlir` is true, the result is exported in MLIR text format, +// otherwise exported in JSON. +Status ConvertTFOpsToTfjsJSON(mlir::ModuleOp module, bool export_to_mlir, + std::string* result, + mlir::PassManager* pass_manager); +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_MLIR_TFJS_TRANSLATE_TF_TO_TFJS_JSON_H_ diff --git a/tensorflow/compiler/mlir/tfrt/BUILD b/tensorflow/compiler/mlir/tfrt/BUILD index 88e214f601b..edcfc574452 100644 --- a/tensorflow/compiler/mlir/tfrt/BUILD +++ b/tensorflow/compiler/mlir/tfrt/BUILD @@ -40,7 +40,7 @@ filegroup( srcs = [ "runtime_fallback/runtime_fallback_ops.td", "@llvm-project//mlir:OpBaseTdFiles", - "@llvm-project//mlir:include/mlir/Interfaces/SideEffects.td", + "@llvm-project//mlir:include/mlir/Interfaces/SideEffectInterfaces.td", "@tf_runtime//:OpBaseTdFiles", ], ) @@ -128,6 +128,33 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "saved_model", + srcs = [ + "saved_model/saved_model.cc", + ], + hdrs = [ + "saved_model/saved_model.h", + ], + deps = [ + ":tf_to_corert", + "//tensorflow/compiler/mlir/tensorflow", + "//tensorflow/compiler/mlir/tensorflow:convert_graphdef", + "//tensorflow/compiler/mlir/tensorflow:tf_dialect_lib", + "//tensorflow/compiler/mlir/tensorflow:translate_lib", + "//tensorflow/core/platform:status", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/strings", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@tf_runtime//:core_runtime", + 
"@tf_runtime//:hostcontext", + "@tf_runtime//:mlirtobef", + "@tf_runtime//:support", + "@tf_runtime//:tensor", + ], +) + cc_library( name = "compatibility_analysis", srcs = [ diff --git a/tensorflow/compiler/mlir/tfrt/runtime_fallback/runtime_fallback_ops.td b/tensorflow/compiler/mlir/tfrt/runtime_fallback/runtime_fallback_ops.td index aeed800a1c3..c33c6f8d73d 100644 --- a/tensorflow/compiler/mlir/tfrt/runtime_fallback/runtime_fallback_ops.td +++ b/tensorflow/compiler/mlir/tfrt/runtime_fallback/runtime_fallback_ops.td @@ -20,7 +20,7 @@ limitations under the License. #define TFRT_DELEGATE_DIALECT include "tfrt/tfrt_op_base.td" -include "mlir/Interfaces/SideEffects.td" +include "mlir/Interfaces/SideEffectInterfaces.td" //===----------------------------------------------------------------------===// // Type definitions diff --git a/tensorflow/compiler/mlir/tfrt/saved_model/saved_model.cc b/tensorflow/compiler/mlir/tfrt/saved_model/saved_model.cc new file mode 100644 index 00000000000..92571148cff --- /dev/null +++ b/tensorflow/compiler/mlir/tfrt/saved_model/saved_model.cc @@ -0,0 +1,131 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/mlir/tfrt/saved_model/saved_model.h" + +#include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/Pass/PassManager.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" +#include "tensorflow/compiler/mlir/tensorflow/translate/import_model.h" +#include "tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.h" +#include "tensorflow/compiler/mlir/tfrt/transforms/passes.h" +#include "tfrt/bef_converter/mlir_to_bef.h" +#include "tfrt/core_runtime/core_runtime.h" +#include "tfrt/core_runtime/op_handler.h" +#include "tfrt/host_context/host_context.h" +#include "tfrt/tensor/dense_host_tensor_view.h" + +namespace tensorflow { +namespace { + +llvm::StringRef ProcessIndexPath(mlir::ArrayAttr index_path) { + if (index_path.size() == 1 && index_path[0].isa()) { + // TODO(chky): Support cases where index_path is not a single string. + return index_path[0].cast().getValue(); + } + return ""; +} + +} // namespace + +void MapFunctionSignaturesFromTFSavedModelMLIR( + mlir::ModuleOp module, + llvm::function_ref> + input_names_and_devices, + llvm::ArrayRef output_names, + llvm::ArrayRef global_tensors)> + map_fn) { + // Create global_tensors for each function. + mlir::SymbolTable symbol_table(module); + module.walk([&symbol_table, map_fn](mlir::FuncOp func) { + // Use the exported name as the function name, and skip non-exported + // functions. + auto func_names = mlir::tf_saved_model::GetExportedNames(func); + if (func_names.empty()) return; + + // Here we walk through each argument and find out the input/output names, + // input devices, and variables used by this function. 
+ llvm::SmallVector, 4> + input_names_and_devices; + llvm::SmallVector global_tensors; + for (unsigned i = 0, e = func.getNumArguments(); i != e; ++i) { + if (auto input_index_path = func.getArgAttrOfType( + i, "tf_saved_model.index_path")) { + std::pair name_and_device; + name_and_device.first = ProcessIndexPath(input_index_path); + if (auto input_device = + func.getArgAttrOfType(i, "tf.device")) { + name_and_device.second = input_device.getValue(); + } + input_names_and_devices.push_back(name_and_device); + } + if (auto variable = + mlir::tf_saved_model::LookupBoundInput(func, i, symbol_table)) { + global_tensors.push_back(variable); + } + } + + llvm::SmallVector output_names; + for (unsigned i = 0, e = func.getNumResults(); i != e; ++i) { + if (auto output_index_path = func.getResultAttrOfType( + i, "tf_saved_model.index_path")) { + output_names.push_back(ProcessIndexPath(output_index_path)); + } + } + + for (auto func_name : func_names) + map_fn(func_name, input_names_and_devices, output_names, global_tensors); + }); +} + +Status CompileTFSavedModelMLIRToBEF(const TFRTSavedModelCompileOptions& options, + mlir::ModuleOp module, + tfrt::AlignedBuffer<8>* bef_buffer) { + VLOG(1) << "TF Dialect: " << tensorflow::MlirModuleToString(module); + + // Lower MLIR TF Dialect to MLIR TFRT CoreRT dialect. 
+ mlir::PassManager pm(module.getContext()); + + tensorflow::CoreRTPipelineOptions pass_options; + if (!options.default_device.empty()) { + pass_options.default_device = options.default_device; + } + if (!options.force_data_format.empty()) { + pass_options.force_data_format = options.force_data_format; + } + pass_options.enable_optimizer = options.enable_optimizer; + tensorflow::CreateTFExecutorToCoreRTPipeline(pm, pass_options); + + if (mlir::failed(pm.run(module))) + return tensorflow::errors::Internal( + "failed to lower TF Dialect to CoreRT dialect."); + + VLOG(1) << "TFRT Dialect: " << tensorflow::MlirModuleToString(module); + + auto bef = + tfrt::ConvertMLIRToBEF(module, /* disable_optional_sections = */ true); + if (bef.empty()) + return tensorflow::errors::Internal("failed to convert MLIR to BEF."); + + assert(bef_buffer); + bef_buffer->assign(bef.begin(), bef.end()); + + return Status::OK(); +} + +} // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tfrt/saved_model/saved_model.h b/tensorflow/compiler/mlir/tfrt/saved_model/saved_model.h new file mode 100644 index 00000000000..06a6c5a22f9 --- /dev/null +++ b/tensorflow/compiler/mlir/tfrt/saved_model/saved_model.h @@ -0,0 +1,78 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_MLIR_TFRT_SAVED_MODEL_SAVED_MODEL_H_ +#define TENSORFLOW_COMPILER_MLIR_TFRT_SAVED_MODEL_SAVED_MODEL_H_ + +#include +#include +#include + +#include "absl/container/flat_hash_map.h" +#include "absl/strings/string_view.h" +#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.h" +#include "tensorflow/core/platform/status.h" +#include "tfrt/core_runtime/tensor_handle.h" +#include "tfrt/support/aligned_buffer.h" + +namespace tfrt { +class CoreRuntime; +} + +namespace mlir { +class ModuleOp; +} + +namespace tensorflow { + +// Options consumed by CompileTFSavedModelMLIRToBEF. +struct TFRTSavedModelCompileOptions { + // TODO(tf-runtime-team): Ideally, compiler should make the decision where + // to place the variable. + std::string variable_device = "cpu"; + std::string default_device = "cpu"; + + // Enable compiler optimization in TFRT dialect. + bool enable_optimizer = true; + + // Force data format for all layout sensitive operations, e.g. setting it to + // "NHWC" will change all data formats in the graph to "NHWC" by inserting + // or removing related tf.Transpose ops. Currently the supported formats are + // "NHWC" and "NCHW". Left empty by default. + // + // TODO(tf-runtime-team): Ideally compiler should figure out whether the + // data format should be changed, instead of controlled by users. + std::string force_data_format; +}; + +// Map signatures (e.g. input/output names, variables) for each function. +void MapFunctionSignaturesFromTFSavedModelMLIR( + mlir::ModuleOp module, + llvm::function_ref> + input_names_and_devices, + llvm::ArrayRef output_names, + llvm::ArrayRef global_tensors)> + map_fn); + +// Compile MLIR in TF saved model dialect into BEF. 
+Status CompileTFSavedModelMLIRToBEF(const TFRTSavedModelCompileOptions& options, + mlir::ModuleOp module, + tfrt::AlignedBuffer<8>* bef_buffer); + +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_MLIR_TFRT_SAVED_MODEL_SAVED_MODEL_H_ diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/BUILD new file mode 100644 index 00000000000..d4269c336e9 --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/BUILD @@ -0,0 +1,49 @@ +load("//tensorflow:tensorflow.bzl", "tf_cc_binary") +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") + +licenses(["notice"]) + +cc_library( + name = "cubin_creator", + srcs = ["cubin_creator.cc"], + hdrs = ["cubin_creator.h"], + copts = if_cuda(["-DGOOGLE_CUDA=1"]), + deps = [ + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@llvm-project//llvm:support", + "@llvm-project//mlir:AllPassesAndDialects", + "@llvm-project//mlir:GPUDialect", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:LLVMDialect", + "@llvm-project//mlir:Parser", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:TargetNVVMIR", + "@llvm-project//mlir:Transforms", + "//tensorflow/compiler/mlir/xla:hlo", + "//tensorflow/compiler/mlir/xla:lhlo", + "//tensorflow/compiler/mlir/xla:xla_legalize_tf", + "//tensorflow/compiler/mlir/xla:xla_materialize_broadcasts", # buildcleaner: keep + "//tensorflow/compiler/mlir/xla:xla_unfuse_batch_norm", # buildcleaner: keep + "//tensorflow/compiler/xla:debug_options_flags", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/service/gpu:stream_executor_util", + "//tensorflow/compiler/xla/service/gpu:target_constants", + "//tensorflow/compiler/xla/service/gpu/llvm_gpu_backend", + "//tensorflow/compiler/xla/service/mlir_gpu:kernel_lowering", + "//tensorflow/core:cuda_libdevice_path", + "//tensorflow/core:lib", + ] + if_cuda(["//tensorflow/stream_executor/gpu:asm_compiler"]), +) + +tf_cc_binary( + 
name = "tf_to_cubin", + srcs = ["tf_to_cubin.cc"], + deps = [ + ":cubin_creator", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "@com_google_absl//absl/strings", + ], +) diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.cc b/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.cc new file mode 100644 index 00000000000..b1c4b1beae1 --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.cc @@ -0,0 +1,264 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +//===- cubin_creator.cc -----------------------------------------*- C++ -*-===// +// +// This file implements the function to compile a TF kernel function to a cubin. 
+// +//===----------------------------------------------------------------------===// +#include "tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.h" + +#include +#include +#include + +#include "absl/memory/memory.h" +#include "absl/strings/escaping.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Debug.h" +#include "mlir/Dialect/GPU/GPUDialect.h" // from @llvm-project +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" // from @llvm-project +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/IR/Function.h" // from @llvm-project +#include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "mlir/IR/Value.h" // from @llvm-project +#include "mlir/Parser.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "mlir/Pass/PassManager.h" // from @llvm-project +#include "mlir/Target/NVVMIR.h" // from @llvm-project +#include "mlir/Transforms/DialectConversion.h" // from @llvm-project +#include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h" +#include "tensorflow/compiler/mlir/xla/transforms/passes.h" +#include "tensorflow/compiler/mlir/xla/transforms/rewriters.h" +#include "tensorflow/compiler/xla/debug_options_flags.h" +#include "tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.h" +#include "tensorflow/compiler/xla/service/gpu/stream_executor_util.h" +#include "tensorflow/compiler/xla/service/gpu/target_constants.h" +#include "tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.h" +#include "tensorflow/core/platform/cuda_libdevice_path.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/path.h" +#if GOOGLE_CUDA +#include "tensorflow/stream_executor/gpu/asm_compiler.h" +#endif + +namespace { +using tensorflow::Status; +using xla::InternalError; +using xla::StatusOr; + +StatusOr GetLibdeviceDir( + const xla::HloModuleConfig& 
hlo_module_config) { + for (const std::string& cuda_root : tensorflow::CandidateCudaRoots( + hlo_module_config.debug_options().xla_gpu_cuda_data_dir())) { + std::string libdevice_dir = + tensorflow::io::JoinPath(cuda_root, "nvvm", "libdevice"); + VLOG(2) << "Looking for libdevice at " << libdevice_dir; + if (tensorflow::Env::Default()->IsDirectory(libdevice_dir).ok()) { + VLOG(2) << "Found libdevice dir " << libdevice_dir; + return libdevice_dir; + } + } + return InternalError( + "Can't find libdevice directory ${CUDA_DIR}/nvvm/libdevice"); +} + +struct MaterializeBroadcastsPass + : public mlir::PassWrapper { + void runOnFunction() override { + mlir::ConversionTarget conversionTarget(getContext()); + mlir::OwningRewritePatternList conversionPatterns; + + // Consider the xla_hlo dialect legal for tests. + conversionTarget.addLegalDialect(); + // The conversion uses helpers from the Standard dialect. + conversionTarget.addLegalDialect(); + + mlir::xla_hlo::SetupMaterializeBroadcastsLegality(&getContext(), + &conversionTarget); + mlir::xla_hlo::PopulateMaterializeBroadcastsPatterns(&getContext(), + &conversionPatterns); + + if (failed(applyPartialConversion(getFunction(), conversionTarget, + conversionPatterns))) { + return signalPassFailure(); + } + } +}; + +struct UnfuseBatchNormPass + : public mlir::PassWrapper { + void runOnFunction() override { + mlir::OwningRewritePatternList patterns; + mlir::xla_hlo::PopulateUnfuseBatchNormPatterns(&getContext(), &patterns); + mlir::applyPatternsAndFoldGreedily(getOperation(), patterns); + } +}; + +Status LowerTfOpToLhloWithDynamicShapes(mlir::ModuleOp module) { + mlir::PassManager pm(module.getContext()); + auto enable_if_vlog_is_on = [](mlir::Pass* pass, mlir::Operation* op) { + return VLOG_IS_ON(1); + }; + pm.enableIRPrinting(/*shouldPrintBeforePass=*/{}, + /*shouldPrintAfterPass=*/enable_if_vlog_is_on, + /*printModuleScope=*/false, + /*printAfterOnlyOnChange=*/false, llvm::dbgs()); + 
pm.addNestedPass(mlir::xla_hlo::createLegalizeTFPass(false)); + pm.addNestedPass( + absl::make_unique()); + pm.addNestedPass(absl::make_unique()); + pm.addPass(mlir::xla_hlo::createLegalizeToLhloPass()); + pm.addNestedPass(mlir::xla_lhlo::createLhloCopyRemovalPass()); + + if (failed(pm.run(module))) { + return InternalError("Lowering TF to LHLO failed."); + } + return Status::OK(); +} + +struct PropagateStaticKnowledge + : public mlir::PassWrapper> { + explicit PropagateStaticKnowledge(mlir::FunctionType type, + llvm::ArrayRef same_shape_) + : func_type(type), same_shape(same_shape_) {} + + void runOnOperation() override { + // We know due to tensorflow ABI that the offset is always 0 and that the + // innermost stride is always 1. To make this visible to the compiler, + // we insert constants into the code and replace usages accordingly. + // We do not change the signature so that we keep a somewhat stable ABI + // that is easy to understand by tools. + mlir::LLVM::LLVMFuncOp func = getOperation(); + mlir::OpBuilder b(func.getBody()); + auto index_type = func.getArgument(3).getType(); + mlir::Value one = b.create( + func.getLoc(), index_type, b.getIntegerAttr(b.getIndexType(), 1)); + mlir::Value zero = b.create( + func.getLoc(), index_type, b.getIntegerAttr(b.getIndexType(), 0)); + uint32_t arg_pos = 0; + std::vector positions; + // Each input of func_type occupies 3 + 2 * rank consecutive arguments of + // the lowered function. positions[i] records where input i starts; its + // slot 2 is replaced by the constant 0 (offset) and its last slot by the + // constant 1 (innermost stride). + for (mlir::Type arg_type : func_type.getInputs()) { + positions.push_back(arg_pos); + func.getArgument(arg_pos + 2).replaceAllUsesWith(zero); + arg_pos += 3 + arg_type.cast().getRank() * 2; + func.getArgument(arg_pos - 1).replaceAllUsesWith(one); + } + + // If we have knowledge that some arguments have the same shape, we + // can use that here. Simply replace usages of the shape parameters within + // the function body to a single shape parameter. 
+ if (!same_shape.empty()) { + auto first = same_shape.front(); + auto first_offset = positions.at(first); + mlir::ShapedType first_type = + func_type.getInput(first).cast(); + uint32_t rank = first_type.getRank(); + for (auto same : same_shape.drop_front(1)) { + uint32_t same_offset = positions.at(same); + auto same_type = func_type.getInput(same).cast(); + if (same_type.getRank() != rank) { + func.emitOpError() << "same shape constraints on arguments with " + "non-matching shapes: #" + << first << " and #" << same; + // Stop here: the loop below indexes 2 * rank shape arguments of both + // operands, which is only meaningful when the ranks agree. + return signalPassFailure(); + } + + for (uint32_t i = 0; i < 2 * rank; ++i) { + // Replace uses for second arg data with first arg. + auto same_arg = func.getArgument(same_offset + 3 + i); + auto first_arg = func.getArgument(first_offset + 3 + i); + same_arg.replaceAllUsesWith(first_arg); + } + } + } + } + + mlir::FunctionType func_type; + llvm::ArrayRef same_shape; +}; + +Status PropagateStaticShapeKnowledgeToKernel( + mlir::ModuleOp module, llvm::ArrayRef same_shape) { + // Grab the original signature from the single function. 
+ auto func = *module.getBody()->op_begin(); + + mlir::PassManager pm(module.getContext()); + auto enable_if_vlog_is_on = [](mlir::Pass*, mlir::Operation*) { + return VLOG_IS_ON(1); + }; + pm.enableIRPrinting(/*shouldPrintBeforePass=*/{}, + /*shouldPrintAfterPass=*/enable_if_vlog_is_on, + /*printModuleScope=*/false, + /*printAfterOnlyOnChange=*/false, llvm::dbgs()); + auto& kernel_pm = pm.nest<::mlir::gpu::GPUModuleOp>(); + kernel_pm.addNestedPass( + absl::make_unique(func.getType(), same_shape)); + + if (failed(pm.run(module))) { + return InternalError("Static knowledge propagation failed."); + } + return Status::OK(); +} +} // namespace + +StatusOr> tensorflow::kernel_gen::GenerateCubinForTfCode( + llvm::StringRef tf_code, std::pair compute_capability, + llvm::ArrayRef tile_sizes, llvm::ArrayRef same_shape, + llvm::ArrayRef unroll_factors) { + mlir::MLIRContext context; + context.allowUnregisteredDialects(); // TODO(b/152572127) + mlir::OwningModuleRef module = mlir::parseSourceString(tf_code, &context); + + TF_RETURN_IF_ERROR(LowerTfOpToLhloWithDynamicShapes(module.get())); + TF_RETURN_IF_ERROR( + xla::mlir_gpu::LowerLHLOToGPU(module.get(), tile_sizes, unroll_factors, + /*collapseParallelLoops=*/false)); + TF_RETURN_IF_ERROR(xla::mlir_gpu::LowerKernelBodiesToNVVM(module.get())); + TF_RETURN_IF_ERROR( + PropagateStaticShapeKnowledgeToKernel(module.get(), same_shape)); + + mlir::OwningModuleRef kernel_module = + xla::mlir_gpu::ExtractKernelModule(*module).ValueOrDie(); + auto llvmModule = mlir::translateModuleToNVVMIR(*kernel_module); + if (!llvmModule) { + return InternalError("Could not translate MLIR module to NVVM"); + } + + llvmModule->setModuleIdentifier("acme"); + llvmModule->setDataLayout(xla::gpu::nvptx::kDataLayout); + + xla::HloModuleConfig config; + config.set_debug_options(xla::GetDebugOptionsFromFlags()); + + TF_ASSIGN_OR_RETURN(std::string libdevice_dir, GetLibdeviceDir(config)); + TF_ASSIGN_OR_RETURN(std::string ptx, xla::gpu::nvptx::CompileToPtx( 
+ llvmModule.get(), compute_capability, + config, libdevice_dir)); + VLOG(1) << ptx; + +#if GOOGLE_CUDA + return tensorflow::se::CompileGpuAsm( + std::get<0>(compute_capability), std::get<1>(compute_capability), + ptx.c_str(), xla::gpu::PtxOptsFromConfig(config)); +#else + return InternalError( + "GOOGLE_CUDA not defined. Did you specify --config=cuda ?"); +#endif +} diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.h b/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.h new file mode 100644 index 00000000000..47626ba9d0d --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.h @@ -0,0 +1,42 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +//===- cubin_creator.h ------------------------------------------*- C++ -*-===// +// +// This file declares the function to compile a TF kernel function to a cubin. 
+// +//===----------------------------------------------------------------------===// +#ifndef TENSORFLOW_COMPILER_MLIR_TOOLS_KERNEL_GEN_CUBIN_CREATOR_H_ +#define TENSORFLOW_COMPILER_MLIR_TOOLS_KERNEL_GEN_CUBIN_CREATOR_H_ + +#include +#include + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "tensorflow/compiler/xla/statusor.h" + +namespace tensorflow { +namespace kernel_gen { +xla::StatusOr> GenerateCubinForTfCode( + llvm::StringRef tf_code, + std::pair compute_capability = {7, 5}, + llvm::ArrayRef tile_sizes = {16, 64}, + llvm::ArrayRef same_shape = {}, + llvm::ArrayRef unroll_factors = {}); +} // namespace kernel_gen +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_MLIR_TOOLS_KERNEL_GEN_CUBIN_CREATOR_H_ diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_cubin.cc b/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_cubin.cc new file mode 100644 index 00000000000..8edc567e777 --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_cubin.cc @@ -0,0 +1,118 @@ +// Copyright 2020 The TensorFlow Runtime Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//===- tf_to_cubin.cc -------------------------------------------*- C++ -*-===// +// +// This file implements the entry point to compile a tf op to a cubin file. 
+// +//===----------------------------------------------------------------------===// +#include +#include +#include + +#include "absl/strings/numbers.h" +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/init_main.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/util/command_line_flags.h" + +namespace { +bool ParseStringList(std::string string_list, std::vector* result) { + result->clear(); + uint32_t item; + auto items = absl::StrSplit(string_list, ','); + for (const auto& item_str : items) { + if (!absl::SimpleAtoi(item_str, &item)) { + LOG(ERROR) << "Expected token " << item_str << " to be an integer"; + return false; + } + result->push_back(item); + } + return true; +} +} // namespace + +int main(int argc, char** argv) { + std::string output_file = "foo.bin"; + int32_t architecture = 50; + std::vector tile_sizes; + std::vector unroll_factors; + std::vector same_shape; + + auto parse_tile_sizes = [&tile_sizes](std::string tile_sizes_str) { + if (!ParseStringList(tile_sizes_str, &tile_sizes)) { + return false; + } + // Initialize with the default. + if (tile_sizes.empty()) { + tile_sizes.push_back(16); + tile_sizes.push_back(64); + } + return true; + }; + + auto parse_unroll_factors = + [&unroll_factors](std::string unroll_factors_str) { + return ParseStringList(unroll_factors_str, &unroll_factors); + }; + + auto parse_same_shape = [&same_shape](std::string same_shape_str) { + return ParseStringList(same_shape_str, &same_shape); + }; + + std::vector flag_list = { + tensorflow::Flag("output", &output_file, "output file"), + tensorflow::Flag("arch", &architecture, + "target architecture (e.g. 
50 for sm_50)"), + tensorflow::Flag("tile_sizes", parse_tile_sizes, "16,64", + "tile sizes to use"), + tensorflow::Flag("unroll_factors", parse_unroll_factors, "", + "factors to unroll by, separated by commas"), + tensorflow::Flag("same_shape", parse_same_shape, "", + "arguments with same shape, separated by commas"), + }; + bool parse_ok = tensorflow::Flags::Parse(&argc, argv, flag_list); + tensorflow::port::InitMain("usage", &argc, &argv); + if (!parse_ok) { + return 1; + } + + std::pair compute_capability(architecture / 10, + architecture % 10); + + auto cubin = tensorflow::kernel_gen::GenerateCubinForTfCode( + argv[1], compute_capability, tile_sizes, same_shape, unroll_factors); + + if (!cubin.ok()) { + LOG(ERROR) << cubin.status(); + return 1; + } + + std::vector cubin_data = cubin.ConsumeValueOrDie(); + + auto status = tensorflow::WriteStringToFile( + tensorflow::Env::Default(), output_file, + absl::string_view{reinterpret_cast(cubin_data.data()), + cubin_data.size()}); + + if (!status.ok()) { + LOG(ERROR) << status; + return 1; + } + + return 0; +} diff --git a/tensorflow/compiler/mlir/xla/BUILD b/tensorflow/compiler/mlir/xla/BUILD index e4309d5eef0..590595a668f 100644 --- a/tensorflow/compiler/mlir/xla/BUILD +++ b/tensorflow/compiler/mlir/xla/BUILD @@ -23,7 +23,6 @@ package_group( "//tensorflow/compiler/xla/...", "//third_party/iree/...", "//third_party/mlir_edge/...", - "//third_party/tf_runtime/tools/tf_kernel_gen/...", ], ) @@ -39,7 +38,7 @@ filegroup( "ir/lhlo_ops.td", "@llvm-project//mlir:OpBaseTdFiles", "@llvm-project//mlir:include/mlir/Interfaces/InferTypeOpInterface.td", - "@llvm-project//mlir:include/mlir/Interfaces/SideEffects.td", + "@llvm-project//mlir:include/mlir/Interfaces/SideEffectInterfaces.td", ], ) @@ -241,8 +240,8 @@ cc_library( "@llvm-project//llvm:support", "@llvm-project//mlir:IR", "@llvm-project//mlir:LinalgOps", - "@llvm-project//mlir:LoopOps", "@llvm-project//mlir:Pass", + "@llvm-project//mlir:SCFDialect", 
"@llvm-project//mlir:StandardOps", "@llvm-project//mlir:Transforms", ], @@ -279,8 +278,8 @@ cc_library( "@llvm-project//mlir:GPUDialect", "@llvm-project//mlir:IR", "@llvm-project//mlir:LinalgOps", - "@llvm-project//mlir:LoopOps", "@llvm-project//mlir:Pass", + "@llvm-project//mlir:SCFDialect", "@llvm-project//mlir:StandardOps", "@llvm-project//mlir:Transforms", ], @@ -823,7 +822,7 @@ genrule( name = "operator_writer_inc", srcs = [ "@llvm-project//mlir:include/mlir/Interfaces/InferTypeOpInterface.td", - "@llvm-project//mlir:include/mlir/Interfaces/SideEffects.td", + "@llvm-project//mlir:include/mlir/Interfaces/SideEffectInterfaces.td", "@llvm-project//mlir:include/mlir/IR/OpBase.td", ":ir/hlo_ops.td", ":ir/hlo_ops_base.td", diff --git a/tensorflow/compiler/mlir/xla/hlo_utils.cc b/tensorflow/compiler/mlir/xla/hlo_utils.cc index c685cc296fd..dc801f64ede 100644 --- a/tensorflow/compiler/mlir/xla/hlo_utils.cc +++ b/tensorflow/compiler/mlir/xla/hlo_utils.cc @@ -139,6 +139,10 @@ StatusOr CreateDenseElementsAttrFromLiteral( return CreateDenseAttrFromLiteral(type, literal); case PrimitiveType::U64: return CreateDenseAttrFromLiteral(type, literal); + case PrimitiveType::C64: + return CreateDenseAttrFromLiteral(type, literal); + case PrimitiveType::C128: + return CreateDenseAttrFromLiteral(type, literal); default: return tensorflow::errors::Internal( absl::StrCat("Unsupported type: ", PrimitiveType_Name(element_type))); diff --git a/tensorflow/compiler/mlir/xla/ir/chlo_ops.cc b/tensorflow/compiler/mlir/xla/ir/chlo_ops.cc index bc6842a617e..5322668aa2e 100644 --- a/tensorflow/compiler/mlir/xla/ir/chlo_ops.cc +++ b/tensorflow/compiler/mlir/xla/ir/chlo_ops.cc @@ -97,16 +97,12 @@ static Type GetBroadcastType(Type x, Type y, Type element_type, LogicalResult InferBroadcastBinaryOpReturnTypeComponents( MLIRContext* context, Optional location, ValueRange operands, - ArrayRef attributes, Type element_type, + DictionaryAttr attributes, Type element_type, SmallVectorImpl& 
inferedReturnShapes) { // Find broadcast_dimensions. - DenseIntElementsAttr broadcast_dimensions; - for (auto attr : attributes) { - if (attr.first == "broadcast_dimensions") { - broadcast_dimensions = attr.second.dyn_cast(); - break; - } - } + DenseIntElementsAttr broadcast_dimensions = + attributes.get("broadcast_dimensions") + .dyn_cast_or_null(); ShapedType lhs_type = operands[0].getType().dyn_cast(); ShapedType rhs_type = operands[1].getType().dyn_cast(); @@ -168,7 +164,7 @@ LogicalResult ReifyBroadcastBinaryOpReturnTypeShapes( LogicalResult BroadcastComplexOp::inferReturnTypeComponents( MLIRContext* context, Optional location, ValueRange operands, - ArrayRef attributes, RegionRange regions, + DictionaryAttr attributes, RegionRange regions, SmallVectorImpl& inferedReturnShapes) { ShapedType lhs_type = operands[0].getType().dyn_cast(); if (!lhs_type) { @@ -191,7 +187,7 @@ LogicalResult BroadcastComplexOp::reifyReturnTypeShapes( LogicalResult BroadcastCompareOp::inferReturnTypeComponents( MLIRContext* context, Optional location, ValueRange operands, - ArrayRef attributes, RegionRange regions, + DictionaryAttr attributes, RegionRange regions, SmallVectorImpl& inferedReturnShapes) { Type element_type = IntegerType::get(1, context); return InferBroadcastBinaryOpReturnTypeComponents(context, location, operands, @@ -211,7 +207,7 @@ LogicalResult BroadcastCompareOp::reifyReturnTypeShapes( #define BROADCAST_INFER_SHAPE_TYPE_OP_DEFS(Op) \ LogicalResult Op::inferReturnTypeComponents( \ MLIRContext* context, Optional location, ValueRange operands, \ - ArrayRef attributes, RegionRange regions, \ + DictionaryAttr attributes, RegionRange regions, \ SmallVectorImpl& inferedReturnShapes) { \ return InferBroadcastBinaryOpReturnTypeComponents( \ context, location, operands, attributes, /*element_type=*/nullptr, \ diff --git a/tensorflow/compiler/mlir/xla/ir/chlo_ops.td b/tensorflow/compiler/mlir/xla/ir/chlo_ops.td index a244985c9b5..f9672c1a95a 100644 --- 
a/tensorflow/compiler/mlir/xla/ir/chlo_ops.td +++ b/tensorflow/compiler/mlir/xla/ir/chlo_ops.td @@ -31,7 +31,7 @@ limitations under the License. include "mlir/IR/OpBase.td" include "mlir/Interfaces/InferTypeOpInterface.td" -include "mlir/Interfaces/SideEffects.td" +include "mlir/Interfaces/SideEffectInterfaces.td" include "tensorflow/compiler/mlir/xla/ir/hlo_ops_base.td" def HLOClient_Dialect : Dialect { diff --git a/tensorflow/compiler/mlir/xla/ir/hlo_ops.cc b/tensorflow/compiler/mlir/xla/ir/hlo_ops.cc index cb7372a762c..68eafb8b33e 100644 --- a/tensorflow/compiler/mlir/xla/ir/hlo_ops.cc +++ b/tensorflow/compiler/mlir/xla/ir/hlo_ops.cc @@ -1170,9 +1170,22 @@ OpFoldResult CopyOp::fold(ArrayRef operands) { return getOperand(); } //===----------------------------------------------------------------------===// OpFoldResult ReverseOp::fold(ArrayRef operands) { + auto input = operand(); + // No dimensions to reverse. - if (dimensions().getNumElements() == 0) return operand(); - return nullptr; + if (dimensions().getNumElements() == 0) return input; + + llvm::SmallVector new_dims; + new_dims.reserve(dimensions().getNumElements()); + + auto shaped_type = input.getType().cast(); + for (auto dim : dimensions().getValues()) { + if (shaped_type.getDimSize(dim.getLimitedValue()) != 1) { + return nullptr; + } + } + + return input; } //===----------------------------------------------------------------------===// @@ -1240,7 +1253,7 @@ static LogicalResult Verify(SelectOp op) { // the return type based on operand type. 
LogicalResult SelectOp::inferReturnTypes( MLIRContext*, Optional location, ValueRange operands, - ArrayRef attributes, RegionRange regions, + DictionaryAttr attributes, RegionRange regions, SmallVectorImpl& inferredReturnTypes) { auto x_type = operands[1].getType(); auto y_type = operands[2].getType(); diff --git a/tensorflow/compiler/mlir/xla/ir/hlo_ops.td b/tensorflow/compiler/mlir/xla/ir/hlo_ops.td index dabf03d3c9f..f78ac7624d2 100644 --- a/tensorflow/compiler/mlir/xla/ir/hlo_ops.td +++ b/tensorflow/compiler/mlir/xla/ir/hlo_ops.td @@ -23,7 +23,7 @@ limitations under the License. include "mlir/IR/OpBase.td" include "mlir/Interfaces/InferTypeOpInterface.td" -include "mlir/Interfaces/SideEffects.td" +include "mlir/Interfaces/SideEffectInterfaces.td" include "tensorflow/compiler/mlir/xla/ir/hlo_ops_base.td" include "tensorflow/compiler/mlir/xla/ir/hlo_utils.td" @@ -95,6 +95,7 @@ def HLO_CreateTokenOp : HLO_Op<"create_token", [NoSideEffect]> { // XLA unary elementwise op definitions. //===----------------------------------------------------------------------===// // See https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions + class HLO_UnaryElementwiseOp traits, Type TensorType>: HLO_Op { @@ -103,8 +104,7 @@ class HLO_UnaryElementwiseOp traits, let extraClassDeclaration = [{ static LogicalResult inferReturnTypeComponents( MLIRContext* context, Optional location, - ValueRange operands, ArrayRef attributes, - RegionRange regions, + ValueRange operands, DictionaryAttr attributes, RegionRange regions, SmallVectorImpl& inferedReturnShapes) { return failure(); } @@ -161,6 +161,16 @@ def HLO_Expm1Op: HLO_UnaryElementwiseOp<"exponential_minus_one", def HLO_FloorOp: HLO_UnaryElementwiseOp<"floor", [NoSideEffect, SameOperandsAndResultType], HLO_FpTensor>, BASE_HLO_FloorOp; +def HLO_ImagOp: HLO_Op< + "imag", [NoSideEffect, SameOperandsAndResultShape]>, BASE_HLO_ImagOp { + let builders = [OpBuilder< + "OpBuilder &, OperationState &tblgen_state, Value 
val">]; + + let arguments = (ins HLO_ComplexTensor); + let results = (outs HLO_FpTensor); + let hasFolder = 1; +} + def HLO_IsFiniteOp: HLO_UnaryElementwiseOp<"is_finite", [NoSideEffect, SameOperandsAndResultShape], HLO_Tensor>, BASE_HLO_IsFiniteOp { @@ -188,6 +198,16 @@ def HLO_PopulationCountOp: HLO_UnaryElementwiseOp<"popcnt", [NoSideEffect, SameOperandsAndResultType], HLO_IntTensor>, BASE_HLO_PopulationCountOp; +def HLO_RealOp: HLO_Op< + "real", [NoSideEffect, SameOperandsAndResultShape]>, BASE_HLO_RealOp { + let builders = [OpBuilder< + "OpBuilder &, OperationState &tblgen_state, Value val">]; + + let arguments = (ins HLO_ComplexTensor); + let results = (outs HLO_FpTensor); + let hasFolder = 1; +} + def HLO_RoundOp: HLO_UnaryElementwiseOp<"round_nearest_afz", [NoSideEffect, SameOperandsAndResultType], HLO_FpTensor>, BASE_HLO_RoundOp; @@ -209,50 +229,14 @@ def HLO_SqrtOp: HLO_UnaryElementwiseOp<"sqrt", BASE_HLO_SqrtOp; def HLO_TanhOp: HLO_UnaryElementwiseOp<"tanh", - [ResultsAreFloatLike, NoSideEffect, SameOperandsAndResultType], + [NoSideEffect, SameOperandsAndResultType], HLO_FpOrComplexTensor>, BASE_HLO_TanhOp; -//===----------------------------------------------------------------------===// -// XLA complex unary elementwise op definitions. 
-//===----------------------------------------------------------------------===// -// See https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions - -def HLO_ComplexOp: HLO_Op<"complex", - [NoSideEffect, SameOperandsElementType, SameOperandsAndResultShape]>, - BASE_HLO_ComplexOp { - let builders = [OpBuilder< - "OpBuilder &, OperationState &tblgen_state, Value lhs, Value rhs">]; - - let arguments = (ins HLO_FpTensor:$lhs, HLO_FpTensor:$rhs); - let results = (outs HLO_ComplexTensor); - let hasFolder = 1; -} - -def HLO_ImagOp: HLO_Op< - "imag", [NoSideEffect, SameOperandsAndResultShape]>, BASE_HLO_ImagOp { - let builders = [OpBuilder< - "OpBuilder &, OperationState &tblgen_state, Value val">]; - - let arguments = (ins HLO_ComplexTensor); - let results = (outs HLO_FpTensor); - let hasFolder = 1; -} - -def HLO_RealOp: HLO_Op< - "real", [NoSideEffect, SameOperandsAndResultShape]>, BASE_HLO_RealOp { - let builders = [OpBuilder< - "OpBuilder &, OperationState &tblgen_state, Value val">]; - - let arguments = (ins HLO_ComplexTensor); - let results = (outs HLO_FpTensor); - let hasFolder = 1; -} - //===----------------------------------------------------------------------===// // XLA binary elementwise op definitions. 
//===----------------------------------------------------------------------===// - // See https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations + class HLO_BinaryElementwiseOp traits> : HLO_Op { let arguments = (ins @@ -269,7 +253,7 @@ class HLO_BinaryElementwiseOp traits> : let extraClassDeclaration = [{ static LogicalResult inferReturnTypeComponents( MLIRContext* context, Optional location, ValueRange operands, - ArrayRef attributes, RegionRange regions, + DictionaryAttr attributes, RegionRange regions, SmallVectorImpl& inferedReturnShapes) { return failure(); } @@ -293,6 +277,17 @@ def HLO_AddOp : HLO_BinaryElementwiseOp<"add", def HLO_Atan2Op : HLO_BinaryElementwiseOp<"atan2", [NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_Atan2Op; +def HLO_ComplexOp: HLO_Op<"complex", + [NoSideEffect, SameOperandsElementType, SameOperandsAndResultShape]>, + BASE_HLO_ComplexOp { + let builders = [OpBuilder< + "OpBuilder &, OperationState &tblgen_state, Value lhs, Value rhs">]; + + let arguments = (ins HLO_FpTensor:$lhs, HLO_FpTensor:$rhs); + let results = (outs HLO_ComplexTensor); + let hasFolder = 1; +} + def HLO_DivOp : HLO_BinaryElementwiseOp<"divide", [NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_DivOp { } diff --git a/tensorflow/compiler/mlir/xla/ir/hlo_ops_base.td b/tensorflow/compiler/mlir/xla/ir/hlo_ops_base.td index c087ffd1f40..b5de675f13f 100644 --- a/tensorflow/compiler/mlir/xla/ir/hlo_ops_base.td +++ b/tensorflow/compiler/mlir/xla/ir/hlo_ops_base.td @@ -150,15 +150,6 @@ class BASE_HLO_ClzOp { }]; } -class BASE_HLO_ComplexOp { - string summary = "Complex operator"; - - string description = [{ - Performs element-wise conversion of a pair of real and imaginary values to - a complex value. 
- }]; -} - class BASE_HLO_ConvertOp { string summary = "Convert operator"; @@ -400,6 +391,15 @@ class BASE_HLO_AddOp { }]; } +class BASE_HLO_ComplexOp { + string summary = "Complex operator"; + + string description = [{ + Performs element-wise conversion of a pair of real and imaginary values to + a complex value. + }]; +} + class BASE_HLO_DivOp { string summary = "Division operator"; diff --git a/tensorflow/compiler/mlir/xla/ir/lhlo_ops.td b/tensorflow/compiler/mlir/xla/ir/lhlo_ops.td index 3abd117f570..db75bbd1f67 100644 --- a/tensorflow/compiler/mlir/xla/ir/lhlo_ops.td +++ b/tensorflow/compiler/mlir/xla/ir/lhlo_ops.td @@ -19,7 +19,7 @@ limitations under the License. #define LHLO_OPS include "mlir/IR/OpBase.td" -include "mlir/Interfaces/SideEffects.td" +include "mlir/Interfaces/SideEffectInterfaces.td" include "tensorflow/compiler/mlir/xla/ir/hlo_ops_base.td" def LHLO_Dialect : Dialect { @@ -92,10 +92,20 @@ def LHLO_CosOp: LHLO_UnaryElementwiseOp<"cosine">, BASE_HLO_CosOp; def LHLO_ExpOp: LHLO_UnaryElementwiseOp<"exponential">, BASE_HLO_ExpOp; +def LHLO_ImagOp: LHLO_Op<"imag", [SameOperandsShape]>, BASE_HLO_ImagOp { + let arguments = (ins Arg:$input, + Arg:$output); +} + def LHLO_LogOp: LHLO_UnaryElementwiseOp<"log">, BASE_HLO_LogOp; def LHLO_NegOp: LHLO_UnaryElementwiseOp<"negate">, BASE_HLO_NegOp; +def LHLO_RealOp: LHLO_Op<"real", [SameOperandsShape]>, BASE_HLO_RealOp { + let arguments = (ins Arg:$input, + Arg:$output); +} + def LHLO_RsqrtOp: LHLO_UnaryElementwiseOp<"rsqrt">, BASE_HLO_RsqrtOp; def LHLO_SqrtOp: LHLO_UnaryElementwiseOp<"sqrt">, BASE_HLO_SqrtOp; @@ -106,27 +116,6 @@ def LHLO_SinOp: LHLO_UnaryElementwiseOp<"sine">, BASE_HLO_SinOp; def LHLO_TanhOp: LHLO_UnaryElementwiseOp<"tanh">, BASE_HLO_TanhOp; -//===----------------------------------------------------------------------===// -// XLA complex unary elementwise op definitions. 
-//===----------------------------------------------------------------------===// -// See https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions - -def LHLO_ComplexOp: LHLO_Op<"complex", [SameOperandsShape]>, BASE_HLO_ComplexOp { - let arguments = (ins Arg:$lhs, - Arg:$rhs, - Arg:$output); -} - -def LHLO_ImagOp: LHLO_Op<"imag", [SameOperandsShape]>, BASE_HLO_ImagOp { - let arguments = (ins Arg:$input, - Arg:$output); -} - -def LHLO_RealOp: LHLO_Op<"real", [SameOperandsShape]>, BASE_HLO_RealOp { - let arguments = (ins Arg:$input, - Arg:$output); -} - //===----------------------------------------------------------------------===// // XLA binary elementwise op definitions. //===----------------------------------------------------------------------===// @@ -144,6 +133,12 @@ class LHLO_BinaryElementwiseOp traits> : def LHLO_AddOp : LHLO_BinaryElementwiseOp<"add", []>, BASE_HLO_AddOp; +def LHLO_ComplexOp: LHLO_Op<"complex", [SameOperandsShape]>, BASE_HLO_ComplexOp { + let arguments = (ins Arg:$lhs, + Arg:$rhs, + Arg:$output); +} + def LHLO_DivOp : LHLO_BinaryElementwiseOp<"divide", []>, BASE_HLO_DivOp; def LHLO_MaxOp : LHLO_BinaryElementwiseOp<"maximum", []>, BASE_HLO_MaxOp; diff --git a/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.cc b/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.cc index 99d1da74fc5..cc334d8654f 100644 --- a/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.cc +++ b/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.cc @@ -56,6 +56,20 @@ static mlir::DenseIntElementsAttr GetI64ElementsAttr( return mlir::DenseIntElementsAttr::get(ty, mlir_values); } +static mlir::DenseIntElementsAttr ConvertPadding( + absl::Span> padding, + mlir::Builder* builder) { + llvm::SmallVector elements; + elements.reserve(padding.size() * 2); + for (const auto& vals : padding) { + elements.push_back(vals.first); + elements.push_back(vals.second); + } + auto ty = mlir::RankedTensorType::get( + {static_cast(padding.size()), 2}, 
builder->getIntegerType(64)); + return mlir::DenseIntElementsAttr::get(ty, elements); +} + MlirHloBuilder::~MlirHloBuilder() = default; StatusOr MlirHloBuilder::MakeXlaOp(mlir::Value val) { @@ -79,6 +93,31 @@ XlaOp MlirHloBuilder::ConstantLiteral(const LiteralSlice& literal) { }); } +StatusOr MlirHloBuilder::ConvGeneralDilatedInternal( + const Shape& shape, XlaOp lhs, XlaOp rhs, const Window& window, + absl::Span window_strides, + absl::Span> padding, + absl::Span lhs_dilation, absl::Span rhs_dilation, + const ConvolutionDimensionNumbers& dimension_numbers, + int64 feature_group_count, int64 batch_group_count, + const PrecisionConfig* precision_config) { + TF_ASSIGN_OR_RETURN(mlir::Type ty, ConvertShapeToType( + shape, builder_)); + mlir::ArrayAttr config_attr; + if (precision_config) + config_attr = ConvertPrecisionConfig(precision_config, &builder_); + auto op = builder_.create( + loc_, ty, GetValue(lhs), GetValue(rhs), + GetI64ElementsAttr(window_strides, &builder_), + ConvertPadding(padding, &builder_), + GetI64ElementsAttr(lhs_dilation, &builder_), + GetI64ElementsAttr(rhs_dilation, &builder_), + ConvertConvDimensionNumbers(dimension_numbers, &builder_), + builder_.getI64IntegerAttr(feature_group_count), + builder_.getI64IntegerAttr(batch_group_count), config_attr); + return MakeXlaOp(op); +} + StatusOr MlirHloBuilder::TransposeInternal( const Shape& shape, XlaOp operand, absl::Span permutation) { TF_ASSIGN_OR_RETURN(mlir::Type ty, ConvertShapeToType( diff --git a/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.h b/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.h index dbcb6856971..5a84d60cdc2 100644 --- a/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.h +++ b/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.h @@ -110,6 +110,16 @@ class MlirHloBuilder : public XlaBuilder { private: XlaOp ConstantLiteral(const LiteralSlice& literal) override; + StatusOr ConvGeneralDilatedInternal( + const Shape& shape, XlaOp lhs, XlaOp rhs, const Window& window, + 
absl::Span window_strides, + absl::Span> padding, + absl::Span lhs_dilation, + absl::Span rhs_dilation, + const ConvolutionDimensionNumbers& dimension_numbers, + int64 feature_group_count, int64 batch_group_count, + const PrecisionConfig* precision_config) override; + StatusOr TransposeInternal( const Shape& shape, XlaOp operand, absl::Span permutation) override; diff --git a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc index a1fb6b559e3..228a26b5abd 100644 --- a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc +++ b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc @@ -933,6 +933,8 @@ StatusOr CreateLiteralFromAttr(ElementsAttr attr) { ELEMENTS_ATTR_TO_LITERAL(xla::PrimitiveType::U16, uint16) ELEMENTS_ATTR_TO_LITERAL(xla::PrimitiveType::U32, uint32) ELEMENTS_ATTR_TO_LITERAL(xla::PrimitiveType::U64, uint64) + ELEMENTS_ATTR_TO_LITERAL(xla::PrimitiveType::C64, std::complex) + ELEMENTS_ATTR_TO_LITERAL(xla::PrimitiveType::C128, std::complex) case xla::PrimitiveType::F16: { llvm::SmallVector values; values.reserve(attr.getNumElements()); @@ -984,6 +986,21 @@ LogicalResult ConvertToHloModule::Lower( return LowerFunctionCall(&call_op, builder, &value_map); } + if (auto op = dyn_cast(inst)) { + Value operand = op.getOperand(); + auto ty = operand.getType().dyn_cast(); + // If this was a cast from a static shaped tensors, then it is a noop for + // export to HLO and we can use the operand. + if (!ty || !ty.hasStaticShape()) { + inst->emitOpError() + << "requires static shaped operand for HLO translation"; + return failure(); + } + + value_map[op.getResult()] = value_map[operand]; + return success(); + } + // TODO(jpienaar): This doesn't support layouts yet. 
if (matchPattern(inst, m_Constant(&const_attr))) { auto literal_or = CreateLiteralFromAttr(const_attr); diff --git a/tensorflow/compiler/mlir/xla/tests/hlo-legalize-to-lhlo.mlir b/tensorflow/compiler/mlir/xla/tests/hlo-legalize-to-lhlo.mlir index 262533bbf08..53296b257ae 100644 --- a/tensorflow/compiler/mlir/xla/tests/hlo-legalize-to-lhlo.mlir +++ b/tensorflow/compiler/mlir/xla/tests/hlo-legalize-to-lhlo.mlir @@ -1,4 +1,4 @@ -// RUN: xla-opt -hlo-legalize-to-lhlo %s -o - | FileCheck %s --dump-input-on-failure +// RUN: xla-opt -hlo-legalize-to-lhlo -buffer-placement %s -o - | FileCheck %s --dump-input-on-failure // CHECK-LABEL: func @attrs func @attrs_copy(%operand: memref<2x2xf32>, %result: memref<2x2xf32>) { @@ -13,33 +13,42 @@ func @attrs_copy(%operand: memref<2x2xf32>, %result: memref<2x2xf32>) { // ----- +func @return_func(%arg0: tensor<4xf32>) -> tensor<4xf32> { + return %arg0 : tensor<4xf32> +} +// CHECK: (%[[ARG0:.*]]: [[TYPE:.*]], %[[RESULT:.*]]: [[TYPE]]) +// CHECK-NEXT: "xla_lhlo.copy"(%[[ARG0]], %[[RESULT]]) : ([[TYPE]], [[TYPE]]) -> () +// CHECK-NEXT: "xla_lhlo.terminator"() : () -> () + +// ----- + // CHECK-LABEL: func @func_op_long func @func_op_long(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { - // CHECK: (%[[NEW_ARG0:.*]]: memref<4xf32>, %[[NEW_ARG1:.*]]: memref<4xf32>, %[[RESULT:.*]]: memref<4xf32>) - // CHECK-NEXT: %[[MUL_RESULT:.*]] = alloc() {temp = true} : memref<4xf32> - // CHECK-NEXT: %[[SUB_RESULT:.*]] = alloc() {temp = true} : memref<4xf32> - // CHECK-NEXT: %[[MIN_RESULT:.*]] = alloc() {temp = true} : memref<4xf32> - // CHECK-NEXT: %[[ADD_RESULT:.*]] = alloc() {temp = true} : memref<4xf32> - // CHECK-NEXT: %[[MAX_RESULT:.*]] = alloc() {temp = true} : memref<4xf32> %1 = xla_hlo.maximum %arg0, %arg1 : tensor<4xf32> - // CHECK-NEXT: "xla_lhlo.maximum"(%[[NEW_ARG0]], %[[NEW_ARG1]], %[[MAX_RESULT]]) %2 = xla_hlo.add %arg0, %1 : tensor<4xf32> - // CHECK-NEXT: "xla_lhlo.add"(%[[NEW_ARG0]], %[[MAX_RESULT]], %[[ADD_RESULT]]) %3 = 
xla_hlo.minimum %arg0, %arg1 : tensor<4xf32> - // CHECK-NEXT: "xla_lhlo.minimum"(%[[NEW_ARG0]], %[[NEW_ARG1]], %[[MIN_RESULT]]) %4 = xla_hlo.subtract %arg1, %3 : tensor<4xf32> - // CHECK-NEXT: "xla_lhlo.subtract"(%[[NEW_ARG1]], %[[MIN_RESULT]], %[[SUB_RESULT]]) %5 = xla_hlo.multiply %2, %4 : tensor<4xf32> - // CHECK-NEXT: "xla_lhlo.multiply"(%[[ADD_RESULT]], %[[SUB_RESULT]], %[[MUL_RESULT]]) - // CHECK-NEXT: dealloc %[[MAX_RESULT]] : memref<4xf32> - // CHECK-NEXT: dealloc %[[ADD_RESULT]] : memref<4xf32> - // CHECK-NEXT: dealloc %[[MIN_RESULT]] : memref<4xf32> - // CHECK-NEXT: dealloc %[[SUB_RESULT]] : memref<4xf32> - // CHECK-NEXT: "xla_lhlo.copy"(%[[MUL_RESULT]], %[[RESULT]]) : (memref<4xf32>, memref<4xf32>) -> () - // CHECK-NEXT: dealloc %[[MUL_RESULT]] : memref<4xf32> return %5 : tensor<4xf32> - // CHECK-NEXT: "xla_lhlo.terminator"() : () -> () } +// CHECK: (%[[NEW_ARG0:.*]]: memref<4xf32>, %[[NEW_ARG1:.*]]: memref<4xf32>, %[[RESULT:.*]]: memref<4xf32>) +// CHECK-NEXT: %[[MAX_RESULT:.*]] = alloc() : memref<4xf32> +// CHECK-NEXT: "xla_lhlo.maximum"(%[[NEW_ARG0]], %[[NEW_ARG1]], %[[MAX_RESULT]]) +// CHECK-NEXT: %[[ADD_RESULT:.*]] = alloc() : memref<4xf32> +// CHECK-NEXT: "xla_lhlo.add"(%[[NEW_ARG0]], %[[MAX_RESULT]], %[[ADD_RESULT]]) +// CHECK-NEXT: dealloc %[[MAX_RESULT]] : memref<4xf32> +// CHECK-NEXT: %[[MIN_RESULT:.*]] = alloc() : memref<4xf32> +// CHECK-NEXT: "xla_lhlo.minimum"(%[[NEW_ARG0]], %[[NEW_ARG1]], %[[MIN_RESULT]]) +// CHECK-NEXT: %[[SUB_RESULT:.*]] = alloc() : memref<4xf32> +// CHECK-NEXT: "xla_lhlo.subtract"(%[[NEW_ARG1]], %[[MIN_RESULT]], %[[SUB_RESULT]]) +// CHECK-NEXT: dealloc %[[MIN_RESULT]] : memref<4xf32> +// CHECK-NEXT: %[[MUL_RESULT:.*]] = alloc() : memref<4xf32> +// CHECK-NEXT: "xla_lhlo.multiply"(%[[ADD_RESULT]], %[[SUB_RESULT]], %[[MUL_RESULT]]) +// CHECK-NEXT: dealloc %[[SUB_RESULT]] : memref<4xf32> +// CHECK-NEXT: dealloc %[[ADD_RESULT]] : memref<4xf32> +// CHECK-NEXT: "xla_lhlo.copy"(%[[MUL_RESULT]], %[[RESULT]]) : (memref<4xf32>, 
memref<4xf32>) -> () +// CHECK-NEXT: dealloc %[[MUL_RESULT]] : memref<4xf32> +// CHECK-NEXT: "xla_lhlo.terminator"() : () -> () // ----- @@ -47,20 +56,20 @@ func @func_op_long(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> func @fusion(%multiplier: memref<2x2xf32>, %summand_1: memref<2x2xf32>, %summand_2: memref<2x2xf32>, %result: memref<2x2xf32>) { // CHECK: (%{{.*}}: {{.*}}, {{.*}}: {{.*}}, {{.*}}: {{.*}}, %[[RESULT:.*]]: {{.*}}) - // CHECK-NEXT: %[[MUL_RESULT:.*]] = alloc() {temp = true} : memref<2x2xf32> - // CHECK-NEXT: %[[ADD_RESULT:.*]] = alloc() {temp = true} : memref<2x2xf32> + // CHECK-NEXT: %[[ADD_RESULT:.*]] = alloc() : memref<2x2xf32> %tensor_summand_1 = tensor_load %summand_1 : memref<2x2xf32> %tensor_summand_2 = tensor_load %summand_2 : memref<2x2xf32> %sum = "xla_hlo.add"(%tensor_summand_1, %tensor_summand_2) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> // CHECK-NEXT: "xla_lhlo.add"(%{{.*}}, %{{.*}}, %[[ADD_RESULT]]) + // CHECK-NEXT: %[[MUL_RESULT:.*]] = alloc() : memref<2x2xf32> %tensor_multiplier = tensor_load %multiplier : memref<2x2xf32> %tensor_result = "xla_hlo.multiply"(%sum, %tensor_multiplier) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> // CHECK-NEXT: "xla_lhlo.multiply"(%[[ADD_RESULT]], %{{.*}}, %[[MUL_RESULT]]) + // CHECK-NEXT: dealloc %[[ADD_RESULT]] : memref<2x2xf32> // CHECK-NEXT: "xla_lhlo.copy"(%[[MUL_RESULT]], %[[RESULT]]) tensor_store %tensor_result, %result : memref<2x2xf32> - // CHECK-NEXT: dealloc %[[ADD_RESULT]] : memref<2x2xf32> // CHECK-NEXT: dealloc %[[MUL_RESULT]] : memref<2x2xf32> // CHECK-NEXT: "xla_lhlo.terminator"() : () -> () "xla_lhlo.terminator"() : () -> () diff --git a/tensorflow/compiler/mlir/xla/tests/hlo-legalize-to-linalg.mlir b/tensorflow/compiler/mlir/xla/tests/hlo-legalize-to-linalg.mlir index aa949a01388..a856ee5e83c 100644 --- a/tensorflow/compiler/mlir/xla/tests/hlo-legalize-to-linalg.mlir +++ b/tensorflow/compiler/mlir/xla/tests/hlo-legalize-to-linalg.mlir @@ -530,3 
+530,15 @@ func @convert_f64_to_f32(%input: tensor<2x2xf64>) -> tensor<2x2xf32> { // CHECK-NEXT: ^bb0(%[[OPERAND_IN:.*]]: f64): // CHECK-NEXT: %[[RESULT:.*]] = fptrunc %[[OPERAND_IN]] : f64 to f32 // CHECK-NEXT: linalg.yield %[[RESULT]] : f32 + +// ----- + +// CHECK-LABEL: func @convert_f32_to_i32 +func @convert_f32_to_i32(%input: tensor<2x2xf32>) -> tensor<2x2xi32> { + %result = "xla_hlo.convert"(%input) : (tensor<2x2xf32>) -> tensor<2x2xi32> + return %result : tensor<2x2xi32> +} +// CHECK: linalg.generic +// CHECK-NEXT: ^bb0(%[[OPERAND_IN:.*]]: f32): +// CHECK-NEXT: %[[RESULT:.*]] = fptosi %[[OPERAND_IN]] : f32 to i32 +// CHECK-NEXT: linalg.yield %[[RESULT]] : i32 diff --git a/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/passthrough.mlir b/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/passthrough.mlir index cda1dc481a7..6a2b68adac3 100644 --- a/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/passthrough.mlir +++ b/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/passthrough.mlir @@ -8,7 +8,9 @@ // CHECK-SAME: ) { func @main(%value: tensor<2x2xf32>) -> tensor<2x2xf32> { // The only expected instruction is a copy from the input into the output. 
- // CHECK: %[[OUTPUT:.*]] = std.view %[[ARG1]][][] : memref<16xi8> to memref<2x2xf32> + // CHECK: %[[C0:.*]] = constant 0 : index + // CHECK: %[[C02:.*]] = constant 0 : index + // CHECK: %[[OUTPUT:.*]] = std.view %[[ARG1]][%[[C02]]][] : memref<16xi8> to memref<2x2xf32> // CHECK: xla_lhlo.copy // CHECK-SAME: %[[ARG0]], %[[OUTPUT]] return %value : tensor<2x2xf32> diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf-full-conversion.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf-full-conversion.mlir index d2b4d269fef..0660af4ed1c 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf-full-conversion.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf-full-conversion.mlir @@ -1,22 +1,24 @@ // RUN: tf-opt %s -xla-legalize-tf -split-input-file -verify-diagnostics +// expected-error@below{{The following operations cannot be legalized: tf.NoOp (count: 1); tf_executor.fetch (count: 1); tf_executor.graph (count: 1); tf_executor.island (count: 1); tf_executor.yield (count: 1). These legalization failure(s) may be due to missing TF to HLO lowerings and/or unsupported attributes, etc.}} +// expected-error@below{{Emitting more detail about one op that failed to legalize...}} func @tf_executor_graph_op() { - // expected-error@+1 {{failed to legalize operation 'tf_executor.graph'}} tf_executor.graph { %0 = tf_executor.island { + // expected-error@+1 {{'tf.NoOp' op is not legalizable}} "tf.NoOp"() {} : () -> () tf_executor.yield } tf_executor.fetch } return - } // ----- +// expected-error@below{{The following operations cannot be legalized: tf.OpA (count: 1). 
These legalization failure(s) may be due to missing TF to HLO lowerings and/or unsupported attributes, etc.}} func @tf_unknown_op(%arg0: tensor<2xi32>) -> tensor<2xi32> { - // expected-error@+1 {{failed to legalize operation 'tf.OpA'}} + // expected-error@+1 {{'tf.OpA' op is not legalizable}} %0 = "tf.OpA"(%arg0, %arg0) : (tensor<2xi32>, tensor<2xi32>) -> tensor<2xi32> return %0: tensor<2xi32> } @@ -27,3 +29,16 @@ func @tf_known_op(%arg0: tensor<2xi32>) -> tensor<2xi32> { %0 = "tf.Add"(%arg0, %arg0) : (tensor<2xi32>, tensor<2xi32>) -> tensor<2xi32> return %0: tensor<2xi32> } + +// ----- + +// expected-error@below{{The following operations cannot be legalized: tf.OpA (count: 1); tf.OpB (count: 2). These legalization failure(s) may be due to missing TF to HLO lowerings and/or unsupported attributes, etc.}} +// expected-error@below{{Emitting more detail about one op that failed to legalize...}} +func @tf_unknown_known_mix(%arg0: tensor<2xi32>) -> tensor<2xi32> { + // expected-error@+1 {{'tf.OpA' op is not legalizable}} + %0 = "tf.OpA"(%arg0, %arg0) : (tensor<2xi32>, tensor<2xi32>) -> tensor<2xi32> + %1 = "tf.OpB"(%0, %0) : (tensor<2xi32>, tensor<2xi32>) -> tensor<2xi32> + %2 = "tf.Add"(%1, %1) : (tensor<2xi32>, tensor<2xi32>) -> tensor<2xi32> + %3 = "tf.OpB"(%2, %2) : (tensor<2xi32>, tensor<2xi32>) -> tensor<2xi32> + return %2: tensor<2xi32> +} diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir index e15101a165e..a5353beb772 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir @@ -1596,6 +1596,44 @@ func @unhandled_partitioned_call_2(%arg0: tensor, %arg1: tensor<*xi32>) -> return %0, %1 : tensor, tensor } + +//===----------------------------------------------------------------------===// +// ReverseV2 op legalization. 
+//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @reverse_func_32 +func @reverse_func_32(%arg0: tensor<5xi32>) -> tensor<5xi32> { + %axis = "tf.Const"() {value = dense<0> : tensor<1xi32>} : () -> (tensor<1xi32>) + + // CHECK: [[VAL:%.+]] = "xla_hlo.reverse"(%arg0) {dimensions = dense<0> : tensor<1xi64>} + %reversed = "tf.ReverseV2"(%arg0, %axis) : (tensor<5xi32>, tensor<1xi32>) -> tensor<5xi32> + + // CHECK: return [[VAL]] : tensor<5xi32> + return %reversed : tensor<5xi32> +} + +// CHECK-LABEL: @reverse_func_64 +func @reverse_func_64(%arg0: tensor<5xi32>) -> tensor<5xi32> { + %axis = "tf.Const"() {value = dense<0> : tensor<1xi64>} : () -> (tensor<1xi64>) + + // CHECK: [[VAL:%.+]] = "xla_hlo.reverse"(%arg0) {dimensions = dense<0> : tensor<1xi64>} + %reversed = "tf.ReverseV2"(%arg0, %axis) : (tensor<5xi32>, tensor<1xi64>) -> tensor<5xi32> + + // CHECK: return [[VAL]] : tensor<5xi32> + return %reversed : tensor<5xi32> +} + +// CHECK-LABEL: @reverse_func_neg +func @reverse_func_neg(%arg0: tensor<5x5xi32>) -> tensor<5x5xi32> { + %axis = "tf.Const"() {value = dense<[-1]> : tensor<1xi32>} : () -> (tensor<1xi32>) + + // CHECK: [[VAL:%.+]] = "xla_hlo.reverse"(%arg0) {dimensions = dense<1> : tensor<1xi64>} + %reversed = "tf.ReverseV2"(%arg0, %axis) : (tensor<5x5xi32>, tensor<1xi32>) -> tensor<5x5xi32> + + // CHECK: return [[VAL]] : tensor<5x5xi32> + return %reversed : tensor<5x5xi32> +} + //===----------------------------------------------------------------------===// // StatefulPartitionedCall op legalization. 
//===----------------------------------------------------------------------===// @@ -2205,13 +2243,6 @@ func @sin_unranked(%arg0: tensor<*xf32>) -> tensor<*xf32> { return %0 : tensor<*xf32> } -// CHECK-LABEL: func @round -func @round(%arg0: tensor<2xf32>) -> tensor<2xf32> { - // CHECK: "xla_hlo.round_nearest_afz"(%arg0) : (tensor<2xf32>) -> tensor<2xf32> - %0 = "tf.Round"(%arg0) : (tensor<2xf32>) -> tensor<2xf32> - return %0 : tensor<2xf32> -} - // CHECK-LABEL: func @rsqrt func @rsqrt(%arg0: tensor<2xf32>) -> tensor<2xf32> { // CHECK: "xla_hlo.rsqrt"(%arg0) : (tensor<2xf32>) -> tensor<2xf32> @@ -3720,11 +3751,11 @@ func @unsorted_segment_max(%data: tensor<8x?x64xf32>, %segment_ids : tensor, %arg1: tensor<16x5xi32>) -> tensor<16x2x5x3xf32> { - // CHECK: "xla_hlo.torch_index_select"(%arg0, %arg1) {batch_dims = 1 : i64, dim = 2 : i64} : (tensor<16x2x3xf32>, tensor<16x5xi32>) -> tensor<16x2x5x3xf32> +func @gather_v2(%arg0: tensor<16x2x3xf32>, %arg1: tensor<16x5xi32>) -> tensor<16x2x5xf32> { + // CHECK: "xla_hlo.torch_index_select"(%arg0, %arg1) {batch_dims = 1 : i64, dim = 2 : i64} : (tensor<16x2x3xf32>, tensor<16x5xi32>) -> tensor<16x2x5xf32> %0 = "tf.Const"() { value = dense<[-1]> : tensor<1xi32> } : () -> tensor<1xi32> - %1 = "tf.GatherV2"(%arg0, %arg1, %0) {batch_dims = -1 : i64} : (tensor<16x2x3xf32>, tensor<16x5xi32>, tensor<1xi32>) -> tensor<16x2x5x3xf32> - return %1 : tensor<16x2x5x3xf32> + %1 = "tf.GatherV2"(%arg0, %arg1, %0) {batch_dims = -1 : i64} : (tensor<16x2x3xf32>, tensor<16x5xi32>, tensor<1xi32>) -> tensor<16x2x5xf32> + return %1 : tensor<16x2x5xf32> } // CHECK-LABEL: @gather_v2_dynamic @@ -4081,6 +4112,41 @@ func @xla_sharding(%arg0: tensor<4x16xf32>) -> tensor<4x16xf32> { return %0 : tensor<4x16xf32> } +// CHECK-LABEL: inplace_update_one +func @inplace_update_one(%arg0: tensor<8x4xf32>, %arg1: tensor<1x4xf32>, %arg2: tensor<1xi32>) -> tensor<8x4xf32> { + // CHECK-DAG: [[CST:%.+]] = xla_hlo.constant dense<0> + // CHECK-DAG: [[SLICE1:%.+]] = 
"xla_hlo.slice"(%arg2) {limit_indices = dense<1> : tensor<1xi64>, start_indices = dense<0> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + // CHECK-DAG: [[SLICE2:%.+]] = "xla_hlo.slice"(%arg1) {limit_indices = dense<[1, 4]> : tensor<2xi64>, start_indices = dense<0> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} + // CHECK-DAG: [[RESHAPE1:%.+]] = "xla_hlo.reshape"([[SLICE1]]) + // CHECK-DAG: [[UPDATE:%.+]] = "xla_hlo.dynamic-update-slice"(%arg0, [[SLICE2]], [[RESHAPE1]], [[CST]]) + %0 = "tf.InplaceUpdate"(%arg0, %arg2, %arg1) : (tensor<8x4xf32>, tensor<1xi32>, tensor<1x4xf32>) -> tensor<8x4xf32> + + // CHECK: return [[UPDATE]] + return %0 : tensor<8x4xf32> +} + +// CHECK-LABEL: inplace_update_three +func @inplace_update_three(%arg0: tensor<8x8x4xf32>, %arg1: tensor<3x8x4xf32>, %arg2: tensor<3xi32>) -> tensor<8x8x4xf32> { + // CHECK-DAG: [[CST:%.+]] = xla_hlo.constant dense<0> + // CHECK-DAG: [[SLICE1:%.+]] = "xla_hlo.slice"(%arg2) {limit_indices = dense<1> : tensor<1xi64>, start_indices = dense<0> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + // CHECK-DAG: [[SLICE2:%.+]] = "xla_hlo.slice"(%arg2) {limit_indices = dense<2> : tensor<1xi64>, start_indices = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + // CHECK-DAG: [[SLICE3:%.+]] = "xla_hlo.slice"(%arg2) {limit_indices = dense<3> : tensor<1xi64>, start_indices = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + // CHECK-DAG: [[SLICE4:%.+]] = "xla_hlo.slice"(%arg1) {limit_indices = dense<[1, 8, 4]> : tensor<3xi64>, start_indices = dense<0> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} + // CHECK-DAG: [[SLICE5:%.+]] = "xla_hlo.slice"(%arg1) {limit_indices = dense<[2, 8, 4]> : tensor<3xi64>, start_indices = dense<[1, 0, 0]> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} + // CHECK-DAG: [[SLICE6:%.+]] = "xla_hlo.slice"(%arg1) {limit_indices = dense<[3, 8, 4]> : tensor<3xi64>, start_indices = dense<[2, 0, 0]> : tensor<3xi64>, strides = dense<1> : 
tensor<3xi64>} + // CHECK-DAG: [[RESHAPE1:%.+]] = "xla_hlo.reshape"([[SLICE1]]) + // CHECK-DAG: [[RESHAPE2:%.+]] = "xla_hlo.reshape"([[SLICE2]]) + // CHECK-DAG: [[RESHAPE3:%.+]] = "xla_hlo.reshape"([[SLICE3]]) + // CHECK-DAG: [[UPDATE1:%.+]] = "xla_hlo.dynamic-update-slice"(%arg0, [[SLICE4]], [[RESHAPE1]], [[CST]], [[CST]]) + // CHECK-DAG: [[UPDATE2:%.+]] = "xla_hlo.dynamic-update-slice"([[UPDATE1]], [[SLICE5]], [[RESHAPE2]], [[CST]], [[CST]]) + // CHECK-DAG: [[UPDATE3:%.+]] = "xla_hlo.dynamic-update-slice"([[UPDATE2]], [[SLICE6]], [[RESHAPE3]], [[CST]], [[CST]]) + %0 = "tf.InplaceUpdate"(%arg0, %arg2, %arg1) : (tensor<8x8x4xf32>, tensor<3xi32>, tensor<3x8x4xf32>) -> tensor<8x8x4xf32> + + // CHECK: return [[UPDATE3]] : tensor<8x8x4xf32> + return %0 : tensor<8x8x4xf32> +} + + // CHECK-LABEL: xla_dynamic_update_slice func @xla_dynamic_update_slice(%arg0: tensor<4x16xf32>, %arg1: tensor<2x4xf32>, %arg2: tensor<2xi32>) -> tensor<4x16xf32> { // CHECK: [[SLICE0:%.+]] = "xla_hlo.slice"(%arg2) {limit_indices = dense<1> : tensor<1xi64>, start_indices = dense<0> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} : (tensor<2xi32>) -> tensor<1xi32> @@ -4103,6 +4169,21 @@ func @xla_dynamic_update_slice2(%arg0: tensor<4xf32>, %arg1: tensor<2xf32>, %arg return %0 : tensor<4xf32> } +//===----------------------------------------------------------------------===// +// AllToAll op legalizations. 
+//===----------------------------------------------------------------------===// + +// CHECK-LABEL: func @alltoall_basic +func @alltoall_basic(%input: tensor<10xf32>) -> tensor<10xf32> { + %group_assignment = "tf.Const" () { + value = dense<[[0, 2, 4, 6], [1, 3, 5, 7], [3, 5, 6, 8]]> : tensor<3x4xi32> + } : () -> tensor<3x4xi32> + %result = "tf.AllToAll"(%input, %group_assignment) {T = f32, concat_dimension = 1 : i64, split_count = 2 : i64, split_dimension = 0 : i64} : (tensor<10xf32>, tensor<3x4xi32>) -> tensor<10xf32> + // CHECK: xla_hlo.all_to_all + // CHECK-SAME: replica_groups = dense<{{\[}}[0, 2, 4, 6], [1, 3, 5, 7], [3, 5, 6, 8]]> : tensor<3x4xi64> + return %result : tensor<10xf32> +} + //===----------------------------------------------------------------------===// // Cumsum op legalizations. //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-linalg.mlir b/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-linalg.mlir index 0fc30ed4901..bb8010b520c 100644 --- a/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-linalg.mlir +++ b/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-linalg.mlir @@ -411,6 +411,19 @@ func @convert_f32_to_f32(%input: memref<2x2xf32>, %result: memref<2x2xf32>) { // ----- +// CHECK-LABEL: func @convert_f32_to_i32 +func @convert_f32_to_i32(%input: memref<2x2xf32>, %result: memref<2x2xi32>) { + "xla_lhlo.convert"(%input, %result) + : (memref<2x2xf32>, memref<2x2xi32>) -> () + return +} +// CHECK: linalg.generic +// CHECK-NEXT: ^bb0(%[[OPERAND_IN:.*]]: f32, %[[RESULT_OUT:.*]]: i32): +// CHECK-NEXT: %[[RESULT:.*]] = fptosi %[[OPERAND_IN]] : f32 to i32 +// CHECK-NEXT: linalg.yield %[[RESULT]] : i32 + +// ----- + // CHECK-LABEL: func @cos func @cos(%input: memref<2x2xf32>, %result: memref<2x2xf32>) { "xla_lhlo.cosine"(%input, %result) : (memref<2x2xf32>, memref<2x2xf32>) -> () @@ -523,6 +536,48 @@ func @tanh(%input: memref<2x2xf32>, %result: 
memref<2x2xf32>) { // CHECK-NEXT: %[[RESULT:.*]] = tanh %[[OPERAND_IN]] : f32 // CHECK-NEXT: linalg.yield %[[RESULT]] : f32 +// ----- + +// CHECK-LABEL: func @complex +func @complex(%real: memref<2x2xf32>, + %imag: memref<2x2xf32>, + %cplx: memref<2x2xcomplex>) { + "xla_lhlo.complex"(%real, %imag, %cplx) + : (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xcomplex>) -> () + return +} +// CHECK: linalg.generic +// CHECK-NEXT: ^bb0(%[[RE:.*]]: f32, %[[IM:.*]]: f32, %[[CP:.*]]: complex): +// CHECK-NEXT: %[[RESULT:.*]] = create_complex %[[RE]], %[[IM]] : complex +// CHECK-NEXT: linalg.yield %[[RESULT]] : complex + +// ----- + +// CHECK-LABEL: func @real +func @real(%cplx: memref<2x2xcomplex>, + %real: memref<2x2xf32>) { + "xla_lhlo.real"(%cplx, %real) + : (memref<2x2xcomplex>, memref<2x2xf32>) -> () + return +} +// CHECK: linalg.generic +// CHECK-NEXT: ^bb0(%[[CPLX_IN:.*]]: complex, %[[REAL_OUT:.*]]: f32): +// CHECK-NEXT: %[[REAL:.*]] = re %[[CPLX_IN:.*]] : complex +// CHECK-NEXT: linalg.yield %[[REAL]] : f32 + +// ----- + +// CHECK-LABEL: func @imag +func @imag(%cplx: memref<2x2xcomplex>, + %imag: memref<2x2xf32>) { + "xla_lhlo.imag"(%cplx, %imag) + : (memref<2x2xcomplex>, memref<2x2xf32>) -> () + return +} +// CHECK: linalg.generic +// CHECK-NEXT: ^bb0(%[[CPLX_IN:.*]]: complex, %[[IMAG_OUT:.*]]: f32): +// CHECK-NEXT: %[[IMAG:.*]] = im %[[CPLX_IN:.*]] : complex +// CHECK-NEXT: linalg.yield %[[IMAG]] : f32 // ----- diff --git a/tensorflow/compiler/mlir/xla/tests/materialize-broadcasts.mlir b/tensorflow/compiler/mlir/xla/tests/materialize-broadcasts.mlir index 4050340ce49..2340650dda8 100644 --- a/tensorflow/compiler/mlir/xla/tests/materialize-broadcasts.mlir +++ b/tensorflow/compiler/mlir/xla/tests/materialize-broadcasts.mlir @@ -20,6 +20,17 @@ func @addBroadcastLhs(%arg0: tensor<4xf32>, %arg1: tensor<1x4xf32>) -> tensor<1x // ----- +// CHECK-LABEL: @addBroadcastEqual +func @addBroadcastEqual(%arg0: tensor<4x1xf32>, %arg1: tensor<1x4xf32>) -> tensor<4x4xf32> { + // 
CHECK-NEXT: %[[BROADCAST0:.*]] = "xla_hlo.broadcast_in_dim"(%arg0) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor<4x1xf32>) -> tensor<4x4xf32> + // CHECK-NEXT: %[[BROADCAST1:.*]] = "xla_hlo.broadcast_in_dim"(%arg1) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor<1x4xf32>) -> tensor<4x4xf32> + // CHECK-NEXT: %[[RESULT:.*]] = xla_hlo.add %[[BROADCAST0]], %[[BROADCAST1]] : tensor<4x4xf32> + %0 = "xla_hlo.add"(%arg0, %arg1) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<4x1xf32>, tensor<1x4xf32>) -> tensor<4x4xf32> + return %0 : tensor<4x4xf32> +} + +// ----- + // CHECK-LABEL: @addBroadcastMultidimension func @addBroadcastMultidimension(%arg0: tensor<1x1xf32>, %arg1: tensor<1x1x4xf32>) -> tensor<1x1x4xf32> { // CHECK-NEXT: %[[BROADCAST0:.*]] = "xla_hlo.broadcast_in_dim"(%arg0) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor<1x1xf32>) -> tensor<1x1x4xf32> diff --git a/tensorflow/compiler/mlir/xla/tests/translate/export.mlir b/tensorflow/compiler/mlir/xla/tests/translate/export.mlir index 3650307ea94..15fa91588a5 100644 --- a/tensorflow/compiler/mlir/xla/tests/translate/export.mlir +++ b/tensorflow/compiler/mlir/xla/tests/translate/export.mlir @@ -294,6 +294,12 @@ func @main() { // CHECK: f16[4] constant({1, -4, -65504, 0.015625} %cst_8 = constant dense<[1.0e+00, -4.0e+00, -65504.0e+00, 1.5625e-02]> : tensor<4xf16> + // CHECK: c64[] constant((1, 0)) + %cst_9 = constant dense<(1.000000e+00,0.000000e+00)> : tensor> + + // CHECK: c128[] constant((1, 0)) + %cst_10 = constant dense<(1.000000e+00,0.000000e+00)> : tensor> + return } @@ -1038,3 +1044,16 @@ func @main(%arg0: tensor<4xui8>) -> (tensor<4xui8>) { // CHECK: ENTRY // CHECK: %[[ARG0:.*]] = u8[4] parameter(0) // ROOT %[[RESULT:.*]] = u8[4] not(u8[4] %[[ARG0]]) + +// ----- + +// CHECK: HloModule +func @main(%arg0: tensor<4xi32>) -> (tensor<*xi32>) { + %0 = "xla_hlo.not"(%arg0) : (tensor<4xi32>) -> tensor<4xi32> + %1 = tensor_cast %0 : tensor<4xi32> to 
tensor<*xi32> + return %1 : tensor<*xi32> +} + +// CHECK: ENTRY +// CHECK: %[[ARG0:.*]] = s32[4] parameter(0) +// ROOT %[[RESULT:.*]] = s32[4] not(s32[4] %[[ARG0]]) diff --git a/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt b/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt index 75471e3a090..207a8f2eabc 100644 --- a/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt +++ b/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt @@ -212,10 +212,14 @@ add { // CHECK: dense<[1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00]> : tensor<4xbf16> %constant.3 = bf16[4] constant({1, 2, 3, 4}) + // CHECK: dense<(1.000000e+00,0.000000e+00)> : tensor> + %constant.4 = c64[] constant((1, 0)) + + // CHECK: dense<(1.000000e+00,0.000000e+00)> : tensor> + %constant.5 = c128[] constant((1, 0)) + // CHECK: dense<[1.000000e+00, -4.000000e+00, -6.550400e+04, 1.562500e-02]> : tensor<4xf16> - ROOT %constant.4 = f16[4] constant({1, -4, -65504, 0.015625}) - - + ROOT %constant.6 = f16[4] constant({1, -4, -65504, 0.015625}) } // TODO(b/129422361) Potentially update when copy, reshape, and conv have actual diff --git a/tensorflow/compiler/mlir/xla/transforms/chlo_legalize_to_hlo.cc b/tensorflow/compiler/mlir/xla/transforms/chlo_legalize_to_hlo.cc index a20511a95fc..0c9585a817f 100644 --- a/tensorflow/compiler/mlir/xla/transforms/chlo_legalize_to_hlo.cc +++ b/tensorflow/compiler/mlir/xla/transforms/chlo_legalize_to_hlo.cc @@ -33,24 +33,23 @@ namespace { // Converts binary ops that statically are determined to not broadcast directly // to the corresponding xla_hlo non-broadcasting op. 
template -struct ConvertTrivialNonBroadcastBinaryOp - : public OpConversionPattern { - using OpConversionPattern::OpConversionPattern; - LogicalResult matchAndRewrite( - ChloOpTy op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { +struct ConvertTrivialNonBroadcastBinaryOp : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + LogicalResult matchAndRewrite(ChloOpTy op, + PatternRewriter &rewriter) const override { // Only rewrite for statically determinable non-broadcasting cases. - auto lhs = operands[0].getType().dyn_cast(); - auto rhs = operands[1].getType().dyn_cast(); - if (!lhs || !rhs) return failure(); + auto lhs_type = op.lhs().getType().template dyn_cast(); + auto rhs_type = op.rhs().getType().template dyn_cast(); + if (!lhs_type || !rhs_type) return failure(); // Requires rank broadcast. - if (lhs.getRank() != rhs.getRank()) return failure(); + if (lhs_type.getRank() != rhs_type.getRank()) return failure(); // Any dynamic dimension may require broadcasting and requires more // analysis. - if (!lhs.hasStaticShape() || !rhs.hasStaticShape()) return failure(); + if (!lhs_type.hasStaticShape() || !rhs_type.hasStaticShape()) + return failure(); - for (auto extents : llvm::zip(lhs.getShape(), rhs.getShape())) { + for (auto extents : llvm::zip(lhs_type.getShape(), rhs_type.getShape())) { auto lhs_extent = std::get<0>(extents); auto rhs_extent = std::get<1>(extents); if (lhs_extent != rhs_extent) { @@ -58,9 +57,8 @@ struct ConvertTrivialNonBroadcastBinaryOp } } - rewriter.replaceOp( - op, {Adaptor::CreateOp(op, op.getResult().getType(), operands[0], - operands[1], rewriter)}); + rewriter.replaceOp(op, {Adaptor::CreateOp(op, op.getResult().getType(), + op.lhs(), op.rhs(), rewriter)}); return success(); } }; @@ -83,14 +81,13 @@ struct ConvertTrivialNonBroadcastBinaryOp // Whether that is of any practical benefit remains to be seen. 
template struct ConvertRankedDynamicBroadcastBinaryOp - : public OpConversionPattern { - using OpConversionPattern::OpConversionPattern; - LogicalResult matchAndRewrite( - ChloOpTy op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + LogicalResult matchAndRewrite(ChloOpTy op, + PatternRewriter &rewriter) const override { // Only support ranked operands. - Value lhs = operands[0]; - Value rhs = operands[1]; + Value lhs = op.lhs(); + Value rhs = op.rhs(); auto lhs_type = lhs.getType().dyn_cast(); auto rhs_type = rhs.getType().dyn_cast(); auto result_type = diff --git a/tensorflow/compiler/mlir/xla/transforms/hlo_legalize_to_lhlo.cc b/tensorflow/compiler/mlir/xla/transforms/hlo_legalize_to_lhlo.cc index aa29241048b..10f35768bbd 100644 --- a/tensorflow/compiler/mlir/xla/transforms/hlo_legalize_to_lhlo.cc +++ b/tensorflow/compiler/mlir/xla/transforms/hlo_legalize_to_lhlo.cc @@ -27,6 +27,7 @@ limitations under the License. #include "mlir/IR/PatternMatch.h" // from @llvm-project #include "mlir/IR/StandardTypes.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project +#include "mlir/Transforms/BufferPlacement.h" // from @llvm-project #include "mlir/Transforms/DialectConversion.h" // from @llvm-project #include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h" #include "tensorflow/compiler/mlir/xla/ir/lhlo_ops.h" @@ -39,16 +40,11 @@ namespace xla_hlo { namespace { constexpr StringRef kTempBufferAttr = "temp"; - -/// Returns DeallocOp to ensure that CopyOp is not inserted after dealloc. 
-Operation* FindInsertionPointForCopy(Value value) { - for (const auto& user : value.getUsers()) { - if (auto dealloc = dyn_cast(user)) { - return user; - } - } - return nullptr; -} +template +using BaseOpConversion = BufferAssignmentOpConversionPattern; +using StdReturnOpConverter = + NonVoidToVoidReturnOpConverter; Value InsertDynamicAllocAndDealloc(Location loc, Value result, Value shape_operand, @@ -92,8 +88,9 @@ Value InsertDynamicAllocAndDealloc(Location loc, Value result, return alloc; } -Value InsertAllocAndDealloc(Location loc, Value result, - ConversionPatternRewriter* rewriter) { +Value InsertAlloc(Location loc, OpResult result, + BufferAssignmentPlacer* bufferAssignment, + ConversionPatternRewriter* rewriter) { auto result_type = result.getType().dyn_cast(); if (!result_type || !result_type.hasStaticShape()) { result.getDefiningOp()->emitOpError() @@ -101,31 +98,21 @@ Value InsertAllocAndDealloc(Location loc, Value result, } auto memref_type = MemRefType::get(result_type.getShape(), result_type.getElementType()); - - Operation* op = result.getDefiningOp(); - auto block = op->getBlock(); - - OpBuilder allocBuilder(op); - allocBuilder.setInsertionPointToStart(block); // Inserting at the beginning - auto alloc = allocBuilder.create(loc, memref_type); - - alloc.setAttr(kTempBufferAttr, rewriter->getBoolAttr(true)); - - allocBuilder.setInsertionPoint(block, std::prev(block->end())); - allocBuilder.create(loc, alloc); - + OpBuilder::InsertionGuard guard(*rewriter); + rewriter->restoreInsertionPoint( + bufferAssignment->computeAllocPosition(result)); + auto alloc = rewriter->create(loc, memref_type); return alloc; } template -class HloToLhloOpConverter : public ConversionPattern { +class HloToLhloOpConverter : public BaseOpConversion { public: - explicit HloToLhloOpConverter(MLIRContext* context) - : ConversionPattern(HloOpTy::getOperationName(), 1, context) {} - + using BaseOpConversion::BaseOpConversion; LogicalResult matchAndRewrite( - Operation* op, 
ArrayRef operands, + HloOpTy hloOp, ArrayRef operands, ConversionPatternRewriter& rewriter) const final { + Operation* op = hloOp.getOperation(); const auto& original_results = op->getResults(); SmallVector buffer_args(operands.begin(), operands.end()); for (auto result : llvm::enumerate(original_results)) { @@ -135,8 +122,8 @@ class HloToLhloOpConverter : public ConversionPattern { return failure(); } if (resultType.hasStaticShape()) { - buffer_args.push_back( - InsertAllocAndDealloc(op->getLoc(), result.value(), &rewriter)); + buffer_args.push_back(InsertAlloc(op->getLoc(), result.value(), + this->bufferAssignment, &rewriter)); } else { SmallVector results_shape; auto shape_type_op = dyn_cast(op); @@ -156,9 +143,9 @@ class HloToLhloOpConverter : public ConversionPattern { }; struct HloToLhloDynamicBroadcastInDimOpConverter - : public OpConversionPattern { + : public BaseOpConversion { public: - using OpConversionPattern::OpConversionPattern; + using BaseOpConversion::BaseOpConversion; LogicalResult matchAndRewrite( xla_hlo::DynamicBroadcastInDimOp op, ArrayRef operands, @@ -175,10 +162,9 @@ struct HloToLhloDynamicBroadcastInDimOpConverter } }; -struct HloToLhloReduceOpConverter - : public OpConversionPattern { +struct HloToLhloReduceOpConverter : public BaseOpConversion { public: - using OpConversionPattern::OpConversionPattern; + using BaseOpConversion::BaseOpConversion; LogicalResult matchAndRewrite( xla_hlo::ReduceOp op, ArrayRef operands, @@ -194,7 +180,8 @@ struct HloToLhloReduceOpConverter const auto& original_results = op.getResults(); SmallVector buffer_args(operands.begin(), operands.end()); for (auto result : original_results) { - buffer_args.push_back(InsertAllocAndDealloc(loc, result, &rewriter)); + buffer_args.push_back( + InsertAlloc(loc, result, this->bufferAssignment, &rewriter)); } auto new_op = rewriter.create( loc, llvm::None, buffer_args, op.getAttrs()); @@ -230,12 +217,12 @@ struct HloToLhloReduceOpConverter } }; -class 
HloToLhloTensorLoadOpConverter : public ConversionPattern { +class HloToLhloTensorLoadOpConverter + : public BaseOpConversion { public: - explicit HloToLhloTensorLoadOpConverter(MLIRContext* context) - : ConversionPattern(TensorLoadOp::getOperationName(), 1, context) {} + using BaseOpConversion::BaseOpConversion; LogicalResult matchAndRewrite( - Operation* op, ArrayRef operands, + mlir::TensorLoadOp op, ArrayRef operands, ConversionPatternRewriter& rewriter) const final { rewriter.replaceOp(op, operands); return success(); @@ -243,13 +230,13 @@ class HloToLhloTensorLoadOpConverter : public ConversionPattern { }; // TODO(b/137624192): Rewrite into a copy and elide copy if possible. -class HloToLhloTensorStoreOpConverter : public ConversionPattern { +class HloToLhloTensorStoreOpConverter + : public BaseOpConversion { public: - explicit HloToLhloTensorStoreOpConverter(MLIRContext* context) - : ConversionPattern(TensorStoreOp::getOperationName(), 1, context) {} + using BaseOpConversion::BaseOpConversion; LogicalResult matchAndRewrite( - Operation* op, ArrayRef operands, + mlir::TensorStoreOp op, ArrayRef operands, ConversionPatternRewriter& rewriter) const final { rewriter.replaceOpWithNewOp( op, llvm::None, operands.front(), operands.back()); @@ -291,7 +278,6 @@ class HloToLhloTensorStoreOpConverter : public ConversionPattern { // (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> () // "xla_lhlo.multiply"(%0, %arg0, %arg3) : // (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> () -// dealloc %0 : memref<2x2xf32> // "xla_lhlo.terminator"() : () -> () // }) : () -> () // return @@ -313,14 +299,13 @@ class HloToLhloTensorStoreOpConverter : public ConversionPattern { // %arg1: memref<4xf32>, // %arg2: memref<4xf32>) { // %0 = alloc() : memref<4xf32> -// %1 = alloc() : memref<4xf32> + // "xla_lhlo.maximum"(%arg0, %arg1, %0) : // (memref<4xf32>, memref<4xf32>, memref<4xf32>) -> () +// %1 = alloc() : memref<4xf32> // "xla_lhlo.add"(%arg0, %0, %1) : // 
(memref<4xf32>, memref<4xf32>, memref<4xf32>) -> () // "xla_lhlo.copy"(%1, %arg2) : (memref<4xf32>, memref<4xf32>) -> () -// dealloc %0 : memref<4xf32> -// dealloc %1 : memref<4xf32> // "xla_lhlo.terminator"() : () -> () // } @@ -346,101 +331,25 @@ struct HloLegalizeToLhlo }); auto module = getOperation(); - populateHLOToLHLOConversionPattern(module.getContext(), &patterns); - - // Do partial conversion so we can have unknown ops in tests. - if (failed(applyPartialConversion(module, target, patterns, nullptr))) { - signalPassFailure(); - } + BufferAssignmentTypeConverter converter; + module.walk([&](FuncOp func) { + BufferAssignmentPlacer bufferAssignment(func); + OwningRewritePatternList patterns; + populateHLOToLHLOConversionPattern(func.getContext(), &bufferAssignment, + &converter, &patterns); + return WalkResult( + applyPartialConversion(func, target, patterns, &converter)); + }); } }; - -Type ConvertType(Type t) { - if (auto tensorType = t.dyn_cast()) { - return MemRefType::get(tensorType.getShape(), tensorType.getElementType()); - } - return t; -} - } // namespace -/// Transforms FuncOp arguments and results from tensors to buffers. Tensor -/// results are converted to memrefs and appended to the argument list. 
-class HloToLhloFuncOpConverter : public OpConversionPattern { - public: - using OpConversionPattern::OpConversionPattern; - - LogicalResult matchAndRewrite( - FuncOp funcOp, ArrayRef operands, - ConversionPatternRewriter& rewriter) const final { - if (funcOp.getBody().getBlocks().size() > 1) { - funcOp.emitOpError() << "tensor to buffer conversion expects a single " - "block in the region containing the operation"; - return failure(); - } - - auto funcType = funcOp.getType(); - - TypeConverter::SignatureConversion conversion(funcType.getNumInputs()); - for (auto argType : llvm::enumerate(funcType.getInputs())) { - conversion.addInputs(argType.index(), ConvertType(argType.value())); - } - for (auto resType : funcType.getResults()) { - conversion.addInputs(ConvertType(resType)); - } - rewriter.updateRootInPlace(funcOp, [&] { - funcOp.setType( - rewriter.getFunctionType(conversion.getConvertedTypes(), llvm::None)); - rewriter.applySignatureConversion(&funcOp.getBody(), conversion); - }); - return success(); - } -}; - -/// Transforms ReturnOp to LhloTerminator. CopyOp is inserted to copy each -/// result to the corresponding buffer argument. 
-class StdToLhloReturnOpConverter : public OpConversionPattern { - public: - using OpConversionPattern::OpConversionPattern; - - LogicalResult matchAndRewrite( - mlir::ReturnOp returnOp, ArrayRef operands, - ConversionPatternRewriter& rewriter) const final { - auto numReturnValues = returnOp.getNumOperands(); - auto funcOp = returnOp.getParentOfType(); - auto numFuncArgs = funcOp.getNumArguments(); - auto loc = returnOp.getLoc(); - - for (auto operand : llvm::enumerate(operands)) { - auto returnArgNumber = numFuncArgs - numReturnValues + operand.index(); - auto dstBuffer = funcOp.getArgument(returnArgNumber); - if (dstBuffer == operand.value()) { - continue; - } - - auto dealloc = FindInsertionPointForCopy(operand.value()); - - if (dealloc == nullptr) { - returnOp.emitOpError() - << "Missing dealloc for operand " << operand.index(); - return failure(); - } - OpBuilder::InsertionGuard guard(rewriter); - rewriter.setInsertionPoint(dealloc); - rewriter.create(loc, llvm::None, operand.value(), - funcOp.getArgument(returnArgNumber)); - } - rewriter.replaceOpWithNewOp(returnOp); - return success(); - } -}; - -void populateHLOToLHLOConversionPattern(MLIRContext* context, - OwningRewritePatternList* patterns) { +void populateHLOToLHLOConversionPattern( + MLIRContext* context, BufferAssignmentPlacer* bufferAssignment, + TypeConverter* converter, OwningRewritePatternList* patterns) { // clang-format off patterns->insert< HloToLhloDynamicBroadcastInDimOpConverter, - HloToLhloFuncOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, @@ -472,8 +381,9 @@ void populateHLOToLHLOConversionPattern(MLIRContext* context, HloToLhloReduceOpConverter, HloToLhloTensorLoadOpConverter, HloToLhloTensorStoreOpConverter, - StdToLhloReturnOpConverter - >(context); + FunctionAndBlockSignatureConverter, + StdReturnOpConverter + >(context, bufferAssignment, converter); // clang-format on } diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc 
b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc index fb03c9b82e5..a0a5e47ad65 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc @@ -25,6 +25,7 @@ limitations under the License. #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/FormatVariadic.h" #include "mlir/Dialect/Shape/IR/Shape.h" // from @llvm-project #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/Dialect/Traits.h" // from @llvm-project @@ -2589,6 +2590,21 @@ class ConvertRangeOp : public OpRewritePattern { } }; +ElementsAttr ConvertAxisAttr(Value val, ElementsAttr attr, Builder *builder) { + auto int_attr = attr.cast(); + auto type = val.getType().cast(); + + SmallVector axis; + axis.reserve(int_attr.getNumElements()); + + int64_t rank = type.getRank(); + for (auto val : int_attr.getValues()) { + axis.push_back((val.getSExtValue() + rank) % rank); + } + + return builder->getI64TensorAttr(axis); +} + /// Converts the LinSpace tensorflow op to a xla_hlo.iota op with a scaling /// and offset applied to generate the linspace values. The output tensor needs /// to have a static shape. The implementation is defined in C++ because there @@ -4181,6 +4197,68 @@ class ConvertXlaShardingOp : public OpRewritePattern { } }; +// Converts a TF InplaceUpdate op to DynamicUpdateSlice HLO. +class ConvertInplaceUpdateOp : public OpRewritePattern { + public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(TF::InplaceUpdateOp op, + PatternRewriter &rewriter) const override { + auto input = op.x(); + auto indices = op.i(); + auto updates = op.v(); + + // Slice each row of `i` and `v` to perform a separate dynamic-update-slice + // on the contents of `x`. 
+ auto input_type = input.getType().cast(); + auto updates_type = updates.getType().cast(); + auto indices_type = indices.getType().cast(); + if (!indices_type.hasStaticShape()) return failure(); + + if (indices_type.getRank() != 1) return failure(); + + SmallVector unpacked_indices_type( + indices_type.getDimSize(0), + RankedTensorType::get({}, indices_type.getElementType())); + auto zero_attr = IntegerAttr::get(rewriter.getIntegerType(64), 0); + auto unpacked_indices = rewriter.create( + op.getLoc(), unpacked_indices_type, indices, zero_attr); + + SmallVector split_updates_shape; + split_updates_shape.append(updates_type.getShape().begin(), + updates_type.getShape().end()); + split_updates_shape.front() = 1; + SmallVector split_updates_type; + split_updates_type.resize( + updates_type.getShape().front(), + RankedTensorType::get(split_updates_shape, + updates_type.getElementType())); + + auto cst = + rewriter.create(op.getLoc(), zero_attr).getResult(); + auto split_updates = rewriter.create( + op.getLoc(), split_updates_type, cst, updates); + + SmallVector input_indices; + input_indices.resize(input_type.getRank(), cst); + + SmallVector starts(updates_type.getRank(), 0); + SmallVector strides(updates_type.getRank(), 1); + SmallVector limits(updates_type.getShape().begin(), + updates_type.getShape().end()); + + for (auto pair : + llvm::zip(unpacked_indices.output(), split_updates.output())) { + input_indices.front() = std::get<0>(pair); + input = rewriter.create( + op.getLoc(), op.getType(), input, std::get<1>(pair), input_indices); + } + + rewriter.replaceOp(op, input); + return success(); + } +}; + // Converts a TF XlaDynamicUpdateSlice op to DynamicUpdateSlice HLO. class ConvertXlaDynamicUpdateSliceOp : public OpRewritePattern { @@ -4785,6 +4863,51 @@ class ConvertQrOp : public OpRewritePattern { } }; +// Emits debug information which includes the number of ops of each type which +// failed to legalize. 
+void EmitLegalizationErrors(Operation *op, + const DenseSet &nonlegalized_ops) { + // Track the legalization failures by mapping op name to information about + // that failure: the number of unlegalized occurances of the op, and one + // example operation that failed. + std::map> op_name_to_error_info; + DenseSet error_ops; + for (Operation *nonlegalized_op : nonlegalized_ops) { + // Increment count of this legalization failure. + StringRef op_name = nonlegalized_op->getName().getStringRef(); + // If this emplace is successful, it's the first time we've encountered + // this op type. Initialize count to 0 so that after increment, it is 1. + auto insertion_result = op_name_to_error_info.emplace( + op_name, std::make_pair(0, nonlegalized_op)); + ++insertion_result.first->second.first; + } + std::vector error_messages; + error_messages.reserve(op_name_to_error_info.size()); + for (const auto &op_info : op_name_to_error_info) { + error_messages.push_back( + llvm::formatv("{0} (count: {1})", op_info.first, op_info.second.first)); + } + Location loc = op->getLoc(); + emitError(loc) << "The following operations cannot be legalized: " + << llvm::join(error_messages, "; ") + << ". These legalization failure(s) may be due to missing TF " + "to HLO lowerings and/or unsupported attributes, etc."; + // Emit more information about the missing ops. This error message + // contains useful details beyond the op name (input and output shapes, + // attributes, etc.). + if (!VLOG_IS_ON(1) && nonlegalized_ops.size() != 1) { + emitError(loc) + << "Emitting more detail about one op that failed to legalize..."; + } else if (VLOG_IS_ON(1)) { + emitError(loc) << "Emitting more detail about one of each type of op " + "that failed to legalize..."; + } + for (const auto &op_info : op_name_to_error_info) { + op_info.second.second->emitOpError() << "is not legalizable"; + if (!VLOG_IS_ON(1)) break; + } +} + // Performs the lowering to XLA dialect. 
void LegalizeTF::runOnFunction() { if (failed(legalizeTF(getFunction(), allow_partial_conversion_))) @@ -4817,12 +4940,13 @@ LogicalResult legalizeTF(Operation *op, bool allow_partial_conversion) { ConvertConv3DBackpropInputOp, ConvertCumsumOp, ConvertDiagPartOp, ConvertEinsumOp, ConvertFusedBatchNormGradOp, ConvertFusedBatchNormGradV2Op, ConvertFusedBatchNormGradV3Op, - ConvertFusedBatchNormV3Op, ConvertInfeedDequeueTupleOp, ConvertLinSpaceOp, - ConvertMaxOp, ConvertMinOp, ConvertAvgPoolOp, ConvertMaxPool2DOp, - ConvertMaxPool3DOp, ConvertMaxPool2DGradOp, ConvertMaxPool3DGradOp, - ConvertMeanOp, ConvertOneHotOp, ConvertOutfeedEnqueueTupleOp, - ConvertProdOp, ConvertQrOp, ConvertRangeOp, ConvertSelectV2Op, - ConvertSigmoidOp, ConvertSizeOp, ConvertSoftmaxOp, + ConvertFusedBatchNormV3Op, ConvertInfeedDequeueTupleOp, + ConvertInplaceUpdateOp, ConvertLinSpaceOp, ConvertMaxOp, ConvertMinOp, + ConvertAvgPoolOp, ConvertMaxPool2DOp, ConvertMaxPool3DOp, + ConvertMaxPool2DGradOp, ConvertMaxPool3DGradOp, ConvertMeanOp, + ConvertOneHotOp, ConvertOutfeedEnqueueTupleOp, ConvertProdOp, ConvertQrOp, + ConvertRangeOp, ConvertSelectV2Op, ConvertSigmoidOp, ConvertSizeOp, + ConvertSoftmaxOp, ConvertSoftmaxOp, ConvertSplitOp, ConvertSplitVOp, ConvertStridedSliceOp, ConvertStridedSliceGradOp, ConvertSumOp, ConvertTensorScatterUpdateOp, ConvertTileOp, ConvertTopKV2Op, @@ -4841,7 +4965,16 @@ LogicalResult legalizeTF(Operation *op, bool allow_partial_conversion) { if (!allow_partial_conversion) { // Fully qualify ReturnOp here as xla_hlo dialect also defines a ReturnOp. target.addLegalOp(); - return applyFullConversion(op, target, patterns); + DenseSet nonlegalized_ops; + LogicalResult result = applyPartialConversion( + op, target, patterns, /*converter=*/nullptr, &nonlegalized_ops); + // In order to enforce that the conversion result is fully converted, + // fail if there are any nonlegalized ops in the set. 
+ if (failed(result) || !nonlegalized_ops.empty()) { + EmitLegalizationErrors(op, nonlegalized_ops); + return failure(); + } + return result; } return applyPartialConversion(op, target, patterns); diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td index b2a7c1e7f62..2a27c1f2966 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td @@ -273,6 +273,13 @@ def : Pat<(TF_CrossReplicaSumOp $input, (TF_ConstOp $group_assignment)), (HLO_CrossReplicaSumOp $input, (CastElementsToI64Elements $group_assignment))>; +//===----------------------------------------------------------------------===// +// All2All op patterns. +//===----------------------------------------------------------------------===// + +def : Pat<(TF_AllToAllOp AnyRankedTensor:$input, (TF_ConstOp $group_assignment), I64Attr:$concat_dimension, $split_dimension, $split_count), + (HLO_AllToAllOp $input, $split_dimension, $concat_dimension, $split_count, (CastElementsToI64Elements $group_assignment))>; + //===----------------------------------------------------------------------===// // FFT op patterns. //===----------------------------------------------------------------------===// @@ -513,6 +520,16 @@ foreach callOp = [TF_PartitionedCallOp, TF_StatefulPartitionedCallOp] in { [(ArgTypesMatchCallee $op, $args, $f)]>; } +//===----------------------------------------------------------------------===// +// Reverse op patterns. +//===----------------------------------------------------------------------===// + +// Handles axis conversion for TF reverse. 
+def ConvertAxisAttr : NativeCodeCall<"ConvertAxisAttr($0, $1, &$_builder)">; + +def : Pat<(TF_ReverseV2Op AnyRankedTensor:$values, (TF_ConstOp $axis)), + (HLO_ReverseOp $values, (ConvertAxisAttr $values, $axis))>; + //===----------------------------------------------------------------------===// // Ternary op patterns. //===----------------------------------------------------------------------===// @@ -543,7 +560,6 @@ foreach Mapping = [ [TF_LogicalNotOp, HLO_NotOp], [TF_NegOp, HLO_NegOp], [TF_RealOp, HLO_RealOp], - [TF_RoundOp, HLO_RoundOp], [TF_RsqrtOp, HLO_RsqrtOp], [TF_SinOp, HLO_SinOp], [TF_SqrtOp, HLO_SqrtOp], diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc index 551462572f1..86a2defd3a8 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc @@ -83,31 +83,51 @@ static bool IsOpWhitelisted(Operation* op) { // clang-format off static llvm::SmallDenseSet ops = { TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), - TypeID::get(), TypeID::get(), + TypeID::get(), TypeID::get(), TypeID::get(), TypeID::get(), TypeID::get(), + TypeID::get(), TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), TypeID::get(), + TypeID::get(), TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), TypeID::get(), - TypeID::get(), - TypeID::get(), TypeID::get(), - TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), - TypeID::get(), TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), TypeID::get(), TypeID::get(), @@ -116,21 +136,41 @@ static bool 
IsOpWhitelisted(Operation* op) { TypeID::get(), TypeID::get(), TypeID::get(), + TypeID::get(), TypeID::get(), TypeID::get(), TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), - TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), - TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), TypeID::get(), - TypeID::get(), TypeID::get(), + TypeID::get(), TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), - TypeID::get() + TypeID::get(), + TypeID::get(), + TypeID::get() }; // clang-format on diff --git a/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_gpu.cc b/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_gpu.cc index e6f3ac02d4f..f0eb3cc1a0f 100644 --- a/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_gpu.cc +++ b/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_gpu.cc @@ -21,7 +21,7 @@ limitations under the License. #include "llvm/ADT/ArrayRef.h" #include "mlir/Dialect/GPU/GPUDialect.h" // from @llvm-project #include "mlir/Dialect/Linalg/IR/LinalgOps.h" // from @llvm-project -#include "mlir/Dialect/LoopOps/LoopOps.h" // from @llvm-project +#include "mlir/Dialect/SCF/SCF.h" // from @llvm-project #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/BlockAndValueMapping.h" // from @llvm-project @@ -112,7 +112,7 @@ class LhloReduceToGPULaunchConverter : public OpConversionPattern { auto step = rewriter.create( loc, rewriter.getIndexType(), rewriter.getIntegerAttr(rewriter.getIndexType(), 1)); - auto loop = rewriter.create(loc, zero, upper, step); + auto loop = rewriter.create(loc, zero, upper, step); rewriter.setInsertionPointToStart(loop.getBody()); // Compute memrefs for the value to reduce. 
This makes it easier to just @@ -173,8 +173,7 @@ struct LhloLegalizeToGpu : public PassWrapper { OwningRewritePatternList patterns; ConversionTarget target(getContext()); target.addLegalDialect(); + gpu::GPUDialect, scf::SCFDialect, XlaLhloDialect>(); target.addIllegalOp(); auto func = getFunction(); patterns.insert(func.getContext()); diff --git a/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_parallel_loops.cc b/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_parallel_loops.cc index 54b3acd3787..c5f5b39e04c 100644 --- a/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_parallel_loops.cc +++ b/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_parallel_loops.cc @@ -18,7 +18,7 @@ limitations under the License. #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" // from @llvm-project -#include "mlir/Dialect/LoopOps/LoopOps.h" // from @llvm-project +#include "mlir/Dialect/SCF/SCF.h" // from @llvm-project #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/IR/StandardTypes.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project @@ -64,12 +64,12 @@ Value ApplySingleResultLhloCode(Location loc, ValueRange operands, // into a reduction operator of loop.reduce by doing buffer allocation for // scalar arguments and the result of `loop.reduce` to make it compatible with // LHLO ops. 
-void ConvertToReductionOperator(Location loc, loop::ReduceOp reduce_op, +void ConvertToReductionOperator(Location loc, scf::ReduceOp reduce_op, Block* lhlo_block, OpBuilder* b) { Block& loop_reduce_op_body = reduce_op.reductionOperator().front(); OpBuilder::InsertionGuard guard(*b); b->setInsertionPointToStart(&loop_reduce_op_body); - b->create( + b->create( loc, ApplySingleResultLhloCode(loc, loop_reduce_op_body.getArguments(), lhlo_block, b)); } @@ -136,9 +136,9 @@ MappedIvs MapWindowIvsToInput(OpTy op, ValueRange ivs, ValueRange window_ivs, return mapped_ivs; } -// Returns loop::Parallel over a shaped value with static or dynamic shape. -loop::ParallelOp MakeLoopOverShape(Location loc, Value shaped_value, - OpBuilder* b) { +// Returns scf::Parallel over a shaped value with static or dynamic shape. +scf::ParallelOp MakeLoopOverShape(Location loc, Value shaped_value, + OpBuilder* b) { Value zero = b->create(loc, 0); Value one = b->create(loc, 1); @@ -151,10 +151,10 @@ loop::ParallelOp MakeLoopOverShape(Location loc, Value shaped_value, lower.push_back(zero); step.push_back(one); } - return b->create(loc, lower, upper, step); + return b->create(loc, lower, upper, step); } -// Converts `xla_lhlo.ReduceOp` into two loop::ParallelOp and a loop::ReduceOp. +// Converts `xla_lhlo.ReduceOp` into two scf::ParallelOp and a scf::ReduceOp. // The outper `ParallelOp` refers to the parallel loops if there are // any. The inner `ParalleOp` refers to the reduction loops and `ReduceOp` // contains the reduction operator. @@ -197,7 +197,7 @@ class ReduceOpConverter : public OpConversionPattern { // TODO(b/137624192) Implement variadic reduce. 
if (xla_reduce_op.out().size() != 1) return failure(); - loop::ReduceOp reduce_op = + scf::ReduceOp reduce_op = CreateReduceOpInNestedParallelLoops(xla_reduce_op, &rewriter); ConvertToReductionOperator(xla_reduce_op.getLoc(), reduce_op, &xla_reduce_op.body().front(), &rewriter); @@ -225,7 +225,7 @@ class ReduceOpConverter : public OpConversionPattern { // } : f32 // loop.yield // } - loop::ReduceOp CreateReduceOpInNestedParallelLoops( + scf::ReduceOp CreateReduceOpInNestedParallelLoops( xla_lhlo::ReduceOp xla_reduce_op, ConversionPatternRewriter* rewriter) const { auto loc = xla_reduce_op.getLoc(); @@ -254,13 +254,13 @@ class ReduceOpConverter : public OpConversionPattern { SmallVector init_value = { rewriter->create(loc, *xla_reduce_op.init_values().begin())}; // Outer ParallelOp is not needed if it is a reduction across all dims. - loop::ParallelOp outer; + scf::ParallelOp outer; if (!parallel_lower.empty()) { - outer = rewriter->create(loc, parallel_lower, - parallel_upper, parallel_step); + outer = rewriter->create(loc, parallel_lower, + parallel_upper, parallel_step); rewriter->setInsertionPointToStart(outer.getBody()); } - loop::ParallelOp inner = rewriter->create( + scf::ParallelOp inner = rewriter->create( loc, reduce_lower, reduce_upper, reduce_step, init_value); Value reduction_result = *inner.getResults().begin(); @@ -294,7 +294,7 @@ class ReduceOpConverter : public OpConversionPattern { rewriter->setInsertionPointToStart(inner.getBody()); Value elem = rewriter->create( loc, *xla_reduce_op.operands().begin(), indices); - return rewriter->create(loc, elem); + return rewriter->create(loc, elem); } }; @@ -314,8 +314,8 @@ class ReduceOpConverter : public OpConversionPattern { // accumulator = reduction_operator(output[O], value) // output[O] = accumulator // -// Converts `xla_lhlo.ReduceWindowOp` into two loop::ParallelOp and a -// loop::ReduceOp. +// Converts `xla_lhlo.ReduceWindowOp` into two scf::ParallelOp and a +// scf::ReduceOp. 
// The outper `ParallelOp` refers to the parallel loops that traverese output // buffer. The inner `ParalleOp` refers to the reduction loops that traverse // reduction windows and `ReduceOp` contains the reduction operator. @@ -366,12 +366,12 @@ class ReduceWindowOpConverter LogicalResult matchAndRewrite( xla_lhlo::ReduceWindowOp xla_reduce_window_op, ArrayRef /*args*/, ConversionPatternRewriter& rewriter) const final { - loop::ParallelOp output_loop, window_loop; + scf::ParallelOp output_loop, window_loop; std::tie(output_loop, window_loop) = CreateParallelLoopsToTraverseOutputAndWindow(xla_reduce_window_op, &rewriter); - loop::ReduceOp reduce_op = CreateReduceOpInNestedParallelLoops( + scf::ReduceOp reduce_op = CreateReduceOpInNestedParallelLoops( xla_reduce_window_op, output_loop, window_loop, &rewriter); ConvertToReductionOperator(xla_reduce_window_op.getLoc(), reduce_op, @@ -381,7 +381,7 @@ class ReduceWindowOpConverter } private: - std::pair + std::pair CreateParallelLoopsToTraverseOutputAndWindow( xla_lhlo::ReduceWindowOp xla_reduce_window_op, ConversionPatternRewriter* rewriter) const { @@ -405,7 +405,7 @@ class ReduceWindowOpConverter window_upper.push_back( rewriter->create(loc, window_dim.getSExtValue())); } - auto window_loop = rewriter->create( + auto window_loop = rewriter->create( loc, window_lower, window_upper, window_step, init_value); Value reduction_result = *window_loop.getResults().begin(); @@ -414,9 +414,9 @@ class ReduceWindowOpConverter return std::make_pair(output_loop, window_loop); } - loop::ReduceOp CreateReduceOpInNestedParallelLoops( + scf::ReduceOp CreateReduceOpInNestedParallelLoops( xla_lhlo::ReduceWindowOp xla_reduce_window_op, - loop::ParallelOp output_loop, loop::ParallelOp window_loop, + scf::ParallelOp output_loop, scf::ParallelOp window_loop, ConversionPatternRewriter* rewriter) const { rewriter->setInsertionPointToStart(window_loop.getBody()); auto loc = xla_reduce_window_op.getLoc(); @@ -436,20 +436,20 @@ class 
ReduceWindowOpConverter xla_reduce_window_op, output_loop.getInductionVars(), window_loop.getInductionVars(), rewriter); - auto elem_or_init = rewriter->create( + auto elem_or_init = rewriter->create( loc, xla_operand_type.getElementType(), mapped_ivs.in_bounds, /*withElseRegion=*/true); OpBuilder then_builder = elem_or_init.getThenBodyBuilder(); Value elem = then_builder.create( loc, xla_reduce_window_op.operand(), mapped_ivs.ivs); - then_builder.create(loc, elem); + then_builder.create(loc, elem); OpBuilder else_builder = elem_or_init.getElseBodyBuilder(); - else_builder.create(loc, *window_loop.initVals().begin()); + else_builder.create(loc, *window_loop.initVals().begin()); - return rewriter->create(loc, - *elem_or_init.results().begin()); + return rewriter->create(loc, + *elem_or_init.results().begin()); } }; @@ -490,7 +490,7 @@ class SelectAndScatterOpConverter ConversionPatternRewriter& rewriter) const final { auto loc = s_and_s_op.getLoc(); InitializeOutput(s_and_s_op, &rewriter); - loop::ParallelOp loop_over_src = + scf::ParallelOp loop_over_src = MakeLoopOverShape(loc, s_and_s_op.source(), &rewriter); rewriter.setInsertionPointToStart(loop_over_src.getBody()); @@ -520,7 +520,7 @@ class SelectAndScatterOpConverter auto loc = s_and_s_op.getLoc(); Value init_value = b->create(loc, s_and_s_op.init_value()); - loop::ParallelOp loop_over_output = + scf::ParallelOp loop_over_output = MakeLoopOverShape(loc, s_and_s_op.out(), b); OpBuilder::InsertionGuard guard(*b); b->setInsertionPointToStart(loop_over_output.getBody()); @@ -531,10 +531,10 @@ class SelectAndScatterOpConverter struct WindowLoops { SmallVector selected_ivs; SmallVector window_ivs; - loop::ForOp inner_loop; + scf::ForOp inner_loop; }; WindowLoops InsertWindowLoops(xla_lhlo::SelectAndScatterOp s_and_s_op, - loop::ParallelOp loop_over_src, + scf::ParallelOp loop_over_src, OpBuilder* b) const { auto loc = s_and_s_op.getLoc(); Value zero = b->create(loc, 0); @@ -558,12 +558,12 @@ class 
SelectAndScatterOpConverter s_and_s_op.window_dimensions()->getIntValues()) { Value upper = b->create(loc, window_dim.getSExtValue()); result.inner_loop = - b->create(loc, zero, upper, one, iter_args); + b->create(loc, zero, upper, one, iter_args); if (b->getInsertionBlock() == loop_over_src.getBody()) { ip = b->saveInsertionPoint(); result.selected_ivs = result.inner_loop.getResults().take_front(rank); } else { - b->create(loc, result.inner_loop.getResults()); + b->create(loc, result.inner_loop.getResults()); } b->setInsertionPointToStart(result.inner_loop.getBody()); iter_args = ValueRange{result.inner_loop.getRegionIterArgs()}; @@ -599,7 +599,7 @@ class SelectAndScatterOpConverter }; SmallVector SelectIvs(xla_lhlo::SelectAndScatterOp s_and_s_op, - loop::ParallelOp loop_over_src, + scf::ParallelOp loop_over_src, OpBuilder* b) const { auto loc = s_and_s_op.getLoc(); @@ -614,7 +614,7 @@ class SelectAndScatterOpConverter IterArgs ivs_val_flag(window_loops.inner_loop.getRegionIterArgs()); - auto if_in_bounds = inner_loop_b.create( + auto if_in_bounds = inner_loop_b.create( loc, window_loops.inner_loop.getResultTypes(), mapped_ivs.in_bounds, /*withElseRegion=*/true); @@ -623,16 +623,16 @@ class SelectAndScatterOpConverter OpBuilder in_bounds_then_b = if_in_bounds.getThenBodyBuilder(); auto select_or_init_results = SelectOrInitialize( s_and_s_op, mapped_ivs.ivs, &ivs_val_flag, &in_bounds_then_b); - in_bounds_then_b.create(loc, select_or_init_results); + in_bounds_then_b.create(loc, select_or_init_results); } // Case when we are in the pad. 
{ OpBuilder in_bounds_else_b = if_in_bounds.getElseBodyBuilder(); - in_bounds_else_b.create(loc, ivs_val_flag.to_vector()); + in_bounds_else_b.create(loc, ivs_val_flag.to_vector()); } - inner_loop_b.create(loc, if_in_bounds.getResults()); + inner_loop_b.create(loc, if_in_bounds.getResults()); return window_loops.selected_ivs; } @@ -647,8 +647,8 @@ class SelectAndScatterOpConverter Value operand_elem = b->create(loc, s_and_s_op.operand(), operand_ivs); auto if_init = - b->create(loc, iter_arg_types, ivs_val_flag->is_init(), - /*withElseRegion=*/true); + b->create(loc, iter_arg_types, ivs_val_flag->is_init(), + /*withElseRegion=*/true); // Init == true, i.e. iter args are already initialized with a selected // element in boundaries of the operand. Select function has to be computed // here. @@ -660,32 +660,31 @@ class SelectAndScatterOpConverter ApplySingleResultLhloCode(loc, {operand_elem, ivs_val_flag->value()}, &lhlo_select, &if_init_then_b); - auto if_pred = - if_init_then_b.create(loc, iter_arg_types, pred, - /*withElseRegion=*/true); + auto if_pred = if_init_then_b.create(loc, iter_arg_types, pred, + /*withElseRegion=*/true); // Pred == true, therefore pack newly selected ivs, val and init flag back // to iter_args and return. { OpBuilder if_pred_then_b = if_pred.getThenBodyBuilder(); - if_pred_then_b.create( + if_pred_then_b.create( loc, IterArgs{operand_ivs, operand_elem, true_i1}.to_vector()); } // Pred == false, therefore return old iter_args. { OpBuilder if_pred_else_b = if_pred.getElseBodyBuilder(); - if_pred_else_b.create(loc, ivs_val_flag->to_vector()); + if_pred_else_b.create(loc, ivs_val_flag->to_vector()); } - if_init_then_b.create(loc, if_pred.getResults()); + if_init_then_b.create(loc, if_pred.getResults()); } // Init == false, i.e. only pad was visited before and this is the first // element in the boundaries of the operand. 
{ OpBuilder if_init_else_b = if_init.getElseBodyBuilder(); - if_init_else_b.create( + if_init_else_b.create( loc, IterArgs{operand_ivs, operand_elem, true_i1}.to_vector()); } return if_init.getResults(); @@ -708,7 +707,7 @@ struct LhloLegalizeToParallelLoops ConversionTarget target(getContext()); target.addLegalDialect(); + scf::SCFDialect, XlaLhloDialect>(); target.addIllegalOp(); diff --git a/tensorflow/compiler/mlir/xla/transforms/map_xla_to_scalar_op.h b/tensorflow/compiler/mlir/xla/transforms/map_xla_to_scalar_op.h index dceb73efb33..c317dc36b3c 100644 --- a/tensorflow/compiler/mlir/xla/transforms/map_xla_to_scalar_op.h +++ b/tensorflow/compiler/mlir/xla/transforms/map_xla_to_scalar_op.h @@ -227,6 +227,28 @@ inline Value MapLhloOpToStdScalarOp( loc, result_types, args, b); } +template <> +inline Value MapLhloOpToStdScalarOp( + Location loc, ArrayRef result_types, ArrayRef args, + OpBuilder* b) { + return MapLhloOpToStdScalarOpImpl{}(loc, result_types, args, + b); +} + +template <> +inline Value MapLhloOpToStdScalarOp( + Location loc, ArrayRef result_types, ArrayRef args, + OpBuilder* b) { + return MapLhloOpToStdScalarOpImpl{}(loc, result_types, args, b); +} + +template <> +inline Value MapLhloOpToStdScalarOp( + Location loc, ArrayRef result_types, ArrayRef args, + OpBuilder* b) { + return MapLhloOpToStdScalarOpImpl{}(loc, result_types, args, b); +} + template <> inline Value MapLhloOpToStdScalarOp( Location loc, ArrayRef result_types, ArrayRef args, @@ -259,11 +281,9 @@ inline Value MapLhloOpToStdScalarOp( // No conversion is needed for the same width integers return args.front(); } - // TODO(dfki-ehna): Add other primitive type conversions - // if (mlir::FpToSiOp::areCastCompatible(sourceType, targetType)) { - // return b.create(loc, result_types, - // args,mlir::None); - // } + if (mlir::FPToSIOp::areCastCompatible(sourceType, targetType)) { + return b->create(loc, result_types, args, mlir::None); + } return nullptr; } diff --git 
a/tensorflow/compiler/mlir/xla/transforms/materialize_broadcasts.cc b/tensorflow/compiler/mlir/xla/transforms/materialize_broadcasts.cc index a4ffa57957e..bf666400900 100644 --- a/tensorflow/compiler/mlir/xla/transforms/materialize_broadcasts.cc +++ b/tensorflow/compiler/mlir/xla/transforms/materialize_broadcasts.cc @@ -50,12 +50,6 @@ static DenseIntElementsAttr GetI64ElementsAttrForSeq(int start, int end, template bool CreateStaticBroadcastsForBinaryOp(SrcOp op, PatternRewriter *rewriter, Value *out_lhs, Value *out_rhs) { - if (!op.broadcast_dimensions().hasValue()) { - // Note: the op may still have an implicit broadcast on it, such as - // for (tensor<1xf32>, tensor<4xf32>). - return false; - } - // Insert BroadcastInDimOps for the left-hand-side and right-hand-side args, // replacing the original LHS and RHS args in the source op with the results // of the broadcasts. @@ -79,25 +73,7 @@ bool CreateStaticBroadcastsForBinaryOp(SrcOp op, PatternRewriter *rewriter, auto lhs_rank = lhs_ranked_type.getRank(); auto rhs_rank = rhs_ranked_type.getRank(); - - // Set broadcast_dimensions to [0, ..., rank] for the higher rank arg. - // Use the original op.broadcast_dimensions for the lower rank arg. - auto higher_rank_broadcast_dims = - GetI64ElementsAttrForSeq(0, std::max(lhs_rank, rhs_rank), rewriter); - DenseIntElementsAttr lhs_broadcast_dims; - DenseIntElementsAttr rhs_broadcast_dims; - if (lhs_rank > rhs_rank) { - lhs_broadcast_dims = higher_rank_broadcast_dims; - rhs_broadcast_dims = op.broadcast_dimensions().getValue(); - } else if (lhs_rank < rhs_rank) { - lhs_broadcast_dims = op.broadcast_dimensions().getValue(); - rhs_broadcast_dims = higher_rank_broadcast_dims; - } else { - // This shouldn't happen for legal ops. If the broadcast_dimensions - // attribute is set, the ranks should be different. - // TODO(scotttodd): Add a custom verification for ops and assert here. 
- return false; - } + ArrayRef op_shape = op_ranked_type.getShape(); // BroadcastInDimOp must have the same element type for operands and results, // so preserve the original output shape and the original input element type. @@ -105,16 +81,32 @@ bool CreateStaticBroadcastsForBinaryOp(SrcOp op, PatternRewriter *rewriter, // broadcast_in_dim (tensor<1x4xf32>) -> tensor<1x4xf32> // broadcast_in_dim (tensor<4xf32>) -> tensor<1x4xf32> // SrcOp (tensor<1x4xf32>, tensor<1x4xf32>) -> tensor<1x4xi1> - ArrayRef op_shape = op_ranked_type.getShape(); - auto lhs_type = - RankedTensorType::get(op_shape, lhs_ranked_type.getElementType()); - auto rhs_type = - RankedTensorType::get(op_shape, rhs_ranked_type.getElementType()); + if (lhs_ranked_type.getShape() != op_ranked_type.getShape()) { + auto type = + RankedTensorType::get(op_shape, lhs_ranked_type.getElementType()); + DenseIntElementsAttr attr = GetI64ElementsAttrForSeq(0, lhs_rank, rewriter); + if (lhs_rank < rhs_rank) { + attr = op.broadcast_dimensions().getValue(); + } - *out_lhs = rewriter->createOrFold(op.getLoc(), lhs_type, - lhs, lhs_broadcast_dims); - *out_rhs = rewriter->createOrFold(op.getLoc(), rhs_type, - rhs, rhs_broadcast_dims); + lhs = + rewriter->createOrFold(op.getLoc(), type, lhs, attr); + } + + if (rhs_ranked_type.getShape() != op_ranked_type.getShape()) { + auto type = + RankedTensorType::get(op_shape, rhs_ranked_type.getElementType()); + DenseIntElementsAttr attr = GetI64ElementsAttrForSeq(0, rhs_rank, rewriter); + if (rhs_rank < lhs_rank) { + attr = op.broadcast_dimensions().getValue(); + } + + rhs = + rewriter->createOrFold(op.getLoc(), type, rhs, attr); + } + + *out_lhs = lhs; + *out_rhs = rhs; return true; } @@ -359,9 +351,15 @@ struct CompareWithBroadcastConvert : public OpRewritePattern { void SetupMaterializeBroadcastsLegality(MLIRContext *context, ConversionTarget *conversionTarget) { -#define ADD_DYNAMICALLY_LEGAL_OP_WITH_BROADCAST(OpType) \ - conversionTarget->addDynamicallyLegalOp( \ - [](OpType 
op) { return !op.broadcast_dimensions().hasValue(); }); +#define ADD_DYNAMICALLY_LEGAL_OP_WITH_BROADCAST(OpType) \ + conversionTarget->addDynamicallyLegalOp([](OpType op) { \ + if (op.broadcast_dimensions().hasValue()) return false; \ + auto l = op.lhs().getType().cast(); \ + auto r = op.rhs().getType().cast(); \ + if (!l.hasRank() || !r.hasRank()) return false; \ + return l.getShape() == r.getShape(); \ + }); + // Binary elementwise ops. ADD_DYNAMICALLY_LEGAL_OP_WITH_BROADCAST(AddOp); ADD_DYNAMICALLY_LEGAL_OP_WITH_BROADCAST(Atan2Op); diff --git a/tensorflow/compiler/mlir/xla/transforms/rewriters.h b/tensorflow/compiler/mlir/xla/transforms/rewriters.h index ad81cda19b9..9cde6f84474 100644 --- a/tensorflow/compiler/mlir/xla/transforms/rewriters.h +++ b/tensorflow/compiler/mlir/xla/transforms/rewriters.h @@ -23,6 +23,7 @@ limitations under the License. #include "mlir/Transforms/DialectConversion.h" // from @llvm-project namespace mlir { +class BufferAssignmentPlacer; namespace xla_hlo { // Collection of rewrite patterns for lowering a general dot product. @@ -38,9 +39,9 @@ void PopulateXlaToStdPatterns(OwningRewritePatternList *patterns, MLIRContext *ctx); // Collection of rewrite patterns for lowering of HLO to LHLO dialect. -void populateHLOToLHLOConversionPattern(MLIRContext *context, - OwningRewritePatternList *patterns); - +void populateHLOToLHLOConversionPattern( + MLIRContext *context, BufferAssignmentPlacer *bufferAssignment, + TypeConverter *converter, OwningRewritePatternList *patterns); // Collection of rewrite patterns for lowering of HLO to Linalg dialect. 
void populateHLOToLinalgConversionPattern(MLIRContext *context, OwningRewritePatternList *patterns); diff --git a/tensorflow/compiler/mlir/xla/transforms/test_infer_shaped_type_pass.cc b/tensorflow/compiler/mlir/xla/transforms/test_infer_shaped_type_pass.cc index 8976bd5b7d2..71441656c08 100644 --- a/tensorflow/compiler/mlir/xla/transforms/test_infer_shaped_type_pass.cc +++ b/tensorflow/compiler/mlir/xla/transforms/test_infer_shaped_type_pass.cc @@ -38,7 +38,8 @@ struct InferReturnTypeComponentsPattern : public RewritePattern { SmallVector components; if (failed(defining_op_int.inferReturnTypeComponents( op->getContext(), op->getLoc(), defining_op->getOperands(), - defining_op->getAttrs(), defining_op->getRegions(), components))) { + defining_op->getAttrDictionary(), defining_op->getRegions(), + components))) { return failure(); } diff --git a/tensorflow/compiler/mlir/xla/transforms/xla_hlo_to_lhlo_with_xla.cc b/tensorflow/compiler/mlir/xla/transforms/xla_hlo_to_lhlo_with_xla.cc index 436a3e701e1..a12bd9e7c1a 100644 --- a/tensorflow/compiler/mlir/xla/transforms/xla_hlo_to_lhlo_with_xla.cc +++ b/tensorflow/compiler/mlir/xla/transforms/xla_hlo_to_lhlo_with_xla.cc @@ -251,17 +251,15 @@ Value LhloDialectEmitter::GetOrCreateView( // Create the view for this slice size, possible with an affine map to model // the offset. The result is cached in the slices_ map. - SmallVector offset_map; - if (slice.offset()) { - offset_map.push_back(AffineMap::get( - /*dimCount=*/1, /*symbolCount=*/0, - {getAffineDimExpr(0, builder_.getContext()) + slice.offset()}, - builder_.getContext())); - } - auto slice_type = MemRefType::get({slice.size()}, i8_type_, offset_map); + // The std.view result type does not carry the static offset: this is not + // useful information. Rather, the view op must have the static offset. 
+ auto slice_type = MemRefType::get({slice.size()}, i8_type_, {}); - auto slice_view = builder_.create( - alloc_buffer.getLoc(), slice_type, alloc_buffer, /*operands=*/llvm::None); + Value byte_shift = + builder_.create(alloc_buffer.getLoc(), slice.offset()); + auto slice_view = + builder_.create(alloc_buffer.getLoc(), slice_type, alloc_buffer, + byte_shift, /*sizes=*/ArrayRef{}); slices_.insert({slice_key, slice_view}); return slice_view; } @@ -277,9 +275,12 @@ StatusOr LhloDialectEmitter::GetOrCreateView( Value slice_view = GetOrCreateView(out_slice); TF_ASSIGN_OR_RETURN(Type out_type, ::xla::ConvertShapeToType( target_shape, builder_)); + Value byte_shift = + builder_.create(builder_.getUnknownLoc(), 0); if (slice_view.getType() != out_type) - slice_view = builder_.create(builder_.getUnknownLoc(), out_type, - slice_view, llvm::None); + slice_view = + builder_.create(builder_.getUnknownLoc(), out_type, slice_view, + byte_shift, /*sizes=*/ArrayRef{}); return slice_view; } diff --git a/tensorflow/compiler/mlir/xla/transforms/xla_legalize_to_linalg.cc b/tensorflow/compiler/mlir/xla/transforms/xla_legalize_to_linalg.cc index 9cce6799288..799a20aa693 100644 --- a/tensorflow/compiler/mlir/xla/transforms/xla_legalize_to_linalg.cc +++ b/tensorflow/compiler/mlir/xla/transforms/xla_legalize_to_linalg.cc @@ -84,7 +84,8 @@ class PointwiseToLinalgConverter : public OpConversionPattern { emitError(loc, "lhlo to linalg conversion expects ranked args"); return failure(); } - if (!argType.getElementType().isSignlessIntOrFloat()) { + auto elemTy = argType.getElementType(); + if (!elemTy.isSignlessIntOrFloat() && !elemTy.template isa()) { return failure(); } @@ -618,17 +619,20 @@ void populateLHLOToLinalgConversionPattern(MLIRContext* context, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, + PointwiseToLinalgConverter, PointwiseToLinalgConverter, // TODO(ataei): Remove this pattern, CopyOp is folded away. 
PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, + PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, + PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, @@ -716,16 +720,19 @@ void populateHLOToLinalgConversionPattern(MLIRContext* context, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, + PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, + PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, + PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index cd22b527444..ea4ba8dab6b 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -470,6 +470,7 @@ tf_xla_py_test( name = "concat_ops_test", size = "medium", srcs = ["concat_ops_test.py"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "many_xla_args", @@ -561,6 +562,7 @@ tf_xla_py_test( name = "dynamic_slice_ops_test", size = "small", srcs = ["dynamic_slice_ops_test.py"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -1342,6 +1344,7 @@ tf_xla_py_test( name = "ternary_ops_test", size = "medium", srcs = ["ternary_ops_test.py"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -1384,6 +1387,7 @@ tf_xla_py_test( size = "medium", srcs = 
["fused_batchnorm_test.py"], python_version = "PY3", + shard_count = 5, tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip ], diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index 92ea1cfaf87..00ed6d83e2e 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -73,8 +73,6 @@ class BinaryOpsTest(xla_test.XLATestCase): self.assertAllCloseAccordingToType( result[i], expected[i], rtol=rtol, atol=atol) - @test_util.disable_mlir_bridge( - "F16 type is not supported in CreateDenseElementsAttrFromLiteral") def testFloatOps(self): for dtype in self.float_types: if dtype == dtypes.bfloat16.as_numpy_dtype: @@ -1098,8 +1096,6 @@ class BinaryOpsTest(xla_test.XLATestCase): x, expected=np.matmul(x, x.transpose([0, 1, 3, 2]))) - @test_util.disable_mlir_bridge( - "TODO(b/155097273): Handle complex dtype constants") def testExpandDims(self): for dtype in self.numeric_types: self._testBinary( @@ -1197,8 +1193,6 @@ class BinaryOpsTest(xla_test.XLATestCase): np.full([1, 1, 3, 5], 3., dtype=np.float32), expected=np.full([4, 5, 1, 2, 5], 18., dtype=np.float32)) - @test_util.disable_mlir_bridge( - "TODO(b/155097273): Handle complex dtype constants") def testPad(self): for dtype, pad_type in itertools.product( self.numeric_types, [np.int32, np.int64]): @@ -1339,8 +1333,6 @@ class BinaryOpsTest(xla_test.XLATestCase): ], dtype=dtype)) - @test_util.disable_mlir_bridge( - "TODO(b/155097273): Handle complex dtype constants") def testReshape(self): for dtype in self.numeric_types: self._testBinary( @@ -1473,8 +1465,6 @@ class BinaryOpsTest(xla_test.XLATestCase): [1, 2]], dtype=dtype)) - @test_util.disable_mlir_bridge( - "TODO(b/155097273): Handle complex dtype constants") def testTranspose(self): for dtype in self.numeric_types: self._testBinary( @@ -1493,8 +1483,6 @@ class BinaryOpsTest(xla_test.XLATestCase): np.array([1, 0], dtype=np.int32), 
expected=np.array([[1, 3], [2, 4]], dtype=dtype)) - @test_util.disable_mlir_bridge( - "TODO(b/155097273): Handle complex dtype constants") def testConjugateTranspose(self): for dtype in self.complex_types: self._testBinary( @@ -1513,7 +1501,6 @@ class BinaryOpsTest(xla_test.XLATestCase): np.array([1, 0], dtype=np.int32), expected=np.array([[1 + 1j, 3 + 3j], [2 - 2j, 4 - 4j]], dtype=dtype)) - @test_util.disable_mlir_bridge("Enable tf.Cross Compilation") def testCross(self): for dtype in self.float_types: self._testBinary( diff --git a/tensorflow/compiler/tests/concat_ops_test.py b/tensorflow/compiler/tests/concat_ops_test.py index 10dd2d6542c..f35ded924d5 100644 --- a/tensorflow/compiler/tests/concat_ops_test.py +++ b/tensorflow/compiler/tests/concat_ops_test.py @@ -23,6 +23,7 @@ import numpy as np from tensorflow.compiler.tests import xla_test from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_array_ops from tensorflow.python.ops import gradients_impl @@ -293,6 +294,7 @@ class ConcatTest(xla_test.XLATestCase): # The purpose of this is to ensure that XLA on GPU will not run out of memory # with too many arguments. 
+ @test_util.disable_mlir_bridge("TODO(b/153895138): Debug.") def testConcatLargeNumberOfTensors(self): if "CPU" in self.device: self.skipTest("This test can time out on CPU, so we will just allow " diff --git a/tensorflow/compiler/tests/ternary_ops_test.py b/tensorflow/compiler/tests/ternary_ops_test.py index a2a47f19a6e..a1bb64eb88d 100644 --- a/tensorflow/compiler/tests/ternary_ops_test.py +++ b/tensorflow/compiler/tests/ternary_ops_test.py @@ -24,6 +24,7 @@ import scipy.special as sps from tensorflow.compiler.tests import xla_test from tensorflow.python.framework import dtypes +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import math_ops @@ -47,6 +48,8 @@ class TernaryOpsTest(xla_test.XLATestCase, parameterized.TestCase): {'start': 1, 'end': 2, 'num': 1}, {'start': 1, 'end': 4, 'num': 3}, {'start': 0, 'end': 41, 'num': 42}) + @test_util.disable_mlir_bridge( + 'TODO(b/156174708): Dynamic result types not supported') def testLinspace(self, start, end, num): expected = np.linspace(start, end, num, dtype=np.float32) result = self._testTernary( @@ -74,6 +77,7 @@ class TernaryOpsTest(xla_test.XLATestCase, parameterized.TestCase): np.int32(2), expected=np.array([1, 3, 5], dtype=np.int32)) + @test_util.disable_mlir_bridge('TODO(b/155949336)') def testSelect(self): for dtype in self.numeric_types: self._testTernary( @@ -211,6 +215,7 @@ class TernaryOpsTest(xla_test.XLATestCase, parameterized.TestCase): upper, expected=np.minimum(np.maximum(x, lower), upper)) + @test_util.disable_mlir_bridge('Enable tf.Betainc Compilation') def testBetaincSanity(self): # This operation is only supported for float32 and float64. 
for dtype in self.numeric_types & {np.float32, np.float64}: @@ -248,6 +253,7 @@ class TernaryOpsTest(xla_test.XLATestCase, parameterized.TestCase): 'atol': 2e-4 }, ) + @test_util.disable_mlir_bridge('Enable tf.Betainc Compilation') def testBetainc(self, sigma, rtol, atol): # This operation is only supported for float32 and float64. for dtype in self.numeric_types & {np.float32, np.float64}: diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py index 0c4c7bacdf3..3e36f67615b 100644 --- a/tensorflow/compiler/tests/unary_ops_test.py +++ b/tensorflow/compiler/tests/unary_ops_test.py @@ -186,8 +186,6 @@ class UnaryOpsTest(xla_test.XLATestCase): self._assertOpOutputMatchesExpected( math_ops.cos, x, expected=np.cos(x), rtol=tol, atol=1e-5) - @test_util.disable_mlir_bridge( - "TODO(b/153812660): Handle tf.Softmax compilation") def testFloatOps(self): for dtype in self.float_types: x = np.arange(-0.90, 0.90, 0.25) @@ -514,6 +512,11 @@ class UnaryOpsTest(xla_test.XLATestCase): ], dtype=dtype)) + @test_util.disable_mlir_bridge( + "TODO(b/153812660): Handle tf.QuantizeAndDequantize compilation") + def testQuantizeAndDequantize(self): + for dtype in self.float_types: + def quantize_and_dequantize_v2(x): return array_ops.quantize_and_dequantize_v2( x, -127, 127, signed_input=True, num_bits=8) @@ -598,8 +601,7 @@ class UnaryOpsTest(xla_test.XLATestCase): np.array([-1, -0.5, 0, 0.3], dtype=dtype), expected=np.array([-1., -0.5, 0., 0.296875], dtype=dtype)) - @test_util.disable_mlir_bridge( - "Complex types not supported in CreateDenseElementsAttrFromLiteral") + @test_util.disable_mlir_bridge("TODO(b/156135423): Fix ConvertSigmoidOp") def testComplexOps(self): for dtype in self.complex_types: diff --git a/tensorflow/compiler/tests/xla_ops_test.py b/tensorflow/compiler/tests/xla_ops_test.py index 3b304df9024..1f83701ea7c 100644 --- a/tensorflow/compiler/tests/xla_ops_test.py +++ b/tensorflow/compiler/tests/xla_ops_test.py @@ -51,7 
+51,6 @@ class XlaOpsNumericalTest(xla_test.XLATestCase, parameterized.TestCase): equality_fn = self.assertAllClose equality_fn(result, expected, rtol=1e-3) - @test_util.disable_mlir_bridge('Not supported yet') def testAdd(self): for dtype in self.numeric_types: self._assertOpOutputMatchesExpected( @@ -72,7 +71,6 @@ class XlaOpsNumericalTest(xla_test.XLATestCase, parameterized.TestCase): np.array([7, 11], dtype=dtype)), expected=np.array([[8, 13], [10, 15]], dtype=dtype)) - @test_util.disable_mlir_bridge('Not supported yet') def testBroadcast(self): for dtype in self.numeric_types: v = np.arange(4, dtype=np.int32).astype(dtype).reshape([2, 2]) @@ -110,7 +108,6 @@ class XlaOpsNumericalTest(xla_test.XLATestCase, parameterized.TestCase): xla_data_pb2.PrecisionConfig.HIGHEST) @parameterized.parameters(*PRECISION_VALUES) - @test_util.disable_mlir_bridge('Not supported yet') def testConv(self, precision): for dtype in set(self.float_types).intersection( set([dtypes.bfloat16.as_numpy_dtype, np.float32])): @@ -195,8 +192,6 @@ class XlaOpsNumericalTest(xla_test.XLATestCase, parameterized.TestCase): args=(np.array([1, 2, 3], dtype=dtype),), expected=np.array([-1, -2, -3], dtype=dtype)) - @test_util.disable_mlir_bridge( - 'Requires XlaPad op shape inference to have static result types') def testPad(self): for dtype in self.numeric_types: diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index 4aec6a2512c..a43b16e9e6a 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -2413,26 +2413,19 @@ Status ConvertExpandDims(OpConverterParams* params) { } Status Converter::SqueezeTensor(nvinfer1::ITensor* input, - const std::vector& trt_axes, + std::vector* input_dims, nvinfer1::ITensor** output) { - const nvinfer1::Dims dims = input->getDimensions(); - std::vector input_dims(dims.d, dims.d + dims.nbDims); - // Mark axes to 
remove by setting them to 0. - for (int axis : trt_axes) { - input_dims[axis] = 0; - } - #if IS_TRT_VERSION_GE(6, 0, 0, 0) // If the remaining dimensions of a squeeze operation have dynamic sizes, we // need to use TRT ops to build the result shape for the squeeze operation. // This is because IShuffleLayer::setReshapeDimensions treats -1 as a special // value. - if (absl::c_any_of(input_dims, [](int i) { return i == -1; })) { + if (absl::c_any_of(*input_dims, [](int i) { return i == -1; })) { nvinfer1::ITensor* shape = network()->addShape(*input)->getOutput(0); std::vector concat_inputs; - for (int i = 0; i < input_dims.size(); i++) { + for (int i = 0; i < input_dims->size(); i++) { // If input dim wasn't set to 0 earlier, we include it in new shape. - if (input_dims[i] != 0) { + if (input_dims->at(i) != 0) { concat_inputs.push_back( network() ->addSlice(*shape, {1, {i}}, {1, {1}}, {1, {1}}) @@ -2452,11 +2445,12 @@ Status Converter::SqueezeTensor(nvinfer1::ITensor* input, } #endif // Remove all dims which are equal to 0. - input_dims.erase(std::remove(input_dims.begin(), input_dims.end(), 0), - input_dims.end()); + input_dims->erase(std::remove(input_dims->begin(), input_dims->end(), 0), + input_dims->end()); // Reshape tensor. 
nvinfer1::Dims new_dims; - TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(input_dims, &new_dims)); + VLOG(2) << "input_dims" << input_dims; + TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(*input_dims, &new_dims)); TF_RETURN_IF_ERROR(PrepareTensorForShape(TRT_TensorOrWeights(input), new_dims, /*validation_only=*/false, output)); return Status::OK(); @@ -2475,31 +2469,48 @@ Status ConvertSqueeze(OpConverterParams* params) { TFAttrs attrs(node_def); auto squeeze_dims = attrs.get>("squeeze_dims"); if (squeeze_dims.empty()) { - return errors::Unimplemented( - "Squeeze is only implemented for explicit dims, at ", node_def.name()); - } - std::vector trt_axes; - trt_axes.reserve(squeeze_dims.size()); - for (int tf_axis : squeeze_dims) { - // If the axis is valid, then convert it to TRT axis, otherwise abort - // conversion. - int trt_axis; - TF_RETURN_IF_ERROR(ConvertAxis(tf_axis, dims.nbDims, node_def.name(), - params->use_implicit_batch, &trt_axis)); - // Make sure target dimension is size 1 or unknown size (-1) - if (input_dims[trt_axis] != -1 && input_dims[trt_axis] != 1) { - return errors::InvalidArgument( - "Dimension ", tf_axis, " with size ", input_dims[trt_axis], - " cannot be squeezed because it must be size 1, at ", + if (params->use_implicit_batch || !HasStaticShape(dims)) { + return errors::Unimplemented( + "Squeeze is not implemented for empty squeeze_dims, at ", node_def.name()); + } else { + // explicit batch mode with static input shape we squeeze all singleton + // dimensions + for (int& dim : input_dims) { + if (dim == 1) { + // Mark it for removal by setting it to 0 + dim = 0; + } + } + } + } else { + std::vector trt_axes; + trt_axes.reserve(squeeze_dims.size()); + for (int tf_axis : squeeze_dims) { + // If the axis is valid, then convert it to TRT axis, otherwise abort + // conversion. 
+ int trt_axis; + TF_RETURN_IF_ERROR(ConvertAxis(tf_axis, dims.nbDims, node_def.name(), + params->use_implicit_batch, &trt_axis)); + // Make sure target dimension is size 1 or unknown size (-1) + if (input_dims[trt_axis] != -1 && input_dims[trt_axis] != 1) { + return errors::InvalidArgument( + "Dimension ", tf_axis, " with size ", input_dims[trt_axis], + " cannot be squeezed because it must be size 1, at ", + node_def.name()); + } + trt_axes.push_back(trt_axis); + } + // Mark axes to remove by setting them to 0. + for (int axis : trt_axes) { + input_dims[axis] = 0; } - trt_axes.push_back(trt_axis); } if (params->validation_only) return Status::OK(); nvinfer1::ITensor* output_tensor = nullptr; TF_RETURN_IF_ERROR(params->converter->SqueezeTensor( - input_tensor.tensor(), trt_axes, &output_tensor)); + input_tensor.tensor(), &input_dims, &output_tensor)); params->outputs->push_back(TRT_TensorOrWeights(output_tensor)); return Status::OK(); } diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h index 8608c8226ee..2092aecd657 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h @@ -529,11 +529,9 @@ class Converter { // Helper function to add a squeeze op to the network. // - // The trt_axes argument lists those axes that need to be squeezed. Each axis - // in the list is numbered according to TRT convention (see ConvertAxis for - // details). - Status SqueezeTensor(nvinfer1::ITensor* input, - const std::vector& trt_axes, + // The input_dims argument stores the TRT dimensions of the input tensor, + // where the dimensions to be squeezed are replaced by 0. 
+ Status SqueezeTensor(nvinfer1::ITensor* input, std::vector* input_dims, nvinfer1::ITensor** output); // Creates an IConstantLayer using 'weights' whose dimensions are specified by diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc index 1f30b837450..884ed7a5771 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h" #include +#include #include #include #include @@ -66,6 +67,7 @@ namespace convert { using absl::StrCat; using ::testing::ElementsAre; using ::testing::ElementsAreArray; +using ::testing::FloatNear; using ::testing::Matcher; using ::testing::NanSensitiveFloatNear; @@ -216,6 +218,21 @@ void ExpectTrtDimsEqualsArray(const std::vector& lhs, << " actual: " << DebugString(rhs); } +Matcher> ArrayFloatNear(const std::vector& values, + float max_abs_error = 1e-5, + bool nan_sensitive = false) { + std::vector> matchers; + matchers.reserve(values.size()); + for (const float& v : values) { + if (nan_sensitive) { + matchers.emplace_back(NanSensitiveFloatNear(v, max_abs_error)); + } else { + matchers.emplace_back(FloatNear(v, max_abs_error)); + } + } + return ElementsAreArray(matchers); +} + template void ExpectArrayNear(const std::vector& lhs, absl::Span rhs) { ASSERT_EQ(lhs.size(), rhs.size()); @@ -3129,11 +3146,13 @@ TEST_P(ParameterizedOpConverterTest, ConvertSqueeze) { TestParamBase{ {1, 2, 1, 3}, // input dims {}, // input partial dims - {2, 1, 3}, // expected output dims + {2, 3}, // expected output dims {}, // axis - Status{ - error::UNIMPLEMENTED, - "Squeeze is only implemented for explicit dims, at my_squeeze"}}, + trt_mode == TrtTestMode::kExplicitBatch + ? 
Status::OK() + : Status{error::UNIMPLEMENTED, + "Squeeze is not implemented for empty squeeze_dims, at " + "my_squeeze"}}, TestParamBase{{1, 2, 1, 3}, {}, {2, 1, 3}, @@ -5112,135 +5131,54 @@ TEST_F(OpConverterTest, ConvertGather) { TestConvertGather(this); } -TEST_F(OpConverterTest, ConvertUnary) { +template +NodeDef CreateUnaryOp() { + Scope s = Scope::NewRootScope(); + auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT); + return T(s.WithOpName("my_unary"), input).operation.node()->def(); +} + +TEST_P(ParameterizedOpConverterTest, ConvertUnary) { + const auto& spec = GetParam(); + const TrtTestMode trt_mode = std::get<0>(spec); + const DataType tf_dtype = std::get<1>(spec); + TrtPrecisionMode converter_precision = std::get<2>(spec); { // Input is weights, should fail. - Reset(); - Scope s = Scope::NewRootScope(); - auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT); - auto neg = ops::Neg(s.WithOpName("my_unary"), input); - const NodeDef& node_def = neg.operation.node()->def(); + Reset(converter_precision, trt_mode); + const NodeDef node_def = CreateUnaryOp(); AddTestWeights("input", {1, 2, 3}, {-3, -2, -1, 0, 1, 2}); RunValidationAndConversion( node_def, error::UNIMPLEMENTED, "The input \"x\" for Neg must be a tensor, at my_unary"); } - - // Get nodedef for unary layer. 
- auto get_unary_nodedef = [](string op_name) -> NodeDef { - Scope s = Scope::NewRootScope(); - auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT); - if (op_name == "Abs") { - auto unary = ops::Abs(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Acos") { - auto unary = ops::Acos(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Acosh") { - auto unary = ops::Acosh(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Asin") { - auto unary = ops::Asin(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Asinh") { - auto unary = ops::Asinh(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Atan") { - auto unary = ops::Atan(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Atanh") { - auto unary = ops::Atanh(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Ceil") { - auto unary = ops::Ceil(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Cos") { - auto unary = ops::Cos(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Cosh") { - auto unary = ops::Cosh(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Exp") { - auto unary = ops::Exp(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Floor") { - auto unary = ops::Floor(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Log") { - auto unary = ops::Log(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Neg") { - auto unary = ops::Neg(s.WithOpName("my_unary"), input); - return 
unary.operation.node()->def(); - } else if (op_name == "Reciprocal") { - auto unary = ops::Reciprocal(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Rsqrt") { - auto unary = ops::Rsqrt(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Sin") { - auto unary = ops::Sin(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Sinh") { - auto unary = ops::Sinh(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Sqrt") { - auto unary = ops::Sqrt(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Tan") { - auto unary = ops::Tan(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } - EXPECT_TRUE(false); - return NodeDef(); - }; - // Get expected output for unary layer. - auto get_unary_output = [](string op_name, float input) -> float { - if (op_name == "Abs") { - return std::abs(input); - } else if (op_name == "Acos") { - return std::acos(input); - } else if (op_name == "Acosh") { - return std::acosh(input); - } else if (op_name == "Asin") { - return std::asin(input); - } else if (op_name == "Asinh") { - return std::asinh(input); - } else if (op_name == "Atan") { - return std::atan(input); - } else if (op_name == "Atanh") { - return std::atanh(input); - } else if (op_name == "Ceil") { - return std::ceil(input); - } else if (op_name == "Cos") { - return std::cos(input); - } else if (op_name == "Cosh") { - return std::cosh(input); - } else if (op_name == "Exp") { - return std::exp(input); - } else if (op_name == "Floor") { - return std::floor(input); - } else if (op_name == "Log") { - return std::log(input); - } else if (op_name == "Neg") { - return -input; - } else if (op_name == "Reciprocal") { - return 1.0 / input; - } else if (op_name == "Rsqrt") { - return 1.0 / std::sqrt(input); - } else if (op_name == "Sin") { 
- return std::sin(input); - } else if (op_name == "Sinh") { - return std::sinh(input); - } else if (op_name == "Sqrt") { - return std::sqrt(input); - } else if (op_name == "Tan") { - return std::tan(input); - } - EXPECT_TRUE(false); - return 0; - }; - + using OpFunc = std::function; + using ValFunc = float (*)(float); + std::map> op_map; +#define ADD_OP(name, op, compute) \ + op_map[name] = \ + std::make_pair(CreateUnaryOp, static_cast(compute)) + ADD_OP("Abs", ops::Abs, std::abs); + ADD_OP("Acos", ops::Acos, std::acos); + ADD_OP("Acosh", ops::Acosh, std::acosh); + ADD_OP("Asin", ops::Asin, std::asin); + ADD_OP("Asinh", ops::Asinh, std::asinh); + ADD_OP("Atan", ops::Atan, std::atan); + ADD_OP("Atanh", ops::Atanh, std::atanh); + ADD_OP("Ceil", ops::Ceil, std::ceil); + ADD_OP("Cos", ops::Cos, std::cos); + ADD_OP("Cosh", ops::Cosh, std::cosh); + ADD_OP("Exp", ops::Exp, std::exp); + ADD_OP("Floor", ops::Floor, std::floor); + ADD_OP("Log", ops::Log, std::log); + ADD_OP("Neg", ops::Neg, [](float x) { return -x; }); + ADD_OP("Reciprocal", ops::Reciprocal, [](float x) { return 1.0f / x; }); + ADD_OP("Rsqrt", ops::Rsqrt, [](float x) { return 1.0f / std::sqrt(x); }); + ADD_OP("Sin", ops::Sin, std::sin); + ADD_OP("Sinh", ops::Sinh, std::sinh); + ADD_OP("Sqrt", ops::Sqrt, std::sqrt); + ADD_OP("Tan", ops::Tan, std::tan); +#undef ADD_OP // Get list of ops to test. std::vector ops_to_test; // Add all ops supported by ConvertUnary. @@ -5251,26 +5189,30 @@ TEST_F(OpConverterTest, ConvertUnary) { } // Add other unary ops to test. ops_to_test.push_back("Rsqrt"); - // Ok. 
+ // Prepare test parameters + auto p = TestParamBase{ + {1, 1, 2, 3}, // input dims + {}, // input partial dims + {1, 1, 2, 3}, // expected output dims + }; for (const string& op_name : ops_to_test) { - Reset(); - NodeDef node_def = get_unary_nodedef(op_name); - AddTestTensor("input", {1, 2, 3}); - RunValidationAndConversion(node_def); - TRT_TensorOrWeights output; - TF_EXPECT_OK(GetTensorOrWeights("my_unary", &output)); - ASSERT_TRUE(output.is_tensor()); - ExpectTrtDimsEqualsArray({1, 2, 3}, output.tensor()->getDimensions()); - - const std::vector input = {-0.9f, 0.6f, 0.0f, -3.5f, 100.0f, 2.9f}; - const DataVec input_data{{"input", AsTensor(input)}}; - DataVec output_data{{"my_unary", ConstructTensor(6)}}; - BuildAndRun(input_data, &output_data); - for (int i = 0; i < input.size(); ++i) { - const float expected_output = get_unary_output(op_name, input[i]); - EXPECT_THAT(GetSpanForData(output_data[0])[i], - NanSensitiveFloatNear(expected_output, 0.0001)); + SCOPED_TRACE(op_name); + Reset(converter_precision, trt_mode); + if (!op_map.count(op_name)) { + FAIL() << "Unary op test map does not contain op " << op_name; } + NodeDef node_def = op_map[op_name].first(); + + AddTestTensor("input", p.input_dims, TfDataTypeToTrt(tf_dtype), trt_mode); + RunValidationAndConversion(node_def, Status::OK(), "my_unary", + p.expected_output_dims); + + std::vector input_values{-0.9f, 0.6f, 0.0f, -3.5f, 100.0f, 2.9f}; + std::vector output; + std::transform(input_values.begin(), input_values.end(), + std::back_inserter(output), op_map[op_name].second); + InstantiateBuildAndRun(tf_dtype, "my_unary", this, p, input_values, + ArrayFloatNear(output, 0.0001, true)); } } diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index a5332385994..c2ad1255a35 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -81,7 +81,7 @@ tf_portable_proto_library( name = "portable_tf2xla_proto", config_string = "allow_all:true", header_outs = 
["//tensorflow/compiler/tf2xla/tf2xla.proto.h"], - portable_deps = ["//tensorflow/core:portable_proto_lib_full_runtime"], + portable_deps = ["//tensorflow/core:portable_proto_lib"], proto_deps = [ ":tf2xla_proto", "//tensorflow/core:protos_all", diff --git a/tensorflow/compiler/tf2xla/kernels/dynamic_slice_ops.cc b/tensorflow/compiler/tf2xla/kernels/dynamic_slice_ops.cc index bb2c0d9ddb8..5dbc083368c 100644 --- a/tensorflow/compiler/tf2xla/kernels/dynamic_slice_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/dynamic_slice_ops.cc @@ -28,6 +28,15 @@ limitations under the License. namespace tensorflow { namespace { +absl::InlinedVector SliceVector(xla::XlaOp input, int64 rank) { + absl::InlinedVector scalar_indices; + scalar_indices.reserve(rank); + for (int i = 0; i < rank; i++) + scalar_indices.push_back( + xla::Reshape(xla::Slice(input, {i}, {i + 1}, {1}), {})); + return scalar_indices; +} + class DynamicUpdateSliceOp : public XlaOpKernel { public: explicit DynamicUpdateSliceOp(OpKernelConstruction* context) @@ -41,21 +50,23 @@ class DynamicUpdateSliceOp : public XlaOpKernel { const TensorShape update_shape = ctx->InputShape("update"); const TensorShape index_shape = ctx->InputShape("indices"); + int64 rank = input_shape.dims(); OP_REQUIRES( ctx, TensorShapeUtils::IsVector(index_shape) && - index_shape.num_elements() == input_shape.dims(), + index_shape.num_elements() == rank, errors::InvalidArgument("index must be a vector with length equal to " "the number of input dimensions")); OP_REQUIRES( - ctx, input_shape.dims() == update_shape.dims(), + ctx, rank == update_shape.dims(), errors::InvalidArgument("input and update must have the same rank," " input shape is ", input_shape.DebugString(), "; update shape is ", update_shape.DebugString())); + xla::XlaOp indices = ctx->Input("indices"); xla::XlaOp result = xla::DynamicUpdateSlice( - ctx->Input("input"), ctx->Input("update"), ctx->Input("indices")); + ctx->Input("input"), ctx->Input("update"), SliceVector(indices, 
rank)); ctx->SetOutput(0, result); } }; @@ -76,17 +87,18 @@ class DynamicSliceOp : public XlaOpKernel { const TensorShape start_indices_shape = ctx->InputShape("start_indices"); const TensorShape size_indices_shape = ctx->InputShape("size_indices"); + int64 rank = input_shape.dims(); OP_REQUIRES(ctx, TensorShapeUtils::IsVector(start_indices_shape) && - start_indices_shape.num_elements() == input_shape.dims(), + start_indices_shape.num_elements() == rank, errors::InvalidArgument( "start_indices must be a vector with length equal to " "input rank, but input rank is ", - input_shape.dims(), " and start_indices has shape ", + rank, " and start_indices has shape ", start_indices_shape.DebugString())); OP_REQUIRES(ctx, TensorShapeUtils::IsVector(size_indices_shape) && - size_indices_shape.num_elements() == input_shape.dims(), + size_indices_shape.num_elements() == rank, errors::InvalidArgument( "size_indices must be a vector with length equal to " "input rank, but input rank is ", @@ -96,8 +108,10 @@ class DynamicSliceOp : public XlaOpKernel { std::vector size_indices; OP_REQUIRES_OK( ctx, ctx->ConstantInputAsIntVector("size_indices", &size_indices)); + + xla::XlaOp start_indices = ctx->Input("start_indices"); xla::XlaOp result = xla::DynamicSlice( - ctx->Input("input"), ctx->Input("start_indices"), size_indices); + ctx->Input("input"), SliceVector(start_indices, rank), size_indices); ctx->SetOutput(0, result); } }; diff --git a/tensorflow/compiler/tf2xla/kernels/slice_op.cc b/tensorflow/compiler/tf2xla/kernels/slice_op.cc index 17d0b87edda..7f274c6b00f 100644 --- a/tensorflow/compiler/tf2xla/kernels/slice_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/slice_op.cc @@ -42,19 +42,17 @@ class SliceOp : public XlaOpKernel { const TensorShape begin_tensor_shape = ctx->InputShape(1); const TensorShape size_tensor_shape = ctx->InputShape(2); + const int input_dims = input_shape.dims(); OP_REQUIRES( ctx, TensorShapeUtils::IsVector(begin_tensor_shape) && 
TensorShapeUtils::IsVector(size_tensor_shape) && - begin_tensor_shape.num_elements() == input_shape.dims() && - size_tensor_shape.num_elements() == input_shape.dims(), + begin_tensor_shape.num_elements() == input_dims && + size_tensor_shape.num_elements() == input_dims, errors::InvalidArgument( "Expected begin and size arguments to be 1-D tensors of size ", - input_shape.dims(), ", but got shapes ", - begin_tensor_shape.DebugString(), " and ", - size_tensor_shape.DebugString(), " instead.")); - - const int input_dims = input_shape.dims(); + input_dims, ", but got shapes ", begin_tensor_shape.DebugString(), + " and ", size_tensor_shape.DebugString(), " instead.")); std::vector begin; std::vector size; @@ -129,7 +127,15 @@ class SliceOp : public XlaOpKernel { input_shape.dim_size(i), "], but ", "got ", size[i])); } - ctx->SetOutput(0, xla::DynamicSlice(ctx->Input(0), ctx->Input(1), size)); + + absl::InlinedVector scalar_indices; + scalar_indices.reserve(input_dims); + xla::XlaOp begin = ctx->Input("begin"); + for (int i = 0; i < input_dims; i++) + scalar_indices.push_back( + xla::Reshape(xla::Slice(begin, {i}, {i + 1}, {1}), {})); + + ctx->SetOutput(0, xla::DynamicSlice(ctx->Input(0), scalar_indices, size)); } } }; diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD index 1350f9e3e0b..0193bea9d6d 100644 --- a/tensorflow/compiler/xla/BUILD +++ b/tensorflow/compiler/xla/BUILD @@ -17,7 +17,6 @@ package_group( "//tensorflow/compiler/...", "//tensorflow/python/tpu/...", "//third_party/py/jax/...", - "//third_party/tf_runtime/tools/tf_kernel_gen/...", ], ) diff --git a/tensorflow/compiler/xla/client/executable_build_options.cc b/tensorflow/compiler/xla/client/executable_build_options.cc index cd52e2f5e45..404f9eb7519 100644 --- a/tensorflow/compiler/xla/client/executable_build_options.cc +++ b/tensorflow/compiler/xla/client/executable_build_options.cc @@ -70,6 +70,12 @@ ExecutableBuildOptions& ExecutableBuildOptions::set_num_partitions( return *this; 
} +ExecutableBuildOptions& ExecutableBuildOptions::set_use_spmd_partitioning( + bool use_spmd_partitioning) { + use_spmd_partitioning_ = use_spmd_partitioning; + return *this; +} + ExecutableBuildOptions& ExecutableBuildOptions::set_device_assignment( const DeviceAssignment& device_assignment) { device_assignment_ = device_assignment; diff --git a/tensorflow/compiler/xla/client/executable_build_options.h b/tensorflow/compiler/xla/client/executable_build_options.h index 360ad0260df..9a7fdd974b1 100644 --- a/tensorflow/compiler/xla/client/executable_build_options.h +++ b/tensorflow/compiler/xla/client/executable_build_options.h @@ -77,6 +77,11 @@ class ExecutableBuildOptions { int num_partitions() const { return num_partitions_; } ExecutableBuildOptions& set_num_partitions(int num_partitions); + // Indicates whether to use SPMD (true) or MPMD (false) partitioning when + // num_partitions > 1 and XLA is requested to partition the input program. + bool use_spmd_partitioning() const { return use_spmd_partitioning_; } + ExecutableBuildOptions& set_use_spmd_partitioning(bool use_spmd_partitioning); + // If set, this specifies a static device assignment for the computation. 
// Otherwise, the computation will be compiled generically and can be run with // any device assignment compatible with the computation's replica and @@ -104,6 +109,7 @@ class ExecutableBuildOptions { se::DeviceMemoryAllocator* device_allocator_ = nullptr; int num_replicas_ = 1; int num_partitions_ = 1; + bool use_spmd_partitioning_ = false; absl::optional device_assignment_; bool alias_passthrough_params_ = false; }; diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index a779086f1d5..6539817d524 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -860,28 +860,6 @@ XlaOp XlaBuilder::SliceInDim(XlaOp operand, int64 start_index, }); } -XlaOp XlaBuilder::DynamicSlice(XlaOp operand, XlaOp start_indices, - absl::Span slice_sizes) { - return ReportErrorOrReturn([&]() -> StatusOr { - HloInstructionProto instr; - - TF_ASSIGN_OR_RETURN(const Shape* operand_shape, GetShapePtr(operand)); - TF_ASSIGN_OR_RETURN(const Shape* start_indices_shape, - GetShapePtr(start_indices)); - TF_ASSIGN_OR_RETURN( - Shape shape, ShapeInference::InferDynamicSliceShape( - *operand_shape, {*start_indices_shape}, slice_sizes)); - *instr.mutable_shape() = shape.ToProto(); - - for (int64 size : slice_sizes) { - instr.add_dynamic_slice_sizes(size); - } - - return AddInstruction(std::move(instr), HloOpcode::kDynamicSlice, - {operand, start_indices}); - }); -} - XlaOp XlaBuilder::DynamicSlice(XlaOp operand, absl::Span start_indices, absl::Span slice_sizes) { @@ -910,26 +888,6 @@ XlaOp XlaBuilder::DynamicSlice(XlaOp operand, }); } -XlaOp XlaBuilder::DynamicUpdateSlice(XlaOp operand, XlaOp update, - XlaOp start_indices) { - return ReportErrorOrReturn([&]() -> StatusOr { - HloInstructionProto instr; - - TF_ASSIGN_OR_RETURN(const Shape* operand_shape, GetShapePtr(operand)); - TF_ASSIGN_OR_RETURN(const Shape* update_shape, GetShapePtr(update)); - TF_ASSIGN_OR_RETURN(const Shape* 
start_indices_shape, - GetShapePtr(start_indices)); - TF_ASSIGN_OR_RETURN( - Shape shape, - ShapeInference::InferDynamicUpdateSliceShape( - *operand_shape, *update_shape, {*start_indices_shape})); - *instr.mutable_shape() = shape.ToProto(); - - return AddInstruction(std::move(instr), HloOpcode::kDynamicUpdateSlice, - {operand, update, start_indices}); - }); -} - XlaOp XlaBuilder::DynamicUpdateSlice(XlaOp operand, XlaOp update, absl::Span start_indices) { return ReportErrorOrReturn([&]() -> StatusOr { @@ -1301,7 +1259,6 @@ XlaOp XlaBuilder::ConvGeneralDilated( int64 feature_group_count, int64 batch_group_count, const PrecisionConfig* precision_config) { return ReportErrorOrReturn([&]() -> StatusOr { - HloInstructionProto instr; TF_ASSIGN_OR_RETURN(const Shape* lhs_shape, GetShapePtr(lhs)); TF_ASSIGN_OR_RETURN(const Shape* rhs_shape, GetShapePtr(rhs)); TF_RETURN_IF_ERROR( @@ -1314,30 +1271,45 @@ XlaOp XlaBuilder::ConvGeneralDilated( window_dimensions[i] = rhs_shape->dimensions(dimension_numbers.kernel_spatial_dimensions(i)); } - TF_ASSIGN_OR_RETURN(*instr.mutable_window(), + + TF_ASSIGN_OR_RETURN(Window window, ShapeInference::InferWindowFromDimensions( window_dimensions, window_strides, padding, lhs_dilation, rhs_dilation)); - - TF_ASSIGN_OR_RETURN( - Shape shape, ShapeInference::InferConvolveShape( - *lhs_shape, *rhs_shape, feature_group_count, - batch_group_count, instr.window(), dimension_numbers)); - *instr.mutable_shape() = shape.ToProto(); - - *instr.mutable_convolution_dimension_numbers() = dimension_numbers; - instr.set_feature_group_count(feature_group_count); - instr.set_batch_group_count(batch_group_count); - - if (precision_config != nullptr) { - *instr.mutable_precision_config() = *precision_config; - } - - return AddInstruction(std::move(instr), HloOpcode::kConvolution, - {lhs, rhs}); + TF_ASSIGN_OR_RETURN(Shape shape, + ShapeInference::InferConvolveShape( + *lhs_shape, *rhs_shape, feature_group_count, + batch_group_count, window, dimension_numbers)); 
+ return ConvGeneralDilatedInternal(shape, lhs, rhs, window, window_strides, + padding, lhs_dilation, rhs_dilation, + dimension_numbers, feature_group_count, + batch_group_count, precision_config); }); } +StatusOr XlaBuilder::ConvGeneralDilatedInternal( + const Shape& shape, XlaOp lhs, XlaOp rhs, const Window& window, + absl::Span window_strides, + absl::Span> padding, + absl::Span lhs_dilation, absl::Span rhs_dilation, + const ConvolutionDimensionNumbers& dimension_numbers, + int64 feature_group_count, int64 batch_group_count, + const PrecisionConfig* precision_config) { + HloInstructionProto instr; + *instr.mutable_shape() = shape.ToProto(); + + *instr.mutable_window() = window; + *instr.mutable_convolution_dimension_numbers() = dimension_numbers; + instr.set_feature_group_count(feature_group_count); + instr.set_batch_group_count(batch_group_count); + + if (precision_config != nullptr) { + *instr.mutable_precision_config() = *precision_config; + } + + return AddInstruction(std::move(instr), HloOpcode::kConvolution, {lhs, rhs}); +} + XlaOp XlaBuilder::Fft(XlaOp operand, const FftType fft_type, const absl::Span fft_length) { return ReportErrorOrReturn([&]() -> StatusOr { @@ -2203,6 +2175,39 @@ XlaOp XlaBuilder::BatchNormGrad(XlaOp operand, XlaOp scale, XlaOp batch_mean, }); } +XlaOp XlaBuilder::AllGather(XlaOp operand, int64 all_gather_dimension, + int64 shard_count, + absl::Span replica_groups, + const absl::optional& channel_id, + const absl::optional& layout) { + return ReportErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + TF_ASSIGN_OR_RETURN(const Shape* operand_shape, GetShapePtr(operand)); + + TF_ASSIGN_OR_RETURN(Shape inferred_shape, + ShapeInference::InferAllGatherShape( + *operand_shape, all_gather_dimension, shard_count)); + if (layout) { + *inferred_shape.mutable_layout() = *layout; + instr.set_constrain_layout(true); + } + *instr.mutable_shape() = inferred_shape.ToProto(); + + instr.add_dimensions(all_gather_dimension); + for (const 
ReplicaGroup& group : replica_groups) { + *instr.add_replica_groups() = group; + } + if (channel_id.has_value()) { + instr.set_channel_id(channel_id->handle()); + } + + TF_ASSIGN_OR_RETURN( + auto all_gather, + AddInstruction(std::move(instr), HloOpcode::kAllGather, {operand})); + return all_gather; + }); +} + XlaOp XlaBuilder::CrossReplicaSum( XlaOp operand, absl::Span replica_groups) { return ReportErrorOrReturn([&]() -> StatusOr { @@ -3105,20 +3110,11 @@ XlaOp SliceInDim(const XlaOp operand, int64 start_index, int64 limit_index, stride, dimno); } -XlaOp DynamicSlice(const XlaOp operand, const XlaOp start_indices, - absl::Span slice_sizes) { - return operand.builder()->DynamicSlice(operand, start_indices, slice_sizes); -} XlaOp DynamicSlice(const XlaOp operand, absl::Span start_indices, absl::Span slice_sizes) { return operand.builder()->DynamicSlice(operand, start_indices, slice_sizes); } -XlaOp DynamicUpdateSlice(const XlaOp operand, const XlaOp update, - const XlaOp start_indices) { - return operand.builder()->DynamicUpdateSlice(operand, update, start_indices); -} - XlaOp DynamicUpdateSlice(const XlaOp operand, const XlaOp update, absl::Span start_indices) { return operand.builder()->DynamicUpdateSlice(operand, update, start_indices); @@ -3470,6 +3466,16 @@ XlaOp ReduceWindowWithGeneralPadding( base_dilations, window_dilations, padding); } +XlaOp AllGather(const XlaOp operand, int64 all_gather_dimension, + int64 shard_count, + absl::Span replica_groups, + const absl::optional& channel_id, + const absl::optional& layout) { + return operand.builder()->AllGather(operand, all_gather_dimension, + shard_count, replica_groups, channel_id, + layout); +} + XlaOp CrossReplicaSum(const XlaOp operand, absl::Span replica_groups) { return operand.builder()->CrossReplicaSum(operand, replica_groups); diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h index 2ab4c575862..24b0cba3a1b 100644 --- 
a/tensorflow/compiler/xla/client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_builder.h @@ -421,14 +421,9 @@ class XlaBuilder { virtual XlaOp SliceInDim(XlaOp operand, int64 start_index, int64 limit_index, int64 stride, int64 dimno); - ABSL_DEPRECATED("Use span-of-indices form instead") - XlaOp DynamicSlice(XlaOp operand, XlaOp start_indices, - absl::Span slice_sizes); XlaOp DynamicSlice(XlaOp operand, absl::Span start_indices, absl::Span slice_sizes); - ABSL_DEPRECATED("Use span-of-indices form instead") - XlaOp DynamicUpdateSlice(XlaOp operand, XlaOp update, XlaOp start_indices); XlaOp DynamicUpdateSlice(XlaOp operand, XlaOp update, absl::Span start_indices); @@ -491,6 +486,16 @@ class XlaBuilder { int64 batch_group_count = 1, const PrecisionConfig* precision_config = nullptr); + virtual StatusOr ConvGeneralDilatedInternal( + const Shape& shape, XlaOp lhs, XlaOp rhs, const Window& window, + absl::Span window_strides, + absl::Span> padding, + absl::Span lhs_dilation, + absl::Span rhs_dilation, + const ConvolutionDimensionNumbers& dimension_numbers, + int64 feature_group_count, int64 batch_group_count, + const PrecisionConfig* precision_config); + XlaOp Fft(XlaOp operand, FftType fft_type, absl::Span fft_length); @@ -549,6 +554,12 @@ class XlaBuilder { XlaOp CrossReplicaSum(XlaOp operand, absl::Span replica_groups = {}); + XlaOp AllGather( + XlaOp operand, int64 all_gather_dimension, int64 shard_count, + absl::Span replica_groups = {}, + const absl::optional& channel_id = absl::nullopt, + const absl::optional& layout = absl::nullopt); + XlaOp AllReduce( XlaOp operand, const XlaComputation& computation, absl::Span replica_groups = {}, @@ -842,14 +853,10 @@ class XlaBuilder { friend XlaOp SliceInDim(XlaOp operand, int64 start_index, int64 limit_index, int64 stride, int64 dimno); - friend XlaOp DynamicSlice(XlaOp operand, XlaOp start_indices, - absl::Span slice_sizes); friend XlaOp DynamicSlice(XlaOp operand, absl::Span start_indices, absl::Span 
slice_sizes); - friend XlaOp DynamicUpdateSlice(XlaOp operand, XlaOp update, - XlaOp start_indices); friend XlaOp DynamicUpdateSlice(XlaOp operand, XlaOp update, absl::Span start_indices); @@ -992,6 +999,11 @@ class XlaBuilder { absl::Span> padding); friend XlaOp CrossReplicaSum(XlaOp operand, absl::Span replica_groups); + friend XlaOp AllGather(XlaOp operand, int64 all_gather_dimension, + int64 shard_count, + absl::Span replica_groups, + const absl::optional& channel_id, + const absl::optional& layout); friend XlaOp AllReduce(XlaOp operand, const XlaComputation& computation, absl::Span replica_groups, const absl::optional& channel_id, @@ -1417,10 +1429,6 @@ XlaOp SliceInDim(XlaOp operand, int64 start_index, int64 limit_index, XlaOp DynamicSlice(XlaOp operand, absl::Span start_indices, absl::Span slice_sizes); -ABSL_DEPRECATED("Use span-of-indices form instead") -XlaOp DynamicSlice(XlaOp operand, XlaOp start_indices, - absl::Span slice_sizes); - // Enqueues a dynamic update slice operation onto the computation, which // updates a slice of 'operand' with 'update' at dynamic 'start_indices'. // The shape of 'update' determines the shape of the slice of 'operand' @@ -1441,9 +1449,6 @@ XlaOp DynamicSlice(XlaOp operand, XlaOp start_indices, XlaOp DynamicUpdateSlice(XlaOp operand, XlaOp update, absl::Span start_indices); -ABSL_DEPRECATED("Use span-of-indices form instead") -XlaOp DynamicUpdateSlice(XlaOp operand, XlaOp update, XlaOp start_indices); - // Enqueues a concatenate instruction onto the computation. 'operands' must // have >= 1 entry. 
XlaOp ConcatInDim(XlaBuilder* builder, absl::Span operands, @@ -1771,6 +1776,11 @@ XlaOp ReduceWindowWithGeneralPadding( XlaOp CrossReplicaSum(XlaOp operand, absl::Span replica_groups = {}); +XlaOp AllGather(XlaOp operand, int64 all_gather_dimension, int64 shard_count, + absl::Span replica_groups = {}, + const absl::optional& channel_id = absl::nullopt, + const absl::optional& layout = absl::nullopt); + // Enqueues an operation that do an AllReduce of the operand cross cores. Here // AllReduce means doing a reduction on the input operand cross cores and then // broadcasting the reduction result to those cores. The reduction function is diff --git a/tensorflow/compiler/xla/client/xla_builder_test.cc b/tensorflow/compiler/xla/client/xla_builder_test.cc index 1fa839b2014..e1733cd179c 100644 --- a/tensorflow/compiler/xla/client/xla_builder_test.cc +++ b/tensorflow/compiler/xla/client/xla_builder_test.cc @@ -381,6 +381,18 @@ TEST_F(XlaBuilderTest, Transpose) { EXPECT_THAT(root, op::Transpose(op::Parameter())); } +TEST_F(XlaBuilderTest, AllGather) { + XlaBuilder b(TestName()); + auto x = Parameter(&b, 0, ShapeUtil::MakeShape(F32, {4, 16}), "x"); + AllGather(x, /*all_gather_dimension=*/1, /*shard_count=*/4); + TF_ASSERT_OK_AND_ASSIGN(auto module, BuildHloModule(&b)); + auto root = module->entry_computation()->root_instruction(); + + EXPECT_EQ(root->opcode(), HloOpcode::kAllGather); + EXPECT_TRUE( + ShapeUtil::Equal(root->shape(), ShapeUtil::MakeShape(F32, {4, 64}))); +} + TEST_F(XlaBuilderTest, AllToAll) { XlaBuilder b(TestName()); auto x = Parameter(&b, 0, ShapeUtil::MakeShape(F32, {4, 16}), "x"); diff --git a/tensorflow/compiler/xla/executable_run_options.h b/tensorflow/compiler/xla/executable_run_options.h index 43ee0fdd820..8ae8c418d5d 100644 --- a/tensorflow/compiler/xla/executable_run_options.h +++ b/tensorflow/compiler/xla/executable_run_options.h @@ -50,6 +50,7 @@ class RunId { public: // Creates a new, unique RunId. 
RunId(); + explicit RunId(int64 value) : data_(value) {} RunId(const RunId&) = default; RunId& operator=(const RunId&) = default; diff --git a/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.py b/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.py index ef0caff0ae6..6d4482af43f 100644 --- a/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.py +++ b/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.py @@ -20,6 +20,9 @@ from __future__ import print_function from absl import logging +# Import xla_client to load shared C++ extensions (just CompileOptions at the +# time of writing). +from tensorflow.compiler.xla.python import xla_client # pylint: disable=unused-import from tensorflow.compiler.xla.python.tpu_driver.client import tpu_client_extension as _tpu_client diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 799d5654840..3349528ebc2 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -2397,6 +2397,42 @@ tf_cc_test( ], ) +cc_library( + name = "all_gather_decomposer", + srcs = ["all_gather_decomposer.cc"], + hdrs = ["all_gather_decomposer.h"], + deps = [ + ":hlo", + ":hlo_casting_utils", + ":hlo_pass", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:types", + "//tensorflow/core:lib", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/strings", + ], +) + +tf_cc_test( + name = "all_gather_decomposer_test", + srcs = ["all_gather_decomposer_test.cc"], + deps = [ + ":all_gather_decomposer", + ":hlo", + ":hlo_matchers", + ":hlo_parser", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:test_utils", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", # fixdeps: keep + 
"//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + cc_library( name = "tuple_simplifier", srcs = ["tuple_simplifier.cc"], @@ -3234,6 +3270,29 @@ tf_cc_test( ], ) +cc_library( + name = "memory_space_propagation", + srcs = ["memory_space_propagation.cc"], + hdrs = ["memory_space_propagation.h"], + deps = [ + ":hlo", + ":hlo_dataflow_analysis", + ":hlo_pass", + ], +) + +tf_cc_test( + name = "memory_space_propagation_test", + srcs = ["memory_space_propagation_test.cc"], + deps = [ + ":hlo_parser", + ":memory_space_propagation", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:test", + ], +) + cc_library( name = "hlo_dce", srcs = ["hlo_dce.cc"], @@ -3787,6 +3846,7 @@ cc_library( "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:span", "@llvm-project//llvm:core", "@llvm-project//llvm:transform_utils", ], diff --git a/tensorflow/compiler/xla/service/all_gather_decomposer.cc b/tensorflow/compiler/xla/service/all_gather_decomposer.cc new file mode 100644 index 00000000000..ad63218eca8 --- /dev/null +++ b/tensorflow/compiler/xla/service/all_gather_decomposer.cc @@ -0,0 +1,154 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/all_gather_decomposer.h" + +#include + +#include "absl/algorithm/container.h" +#include "absl/strings/str_join.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/hlo_casting_utils.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_instructions.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/core/platform/logging.h" + +namespace xla { + +// Creates a computation of x + y. +HloComputation* MakeBinaryAdd(PrimitiveType type, HloModule* module) { + HloComputation::Builder sum_b("add"); + auto x = sum_b.AddInstruction(HloInstruction::CreateParameter( + /*parameter_number=*/0, ShapeUtil::MakeShape(type, {}), "x")); + auto y = sum_b.AddInstruction(HloInstruction::CreateParameter( + /*parameter_number=*/1, ShapeUtil::MakeShape(type, {}), "y")); + if (type == PRED) { + sum_b.AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeShape(type, {}), HloOpcode::kOr, x, y)); + } else { + sum_b.AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeShape(type, {}), HloOpcode::kAdd, x, y)); + } + HloComputation* reduction = module->AddEmbeddedComputation(sum_b.Build()); + return reduction; +} + +Status DecomposeAllGather(HloAllGatherInstruction* ag, int64 partition_count, + HloComputation* comp) { + auto zero = comp->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(ag->shape().element_type()))); + zero = comp->AddInstruction( + HloInstruction::CreateBroadcast(ag->shape(), zero, {})); + auto zero_index = comp->AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::Zero(S32))); + 
std::vector start_indices(ag->shape().rank(), zero_index); + auto shard_id_from_subgroup = [&](HloInstruction* replica_or_global_id) { + if (ag->replica_groups().empty()) { + return replica_or_global_id; + } + if (ag->replica_groups().size() == 1) { + // Whether the group is {1, 2, ..., N - 1}. + bool trivial_group = true; + for (int64 i = 0; i < ag->replica_groups()[0].replica_ids_size(); ++i) { + if (ag->replica_groups()[0].replica_ids(i) != i) { + trivial_group = false; + break; + } + } + if (trivial_group) { + CHECK_EQ(partition_count, ag->replica_groups()[0].replica_ids_size()); + return replica_or_global_id; + } + } + // Create a table of shard IDs for each replica_or_global_id, then slice it + // using replica_or_global_id. + std::vector shard_ids(ag->replica_groups().size() * + ag->replica_groups()[0].replica_ids_size()); + for (const auto& group : ag->replica_groups()) { + for (int64 i = 0; i < group.replica_ids_size(); ++i) { + shard_ids[group.replica_ids(i)] = i; + } + } + auto id_table = comp->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR1(shard_ids))); + auto shard_id = comp->AddInstruction(HloInstruction::CreateDynamicSlice( + ShapeUtil::MakeShape(S32, {1}), id_table, {replica_or_global_id}, {1})); + shard_id = comp->AddInstruction( + HloInstruction::CreateReshape(ShapeUtil::MakeShape(S32, {}), shard_id)); + return shard_id; + }; + HloInstruction* shard_id; + if (ag->channel_id().has_value()) { + if (ag->use_global_device_ids()) { + auto pid = comp->AddInstruction(HloInstruction::CreatePartitionId()); + auto rid = comp->AddInstruction(HloInstruction::CreateReplicaId()); + auto pcount = comp->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR0(partition_count))); + auto global_id = comp->AddInstruction(HloInstruction::CreateBinary( + pid->shape(), HloOpcode::kAdd, pid, + comp->AddInstruction(HloInstruction::CreateBinary( + pid->shape(), HloOpcode::kMultiply, rid, pcount)))); + shard_id = 
shard_id_from_subgroup(global_id); + } else { + TF_RET_CHECK(!ag->replica_groups().empty()); + TF_RET_CHECK(ag->replica_groups()[0].replica_ids_size() == 1); + shard_id = comp->AddInstruction(HloInstruction::CreatePartitionId()); + } + } else { + shard_id = shard_id_from_subgroup( + comp->AddInstruction(HloInstruction::CreateReplicaId())); + } + start_indices[ag->all_gather_dimension()] = + comp->AddInstruction(HloInstruction::CreateBinary( + shard_id->shape(), HloOpcode::kMultiply, shard_id, + comp->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR0(ag->operand(0)->shape().dimensions( + ag->all_gather_dimension())))))); + auto dus = comp->AddInstruction(HloInstruction::CreateDynamicUpdateSlice( + zero->shape(), zero, ag->mutable_operand(0), start_indices)); + auto ar = comp->AddInstruction(HloInstruction::CreateAllReduce( + dus->shape(), {dus}, + MakeBinaryAdd(dus->shape().element_type(), comp->parent()), + ag->replica_groups(), + /*constrain_layout=*/ag->constrain_layout(), ag->channel_id(), + ag->use_global_device_ids())); + TF_RETURN_IF_ERROR(ag->ReplaceAllUsesWith(ar)); + TF_RETURN_IF_ERROR(comp->RemoveInstructionAndUnusedOperands(ag)); + return Status::OK(); +} + +StatusOr AllGatherDecomposer::Run(HloModule* module) { + bool changed = false; + for (auto comp : module->MakeNonfusionComputations()) { + for (auto hlo : comp->MakeInstructionPostOrder()) { + if (hlo->opcode() != HloOpcode::kAllGather) { + continue; + } + auto ag = Cast(hlo); + if (should_decompose_(*ag)) { + TF_RETURN_IF_ERROR(DecomposeAllGather(ag, partition_count_, comp)); + changed = true; + } + } + } + return changed; +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/service/all_gather_decomposer.h b/tensorflow/compiler/xla/service/all_gather_decomposer.h new file mode 100644 index 00000000000..d1983e37383 --- /dev/null +++ b/tensorflow/compiler/xla/service/all_gather_decomposer.h @@ -0,0 +1,51 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_ALL_GATHER_DECOMPOSER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_ALL_GATHER_DECOMPOSER_H_ + +#include "tensorflow/compiler/xla/service/hlo_instructions.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" + +namespace xla { + +// AllGatherDecomposer is a pass which converts unsupported all-gathers into +// dynamic-update-slices and all-reduces. +class AllGatherDecomposer : public HloModulePass { + public: + AllGatherDecomposer( + std::function should_decompose, + int64 partition_count) + : should_decompose_(std::move(should_decompose)), + partition_count_(partition_count) {} + explicit AllGatherDecomposer(int64 partition_count) + : should_decompose_( + [](const HloAllGatherInstruction& ag) { return true; }), + partition_count_(partition_count) {} + absl::string_view name() const override { return "all_gather_decomposer"; } + + // Run AllGatherDecomposer pass on computations in 'module'. + // Returns whether the 'module' was changed. 
+ StatusOr Run(HloModule* module) override; + + private: + std::function should_decompose_; + int64 partition_count_; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_ALL_GATHER_DECOMPOSER_H_ diff --git a/tensorflow/compiler/xla/service/all_gather_decomposer_test.cc b/tensorflow/compiler/xla/service/all_gather_decomposer_test.cc new file mode 100644 index 00000000000..ebcd66ffa07 --- /dev/null +++ b/tensorflow/compiler/xla/service/all_gather_decomposer_test.cc @@ -0,0 +1,161 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/all_gather_decomposer.h" + +#include + +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_instructions.h" +#include "tensorflow/compiler/xla/service/hlo_matchers.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tests/test_utils.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +using ::testing::AllOf; +namespace op = xla::testing::opcode_matchers; +using AllGatherDecomposerTest = HloTestBase; + +TEST_F(AllGatherDecomposerTest, CrossReplicaAllGather) { + const string module_str = R"( +HloModule module + +ENTRY entry { + param0 = f32[10,20] parameter(0) + ROOT ag = f32[10,80] all-gather(param0), replica_groups={}, dimensions={1} +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseAndReturnUnverifiedModule((module_str))); + AllGatherDecomposer decomposer(/*partition_count=*/4); + TF_ASSERT_OK_AND_ASSIGN(bool changed, decomposer.Run(module.get())); + EXPECT_TRUE(changed); + EXPECT_THAT( + module->entry_computation()->root_instruction(), + op::AllReduce(op::DynamicUpdateSlice( + op::Broadcast(op::Constant()), op::Parameter(0), op::Constant(), + op::Multiply(op::ReplicaId(), op::Constant())))); +} + +TEST_F(AllGatherDecomposerTest, CrossPartitionAllGather) { + const string module_str = R"( +HloModule module + +ENTRY entry { + param0 = f32[10,20] parameter(0) + ROOT ag = f32[10,80] all-gather(param0), replica_groups={{0}}, channel_id=1, + dimensions={1} +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + 
ParseAndReturnUnverifiedModule((module_str))); + AllGatherDecomposer decomposer(/*partition_count=*/4); + TF_ASSERT_OK_AND_ASSIGN(bool changed, decomposer.Run(module.get())); + EXPECT_TRUE(changed); + EXPECT_THAT( + module->entry_computation()->root_instruction(), + op::AllReduce(op::DynamicUpdateSlice( + op::Broadcast(op::Constant()), op::Parameter(0), op::Constant(), + op::Multiply(op::PartitionId(), op::Constant())))); +} + +TEST_F(AllGatherDecomposerTest, CrossReplicaAllGatherWithTrivialGroup) { + const string module_str = R"( +HloModule module + +ENTRY entry { + param0 = f32[10,20] parameter(0) + ROOT ag = f32[10,80] all-gather(param0), replica_groups={{0,1,2,3}}, + dimensions={1} +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseAndReturnUnverifiedModule((module_str))); + AllGatherDecomposer decomposer(/*partition_count=*/4); + TF_ASSERT_OK_AND_ASSIGN(bool changed, decomposer.Run(module.get())); + EXPECT_TRUE(changed); + EXPECT_THAT( + module->entry_computation()->root_instruction(), + op::AllReduce(op::DynamicUpdateSlice( + op::Broadcast(op::Constant()), op::Parameter(0), op::Constant(), + op::Multiply(op::ReplicaId(), op::Constant())))); +} + +TEST_F(AllGatherDecomposerTest, CrossReplicaAllGatherWithSubgroups) { + const string module_str = R"( +HloModule module + +ENTRY entry { + param0 = f32[10,20] parameter(0) + ROOT ag = f32[10,80] all-gather(param0), + replica_groups={{2,1,0,3}, {4,6,7,5}}, dimensions={1} +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseAndReturnUnverifiedModule((module_str))); + AllGatherDecomposer decomposer(/*partition_count=*/4); + TF_ASSERT_OK_AND_ASSIGN(bool changed, decomposer.Run(module.get())); + EXPECT_TRUE(changed); + auto id = + AllOf(op::Shape("s32[]"), + op::Reshape(op::DynamicSlice(op::Constant(), op::ReplicaId()))); + EXPECT_THAT(module->entry_computation()->root_instruction(), + op::AllReduce(op::DynamicUpdateSlice( + op::Broadcast(op::Constant()), op::Parameter(0), + op::Constant(), 
op::Multiply(id, op::Constant())))); +} + +TEST_F(AllGatherDecomposerTest, CrossReplicaAllGatherWithSubgroupsGlobalIds) { + const string module_str = R"( +HloModule module + +ENTRY entry { + param0 = f32[10,20] parameter(0) + ROOT ag = f32[10,80] all-gather(param0), + replica_groups={{2,1,0,3}, {4,6,7,5}}, dimensions={1}, channel_id=1, + use_global_device_ids=true +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseAndReturnUnverifiedModule((module_str))); + AllGatherDecomposer decomposer(/*partition_count=*/4); + TF_ASSERT_OK_AND_ASSIGN(bool changed, decomposer.Run(module.get())); + EXPECT_TRUE(changed); + LOG(ERROR) << module->ToString(); + auto global_id = + op::Add(op::PartitionId(), op::Multiply(op::ReplicaId(), op::Constant())); + auto id = AllOf(op::Shape("s32[]"), + op::Reshape(op::DynamicSlice(op::Constant(), global_id))); + EXPECT_THAT(module->entry_computation()->root_instruction(), + op::AllReduce(op::DynamicUpdateSlice( + op::Broadcast(op::Constant()), op::Parameter(0), + op::Constant(), op::Multiply(id, op::Constant())))); +} + +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/service/bfloat16_support.cc b/tensorflow/compiler/xla/service/bfloat16_support.cc index abb695fa486..30d764225c2 100644 --- a/tensorflow/compiler/xla/service/bfloat16_support.cc +++ b/tensorflow/compiler/xla/service/bfloat16_support.cc @@ -79,6 +79,7 @@ bool BFloat16Support::EffectiveOperandPrecisionIsOutputPrecision( const HloInstruction& hlo, int64 operand_index) { switch (hlo.opcode()) { case HloOpcode::kAbs: + case HloOpcode::kAllGather: case HloOpcode::kAllToAll: case HloOpcode::kBroadcast: case HloOpcode::kClamp: diff --git a/tensorflow/compiler/xla/service/compile_only_service.cc b/tensorflow/compiler/xla/service/compile_only_service.cc index 8c76e912011..ce9c8a4ea62 100644 --- a/tensorflow/compiler/xla/service/compile_only_service.cc +++ b/tensorflow/compiler/xla/service/compile_only_service.cc @@ -91,6 +91,7 @@ 
CompileOnlyService::CompileAheadOfTime( TF_RETURN_IF_ERROR(options.static_device_assignment().Serialize( execution_options.mutable_device_assignment())); } + execution_options.set_use_spmd_partitioning(options.use_spmd_partitioning()); for (const AotXlaComputationInstance& instance : computations) { TF_RET_CHECK(instance.computation.has_host_program_shape()); *execution_options.mutable_shape_with_output_layout() = diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h index cf646159a38..57b24e372e6 100644 --- a/tensorflow/compiler/xla/service/compiler.h +++ b/tensorflow/compiler/xla/service/compiler.h @@ -76,6 +76,7 @@ class AotCompilationOptions { virtual int64 replica_count() const { return 0; } virtual int64 num_cores() const { return 0; } + virtual bool use_spmd_partitioning() const { return false; } // Optional allocator that may be used for allocating temp space on the device // during compilation. diff --git a/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.cc index e21ca01c803..05364a4492b 100644 --- a/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.cc @@ -109,24 +109,6 @@ llvm_ir::ElementGenerator CpuElementalIrEmitter::MakeElementGenerator( const HloInstruction* hlo, const HloToElementGeneratorMap& operand_to_generator) { switch (hlo->opcode()) { - case HloOpcode::kMap: - return [this, hlo, &operand_to_generator]( - const IrArray::Index& index) -> StatusOr { - std::vector operands; - for (int i = 0; i < hlo->operand_count(); i++) { - TF_ASSIGN_OR_RETURN(llvm::Value * operand_value, - operand_to_generator.at(hlo->operand(i))(index)); - operands.push_back(operand_value); - } - return ir_emitter_->EmitElementalMap(*Cast(hlo), - operands, llvm_ir::IrName(hlo)); - }; - case HloOpcode::kReduceWindow: - return [this, hlo, &operand_to_generator](const IrArray::Index& index) 
{ - return ir_emitter_->EmitElementalReduceWindow( - Cast(hlo), - operand_to_generator.at(hlo->operand(0)), index); - }; case HloOpcode::kConvolution: return [this, hlo, &operand_to_generator](const IrArray::Index& index) { return ir_emitter_->EmitElementalConvolution( @@ -134,22 +116,6 @@ llvm_ir::ElementGenerator CpuElementalIrEmitter::MakeElementGenerator( operand_to_generator.at(hlo->operand(0)), operand_to_generator.at(hlo->operand(1)), index); }; - case HloOpcode::kReduce: - return [this, hlo, &operand_to_generator](const IrArray::Index& index) { - auto reduce_instr = Cast(hlo); - std::vector input_generators; - for (const HloInstruction* instr : reduce_instr->inputs()) { - input_generators.push_back(operand_to_generator.at(instr)); - } - - std::vector initial_value_generators; - for (const HloInstruction* instr : reduce_instr->init_values()) { - initial_value_generators.push_back(operand_to_generator.at(instr)); - } - return ir_emitter_->EmitElementalReduce( - reduce_instr, std::move(input_generators), - std::move(initial_value_generators), index); - }; default: return ElementalIrEmitter::MakeElementGenerator(hlo, operand_to_generator); diff --git a/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.h b/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.h index e3fba9306b7..5c9f6677ab3 100644 --- a/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.h @@ -44,6 +44,12 @@ class CpuElementalIrEmitter : public ElementalIrEmitter { StatusOr EmitTanh(PrimitiveType prim_type, llvm::Value* value) override; + StatusOr> EmitThreadLocalCall( + const HloComputation& callee, absl::Span parameters, + absl::string_view name) override { + return ir_emitter_->EmitThreadLocalCall(callee, parameters, name); + } + IrEmitter* ir_emitter_; }; diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index c19fa779b60..2b715bfa17a 100644 --- 
a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -695,101 +695,6 @@ Status IrEmitter::HandleTuple(HloInstruction* tuple) { return Status::OK(); } -llvm::Value* IrEmitter::EmitElementalMap( - const HloMapInstruction& map_instr, - absl::Span elemental_operands, absl::string_view name) { - return EmitScalarReturningThreadLocalCall(*map_instr.to_apply(), - elemental_operands, name); -} - -StatusOr IrEmitter::EmitElementalReduceWindow( - const HloReduceWindowInstruction* reduce_window, - const llvm_ir::ElementGenerator& input_generator, - const llvm_ir::IrArray::Index& index) { - const HloInstruction* operand = reduce_window->operand(0); - const Window& window = reduce_window->window(); - - // We fold inputs into the accumulator and initialize it to - // the initial value on the reduce_window. - PrimitiveType operand_element_type = operand->shape().element_type(); - llvm::Value* accumulator_address = llvm_ir::EmitAllocaAtFunctionEntry( - llvm_ir::PrimitiveTypeToIrType(operand_element_type, module_), - "reduce_window_accumulator_address", &b_, - MinimumAlignmentForPrimitiveType(operand_element_type)); - Store(Load(GetEmittedValueFor(reduce_window->operand(1))), - accumulator_address); - - llvm_ir::ForLoopNest loops(IrName(reduce_window, "inner"), &b_); - std::vector window_size; - for (const auto& dim : window.dimensions()) { - window_size.push_back(dim.size()); - } - const llvm_ir::IrArray::Index window_index = loops.AddLoopsForShape( - ShapeUtil::MakeShape(operand_element_type, window_size), "window"); - CHECK_EQ(window_index.size(), index.size()); - - SetToFirstInsertPoint(loops.GetInnerLoopBodyBasicBlock(), &b_); - - std::vector input_multi_index(index.size()); - llvm::Value* in_bounds_condition = nullptr; - for (size_t i = 0; i < index.size(); ++i) { - llvm::Value* strided_index = - NSWMul(index[i], b_.getInt64(window.dimensions(i).stride())); - input_multi_index[i] = NSWSub( - NSWAdd(strided_index, - 
NSWMul(window_index[i], - b_.getInt64(window.dimensions(i).window_dilation()))), - b_.getInt64(window.dimensions(i).padding_low())); - - // We need to verify that we are not in the dilated base area. - llvm::Value* dilation_condition = - ICmpEQ(SRem(input_multi_index[i], - b_.getInt64(window.dimensions(i).base_dilation())), - b_.getInt64(0)); - if (in_bounds_condition == nullptr) { - in_bounds_condition = dilation_condition; - } else { - in_bounds_condition = And(in_bounds_condition, dilation_condition); - } - - // Apply base dilation to the index. - input_multi_index[i] = - SDiv(input_multi_index[i], - b_.getInt64(window.dimensions(i).base_dilation())); - - // We need to check if 0 <= input_multi_index[i] < bound, as otherwise we - // are in the padding so that we can skip the computation. That is - // equivalent to input_multi_index[i] < bound as an *unsigned* comparison, - // since a negative value will wrap to a large positive value. - llvm::Value* index_condition = - ICmpULT(input_multi_index[i], - b_.getInt64(ShapeUtil::GetDimension(operand->shape(), i))); - if (in_bounds_condition == nullptr) { - in_bounds_condition = index_condition; - } else { - in_bounds_condition = And(in_bounds_condition, index_condition); - } - } - CHECK(in_bounds_condition != nullptr); - - llvm_ir::LlvmIfData if_data = - llvm_ir::EmitIfThenElse(in_bounds_condition, "in-bounds", &b_); - SetToFirstInsertPoint(if_data.true_block, &b_); - - // We are not in the padding, so carry out the computation. 
- llvm_ir::IrArray::Index input_index(input_multi_index, operand->shape(), - b_.getInt64Ty()); - TF_ASSIGN_OR_RETURN(llvm::Value* const input_value, - input_generator(input_index)); - llvm::Value* result = EmitScalarReturningThreadLocalCall( - *reduce_window->to_apply(), {Load(accumulator_address), input_value}, - "reducer_function"); - Store(result, accumulator_address); - - SetToFirstInsertPoint(loops.GetOuterLoopExitBasicBlock(), &b_); - return Load(accumulator_address); -} - Status IrEmitter::HandleReduceWindow(HloInstruction* reduce_window) { // Pseudo code for reduce window: // @@ -2099,108 +2004,6 @@ StatusOr IrEmitter::EmitVectorizedReduce( return true; } -StatusOr IrEmitter::EmitElementalReduce( - const HloReduceInstruction* reduce, - std::vector input_generators, - std::vector initial_value_generators, - const llvm_ir::IrArray::Index& index) { - const Shape& out_shape = reduce->shape(); - bool is_variadic = !out_shape.IsArray(); - int accumulators_count = 1; - if (is_variadic) { - CHECK(out_shape.IsTuple()); - accumulators_count = out_shape.tuple_shapes_size(); - } - - absl::Span reduced_dimensions(reduce->dimensions()); - - std::vector accumulator_addrs; - std::vector accumulator_types; - for (int i = 0; i < accumulators_count; i++) { - const Shape& element_shape = - is_variadic ? out_shape.tuple_shapes(i) : out_shape; - PrimitiveType accumulator_type = element_shape.element_type(); - llvm::Type* accumulator_llvm_type = - llvm_ir::PrimitiveTypeToIrType(accumulator_type, module_); - accumulator_types.push_back(accumulator_llvm_type); - - // Initialize an accumulator with init_value. 
- llvm::AllocaInst* accumulator_addr = llvm_ir::EmitAllocaAtFunctionEntry( - accumulator_llvm_type, "accumulator_" + std::to_string(i), &b_, - MinimumAlignmentForPrimitiveType(accumulator_type)); - TF_ASSIGN_OR_RETURN( - llvm::Value* const init_value, - initial_value_generators[i](llvm_ir::IrArray::Index(index.GetType()))); - Store(init_value, accumulator_addr); - accumulator_addrs.push_back(accumulator_addr); - } - - // The enclosing loops go over all the target elements. Now we have to compute - // the actual target element. For this, we build a new loop nest to iterate - // over all the reduction dimensions in the argument. - // AddLoopsForShapeOnDimensions will return an Index where induction Value*s - // are placed for each dimension in dimensions, and all the rest are nullptrs. - llvm_ir::ForLoopNest loops(IrName(reduce, "inner"), &b_); - const HloInstruction* arg = reduce->operand(0); - std::vector input_multi_index = - loops.AddLoopsForShapeOnDimensions(arg->shape(), reduced_dimensions, - "reduction_dim"); - - SetToFirstInsertPoint(loops.GetInnerLoopBodyBasicBlock(), &b_); - - // Build a full index for the input argument, using input_multi_index as the - // base. In input_multi_index only the reduction dimensions are filled in. We - // fill in the rest of the dimensions with induction Value*s taken from - // 'index' which iterates over the target array. See the high-level - // description in the XLA documentation for details. 
- llvm_ir::IrArray::Index::const_iterator it = index.begin(); - - for (auto& i : input_multi_index) { - if (i == nullptr) { - i = *it++; - } - } - CHECK(index.end() == it); - llvm_ir::IrArray::Index input_index(input_multi_index, arg->shape(), - b_.getInt64Ty()); - - std::vector reduction_operands; - for (llvm::Value* accum : accumulator_addrs) { - llvm::Value* accum_value = Load(accum); - reduction_operands.push_back(accum_value); - } - - for (int i = 0; i < accumulators_count; i++) { - TF_ASSIGN_OR_RETURN(llvm::Value* const input_element, - input_generators[i](input_index)); - reduction_operands.push_back(input_element); - } - - std::vector results = EmitThreadLocalCall( - *reduce->to_apply(), reduction_operands, "reduce_function"); - - CHECK(results.size() == accumulators_count); - for (int i = 0; i < accumulators_count; i++) { - Store(results[i], accumulator_addrs[i]); - } - SetToFirstInsertPoint(loops.GetOuterLoopExitBasicBlock(), &b_); - - if (is_variadic) { - // Emit a structure, as that what the LoopEmitter expects. - llvm::Value* returned_structure = llvm::UndefValue::get( - llvm::StructType::get(b_.getContext(), accumulator_types)); - for (int i = 0; i < accumulators_count; i++) { - llvm::Value* accumulator_value = Load(accumulator_addrs[i]); - returned_structure = - b_.CreateInsertValue(returned_structure, accumulator_value, i); - } - return returned_structure; - } else { - CHECK_EQ(accumulator_addrs.size(), 1); - return Load(accumulator_addrs[0]); - } -} - Status IrEmitter::HandleReduce(HloInstruction* reduce) { auto arg = reduce->mutable_operand(0); auto init_value = reduce->mutable_operand(1); diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h index cc5aa3f37fc..24524c67b11 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h @@ -58,6 +58,8 @@ namespace cpu { // functions. 
class IrEmitter : public DfsHloVisitorWithDefault, public IrBuilderMixin { + friend class CpuElementalIrEmitter; + public: using GeneratorForOperandIrArrays = std::function()>; @@ -113,28 +115,12 @@ class IrEmitter : public DfsHloVisitorWithDefault, // Emit an LLVM global variable for every constant buffer allocation. Status EmitConstantGlobals(); - // Emit code to map one element according to `map_instr`. - llvm::Value* EmitElementalMap( - const HloMapInstruction& map_instr, - absl::Span elemental_operands, - absl::string_view name); - // Emit code to emit the element at `index` for a reduce window instruction. - StatusOr EmitElementalReduceWindow( - const HloReduceWindowInstruction* reduce_window, - const llvm_ir::ElementGenerator& input_generator, - const llvm_ir::IrArray::Index& index); // Emit code to emit the element at `index` for a convolution instruction. StatusOr EmitElementalConvolution( const HloConvolutionInstruction* convolution, const llvm_ir::ElementGenerator& input_generator, const llvm_ir::ElementGenerator& kernel_generator, const llvm_ir::IrArray::Index& index); - // Emit code to emit the element at `index` for a reduce instruction. 
- StatusOr EmitElementalReduce( - const HloReduceInstruction* reduce, - std::vector input_generators, - std::vector initial_value_generator, - const llvm_ir::IrArray::Index& index); protected: // diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h index cadea620ec6..caea9d9095a 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h @@ -116,6 +116,7 @@ class DfsHloVisitorBase { virtual Status HandleFft(HloInstructionPtr fft) = 0; virtual Status HandleTriangularSolve(HloInstructionPtr hlo) = 0; virtual Status HandleCholesky(HloInstructionPtr hlo) = 0; + virtual Status HandleAllGather(HloInstructionPtr hlo) = 0; virtual Status HandleAllReduce(HloInstructionPtr hlo) = 0; virtual Status HandleAllToAll(HloInstructionPtr hlo) = 0; virtual Status HandleCollectivePermute(HloInstructionPtr hlo) = 0; diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h index baa9240fb56..9cd220245ba 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h @@ -98,6 +98,9 @@ class DfsHloVisitorWithDefaultBase Status HandleCholesky(HloInstructionPtr hlo) override { return DefaultAction(hlo); } + Status HandleAllGather(HloInstructionPtr crs) override { + return DefaultAction(crs); + } Status HandleAllReduce(HloInstructionPtr crs) override { return DefaultAction(crs); } diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc index 30300b8c195..8cb660de46c 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc @@ -2422,6 +2422,43 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator( -> StatusOr { return EmitElementalDot(hlo, 
operand_to_generator, dot_result_index); }; + case HloOpcode::kMap: + return [this, hlo, &operand_to_generator]( + const IrArray::Index& index) -> StatusOr { + std::vector operands; + for (int i = 0; i < hlo->operand_count(); i++) { + TF_ASSIGN_OR_RETURN(llvm::Value * operand_value, + operand_to_generator.at(hlo->operand(i))(index)); + operands.push_back(operand_value); + } + std::vector input_generators; + for (const HloInstruction* instr : hlo->operands()) { + input_generators.push_back(operand_to_generator.at(instr)); + } + return EmitElementalMap(Cast(hlo), operands); + }; + case HloOpcode::kReduceWindow: + return [this, hlo, &operand_to_generator](const IrArray::Index& index) { + return EmitElementalReduceWindow( + Cast(hlo), + operand_to_generator.at(hlo->operand(0)), + operand_to_generator.at(hlo->operand(1)), index); + }; + case HloOpcode::kReduce: + return [this, hlo, &operand_to_generator](const IrArray::Index& index) { + auto reduce_instr = Cast(hlo); + std::vector input_generators; + for (const HloInstruction* instr : reduce_instr->inputs()) { + input_generators.push_back(operand_to_generator.at(instr)); + } + + std::vector initial_value_generators; + for (const HloInstruction* instr : reduce_instr->init_values()) { + initial_value_generators.push_back(operand_to_generator.at(instr)); + } + return EmitElementalReduce(reduce_instr, std::move(input_generators), + std::move(initial_value_generators), index); + }; default: return [hlo](const IrArray::Index& index) { return Unimplemented("Unhandled opcode for elemental IR emission: %s", @@ -2451,4 +2488,215 @@ llvm::Value* ElementalIrEmitter::EmitComposeComplex(const HloInstruction* op, return complex; } +StatusOr ElementalIrEmitter::EmitElementalMap( + const HloMapInstruction* map_instr, + absl::Span elemental_operands) { + TF_ASSIGN_OR_RETURN( + std::vector values, + EmitThreadLocalCall(*map_instr->to_apply(), elemental_operands, + llvm_ir::IrName(map_instr))); + CHECK_EQ(values.size(), 1); + return 
values[0]; +} + +StatusOr ElementalIrEmitter::EmitElementalReduceWindow( + const HloReduceWindowInstruction* reduce_window, + const llvm_ir::ElementGenerator& input_generator, + const llvm_ir::ElementGenerator& initial_value_generator, + const llvm_ir::IrArray::Index& index) { + // Pseudocode: + // for each index O in output + // value = init_value + // for each index W in window + // for each dimension i from 0 to rank - 1 + // (input index I)[i] = O[i] * stride[i] + W[i] - pad_low[i] + // if I in bounds of input + // value = function(value, input[I]) + // output[O] = value + const HloInstruction* operand = reduce_window->operand(0); + const Window& window = reduce_window->window(); + + PrimitiveType operand_element_type = operand->shape().element_type(); + llvm::Value* accum_ptr = llvm_ir::EmitAllocaAtFunctionEntry( + llvm_ir::PrimitiveTypeToIrType(operand_element_type, module_), + "reduce_window_accum_ptr", b_); + { + TF_ASSIGN_OR_RETURN( + llvm::Value* const init_value, + initial_value_generator(llvm_ir::IrArray::Index(index.GetType()))); + Store(init_value, accum_ptr); + } + + llvm::Type* index_type = index.GetType(); + auto index_typed_const = [&](uint64 c) -> llvm::Constant* { + return index.GetConstantWithIndexType(c); + }; + + llvm_ir::ForLoopNest loops(IrName(reduce_window), b_, index_type); + std::vector window_size; + for (const auto& dim : window.dimensions()) { + window_size.push_back(dim.size()); + } + const IrArray::Index window_index = loops.AddLoopsForShape( + ShapeUtil::MakeShape(operand_element_type, window_size), "window"); + CHECK_EQ(window_index.size(), index.size()); + + SetToFirstInsertPoint(loops.GetInnerLoopBodyBasicBlock(), b_); + + std::vector input_multi_index(index.size()); + llvm::Value* in_bounds = b_->getInt1(true); + for (size_t i = 0; i < index.size(); ++i) { + llvm::Value* stridden_index = + NSWMul(index[i], index_typed_const(window.dimensions(i).stride())); + input_multi_index[i] = NSWSub( + NSWAdd( + stridden_index, +
NSWMul(window_index[i], + index_typed_const(window.dimensions(i).window_dilation()))), + index_typed_const(window.dimensions(i).padding_low())); + + // We need to verify that we are not in the dilated base area. + llvm::Value* dilation_condition = + ICmpEQ(SRem(input_multi_index[i], + index_typed_const(window.dimensions(i).base_dilation())), + index_typed_const(0)); + in_bounds = And(in_bounds, dilation_condition); + + // Apply base dilation to the index. + input_multi_index[i] = + SDiv(input_multi_index[i], + index_typed_const(window.dimensions(i).base_dilation())); + + // We must check whether 0 <= input_multi_index[i] < bound, as + // otherwise we are in the pad and so can skip the computation. This + // comparison is equivalent to the unsigned comparison + // input_multi_index[i] < bound, as a negative value wraps to a large + // positive value. + in_bounds = And(in_bounds, + ICmpULT(input_multi_index[i], + index_typed_const(operand->shape().dimensions(i)))); + } + + llvm_ir::LlvmIfData if_data = + llvm_ir::EmitIfThenElse(in_bounds, "in_bounds", b_); + SetToFirstInsertPoint(if_data.true_block, b_); + + // We are not in pad, so do the computation. 
+ IrArray::Index input_index(input_multi_index, operand->shape(), index_type); + TF_ASSIGN_OR_RETURN(llvm::Value * input_value, input_generator(input_index)); + TF_ASSIGN_OR_RETURN( + std::vector accum_values, + EmitThreadLocalCall(*reduce_window->to_apply(), + {Load(accum_ptr), input_value}, "reducer_function")); + CHECK_EQ(accum_values.size(), 1); + Store(accum_values[0], accum_ptr); + + SetToFirstInsertPoint(loops.GetOuterLoopExitBasicBlock(), b_); + return Load(accum_ptr); +} + +StatusOr ElementalIrEmitter::EmitElementalReduce( + const HloReduceInstruction* reduce, + std::vector input_generators, + std::vector initial_value_generators, + const llvm_ir::IrArray::Index& index) { + const Shape& out_shape = reduce->shape(); + bool is_variadic = !out_shape.IsArray(); + int accumulators_count = 1; + if (is_variadic) { + CHECK(out_shape.IsTuple()); + accumulators_count = out_shape.tuple_shapes_size(); + } + + absl::Span reduced_dimensions(reduce->dimensions()); + + std::vector accumulator_addrs; + std::vector accumulator_types; + llvm::Type* index_type = index.GetType(); + for (int i = 0; i < accumulators_count; i++) { + const Shape& element_shape = + is_variadic ? out_shape.tuple_shapes(i) : out_shape; + PrimitiveType accumulator_type = element_shape.element_type(); + llvm::Type* accumulator_llvm_type = + llvm_ir::PrimitiveTypeToIrType(accumulator_type, module_); + accumulator_types.push_back(accumulator_llvm_type); + + // Initialize an accumulator with init_value. + llvm::AllocaInst* accumulator_addr = llvm_ir::EmitAllocaAtFunctionEntry( + accumulator_llvm_type, "accumulator_" + std::to_string(i), b()); + TF_ASSIGN_OR_RETURN( + llvm::Value* const init_value, + initial_value_generators[i](llvm_ir::IrArray::Index(index_type))); + Store(init_value, accumulator_addr); + accumulator_addrs.push_back(accumulator_addr); + } + + // The enclosing loops go over all the target elements. Now we have to compute + // the actual target element. 
For this, we build a new loop nest to iterate + // over all the reduction dimensions in the argument. + // AddLoopsForShapeOnDimensions will return an Index where induction Value*s + // are placed for each dimension in dimensions, and all the rest are nullptrs. + llvm_ir::ForLoopNest loops(IrName(reduce, "inner"), b(), index_type); + const HloInstruction* arg = reduce->operand(0); + std::vector input_multi_index = + loops.AddLoopsForShapeOnDimensions(arg->shape(), reduced_dimensions, + "reduction_dim"); + + SetToFirstInsertPoint(loops.GetInnerLoopBodyBasicBlock(), b()); + + // Build a full index for the input argument, using input_multi_index as the + // base. In input_multi_index only the reduction dimensions are filled in. We + // fill in the rest of the dimensions with induction Value*s taken from + // 'index' which iterates over the target array. See the high-level + // description in the XLA documentation for details. + auto it = index.begin(); + + for (auto& i : input_multi_index) { + if (i == nullptr) { + i = *it++; + } + } + CHECK(index.end() == it); + llvm_ir::IrArray::Index input_index(input_multi_index, arg->shape(), + index_type); + + std::vector reduction_operands; + for (llvm::Value* accum : accumulator_addrs) { + llvm::Value* accum_value = Load(accum); + reduction_operands.push_back(accum_value); + } + + for (int i = 0; i < accumulators_count; i++) { + TF_ASSIGN_OR_RETURN(llvm::Value* const input_element, + input_generators[i](input_index)); + reduction_operands.push_back(input_element); + } + + TF_ASSIGN_OR_RETURN( + std::vector results, + EmitThreadLocalCall(*reduce->to_apply(), reduction_operands, + "reduce_function")); + + CHECK(results.size() == accumulators_count); + for (int i = 0; i < accumulators_count; i++) { + Store(results[i], accumulator_addrs[i]); + } + SetToFirstInsertPoint(loops.GetOuterLoopExitBasicBlock(), b()); + + if (is_variadic) { + // Emit a structure, as that is what the LoopEmitter expects.
+ llvm::Value* returned_structure = llvm::UndefValue::get( + llvm::StructType::get(b()->getContext(), accumulator_types)); + for (int i = 0; i < accumulators_count; i++) { + llvm::Value* accumulator_value = Load(accumulator_addrs[i]); + returned_structure = + b()->CreateInsertValue(returned_structure, accumulator_value, i); + } + return returned_structure; + } else { + CHECK_EQ(accumulator_addrs.size(), 1); + return Load(accumulator_addrs[0]); + } +} + } // namespace xla diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.h b/tensorflow/compiler/xla/service/elemental_ir_emitter.h index 94e8f1d6400..06a9d7b194c 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.h +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.h @@ -17,12 +17,17 @@ limitations under the License. #define TENSORFLOW_COMPILER_XLA_SERVICE_ELEMENTAL_IR_EMITTER_H_ #include +#include +#include "absl/strings/string_view.h" +#include "absl/types/span.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/Value.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_instructions.h" #include "tensorflow/compiler/xla/service/hlo_module_config.h" +#include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h" #include "tensorflow/compiler/xla/service/llvm_ir/ir_builder_mixin.h" #include "tensorflow/compiler/xla/service/llvm_ir/loop_emitter.h" #include "tensorflow/compiler/xla/statusor.h" @@ -220,6 +225,26 @@ class ElementalIrEmitter : public IrBuilderMixin { const HloToElementGeneratorMap& operand_to_generator, const llvm_ir::IrArray::Index& dot_result_index); + virtual StatusOr> EmitThreadLocalCall( + const HloComputation& callee, absl::Span parameters, + absl::string_view name) = 0; + + StatusOr EmitElementalMap( + const HloMapInstruction* map_instr, + absl::Span elemental_operands); + + StatusOr EmitElementalReduceWindow( + const HloReduceWindowInstruction* reduce_window, + const 
llvm_ir::ElementGenerator& input_generator, + const llvm_ir::ElementGenerator& initial_value_generator, + const llvm_ir::IrArray::Index& index); + + StatusOr EmitElementalReduce( + const HloReduceInstruction* reduce, + std::vector input_generators, + std::vector initial_value_generators, + const llvm_ir::IrArray::Index& index); + llvm::IRBuilder<>* const b_; llvm::Module* module_; diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 61bc41283e1..0f6b2cb72e6 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -684,7 +684,7 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_pass", - "//tensorflow/core:autotuning_proto_cc", + "//tensorflow/core/protobuf:autotuning_proto_cc", "//tensorflow/core:lib", "//tensorflow/core:stream_executor_no_cuda", "//tensorflow/core/util/proto:proto_utils", @@ -720,7 +720,7 @@ cc_library( "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_casting_utils", "//tensorflow/compiler/xla/service:hlo_pass", - "//tensorflow/core:autotuning_proto_cc", + "//tensorflow/core/protobuf:autotuning_proto_cc", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:stream_executor_no_cuda", @@ -1674,7 +1674,7 @@ tf_proto_library_cc( protodeps = [ "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/service:hlo_proto", - "//tensorflow/core:autotuning_proto", + "//tensorflow/core/protobuf:autotuning_proto", ], ) @@ -1685,8 +1685,8 @@ cc_library( deps = [ ":gpu_autotuning_proto_cc", "//tensorflow/compiler/xla:debug_options_flags", - "//tensorflow/core:autotuning_proto_cc", "//tensorflow/core:stream_executor_no_cuda", + "//tensorflow/core/protobuf:autotuning_proto_cc", "@com_google_absl//absl/container:flat_hash_map", ], ) diff --git a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc 
b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc index c6df786fb51..1be0b1b4e7b 100644 --- a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc @@ -305,168 +305,5 @@ llvm::Value* GpuElementalIrEmitter::EmitThreadId() { return NSWAdd(NSWMul(block_id, threads_per_block), thread_id_in_block); } -llvm_ir::ElementGenerator GpuElementalIrEmitter::MakeElementGenerator( - const HloInstruction* hlo, - const HloToElementGeneratorMap& operand_to_generator) { - switch (hlo->opcode()) { - case HloOpcode::kMap: - return [=, &operand_to_generator]( - const IrArray::Index& index) -> StatusOr { - TF_RET_CHECK(!hlo->operands().empty()) - << "Zero operand map not implemented in GPU backend."; - TF_RET_CHECK(hlo->to_apply()->num_parameters() > 0); - std::vector operand_elements; - for (HloInstruction* operand : hlo->operands()) { - TF_ASSIGN_OR_RETURN(llvm::Value * value, - operand_to_generator.at(operand)(index)); - operand_elements.push_back(value); - } - return compute_nested_(*hlo->to_apply(), operand_elements); - }; - case HloOpcode::kReduceWindow: - // Pseudocode: - // for each index I in output - // value = init_value - // for each index W in window - // for each dimension i from 0 to rank - 1 - // (input index I)[i] = O[i] * stride[i] + W[i] - pad_low[i] - // if I in bounds of input - // value = function(value, input[I]) - // output[O] = value - return [=, &operand_to_generator]( - const IrArray::Index& index) -> StatusOr { - const HloInstruction* operand = hlo->operand(0); - const Window& window = hlo->window(); - - PrimitiveType operand_element_type = operand->shape().element_type(); - llvm::Value* accum_ptr = llvm_ir::EmitAllocaAtFunctionEntry( - llvm_ir::PrimitiveTypeToIrType(operand_element_type, module_), - "reduce_window_accum_ptr", b_); - { - TF_ASSIGN_OR_RETURN(llvm::Value * init_value, - operand_to_generator.at(hlo->operand(1))( - IrArray::Index(index.GetType()))); - 
Store(init_value, accum_ptr); - } - - llvm::Type* index_type = index.GetType(); - auto index_typed_const = [&](uint64 c) -> llvm::Constant* { - return index.GetConstantWithIndexType(c); - }; - - llvm_ir::ForLoopNest loops(IrName(hlo), b_, index_type); - std::vector window_size; - for (const auto& dim : window.dimensions()) { - window_size.push_back(dim.size()); - } - const IrArray::Index window_index = loops.AddLoopsForShape( - ShapeUtil::MakeShape(operand_element_type, window_size), "window"); - CHECK_EQ(window_index.size(), index.size()); - - SetToFirstInsertPoint(loops.GetInnerLoopBodyBasicBlock(), b_); - - std::vector input_multi_index(index.size()); - llvm::Value* in_bounds = b_->getInt1(true); - for (size_t i = 0; i < index.size(); ++i) { - llvm::Value* stridden_index = NSWMul( - index[i], index_typed_const(window.dimensions(i).stride())); - input_multi_index[i] = NSWSub( - NSWAdd(stridden_index, - NSWMul(window_index[i], - index_typed_const( - window.dimensions(i).window_dilation()))), - index_typed_const(window.dimensions(i).padding_low())); - - // We need to verify that we are not in the dilated base area. - llvm::Value* dilation_condition = ICmpEQ( - SRem(input_multi_index[i], - index_typed_const(window.dimensions(i).base_dilation())), - index_typed_const(0)); - in_bounds = And(in_bounds, dilation_condition); - - // Apply base dilation to the index. - input_multi_index[i] = - SDiv(input_multi_index[i], - index_typed_const(window.dimensions(i).base_dilation())); - - // We must check whether 0 <= input_multi_index[i] < bound, as - // otherwise we are in the pad and so can skip the computation. This - // comparison is equivalent to the unsigned comparison - // input_multi_index[i] < bound, as a negative value wraps to a large - // positive value. 
- in_bounds = - And(in_bounds, - ICmpULT(input_multi_index[i], - index_typed_const(operand->shape().dimensions(i)))); - } - - llvm_ir::LlvmIfData if_data = - llvm_ir::EmitIfThenElse(in_bounds, "in_bounds", b_); - SetToFirstInsertPoint(if_data.true_block, b_); - - // We are not in pad, so do the computation. - IrArray::Index input_index(input_multi_index, operand->shape(), - index_type); - TF_ASSIGN_OR_RETURN(llvm::Value * input_value, - operand_to_generator.at(operand)(input_index)); - TF_ASSIGN_OR_RETURN( - llvm::Value * accum_value, - compute_nested_(*hlo->to_apply(), {Load(accum_ptr), input_value})); - Store(accum_value, accum_ptr); - - SetToFirstInsertPoint(loops.GetOuterLoopExitBasicBlock(), b_); - return Load(accum_ptr); - }; - case HloOpcode::kReduce: - // TODO(b/118332391): This should be supported. - CHECK_EQ(hlo->operand_count(), 2) << "Did not expect variadic reduce"; - return [=, &operand_to_generator]( - const IrArray::Index& output_index) -> StatusOr { - const HloInstruction* operand = hlo->operand(0); - llvm::Value* accum_ptr = - b()->CreateAlloca(llvm_ir::PrimitiveTypeToIrType( - hlo->shape().element_type(), module_)); - llvm::Type* index_type = output_index.GetType(); - TF_ASSIGN_OR_RETURN(llvm::Value * init_value, - operand_to_generator.at(hlo->operand(1))( - IrArray::Index(index_type))); - b()->CreateStore(init_value, accum_ptr); - - llvm_ir::ForLoopNest loops(IrName(hlo), b_, index_type); - std::vector input_multi_index = - loops.AddLoopsForShapeOnDimensions( - operand->shape(), hlo->dimensions(), "reduction_dim"); - if (!ShapeUtil::IsScalar(hlo->shape())) { - // Here only input_multi_index[hlo->dimensions()] are non-null, so we - // must set the rest. 
- size_t j = 0; - for (auto& i : input_multi_index) { - if (i == nullptr) { - i = output_index[j++]; - } - } - CHECK_EQ(output_index.size(), j); - } - llvm_ir::IrArray::Index input_index( - input_multi_index, hlo->operand(0)->shape(), index_type); - - SetToFirstInsertPoint(loops.GetInnerLoopBodyBasicBlock(), b()); - TF_ASSIGN_OR_RETURN( - llvm::Value * input_value, - operand_to_generator.at(hlo->operand(0))(input_index)); - TF_ASSIGN_OR_RETURN( - llvm::Value * accum_value, - compute_nested_(*hlo->to_apply(), - {b()->CreateLoad(accum_ptr), input_value})); - b()->CreateStore(accum_value, accum_ptr); - SetToFirstInsertPoint(loops.GetOuterLoopExitBasicBlock(), b()); - return b()->CreateLoad(accum_ptr); - }; - default: - return ElementalIrEmitter::MakeElementGenerator(hlo, - operand_to_generator); - } -} - } // namespace gpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h index c8a58a21980..3c4e9f7c1e6 100644 --- a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h +++ b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h @@ -47,10 +47,6 @@ class GpuElementalIrEmitter : public ElementalIrEmitter { llvm::Module* module, llvm::IRBuilder<>* b, NestedComputer compute_nested); - llvm_ir::ElementGenerator MakeElementGenerator( - const HloInstruction* hlo, - const HloToElementGeneratorMap& operand_to_generator) override; - protected: StatusOr EmitFloatBinaryOp(const HloInstruction* op, llvm::Value* lhs_value, @@ -92,6 +88,17 @@ class GpuElementalIrEmitter : public ElementalIrEmitter { StatusOr EmitComplexAbs(PrimitiveType prim_type, llvm::Value* value) override; + StatusOr> EmitThreadLocalCall( + const HloComputation& callee, absl::Span parameters, + absl::string_view) override { + // TODO(b/118332391): Support variadic return values.
+ auto result = compute_nested_(callee, parameters); + if (!result.ok()) { + return result.status(); + } + return std::vector{result.ValueOrDie()}; + } + llvm::Value* EmitThreadId() override; private: diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc index 94a4df43cf4..32a9038b15a 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc @@ -707,6 +707,10 @@ Status HloCostAnalysis::HandleCholesky(const HloInstruction* hlo) { return Status::OK(); } +Status HloCostAnalysis::HandleAllGather(const HloInstruction* hlo) { + return Status::OK(); +} + Status HloCostAnalysis::HandleAllReduce(const HloInstruction* crs) { // We assume 2 replicas, so that each output element is the sum of two input // elements. diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.h b/tensorflow/compiler/xla/service/hlo_cost_analysis.h index 915c4dcbe84..9fdb42185fb 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.h +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.h @@ -76,6 +76,7 @@ class HloCostAnalysis : public ConstDfsHloVisitor { Status HandleFft(const HloInstruction* fft) override; Status HandleTriangularSolve(const HloInstruction* hlo) override; Status HandleCholesky(const HloInstruction* hlo) override; + Status HandleAllGather(const HloInstruction* hlo) override; Status HandleAllReduce(const HloInstruction* crs) override; Status HandleAllToAll(const HloInstruction* hlo) override; Status HandleCollectivePermute(const HloInstruction* hlo) override; diff --git a/tensorflow/compiler/xla/service/hlo_dce.cc b/tensorflow/compiler/xla/service/hlo_dce.cc index b8e3f83b515..900b557b4dc 100644 --- a/tensorflow/compiler/xla/service/hlo_dce.cc +++ b/tensorflow/compiler/xla/service/hlo_dce.cc @@ -47,16 +47,14 @@ StatusOr HloDCE::RunOnComputation( // computation's instruction while simultaneously removing instructions. 
std::vector dead_roots; for (auto* instruction : computation->instructions()) { + auto maybe_collective_op = DynCast(instruction); if (instruction != computation->root_instruction() && instruction->user_count() == 0 && computation->IsSafelyRemovable(instruction) && (!instruction->HasSideEffect() || (remove_cross_partition_collective_ops && - ((instruction->opcode() == HloOpcode::kAllReduce && - !Cast(instruction)->constrain_layout()) || - (instruction->opcode() == HloOpcode::kAllToAll && - !Cast(instruction)->constrain_layout()) || - instruction->opcode() == HloOpcode::kCollectivePermute)))) { + (maybe_collective_op != nullptr && + !maybe_collective_op->constrain_layout())))) { dead_roots.push_back(instruction); } } diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 47a455ac3f4..cd2a61d7eff 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -1057,6 +1057,7 @@ ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) { case HloOpcode::kGetDimensionSize: case HloOpcode::kSetDimensionSize: return kGray; + case HloOpcode::kAllGather: case HloOpcode::kAllReduce: case HloOpcode::kAllToAll: case HloOpcode::kCollectivePermute: diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 27fac19587e..9e9c8b0913b 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -388,6 +388,24 @@ StatusOr> HloInstruction::CreateFromProto( proto.outfeed_config()); break; } + case HloOpcode::kAllGather: { + absl::optional channel_id; + if (proto.channel_id() > 0) { + channel_id = proto.channel_id(); + } + + TF_RET_CHECK(proto.dimensions_size() == 1) + << "AllGather cannot have more than 1 all-gather dimensions"; + TF_RET_CHECK(all_operands().size() == 1) + << "AllGather must have a single 
operand"; + int64 all_gather_dimension = proto.dimensions(0); + instruction = CreateAllGather( + shape, operands(0), all_gather_dimension, + std::vector(proto.replica_groups().begin(), + proto.replica_groups().end()), + proto.constrain_layout(), channel_id, proto.use_global_device_ids()); + break; + } case HloOpcode::kAllReduce: { TF_RET_CHECK(proto.called_computation_ids_size() == 1) << "AllReduce should have 1 called computation but sees " @@ -929,6 +947,15 @@ HloInstruction::CreateReducePrecision(const Shape& shape, shape, operand, exponent_bits, mantissa_bits); } +/* static */ std::unique_ptr HloInstruction::CreateAllGather( + const Shape& shape, HloInstruction* operand, int64 all_gather_dimension, + const std::vector& replica_groups, bool constrain_layout, + const absl::optional& channel_id, bool use_global_device_ids) { + return absl::make_unique( + shape, operand, all_gather_dimension, replica_groups, constrain_layout, + channel_id, use_global_device_ids); +} + /* static */ std::unique_ptr HloInstruction::CreateAllReduce( const Shape& shape, absl::Span operands, HloComputation* reduce_computation, @@ -1518,6 +1545,7 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( case HloOpcode::kParameter: case HloOpcode::kGetTupleElement: case HloOpcode::kReducePrecision: + case HloOpcode::kAllGather: case HloOpcode::kAllReduce: case HloOpcode::kAllToAll: case HloOpcode::kCollectivePermute: @@ -1997,6 +2025,7 @@ bool HloInstruction::IdenticalSlowPath( case HloOpcode::kReducePrecision: case HloOpcode::kInfeed: case HloOpcode::kOutfeed: + case HloOpcode::kAllGather: case HloOpcode::kAllReduce: case HloOpcode::kAllToAll: case HloOpcode::kCollectivePermute: @@ -2851,6 +2880,8 @@ Status HloInstruction::Visit(DfsHloVisitorBase* visitor) { return visitor->HandleConvolution(this); case HloOpcode::kFft: return visitor->HandleFft(this); + case HloOpcode::kAllGather: + return visitor->HandleAllGather(this); case HloOpcode::kAllReduce: return visitor->HandleAllReduce(this); 
case HloOpcode::kAllToAll: diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 923138862a7..8be7a034877 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -618,6 +618,16 @@ class HloInstruction { const Shape& shape, HloInstruction* operand, const int exponent_bits, const int mantissa_bits); + // Creates an all-gather op, which concats the operands of all participants + // along all_gather_dimension. The replica_groups, channel_id, and + // use_global_device_ids arguments are identical to those in all-reduce, + // except that the order of the group members determines the concatenation + // order of inputs from different participants. + static std::unique_ptr CreateAllGather( + const Shape& shape, HloInstruction* operand, int64 all_gather_dimension, + const std::vector& replica_groups, bool constrain_layout, + const absl::optional& channel_id, bool use_global_device_ids); + // Creates a cross replica reduction op. // // `reduction_computation`: the reduction function. 
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc index eb821d40e78..d5bdd674563 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.cc +++ b/tensorflow/compiler/xla/service/hlo_instructions.cc @@ -556,6 +556,51 @@ bool HloCollectiveInstruction::IdenticalSlowPath( }); } +HloAllGatherInstruction::HloAllGatherInstruction( + const Shape& shape, HloInstruction* operand, int64 all_gather_dimension, + const std::vector& replica_groups, bool constrain_layout, + const absl::optional& channel_id, bool use_global_device_ids) + : HloCollectiveInstruction(HloOpcode::kAllGather, shape, {operand}, + replica_groups, constrain_layout, channel_id), + all_gather_dimension_(all_gather_dimension), + use_global_device_ids_(use_global_device_ids) {} + +std::vector HloAllGatherInstruction::ExtraAttributesToStringImpl( + const HloPrintOptions& options) const { + std::vector result = + HloCollectiveInstruction::ExtraAttributesToStringImpl(options); + result.push_back(StrCat("dimensions={", all_gather_dimension_, "}")); + if (use_global_device_ids_) { + result.push_back("use_global_device_ids=true"); + } + return result; +} + +std::unique_ptr +HloAllGatherInstruction::CloneWithNewOperandsImpl( + const Shape& shape, absl::Span new_operands, + HloCloneContext* /*context*/) const { + return absl::make_unique( + shape, new_operands[0], all_gather_dimension(), replica_groups(), + constrain_layout(), channel_id(), use_global_device_ids()); +} + +HloInstructionProto HloAllGatherInstruction::ToProto() const { + HloInstructionProto proto = HloCollectiveInstruction::ToProto(); + proto.add_dimensions(all_gather_dimension_); + return proto; +} + +bool HloAllGatherInstruction::IdenticalSlowPath( + const HloInstruction& other, + const std::function& + eq_computations) const { + const auto& casted_other = static_cast(other); + return HloCollectiveInstruction::IdenticalSlowPath(other, eq_computations) && + all_gather_dimension_ 
== casted_other.all_gather_dimension() && + use_global_device_ids() == casted_other.use_global_device_ids(); +} + HloAllReduceInstruction::HloAllReduceInstruction( const Shape& shape, absl::Span operands, HloComputation* reduce_computation, diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h index eecd02d891e..ae78d365cfa 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.h +++ b/tensorflow/compiler/xla/service/hlo_instructions.h @@ -348,6 +348,38 @@ class HloCollectiveInstruction : public HloChannelInstruction { bool constrain_layout_; }; +class HloAllGatherInstruction : public HloCollectiveInstruction { + public: + explicit HloAllGatherInstruction( + const Shape& shape, HloInstruction* operand, int64 all_gather_dimension, + const std::vector& replica_groups, bool constrain_layout, + const absl::optional& channel_id, bool use_global_device_ids); + // Same as HloAllReduceInstruction::use_global_device_ids. + bool use_global_device_ids() const { return use_global_device_ids_; } + + // The dimension on which data from different participants are concatenated. + int64 all_gather_dimension() const { return all_gather_dimension_; } + + protected: + std::vector ExtraAttributesToStringImpl( + const HloPrintOptions& options) const override; + HloInstructionProto ToProto() const override; + + private: + bool IdenticalSlowPath( + const HloInstruction& other, + const std::function& + eq_computations) const override; + + // Implementation for non-common logic of CloneWithNewOperands. 
+ std::unique_ptr CloneWithNewOperandsImpl( + const Shape& shape, absl::Span new_operands, + HloCloneContext* context) const override; + + int64 all_gather_dimension_; + bool use_global_device_ids_; +}; + class HloAllReduceInstruction : public HloCollectiveInstruction { public: explicit HloAllReduceInstruction( diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc index de65ed99303..9722d5c2b76 100644 --- a/tensorflow/compiler/xla/service/hlo_module.cc +++ b/tensorflow/compiler/xla/service/hlo_module.cc @@ -420,6 +420,8 @@ StatusOr HloModule::CreateModuleConfigFromShape( if (execution_options->num_partitions() > 0) { module_config.set_num_partitions(execution_options->num_partitions()); } + module_config.set_use_spmd_partitioning( + execution_options->use_spmd_partitioning()); if (execution_options->has_device_assignment()) { TF_ASSIGN_OR_RETURN(std::unique_ptr device_assignment, DeviceAssignment::Deserialize( diff --git a/tensorflow/compiler/xla/service/hlo_module_config.h b/tensorflow/compiler/xla/service/hlo_module_config.h index b31a9ae6ca5..833d0fe59d0 100644 --- a/tensorflow/compiler/xla/service/hlo_module_config.h +++ b/tensorflow/compiler/xla/service/hlo_module_config.h @@ -128,6 +128,11 @@ class HloModuleConfig { } int64 num_partitions() const { return num_partitions_; } + void set_use_spmd_partitioning(bool use_spmd_partitioning) { + use_spmd_partitioning_ = use_spmd_partitioning; + } + bool use_spmd_partitioning() const { return use_spmd_partitioning_; } + // Return a string which unambiguously represents all the fields of this data // structure. Used for generating a cache key for storing the compiled // executable. 
@@ -199,6 +204,14 @@ class HloModuleConfig { std::vector>* mutable_dot_config() { return &dot_config_; } + absl::Span>> layout_config() const { + return layout_config_; + } + + std::vector>>* mutable_layout_config() { + return &layout_config_; + } + private: // If you add new members, be sure to update compilation_cache_key. @@ -216,6 +229,10 @@ class HloModuleConfig { // The number of partitions (model parallelism) to compile this binary for. int64 num_partitions_ = 1; + // Whether to use SPMD (true) or MPMD (false) when num_partitions_ > 0 and XLA + // needs to partition the module. + bool use_spmd_partitioning_ = false; + // The target maximum parallelism at which to partition HLOs for parallel // execution on the CPU backend. int64 intra_op_parallelism_threads_ = -1; @@ -232,6 +249,9 @@ class HloModuleConfig { FusionConfigCollection fusion_config_collection_ = FusionConfigCollection::kOff; + // TODO(b/155665133): Consolidate fusion, dot, and layout config into a proto + // similar to backend config. + // Custom fusion configuration, where fusion_config_[c][v] control if node v // in computation c must be fused to all its consumers (true) or not (false). std::vector> fusion_config_; @@ -240,6 +260,10 @@ class HloModuleConfig { // how to convert dot operation v (sorted topologically and by computation) to // convolution. std::vector> dot_config_; + + // Layout configuration, where layout_config_[v][i] controls the layout + // decision i of operation v. 
+ std::vector>> layout_config_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_opcode.h b/tensorflow/compiler/xla/service/hlo_opcode.h index 2d66237de59..664fa10a990 100644 --- a/tensorflow/compiler/xla/service/hlo_opcode.h +++ b/tensorflow/compiler/xla/service/hlo_opcode.h @@ -48,6 +48,7 @@ namespace xla { V(kAdd, "add", 2) \ V(kAddDependency, "add-dependency", 2) \ V(kAfterAll, "after-all", kHloOpcodeIsVariadic) \ + V(kAllGather, "all-gather", 1) \ V(kAllReduce, "all-reduce", kHloOpcodeIsVariadic) \ V(kAllToAll, "all-to-all", kHloOpcodeIsVariadic) \ V(kAtan2, "atan2", 2) \ diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index a9c3cacc4c4..2a90c95850c 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -850,6 +850,35 @@ bool HloParserImpl::ParseInstructionRhs(HloComputation::Builder* builder, HloInstruction::CreateBitcastConvert(shape, operands[0])); break; } + case HloOpcode::kAllGather: { + optional>> tmp_groups; + optional> replica_group_ids; + optional channel_id; + optional> dimensions; + optional constrain_layout; + optional use_global_device_ids; + attrs["replica_groups"] = {/*required=*/false, + AttrTy::kBracedInt64ListList, &tmp_groups}; + attrs["channel_id"] = {/*required=*/false, AttrTy::kInt64, &channel_id}; + attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, + &dimensions}; + attrs["constrain_layout"] = {/*required=*/false, AttrTy::kBool, + &constrain_layout}; + attrs["use_global_device_ids"] = {/*required=*/false, AttrTy::kBool, + &use_global_device_ids}; + if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { + return false; + } + std::vector replica_groups; + if (tmp_groups) { + replica_groups = CreateReplicaGroups(*tmp_groups); + } + instruction = builder->AddInstruction(HloInstruction::CreateAllGather( + shape, operands[0], dimensions->at(0), replica_groups, + constrain_layout ? 
*constrain_layout : false, channel_id, + use_global_device_ids ? *use_global_device_ids : false)); + break; + } case HloOpcode::kAllReduce: { optional>> tmp_groups; optional to_apply; diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc index 7e66b4e648d..e18014a3071 100644 --- a/tensorflow/compiler/xla/service/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc @@ -1480,6 +1480,43 @@ ENTRY CRS { )" }, +// all-gather +{ +"AllGather", +R"(HloModule AllGather + +ENTRY AllGather { + input = f32[128,32]{0,1} parameter(0) + ROOT ag = f32[128,128]{0,1} all-gather(input), replica_groups={}, dimensions={1} +} + +)" +}, +// all-gather with constrained layout +{ +"AllGatherWithLayout", +R"(HloModule AllGather + +ENTRY AllGather { + input = f32[128,32]{0,1} parameter(0) + ROOT ag = f32[128,128]{0,1} all-gather(input), replica_groups={}, constrain_layout=true, dimensions={1} +} + +)" +}, +// all-gather with subgroups +{ +"AllGatherWithSubgroups", +R"(HloModule AllGatherWithSubgroups + +ENTRY AllGatherWithSubgroups { + input = f32[128,32]{0,1} parameter(0) + ROOT ag = f32[128,64]{0,1} all-gather(input), replica_groups={{0,1},{2,3}}, dimensions={1} +} + +)", +/*replica_count=*/4, +}, // all-to-all { "AllToAll", diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index 0911af10f38..d15a36532eb 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -236,6 +236,40 @@ static Status CheckReplicaGroups(HloInstruction* hlo) { return Status::OK(); } +Status ShapeVerifier::HandleAllGather(HloInstruction* hlo) { + auto ag = Cast(hlo); + TF_RETURN_IF_ERROR(CheckReplicaGroups(ag)); + TF_RET_CHECK(ag->all_gather_dimension() >= 0); + TF_RET_CHECK(ag->all_gather_dimension() < ag->shape().rank()); + TF_RET_CHECK(ag->all_gather_dimension() < ag->operand(0)->shape().rank()); + if 
(ag->use_global_device_ids() && ag->replica_groups().empty()) { + return InternalError( + "Replica group must be specified when use_global_device_ids is true"); + } + + int64 shard_count = CeilOfRatio( + ag->shape().dimensions(ag->all_gather_dimension()), + ag->operand(0)->shape().dimensions(ag->all_gather_dimension())); + if (ag->channel_id().has_value()) { + if (ag->use_global_device_ids()) { + TF_RET_CHECK(shard_count == ag->replica_groups()[0].replica_ids_size()); + } else { + if (ag->replica_groups().empty() || + ag->replica_groups()[0].replica_ids_size() != 1) { + return InternalError( + "Replica group size must be 1 when use_global_device_ids is " + "false if the all-gather is also cross-partition"); + } + } + } else if (!ag->replica_groups().empty()) { + // Cross-replica all-gather: shard count is subgroup size. + TF_RET_CHECK(shard_count == ag->replica_groups()[0].replica_ids_size()); + } + return CheckShape(ag, ShapeInference::InferAllGatherShape( + ag->operand(0)->shape(), ag->all_gather_dimension(), + shard_count)); +} + Status ShapeVerifier::HandleAllReduce(HloInstruction* crs) { TF_RETURN_IF_ERROR(CheckReplicaGroups(crs)); @@ -628,9 +662,11 @@ Status ShapeVerifier::HandleBitcast(HloInstruction* bitcast) { shape_size_function_(bitcast->operand(0)->shape())) { return InternalError( "Bitcast cannot have different shape sizes of output (%d) and operand " - "(%d)", + "(%d) (%s) (%s)", shape_size_function_(bitcast->shape()), - shape_size_function_(bitcast->operand(0)->shape())); + shape_size_function_(bitcast->operand(0)->shape()), + bitcast->shape().ToString(true), + bitcast->operand(0)->shape().ToString(true)); } return Status::OK(); } @@ -697,11 +733,7 @@ Status ShapeVerifier::HandleFusion(HloInstruction* fusion) { } for (HloInstruction* fused_param : fused_parameters) { int64 param_no = fused_param->parameter_number(); - // Since fusion buffers aren't materialized, fusion parameters will not have - // the same memory space as the fusion operand. 
- if (!ShapesSame(fused_param->shape(), fusion->operand(param_no)->shape(), - /*minor_to_major_only=*/false, - /*ignore_memory_space=*/true)) { + if (!ShapesSame(fused_param->shape(), fusion->operand(param_no)->shape())) { return InternalError( "Shape mismatch between parameter number %d and its operand in " "%s.", diff --git a/tensorflow/compiler/xla/service/hlo_verifier.h b/tensorflow/compiler/xla/service/hlo_verifier.h index 2e83361a591..7a2d3dc2e6c 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.h +++ b/tensorflow/compiler/xla/service/hlo_verifier.h @@ -56,6 +56,7 @@ class ShapeVerifier : public DfsHloVisitor { Status HandleFft(HloInstruction* fft) override; Status HandleCholesky(HloInstruction* hlo) override; Status HandleTriangularSolve(HloInstruction* hlo) override; + Status HandleAllGather(HloInstruction* hlo) override; Status HandleAllReduce(HloInstruction* crs) override; Status HandleAllToAll(HloInstruction* hlo) override; Status HandleCollectivePermute(HloInstruction* hlo) override; diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc index 99242c9ca21..1bc3d24274c 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/instruction_fusion.cc @@ -145,6 +145,7 @@ bool IsAlwaysDuplicable(const HloInstruction& instruction) { case HloOpcode::kCholesky: case HloOpcode::kConditional: case HloOpcode::kConvolution: + case HloOpcode::kAllGather: case HloOpcode::kAllReduce: case HloOpcode::kAllToAll: case HloOpcode::kCollectivePermute: diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc index a67c677bd03..13699f3adf9 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/layout_assignment.cc @@ -951,7 +951,8 @@ Status LayoutAssignment::CheckLayouts(HloModule* module) { if (!Shape::Equal() .IgnoreDynamicDimension() 
.MinorToMajorOnlyInLayout()(instruction_subshape, - buffer->shape())) { + buffer->shape()) && + instruction->opcode() != HloOpcode::kBitcast) { return InternalError( "Layout of instruction %s at index {%s} does not match " "source LogicalBuffer %s: %s vs %s", @@ -1798,13 +1799,6 @@ Status LayoutAssignment::ClearComputationLayouts(HloComputation* computation) { // potential bugs in the layout assignment pass that may accidentally use the // existing layout. for (HloInstruction* instruction : computation->instructions()) { - if (instruction->opcode() == HloOpcode::kBitcast) { - // bitcasts are inherently layout sensitive and so a bitcast instruction - // present in the IR before layout assignment is a bug. - return InternalError( - "Unexpected bitcast operation seen during layout assignment: %s.", - instruction->ToString()); - } // Some instructions carry mandatory layouts in their shape. if (instruction->opcode() != HloOpcode::kInfeed && !IsLayoutConstrainedCustomCall(instruction) && @@ -2179,6 +2173,7 @@ bool LayoutAssignment::InstructionCanChangeLayout( case HloOpcode::kConditional: case HloOpcode::kConvert: case HloOpcode::kCos: + case HloOpcode::kAllGather: case HloOpcode::kAllToAll: case HloOpcode::kCollectivePermute: case HloOpcode::kDivide: diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc index 304a80c7a52..6e575247e6b 100644 --- a/tensorflow/compiler/xla/service/layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc @@ -814,27 +814,6 @@ TEST_F(LayoutAssignmentTest, ConditionalAsymmetricLayout) { EXPECT_THAT(false_result->opcode(), HloOpcode::kCopy); } -TEST_F(LayoutAssignmentTest, InternalErrorOnBitcast) { - auto builder = HloComputation::Builder(TestName()); - auto constant0 = builder.AddInstruction( - HloInstruction::CreateConstant(LiteralUtil::CreateR2WithLayout( - {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({0, 1})))); - 
builder.AddInstruction( - HloInstruction::CreateBitcast(constant0->shape(), constant0)); - auto m = CreateNewVerifiedModule(); - m->AddEntryComputation(builder.Build()); - - ComputationLayout computation_layout( - m->entry_computation()->ComputeProgramShape()); - LayoutAssignment layout_assignment(&computation_layout); - Status error_status = layout_assignment.Run(m.get()).status(); - EXPECT_FALSE(error_status.ok()); - EXPECT_THAT( - error_status.error_message(), - ::testing::HasSubstr( - "Unexpected bitcast operation seen during layout assignment")); -} - TEST_F(LayoutAssignmentTest, ChannelLayoutMismatch) { // Pin non matching layouts to parameter and root. const char* module_str = R"( diff --git a/tensorflow/compiler/xla/service/local_service.cc b/tensorflow/compiler/xla/service/local_service.cc index ef8ddfc1a76..c80646e0c70 100644 --- a/tensorflow/compiler/xla/service/local_service.cc +++ b/tensorflow/compiler/xla/service/local_service.cc @@ -112,6 +112,8 @@ ExecutionOptions CreateExecutionOptions( } execution_options.set_num_replicas(build_options.num_replicas()); execution_options.set_num_partitions(build_options.num_partitions()); + execution_options.set_use_spmd_partitioning( + build_options.use_spmd_partitioning()); if (build_options.has_device_assignment()) { TF_CHECK_OK(build_options.device_assignment().Serialize( execution_options.mutable_device_assignment())); diff --git a/tensorflow/compiler/xla/service/memory_space_propagation.cc b/tensorflow/compiler/xla/service/memory_space_propagation.cc new file mode 100644 index 00000000000..80eb4017477 --- /dev/null +++ b/tensorflow/compiler/xla/service/memory_space_propagation.cc @@ -0,0 +1,67 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/memory_space_propagation.h" + +namespace xla { + +StatusOr MemorySpacePropagation::Run(HloModule* module) { + bool modified = false; + TF_ASSIGN_OR_RETURN(auto dataflow_analysis, + HloDataflowAnalysis::Run(*module)); + dataflow_analysis_ = std::move(dataflow_analysis); + + for (HloComputation* computation : module->MakeNonfusionComputations()) { + for (HloInstruction* instruction : computation->instructions()) { + if (instruction->opcode() == HloOpcode::kFusion) { + // Propagate the operand subshapes. + for (int operand_idx = 0; operand_idx < instruction->operand_count(); + ++operand_idx) { + modified |= + PropagateSubshapes(instruction->operand(operand_idx)->shape(), + instruction->fused_parameter(operand_idx)); + } + + // Propagate output subshapes. 
+ modified |= PropagateSubshapes(instruction->shape(), + instruction->fused_expression_root()); + } + } + } + return modified; +} + +bool MemorySpacePropagation::PropagateSubshapes( + const Shape& caller_shape, const HloInstruction* callee_instruction) const { + bool modified = false; + for (const ShapeUtil::IndexedShape& indexed_shape : + ShapeUtil::GetLeafShapes(caller_shape)) { + int64 memory_space = indexed_shape.shape.layout().memory_space(); + const HloValue& value = dataflow_analysis_->GetUniqueValueAt( + callee_instruction, indexed_shape.index); + + for (const HloPosition& position : value.positions()) { + Shape* shape = ShapeUtil::GetMutableSubshape( + position.instruction->mutable_shape(), position.index); + if (shape->layout().memory_space() != memory_space) { + shape->mutable_layout()->set_memory_space(memory_space); + modified = true; + } + } + } + return modified; +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/service/memory_space_propagation.h b/tensorflow/compiler/xla/service/memory_space_propagation.h new file mode 100644 index 00000000000..65a1dfd14a6 --- /dev/null +++ b/tensorflow/compiler/xla/service/memory_space_propagation.h @@ -0,0 +1,46 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_MEMORY_SPACE_PROPAGATION_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_MEMORY_SPACE_PROPAGATION_H_ + +#include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" + +namespace xla { + +// This is a legalization pass that propagates the memory space in the layout to +// the fusion computations. +class MemorySpacePropagation : public HloModulePass { + public: + ~MemorySpacePropagation() override = default; + absl::string_view name() const override { return "memory-space-propagation"; } + StatusOr Run(HloModule* module) override; + + private: + // Given the caller shape (operand or output) and its corresponding + // instruction in the fused computation (parameter or root), propagates the + // memory space to all the subshapes in the callee side. Returns true if the + // module is modified. + bool PropagateSubshapes(const Shape& caller_shape, + const HloInstruction* callee_instruction) const; + + std::unique_ptr dataflow_analysis_; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_MEMORY_SPACE_PROPAGATION_H_ diff --git a/tensorflow/compiler/xla/service/memory_space_propagation_test.cc b/tensorflow/compiler/xla/service/memory_space_propagation_test.cc new file mode 100644 index 00000000000..8d74958f6aa --- /dev/null +++ b/tensorflow/compiler/xla/service/memory_space_propagation_test.cc @@ -0,0 +1,203 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/memory_space_propagation.h" + +#include "tensorflow/compiler/xla/service/hlo_parser.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/core/lib/core/status_test_util.h" + +namespace xla { +namespace { + +class MemorySpacePropagationTest : public HloTestBase { + public: + MemorySpacePropagationTest() + : HloTestBase(), + verifier_(/*layout_sensitive=*/false, /*allow_mixed_precision*/ false) { + } + + Status Verify(HloModule* module) { return verifier_.Run(module).status(); } + + private: + HloVerifier verifier_; +}; + +TEST_F(MemorySpacePropagationTest, NoMemorySpace) { + absl::string_view hlo_string = R"( + HloModule NoMemorySpace + + %fused_computation { + %param_1.3 = s32[1]{0:T(128)} parameter(1) + %constant.2 = s32[]{:T(128)} constant(-2147483648) + %pad.2 = s32[6]{0:T(128)} pad(s32[1]{0:T(128)} %param_1.3, s32[]{:T(128)} %constant.2), padding=0_5 + %param_2.3 = s32[5]{0:T(128)} parameter(2) + %pad.3 = s32[6]{0:T(128)} pad(s32[5]{0:T(128)} %param_2.3, s32[]{:T(128)} %constant.2), padding=1_0 + %maximum.1 = s32[6]{0:T(128)} maximum(s32[6]{0:T(128)} %pad.2, s32[6]{0:T(128)} %pad.3) + %param_0.1 = s32[6]{0:T(128)} parameter(0) + ROOT %add.0 = s32[6]{0:T(128)} add(s32[6]{0:T(128)} %maximum.1, s32[6]{0:T(128)} %param_0.1) + } + + ENTRY %entry { + %param0 = s32[6]{0:T(128)} parameter(0) + %param1 = s32[1]{0:T(128)} parameter(1) + %param2 = s32[5]{0:T(128)} parameter(2) + %arg0 = s32[6]{0:T(128)} copy(%param0) + %arg1 
= s32[1]{0:T(128)} copy(%param1) + %arg2 = s32[5]{0:T(128)} copy(%param2) + %fusion = s32[6]{0:T(128)} fusion(s32[6]{0:T(128)} %arg0, s32[1]{0:T(128)} %arg1, s32[5]{0:T(128)} %arg2), kind=kLoop, calls=%fused_computation + ROOT %root = s32[6]{0:T(128)} copy(%fusion) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + MemorySpacePropagation memory_space_propagation; + EXPECT_FALSE(memory_space_propagation.Run(module.get()).ValueOrDie()); + TF_ASSERT_OK_AND_ASSIGN(auto ref, ParseAndReturnVerifiedModule(hlo_string)); + EXPECT_EQ(module->Hash(), ref->Hash()); +} + +TEST_F(MemorySpacePropagationTest, NonTupleOutput) { + absl::string_view hlo_string = R"( + HloModule NonTupleOutput + + %fused_computation { + %param_1.3 = s32[1]{0:T(128)} parameter(1) + %constant.2 = s32[]{:T(128)} constant(-2147483648) + %pad.2 = s32[6]{0:T(128)} pad(s32[1]{0:T(128)} %param_1.3, s32[]{:T(128)} %constant.2), padding=0_5 + %param_2.3 = s32[5]{0:T(128)} parameter(2) + %pad.3 = s32[6]{0:T(128)} pad(s32[5]{0:T(128)} %param_2.3, s32[]{:T(128)} %constant.2), padding=1_0 + %maximum.1 = s32[6]{0:T(128)} maximum(s32[6]{0:T(128)} %pad.2, s32[6]{0:T(128)} %pad.3) + %param_0.1 = s32[6]{0:T(128)} parameter(0) + ROOT %add.0 = s32[6]{0:T(128)} add(s32[6]{0:T(128)} %maximum.1, s32[6]{0:T(128)} %param_0.1) + } + + ENTRY %entry { + %param0 = s32[6]{0:T(128)} parameter(0) + %param1 = s32[1]{0:T(128)} parameter(1) + %param2 = s32[5]{0:T(128)} parameter(2) + %arg0 = s32[6]{0:T(128)S(1)} copy(%param0) + %arg1 = s32[1]{0:T(128)} copy(%param1) + %arg2 = s32[5]{0:T(128)S(1)} copy(%param2) + %fusion = s32[6]{0:T(128)S(1)} fusion(s32[6]{0:T(128)S(1)} %arg0, s32[1]{0:T(128)} %arg1, s32[5]{0:T(128)S(1)} %arg2), kind=kLoop, calls=%fused_computation + ROOT %root = s32[6]{0:T(128)} copy(%fusion) + } + )"; + absl::string_view expected_hlo_string = R"( + HloModule NonTupleOutput + + %fused_computation { + %param_1.3 = s32[1]{0:T(128)} parameter(1) + %constant.2 = s32[]{:T(128)} 
constant(-2147483648) + %pad.2 = s32[6]{0:T(128)} pad(s32[1]{0:T(128)} %param_1.3, s32[]{:T(128)} %constant.2), padding=0_5 + %param_2.3 = s32[5]{0:T(128)S(1)} parameter(2) + %pad.3 = s32[6]{0:T(128)} pad(s32[5]{0:T(128)} %param_2.3, s32[]{:T(128)} %constant.2), padding=1_0 + %maximum.1 = s32[6]{0:T(128)} maximum(s32[6]{0:T(128)} %pad.2, s32[6]{0:T(128)} %pad.3) + %param_0.1 = s32[6]{0:T(128)S(1)} parameter(0) + ROOT %add.0 = s32[6]{0:T(128)S(1)} add(s32[6]{0:T(128)} %maximum.1, s32[6]{0:T(128)} %param_0.1) + } + + ENTRY %entry { + %param0 = s32[6]{0:T(128)} parameter(0) + %param1 = s32[1]{0:T(128)} parameter(1) + %param2 = s32[5]{0:T(128)} parameter(2) + %arg0 = s32[6]{0:T(128)S(1)} copy(%param0) + %arg1 = s32[1]{0:T(128)} copy(%param1) + %arg2 = s32[5]{0:T(128)S(1)} copy(%param2) + %fusion = s32[6]{0:T(128)S(1)} fusion(s32[6]{0:T(128)S(1)} %arg0, s32[1]{0:T(128)} %arg1, s32[5]{0:T(128)S(1)} %arg2), kind=kLoop, calls=%fused_computation + ROOT %root = s32[6]{0:T(128)} copy(%fusion) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnUnverifiedModule(hlo_string)); + MemorySpacePropagation memory_space_propagation; + EXPECT_TRUE(memory_space_propagation.Run(module.get()).ValueOrDie()); + TF_EXPECT_OK(Verify(module.get())); + TF_ASSERT_OK_AND_ASSIGN(auto ref, + ParseAndReturnVerifiedModule(expected_hlo_string)); + EXPECT_EQ(module->Hash(), ref->Hash()); +} + +TEST_F(MemorySpacePropagationTest, TupleOutput) { + absl::string_view hlo_string = R"( + HloModule TupleOutput + + %fused_computation { + %param_1.3 = s32[1]{0:T(128)} parameter(1) + %constant.2 = s32[]{:T(128)} constant(-2147483648) + %pad.2 = s32[6]{0:T(128)} pad(s32[1]{0:T(128)} %param_1.3, s32[]{:T(128)} %constant.2), padding=0_5 + %param_2.3 = s32[5]{0:T(128)} parameter(2) + %pad.3 = s32[6]{0:T(128)} pad(s32[5]{0:T(128)} %param_2.3, s32[]{:T(128)} %constant.2), padding=1_0 + %maximum.1 = s32[6]{0:T(128)} maximum(s32[6]{0:T(128)} %pad.2, s32[6]{0:T(128)} %pad.3) + %param_0.1 = s32[6]{0:T(128)} 
parameter(0) + %add.0 = s32[6]{0:T(128)} add(s32[6]{0:T(128)} %maximum.1, s32[6]{0:T(128)} %param_0.1) + %multiply.0 = s32[6]{0:T(128)} multiply(s32[6]{0:T(128)} %maximum.1, s32[6]{0:T(128)} %param_0.1) + ROOT %tuple = (s32[6]{0:T(128)}, s32[6]{0:T(128)}) tuple(%add.0, %multiply.0) + } + + ENTRY %entry { + %param0 = s32[6]{0:T(128)} parameter(0) + %param1 = s32[1]{0:T(128)} parameter(1) + %param2 = s32[5]{0:T(128)} parameter(2) + %arg0 = s32[6]{0:T(128)S(1)} copy(%param0) + %arg1 = s32[1]{0:T(128)} copy(%param1) + %arg2 = s32[5]{0:T(128)S(1)} copy(%param2) + %fusion = (s32[6]{0:T(128)S(1)}, s32[6]{0:T(128)}) fusion(s32[6]{0:T(128)S(1)} %arg0, s32[1]{0:T(128)} %arg1, s32[5]{0:T(128)S(1)} %arg2), kind=kLoop, calls=%fused_computation + %gte0 = s32[6]{0:T(128)S(1)} get-tuple-element(%fusion), index=0 + %gte1 = s32[6]{0:T(128)} get-tuple-element(%fusion), index=1 + ROOT %root = s32[6]{0:T(128)} add(%gte0, %gte1) + } + )"; + absl::string_view expected_hlo_string = R"( + HloModule TupleOutput + + %fused_computation { + %param_1.3 = s32[1]{0:T(128)} parameter(1) + %constant.2 = s32[]{:T(128)} constant(-2147483648) + %pad.2 = s32[6]{0:T(128)} pad(s32[1]{0:T(128)} %param_1.3, s32[]{:T(128)} %constant.2), padding=0_5 + %param_2.3 = s32[5]{0:T(128)S(1)} parameter(2) + %pad.3 = s32[6]{0:T(128)} pad(s32[5]{0:T(128)} %param_2.3, s32[]{:T(128)} %constant.2), padding=1_0 + %maximum.1 = s32[6]{0:T(128)} maximum(s32[6]{0:T(128)} %pad.2, s32[6]{0:T(128)} %pad.3) + %param_0.1 = s32[6]{0:T(128)S(1)} parameter(0) + %add.0 = s32[6]{0:T(128)S(1)} add(s32[6]{0:T(128)} %maximum.1, s32[6]{0:T(128)} %param_0.1) + %multiply.0 = s32[6]{0:T(128)} multiply(s32[6]{0:T(128)} %maximum.1, s32[6]{0:T(128)} %param_0.1) + ROOT %tuple = (s32[6]{0:T(128)S(1)}, s32[6]{0:T(128)}) tuple(%add.0, %multiply.0) + } + + ENTRY %entry { + %param0 = s32[6]{0:T(128)} parameter(0) + %param1 = s32[1]{0:T(128)} parameter(1) + %param2 = s32[5]{0:T(128)} parameter(2) + %arg0 = s32[6]{0:T(128)S(1)} copy(%param0) + %arg1 = 
s32[1]{0:T(128)} copy(%param1) + %arg2 = s32[5]{0:T(128)S(1)} copy(%param2) + %fusion = (s32[6]{0:T(128)S(1)}, s32[6]{0:T(128)}) fusion(s32[6]{0:T(128)S(1)} %arg0, s32[1]{0:T(128)} %arg1, s32[5]{0:T(128)S(1)} %arg2), kind=kLoop, calls=%fused_computation + %gte0 = s32[6]{0:T(128)S(1)} get-tuple-element(%fusion), index=0 + %gte1 = s32[6]{0:T(128)} get-tuple-element(%fusion), index=1 + ROOT %root = s32[6]{0:T(128)} add(%gte0, %gte1) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnUnverifiedModule(hlo_string)); + MemorySpacePropagation memory_space_propagation; + EXPECT_TRUE(memory_space_propagation.Run(module.get()).ValueOrDie()); + TF_EXPECT_OK(Verify(module.get())); + TF_ASSERT_OK_AND_ASSIGN(auto ref, + ParseAndReturnVerifiedModule(expected_hlo_string)); + EXPECT_EQ(module->Hash(), ref->Hash()); +} + +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/service/mlir_gpu/BUILD b/tensorflow/compiler/xla/service/mlir_gpu/BUILD index cd679f7412e..a57e4300d6e 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/BUILD +++ b/tensorflow/compiler/xla/service/mlir_gpu/BUILD @@ -185,11 +185,11 @@ cc_library( "@llvm-project//mlir:LinalgOps", "@llvm-project//mlir:LinalgToLLVM", "@llvm-project//mlir:LinalgTransforms", - "@llvm-project//mlir:LoopOps", - "@llvm-project//mlir:LoopOpsTransforms", "@llvm-project//mlir:LoopsToGPUPass", "@llvm-project//mlir:NVVMDialect", "@llvm-project//mlir:Pass", + "@llvm-project//mlir:SCFDialect", + "@llvm-project//mlir:SCFTransforms", "@llvm-project//mlir:StandardOps", "@llvm-project//mlir:Support", "@llvm-project//mlir:Transforms", diff --git a/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc b/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc index 33d3690d4ab..847ad918308 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc +++ b/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc @@ -31,9 +31,9 @@ limitations under the License. 
#include "mlir/Dialect/LLVMIR/NVVMDialect.h" // from @llvm-project #include "mlir/Dialect/Linalg/IR/LinalgOps.h" // from @llvm-project #include "mlir/Dialect/Linalg/Passes.h" // from @llvm-project -#include "mlir/Dialect/LoopOps/LoopOps.h" // from @llvm-project -#include "mlir/Dialect/LoopOps/Passes.h" // from @llvm-project -#include "mlir/Dialect/LoopOps/Transforms.h" // from @llvm-project +#include "mlir/Dialect/SCF/Passes.h" // from @llvm-project +#include "mlir/Dialect/SCF/SCF.h" // from @llvm-project +#include "mlir/Dialect/SCF/Transforms.h" // from @llvm-project #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/BlockAndValueMapping.h" // from @llvm-project @@ -45,6 +45,7 @@ limitations under the License. #include "mlir/IR/Region.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Pass/PassManager.h" // from @llvm-project +#include "mlir/Transforms/BufferPlacement.h" // from @llvm-project #include "mlir/Transforms/DialectConversion.h" // from @llvm-project #include "mlir/Transforms/LoopUtils.h" // from @llvm-project #include "mlir/Transforms/Passes.h" // from @llvm-project @@ -60,34 +61,6 @@ namespace { using ::mlir::xla_lhlo::FusionOp; -// Following are some small transformations that are required to clean up code -// after lowering from linalg to loops. - -// A simple pass that applies lowering of HLO to LHLO only within LHLO ops that -// contain regions with HLO ops, e.g. FusionOp, ReduceOp, SelectAndScatterOp. -// This is needed, as these ops are not closed from above and hence nested pass -// managers can not be applied. 
-struct NestedHloRegionsConverter - : public mlir::PassWrapper { - void runOnFunction() override { - auto& ctx = getContext(); - mlir::OwningRewritePatternList patterns; - mlir::ConversionTarget target(ctx); - target.addLegalDialect<::mlir::xla_lhlo::XlaLhloDialect>(); - ::mlir::xla_hlo::populateHLOToLHLOConversionPattern(&ctx, &patterns); - - getFunction().walk([&](mlir::Operation* op) { - if (op->getNumRegions() == 0) { - return; - } - if (failed(applyPartialConversion(op, target, patterns, nullptr))) { - signalPassFailure(); - } - }); - } -}; - // Replaces a FusionOp by the operations contained in its region. struct FusionOpRemover : public mlir::PassWrapper { @@ -132,7 +105,7 @@ struct StoreForwardingPass // No store operation found. Continue search outside of the parallel // loop if block is in a parallel loop. if (auto parallelOp = - llvm::dyn_cast(block->getParentOp())) { + llvm::dyn_cast(block->getParentOp())) { return findStore(parallelOp.getOperation(), matches); } return {}; @@ -388,8 +361,8 @@ struct MapParallelLoops struct FuseInnerParallelLoops : public mlir::PassWrapper { void runOnFunction() override { - getFunction().walk([](mlir::loop::ParallelOp op) { - mlir::loop::naivelyFuseParallelOps(op.region()); + getFunction().walk([](mlir::scf::ParallelOp op) { + mlir::scf::naivelyFuseParallelOps(op.region()); }); } }; @@ -401,7 +374,7 @@ struct ParallelLoopCollapsingToFirstDim void runOnOperation() override { mlir::Operation* module = getOperation(); - module->walk([&](mlir::loop::ParallelOp op) { + module->walk([&](mlir::scf::ParallelOp op) { unsigned num_loops = op.getNumLoops(); std::vector combinedLoops; combinedLoops.reserve(num_loops); @@ -436,8 +409,10 @@ Status LowerLHLOToGPU(mlir::ModuleOp module, tiling_for_unrolling.append(tile_sizes.begin(), tile_sizes.end()); } - // First, lower bodies of LHLO operations that contain HLO ops. - pm.addPass(absl::make_unique()); + // Legalize from HLO to LHLO. 
+ pm.addPass(::mlir::xla_hlo::createLegalizeToLhloPass()); + // Moving `AllocOp`s and inserting missing `DeallocOp`s + pm.addPass(::mlir::createBufferPlacementPass()); // Next, we can strip the outer fusion operation. pm.addPass(absl::make_unique()); // Remove unnecessary LHLO copies. diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index ab71c30dcae..2ed5e709d81 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -313,6 +313,8 @@ StatusOr> Service::CreateModuleConfig( if (execution_options->num_partitions() > 0) { config->set_num_partitions(execution_options->num_partitions()); } + config->set_use_spmd_partitioning( + execution_options->use_spmd_partitioning()); config->set_seed(execution_options->seed()); config->set_launch_id(execution_options->launch_id()); config->set_debug_options(execution_options->debug_options()); diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index f3c8eec1751..8d6ef9faba9 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -1999,6 +1999,17 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, return a; } +/* static */ StatusOr ShapeInference::InferAllGatherShape( + const Shape& operand_shape, int64 all_gather_dimension, int64 shard_count) { + TF_RET_CHECK(all_gather_dimension >= 0); + TF_RET_CHECK(all_gather_dimension < operand_shape.rank()); + TF_RET_CHECK(shard_count > 0); + auto shape = operand_shape; + shape.set_dimensions(all_gather_dimension, + shard_count * shape.dimensions(all_gather_dimension)); + return shape; +} + /* static */ StatusOr ShapeInference::InferAllReduceShape( absl::Span operand_shapes) { for (const Shape* operand_shape : operand_shapes) { diff --git a/tensorflow/compiler/xla/service/shape_inference.h
b/tensorflow/compiler/xla/service/shape_inference.h index 2e96a77aa22..2cb5930d098 100644 --- a/tensorflow/compiler/xla/service/shape_inference.h +++ b/tensorflow/compiler/xla/service/shape_inference.h @@ -123,6 +123,12 @@ class ShapeInference { // Infers the shape produced by the given triangular solve operation. static StatusOr InferCholeskyShape(const Shape& a); + // Infers the shape produced by an all-gather with the given operand shape, + // concat dimension, and shard count. + static StatusOr InferAllGatherShape(const Shape& operand_shape, + int64 all_gather_dimension, + int64 shard_count); + // Infers the shape produced by a cross replica sum with the given operand // shapes. static StatusOr InferAllReduceShape( diff --git a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc index 2d33184b7d0..1111811d3a3 100644 --- a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc +++ b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc @@ -300,7 +300,7 @@ WhileLoopInvariantCodeMotion::TryHoistingInvariantInstructionsFromWhileBody( } StatusOr WhileLoopInvariantCodeMotion::Run(HloModule* module) { - VLOG(2) << "HLO module before WhileLoopConstantSinking:"; + VLOG(2) << "HLO module before WhileLoopInvariantCodeMotion:"; XLA_VLOG_LINES(2, module->ToString()); bool changed = false; @@ -332,10 +332,10 @@ StatusOr WhileLoopInvariantCodeMotion::Run(HloModule* module) { } if (changed) { - VLOG(2) << "HLO module after WhileLoopConstantSinking:"; + VLOG(2) << "HLO module after WhileLoopInvariantCodeMotion:"; XLA_VLOG_LINES(2, module->ToString()); } else { - VLOG(2) << "HLO module unchanged after WhileLoopConstantSinking"; + VLOG(2) << "HLO module unchanged after WhileLoopInvariantCodeMotion"; } return changed; diff --git a/tensorflow/compiler/xla/tests/while_test.cc b/tensorflow/compiler/xla/tests/while_test.cc index 5a482305513..d575bbb1f3e 100644 --- 
a/tensorflow/compiler/xla/tests/while_test.cc +++ b/tensorflow/compiler/xla/tests/while_test.cc @@ -863,7 +863,7 @@ XLA_TEST_F(WhileTest, WhileWithDynamicUpdateSlice) { // Starts = iteration * 2; auto starts = Mul(iteration, ConstantR0(&builder, 2)); // UpdateSlice. - auto out1 = DynamicUpdateSlice(input, update, starts); + auto out1 = DynamicUpdateSlice(input, update, {starts}); Tuple(&builder, {out0, out1}); body = builder.Build().ConsumeValueOrDie(); diff --git a/tensorflow/compiler/xla/xla.proto b/tensorflow/compiler/xla/xla.proto index a015af674af..f4b08f454b9 100644 --- a/tensorflow/compiler/xla/xla.proto +++ b/tensorflow/compiler/xla/xla.proto @@ -333,6 +333,10 @@ message ExecutionOptions { // Used to identify a set of programs that should be launch together. int32 launch_id = 10; + + // Indicates whether to use SPMD (true) or MPMD (false) partitioning when + // num_partitions > 1 and XLA is requested to partition the input program. + bool use_spmd_partitioning = 11; } message GetDeviceHandlesRequest { diff --git a/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc b/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc index 1bcd8561e61..ba6e6a093d6 100644 --- a/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc +++ b/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc @@ -158,7 +158,7 @@ Status XRTCompileOp::Compile(OpKernelContext* ctx, argument_layout_ptrs[i] = &argument_layouts[i]; } xla::ExecutableBuildOptions build_options; - build_options.set_device_ordinal(client->default_device_ordinal()); + build_options.set_device_ordinal(device_ref.device_ordinal()); build_options.set_num_replicas(num_replicas); build_options.set_result_layout(xla::Shape(config.program_shape().result())); build_options.set_device_allocator(device_ref.backend()->memory_allocator()); @@ -206,7 +206,8 @@ void XRTCompileOp::Compute(OpKernelContext* ctx) { OP_REQUIRES_OK(ctx, CompilationCacheKey(computation_proto, &key)); // Process-wide cache of XLA executables. 
- auto cache_or = GetOrCreateCompilationCache(rm, /*max_number_of_entries=*/0); + auto cache_or = XRTGenericDeviceAccessor::GetOrCreateCompilationCache( + ctx, /*max_number_of_entries=*/0); OP_REQUIRES_OK(ctx, cache_or.status()); auto cache = cache_or.ConsumeValueOrDie(); @@ -259,15 +260,11 @@ void XRTReleaseCompilationRefOp::Compute(OpKernelContext* ctx) { VLOG(1) << "XRTReleaseCompilationRefOp::Compute"; auto timed = monitoring::MakeTimed(xrt_metrics::GetReleaseCompilationCell()); - ResourceMgr* rm; - OP_REQUIRES_OK(ctx, XRTGenericDeviceAccessor::GetResourceManager(ctx, &rm)); - // Process-wide cache of XLA executables. - XRTCompilationCache* cache; - OP_REQUIRES_OK(ctx, rm->Lookup( - rm->default_container(), - kXRTCompilationCacheResourceName, &cache)); - core::ScopedUnref cache_unref(cache); + auto cache_or = XRTGenericDeviceAccessor::GetOrCreateCompilationCache( + ctx, /*max_number_of_entries=*/0); + OP_REQUIRES_OK(ctx, cache_or.status()); + auto cache = cache_or.ConsumeValueOrDie(); const Tensor& keys_tensor = ctx->input(0); auto flat_keys = keys_tensor.flat(); diff --git a/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc b/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc index b641f333e8b..d39b37387f2 100644 --- a/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc +++ b/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc @@ -149,13 +149,17 @@ xla::StatusOr GetChainedOpInputs( xla::StatusOr> RunExecutable( OpKernelContext* context, XRTGenericDeviceAccessor::ScopedRef* device_ref, xla::LocalExecutable* executable, const InputBuffers& input_buffers, - se::Stream* stream, int rng_seed, int replica_id) { + se::Stream* stream, int rng_seed, + const xrt::CommonExecutionConfig& config) { VLOG(2) << "Executing computation."; xla::ExecutableRunOptions run_options; run_options.set_stream(stream); run_options.set_allocator(device_ref->backend()->memory_allocator()); run_options.set_intra_op_thread_pool(&context->eigen_cpu_device()); run_options.set_rng_seed(rng_seed); + 
if (config.run_id() != 0) { + run_options.set_run_id(xla::RunId(config.run_id())); + } if (executable->executable() ->module_config() .has_static_device_assignment()) { @@ -164,8 +168,11 @@ xla::StatusOr> RunExecutable( } xla::GpuExecutableRunOptions gpu_options; std::vector gpu_global_ids; - if (replica_id >= 0) { - gpu_global_ids.emplace_back(replica_id); + if (config.local_replica_mapping_size() > 0) { + gpu_global_ids.reserve(config.local_replica_mapping_size()); + for (auto& gid : config.local_replica_mapping()) { + gpu_global_ids.emplace_back(xla::GlobalDeviceId(gid)); + } gpu_options.set_gpu_global_device_ids(gpu_global_ids); } std::shared_ptr nccl_factory = GetNcclUniqueIdFactory(); @@ -222,10 +229,11 @@ xla::StatusOr> ExecuteComputation( OpKernelContext* context, XRTMemoryManager* memory_manager, XRTGenericDeviceAccessor::ScopedRef* device_ref, xla::LocalExecutable* executable, const InputBuffers& input_buffers, - se::Stream* stream, int rng_seed, int replica_id) { + se::Stream* stream, int rng_seed, + const xrt::CommonExecutionConfig& config) { auto runfn = [&]() { return RunExecutable(context, device_ref, executable, input_buffers, stream, - rng_seed, replica_id); + rng_seed, config); }; // We pass zero as requested_free_size as there is no simple way to get the @@ -241,14 +249,15 @@ xla::StatusOr> ExecuteComputation( XRTGenericDeviceAccessor::ScopedRef* device_ref, xla::LocalExecutable* executable, const std::vector& input_coords, bool release_inputs, - se::Stream* stream, int rng_seed, int replica_id) { + se::Stream* stream, int rng_seed, + const xrt::CommonExecutionConfig& config) { XRTMemoryManager::WorkingSet working_set(memory_manager); TF_ASSIGN_OR_RETURN(InputBuffers input_buffers, GetInputBuffers(&working_set, device_ref->backend(), input_coords, release_inputs)); return ExecuteComputation(context, memory_manager.get(), device_ref, executable, input_buffers, stream, rng_seed, - replica_id); + config); } // XRTExecuteOp @@ -297,8 +306,9 @@ Status 
XRTExecuteOp::DoWork(OpKernelContext* context) { bool release_inputs = config_proto.release_input_handles(); bool release_compilation = config_proto.release_compilation_handle(); - TF_ASSIGN_OR_RETURN( - auto cache, GetOrCreateCompilationCache(rm, /*max_number_of_entries=*/0)); + TF_ASSIGN_OR_RETURN(auto cache, + XRTGenericDeviceAccessor::GetOrCreateCompilationCache( + context, /*max_number_of_entries=*/0)); // We are guaranteed that the underlying device object won't be deleted out // from under us, while the ScopedRef is live. class XRTGenericDeviceAccessor::ScopedRef device_ref; @@ -330,7 +340,7 @@ Status XRTExecuteOp::DoWork(OpKernelContext* context) { RefPtr output_tuple, ExecuteComputation(context, memory_manager, &device_ref, executable, input_coords, release_inputs, stream, rng_seed, - config_proto.replica_id())); + config_proto.common_config())); return CreateExecuteOutput(context, memory_manager.get(), std::move(output_tuple), @@ -379,8 +389,9 @@ Status XRTExecuteChainedOp::DoWork(OpKernelContext* context) { xrt::XRTChainedExecuteConfig config; TF_RET_CHECK(ParseFromTString(execution_config.scalar()(), &config)); - TF_ASSIGN_OR_RETURN( - auto cache, GetOrCreateCompilationCache(rm, /*max_number_of_entries=*/0)); + TF_ASSIGN_OR_RETURN(auto cache, + XRTGenericDeviceAccessor::GetOrCreateCompilationCache( + context, /*max_number_of_entries=*/0)); // We are guaranteed that the underlying device object won't be deleted out // from under us, while the ScopedRef is live. 
class XRTGenericDeviceAccessor::ScopedRef device_ref; @@ -408,7 +419,7 @@ Status XRTExecuteChainedOp::DoWork(OpKernelContext* context) { return ExecuteComputation(context, memory_manager.get(), &device_ref, executable, input_buffers, stream, rng_seed, - config.replica_id()); + config.common_config()); }; return ExecuteChained(context, memory_manager, device_ref.backend(), diff --git a/tensorflow/compiler/xrt/xrt.proto b/tensorflow/compiler/xrt/xrt.proto index 1cbd851f7ef..9a351732c4b 100644 --- a/tensorflow/compiler/xrt/xrt.proto +++ b/tensorflow/compiler/xrt/xrt.proto @@ -111,6 +111,17 @@ message XLATupleNode { repeated XLATupleNode tuples = 3; } +message CommonExecutionConfig { + // The replica index this execute is driving. + int32 replica_id = 1; + // Mapping local device ordinals to global replica IDs. + // local_replica_mapping[LOCAL_DEVICE_ORDINAL] = GLOBAL_REPLICA_ID + repeated int32 local_replica_mapping = 2; + // The execution run ID used to correlate different XRT execute operations + // happening in parallel from different threads. + int64 run_id = 3; +} + // Options for an XLA execution. message XRTExecutionConfig { // Local device to run on. This is present because the execute Op @@ -133,8 +144,9 @@ message XRTExecutionConfig { // a single tuple allocation the execution will return a vector of // allocations, one for each of the first-level elements of the result tuple. bool return_exploded_tuple = 7; - // The replica index this execute is driving. - int32 replica_id = 8; + reserved 8; + // The common configuration for XRT execute operations. + CommonExecutionConfig common_config = 9; } message XRTChainedExecuteConfig { @@ -145,8 +157,9 @@ message XRTChainedExecuteConfig { // Optional key to disambiguate between executions. This is only needed if // multiple host send/recvs may be outstanding concurrently with executions. string execution_instance_key = 3; - // The replica index this execute is driving.
- int32 replica_id = 4; + reserved 4; + // The common configuration for XRT execute operations. + CommonExecutionConfig common_config = 5; } // A single chained execute operation. An operation can either be a device data diff --git a/tensorflow/compiler/xrt/xrt_device.cc b/tensorflow/compiler/xrt/xrt_device.cc index 1b5557d556d..46954572c5d 100644 --- a/tensorflow/compiler/xrt/xrt_device.cc +++ b/tensorflow/compiler/xrt/xrt_device.cc @@ -17,19 +17,56 @@ limitations under the License. #include "tensorflow/compiler/xrt/xrt_device.h" +#include + #include "tensorflow/compiler/jit/xla_device.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/mutex.h" namespace tensorflow { +namespace { + +class ResourceMgrArena { + public: + static ResourceMgrArena* Get() { + static ResourceMgrArena* arena = new ResourceMgrArena(); + return arena; + } + + ResourceMgr* GetResourceMgr(const std::string& platform_name) { + mutex_lock lock(mutex_); + auto it = resource_managers_.find(platform_name); + if (it == resource_managers_.end()) { + it = resource_managers_.emplace(platform_name, new ResourceMgr()).first; + } + return it->second; + } + + private: + mutex mutex_; + std::map resource_managers_; +}; + +} // namespace /*static*/ Status XRTGenericDeviceAccessor::GetResourceManager( OpKernelContext* ctx, ResourceMgr** rm) { - *rm = ctx->resource_manager(); + const XlaDevice::Metadata* metadata; + TF_RETURN_IF_ERROR(XlaDevice::GetMetadata(ctx, &metadata)); + *rm = ResourceMgrArena::Get()->GetResourceMgr(metadata->platform()->Name()); return Status::OK(); } +/* static */ xla::StatusOr> +XRTGenericDeviceAccessor::GetOrCreateCompilationCache( + OpKernelContext* ctx, int64 max_number_of_entries) { + ResourceMgr* rm; + TF_RETURN_IF_ERROR(GetResourceManager(ctx, &rm)); + return tensorflow::GetOrCreateCompilationCache(rm, max_number_of_entries); +} + /*static*/ 
Status XRTGenericDeviceAccessor::InitScopedRef( OpKernelContext* ctx, int device_ordinal, ScopedRef* scoped_ref) { const XlaDevice::Metadata* metadata; diff --git a/tensorflow/compiler/xrt/xrt_device.h b/tensorflow/compiler/xrt/xrt_device.h index 5ebee7641f0..02fab315830 100644 --- a/tensorflow/compiler/xrt/xrt_device.h +++ b/tensorflow/compiler/xrt/xrt_device.h @@ -19,6 +19,7 @@ limitations under the License. #define TENSORFLOW_COMPILER_XRT_XRT_DEVICE_H_ #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xrt/xrt_compilation_cache.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/resource_mgr.h" @@ -31,6 +32,9 @@ class XRTGenericDeviceAccessor { public: static Status GetResourceManager(OpKernelContext* ctx, ResourceMgr** rm); + static xla::StatusOr> GetOrCreateCompilationCache( + OpKernelContext* ctx, int64 max_number_of_entries); + // We use a ScopedRef pattern here even though it's not strictly necessary, // just so that templated uses of this and the TPU accessor class will be as // similar as possible. 
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index b4bec2a6907..6b4874a8393 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -83,7 +83,6 @@ load( "tf_gen_op_libs", "tf_genrule_cmd_append_to_srcs", "tf_opts_nortti_if_lite_protos", - "tf_opts_nortti_if_mobile", "tf_portable_full_lite_protos", "transitive_hdrs", ) @@ -100,28 +99,23 @@ load("//tensorflow:tensorflow.bzl", "tf_cc_test_gpu") # buildifier: disable=same-origin-load load("//tensorflow:tensorflow.bzl", "tf_cc_tests_gpu") -# buildifier: disable=same-origin-load -# Placeholder: load("//tensorflow:tensorflow.bzl", "tf_portable_proto_lib") - # buildifier: disable=same-origin-load load("//tensorflow:tensorflow.bzl", "tf_monitoring_deps") # For platform specific build config load( "//tensorflow/core/platform:build_config.bzl", - "tf_additional_all_protos", "tf_additional_lib_deps", "tf_additional_test_deps", "tf_jspb_proto_library", "tf_kernel_tests_linkstatic", "tf_lib_proto_parsing_deps", "tf_portable_deps_no_runtime", + "tf_portable_proto_lib", "tf_proto_library", - "tf_proto_library_cc", "tf_protos_all_impl", "tf_protos_grappler_impl", "tf_protos_profiler_impl", - "tf_pyclif_proto_library", ) load( "//tensorflow/core/platform:rules_cc.bzl", @@ -184,18 +178,18 @@ package_group(name = "friends") # filegroup; e.g. ones with individual proto_library targets. 
# LINT.IfChange COMMON_PROTO_SRCS = [ - "protobuf/bfc_memory_map.proto", - "protobuf/config.proto", - "protobuf/cluster.proto", - "protobuf/debug.proto", - "protobuf/device_filters.proto", - "protobuf/device_properties.proto", - "protobuf/graph_debug_info.proto", - "protobuf/queue_runner.proto", - "protobuf/rewriter_config.proto", - "protobuf/tensor_bundle.proto", - "protobuf/saver.proto", - "protobuf/verifier_config.proto", + "//tensorflow/core/protobuf:bfc_memory_map.proto", + "//tensorflow/core/protobuf:config.proto", + "//tensorflow/core/protobuf:cluster.proto", + "//tensorflow/core/protobuf:debug.proto", + "//tensorflow/core/protobuf:device_filters.proto", + "//tensorflow/core/protobuf:device_properties.proto", + "//tensorflow/core/protobuf:graph_debug_info.proto", + "//tensorflow/core/protobuf:queue_runner.proto", + "//tensorflow/core/protobuf:rewriter_config.proto", + "//tensorflow/core/protobuf:tensor_bundle.proto", + "//tensorflow/core/protobuf:saver.proto", + "//tensorflow/core/protobuf:verifier_config.proto", ] EXAMPLE_PROTO_SRCS = [ @@ -242,7 +236,7 @@ PROFILER_PROTO_SRCS = [ ] ERROR_CODES_PROTO_SRCS = [ - "protobuf/error_codes.proto", + "//tensorflow/core/protobuf:error_codes.proto", "//tensorflow/core/lib/core:error_codes.proto", ] # LINT.ThenChange(//tensorflow/core/portable_proto_config.asciipb) @@ -255,11 +249,13 @@ tf_proto_library( cc_api_version = 2, make_default_target_header_only = True, protodeps = [ - ":core_protos", - ":error_codes_proto_impl", "//tensorflow/core/example:protos_all", "//tensorflow/core/framework:protos_all", "//tensorflow/core/lib/core:error_codes_proto", + "//tensorflow/core/profiler/protobuf:xplane_proto", + "//tensorflow/core/profiler:profiler_options_proto", + "//tensorflow/core/protobuf:error_codes_proto_impl", + "//tensorflow/core/protobuf:for_core_protos", "//tensorflow/core/util:protos_all", "//tensorflow/core/util:test_log_proto_impl", ], @@ -1274,7 +1270,7 @@ filegroup( 
"//tensorflow/core/platform:mobile_srcs_no_runtime", "//tensorflow/core/public:mobile_srcs_no_runtime", "//tensorflow/core/util:mobile_srcs_no_runtime", - "//tensorflow/core/util/ctc:android_srcs", + "//tensorflow/core/util/ctc:mobile_srcs", ] + glob( [ "client/**/*.cc", @@ -1304,12 +1300,12 @@ filegroup( "//tensorflow/core/common_runtime/eager:srcs", "//tensorflow/core/framework:mobile_srcs_only_runtime", "//tensorflow/core/graph:mobile_srcs_only_runtime", - "//tensorflow/core/kernels:android_srcs", + "//tensorflow/core/kernels:mobile_srcs", "//tensorflow/core/lib/io:mobile_srcs_only_runtime", "//tensorflow/core/profiler:mobile_srcs", "//tensorflow/core/public:mobile_srcs_only_runtime", "//tensorflow/core/util/sparse:mobile_srcs_only_runtime", - "//tensorflow/core/util/tensor_bundle:android_srcs", + "//tensorflow/core/util/tensor_bundle:mobile_srcs", "//tensorflow/core/util:mobile_srcs_only_runtime", # Sources for which we already have granular targets. @@ -1382,10 +1378,9 @@ cc_library( ], visibility = ["//visibility:public"], deps = [ - ":protos_all_cc_impl", "//tensorflow/core/util:stats_calculator_portable", "//tensorflow/core:mobile_additional_lib_deps", - ] + tf_portable_deps_no_runtime(), + ] + tf_portable_proto_lib() + tf_portable_deps_no_runtime(), alwayslink = 1, ) @@ -1417,54 +1412,12 @@ cc_library( ], ) -# Native library support for iOS applications. 
-# -# bazel build --config=ios_x86_64 \ -# :ios_tensorflow_lib -cc_library( - name = "ios_tensorflow_lib", - srcs = if_ios([ - ":portable_op_registrations_and_gradients", - "//tensorflow/core/kernels:android_core_ops", - "//tensorflow/core/kernels:android_extended_ops", - ]), - copts = tf_copts() + tf_opts_nortti_if_lite_protos() + ["-Os"], - visibility = ["//visibility:public"], - deps = [ - ":portable_tensorflow_lib_lite", - ":protos_all_cc_impl", - "//third_party/eigen3", - "//third_party/fft2d:fft2d_headers", - "@com_google_protobuf//:protobuf", - "@fft2d", - "@gemmlowp", - ], - alwayslink = 1, -) - alias( name = "ios_tensorflow_lib_lite", actual = ":portable_tensorflow_lib_lite", visibility = ["//visibility:public"], ) -cc_library( - name = "ios_tensorflow_test_lib", - testonly = 1, - srcs = if_ios([":android_test_srcs"]), - copts = tf_copts() + ["-Os"], - tags = [ - "manual", - "notap", - ], - visibility = ["//visibility:public"], - deps = [ - ":ios_tensorflow_lib", - "//tensorflow/core/platform/default/build_config:gtest", - "//third_party/eigen3", - ], -) - # Full TensorFlow library with operator support. Use this unless reducing # binary size (by packaging a reduced operator set) is a concern. 
alias( @@ -1473,10 +1426,16 @@ alias( visibility = ["//visibility:public"], ) +alias( + name = "ios_tensorflow_lib", + actual = ":portable_tensorflow_lib", + visibility = ["//visibility:public"], +) + cc_library( name = "portable_tensorflow_lib", srcs = if_mobile([":portable_op_registrations_and_gradients"]), - copts = tf_copts() + tf_opts_nortti_if_lite_protos(), + copts = tf_copts() + tf_opts_nortti_if_lite_protos() + if_ios(["-Os"]), features = tf_features_nomodules_if_mobile(), tags = [ "manual", @@ -1559,6 +1518,12 @@ alias( visibility = ["//visibility:public"], ) +alias( + name = "ios_tensorflow_test_lib", + actual = ":portable_tensorflow_test_lib", + visibility = ["//visibility:public"], +) + cc_library( name = "portable_tensorflow_test_lib", testonly = 1, @@ -1569,7 +1534,7 @@ cc_library( "//tensorflow/core/framework:android_test_hdrs", "//tensorflow/core/util:android_test_hdrs", ], - copts = tf_copts(android_optimization_level_override = None), + copts = tf_copts(android_optimization_level_override = None) + if_ios(["-Os"]), features = tf_features_nomodules_if_mobile() + tf_opts_nortti_if_lite_protos(), tags = [ "manual", @@ -1637,20 +1602,13 @@ alias( [ alias( name = "protobuf_%s_pyclif%s" % (proto_name, target_suffix), - actual = ":protobuf/%s_pyclif%s" % (proto_name, target_suffix), + actual = "//tensorflow/core/protobuf:%s_pyclif%s" % (proto_name, target_suffix), visibility = ["//visibility:public"], ) for target_suffix in [ "", "_pb2", ] - ] + [ - tf_pyclif_proto_library( - name = "protobuf/%s_pyclif" % proto_name, - proto_lib = ":protos_all", - proto_srcfile = "protobuf/%s.proto" % proto_name, - visibility = ["//visibility:public"], - ), ] for proto_name in [ "config", @@ -1664,77 +1622,74 @@ alias( # ----------------------------------------------------------------------------- # Internal targets -tf_proto_library( +alias( name = "autotuning_proto", - srcs = ["protobuf/autotuning.proto"], - cc_api_version = 2, - make_default_target_header_only = 
True, + actual = "//tensorflow/core/protobuf:autotuning_proto", visibility = [ "//tensorflow:internal", ], ) -tf_proto_library( +alias( + name = "autotuning_proto_cc", + actual = "//tensorflow/core/protobuf:autotuning_proto_cc", + visibility = [ + "//tensorflow:internal", + ], +) + +alias( name = "conv_autotuning_proto", - srcs = ["protobuf/conv_autotuning.proto"], - cc_api_version = 2, - make_default_target_header_only = True, - protodeps = [ - "//tensorflow/stream_executor:dnn_proto", - ], + actual = "//tensorflow/core/protobuf:conv_autotuning_proto", visibility = [ "//tensorflow:internal", ], ) -tf_proto_library_cc( - name = "worker_proto", - srcs = ["protobuf/worker.proto"], - cc_api_version = 2, - protodeps = tf_additional_all_protos(), - visibility = ["//visibility:public"], -) - -tf_proto_library_cc( - name = "worker_service_proto", - srcs = ["protobuf/worker_service.proto"], - has_services = 1, - cc_api_version = 2, - cc_stubby_versions = ["2"], - protodeps = [":worker_proto"], +alias( + name = "conv_autotuning_proto_cc", + actual = "//tensorflow/core/protobuf:conv_autotuning_proto_cc", visibility = [ "//tensorflow:internal", ], ) -tf_proto_library_cc( - name = "master_proto", - srcs = ["protobuf/master.proto"], - cc_api_version = 2, - protodeps = tf_additional_all_protos(), - visibility = ["//tensorflow:internal"], -) - -tf_proto_library_cc( - name = "master_service_proto", - srcs = ["protobuf/master_service.proto"], - has_services = 1, - cc_api_version = 2, - cc_stubby_versions = ["2"], - protodeps = [":master_proto"], +alias( + name = "worker_proto_cc", + actual = "//tensorflow/core/protobuf:worker_proto_cc", visibility = [ "//tensorflow:internal", ], ) -tf_proto_library_cc( - name = "eager_service_proto", - srcs = ["protobuf/eager_service.proto"], - has_services = 1, - cc_api_version = 2, - cc_grpc_version = 1, - cc_stubby_versions = ["2"], - protodeps = tf_additional_all_protos(), +alias( + name = "worker_service_proto_cc", + actual = 
"//tensorflow/core/protobuf:worker_service_proto_cc", + visibility = [ + "//tensorflow:internal", + ], +) + +alias( + name = "master_proto_cc", + actual = "//tensorflow/core/protobuf:master_proto_cc", + visibility = [ + "//learning/brain/frameworks/uptc:__subpackages__", + "//tensorflow:internal", + ], +) + +alias( + name = "master_service_proto_cc", + actual = "//tensorflow/core/protobuf:master_service_proto_cc", + visibility = [ + "//tensorflow:internal", + ], +) + +alias( + name = "eager_service_proto_cc", + actual = "//tensorflow/core/protobuf:eager_service_proto_cc", visibility = [ "//tensorflow:internal", ], @@ -2146,49 +2101,14 @@ cc_library( ], ) -tf_proto_library( +alias( name = "error_codes_proto_impl", - srcs = ["protobuf/error_codes.proto"], - cc_api_version = 2, - make_default_target_header_only = True, + actual = "//tensorflow/core/protobuf:error_codes_proto_impl", ) -tf_proto_library( - name = "core_protos", - srcs = COMMON_PROTO_SRCS + [ - # Protos which are not needed on mobile builds, but should be included - # in protos_all. - # - # Note that some protos are in neither core_proto_srcs nor this - # filegroup; e.g. ones with individual proto_library targets. - "protobuf/control_flow.proto", - # TODO(ebrevdo): Re-enable once CriticalSection is in core. 
- # "protobuf/critical_section.proto", - "protobuf/data/experimental/snapshot.proto", - "protobuf/debug_event.proto", - "protobuf/meta_graph.proto", - "protobuf/named_tensor.proto", - "protobuf/remote_tensor_handle.proto", - "protobuf/saved_model.proto", - "protobuf/saved_object_graph.proto", - "protobuf/struct.proto", - "protobuf/tensorflow_server.proto", - "protobuf/trackable_object_graph.proto", - "protobuf/transport_options.proto", - ], - cc_api_version = 2, - make_default_target_header_only = True, - protodeps = [ - ":error_codes_proto_impl", - "//tensorflow/core/example:protos_all", - "//tensorflow/core/framework:protos_all", - "//tensorflow/core/lib/core:error_codes_proto", - "//tensorflow/core/profiler/protobuf:xplane_proto", - "//tensorflow/core/profiler:profiler_options_proto", - "//tensorflow/core/util:protos_all", - "//tensorflow/core/util:test_log_proto_impl", - ], - visibility = ["//visibility:private"], +alias( + name = "error_codes_proto_impl_cc", + actual = "//tensorflow/core/protobuf:error_codes_proto_impl_cc", ) alias( @@ -2480,13 +2400,9 @@ alias( visibility = ["//visibility:public"], ) -tf_proto_library_cc( - name = "replay_log_proto", - srcs = ["protobuf/replay_log.proto"], - cc_api_version = 2, - protodeps = [ - ":master_proto", - ] + tf_additional_all_protos(), +alias( + name = "replay_log_proto_cc", + actual = "//tensorflow/core/protobuf:replay_log_proto_cc", visibility = [ "//tensorflow:internal", ], @@ -3117,6 +3033,11 @@ alias( actual = "//tensorflow/core/platform:cuda_libdevice_path", ) +# Normalize CORE_PROTO_SRCS to generate valid output file names. +PORTABLE_PROTO_HEADERS_OUT = tf_android_core_proto_headers(CORE_PROTO_SRCS) + [ + "//google/protobuf/any.proto.h", +] + transitive_hdrs( name = "headers", visibility = ["//tensorflow:__subpackages__"], @@ -3129,8 +3050,3 @@ transitive_hdrs( "//tensorflow/core/platform:platform_strings", ], ) - -# Normalize CORE_PROTO_SRCS to generate valid output file names. 
-PORTABLE_PROTO_HEADERS_OUT = tf_android_core_proto_headers(CORE_PROTO_SRCS) + [ - "//google/protobuf/any.proto.h", -] diff --git a/tensorflow/core/api_def/base_api/api_def_ApplyFtrl.pbtxt b/tensorflow/core/api_def/base_api/api_def_ApplyFtrl.pbtxt index 0f49a18a114..f3379461a5f 100644 --- a/tensorflow/core/api_def/base_api/api_def_ApplyFtrl.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ApplyFtrl.pbtxt @@ -65,7 +65,7 @@ END summary: "Update \'*var\' according to the Ftrl-proximal scheme." description: < l1 else 0.0 accum = accum_new diff --git a/tensorflow/core/api_def/base_api/api_def_ApplyFtrlV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_ApplyFtrlV2.pbtxt index 3218ab7776c..1eb33005e91 100644 --- a/tensorflow/core/api_def/base_api/api_def_ApplyFtrlV2.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ApplyFtrlV2.pbtxt @@ -65,8 +65,8 @@ END summary: "Update \'*var\' according to the Ftrl-proximal scheme." description: < l1 else 0.0 diff --git a/tensorflow/core/api_def/base_api/api_def_GatherV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_GatherV2.pbtxt index c6104da4a64..7f2a8a1cf1a 100644 --- a/tensorflow/core/api_def/base_api/api_def_GatherV2.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_GatherV2.pbtxt @@ -30,8 +30,8 @@ END summary: "Gather slices from `params` axis `axis` according to `indices`." 
description: < l1 else 0.0 diff --git a/tensorflow/core/common_runtime/BUILD b/tensorflow/core/common_runtime/BUILD index eb506d29571..016896b36f4 100644 --- a/tensorflow/core/common_runtime/BUILD +++ b/tensorflow/core/common_runtime/BUILD @@ -243,6 +243,7 @@ filegroup( "memory_types.h", "mkl_cpu_allocator.h", "mkl_layout_pass.h", + "mkl_tfconversion_pass.h", "optimization_registry.h", "partitioning_utils.h", "placer.h", @@ -1028,9 +1029,13 @@ cc_library( cc_library( name = "mkl_layout_pass", srcs = ["mkl_layout_pass.cc"], - hdrs = ["mkl_layout_pass.h"], + hdrs = [ + "mkl_layout_pass.h", + "//tensorflow/core/graph:mkl_graph_util_header", + ], copts = tf_copts(), deps = [ + ":function", ":optimization_registry", "//tensorflow/core:framework", "//tensorflow/core:framework_internal", @@ -1043,9 +1048,13 @@ cc_library( cc_library( name = "mkl_tfconversion_pass", srcs = ["mkl_tfconversion_pass.cc"], - hdrs = ["mkl_tfconversion_pass.h"], + hdrs = [ + "mkl_tfconversion_pass.h", + "//tensorflow/core/graph:mkl_graph_util_header", + ], copts = tf_copts(), deps = [ + ":function", ":optimization_registry", "//tensorflow/core:framework", "//tensorflow/core:framework_internal", @@ -1212,6 +1221,7 @@ cc_library( ":propagator_debug_utils", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core/platform:hash", "//tensorflow/core/profiler/lib:traceme", ], ) @@ -2285,7 +2295,7 @@ tf_cc_test( "//tensorflow/cc:cc_ops", "//tensorflow/cc:scope", "//tensorflow/core/kernels:cwise_op", - ] + if_mkl([":mkl_array_ops_op_lib"]), + ] + if_mkl(["//tensorflow/core:mkl_array_ops_op_lib"]), ) tf_cc_test( @@ -2510,10 +2520,12 @@ tf_cc_test( "//tensorflow/core:framework", "//tensorflow/core:framework_internal", "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/strings", ], ) diff --git 
a/tensorflow/core/common_runtime/device_mgr.cc b/tensorflow/core/common_runtime/device_mgr.cc index c7583c374f2..0b693085da3 100644 --- a/tensorflow/core/common_runtime/device_mgr.cc +++ b/tensorflow/core/common_runtime/device_mgr.cc @@ -45,6 +45,7 @@ StaticDeviceMgr::StaticDeviceMgr(std::vector> devices) } const auto& t = d->device_type(); device_type_counts_[t]++; + device_incarnation_set_.insert(d->attributes().incarnation()); if (cpu_device_ == nullptr && t == "CPU" && d->parsed_name().id == 0) { cpu_device_ = d.get(); } @@ -123,6 +124,10 @@ Status StaticDeviceMgr::LookupDevice(StringPiece name, Device** device) const { return Status::OK(); } +bool StaticDeviceMgr::ContainsDevice(int64 device_incarnation) const { + return device_incarnation_set_.contains(device_incarnation); +} + void StaticDeviceMgr::ClearContainers( gtl::ArraySlice containers) const { Status s; diff --git a/tensorflow/core/common_runtime/device_mgr.h b/tensorflow/core/common_runtime/device_mgr.h index 56248b39078..83a0d0cc29c 100644 --- a/tensorflow/core/common_runtime/device_mgr.h +++ b/tensorflow/core/common_runtime/device_mgr.h @@ -22,6 +22,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_set.h" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/lib/core/arena.h" #include "tensorflow/core/lib/core/status.h" @@ -56,6 +57,11 @@ class DeviceMgr { // Accepts either a full device name, or just the replica-local suffix. virtual Status LookupDevice(StringPiece name, Device** device) const = 0; + // Check if the current device manager contains device with the given + // incarnation ID. Looking up by incarnation IDs because they are randomly + // generated and not intentionally reused (unlike device pointers). + virtual bool ContainsDevice(int64 device_incarnation) const = 0; + // Clears given containers of all devices if 'container' is // non-empty. Otherwise, clears default containers of all devices. 
virtual void ClearContainers(gtl::ArraySlice containers) const = 0; @@ -86,6 +92,7 @@ class StaticDeviceMgr : public DeviceMgr { string DebugString() const override; string DeviceMappingString() const override; Status LookupDevice(StringPiece name, Device** device) const override; + bool ContainsDevice(int64 device_incarnation) const override; void ClearContainers(gtl::ArraySlice containers) const override; int NumDeviceType(const string& type) const override; Device* HostCPU() const override; @@ -95,6 +102,7 @@ class StaticDeviceMgr : public DeviceMgr { StringPiece CopyToBackingStore(StringPiece s); + absl::flat_hash_set device_incarnation_set_; std::unordered_map device_map_; core::Arena name_backing_store_; // Storage for keys in device_map_ std::unordered_map device_type_counts_; @@ -117,6 +125,7 @@ class DynamicDeviceMgr : public DeviceMgr { string DebugString() const override; string DeviceMappingString() const override; Status LookupDevice(StringPiece name, Device** device) const override; + bool ContainsDevice(int64 device_incarnation) const override; void ClearContainers(gtl::ArraySlice containers) const override; int NumDeviceType(const string& type) const override; Device* HostCPU() const override; @@ -140,6 +149,7 @@ class DynamicDeviceMgr : public DeviceMgr { std::unordered_map> dynamic_devices_ TF_GUARDED_BY(devices_mu_); + absl::flat_hash_set device_incarnation_set_ TF_GUARDED_BY(devices_mu_); std::unordered_map device_map_ TF_GUARDED_BY(devices_mu_); std::unordered_map device_type_counts_ diff --git a/tensorflow/core/common_runtime/dynamic_device_mgr.cc b/tensorflow/core/common_runtime/dynamic_device_mgr.cc index f35fa7e416a..f47de47c5b9 100644 --- a/tensorflow/core/common_runtime/dynamic_device_mgr.cc +++ b/tensorflow/core/common_runtime/dynamic_device_mgr.cc @@ -92,6 +92,11 @@ Status DynamicDeviceMgr::LookupDevice(StringPiece name, Device** device) const { return Status::OK(); } +bool DynamicDeviceMgr::ContainsDevice(int64 device_incarnation) 
const { + tf_shared_lock l(devices_mu_); + return device_incarnation_set_.contains(device_incarnation); +} + void DynamicDeviceMgr::ClearContainers( gtl::ArraySlice containers) const { Status s; @@ -138,6 +143,7 @@ Status DynamicDeviceMgr::AddDevices( device_map_[name] = d.get(); } device_type_counts_[d->device_type()]++; + device_incarnation_set_.insert(d->attributes().incarnation()); dynamic_devices_.emplace(d.get(), std::move(d)); } return Status::OK(); @@ -171,6 +177,7 @@ Status DynamicDeviceMgr::RemoveDevices(std::vector devices) { device_map_.erase(name); } device_type_counts_[d->device_type()]--; + device_incarnation_set_.erase(d->attributes().incarnation()); dynamic_devices_.erase(it); } return Status::OK(); diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD index 695342d5e7a..2b2313d91ff 100644 --- a/tensorflow/core/common_runtime/eager/BUILD +++ b/tensorflow/core/common_runtime/eager/BUILD @@ -305,6 +305,7 @@ tf_cuda_library( visibility = ["//tensorflow:internal"], deps = [ ":attr_builder", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", "@com_google_absl//absl/memory", "@com_google_absl//absl/types:optional", @@ -369,6 +370,7 @@ cc_library( ":eager_operation", ":kernel_and_device", ":tensor_handle", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/types:optional", "@com_google_absl//absl/strings", @@ -396,6 +398,24 @@ cc_library( }) + if_mkl([":mkl_eager_op_rewrite"]), ) +tf_cc_test( + name = "execute_node_test", + srcs = ["execute_node_test.cc"], + deps = [ + ":context", + ":core", + ":execute", + ":kernel_and_device", + ":tensor_handle", + "//tensorflow/core:core_cpu_lib", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "@com_google_absl//absl/memory", + ], +) + cc_library( name = "mkl_eager_op_rewrite", srcs = 
["mkl_eager_op_rewrite.cc"], @@ -466,6 +486,7 @@ cc_library( ":eager_operation", ":kernel_and_device", ":tensor_handle", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/types:optional", "@com_google_absl//absl/strings", diff --git a/tensorflow/core/common_runtime/eager/context.cc b/tensorflow/core/common_runtime/eager/context.cc index 4e5bc934c38..b8dfe92aac6 100644 --- a/tensorflow/core/common_runtime/eager/context.cc +++ b/tensorflow/core/common_runtime/eager/context.cc @@ -28,6 +28,7 @@ limitations under the License. #include "tensorflow/core/platform/platform.h" // clang-format on +#include "tensorflow/c/tf_tensor.h" #include "tensorflow/c/tf_tensor_internal.h" #include "tensorflow/c/eager/operation_interface.h" #include "tensorflow/c/eager/tensor_handle_interface.h" @@ -168,6 +169,28 @@ AbstractTensorInterface* EagerContext::CreateTensor( return new TensorInterface(Tensor(dtype, TensorShape(dim_sizes))); } +AbstractTensorInterface* EagerContext::CreateTensor( + DataType dtype, const int64_t* dims, int num_dims, void* data, size_t len, + bool convert_string, MemoryReleaser memory_releaser, + void* memory_releaser_arg) { + TF_Tensor* tensor_wrapper = + TF_NewTensor(static_cast(dtype), dims, num_dims, data, len, + memory_releaser, memory_releaser_arg); + + if (convert_string) { + tensorflow::Tensor tensor; + Status status = TF_TensorToTensor(tensor_wrapper, &tensor); + TF_DeleteTensor(tensor_wrapper); + if (!status.ok()) return nullptr; + return new TensorInterface(std::move(tensor)); + } else { + AbstractTensorInterface* result = nullptr; + std::swap(result, tensor_wrapper->tensor); + TF_DeleteTensor(tensor_wrapper); + return result; + } +} + std::unique_ptr EagerContext::LoadSavedModelAPI( const std::string& directory, const absl::optional>& tags, @@ -853,6 +876,18 @@ Status EagerContext::FindDeviceFromName(const char* device_name, return status; } +Status 
EagerContext::FindCompositeDeviceFromName( + const char* device_name, CompositeDevice** device) const { + tf_shared_lock l(composite_devices_mu_); + for (const auto& d : composite_devices_) { + if (d.second->name() == device_name) { + *device = d.second.get(); + return Status::OK(); + } + } + return errors::NotFound("Unknown composite device: ", device_name); +} + Status EagerContext::FindCustomDeviceFromName(const string& device_name, CustomDevice** dev) const { auto dev_it = custom_devices_.find(device_name); @@ -904,8 +939,7 @@ Status EagerContext::FindOrCreateCompositeDevice( composite_devices_.size(), &s); TF_RETURN_IF_ERROR(s); *composite_device = device.get(); - // TODO(b/145922293): Add the composite device to the device set of pflr in - // order to make placer recognize it. + pflr_->AddCompositeDevice(*composite_device); composite_devices_.emplace(hash_key, std::move(device)); return Status::OK(); } diff --git a/tensorflow/core/common_runtime/eager/context.h b/tensorflow/core/common_runtime/eager/context.h index aad318886a9..d034aaf2f9c 100644 --- a/tensorflow/core/common_runtime/eager/context.h +++ b/tensorflow/core/common_runtime/eager/context.h @@ -173,6 +173,11 @@ class EagerContext : public AbstractContextInterface, public core::RefCounted { AbstractTensorInterface* CreateTensor( DataType dtype, absl::Span dim_sizes) override; + AbstractTensorInterface* CreateTensor(DataType dtype, const int64_t* dims, + int num_dims, void* data, size_t len, + bool convert_string, + MemoryReleaser memory_releaser, + void* memory_releaser_arg) override; AbstractTensorHandleInterface* CreateLocalHandle( AbstractTensorInterface* t) override; @@ -295,7 +300,9 @@ class EagerContext : public AbstractContextInterface, public core::RefCounted { void AddKernelToCache(Fprint128 cache_key, KernelAndDevice* kernel); bool LogDevicePlacement() const { return log_device_placement_; } + void SetLogDevicePlacement(bool enable) { log_device_placement_ = enable; } bool 
AllowSoftPlacement() const { return allow_soft_placement_; } + void SetAllowSoftPlacement(bool enable) { allow_soft_placement_ = enable; } bool LogMemory() const { return log_memory_; } Rendezvous* GetRendezvous() const { return rendezvous_; } @@ -483,6 +490,9 @@ class EagerContext : public AbstractContextInterface, public core::RefCounted { Status FindDeviceFromName(const char* device_name, Device** device) const; + Status FindCompositeDeviceFromName(const char* device_name, + CompositeDevice** device) const; + Status FindCustomDeviceFromName(const string& device_name, CustomDevice** dev) const; @@ -617,9 +627,8 @@ class EagerContext : public AbstractContextInterface, public core::RefCounted { mutex metadata_mu_; RunMetadata run_metadata_ TF_GUARDED_BY(metadata_mu_); GraphCollector graph_collector_; - // TODO(fishx): Allow update following two bool after context creation. - const bool log_device_placement_; - const bool allow_soft_placement_; + std::atomic log_device_placement_; + std::atomic allow_soft_placement_; // Information related to step containers. 
std::atomic num_active_steps_; diff --git a/tensorflow/core/common_runtime/eager/context_test.cc b/tensorflow/core/common_runtime/eager/context_test.cc index 9154a288a84..f83e3f0b45d 100644 --- a/tensorflow/core/common_runtime/eager/context_test.cc +++ b/tensorflow/core/common_runtime/eager/context_test.cc @@ -180,6 +180,10 @@ TEST_F(EagerContextTest, CompositeDevice) { &composite_device_0)); EXPECT_EQ(composite_device_0->name(), "/job:worker/replica:0/task:0/device:COMPOSITE:0"); + CompositeDevice* device = nullptr; + TF_EXPECT_OK(context()->FindCompositeDeviceFromName( + "/job:worker/replica:0/task:0/device:COMPOSITE:0", &device)); + EXPECT_EQ(device, composite_device_0); CompositeDevice* composite_device_1 = nullptr; TF_ASSERT_OK(context()->FindOrCreateCompositeDevice(underlying_devices, &composite_device_1)); @@ -190,6 +194,12 @@ TEST_F(EagerContextTest, CompositeDevice) { &composite_device_2)); EXPECT_EQ(composite_device_2->name(), "/job:worker/replica:0/task:0/device:COMPOSITE:1"); + TF_EXPECT_OK(context()->FindCompositeDeviceFromName( + "/job:worker/replica:0/task:0/device:COMPOSITE:1", &device)); + EXPECT_EQ(device, composite_device_2); + + EXPECT_TRUE(errors::IsNotFound(context()->FindCompositeDeviceFromName( + "/job:worker/replica:0/task:0/device:COMPOSITE:2", &device))); } } // namespace diff --git a/tensorflow/core/common_runtime/eager/eager_executor.cc b/tensorflow/core/common_runtime/eager/eager_executor.cc index 13630a01ea9..7850978410f 100644 --- a/tensorflow/core/common_runtime/eager/eager_executor.cc +++ b/tensorflow/core/common_runtime/eager/eager_executor.cc @@ -98,7 +98,7 @@ const char* EagerExecutor::StateStringLocked() { Status EagerExecutor::SyncExecute(EagerNode* node) { if (Async()) { - return errors::Internal("Executor does not support sync execution"); + return errors::Internal("Executor does not support async execution"); } if (node->AsAsync() != nullptr) { return errors::Internal("Executor does not support executing async nodes"); diff 
--git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc index 95d85bfbcc9..f6b4370bbdc 100644 --- a/tensorflow/core/common_runtime/eager/execute.cc +++ b/tensorflow/core/common_runtime/eager/execute.cc @@ -365,8 +365,12 @@ Status GetOrCreateKernelAndDevice( Device* device = absl::get(op->Device()); Fprint128 cache_key = op->MutableAttrs()->CacheKey(op->DeviceName()); + /// Include soft placement policy in cache key since the placement strategy + // can change and thus affect which kernel is picked. + cache_key = FingerprintCat128(cache_key, ctx.AllowSoftPlacement()); std::vector input_dev_ptrs; + absl::flat_hash_map*> composite_devices; std::unordered_map input_resource_variable_dtypes_and_shapes; // We can eliminate some overhead by running simple functions using regular @@ -410,6 +414,13 @@ Status GetOrCreateKernelAndDevice( Device* input_device; TF_RETURN_IF_ERROR(GetDeviceForInput(ctx, input, &input_device)); input_dev_ptrs.push_back(input_device); + CompositeDevice* composite_device = nullptr; + if (ctx.FindCompositeDeviceFromName(input_device->name().c_str(), + &composite_device) + .ok()) { + composite_devices[input_device->name()] = + composite_device->underlying_devices(); + } cache_key = FingerprintCat128(cache_key, Fingerprint128(input_device->name())); @@ -480,13 +491,6 @@ Status GetOrCreateKernelAndDevice( << KernelsRegisteredForOp(op->Name()); op->SetDevice(device); } - if (ctx.LogDevicePlacement() || VLOG_IS_ON(1)) { - string msg = strings::StrCat("Executing op ", ndef.op(), " in device ", - DeviceNameOrUnspecified(device)); - if (!logging::LogToListeners(msg)) { - LOG(INFO) << msg; - } - } FunctionLibraryRuntime* flr = device == nullptr ? 
nullptr : ctx.func_lib(device); @@ -520,6 +524,7 @@ Status GetOrCreateKernelAndDevice( #endif // IS_MOBILE_PLATFORM kernel.reset(new KernelAndDeviceFunc( flr, ctx.pflr(), std::move(input_dev_ptrs), + std::move(composite_devices), std::move(input_resource_variable_dtypes_and_shapes), runner, ctx.GetCollectiveExecutorHandle(), ctx.HostCPU(), op->Name(), [&ctx](const int64 step_id) { return ctx.CreateRendezvous(step_id); }, @@ -598,6 +603,14 @@ Status EagerLocalExecute(EagerOperation* op, TensorHandle** retvals, int num_outputs = kernel->num_outputs(); TF_RETURN_IF_ERROR(ValidateInputTypeAndPlacement(&ctx, op, kernel)); + if (ctx.LogDevicePlacement() || VLOG_IS_ON(1)) { + string msg = strings::StrCat("Executing op ", op->Name(), " in device ", + kernel->device()->name()); + if (!logging::LogToListeners(msg)) { + LOG(INFO) << msg; + } + } + GraphCollector* graph_collector = nullptr; if (ctx.ShouldStoreGraphs()) { graph_collector = ctx.GetGraphCollector(); @@ -832,6 +845,16 @@ Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals, ctx.GetContextViewId(), eager_client.get(), op->MutableAttrs()->BuildNodeDef(), op->EagerContext().FuncLibDef(), op->Inputs(), {retvals, num_outputs})); + + if (op->EagerContext().LogDevicePlacement() || VLOG_IS_ON(1)) { + string msg = strings::StrCat( + "Executing op ", op->Name(), " on task ", + DeviceNameUtils::ParsedNameToString(op->GetDeviceParsedName())); + if (!logging::LogToListeners(msg)) { + LOG(INFO) << msg; + } + } + Status s = executor.AddOrExecute(std::move(node)); // Since the operation failed, we need to Unref any outputs that were // allocated. @@ -865,6 +888,19 @@ bool IsPinnableOp(const string& op_type) { !absl::StartsWith(op_type, "XRT"); } +// Validate if the remote device with the given incarnation is valid in the +// remote device manager of the current eager context. 
+Status ValidateTensorHandleRemoteDevice(EagerContext* ctx, + int64 device_incarnation) { + if (ctx->remote_device_mgr()->ContainsDevice(device_incarnation)) { + return Status::OK(); + } + return errors::InvalidArgument( + "Resource input tensor contains an invalid device. This might happen " + "when the client has connected to a different cluster, or some remote " + "workers have been restarted."); +} + // The Op device may be updated if: // - A resource touching input is specified: all resource-touching ops run in // the device the resource is, regardless of anything else that has been @@ -926,6 +962,10 @@ Status MaybeUpdateOpDevice(EagerOperation* op) { for (int i = 0; i < op->Inputs().size(); ++i) { TensorHandle* tensor_handle = op->Inputs()[i]; if (tensor_handle->dtype == DT_RESOURCE) { + if (tensor_handle->resource_remote_device_incarnation() != 0) { + TF_RETURN_IF_ERROR(ValidateTensorHandleRemoteDevice( + &ctx, tensor_handle->resource_remote_device_incarnation())); + } Device* resource_device = tensor_handle->resource_device(); DVLOG(2) << "for op " << op->Name() << " input " << i << " " << DataTypeString(tensor_handle->dtype) @@ -1093,15 +1133,6 @@ Status EagerExecute(EagerOperation* op, TensorHandle** retvals, return EagerLocalExecute(op, retvals, num_retvals); } - if (op->EagerContext().LogDevicePlacement() || VLOG_IS_ON(1)) { - string msg = strings::StrCat( - "Executing op ", op->Name(), " on task ", - DeviceNameUtils::ParsedNameToString(op->GetDeviceParsedName())); - if (!logging::LogToListeners(msg)) { - LOG(INFO) << msg; - } - } - #if defined(IS_MOBILE_PLATFORM) return errors::Unimplemented( "Eager's remote execution is not available on mobile devices."); @@ -1402,6 +1433,14 @@ void EagerLocalExecuteAsync(EagerOperation* op, TensorHandle** retvals, return; } + if (ctx.LogDevicePlacement() || VLOG_IS_ON(1)) { + string msg = strings::StrCat("Executing op ", op->Name(), " in device ", + kernel->device()->name()); + if (!logging::LogToListeners(msg)) { + 
LOG(INFO) << msg; + } + } + GraphCollector* graph_collector = nullptr; if (ctx.ShouldStoreGraphs()) { graph_collector = ctx.GetGraphCollector(); diff --git a/tensorflow/core/common_runtime/eager/execute_node.cc b/tensorflow/core/common_runtime/eager/execute_node.cc index b7bebd4ba11..3197d3e0ac7 100644 --- a/tensorflow/core/common_runtime/eager/execute_node.cc +++ b/tensorflow/core/common_runtime/eager/execute_node.cc @@ -17,6 +17,51 @@ limitations under the License. #include "tensorflow/core/lib/core/errors.h" namespace tensorflow { + +#if !defined(IS_MOBILE_PLATFORM) +bool ExecuteNodeArgs::IsRemote(EagerContext* ctx, Device* input_device, + TensorHandle* handle) { + uint64 context_view_id = ctx->GetContextViewId(); + if (handle->Type() == TensorHandle::REMOTE || + handle->HasRemoteMirror(input_device, context_view_id)) { + if (!has_remote_inputs_) { + has_remote_inputs_ = true; + } + return true; + } + return false; +} +#endif // IS_MOBILE_PLATFORM + +Status ExecuteNodeArgs::InitPackedHandle(const int index, EagerContext* ctx, + Device* input_device, + TensorHandle* packed_handle) { + int num_handles = packed_handle->NumPackedHandles(); + packed_args_.emplace(index, gtl::InlinedVector(num_handles)); + TensorValue* packed_arg_flat = &(packed_args_[index][0]); + for (int i = 0; i < num_handles; ++i) { + TensorHandle* h = nullptr; + TF_RETURN_IF_ERROR(packed_handle->ExtractPackedHandle(i, &h)); + // We have validated that h->device() is not a CustomDevice when + // constructing a pack TensorHandle. 
+ const Status status = + h->TensorValue(absl::get(h->device()), &packed_arg_flat[i]); + if (!status.ok()) { +#if !defined(IS_MOBILE_PLATFORM) + if (IsRemote(ctx, input_device, h)) { + continue; + } +#endif // IS_MOBILE_PLATFORM + if (h->Type() == TensorHandle::PACKED) { + return errors::InvalidArgument( + "Nested packed handles are not supported"); + } + return status; + } + } + return Status::OK(); +} + Status ExecuteNodeArgs::Init( EagerContext* ctx, const gtl::InlinedVector& op_inputs, const core::RefCountPtr& kernel) { @@ -35,16 +80,17 @@ Status ExecuteNodeArgs::Init( Status s = in->TensorValue(ctx->CanonicalDevice(d), &tensor_args_flat[i]); if (!s.ok()) { #if !defined(IS_MOBILE_PLATFORM) - uint64 context_view_id = ctx->GetContextViewId(); - if (in->Type() == TensorHandle::REMOTE || - in->HasRemoteMirror(d, context_view_id)) { - if (!has_remote_inputs_) { - has_remote_inputs_ = true; - } + if (IsRemote(ctx, d, in)) { continue; } #endif - return s; + if (in->Type() != TensorHandle::PACKED) { + return s; + } + if (!has_packed_inputs_) { + has_packed_inputs_ = true; + } + TF_RETURN_IF_ERROR(InitPackedHandle(i, ctx, d, in)); } } } @@ -54,24 +100,44 @@ Status ExecuteNodeArgs::Init( serialize_remote_handle_ = [ctx, &op_inputs](const FunctionArgIndex& index, eager::RemoteTensorHandle* handle) -> Status { - if (index.sub_index >= 0) { - return errors::InvalidArgument("Got unexpected sub_index ", - index.sub_index, " for argument ", - index.index); + TensorHandle* h = op_inputs[index.index]; + if (op_inputs[index.index]->Type() == TensorHandle::PACKED) { + TF_RETURN_IF_ERROR( + op_inputs[index.index]->ExtractPackedHandle(index.sub_index, &h)); } - VariantDevice variant_device = op_inputs[index.index]->device(); + VariantDevice variant_device = h->device(); if (VariantDeviceIsCustom(variant_device)) { return errors::Internal( "Custom devices and remote execution are currently not supported " "together."); } Device* device = absl::get(variant_device); - return 
ctx->RemoteMgr()->SerializeRemoteTensorHandle( - op_inputs[index.index], handle, device, device->name()); + return ctx->RemoteMgr()->SerializeRemoteTensorHandle(h, handle, device, + device->name()); }; } #endif // !IS_MOBILE_PLATFORM return Status::OK(); } +Status ExecuteNodeArgs::GetLocalArg(const FunctionArgIndex& index, + Tensor* val) const { + Status s = EagerKernelArgs::GetLocalArg(index, val); + if (s.ok()) { + return Status::OK(); + } + if (packed_args_.contains(index.index)) { + Tensor* arg = packed_args_.at(index.index).at(index.sub_index).tensor; + if (arg) { + *val = *arg; + return Status::OK(); + } else { + return errors::NotFound("Argument (", index.index, ",", index.sub_index, + ") has no local tensor."); + } + } else { + return s; + } +} + } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/eager/execute_node.h b/tensorflow/core/common_runtime/eager/execute_node.h index d416f58bbcd..7924471066e 100644 --- a/tensorflow/core/common_runtime/eager/execute_node.h +++ b/tensorflow/core/common_runtime/eager/execute_node.h @@ -20,6 +20,7 @@ limitations under the License. 
#include #include #include +#include "absl/container/flat_hash_map.h" #include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/platform.h" // clang-format on @@ -54,6 +55,8 @@ class ExecuteNodeArgs : public EagerKernelArgs { const absl::InlinedVector& op_inputs, const core::RefCountPtr& kernel); + Status GetLocalArg(const FunctionArgIndex& index, Tensor* val) const override; + bool HasRemoteOrPackedInputs() const override { return has_remote_inputs_ || has_packed_inputs_; }; @@ -66,8 +69,20 @@ class ExecuteNodeArgs : public EagerKernelArgs { #endif // IS_MOBILE_PLATFORM private: +#if !defined(IS_MOBILE_PLATFORM) + // Returns whether `handle` is a remote handle or has a remote mirror on + // `input_device` + bool IsRemote(EagerContext* ctx, Device* input_device, TensorHandle* handle); +#endif // IS_MOBILE_PLATFORM + + // Initialize a packed TensorHandle which is the `index`-th argument. + Status InitPackedHandle(const int index, EagerContext* ctx, + Device* input_device, TensorHandle* packed_handle); + bool has_remote_inputs_ = false; bool has_packed_inputs_ = false; + // Maps from the index of a packed arg to a list of sub-args. + absl::flat_hash_map> packed_args_; #if !defined(IS_MOBILE_PLATFORM) std::function serialize_remote_handle_; diff --git a/tensorflow/core/common_runtime/eager/execute_node_test.cc b/tensorflow/core/common_runtime/eager/execute_node_test.cc new file mode 100644 index 00000000000..99f030322df --- /dev/null +++ b/tensorflow/core/common_runtime/eager/execute_node_test.cc @@ -0,0 +1,142 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/common_runtime/eager/execute_node.h" + +#include + +#include "tensorflow/core/common_runtime/composite_device.h" +#include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/common_runtime/eager/context.h" +#include "tensorflow/core/common_runtime/eager/kernel_and_device.h" +#include "tensorflow/core/common_runtime/eager/tensor_handle.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace { + +class TestKernelAndDeviceFunc final : public KernelAndDeviceFunc { + public: + TestKernelAndDeviceFunc(std::vector input_devices, + Device* host_cpu_device) + : KernelAndDeviceFunc( + /*flr=*/nullptr, /*pflr=*/nullptr, /*input_devices=*/{}, + /*composite_devices=*/{}, /*input_resource_dtypes_and_shapes=*/{}, + /*runner=*/nullptr, /*collective_executor=*/nullptr, + host_cpu_device, /*name=*/"", + /*rendezvous_creator=*/nullptr, /*get_op_id=*/nullptr), + test_input_devices_(std::move(input_devices)) {} + + Device* InputDevice(int i) const override { return test_input_devices_[i]; } + + private: + std::vector test_input_devices_; +}; + +TEST(ExecuteNodeTest, ExecuteNodeArgs) { + StaticDeviceMgr device_mgr( + DeviceFactory::NewDevice("CPU", {}, "/job:localhost/replica:0/task:0")); + Device* device0 = device_mgr.ListDevices().at(0); + auto remote_device_mgr = absl::make_unique(); + std::vector> remote_devices; + 
remote_devices.emplace_back( + DeviceFactory::NewDevice("CPU", {}, "/job:localhost/replica:0/task:1")); + TF_ASSERT_OK(remote_device_mgr->AddDevices(std::move(remote_devices))); + Device* device1 = remote_device_mgr->ListDevices().at(0); + + Status s; + std::unique_ptr composite_device = + CompositeDevice::MakeDevice({device0->name(), device1->name()}, + /*unique_device_id=*/0, &s); + TF_ASSERT_OK(s); + + auto ctx = new EagerContext( + SessionOptions(), + tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT, + tensorflow::ContextMirroringPolicy::MIRRORING_NONE, false, false, + &device_mgr, false, nullptr, nullptr, nullptr); + + // Set a RemoteMgr to the EagerContext. + auto remote_mgr = absl::make_unique( + /*is_master=*/true, ctx); + TF_ASSERT_OK(ctx->InitializeRemoteMaster( + /*server=*/nullptr, /*worker_env=*/nullptr, + /*worker_session=*/nullptr, /*remote_eager_workers=*/nullptr, + std::move(remote_device_mgr), /*remote_contexts=*/{}, + EagerContext::NewContextId(), + /*r=*/nullptr, &device_mgr, /*keep_alive_secs*/ 600, + /*cluster_flr=*/nullptr, std::move(remote_mgr))); + + DataType dtype = DT_FLOAT; + Tensor t0(dtype, TensorShape({})); + // Create two local TensorHandles + t0.scalar()() = {1.0f}; + TensorHandle* h0 = + TensorHandle::CreateLocalHandle(std::move(t0), device0, device0, ctx); + Tensor t1(dtype, TensorShape({})); + t1.scalar()() = {2.0f}; + TensorHandle* h1 = + TensorHandle::CreateLocalHandle(std::move(t1), device0, device0, ctx); + // Create two remote TensorHandles + TensorHandle* h2 = TensorHandle::CreateLazyRemoteHandle( + /*op_id=*/1, /*output_num=*/0, dtype, device1, ctx); + TensorHandle* h3 = TensorHandle::CreateLazyRemoteHandle( + /*op_id=*/2, /*output_num=*/1, dtype, device1, ctx); + // Create a packed TensorHandle + TensorHandle* packed_h = nullptr; + TF_ASSERT_OK(TensorHandle::CreatePackedHandle({h1, h2}, ctx, &packed_h)); + + // LOCAL, PACKED, REMOTE + absl::InlinedVector inputs = {h0, packed_h, h3}; + + std::vector 
input_devices; + for (auto* h : inputs) { + input_devices.push_back(absl::get(h->DeviceOrHostCPU(*ctx))); + } + const core::RefCountPtr kernel( + new TestKernelAndDeviceFunc(std::move(input_devices), device0)); + + ExecuteNodeArgs args(inputs.size()); + TF_EXPECT_OK(args.Init(ctx, inputs, kernel)); + EXPECT_TRUE(args.HasRemoteOrPackedInputs()); + Tensor local0; + TF_EXPECT_OK(args.GetLocalArg(FunctionArgIndex(0), &local0)); + EXPECT_EQ(local0.flat().size(), 1); + EXPECT_EQ(local0.flat()(0), 1.0); + Tensor local1; + TF_EXPECT_OK(args.GetLocalArg(FunctionArgIndex(1, 0), &local1)); + EXPECT_EQ(local1.flat().size(), 1); + EXPECT_EQ(local1.flat()(0), 2.0); + eager::RemoteTensorHandle remote0; + TF_EXPECT_OK(args.GetRemoteArg(FunctionArgIndex(1, 1), &remote0)); + EXPECT_EQ(remote0.op_id(), 1); + EXPECT_EQ(remote0.output_num(), 0); + eager::RemoteTensorHandle remote1; + TF_EXPECT_OK(args.GetRemoteArg(FunctionArgIndex(2), &remote1)); + EXPECT_EQ(remote1.op_id(), 2); + EXPECT_EQ(remote1.output_num(), 1); + + h0->Unref(); + h1->Unref(); + h2->Unref(); + h3->Unref(); + packed_h->Unref(); + ctx->Unref(); +} + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.cc b/tensorflow/core/common_runtime/eager/kernel_and_device.cc index 98d71959e2d..3c586e8188a 100644 --- a/tensorflow/core/common_runtime/eager/kernel_and_device.cc +++ b/tensorflow/core/common_runtime/eager/kernel_and_device.cc @@ -158,6 +158,7 @@ Status KernelAndDeviceFunc::InstantiateFunc(const NodeDef& ndef, for (const Device* device : input_devices_) { options.input_devices.push_back(device->name()); } + options.composite_devices = composite_devices_; options.input_resource_dtypes_and_shapes = input_resource_dtypes_and_shapes_; const auto& it = ndef.attr().find("executor_type"); @@ -425,7 +426,9 @@ Device* KernelAndDeviceOp::InputDevice(int i) const { } Device* KernelAndDeviceFunc::InputDevice(int i) const { - if (input_dtypes_[i] == DT_RESOURCE) { + if 
((input_dtypes_[i] == DT_RESOURCE) && + (composite_devices_.find(input_devices_[i]->name()) == + composite_devices_.end())) { return host_cpu_device_; } else { return input_devices_[i]; diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.h b/tensorflow/core/common_runtime/eager/kernel_and_device.h index a740b898262..d2c54322513 100644 --- a/tensorflow/core/common_runtime/eager/kernel_and_device.h +++ b/tensorflow/core/common_runtime/eager/kernel_and_device.h @@ -27,6 +27,7 @@ limitations under the License. #include "tensorflow/core/platform/platform.h" // clang-format on +#include "absl/container/flat_hash_map.h" #include "absl/types/optional.h" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/process_function_library_runtime.h" @@ -241,7 +242,7 @@ class KernelAndDeviceOp final : public KernelAndDevice { // Represents a multi-device function. Functions can also be run using // various function-calling kernels including CallOp and PartitionedCallOp. // In such cases, KernelAndDeviceOp is used. -class KernelAndDeviceFunc final : public KernelAndDevice { +class KernelAndDeviceFunc : public KernelAndDevice { public: // `flr` can be nullptr. // `pflr` must not be nullptr. @@ -249,6 +250,7 @@ class KernelAndDeviceFunc final : public KernelAndDevice { KernelAndDeviceFunc( FunctionLibraryRuntime* flr, ProcessFunctionLibraryRuntime* pflr, std::vector input_devices, + absl::flat_hash_map*> composite_devices, std::unordered_map input_resource_dtypes_and_shapes, std::function)>* runner, @@ -261,6 +263,7 @@ class KernelAndDeviceFunc final : public KernelAndDevice { pflr_(pflr), handle_(kInvalidHandle), input_devices_(std::move(input_devices)), + composite_devices_(std::move(composite_devices)), input_resource_dtypes_and_shapes_( std::move(input_resource_dtypes_and_shapes)), name_(name), @@ -320,6 +323,8 @@ class KernelAndDeviceFunc final : public KernelAndDevice { // CPU devices are not null. 
Resource handles' devices are actual backing // devices. std::vector input_devices_; + // Maps from a CompositeDevice name to a list of physical device names. + absl::flat_hash_map*> composite_devices_; std::unordered_map input_resource_dtypes_and_shapes_; diff --git a/tensorflow/core/common_runtime/eager/mkl_eager_op_rewrite.cc b/tensorflow/core/common_runtime/eager/mkl_eager_op_rewrite.cc index 2d4ae338144..f2339806814 100644 --- a/tensorflow/core/common_runtime/eager/mkl_eager_op_rewrite.cc +++ b/tensorflow/core/common_runtime/eager/mkl_eager_op_rewrite.cc @@ -17,7 +17,7 @@ limitations under the License. #include #include "tensorflow/core/common_runtime/eager/eager_op_rewrite_registry.h" -#include "tensorflow/core/common_runyime/mkl_layout_pass.h" +#include "tensorflow/core/common_runtime/mkl_layout_pass.h" #include "tensorflow/core/graph/mkl_graph_util.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/util/mkl_util.h" diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.cc b/tensorflow/core/common_runtime/eager/tensor_handle.cc index 1d7b4ea5d6c..dfe3e4a1426 100644 --- a/tensorflow/core/common_runtime/eager/tensor_handle.cc +++ b/tensorflow/core/common_runtime/eager/tensor_handle.cc @@ -49,6 +49,13 @@ limitations under the License. 
namespace tensorflow { +namespace { +int64 GetRemoteDeviceIncarnation(Device* device) { + if (device == nullptr || device->IsLocal()) return 0; + return device->attributes().incarnation(); +} +} // namespace + TensorHandle::PackedTensorHandleData::PackedTensorHandleData( std::vector&& handles, const TensorShape& shape) : handles_(std::move(handles)), shape_(shape) { @@ -124,6 +131,10 @@ string TensorHandle::PackedTensorHandleData::DebugString() const { return debug_str; } +int TensorHandle::PackedTensorHandleData::NumPackedHandles() const { + return handles_.size(); +} + Status TensorHandle::PackedTensorHandleData::ExtractPackedHandle( const int index, TensorHandle** handle) const { if (index < 0 || index >= handles_.size()) { @@ -185,6 +196,13 @@ Status TensorHandle::GetResourceAllowedDevices(std::vector* result) { return GetResourceHandleInfoImpl(get_resource_info); } +int TensorHandle::NumPackedHandles() const { + if (Type() != PACKED) { + return 0; + } + return absl::get(data_).NumPackedHandles(); +} + Status TensorHandle::ExtractPackedHandle(const int index, TensorHandle** handle) const { if (Type() != PACKED) { @@ -233,6 +251,8 @@ TensorHandle::TensorHandle(tensorflow::Tensor&& t, Device* d, Device* op_device, device_((!ctx || d == ctx->HostCPU()) ? 
nullptr : d), op_device_(op_device), resource_device_(resource_device), + resource_remote_device_incarnation_( + GetRemoteDeviceIncarnation(resource_device_)), ctx_(ctx), data_(absl::in_place_type, std::move(t)) { DVLOG(3) << "Creating Local TensorHandle: " << this @@ -247,6 +267,8 @@ TensorHandle::TensorHandle(tensorflow::Tensor&& t, Device* d, Device* op_device, op_device_(op_device), resource_device_( GetResourceDevice(t.flat()(0), ctx)), + resource_remote_device_incarnation_( + GetRemoteDeviceIncarnation(resource_device_)), ctx_(ctx), resource_handle_info_( {t.flat()(0).dtypes_and_shapes(), @@ -263,6 +285,7 @@ TensorHandle::TensorHandle(tensorflow::Tensor&& t, CustomDevice* d, device_(d), op_device_(nullptr), resource_device_(nullptr), + resource_remote_device_incarnation_(0), ctx_(ctx), data_(absl::in_place_type, std::move(t)) { // TODO(allenl): Figure out a better op_device story for custom devices, @@ -286,6 +309,8 @@ TensorHandle::TensorHandle(Device* d, Device* op_device, device_((d == ctx->HostCPU()) ? nullptr : d), op_device_(op_device), resource_device_(resource_device), + resource_remote_device_incarnation_( + GetRemoteDeviceIncarnation(resource_device_)), ctx_(ctx), data_(absl::in_place_type) { DVLOG(3) << "Creating empty Local TensorHandle: " << this @@ -315,8 +340,8 @@ Status TensorHandle::CreatePackedHandle(std::vector&& handles, return errors::InvalidArgument( "CustomDevice is not supported for packing."); } else { - devices.push_back( - absl::get(handle->DeviceOrHostCPU(*ctx))->name()); + devices.push_back(handle->op_device() ? handle->op_device()->name() + : ctx->HostCPU()->name()); } } @@ -343,6 +368,8 @@ TensorHandle::TensorHandle(std::vector&& handles, Device* device, device_(device), op_device_(device), resource_device_(dtype == DT_RESOURCE ? 
device : nullptr), + resource_remote_device_incarnation_( + GetRemoteDeviceIncarnation(resource_device_)), ctx_(ctx), data_(absl::in_place_type, std::move(handles), shape) { @@ -365,6 +392,8 @@ TensorHandle::TensorHandle(int64 op_id, int32 output_num, device_(d), op_device_(d), resource_device_(dtype == DT_RESOURCE ? d : nullptr), + resource_remote_device_incarnation_( + GetRemoteDeviceIncarnation(resource_device_)), ctx_(ctx), data_(absl::in_place_type, op_id, output_num, remote_task, ctx) { @@ -387,6 +416,8 @@ TensorHandle::TensorHandle(int64 op_id, int32 output_num, device_(d), op_device_(d), resource_device_(dtype == DT_RESOURCE ? d : nullptr), + resource_remote_device_incarnation_( + GetRemoteDeviceIncarnation(resource_device_)), ctx_(ctx), data_(absl::in_place_type, op_id, output_num, ctx->GetContextViewId()) { diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.h b/tensorflow/core/common_runtime/eager/tensor_handle.h index 7908f39d4b4..25d7fea3200 100644 --- a/tensorflow/core/common_runtime/eager/tensor_handle.h +++ b/tensorflow/core/common_runtime/eager/tensor_handle.h @@ -133,6 +133,9 @@ class TensorHandle : public AbstractTensorHandleInterface, VariantDevice device() const { return device_; } Device* op_device() const { return op_device_; } Device* resource_device() const { return resource_device_; } + int64 resource_remote_device_incarnation() const { + return resource_remote_device_incarnation_; + } VariantDevice DeviceOrHostCPU(const EagerContext& ctx) const; @@ -231,6 +234,8 @@ class TensorHandle : public AbstractTensorHandleInterface, std::vector* result); Status GetResourceAllowedDevices(std::vector* result); + // Returns the number of packed handles. 0 if the handle type is not PACKED. + int NumPackedHandles() const; // It's called on a packed TensorHandle. Extract a handle with the given // index. 
Status ExtractPackedHandle(const int index, TensorHandle** handle) const; @@ -263,6 +268,9 @@ class TensorHandle : public AbstractTensorHandleInterface, // If the tensor dtype is DT_RESOURCE, resource_device_ holds the device // backing the resource. Else resource_device_ is nullptr. tensorflow::Device* const resource_device_; + // Incarnation ID of the resource device if it locates on a remote device, or + // 0 if it locates on a local device. + const int64 resource_remote_device_incarnation_; mutable mutex mu_; @@ -316,6 +324,8 @@ class TensorHandle : public AbstractTensorHandleInterface, void Poison(Status status); string DebugString() const; + // Number of packed handles. + int NumPackedHandles() const; // Extract a handle on the given index. Status ExtractPackedHandle(const int index, TensorHandle** handle) const; diff --git a/tensorflow/core/common_runtime/eager/tensor_handle_test.cc b/tensorflow/core/common_runtime/eager/tensor_handle_test.cc index c823b6aa9b0..779158375de 100644 --- a/tensorflow/core/common_runtime/eager/tensor_handle_test.cc +++ b/tensorflow/core/common_runtime/eager/tensor_handle_test.cc @@ -19,6 +19,7 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/random.h" #include "tensorflow/core/platform/test.h" namespace tensorflow { @@ -66,17 +67,28 @@ TEST(TensorHandle_ShapeTest, AsyncShape) { ctx->Unref(); } -static Device* CreateDevice(const char* type, const char* name) { +static Device* CreateDevice(const char* type, const char* name, + bool is_local = true) { class FakeDevice : public Device { public: - explicit FakeDevice(const DeviceAttributes& attr) : Device(nullptr, attr) {} + explicit FakeDevice(const DeviceAttributes& attr, bool is_local) + : Device(nullptr, attr), is_local_(is_local) {} Status Sync() override { return Status::OK(); } Allocator* GetAllocator(AllocatorAttributes) override { return nullptr; } + bool IsLocal() const override { return is_local_; } + + private: + const bool is_local_; }; DeviceAttributes attr; attr.set_name(name); attr.set_device_type(type); - return new FakeDevice(attr); + int64 incarnation = random::New64(); + while (incarnation == 0) { + incarnation = random::New64(); + } + attr.set_incarnation(incarnation); + return new FakeDevice(attr, is_local); } } // namespace @@ -164,6 +176,7 @@ TEST_F(PackedTensorHandleTest, PackedHandle) { h2->Unref(); h3->Unref(); + EXPECT_EQ(packed_handle->NumPackedHandles(), 4); EXPECT_EQ(packed_handle->Type(), TensorHandle::PACKED); EXPECT_EQ(packed_handle->dtype, dtype); TensorShape packed_shape; @@ -185,7 +198,7 @@ TEST_F(PackedTensorHandleTest, PackedHandle) { const std::vector expected_handle_types = { TensorHandle::LOCAL, TensorHandle::LOCAL, TensorHandle::REMOTE, TensorHandle::REMOTE}; - for (int i = 0; i < 4; ++i) { + for (int i = 0; i < packed_handle->NumPackedHandles(); ++i) { TensorHandle* h = nullptr; TF_ASSERT_OK(packed_handle->ExtractPackedHandle(i, &h)); EXPECT_EQ(absl::get(h->device()), ListDevices().at(i)); @@ -203,4 +216,87 @@ 
TEST_F(PackedTensorHandleTest, PackedHandle) { packed_handle->Unref(); } +TEST(TensorHandle_ResourceDeviceTest, OnLocalDevice) { + std::unique_ptr d0( + CreateDevice("CPU", "/job:localhost/replica:0/task:0/device:CPU:0")); + StaticDeviceMgr local_device_mgr(std::move(d0)); + auto ctx = new EagerContext( + SessionOptions(), + tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT, + tensorflow::ContextMirroringPolicy::MIRRORING_NONE, false, false, + &local_device_mgr, false, nullptr, nullptr, nullptr); + + tensorflow::DataType dtype = DT_RESOURCE; + TensorShape shape = {2}; + Tensor t(dtype, shape); + + Device* d = local_device_mgr.ListDevices()[0]; + TensorHandle* th = + TensorHandle::CreateLocalHandle(std::move(t), d, d, d, ctx); + // Remote device incarnation for local resource should be 0 (invalid) + EXPECT_EQ(0, th->resource_remote_device_incarnation()); + // Local device manager must contain the resource device. + EXPECT_TRUE(local_device_mgr.ContainsDevice( + th->resource_device()->attributes().incarnation())); + + std::unique_ptr d1( + CreateDevice("CPU", "/job:localhost/replica:0/task:0/device:CPU:0")); + StaticDeviceMgr new_device_mgr(std::move(d1)); + EXPECT_FALSE(new_device_mgr.ContainsDevice( + th->resource_device()->attributes().incarnation())); + + th->Unref(); + ctx->Unref(); +} + +TEST(TensorHandle_ResourceDeviceTest, OnRemoteDevice) { + std::unique_ptr d_local( + CreateDevice("CPU", "/job:localhost/replica:0/task:0/device:CPU:0")); + StaticDeviceMgr local_device_mgr(std::move(d_local)); + auto ctx = new EagerContext( + SessionOptions(), + tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT, + tensorflow::ContextMirroringPolicy::MIRRORING_NONE, false, false, + &local_device_mgr, false, nullptr, nullptr, nullptr); + + std::unique_ptr d0( + CreateDevice("CPU", "/job:worker/task:0/device:CPU:0", false)); + Device* d0_ptr = d0.get(); + std::unique_ptr d1( + CreateDevice("CPU", "/job:worker/task:1/device:CPU:0", false)); + 
Device* d1_ptr = d1.get(); + + DynamicDeviceMgr remote_device_mgr; + std::vector> vector_d0; + vector_d0.emplace_back(std::move(d0)); + TF_ASSERT_OK(remote_device_mgr.AddDevices(std::move(vector_d0))); + + TensorHandle* th0 = TensorHandle::CreateUnshapedRemoteHandle( + 0, 0, "", DT_RESOURCE, d0_ptr, ctx); + EXPECT_TRUE(remote_device_mgr.ContainsDevice( + th0->resource_remote_device_incarnation())); + + std::vector> vector_d1; + vector_d1.emplace_back(std::move(d1)); + TF_ASSERT_OK(remote_device_mgr.AddDevices(std::move(vector_d1))); + EXPECT_TRUE(remote_device_mgr.ContainsDevice( + th0->resource_remote_device_incarnation())); + + TensorHandle* th1 = TensorHandle::CreateUnshapedRemoteHandle( + 0, 0, "", DT_RESOURCE, d1_ptr, ctx); + EXPECT_TRUE(remote_device_mgr.ContainsDevice( + th1->resource_remote_device_incarnation())); + + std::vector remove_d1{d1_ptr}; + TF_ASSERT_OK(remote_device_mgr.RemoveDevices(std::move(remove_d1))); + EXPECT_FALSE(remote_device_mgr.ContainsDevice( + th1->resource_remote_device_incarnation())); + EXPECT_TRUE(remote_device_mgr.ContainsDevice( + th0->resource_remote_device_incarnation())); + + th0->Unref(); + th1->Unref(); + ctx->Unref(); +} + } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index 1f2a364258f..447a9e0ae77 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -403,7 +403,7 @@ ExecutorState::ExecutorState( runner_(args.runner), sync_on_finish_(args.sync_on_finish), run_all_kernels_inline_(args.run_all_kernels_inline), - propagator_(immutable_state, step_id_), + propagator_(immutable_state, step_id_, vlog_), num_outstanding_ops_(0) { if (args.user_intra_op_threadpool != nullptr) { Device* device = immutable_state_.params().device; @@ -811,16 +811,14 @@ template Status ExecutorState::PrepareInputs( const NodeItem& item, Entry* first_input, TensorValueVec* inputs, AllocatorAttributeVec* 
input_alloc_attrs, bool* is_input_dead) { - inputs->clear(); inputs->resize(item.num_inputs); - input_alloc_attrs->clear(); input_alloc_attrs->resize(item.num_inputs); *is_input_dead = false; - bool is_merge = item.is_merge; for (int i = 0; i < item.num_inputs; ++i) { - const bool expect_ref = IsRefType(item.input_type(i)); + const bool expect_ref = TF_PREDICT_FALSE(item.is_any_input_ref_typed) && + IsRefType(item.input_type(i)); Entry* entry = first_input + i; (*input_alloc_attrs)[i] = entry->alloc_attr; @@ -830,7 +828,10 @@ Status ExecutorState::PrepareInputs( switch (entry->state) { case Entry::State::NO_VALUE: { // Only merge and transfer nodes can have no-value inputs. - if (!is_merge) { + inp->mutex_if_ref = nullptr; + if (item.is_merge) { + inp->tensor = nullptr; + } else { DCHECK(item.is_transfer_node) << item.kernel->name() << " - input " << i; entry->state = Entry::State::HAS_CONST_TENSOR; @@ -846,17 +847,18 @@ Status ExecutorState::PrepareInputs( } case Entry::State::HAS_VALUE: { - if (expect_ref) { + if (TF_PREDICT_FALSE(expect_ref)) { return AttachDef( errors::InvalidArgument(i, "-th input expects a ref type"), item.kernel->def()); } + inp->mutex_if_ref = nullptr; inp->tensor = entry->val.get(); break; } case Entry::State::HAS_CONST_TENSOR: { - if (expect_ref) { + if (TF_PREDICT_FALSE(expect_ref)) { return AttachDef( errors::InvalidArgument(i, "-th input expects a ref type"), item.kernel->def()); @@ -865,6 +867,7 @@ Status ExecutorState::PrepareInputs( // stores a non-const `Tensor*`, and relies on the `OpKernelContext` // accessors making dynamic checks that prevent using an immutable // tensor as a mutable tensor. 
+ inp->mutex_if_ref = nullptr; inp->tensor = const_cast(entry->const_tensor); break; } @@ -872,8 +875,8 @@ Status ExecutorState::PrepareInputs( case Entry::State::HAS_REF_TENSOR: { { tf_shared_lock ml(*entry->ref_tensor.mu); - if (!entry->ref_tensor.tensor->IsInitialized() && - !item.is_initialization_op) { + if (TF_PREDICT_FALSE(!entry->ref_tensor.tensor->IsInitialized() && + !item.is_initialization_op)) { return AttachDef(errors::FailedPrecondition( "Attempting to use uninitialized value ", item.kernel->requested_input(i)), @@ -896,12 +899,13 @@ Status ExecutorState::PrepareInputs( } entry->state = Entry::State::HAS_VALUE; + inp->mutex_if_ref = nullptr; inp->tensor = entry->val.get(); // The dtype of entry->ref_tensor.tensor could have been changed by // another operation that ran after the operation that "produced" it // executed, so re-validate that the type of the dereferenced tensor // matches the expected input type. - if (item.input_type(i) != inp->tensor->dtype()) { + if (TF_PREDICT_FALSE(item.input_type(i) != inp->tensor->dtype())) { return AttachDef( errors::InvalidArgument( i, "-th input expects type ", diff --git a/tensorflow/core/common_runtime/executor_test.cc b/tensorflow/core/common_runtime/executor_test.cc index 9a1b7cff813..dd65b5dce1d 100644 --- a/tensorflow/core/common_runtime/executor_test.cc +++ b/tensorflow/core/common_runtime/executor_test.cc @@ -549,7 +549,8 @@ BENCHMARK(BM_FeedInputFetchOutput); // // ...using the functional `WhileOp` (if `lower` is false) or the // `Switch`/`Merge`-style of control flow (if `lower` is true). -static void BM_WhileLoopHelper(int iters, int loop_iters, bool lower) { +static void BM_WhileLoopHelper(int iters, int loop_iters, int loop_vars, + bool lower) { testing::StopTiming(); std::unique_ptr graph(new Graph(OpRegistry::Global())); @@ -558,20 +559,44 @@ static void BM_WhileLoopHelper(int iters, int loop_iters, bool lower) { // Define the loop body as a function: `x = x + 1`. 
const Tensor one_t = test::AsScalar(1); + + std::vector args; + args.reserve(loop_vars); + args.push_back("x: int32"); + for (int i = 1; i < loop_vars; ++i) { + args.push_back(strings::StrCat("x", i, ": int32")); + } + + std::vector body_rets; + body_rets.reserve(loop_vars); + body_rets.push_back("y: int32"); + for (int i = 1; i < loop_vars; ++i) { + body_rets.push_back(strings::StrCat("y", i, ": int32")); + } + + std::vector body_nodes; + body_nodes.reserve(1 + loop_vars); + body_nodes.push_back( + {{"one"}, "Const", {}, {{"value", one_t}, {"dtype", DT_INT32}}}); + body_nodes.push_back({{"y"}, "Add", {"x", "one"}, {{"T", DT_INT32}}}); + for (int i = 1; i < loop_vars; ++i) { + body_nodes.push_back({{strings::StrCat("y", i)}, + "Identity", + {strings::StrCat("x", i)}, + {{"T", DT_INT32}}}); + } + *f_lib_proto.add_function() = FunctionDefHelper::Define( // Name "XPlusOne", // Args - {"x: int32"}, + args, // Return values - {"y: int32"}, + body_rets, // Attr def {}, // Nodes - { - {{"one"}, "Const", {}, {{"value", one_t}, {"dtype", DT_INT32}}}, - {{"y"}, "Add", {"x", "one"}, {{"T", DT_INT32}}}, - }); + body_nodes); // Define the loop condition as a function: `x < loop_iters`. 
const Tensor loop_iters_t = test::AsScalar(loop_iters); @@ -579,7 +604,7 @@ static void BM_WhileLoopHelper(int iters, int loop_iters, bool lower) { // Name "LessThanOrEqualToN", // Args - {"x: int32"}, + args, // Return values {"z: bool"}, // Attr def @@ -594,7 +619,12 @@ static void BM_WhileLoopHelper(int iters, int loop_iters, bool lower) { TF_ASSERT_OK(root.graph()->AddFunctionLibrary(f_lib_proto)); auto a = ops::Const(root.WithOpName("A"), 0, {}); Node* while_node; - std::vector inputs({NodeBuilder::NodeOut(a.node())}); + std::vector inputs; + std::vector input_types(loop_vars, DT_INT32); + inputs.reserve(loop_vars); + for (int i = 0; i < loop_vars; ++i) { + inputs.push_back(NodeBuilder::NodeOut(a.node())); + } AttrValue int32_attr; int32_attr.set_type(DT_INT32); AttrValue cond_func; @@ -604,7 +634,7 @@ static void BM_WhileLoopHelper(int iters, int loop_iters, bool lower) { TF_ASSERT_OK( NodeBuilder("while", "While", &root.graph()->flib_def()) .Input(inputs) - .Attr("T", {DT_INT32}) + .Attr("T", input_types) .Attr("cond", cond_func) .Attr("body", body_func) .Attr("parallel_iterations", 100) @@ -635,21 +665,33 @@ static void BM_WhileLoopHelper(int iters, int loop_iters, bool lower) { test::Benchmark("cpu", graph.release()).Run(iters); } -static void BM_LoweredWhileLoop(int iters, int loop_iters) { - BM_WhileLoopHelper(iters, loop_iters, /* lower= */ true); +static void BM_LoweredWhileLoop(int iters, int loop_iters, int loop_vars) { + BM_WhileLoopHelper(iters, loop_iters, loop_vars, /* lower= */ true); } -BENCHMARK(BM_LoweredWhileLoop)->Arg(0); -BENCHMARK(BM_LoweredWhileLoop)->Arg(1); -BENCHMARK(BM_LoweredWhileLoop)->Arg(10); -BENCHMARK(BM_LoweredWhileLoop)->Arg(100); -BENCHMARK(BM_LoweredWhileLoop)->Arg(1000); +BENCHMARK(BM_LoweredWhileLoop) + ->ArgPair(0, 1) + ->ArgPair(1, 1) + ->ArgPair(10, 1) + ->ArgPair(100, 1) + ->ArgPair(1000, 1) + ->ArgPair(0, 100) + ->ArgPair(1, 100) + ->ArgPair(10, 100) + ->ArgPair(100, 100) + ->ArgPair(1000, 100); -static void 
BM_FunctionalWhileLoop(int iters, int loop_iters) { - BM_WhileLoopHelper(iters, loop_iters, /* lower= */ false); +static void BM_FunctionalWhileLoop(int iters, int loop_iters, int loop_vars) { + BM_WhileLoopHelper(iters, loop_iters, loop_vars, /* lower= */ false); } -BENCHMARK(BM_FunctionalWhileLoop)->Arg(0); -BENCHMARK(BM_FunctionalWhileLoop)->Arg(1); -BENCHMARK(BM_FunctionalWhileLoop)->Arg(10); -BENCHMARK(BM_FunctionalWhileLoop)->Arg(100); -BENCHMARK(BM_FunctionalWhileLoop)->Arg(1000); +BENCHMARK(BM_FunctionalWhileLoop) + ->ArgPair(0, 1) + ->ArgPair(1, 1) + ->ArgPair(10, 1) + ->ArgPair(100, 1) + ->ArgPair(1000, 1) + ->ArgPair(0, 100) + ->ArgPair(1, 100) + ->ArgPair(10, 100) + ->ArgPair(100, 100) + ->ArgPair(1000, 100); } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/graph_view.cc b/tensorflow/core/common_runtime/graph_view.cc index 7db0781551d..7a63e06814a 100644 --- a/tensorflow/core/common_runtime/graph_view.cc +++ b/tensorflow/core/common_runtime/graph_view.cc @@ -191,9 +191,11 @@ char* GraphView::InitializeNode(char* ptr, const Node* n) { DCHECK_LT(DataType_MAX, 255); // Must fit in uint8 uint8* input_types = item->input_type_base(); + item->is_any_input_ref_typed = false; for (int i = 0; i < num_inputs; i++) { input_types[i] = static_cast(n->input_type(i)); DCHECK_EQ(item->input_type(i), n->input_type(i)); + item->is_any_input_ref_typed |= IsRefType(n->input_type(i)); } // Check ScopedAllocatorAttrs and forward_from. Also assign output_types. diff --git a/tensorflow/core/common_runtime/graph_view.h b/tensorflow/core/common_runtime/graph_view.h index 6d31555ed9a..38eb3e33bcb 100644 --- a/tensorflow/core/common_runtime/graph_view.h +++ b/tensorflow/core/common_runtime/graph_view.h @@ -81,6 +81,8 @@ struct NodeItem { // of any output edge is a // merge or control trigger // node. + bool is_any_input_ref_typed : 1; // True iff any IsRefType(dt) for dt in this + // node's input types. // The kernel for this node. 
OpKernel* kernel = nullptr; diff --git a/tensorflow/core/common_runtime/immutable_executor_state.cc b/tensorflow/core/common_runtime/immutable_executor_state.cc index a98d9f0feaa..03d12a0e98a 100644 --- a/tensorflow/core/common_runtime/immutable_executor_state.cc +++ b/tensorflow/core/common_runtime/immutable_executor_state.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/core/graph/edgeset.h" #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/graph/graph_node_util.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/logging.h" namespace tensorflow { @@ -39,9 +40,6 @@ ImmutableExecutorState::~ImmutableExecutorState() { params_.delete_kernel(item->kernel); } } - for (auto fiter : frame_info_) { - delete fiter.second; - } } namespace { @@ -71,11 +69,16 @@ void GetMaxPendingCounts(const Node* n, size_t* max_pending, ImmutableExecutorState::FrameInfo* ImmutableExecutorState::EnsureFrameInfo( const string& fname) { - auto slot = &frame_info_[fname]; - if (*slot == nullptr) { - *slot = new FrameInfo; + auto iter = frame_info_.find(fname); + if (iter != frame_info_.end()) { + return iter->second.get(); + } else { + auto frame_info = absl::make_unique(fname); + absl::string_view fname_view = frame_info->name; + auto emplace_result = + frame_info_.emplace(fname_view, std::move(frame_info)); + return emplace_result.first->second.get(); } - return *slot; } Status ImmutableExecutorState::Initialize(const Graph& graph) { @@ -89,7 +92,7 @@ Status ImmutableExecutorState::Initialize(const Graph& graph) { EnsureFrameInfo(it)->nodes = absl::make_unique>(); } - root_frame_info_ = frame_info_[""]; + root_frame_info_ = frame_info_[""].get(); pending_ids_.resize(gview_.num_nodes()); @@ -157,6 +160,28 @@ Status ImmutableExecutorState::Initialize(const Graph& graph) { TF_RETURN_IF_ERROR( GetNodeAttr(n->attrs(), "is_constant", &is_constant_enter)); item->is_constant_enter = is_constant_enter; + + string frame_name; + 
TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), "frame_name", &frame_name)); + FrameInfo* frame_info = frame_info_[frame_name].get(); + + int parallel_iterations; + TF_RETURN_IF_ERROR( + GetNodeAttr(n->attrs(), "parallel_iterations", &parallel_iterations)); + + if (frame_info->parallel_iterations == -1) { + frame_info->parallel_iterations = parallel_iterations; + } else if (frame_info->parallel_iterations != parallel_iterations) { + LOG(WARNING) << "Loop frame \"" << frame_name + << "\" had two different values for parallel_iterations: " + << frame_info->parallel_iterations << " vs. " + << parallel_iterations << "."; + } + + if (enter_frame_info_.size() <= id) { + enter_frame_info_.resize(id + 1); + } + enter_frame_info_[id] = frame_info; } else { item->is_constant_enter = false; } diff --git a/tensorflow/core/common_runtime/immutable_executor_state.h b/tensorflow/core/common_runtime/immutable_executor_state.h index 50c98939ea8..a35edfe227c 100644 --- a/tensorflow/core/common_runtime/immutable_executor_state.h +++ b/tensorflow/core/common_runtime/immutable_executor_state.h @@ -20,6 +20,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" #include "tensorflow/core/common_runtime/graph_view.h" #include "tensorflow/core/common_runtime/local_executor_params.h" #include "tensorflow/core/common_runtime/pending_counts.h" @@ -41,11 +42,16 @@ class Graph; class ImmutableExecutorState { public: struct FrameInfo { - FrameInfo() - : input_count(0), + explicit FrameInfo(string name) + : name(std::move(name)), + input_count(0), total_inputs(0), pending_counts(nullptr), - nodes(nullptr) {} + nodes(nullptr), + parallel_iterations(-1) {} + + // The name of the frame. + string name; // The total number of inputs to a frame. int input_count; @@ -63,6 +69,9 @@ class ImmutableExecutorState { // The nodes in a frame. Used only for debugging. std::unique_ptr> nodes; + + // The number of iterations of this frame that can execute concurrently. 
+ int32 parallel_iterations; }; explicit ImmutableExecutorState(const LocalExecutorParams& p) @@ -83,17 +92,13 @@ class ImmutableExecutorState { } const std::vector& root_nodes() const { return root_nodes_; } - const FrameInfo* get_frame_info(const string& frame_name) const { - auto it_frame_info = frame_info_.find(frame_name); - if (it_frame_info == frame_info_.end()) { - return nullptr; - } else { - return it_frame_info->second; - } - } - const FrameInfo& get_root_frame_info() const { return *root_frame_info_; } + const FrameInfo& get_enter_frame_info(const NodeItem& node_item) const { + DCHECK(node_item.is_enter); + return *enter_frame_info_[node_item.node_id]; + } + bool requires_control_flow_support() const { return requires_control_flow_; } // Copies the pending counts for nodes in this graph to the given array. @@ -135,9 +140,14 @@ class ImmutableExecutorState { // Mapping from frame name to static information about the frame. // TODO(yuanbyu): We could cache it along with the graph so to avoid // the overhead of constructing it for each executor instance. - gtl::FlatMap frame_info_; + absl::flat_hash_map> + frame_info_; const FrameInfo* root_frame_info_; // Not owned. + // If the graph contains any "Enter" or "RefEnter" nodes, this vector maps + // dense node IDs to the corresponding FrameInfo. + std::vector enter_frame_info_; + // If `requires_control_flow_` is false, this points to an array of initial // pending counts for the nodes in the graph, indexed by node ID. 
std::unique_ptr[]> atomic_pending_counts_; diff --git a/tensorflow/core/common_runtime/lower_while_op.cc b/tensorflow/core/common_runtime/lower_while_op.cc index e9d322721f2..90fdc886c50 100644 --- a/tensorflow/core/common_runtime/lower_while_op.cc +++ b/tensorflow/core/common_runtime/lower_while_op.cc @@ -238,12 +238,14 @@ Status LowerWhileHelper::CreateEnterNodes() { TF_RETURN_IF_ERROR(while_op_->input_edges(&edges)); for (const Edge* edge : edges) { Node* enter_node; - NodeBuilder builder = NodeBuilder(NewName("enter"), "Enter", - graph_->op_registry(), &debug_info_) - .Input(NodeOut(edge->src(), edge->src_output())) - .Attr("frame_name", name_) - .Attr("parallel_iterations", parallel_iterations_) - .Device(while_op_->requested_device()); + NodeBuilder builder = + NodeBuilder(NewName("enter"), "Enter", graph_->op_registry(), + &debug_info_) + .Input(NodeOut(edge->src(), edge->src_output())) + .Attr("frame_name", name_) + .Attr("parallel_iterations", parallel_iterations_) + .Device(edge->src()->requested_device()) + .AssignedDevice(edge->src()->assigned_device_name()); if (IsResource(edge->dst_input())) { builder.Attr("is_constant", true); } @@ -282,7 +284,8 @@ Status LowerWhileHelper::CreateMergeNodes() { NodeBuilder(NewName("merge"), "Merge", graph_->op_registry(), &debug_info_) .Input({NodeOut(enter_node, 0), NodeOut(enter_node, 0)}) - .Device(while_op_->requested_device()) + .Device(enter_node->requested_device()) + .AssignedDevice(enter_node->assigned_device_name()) .Finalize(graph_, &merge_node)); merge_nodes_.emplace_back(merge_node); } @@ -323,21 +326,19 @@ Status LowerWhileHelper::CreateSwitchNodes() { TF_RETURN_IF_ERROR(while_op_->input_node(i, &input_node)); op_name = strings::StrCat(input_node->name(), "_switch"); } + Node* merge_node = merge_nodes_[op_input_output_to_lowered_node_[i]]; Node* switch_node; string op_type = "Switch"; - if (IsRefType( - merge_nodes_[op_input_output_to_lowered_node_[i]]->output_type( - 0))) { + if 
(IsRefType(merge_node->output_type(0))) { op_type = "RefSwitch"; } - TF_RETURN_IF_ERROR( - NodeBuilder(NewName(op_name), op_type, graph_->op_registry(), - &debug_info_) - .Input( - NodeOut(merge_nodes_[op_input_output_to_lowered_node_[i]], 0)) - .Input(NodeOut(loop_cond_node_, 0)) - .Device(while_op_->requested_device()) - .Finalize(graph_, &switch_node)); + TF_RETURN_IF_ERROR(NodeBuilder(NewName(op_name), op_type, + graph_->op_registry(), &debug_info_) + .Input(NodeOut(merge_node, 0)) + .Input(NodeOut(loop_cond_node_, 0)) + .Device(merge_node->requested_device()) + .AssignedDevice(merge_node->assigned_device_name()) + .Finalize(graph_, &switch_node)); switch_nodes_.emplace_back(switch_node); } return Status::OK(); @@ -392,7 +393,10 @@ Status LowerWhileHelper::CreateExitNodes() { &debug_info_) .Input(NodeOut(switch_nodes_[op_input_output_to_lowered_node_[i]], 0)) - .Device(while_op_->requested_device()) + .Device(switch_nodes_[op_input_output_to_lowered_node_[i]] + ->requested_device()) + .AssignedDevice(switch_nodes_[op_input_output_to_lowered_node_[i]] + ->assigned_device_name()) .Finalize(graph_, &exit_node)); exit_nodes_.emplace_back(exit_node); outputs.emplace_back(NodeOut(exit_node, 0)); @@ -440,11 +444,13 @@ Status LowerWhileHelper::CreateNextIterationNodes() { if (IsResource(i)) { continue; } + Node* merge_node = merge_nodes_[op_input_output_to_lowered_node_[i]]; TF_RETURN_IF_ERROR(NodeBuilder(NewName("next_iteration"), "NextIteration", graph_->op_registry(), &debug_info_) .Input(NodeOut(body_call_node_, i)) .ControlInput(body_call_node_) - .Device(while_op_->requested_device()) + .Device(merge_node->requested_device()) + .AssignedDevice(merge_node->assigned_device_name()) .Finalize(graph_, &next_iteration)); next_iterations_nodes_.emplace_back(next_iteration); } diff --git a/tensorflow/core/common_runtime/lower_while_op_test.cc b/tensorflow/core/common_runtime/lower_while_op_test.cc index 0fc005cfb6f..9d7870f891d 100644 --- 
a/tensorflow/core/common_runtime/lower_while_op_test.cc +++ b/tensorflow/core/common_runtime/lower_while_op_test.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "absl/strings/match.h" #include "tensorflow/cc/client/client_session.h" #include "tensorflow/cc/framework/ops.h" #include "tensorflow/cc/ops/array_ops.h" @@ -25,6 +26,7 @@ limitations under the License. #include "tensorflow/core/framework/function_testlib.h" #include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/graph/graph_def_builder.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/strings/str_util.h" @@ -169,6 +171,238 @@ TEST(LowerWhileOpTest, Simple) { } } +TEST(LowerWhileOpTest, ForwardAssignedInputDevice) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + + // Add test functions for cond and body. 
+ FunctionDefLibrary f_lib_proto; + *f_lib_proto.add_function() = test::function::XTimesTwo(); + *f_lib_proto.add_function() = test::function::LessThanOrEqualToN(8); + + TF_ASSERT_OK(graph->AddFunctionLibrary(f_lib_proto)); + auto type = DT_FLOAT; + Node* placeholder; + TF_CHECK_OK(NodeBuilder("placed_node", "Placeholder") + .Attr("dtype", type) + .Finalize(graph.get(), &placeholder)); + const string assigned_device_name = "/job:localhost/replica:0/task:0/gpu:0"; + placeholder->set_assigned_device_name(assigned_device_name); + Node* while_node; + std::vector inputs({NodeBuilder::NodeOut(placeholder)}); + AttrValue cond_func; + cond_func.mutable_func()->set_name("LessThanOrEqualToN"); + AttrValue body_func; + body_func.mutable_func()->set_name("XTimesTwo"); + TF_ASSERT_OK( + NodeBuilder("while", "While", &graph->flib_def()) + .Input(inputs) + .Attr("T", {type}) + .Attr("cond", cond_func) + .Attr("body", body_func) + .Attr("parallel_iterations", 100) + .Attr(LowerFunctionalOpsPass::kLowerUsingSwitchMergeAttr, true) + .Finalize(graph.get(), &while_node)); + TF_ASSERT_OK(Rewrite(&graph)); + + const Node* placeholder_node = nullptr; + for (const auto* op : graph->op_nodes()) { + if (op->name() == "placed_node") { + placeholder_node = op; + } + } + ASSERT_NE(placeholder_node, nullptr); + // Verify the assigned device of the Enter node. + int enter_consumers = 0; + const Node* enter_node = nullptr; + for (const Node* consumer : placeholder_node->out_nodes()) { + if (consumer->type_string() == "Enter") { + enter_consumers += 1; + enter_node = consumer; + ASSERT_EQ(consumer->assigned_device_name(), assigned_device_name); + } + } + ASSERT_EQ(enter_consumers, 1); + // Verify the assigned device of the Merge node. 
+ int merge_consumers = 0; + const Node* merge_node = nullptr; + for (const Node* consumer : enter_node->out_nodes()) { + if (consumer->type_string() == "Merge") { + merge_consumers += 1; + merge_node = consumer; + ASSERT_EQ(consumer->assigned_device_name(), assigned_device_name); + } + } + ASSERT_EQ(merge_consumers, 1); + // Verify the assigned device of the NextIteration node. + int next_iteration_consumers = 0; + for (const Node* consumer : merge_node->in_nodes()) { + if (consumer->type_string() == "NextIteration") { + next_iteration_consumers += 1; + ASSERT_EQ(consumer->assigned_device_name(), assigned_device_name); + } + } + ASSERT_EQ(next_iteration_consumers, 1); + // Verify the assigned device of the Switch node. + int switch_consumers = 0; + const Node* switch_node = nullptr; + for (const Node* consumer : merge_node->out_nodes()) { + if (consumer->type_string() == "Switch") { + switch_consumers += 1; + switch_node = consumer; + ASSERT_EQ(consumer->assigned_device_name(), assigned_device_name); + } + } + ASSERT_EQ(switch_consumers, 1); + // Verify the assigned device of the Exit node. + int exit_consumers = 0; + for (const Node* consumer : switch_node->out_nodes()) { + if (consumer->type_string() == "Exit") { + exit_consumers += 1; + ASSERT_EQ(consumer->assigned_device_name(), assigned_device_name); + } + } + ASSERT_EQ(exit_consumers, 1); +} + +TEST(LowerWhileOpTest, ForwardRequestedInputDevice) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + + // Add test functions for cond and body. + FunctionDefLibrary f_lib_proto; + *f_lib_proto.add_function() = test::function::XTimesTwo(); + *f_lib_proto.add_function() = test::function::LessThanOrEqualToN(8); + + TF_ASSERT_OK(graph->AddFunctionLibrary(f_lib_proto)); + auto type = DT_FLOAT; + // We will place the loop var on the gpu:0. + const string gpu_0_device = "/job:localhost/replica:0/task:0/gpu:0"; + // We will place loop's control input on the gpu:1. 
+ const string gpu_1_device = "/job:localhost/replica:0/task:0/gpu:1"; + // We will place While op on gpu:2. + const string gpu_2_device = "/job:localhost/replica:0/task:0/gpu:2"; + Node* gpu_0_ph; + TF_CHECK_OK(NodeBuilder("placed_node", "Placeholder") + .Attr("dtype", type) + .Device(gpu_0_device) + .Finalize(graph.get(), &gpu_0_ph)); + Node* control_in; + // Add a control input to the While op to trigger the creation of a + // LoopExecuted node. + TF_CHECK_OK(NodeBuilder("control_in", "Placeholder") + .Attr("dtype", type) + .Device(gpu_1_device) + .Finalize(graph.get(), &control_in)); + Node* while_node; + std::vector inputs({NodeBuilder::NodeOut(gpu_0_ph)}); + AttrValue cond_func; + cond_func.mutable_func()->set_name("LessThanOrEqualToN"); + AttrValue body_func; + body_func.mutable_func()->set_name("XTimesTwo"); + TF_ASSERT_OK( + NodeBuilder("while", "While", &graph->flib_def()) + .Input(inputs) + .ControlInput(control_in) + .Device(gpu_2_device) + .Attr("T", {type}) + .Attr("cond", cond_func) + .Attr("body", body_func) + .Attr("parallel_iterations", 100) + .Attr(LowerFunctionalOpsPass::kLowerUsingSwitchMergeAttr, true) + .Finalize(graph.get(), &while_node)); + + // Create an empty Const node with control dep from the While op. + // This triggers the creation of a LoopExecuted node. + Node* control_out; + TensorProto proto; + proto.set_dtype(DT_FLOAT); + TensorShape empty_shape({0}); + empty_shape.AsProto(proto.mutable_tensor_shape()); + TF_ASSERT_OK(NodeBuilder("control_out", "Const") + .ControlInput(while_node) + .Attr("dtype", DT_FLOAT) + .Attr("value", proto) + .Finalize(graph.get(), &control_out)); + + TF_ASSERT_OK(Rewrite(&graph)); + + const Node* placeholder_node = nullptr; + for (const auto* op : graph->op_nodes()) { + if (op->name() == "placed_node") { + placeholder_node = op; + } + } + ASSERT_NE(placeholder_node, nullptr); + // Verify the requested device of the Enter node. 
+ int enter_consumers = 0; + const Node* enter_node = nullptr; + for (const Node* consumer : placeholder_node->out_nodes()) { + if (consumer->type_string() == "Enter") { + enter_consumers += 1; + enter_node = consumer; + ASSERT_EQ(consumer->requested_device(), gpu_0_device); + } + } + ASSERT_EQ(enter_consumers, 1); + // Verify the requested device of the Merge node. + int merge_consumers = 0; + const Node* merge_node = nullptr; + for (const Node* consumer : enter_node->out_nodes()) { + if (consumer->type_string() == "Merge") { + merge_consumers += 1; + merge_node = consumer; + ASSERT_EQ(consumer->requested_device(), gpu_0_device); + } + } + ASSERT_EQ(merge_consumers, 1); + // Verify the requested device of the NextIteration node. + int next_iteration_consumers = 0; + for (const Node* consumer : merge_node->in_nodes()) { + if (consumer->type_string() == "NextIteration") { + next_iteration_consumers += 1; + ASSERT_EQ(consumer->requested_device(), gpu_0_device); + } + } + ASSERT_EQ(next_iteration_consumers, 1); + // Verify the requested device of the Switch node. + int switch_consumers = 0; + const Node* switch_node = nullptr; + for (const Node* consumer : merge_node->out_nodes()) { + if (consumer->type_string() == "Switch") { + switch_consumers += 1; + switch_node = consumer; + ASSERT_EQ(consumer->requested_device(), gpu_0_device); + } + } + ASSERT_EQ(switch_consumers, 1); + // Verify the requested device of the Exit node. + int exit_consumers = 0; + for (const Node* consumer : switch_node->out_nodes()) { + if (consumer->type_string() == "Exit") { + exit_consumers += 1; + ASSERT_EQ(consumer->requested_device(), gpu_0_device); + } + } + ASSERT_EQ(exit_consumers, 1); + // Verify the requested device of LoopControlInputs. 
+ const Node* loop_control_inputs_node = nullptr; + for (const auto* op : graph->op_nodes()) { + if (absl::StrContains(op->name(), "LoopControlInputs")) { + loop_control_inputs_node = op; + } + } + ASSERT_NE(loop_control_inputs_node, nullptr); + ASSERT_EQ(loop_control_inputs_node->requested_device(), gpu_2_device); + // Verify the requested device of LoopExecuted. + const Node* loop_executed_node = nullptr; + for (const auto* op : graph->op_nodes()) { + if (absl::StrContains(op->name(), "LoopExecuted")) { + loop_executed_node = op; + } + } + ASSERT_NE(loop_executed_node, nullptr); + ASSERT_EQ(loop_executed_node->requested_device(), gpu_2_device); +} + TEST(LowerWhileOpTest, MultipleInputs) { std::unique_ptr graph(new Graph(OpRegistry::Global())); diff --git a/tensorflow/core/common_runtime/propagator_state.cc b/tensorflow/core/common_runtime/propagator_state.cc index 4fd5e0f97d9..a6639b1132e 100644 --- a/tensorflow/core/common_runtime/propagator_state.cc +++ b/tensorflow/core/common_runtime/propagator_state.cc @@ -16,31 +16,33 @@ limitations under the License. #include "tensorflow/core/common_runtime/propagator_state.h" #include "tensorflow/core/common_runtime/graph_view.h" +#include "tensorflow/core/common_runtime/immutable_executor_state.h" #include "tensorflow/core/common_runtime/propagator_debug_utils.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/lib/hash/hash.h" +#include "tensorflow/core/platform/hash.h" #include "tensorflow/core/profiler/lib/traceme.h" namespace tensorflow { PropagatorState::PropagatorState(const ImmutableExecutorState& immutable_state, - int64 step_id) + int64 step_id, bool vlog) : immutable_state_(immutable_state), step_id_(step_id), - vlog_(VLOG_IS_ON(1)) { + vlog_(vlog || VLOG_IS_ON(1)) { // We start the entire execution in iteration 0 of the root frame // so let us create the root frame and the state for iteration 0. // We assume root_frame_->frame_name.empty(). 
root_frame_ = new FrameState(immutable_state_, 1); root_frame_->frame_id = 0; // must be 0 - root_frame_->InitializeFrameInfo(root_frame_->frame_name); + root_frame_->InitializeFrameInfo(immutable_state_.get_root_frame_info()); // Initialize iteration 0. root_frame_->SetIteration( 0, new PropagatorState::IterationState(0, root_frame_->pending_counts, root_frame_->total_input_tensors)); - outstanding_frames_.insert({root_frame_->frame_name, root_frame_}); + outstanding_frames_.emplace(root_frame_->frame_id, root_frame_); } PropagatorState::~PropagatorState() { @@ -224,16 +226,16 @@ void PropagatorState::FindOrCreateChildFrame(FrameState* frame, const NodeItem& node_item, FrameState** child) { // Get the child frame name. - AttrSlice attrs(node_item.kernel->def()); - const string& enter_name = GetNodeAttrString(attrs, "frame_name"); - DCHECK(!enter_name.empty()) << "Could not find \"frame_name\" attr in node " - << node_item.kernel->name(); - const string child_name = strings::StrCat( - frame->frame_name, ";", iter_state->iter_num, ";", enter_name); + const ImmutableExecutorState::FrameInfo& frame_info = + immutable_state_.get_enter_frame_info(node_item); + + const uint64 child_id = Hash64Combine( + frame->frame_id, + Hash64Combine(iter_state->iter_num, Hash64(frame_info.name))); { - mutex_lock executor_lock(mu_); - auto it = outstanding_frames_.find(child_name); + tf_shared_lock executor_lock(mu_); + auto it = outstanding_frames_.find(child_id); if (it != outstanding_frames_.end()) { *child = it->second; return; @@ -242,20 +244,18 @@ void PropagatorState::FindOrCreateChildFrame(FrameState* frame, // Need to create a new frame instance. // Note that this new frame instance is created without any locks. 
- if (vlog_) VLOG(2) << "Create frame: " << child_name; + if (vlog_) { + const string child_name = strings::StrCat( + frame->frame_name, ";", iter_state->iter_num, ";", frame_info.name); + VLOG(2) << "Create frame: " << child_name << " id: " << child_id; + } - int parallel_iters; - bool found_parallel_iters = - TryGetNodeAttr(attrs, "parallel_iterations", &parallel_iters); - DCHECK(found_parallel_iters) - << "Could not find \"parallel_iterations\" attr in node " - << node_item.kernel->name(); - FrameState* temp = new FrameState(immutable_state_, parallel_iters); - temp->frame_name = child_name; - temp->frame_id = Hash64(child_name); + FrameState* temp = + new FrameState(immutable_state_, frame_info.parallel_iterations); + temp->frame_id = child_id; temp->parent_frame = frame; temp->parent_iter = iter_state; - temp->InitializeFrameInfo(enter_name); + temp->InitializeFrameInfo(frame_info); // Initialize iteration 0. { @@ -266,13 +266,13 @@ void PropagatorState::FindOrCreateChildFrame(FrameState* frame, { mutex_lock executor_lock(mu_); - auto it = outstanding_frames_.find(child_name); + auto it = outstanding_frames_.find(child_id); if (it != outstanding_frames_.end()) { *child = it->second; } else { mutex_lock frame_lock(frame->mu); iter_state->outstanding_frame_count++; - outstanding_frames_[child_name] = temp; + outstanding_frames_[child_id] = temp; *child = temp; temp = nullptr; } @@ -349,11 +349,10 @@ void PropagatorState::DeleteFrame(FrameState* frame, TaggedNodeSeq* ready) { } // Delete the frame. 
- const string& frame_name = frame->frame_name; - if (vlog_) VLOG(2) << "Delete frame " << frame_name; + if (vlog_) VLOG(2) << "Delete frame " << frame->frame_id; { mutex_lock executor_lock(mu_); - outstanding_frames_.erase(frame_name); + outstanding_frames_.erase(frame->frame_id); } delete frame; } @@ -655,14 +654,11 @@ bool PropagatorState::FrameState::CleanupIterations(IterationState* iter_state, } void PropagatorState::FrameState::InitializeFrameInfo( - const string& enter_name) { - const ImmutableExecutorState::FrameInfo* finfo = - immutable_state.get_frame_info(enter_name); - DCHECK_NE(finfo, nullptr); - pending_counts = finfo->pending_counts.get(); - total_input_tensors = finfo->total_inputs; - num_pending_inputs = finfo->input_count; - nodes = finfo->nodes.get(); + const ImmutableExecutorState::FrameInfo& finfo) { + pending_counts = finfo.pending_counts.get(); + total_input_tensors = finfo.total_inputs; + num_pending_inputs = finfo.input_count; + nodes = finfo.nodes.get(); } void PropagatorState::FrameState::SetIteration(int64 iter, diff --git a/tensorflow/core/common_runtime/propagator_state.h b/tensorflow/core/common_runtime/propagator_state.h index 459e28a83ee..167519ccc73 100644 --- a/tensorflow/core/common_runtime/propagator_state.h +++ b/tensorflow/core/common_runtime/propagator_state.h @@ -45,7 +45,8 @@ typedef gtl::InlinedVector AllocatorAttributeVec; // adding them to a `TaggedNodeSeq`. class PropagatorState { public: - PropagatorState(const ImmutableExecutorState& immutable_state, int64 step_id); + PropagatorState(const ImmutableExecutorState& immutable_state, int64 step_id, + bool vlog); ~PropagatorState(); private: @@ -279,7 +280,7 @@ class PropagatorState { // during structured traversal: parent_frame->mu < mu. 
mutex mu; - void InitializeFrameInfo(const string& enter_name); + void InitializeFrameInfo(const ImmutableExecutorState::FrameInfo& finfo); inline IterationState* GetIteration(int64 iter) TF_EXCLUSIVE_LOCKS_REQUIRED(mu) { @@ -447,12 +448,13 @@ class PropagatorState { // The root frame in which the execution of this step is started. FrameState* root_frame_; - // Mapping from frame name to outstanding frames. A new frame is created + // Mapping from frame ID to outstanding frames. A new frame is created // at some iteration of an active frame. So the unique key for the new - // child frame is composed of the name of the parent frame, the iteration + // child frame is a hash composed of the ID of the parent frame, the iteration // number at which the parent frame is creating the new frame, and the // name of the new frame from nodedef. - gtl::FlatMap outstanding_frames_ TF_GUARDED_BY(mu_); + absl::flat_hash_map outstanding_frames_ + TF_GUARDED_BY(mu_); TF_DISALLOW_COPY_AND_ASSIGN(PropagatorState); }; diff --git a/tensorflow/core/common_runtime/replicate_per_replica_nodes.cc b/tensorflow/core/common_runtime/replicate_per_replica_nodes.cc index 3609a5e7e1f..cfbcde82ce2 100644 --- a/tensorflow/core/common_runtime/replicate_per_replica_nodes.cc +++ b/tensorflow/core/common_runtime/replicate_per_replica_nodes.cc @@ -195,6 +195,9 @@ Status ReplicatePerReplicaNodesInFunctionGraph( for (Node* n : graph->op_nodes()) { if (composite_device_names.find(n->assigned_device_name()) != composite_device_names.end()) { + // TODO(b/145922293): Validate that an _Arg node assigned to a + // CompositeDevice should have an attribute indicating that the _Arg node + // represents a packed input. 
composite_device_to_cluster_nodes[n->assigned_device_name()].push_back(n); } } diff --git a/tensorflow/core/common_runtime/simple_propagator_state.cc b/tensorflow/core/common_runtime/simple_propagator_state.cc index 48fac96dd3d..01322cc3514 100644 --- a/tensorflow/core/common_runtime/simple_propagator_state.cc +++ b/tensorflow/core/common_runtime/simple_propagator_state.cc @@ -23,16 +23,16 @@ limitations under the License. namespace tensorflow { SimplePropagatorState::SimplePropagatorState( - const ImmutableExecutorState& immutable_state, int64 step_id) + const ImmutableExecutorState& immutable_state, int64 step_id, bool vlog) : SimplePropagatorState(immutable_state, step_id, - immutable_state.get_root_frame_info()) {} + immutable_state.get_root_frame_info(), vlog) {} SimplePropagatorState::SimplePropagatorState( const ImmutableExecutorState& immutable_state, int64 step_id, - const ImmutableExecutorState::FrameInfo& finfo) + const ImmutableExecutorState::FrameInfo& finfo, bool vlog) : immutable_state_(immutable_state), step_id_(step_id), - vlog_(VLOG_IS_ON(1)), + vlog_(vlog || VLOG_IS_ON(1)), input_tensors_(finfo.total_inputs), pending_( new std::atomic[immutable_state.graph_view().num_nodes()]), diff --git a/tensorflow/core/common_runtime/simple_propagator_state.h b/tensorflow/core/common_runtime/simple_propagator_state.h index 1aee4c7ff2f..024341e5048 100644 --- a/tensorflow/core/common_runtime/simple_propagator_state.h +++ b/tensorflow/core/common_runtime/simple_propagator_state.h @@ -47,7 +47,7 @@ namespace tensorflow { class SimplePropagatorState { public: SimplePropagatorState(const ImmutableExecutorState& immutable_state, - int64 step_id); + int64 step_id, bool vlog); ~SimplePropagatorState(); // A `TaggedNode` corresponds to a single invocation of a node's kernel, @@ -157,7 +157,8 @@ class SimplePropagatorState { private: SimplePropagatorState(const ImmutableExecutorState& immutable_state_, int64 step_id, - const ImmutableExecutorState::FrameInfo& finfo); + 
const ImmutableExecutorState::FrameInfo& finfo, + bool vlog); const ImmutableExecutorState& immutable_state_; const int64 step_id_; diff --git a/tensorflow/core/data/service/master_impl.cc b/tensorflow/core/data/service/master_impl.cc index 6e2c95c475e..336ab068c40 100644 --- a/tensorflow/core/data/service/master_impl.cc +++ b/tensorflow/core/data/service/master_impl.cc @@ -169,7 +169,11 @@ Status DataServiceMasterImpl::GetOrCreateJob( if (job != nullptr) { TF_RETURN_IF_ERROR(ValidateMatchingJob(**job, requested_processing_mode, request->dataset_id())); - response->set_job_id((*job)->job_id()); + int64 job_id = (*job)->job_id(); + response->set_job_id(job_id); + VLOG(3) << "Found existing job for name=" << request->job_name() + << ", index=" << request->job_name_index() + << ". job_id: " << job_id; return Status::OK(); } int64 job_id; @@ -177,6 +181,8 @@ Status DataServiceMasterImpl::GetOrCreateJob( request->job_name(), &job_id)); named_jobs_[key] = jobs_[job_id]; response->set_job_id(job_id); + VLOG(3) << "Created job " << job_id << " for dataset " + << request->dataset_id() << " and name " << request->job_name(); return Status::OK(); } diff --git a/tensorflow/core/data/service/master_impl.h b/tensorflow/core/data/service/master_impl.h index de25ea0d6a8..e8b70e84d0f 100644 --- a/tensorflow/core/data/service/master_impl.h +++ b/tensorflow/core/data/service/master_impl.h @@ -75,7 +75,7 @@ class DataServiceMasterImpl { } std::string DebugString() { - return absl::StrCat("id: ", worker_id_, "address: ", address_); + return absl::StrCat("id: ", worker_id_, " address: ", address_); } private: diff --git a/tensorflow/core/data/service/worker_impl.cc b/tensorflow/core/data/service/worker_impl.cc index 7395244a569..8d00825227b 100644 --- a/tensorflow/core/data/service/worker_impl.cc +++ b/tensorflow/core/data/service/worker_impl.cc @@ -84,6 +84,7 @@ Status DataServiceWorkerImpl::ProcessTask(const ProcessTaskRequest* request, Status 
DataServiceWorkerImpl::ProcessTaskInternal(const TaskDef& task_def) EXCLUSIVE_LOCKS_REQUIRED(mu_) { + VLOG(3) << "Received request to process task " << task_def.task_id(); standalone::Dataset::Params params; std::unique_ptr dataset; TF_RETURN_IF_ERROR(standalone::Dataset::FromGraph( @@ -100,6 +101,7 @@ Status DataServiceWorkerImpl::ProcessTaskInternal(const TaskDef& task_def) task.id = task_def.task_id(); task.dataset = std::move(dataset); task.iterator = std::move(iterator); + VLOG(3) << "Began processing for task " << task_def.task_id(); return Status::OK(); } diff --git a/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc b/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc index 92e92f47356..9930bb86e6b 100644 --- a/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc +++ b/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc @@ -728,7 +728,9 @@ TEST_F(FunctionWithRemoteInputsTest, KernelAndDeviceFuncTest) { core::RefCountPtr kernel = nullptr; const int64 op_id = 2; kernel.reset(new KernelAndDeviceFunc( - flr, eager_pflr_.get(), std::move(input_dev_ptrs), {}, /*runner=*/nullptr, + flr, eager_pflr_.get(), std::move(input_dev_ptrs), + /*composite_devices=*/{}, /*input_resource_dtypes_and_shapes=*/{}, + /*runner=*/nullptr, /*collective_executor=*/nullptr, local_device, fdef_.signature().name(), [ctx](const int64 step_id) { return ctx->CreateRendezvous(step_id); }, [=]() { return op_id; })); @@ -773,7 +775,9 @@ TEST_F(FunctionWithRemoteInputsTest, KernelAndDeviceFuncAsyncTest) { core::RefCountPtr kernel = nullptr; const int64 op_id = 2; kernel.reset(new KernelAndDeviceFunc( - flr, eager_pflr_.get(), std::move(input_dev_ptrs), {}, /*runner=*/nullptr, + flr, eager_pflr_.get(), std::move(input_dev_ptrs), + /*composite_devices=*/{}, /*input_resource_dtypes_and_shapes=*/{}, + /*runner=*/nullptr, /*collective_executor=*/nullptr, local_device, fdef_.signature().name(), [ctx](const int64 step_id) { return 
ctx->CreateRendezvous(step_id); }, [=]() { return op_id; })); diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc index 32083fc272f..25aa5f3480c 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc @@ -70,6 +70,18 @@ class NoReusePortOption : public ::grpc::ServerBuilderOption { plugins) override {} }; +// Define an option subclass in order to enable SO_REUSEPORT for the +// server socket. +class ReusePortOption : public ::grpc::ServerBuilderOption { + public: + void UpdateArguments(::grpc::ChannelArguments* args) override { + args->SetInt(GRPC_ARG_ALLOW_REUSEPORT, 1); + } + + void UpdatePlugins(std::vector>* + plugins) override {} +}; + // static utility function RendezvousMgrInterface* NewRpcRendezvousMgr(const WorkerEnv* env) { return new RpcRendezvousMgr(env); @@ -220,8 +232,18 @@ Status GrpcServer::Init(const GrpcServerOptions& opts) { GetServerCredentials(server_def_), &bound_port_); builder.SetMaxMessageSize(std::numeric_limits::max()); - builder.SetOption( - std::unique_ptr<::grpc::ServerBuilderOption>(new NoReusePortOption)); + bool reuse_port = false; + const Status status = + ReadBoolFromEnvVar("TF_GRPC_REUSE_PORT", false, &reuse_port); + if (!status.ok()) { + LOG(ERROR) << status.error_message(); + } + auto server_build_option = + reuse_port + ? std::unique_ptr<::grpc::ServerBuilderOption>(new ReusePortOption) + : std::unique_ptr<::grpc::ServerBuilderOption>(new NoReusePortOption); + builder.SetOption(std::move(server_build_option)); + // Allow subclasses to specify more args to pass to the gRPC server. 
MaybeMutateBuilder(&builder); master_impl_ = CreateMaster(&master_env_); diff --git a/tensorflow/core/framework/model.h b/tensorflow/core/framework/model.h index 1c3b64f4a0d..97ac9dd35ae 100644 --- a/tensorflow/core/framework/model.h +++ b/tensorflow/core/framework/model.h @@ -142,7 +142,31 @@ class Node { metrics_(name_), output_(args.output.get()) {} - virtual ~Node() { FlushMetrics(); } + virtual ~Node() { + // Clear the sub-nodes instead of relying on implicit shared pointer + // destructor to avoid potential stack overflow when the tree is deep. + std::deque> queue; + { + mutex_lock l(mu_); + while (inputs_.size() > 0) { + queue.push_back(inputs_.front()); + inputs_.pop_front(); + } + } + while (!queue.empty()) { + auto node = queue.back(); + queue.pop_back(); + { + mutex_lock l(node->mu_); + while (node->inputs_.size() > 0) { + queue.push_back(node->inputs_.front()); + node->inputs_.pop_front(); + } + } + } + + FlushMetrics(); + } // Adds an input. void add_input(std::shared_ptr node) TF_LOCKS_EXCLUDED(mu_) { diff --git a/tensorflow/core/framework/shape_inference_testutil.h b/tensorflow/core/framework/shape_inference_testutil.h index 40a6d53d223..361f7ed13c1 100644 --- a/tensorflow/core/framework/shape_inference_testutil.h +++ b/tensorflow/core/framework/shape_inference_testutil.h @@ -16,6 +16,7 @@ limitations under the License. 
#define TENSORFLOW_CORE_FRAMEWORK_SHAPE_INFERENCE_TESTUTIL_H_ #include + #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/shape_inference.h" #include "tensorflow/core/lib/core/status.h" @@ -90,7 +91,7 @@ class ShapeInferenceTestutil { ::tensorflow::shape_inference::ShapeInferenceTestutil::InferShapes( \ op, i, "e") \ .error_message(); \ - const std::string& substring = error_substring; \ + const std::string substring = error_substring; \ EXPECT_NE("", error_message); \ EXPECT_TRUE(absl::StrContains(error_message, substring)) \ << "Expected to see '" << substring << "' in '" << error_message \ diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 0b8846faf05..b880055b47d 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -531,7 +531,10 @@ cc_library( tf_cuda_cc_test( name = "memory_optimizer_test", srcs = ["memory_optimizer_test.cc"], - tags = ["no_cuda_on_cpu_tap"], # Do not re-enable again without actually testing. + tags = [ + "no_cuda_on_cpu_tap", # Do not re-enable again without actually testing. + "no_windows", # b/56402646 + ], deps = [ ":gpu_swapping_kernels", ":gpu_swapping_ops", diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index f47265f6334..cd0d44e8e12 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -114,12 +114,12 @@ FunctionDefLibrary GetFunctionDefLibraryStub( } uint64 DeadlineMicroSeconds(const RewriterConfig& cfg) { - const uint64 kFiveMinutesInUsec = 5 * 60 * 1000 * 1000; + const uint64 kTwentyMinutesInUsec = 20 * 60 * 1000 * 1000; if (cfg.meta_optimizer_timeout_ms() < 0) { return 0; } else { return cfg.meta_optimizer_timeout_ms() == 0 - ? Env::Default()->NowMicros() + kFiveMinutesInUsec + ? 
Env::Default()->NowMicros() + kTwentyMinutesInUsec : Env::Default()->NowMicros() + cfg.meta_optimizer_timeout_ms() * 1000; } diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index e47c681bb61..7cfb6fcae67 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -7096,7 +7096,7 @@ cc_library( build_test( name = "android_tensorflow_kernels_build_test", - targets = [":android_tensorflow_kernels"], + targets = [":portable_tensorflow_kernels"], ) cc_library( @@ -7109,7 +7109,7 @@ cc_library( "//tensorflow/core:android_gif_internal", "//tensorflow/core:android_jpeg_internal", "//tensorflow/core:android_png_internal", - "//tensorflow/core:android_tensorflow_lib_lite", + "//tensorflow/core:portable_tensorflow_lib_lite", ], alwayslink = 1, ) @@ -7126,7 +7126,7 @@ cc_library( linkopts = ["-ldl"], visibility = ["//visibility:public"], deps = [ - "//tensorflow/core:android_tensorflow_lib_lite", + "//tensorflow/core:portable_tensorflow_lib_lite", ], alwayslink = 1, ) diff --git a/tensorflow/core/kernels/cuda_sparse.h b/tensorflow/core/kernels/cuda_sparse.h index eb69469b615..2d41cc72421 100644 --- a/tensorflow/core/kernels/cuda_sparse.h +++ b/tensorflow/core/kernels/cuda_sparse.h @@ -259,7 +259,7 @@ class GpuSparse { // http://docs.nvidia.com/cuda/cusparse/index.html#cusparse-lt-t-gt-coo2csr. Status Coo2csr(const int* cooRowInd, int nnz, int m, int* csrRowPtr) const; -#if CUDA_VERSION < 10020 +#if (GOOGLE_CUDA && (CUDA_VERSION < 10020)) || TENSORFLOW_USE_ROCM // Sparse-dense matrix multiplication C = alpha * op(A) * op(B) + beta * C, // where A is a sparse matrix in CSR format, B and C are dense tall // matrices. This routine allows transposition of matrix B, which @@ -311,7 +311,7 @@ class GpuSparse { // http://docs.nvidia.com/cuda/cusparse/index.html#cusparse-lt-t-gt-csrmv_mergepath // // **NOTE** This is an in-place operation for data in y. 
-#if CUDA_VERSION < 10020 +#if (GOOGLE_CUDA && (CUDA_VERSION < 10020)) || TENSORFLOW_USE_ROCM template Status Csrmv(gpusparseOperation_t transA, int m, int n, int nnz, const Scalar* alpha_host, const gpusparseMatDescr_t descrA, @@ -366,7 +366,7 @@ class GpuSparse { Scalar* csrSortedValC, int* csrSortedRowPtrC, int* csrSortedColIndC, void* workspace); -#if CUDA_VERSION >= 10000 +#if GOOGLE_CUDA && (CUDA_VERSION >= 10000) // Computes sparse-sparse matrix multiplication of matrices // stored in CSR format. This is part zero: calculate required workspace // size. @@ -383,7 +383,7 @@ class GpuSparse { // output. csrSortedRowPtrC must be preallocated on device with // m + 1 entries. See: // http://docs.nvidia.com/cuda/cusparse/index.html#cusparse-lt-t-gt-csrgemm. -#if CUDA_VERSION < 10000 +#if (GOOGLE_CUDA && (CUDA_VERSION < 10000)) || TENSORFLOW_USE_ROCM Status CsrgemmNnz(gpusparseOperation_t transA, gpusparseOperation_t transB, int m, int k, int n, const gpusparseMatDescr_t descrA, int nnzA, const int* csrSortedRowPtrA, @@ -408,7 +408,7 @@ class GpuSparse { // addition. csrValC and csrColIndC must be allocated on the device // with nnzTotalDevHostPtr entries (as calculated by CsrgemmNnz). See: // http://docs.nvidia.com/cuda/cusparse/index.html#cusparse-lt-t-gt-csrgemm. 
-#if CUDA_VERSION < 10000 +#if (GOOGLE_CUDA && (CUDA_VERSION < 10000)) || TENSORFLOW_USE_ROCM template Status Csrgemm(gpusparseOperation_t transA, gpusparseOperation_t transB, int m, int k, int n, const gpusparseMatDescr_t descrA, diff --git a/tensorflow/core/kernels/data/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h index d8ae7190a7f..70ca70176e8 100644 --- a/tensorflow/core/kernels/data/dataset_utils.h +++ b/tensorflow/core/kernels/data/dataset_utils.h @@ -63,7 +63,10 @@ class AnonymousResourceOp : public OpKernel { if (create_deleter_) { Tensor* deleter_t; - OP_REQUIRES_OK(ctx, ctx->allocate_output(1, TensorShape({}), &deleter_t)); + AllocatorAttributes attr; + attr.set_on_host(true); + OP_REQUIRES_OK( + ctx, ctx->allocate_output(1, TensorShape({}), &deleter_t, attr)); deleter_t->scalar()() = ResourceDeleter(handle, ctx->resource_manager()); } diff --git a/tensorflow/core/kernels/data/experimental/BUILD b/tensorflow/core/kernels/data/experimental/BUILD index d61c574cb35..4ddfd99951c 100644 --- a/tensorflow/core/kernels/data/experimental/BUILD +++ b/tensorflow/core/kernels/data/experimental/BUILD @@ -138,6 +138,7 @@ tf_kernel_library( "//tensorflow/core/kernels/data:dataset_utils", "//tensorflow/core/kernels/data:name_utils", "//tensorflow/core/kernels/data:serialization_utils", + "//tensorflow/core/profiler/lib:traceme", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", diff --git a/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc b/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc index 8c336686deb..697f4d99a1e 100644 --- a/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc @@ -37,6 +37,7 @@ limitations under the License. 
#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/snappy.h" +#include "tensorflow/core/profiler/lib/traceme.h" #include "tensorflow/core/protobuf/error_codes.pb.h" namespace tensorflow { @@ -178,7 +179,10 @@ class DataServiceDatasetOp::Dataset : public DatasetBase { class Iterator : public DatasetIterator { public: explicit Iterator(const Params& params, int64 iterator_index) - : DatasetIterator(params), iterator_index_(iterator_index) {} + : DatasetIterator(params), + iterator_index_(iterator_index), + max_outstanding_requests_(params.dataset->max_outstanding_requests_) { + } ~Iterator() override { mutex_lock l(mu_); @@ -390,21 +394,23 @@ class DataServiceDatasetOp::Dataset : public DatasetBase { // TODO(aaudibert): add backoff and max retries. int64 deadline_micros = Env::Default()->NowMicros() + kRetryTimeoutMicros; - Status s = FetchElement(task_thread, deadline_micros); + Status s = GetElement(task_thread, deadline_micros); if (!s.ok()) { - LOG(WARNING) << "Failed to fetch element from worker at " + LOG(WARNING) << "Failed to get element from worker at " << task_thread->address << ": " << s; } } } - // Fetches an element from a task and adds the element to `results_`. + // Gets an element from a task and adds the element to `results_`. // // If the task reaches end_of_sequence or is cancelled (e.g. due to a - // worker dying), FetchElement returns Status::OK() without adding to + // worker dying), GetElement returns Status::OK() without adding to // `results_`. 
- Status FetchElement(TaskThread* task_thread, int64 deadline_micros) { - VLOG(3) << "Fetching an element for task id " << task_thread->task_id; + Status GetElement(TaskThread* task_thread, int64 deadline_micros) { + VLOG(3) << "Getting an element for task id " << task_thread->task_id; + tensorflow::profiler::TraceMe activity( + "GetElement", tensorflow::profiler::TraceMeLevel::kInfo); CompressedElement compressed; bool end_of_sequence; for (int num_retries = 0;; ++num_retries) { @@ -453,7 +459,7 @@ class DataServiceDatasetOp::Dataset : public DatasetBase { } results_.push(std::move(element)); cv_.notify_all(); - VLOG(3) << "Fetched an element for task id " << task_thread->task_id; + VLOG(3) << "Got an element for task id " << task_thread->task_id; return Status::OK(); } diff --git a/tensorflow/core/kernels/data/experimental/unbatch_dataset_op.cc b/tensorflow/core/kernels/data/experimental/unbatch_dataset_op.cc index 111afa218df..e813de70931 100644 --- a/tensorflow/core/kernels/data/experimental/unbatch_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/unbatch_dataset_op.cc @@ -38,8 +38,12 @@ class UnbatchDatasetOp : public UnaryDatasetOpKernel { explicit Dataset(OpKernelContext* ctx, DatasetBase* input) : DatasetBase(DatasetContext(ctx)), input_(input) { input_->Ref(); + batch_size_ = -1; for (const PartialTensorShape& shape : input->output_shapes()) { if (!shape.unknown_rank()) { + if (batch_size_ < 0 && shape.dim_size(0) >= 0) { + batch_size_ = shape.dim_size(0); + } gtl::InlinedVector partial_dim_sizes; for (int i = 1; i < shape.dims(); ++i) { partial_dim_sizes.push_back(shape.dim_size(i)); @@ -69,6 +73,17 @@ class UnbatchDatasetOp : public UnaryDatasetOpKernel { string DebugString() const override { return "UnbatchDatasetOp::Dataset"; } + int64 Cardinality() const override { + int64 n = input_->Cardinality(); + if (n == kInfiniteCardinality || n == kUnknownCardinality) { + return n; + } + if (batch_size_ > 0) { + return n * batch_size_; + } + 
return kUnknownCardinality; + } + Status CheckExternalState() const override { return input_->CheckExternalState(); } @@ -222,6 +237,8 @@ class UnbatchDatasetOp : public UnaryDatasetOpKernel { const DatasetBase* const input_; std::vector shapes_; + // batch_size_ may or may not be known, with -1 as unknown + int64 batch_size_; }; }; diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc index a2e8ca13192..9fb3c5fb46e 100644 --- a/tensorflow/core/kernels/data/iterator_ops.cc +++ b/tensorflow/core/kernels/data/iterator_ops.cc @@ -1102,9 +1102,8 @@ REGISTER_KERNEL_BUILDER( MakeIteratorOp); REGISTER_KERNEL_BUILDER(Name("DeleteIterator").Device(DEVICE_CPU).Priority(2), DeleteIteratorOp); -REGISTER_KERNEL_BUILDER( - Name("DeleteIterator").Device(DEVICE_GPU).HostMemory("deleter").Priority(1), - DeleteIteratorOp); +REGISTER_KERNEL_BUILDER(Name("DeleteIterator").Device(DEVICE_GPU).Priority(1), + DeleteIteratorOp); REGISTER_KERNEL_BUILDER( Name("AnonymousIterator").Device(DEVICE_CPU).Priority(2), AnonymousIteratorHandleOp); @@ -1116,7 +1115,6 @@ REGISTER_KERNEL_BUILDER( AnonymousIteratorHandleOp); REGISTER_KERNEL_BUILDER(Name("AnonymousIteratorV2") .Device(DEVICE_GPU) - .HostMemory("deleter") .Priority(1), AnonymousIteratorHandleOp); REGISTER_KERNEL_BUILDER(Name("DatasetToSingleElement").Device(DEVICE_CPU), diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc index 5dae096d5b5..7b8f697d2d3 100644 --- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc +++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc @@ -621,6 +621,11 @@ class ParallelMapIterator : public DatasetBaseIterator { return false; } if (!deterministic_) { + // Iterate through in-flight results and return the first one that is + // found to be available and not end-of-input.
If the first result (in + // order) is end-of-input, we know that all earlier iterations have + // already been completed, so it is safe to return that result for the + // caller to process end of iteration. for (auto it = invocation_results_.begin(); it != invocation_results_.end(); ++it) { if ((*it)->notification.HasBeenNotified() && diff --git a/tensorflow/core/kernels/debug_ops.h b/tensorflow/core/kernels/debug_ops.h index 00356778026..42364e416ea 100644 --- a/tensorflow/core/kernels/debug_ops.h +++ b/tensorflow/core/kernels/debug_ops.h @@ -435,9 +435,9 @@ class DebugIdentityV2Op : public OpKernel { for (const string& dump_root : dump_roots_) { tfdbg::DebugEventsWriter* debug_events_writer = tfdbg::DebugEventsWriter::GetDebugEventsWriter(dump_root); - debug_events_writer->WriteGraphExecutionTrace( - tfdbg_context_id_, device_name_, op_name_, output_slot_, - tensor_debug_mode_, tensor); + OP_REQUIRES_OK(context, debug_events_writer->WriteGraphExecutionTrace( + tfdbg_context_id_, device_name_, op_name_, + output_slot_, tensor_debug_mode_, tensor)); } context->set_output(0, tensor); } diff --git a/tensorflow/core/kernels/dequantize_op.cc b/tensorflow/core/kernels/dequantize_op.cc index 0f5a7019b1f..3b38daf0067 100644 --- a/tensorflow/core/kernels/dequantize_op.cc +++ b/tensorflow/core/kernels/dequantize_op.cc @@ -61,7 +61,9 @@ class DequantizeOp : public OpKernel { " is '" + DataTypeString(ctx->output_type(0)) + "'")); + need_cast_ = true; if (ctx->output_type(0) == DT_FLOAT) { + need_cast_ = false; OP_REQUIRES(ctx, (mode_string == "MIN_COMBINED" || mode_string == "MIN_FIRST" || mode_string == "SCALED"), @@ -98,8 +100,9 @@ class DequantizeOp : public OpKernel { } Tensor* output = nullptr; - Tensor float_output = tensorflow::Tensor(DT_FLOAT, input.shape()); OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output)); + Tensor float_output = + need_cast_ ? 
tensorflow::Tensor(DT_FLOAT, input.shape()) : *output; if (num_slices == 1) { const float min_range = input_min_tensor.flat()(0); const float max_range = input_max_tensor.flat()(0); @@ -128,10 +131,12 @@ class DequantizeOp : public OpKernel { max_ranges(i), output_tensor.template chip<1>(i)); } } - S* out_ptr = output->flat().data(); - float* in_ptr = float_output.flat().data(); - for (int64 i = 0; i < float_output.NumElements(); ++i) { - out_ptr[i] = static_cast(in_ptr[i]); + if (need_cast_) { + S* out_ptr = output->flat().data(); + float* in_ptr = float_output.flat().data(); + for (int64 i = 0; i < float_output.NumElements(); ++i) { + out_ptr[i] = static_cast(in_ptr[i]); + } } } @@ -219,6 +224,7 @@ class DequantizeOp : public OpKernel { int mode_; int axis_; bool narrow_range_; + bool need_cast_; }; REGISTER_KERNEL_BUILDER(Name("Dequantize") diff --git a/tensorflow/core/kernels/list_kernels.h b/tensorflow/core/kernels/list_kernels.h index 855506e9d8a..37fc1b3ae08 100644 --- a/tensorflow/core/kernels/list_kernels.h +++ b/tensorflow/core/kernels/list_kernels.h @@ -435,8 +435,10 @@ class TensorListConcat : public OpKernel { for (int i = 0; i < tensor_list->tensors().size(); i++) { const Tensor& element_tensor = tensor_list->tensors()[i]; if (element_tensor.dtype() != DT_INVALID) { - inputs_flat.emplace_back(new typename TTypes::ConstMatrix( - element_tensor.shaped({1, element_tensor.NumElements()}))); + if (element_tensor.NumElements() > 0) { + inputs_flat.emplace_back(new typename TTypes::ConstMatrix( + element_tensor.shaped({1, element_tensor.NumElements()}))); + } } else { AllocatorAttributes attr; if (element_dtype_ == DT_VARIANT) { diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc index a85f3f449fd..3045fd050d5 100644 --- a/tensorflow/core/kernels/partitioned_function_ops.cc +++ b/tensorflow/core/kernels/partitioned_function_ops.cc @@ -245,6 +245,7 @@ void 
PartitionedCallOp::RunFunction(FunctionLibraryRuntime::Handle handle, run_opts.source_device = lib->device() == nullptr ? "" : lib->device()->name(); run_opts.allow_dead_tensors = true; + run_opts.rendezvous = ctx->rendezvous(); std::vector* rets = new std::vector; const string& func_name = func_->name(); diff --git a/tensorflow/core/kernels/sendrecv_ops.cc b/tensorflow/core/kernels/sendrecv_ops.cc index 93830515040..3d94fe1b6a5 100644 --- a/tensorflow/core/kernels/sendrecv_ops.cc +++ b/tensorflow/core/kernels/sendrecv_ops.cc @@ -92,14 +92,16 @@ void SendOp::Compute(OpKernelContext* ctx) { FrameAndIter frame_iter = GetFrameAndIter(ctx, hostmem_sendrecv_); if (frame_iter == FrameAndIter(0, 0)) { // Use the cached rendezvous key. - VLOG(2) << "Send " << parsed_key_.buf_; + VLOG(2) << "Send " << parsed_key_.buf_ << " using " + << reinterpret_cast(ctx->rendezvous()); ctx->SetStatus(ctx->rendezvous()->Send(parsed_key_, args, ctx->input(0), ctx->is_input_dead())); return; } else { Rendezvous::ParsedKey in_loop_parsed; GetRendezvousKey(key_prefix_, frame_iter, &in_loop_parsed.buf_); - VLOG(2) << "Send " << in_loop_parsed.buf_; + VLOG(2) << "Send " << in_loop_parsed.buf_ << " using " + << reinterpret_cast(ctx->rendezvous()); OP_REQUIRES_OK(ctx, Rendezvous::ParseKey(in_loop_parsed.buf_, &in_loop_parsed)); @@ -200,13 +202,15 @@ void RecvOp::ComputeAsync(OpKernelContext* ctx, DoneCallback done) { FrameAndIter frame_iter = GetFrameAndIter(ctx, hostmem_sendrecv_); if (frame_iter == FrameAndIter(0, 0)) { - VLOG(2) << "Recv " << parsed_key_.buf_; + VLOG(2) << "Recv " << parsed_key_.buf_ << " using " + << reinterpret_cast(ctx->rendezvous()); ctx->rendezvous()->RecvAsync(parsed_key_, args, make_recv_callback(ctx, std::move(done))); } else { Rendezvous::ParsedKey in_loop_parsed; GetRendezvousKey(key_prefix_, frame_iter, &in_loop_parsed.buf_); - VLOG(2) << "Recv " << in_loop_parsed.buf_; + VLOG(2) << "Recv " << in_loop_parsed.buf_ << " using " + << 
reinterpret_cast(ctx->rendezvous()); OP_REQUIRES_OK_ASYNC( ctx, Rendezvous::ParseKey(in_loop_parsed.buf_, &in_loop_parsed), done); ctx->rendezvous()->RecvAsync(in_loop_parsed, args, diff --git a/tensorflow/core/kernels/sparse/mat_mul_op.cc b/tensorflow/core/kernels/sparse/mat_mul_op.cc index a0834800446..50fa0ec88ea 100644 --- a/tensorflow/core/kernels/sparse/mat_mul_op.cc +++ b/tensorflow/core/kernels/sparse/mat_mul_op.cc @@ -728,12 +728,14 @@ namespace { template struct GPUDataType; +// GPUDataType templates are currently not instantiated in the ROCm flow +// So leaving out the #elif TENSORFLOW_USE_ROCM blocks for now +// hipblas library is not (yet) being pulled in via rocm_configure.bzl +// so cannot reference types from hipblas headers here template <> struct GPUDataType { #if GOOGLE_CUDA static constexpr cudaDataType_t type = CUDA_R_16F; -#elif TENSORFLOW_USE_ROCM - static constexpr hipblasDataType_t type = HIPBLAS_R_16F; #endif }; @@ -741,8 +743,6 @@ template <> struct GPUDataType { #if GOOGLE_CUDA static constexpr cudaDataType_t type = CUDA_R_32F; -#elif TENSORFLOW_USE_ROCM - static constexpr hipblasDataType_t type = HIPBLAS_R_32F; #endif }; @@ -750,8 +750,6 @@ template <> struct GPUDataType> { #if GOOGLE_CUDA static constexpr cudaDataType_t type = CUDA_C_32F; -#elif TENSORFLOW_USE_ROCM - static constexpr hipblasDataType_t type = HIPBLAS_C_32F; #endif }; @@ -759,8 +757,6 @@ template <> struct GPUDataType { #if GOOGLE_CUDA static constexpr cudaDataType_t type = CUDA_R_64F; -#elif TENSORFLOW_USE_ROCM - static constexpr hipblasDataType_t type = HIPBLAS_R_64F; #endif }; @@ -768,8 +764,6 @@ template <> struct GPUDataType> { #if GOOGLE_CUDA static constexpr cudaDataType_t type = CUDA_C_64F; -#elif TENSORFLOW_USE_ROCM - static constexpr hipblasDataType_t type = HIPBLAS_C_64F; #endif }; @@ -957,7 +951,7 @@ class CSRSparseMatrixMatVec { const int n = a.dense_shape_host(1); const int nnz = a.values.size(); DCHECK_EQ(nnz, a.col_ind.size()); -#if CUDA_VERSION >= 10020
+#if GOOGLE_CUDA && (CUDA_VERSION >= 10020) TF_RETURN_IF_ERROR(cuda_sparse.Csrmv(transA_, m, n, nnz, &alpha, a.values.data(), a.row_ptr.data(), a.col_ind.data(), x, &beta, y)); diff --git a/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc b/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc index 7325d5f6873..fb652e13d15 100644 --- a/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc +++ b/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc @@ -417,7 +417,7 @@ class CSRSparseMatMulGPUOp : public OpKernel { } auto b_input_dense_shape = b_input_matrix->dense_shape().vec(); -#if CUDA_VERSION >= 10000 +#if GOOGLE_CUDA && (CUDA_VERSION >= 10000) size_t maxWorkspaceSize = 0; for (int i = 0; i < batch_size; ++i) { // Calculate maximum workspace size over batch. @@ -558,7 +558,7 @@ struct CSRSparseSparseMatrixMatMul initialized_(false), transpose_a_(transpose_a), adjoint_a_(adjoint_a), -#if CUDA_VERSION < 10000 +#if (GOOGLE_CUDA && (CUDA_VERSION < 10000)) || TENSORFLOW_USE_ROCM transpose_b_(transpose_b) { #else transpose_b_(transpose_b), @@ -573,7 +573,7 @@ struct CSRSparseSparseMatrixMatMul : GPUSPARSE(OPERATION_NON_TRANSPOSE); } -#if CUDA_VERSION >= 10000 +#if GOOGLE_CUDA && (CUDA_VERSION >= 10000) ~CSRSparseSparseMatrixMatMul() { if (initialized_) { cusparseDestroyCsrgemm2Info(info_); @@ -591,7 +591,7 @@ struct CSRSparseSparseMatrixMatMul TF_RETURN_IF_ERROR(descrA_.Initialize()); TF_RETURN_IF_ERROR(descrB_.Initialize()); TF_RETURN_IF_ERROR(descrC_.Initialize()); -#if CUDA_VERSION >= 10000 +#if GOOGLE_CUDA && (CUDA_VERSION >= 10000) TF_RETURN_IF_GPUSPARSE_ERROR(cusparseCreateCsrgemm2Info(&info_)); #endif initialized_ = true; @@ -600,6 +600,7 @@ struct CSRSparseSparseMatrixMatMul Status GetWorkspaceSize(const ConstCSRComponent& a, const ConstCSRComponent& b, size_t* bufferSize) { +#if GOOGLE_CUDA && (CUDA_VERSION >= 10000) DCHECK(initialized_); const int m = a.dense_shape_host(a.dense_shape_host.size() - (transpose_a_ ? 
1 : 2)); @@ -621,6 +622,7 @@ struct CSRSparseSparseMatrixMatMul m, n, k, descrA_.descr(), nnzA, a.row_ptr.data(), a.col_ind.data(), descrB_.descr(), nnzB, b.row_ptr.data(), b.col_ind.data(), info_, bufferSize)); +#endif return Status::OK(); } @@ -650,7 +652,7 @@ struct CSRSparseSparseMatrixMatMul *output_nnz = -1; -#if CUDA_VERSION < 10000 +#if (GOOGLE_CUDA && (CUDA_VERSION < 10000)) || TENSORFLOW_USE_ROCM TF_RETURN_IF_ERROR(cuda_sparse_.CsrgemmNnz( transA_, transB_, m, n, k, descrA_.descr(), nnzA, a.row_ptr.data(), a.col_ind.data(), descrB_.descr(), nnzB, b.row_ptr.data(), @@ -693,7 +695,7 @@ struct CSRSparseSparseMatrixMatMul b.dense_shape_host(b.dense_shape_host.size() - (transpose_b_ ? 2 : 1)); DCHECK_EQ(n, c->dense_shape_host(c->dense_shape_host.size() - 1)); -#if CUDA_VERSION < 10000 +#if (GOOGLE_CUDA && (CUDA_VERSION < 10000)) || TENSORFLOW_USE_ROCM TF_RETURN_IF_ERROR(cuda_sparse_.Csrgemm( transA_, transB_, m, k, n, descrA_.descr(), nnzA, a.values.data(), a.row_ptr.data(), a.col_ind.data(), descrB_.descr(), nnzB, @@ -732,7 +734,7 @@ struct CSRSparseSparseMatrixMatMul GpuSparseMatrixDescriptor descrC_; gpusparseOperation_t transA_; gpusparseOperation_t transB_; -#if CUDA_VERSION >= 10000 +#if GOOGLE_CUDA && (CUDA_VERSION >= 10000) csrgemm2Info_t info_; #endif }; diff --git a/tensorflow/core/lib/core/BUILD b/tensorflow/core/lib/core/BUILD index 80ad4943f16..491e4c5e7aa 100644 --- a/tensorflow/core/lib/core/BUILD +++ b/tensorflow/core/lib/core/BUILD @@ -138,10 +138,13 @@ tf_proto_library( cc_api_version = 2, make_default_target_header_only = True, protodeps = [ - "//tensorflow/core:error_codes_proto_impl", + "//tensorflow/core/protobuf:error_codes_proto_impl", ], - visibility = ["//tensorflow/core:__subpackages__"], - exports = ["//tensorflow/core:error_codes_proto_impl"], + visibility = [ + "//tensorflow/core:__subpackages__", + "//tensorflow/core/protobuf:__subpackages__", + ], + exports = ["//tensorflow/core/protobuf:error_codes_proto_impl"], ) # Export 
source files needed for mobile builds, which do not use granular targets. diff --git a/tensorflow/core/platform/BUILD b/tensorflow/core/platform/BUILD index 819f8fcdadb..c7ff378d2ac 100644 --- a/tensorflow/core/platform/BUILD +++ b/tensorflow/core/platform/BUILD @@ -621,7 +621,7 @@ cc_library( ":stringpiece", ":stringprintf", ":types", - "//tensorflow/core:error_codes_proto_impl_cc", + "//tensorflow/core/protobuf:error_codes_proto_impl_cc", "@com_google_absl//absl/base", ], ) diff --git a/tensorflow/core/platform/build_config.bzl b/tensorflow/core/platform/build_config.bzl index f0613cdc069..ab452562245 100644 --- a/tensorflow/core/platform/build_config.bzl +++ b/tensorflow/core/platform/build_config.bzl @@ -26,6 +26,7 @@ load( _tf_platform_alias = "tf_platform_alias", _tf_platform_deps = "tf_platform_deps", _tf_portable_deps_no_runtime = "tf_portable_deps_no_runtime", + _tf_portable_proto_lib = "tf_portable_proto_lib", _tf_proto_library = "tf_proto_library", _tf_proto_library_cc = "tf_proto_library_cc", _tf_proto_library_py = "tf_proto_library_py", @@ -65,6 +66,7 @@ tf_lib_proto_parsing_deps = _tf_lib_proto_parsing_deps tf_logging_deps = _tf_logging_deps tf_platform_alias = _tf_platform_alias tf_platform_deps = _tf_platform_deps +tf_portable_proto_lib = _tf_portable_proto_lib tf_portable_deps_no_runtime = _tf_portable_deps_no_runtime tf_proto_library = _tf_proto_library tf_proto_library_cc = _tf_proto_library_cc diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index 18a8285ece1..2dc4fdc0fd9 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -577,8 +577,8 @@ def tf_additional_all_protos(): def tf_protos_all_impl(): return [ - clean_dep("//tensorflow/core:autotuning_proto_cc_impl"), - clean_dep("//tensorflow/core:conv_autotuning_proto_cc_impl"), + clean_dep("//tensorflow/core/protobuf:autotuning_proto_cc_impl"), + 
clean_dep("//tensorflow/core/protobuf:conv_autotuning_proto_cc_impl"), clean_dep("//tensorflow/core:protos_all_cc_impl"), ] @@ -727,6 +727,9 @@ def tf_protobuf_deps(): otherwise = [clean_dep("@com_google_protobuf//:protobuf_headers")], ) +def tf_portable_proto_lib(): + return ["//tensorflow/core:protos_all_cc_impl"] + def tf_protobuf_compiler_deps(): return if_static( [ @@ -764,7 +767,7 @@ def tf_portable_deps_no_runtime(): "@nsync//:nsync_cpp", "@com_googlesource_code_re2//:re2", "@farmhash_archive//:farmhash", - ] + tf_protobuf_deps() + ] def tf_google_mobile_srcs_no_runtime(): return [] diff --git a/tensorflow/core/platform/profile_utils/cpu_utils.cc b/tensorflow/core/platform/profile_utils/cpu_utils.cc index 587c97875a0..b22123a804a 100644 --- a/tensorflow/core/platform/profile_utils/cpu_utils.cc +++ b/tensorflow/core/platform/profile_utils/cpu_utils.cc @@ -88,6 +88,8 @@ static ICpuUtilsHelper* cpu_utils_helper_instance_ = nullptr; defined(__ppc__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) retval = sscanf(line.c_str(), "clock : %lfMHz", &cpu_freq); freq_factor = 1.0; +#elif defined(__s390x__) + retval = sscanf(line.c_str(), "bogomips per cpu: %lf", &cpu_freq); #else retval = sscanf(line.c_str(), "bogomips : %lf", &cpu_freq); #endif diff --git a/tensorflow/core/profiler/convert/BUILD b/tensorflow/core/profiler/convert/BUILD index 387efc831ff..369d26a92d9 100644 --- a/tensorflow/core/profiler/convert/BUILD +++ b/tensorflow/core/profiler/convert/BUILD @@ -17,15 +17,18 @@ cc_library( "//tensorflow/core/profiler/protobuf:op_metrics_proto_cc", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler/utils:cost_utils", - "//tensorflow/core/profiler/utils:event_span", + "//tensorflow/core/profiler/utils:op_metrics_db_utils", "//tensorflow/core/profiler/utils:op_utils", "//tensorflow/core/profiler/utils:tf_op_utils", "//tensorflow/core/profiler/utils:tf_xplane_visitor", "//tensorflow/core/profiler/utils:timespan", 
"//tensorflow/core/profiler/utils:trace_utils", + "//tensorflow/core/profiler/utils:xplane_schema", + "//tensorflow/core/profiler/utils:xplane_visitor", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", ], ) @@ -35,9 +38,11 @@ tf_cc_test( srcs = ["xplane_to_op_metrics_db_test.cc"], deps = [ ":xplane_to_op_metrics_db", + "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core/profiler/protobuf:op_metrics_proto_cc", + "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler/utils:op_metrics_db_utils", "//tensorflow/core/profiler/utils:time_utils", "//tensorflow/core/profiler/utils:xplane_builder", @@ -86,13 +91,15 @@ cc_library( ":op_stats_to_input_pipeline_analysis", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core/platform:logging", "//tensorflow/core/profiler/protobuf:hardware_types_proto_cc", "//tensorflow/core/profiler/protobuf:input_pipeline_proto_cc", "//tensorflow/core/profiler/protobuf:op_metrics_proto_cc", "//tensorflow/core/profiler/protobuf:op_stats_proto_cc", "//tensorflow/core/profiler/protobuf:overview_page_proto_cc", + "//tensorflow/core/profiler/protobuf:steps_db_proto_cc", + "//tensorflow/core/profiler/protobuf:tf_function_proto_cc", "//tensorflow/core/profiler/utils:errors", + "//tensorflow/core/profiler/utils:html_utils", "//tensorflow/core/profiler/utils:math_utils", "//tensorflow/core/profiler/utils:op_metrics_db_utils", "//tensorflow/core/profiler/utils:time_utils", @@ -118,11 +125,11 @@ cc_library( "//tensorflow/core/profiler/protobuf:steps_db_proto_cc", "//tensorflow/core/profiler/utils:errors", "//tensorflow/core/profiler/utils:event_span", + "//tensorflow/core/profiler/utils:html_utils", "//tensorflow/core/profiler/utils:math_utils", "//tensorflow/core/profiler/utils:tf_op_utils", 
"//tensorflow/core/profiler/utils:time_utils", "//tensorflow/core/util:stats_calculator_portable", - "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", @@ -135,13 +142,12 @@ cc_library( hdrs = ["op_stats_to_tf_stats.h"], deps = [ ":op_metrics_to_record", + "//tensorflow/core:lib", "//tensorflow/core/profiler/protobuf:op_metrics_proto_cc", "//tensorflow/core/profiler/protobuf:op_stats_proto_cc", "//tensorflow/core/profiler/protobuf:tf_stats_proto_cc", "//tensorflow/core/profiler/utils:op_metrics_db_utils", - "//tensorflow/core/profiler/utils:tf_op_utils", "//tensorflow/core/profiler/utils:time_utils", - "@com_google_absl//absl/container:flat_hash_set", ], ) @@ -152,13 +158,18 @@ tf_cc_test( deps = [ ":op_stats_to_tf_stats", ":xplane_to_op_stats", + "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core/profiler/protobuf:op_metrics_proto_cc", + "//tensorflow/core/profiler/protobuf:op_stats_proto_cc", + "//tensorflow/core/profiler/protobuf:tf_stats_proto_cc", + "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler/utils:op_metrics_db_utils", "//tensorflow/core/profiler/utils:time_utils", "//tensorflow/core/profiler/utils:xplane_builder", "//tensorflow/core/profiler/utils:xplane_schema", + "@com_google_absl//absl/strings", ], ) @@ -171,6 +182,9 @@ cc_library( "//tensorflow/core:lib_internal", "//tensorflow/core/profiler/protobuf:steps_db_proto_cc", "//tensorflow/core/profiler/utils:event_span", + "//tensorflow/core/profiler/utils:timespan", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_map", ], ) @@ -205,6 +219,7 @@ cc_library( srcs = ["xplane_to_op_stats.cc"], hdrs = ["xplane_to_op_stats.h"], deps = [ + ":op_metrics_db_combiner", ":step_events_to_steps_db", ":xplane_to_kernel_stats_db", ":xplane_to_op_metrics_db", @@ -213,15 
+228,20 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core/profiler/protobuf:hardware_types_proto_cc", "//tensorflow/core/profiler/protobuf:kernel_stats_proto_cc", + "//tensorflow/core/profiler/protobuf:op_metrics_proto_cc", "//tensorflow/core/profiler/protobuf:op_stats_proto_cc", + "//tensorflow/core/profiler/protobuf:steps_db_proto_cc", "//tensorflow/core/profiler/protobuf:tf_function_proto_cc", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler/utils:event_span", "//tensorflow/core/profiler/utils:hardware_type_utils", "//tensorflow/core/profiler/utils:kernel_stats_utils", + "//tensorflow/core/profiler/utils:tf_op_utils", "//tensorflow/core/profiler/utils:tf_xplane_visitor", "//tensorflow/core/profiler/utils:xplane_schema", "//tensorflow/core/profiler/utils:xplane_utils", + "//tensorflow/core/profiler/utils:xplane_visitor", + "@com_google_absl//absl/container:flat_hash_map", ], ) @@ -239,11 +259,15 @@ tf_cc_test( "//tensorflow/core:test_main", "//tensorflow/core:testlib", "//tensorflow/core/profiler/protobuf:op_metrics_proto_cc", + "//tensorflow/core/profiler/protobuf:op_stats_proto_cc", "//tensorflow/core/profiler/protobuf:steps_db_proto_cc", + "//tensorflow/core/profiler/protobuf:tf_function_proto_cc", + "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler/utils:group_events", "//tensorflow/core/profiler/utils:xplane_builder", "//tensorflow/core/profiler/utils:xplane_schema", "//tensorflow/core/profiler/utils:xplane_utils", + "@com_google_absl//absl/strings", ], ) @@ -259,7 +283,6 @@ cc_library( ":xplane_to_memory_profile", ":xplane_to_op_stats", ":xplane_to_trace_events", - "//tensorflow/core:human_readable_json", "//tensorflow/core:lib", "//tensorflow/core/profiler:profiler_service_proto_cc", "//tensorflow/core/profiler/protobuf:hardware_types_proto_cc", @@ -269,6 +292,7 @@ cc_library( "//tensorflow/core/profiler/protobuf:op_stats_proto_cc", 
"//tensorflow/core/profiler/protobuf:overview_page_proto_cc", "//tensorflow/core/profiler/protobuf:tf_stats_proto_cc", + "//tensorflow/core/profiler/protobuf:trace_events_proto_cc", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler/rpc/client:save_profile", "//tensorflow/core/profiler/utils:xplane_schema", @@ -284,12 +308,14 @@ tf_cc_test( srcs = ["xplane_to_profile_response_test.cc"], deps = [ ":xplane_to_profile_response", + "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core/profiler:profiler_service_proto_cc", "//tensorflow/core/profiler/protobuf:input_pipeline_proto_cc", "//tensorflow/core/profiler/protobuf:overview_page_proto_cc", "//tensorflow/core/profiler/protobuf:tf_stats_proto_cc", + "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler/utils:group_events", "//tensorflow/core/profiler/utils:xplane_builder", "//tensorflow/core/profiler/utils:xplane_schema", @@ -303,13 +329,16 @@ cc_library( hdrs = ["xplane_to_step_events.h"], deps = [ "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core/profiler/protobuf:steps_db_proto_cc", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler/utils:event_span", "//tensorflow/core/profiler/utils:tf_xplane_visitor", + "//tensorflow/core/profiler/utils:timespan", "//tensorflow/core/profiler/utils:trace_utils", "//tensorflow/core/profiler/utils:xplane_schema", + "//tensorflow/core/profiler/utils:xplane_visitor", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", ], ) @@ -319,12 +348,16 @@ tf_cc_test( srcs = ["xplane_to_step_events_test.cc"], deps = [ ":xplane_to_step_events", + "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", + "//tensorflow/core/profiler/protobuf:xplane_proto_cc", + "//tensorflow/core/profiler/utils:event_span", 
"//tensorflow/core/profiler/utils:group_events", "//tensorflow/core/profiler/utils:xplane_builder", "//tensorflow/core/profiler/utils:xplane_schema", "//tensorflow/core/profiler/utils:xplane_utils", + "@com_google_absl//absl/container:flat_hash_map", ], ) @@ -338,7 +371,9 @@ cc_library( "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler/utils:tf_xplane_visitor", "//tensorflow/core/profiler/utils:xplane_schema", + "//tensorflow/core/profiler/utils:xplane_visitor", "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", ], ) @@ -354,6 +389,8 @@ tf_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", + "//tensorflow/core/profiler/protobuf:trace_events_proto_cc", + "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler/utils:xplane_builder", "//tensorflow/core/profiler/utils:xplane_schema", "//tensorflow/core/profiler/utils:xplane_utils", @@ -369,14 +406,14 @@ cc_library( "//tensorflow/core:lib_internal", "//tensorflow/core/profiler/protobuf:kernel_stats_proto_cc", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", - "//tensorflow/core/profiler/utils:event_span", "//tensorflow/core/profiler/utils:kernel_stats_utils", "//tensorflow/core/profiler/utils:tf_op_utils", "//tensorflow/core/profiler/utils:tf_xplane_visitor", "//tensorflow/core/profiler/utils:trace_utils", "//tensorflow/core/profiler/utils:xplane_schema", - "//tensorflow/core/profiler/utils:xplane_utils", "//tensorflow/core/profiler/utils:xplane_visitor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", ], ) @@ -389,14 +426,14 @@ cc_library( "//tensorflow/core:lib_internal", "//tensorflow/core/profiler/protobuf:tf_function_proto_cc", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", + "//tensorflow/core/profiler/utils:math_utils", "//tensorflow/core/profiler/utils:tf_xplane_visitor", "//tensorflow/core/profiler/utils:timespan", 
"//tensorflow/core/profiler/utils:xplane_schema", - "//tensorflow/core/profiler/utils:xplane_utils", "//tensorflow/core/profiler/utils:xplane_visitor", "@com_google_absl//absl/algorithm:container", - "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", ], ) @@ -413,10 +450,13 @@ tf_cc_test( "//tensorflow/core:test_main", "//tensorflow/core:testlib", "//tensorflow/core/profiler/protobuf:tf_function_proto_cc", + "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler/utils:tf_xplane_visitor", "//tensorflow/core/profiler/utils:xplane_builder", "//tensorflow/core/profiler/utils:xplane_schema", "//tensorflow/core/profiler/utils:xplane_utils", + "//tensorflow/core/profiler/utils:xplane_visitor", + "@com_google_absl//absl/strings", ], ) @@ -428,15 +468,18 @@ cc_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "//tensorflow/core/framework:protos_all_cc", "//tensorflow/core/platform:protobuf", "//tensorflow/core/profiler/protobuf:memory_profile_proto_cc", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler/utils:tf_xplane_visitor", "//tensorflow/core/profiler/utils:xplane_schema", - "//tensorflow/core/profiler/utils:xplane_utils", + "//tensorflow/core/profiler/utils:xplane_visitor", + "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/types:optional", ], ) @@ -446,10 +489,14 @@ tf_cc_test( srcs = ["xplane_to_memory_profile_test.cc"], deps = [ ":xplane_to_memory_profile", + "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", + "//tensorflow/core/profiler/protobuf:memory_profile_proto_cc", + "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler/utils:xplane_builder", 
"//tensorflow/core/profiler/utils:xplane_schema", "//tensorflow/core/profiler/utils:xplane_utils", + "@com_google_absl//absl/strings", ], ) diff --git a/tensorflow/core/profiler/convert/op_metrics_db_combiner.cc b/tensorflow/core/profiler/convert/op_metrics_db_combiner.cc index 3f601bb9134..8229d1020b9 100644 --- a/tensorflow/core/profiler/convert/op_metrics_db_combiner.cc +++ b/tensorflow/core/profiler/convert/op_metrics_db_combiner.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/core/profiler/convert/op_metrics_db_combiner.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/convert/op_metrics_to_record.cc b/tensorflow/core/profiler/convert/op_metrics_to_record.cc index b51c679776b..8e28199b827 100644 --- a/tensorflow/core/profiler/convert/op_metrics_to_record.cc +++ b/tensorflow/core/profiler/convert/op_metrics_to_record.cc @@ -15,7 +15,9 @@ limitations under the License. #include "tensorflow/core/profiler/convert/op_metrics_to_record.h" +#include #include +#include #include "absl/algorithm/container.h" #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" diff --git a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc index ca2a6c28875..83673458d21 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc @@ -15,11 +15,12 @@ limitations under the License. 
#include "tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h" +#include + #include -#include +#include #include "google/protobuf/any.pb.h" -#include "absl/algorithm/container.h" #include "absl/container/flat_hash_map.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" @@ -27,7 +28,6 @@ limitations under the License. #include "absl/strings/string_view.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/convert/op_metrics_to_record.h" #include "tensorflow/core/profiler/convert/step_events_to_steps_db.h" @@ -38,6 +38,7 @@ limitations under the License. #include "tensorflow/core/profiler/protobuf/steps_db.pb.h" #include "tensorflow/core/profiler/utils/errors.h" #include "tensorflow/core/profiler/utils/event_span.h" +#include "tensorflow/core/profiler/utils/html_utils.h" #include "tensorflow/core/profiler/utils/math_utils.h" #include "tensorflow/core/profiler/utils/tf_op_utils.h" #include "tensorflow/core/profiler/utils/time_utils.h" @@ -103,7 +104,7 @@ StepSummary GetStepSummaryForSampleStats(const Stat& sample_stats) { avg = sdv = min = max = 0.0; } else { avg = sample_stats.avg(); - sdv = std::sqrt(sample_stats.sample_variance()); + sdv = sqrt(sample_stats.sample_variance()); min = sample_stats.min(); max = sample_stats.max(); } @@ -243,7 +244,7 @@ enum class InputOpCategory { kPreprocessing // data preprocessing. 
}; -string InputOpCategoryString(InputOpCategory category) { +std::string InputOpCategoryString(InputOpCategory category) { switch (category) { case InputOpCategory::kEnqueue: return "Enqueue"; @@ -327,10 +328,6 @@ InputOpDetails ConvertOpMetricsToInputOpDetails(const OpMetrics& op_metrics, return details; } -string AnchorElement(absl::string_view url, absl::string_view text) { - return absl::StrCat("", text, ""); -} - // Returns the ratio of the host-to-device time in each step to the step-time. double RatioOfHostToDeviceTimeToStepTime( const OpMetricsDb& host_tf_metrics_db, @@ -362,9 +359,9 @@ double RatioOfHostToDeviceTimeToStepTime( } void KernelLaunchAnalysis(bool tfdata_used, double kernel_launch_percent, - string* kernel_launch_classification, - string* kernel_launch_statement) { - string percent_str = absl::StrFormat("%.1lf", kernel_launch_percent); + std::string* kernel_launch_classification, + std::string* kernel_launch_statement) { + std::string percent_str = absl::StrFormat("%.1lf", kernel_launch_percent); if (kernel_launch_percent >= kHighlyKernelLaunchBoundThresholdInPercent) { *kernel_launch_classification = "high"; *kernel_launch_statement = absl::StrCat( @@ -389,14 +386,14 @@ void KernelLaunchAnalysis(bool tfdata_used, double kernel_launch_percent, } void AllOtherAnalysis(bool all_other_reported, double all_other_percent, - string* all_other_classification, - string* all_other_statement) { + std::string* all_other_classification, + std::string* all_other_statement) { if (all_other_reported) { *all_other_classification = "no"; *all_other_statement = ""; return; } - string percent_str = absl::StrFormat("%.1lf", all_other_percent); + std::string percent_str = absl::StrFormat("%.1lf", all_other_percent); if (all_other_percent >= kHighlyAllOtherBoundThresholdInPercent) { *all_other_classification = "high"; *all_other_statement = @@ -588,9 +585,10 @@ InputPipelineAnalysisResult ConvertOpStatsToInputPipelineAnalysis( } bool InputAnalysis(double 
input_percent, double all_other_percent, - string* input_classification, string* input_statement) { + std::string* input_classification, + std::string* input_statement) { absl::string_view non_input_time = "other time"; - string infeed_percent_str = absl::StrFormat("%.1lf", input_percent); + std::string infeed_percent_str = absl::StrFormat("%.1lf", input_percent); if (input_percent >= kHighlyInfeedBoundThresholdInPercent) { *input_classification = "host"; *input_statement = absl::StrCat( @@ -610,7 +608,8 @@ bool InputAnalysis(double input_percent, double all_other_percent, // Input analysis says it is not input-bound, but "All-Other" time // is significant. It could still be input-bound (or Python overhead). *input_classification = "both"; - string all_other_percent_str = absl::StrFormat("%.1lf", all_other_percent); + std::string all_other_percent_str = + absl::StrFormat("%.1lf", all_other_percent); *input_statement = absl::StrCat( "Your program is POTENTIALLY input-bound because ", all_other_percent_str, @@ -630,8 +629,8 @@ bool InputAnalysis(double input_percent, double all_other_percent, } } -void OutputAnalysis(double output_percent, string* output_classification, - string* output_statement) { +void OutputAnalysis(double output_percent, std::string* output_classification, + std::string* output_statement) { string tc_outfeed_percent_str = absl::StrFormat("%.1lf", output_percent); if (output_percent >= kHighlyOutfeedBoundThresholdInPercent) { *output_classification = "host"; @@ -703,19 +702,19 @@ BottleneckAnalysis ComputeBottleneckAnalysis( double kernel_launch_percent = 100.0 * total_host_prepare_ms / total_step_time_ms; double all_other_percent = 100.0 * total_unknown_ms / total_step_time_ms; - string input_classification; - string input_statement; + std::string input_classification; + std::string input_statement; bool all_other_reported = InputAnalysis(input_percent, all_other_percent, &input_classification, &input_statement); - string 
kernel_launch_classification; - string kernel_launch_statement; + std::string kernel_launch_classification; + std::string kernel_launch_statement; KernelLaunchAnalysis(TfDataInUse(input_time_breakdown), kernel_launch_percent, &kernel_launch_classification, &kernel_launch_statement); - string all_other_classification; - string all_other_statement; + std::string all_other_classification; + std::string all_other_statement; AllOtherAnalysis(all_other_reported, all_other_percent, &all_other_classification, &all_other_statement); @@ -729,9 +728,9 @@ BottleneckAnalysis ComputeBottleneckAnalysis( return analysis; } -string GetSummaryNextStep(absl::string_view input_classification, - const InputTimeBreakdown& breakdown) { - string summary_next_step; +std::string GetSummaryNextStep(absl::string_view input_classification, + const InputTimeBreakdown& breakdown) { + std::string summary_next_step; if (input_classification == "host" || input_classification == "both") { if (!TfDataInUse(breakdown)) { summary_next_step = absl::StrCat( diff --git a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h index 738daeaac12..93b4df0b2c2 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h +++ b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h @@ -16,12 +16,15 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_PROFILER_CONVERT_OP_STATS_TO_INPUT_PIPELINE_ANALYSIS_H_ #define TENSORFLOW_CORE_PROFILER_CONVERT_OP_STATS_TO_INPUT_PIPELINE_ANALYSIS_H_ +#include + #include "google/protobuf/any.pb.h" #include "absl/strings/string_view.h" #include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/hardware_types.pb.h" #include "tensorflow/core/profiler/protobuf/input_pipeline.pb.h" +#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" #include "tensorflow/core/profiler/protobuf/op_stats.pb.h" #include "tensorflow/core/profiler/protobuf/steps_db.pb.h" @@ -50,16 +53,18 @@ InputPipelineAnalysisResult ConvertOpStatsToInputPipelineAnalysis( // Returns true if explanation for "All Others" time is also included in // input_statement. bool InputAnalysis(double input_percent, double all_other_percent, - string* input_classification, string* input_statement); + std::string* input_classification, + std::string* input_statement); -void OutputAnalysis(double output_percent, string* output_classification, - string* output_statement); +void OutputAnalysis(double output_percent, std::string* output_classification, + std::string* output_statement); string GetSummaryNextStep(absl::string_view input_classification, const InputTimeBreakdown& breakdown); void AddErrorMessages(const OpStats& op_stats, InputPipelineAnalysisResult* result); + } // namespace profiler } // namespace tensorflow diff --git a/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc b/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc index e19690a6606..bec92e0d998 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc @@ -15,13 +15,11 @@ limitations under the License. 
#include "tensorflow/core/profiler/convert/op_stats_to_overview_page.h" -#include -#include +#include #include "google/protobuf/any.pb.h" +#include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" -#include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/convert/op_metrics_to_record.h" #include "tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h" @@ -30,7 +28,10 @@ limitations under the License. #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" #include "tensorflow/core/profiler/protobuf/op_stats.pb.h" #include "tensorflow/core/profiler/protobuf/overview_page.pb.h" +#include "tensorflow/core/profiler/protobuf/steps_db.pb.h" +#include "tensorflow/core/profiler/protobuf/tf_function.pb.h" #include "tensorflow/core/profiler/utils/errors.h" +#include "tensorflow/core/profiler/utils/html_utils.h" #include "tensorflow/core/profiler/utils/math_utils.h" #include "tensorflow/core/profiler/utils/op_metrics_db_utils.h" #include "tensorflow/core/profiler/utils/time_utils.h" @@ -44,24 +45,23 @@ namespace { // statement of suggestion will be made. constexpr double kLowPrecisionPercentThreshold = 10; -OverviewPageTip MakeOverviewPageTip(const string& text) { - OverviewPageTip tip; - tip.set_link(text); - return tip; -} +struct TfFunctionInfo { + absl::string_view function_name; + double expensive_call_percent; +}; -string AnchorElement(const string& url, const string& text) { - return absl::StrCat("", text, ""); +OverviewPageTip MakeOverviewPageTip(std::string text) { + OverviewPageTip tip; + tip.set_link(std::move(text)); + return tip; } // Makes a recommendation for looking up a document. // doc_url is expected to be already be escaped suitably for use in an HTML // attribute. 
-OverviewPageTip MakeOverviewPageTipDocLink(const string& doc_url, - const string& text) { - OverviewPageTip tip; - tip.set_link(AnchorElement(doc_url, text)); - return tip; +OverviewPageTip MakeOverviewPageTipDocLink(absl::string_view doc_url, + absl::string_view text) { + return MakeOverviewPageTip(AnchorElement(doc_url, text)); } void ComputeHostTips(OverviewPageRecommendation* re) { @@ -75,12 +75,13 @@ void ComputeHostTips(OverviewPageRecommendation* re) { void ComputeDeviceTips(HardwareType hardware_type, OverviewPageRecommendation* re) { - const string& device_name = HardwareType_Name(hardware_type); - string timeline_name = - (hardware_type == tensorflow::profiler::TPU) ? "TPU core" : device_name; - string op_stats_toolname = (hardware_type == tensorflow::profiler::TPU) - ? "op_profile" - : "tensorflow_stats"; + absl::string_view device_name = HardwareType_Name(hardware_type); + absl::string_view timeline_name = device_name; + absl::string_view op_stats_toolname = "tensorflow_stats"; + if (hardware_type == tensorflow::profiler::TPU) { + timeline_name = "TPU core"; + op_stats_toolname = "op_profile"; + } *re->add_device_tips() = MakeOverviewPageTip( absl::StrCat(op_stats_toolname, " (identify the time-consuming operations " @@ -121,14 +122,16 @@ std::string GeneratePrecisionStatement(const PrecisionStats& precision_stats) { } // namespace -void SetCommonRecommendation(const string& input_classification, - const string& input_statement, - const string& output_statement, +void SetCommonRecommendation(absl::string_view input_classification, + absl::string_view input_statement, + absl::string_view output_statement, HardwareType hardware_type, + absl::string_view tf_function_statement_html, OverviewPageRecommendation* re) { - re->set_bottleneck(input_classification); - re->set_statement(input_statement); - re->set_output_statement(output_statement); + re->set_bottleneck(std::string(input_classification)); + re->set_statement(std::string(input_statement)); + 
re->set_output_statement(std::string(output_statement)); + re->set_tf_function_statement_html(std::string(tf_function_statement_html)); ComputeHostTips(re); ComputeDeviceTips(hardware_type, re); ComputeDocumentationTips(re); @@ -245,6 +248,35 @@ OverviewPageRunEnvironment ComputeRunEnvironment( return re; } +std::string TfFunctionRecommendationHtml(const TfFunctionDb& tf_function_db) { + std::vector candidates; + for (const auto& name_fun : tf_function_db.tf_functions()) { + const auto& fun = name_fun.second; + if (fun.expensive_call_percent() >= kTfFunctionReportThresholdInPercent) { + candidates.push_back({name_fun.first, fun.expensive_call_percent()}); + } + } + if (candidates.empty()) return ""; + auto cmp = [](const TfFunctionInfo& a, const TfFunctionInfo& b) { + return a.expensive_call_percent > b.expensive_call_percent; + }; + // Sorts candidates in descending order of expensive_call_percent. + absl::c_sort(candidates, cmp); + std::string expensive_functions = ""; + auto num_functions_shown = std::min( + static_cast(3), candidates.size()); + + for (auto i = 0; i < num_functions_shown; i++) { + if (i > 0) absl::StrAppend(&expensive_functions, ", "); + absl::StrAppend(&expensive_functions, "\"", candidates[i].function_name, + "\""); + } + if (candidates.size() > num_functions_shown) + absl::StrAppend(&expensive_functions, " and more"); + return absl::StrCat("Expensive tf-functions detected (", expensive_functions, + ") due to either retracing or eager execution."); +} + OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats, HardwareType hardware_type) { OverviewPage overview_page; @@ -258,9 +290,10 @@ OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats, overview_page.input_analysis().step_details()); *overview_page.mutable_recommendation() = ComputeGenericRecommendation( bottleneck, op_stats.device_op_metrics_db().precision_stats()); - SetCommonRecommendation(bottleneck.input_classification(), - bottleneck.input_statement(), "", 
hardware_type, - overview_page.mutable_recommendation()); + SetCommonRecommendation( + bottleneck.input_classification(), bottleneck.input_statement(), "", + hardware_type, TfFunctionRecommendationHtml(op_stats.tf_function_db()), + overview_page.mutable_recommendation()); return overview_page; } diff --git a/tensorflow/core/profiler/convert/op_stats_to_overview_page.h b/tensorflow/core/profiler/convert/op_stats_to_overview_page.h index e6d12708e9f..b4b3991a18d 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_overview_page.h +++ b/tensorflow/core/profiler/convert/op_stats_to_overview_page.h @@ -17,9 +17,7 @@ limitations under the License. #define TENSORFLOW_CORE_PROFILER_CONVERT_OP_STATS_TO_OVERVIEW_PAGE_H_ #include "absl/strings/string_view.h" -#include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/types.h" -#include "tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h" #include "tensorflow/core/profiler/protobuf/hardware_types.pb.h" #include "tensorflow/core/profiler/protobuf/input_pipeline.pb.h" #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" @@ -29,10 +27,16 @@ limitations under the License. namespace tensorflow { namespace profiler { -void SetCommonRecommendation(const string& input_classification, - const string& input_statement, - const string& output_statement, +// Reports tf-function optimization opportunity in the Overview Page if the +// expensive-call-time percentage is over this threshold for at least one of +// the tf-functions profiled. 
+const double kTfFunctionReportThresholdInPercent = 20; + +void SetCommonRecommendation(absl::string_view input_classification, + absl::string_view input_statement, + absl::string_view output_statement, HardwareType hardware_type, + absl::string_view tf_function_statement_html, OverviewPageRecommendation* re); OverviewPageRecommendation ComputeGenericRecommendation( @@ -47,6 +51,9 @@ OverviewPageRunEnvironment ComputeRunEnvironment( OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats, HardwareType hardware_type); +// Returns a html which provides tf-function related recommendation. +std::string TfFunctionRecommendationHtml(const TfFunctionDb& tf_function_db); + void SetRemarks(const OpStats& op_stats, OverviewPageAnalysis* analysis); } // namespace profiler diff --git a/tensorflow/core/profiler/convert/op_stats_to_tf_stats.cc b/tensorflow/core/profiler/convert/op_stats_to_tf_stats.cc index da409f89a60..e23813a5b5d 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_tf_stats.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_tf_stats.cc @@ -15,13 +15,12 @@ limitations under the License. 
#include "tensorflow/core/profiler/convert/op_stats_to_tf_stats.h" -#include "absl/container/flat_hash_set.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/convert/op_metrics_to_record.h" #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" #include "tensorflow/core/profiler/protobuf/op_stats.pb.h" #include "tensorflow/core/profiler/protobuf/tf_stats.pb.h" #include "tensorflow/core/profiler/utils/op_metrics_db_utils.h" -#include "tensorflow/core/profiler/utils/tf_op_utils.h" #include "tensorflow/core/profiler/utils/time_utils.h" namespace tensorflow { diff --git a/tensorflow/core/profiler/convert/op_stats_to_tf_stats_test.cc b/tensorflow/core/profiler/convert/op_stats_to_tf_stats_test.cc index 3e098da7eb8..9ca83b51a70 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_tf_stats_test.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_tf_stats_test.cc @@ -15,10 +15,14 @@ limitations under the License. #include "tensorflow/core/profiler/convert/op_stats_to_tf_stats.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/convert/xplane_to_op_stats.h" -#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" -#include "tensorflow/core/profiler/utils/op_metrics_db_utils.h" +#include "tensorflow/core/profiler/protobuf/op_stats.pb.h" +#include "tensorflow/core/profiler/protobuf/tf_stats.pb.h" +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/time_utils.h" #include "tensorflow/core/profiler/utils/xplane_builder.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" @@ -75,8 +79,8 @@ TEST(OpStatsToTfStats, GpuTfStats) { kKernel3DurationNs, /*on_device=*/true, kKernel3, &device_plane, &stream2); - const OpStats& op_stats = ConvertXSpaceToOpStats(space); - const TfStatsDatabase& tf_stats = ConvertOpStatsToTfStats(op_stats); + 
const OpStats op_stats = ConvertXSpaceToOpStats(space); + const TfStatsDatabase tf_stats = ConvertOpStatsToTfStats(op_stats); // TfOp1, TfOp2, Idle EXPECT_EQ(3, tf_stats.with_idle().tf_stats_record_size()); diff --git a/tensorflow/core/profiler/convert/step_events_to_steps_db.cc b/tensorflow/core/profiler/convert/step_events_to_steps_db.cc index ed0d83ade2f..e4713cd73fb 100644 --- a/tensorflow/core/profiler/convert/step_events_to_steps_db.cc +++ b/tensorflow/core/profiler/convert/step_events_to_steps_db.cc @@ -15,10 +15,18 @@ limitations under the License. #include "tensorflow/core/profiler/convert/step_events_to_steps_db.h" #include +#include +#include #include "google/protobuf/any.pb.h" +#include "absl/algorithm/container.h" +#include "absl/container/flat_hash_map.h" #include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/protobuf/steps_db.pb.h" +#include "tensorflow/core/profiler/utils/event_span.h" +#include "tensorflow/core/profiler/utils/timespan.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/convert/step_events_to_steps_db.h b/tensorflow/core/profiler/convert/step_events_to_steps_db.h index b3ea74e905f..9db65163f7a 100644 --- a/tensorflow/core/profiler/convert/step_events_to_steps_db.h +++ b/tensorflow/core/profiler/convert/step_events_to_steps_db.h @@ -16,6 +16,7 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_PROFILER_CONVERT_STEP_EVENTS_TO_STEPS_DB_H_ #define TENSORFLOW_CORE_PROFILER_CONVERT_STEP_EVENTS_TO_STEPS_DB_H_ +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/steps_db.pb.h" #include "tensorflow/core/profiler/utils/event_span.h" diff --git a/tensorflow/core/profiler/convert/trace_events_to_json.cc b/tensorflow/core/profiler/convert/trace_events_to_json.cc index 9c8176c10ad..07e32ced9d0 100644 --- a/tensorflow/core/profiler/convert/trace_events_to_json.cc +++ b/tensorflow/core/profiler/convert/trace_events_to_json.cc @@ -15,9 +15,14 @@ limitations under the License. #include "tensorflow/core/profiler/convert/trace_events_to_json.h" +#include +#include +#include + #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "include/json/json.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/trace_events.pb.h" namespace tensorflow { diff --git a/tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.cc b/tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.cc index 785902e2a50..023d6a73d77 100644 --- a/tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.cc +++ b/tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.cc @@ -15,16 +15,20 @@ limitations under the License. 
#include "tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.h" +#include + +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h" -#include "tensorflow/core/profiler/utils/event_span.h" +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/kernel_stats_utils.h" #include "tensorflow/core/profiler/utils/tf_op_utils.h" #include "tensorflow/core/profiler/utils/tf_xplane_visitor.h" #include "tensorflow/core/profiler/utils/trace_utils.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" -#include "tensorflow/core/profiler/utils/xplane_utils.h" +#include "tensorflow/core/profiler/utils/xplane_visitor.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.h b/tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.h index 04bd0e8ae5f..9c7fca22887 100644 --- a/tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.h +++ b/tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.h @@ -17,9 +17,7 @@ limitations under the License. 
#define TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_KERNEL_STATS_DB_H_ #include -#include -#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/xplane_visitor.h" @@ -31,6 +29,7 @@ KernelStatsDb ConvertDeviceTraceXPlaneToKernelStatsDb( const XPlane& device_trace, const std::function& on_kernel_fn); + } // namespace profiler } // namespace tensorflow diff --git a/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc b/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc index 1695bd34d73..5b2a7489241 100644 --- a/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc +++ b/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc @@ -15,21 +15,28 @@ limitations under the License. #include "tensorflow/core/profiler/convert/xplane_to_memory_profile.h" -#include +#include #include #include +#include #include +#include +#include "absl/algorithm/container.h" #include "absl/container/flat_hash_map.h" #include "absl/strings/str_format.h" #include "absl/strings/string_view.h" +#include "absl/types/optional.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/protobuf.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/memory_profile.pb.h" +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/tf_xplane_visitor.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" -#include "tensorflow/core/profiler/utils/xplane_utils.h" +#include "tensorflow/core/profiler/utils/xplane_visitor.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/convert/xplane_to_memory_profile_test.cc b/tensorflow/core/profiler/convert/xplane_to_memory_profile_test.cc index 
1173e4d5c72..e0d87ac7567 100644 --- a/tensorflow/core/profiler/convert/xplane_to_memory_profile_test.cc +++ b/tensorflow/core/profiler/convert/xplane_to_memory_profile_test.cc @@ -15,7 +15,11 @@ limitations under the License. #include "tensorflow/core/profiler/convert/xplane_to_memory_profile.h" +#include "absl/strings/string_view.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/protobuf/memory_profile.pb.h" +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/xplane_builder.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" #include "tensorflow/core/profiler/utils/xplane_utils.h" diff --git a/tensorflow/core/profiler/convert/xplane_to_op_metrics_db.cc b/tensorflow/core/profiler/convert/xplane_to_op_metrics_db.cc index 09df59e44d9..4a369b8b96a 100644 --- a/tensorflow/core/profiler/convert/xplane_to_op_metrics_db.cc +++ b/tensorflow/core/profiler/convert/xplane_to_op_metrics_db.cc @@ -15,21 +15,31 @@ limitations under the License. 
#include "tensorflow/core/profiler/convert/xplane_to_op_metrics_db.h" +#include +#include #include #include "absl/algorithm/container.h" #include "absl/container/flat_hash_map.h" +#include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" +#include "absl/types/optional.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/convert/op_metrics_db_combiner.h" #include "tensorflow/core/profiler/convert/op_stack.h" #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/cost_utils.h" +#include "tensorflow/core/profiler/utils/op_metrics_db_utils.h" #include "tensorflow/core/profiler/utils/op_utils.h" #include "tensorflow/core/profiler/utils/tf_op_utils.h" +#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h" #include "tensorflow/core/profiler/utils/timespan.h" #include "tensorflow/core/profiler/utils/trace_utils.h" +#include "tensorflow/core/profiler/utils/xplane_schema.h" +#include "tensorflow/core/profiler/utils/xplane_visitor.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/convert/xplane_to_op_metrics_db.h b/tensorflow/core/profiler/convert/xplane_to_op_metrics_db.h index 1a785d0335f..f2d7fc702fc 100644 --- a/tensorflow/core/profiler/convert/xplane_to_op_metrics_db.h +++ b/tensorflow/core/profiler/convert/xplane_to_op_metrics_db.h @@ -21,10 +21,9 @@ limitations under the License. 
#include "tensorflow/core/profiler/convert/op_metrics_db_combiner.h" #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" -#include "tensorflow/core/profiler/utils/event_span.h" #include "tensorflow/core/profiler/utils/op_utils.h" #include "tensorflow/core/profiler/utils/tf_op_utils.h" -#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h" +#include "tensorflow/core/profiler/utils/xplane_visitor.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/convert/xplane_to_op_metrics_db_test.cc b/tensorflow/core/profiler/convert/xplane_to_op_metrics_db_test.cc index 3e577d00e1c..8bd0443b8f6 100644 --- a/tensorflow/core/profiler/convert/xplane_to_op_metrics_db_test.cc +++ b/tensorflow/core/profiler/convert/xplane_to_op_metrics_db_test.cc @@ -15,9 +15,12 @@ limitations under the License. #include "tensorflow/core/profiler/convert/xplane_to_op_metrics_db.h" +#include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/op_metrics_db_utils.h" #include "tensorflow/core/profiler/utils/time_utils.h" #include "tensorflow/core/profiler/utils/xplane_builder.h" diff --git a/tensorflow/core/profiler/convert/xplane_to_op_stats.cc b/tensorflow/core/profiler/convert/xplane_to_op_stats.cc index 7fdd6ffd8cb..f008219cbd2 100644 --- a/tensorflow/core/profiler/convert/xplane_to_op_stats.cc +++ b/tensorflow/core/profiler/convert/xplane_to_op_stats.cc @@ -15,7 +15,11 @@ limitations under the License. 
#include "tensorflow/core/profiler/convert/xplane_to_op_stats.h" +#include + +#include "absl/container/flat_hash_map.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/convert/op_metrics_db_combiner.h" #include "tensorflow/core/profiler/convert/step_events_to_steps_db.h" #include "tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.h" #include "tensorflow/core/profiler/convert/xplane_to_op_metrics_db.h" @@ -23,12 +27,19 @@ limitations under the License. #include "tensorflow/core/profiler/convert/xplane_to_tf_functions.h" #include "tensorflow/core/profiler/protobuf/hardware_types.pb.h" #include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h" +#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" +#include "tensorflow/core/profiler/protobuf/op_stats.pb.h" +#include "tensorflow/core/profiler/protobuf/steps_db.pb.h" #include "tensorflow/core/profiler/protobuf/tf_function.pb.h" +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/event_span.h" #include "tensorflow/core/profiler/utils/hardware_type_utils.h" #include "tensorflow/core/profiler/utils/kernel_stats_utils.h" +#include "tensorflow/core/profiler/utils/tf_op_utils.h" +#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" #include "tensorflow/core/profiler/utils/xplane_utils.h" +#include "tensorflow/core/profiler/utils/xplane_visitor.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc b/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc index c7b140b6a67..7b4652f6c0b 100644 --- a/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc +++ b/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc @@ -15,10 +15,14 @@ limitations under the License. 
#include "tensorflow/core/profiler/convert/xplane_to_op_stats.h" +#include "absl/strings/str_cat.h" #include "tensorflow/core/platform/test.h" -#include "tensorflow/core/profiler/convert/xplane_to_tf_functions.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" +#include "tensorflow/core/profiler/protobuf/op_stats.pb.h" #include "tensorflow/core/profiler/protobuf/steps_db.pb.h" +#include "tensorflow/core/profiler/protobuf/tf_function.pb.h" +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/group_events.h" #include "tensorflow/core/profiler/utils/xplane_builder.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" diff --git a/tensorflow/core/profiler/convert/xplane_to_profile_response.cc b/tensorflow/core/profiler/convert/xplane_to_profile_response.cc index b0259bb8865..e6fe74942fc 100644 --- a/tensorflow/core/profiler/convert/xplane_to_profile_response.cc +++ b/tensorflow/core/profiler/convert/xplane_to_profile_response.cc @@ -15,8 +15,10 @@ limitations under the License. #include "tensorflow/core/profiler/convert/xplane_to_profile_response.h" #include "absl/container/flat_hash_set.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/platform/human_readable_json.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/status.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h" #include "tensorflow/core/profiler/convert/op_stats_to_overview_page.h" @@ -33,6 +35,7 @@ limitations under the License. 
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h" #include "tensorflow/core/profiler/protobuf/overview_page.pb.h" #include "tensorflow/core/profiler/protobuf/tf_stats.pb.h" +#include "tensorflow/core/profiler/protobuf/trace_events.pb.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/rpc/client/save_profile.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" @@ -65,20 +68,26 @@ void AddToolData(absl::string_view tool_name, const Proto& tool_output, } template -Status AddJsonToolData(absl::string_view tool_name, const Proto& tool_output, - ProfileResponse* response) { - std::string json_output; - TF_RETURN_IF_ERROR(ProtoToHumanReadableJson(tool_output, &json_output, - /*ignore_accuracy_loss=*/true)); - auto* tool_data = response->add_tool_data(); - tool_data->set_name(string(tool_name)); - tool_data->mutable_data()->append(json_output.data(), json_output.size()); +Status ConvertProtoToJson(const Proto& proto_output, std::string* json_output) { + protobuf::util::JsonPrintOptions json_options; + json_options.always_print_primitive_fields = true; + auto status = protobuf::util::MessageToJsonString(proto_output, json_output, + json_options); + if (!status.ok()) { + // Convert error_msg google::protobuf::StringPiece (or absl::string_view) to + // tensorflow::StringPiece. + auto error_msg = status.message(); + return errors::Internal( + strings::StrCat("Could not convert proto to JSON string: ", + StringPiece(error_msg.data(), error_msg.length()))); + } return Status::OK(); } // Returns the tool name with extension. 
string ToolName(absl::string_view tool) { if (tool == kTraceViewer) return "trace.json.gz"; + if (tool == kMemoryProfile) return "memory_profile.json.gz"; return absl::StrCat(tool, ".pb"); } @@ -130,8 +139,11 @@ Status ConvertXSpaceToProfileResponse(const XSpace& xspace, if (tools.contains(kMemoryProfile)) { if (const XPlane* host_plane = FindPlaneWithName(xspace, kHostThreads)) { MemoryProfile memory_profile = ConvertXPlaneToMemoryProfile(*host_plane); - TF_RETURN_IF_ERROR( - AddJsonToolData(ToolName(kMemoryProfile), memory_profile, response)); + std::string json_output; + TF_RETURN_IF_ERROR(ConvertProtoToJson(memory_profile, &json_output)); + TF_RETURN_IF_ERROR(SaveGzippedToolDataToTensorboardProfile( + req.repository_root(), req.session_id(), req.host_name(), + ToolName(kMemoryProfile), json_output)); } } return Status::OK(); diff --git a/tensorflow/core/profiler/convert/xplane_to_profile_response.h b/tensorflow/core/profiler/convert/xplane_to_profile_response.h index 84b9fdd914b..03ca13f1788 100644 --- a/tensorflow/core/profiler/convert/xplane_to_profile_response.h +++ b/tensorflow/core/profiler/convert/xplane_to_profile_response.h @@ -15,8 +15,6 @@ limitations under the License. #ifndef TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_PROFILE_RESPONSE_H_ #define TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_PROFILE_RESPONSE_H_ -#include "absl/container/flat_hash_set.h" -#include "absl/strings/string_view.h" #include "tensorflow/core/platform/status.h" #include "tensorflow/core/profiler/profiler_service.pb.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" diff --git a/tensorflow/core/profiler/convert/xplane_to_profile_response_test.cc b/tensorflow/core/profiler/convert/xplane_to_profile_response_test.cc index d4965a9975c..ad9ca1028f6 100644 --- a/tensorflow/core/profiler/convert/xplane_to_profile_response_test.cc +++ b/tensorflow/core/profiler/convert/xplane_to_profile_response_test.cc @@ -14,13 +14,14 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/core/profiler/convert/xplane_to_profile_response.h" +#include "tensorflow/core/platform/status.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/profiler/profiler_service.pb.h" #include "tensorflow/core/profiler/protobuf/input_pipeline.pb.h" #include "tensorflow/core/profiler/protobuf/overview_page.pb.h" #include "tensorflow/core/profiler/protobuf/tf_stats.pb.h" +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/xplane_builder.h" -#include "tensorflow/core/profiler/utils/xplane_schema.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/convert/xplane_to_step_events.cc b/tensorflow/core/profiler/convert/xplane_to_step_events.cc index 78bd3dbee0f..7bb7cd6943c 100644 --- a/tensorflow/core/profiler/convert/xplane_to_step_events.cc +++ b/tensorflow/core/profiler/convert/xplane_to_step_events.cc @@ -15,29 +15,37 @@ limitations under the License. 
#include "tensorflow/core/profiler/convert/xplane_to_step_events.h" -#include "tensorflow/core/lib/strings/str_util.h" +#include "absl/container/flat_hash_map.h" +#include "absl/strings/match.h" +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" +#include "tensorflow/core/profiler/utils/event_span.h" #include "tensorflow/core/profiler/utils/tf_xplane_visitor.h" +#include "tensorflow/core/profiler/utils/timespan.h" #include "tensorflow/core/profiler/utils/trace_utils.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" +#include "tensorflow/core/profiler/utils/xplane_visitor.h" namespace tensorflow { namespace profiler { namespace { inline bool IsExplicitHostStepMarker(absl::string_view event_name) { - return (str_util::StartsWith(event_name, "train") || - str_util::StartsWith(event_name, "test") || - str_util::StartsWith(event_name, "TraceContext")) && - !str_util::StrContains(event_name, "/"); + return (absl::StartsWith(event_name, "train") || + absl::StartsWith(event_name, "test") || + absl::StartsWith(event_name, "TraceContext")) && + !absl::StrContains(event_name, "/"); } // Returns true if the given event_name should be considered as real computation // on CPU. 
inline bool IsRealCpuCompute(absl::string_view event_name) { - bool not_real = str_util::StartsWith(event_name, "EagerExecute") || - str_util::StartsWith(event_name, "EagerLocalExecute") || - str_util::StartsWith(event_name, "EagerKernelExecute") || - str_util::StartsWith(event_name, "FunctionRun") || + bool not_real = absl::StartsWith(event_name, "EagerExecute") || + absl::StartsWith(event_name, "EagerLocalExecute") || + absl::StartsWith(event_name, "EagerKernelExecute") || + absl::StartsWith(event_name, "FunctionRun") || IsExplicitHostStepMarker(event_name); return !not_real; } diff --git a/tensorflow/core/profiler/convert/xplane_to_step_events.h b/tensorflow/core/profiler/convert/xplane_to_step_events.h index a7ac3b9e89e..62fc89813a1 100644 --- a/tensorflow/core/profiler/convert/xplane_to_step_events.h +++ b/tensorflow/core/profiler/convert/xplane_to_step_events.h @@ -18,7 +18,7 @@ limitations under the License. #include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/event_span.h" -#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h" +#include "tensorflow/core/profiler/utils/xplane_visitor.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/convert/xplane_to_step_events_test.cc b/tensorflow/core/profiler/convert/xplane_to_step_events_test.cc index 3e1610c2e0f..36e6a2c3091 100644 --- a/tensorflow/core/profiler/convert/xplane_to_step_events_test.cc +++ b/tensorflow/core/profiler/convert/xplane_to_step_events_test.cc @@ -15,7 +15,13 @@ limitations under the License. 
#include "tensorflow/core/profiler/convert/xplane_to_step_events.h" +#include + +#include "absl/container/flat_hash_map.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" +#include "tensorflow/core/profiler/utils/event_span.h" #include "tensorflow/core/profiler/utils/group_events.h" #include "tensorflow/core/profiler/utils/xplane_builder.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" diff --git a/tensorflow/core/profiler/convert/xplane_to_tf_functions.cc b/tensorflow/core/profiler/convert/xplane_to_tf_functions.cc index 15cc98df9fb..b25cdc4d219 100644 --- a/tensorflow/core/profiler/convert/xplane_to_tf_functions.cc +++ b/tensorflow/core/profiler/convert/xplane_to_tf_functions.cc @@ -15,20 +15,25 @@ limitations under the License. #include "tensorflow/core/profiler/convert/xplane_to_tf_functions.h" +#include #include +#include +#include +#include #include "absl/algorithm/container.h" -#include "absl/container/flat_hash_map.h" +#include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" +#include "absl/types/optional.h" #include "tensorflow/core/lib/gtl/map_util.h" -#include "tensorflow/core/lib/strings/proto_serialization.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" +#include "tensorflow/core/profiler/utils/math_utils.h" #include "tensorflow/core/profiler/utils/tf_xplane_visitor.h" #include "tensorflow/core/profiler/utils/timespan.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" -#include "tensorflow/core/profiler/utils/xplane_utils.h" #include "tensorflow/core/profiler/utils/xplane_visitor.h" namespace tensorflow { @@ -54,6 +59,21 @@ std::pair Decode( DCHECK(false); } +double ComputeExpensiveCallPercent(const TfFunction& tf_function) { + // Computes the expensiveness in terms of 
time (rather than count). + uint64 total_call_time_ps = 0; + uint64 expensive_call_time_ps = 0; + for (const auto& mode_metrics : tf_function.metrics()) { + const auto mode = mode_metrics.first; + const auto& metrics = mode_metrics.second; + total_call_time_ps += metrics.self_time_ps(); + if (mode == TRACED_MODE || mode == EAGER_MODE) { + expensive_call_time_ps += metrics.self_time_ps(); + } + } + return SafeDivide(100.0 * expensive_call_time_ps, total_call_time_ps); +} + // Each invocation of a tf-function creates an ActivationRecord. struct ActivationRecord { std::string function_name; // name of the tf-function. @@ -133,6 +153,7 @@ void CombineTfFunction(const TfFunction& src, TfFunction* dst) { CombineTfFunctionMetrics(src_metrics, dst_metrics); } } + dst->set_expensive_call_percent(ComputeExpensiveCallPercent(*dst)); } // Execution history of all tf-functions invoked. @@ -210,6 +231,10 @@ class TfFunctionExecutions { metrics->set_count(metrics->count() + 1); metrics->set_self_time_ps(metrics->self_time_ps() + self_time_ps); } + for (auto& name_fun : *result.mutable_tf_functions()) { + TfFunction& fun = name_fun.second; + fun.set_expensive_call_percent(ComputeExpensiveCallPercent(fun)); + } return result; } @@ -243,9 +268,9 @@ class TfFunctionExecutions { } // namespace -std::string DebugString(const TfFunctionDb tf_function_db) { +std::string DebugString(const TfFunctionDb& tf_function_db) { std::string str; - ::tensorflow::protobuf::TextFormat::PrintToString(tf_function_db, &str); + protobuf::TextFormat::PrintToString(tf_function_db, &str); return str; } diff --git a/tensorflow/core/profiler/convert/xplane_to_tf_functions.h b/tensorflow/core/profiler/convert/xplane_to_tf_functions.h index 470b22d34b8..df55ac79bb8 100644 --- a/tensorflow/core/profiler/convert/xplane_to_tf_functions.h +++ b/tensorflow/core/profiler/convert/xplane_to_tf_functions.h @@ -16,8 +16,9 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_TF_FUNCTIONS_H_ #define TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_TF_FUNCTIONS_H_ +#include + #include "tensorflow/core/profiler/protobuf/tf_function.pb.h" -#include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/xplane_visitor.h" namespace tensorflow { diff --git a/tensorflow/core/profiler/convert/xplane_to_tf_functions_test.cc b/tensorflow/core/profiler/convert/xplane_to_tf_functions_test.cc index 253ef1a74f9..25e56d17418 100644 --- a/tensorflow/core/profiler/convert/xplane_to_tf_functions_test.cc +++ b/tensorflow/core/profiler/convert/xplane_to_tf_functions_test.cc @@ -15,12 +15,17 @@ limitations under the License. #include "tensorflow/core/profiler/convert/xplane_to_tf_functions.h" +#include + +#include "absl/strings/string_view.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/profiler/protobuf/tf_function.pb.h" +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/tf_xplane_visitor.h" #include "tensorflow/core/profiler/utils/xplane_builder.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" #include "tensorflow/core/profiler/utils/xplane_utils.h" +#include "tensorflow/core/profiler/utils/xplane_visitor.h" namespace tensorflow { namespace profiler { @@ -33,6 +38,8 @@ const absl::string_view kTracedXla = "traced-xla"; const absl::string_view kNotTracedNonXla = "notTraced-nonXla"; const absl::string_view kNotTracedXla = "notTraced-xla"; +constexpr double kMaxError = 0.001; + TfFunctionDb ConvertXSpaceToTfFunctionDb(const XSpace& space) { TfFunctionDb result; const XPlane* host_plane = FindPlaneWithName(space, kHostThreads); @@ -75,6 +82,8 @@ TEST(ConvertXPlaneToTfFunctions, CombineTwoThreads) { tf_function_db.tf_functions().at(kFunctionName); EXPECT_EQ(tf_function.total_tracing_count(), 4); EXPECT_EQ(tf_function.compiler(), MIXED_COMPILER); + EXPECT_NEAR(tf_function.expensive_call_percent(), 90, 
kMaxError); + const auto& metrics = tf_function.metrics(); EXPECT_EQ(metrics.size(), 2); EXPECT_EQ(metrics.count(TRACED_MODE), 1); @@ -108,6 +117,7 @@ TEST(ConvertXPlaneToTfFunctions, NestedFunctions) { tf_function_db.tf_functions().at(kOuterFunctionName); EXPECT_EQ(outer.total_tracing_count(), 1); EXPECT_EQ(outer.compiler(), OTHER_COMPILER); + EXPECT_NEAR(outer.expensive_call_percent(), 100, kMaxError); const auto& outer_metrics = outer.metrics(); EXPECT_EQ(outer_metrics.size(), 1); EXPECT_EQ(outer_metrics.count(TRACED_MODE), 1); @@ -118,6 +128,7 @@ TEST(ConvertXPlaneToTfFunctions, NestedFunctions) { tf_function_db.tf_functions().at(kInnerFunctionName); EXPECT_EQ(inner.total_tracing_count(), 0); EXPECT_EQ(inner.compiler(), XLA_COMPILER); + EXPECT_NEAR(inner.expensive_call_percent(), 0, kMaxError); const auto& inner_metrics = inner.metrics(); EXPECT_EQ(inner_metrics.size(), 1); EXPECT_EQ(inner_metrics.count(NOT_TRACED_MODE), 1); @@ -148,6 +159,7 @@ TEST(ConvertXPlaneToTfFunctions, EagerPlusConcrete) { tf_function_db.tf_functions().at(kEagerFunctionName); EXPECT_EQ(eager.total_tracing_count(), 0); EXPECT_EQ(eager.compiler(), INVALID_COMPILER); + EXPECT_NEAR(eager.expensive_call_percent(), 100, kMaxError); const auto& eager_metrics = eager.metrics(); EXPECT_EQ(eager_metrics.size(), 1); EXPECT_EQ(eager_metrics.count(EAGER_MODE), 1); @@ -158,6 +170,7 @@ TEST(ConvertXPlaneToTfFunctions, EagerPlusConcrete) { tf_function_db.tf_functions().at(kConcreteFunctionName); EXPECT_EQ(concrete.total_tracing_count(), 0); EXPECT_EQ(concrete.compiler(), INVALID_COMPILER); + EXPECT_NEAR(concrete.expensive_call_percent(), 0, kMaxError); const auto& concrete_metrics = concrete.metrics(); EXPECT_EQ(concrete_metrics.size(), 1); EXPECT_EQ(concrete_metrics.count(CONCRETE_MODE), 1); diff --git a/tensorflow/core/profiler/convert/xplane_to_trace_events.cc b/tensorflow/core/profiler/convert/xplane_to_trace_events.cc index 901f3be764a..c404f7bb7e4 100644 --- 
a/tensorflow/core/profiler/convert/xplane_to_trace_events.cc +++ b/tensorflow/core/profiler/convert/xplane_to_trace_events.cc @@ -15,8 +15,21 @@ limitations under the License. #include "tensorflow/core/profiler/convert/xplane_to_trace_events.h" +#include + +#include +#include +#include +#include + +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/protobuf/trace_events.pb.h" +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/tf_xplane_visitor.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" +#include "tensorflow/core/profiler/utils/xplane_visitor.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/convert/xplane_to_trace_events.h b/tensorflow/core/profiler/convert/xplane_to_trace_events.h index 5c6fbead805..b7bddb7b366 100644 --- a/tensorflow/core/profiler/convert/xplane_to_trace_events.h +++ b/tensorflow/core/profiler/convert/xplane_to_trace_events.h @@ -16,7 +16,8 @@ limitations under the License. #ifndef TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_TRACE_EVENTS_H_ #define TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_TRACE_EVENTS_H_ -#include "absl/strings/str_split.h" +#include + #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/trace_events.pb.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" diff --git a/tensorflow/core/profiler/convert/xplane_to_trace_events_test.cc b/tensorflow/core/profiler/convert/xplane_to_trace_events_test.cc index afff5e60d97..b9a9fe09981 100644 --- a/tensorflow/core/profiler/convert/xplane_to_trace_events_test.cc +++ b/tensorflow/core/profiler/convert/xplane_to_trace_events_test.cc @@ -16,8 +16,9 @@ limitations under the License. 
#include "tensorflow/core/profiler/convert/xplane_to_trace_events.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/profiler/protobuf/trace_events.pb.h" +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/xplane_builder.h" -#include "tensorflow/core/profiler/utils/xplane_schema.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/internal/BUILD b/tensorflow/core/profiler/internal/BUILD index 9fab42cd54a..85fa4e7fc44 100644 --- a/tensorflow/core/profiler/internal/BUILD +++ b/tensorflow/core/profiler/internal/BUILD @@ -423,8 +423,10 @@ tf_cc_test( deps = [ ":traceme_recorder", "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest_main", + "@com_google_googletest//:gtest", ], ) @@ -434,7 +436,6 @@ cc_library( deps = [ "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", - "//tensorflow/core/profiler:profiler_options_proto_cc", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", ], ) @@ -444,6 +445,7 @@ cc_library( hdrs = ["profiler_factory.h"], deps = [ ":profiler_interface", + "//tensorflow/core/profiler:profiler_options_proto_cc", ] + if_static([ ":profiler_factory_impl", ]), @@ -461,8 +463,7 @@ cc_library( deps = [ ":profiler_interface", "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core/profiler/protobuf:xplane_proto_cc", + "//tensorflow/core/profiler:profiler_options_proto_cc", ], alwayslink = True, ) @@ -513,15 +514,10 @@ tf_cc_test( srcs = ["scoped_annotation_test.cc"], deps = [ ":annotation_stack", - "//tensorflow/core:core_cpu", "//tensorflow/core:core_cpu_internal", - "//tensorflow/core:framework", - "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", - "//tensorflow/core:testlib", 
"//tensorflow/core/profiler/lib:scoped_annotation", "@com_google_absl//absl/strings", ], @@ -544,6 +540,6 @@ tf_cc_test( ":parse_annotation", "//tensorflow/core:test", "//tensorflow/core:test_main", - "//tensorflow/core:testlib", + "@com_google_absl//absl/strings", ], ) diff --git a/tensorflow/core/profiler/internal/annotation_stack.cc b/tensorflow/core/profiler/internal/annotation_stack.cc index 4cfd1027a68..4c15ca47c3d 100644 --- a/tensorflow/core/profiler/internal/annotation_stack.cc +++ b/tensorflow/core/profiler/internal/annotation_stack.cc @@ -15,6 +15,10 @@ limitations under the License. #include "tensorflow/core/profiler/internal/annotation_stack.h" +#include + +#include "tensorflow/core/platform/types.h" + namespace tensorflow { namespace profiler { namespace internal { diff --git a/tensorflow/core/profiler/internal/annotation_stack.h b/tensorflow/core/profiler/internal/annotation_stack.h index 38cd962cb32..e626c4c73cc 100644 --- a/tensorflow/core/profiler/internal/annotation_stack.h +++ b/tensorflow/core/profiler/internal/annotation_stack.h @@ -18,6 +18,7 @@ limitations under the License. 
#include #include +#include #include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" diff --git a/tensorflow/core/profiler/internal/cpu/BUILD b/tensorflow/core/profiler/internal/cpu/BUILD index e156667c5a7..c24c8c7d456 100644 --- a/tensorflow/core/profiler/internal/cpu/BUILD +++ b/tensorflow/core/profiler/internal/cpu/BUILD @@ -18,6 +18,7 @@ cc_library( "//tensorflow/core/profiler/utils:tf_op_utils", "//tensorflow/core/profiler/utils:xplane_builder", "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/strings", ], ) @@ -26,10 +27,10 @@ cc_library( srcs = ["host_tracer.cc"], deps = [ ":host_tracer_utils", - "//tensorflow/core:core_cpu_lib", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", + "//tensorflow/core/profiler:profiler_options_proto_cc", "//tensorflow/core/profiler/internal:profiler_factory", "//tensorflow/core/profiler/internal:profiler_interface", "//tensorflow/core/profiler/internal:traceme_recorder", @@ -50,14 +51,17 @@ tf_cc_test( "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core/profiler:profiler_options_proto_cc", "//tensorflow/core/profiler/internal:profiler_interface", "//tensorflow/core/profiler/lib:profiler_session", "//tensorflow/core/profiler/lib:traceme", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler/utils:xplane_schema", "//tensorflow/core/profiler/utils:xplane_visitor", + "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", - "@com_google_googletest//:gtest_main", + "@com_google_googletest//:gtest", ], ) @@ -67,17 +71,14 @@ cc_library( copts = ["-fexceptions"], features = ["-use_header_modules"], deps = [ - "//tensorflow/core:core_cpu_lib", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", + "//tensorflow/core/profiler:profiler_options_proto_cc", 
"//tensorflow/core/profiler/internal:profiler_factory", "//tensorflow/core/profiler/internal:profiler_interface", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", - "//tensorflow/core/profiler/utils:xplane_schema", - "//tensorflow/core/profiler/utils:xplane_utils", "//tensorflow/python/profiler/internal:python_hooks", - "@com_google_absl//absl/strings", ], alwayslink = True, ) @@ -86,9 +87,12 @@ cc_library( name = "metadata_collector", srcs = ["metadata_collector.cc"], deps = [ + "//tensorflow/compiler/xla/service:hlo_proto_cc", "//tensorflow/compiler/xla/service/gpu:gpu_debug_info_manager", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/profiler:profiler_options_proto_cc", "//tensorflow/core/profiler/internal:profiler_factory", "//tensorflow/core/profiler/internal:profiler_interface", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", diff --git a/tensorflow/core/profiler/internal/cpu/host_tracer.cc b/tensorflow/core/profiler/internal/cpu/host_tracer.cc index 753d8c53b9c..be1a7a2777b 100644 --- a/tensorflow/core/profiler/internal/cpu/host_tracer.cc +++ b/tensorflow/core/profiler/internal/cpu/host_tracer.cc @@ -12,18 +12,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include +#include #include #include #include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" #include "tensorflow/core/framework/step_stats.pb.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/env_time.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/internal/cpu/host_tracer_utils.h" #include "tensorflow/core/profiler/internal/profiler_factory.h" #include "tensorflow/core/profiler/internal/profiler_interface.h" #include "tensorflow/core/profiler/internal/traceme_recorder.h" +#include "tensorflow/core/profiler/profiler_options.pb.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" #include "tensorflow/core/profiler/utils/xplane_utils.h" @@ -119,8 +124,8 @@ Status HostTracer::CollectData(RunMetadata* run_metadata) { std::vector parts = absl::StrSplit(event.name, kUserMetadataMarker); if (parts.size() >= 2) { - ns->set_node_name(string(parts[0])); - ns->set_timeline_label(string(parts[1])); + ns->set_node_name(std::string(parts[0])); + ns->set_timeline_label(std::string(parts[1])); } else { ns->set_node_name(std::move(event.name)); } diff --git a/tensorflow/core/profiler/internal/cpu/host_tracer_test.cc b/tensorflow/core/profiler/internal/cpu/host_tracer_test.cc index e32ba92de66..499b7b6b564 100644 --- a/tensorflow/core/profiler/internal/cpu/host_tracer_test.cc +++ b/tensorflow/core/profiler/internal/cpu/host_tracer_test.cc @@ -12,17 +12,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include +#include #include #include -#include +#include "absl/strings/string_view.h" #include "absl/types/optional.h" #include "tensorflow/core/framework/step_stats.pb.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/internal/profiler_interface.h" #include "tensorflow/core/profiler/lib/profiler_session.h" #include "tensorflow/core/profiler/lib/traceme.h" +#include "tensorflow/core/profiler/profiler_options.pb.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" #include "tensorflow/core/profiler/utils/xplane_visitor.h" @@ -38,13 +44,13 @@ namespace { using ::testing::UnorderedElementsAre; -NodeExecStats MakeNodeStats(const string& name, uint32 thread_id, - const string& label = "") { +NodeExecStats MakeNodeStats(absl::string_view name, uint32 thread_id, + absl::string_view label = "") { NodeExecStats ns; - ns.set_node_name(name); + ns.set_node_name(std::string(name)); ns.set_thread_id(thread_id); if (!label.empty()) { - ns.set_timeline_label(label); + ns.set_timeline_label(std::string(label)); } return ns; } @@ -109,7 +115,7 @@ TEST(HostTracerTest, CollectsTraceMeEventsAsRunMetadata) { TEST(HostTracerTest, CollectsTraceMeEventsAsXSpace) { uint32 thread_id; - string thread_name = "MyThreadName"; + std::string thread_name = "MyThreadName"; XSpace space; // We start a thread with a known and controled name. 
As of the time of diff --git a/tensorflow/core/profiler/internal/cpu/host_tracer_utils.cc b/tensorflow/core/profiler/internal/cpu/host_tracer_utils.cc index a4709ae2113..2e5d8ac1770 100644 --- a/tensorflow/core/profiler/internal/cpu/host_tracer_utils.cc +++ b/tensorflow/core/profiler/internal/cpu/host_tracer_utils.cc @@ -14,10 +14,13 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/profiler/internal/cpu/host_tracer_utils.h" +#include #include #include #include "absl/container/flat_hash_map.h" +#include "absl/strings/string_view.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/internal/parse_annotation.h" #include "tensorflow/core/profiler/internal/traceme_recorder.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" diff --git a/tensorflow/core/profiler/internal/cpu/metadata_collector.cc b/tensorflow/core/profiler/internal/cpu/metadata_collector.cc index c6aa7840920..58da20ae3c5 100644 --- a/tensorflow/core/profiler/internal/cpu/metadata_collector.cc +++ b/tensorflow/core/profiler/internal/cpu/metadata_collector.cc @@ -13,17 +13,23 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include +#include #include #include #include "tensorflow/compiler/xla/service/gpu/gpu_debug_info_manager.h" +#include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/status.h" #include "tensorflow/core/profiler/internal/profiler_factory.h" #include "tensorflow/core/profiler/internal/profiler_interface.h" +#include "tensorflow/core/profiler/profiler_options.pb.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/xplane_builder.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" #include "tensorflow/core/profiler/utils/xplane_utils.h" +#include "tensorflow/core/protobuf/config.pb.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/internal/cpu/python_tracer.cc b/tensorflow/core/profiler/internal/cpu/python_tracer.cc index aa259f53cfa..d684cb8f768 100644 --- a/tensorflow/core/profiler/internal/cpu/python_tracer.cc +++ b/tensorflow/core/profiler/internal/cpu/python_tracer.cc @@ -12,18 +12,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include -#include +#include -#include "absl/strings/str_split.h" -#include "tensorflow/core/framework/step_stats.pb.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/platform/env_time.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/status.h" #include "tensorflow/core/profiler/internal/profiler_factory.h" #include "tensorflow/core/profiler/internal/profiler_interface.h" +#include "tensorflow/core/profiler/profiler_options.pb.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" +#include "tensorflow/core/protobuf/config.pb.h" #include "tensorflow/core/util/env_var.h" #include "tensorflow/python/profiler/internal/python_hooks.h" diff --git a/tensorflow/core/profiler/internal/gpu/cupti_tracer.cc b/tensorflow/core/profiler/internal/gpu/cupti_tracer.cc index 1110e103d57..9119c3d5d0b 100644 --- a/tensorflow/core/profiler/internal/gpu/cupti_tracer.cc +++ b/tensorflow/core/profiler/internal/gpu/cupti_tracer.cc @@ -17,10 +17,8 @@ limitations under the License. #include "absl/container/flat_hash_map.h" #include "absl/container/node_hash_map.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/gtl/cleanup.h" -#include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/mem.h" @@ -286,19 +284,14 @@ void CUPTIAPI FreeCuptiActivityBuffer(CUcontext context, uint32_t stream_id, << reinterpret_cast(buffer) << std::dec << " size: " << size << " valid_size: " << valid_size; - // Ensure buffer is free when this function returns. 
- auto buffer_cleanup = - gtl::MakeCleanup([buffer] { port::AlignedFree(buffer); }); + if (valid_size > 0) { + VLOG(3) << "Activity profile for stream " << stream_id; - if (valid_size <= 0) { - return; + CuptiTracer *cupti_tracer = CuptiTracer::GetCuptiTracerSingleton(); + cupti_tracer->ProcessActivityBuffer(context, stream_id, buffer, valid_size) + .IgnoreError(); } - - VLOG(3) << "Activity profile for stream " << stream_id; - - CuptiTracer *cupti_tracer = CuptiTracer::GetCuptiTracerSingleton(); - cupti_tracer->ProcessActivityBuffer(context, stream_id, buffer, valid_size) - .IgnoreError(); + port::AlignedFree(buffer); } void AddKernelEventUponApiExit(CuptiTraceCollector *collector, uint32 device_id, @@ -984,7 +977,7 @@ class CudaEventRecorder { using StreamKey = std::pair; absl::node_hash_map context_infos_; - absl::flat_hash_map> stream_infos_; + absl::flat_hash_map stream_infos_; }; // This hook uses cuda events to measure device side activities. diff --git a/tensorflow/core/profiler/internal/gpu/cupti_tracer.h b/tensorflow/core/profiler/internal/gpu/cupti_tracer.h index c6e0c50b093..e236afc5c41 100644 --- a/tensorflow/core/profiler/internal/gpu/cupti_tracer.h +++ b/tensorflow/core/profiler/internal/gpu/cupti_tracer.h @@ -21,9 +21,9 @@ limitations under the License. 
#include "absl/container/node_hash_set.h" #include "absl/types/optional.h" #include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/status.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/internal/gpu/cupti_interface.h" diff --git a/tensorflow/core/profiler/internal/gpu/device_tracer.cc b/tensorflow/core/profiler/internal/gpu/device_tracer.cc index 3fb502dcde2..ac6662c8432 100644 --- a/tensorflow/core/profiler/internal/gpu/device_tracer.cc +++ b/tensorflow/core/profiler/internal/gpu/device_tracer.cc @@ -27,9 +27,9 @@ limitations under the License. #include "absl/strings/str_format.h" #include "absl/strings/str_join.h" #include "tensorflow/core/framework/step_stats.pb.h" -#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/abi.h" #include "tensorflow/core/platform/env_time.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/thread_annotations.h" diff --git a/tensorflow/core/profiler/internal/parse_annotation.cc b/tensorflow/core/profiler/internal/parse_annotation.cc index 2a3fa3f8454..32c26befa3d 100644 --- a/tensorflow/core/profiler/internal/parse_annotation.cc +++ b/tensorflow/core/profiler/internal/parse_annotation.cc @@ -15,6 +15,9 @@ limitations under the License. 
#include "tensorflow/core/profiler/internal/parse_annotation.h" #include +#include +#include +#include #include "absl/strings/ascii.h" #include "absl/strings/str_split.h" diff --git a/tensorflow/core/profiler/internal/parse_annotation.h b/tensorflow/core/profiler/internal/parse_annotation.h index 6c2e536962b..bb0f12217d3 100644 --- a/tensorflow/core/profiler/internal/parse_annotation.h +++ b/tensorflow/core/profiler/internal/parse_annotation.h @@ -16,7 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_CORE_PROFILER_INTERNAL_PARSE_ANNOTATION_H_ #define TENSORFLOW_CORE_PROFILER_INTERNAL_PARSE_ANNOTATION_H_ -#include #include #include "absl/strings/string_view.h" diff --git a/tensorflow/core/profiler/internal/parse_annotation_test.cc b/tensorflow/core/profiler/internal/parse_annotation_test.cc index 4d4a2d5ea95..e5d876ac5af 100644 --- a/tensorflow/core/profiler/internal/parse_annotation_test.cc +++ b/tensorflow/core/profiler/internal/parse_annotation_test.cc @@ -14,6 +14,9 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/profiler/internal/parse_annotation.h" +#include + +#include "absl/strings/string_view.h" #include "tensorflow/core/platform/test.h" namespace tensorflow { diff --git a/tensorflow/core/profiler/internal/profiler_factory.cc b/tensorflow/core/profiler/internal/profiler_factory.cc index e2bae59b892..5152e79bdc8 100644 --- a/tensorflow/core/profiler/internal/profiler_factory.cc +++ b/tensorflow/core/profiler/internal/profiler_factory.cc @@ -14,8 +14,14 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/core/profiler/internal/profiler_factory.h" +#include +#include +#include + #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/thread_annotations.h" +#include "tensorflow/core/profiler/internal/profiler_interface.h" +#include "tensorflow/core/profiler/profiler_options.pb.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/internal/profiler_factory.h b/tensorflow/core/profiler/internal/profiler_factory.h index 6bcdcf28c3c..c223d7275d9 100644 --- a/tensorflow/core/profiler/internal/profiler_factory.h +++ b/tensorflow/core/profiler/internal/profiler_factory.h @@ -19,6 +19,7 @@ limitations under the License. #include #include "tensorflow/core/profiler/internal/profiler_interface.h" +#include "tensorflow/core/profiler/profiler_options.pb.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/internal/profiler_interface.h b/tensorflow/core/profiler/internal/profiler_interface.h index 2605e834f09..9fe85e38652 100644 --- a/tensorflow/core/profiler/internal/profiler_interface.h +++ b/tensorflow/core/profiler/internal/profiler_interface.h @@ -15,8 +15,7 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_PROFILER_INTERNAL_PROFILER_INTERFACE_H_ #define TENSORFLOW_CORE_PROFILER_INTERNAL_PROFILER_INTERFACE_H_ -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/profiler/profiler_options.pb.h" +#include "tensorflow/core/platform/status.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/protobuf/config.pb.h" diff --git a/tensorflow/core/profiler/internal/scoped_annotation_test.cc b/tensorflow/core/profiler/internal/scoped_annotation_test.cc index 70a627fd640..50c1244b9ee 100644 --- a/tensorflow/core/profiler/internal/scoped_annotation_test.cc +++ b/tensorflow/core/profiler/internal/scoped_annotation_test.cc @@ -15,10 +15,11 @@ limitations under the License. #include "tensorflow/core/profiler/lib/scoped_annotation.h" +#include + #include "absl/strings/str_cat.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" -#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/internal/annotation_stack.h" namespace tensorflow { @@ -48,11 +49,13 @@ TEST(ScopedAnnotation, Simple) { EXPECT_EQ(AnnotationStack::Get(), ""); // not enabled } -string GenerateRandomString(int length) { return string(length, 'a'); } +std::string GenerateRandomString(int length) { + return std::string(length, 'a'); +} void BM_ScopedAnnotationDisabled(int iters, int annotation_size) { testing::StopTiming(); - string annotation = GenerateRandomString(annotation_size); + std::string annotation = GenerateRandomString(annotation_size); testing::StartTiming(); for (int i = 0; i < iters; i++) { ScopedAnnotation trace(annotation); @@ -64,7 +67,7 @@ BENCHMARK(BM_ScopedAnnotationDisabled)->Arg(8)->Arg(32)->Arg(128); void BM_ScopedAnnotationEnabled(int iters, int annotation_size) { testing::StopTiming(); - string annotation = GenerateRandomString(annotation_size); + std::string annotation = GenerateRandomString(annotation_size); AnnotationStack::Enable(true); 
testing::StartTiming(); for (int i = 0; i < iters; i++) { @@ -78,7 +81,7 @@ BENCHMARK(BM_ScopedAnnotationEnabled)->Arg(8)->Arg(32)->Arg(128); void BM_ScopedAnnotationEnabled_Nested(int iters, int annotation_size) { testing::StopTiming(); - string annotation = GenerateRandomString(annotation_size); + std::string annotation = GenerateRandomString(annotation_size); AnnotationStack::Enable(true); testing::StartTiming(); for (int i = 0; i < iters; i++) { diff --git a/tensorflow/core/profiler/internal/tfprof_stats.cc b/tensorflow/core/profiler/internal/tfprof_stats.cc index 22b3bdc2042..56e6e2bcba3 100644 --- a/tensorflow/core/profiler/internal/tfprof_stats.cc +++ b/tensorflow/core/profiler/internal/tfprof_stats.cc @@ -58,7 +58,6 @@ TFStats::TFStats(std::unique_ptr graph, ckpt_reader_(std::move(ckpt_reader)) { CHECK(graph) << "Must at least have GraphDef"; - absl::PrintF("Parsing Inputs...\n"); AddGraph(std::move(graph)); if (run_meta && run_meta->has_step_stats()) { AddRunMeta(0, std::move(run_meta)); diff --git a/tensorflow/core/profiler/internal/traceme_recorder.cc b/tensorflow/core/profiler/internal/traceme_recorder.cc index 365e3992bc3..268585bde8c 100644 --- a/tensorflow/core/profiler/internal/traceme_recorder.cc +++ b/tensorflow/core/profiler/internal/traceme_recorder.cc @@ -16,8 +16,18 @@ limitations under the License. 
#include +#include +#include +#include +#include +#include + +#include "absl/container/flat_hash_map.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/types.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/internal/traceme_recorder.h b/tensorflow/core/profiler/internal/traceme_recorder.h index 8b5b32cf4bc..1da7d4cebb1 100644 --- a/tensorflow/core/profiler/internal/traceme_recorder.h +++ b/tensorflow/core/profiler/internal/traceme_recorder.h @@ -15,8 +15,6 @@ limitations under the License. #ifndef TENSORFLOW_CORE_PROFILER_INTERNAL_TRACEME_RECORDER_H_ #define TENSORFLOW_CORE_PROFILER_INTERNAL_TRACEME_RECORDER_H_ -#include - #include #include diff --git a/tensorflow/core/profiler/internal/traceme_recorder_test.cc b/tensorflow/core/profiler/internal/traceme_recorder_test.cc index 90478881361..8d7abc94e8f 100644 --- a/tensorflow/core/profiler/internal/traceme_recorder_test.cc +++ b/tensorflow/core/profiler/internal/traceme_recorder_test.cc @@ -15,19 +15,28 @@ limitations under the License. 
#include "tensorflow/core/profiler/internal/traceme_recorder.h" #include +#include +#include +#include +#include +#include #include -#include #include "absl/strings/str_cat.h" -#include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/env_time.h" +#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/notification.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/threadpool.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { namespace profiler { namespace { +using ::testing::ElementsAre; + MATCHER_P(Named, name, "") { return arg.name == name; } constexpr static uint64 kNanosInSec = 1000000000; @@ -45,7 +54,7 @@ TEST(RecorderTest, SingleThreaded) { ASSERT_EQ(results.size(), 1); EXPECT_THAT(results[0].events, - ::testing::ElementsAre(Named("during1"), Named("during2"))); + ElementsAre(Named("during1"), Named("during2"))); } void SpinNanos(int nanos) { diff --git a/tensorflow/core/profiler/lib/BUILD b/tensorflow/core/profiler/lib/BUILD index 33486685fb8..6316fd118fc 100644 --- a/tensorflow/core/profiler/lib/BUILD +++ b/tensorflow/core/profiler/lib/BUILD @@ -47,18 +47,19 @@ cc_library( deps = [ "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core:framework", "//tensorflow/core/platform", "//tensorflow/core/profiler/internal:profiler_interface", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler:profiler_options_proto_cc", - "//tensorflow/core/util:ptr_util", + "@com_google_absl//absl/memory", + "//tensorflow/core:protos_all_cc", ] + if_not_android([ ":profiler_utils", "//tensorflow/core/profiler/internal:profiler_factory", "//tensorflow/core/profiler/utils:derived_timeline", "//tensorflow/core/profiler/utils:group_events", "//tensorflow/core/profiler/utils:xplane_utils", + "//tensorflow/core/profiler/utils:xplane_schema", ]), alwayslink = True, ) @@ -110,6 +111,7 @@ 
cc_library( ":traceme", "//tensorflow/core:lib", "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", ], ) diff --git a/tensorflow/core/profiler/lib/annotated_traceme.h b/tensorflow/core/profiler/lib/annotated_traceme.h index f40c1e9ad92..c3257e2adbe 100644 --- a/tensorflow/core/profiler/lib/annotated_traceme.h +++ b/tensorflow/core/profiler/lib/annotated_traceme.h @@ -15,7 +15,11 @@ limitations under the License. #ifndef TENSORFLOW_CORE_PROFILER_LIB_ANNOTATED_TRACEME_H_ #define TENSORFLOW_CORE_PROFILER_LIB_ANNOTATED_TRACEME_H_ +#include + #include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/lib/scoped_annotation.h" diff --git a/tensorflow/core/profiler/lib/profiler_session.cc b/tensorflow/core/profiler/lib/profiler_session.cc index b907f74179c..9783cd14f95 100644 --- a/tensorflow/core/profiler/lib/profiler_session.cc +++ b/tensorflow/core/profiler/lib/profiler_session.cc @@ -15,19 +15,28 @@ limitations under the License. 
#include "tensorflow/core/profiler/lib/profiler_session.h" -#include "tensorflow/core/lib/core/errors.h" +#include + +#include "absl/memory/memory.h" #include "tensorflow/core/platform/env_time.h" +#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/platform.h" +#include "tensorflow/core/platform/status.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/internal/profiler_interface.h" +#include "tensorflow/core/profiler/profiler_options.pb.h" +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" +#include "tensorflow/core/protobuf/config.pb.h" +#include "tensorflow/core/protobuf/error_codes.pb.h" #include "tensorflow/core/util/env_var.h" -#include "tensorflow/core/util/ptr_util.h" #if !defined(IS_MOBILE_PLATFORM) #include "tensorflow/core/profiler/internal/profiler_factory.h" #include "tensorflow/core/profiler/lib/profiler_utils.h" #include "tensorflow/core/profiler/utils/derived_timeline.h" #include "tensorflow/core/profiler/utils/group_events.h" +#include "tensorflow/core/profiler/utils/xplane_schema.h" #include "tensorflow/core/profiler/utils/xplane_utils.h" #endif @@ -44,7 +53,7 @@ ProfileOptions GetOptions(const ProfileOptions& opts) { /*static*/ std::unique_ptr ProfilerSession::Create( const ProfileOptions& options) { - return WrapUnique(new ProfilerSession(options)); + return absl::WrapUnique(new ProfilerSession(options)); } /*static*/ std::unique_ptr ProfilerSession::Create() { diff --git a/tensorflow/core/profiler/lib/profiler_session.h b/tensorflow/core/profiler/lib/profiler_session.h index 1c20876d9d0..6f92b047eb7 100644 --- a/tensorflow/core/profiler/lib/profiler_session.h +++ b/tensorflow/core/profiler/lib/profiler_session.h @@ -18,12 +18,14 @@ limitations under the License. 
#include #include -#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/status.h" #include "tensorflow/core/platform/thread_annotations.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/internal/profiler_interface.h" #include "tensorflow/core/profiler/profiler_options.pb.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" +#include "tensorflow/core/protobuf/config.pb.h" namespace tensorflow { diff --git a/tensorflow/core/profiler/lib/scoped_annotation.h b/tensorflow/core/profiler/lib/scoped_annotation.h index 61b0cf42dd6..2cad5fd4708 100644 --- a/tensorflow/core/profiler/lib/scoped_annotation.h +++ b/tensorflow/core/profiler/lib/scoped_annotation.h @@ -18,6 +18,7 @@ limitations under the License. #include #include +#include #include "absl/strings/string_view.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/profiler/lib/traceme.h b/tensorflow/core/profiler/lib/traceme.h index 8b42f187850..af93ac11b1e 100644 --- a/tensorflow/core/profiler/lib/traceme.h +++ b/tensorflow/core/profiler/lib/traceme.h @@ -15,7 +15,11 @@ limitations under the License. #ifndef TENSORFLOW_CORE_PROFILER_LIB_TRACEME_H_ #define TENSORFLOW_CORE_PROFILER_LIB_TRACEME_H_ +#include +#include + #include "absl/strings/match.h" +#include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" #include "absl/strings/strip.h" #include "tensorflow/core/platform/env_time.h" diff --git a/tensorflow/core/profiler/profiler_service.proto b/tensorflow/core/profiler/profiler_service.proto index 37ca4084e42..a096a10efe2 100644 --- a/tensorflow/core/profiler/profiler_service.proto +++ b/tensorflow/core/profiler/profiler_service.proto @@ -10,6 +10,10 @@ import "tensorflow/core/profiler/profiler_service_monitor_result.proto"; service ProfilerService { // Starts a profiling session, blocks until it completes, and returns data. 
rpc Profile(ProfileRequest) returns (ProfileResponse) {} + // Signal to terminate the Profile rpc for an ongoing profiling session. + // The Profile rpc will return successfully and prematurely without timeout. + // This is used by programmatic mode to end the session in workers. + rpc Terminate(TerminateRequest) returns (TerminateResponse) {} // Collects profiling data and returns user-friendly metrics. rpc Monitor(MonitorRequest) returns (MonitorResponse) {} } @@ -81,6 +85,13 @@ message ProfileResponse { // next-field: 8 } +message TerminateRequest { + // Which session id to terminate. + string session_id = 1; +} + +message TerminateResponse {} + message MonitorRequest { // Duration for which to profile between each update. uint64 duration_ms = 1; diff --git a/tensorflow/core/profiler/protobuf/overview_page.proto b/tensorflow/core/profiler/protobuf/overview_page.proto index 8c83dbd0871..018aa759cc5 100644 --- a/tensorflow/core/profiler/protobuf/overview_page.proto +++ b/tensorflow/core/profiler/protobuf/overview_page.proto @@ -84,6 +84,9 @@ message OverviewPageRecommendation { // A statement for output that recommends the next steps for investigating the // bottleneck. string output_statement = 9; + // A statement that recommends the next steps for investigating tf-function + // related bottlenecks (it is HTML so that it can link to other tools/docs). + string tf_function_statement_html = 10; // A list of tips for improving host performance. repeated OverviewPageTip host_tips = 3; // A list of tips for improving device performance. diff --git a/tensorflow/core/profiler/protobuf/tf_function.proto b/tensorflow/core/profiler/protobuf/tf_function.proto index fe07c00c8d3..1f5e1530475 100644 --- a/tensorflow/core/profiler/protobuf/tf_function.proto +++ b/tensorflow/core/profiler/protobuf/tf_function.proto @@ -49,6 +49,9 @@ message TfFunction { int64 total_tracing_count = 2; // Compiler used to compile this function. 
TfFunctionCompiler compiler = 3; + // Percentage of time spent in the expensive calls to this function in the + // profiled period. + double expensive_call_percent = 4; } // Statistics for all tf-functions. diff --git a/tensorflow/core/profiler/rpc/BUILD b/tensorflow/core/profiler/rpc/BUILD index d8af53fe8f9..1e572dfd9bd 100644 --- a/tensorflow/core/profiler/rpc/BUILD +++ b/tensorflow/core/profiler/rpc/BUILD @@ -14,14 +14,12 @@ cc_library( ["//tensorflow_serving/model_servers:__pkg__"], ), deps = [ - "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core/profiler:profiler_service_proto_cc", "//tensorflow/core/profiler/convert:xplane_to_profile_response", "//tensorflow/core/profiler/lib:profiler_session_headers", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", - "@com_google_absl//absl/container:flat_hash_set", - "@com_google_absl//absl/strings", + "@com_google_absl//absl/memory", tf_grpc_cc_dependency(), ], ) @@ -36,7 +34,6 @@ cc_library( ], deps = [ ":profiler_service_impl", - "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core/profiler:profiler_service_proto_cc", "@com_google_absl//absl/strings", diff --git a/tensorflow/core/profiler/rpc/client/BUILD b/tensorflow/core/profiler/rpc/client/BUILD index 43ebb35230c..609f98aa6c1 100644 --- a/tensorflow/core/profiler/rpc/client/BUILD +++ b/tensorflow/core/profiler/rpc/client/BUILD @@ -11,9 +11,10 @@ cc_library( visibility = ["//tensorflow/python/profiler/internal:__pkg__"], deps = [ ":save_profile", - "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", "//tensorflow/core/profiler:profiler_analysis_proto_cc", + "//tensorflow/core/profiler:profiler_options_proto_cc", "//tensorflow/core/profiler:profiler_service_proto_cc", "@com_google_absl//absl/strings", tf_grpc_cc_dependency(), @@ -28,8 +29,8 @@ cc_library( deps = [ "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", 
"//tensorflow/core/profiler:profiler_service_proto_cc", - "//tensorflow/core/profiler/protobuf:trace_events_proto_cc", "@com_google_absl//absl/strings", "@com_google_absl//absl/time", ], diff --git a/tensorflow/core/profiler/rpc/client/capture_profile.cc b/tensorflow/core/profiler/rpc/client/capture_profile.cc index ebc74c9252c..a8642aff54a 100644 --- a/tensorflow/core/profiler/rpc/client/capture_profile.cc +++ b/tensorflow/core/profiler/rpc/client/capture_profile.cc @@ -14,19 +14,25 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/profiler/rpc/client/capture_profile.h" +#include +#include +#include #include #include "grpcpp/grpcpp.h" -#include "absl/strings/escaping.h" -#include "absl/strings/match.h" #include "absl/strings/numbers.h" +#include "absl/strings/str_join.h" #include "absl/strings/str_split.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/profiler_analysis.grpc.pb.h" +#include "tensorflow/core/profiler/profiler_analysis.pb.h" +#include "tensorflow/core/profiler/profiler_options.pb.h" +#include "tensorflow/core/profiler/profiler_service.grpc.pb.h" +#include "tensorflow/core/profiler/profiler_service.pb.h" #include "tensorflow/core/profiler/rpc/client/save_profile.h" -#include "tensorflow/core/util/events_writer.h" +#include "tensorflow/core/protobuf/error_codes.pb.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/rpc/client/capture_profile.h b/tensorflow/core/profiler/rpc/client/capture_profile.h index 404912ef716..c809d2099ae 100644 --- a/tensorflow/core/profiler/rpc/client/capture_profile.h +++ b/tensorflow/core/profiler/rpc/client/capture_profile.h @@ -17,9 +17,9 @@ 
limitations under the License. #ifndef TENSORFLOW_CORE_PROFILER_RPC_CLIENT_CAPTURE_PROFILE_H_ #define TENSORFLOW_CORE_PROFILER_RPC_CLIENT_CAPTURE_PROFILE_H_ -#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/status.h" #include "tensorflow/core/platform/types.h" -#include "tensorflow/core/profiler/profiler_service.grpc.pb.h" +#include "tensorflow/core/profiler/profiler_options.pb.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/rpc/client/save_profile.cc b/tensorflow/core/profiler/rpc/client/save_profile.cc index ab2e494871c..9cf2e291692 100644 --- a/tensorflow/core/profiler/rpc/client/save_profile.cc +++ b/tensorflow/core/profiler/rpc/client/save_profile.cc @@ -15,20 +15,27 @@ limitations under the License. #include "tensorflow/core/profiler/rpc/client/save_profile.h" -#include -#include +#include +#include +#include +#include #include #include "absl/strings/match.h" #include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" #include "absl/strings/strip.h" #include "absl/time/clock.h" #include "absl/time/time.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/io/compression.h" +#include "tensorflow/core/lib/io/zlib_compression_options.h" +#include "tensorflow/core/lib/io/zlib_outputbuffer.h" #include "tensorflow/core/platform/env.h" -#include "tensorflow/core/platform/protobuf.h" -#include "tensorflow/core/profiler/protobuf/trace_events.pb.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/file_system.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow/core/profiler/profiler_service.pb.h" + // Windows.h #defines ERROR, but it is also used in // tensorflow/core/util/event.proto #undef ERROR @@ -56,9 +63,9 @@ string ProfilerJoinPathImpl(std::initializer_list paths) { path = absl::StripPrefix(path, kPathSep); if (absl::EndsWith(result, kPathSep)) { - 
strings::StrAppend(&result, path); + absl::StrAppend(&result, path); } else { - strings::StrAppend(&result, kPathSep, path); + absl::StrAppend(&result, kPathSep, path); } } @@ -75,7 +82,8 @@ string ProfilerJoinPath(const T&... args) { constexpr char kProtoTraceFileName[] = "trace"; constexpr char kTfStatsHelperSuffix[] = "tf_stats_helper_result"; -Status DumpToolDataToLogDirectory(StringPiece run_dir, const string& host, +Status DumpToolDataToLogDirectory(absl::string_view run_dir, + absl::string_view host, const ProfileToolData& tool, std::ostream* os) { // Don't save the intermediate results for combining the per host tool data. diff --git a/tensorflow/core/profiler/rpc/client/save_profile.h b/tensorflow/core/profiler/rpc/client/save_profile.h index d9070f06c71..2e8fc96390a 100644 --- a/tensorflow/core/profiler/rpc/client/save_profile.h +++ b/tensorflow/core/profiler/rpc/client/save_profile.h @@ -16,7 +16,10 @@ limitations under the License. #ifndef TENSORFLOW_CORE_PROFILER_RPC_CLIENT_SAVE_PROFILE_H_ #define TENSORFLOW_CORE_PROFILER_RPC_CLIENT_SAVE_PROFILE_H_ -#include "tensorflow/core/lib/core/status.h" +#include + +#include "tensorflow/core/platform/status.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/profiler_service.pb.h" namespace tensorflow { diff --git a/tensorflow/core/profiler/rpc/profiler_server.cc b/tensorflow/core/profiler/rpc/profiler_server.cc index 36f0f9efad9..f05a829fb93 100644 --- a/tensorflow/core/profiler/rpc/profiler_server.cc +++ b/tensorflow/core/profiler/rpc/profiler_server.cc @@ -16,18 +16,19 @@ limitations under the License. 
#include "tensorflow/core/profiler/rpc/profiler_server.h" #include -#include +#include #include "grpcpp/grpcpp.h" #include "absl/strings/str_cat.h" -#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/profiler_service.grpc.pb.h" #include "tensorflow/core/profiler/rpc/profiler_service_impl.h" -#include "tensorflow/core/util/ptr_util.h" namespace tensorflow { void ProfilerServer::StartProfilerServer(int32 port) { - string server_address = absl::StrCat("0.0.0.0:", port); + std::string server_address = absl::StrCat("0.0.0.0:", port); service_ = CreateProfilerService(); ::grpc::ServerBuilder builder; builder.AddListeningPort(server_address, ::grpc::InsecureServerCredentials()); diff --git a/tensorflow/core/profiler/rpc/profiler_service_impl.cc b/tensorflow/core/profiler/rpc/profiler_service_impl.cc index 8f1be23594a..0a234d7e4da 100644 --- a/tensorflow/core/profiler/rpc/profiler_service_impl.cc +++ b/tensorflow/core/profiler/rpc/profiler_service_impl.cc @@ -15,19 +15,24 @@ limitations under the License. 
#include "tensorflow/core/profiler/rpc/profiler_service_impl.h" +#include + #include "grpcpp/support/status.h" -#include "absl/container/flat_hash_set.h" -#include "absl/strings/str_cat.h" -#include "absl/strings/str_join.h" -#include "absl/strings/string_view.h" -#include "tensorflow/core/lib/core/errors.h" +#include "absl/container/flat_hash_map.h" +#include "absl/memory/memory.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/env_time.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/status.h" #include "tensorflow/core/profiler/convert/xplane_to_profile_response.h" #include "tensorflow/core/profiler/internal/profiler_interface.h" #include "tensorflow/core/profiler/lib/profiler_session.h" +#include "tensorflow/core/profiler/profiler_service.grpc.pb.h" +#include "tensorflow/core/profiler/profiler_service.pb.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" -#include "tensorflow/core/util/ptr_util.h" namespace tensorflow { namespace { @@ -61,11 +66,16 @@ class ProfilerServiceImpl : public grpc::ProfilerService::Service { } Env* env = Env::Default(); - for (size_t i = 0; i < req->duration_ms(); ++i) { + for (uint64 i = 0; i < req->duration_ms(); ++i) { env->SleepForMicroseconds(EnvTime::kMillisToMicros); if (ctx->IsCancelled()) { return ::grpc::Status::CANCELLED; } + if (TF_PREDICT_FALSE(IsStopped(req->session_id()))) { + mutex_lock lock(mutex_); + stop_signals_per_session_.erase(req->session_id()); + break; + } } status = CollectDataToResponse(*req, profiler.get(), response); @@ -76,12 +86,31 @@ class ProfilerServiceImpl : public grpc::ProfilerService::Service { return ::grpc::Status::OK; } + + ::grpc::Status Terminate(::grpc::ServerContext* ctx, + const TerminateRequest* req, + TerminateResponse* response) override { + mutex_lock lock(mutex_); + 
stop_signals_per_session_[req->session_id()] = true; + return ::grpc::Status::OK; + } + + private: + bool IsStopped(const std::string& session_id) { + mutex_lock lock(mutex_); + auto it = stop_signals_per_session_.find(session_id); + return it != stop_signals_per_session_.end() && it->second; + } + + mutex mutex_; + absl::flat_hash_map stop_signals_per_session_ + GUARDED_BY(mutex_); }; } // namespace std::unique_ptr CreateProfilerService() { - return MakeUnique(); + return absl::make_unique(); } } // namespace tensorflow diff --git a/tensorflow/core/profiler/rpc/profiler_service_impl.h b/tensorflow/core/profiler/rpc/profiler_service_impl.h index 4a7636cf101..00a850acbf2 100644 --- a/tensorflow/core/profiler/rpc/profiler_service_impl.h +++ b/tensorflow/core/profiler/rpc/profiler_service_impl.h @@ -15,10 +15,8 @@ limitations under the License. #ifndef TENSORFLOW_CORE_PROFILER_RPC_PROFILER_SERVICE_IMPL_H_ #define TENSORFLOW_CORE_PROFILER_RPC_PROFILER_SERVICE_IMPL_H_ -#include "grpcpp/grpcpp.h" -#include "grpcpp/server_context.h" -#include "grpcpp/support/status.h" -#include "tensorflow/core/profiler/lib/profiler_session.h" +#include + #include "tensorflow/core/profiler/profiler_service.grpc.pb.h" namespace tensorflow { diff --git a/tensorflow/core/profiler/utils/BUILD b/tensorflow/core/profiler/utils/BUILD index ad26dcc5774..ca20236d63b 100644 --- a/tensorflow/core/profiler/utils/BUILD +++ b/tensorflow/core/profiler/utils/BUILD @@ -30,6 +30,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core/profiler/protobuf:op_metrics_proto_cc", + "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", ], @@ -50,6 +51,14 @@ cc_library( hdrs = ["math_utils.h"], ) +cc_library( + name = "html_utils", + hdrs = ["html_utils.h"], + deps = [ + "@com_google_absl//absl/strings", + ], +) + cc_library( name = "op_metrics_db_utils", srcs = ["op_metrics_db_utils.cc"], @@ -83,7 
+92,6 @@ cc_library( hdrs = ["tf_op_utils.h"], deps = [ "//tensorflow/core:regexp_internal", - "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/strings", ], ) @@ -96,6 +104,7 @@ tf_cc_test( ":tf_op_utils", "//tensorflow/core:test", "//tensorflow/core:test_main", + "@com_google_absl//absl/strings", ], ) @@ -156,6 +165,7 @@ tf_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", + "@com_google_absl//absl/strings", ], ) @@ -170,7 +180,6 @@ cc_library( "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", - "@com_google_absl//absl/types:span", ], ) @@ -196,7 +205,6 @@ tf_cc_test( name = "xplane_utils_test", srcs = ["xplane_utils_test.cc"], deps = [ - ":time_utils", ":xplane_builder", ":xplane_utils", ":xplane_visitor", @@ -205,6 +213,8 @@ tf_cc_test( "//tensorflow/core:test_main", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", ], ) @@ -232,6 +242,7 @@ cc_library( deps = [ ":xplane_schema", ":xplane_visitor", + "//tensorflow/core/profiler/protobuf:xplane_proto_cc", ], ) @@ -243,9 +254,11 @@ cc_library( deps = [ ":tf_op_utils", ":tf_xplane_visitor", + ":xplane_builder", ":xplane_schema", ":xplane_utils", ":xplane_visitor", + "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "@com_google_absl//absl/container:flat_hash_map", @@ -263,10 +276,13 @@ tf_cc_test( ":xplane_builder", ":xplane_schema", ":xplane_utils", + ":xplane_visitor", + "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/types:optional", ], ) @@ -281,10 +297,13 @@ cc_library( 
"//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", + "//tensorflow/core/grappler/costs:cost_estimator", + "//tensorflow/core/grappler/costs:op_context", "//tensorflow/core/grappler/costs:op_level_cost_estimator", "//tensorflow/core/grappler/costs:op_performance_data_cc", - "//tensorflow/core/profiler/protobuf:xplane_proto_cc", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", ], ) @@ -296,6 +315,7 @@ cc_library( ":group_events", ":tf_op_utils", ":tf_xplane_visitor", + ":time_utils", ":timespan", ":trace_utils", ":xplane_builder", @@ -305,8 +325,10 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", + "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", ], ) @@ -321,6 +343,8 @@ tf_cc_test( ":xplane_builder", ":xplane_schema", ":xplane_utils", + ":xplane_visitor", + "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", @@ -347,10 +371,10 @@ cc_library( ":xplane_builder", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "//tensorflow/core/framework:protos_all_cc", "//tensorflow/core/profiler/protobuf:tfstreamz_proto_cc", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", - "@com_google_absl//absl/strings:str_format", ], ) diff --git a/tensorflow/core/profiler/utils/cost_utils.cc b/tensorflow/core/profiler/utils/cost_utils.cc index 754aa655af3..a94f09bb79c 100644 --- a/tensorflow/core/profiler/utils/cost_utils.cc +++ b/tensorflow/core/profiler/utils/cost_utils.cc @@ -15,12 +15,27 @@ limitations under the License. 
#include "tensorflow/core/profiler/utils/cost_utils.h" +#include +#include + +#include "absl/container/flat_hash_set.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_join.h" +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "absl/strings/strip.h" +#include "absl/types/optional.h" #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/grappler/costs/cost_estimator.h" +#include "tensorflow/core/grappler/costs/op_context.h" #include "tensorflow/core/grappler/costs/op_performance_data.pb.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/utils/tf_op_utils.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" +#include "tensorflow/core/profiler/utils/xplane_visitor.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/utils/cost_utils.h b/tensorflow/core/profiler/utils/cost_utils.h index f1095556c2b..a778bca5330 100644 --- a/tensorflow/core/profiler/utils/cost_utils.h +++ b/tensorflow/core/profiler/utils/cost_utils.h @@ -15,12 +15,13 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_PROFILER_UTILS_COST_UTILS_H_ #define TENSORFLOW_CORE_PROFILER_UTILS_COST_UTILS_H_ -#include +#include -#include "absl/strings/string_view.h" +#include "absl/container/flat_hash_set.h" +#include "tensorflow/core/grappler/costs/cost_estimator.h" #include "tensorflow/core/grappler/costs/op_level_cost_estimator.h" #include "tensorflow/core/platform/macros.h" -#include "tensorflow/core/profiler/protobuf/xplane.pb.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/utils/xplane_visitor.h" namespace tensorflow { @@ -46,7 +47,8 @@ class TfOpRoofLineCostEstimator OpRoofLineStats Predict(const XEventVisitor& event); private: - std::set unsupported_ops_; // summary for unsupported ops. + absl::flat_hash_set + unsupported_ops_; // summary for unsupported ops. TF_DISALLOW_COPY_AND_ASSIGN(TfOpRoofLineCostEstimator); }; diff --git a/tensorflow/core/profiler/utils/derived_timeline.cc b/tensorflow/core/profiler/utils/derived_timeline.cc index c99d8e82cb7..112c0977763 100644 --- a/tensorflow/core/profiler/utils/derived_timeline.cc +++ b/tensorflow/core/profiler/utils/derived_timeline.cc @@ -14,15 +14,27 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/core/profiler/utils/derived_timeline.h" +#include +#include +#include + +#include "absl/algorithm/container.h" #include "absl/container/flat_hash_map.h" #include "absl/strings/match.h" +#include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" #include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" +#include "tensorflow/core/profiler/utils/group_events.h" #include "tensorflow/core/profiler/utils/tf_op_utils.h" #include "tensorflow/core/profiler/utils/tf_xplane_visitor.h" +#include "tensorflow/core/profiler/utils/time_utils.h" #include "tensorflow/core/profiler/utils/timespan.h" #include "tensorflow/core/profiler/utils/trace_utils.h" +#include "tensorflow/core/profiler/utils/xplane_builder.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" #include "tensorflow/core/profiler/utils/xplane_utils.h" #include "tensorflow/core/profiler/utils/xplane_visitor.h" diff --git a/tensorflow/core/profiler/utils/derived_timeline.h b/tensorflow/core/profiler/utils/derived_timeline.h index 61b62bdc8da..cd4da7996c5 100644 --- a/tensorflow/core/profiler/utils/derived_timeline.h +++ b/tensorflow/core/profiler/utils/derived_timeline.h @@ -15,7 +15,13 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_PROFILER_UTILS_DERIVED_TIMELINE_H_ #define TENSORFLOW_CORE_PROFILER_UTILS_DERIVED_TIMELINE_H_ +#include +#include + +#include "absl/container/flat_hash_map.h" #include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/group_events.h" #include "tensorflow/core/profiler/utils/xplane_builder.h" diff --git a/tensorflow/core/profiler/utils/derived_timeline_test.cc b/tensorflow/core/profiler/utils/derived_timeline_test.cc index f3e6b66f087..76a0188480a 100644 --- a/tensorflow/core/profiler/utils/derived_timeline_test.cc +++ b/tensorflow/core/profiler/utils/derived_timeline_test.cc @@ -15,8 +15,9 @@ limitations under the License. #include "tensorflow/core/profiler/utils/derived_timeline.h" -#include "absl/strings/match.h" +#include "absl/strings/string_view.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/group_events.h" #include "tensorflow/core/profiler/utils/tf_xplane_visitor.h" @@ -24,6 +25,7 @@ limitations under the License. #include "tensorflow/core/profiler/utils/xplane_builder.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" #include "tensorflow/core/profiler/utils/xplane_utils.h" +#include "tensorflow/core/profiler/utils/xplane_visitor.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/utils/errors.cc b/tensorflow/core/profiler/utils/errors.cc index d829ee06709..9c678e98a43 100644 --- a/tensorflow/core/profiler/utils/errors.cc +++ b/tensorflow/core/profiler/utils/errors.cc @@ -15,6 +15,8 @@ limitations under the License. 
#include "tensorflow/core/profiler/utils/errors.h" +#include "absl/strings/string_view.h" + namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/utils/event_span.cc b/tensorflow/core/profiler/utils/event_span.cc index 1c64f7bf6bb..5e0413c4ba2 100644 --- a/tensorflow/core/profiler/utils/event_span.cc +++ b/tensorflow/core/profiler/utils/event_span.cc @@ -14,14 +14,19 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/profiler/utils/event_span.h" -#include // NOLINT -#include -#include // NOLINT +#include +#include #include +#include "absl/algorithm/container.h" #include "absl/container/flat_hash_map.h" #include "absl/strings/match.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" #include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" +#include "tensorflow/core/profiler/utils/timespan.h" namespace tensorflow { namespace profiler { @@ -269,10 +274,7 @@ void CombineStepEvents(const StepEvents& src, StepEvents* dst) { // Converts from overlapped step-events to non-overlapped step-events. StepEvents ToNonOverlappedStepEvents(const StepEvents& overlapped_step_events) { - auto start_time = std::chrono::steady_clock::now(); StepEvents non_overlapped_step_events; - - // We could parallelize the following loop if necessary. 
for (const auto& step_events : overlapped_step_events) { const auto& step_id = step_events.first; const auto& step_details = step_events.second; @@ -281,12 +283,6 @@ StepEvents ToNonOverlappedStepEvents(const StepEvents& overlapped_step_events) { *non_overlapped_step_events[step_id].MutableEvents() = ToNonOverlappedEvents(step_details.Events()); } - auto end_time = std::chrono::steady_clock::now(); - auto elapsed_time_us = std::chrono::duration_cast( - end_time - start_time); - double elapsed_time_ms = elapsed_time_us.count() / 1000.0; - LOG(INFO) << "Generation of step-events took " << elapsed_time_ms << " ms" - << std::endl; return non_overlapped_step_events; } diff --git a/tensorflow/core/profiler/utils/event_span.h b/tensorflow/core/profiler/utils/event_span.h index 36b31722968..1adc6a75d82 100644 --- a/tensorflow/core/profiler/utils/event_span.h +++ b/tensorflow/core/profiler/utils/event_span.h @@ -16,10 +16,11 @@ limitations under the License. #ifndef TENSORFLOW_CORE_PROFILER_UTILS_EVENT_SPAN_H_ #define TENSORFLOW_CORE_PROFILER_UTILS_EVENT_SPAN_H_ +#include #include #include "absl/container/flat_hash_map.h" -#include "tensorflow/core/platform/logging.h" +#include "absl/strings/string_view.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" #include "tensorflow/core/profiler/utils/timespan.h" diff --git a/tensorflow/core/profiler/utils/group_events.cc b/tensorflow/core/profiler/utils/group_events.cc index 60d12c0862d..42961492225 100644 --- a/tensorflow/core/profiler/utils/group_events.cc +++ b/tensorflow/core/profiler/utils/group_events.cc @@ -15,13 +15,25 @@ limitations under the License. 
#include "tensorflow/core/profiler/utils/group_events.h" -#include +#include +#include +#include +#include +#include +#include +#include +#include "absl/container/flat_hash_map.h" +#include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" +#include "absl/strings/string_view.h" #include "absl/types/optional.h" #include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/tf_op_utils.h" #include "tensorflow/core/profiler/utils/tf_xplane_visitor.h" +#include "tensorflow/core/profiler/utils/xplane_builder.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" #include "tensorflow/core/profiler/utils/xplane_utils.h" #include "tensorflow/core/profiler/utils/xplane_visitor.h" diff --git a/tensorflow/core/profiler/utils/group_events.h b/tensorflow/core/profiler/utils/group_events.h index 1140f2dab8d..4b6fc58e3b8 100644 --- a/tensorflow/core/profiler/utils/group_events.h +++ b/tensorflow/core/profiler/utils/group_events.h @@ -16,9 +16,16 @@ limitations under the License. #ifndef TENSORFLOW_CORE_PROFILER_UTILS_GROUP_EVENTS_H_ #define TENSORFLOW_CORE_PROFILER_UTILS_GROUP_EVENTS_H_ +#include #include +#include +#include #include "absl/container/flat_hash_map.h" +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/xplane_visitor.h" diff --git a/tensorflow/core/profiler/utils/group_events_test.cc b/tensorflow/core/profiler/utils/group_events_test.cc index 6b6a0d2a19d..11996ba4068 100644 --- a/tensorflow/core/profiler/utils/group_events_test.cc +++ b/tensorflow/core/profiler/utils/group_events_test.cc @@ -16,12 +16,15 @@ limitations under the License. 
#include "tensorflow/core/profiler/utils/group_events.h" #include "absl/container/flat_hash_map.h" +#include "absl/types/optional.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/tf_xplane_visitor.h" #include "tensorflow/core/profiler/utils/xplane_builder.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" #include "tensorflow/core/profiler/utils/xplane_utils.h" +#include "tensorflow/core/profiler/utils/xplane_visitor.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/utils/hardware_type_utils.cc b/tensorflow/core/profiler/utils/hardware_type_utils.cc index 75896c03851..e2a4004555b 100644 --- a/tensorflow/core/profiler/utils/hardware_type_utils.cc +++ b/tensorflow/core/profiler/utils/hardware_type_utils.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/utils/html_utils.h b/tensorflow/core/profiler/utils/html_utils.h new file mode 100644 index 00000000000..215d9f51d5b --- /dev/null +++ b/tensorflow/core/profiler/utils/html_utils.h @@ -0,0 +1,36 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CORE_PROFILER_UTILS_HTML_UTILS_H_ +#define TENSORFLOW_CORE_PROFILER_UTILS_HTML_UTILS_H_ + +#include + +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" + +namespace tensorflow { +namespace profiler { + +// Creates a html that links to the given url with the given text. +inline std::string AnchorElement(absl::string_view url, + absl::string_view text) { + return absl::StrCat("", text, ""); +} + +} // namespace profiler +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_PROFILER_UTILS_HTML_UTILS_H_ diff --git a/tensorflow/core/profiler/utils/kernel_stats_utils.cc b/tensorflow/core/profiler/utils/kernel_stats_utils.cc index 14038d5c177..c40c3a89c9c 100644 --- a/tensorflow/core/profiler/utils/kernel_stats_utils.cc +++ b/tensorflow/core/profiler/utils/kernel_stats_utils.cc @@ -15,15 +15,17 @@ limitations under the License. #include "tensorflow/core/profiler/utils/kernel_stats_utils.h" +#include +#include #include #include #include "absl/strings/match.h" #include "absl/strings/numbers.h" -#include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" #include "absl/strings/string_view.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h" namespace tensorflow { @@ -34,15 +36,15 @@ void ParseKernelLaunchParams(absl::string_view xstat_kernel_details, const std::vector params = absl::StrSplit(xstat_kernel_details, absl::ByAnyChar(":\n")); - constexpr uint32_t kNumDimensions = 3; - for (uint32_t dim = 0; dim < kNumDimensions; ++dim) { + constexpr uint32 kNumDimensions = 3; + for (uint32 dim = 0; dim < kNumDimensions; ++dim) { kernel->add_block_dim(1); kernel->add_grid_dim(1); } // Process value pairs. 
- for (uint32_t ii = 0; ii < params.size(); ii += 2) { - uint32_t value = 0; + for (uint32 ii = 0; ii < params.size(); ii += 2) { + uint32 value = 0; if (params[ii] == "registers_per_thread" && absl::SimpleAtoi(params[ii + 1], &value)) { kernel->set_registers_per_thread(value); diff --git a/tensorflow/core/profiler/utils/op_metrics_db_utils.cc b/tensorflow/core/profiler/utils/op_metrics_db_utils.cc index 06307d6d102..863d2f79819 100644 --- a/tensorflow/core/profiler/utils/op_metrics_db_utils.cc +++ b/tensorflow/core/profiler/utils/op_metrics_db_utils.cc @@ -15,8 +15,13 @@ limitations under the License. #include "tensorflow/core/profiler/utils/op_metrics_db_utils.h" +#include +#include + +#include "absl/container/flat_hash_map.h" #include "absl/strings/string_view.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" #include "tensorflow/core/profiler/utils/math_utils.h" #include "tensorflow/core/profiler/utils/tf_op_utils.h" @@ -40,7 +45,7 @@ class DeviceTfOpMetricsDbBuilder : public OpMetricsDbBuilder { /*hlo_module_id=*/0, tf_op_name); if (tf_op_metrics->category().empty()) { tf_op_metrics->set_category( - tf_op_type == kUnknownOp ? "Unknown" : string(tf_op_type)); + tf_op_type == kUnknownOp ? 
"Unknown" : std::string(tf_op_type)); } tf_op_metrics->set_is_eager(device_op_metrics.is_eager()); // The occurrences of a TF-op is the maximum among the occurrences of all @@ -89,8 +94,8 @@ uint64 IdleTimePs(const OpMetricsDb& metrics_db) { void AddIdleOp(OpMetricsDb* db) { uint64 idle_time_ps = IdleTimePs(*db); OpMetrics* metrics = db->add_metrics_db(); - metrics->set_name(string(kIdle)); - metrics->set_category(string(kIdle)); + metrics->set_name(std::string(kIdle)); + metrics->set_category(std::string(kIdle)); metrics->set_occurrences(0); metrics->set_time_ps(idle_time_ps); metrics->set_self_time_ps(idle_time_ps); diff --git a/tensorflow/core/profiler/utils/op_utils.cc b/tensorflow/core/profiler/utils/op_utils.cc index 74ce13def0a..921e0617902 100644 --- a/tensorflow/core/profiler/utils/op_utils.cc +++ b/tensorflow/core/profiler/utils/op_utils.cc @@ -15,8 +15,14 @@ limitations under the License. #include "tensorflow/core/profiler/utils/op_utils.h" +#include +#include + +#include "absl/strings/string_view.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" +#include "tensorflow/core/profiler/utils/tf_op_utils.h" namespace tensorflow { namespace profiler { @@ -69,9 +75,9 @@ void DeviceOpMetricsDbBuilder::EnterOp(uint64 program_id, OpMetrics* op_metrics = LookupOrInsertNewOpMetrics(program_id, name); if (op_metrics->category().empty()) op_metrics->set_category(category == kUnknownOp ? 
"unknown" - : string(category)); + : std::string(category)); if (op_metrics->provenance().empty()) - op_metrics->set_provenance(string(provenance)); + op_metrics->set_provenance(std::string(provenance)); op_metrics->set_is_eager(op_metrics->is_eager() || is_eager); op_metrics->set_occurrences(op_metrics->occurrences() + occurrences); op_metrics->set_time_ps(op_metrics->time_ps() + time_ps); diff --git a/tensorflow/core/profiler/utils/op_utils.h b/tensorflow/core/profiler/utils/op_utils.h index 8aaa0f4f5c2..f94328d1b8d 100644 --- a/tensorflow/core/profiler/utils/op_utils.h +++ b/tensorflow/core/profiler/utils/op_utils.h @@ -16,13 +16,10 @@ limitations under the License. #ifndef TENSORFLOW_CORE_PROFILER_UTILS_OP_UTILS_H_ #define TENSORFLOW_CORE_PROFILER_UTILS_OP_UTILS_H_ -#include - #include "absl/strings/string_view.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" #include "tensorflow/core/profiler/utils/op_metrics_db_utils.h" -#include "tensorflow/core/profiler/utils/tf_op_utils.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/utils/tf_op_utils.cc b/tensorflow/core/profiler/utils/tf_op_utils.cc index 5a4204440a3..630a74c4e47 100644 --- a/tensorflow/core/profiler/utils/tf_op_utils.cc +++ b/tensorflow/core/profiler/utils/tf_op_utils.cc @@ -15,11 +15,14 @@ limitations under the License. 
#include "tensorflow/core/profiler/utils/tf_op_utils.h" +#include +#include + #include "absl/strings/ascii.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" -#include "absl/strings/strip.h" +#include "absl/strings/string_view.h" #include "tensorflow/core/platform/regexp.h" namespace tensorflow { diff --git a/tensorflow/core/profiler/utils/tf_op_utils.h b/tensorflow/core/profiler/utils/tf_op_utils.h index d1ac69e2976..b8af9463d51 100644 --- a/tensorflow/core/profiler/utils/tf_op_utils.h +++ b/tensorflow/core/profiler/utils/tf_op_utils.h @@ -16,9 +16,9 @@ limitations under the License. #ifndef TENSORFLOW_CORE_PROFILER_UTILS_TF_OP_UTILS_H_ #define TENSORFLOW_CORE_PROFILER_UTILS_TF_OP_UTILS_H_ +#include #include -#include "absl/base/attributes.h" #include "absl/strings/match.h" #include "absl/strings/string_view.h" diff --git a/tensorflow/core/profiler/utils/tf_op_utils_test.cc b/tensorflow/core/profiler/utils/tf_op_utils_test.cc index fa5169557d1..136dbee2430 100644 --- a/tensorflow/core/profiler/utils/tf_op_utils_test.cc +++ b/tensorflow/core/profiler/utils/tf_op_utils_test.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/profiler/utils/tf_op_utils.h" +#include "absl/strings/string_view.h" #include "tensorflow/core/platform/test.h" namespace tensorflow { diff --git a/tensorflow/core/profiler/utils/tf_xplane_visitor.h b/tensorflow/core/profiler/utils/tf_xplane_visitor.h index 33a170f8efd..17a7b94ef92 100644 --- a/tensorflow/core/profiler/utils/tf_xplane_visitor.h +++ b/tensorflow/core/profiler/utils/tf_xplane_visitor.h @@ -16,6 +16,7 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_PROFILER_UTILS_TF_XPLANE_VISITOR_H_ #define TENSORFLOW_CORE_PROFILER_UTILS_TF_XPLANE_VISITOR_H_ +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" #include "tensorflow/core/profiler/utils/xplane_visitor.h" diff --git a/tensorflow/core/profiler/utils/tfstreamz_utils.cc b/tensorflow/core/profiler/utils/tfstreamz_utils.cc index 5fef494fc3b..f4cbaa84100 100644 --- a/tensorflow/core/profiler/utils/tfstreamz_utils.cc +++ b/tensorflow/core/profiler/utils/tfstreamz_utils.cc @@ -14,37 +14,46 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/profiler/utils/tfstreamz_utils.h" +#include #include +#include +#include +#include #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" -#include "absl/strings/str_format.h" #include "absl/strings/str_join.h" #include "absl/strings/substitute.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/core/framework/summary.pb.h" #include "tensorflow/core/lib/monitoring/collected_metrics.h" -#include "tensorflow/core/lib/monitoring/collection_registry.h" -#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/lib/monitoring/metric_def.h" +#include "tensorflow/core/lib/monitoring/types.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/tfstreamz.pb.h" +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" +#include "tensorflow/core/profiler/utils/xplane_builder.h" namespace tensorflow { namespace profiler { namespace { -string ConstructXStatName(const string& name, const monitoring::Point& point) { + +std::string ConstructXStatName(absl::string_view name, + const monitoring::Point& point) { if (point.labels.empty()) { - return name; + return std::string(name); } return absl::Substitute( 
"$0{$1}", name, - absl::StrJoin(point.labels, ", ", - [](string* out, const monitoring::Point::Label& label) { - absl::StrAppend(out, label.name, "=", label.value); - })); + absl::StrJoin( + point.labels, ", ", + [](std::string* out, const monitoring::Point::Label& label) { + absl::StrAppend(out, label.name, "=", label.value); + })); } -string SerializePercentile(const monitoring::Percentiles& percentiles) { +std::string SerializePercentile(const monitoring::Percentiles& percentiles) { tfstreamz::Percentiles output; output.set_unit_of_measure( static_cast(percentiles.unit_of_measure)); @@ -82,11 +91,11 @@ Status SerializeToXPlane(const std::vector& snapshots, xevent.SetEndTimestampNs(snapshot.end_time_ns); auto& metric_descriptor_map = snapshot.metrics->metric_descriptor_map; for (const auto& point_set : snapshot.metrics->point_set_map) { - const string& metric_name = point_set.first; + const std::string& metric_name = point_set.first; // Each metrics have multiple points corresponding to different labels. for (const auto& point : point_set.second->points) { // Generates one KPI metric for each point. - string stat_name = ConstructXStatName(metric_name, *point); + std::string stat_name = ConstructXStatName(metric_name, *point); auto* metadata = xplane.GetOrCreateStatMetadata(stat_name); auto it = metric_descriptor_map.find(metric_name); if (it != metric_descriptor_map.end()) { diff --git a/tensorflow/core/profiler/utils/tfstreamz_utils.h b/tensorflow/core/profiler/utils/tfstreamz_utils.h index ae8e4079bcb..1ab21ed1b5e 100644 --- a/tensorflow/core/profiler/utils/tfstreamz_utils.h +++ b/tensorflow/core/profiler/utils/tfstreamz_utils.h @@ -15,11 +15,13 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_PROFILER_UTILS_TFSTREAMZ_UTILS_H_ #define TENSORFLOW_CORE_PROFILER_UTILS_TFSTREAMZ_UTILS_H_ +#include +#include + #include "tensorflow/core/lib/monitoring/collected_metrics.h" -#include "tensorflow/core/lib/monitoring/collection_registry.h" #include "tensorflow/core/platform/status.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" -#include "tensorflow/core/profiler/utils/xplane_builder.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/utils/timespan.h b/tensorflow/core/profiler/utils/timespan.h index bccbeaa796f..82775af1415 100644 --- a/tensorflow/core/profiler/utils/timespan.h +++ b/tensorflow/core/profiler/utils/timespan.h @@ -16,6 +16,9 @@ limitations under the License. #ifndef TENSORFLOW_CORE_PROFILER_UTILS_TIMESPAN_H_ #define TENSORFLOW_CORE_PROFILER_UTILS_TIMESPAN_H_ +#include +#include + #include "absl/strings/str_cat.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/core/profiler/utils/xplane_builder.cc b/tensorflow/core/profiler/utils/xplane_builder.cc index 9e66a15cc36..f923f3982f4 100644 --- a/tensorflow/core/profiler/utils/xplane_builder.cc +++ b/tensorflow/core/profiler/utils/xplane_builder.cc @@ -14,6 +14,14 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/core/profiler/utils/xplane_builder.h" +#include +#include +#include + +#include "absl/container/flat_hash_map.h" +#include "absl/strings/string_view.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/time_utils.h" namespace tensorflow { @@ -54,7 +62,7 @@ XEventMetadata* XPlaneBuilder::GetOrCreateEventMetadata( return metadata; } -XEventMetadata* XPlaneBuilder::GetOrCreateEventMetadata(string&& name) { +XEventMetadata* XPlaneBuilder::GetOrCreateEventMetadata(std::string&& name) { XEventMetadata*& metadata = event_metadata_by_name_[name]; if (metadata == nullptr) { metadata = diff --git a/tensorflow/core/profiler/utils/xplane_builder.h b/tensorflow/core/profiler/utils/xplane_builder.h index 803cc7b89c2..b0d743a0caf 100644 --- a/tensorflow/core/profiler/utils/xplane_builder.h +++ b/tensorflow/core/profiler/utils/xplane_builder.h @@ -15,10 +15,15 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_PROFILER_UTILS_XPLANE_BUILDER_H_ #define TENSORFLOW_CORE_PROFILER_UTILS_XPLANE_BUILDER_H_ +#include + +#include +#include + #include "absl/container/flat_hash_map.h" #include "absl/strings/numbers.h" #include "absl/strings/string_view.h" -#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/time_utils.h" @@ -53,12 +58,12 @@ class XStatsBuilder { void AddStatValue(const XStatMetadata& metadata, absl::string_view value, bool is_bytes = false) { if (is_bytes) { - AddStat(metadata)->set_bytes_value(string(value)); + AddStat(metadata)->set_bytes_value(std::string(value)); } else { - AddStat(metadata)->set_str_value(string(value)); + AddStat(metadata)->set_str_value(std::string(value)); } } - void AddStatValue(const XStatMetadata& metadata, string&& value, + void AddStatValue(const XStatMetadata& metadata, std::string&& value, bool is_bytes = false) { if (is_bytes) { AddStat(metadata)->set_bytes_value(std::move(value)); @@ -160,7 +165,7 @@ class XLineBuilder { int64 NumEvents() { return line_->events_size(); } - void SetName(absl::string_view name) { line_->set_name(string(name)); } + void SetName(absl::string_view name) { line_->set_name(std::string(name)); } void SetNameIfEmpty(absl::string_view name) { if (line_->name().empty()) SetName(name); @@ -205,7 +210,7 @@ class XPlaneBuilder : public XStatsBuilder { int64 Id() { return plane_->id(); } void SetId(int64 id) { plane_->set_id(id); } - void SetName(absl::string_view name) { plane_->set_name(string(name)); } + void SetName(absl::string_view name) { plane_->set_name(std::string(name)); } void ReserveLines(size_t num_lines) { plane_->mutable_lines()->Reserve(num_lines); @@ -222,7 +227,7 @@ class XPlaneBuilder : public XStatsBuilder { XEventMetadata* GetOrCreateEventMetadata(int64 metadata_id); XEventMetadata* 
GetOrCreateEventMetadata(absl::string_view name); - XEventMetadata* GetOrCreateEventMetadata(string&& name); + XEventMetadata* GetOrCreateEventMetadata(std::string&& name); inline XEventMetadata* GetOrCreateEventMetadata(const char* name) { return GetOrCreateEventMetadata(absl::string_view(name)); } @@ -251,7 +256,7 @@ void XStatsBuilder::AddStat(const XStatMetadata& key, const XStat& stat, if (stat.value_case() == XStat::kRefValue) { const auto& stat_metadata_map = src.stat_metadata(); const auto it = stat_metadata_map.find(stat.ref_value()); - if (ABSL_PREDICT_FALSE(it == stat_metadata_map.end())) { + if (TF_PREDICT_FALSE(it == stat_metadata_map.end())) { // the reference value in stat is not found in XStatMetadata from src. return; } diff --git a/tensorflow/core/profiler/utils/xplane_builder_test.cc b/tensorflow/core/profiler/utils/xplane_builder_test.cc index cb8749703a2..e55e01d8233 100644 --- a/tensorflow/core/profiler/utils/xplane_builder_test.cc +++ b/tensorflow/core/profiler/utils/xplane_builder_test.cc @@ -14,7 +14,11 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/profiler/utils/xplane_builder.h" +#include + +#include "absl/strings/string_view.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/xplane_visitor.h" namespace tensorflow { diff --git a/tensorflow/core/profiler/utils/xplane_schema.cc b/tensorflow/core/profiler/utils/xplane_schema.cc index 51bc4d03810..f8ff31b078a 100644 --- a/tensorflow/core/profiler/utils/xplane_schema.cc +++ b/tensorflow/core/profiler/utils/xplane_schema.cc @@ -17,7 +17,10 @@ limitations under the License. 
#include "absl/container/flat_hash_map.h" #include "absl/strings/string_view.h" +#include "absl/types/optional.h" #include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/utils/xplane_schema.h b/tensorflow/core/profiler/utils/xplane_schema.h index 97e54a7fc2f..31ff90155f5 100644 --- a/tensorflow/core/profiler/utils/xplane_schema.h +++ b/tensorflow/core/profiler/utils/xplane_schema.h @@ -16,11 +16,10 @@ limitations under the License. #ifndef TENSORFLOW_CORE_PROFILER_UTILS_XPLANE_SCHEMA_H_ #define TENSORFLOW_CORE_PROFILER_UTILS_XPLANE_SCHEMA_H_ -#include "absl/strings/match.h" #include "absl/strings/string_view.h" #include "absl/types/optional.h" -#include "absl/types/span.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/utils/xplane_utils.cc b/tensorflow/core/profiler/utils/xplane_utils.cc index b2cc1fd46a5..7f5221c5391 100644 --- a/tensorflow/core/profiler/utils/xplane_utils.cc +++ b/tensorflow/core/profiler/utils/xplane_utils.cc @@ -14,12 +14,21 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/core/profiler/utils/xplane_utils.h" +#include +#include +#include +#include + #include "absl/container/flat_hash_map.h" #include "absl/strings/match.h" +#include "absl/strings/string_view.h" #include "tensorflow/core/platform/env_time.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/timespan.h" #include "tensorflow/core/profiler/utils/xplane_builder.h" +#include "tensorflow/core/profiler/utils/xplane_schema.h" #include "tensorflow/core/profiler/utils/xplane_visitor.h" namespace tensorflow { diff --git a/tensorflow/core/profiler/utils/xplane_utils.h b/tensorflow/core/profiler/utils/xplane_utils.h index 4f0a8b82646..49087c49cd8 100644 --- a/tensorflow/core/profiler/utils/xplane_utils.h +++ b/tensorflow/core/profiler/utils/xplane_utils.h @@ -17,6 +17,7 @@ limitations under the License. #include +#include "absl/container/flat_hash_map.h" #include "absl/strings/string_view.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" diff --git a/tensorflow/core/profiler/utils/xplane_utils_test.cc b/tensorflow/core/profiler/utils/xplane_utils_test.cc index b9b15b2e8a9..04e06fcb05b 100644 --- a/tensorflow/core/profiler/utils/xplane_utils_test.cc +++ b/tensorflow/core/profiler/utils/xplane_utils_test.cc @@ -15,9 +15,14 @@ limitations under the License. 
#include "tensorflow/core/profiler/utils/xplane_utils.h" +#include + #include "absl/container/flat_hash_map.h" +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" #include "tensorflow/core/platform/env_time.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/xplane_builder.h" #include "tensorflow/core/profiler/utils/xplane_visitor.h" diff --git a/tensorflow/core/profiler/utils/xplane_visitor.cc b/tensorflow/core/profiler/utils/xplane_visitor.cc index ab97271a69a..42068b7c61a 100644 --- a/tensorflow/core/profiler/utils/xplane_visitor.cc +++ b/tensorflow/core/profiler/utils/xplane_visitor.cc @@ -14,7 +14,16 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/profiler/utils/xplane_visitor.h" +#include +#include + +#include "absl/container/flat_hash_map.h" +#include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" namespace tensorflow { namespace profiler { diff --git a/tensorflow/core/profiler/utils/xplane_visitor.h b/tensorflow/core/profiler/utils/xplane_visitor.h index 52aa60bb2e6..4120a2821ca 100644 --- a/tensorflow/core/profiler/utils/xplane_visitor.h +++ b/tensorflow/core/profiler/utils/xplane_visitor.h @@ -15,9 +15,11 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_PROFILER_UTILS_XPLANE_VISITOR_H_ #define TENSORFLOW_CORE_PROFILER_UTILS_XPLANE_VISITOR_H_ +#include + #include -#include -#include +#include +#include #include "absl/container/flat_hash_map.h" #include "absl/strings/string_view.h" diff --git a/tensorflow/core/protobuf/BUILD b/tensorflow/core/protobuf/BUILD new file mode 100644 index 00000000000..a374c808a14 --- /dev/null +++ b/tensorflow/core/protobuf/BUILD @@ -0,0 +1,182 @@ +# For platform specific build config +load( + "//tensorflow/core/platform:build_config.bzl", + "tf_additional_all_protos", + "tf_proto_library", + "tf_proto_library_cc", + "tf_pyclif_proto_library", +) + +package( + default_visibility = [ + "//tensorflow:internal", + "//tensorflow/core:__subpackages__", + "//tensorflow_models:__subpackages__", + ], + licenses = ["notice"], # Apache 2.0 +) + +COMMON_PROTO_SRCS = [ + "bfc_memory_map.proto", + "config.proto", + "cluster.proto", + "debug.proto", + "device_filters.proto", + "device_properties.proto", + "graph_debug_info.proto", + "queue_runner.proto", + "rewriter_config.proto", + "tensor_bundle.proto", + "saver.proto", + "verifier_config.proto", +] + +[ + [ + tf_pyclif_proto_library( + name = "%s_pyclif" % proto_name, + proto_lib = ":for_core_protos", + proto_srcfile = "%s.proto" % proto_name, + visibility = ["//visibility:public"], + ), + ] + for proto_name in [ + "config", + "device_properties", + "graph_debug_info", + "meta_graph", + "saved_model", + ] +] + +tf_proto_library( + name = "autotuning_proto", + srcs = ["autotuning.proto"], + cc_api_version = 2, + make_default_target_header_only = True, +) + +tf_proto_library( + name = "conv_autotuning_proto", + srcs = ["conv_autotuning.proto"], + cc_api_version = 2, + make_default_target_header_only = True, + protodeps = [ + "//tensorflow/stream_executor:dnn_proto", + ], +) + +tf_proto_library_cc( + name = "worker_proto", + srcs = ["worker.proto"], + cc_api_version = 2, + protodeps = tf_additional_all_protos(), + visibility = 
["//visibility:public"], +) + +tf_proto_library_cc( + name = "worker_service_proto", + srcs = ["worker_service.proto"], + has_services = 1, + cc_api_version = 2, + cc_stubby_versions = ["2"], + protodeps = [":worker_proto"], +) + +tf_proto_library_cc( + name = "master_proto", + srcs = ["master.proto"], + cc_api_version = 2, + protodeps = tf_additional_all_protos(), + visibility = ["//tensorflow:internal"], +) + +tf_proto_library_cc( + name = "master_service_proto", + srcs = ["master_service.proto"], + has_services = 1, + cc_api_version = 2, + cc_stubby_versions = ["2"], + protodeps = [":master_proto"], +) + +tf_proto_library_cc( + name = "eager_service_proto", + srcs = ["eager_service.proto"], + has_services = 1, + cc_api_version = 2, + cc_grpc_version = 1, + cc_stubby_versions = ["2"], + protodeps = tf_additional_all_protos(), +) + +tf_proto_library_cc( + name = "replay_log_proto", + srcs = ["replay_log.proto"], + cc_api_version = 2, + protodeps = [ + ":master_proto", + ] + tf_additional_all_protos(), +) + +tf_proto_library( + name = "error_codes_proto_impl", + srcs = ["error_codes.proto"], + cc_api_version = 2, + make_default_target_header_only = True, +) + +exports_files( + srcs = ["error_codes.proto"] + COMMON_PROTO_SRCS + [ + # Protos which are not needed on mobile builds, but should be included + # in protos_all. + # + # Note that some protos are in neither core_proto_srcs nor this + # filegroup; e.g. ones with individual proto_library targets. + "control_flow.proto", + # TODO(ebrevdo): Re-enable once CriticalSection is in core. 
+ # "critical_section.proto", + "data/experimental/snapshot.proto", + "debug_event.proto", + "meta_graph.proto", + "named_tensor.proto", + "remote_tensor_handle.proto", + "saved_model.proto", + "saved_object_graph.proto", + "struct.proto", + "tensorflow_server.proto", + "trackable_object_graph.proto", + "transport_options.proto", + ], +) + +tf_proto_library( + name = "for_core_protos", + srcs = COMMON_PROTO_SRCS + [ + # Protos which are not needed on mobile builds, but should be included + # in protos_all. + # + # Note that some protos are in neither core_proto_srcs nor this + # filegroup; e.g. ones with individual proto_library targets. + "control_flow.proto", + # TODO(ebrevdo): Re-enable once CriticalSection is in core. + # "critical_section.proto", + "data/experimental/snapshot.proto", + "debug_event.proto", + "meta_graph.proto", + "named_tensor.proto", + "remote_tensor_handle.proto", + "saved_model.proto", + "saved_object_graph.proto", + "struct.proto", + "tensorflow_server.proto", + "trackable_object_graph.proto", + "transport_options.proto", + ], + cc_api_version = 2, + make_default_target_header_only = True, + protodeps = [ + ":error_codes_proto_impl", + "//tensorflow/core/framework:protos_all", + ], +) diff --git a/tensorflow/core/protobuf/remote_tensor_handle.proto b/tensorflow/core/protobuf/remote_tensor_handle.proto index 10995226a9b..36e3f810b73 100644 --- a/tensorflow/core/protobuf/remote_tensor_handle.proto +++ b/tensorflow/core/protobuf/remote_tensor_handle.proto @@ -21,11 +21,11 @@ message RemoteTensorHandle { int64 op_id = 1; // The index into the outputs of the operation that produced this tensor. int32 output_num = 2; - // Device of the operation that produced this tensor. Cannot be empty. + // Device where the tensor is located. Cannot be empty. // For multi-device functions, it's the default device passed to placer. string device = 3; - // Device where the tensor is located. 
Can be empty if the operation producing - // this tensor is a multi-device function. + // Device of the operation producing this tensor. Can be empty if the + // operation producing this tensor is a multi-device function. string op_device = 4; // Tensor type. DataType dtype = 5; diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 4df199d935b..68df6a1b632 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -21,7 +21,7 @@ limitations under the License. // Also update tensorflow/tensorflow.bzl and // tensorflow/tools/pip_package/setup.py #define TF_MAJOR_VERSION 2 -#define TF_MINOR_VERSION 1 +#define TF_MINOR_VERSION 2 #define TF_PATCH_VERSION 0 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 394 // Updated: 2020/5/7 +#define TF_GRAPH_DEF_VERSION 400 // Updated: 2020/5/13 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). 
// diff --git a/tensorflow/core/tpu/BUILD b/tensorflow/core/tpu/BUILD index 6184f52d240..4ea5fc39929 100644 --- a/tensorflow/core/tpu/BUILD +++ b/tensorflow/core/tpu/BUILD @@ -1,6 +1,10 @@ # Description: Utilities for TPU Operations package( + default_visibility = [ + "//tensorflow/core/tpu:__subpackages__", + "//tensorflow/stream_executor/tpu:__subpackages__", + ], licenses = ["notice"], # Apache 2.0 ) @@ -32,3 +36,26 @@ cc_library( "//tensorflow/core/protobuf/tpu:tpu_embedding_output_layout_proto_cc", ], ) + +cc_library( + name = "tpu_defs", + srcs = ["tpu_defs.cc"], + hdrs = ["tpu_defs.h"], +) + +cc_library( + name = "tpu_init_mode", + srcs = ["tpu_init_mode.cc"], + hdrs = ["tpu_init_mode.h"], + deps = [ + "//tensorflow/core:lib", + ], +) + +cc_library( + name = "tpu_config_c_api", + hdrs = ["tpu_config_c_api.h"], + deps = [ + "//tensorflow/c:tf_status", + ], +) diff --git a/tensorflow/core/tpu/tpu_config_c_api.h b/tensorflow/core/tpu/tpu_config_c_api.h new file mode 100644 index 00000000000..334a6a19325 --- /dev/null +++ b/tensorflow/core/tpu/tpu_config_c_api.h @@ -0,0 +1,54 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CORE_TPU_TPU_CONFIG_C_API_H_ +#define TENSORFLOW_CORE_TPU_TPU_CONFIG_C_API_H_ + +#include + +#include "tensorflow/c/tf_status.h" + +typedef struct TpuSerializedProto TpuSerializedProto; + +extern "C" { + +bool TPUHostInitialized(); + +// TODO(frankchn): Modify API to take in raw values instead of Tensors. +void ConfigureDistributedTpuOp_DoWork(size_t input_size, + TpuSerializedProto** inputs, + TpuSerializedProto* output, + TF_Status* status); + +void WaitForDistributedTpuOp_DoWork(size_t input_size, + TpuSerializedProto** inputs, + TpuSerializedProto* output, + TF_Status* status); + +void ShutdownDistributedTpuOp_DoWork(TF_Status* status); + +void InitializeHostForDistributedTpuOp_DoWork( + size_t input_size, TpuSerializedProto** inputs, + bool enable_whole_mesh_compilations, TpuSerializedProto* output, + TF_Status* status); + +void SetGlobalTPUArrayOp_DoWork(size_t input_size, TpuSerializedProto** inputs, + TF_Status* status); + +void DisconnectDistributedTpuChipsOp_DoWork(TpuSerializedProto* output, + TF_Status* status); +} + +#endif // TENSORFLOW_CORE_TPU_TPU_CONFIG_C_API_H_ diff --git a/tensorflow/core/tpu/tpu_defs.cc b/tensorflow/core/tpu/tpu_defs.cc new file mode 100644 index 00000000000..dc370ea2ba7 --- /dev/null +++ b/tensorflow/core/tpu/tpu_defs.cc @@ -0,0 +1,28 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/tpu/tpu_defs.h" + +namespace tensorflow { + +const char* const DEVICE_TPU_NODE = "TPU"; +const char* const TPU_FAST_MEM_ATTR = "_TPU_FAST_MEM"; +const char* const DEVICE_TPU_REPLICATED_CORE = "TPU_REPLICATED_CORE"; +const char* const DEVICE_TPU_SYSTEM = "TPU_SYSTEM"; +const char* const DEVICE_TPU_XLA_JIT = "XLA_TPU_JIT"; +const char* const TPUREPLICATE_MIRRORED_VAR_INDICES_ATTR = + "_mirrored_variable_indices"; + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/tpu_defs.h b/tensorflow/core/tpu/tpu_defs.h new file mode 100644 index 00000000000..b2a6e3ce303 --- /dev/null +++ b/tensorflow/core/tpu/tpu_defs.h @@ -0,0 +1,48 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Common definitions related to TPUs. + +#ifndef TENSORFLOW_CORE_TPU_TPU_DEFS_H_ +#define TENSORFLOW_CORE_TPU_TPU_DEFS_H_ + +namespace tensorflow { + +// Name of the TPU device, which corresponds to a single core. +extern const char* const DEVICE_TPU_NODE; // "TPU"; + +// The TPU_REPLICATED_CORE device is a virtual device corresponding to one core +// of a replicated TPU computation. Only valid within the body of a +// TPUReplicate computation. 
+extern const char* const DEVICE_TPU_REPLICATED_CORE; + +extern const char* const DEVICE_TPU_SYSTEM; // "TPU_SYSTEM"; + +// Name of the XLA_TPU_JIT compilation device, which is an internal device to +// compile graphs for TPU. Not registered as a device; no operators can be +// assigned to this device by a user. +extern const char* const DEVICE_TPU_XLA_JIT; // "XLA_TPU_JIT"; + +// Attribute used internally to pass "is_mirrored_variable" attribute on +// TPUReplicatedInput nodes to _TPUReplicate. +extern const char* const TPUREPLICATE_MIRRORED_VAR_INDICES_ATTR; + +// Attribute used internally to annotate ops which might consume TPU FastMem +// variable. +extern const char* const TPU_FAST_MEM_ATTR; // "_TPU_FAST_MEM" + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_TPU_DEFS_H_ diff --git a/tensorflow/core/tpu/tpu_init_mode.cc b/tensorflow/core/tpu/tpu_init_mode.cc new file mode 100644 index 00000000000..42952df29d8 --- /dev/null +++ b/tensorflow/core/tpu/tpu_init_mode.cc @@ -0,0 +1,66 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/tpu/tpu_init_mode.h" + +#include + +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/mutex.h" + +namespace tensorflow { + +namespace { + +mutex init_mode_mutex(LINKER_INITIALIZED); +TPUInitMode init_mode TF_GUARDED_BY(init_mode_mutex); + +} // namespace + +namespace test { + +void ForceSetTPUInitMode(const TPUInitMode mode) { + mutex_lock l(init_mode_mutex); + init_mode = mode; +} + +} // namespace test + +Status SetTPUInitMode(const TPUInitMode mode) { + if (mode == TPUInitMode::kNone) { + return errors::InvalidArgument("State cannot be set to: ", + static_cast(mode)); + } + { + mutex_lock l(init_mode_mutex); + if (init_mode != TPUInitMode::kNone && mode != init_mode) { + return errors::FailedPrecondition( + "TPUInit already attempted with mode: ", static_cast(init_mode), + " and cannot be changed to: ", static_cast(mode), + ". You are most probably trying to initialize the TPU system, both " + "using the explicit API and using an initialization Op within the " + "graph; please choose one. "); + } + init_mode = mode; + } + return Status::OK(); +} + +TPUInitMode GetTPUInitMode() { + mutex_lock l(init_mode_mutex); + return init_mode; +} + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/tpu_init_mode.h b/tensorflow/core/tpu/tpu_init_mode.h new file mode 100644 index 00000000000..73ca68ad8a0 --- /dev/null +++ b/tensorflow/core/tpu/tpu_init_mode.h @@ -0,0 +1,47 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_TPU_TPU_INIT_MODE_H_ +#define TENSORFLOW_CORE_TPU_TPU_INIT_MODE_H_ + +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { + +enum class TPUInitMode : int { kNone, kGlobal, kRegular }; + +// Sets the TPU initialization mode appropriately. +// +// Requires that mode is not kNone, and mode doesn't transition kGlobal +// <-> kRegular. +// +// IMPLEMENTATION DETAILS: +// Used internally to record the current mode and type of API used for TPU +// initialization in a global static variable. +Status SetTPUInitMode(TPUInitMode mode); + +// Returns the current TPUInitMode. +TPUInitMode GetTPUInitMode(); + +namespace test { + +// Forces the tpu init mode to be changed. 
+void ForceSetTPUInitMode(TPUInitMode mode); + +} // namespace test + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_TPU_INIT_MODE_H_ diff --git a/tensorflow/core/util/debug_events_writer.cc b/tensorflow/core/util/debug_events_writer.cc index 595f92d07c0..d9c3393ce3c 100644 --- a/tensorflow/core/util/debug_events_writer.cc +++ b/tensorflow/core/util/debug_events_writer.cc @@ -179,7 +179,7 @@ Status DebugEventsWriter::Init() { metadata->set_tensorflow_version(TF_VERSION_STRING); metadata->set_file_version( strings::Printf("%s%d", kVersionPrefix, kCurrentFormatVersion)); - SerializeAndWriteDebugEvent(&debug_event, METADATA); + TF_RETURN_IF_ERROR(SerializeAndWriteDebugEvent(&debug_event, METADATA)); TF_RETURN_WITH_CONTEXT_IF_ERROR( metadata_writer_->Flush(), "Failed to flush debug event metadata writer"); @@ -189,38 +189,38 @@ Status DebugEventsWriter::Init() { return Status::OK(); } -void DebugEventsWriter::WriteSourceFile(SourceFile* source_file) { +Status DebugEventsWriter::WriteSourceFile(SourceFile* source_file) { DebugEvent debug_event; debug_event.set_allocated_source_file(source_file); - SerializeAndWriteDebugEvent(&debug_event, SOURCE_FILES); + return SerializeAndWriteDebugEvent(&debug_event, SOURCE_FILES); } -void DebugEventsWriter::WriteStackFrameWithId( +Status DebugEventsWriter::WriteStackFrameWithId( StackFrameWithId* stack_frame_with_id) { DebugEvent debug_event; debug_event.set_allocated_stack_frame_with_id(stack_frame_with_id); - SerializeAndWriteDebugEvent(&debug_event, STACK_FRAMES); + return SerializeAndWriteDebugEvent(&debug_event, STACK_FRAMES); } -void DebugEventsWriter::WriteGraphOpCreation( +Status DebugEventsWriter::WriteGraphOpCreation( GraphOpCreation* graph_op_creation) { DebugEvent debug_event; debug_event.set_allocated_graph_op_creation(graph_op_creation); - SerializeAndWriteDebugEvent(&debug_event, GRAPHS); + return SerializeAndWriteDebugEvent(&debug_event, GRAPHS); } -void 
DebugEventsWriter::WriteDebuggedGraph(DebuggedGraph* debugged_graph) { +Status DebugEventsWriter::WriteDebuggedGraph(DebuggedGraph* debugged_graph) { DebugEvent debug_event; debug_event.set_allocated_debugged_graph(debugged_graph); - SerializeAndWriteDebugEvent(&debug_event, GRAPHS); + return SerializeAndWriteDebugEvent(&debug_event, GRAPHS); } -void DebugEventsWriter::WriteExecution(Execution* execution) { +Status DebugEventsWriter::WriteExecution(Execution* execution) { if (circular_buffer_size_ <= 0) { // No cyclic-buffer behavior. DebugEvent debug_event; debug_event.set_allocated_execution(execution); - SerializeAndWriteDebugEvent(&debug_event, EXECUTION); + return SerializeAndWriteDebugEvent(&debug_event, EXECUTION); } else { // Circular buffer behavior. DebugEvent debug_event; @@ -234,16 +234,18 @@ void DebugEventsWriter::WriteExecution(Execution* execution) { if (execution_buffer_.size() > circular_buffer_size_) { execution_buffer_.pop_front(); } + return Status::OK(); } } -void DebugEventsWriter::WriteGraphExecutionTrace( +Status DebugEventsWriter::WriteGraphExecutionTrace( GraphExecutionTrace* graph_execution_trace) { + TF_RETURN_IF_ERROR(Init()); if (circular_buffer_size_ <= 0) { // No cyclic-buffer behavior. DebugEvent debug_event; debug_event.set_allocated_graph_execution_trace(graph_execution_trace); - SerializeAndWriteDebugEvent(&debug_event, GRAPH_EXECUTION_TRACES); + return SerializeAndWriteDebugEvent(&debug_event, GRAPH_EXECUTION_TRACES); } else { // Circular buffer behavior. 
DebugEvent debug_event; @@ -257,15 +259,14 @@ void DebugEventsWriter::WriteGraphExecutionTrace( if (graph_execution_trace_buffer_.size() > circular_buffer_size_) { graph_execution_trace_buffer_.pop_front(); } + return Status::OK(); } } -void DebugEventsWriter::WriteGraphExecutionTrace(const string& tfdbg_context_id, - const string& device_name, - const string& op_name, - int32 output_slot, - int32 tensor_debug_mode, - const Tensor& tensor_value) { +Status DebugEventsWriter::WriteGraphExecutionTrace( + const string& tfdbg_context_id, const string& device_name, + const string& op_name, int32 output_slot, int32 tensor_debug_mode, + const Tensor& tensor_value) { std::unique_ptr trace(new GraphExecutionTrace()); trace->set_tfdbg_context_id(tfdbg_context_id); if (!op_name.empty()) { @@ -279,7 +280,7 @@ void DebugEventsWriter::WriteGraphExecutionTrace(const string& tfdbg_context_id, } trace->set_device_name(device_name); tensor_value.AsProtoTensorContent(trace->mutable_tensor_proto()); - WriteGraphExecutionTrace(trace.release()); + return WriteGraphExecutionTrace(trace.release()); } void DebugEventsWriter::WriteSerializedNonExecutionDebugEvent( @@ -487,8 +488,8 @@ Status DebugEventsWriter::InitNonMetadataFile(DebugEventFileType type) { return Status::OK(); } -void DebugEventsWriter::SerializeAndWriteDebugEvent(DebugEvent* debug_event, - DebugEventFileType type) { +Status DebugEventsWriter::SerializeAndWriteDebugEvent(DebugEvent* debug_event, + DebugEventFileType type) { std::unique_ptr* writer = nullptr; SelectWriter(type, &writer); if (writer != nullptr) { @@ -497,6 +498,11 @@ void DebugEventsWriter::SerializeAndWriteDebugEvent(DebugEvent* debug_event, string str; debug_event->AppendToString(&str); (*writer)->WriteSerializedDebugEvent(str); + return Status::OK(); + } else { + return errors::Internal( + "Unable to find debug events file writer for DebugEventsFileType ", + type); } } diff --git a/tensorflow/core/util/debug_events_writer.h 
b/tensorflow/core/util/debug_events_writer.h index 6d219d7c9ef..39835adf1a6 100644 --- a/tensorflow/core/util/debug_events_writer.h +++ b/tensorflow/core/util/debug_events_writer.h @@ -119,27 +119,27 @@ class DebugEventsWriter { // The four DebugEvent fields below are written _without_ the circular buffer. // Source file contents are written to the *.source_files file. // Takes ownership of source_file. - void WriteSourceFile(SourceFile* source_file); + Status WriteSourceFile(SourceFile* source_file); // Stack frames are written to the *.code_locations file. // Takes ownership of stack_frame_with_id. - void WriteStackFrameWithId(StackFrameWithId* stack_frame_with_id); + Status WriteStackFrameWithId(StackFrameWithId* stack_frame_with_id); // Graph op creation events are written to the *.graphs file. // Takes ownership of graph_op_creation. - void WriteGraphOpCreation(GraphOpCreation* graph_op_creation); + Status WriteGraphOpCreation(GraphOpCreation* graph_op_creation); // Debugged graphs are written to the *.graphs file. // Takes ownership of debugged_graph. - void WriteDebuggedGraph(DebuggedGraph* debugged_graph); + Status WriteDebuggedGraph(DebuggedGraph* debugged_graph); // The two DebugEvent fields below are written to the circular buffer // and saved to disk only at the FlushExecutionFiles() call. // Execution events (eager execution of an op or a tf.function) are written to // the *.execution file. // Takes ownership of execution. - void WriteExecution(Execution* execution); + Status WriteExecution(Execution* execution); // Graph execution traces (graph-internal tensor values or their summaries) // are written to the *.graph_execution_traces file. // Takes ownership of graph_execution_trace. - void WriteGraphExecutionTrace(GraphExecutionTrace* graph_execution_trace); + Status WriteGraphExecutionTrace(GraphExecutionTrace* graph_execution_trace); // Write a graph execution trace without using a protocol buffer. 
// Instead, pass the raw values related to the graph execution trace. @@ -155,11 +155,11 @@ class DebugEventsWriter { // tensor_value: The value of the tensor that describes the tensor(s) // that this trace is concerned with. The semantics of this tensor value // depends on the value of `tensor_debug_mode`. - void WriteGraphExecutionTrace(const string& tfdbg_context_id, - const string& device_name, - const string& op_name, int32 output_slot, - int32 tensor_debug_mode, - const Tensor& tensor_value); + Status WriteGraphExecutionTrace(const string& tfdbg_context_id, + const string& device_name, + const string& op_name, int32 output_slot, + int32 tensor_debug_mode, + const Tensor& tensor_value); // Writes a serialized DebugEvent to one of the debug-events files // concerned with the non-execution events: the SOURCE_FILES, STACK_FRAMES @@ -217,8 +217,8 @@ class DebugEventsWriter { // Initialize the TFRecord writer for non-metadata file type. Status InitNonMetadataFile(DebugEventFileType type); - void SerializeAndWriteDebugEvent(DebugEvent* debug_event, - DebugEventFileType type); + Status SerializeAndWriteDebugEvent(DebugEvent* debug_event, + DebugEventFileType type); void SelectWriter(DebugEventFileType type, std::unique_ptr** writer); diff --git a/tensorflow/core/util/debug_events_writer_test.cc b/tensorflow/core/util/debug_events_writer_test.cc index 66cde55864b..bd0c731bc90 100644 --- a/tensorflow/core/util/debug_events_writer_test.cc +++ b/tensorflow/core/util/debug_events_writer_test.cc @@ -263,7 +263,7 @@ TEST_F(DebugEventsWriterTest, WriteSourceFile) { source_file_1->add_lines(""); source_file_1->add_lines("print(tf.constant([42.0]))"); source_file_1->add_lines(""); - writer->WriteSourceFile(source_file_1); + TF_ASSERT_OK(writer->WriteSourceFile(source_file_1)); SourceFile* source_file_2 = new SourceFile(); source_file_2->set_file_path("/home/tf_programs/train.py"); @@ -271,7 +271,7 @@ TEST_F(DebugEventsWriterTest, WriteSourceFile) { 
source_file_2->add_lines("import tensorflow.keras as keras"); source_file_2->add_lines(""); source_file_2->add_lines("model = keras.Sequential()"); - writer->WriteSourceFile(source_file_2); + TF_ASSERT_OK(writer->WriteSourceFile(source_file_2)); TF_ASSERT_OK(writer->FlushNonExecutionFiles()); TF_ASSERT_OK(writer->Close()); @@ -336,8 +336,8 @@ TEST_F(DebugEventsWriterTest, WriteStackFramesFile) { file_line_col->set_func("my_func"); file_line_col->set_code(" x = x ** 2.0"); - writer->WriteStackFrameWithId(stack_frame_1); - writer->WriteStackFrameWithId(stack_frame_2); + TF_ASSERT_OK(writer->WriteStackFrameWithId(stack_frame_1)); + TF_ASSERT_OK(writer->WriteStackFrameWithId(stack_frame_2)); TF_ASSERT_OK(writer->FlushNonExecutionFiles()); TF_ASSERT_OK(writer->Close()); @@ -382,12 +382,12 @@ TEST_F(DebugEventsWriterTest, WriteGraphOpCreationAndDebuggedGraph) { GraphOpCreation* graph_op_creation = new GraphOpCreation(); graph_op_creation->set_op_type("MatMul"); graph_op_creation->set_op_name("Dense_1/MatMul"); - writer->WriteGraphOpCreation(graph_op_creation); + TF_ASSERT_OK(writer->WriteGraphOpCreation(graph_op_creation)); DebuggedGraph* debugged_graph = new DebuggedGraph(); debugged_graph->set_graph_id("deadbeaf"); debugged_graph->set_graph_name("my_func_graph"); - writer->WriteDebuggedGraph(debugged_graph); + TF_ASSERT_OK(writer->WriteDebuggedGraph(debugged_graph)); TF_ASSERT_OK(writer->FlushNonExecutionFiles()); TF_ASSERT_OK(writer->Close()); @@ -428,7 +428,7 @@ TEST_F(DebugEventsWriterTest, ConcurrentWriteCallsToTheSameFile) { SourceFile* source_file = new SourceFile(); source_file->set_file_path(file_path); source_file->set_host_name("localhost.localdomain"); - writer->WriteSourceFile(source_file); + TF_ASSERT_OK(writer->WriteSourceFile(source_file)); }; for (size_t i = 0; i < kConcurrentWrites; ++i) { thread_pool->Schedule(fn); @@ -469,7 +469,7 @@ TEST_F(DebugEventsWriterTest, ConcurrentWriteAndFlushCallsToTheSameFile) { SourceFile* source_file = new SourceFile(); 
source_file->set_file_path(file_path); source_file->set_host_name("localhost.localdomain"); - writer->WriteSourceFile(source_file); + TF_ASSERT_OK(writer->WriteSourceFile(source_file)); TF_ASSERT_OK(writer->FlushNonExecutionFiles()); }; for (size_t i = 0; i < kConcurrentWrites; ++i) { @@ -512,16 +512,16 @@ TEST_F(DebugEventsWriterTest, ConcurrentWriteCallsToTheDifferentFiles) { source_file->set_file_path( strings::Printf("/home/tf_programs/program_%.2d.py", index)); source_file->set_host_name("localhost.localdomain"); - writer->WriteSourceFile(source_file); + TF_ASSERT_OK(writer->WriteSourceFile(source_file)); } else if (index % 3 == 1) { StackFrameWithId* stack_frame = new StackFrameWithId(); stack_frame->set_id(strings::Printf("e%.2d", index)); - writer->WriteStackFrameWithId(stack_frame); + TF_ASSERT_OK(writer->WriteStackFrameWithId(stack_frame)); } else { GraphOpCreation* op_creation = new GraphOpCreation(); op_creation->set_op_type("Log"); op_creation->set_op_name(strings::Printf("Log_%.2d", index)); - writer->WriteGraphOpCreation(op_creation); + TF_ASSERT_OK(writer->WriteGraphOpCreation(op_creation)); } }; for (size_t i = 0; i < kConcurrentWrites; ++i) { @@ -586,7 +586,7 @@ TEST_F(DebugEventsWriterTest, WriteExecutionWithCyclicBufferNoFlush) { Execution* execution = new Execution(); execution->set_op_type("Log"); execution->add_input_tensor_ids(i); - writer->WriteExecution(execution); + TF_ASSERT_OK(writer->WriteExecution(execution)); } std::vector actuals; @@ -611,7 +611,7 @@ TEST_F(DebugEventsWriterTest, WriteExecutionWithCyclicBufferFlush) { Execution* execution = new Execution(); execution->set_op_type("Log"); execution->add_input_tensor_ids(i); - writer->WriteExecution(execution); + TF_ASSERT_OK(writer->WriteExecution(execution)); } TF_ASSERT_OK(writer->FlushExecutionFiles()); @@ -637,7 +637,7 @@ TEST_F(DebugEventsWriterTest, WriteExecutionWithCyclicBufferFlush) { Execution* execution = new Execution(); execution->set_op_type("Abs"); 
execution->add_input_tensor_ids(counter.fetch_add(1)); - writer->WriteExecution(execution); + TF_ASSERT_OK(writer->WriteExecution(execution)); }; for (size_t i = 0; i < kCyclicBufferSize * 2; ++i) { thread_pool->Schedule(fn); @@ -682,7 +682,7 @@ TEST_F(DebugEventsWriterTest, WriteGrahExecutionTraceWithCyclicBufferNoFlush) { for (size_t i = 0; i < kCyclicBufferSize * 2; ++i) { GraphExecutionTrace* trace = new GraphExecutionTrace(); trace->set_tfdbg_context_id(strings::Printf("graph_%.2ld", i)); - writer->WriteGraphExecutionTrace(trace); + TF_ASSERT_OK(writer->WriteGraphExecutionTrace(trace)); } std::vector actuals; @@ -695,6 +695,31 @@ TEST_F(DebugEventsWriterTest, WriteGrahExecutionTraceWithCyclicBufferNoFlush) { TF_ASSERT_OK(writer->Close()); } +TEST_F(DebugEventsWriterTest, WriteGrahExecutionTraceWithoutPreviousInitCall) { + const size_t kCyclicBufferSize = -1; + DebugEventsWriter* writer = + DebugEventsWriter::GetDebugEventsWriter(dump_root_, kCyclicBufferSize); + // NOTE(cais): `writer->Init()` is not called here before + // WriteGraphExecutionTrace() is called. This test checks that this is okay + // and the `GraphExecutionTrace` gets written correctly even without `Init()` + // being called first. This scenario can happen when a TF Graph with tfdbg + // debug ops are executed on a remote TF server. + + GraphExecutionTrace* trace = new GraphExecutionTrace(); + trace->set_tfdbg_context_id(strings::Printf("graph_0")); + TF_ASSERT_OK(writer->WriteGraphExecutionTrace(trace)); + TF_ASSERT_OK(writer->FlushExecutionFiles()); + + std::vector actuals; + ReadDebugEventProtos(writer, DebugEventFileType::GRAPH_EXECUTION_TRACES, + &actuals); + EXPECT_EQ(actuals.size(), 1); + EXPECT_EQ(actuals[0].graph_execution_trace().tfdbg_context_id(), "graph_0"); + + // Close the writer so the files can be safely deleted. 
+ TF_ASSERT_OK(writer->Close()); +} + TEST_F(DebugEventsWriterTest, WriteGrahExecutionTraceWithCyclicBufferFlush) { const size_t kCyclicBufferSize = 10; DebugEventsWriter* writer = @@ -706,7 +731,7 @@ TEST_F(DebugEventsWriterTest, WriteGrahExecutionTraceWithCyclicBufferFlush) { for (size_t i = 0; i < kCyclicBufferSize * 2; ++i) { GraphExecutionTrace* trace = new GraphExecutionTrace(); trace->set_tfdbg_context_id(strings::Printf("graph_%.2ld", i)); - writer->WriteGraphExecutionTrace(trace); + TF_ASSERT_OK(writer->WriteGraphExecutionTrace(trace)); } TF_ASSERT_OK(writer->FlushExecutionFiles()); @@ -731,7 +756,7 @@ TEST_F(DebugEventsWriterTest, WriteGrahExecutionTraceWithCyclicBufferFlush) { GraphExecutionTrace* trace = new GraphExecutionTrace(); trace->set_tfdbg_context_id( strings::Printf("new_graph_%.2ld", counter.fetch_add(1))); - writer->WriteGraphExecutionTrace(trace); + TF_ASSERT_OK(writer->WriteGraphExecutionTrace(trace)); }; for (size_t i = 0; i < kCyclicBufferSize * 2; ++i) { thread_pool->Schedule(fn); @@ -818,7 +843,7 @@ TEST_F(DebugEventsWriterTest, DisableCyclicBufferBehavior) { Execution* execution = new Execution(); execution->set_op_type("Log"); execution->add_input_tensor_ids(i); - writer->WriteExecution(execution); + TF_ASSERT_OK(writer->WriteExecution(execution)); } TF_ASSERT_OK(writer->FlushExecutionFiles()); @@ -834,7 +859,7 @@ TEST_F(DebugEventsWriterTest, DisableCyclicBufferBehavior) { for (size_t i = 0; i < kNumEvents; ++i) { GraphExecutionTrace* trace = new GraphExecutionTrace(); trace->set_tfdbg_context_id(strings::Printf("graph_%.2ld", i)); - writer->WriteGraphExecutionTrace(trace); + TF_ASSERT_OK(writer->WriteGraphExecutionTrace(trace)); } TF_ASSERT_OK(writer->FlushExecutionFiles()); diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index b537cc30190..53aa48bd33c 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -1950,8 +1950,8 @@ func GatherV2BatchDims(value int64) GatherV2Attr { // 
Gather slices from `params` axis `axis` according to `indices`. // // `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). -// Produces an output tensor with shape `params.shape[:axis] + indices.shape + -// params.shape[axis + 1:]` where: +// Produces an output tensor with shape `params.shape[:axis] + +// indices.shape[batch_dims:] + params.shape[axis + 1:]` where: // // ```python // # Scalar indices (output is rank(params) - 1). @@ -12059,7 +12059,7 @@ func SampleDistortedBoundingBoxMinObjectCovered(value float32) SampleDistortedBo // // value: The cropped area of the image must have an aspect ratio = // width / height within this range. -// If not specified, defaults to {f:0.75 f:1.33} +// If not specified, defaults to {f:0.75 f:1.33} func SampleDistortedBoundingBoxAspectRatioRange(value []float32) SampleDistortedBoundingBoxAttr { return func(m optionalAttr) { m["aspect_ratio_range"] = value @@ -12070,7 +12070,7 @@ func SampleDistortedBoundingBoxAspectRatioRange(value []float32) SampleDistorted // // value: The cropped area of the image must contain a fraction of the // supplied image within this range. -// If not specified, defaults to {f:0.05 f:1} +// If not specified, defaults to {f:0.05 f:1} func SampleDistortedBoundingBoxAreaRange(value []float32) SampleDistortedBoundingBoxAttr { return func(m optionalAttr) { m["area_range"] = value @@ -18975,7 +18975,7 @@ func SampleDistortedBoundingBoxV2Seed2(value int64) SampleDistortedBoundingBoxV2 // // value: The cropped area of the image must have an aspect ratio = // width / height within this range. 
-// If not specified, defaults to {f:0.75 f:1.33} +// If not specified, defaults to {f:0.75 f:1.33} func SampleDistortedBoundingBoxV2AspectRatioRange(value []float32) SampleDistortedBoundingBoxV2Attr { return func(m optionalAttr) { m["aspect_ratio_range"] = value @@ -18986,7 +18986,7 @@ func SampleDistortedBoundingBoxV2AspectRatioRange(value []float32) SampleDistort // // value: The cropped area of the image must contain a fraction of the // supplied image within this range. -// If not specified, defaults to {f:0.05 f:1} +// If not specified, defaults to {f:0.05 f:1} func SampleDistortedBoundingBoxV2AreaRange(value []float32) SampleDistortedBoundingBoxV2Attr { return func(m optionalAttr) { m["area_range"] = value @@ -19390,7 +19390,7 @@ func ImageSummaryMaxImages(value int64) ImageSummaryAttr { // ImageSummaryBadColor sets the optional bad_color attribute to value. // // value: Color to use for pixels with non-finite values. -// If not specified, defaults to {dtype:DT_UINT8 tensor_shape:{dim:{size:4}} int_val:255 int_val:0 int_val:0 int_val:255} +// If not specified, defaults to {dtype:DT_UINT8 tensor_shape:{dim:{size:4}} int_val:255 int_val:0 int_val:0 int_val:255} func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr { return func(m optionalAttr) { m["bad_color"] = value @@ -20461,7 +20461,7 @@ func Conv3DBackpropFilterV2DataFormat(value string) Conv3DBackpropFilterV2Attr { // filter element on that dimension. The dimension order is determined by the // value of `data_format`, see above for details. Dilations in the batch and // depth dimensions must be 1. -// If not specified, defaults to {i:1 i:1 i:1 i:1 i:1} +// If not specified, defaults to {i:1 i:1 i:1 i:1 i:1} func Conv3DBackpropFilterV2Dilations(value []int64) Conv3DBackpropFilterV2Attr { return func(m optionalAttr) { m["dilations"] = value @@ -21633,7 +21633,7 @@ func Conv2DBackpropInputDataFormat(value string) Conv2DBackpropInputAttr { // element on that dimension. 
The dimension order is determined by the value of // `data_format`, see above for details. Dilations in the batch and depth // dimensions must be 1. -// If not specified, defaults to {i:1 i:1 i:1 i:1} +// If not specified, defaults to {i:1 i:1 i:1 i:1} func Conv2DBackpropInputDilations(value []int64) Conv2DBackpropInputAttr { return func(m optionalAttr) { m["dilations"] = value @@ -22341,7 +22341,7 @@ func Conv2DDataFormat(value string) Conv2DAttr { // filter element on that dimension. The dimension order is determined by the // value of `data_format`, see above for details. Dilations in the batch and // depth dimensions must be 1. -// If not specified, defaults to {i:1 i:1 i:1 i:1} +// If not specified, defaults to {i:1 i:1 i:1 i:1} func Conv2DDilations(value []int64) Conv2DAttr { return func(m optionalAttr) { m["dilations"] = value @@ -22537,7 +22537,7 @@ func QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeOutType(value tf.DataTy // QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeDilations sets the optional dilations attribute to value. // // value: List of dilation values. -// If not specified, defaults to {i:1 i:1 i:1 i:1} +// If not specified, defaults to {i:1 i:1 i:1 i:1} func QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeDilations(value []int64) QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeAttr { return func(m optionalAttr) { m["dilations"] = value @@ -22606,7 +22606,7 @@ func QuantizedDepthwiseConv2DWithBiasAndReluOutType(value tf.DataType) Quantized // QuantizedDepthwiseConv2DWithBiasAndReluDilations sets the optional dilations attribute to value. // // value: List of dilation values. 
-// If not specified, defaults to {i:1 i:1 i:1 i:1} +// If not specified, defaults to {i:1 i:1 i:1 i:1} func QuantizedDepthwiseConv2DWithBiasAndReluDilations(value []int64) QuantizedDepthwiseConv2DWithBiasAndReluAttr { return func(m optionalAttr) { m["dilations"] = value @@ -22721,7 +22721,7 @@ func QuantizedDepthwiseConv2DWithBiasOutType(value tf.DataType) QuantizedDepthwi // QuantizedDepthwiseConv2DWithBiasDilations sets the optional dilations attribute to value. // // value: List of dilation values. -// If not specified, defaults to {i:1 i:1 i:1 i:1} +// If not specified, defaults to {i:1 i:1 i:1 i:1} func QuantizedDepthwiseConv2DWithBiasDilations(value []int64) QuantizedDepthwiseConv2DWithBiasAttr { return func(m optionalAttr) { m["dilations"] = value @@ -22780,7 +22780,7 @@ func QuantizedDepthwiseConv2DOutType(value tf.DataType) QuantizedDepthwiseConv2D // QuantizedDepthwiseConv2DDilations sets the optional dilations attribute to value. // // value: List of dilation values. -// If not specified, defaults to {i:1 i:1 i:1 i:1} +// If not specified, defaults to {i:1 i:1 i:1 i:1} func QuantizedDepthwiseConv2DDilations(value []int64) QuantizedDepthwiseConv2DAttr { return func(m optionalAttr) { m["dilations"] = value @@ -22954,7 +22954,7 @@ func QuantizedConv2DPerChannelOutType(value tf.DataType) QuantizedConv2DPerChann // QuantizedConv2DPerChannelDilations sets the optional dilations attribute to value. // // value: list of dilation values. -// If not specified, defaults to {i:1 i:1 i:1 i:1} +// If not specified, defaults to {i:1 i:1 i:1 i:1} func QuantizedConv2DPerChannelDilations(value []int64) QuantizedConv2DPerChannelAttr { return func(m optionalAttr) { m["dilations"] = value @@ -23331,7 +23331,7 @@ func Conv3DBackpropInputV2DataFormat(value string) Conv3DBackpropInputV2Attr { // filter element on that dimension. The dimension order is determined by the // value of `data_format`, see above for details. Dilations in the batch and // depth dimensions must be 1. 
-// If not specified, defaults to {i:1 i:1 i:1 i:1 i:1} +// If not specified, defaults to {i:1 i:1 i:1 i:1 i:1} func Conv3DBackpropInputV2Dilations(value []int64) Conv3DBackpropInputV2Attr { return func(m optionalAttr) { m["dilations"] = value @@ -25651,7 +25651,7 @@ func AvgPool3DGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksi type Conv3DBackpropFilterAttr func(optionalAttr) // Conv3DBackpropFilterDilations sets the optional dilations attribute to value. -// If not specified, defaults to {i:1 i:1 i:1 i:1 i:1} +// If not specified, defaults to {i:1 i:1 i:1 i:1 i:1} func Conv3DBackpropFilterDilations(value []int64) Conv3DBackpropFilterAttr { return func(m optionalAttr) { m["dilations"] = value @@ -25714,7 +25714,7 @@ func Conv3DDataFormat(value string) Conv3DAttr { // filter element on that dimension. The dimension order is determined by the // value of `data_format`, see above for details. Dilations in the batch and // depth dimensions must be 1. -// If not specified, defaults to {i:1 i:1 i:1 i:1 i:1} +// If not specified, defaults to {i:1 i:1 i:1 i:1 i:1} func Conv3DDilations(value []int64) Conv3DAttr { return func(m optionalAttr) { m["dilations"] = value @@ -25965,7 +25965,7 @@ func DepthwiseConv2dNativeBackpropInputDataFormat(value string) DepthwiseConv2dN // element on that dimension. The dimension order is determined by the value of // `data_format`, see above for details. Dilations in the batch and depth // dimensions must be 1. -// If not specified, defaults to {i:1 i:1 i:1 i:1} +// If not specified, defaults to {i:1 i:1 i:1 i:1} func DepthwiseConv2dNativeBackpropInputDilations(value []int64) DepthwiseConv2dNativeBackpropInputAttr { return func(m optionalAttr) { m["dilations"] = value @@ -26449,7 +26449,7 @@ func QuantizedConv2DOutType(value tf.DataType) QuantizedConv2DAttr { // filter element on that dimension. The dimension order is determined by the // value of `data_format`, see above for details. 
Dilations in the batch and // depth dimensions must be 1. -// If not specified, defaults to {i:1 i:1 i:1 i:1} +// If not specified, defaults to {i:1 i:1 i:1 i:1} func QuantizedConv2DDilations(value []int64) QuantizedConv2DAttr { return func(m optionalAttr) { m["dilations"] = value @@ -45537,7 +45537,7 @@ func DepthwiseConv2dNativeBackpropFilterDataFormat(value string) DepthwiseConv2d // element on that dimension. The dimension order is determined by the value of // `data_format`, see above for details. Dilations in the batch and depth // dimensions must be 1. -// If not specified, defaults to {i:1 i:1 i:1 i:1} +// If not specified, defaults to {i:1 i:1 i:1 i:1} func DepthwiseConv2dNativeBackpropFilterDilations(value []int64) DepthwiseConv2dNativeBackpropFilterAttr { return func(m optionalAttr) { m["dilations"] = value @@ -47477,7 +47477,7 @@ func LoadTPUEmbeddingFTRLParameters(scope *Scope, parameters tf.Output, accumula type Conv3DBackpropInputAttr func(optionalAttr) // Conv3DBackpropInputDilations sets the optional dilations attribute to value. -// If not specified, defaults to {i:1 i:1 i:1 i:1 i:1} +// If not specified, defaults to {i:1 i:1 i:1 i:1 i:1} func Conv3DBackpropInputDilations(value []int64) Conv3DBackpropInputAttr { return func(m optionalAttr) { m["dilations"] = value @@ -47548,7 +47548,7 @@ func DepthwiseConv2dNativeDataFormat(value string) DepthwiseConv2dNativeAttr { // element on that dimension. The dimension order is determined by the value of // `data_format`, see above for details. Dilations in the batch and depth // dimensions must be 1. -// If not specified, defaults to {i:1 i:1 i:1 i:1} +// If not specified, defaults to {i:1 i:1 i:1 i:1} func DepthwiseConv2dNativeDilations(value []int64) DepthwiseConv2dNativeAttr { return func(m optionalAttr) { m["dilations"] = value @@ -48537,7 +48537,7 @@ func Conv2DBackpropFilterDataFormat(value string) Conv2DBackpropFilterAttr { // element on that dimension. 
The dimension order is determined by the value of // `data_format`, see above for details. Dilations in the batch and depth // dimensions must be 1. -// If not specified, defaults to {i:1 i:1 i:1 i:1} +// If not specified, defaults to {i:1 i:1 i:1 i:1} func Conv2DBackpropFilterDilations(value []int64) Conv2DBackpropFilterAttr { return func(m optionalAttr) { m["dilations"] = value diff --git a/tensorflow/go/saved_model.go b/tensorflow/go/saved_model.go index 7aa1e83cbc4..64ae82e3b01 100644 --- a/tensorflow/go/saved_model.go +++ b/tensorflow/go/saved_model.go @@ -22,7 +22,7 @@ import ( "unsafe" "github.com/golang/protobuf/proto" - corepb "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto" + corepb "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto" ) // #include diff --git a/tensorflow/go/signature.go b/tensorflow/go/signature.go index 8aac0e2ec93..c2db0c75247 100644 --- a/tensorflow/go/signature.go +++ b/tensorflow/go/signature.go @@ -16,7 +16,7 @@ limitations under the License. 
package tensorflow -import corepb "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto" +import corepb "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto" // #include "tensorflow/c/c_api.h" import "C" diff --git a/tensorflow/go/signature_test.go b/tensorflow/go/signature_test.go index e6927f3cebd..f9fa8427819 100644 --- a/tensorflow/go/signature_test.go +++ b/tensorflow/go/signature_test.go @@ -20,9 +20,9 @@ import ( "fmt" "testing" - corepb "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto" tspb "github.com/tensorflow/tensorflow/tensorflow/go/core/framework/tensor_shape_go_proto" typb "github.com/tensorflow/tensorflow/tensorflow/go/core/framework/types_go_proto" + corepb "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto" ) func TestSignatureFromProto(t *testing.T) { diff --git a/tensorflow/lite/BUILD b/tensorflow/lite/BUILD index a2ab4854165..14babee2da7 100644 --- a/tensorflow/lite/BUILD +++ b/tensorflow/lite/BUILD @@ -253,6 +253,7 @@ cc_library( "//tensorflow/lite/core/api", "//tensorflow/lite/delegates/nnapi:nnapi_delegate", "//tensorflow/lite/experimental/resource", + "//tensorflow/lite/kernels/internal:compatibility", "//tensorflow/lite/nnapi:nnapi_implementation", "//tensorflow/lite/schema:schema_fbs", ] + select({ diff --git a/tensorflow/lite/build_def.bzl b/tensorflow/lite/build_def.bzl index f6cdb981328..4af4bd4aae8 100644 --- a/tensorflow/lite/build_def.bzl +++ b/tensorflow/lite/build_def.bzl @@ -702,6 +702,7 @@ def gen_model_coverage_test(src, model_name, data, failure_type, tags, size = "m "//tensorflow/lite/python:lite", "//tensorflow/python:client_testlib", ] + flex_dep(target_op_sets), + timeout = "long", ) def if_tflite_experimental_runtime(if_eager, if_non_eager, if_none = []): diff --git a/tensorflow/lite/c/common.c b/tensorflow/lite/c/common.c index f70a60002dd..e6b47896528 100644 --- a/tensorflow/lite/c/common.c +++ 
b/tensorflow/lite/c/common.c @@ -79,7 +79,8 @@ TfLiteFloatArray* TfLiteFloatArrayCreate(int size) { void TfLiteFloatArrayFree(TfLiteFloatArray* a) { free(a); } void TfLiteTensorDataFree(TfLiteTensor* t) { - if (t->allocation_type == kTfLiteDynamic) { + if (t->allocation_type == kTfLiteDynamic || + t->allocation_type == kTfLitePersistentRo) { free(t->data.raw); } t->data.raw = NULL; @@ -172,7 +173,8 @@ void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims, } void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) { - if (tensor->allocation_type != kTfLiteDynamic) { + if (tensor->allocation_type != kTfLiteDynamic && + tensor->allocation_type != kTfLitePersistentRo) { return; } // TODO(b/145340303): Tensor data should be aligned. diff --git a/tensorflow/lite/c/common.h b/tensorflow/lite/c/common.h index 12ddf9945fd..ab150e87d93 100644 --- a/tensorflow/lite/c/common.h +++ b/tensorflow/lite/c/common.h @@ -29,6 +29,9 @@ limitations under the License. // TfLiteDelegate - allows delegation of nodes to alternative backends. // // Some abstractions in this file are created and managed by Interpreter. +// +// NOTE: The order of values in these structs are "semi-ABI stable". New values +// should be added only to the end of structs and never reordered. #ifndef TENSORFLOW_LITE_C_COMMON_H_ #define TENSORFLOW_LITE_C_COMMON_H_ @@ -318,15 +321,23 @@ typedef union TfLitePtrUnion { void* data; } TfLitePtrUnion; -// Memory allocation strategies. kTfLiteMmapRo is for read-only memory-mapped -// data (or data externally allocated). kTfLiteArenaRw is arena allocated -// data. kTfLiteDynamic is for tensors that are allocated during evaluation. +// Memory allocation strategies. +// * kTfLiteMmapRo: Read-only memory-mapped data, or data externally allocated. +// * kTfLiteArenaRw: Arena allocated with no guarantees about persistence, +// and available during eval. 
+// * kTfLiteArenaRwPersistent: Arena allocated but persistent across eval, and +// only available during eval. +// * kTfLiteDynamic: Allocated during eval, or for string tensors. +// * kTfLitePersistentRo: Allocated and populated during prepare. This is +// useful for tensors that can be computed during prepare and treated +// as constant inputs for downstream ops (also in prepare). typedef enum TfLiteAllocationType { kTfLiteMemNone = 0, kTfLiteMmapRo, kTfLiteArenaRw, kTfLiteArenaRwPersistent, kTfLiteDynamic, + kTfLitePersistentRo, } TfLiteAllocationType; // The delegates should use zero or positive integers to represent handles. diff --git a/tensorflow/lite/core/subgraph.cc b/tensorflow/lite/core/subgraph.cc index 4cebd059a80..7f4e0e286ea 100644 --- a/tensorflow/lite/core/subgraph.cc +++ b/tensorflow/lite/core/subgraph.cc @@ -1183,7 +1183,8 @@ TfLiteStatus Subgraph::ResizeTensorImpl(TfLiteTensor* tensor, // Note that in theory we could resize kTfLiteArenaRwPersistent tensors too. if (tensor->allocation_type == kTfLiteArenaRw || tensor->allocation_type == kTfLiteDynamic || - tensor->allocation_type == kTfLiteArenaRwPersistent) { + tensor->allocation_type == kTfLiteArenaRwPersistent || + tensor->allocation_type == kTfLitePersistentRo) { tensor_resized_since_op_invoke_ |= TfLiteIntArrayEqual(tensor->dims, new_size) == 0; if (tensor->type != kTfLiteString) { @@ -1195,14 +1196,16 @@ TfLiteStatus Subgraph::ResizeTensorImpl(TfLiteTensor* tensor, return kTfLiteError; } - // Realloc space for kTfLiteDynamic tensors. + // Realloc space for heap-allocated tensors. TfLiteTensorRealloc(bytesRequired, tensor); tensor->bytes = bytesRequired; } if (tensor->dims) TfLiteIntArrayFree(tensor->dims); tensor->dims = new_size; - if (tensor->allocation_type != kTfLiteDynamic) { + // Reset arena-allocated tensors; they will be allocated later. 
+ if (tensor->allocation_type == kTfLiteArenaRw || + tensor->allocation_type == kTfLiteArenaRwPersistent) { tensor->data.raw = nullptr; } } else { diff --git a/tensorflow/lite/delegates/flex/BUILD b/tensorflow/lite/delegates/flex/BUILD index 9fe80605e39..d69d2207e63 100644 --- a/tensorflow/lite/delegates/flex/BUILD +++ b/tensorflow/lite/delegates/flex/BUILD @@ -26,7 +26,7 @@ cc_library( "//tensorflow/core:android_tensorflow_lib_lite", ], "//tensorflow:ios": [ - "//tensorflow/core:ios_tensorflow_lib_lite", + "//tensorflow/core:portable_tensorflow_lib_lite", ], "//conditions:default": [ "//tensorflow/c:c_api_internal", @@ -66,7 +66,7 @@ cc_library( "//tensorflow/core:android_tensorflow_lib", ], "//tensorflow:ios": [ - "//tensorflow/core:ios_tensorflow_lib", + "//tensorflow/core:portable_tensorflow_lib", ], "//conditions:default": [ "//tensorflow/core:tensorflow", @@ -103,7 +103,7 @@ cc_library( "//tensorflow/core:android_tensorflow_lib_lite", ], "//tensorflow:ios": [ - "//tensorflow/core:ios_tensorflow_lib_lite", + "//tensorflow/core:portable_tensorflow_lib_lite", ], "//conditions:default": [ "//tensorflow/core:lib", @@ -137,7 +137,7 @@ cc_library( "//tensorflow/core:android_tensorflow_lib_lite", ], "//tensorflow:ios": [ - "//tensorflow/core:ios_tensorflow_lib_lite", + "//tensorflow/core:portable_tensorflow_lib_lite", ], "//conditions:default": [ "//tensorflow/core/common_runtime/eager:context", @@ -183,7 +183,7 @@ cc_library( "//tensorflow/core:android_tensorflow_lib_lite", ], "//tensorflow:ios": [ - "//tensorflow/core:ios_tensorflow_lib_lite", + "//tensorflow/core:portable_tensorflow_lib_lite", ], "//conditions:default": [ "//tensorflow/core/common_runtime/eager:context", @@ -211,7 +211,7 @@ tf_cc_test( "//tensorflow/core:android_tensorflow_lib", ], "//tensorflow:ios": [ - "//tensorflow/core:ios_tensorflow_lib", + "//tensorflow/core:portable_tensorflow_lib", ], "//conditions:default": [ "//tensorflow/core:tensorflow", @@ -245,7 +245,7 @@ cc_library( 
"//tensorflow/core:android_tensorflow_lib_lite", ], "//tensorflow:ios": [ - "//tensorflow/core:ios_tensorflow_lib_lite", + "//tensorflow/core:portable_tensorflow_lib_lite", ], "//conditions:default": [ "//tensorflow/c:c_api_internal", diff --git a/tensorflow/lite/delegates/gpu/BUILD b/tensorflow/lite/delegates/gpu/BUILD index 099f653a1b8..2581232bc2b 100644 --- a/tensorflow/lite/delegates/gpu/BUILD +++ b/tensorflow/lite/delegates/gpu/BUILD @@ -167,7 +167,7 @@ ios_static_framework( "metal_delegate.h", "metal_delegate_internal.h", ], - minimum_os_version = "10.0", + minimum_os_version = "11.0", deps = [":metal_delegate"], ) diff --git a/tensorflow/lite/delegates/gpu/cl/api.cc b/tensorflow/lite/delegates/gpu/cl/api.cc index 7ffb5604d83..475eed4dccc 100644 --- a/tensorflow/lite/delegates/gpu/cl/api.cc +++ b/tensorflow/lite/delegates/gpu/cl/api.cc @@ -352,10 +352,10 @@ class GlBufferHolder : public TensorTie { }; TensorObject TensorToObj(const Tensor& tensor) { - if (tensor.StorageType() == TensorStorageType::BUFFER) { + if (tensor.GetStorageType() == TensorStorageType::BUFFER) { return OpenClBuffer{tensor.GetMemoryPtr()}; } - if (tensor.StorageType() == TensorStorageType::IMAGE_BUFFER) { + if (tensor.GetStorageType() == TensorStorageType::IMAGE_BUFFER) { return OpenClBuffer{tensor.GetMemoryPtrForWriting()}; } return OpenClTexture{tensor.GetMemoryPtr()}; @@ -516,9 +516,9 @@ TensorObjectDef TensorToDef(const Tensor& tensor) { def.dimensions.h = tensor.Height(); def.dimensions.w = tensor.Width(); def.dimensions.c = tensor.Channels(); - def.object_def.data_layout = ToDataLayout(tensor.StorageType()); - def.object_def.data_type = tensor.DataType(); - def.object_def.object_type = ToObjectType(tensor.StorageType()); + def.object_def.data_layout = ToDataLayout(tensor.GetStorageType()); + def.object_def.data_type = tensor.GetDataType(); + def.object_def.object_type = ToObjectType(tensor.GetStorageType()); def.object_def.user_provided = false; return def; } diff --git 
a/tensorflow/lite/delegates/gpu/cl/tensor.cc b/tensorflow/lite/delegates/gpu/cl/tensor.cc index f01975e2347..4a52508af0e 100644 --- a/tensorflow/lite/delegates/gpu/cl/tensor.cc +++ b/tensorflow/lite/delegates/gpu/cl/tensor.cc @@ -29,7 +29,7 @@ namespace cl { namespace { absl::Status CreateImageBufferFromBuffer(const CLContext& context, - cl_mem memory, enum DataType data_type, + cl_mem memory, DataType data_type, int width, cl_mem* result) { cl_image_format format; cl_image_desc desc; diff --git a/tensorflow/lite/delegates/gpu/cl/tensor.h b/tensorflow/lite/delegates/gpu/cl/tensor.h index d59ef838888..cb7d4263a5c 100644 --- a/tensorflow/lite/delegates/gpu/cl/tensor.h +++ b/tensorflow/lite/delegates/gpu/cl/tensor.h @@ -75,8 +75,8 @@ class Tensor { int4 GetWHSB() const { return int4(shape_.w, shape_.h, Slices(), shape_.b); } int4 GetWHDS() const { return int4(shape_.w, shape_.h, shape_.d, Slices()); } - enum DataType DataType() const { return descriptor_.data_type; } - TensorStorageType StorageType() const { return descriptor_.storage_type; } + DataType GetDataType() const { return descriptor_.data_type; } + TensorStorageType GetStorageType() const { return descriptor_.storage_type; } // for profiling and memory statistics uint64_t GetMemorySizeInBytes() const; diff --git a/tensorflow/lite/delegates/gpu/common/model_builder.cc b/tensorflow/lite/delegates/gpu/common/model_builder.cc index 6f0a13bc1bd..46856a70a7c 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder.cc +++ b/tensorflow/lite/delegates/gpu/common/model_builder.cc @@ -2421,6 +2421,40 @@ class TransformLandmarksOperationParser : public TFLiteOperationParser { private: }; +class TransformLandmarksV2OperationParser : public TFLiteOperationParser { + public: + absl::Status IsSupported(const TfLiteContext* context, + const TfLiteNode* tflite_node, + const TfLiteRegistration* registration) final { + RETURN_IF_ERROR(CheckInputsOutputs(context, tflite_node, + /*runtime_inputs=*/2, /*outputs=*/1)); + 
return absl::OkStatus(); + } + + absl::Status Parse(const TfLiteNode* tflite_node, + const TfLiteRegistration* registration, + GraphFloat32* graph, ObjectReader* reader) final { + Node* node = graph->NewNode(); + RETURN_IF_ERROR(reader->AddInput(node, 0)); // data + RETURN_IF_ERROR(reader->AddInput(node, 1)); // bbox + RETURN_IF_ERROR(reader->AddOutputs(node)); + std::string op_name = "transform_landmarks_v2"; + node->operation.type = op_name; + BHWC output_shape; + RETURN_IF_ERROR( + ParseCustomAttributes(op_name, tflite_node->custom_initial_data, + tflite_node->custom_initial_data_size, + &(node->operation.attributes), &output_shape)); + + auto output_value = graph->FindOutputs(node->id)[0]; + + output_value->tensor.shape = graph->FindInputs(node->id)[0]->tensor.shape; + return absl::OkStatus(); + } + + private: +}; + class Landmarks2TransformMatrixOperationParser : public TFLiteOperationParser { public: absl::Status IsSupported(const TfLiteContext* context, @@ -2451,6 +2485,37 @@ class Landmarks2TransformMatrixOperationParser : public TFLiteOperationParser { } }; +class Landmarks2TransformMatrixV2OperationParser + : public TFLiteOperationParser { + public: + absl::Status IsSupported(const TfLiteContext* context, + const TfLiteNode* tflite_node, + const TfLiteRegistration* registration) final { + return CheckInputsOutputs(context, tflite_node, /*runtime_inputs=*/1, + /*outputs=*/1); + } + + absl::Status Parse(const TfLiteNode* tflite_node, + const TfLiteRegistration* registration, + GraphFloat32* graph, ObjectReader* reader) final { + Node* node = graph->NewNode(); + RETURN_IF_ERROR(reader->AddInput(node, 0)); // landmarks + RETURN_IF_ERROR(reader->AddOutputs(node)); // transform matrix + + const std::string op_name = "landmarks_to_transform_matrix_v2"; + node->operation.type = op_name; + BHWC output_shape; + RETURN_IF_ERROR( + ParseCustomAttributes(op_name, tflite_node->custom_initial_data, + tflite_node->custom_initial_data_size, + 
&(node->operation.attributes), &output_shape)); + + auto output_value = graph->FindOutputs(node->id)[0]; + output_value->tensor.shape = output_shape; + return absl::OkStatus(); + } +}; + class AlignmentPointsToTransformMatrixOperationParser : public TFLiteOperationParser { public: @@ -2672,9 +2737,15 @@ std::unique_ptr NewOperationParser( if (custom_name == "TransformLandmarks") { return std::make_unique(); } + if (custom_name == "TransformLandmarksV2") { + return std::make_unique(); + } if (custom_name == "Landmarks2TransformMatrix") { return std::make_unique(); } + if (custom_name == "Landmarks2TransformMatrixV2") { + return std::make_unique(); + } if (custom_name == "AlignmentPointsToTransformMatrix") { return std::make_unique< AlignmentPointsToTransformMatrixOperationParser>(); diff --git a/tensorflow/lite/delegates/gpu/common/operations.cc b/tensorflow/lite/delegates/gpu/common/operations.cc index 28ce67b1ce3..bdcf6f605cc 100644 --- a/tensorflow/lite/delegates/gpu/common/operations.cc +++ b/tensorflow/lite/delegates/gpu/common/operations.cc @@ -506,6 +506,14 @@ BHWC CalculateOutputShape(const BHWC& input, const PadAttributes& attr) { attr.appended.c + attr.prepended.c + input.c); } +BHWDC CalculateOutputShape(const BHWDC& input, const Pad3DAttributes& attr) { + return BHWDC(attr.appended.b + attr.prepended.b + input.b, + attr.appended.h + attr.prepended.h + input.h, + attr.appended.w + attr.prepended.w + input.w, + attr.appended.d + attr.prepended.d + input.d, + attr.appended.c + attr.prepended.c + input.c); +} + BHWC CalculateOutputShape(const BHWC& input, const FullyConnectedAttributes& attr) { return BHWC(input.b, 1, 1, attr.weights.shape.o); @@ -562,6 +570,62 @@ absl::Status CalculateOutputShape(const std::vector& input, return absl::OkStatus(); } +absl::Status CalculateOutputShape(const std::vector& input, + const ConcatAttributes& attr, + BHWDC* output_shape) { + BHWDC new_shape = input[0]; + switch (attr.axis) { + case Axis::CHANNELS: + for (int i = 1; 
i < input.size(); ++i) { + if (input[i].h != new_shape.h || input[i].w != new_shape.w || + input[i].d != new_shape.d) { + return absl::InvalidArgumentError( + "Height, Width and Depth must be the same when concatenating " + "by channels axis"); + } + new_shape.c += input[i].c; + } + break; + case Axis::HEIGHT: + for (int i = 1; i < input.size(); ++i) { + if (input[i].w != new_shape.w || input[i].c != new_shape.c || + input[i].d != new_shape.d) { + return absl::InvalidArgumentError( + "Width, Depth and Channels must be the same when concatenating " + "by height axis"); + } + new_shape.h += input[i].h; + } + break; + case Axis::WIDTH: + for (int i = 1; i < input.size(); ++i) { + if (input[i].h != new_shape.h || input[i].c != new_shape.c || + input[i].d != new_shape.d) { + return absl::InvalidArgumentError( + "Height, Depth and Channels must be the same when concatenating " + "by width axis"); + } + new_shape.w += input[i].w; + } + break; + case Axis::DEPTH: + for (int i = 1; i < input.size(); ++i) { + if (input[i].w != new_shape.w || input[i].h != new_shape.h || + input[i].c != new_shape.c) { + return absl::InvalidArgumentError( + "Width, Height and Channels must be the same when concatenating " + "by depth axis"); + } + new_shape.d += input[i].d; + } + break; + default: + return absl::InvalidArgumentError("Invalid axis"); + } + *output_shape = new_shape; + return absl::OkStatus(); +} + Padding2D CalculateSamePadding(const BHWC& input, const Convolution2DAttributes& attr) { return MakeSamePadding(input, attr); diff --git a/tensorflow/lite/delegates/gpu/common/operations.h b/tensorflow/lite/delegates/gpu/common/operations.h index 4eb41dfe1a3..d0268eee585 100644 --- a/tensorflow/lite/delegates/gpu/common/operations.h +++ b/tensorflow/lite/delegates/gpu/common/operations.h @@ -206,6 +206,12 @@ absl::Status CalculateOutputShape(const std::vector& input, const ConcatAttributes& attr, BHWC* output_shape); +// @return shape of a tensor after Concat operation is applied to 
the given +// input. +absl::Status CalculateOutputShape(const std::vector& input, + const ConcatAttributes& attr, + BHWDC* output_shape); + // @return padding for pooling operation to make sure output keep the same shape // as the given input. Padding2D CalculateSamePadding(const BHWC& input, @@ -425,6 +431,17 @@ struct PadAttributes { // @return shape of a tensor after Pad operation is applied to the given input. BHWC CalculateOutputShape(const BHWC& input, const PadAttributes& attr); +struct Pad3DAttributes { + PaddingContentType type = PaddingContentType::ZEROS; + + BHWDC prepended; + BHWDC appended; +}; + +// @return shape of a tensor after Pad3D operation is applied to the given +// input. +BHWDC CalculateOutputShape(const BHWDC& input, const Pad3DAttributes& attr); + struct ConstTensorAttributes { Tensor tensor; }; diff --git a/tensorflow/lite/delegates/gpu/delegate.cc b/tensorflow/lite/delegates/gpu/delegate.cc index 58da8862937..4b6727e66e7 100644 --- a/tensorflow/lite/delegates/gpu/delegate.cc +++ b/tensorflow/lite/delegates/gpu/delegate.cc @@ -263,12 +263,12 @@ class DelegateKernel { input_refs->clear(); output_refs->clear(); - const auto& inputs = graph->inputs(); + const auto inputs = graph->inputs(); input_refs->reserve(inputs.size()); for (const auto& input : inputs) { input_refs->push_back(input->tensor.ref); } - const auto& outputs = graph->outputs(); + const auto outputs = graph->outputs(); output_refs->reserve(outputs.size()); for (const auto& output : outputs) { output_refs->push_back(output->tensor.ref); diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/BUILD b/tensorflow/lite/delegates/gpu/gl/kernels/BUILD index d2ef617a8e2..700a553a125 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/gl/kernels/BUILD @@ -73,6 +73,7 @@ cc_library( cc_test( name = "add_test", srcs = ["add_test.cc"], + linkstatic = True, tags = tf_gpu_tests_tags() + [ "notap", "tflite_not_portable_ios", @@ -102,6 +103,7 @@ 
cc_library( cc_test( name = "concat_test", srcs = ["concat_test.cc"], + linkstatic = True, tags = tf_gpu_tests_tags() + [ "notap", "tflite_not_portable_ios", @@ -136,6 +138,7 @@ cc_library( cc_test( name = "conv_test", srcs = ["conv_test.cc"], + linkstatic = True, tags = tf_gpu_tests_tags() + [ "notap", "tflite_not_portable_ios", @@ -176,6 +179,7 @@ cc_library( cc_test( name = "depthwise_conv_test", srcs = ["depthwise_conv_test.cc"], + linkstatic = True, tags = tf_gpu_tests_tags() + [ "notap", "tflite_not_portable_ios", @@ -205,6 +209,7 @@ cc_library( cc_test( name = "elementwise_test", srcs = ["elementwise_test.cc"], + linkstatic = True, tags = tf_gpu_tests_tags() + [ "notap", "tflite_not_portable_ios", @@ -235,6 +240,7 @@ cc_library( cc_test( name = "fully_connected_test", srcs = ["fully_connected_test.cc"], + linkstatic = True, tags = tf_gpu_tests_tags() + [ "notap", "tflite_not_portable_ios", @@ -263,6 +269,7 @@ cc_library( cc_test( name = "lstm_test", srcs = ["lstm_test.cc"], + linkstatic = True, tags = tf_gpu_tests_tags() + [ "notap", "tflite_not_portable_ios", @@ -292,6 +299,7 @@ cc_library( cc_test( name = "max_unpooling_test", srcs = ["max_unpooling_test.cc"], + linkstatic = True, tags = tf_gpu_tests_tags() + [ "notap", "tflite_not_portable_ios", @@ -322,6 +330,7 @@ cc_library( cc_test( name = "mean_test", srcs = ["mean_test.cc"], + linkstatic = True, tags = [ "notap", "tflite_not_portable_ios", @@ -351,6 +360,7 @@ cc_library( cc_test( name = "mul_test", srcs = ["mul_test.cc"], + linkstatic = True, tags = tf_gpu_tests_tags() + [ "notap", "tflite_not_portable_ios", @@ -380,6 +390,7 @@ cc_library( cc_test( name = "pad_test", srcs = ["pad_test.cc"], + linkstatic = True, tags = tf_gpu_tests_tags() + [ "notap", "tflite_not_portable_ios", @@ -409,6 +420,7 @@ cc_library( cc_test( name = "pooling_test", srcs = ["pooling_test.cc"], + linkstatic = True, tags = tf_gpu_tests_tags() + [ "notap", "tflite_not_portable_ios", @@ -440,6 +452,7 @@ cc_library( cc_test( name = 
"prelu_test", srcs = ["prelu_test.cc"], + linkstatic = True, tags = tf_gpu_tests_tags() + [ "notap", "tflite_not_portable_ios", @@ -471,6 +484,7 @@ cc_library( cc_test( name = "quantize_and_dequantize_test", srcs = ["quantize_and_dequantize_test.cc"], + linkstatic = True, tags = tf_gpu_tests_tags() + [ "notap", "tflite_not_portable_ios", @@ -501,6 +515,7 @@ cc_library( cc_test( name = "relu_test", srcs = ["relu_test.cc"], + linkstatic = True, tags = tf_gpu_tests_tags() + [ "notap", "tflite_not_portable_ios", @@ -529,6 +544,7 @@ cc_library( cc_test( name = "reshape_test", srcs = ["reshape_test.cc"], + linkstatic = True, tags = tf_gpu_tests_tags() + [ "notap", "tflite_not_portable_ios", @@ -558,6 +574,7 @@ cc_library( cc_test( name = "slice_test", srcs = ["slice_test.cc"], + linkstatic = True, tags = tf_gpu_tests_tags() + [ "notap", "tflite_not_portable_ios", @@ -589,6 +606,7 @@ cc_library( cc_test( name = "softmax_test", srcs = ["softmax_test.cc"], + linkstatic = True, tags = tf_gpu_tests_tags() + [ "notap", "tflite_not_portable_ios", @@ -618,6 +636,7 @@ cc_library( cc_test( name = "space_to_depth_test", srcs = ["space_to_depth_test.cc"], + linkstatic = True, tags = tf_gpu_tests_tags() + [ "notap", "tflite_not_portable_ios", @@ -679,6 +698,7 @@ cc_library( cc_test( name = "transpose_conv_test", srcs = ["transpose_conv_test.cc"], + linkstatic = True, tags = tf_gpu_tests_tags() + [ "notap", "tflite_not_portable_ios", @@ -708,6 +728,7 @@ cc_library( cc_test( name = "resize_test", srcs = ["resize_test.cc"], + linkstatic = True, tags = tf_gpu_tests_tags() + [ "notap", "tflite_not_portable_ios", diff --git a/tensorflow/lite/delegates/gpu/metal/BUILD b/tensorflow/lite/delegates/gpu/metal/BUILD index 192c787b0db..4db8f3d071d 100644 --- a/tensorflow/lite/delegates/gpu/metal/BUILD +++ b/tensorflow/lite/delegates/gpu/metal/BUILD @@ -80,7 +80,7 @@ objc_library( ios_unit_test( name = "common_test", testonly = 1, - minimum_os_version = "10.0", + minimum_os_version = "11.0", 
runner = tflite_ios_lab_runner("IOS_LATEST"), tags = tf_gpu_tests_tags() + [ "notap", @@ -117,7 +117,7 @@ objc_library( ios_unit_test( name = "compiled_model_test", testonly = 1, - minimum_os_version = "10.0", + minimum_os_version = "11.0", runner = tflite_ios_lab_runner("IOS_LATEST"), tags = tf_gpu_tests_tags() + [ "notap", @@ -205,7 +205,7 @@ objc_library( ios_unit_test( name = "inference_context_test", testonly = 1, - minimum_os_version = "10.0", + minimum_os_version = "11.0", runner = tflite_ios_lab_runner("IOS_LATEST"), tags = tf_gpu_tests_tags() + [ "notap", @@ -235,7 +235,7 @@ ios_application( "iphone", ], infoplists = ["Info.plist"], - minimum_os_version = "10.0", + minimum_os_version = "11.0", provisioning_profile = "//tensorflow/lite/delegates/gpu/metal:provisioning_profile.mobileprovision", tags = tf_gpu_tests_tags() + [ "local", @@ -267,7 +267,7 @@ objc_library( ios_unit_test( name = "ComponentsTests", - minimum_os_version = "10.0", + minimum_os_version = "11.0", runner = tflite_ios_lab_runner("IOS_LATEST"), tags = tf_gpu_tests_tags() + ["notap"], test_host = ":TestApplication", diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD index a1052b8adf4..657e9b53a59 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD @@ -71,7 +71,7 @@ objc_library( ios_unit_test( name = "add_test", testonly = 1, - minimum_os_version = "10.0", + minimum_os_version = "11.0", runner = tflite_ios_lab_runner("IOS_LATEST"), tags = tf_gpu_tests_tags() + [ "notap", @@ -109,7 +109,7 @@ objc_library( ios_unit_test( name = "concat_test", testonly = 1, - minimum_os_version = "10.0", + minimum_os_version = "11.0", runner = tflite_ios_lab_runner("IOS_LATEST"), tags = tf_gpu_tests_tags() + [ "notap", @@ -151,7 +151,7 @@ objc_library( ios_unit_test( name = "conv_test", testonly = 1, - minimum_os_version = "10.0", + minimum_os_version = "11.0", runner = 
tflite_ios_lab_runner("IOS_LATEST"), tags = tf_gpu_tests_tags() + [ "notap", @@ -213,7 +213,7 @@ objc_library( ios_unit_test( name = "depthwise_conv_test", testonly = 1, - minimum_os_version = "10.0", + minimum_os_version = "11.0", runner = tflite_ios_lab_runner("IOS_LATEST"), tags = tf_gpu_tests_tags() + [ "notap", @@ -253,7 +253,7 @@ objc_library( ios_unit_test( name = "elementwise_test", testonly = 1, - minimum_os_version = "10.0", + minimum_os_version = "11.0", runner = tflite_ios_lab_runner("IOS_LATEST"), tags = tf_gpu_tests_tags() + [ "notap", @@ -293,7 +293,7 @@ objc_library( ios_unit_test( name = "fully_connected_test", testonly = 1, - minimum_os_version = "10.0", + minimum_os_version = "11.0", runner = tflite_ios_lab_runner("IOS_LATEST"), tags = tf_gpu_tests_tags() + [ "notap", @@ -332,7 +332,7 @@ objc_library( ios_unit_test( name = "max_unpooling_test", testonly = 1, - minimum_os_version = "10.0", + minimum_os_version = "11.0", runner = tflite_ios_lab_runner("IOS_LATEST"), tags = tf_gpu_tests_tags() + [ "notap", @@ -371,7 +371,7 @@ objc_library( ios_unit_test( name = "mean_test", testonly = 1, - minimum_os_version = "10.0", + minimum_os_version = "11.0", runner = tflite_ios_lab_runner("IOS_LATEST"), tags = [ "notap", @@ -450,7 +450,7 @@ objc_library( ios_unit_test( name = "padding_test", testonly = 1, - minimum_os_version = "10.0", + minimum_os_version = "11.0", runner = tflite_ios_lab_runner("IOS_LATEST"), tags = tf_gpu_tests_tags() + [ "notap", @@ -490,7 +490,7 @@ objc_library( ios_unit_test( name = "pooling_test", testonly = 1, - minimum_os_version = "10.0", + minimum_os_version = "11.0", runner = tflite_ios_lab_runner("IOS_LATEST"), tags = tf_gpu_tests_tags() + [ "notap", @@ -530,7 +530,7 @@ objc_library( ios_unit_test( name = "prelu_test", testonly = 1, - minimum_os_version = "10.0", + minimum_os_version = "11.0", runner = tflite_ios_lab_runner("IOS_LATEST"), tags = tf_gpu_tests_tags() + [ "notap", @@ -569,7 +569,7 @@ objc_library( ios_unit_test( 
name = "relu_test", testonly = 1, - minimum_os_version = "10.0", + minimum_os_version = "11.0", runner = tflite_ios_lab_runner("IOS_LATEST"), tags = tf_gpu_tests_tags() + [ "notap", @@ -608,7 +608,7 @@ objc_library( ios_unit_test( name = "resize_test", testonly = 1, - minimum_os_version = "10.0", + minimum_os_version = "11.0", runner = tflite_ios_lab_runner("IOS_LATEST"), tags = tf_gpu_tests_tags() + [ "notap", @@ -648,7 +648,7 @@ objc_library( ios_unit_test( name = "reshape_test", testonly = 1, - minimum_os_version = "10.0", + minimum_os_version = "11.0", runner = tflite_ios_lab_runner("IOS_LATEST"), tags = tf_gpu_tests_tags() + [ "notap", @@ -687,7 +687,7 @@ objc_library( ios_unit_test( name = "slice_test", testonly = 1, - minimum_os_version = "10.0", + minimum_os_version = "11.0", runner = tflite_ios_lab_runner("IOS_LATEST"), tags = tf_gpu_tests_tags() + [ "notap", @@ -727,7 +727,7 @@ objc_library( ios_unit_test( name = "softmax_test", testonly = 1, - minimum_os_version = "10.0", + minimum_os_version = "11.0", runner = tflite_ios_lab_runner("IOS_LATEST"), tags = tf_gpu_tests_tags() + [ "notap", @@ -764,7 +764,7 @@ objc_library( ios_unit_test( name = "space_to_depth_test", testonly = 1, - minimum_os_version = "10.0", + minimum_os_version = "11.0", runner = tflite_ios_lab_runner("IOS_LATEST"), tags = tf_gpu_tests_tags() + [ "notap", @@ -804,7 +804,7 @@ objc_library( ios_unit_test( name = "transpose_conv_test", testonly = 1, - minimum_os_version = "10.0", + minimum_os_version = "11.0", runner = tflite_ios_lab_runner("IOS_LATEST"), tags = tf_gpu_tests_tags() + [ "notap", @@ -885,7 +885,7 @@ objc_library( ios_unit_test( name = "winograd_test", testonly = 1, - minimum_os_version = "10.0", + minimum_os_version = "11.0", runner = tflite_ios_lab_runner("IOS_LATEST"), tags = tf_gpu_tests_tags() + [ "notap", diff --git a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc index 01d56fb2102..cc9e049123e 
100644 --- a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc +++ b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc @@ -300,13 +300,15 @@ VariedShapeSpec/ReshapeOpTest/RegularShapes/1 VariedShapeSpec/ReshapeOpTest/WithStretchDimension/1 # resize_bilinear_test +// align_corners & half_pixel_centers are not implemented in NNAPI before API 30 +ResizeBilinearOpTest/ResizeBilinearOpTest.+HalfPixelCenters.*,30 // Only models with constant size tensor are accelerated ResizeBilinearOpTest/ResizeBilinearOpTest/.+/0,29 # resize_nearest_neighbor_test -// align_corners & half_pixel_centers are not implemented in NNAPI. --ResizeNearestNeighborOpTest/ResizeNearestNeighborOpTest.+AlignCorners.*,29 --ResizeNearestNeighborOpTest/ResizeNearestNeighborOpTest.+HalfPixelCenters.*,29 +// align_corners & half_pixel_centers are not implemented in NNAPI before API 30 +ResizeNearestNeighborOpTest/ResizeNearestNeighborOpTest.+AlignCorners.*,30 +ResizeNearestNeighborOpTest/ResizeNearestNeighborOpTest.+HalfPixelCenters.*,30 // Only models with constant size tensor are accelerated ResizeNearestNeighborOpTest/ResizeNearestNeighborOpTest/.+/0,29 diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc index 67e038e962e..002c29915c6 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc @@ -1648,13 +1648,14 @@ bool NNAPIDelegateKernel::Validate( } auto builtin = reinterpret_cast(node->builtin_data); - Expect(!builtin->align_corners, - NNAPIValidationFailureType::kUnsupportedOperandValue, - "NNAPI does not support align_corners == true.", &val_ctx); - // TODO(b/147696142): Update when NNAPI delegate can support TF2 behavior. 
- Expect(!builtin->half_pixel_centers, - NNAPIValidationFailureType::kUnsupportedOperandValue, - "NNAPI does not support half_pixel_centers == true.", &val_ctx); + if (android_sdk_version <= kMinSdkVersionForNNAPI12) { + Expect(!builtin->align_corners, + NNAPIValidationFailureType::kUnsupportedOperandValue, + "NNAPI does not support align_corners == true.", &val_ctx); + Expect(!builtin->half_pixel_centers, + NNAPIValidationFailureType::kUnsupportedOperandValue, + "NNAPI does not support half_pixel_centers == true.", &val_ctx); + } if (android_sdk_version < kMinSdkVersionForNNAPI12) { Expect(input.type == kTfLiteFloat32, NNAPIValidationFailureType::kUnsupportedInputType, @@ -1668,14 +1669,14 @@ bool NNAPIDelegateKernel::Validate( ExpectIsFloatOrQuant8Operator(context, node, &val_ctx); auto builtin = reinterpret_cast( node->builtin_data); - // TODO(b/149823713): Update when NNAPI delegate can support align_corners - // & half_pixel_centers. - Expect(!builtin->align_corners, - NNAPIValidationFailureType::kUnsupportedOperandValue, - "NNAPI does not support align_corners == true.", &val_ctx); - Expect(!builtin->half_pixel_centers, - NNAPIValidationFailureType::kUnsupportedOperandValue, - "NNAPI does not support half_pixel_centers == true.", &val_ctx); + if (android_sdk_version <= kMinSdkVersionForNNAPI12) { + Expect(!builtin->align_corners, + NNAPIValidationFailureType::kUnsupportedOperandValue, + "NNAPI does not support align_corners == true.", &val_ctx); + Expect(!builtin->half_pixel_centers, + NNAPIValidationFailureType::kUnsupportedOperandValue, + "NNAPI does not support half_pixel_centers == true.", &val_ctx); + } } break; case kTfLiteBuiltinSqueeze: { ExpectOpVersion(version, 1, &val_ctx); @@ -2436,6 +2437,14 @@ TfLiteStatus NNAPIDelegateKernel::Map( const int output_width = output.dims->data[2]; mapping_args.builder->AddScalarInt32Operand(output_width); mapping_args.builder->AddScalarInt32Operand(output_height); + auto builtin = reinterpret_cast( + 
mapping_args.node->builtin_data); + if (builtin->align_corners == true || + builtin->half_pixel_centers == true) { + mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format + mapping_args.builder->AddScalarBoolOperand(builtin->align_corners); + mapping_args.builder->AddScalarBoolOperand(builtin->half_pixel_centers); + } *nn_op_type = ANEURALNETWORKS_RESIZE_BILINEAR; } break; case kTfLiteBuiltinResizeNearestNeighbor: { @@ -2445,7 +2454,13 @@ TfLiteStatus NNAPIDelegateKernel::Map( mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[1]); mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[0]); mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format - + auto builtin = reinterpret_cast( + mapping_args.node->builtin_data); + if (builtin->align_corners == true || + builtin->half_pixel_centers == true) { + mapping_args.builder->AddScalarBoolOperand(builtin->align_corners); + mapping_args.builder->AddScalarBoolOperand(builtin->half_pixel_centers); + } *nn_op_type = ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR; } break; case kTfLiteBuiltinSqueeze: { @@ -3754,7 +3769,8 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context, } } else if (reg->builtin_code == kTfLiteBuiltinMaximum || reg->builtin_code == kTfLiteBuiltinMinimum) { - const TfLiteTensor& operand_tensor = context->tensors[input_pos]; + const TfLiteTensor& operand_tensor = + context->tensors[node->inputs->data[input_pos]]; if (operand_tensor.dims->size == 0) { int tensor_index; @@ -3799,7 +3815,8 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context, reg->builtin_code == kTfLiteBuiltinSum) && (input_pos == 1)) { // The axis needs, be converted to a tensor if specified as scalar - const TfLiteTensor& axis_tensor = context->tensors[1]; + const TfLiteTensor& axis_tensor = + context->tensors[node->inputs->data[input_pos]]; if (axis_tensor.dims->size == 0) { TF_LITE_ENSURE_STATUS( builder.AddVectorInt32Operand(axis_tensor.data.i32, 
1)); diff --git a/tensorflow/lite/delegates/utils/BUILD b/tensorflow/lite/delegates/utils/BUILD new file mode 100644 index 00000000000..069da167455 --- /dev/null +++ b/tensorflow/lite/delegates/utils/BUILD @@ -0,0 +1,36 @@ +package( + default_visibility = [ + "//visibility:public", + ], + licenses = ["notice"], # Apache 2.0 +) + +cc_library( + name = "simple_delegate", + srcs = [ + "simple_delegate.cc", + ], + hdrs = [ + "simple_delegate.h", + ], + deps = [ + "//tensorflow/lite:kernel_api", + "//tensorflow/lite:minimal_logging", + "//tensorflow/lite/c:common", + "//tensorflow/lite/delegates:utils", + "//tensorflow/lite/kernels/internal:compatibility", + ], +) + +cc_test( + name = "simple_delegate_test", + srcs = ["simple_delegate_test.cc"], + deps = [ + ":simple_delegate", + "//tensorflow/lite:framework", + "//tensorflow/lite:kernel_api", + "//tensorflow/lite/c:common", + "//tensorflow/lite/kernels:builtin_ops", + "@com_google_googletest//:gtest_main", + ], +) diff --git a/tensorflow/lite/delegates/utils/simple_delegate.cc b/tensorflow/lite/delegates/utils/simple_delegate.cc new file mode 100644 index 00000000000..51736e56d26 --- /dev/null +++ b/tensorflow/lite/delegates/utils/simple_delegate.cc @@ -0,0 +1,140 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/lite/delegates/utils/simple_delegate.h" + +#include +#include + +#include "tensorflow/lite/builtin_ops.h" +#include "tensorflow/lite/context_util.h" +#include "tensorflow/lite/delegates/utils.h" +#include "tensorflow/lite/kernels/internal/compatibility.h" +#include "tensorflow/lite/minimal_logging.h" + +namespace tflite { +namespace { +TfLiteRegistration GetDelegateKernelRegistration( + SimpleDelegateInterface* delegate) { + TfLiteRegistration kernel_registration; + kernel_registration.profiling_string = nullptr; + kernel_registration.builtin_code = kTfLiteBuiltinDelegate; + kernel_registration.custom_name = delegate->name(); + kernel_registration.free = [](TfLiteContext* context, void* buffer) -> void { + delete reinterpret_cast(buffer); + }; + kernel_registration.init = [](TfLiteContext* context, const char* buffer, + size_t length) -> void* { + const TfLiteDelegateParams* params = + reinterpret_cast(buffer); + if (params == nullptr) { + TF_LITE_KERNEL_LOG(context, "NULL TfLiteDelegateParams passed."); + return nullptr; + } + auto* delegate = + reinterpret_cast(params->delegate->data_); + std::unique_ptr delegate_kernel( + delegate->CreateDelegateKernelInterface()); + if (delegate_kernel->Init(context, params) != kTfLiteOk) { + return nullptr; + } + return delegate_kernel.release(); + }; + kernel_registration.prepare = [](TfLiteContext* context, + TfLiteNode* node) -> TfLiteStatus { + if (node->user_data == nullptr) { + TF_LITE_KERNEL_LOG(context, "Delegate kernel was not initialized"); + return kTfLiteError; + } + SimpleDelegateKernelInterface* delegate_kernel = + reinterpret_cast(node->user_data); + return delegate_kernel->Prepare(context, node); + }; + kernel_registration.invoke = [](TfLiteContext* context, + TfLiteNode* node) -> TfLiteStatus { + SimpleDelegateKernelInterface* delegate_kernel = + reinterpret_cast(node->user_data); + 
TFLITE_DCHECK(delegate_kernel != nullptr); + return delegate_kernel->Invoke(context, node); + }; + + return kernel_registration; +} + +TfLiteStatus DelegatePrepare(TfLiteContext* context, + TfLiteDelegate* base_delegate) { + auto* delegate = + reinterpret_cast(base_delegate->data_); + delegates::IsNodeSupportedFn node_supported_fn = + [=](TfLiteContext* context, TfLiteNode* node, + TfLiteRegistration* registration, + std::string* unsupported_details) -> bool { + return delegate->IsNodeSupportedByDelegate(registration, node, context); + }; + // TODO(b/149484598): Update to have method that gets all supported nodes. + delegates::GraphPartitionHelper helper(context, node_supported_fn); + TF_LITE_ENSURE_STATUS(helper.Partition(nullptr)); + + const auto delegate_partitions = helper.GetFirstNLargestPartitions(); + + // To avoid creating a new TfLiteIntArray and free it later, we reserve one + // element to represent TfLiteIntArray.size which is the 1st element of + // TfLiteIntArray C struct. + std::vector supported_nodes(1); + for (const auto partition : delegate_partitions) { + auto* nodes = partition->nodes_to_replace; + supported_nodes.insert(supported_nodes.end(), nodes->data, + nodes->data + nodes->size); + } + // Set first element to the number of nodes to replace. 
+ supported_nodes[0] = supported_nodes.size() - 1; + + TFLITE_LOG_PROD(tflite::TFLITE_LOG_INFO, + "%s delegate: %d nodes delegated out of %d nodes with " + "%d partitions.\n", + delegate->name(), supported_nodes[0], + helper.num_total_nodes(), delegate_partitions.size()); + TfLiteRegistration delegate_kernel_registration = + GetDelegateKernelRegistration(delegate); + + return context->ReplaceNodeSubsetsWithDelegateKernels( + context, delegate_kernel_registration, + reinterpret_cast(supported_nodes.data()), base_delegate); +} +} // namespace + +TfLiteDelegate* TfLiteDelegateFactory::CreateSimpleDelegate( + std::unique_ptr simple_delegate) { + if (simple_delegate == nullptr) { + return nullptr; + } + auto delegate = new TfLiteDelegate(); + delegate->Prepare = &DelegatePrepare; + delegate->flags = kTfLiteDelegateFlagsNone; + delegate->CopyFromBufferHandle = nullptr; + delegate->CopyToBufferHandle = nullptr; + delegate->FreeBufferHandle = nullptr; + delegate->data_ = simple_delegate.release(); + return delegate; +} + +void TfLiteDelegateFactory::DeleteSimpleDelegate(TfLiteDelegate* delegate) { + if (!delegate) return; + SimpleDelegateInterface* simple_delegate = + reinterpret_cast(delegate->data_); + delete simple_delegate; + delete delegate; +} + +} // namespace tflite diff --git a/tensorflow/lite/delegates/utils/simple_delegate.h b/tensorflow/lite/delegates/utils/simple_delegate.h new file mode 100644 index 00000000000..bf35fbc47aa --- /dev/null +++ b/tensorflow/lite/delegates/utils/simple_delegate.h @@ -0,0 +1,109 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This file has utilities that facilitates creating new delegates. +// - SimpleDelegateKernelInterface: Represents a Kernel which handles a subgraph +// to be delegated. It has Init/Prepare/Invoke which are going to be called +// during inference, similar to TFLite Kernels. Delegate owner should implement +// this interface to build/prepare/invoke the delegated subgraph. +// - SimpleDelegateInterface: +// This class wraps TFLiteDelegate and users need to implement the interface and +// then Call GetFinalizedDelegate() to get TfLiteDelegate* that can be passed to +// ModifyGraphWithDelegate. +#ifndef TENSORFLOW_LITE_DELEGATES_UTILS_SIMPLE_DELEGATE_H_ +#define TENSORFLOW_LITE_DELEGATES_UTILS_SIMPLE_DELEGATE_H_ + +#include + +#include "tensorflow/lite/c/common.h" + +namespace tflite { + +// Users should inherit from this class and implement the interface below. +// Each instance represents a single part of the graph (subgraph). +class SimpleDelegateKernelInterface { + public: + virtual ~SimpleDelegateKernelInterface() {} + + // Initializes a delegated subgraph. + // The nodes in the subgraph are inside TfLiteDelegateParams->nodes_to_replace + virtual TfLiteStatus Init(TfLiteContext* context, + const TfLiteDelegateParams* params) = 0; + + // Will be called by the framework. Should handle any needed preparation + // for the subgraph e.g. allocating buffers, compiling model. + // Returns status, and signalling any errors. 
+ virtual TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) = 0; + + // Actual subgraph inference should happen on this call. + // Returns status, and signalling any errors. + virtual TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node) = 0; +}; + +// Pure Interface that clients should implement. +// The Interface represents a delegate capabilities and provide factory +// for SimpleDelegateKernelInterface +// +// Clients should implement the following methods: +// - IsNodeSupportedByDelegate +// - name +// - CreateDelegateKernelInterface +class SimpleDelegateInterface { + public: + SimpleDelegateInterface() {} + + virtual ~SimpleDelegateInterface() {} + + // Returns true if 'node' is supported by the delegate. False otherwise. + virtual bool IsNodeSupportedByDelegate(const TfLiteRegistration* registration, + const TfLiteNode* node, + TfLiteContext* context) const = 0; + + // Returns a name that identifies the delegate. + // This name is used for debugging/logging/profiling. + virtual const char* name() const = 0; + + // Returns instance of an object that implements the interface + // SimpleDelegateKernelInterface. + // An instance of SimpleDelegateKernelInterface represents one subgraph to + // be delegated. + // Caller takes ownership of the returned object. + virtual std::unique_ptr + CreateDelegateKernelInterface() = 0; +}; + +// Factory class that provides two static methods +// CreateSimpleDelegate +// DeleteSimpleDelegate +// Which should be used to construct TfLiteDelegate from +// Simple Delegate and delete TfLiteDelegate and SimpleDelegate give +// tfLiteDelegate* created from 'CreateSimpleDelegate' method. +// Users should use these methods to Create and Destroy the delegate. +class TfLiteDelegateFactory { + public: + // Creates TfLiteDelegate from the provided SimpleDelegateInterface. + // The returned TfLiteDelegate should be deleted using DeleteSimpleDelegate. 
+ static TfLiteDelegate* CreateSimpleDelegate( + std::unique_ptr simple_delegate); + + // Deletes 'delegate' the passed pointer must be the one returned + // from GetFinalizedDelegate. + // This function will destruct the SimpleDelegate object too. + static void DeleteSimpleDelegate(TfLiteDelegate* delegate); +}; + +} // namespace tflite + +#endif // TENSORFLOW_LITE_DELEGATES_UTILS_SIMPLE_DELEGATE_H_ diff --git a/tensorflow/lite/delegates/utils/simple_delegate_test.cc b/tensorflow/lite/delegates/utils/simple_delegate_test.cc new file mode 100644 index 00000000000..fa6d528a537 --- /dev/null +++ b/tensorflow/lite/delegates/utils/simple_delegate_test.cc @@ -0,0 +1,194 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/lite/delegates/utils/simple_delegate.h" + +#include + +#include +#include +#include "tensorflow/lite/builtin_ops.h" +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/interpreter.h" +#include "tensorflow/lite/kernels/builtin_op_kernels.h" + +namespace tflite { +namespace { +// Delegate options. +struct TestSimpleDelegateOptions { + // Allowed ops to delegate. + int allowed_builtin_code; + // Report error during init. + bool error_during_init = false; + // Report error during prepare. + bool error_during_prepare = false; + // Report error during invoke. 
+ bool error_during_invoke = false; +}; + +// Dummy delegate kernel. +class TestSimpleDelegateKernel : public SimpleDelegateKernelInterface { + public: + explicit TestSimpleDelegateKernel(TestSimpleDelegateOptions options) + : options_(options) {} + + TfLiteStatus Init(TfLiteContext* context, + const TfLiteDelegateParams* params) override { + return !options_.error_during_init ? kTfLiteOk : kTfLiteError; + } + + TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) override { + return !options_.error_during_prepare ? kTfLiteOk : kTfLiteError; + } + + TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node) override { + return !options_.error_during_invoke ? kTfLiteOk : kTfLiteError; + } + + private: + TestSimpleDelegateOptions options_; +}; + +// Simple delegate which implements the interface of SimpleDelegateInterface. +// This holds the Delegate capabilities. +class TestSimpleDelegate : public SimpleDelegateInterface { + public: + explicit TestSimpleDelegate(TestSimpleDelegateOptions options) + : options_(options) {} + bool IsNodeSupportedByDelegate(const TfLiteRegistration* registration, + const TfLiteNode* node, + TfLiteContext* context) const override { + return options_.allowed_builtin_code == registration->builtin_code; + } + + const char* name() const override { return "TestSimpleDelegate"; } + + std::unique_ptr CreateDelegateKernelInterface() + override { + return std::make_unique(options_); + } + + private: + TestSimpleDelegateOptions options_; +}; + +class TestDelegate : public ::testing::Test { + protected: + void SetUp() override { + interpreter_.reset(new Interpreter); + interpreter_->AddTensors(5); + interpreter_->SetInputs({0, 1}); + interpreter_->SetOutputs({3, 4}); + TfLiteQuantizationParams quant; + interpreter_->SetTensorParametersReadWrite(0, kTfLiteFloat32, "", {3}, + quant); + interpreter_->SetTensorParametersReadWrite(1, kTfLiteFloat32, "", {3}, + quant); + interpreter_->SetTensorParametersReadWrite(2, kTfLiteFloat32, "", {3}, 
+ quant); + interpreter_->SetTensorParametersReadWrite(3, kTfLiteFloat32, "", {3}, + quant); + interpreter_->SetTensorParametersReadWrite(4, kTfLiteFloat32, "", {3}, + quant); + TfLiteRegistration* reg = ops::builtin::Register_ADD(); + void* builtin_data_1 = malloc(sizeof(int)); + void* builtin_data_2 = malloc(sizeof(int)); + void* builtin_data_3 = malloc(sizeof(int)); + interpreter_->AddNodeWithParameters({0, 0}, {2}, nullptr, 0, builtin_data_1, + reg); + interpreter_->AddNodeWithParameters({1, 1}, {3}, nullptr, 0, builtin_data_2, + reg); + interpreter_->AddNodeWithParameters({2, 1}, {4}, nullptr, 0, builtin_data_3, + reg); + } + + void TearDown() override { + interpreter_.reset(); + TfLiteDelegateFactory::DeleteSimpleDelegate(delegate_); + } + + protected: + std::unique_ptr interpreter_; + TfLiteDelegate* delegate_ = nullptr; +}; + +TEST_F(TestDelegate, BasicDelegate) { + TestSimpleDelegateOptions options; + options.allowed_builtin_code = kTfLiteBuiltinAdd; + delegate_ = TfLiteDelegateFactory::CreateSimpleDelegate( + std::make_unique(options)); + interpreter_->ModifyGraphWithDelegate(delegate_); + + ASSERT_EQ(interpreter_->execution_plan().size(), 1); + int node = interpreter_->execution_plan()[0]; + const auto* node_and_reg = interpreter_->node_and_registration(node); + EXPECT_EQ("TestSimpleDelegate", node_and_reg->second.custom_name); + + const TfLiteDelegateParams* params = static_cast( + node_and_reg->first.builtin_data); + ASSERT_EQ(params->nodes_to_replace->size, 3); + EXPECT_EQ(params->nodes_to_replace->data[0], 0); + EXPECT_EQ(params->nodes_to_replace->data[1], 1); + EXPECT_EQ(params->nodes_to_replace->data[2], 2); + + ASSERT_EQ(params->input_tensors->size, 2); + EXPECT_EQ(params->input_tensors->data[0], 0); + EXPECT_EQ(params->input_tensors->data[1], 1); + + ASSERT_EQ(params->output_tensors->size, 2); + EXPECT_EQ(params->output_tensors->data[0], 3); + EXPECT_EQ(params->output_tensors->data[1], 4); +} + +TEST_F(TestDelegate, NoNodesToDelegate) { + 
TestSimpleDelegateOptions options; + options.allowed_builtin_code = kTfLiteBuiltinSub; + delegate_ = TfLiteDelegateFactory::CreateSimpleDelegate( + std::make_unique(options)); + interpreter_->ModifyGraphWithDelegate(delegate_); + + ASSERT_EQ(interpreter_->execution_plan().size(), 3); +} + +TEST_F(TestDelegate, DelegateFailedPrepare) { + TestSimpleDelegateOptions options; + options.allowed_builtin_code = kTfLiteBuiltinAdd; + options.error_during_prepare = true; + delegate_ = TfLiteDelegateFactory::CreateSimpleDelegate( + std::make_unique(options)); + ASSERT_EQ(kTfLiteDelegateError, + interpreter_->ModifyGraphWithDelegate(delegate_)); +} + +TEST_F(TestDelegate, DelegateFailedInvoke) { + TestSimpleDelegateOptions options; + options.allowed_builtin_code = kTfLiteBuiltinAdd; + options.error_during_invoke = true; + delegate_ = TfLiteDelegateFactory::CreateSimpleDelegate( + std::make_unique(options)); + ASSERT_EQ(kTfLiteOk, interpreter_->ModifyGraphWithDelegate(delegate_)); + ASSERT_EQ(kTfLiteError, interpreter_->Invoke()); +} + +TEST_F(TestDelegate, DelegateFailedInit) { + TestSimpleDelegateOptions options; + options.allowed_builtin_code = kTfLiteBuiltinAdd; + options.error_during_init = true; + delegate_ = TfLiteDelegateFactory::CreateSimpleDelegate( + std::make_unique(options)); + ASSERT_EQ(kTfLiteDelegateError, + interpreter_->ModifyGraphWithDelegate(delegate_)); +} +} // namespace +} // namespace tflite diff --git a/tensorflow/lite/experimental/delegates/coreml/builders/op_builder.cc b/tensorflow/lite/experimental/delegates/coreml/builders/op_builder.cc index 09c386b55f0..2581b58f1e4 100644 --- a/tensorflow/lite/experimental/delegates/coreml/builders/op_builder.cc +++ b/tensorflow/lite/experimental/delegates/coreml/builders/op_builder.cc @@ -87,6 +87,16 @@ OpBuilder* GraphBuilder::AddBuilder( CoreML::Specification::Model* GraphBuilder::BuildModel() { CoreML::Specification::Model* model = new CoreML::Specification::Model(); + if (coreml_version_ == 2) { // Core ML 2, 
iOS >= 12.0 + model->set_specificationversion(3); + } else if (coreml_version_ == 3) { // Core ML 3, iOS >= 13.0 + model->set_specificationversion(4); + model->mutable_neuralnetwork()->set_arrayinputshapemapping( + CoreML::Specification::EXACT_ARRAY_MAPPING); + } else { + fprintf(stderr, "Unsupported Core ML version: %d\n", coreml_version_); + return nullptr; + } auto* neural_network = model->mutable_neuralnetwork(); for (auto& builder : builders_) { CoreML::Specification::NeuralNetworkLayer* layer = builder->Build(); diff --git a/tensorflow/lite/experimental/delegates/coreml/builders/op_builder.h b/tensorflow/lite/experimental/delegates/coreml/builders/op_builder.h index 5367ae20d2f..c59c30a5a28 100644 --- a/tensorflow/lite/experimental/delegates/coreml/builders/op_builder.h +++ b/tensorflow/lite/experimental/delegates/coreml/builders/op_builder.h @@ -52,6 +52,8 @@ class TensorID { // API is experimental and subject to change. class GraphBuilder { public: + explicit GraphBuilder(int coreml_version) : coreml_version_(coreml_version) {} + // Returns pointer to the created builder. Ownership still belongs // to the GraphBuilder. OpBuilder* AddBuilder(int builtin_code, const TfLiteNode* node); @@ -79,6 +81,8 @@ class GraphBuilder { // This information is used to mark constant tensors that are used as input. 
bool IsTensorUsed(int tflite_tensor_index); + const int coreml_version_; + private: std::vector> builders_; // Index in the vector is the tflite_tensor_index, the value diff --git a/tensorflow/lite/experimental/delegates/coreml/builders/op_validator.h b/tensorflow/lite/experimental/delegates/coreml/builders/op_validator.h index b0fe24ee288..501a304706c 100644 --- a/tensorflow/lite/experimental/delegates/coreml/builders/op_validator.h +++ b/tensorflow/lite/experimental/delegates/coreml/builders/op_validator.h @@ -32,7 +32,8 @@ bool IsFullyConnectedOpSupported(const TfLiteRegistration* registration, const TfLiteNode* node, TfLiteContext* context); bool IsReshapeOpSupported(const TfLiteRegistration* registration, - const TfLiteNode* node, TfLiteContext* context); + const TfLiteNode* node, TfLiteContext* context, + int coreml_version); bool IsResizeBilinearOpSupported(const TfLiteRegistration* registration, const TfLiteNode* node, TfLiteContext* context); diff --git a/tensorflow/lite/experimental/delegates/coreml/builders/reshape_op_builder.cc b/tensorflow/lite/experimental/delegates/coreml/builders/reshape_op_builder.cc index 33040e2e070..b7b78653d36 100644 --- a/tensorflow/lite/experimental/delegates/coreml/builders/reshape_op_builder.cc +++ b/tensorflow/lite/experimental/delegates/coreml/builders/reshape_op_builder.cc @@ -114,7 +114,11 @@ TfLiteStatus ReshapeOpBuilder::RegisterOutputs(const TfLiteIntArray* outputs, } bool IsReshapeOpSupported(const TfLiteRegistration* registration, - const TfLiteNode* node, TfLiteContext* context) { + const TfLiteNode* node, TfLiteContext* context, + int coreml_version) { + if (coreml_version >= 3) { + return false; + } if (node->inputs->size == 1) { const auto* params = reinterpret_cast(node->builtin_data); diff --git a/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.h b/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.h index 0d75afc8e34..8ad81040499 100644 --- 
a/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.h +++ b/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.h @@ -31,6 +31,13 @@ typedef enum { typedef struct { // Only create delegate when Neural Engine is available on the device. TfLiteCoreMlDelegateEnabledDevices enabled_devices; + // Specifies target Core ML version for model conversion. + // Core ML 3 come with a lot more ops, but some ops (e.g. reshape) is not + // delegated due to input rank constraint. + // if not set to one of the valid versions, the delegate will use highest + // version possible in the platform. + // Valid versions: (2, 3) + int coreml_version; // This sets the maximum number of Core ML delegates created. // Each graph corresponds to one delegated node subset in the // TFLite model. Set this to 0 to delegate all possible partitions. diff --git a/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.mm b/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.mm index 5d0564ebc48..58728659894 100644 --- a/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.mm +++ b/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.mm @@ -36,7 +36,7 @@ constexpr int kMinNodesPerCoreMlDelegate = 2; using delegates::coreml::CoreMlDelegateKernel; bool IsNodeSupportedByDelegate(const TfLiteRegistration* registration, const TfLiteNode* node, - TfLiteContext* context) { + TfLiteContext* context, const TfLiteCoreMlDelegateOptions* options) { if (@available(iOS 11.0, *)) { } else { return false; @@ -120,7 +120,8 @@ bool IsNodeSupportedByDelegate(const TfLiteRegistration* registration, const TfL return true; } case kTfLiteBuiltinReshape: { - return delegates::coreml::IsReshapeOpSupported(registration, node, context); + return delegates::coreml::IsReshapeOpSupported(registration, node, context, + options->coreml_version); } case kTfLiteBuiltinResizeBilinear: { return delegates::coreml::IsResizeBilinearOpSupported(registration, node, context); @@ -142,6 
+143,39 @@ bool IsNodeSupportedByDelegate(const TfLiteRegistration* registration, const TfL return false; } +class CoreMlDelegate : public TfLiteDelegate { + public: + explicit CoreMlDelegate(const TfLiteCoreMlDelegateOptions* params) + : params_(params != nullptr ? *params : TfLiteCoreMlDelegateOptions()) { + { + if (@available(iOS 13.0, *)) { + if (params_.coreml_version != 2 && params_.coreml_version != 3) { + NSLog(@"coreml_version must be 2 or 3. Setting to 3."); + params_.coreml_version = 3; + } + } else if (@available(iOS 12.0, *)) { + if (params_.coreml_version != 2) { + NSLog(@"coreml_version must be 2 - using Core ML version 2."); + params_.coreml_version = 2; + } + } + if (params_.max_delegated_partitions <= 0) { + params_.max_delegated_partitions = std::numeric_limits::max(); + } + if (params_.min_nodes_per_partition <= 0) { + params_.min_nodes_per_partition = kMinNodesPerCoreMlDelegate; + } + } + } + + TfLiteCoreMlDelegateOptions* params() { return ¶ms_; } + + bool VerifyDelegate() { return true; } + + private: + TfLiteCoreMlDelegateOptions params_; +}; + TfLiteRegistration GetCoreMlKernelRegistration() { // This is the registration for the Delegate Node that gets added to // the TFLite graph instead of the subGraph it replaces it. 
@@ -158,8 +192,10 @@ TfLiteRegistration GetCoreMlKernelRegistration() { }; kernel_registration.init = [](TfLiteContext* context, const char* buffer, size_t length) -> void* { - const TfLiteDelegateParams* params = reinterpret_cast(buffer); - CoreMlDelegateKernel* coreml_kernel = new CoreMlDelegateKernel(); + const auto* params = reinterpret_cast(buffer); + const auto* coreml_options = + (reinterpret_cast(params->delegate))->params(); + CoreMlDelegateKernel* coreml_kernel = new CoreMlDelegateKernel(coreml_options->coreml_version); if (coreml_kernel->Init(context, params) != kTfLiteOk) { delete coreml_kernel; return nullptr; @@ -187,14 +223,12 @@ TfLiteRegistration GetCoreMlKernelRegistration() { } TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate) { - const auto* params = - reinterpret_cast(delegate->data_); + const auto* params = reinterpret_cast(delegate->data_); - delegates::IsNodeSupportedFn node_supported_fn = - [=](TfLiteContext* context, TfLiteNode* node, - TfLiteRegistration* registration, - std::string* unsupported_details) -> bool { - return IsNodeSupportedByDelegate(registration, node, context); + delegates::IsNodeSupportedFn node_supported_fn = [=](TfLiteContext* context, TfLiteNode* node, + TfLiteRegistration* registration, + std::string* unsupported_details) -> bool { + return IsNodeSupportedByDelegate(registration, node, context, params); }; delegates::GraphPartitionHelper helper(context, node_supported_fn); @@ -214,7 +248,8 @@ TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate) { // Set first element to the number of nodes to replace. 
supported_nodes[0] = supported_nodes.size() - 1; - TFLITE_LOG_PROD(tflite::TFLITE_LOG_INFO, "CoreML delegate: %d nodes delegated out of %d nodes, " + TFLITE_LOG_PROD(tflite::TFLITE_LOG_INFO, + "CoreML delegate: %d nodes delegated out of %d nodes, " "with %d partitions.\n", supported_nodes[0], helper.num_total_nodes(), delegate_partitions.size()); @@ -223,28 +258,6 @@ TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate) { reinterpret_cast<TfLiteIntArray*>(supported_nodes.data()), delegate); } -class CoreMlDelegate : public TfLiteDelegate { - public: - explicit CoreMlDelegate(const TfLiteCoreMlDelegateOptions* params) - : params_(params != nullptr ? *params : TfLiteCoreMlDelegateOptions()) { - { - if (params_.max_delegated_partitions <= 0) { - params_.max_delegated_partitions = std::numeric_limits<int>::max(); - } - if (params_.min_nodes_per_partition <= 0) { - params_.min_nodes_per_partition = kMinNodesPerCoreMlDelegate; - } - } - } - - TfLiteCoreMlDelegateOptions* params() { return &params_; } - - bool VerifyDelegate() { return true; } - - private: - TfLiteCoreMlDelegateOptions params_; -}; - TfLiteDelegate* CreateCoreMlDelegate(const TfLiteCoreMlDelegateOptions* options) { TfLiteDelegate* delegate = new CoreMlDelegate(options); if (!static_cast<CoreMlDelegate*>(delegate)->VerifyDelegate()) { @@ -288,7 +301,7 @@ bool IsNeuralEngineAvailable() { } // namespace TfLiteDelegate* TfLiteCoreMlDelegateCreate(const TfLiteCoreMlDelegateOptions* options) { - if (@available(iOS 11.0, *)) { + if (@available(iOS 12.0, *)) { if (options->enabled_devices == TfLiteCoreMlDelegateDevicesWithNeuralEngine && !IsNeuralEngineAvailable()) { NSLog(@"This device does not have Neural Engine, so Core ML delegate will not be enabled. " @@ -299,7 +312,7 @@ TfLiteDelegate* TfLiteCoreMlDelegateCreate(const TfLiteCoreMlDelegateOptions* op return tflite::CreateCoreMlDelegate(options); } else { NSLog(@"Core ML delegate is not supported in this iOS version.
" - "Minimum required iOS version is 11.0."); + "Minimum required iOS version is 12.0."); return nullptr; } } diff --git a/tensorflow/lite/experimental/delegates/coreml/coreml_delegate_kernel.h b/tensorflow/lite/experimental/delegates/coreml/coreml_delegate_kernel.h index 04053ea81c1..8c983fb11aa 100644 --- a/tensorflow/lite/experimental/delegates/coreml/coreml_delegate_kernel.h +++ b/tensorflow/lite/experimental/delegates/coreml/coreml_delegate_kernel.h @@ -29,6 +29,8 @@ namespace coreml { // implements Init/Prepare/Invoke as TFLite kernel nodes. class CoreMlDelegateKernel { public: + explicit CoreMlDelegateKernel(int coreml_version) + : coreml_version_(coreml_version) {} // Initialize the delegated graph and add required nodes. TfLiteStatus Init(TfLiteContext* context, const TfLiteDelegateParams* params); @@ -56,6 +58,7 @@ class CoreMlDelegateKernel { std::unique_ptr builder_; std::unique_ptr model_; ::CoreMlExecutor* executor_; + int coreml_version_; std::vector input_tensor_ids_; std::vector inputs_; diff --git a/tensorflow/lite/experimental/delegates/coreml/coreml_delegate_kernel.mm b/tensorflow/lite/experimental/delegates/coreml/coreml_delegate_kernel.mm index a36837bcc44..6a668bc971b 100644 --- a/tensorflow/lite/experimental/delegates/coreml/coreml_delegate_kernel.mm +++ b/tensorflow/lite/experimental/delegates/coreml/coreml_delegate_kernel.mm @@ -60,7 +60,7 @@ void TransposeToHWC(const float* chw, float* hwc, const TfLiteIntArray* hwc_dims TfLiteStatus CoreMlDelegateKernel::Init(TfLiteContext* context, const TfLiteDelegateParams* delegate_params) { - if (@available(iOS 11.0, *)) { + if (@available(iOS 12.0, *)) { executor_ = [[::CoreMlExecutor alloc] init]; TF_LITE_ENSURE_STATUS(BuildModel(context, delegate_params)); // Serialize the model protocol buffer and compile it. 
@@ -76,7 +76,7 @@ TfLiteStatus CoreMlDelegateKernel::Init(TfLiteContext* context, } return kTfLiteOk; } else { - TF_LITE_KERNEL_LOG(context, "Minimum required iOS version is 11.0."); + TF_LITE_KERNEL_LOG(context, "Minimum required iOS version is 12.0."); return kTfLiteError; } } @@ -104,6 +104,9 @@ void CoreMlDelegateKernel::AddOutputTensors(const TfLiteIntArray* output_tensors int batch_size, height_size, width_size, depth_size; GetDims(&batch_size, &height_size, &width_size, &depth_size, tensor.dims); multi_array->set_datatype(CoreML::Specification::ArrayFeatureType::FLOAT32); + if (coreml_version_ >= 3) { + multi_array->mutable_shape()->Add(batch_size); + } multi_array->mutable_shape()->Add(depth_size); multi_array->mutable_shape()->Add(height_size); multi_array->mutable_shape()->Add(width_size); @@ -114,7 +117,7 @@ TfLiteStatus CoreMlDelegateKernel::BuildModel(TfLiteContext* context, const TfLiteDelegateParams* delegate_params) { TfLiteNode* node; TfLiteRegistration* reg; - builder_.reset(new delegates::coreml::GraphBuilder()); + builder_.reset(new delegates::coreml::GraphBuilder(coreml_version_)); // Add Inputs AddInputTensors(delegate_params->input_tensors, context); // Build all ops. @@ -144,8 +147,6 @@ TfLiteStatus CoreMlDelegateKernel::BuildModel(TfLiteContext* context, return kTfLiteError; } AddOutputTensors(delegate_params->output_tensors, context); - // TODO(karimnosseir): Set correct version ? 
- model_->set_specificationversion(1); auto* model_description = model_->mutable_description(); for (int i = 0; i < delegate_params->input_tensors->size; ++i) { const int tensor_id = delegate_params->input_tensors->data[i]; @@ -158,6 +159,9 @@ TfLiteStatus CoreMlDelegateKernel::BuildModel(TfLiteContext* context, int batch_size, height_size, width_size, depth_size; GetDims(&batch_size, &height_size, &width_size, &depth_size, tensor.dims); multi_array->set_datatype(CoreML::Specification::ArrayFeatureType::FLOAT32); + if (coreml_version_ >= 3) { + multi_array->mutable_shape()->Add(batch_size); + } multi_array->mutable_shape()->Add(depth_size); multi_array->mutable_shape()->Add(height_size); multi_array->mutable_shape()->Add(width_size); @@ -181,9 +185,12 @@ TfLiteStatus CoreMlDelegateKernel::Prepare(TfLiteContext* context, TfLiteNode* n int batch_size, height_size, width_size, depth_size; GetDims(&batch_size, &height_size, &width_size, &depth_size, tensor->dims); - inputs_.push_back({std::vector(input_size), - builder_->GetTensorName(tensor_index), - {depth_size, height_size, width_size}}); + std::vector input_shape = {depth_size, height_size, width_size}; + if (coreml_version_ >= 3) { + input_shape.insert(input_shape.begin(), batch_size); + } + inputs_.push_back( + {std::vector(input_size), builder_->GetTensorName(tensor_index), input_shape}); } outputs_.reserve(node->outputs->size); @@ -222,9 +229,7 @@ TfLiteStatus CoreMlDelegateKernel::Invoke(TfLiteContext* context, TfLiteNode* no } } -CoreMlDelegateKernel::~CoreMlDelegateKernel() { - [executor_ cleanup]; -} +CoreMlDelegateKernel::~CoreMlDelegateKernel() { [executor_ cleanup]; } } // namespace coreml } // namespace delegates diff --git a/tensorflow/lite/experimental/delegates/coreml/coreml_executor.h b/tensorflow/lite/experimental/delegates/coreml/coreml_executor.h index edec3020cbc..5ce0a0ade6c 100644 --- a/tensorflow/lite/experimental/delegates/coreml/coreml_executor.h +++ 
b/tensorflow/lite/experimental/delegates/coreml/coreml_executor.h @@ -45,4 +45,5 @@ struct TensorData { @property MLModel* model API_AVAILABLE(ios(11)); @property NSString* mlModelFilePath; @property NSString* compiledModelFilePath; +@property(nonatomic, readonly) int coreMlVersion; @end diff --git a/tensorflow/lite/experimental/delegates/coreml/coreml_executor.mm b/tensorflow/lite/experimental/delegates/coreml/coreml_executor.mm index 2091c0d7ca0..1f808e08d49 100644 --- a/tensorflow/lite/experimental/delegates/coreml/coreml_executor.mm +++ b/tensorflow/lite/experimental/delegates/coreml/coreml_executor.mm @@ -39,17 +39,22 @@ NSURL* createTemporaryFile() { NSSet* _featureNames; } -- (instancetype)initWithInputs:(const std::vector*)inputs; +- (instancetype)initWithInputs:(const std::vector*)inputs + coreMlVersion:(int)coreMlVersion; - (MLFeatureValue*)featureValueForName:(NSString*)featureName API_AVAILABLE(ios(11)); - (NSSet*)featureNames; +@property(nonatomic, readonly) int coreMlVersion; + @end @implementation MultiArrayFeatureProvider -- (instancetype)initWithInputs:(const std::vector*)inputs { +- (instancetype)initWithInputs:(const std::vector*)inputs + coreMlVersion:(int)coreMlVersion { self = [super init]; _inputs = inputs; + _coreMlVersion = coreMlVersion; for (auto& input : *_inputs) { if (input.name.empty()) { return nil; @@ -74,8 +79,31 @@ NSURL* createTemporaryFile() { for (auto& input : *_inputs) { if ([featureName cStringUsingEncoding:NSUTF8StringEncoding] == input.name) { // TODO(b/141492326): Update shape handling for higher ranks - NSArray* shape = @[ @(input.shape[0]), @(input.shape[1]), @(input.shape[2]) ]; - NSArray* strides = @[ @(input.shape[1] * input.shape[2]), @(input.shape[2]), @1 ]; + NSArray* shape = @[ + @(input.shape[0]), + @(input.shape[1]), + @(input.shape[2]), + ]; + NSArray* strides = @[ + @(input.shape[1] * input.shape[2]), + @(input.shape[2]), + @1, + ]; + + if ([self coreMlVersion] >= 3) { + shape = @[ + @(input.shape[0]), + 
@(input.shape[1]), + @(input.shape[2]), + @(input.shape[3]), + ]; + strides = @[ + @(input.shape[1] * input.shape[2] * input.shape[3]), + @(input.shape[2] * input.shape[3]), + @(input.shape[3]), + @1, + ]; + }; NSError* error = nil; MLMultiArray* mlArray = [[MLMultiArray alloc] initWithDataPointer:(float*)input.data.data() shape:shape @@ -106,7 +134,7 @@ NSURL* createTemporaryFile() { } NSError* error = nil; MultiArrayFeatureProvider* inputFeature = - [[MultiArrayFeatureProvider alloc] initWithInputs:&inputs]; + [[MultiArrayFeatureProvider alloc] initWithInputs:&inputs coreMlVersion:[self coreMlVersion]]; if (inputFeature == nil) { NSLog(@"inputFeature is not initialized."); return NO; @@ -153,6 +181,14 @@ NSURL* createTemporaryFile() { - (NSURL*)saveModel:(CoreML::Specification::Model*)model { NSURL* modelUrl = createTemporaryFile(); NSString* modelPath = [modelUrl path]; + if (model->specificationversion() == 3) { + _coreMlVersion = 2; + } else if (model->specificationversion() == 4) { + _coreMlVersion = 3; + } else { + NSLog(@"Only Core ML models with specification version 3 or 4 are supported"); + return nil; + } // Flush data to file. // TODO(karimnosseir): Can we mmap this instead of actual writing it to phone ? 
std::ofstream file_stream([modelPath UTF8String], std::ios::out | std::ios::binary); diff --git a/tensorflow/lite/experimental/delegates/hexagon/README.md b/tensorflow/lite/experimental/delegates/hexagon/README.md index 5cf71fdb5bf..a97342c9fdc 100644 --- a/tensorflow/lite/experimental/delegates/hexagon/README.md +++ b/tensorflow/lite/experimental/delegates/hexagon/README.md @@ -80,6 +80,8 @@ are verified in `IsNodeSupportedByHexagon`: * L2Normalization (without any activation) * Logistic (aka Sigmoid) * MaxPool2D (without any activation) (b/129276536) +* Mean +* MirrorPad * Mul (without any activation) (b/129276536) * Neg * Pad: Only supports 0 padding (b/139277813) diff --git a/tensorflow/lite/experimental/delegates/hexagon/builders/BUILD b/tensorflow/lite/experimental/delegates/hexagon/builders/BUILD index ae8ffe293e9..ff764984de9 100644 --- a/tensorflow/lite/experimental/delegates/hexagon/builders/BUILD +++ b/tensorflow/lite/experimental/delegates/hexagon/builders/BUILD @@ -19,6 +19,7 @@ cc_library( "hardswish_builder.cc", "l2_normalization_builder.cc", "matmul_builder.cc", + "mirror_pad_builder.cc", "neg_op_builder.cc", "op_builder.cc", "pad_builder.cc", @@ -45,6 +46,7 @@ cc_library( "hardswish_builder.h", "l2_normalization_builder.h", "matmul_builder.h", + "mirror_pad_builder.h", "neg_op_builder.h", "op_builder.h", "pad_builder.h", diff --git a/tensorflow/lite/experimental/delegates/hexagon/builders/mirror_pad_builder.cc b/tensorflow/lite/experimental/delegates/hexagon/builders/mirror_pad_builder.cc new file mode 100644 index 00000000000..2a04088f4f3 --- /dev/null +++ b/tensorflow/lite/experimental/delegates/hexagon/builders/mirror_pad_builder.cc @@ -0,0 +1,112 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/lite/experimental/delegates/hexagon/builders/mirror_pad_builder.h" + +#include + +#include + +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/experimental/delegates/hexagon/hexagon_nn/hexagon_nn.h" +#include "tensorflow/lite/kernels/kernel_util.h" + +namespace tflite { +namespace delegates { +namespace hexagon { +TfLiteStatus MirrorPadOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, + const TfLiteIntArray* outputs, + TfLiteContext* context) { + static int quant_bound_shape[] = {1, 1, 1, 1}; + int tensor_id; + + // Input data tensor. + tensor_id = inputs->data[0]; + const auto& input_tensor = context->tensors[tensor_id]; + AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); + + // Padding tensor. + // Should be a constant. + tensor_id = inputs->data[1]; + const auto& padding_tensor = context->tensors[tensor_id]; + if (padding_tensor.dims->size != 2 || padding_tensor.dims->data[0] > 4 || + padding_tensor.dims->data[1] != 2) { + TF_LITE_KERNEL_LOG(context, "Invalid padding tensor shape"); + return kTfLiteError; + } + paddings_shape_ = {1, 1, 4, 2}; + std::vector padding_data(8, 0); + // Hexagon always expects padding data for each dimension in order {b, h, w, + // d}. This start value ensures we pad the non-relevant dimensions with 0. 
+ int padding_data_start = 8 - padding_tensor.dims->data[0] * 2; + for (int i = 0; i < padding_tensor.dims->data[0] * 2; ++i) { + padding_data[padding_data_start + i] = padding_tensor.data.i32[i]; + } + auto* const_padding_node = graph_builder_->AddConstNodeWithData( + paddings_shape_.data(), reinterpret_cast(padding_data.data()), + padding_data.size() * sizeof(padding_data[0])); + AddInput(TensorID(const_padding_node->GetID(), 0)); + // Padding type. + const TfLiteMirrorPaddingParams* params = + reinterpret_cast(builtin_data_); + if (params->mode == kTfLiteMirrorPaddingReflect) { + SetPaddingType(NN_PAD_MIRROR_REFLECT); + } else if (params->mode == kTfLiteMirrorPaddingSymmetric) { + SetPaddingType(NN_PAD_MIRROR_SYMMETRIC); + } + + // Min/max values for input tensor. + TF_LITE_ENSURE_STATUS( + ComputeMinAndMaxQuantValues(input_tensor, &input_min_, &input_max_)); + auto* input_min_const = graph_builder_->AddConstNodeWithData( + quant_bound_shape, reinterpret_cast(&input_min_), + sizeof(input_min_)); + auto* input_max_const = graph_builder_->AddConstNodeWithData( + quant_bound_shape, reinterpret_cast(&input_max_), + sizeof(input_max_)); + AddInput(TensorID(input_min_const->GetID(), 0)); + AddInput(TensorID(input_max_const->GetID(), 0)); + + // Hexagon outputs for this node. + int output_batch_size, output_height_size, output_width_size, + output_depth_size; + GetDims(&output_batch_size, &output_height_size, &output_width_size, + &output_depth_size, context->tensors[outputs->data[0]].dims); + node_output_ = AddOutput(sizeof(uint8_t), 4, + {output_batch_size, output_height_size, + output_width_size, output_depth_size}); + AddOutput(sizeof(float), 4, {1, 1, 1, 1}); + AddOutput(sizeof(float), 4, {1, 1, 1, 1}); + + return kTfLiteOk; +} + +TfLiteStatus MirrorPadOpBuilder::RegisterOutputs(const TfLiteIntArray* outputs, + TfLiteContext* context) { + // Should be only 1 output. 
+ graph_builder_->AddTensorWithID(outputs->data[0], node_output_.first, + node_output_.second); + return kTfLiteOk; +} + +MirrorPadOpBuilder::~MirrorPadOpBuilder() {} + +OpBuilder* CreateMirrorPadBuilder(GraphBuilder* graph_builder, int op_type) { + return new MirrorPadOpBuilder(graph_builder, op_type); +} + +} // namespace hexagon +} // namespace delegates +} // namespace tflite diff --git a/tensorflow/lite/experimental/delegates/hexagon/builders/mirror_pad_builder.h b/tensorflow/lite/experimental/delegates/hexagon/builders/mirror_pad_builder.h new file mode 100644 index 00000000000..6fcb2606701 --- /dev/null +++ b/tensorflow/lite/experimental/delegates/hexagon/builders/mirror_pad_builder.h @@ -0,0 +1,49 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_EXPERIMENTAL_DELEGATES_HEXAGON_BUILDERS_MIRROR_PAD_BUILDER_H_ +#define TENSORFLOW_LITE_EXPERIMENTAL_DELEGATES_HEXAGON_BUILDERS_MIRROR_PAD_BUILDER_H_ + +#include + +#include "tensorflow/lite/experimental/delegates/hexagon/builders/op_builder.h" + +namespace tflite { +namespace delegates { +namespace hexagon { + +class MirrorPadOpBuilder : public OpBuilder { + public: + explicit MirrorPadOpBuilder(GraphBuilder* graph_builder, int op_type) + : OpBuilder(graph_builder, op_type) {} + TfLiteStatus PopulateSubGraph(const TfLiteIntArray* inputs, + const TfLiteIntArray* outputs, + TfLiteContext* context) override; + + TfLiteStatus RegisterOutputs(const TfLiteIntArray* outputs, + TfLiteContext* context) override; + + ~MirrorPadOpBuilder() override; + + private: + TensorID node_output_; + float input_min_, input_max_; + std::vector paddings_shape_; +}; + +} // namespace hexagon +} // namespace delegates +} // namespace tflite + +#endif // TENSORFLOW_LITE_EXPERIMENTAL_DELEGATES_HEXAGON_BUILDERS_MIRROR_PAD_BUILDER_H_ diff --git a/tensorflow/lite/experimental/delegates/hexagon/builders/op_builder.cc b/tensorflow/lite/experimental/delegates/hexagon/builders/op_builder.cc index e20127ac6c1..c7432e64c79 100644 --- a/tensorflow/lite/experimental/delegates/hexagon/builders/op_builder.cc +++ b/tensorflow/lite/experimental/delegates/hexagon/builders/op_builder.cc @@ -43,6 +43,8 @@ OpBuilder* GraphBuilder::CreateOpBuilderFromTfLiteOp(int op_type) { return CreateReduceBuilder(this, OP_QuantizedSum_8to32); case kTfLiteBuiltinPad: return CreatePadBuilder(this, OP_QuantizedPad_8); + case kTfLiteBuiltinMirrorPad: + return CreateMirrorPadBuilder(this, OP_MirrorPad_8); case kTfLiteBuiltinFullyConnected: return CreateMatMulBuilder(this, OP_QuantizedMatMul_8x8to32); case kTfLiteBuiltinAveragePool2d: diff --git a/tensorflow/lite/experimental/delegates/hexagon/builders/op_factory.h 
b/tensorflow/lite/experimental/delegates/hexagon/builders/op_factory.h index e7236fb0e00..0beb88cc68e 100644 --- a/tensorflow/lite/experimental/delegates/hexagon/builders/op_factory.h +++ b/tensorflow/lite/experimental/delegates/hexagon/builders/op_factory.h @@ -35,6 +35,7 @@ OpBuilder* CreatePool2DBuilder(GraphBuilder* graph_builder, int op_type); OpBuilder* CreateReshapeBuilder(GraphBuilder* graph_builder, int op_type); OpBuilder* CreateSoftmaxBuilder(GraphBuilder* graph_builder, int op_type); OpBuilder* CreateReduceBuilder(GraphBuilder* graph_builder, int op_type); +OpBuilder* CreateMirrorPadBuilder(GraphBuilder* graph_builder, int op_type); OpBuilder* CreatePadBuilder(GraphBuilder* graph_builder, int op_type); OpBuilder* CreateResizeNearestNeighborBuilder(GraphBuilder* graph_builder, int op_type); diff --git a/tensorflow/lite/experimental/delegates/hexagon/builders/reduce_builder.cc b/tensorflow/lite/experimental/delegates/hexagon/builders/reduce_builder.cc index 8401f76cf4d..066c82560a8 100644 --- a/tensorflow/lite/experimental/delegates/hexagon/builders/reduce_builder.cc +++ b/tensorflow/lite/experimental/delegates/hexagon/builders/reduce_builder.cc @@ -21,6 +21,7 @@ limitations under the License. 
#include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/experimental/delegates/hexagon/hexagon_nn/hexagon_nn.h" #include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/util.h" namespace tflite { namespace delegates { @@ -35,9 +36,7 @@ TfLiteStatus ReduceOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, tensor_id = inputs->data[0]; const auto& input_tensor = context->tensors[tensor_id]; AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); - ComputeMinAndMaxQuantValues(input_tensor, &input_min_, &input_max_, - std::numeric_limits::min(), - std::numeric_limits::max()); + ComputeMinAndMaxQuantValues(input_tensor, &input_min_, &input_max_); auto* input_min_const = graph_builder_->AddConstNodeWithData( quant_bound_shape, reinterpret_cast(&input_min_), sizeof(input_min_)); @@ -63,37 +62,48 @@ TfLiteStatus ReduceOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, return kTfLiteError; } + auto& output_tensor = context->tensors[outputs->data[0]]; int output_batch_size, output_height_size, output_width_size, output_depth_size; GetDims(&output_batch_size, &output_height_size, &output_width_size, - &output_depth_size, context->tensors[outputs->data[0]].dims); + &output_depth_size, output_tensor.dims); - // Hexagon's sum-reduction outputs int32, so we shrink it down to UInt8. 
- if (op_node_.op_type == OP_QuantizedSum_8to32) { - const auto& reduce_out = AddOutput(sizeof(int32_t), 4, - {output_batch_size, output_height_size, - output_width_size, output_depth_size}); - const auto& reduce_out_min = AddOutput(sizeof(float), 4, {1, 1, 1, 1}); - const auto& reduce_out_max = AddOutput(sizeof(float), 4, {1, 1, 1, 1}); + float output_min = -1, output_max = -1; + ComputeMinAndMaxQuantValues(output_tensor, &output_min, &output_max); + auto* output_min_const = graph_builder_->AddConstNodeWithData( + quant_bound_shape, reinterpret_cast(&output_min), + sizeof(output_min)); + auto* output_max_const = graph_builder_->AddConstNodeWithData( + quant_bound_shape, reinterpret_cast(&output_max), + sizeof(output_max)); + // Min/max values for output tensor. + AddInput(TensorID(output_min_const->GetID(), 0)); + AddInput(TensorID(output_max_const->GetID(), 0)); - auto* quantize_output_op = graph_builder_->AddNode(GetTFLiteNodeID()); - quantize_output_op->SetOpType(OP_QuantizeDownAndShrinkRange_32to8); - quantize_output_op->AddInput(reduce_out); - quantize_output_op->AddInput(reduce_out_min); - quantize_output_op->AddInput(reduce_out_max); - node_output_ = - quantize_output_op->AddOutput(sizeof(uint8_t), 4, - {output_batch_size, output_height_size, - output_width_size, output_depth_size}); - quantize_output_op->AddOutput(sizeof(float), 4, {1, 1, 1, 1}); - quantize_output_op->AddOutput(sizeof(float), 4, {1, 1, 1, 1}); - } else { - node_output_ = AddOutput(sizeof(uint8_t), 4, - {output_batch_size, output_height_size, - output_width_size, output_depth_size}); - AddOutput(sizeof(float), 4, {1, 1, 1, 1}); - AddOutput(sizeof(float), 4, {1, 1, 1, 1}); - } + // Add outputs + size_t output_element_size = 0; + TF_LITE_ENSURE_STATUS( + GetSizeOfType(context, output_tensor.type, &output_element_size)); + auto mean_output = AddOutput(output_element_size, 4, + {output_batch_size, output_height_size, + output_width_size, output_depth_size}); + auto mean_out_min = 
AddOutput(output_element_size, 4, {1, 1, 1, 1}); + auto mean_out_max = AddOutput(output_element_size, 4, {1, 1, 1, 1}); + // Mean op doesn't honor the passed min/max for output, so we need + // to add requantize. + auto* requantize_op = graph_builder_->AddNode(GetTFLiteNodeID()); + requantize_op->SetOpType(OP_Requantize_8to8); + requantize_op->AddInput(mean_output); + requantize_op->AddInput(mean_out_min); + requantize_op->AddInput(mean_out_max); + requantize_op->AddInput(TensorID(output_min_const->GetID(), 0)); + requantize_op->AddInput(TensorID(output_max_const->GetID(), 0)); + node_output_ = + requantize_op->AddOutput(sizeof(uint8_t), 4, + {output_batch_size, output_height_size, + output_width_size, output_depth_size}); + requantize_op->AddOutput(sizeof(float), 4, {1, 1, 1, 1}); + requantize_op->AddOutput(sizeof(float), 4, {1, 1, 1, 1}); return kTfLiteOk; } diff --git a/tensorflow/lite/experimental/delegates/hexagon/builders/tests/BUILD b/tensorflow/lite/experimental/delegates/hexagon/builders/tests/BUILD index b1df59c4098..47a78dca6ac 100644 --- a/tensorflow/lite/experimental/delegates/hexagon/builders/tests/BUILD +++ b/tensorflow/lite/experimental/delegates/hexagon/builders/tests/BUILD @@ -30,6 +30,7 @@ hexagon_op_tests( "conv_test.cc", "l2_norm_test.cc", "matmul_test.cc", + "mirror_pad_test.cc", "mul_test.cc", "neg_test.cc", "pad_test.cc", diff --git a/tensorflow/lite/experimental/delegates/hexagon/builders/tests/mirror_pad_test.cc b/tensorflow/lite/experimental/delegates/hexagon/builders/tests/mirror_pad_test.cc new file mode 100644 index 00000000000..4caf96ac8ce --- /dev/null +++ b/tensorflow/lite/experimental/delegates/hexagon/builders/tests/mirror_pad_test.cc @@ -0,0 +1,127 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include "tensorflow/lite/experimental/delegates/hexagon/builders/tests/hexagon_delegate_op_model.h" + +namespace tflite { +using testing::ElementsAreArray; + +template +class MirrorPadOpModel : public SingleOpModelWithHexagon { + public: + MirrorPadOpModel(const TensorData& input, + std::initializer_list paddings_shape, + std::initializer_list paddings, + const TensorData& output, const tflite::MirrorPadMode mode) { + input_id_ = AddInput(input); + padding_matrix_id_ = + AddConstInput(TensorType_INT32, paddings, paddings_shape); + output_id_ = AddOutput(output); + SetBuiltinOp(BuiltinOperator_MIRROR_PAD, BuiltinOptions_MirrorPadOptions, + CreateMirrorPadOptions(builder_, mode).Union()); + BuildInterpreter({GetShape(input_id_), GetShape(padding_matrix_id_)}); + } + + int input_tensor_id() { return input_id_; } + + std::vector GetOutput() { return ExtractVector(output_id_); } + + protected: + int input_id_; + int padding_matrix_id_; + int output_id_; +}; + +TEST(MirrorPadTest, EmptyPad_UInt8) { + MirrorPadOpModel model( + {TensorType_UINT8, {2, 3}, -1.0, 1.0}, {2, 2}, {0, 0, 0, 0}, + {TensorType_UINT8, {}, -1.0, 1.0}, tflite::MirrorPadMode_REFLECT); + model.PopulateTensor(model.input_tensor_id(), {1, 2, 3, 4, 5, 6}); + model.ApplyDelegateAndInvoke(); + EXPECT_THAT(model.GetOutput(), ElementsAreArray({1, 2, 3, 4, 5, 6})); +} + +TEST(MirrorPadTest, PadBothSides_Symmetric_Int8) { + MirrorPadOpModel model({TensorType_INT8, {2, 3}, -1.0, 1.0}, {2, 2}, + {1, 1, 1, 1}, {TensorType_INT8, 
{}, -1.0, 1.0}, + tflite::MirrorPadMode_SYMMETRIC); + model.PopulateTensor(model.input_tensor_id(), {1, 2, 3, 4, 5, 6}); + model.ApplyDelegateAndInvoke(); + EXPECT_THAT(model.GetOutput(), + ElementsAreArray({1, 1, 2, 3, 3, 1, 1, 2, 3, 3, + 4, 4, 5, 6, 6, 4, 4, 5, 6, 6})); +} + +TEST(MirrorPadTest, PadBothSides_Reflect_UInt8) { + MirrorPadOpModel model( + {TensorType_UINT8, {2, 3}, -1.0, 1.0}, {2, 2}, {1, 1, 1, 1}, + {TensorType_UINT8, {}, -1.0, 1.0}, tflite::MirrorPadMode_REFLECT); + model.PopulateTensor(model.input_tensor_id(), {1, 2, 3, 4, 5, 6}); + model.ApplyDelegateAndInvoke(); + EXPECT_THAT(model.GetOutput(), + ElementsAreArray({5, 4, 5, 6, 5, 2, 1, 2, 3, 2, + 5, 4, 5, 6, 5, 2, 1, 2, 3, 2})); +} + +TEST(MirrorPadTest, PadOneSide_left_Reflect_Int8) { + MirrorPadOpModel model({TensorType_INT8, {2, 3}, -1.0, 1.0}, {2, 2}, + {1, 0, 1, 0}, {TensorType_INT8, {}, -1.0, 1.0}, + tflite::MirrorPadMode_REFLECT); + model.PopulateTensor(model.input_tensor_id(), {1, 2, 3, 4, 5, 6}); + model.ApplyDelegateAndInvoke(); + EXPECT_THAT(model.GetOutput(), + ElementsAreArray({5, 4, 5, 6, 2, 1, 2, 3, 5, 4, 5, 6})); +} + +TEST(MirrorPadTest, PadOneSide_right_Symmetric_UInt8) { + MirrorPadOpModel model( + {TensorType_UINT8, {2, 3}, -1.0, 1.0}, {2, 2}, {0, 1, 0, 1}, + {TensorType_UINT8, {}, -1.0, 1.0}, tflite::MirrorPadMode_SYMMETRIC); + model.PopulateTensor(model.input_tensor_id(), {1, 2, 3, 4, 5, 6}); + model.ApplyDelegateAndInvoke(); + EXPECT_THAT(model.GetOutput(), + ElementsAreArray({1, 2, 3, 3, 4, 5, 6, 6, 4, 5, 6, 6})); +} + +TEST(MirrorPadTest, Pad_1D_Reflect_Int8) { + MirrorPadOpModel model({TensorType_INT8, {3}, -1.0, 1.0}, {1, 2}, + {0, 2}, {TensorType_INT8, {}, -1.0, 1.0}, + tflite::MirrorPadMode_REFLECT); + model.PopulateTensor(model.input_tensor_id(), {1, 2, 3}); + model.ApplyDelegateAndInvoke(); + EXPECT_THAT(model.GetOutput(), ElementsAreArray({1, 2, 3, 2, 1})); +} + +TEST(MirrorPadTest, Pad_1D_Symmetric_UInt8) { + MirrorPadOpModel model({TensorType_UINT8, {3}, -1.0, 
1.0}, {1, 2}, + {0, 2}, {TensorType_UINT8, {}, -1.0, 1.0}, + tflite::MirrorPadMode_SYMMETRIC); + model.PopulateTensor(model.input_tensor_id(), {1, 2, 3}); + model.ApplyDelegateAndInvoke(); + EXPECT_THAT(model.GetOutput(), ElementsAreArray({1, 2, 3, 3, 2})); +} + +TEST(MirrorPadTest, PadBothSides_Reflect_Whole_UInt8) { + MirrorPadOpModel model( + {TensorType_UINT8, {2, 3}, -1.0, 1.0}, {2, 2}, {1, 1, 2, 2}, + {TensorType_UINT8, {}, -1.0, 1.0}, tflite::MirrorPadMode_REFLECT); + model.PopulateTensor(model.input_tensor_id(), {1, 2, 3, 4, 5, 6}); + model.ApplyDelegateAndInvoke(); + EXPECT_THAT(model.GetOutput(), + ElementsAreArray({6, 5, 4, 5, 6, 5, 4, 3, 2, 1, 2, 3, 2, 1, + 6, 5, 4, 5, 6, 5, 4, 3, 2, 1, 2, 3, 2, 1})); +} + +} // namespace tflite diff --git a/tensorflow/lite/experimental/delegates/hexagon/builders/tests/reduce_test.cc b/tensorflow/lite/experimental/delegates/hexagon/builders/tests/reduce_test.cc index 7e4f95ffa96..a3cd8c8255b 100644 --- a/tensorflow/lite/experimental/delegates/hexagon/builders/tests/reduce_test.cc +++ b/tensorflow/lite/experimental/delegates/hexagon/builders/tests/reduce_test.cc @@ -18,8 +18,8 @@ limitations under the License. namespace tflite { using testing::ElementsAreArray; -// TODO(b/148390890): All tests are disabled, enable after fix is availabel -// and op is enabled. +// TODO(b/148390890): Reduce Sum tests are disabled, enable after fix is +// available and op is enabled. 
class ReduceOpModel : public SingleOpModelWithHexagon { public: ReduceOpModel(BuiltinOperator type, const TensorData& input, @@ -49,32 +49,52 @@ class ReduceOpModel : public SingleOpModelWithHexagon { int output_; }; -TEST(ReduceOpModel, DISABLED_MeanNotKeepDims) { +template +void TestMeanImpl() { float kQuantizedTolerance = 2.0 / 255; std::vector data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6}; - ReduceOpModel m(BuiltinOperator_MEAN, - {TensorType_UINT8, {1, 1, 3, 2}, -1.0, 1.0}, - {TensorType_UINT8, {2}, -1.0, 1.0}, {1}, {2}, false); - m.QuantizeAndPopulate(m.Input(), data); + ReduceOpModel m(BuiltinOperator_MEAN, {Tensor_Type, {1, 1, 3, 2}, -1.0, 1.0}, + {Tensor_Type, {2}, -1.0, 1.0}, {1}, {2}, false); + m.QuantizeAndPopulate(m.Input(), data); + m.Invoke(); + auto reference_output = m.GetDequantizedOutput(); m.ApplyDelegateAndInvoke(); EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 1, 2})); EXPECT_THAT( - m.GetDequantizedOutput(), - ElementsAreArray(ArrayFloatNear({0.4, 0.4}, kQuantizedTolerance))); + m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear(reference_output, kQuantizedTolerance))); } -TEST(ReduceOpModel, DISABLED_MeanKeepDims) { +TEST(ReduceOpModel, MeanNotKeepDims_Uint8) { + TestMeanImpl(); +} + +TEST(ReduceOpModel, MeanNotKeepDims_Int8) { + TestMeanImpl(); +} + +template +void TestMeanKeppDimsImpl() { float kQuantizedTolerance = 2.0 / 255; std::vector data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6}; - ReduceOpModel m(BuiltinOperator_MEAN, - {TensorType_UINT8, {1, 1, 3, 2}, -1.0, 1.0}, - {TensorType_UINT8, {3}, -1.0, 1.0}, {1}, {3}, true); - m.QuantizeAndPopulate(m.Input(), data); + ReduceOpModel m(BuiltinOperator_MEAN, {Tensor_Type, {1, 1, 3, 2}, -1.0, 1.0}, + {Tensor_Type, {3}, -1.0, 1.0}, {1}, {3}, true); + m.QuantizeAndPopulate(m.Input(), data); + m.Invoke(); + auto reference_output = m.GetDequantizedOutput(); m.ApplyDelegateAndInvoke(); EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 1, 3, 1})); EXPECT_THAT( - m.GetDequantizedOutput(), - 
ElementsAreArray(ArrayFloatNear({0.3, 0.35, 0.55}, kQuantizedTolerance))); + m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear(reference_output, kQuantizedTolerance))); +} + +TEST(ReduceOpModel, MeanKeepDims_Int8) { + TestMeanKeppDimsImpl(); +} + +TEST(ReduceOpModel, MeanKeepDims_Uint8) { + TestMeanKeppDimsImpl(); } TEST(ReduceOpModel, DISABLED_SumNotKeepDims) { diff --git a/tensorflow/lite/experimental/delegates/hexagon/utils.cc b/tensorflow/lite/experimental/delegates/hexagon/utils.cc index df7d7424e37..1df0a6df66c 100644 --- a/tensorflow/lite/experimental/delegates/hexagon/utils.cc +++ b/tensorflow/lite/experimental/delegates/hexagon/utils.cc @@ -80,6 +80,8 @@ bool CheckOpVersion(const TfLiteRegistration* registration) { case kTfLiteBuiltinL2Normalization: case kTfLiteBuiltinLogistic: case kTfLiteBuiltinMaxPool2d: + case kTfLiteBuiltinMean: + case kTfLiteBuiltinMirrorPad: case kTfLiteBuiltinMul: case kTfLiteBuiltinPad: case kTfLiteBuiltinQuantize: @@ -153,11 +155,26 @@ bool IsNodeSupportedByHexagon(const TfLiteRegistration* registration, return IsActivationReluOrNone(sub_params->activation); } case kTfLiteBuiltinSum: - case kTfLiteBuiltinMean: { // TODO(b/139277813): Enable these when they pass unit tests. These seem // to recompute the output min/max instead of taking them as inputs, which // causes an unexpected shift in dequantized values. 
return false; + case kTfLiteBuiltinMean: { + return InputsWithCorrectTypes( + node, context, + {{kTfLiteUInt8, kTfLiteInt8}, {kTfLiteInt32}}) && + IsConstantTensor(GetInput(context, node, 1)); + } + case kTfLiteBuiltinMirrorPad: { + if (!InputsWithCorrectTypes( + node, context, {{kTfLiteUInt8, kTfLiteInt8}, {kTfLiteInt32}}) || + !IsConstantTensor(GetInput(context, node, 1))) + return false; + const TfLiteMirrorPaddingParams* params = + reinterpret_cast( + node->builtin_data); + return params->mode == kTfLiteMirrorPaddingReflect || + params->mode == kTfLiteMirrorPaddingSymmetric; } case kTfLiteBuiltinPad: { // TODO(b/139277813): Currently we only support padding with the default diff --git a/tensorflow/lite/experimental/ios/BUILD.apple b/tensorflow/lite/experimental/ios/BUILD.apple index faa3f12971c..8e7b32eba91 100644 --- a/tensorflow/lite/experimental/ios/BUILD.apple +++ b/tensorflow/lite/experimental/ios/BUILD.apple @@ -22,13 +22,6 @@ genrule( """, ) -TFL_LIBRARY_HDRS = [ - "//tensorflow/lite/delegates/gpu:metal_delegate.h", - "//tensorflow/lite/experimental/delegates/coreml:coreml_delegate.h", - "//tensorflow/lite/c:c_api.h", - "//tensorflow/lite/c:common.h", -] - TFL_FRAMEWORK_HDRS = [ "//tensorflow/lite/delegates/gpu:metal_delegate.h", ":coreml_delegate.h", @@ -42,19 +35,6 @@ ios_static_framework( hdrs = TFL_FRAMEWORK_HDRS, bundle_name = "TensorFlowLiteC", minimum_os_version = TFL_MINIMUM_OS_VERSION, - deps = [ - ":TensorFlowLiteC", - ], -) - -objc_library( - name = "TensorFlowLiteC", - hdrs = TFL_LIBRARY_HDRS, - module_name = "TensorFlowLiteC", - weak_sdk_frameworks = [ - "Metal", - "CoreML", - ], deps = [ ":tensorflow_lite_c", ], @@ -78,20 +58,22 @@ ios_static_framework( ], ) -# Using this intermediate target is a workaround for a bug in bazel build rules -# involving mixed objc_library & cc_library deps mentioned in (b/74809458). 
-# When these dependencies are declared directly under the "TensorFlowLiteC" -# target above, the resulting static library incorrectly contains duplicate -# symbols from some ObjC code in the transitive dependencies. -# -# When a new dependency should be added to the TensorFlowLiteC framework, the -# dependency should be added under this target instead. -# When a new header file needs to be exposed, the header should be added to the -# TFL_LIBRARY_HDRS list above. cc_library( name = "tensorflow_lite_c", - hdrs = TFL_LIBRARY_HDRS, - tags = ["nobuilder"], + hdrs = [ + "//tensorflow/lite/c:c_api.h", + "//tensorflow/lite/c:common.h", + "//tensorflow/lite/delegates/gpu:metal_delegate.h", + "//tensorflow/lite/experimental/delegates/coreml:coreml_delegate.h", + ], + linkopts = [ + "-Wl,-weak_framework,CoreML", + "-Wl,-weak_framework,Metal", + ], + tags = [ + "nobuilder", + "swift_module=TensorFlowLiteC", + ], deps = [ "//tensorflow/lite/c:c_api", "//tensorflow/lite/delegates/gpu:metal_delegate", diff --git a/tensorflow/lite/experimental/objc/TensorFlowLiteObjC-nightly.podspec b/tensorflow/lite/experimental/objc/TensorFlowLiteObjC-nightly.podspec index 21194bbb455..e039fb57114 100644 --- a/tensorflow/lite/experimental/objc/TensorFlowLiteObjC-nightly.podspec +++ b/tensorflow/lite/experimental/objc/TensorFlowLiteObjC-nightly.podspec @@ -33,7 +33,7 @@ Pod::Spec.new do |s| 'HEADER_SEARCH_PATHS' => '"${PODS_TARGET_SRCROOT}" ' + '"${PODS_TARGET_SRCROOT}/' + objc_dir + 'apis"', - 'VALID_ARCHS' => 'x86_64 armv7 arm64', + 'VALID_ARCHS' => 'i386 x86_64 armv7 arm64', } s.test_spec 'Tests' do |ts| diff --git a/tensorflow/lite/experimental/objc/TensorFlowLiteObjC.podspec b/tensorflow/lite/experimental/objc/TensorFlowLiteObjC.podspec index 4b04c5e65f2..c673cfad759 100644 --- a/tensorflow/lite/experimental/objc/TensorFlowLiteObjC.podspec +++ b/tensorflow/lite/experimental/objc/TensorFlowLiteObjC.podspec @@ -33,7 +33,7 @@ Pod::Spec.new do |s| 'HEADER_SEARCH_PATHS' => 
'"${PODS_TARGET_SRCROOT}" ' + '"${PODS_TARGET_SRCROOT}/' + objc_dir + 'apis"', - 'VALID_ARCHS' => 'x86_64 armv7 arm64', + 'VALID_ARCHS' => 'i386 x86_64 armv7 arm64', } s.test_spec 'Tests' do |ts| diff --git a/tensorflow/lite/experimental/objc/TensorFlowLiteObjC.podspec.template b/tensorflow/lite/experimental/objc/TensorFlowLiteObjC.podspec.template index 47e60f33c47..fc9e10e4a2c 100644 --- a/tensorflow/lite/experimental/objc/TensorFlowLiteObjC.podspec.template +++ b/tensorflow/lite/experimental/objc/TensorFlowLiteObjC.podspec.template @@ -33,7 +33,7 @@ Pod::Spec.new do |s| 'HEADER_SEARCH_PATHS' => '"${PODS_TARGET_SRCROOT}" ' + '"${PODS_TARGET_SRCROOT}/' + objc_dir + 'apis"', - 'VALID_ARCHS' => 'x86_64 armv7 arm64', + 'VALID_ARCHS' => 'i386 x86_64 armv7 arm64', } s.test_spec 'Tests' do |ts| diff --git a/tensorflow/lite/experimental/support/java/src/java/org/tensorflow/lite/support/image/TensorImage.java b/tensorflow/lite/experimental/support/java/src/java/org/tensorflow/lite/support/image/TensorImage.java index b19ef2e3b62..bced23e6f67 100644 --- a/tensorflow/lite/experimental/support/java/src/java/org/tensorflow/lite/support/image/TensorImage.java +++ b/tensorflow/lite/experimental/support/java/src/java/org/tensorflow/lite/support/image/TensorImage.java @@ -231,6 +231,26 @@ public class TensorImage { return container.getDataType(); } + /** + * Gets the image width. + * + * @throws IllegalStateException if the TensorImage never loads data. + * @throws IllegalArgumentException if the container data is corrupted. + */ + public int getWidth() { + return container.getWidth(); + } + + /** + * Gets the image height. + * + * @throws IllegalStateException if the TensorImage never loads data. + * @throws IllegalArgumentException if the container data is corrupted. + */ + public int getHeight() { + return container.getHeight(); + } + // Requires tensor shape [h, w, 3] or [1, h, w, 3]. 
static void checkImageTensorShape(int[] shape) { SupportPreconditions.checkArgument( @@ -273,6 +293,41 @@ public class TensorImage { isBufferUpdated = true; } + int getWidth() { + SupportPreconditions.checkState( + isBitmapUpdated || isBufferUpdated, + "Both buffer and bitmap data are obsolete. Forgot to call TensorImage#load?"); + if (isBitmapUpdated) { + return bitmapImage.getWidth(); + } + return getBufferDimensionSize(-2); + } + + int getHeight() { + SupportPreconditions.checkState( + isBitmapUpdated || isBufferUpdated, + "Both buffer and bitmap data are obsolete. Forgot to call TensorImage#load?"); + if (isBitmapUpdated) { + return bitmapImage.getHeight(); + } + return getBufferDimensionSize(-3); + } + + // Internal helper method to get the size of one dimension in the shape of the `bufferImage`. + // Requires `isBufferUpdated` is true. + // Throws `IllegalArgumentException` if data is corrupted. + private int getBufferDimensionSize(int dim) { + int[] shape = bufferImage.getShape(); + // The defensive check is needed because bufferImage might be invalidly changed by user + // (a.k.a internal data is corrupted) + TensorImage.checkImageTensorShape(shape); + dim = dim % shape.length; + if (dim < 0) { + dim += shape.length; + } + return shape[dim]; + } + public DataType getDataType() { return dataType; } @@ -284,7 +339,8 @@ public class TensorImage { return bitmapImage; } if (!isBufferUpdated) { - throw new IllegalStateException("Both buffer and bitmap data are obsolete."); + throw new IllegalStateException( + "Both buffer and bitmap data are obsolete. Forgot to call TensorImage#load?"); } if (bufferImage.getDataType() != DataType.UINT8) { throw new IllegalStateException( @@ -310,7 +366,8 @@ public class TensorImage { return bufferImage; } SupportPreconditions.checkArgument( - isBitmapUpdated, "Both buffer and bitmap data are obsolete."); + isBitmapUpdated, + "Both buffer and bitmap data are obsolete. 
Forgot to call TensorImage#load?"); int requiredFlatSize = bitmapImage.getWidth() * bitmapImage.getHeight() * 3; if (bufferImage == null || (!bufferImage.isDynamic() && bufferImage.getFlatSize() != requiredFlatSize)) { diff --git a/tensorflow/lite/experimental/support/java/src/java/org/tensorflow/lite/support/tensorbuffer/TensorBuffer.java b/tensorflow/lite/experimental/support/java/src/java/org/tensorflow/lite/support/tensorbuffer/TensorBuffer.java index 16622a25333..fa05be363a6 100644 --- a/tensorflow/lite/experimental/support/java/src/java/org/tensorflow/lite/support/tensorbuffer/TensorBuffer.java +++ b/tensorflow/lite/experimental/support/java/src/java/org/tensorflow/lite/support/tensorbuffer/TensorBuffer.java @@ -379,13 +379,13 @@ public abstract class TensorBuffer { // Check if the new shape is the same as current shape. int newFlatSize = computeFlatSize(shape); + this.shape = shape.clone(); if (flatSize == newFlatSize) { return; } // Update to the new shape. flatSize = newFlatSize; - this.shape = shape.clone(); buffer = ByteBuffer.allocateDirect(flatSize * getTypeSize()); buffer.order(ByteOrder.nativeOrder()); } diff --git a/tensorflow/lite/experimental/support/metadata/java/BUILD b/tensorflow/lite/experimental/support/metadata/java/BUILD index f1cd6173b9e..82b6e9866a9 100644 --- a/tensorflow/lite/experimental/support/metadata/java/BUILD +++ b/tensorflow/lite/experimental/support/metadata/java/BUILD @@ -25,6 +25,10 @@ java_library( name = "tensorflow-lite-support-metadata-lib", srcs = glob(["src/java/org/tensorflow/lite/support/metadata/**/*.java"]), javacopts = JAVACOPTS, + resource_jars = [ + "//tensorflow/lite/experimental/support/metadata:libmetadata_schema_java.jar", + "//tensorflow/lite/experimental/support/metadata:libschema_fbs_java.jar", + ], deps = [ "//tensorflow/lite/experimental/support/metadata:metadata_schema_java", "//tensorflow/lite/experimental/support/metadata:schema_fbs_java", diff --git a/tensorflow/lite/experimental/swift/BUILD.apple 
b/tensorflow/lite/experimental/swift/BUILD.apple index 2ce8428b1ce..50130fc194a 100644 --- a/tensorflow/lite/experimental/swift/BUILD.apple +++ b/tensorflow/lite/experimental/swift/BUILD.apple @@ -13,11 +13,15 @@ package( swift_library( name = "TensorFlowLite", srcs = glob(["Sources/*.swift"]), + linkopts = [ + "-Wl,-weak_framework,CoreML", + "-Wl,-weak_framework,Metal", + ], module_name = "TensorFlowLite", tags = TFL_DEFAULT_TAGS, visibility = ios_visibility_whitelist(), deps = [ - "//tensorflow/lite/experimental/ios:TensorFlowLiteC", + "//tensorflow/lite/experimental/ios:tensorflow_lite_c", ], ) diff --git a/tensorflow/lite/experimental/swift/Sources/CoreMLDelegate.swift b/tensorflow/lite/experimental/swift/Sources/CoreMLDelegate.swift index 9862de31e2c..5a1526d45ea 100644 --- a/tensorflow/lite/experimental/swift/Sources/CoreMLDelegate.swift +++ b/tensorflow/lite/experimental/swift/Sources/CoreMLDelegate.swift @@ -35,6 +35,7 @@ public final class CoreMLDelegate: Delegate { self.options = options var delegateOptions = TfLiteCoreMlDelegateOptions() delegateOptions.enabled_devices = options.enabledDevices.cEnabledDevices + delegateOptions.coreml_version = Int32(options.coreMLVersion) delegateOptions.max_delegated_partitions = Int32(options.maxDelegatedPartitions) delegateOptions.min_nodes_per_partition = Int32(options.minNodesPerPartition) guard let delegate = TfLiteCoreMlDelegateCreate(&delegateOptions) else { return nil } @@ -72,6 +73,9 @@ extension CoreMLDelegate { /// value is `.neuralEngine` indicating that the delegate is enabled for Neural Engine devices /// only. public var enabledDevices: EnabledDevices = .neuralEngine + /// Target Core ML version for the model conversion. When it's not set, Core ML version will + /// be set to highest available version for the platform. + public var coreMLVersion = 0 /// The maximum number of Core ML delegate partitions created. Each graph corresponds to one /// delegated node subset in the TFLite model. 
The default value is `0` indicating that all /// possible partitions are delegated. diff --git a/tensorflow/lite/g3doc/convert/1x_compatibility.md b/tensorflow/lite/g3doc/convert/1x_compatibility.md index adb2af4d8ad..9f9f277a8d9 100644 --- a/tensorflow/lite/g3doc/convert/1x_compatibility.md +++ b/tensorflow/lite/g3doc/convert/1x_compatibility.md @@ -1,30 +1,32 @@ -# TensorFlow 1.x compatibility +# TensorFlow 1.x Compatibility -The `tf.lite.TFLiteConverter` was updated between TensorFlow 1.X and 2.0. This -document explains the differences between the 1.X and 2.0 versions of the -converter, and provides information about how to use the 1.X version if -required. +The `tf.lite.TFLiteConverter` Python API was updated between TensorFlow 1.x and +2.x. This document explains the differences between the two versions, and +provides information about how to use the 1.x version if required. -## Summary of changes in Python API between 1.X and 2.0 - -The following section summarizes the changes in the Python API from 1.X to 2.0. If any of the changes raise concerns, please file a -[GitHub issue](https://github.com/tensorflow/tensorflow/issues). +[GitHub Issue](https://github.com/tensorflow/tensorflow/issues). -### Formats supported by `TFLiteConverter` +Note: We highly recommend that you +[migrate your TensorFlow 1.x code to TensorFlow 2.x code](https://www.tensorflow.org/guide/migrate) +. -The 2.0 version of the converter supports SavedModel and Keras model files -generated in both 1.X and 2.0. However, the conversion process no longer -supports "frozen graph" `GraphDef` files generated in 1.X. +## Model formats -#### Converting frozen graphs +#### SavedModel and Keras -Users who want to convert frozen graph `GraphDef` files (`.pb` files) to -TensorFlow Lite should use `tf.compat.v1.lite.TFLiteConverter`. +The `tf.lite.TFLiteConverter` API supports SavedModel and Keras HDF5 files +generated in both TensorFlow 1.x and 2.x. 
-The following snippet shows a frozen graph file being converted: +#### Frozen Graph + +Note: TensorFlow 2.x no longer supports the generation of frozen graph models. + +The `tf.compat.v1.lite.TFLiteConverter` API supports frozen graph models +generated in TensorFlow 1.x, as shown below: ```python +import tensorflow as tf # Path to the frozen graph file graph_def_file = 'frozen_graph.pb' # A list of the names of the model's input tensors @@ -32,70 +34,68 @@ input_arrays = ['input_name'] # A list of the names of the model's output tensors output_arrays = ['output_name'] # Load and convert the frozen graph -converter = lite.TFLiteConverter.from_frozen_graph( +converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph( graph_def_file, input_arrays, output_arrays) tflite_model = converter.convert() # Write the converted model to disk open("converted_model.tflite", "wb").write(tflite_model) ``` -### Quantization-aware training +## Converter attributes -The following attributes and methods associated with -[quantization-aware training](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/quantize) -have been removed from `TFLiteConverter` in TensorFlow 2.0: +#### Renamed attributes -* `inference_type` -* `inference_input_type` -* `quantized_input_stats` -* `default_ranges_stats` -* `reorder_across_fake_quant` -* `change_concat_input_ranges` -* `post_training_quantize` - Deprecated in the 1.X API -* `get_input_arrays()` +The following 1.x attribute has been renamed in 2.x. -The rewriter function that supports quantization-aware training does not support -models generated by TensorFlow 2.0. Additionally, TensorFlow Lite’s quantization -API is being reworked and streamlined in a direction that supports -quantization-aware training through the Keras API. These attributes will be -removed in the 2.0 API until the new quantization API is launched. Users who -want to convert models generated by the rewriter function can use -`tf.compat.v1.lite.TFLiteConverter`.
+* `target_ops` has been renamed to `target_spec.supported_ops` - In 2.x, in + line with future additions to the optimization framework, it has become an + attribute of `TargetSpec` and has been renamed to `supported_ops`. -### Changes to `TFLiteConverter` attributes +#### Unsupported attributes -The `target_ops` attribute has become an attribute of `TargetSpec` and renamed -to `supported_ops` in line with future additions to the optimization framework. +The following 1.x attributes have been removed in 2.x. -Additionally, the following attributes have been removed: - -* `drop_control_dependency` (default: `True`) -* _Graph visualization_ - The recommended approach for visualizing a - TensorFlow Lite graph in TensorFlow 2.0 will be to use - [visualize.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/tools/visualize.py). - Unlike GraphViz, it enables users to visualize the graph after post training - quantization has occurred. The following attributes related to graph - visualization will be removed: +* _Quantization_ - In 2.x, + [quantization aware training](https://www.tensorflow.org/model_optimization/guide/quantization/training) + is supported through the Keras API and + [post training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization) + uses fewer, more streamlined converter flags. Thus, the following attributes and + methods related to quantization have been removed: + * `inference_type` + * `quantized_input_stats` + * `post_training_quantize` + * `default_ranges_stats` + * `reorder_across_fake_quant` + * `change_concat_input_ranges` + * `get_input_arrays()` +* _Visualization_ - In 2.x, the recommended approach for visualizing a + TensorFlow Lite graph is to use + [visualize.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/tools/visualize.py) + . Unlike GraphViz, it enables users to visualize the graph after post + training quantization has occurred.
Thus, the following attributes related + to graph visualization have been removed: * `output_format` * `dump_graphviz_dir` * `dump_graphviz_video` +* _Frozen graph_ - In 2.x, the frozen graph model format has been removed. + Thus, the following attribute related to frozen graphs has been removed: + * `drop_control_dependency` -### General API changes +## Unsupported APIs -The following section explains several significant API changes between -TensorFlow 1.X and 2.0. +The following section explains several significant features in 1.x that have +been removed in 2.x. -#### Conversion methods +#### Conversion APIs -The following methods that were previously deprecated in 1.X will no longer be -exported in 2.0: +The following methods were deprecated in 1.x and have been removed in 2.x: * `lite.toco_convert` * `lite.TocoConverter` -#### `lite.constants` +#### `lite.constants` API -The `lite.constants` API was removed in 2.0 in order to decrease duplication +The `lite.constants` API was removed in 2.x in order to decrease duplication between TensorFlow and TensorFlow Lite. The following list maps the `lite.constant` type to the TensorFlow type: @@ -106,12 +106,15 @@ between TensorFlow and TensorFlow Lite. The following list maps the * `lite.constants.STRING`: `tf.string` * `lite.constants.QUANTIZED_UINT8`: `tf.uint8` -Additionally, `lite.constants.TFLITE` and `lite.constants.GRAPHVIZ_DOT` were -removed due to the deprecation of the `output_format` flag in `TFLiteConverter`. +Additionally, the deprecation of the `output_format` flag in `TFLiteConverter` +led to the removal of the following constants: -#### `lite.OpHint` +* `lite.constants.TFLITE` +* `lite.constants.GRAPHVIZ_DOT` -The `OpHint` API is currently not available in 2.0 due to an incompatibility -with the 2.0 APIs. This API enables conversion of LSTM based models. Support for -LSTMs in 2.0 is being investigated. All related `lite.experimental` APIs have -been removed due to this issue. 
+#### `lite.OpHint` API + +The `OpHint` API is currently unsupported due to an incompatibility with the 2.x +APIs. This API enables conversion of LSTM based models. Support for LSTMs in 2.x +is being investigated. All related `lite.experimental` APIs have been removed +due to this issue. diff --git a/tensorflow/lite/g3doc/performance/coreml_delegate.md b/tensorflow/lite/g3doc/performance/coreml_delegate.md index da3b943fd89..c267347cf3f 100644 --- a/tensorflow/lite/g3doc/performance/coreml_delegate.md +++ b/tensorflow/lite/g3doc/performance/coreml_delegate.md @@ -6,7 +6,7 @@ which results in faster model inference on iOS devices. Note: This delegate is in experimental (beta) phase. -Note: Core ML delegate is using Core ML version 2.1. +Note: Core ML delegate supports Core ML version 2 and later. **Supported iOS versions and devices:** @@ -158,6 +158,14 @@ for more detail. Alternatively, you can implement your own set of blacklist devices using other libraries such as [DeviceKit](https://github.com/devicekit/DeviceKit). +### Using older Core ML version + +Although iOS 13 supports Core ML 3, the model might work better when it is +converted with Core ML 2 model specification. The target conversion version is +set to the latest version by default, but you can change this by setting +`coreMLVersion` (in Swift, `coreml_version` in C API) in the delegate option to +an older version. + ## Supported ops Following ops are supported by the Core ML delegate. @@ -187,6 +195,8 @@ Following ops are supported by the Core ML delegate. * ReluN1To1 * Relu6 * Reshape + * Only supported when target Core ML version is 2, not supported when + targeting Core ML 3.
* ResizeBilinear * SoftMax * Tanh diff --git a/tensorflow/lite/g3doc/performance/hexagon_delegate.md b/tensorflow/lite/g3doc/performance/hexagon_delegate.md index 51af59891dc..60fe9465bf4 100644 --- a/tensorflow/lite/g3doc/performance/hexagon_delegate.md +++ b/tensorflow/lite/g3doc/performance/hexagon_delegate.md @@ -259,43 +259,7 @@ ro.board.platform`). * This is tentatively planned for a future release, though there is no concrete timeline. * Which ops are supported by the delegate? - * Initial list of supported ops: - * Add - * ArgMax - * ArgMin - * AveragePool2D (without any activation) - * Concat - * Conv2D with following constraints: - * stride width/height <= 3 - * DepthToSpace - * DepthwiseConv2D with following constraints: - * Filter width == 3 - * depth_multiplier == 1 - * dilation only supported when stride == 1 - * Otherwise, stride height/width <= 3 - * FullyConnected (without any activation) - * Hardswish - * L2Normalization (without any activation) - * Logistic (aka Sigmoid) - * MaxPool2D (without any activation) - * Mul (without any activation) - * Neg - * Pad: Only supports 0 padding - * Relu - * Relu6 - * Reshape - * Resize Bilinear with following constraints: - * Requested size <= 65 - * Resize Nearest Neighbor - * SoftMax - * SpaceToDepth - * Split - * Sub - * Tanh - * Transpose - * TransposeConv2D with following constraints: - * stride height/width <= 3 - * dilation height/width == 1 + * See the current list of [supported ops and constraints](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/experimental/delegates/hexagon/README.md) * How can I tell that the model is using the DSP when I enable the delegate? 
* Two log messages will be printed when you enable the delegate - one to indicate if the delegate was created and another to indicate how many diff --git a/tensorflow/lite/g3doc/performance/post_training_quantization.md b/tensorflow/lite/g3doc/performance/post_training_quantization.md index 194d102d43d..a526be75b61 100644 --- a/tensorflow/lite/g3doc/performance/post_training_quantization.md +++ b/tensorflow/lite/g3doc/performance/post_training_quantization.md @@ -4,51 +4,44 @@ Post-training quantization is a conversion technique that can reduce model size while also improving CPU and hardware accelerator latency, with little degradation in model accuracy. You can perform these techniques using an already-trained float TensorFlow model when you convert it to TensorFlow Lite -format. +format using the [TensorFlow Lite Converter](../convert/). Note: The procedures on this page require TensorFlow 1.15 or higher. - -### Optimization options +### Optimization Methods There are several post-training quantization options to choose from. Here is a summary table of the choices and the benefits they provide: -| Technique | Benefits | Hardware | -| ------------------------- | ------------------------- | ------------------- | -| Dynamic range | 4x smaller, 2-3x speedup, | CPU | -: quantization : accuracy : : -| Full integer quantization | 4x smaller, 3x+ speedup | CPU, Edge TPU, etc. 
| -| Float16 quantization | 2x smaller, potential GPU | CPU/GPU | -: : acceleration : : +| Technique | Benefits | Hardware | +| -------------------- | ------------------------- | ---------------- | +| Dynamic range | 4x smaller, 2-3x speedup | CPU | +: quantization : : : +| Full integer | 4x smaller, 3x+ speedup | CPU, Edge TPU, | +: quantization : : Microcontrollers : +| Float16 quantization | 2x smaller, potential GPU | CPU, GPU | +: : acceleration : : This decision tree can help determine which post-training quantization method is best for your use case: ![post-training optimization options](images/optimization.jpg) -Alternatively, you might achieve higher accuracy if you perform -[quantization-aware training]( -https://github.com/tensorflow/tensorflow/tree/r1.14/tensorflow/contrib/quantize). -However, doing so requires some model modifications to add fake quantization -nodes, whereas the post-training quantization techniques on this page use an -existing pre-trained model. - ### Dynamic range quantization The simplest form of post-training quantization statically quantizes only the -weights from floating point to 8-bits of precision. This technique is enabled as -an option in the [TensorFlow Lite converter](../convert/): +weights from floating point to integer, which has 8-bits of precision: -``` +
 import tensorflow as tf
 converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
-converter.optimizations = [tf.lite.Optimize.DEFAULT]
+converter.optimizations = [tf.lite.Optimize.DEFAULT]
 tflite_quant_model = converter.convert()
-```
+
-At inference, weights are converted from 8-bits of precision to floating point and -computed using floating-point kernels. This conversion is done once and cached to reduce latency. +At inference, weights are converted from 8-bits of precision to floating point +and computed using floating-point kernels. This conversion is done once and +cached to reduce latency. To further improve latency, "dynamic-range" operators dynamically quantize activations based on their range to 8-bits and perform computations with 8-bit @@ -58,89 +51,105 @@ point, so that the speedup with dynamic-range ops is less than a full fixed-point computation. Dynamic-range ops are available for the most compute-intensive operators in a network: -* [tf.contrib.layers.fully_connected](https://www.tensorflow.org/api_docs/python/tf/contrib/layers/fully_connected) -* [tf.nn.conv2d](https://www.tensorflow.org/api_docs/python/tf/nn/conv2d) -* [tf.nn.embedding_lookup](https://www.tensorflow.org/api_docs/python/tf/nn/embedding_lookup) -* [BasicRNN](https://www.tensorflow.org/api_docs/python/tf/contrib/rnn/BasicRNNCell) -* [tf.nn.bidirectional_dynamic_rnn for BasicRNNCell type](https://www.tensorflow.org/api_docs/python/tf/nn/bidirectional_dynamic_rnn) -* [tf.nn.dynamic_rnn for LSTM and BasicRNN Cell types](https://www.tensorflow.org/api_docs/python/tf/nn/dynamic_rnn) +* `tf.keras.layers.Dense` +* `tf.keras.layers.Conv2D` +* `tf.keras.layers.LSTM` +* `tf.nn.embedding_lookup` +* `tf.compat.v1.nn.rnn_cell.BasicRNNCell` +* `tf.compat.v1.nn.bidirectional_dynamic_rnn` +* `tf.compat.v1.nn.dynamic_rnn` - -### Full integer quantization of weights and activations +### Full integer quantization You can get further latency improvements, reductions in peak memory usage, and -access to integer only hardware accelerators by making sure all model math is -quantized. +access to integer only hardware devices or accelerators by making sure all model +math is integer quantized. 
To do this, you need to measure the dynamic range of activations and inputs by -supplying a representative data set. You can simply create an input data -generator and provide it to our converter. For example: +supplying sample input data to the converter. Refer to the +`representative_dataset_gen()` function used in the following code. -``` +#### Integer with float fallback (using default float input/output) + +In order to fully integer quantize a model, but use float operators when they +don't have an integer implementation (to ensure conversion occurs smoothly), use +the following steps: + +
 import tensorflow as tf
-
+converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
+converter.optimizations = [tf.lite.Optimize.DEFAULT]
 def representative_dataset_gen():
   for _ in range(num_calibration_steps):
     # Get sample input data as a numpy array in a method of your choosing.
     yield [input]
-
-converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
-converter.optimizations = [tf.lite.Optimize.DEFAULT]
-converter.representative_dataset = representative_dataset_gen
+converter.representative_dataset = representative_dataset_gen
 tflite_quant_model = converter.convert()
-```
+
-The resulting model should be fully quantized, but any -ops that do not have quantized implementations are left in -floating point. This allows conversion to occur smoothly, but the model won't be -compatible with accelerators that require full integer quantization. +Note: This won't be compatible with integer only devices (such as 8-bit +microcontrollers) and accelerators (such as the Coral Edge TPU). For convenience +during inference, the input and output still remain float in order to have the +same interface as the original float only model. -Additionally, the model still uses float input and output for convenience. +#### Integer only -To ensure compatibility with some accelerators (such as the Coral Edge TPU), you -can enforce full integer quantization for all ops and use integer input and -output by adding the following lines before you convert: +*This is a common use case for +[TensorFlow Lite for Microcontrollers](https://www.tensorflow.org/lite/microcontrollers) +and [Coral Edge TPUs](https://coral.ai/).* -``` -converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] -converter.inference_input_type = tf.uint8 -converter.inference_output_type = tf.uint8 -``` +Additionally, to ensure compatibility with integer only devices (such as 8-bit +microcontrollers) and accelerators (such as the Coral Edge TPU), you can enforce +full integer quantization for all ops including the input and output, by using +the following steps: -The first line makes the converter throw an error if it encounters an operation -it cannot currently quantize. - -Note: `target_spec.supported_ops` was previously `target_ops` in the Python API. - - -### Float16 quantization of weights - -You can reduce the size of a floating point model by quantizing the weights to -float16, the IEEE standard for 16-bit floating point numbers. 
The advantages of -this quantization are as follows: - -- reduce model size by up to half (since all weights are now half the original - size) -- minimal loss in accuracy -- some delegates (e.g. the GPU delegate) can operate directly on float16 data, - which results in faster execution than float32 computations. - -This quantization may not be a good choice if you need maximum performance (a -quantization to fixed point math would be better in that case). To enable -float16 quantization of weights, specify "DEFAULT" optimization as above and -then specify that float16 is in supported types for the target_spec: - -``` +
 import tensorflow as tf
 converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
 converter.optimizations = [tf.lite.Optimize.DEFAULT]
-converter.target_spec.supported_types = [tf.lite.constants.FLOAT16]
+def representative_dataset_gen():
+  for _ in range(num_calibration_steps):
+    # Get sample input data as a numpy array in a method of your choosing.
+    yield [input]
+converter.representative_dataset = representative_dataset_gen
+converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
+converter.inference_input_type = tf.int8  # or tf.uint8
+converter.inference_output_type = tf.int8  # or tf.uint8
 tflite_quant_model = converter.convert()
-```
+
-By default, a float16 quantized model will "dequantize" the weights values to -float32 when run on the CPU. The GPU delegate will not perform this -dequantization, since it can operate on float16 data. +Note: The converter will throw an error if it encounters an operation it cannot +currently quantize. + +### Float16 quantization + +You can reduce the size of a floating point model by quantizing the weights to +float16, the IEEE standard for 16-bit floating point numbers. To enable float16 +quantization of weights, use the following steps: + +
+import tensorflow as tf
+converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
+converter.optimizations = [tf.lite.Optimize.DEFAULT]
+converter.target_spec.supported_types = [tf.lite.constants.FLOAT16]
+tflite_quant_model = converter.convert()
+
+ +The advantages of this quantization are as follows: + +* Reduce model size by up to half (since all weights are now half the original + size). +* Minimal loss in accuracy. +* Some delegates (e.g. the GPU delegate) can operate directly on + float16 data, which results in faster execution than float32 computations. + +The disadvantages of this quantization are as follows: + +* Not a good choice for maximum performance (a quantization to fixed point + math would be better in that case). +* By default, a float16 quantized model will "dequantize" the weight values + to float32 when run on the CPU. (Note that the GPU delegate will not perform + this dequantization, since it can operate on float16 data.) ### Model accuracy @@ -152,13 +161,18 @@ accuracy of the quantized model to verify that any degradation in accuracy is within acceptable limits. There is a tool to evaluate [TensorFlow Lite model accuracy](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/tools/accuracy/ilsvrc/README.md){:.external}. -If the accuracy drop is too high, consider using -[quantization aware training](https://github.com/tensorflow/tensorflow/tree/r1.13/tensorflow/contrib/quantize){:.external}. +Alternatively, if the accuracy drop is too high, consider using +[quantization aware training](https://www.tensorflow.org/model_optimization/guide/quantization/training). +However, doing so requires modifications during model training to add fake +quantization nodes, whereas the post-training quantization techniques on this +page use an existing pre-trained model. ### Representation for quantized tensors 8-bit quantization approximates floating point values using the following -formula. 
+ +$$real\_value = (int8\_value - zero\_point) \times scale$$ The representation has two main parts: diff --git a/tensorflow/lite/g3doc/tutorials/model_maker_image_classification.ipynb b/tensorflow/lite/g3doc/tutorials/model_maker_image_classification.ipynb index 64a03f0fc85..464a5d1b5ef 100644 --- a/tensorflow/lite/g3doc/tutorials/model_maker_image_classification.ipynb +++ b/tensorflow/lite/g3doc/tutorials/model_maker_image_classification.ipynb @@ -49,7 +49,7 @@ "metadata": { "colab_type": "text", "id": "nDABAblytltI" - }, + }, "source": [ "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", " \u003ctd\u003e\n", @@ -101,7 +101,7 @@ }, "outputs": [], "source": [ - "!pip install git+git://github.com/tensorflow/examples.git#egg=tensorflow-examples[model_maker]" + "!pip install git+https://github.com/tensorflow/examples.git#egg=tensorflow-examples[model_maker]" ] }, { diff --git a/tensorflow/lite/g3doc/tutorials/model_maker_text_classification.ipynb b/tensorflow/lite/g3doc/tutorials/model_maker_text_classification.ipynb index 4c6a8a72154..8261d6c9e34 100644 --- a/tensorflow/lite/g3doc/tutorials/model_maker_text_classification.ipynb +++ b/tensorflow/lite/g3doc/tutorials/model_maker_text_classification.ipynb @@ -101,7 +101,7 @@ }, "outputs": [], "source": [ - "!pip install git+git://github.com/tensorflow/examples.git#egg=tensorflow-examples[model_maker]" + "!pip install git+https://github.com/tensorflow/examples.git#egg=tensorflow-examples[model_maker]" ] }, { diff --git a/tensorflow/lite/interpreter_builder.cc b/tensorflow/lite/interpreter_builder.cc index e32e0768995..fb87702fd13 100644 --- a/tensorflow/lite/interpreter_builder.cc +++ b/tensorflow/lite/interpreter_builder.cc @@ -26,6 +26,7 @@ limitations under the License. 
#include "tensorflow/lite/c/common.h" #include "tensorflow/lite/core/api/error_reporter.h" #include "tensorflow/lite/core/api/flatbuffer_conversions.h" +#include "tensorflow/lite/kernels/internal/compatibility.h" #include "tensorflow/lite/schema/schema_generated.h" #include "tensorflow/lite/util.h" #include "tensorflow/lite/version.h" @@ -209,7 +210,15 @@ class MallocDataAllocator : public BuiltinDataAllocator { public: void* Allocate(size_t size, size_t alignment_hint) override { #ifdef TFLITE_USE_STD_ALIGNED_ALLOC - return aligned_alloc(alignment_hint, size); + // Ensure that alignment is a power of two and a multiple of sizeof(void *) + // and that size is an integral multiple of alignment. + size_t used_alignment = std::max(alignment_hint, sizeof(void*)); + size_t used_size = + ((size + used_alignment - 1) / used_alignment) * used_alignment; + TFLITE_DCHECK( + (used_alignment != 0) && + ((used_alignment & (used_alignment - 1)) == 0)); // is power-of-two + return aligned_alloc(used_alignment, used_size); #else return malloc(size); #endif diff --git a/tensorflow/lite/java/BUILD b/tensorflow/lite/java/BUILD index 49c2136ffb4..2fcb4b631be 100644 --- a/tensorflow/lite/java/BUILD +++ b/tensorflow/lite/java/BUILD @@ -240,6 +240,7 @@ java_test( data = [ "src/testdata/add.bin", "src/testdata/add_unknown_dimensions.bin", + "//tensorflow/lite:testdata/dynamic_shapes.bin", "//tensorflow/lite:testdata/multi_add.bin", "//tensorflow/lite:testdata/multi_add_flex.bin", ], diff --git a/tensorflow/lite/java/ovic/BUILD b/tensorflow/lite/java/ovic/BUILD index 947fbee1a45..e64bd3036ac 100644 --- a/tensorflow/lite/java/ovic/BUILD +++ b/tensorflow/lite/java/ovic/BUILD @@ -58,7 +58,6 @@ android_library( deps = [ "//tensorflow/lite/java:tensorflowlite", "//tensorflow/lite/java/src/testhelper/java/org/tensorflow/lite:testhelper", - "@org_checkerframework_qual", ], ) @@ -75,7 +74,6 @@ java_library( "//tensorflow/lite/java:tensorflowlite_java", "//tensorflow/lite/java/src/main/native", 
"//tensorflow/lite/java/src/testhelper/java/org/tensorflow/lite:testhelper", - "@org_checkerframework_qual", ], ) @@ -114,7 +112,6 @@ android_library( deps = [ "//tensorflow/lite/java:tensorflowlite", "//tensorflow/lite/java/src/testhelper/java/org/tensorflow/lite:testhelper", - "@org_checkerframework_qual", ], ) @@ -131,6 +128,5 @@ java_library( "//tensorflow/lite/java:tensorflowlite_java", "//tensorflow/lite/java/src/main/native", "//tensorflow/lite/java/src/testhelper/java/org/tensorflow/lite:testhelper", - "@org_checkerframework_qual", ], ) diff --git a/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Tensor.java b/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Tensor.java index 89a2a6a0639..cc9a6a451ac 100644 --- a/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Tensor.java +++ b/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Tensor.java @@ -196,7 +196,7 @@ public final class Tensor { } private void setTo(Buffer src) { - // Note that we attempt to use zero-copy optimization for direct, native-ordered buffers. + // Note that we attempt to use a direct memcpy optimization for direct, native-ordered buffers. // There are no base Buffer#order() or Buffer#put() methods, so again we have to ugly cast. 
if (src instanceof ByteBuffer) { ByteBuffer srcBuffer = (ByteBuffer) src; diff --git a/tensorflow/lite/java/src/main/native/tensor_jni.cc b/tensorflow/lite/java/src/main/native/tensor_jni.cc index 99be71ba37d..dfa4e22162a 100644 --- a/tensorflow/lite/java/src/main/native/tensor_jni.cc +++ b/tensorflow/lite/java/src/main/native/tensor_jni.cc @@ -402,14 +402,26 @@ JNIEXPORT void JNICALL Java_org_tensorflow_lite_Tensor_writeDirectBuffer( TfLiteTensor* tensor = GetTensorFromHandle(env, handle); if (tensor == nullptr) return; - char* src_data_raw = static_cast(env->GetDirectBufferAddress(src)); + void* src_data_raw = env->GetDirectBufferAddress(src); if (!src_data_raw) { ThrowException(env, kIllegalArgumentException, "Input ByteBuffer is not a direct buffer"); return; } - tensor->data.raw = src_data_raw; + if (!tensor->data.data) { + ThrowException(env, kIllegalArgumentException, + "Internal error: Tensor hasn't been allocated."); + return; + } + + // Historically, we would simply overwrite the tensor buffer pointer with + // the direct Buffer address. However, that is generally unsafe, and + // specifically wrong if the graph happens to have dynamic shapes where + // arena-allocated input buffers will be refreshed during invocation. + // TODO(b/156094015): Explore whether this is actually faster than + // using ByteBuffer.put(ByteBuffer). 
+ memcpy(tensor->data.data, src_data_raw, tensor->bytes); } JNIEXPORT void JNICALL diff --git a/tensorflow/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java b/tensorflow/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java index cd782c7f5aa..6b6799eaad9 100644 --- a/tensorflow/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java +++ b/tensorflow/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java @@ -40,6 +40,8 @@ public final class InterpreterTest { "tensorflow/lite/testdata/multi_add_flex.bin"; private static final String UNKNOWN_DIMS_MODEL_PATH = "tensorflow/lite/java/src/testdata/add_unknown_dimensions.bin"; + private static final String DYNAMIC_SHAPES_MODEL_PATH = + "tensorflow/lite/testdata/dynamic_shapes.bin"; private static final ByteBuffer MODEL_BUFFER = TestUtils.getTestFileAsBuffer(MODEL_PATH); private static final ByteBuffer MULTIPLE_INPUTS_MODEL_BUFFER = @@ -48,6 +50,8 @@ public final class InterpreterTest { TestUtils.getTestFileAsBuffer(FLEX_MODEL_PATH); private static final ByteBuffer UNKNOWN_DIMS_MODEL_PATH_BUFFER = TestUtils.getTestFileAsBuffer(UNKNOWN_DIMS_MODEL_PATH); + private static final ByteBuffer DYNAMIC_SHAPES_MODEL_BUFFER = + TestUtils.getTestFileAsBuffer(DYNAMIC_SHAPES_MODEL_PATH); @Test public void testInterpreter() throws Exception { @@ -434,7 +438,7 @@ public final class InterpreterTest { interpreter.close(); } - /** Smoke test validating that flex model loading fails when the flex delegate is not linked. */ + // Smoke test validating that flex model loading fails when the flex delegate is not linked. @Test public void testFlexModel() throws Exception { try { @@ -573,6 +577,45 @@ public final class InterpreterTest { } } + private static FloatBuffer fill(FloatBuffer buffer, float value) { + while (buffer.hasRemaining()) { + buffer.put(value); + } + buffer.rewind(); + return buffer; + } + + // Regression test case to ensure that graphs with dynamically computed shapes work properly. 
+ // Historically, direct ByteBuffer addresses would overwrite the arena-allocated tensor input + // pointers. Normally this works fine, but for dynamic graphs, the original input tensor pointers + // may be "restored" at invocation time by the arena allocator, resetting the direct ByteBuffer + // address and leading to stale input data being used. + @Test + public void testDynamicShapesWithDirectBufferInputs() { + try (Interpreter interpreter = new Interpreter(DYNAMIC_SHAPES_MODEL_BUFFER)) { + ByteBuffer input0 = + ByteBuffer.allocateDirect(8 * 42 * 1024 * 4).order(ByteOrder.nativeOrder()); + ByteBuffer input1 = + ByteBuffer.allocateDirect(1 * 90 * 1024 * 4).order(ByteOrder.nativeOrder()); + ByteBuffer input2 = ByteBuffer.allocateDirect(1 * 4).order(ByteOrder.nativeOrder()); + Object[] inputs = {input0, input1, input2}; + + fill(input0.asFloatBuffer(), 2.0f); + fill(input1.asFloatBuffer(), 0.5f); + // Note that the value of this input dictates the shape of the output. + fill(input2.asFloatBuffer(), 1.0f); + + FloatBuffer output = FloatBuffer.allocate(8 * 1 * 1024); + Map outputs = new HashMap<>(); + outputs.put(0, output); + + interpreter.runForMultipleInputsOutputs(inputs, outputs); + + FloatBuffer expected = fill(FloatBuffer.allocate(8 * 1 * 1024), 2.0f); + assertThat(output.array()).usingTolerance(0.1f).containsExactly(expected.array()).inOrder(); + } + } + private static native long getNativeHandleForDelegate(); private static native long getNativeHandleForInvalidDelegate(); diff --git a/tensorflow/lite/kernels/BUILD b/tensorflow/lite/kernels/BUILD index 5b6fe4b5b21..6f6d111fd77 100644 --- a/tensorflow/lite/kernels/BUILD +++ b/tensorflow/lite/kernels/BUILD @@ -357,9 +357,9 @@ cc_test( ":cpu_backend_context", ":cpu_backend_gemm", "@com_google_googletest//:gtest", - # ruy's reference path provides the reference implementation + # ruy:reference_mul provides the reference implementation # that this test compares against. 
- "@ruy//ruy", + "@ruy//ruy:reference_mul", ], ) diff --git a/tensorflow/lite/kernels/activations.cc b/tensorflow/lite/kernels/activations.cc index 4d52b5c7446..47146771b50 100644 --- a/tensorflow/lite/kernels/activations.cc +++ b/tensorflow/lite/kernels/activations.cc @@ -84,8 +84,10 @@ struct LeakyReluOpData : public OpData { }; struct PreluOpData : public OpData { - int32_t output_multiplier = 0; - int output_shift = 0; + int32_t output_multiplier_1 = 0; + int32_t output_shift_1 = 0; + int32_t output_multiplier_2 = 0; + int32_t output_shift_2 = 0; }; struct HardSwishData { @@ -364,7 +366,8 @@ TfLiteStatus LeakyReluPrepare(TfLiteContext* context, TfLiteNode* node) { LeakyReluOpData* data = reinterpret_cast(node->user_data); - if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { + if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 || + output->type == kTfLiteInt16) { const auto* params = reinterpret_cast(node->builtin_data); @@ -436,21 +439,29 @@ TfLiteStatus TanhPrepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); int input_scale_log2_rounded; - TF_LITE_ENSURE(context, - CheckedLog2(input->params.scale, &input_scale_log2_rounded)); + bool param_scale_pot = + CheckedLog2(input->params.scale, &input_scale_log2_rounded); + + data->input_left_shift = + (15 - kInputIntegerBits) + input_scale_log2_rounded; + param_scale_pot &= + (data->input_left_shift == 0 || data->input_left_shift == 1); + + if (!param_scale_pot) { + // In case of general scale parameter, we need to do a rescaling. + // Magic constant 4096: + // We need to scale down to (-2^3, 2^3) / 3 is kInputIntegerBits/ interval + // from 16-bit (-2^15, 2^15), + // so we need to multiply by + // 2^(15 - kInputIntegerBits) = 2^12 = 4096. 
+ data->input_multiplier = static_cast(input->params.scale * 4096); + } int output_scale_log2_rounded; TF_LITE_ENSURE( context, CheckedLog2(output->params.scale, &output_scale_log2_rounded)); TF_LITE_ENSURE_EQ(context, output_scale_log2_rounded, -kOutputFractionalBits); - - data->input_left_shift = - (15 - kInputIntegerBits) + input_scale_log2_rounded; - // Support for shifts is limited until we have a parameterized version of - // SaturatingRoundingMultiplyByPOT(). - TF_LITE_ENSURE(context, data->input_left_shift >= 0); - TF_LITE_ENSURE(context, data->input_left_shift <= 1); } return context->ResizeTensor(context, output, @@ -524,19 +535,28 @@ TfLiteStatus SigmoidPrepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); int input_scale_log2_rounded; - TF_LITE_ENSURE(context, - CheckedLog2(input->params.scale, &input_scale_log2_rounded)); + bool param_scale_pot = + CheckedLog2(input->params.scale, &input_scale_log2_rounded); + + data->input_left_shift = + (15 - kInputIntegerBits) + input_scale_log2_rounded; + param_scale_pot &= (data->input_left_shift == 0); + + if (!param_scale_pot) { + // In case of general scale parameter, we need to do a rescaling. + // Magic constant 4096: + // We need to scale down to (-2^3, 2^3) / 3 is kInputIntegerBits/ interval + // from 16-bit (-2^15, 2^15), + // so we need to multiply by + // 2^(15 - kInputIntegerBits) = 2^12 = 4096. + data->input_multiplier = static_cast(input->params.scale * 4096); + } int output_scale_log2_rounded; TF_LITE_ENSURE( context, CheckedLog2(output->params.scale, &output_scale_log2_rounded)); TF_LITE_ENSURE_EQ(context, output_scale_log2_rounded, -kOutputFractionalBits); - - data->input_left_shift = - (15 - kInputIntegerBits) + input_scale_log2_rounded; - // The int16 logistic implementation does not support shifting of the input. 
- TF_LITE_ENSURE_EQ(context, data->input_left_shift, 0); } return context->ResizeTensor(context, output, @@ -647,7 +667,6 @@ TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) { if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 || output->type == kTfLiteInt16) { - // This scale check is actually needed for quantized path: // prelu(x) = x if x >= 0 else x * alpha. // So if we translate that for quantized computation: // @@ -659,19 +678,19 @@ TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) { // ouput_q = (input_q - input_zp) * input_scale / output_scale + output_q // else: // output_q = (input_q - input_zp) * (alpha_q - alpha_zp) * input_scale - // * alpha_scale / output_scale +output_q + // * alpha_scale / output_scale + output_q // - // So we have two float values which we need to translate into multiplier - // shift languages. - // For simplicity & efficiency, if we make sure input_scale - // & output_scale are the same, we only need to translate the latter one - // into multiplier & shift format. - TF_LITE_ENSURE(context, - std::abs(input->params.scale - output->params.scale) < 1e-4); - double real_multiplier = + // So for input_q - input_zp >= 0: + // output real multiplier 1 is input_scale / output_scale; + // for input_q - input_zp < 0: + // output real multiplier 2 is input_scale * alpha_scale/ output_scale. + double real_multiplier_1 = input->params.scale / output->params.scale; + double real_multiplier_2 = input->params.scale * alpha->params.scale / output->params.scale; - QuantizeMultiplierSmallerThanOneExp( - real_multiplier, &data->output_multiplier, &data->output_shift); + QuantizeMultiplier(real_multiplier_1, &data->output_multiplier_1, + &data->output_shift_1); + QuantizeMultiplier(real_multiplier_2, &data->output_multiplier_2, + &data->output_shift_2); } // PRelu (parameteric Relu) shares the same alpha value on "shared axis". 
@@ -849,13 +868,13 @@ TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) { case kTfLiteInt16: { TanhParams params; params.input_left_shift = data->input_left_shift; - if (kernel_type == kReference) { + if (kernel_type == kReference || (data->input_multiplier > 0)) { const int size = MatchingFlatSize(GetTensorShape(input), GetTensorShape(output)); - reference_integer_ops::Tanh(data->input_left_shift, size, - GetTensorData(input), - GetTensorData(output)); + reference_integer_ops::Tanh( + data->input_multiplier, data->input_left_shift, size, + GetTensorData(input), GetTensorData(output)); } else { optimized_ops::Tanh( params, GetTensorShape(input), GetTensorData(input), @@ -924,11 +943,12 @@ TfLiteStatus SigmoidEval(TfLiteContext* context, TfLiteNode* node) { } case kTfLiteInt16: { LogisticParams params; - if (kernel_type == kReference) { + if (kernel_type == kReference || (data->input_multiplier > 0)) { const int size = MatchingFlatSize(GetTensorShape(input), GetTensorShape(output)); - reference_integer_ops::Logistic(size, GetTensorData(input), + reference_integer_ops::Logistic(data->input_multiplier, size, + GetTensorData(input), GetTensorData(output)); } else { optimized_ops::Logistic( @@ -1153,8 +1173,10 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) { op_params.input_offset = -input->params.zero_point; op_params.alpha_offset = -alpha->params.zero_point; op_params.output_offset = output->params.zero_point; - op_params.output_multiplier = data->output_multiplier; - op_params.output_shift = data->output_shift; + op_params.output_multiplier_1 = data->output_multiplier_1; + op_params.output_shift_1 = data->output_shift_1; + op_params.output_multiplier_2 = data->output_multiplier_2; + op_params.output_shift_2 = data->output_shift_2; reference_ops::BroadcastPrelu4DSlow( op_params, GetTensorShape(input), GetTensorData(input), GetTensorShape(alpha), GetTensorData(alpha), @@ -1166,8 +1188,10 @@ TfLiteStatus PreluEval(TfLiteContext* 
context, TfLiteNode* node) { op_params.input_offset = -input->params.zero_point; op_params.alpha_offset = -alpha->params.zero_point; op_params.output_offset = output->params.zero_point; - op_params.output_multiplier = data->output_multiplier; - op_params.output_shift = data->output_shift; + op_params.output_multiplier_1 = data->output_multiplier_1; + op_params.output_shift_1 = data->output_shift_1; + op_params.output_multiplier_2 = data->output_multiplier_2; + op_params.output_shift_2 = data->output_shift_2; reference_ops::BroadcastPrelu4DSlow( op_params, GetTensorShape(input), GetTensorData(input), GetTensorShape(alpha), GetTensorData(alpha), @@ -1183,6 +1207,22 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) { } } +template +void QuantizeLeakyRelu(const TfLiteTensor* input, TfLiteTensor* output, + const LeakyReluOpData* data) { + LeakyReluParams op_params; + + op_params.input_offset = input->params.zero_point; + op_params.output_offset = output->params.zero_point; + op_params.output_multiplier_alpha = data->output_multiplier_alpha; + op_params.output_shift_alpha = data->output_shift_alpha; + op_params.output_multiplier_identity = data->output_multiplier_identity; + op_params.output_shift_identity = data->output_shift_identity; + reference_ops::QuantizeLeakyRelu( + op_params, GetTensorShape(input), GetTensorData(input), + GetTensorShape(output), GetTensorData(output)); +} + TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* input = GetInput(context, node, 0); TfLiteTensor* output = GetOutput(context, node, 0); @@ -1201,33 +1241,21 @@ TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } break; case kTfLiteUInt8: { - op_params.input_offset = input->params.zero_point; - op_params.output_offset = output->params.zero_point; - op_params.output_multiplier_alpha = data->output_multiplier_alpha; - op_params.output_shift_alpha = data->output_shift_alpha; - 
op_params.output_multiplier_identity = data->output_multiplier_identity; - op_params.output_shift_identity = data->output_shift_identity; - reference_ops::QuantizeLeakyRelu( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + QuantizeLeakyRelu(input, output, data); return kTfLiteOk; } break; case kTfLiteInt8: { - op_params.input_offset = input->params.zero_point; - op_params.output_offset = output->params.zero_point; - op_params.output_multiplier_alpha = data->output_multiplier_alpha; - op_params.output_shift_alpha = data->output_shift_alpha; - op_params.output_multiplier_identity = data->output_multiplier_identity; - op_params.output_shift_identity = data->output_shift_identity; - reference_ops::QuantizeLeakyRelu( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + QuantizeLeakyRelu(input, output, data); + return kTfLiteOk; + } break; + case kTfLiteInt16: { + QuantizeLeakyRelu(input, output, data); return kTfLiteOk; } break; default: TF_LITE_KERNEL_LOG( context, - "Only float32, int8 and uint8 is supported currently, got %s.", + "Only float32, int8, int16 and uint8 is supported currently, got %s.", TfLiteTypeGetName(input->type)); return kTfLiteError; } diff --git a/tensorflow/lite/kernels/activations_test.cc b/tensorflow/lite/kernels/activations_test.cc index b4711216524..9f6fb932d34 100644 --- a/tensorflow/lite/kernels/activations_test.cc +++ b/tensorflow/lite/kernels/activations_test.cc @@ -108,10 +108,20 @@ class BaseActivationsOpModel : public SingleOpModel { BaseActivationsOpModel(TensorData input, float alpha) { input_ = AddInput(input); // The output scale and input scale might be different. - if (input.type == TensorType_UINT8 || input.type == TensorType_INT8) { + if (input.type == TensorType_UINT8 || input.type == TensorType_INT8 || + input.type == TensorType_INT16) { auto output_min = (input.min >= 0) ? 
input.min : input.min * alpha; auto output_max = (input.max >= 0) ? input.max : input.max * alpha; - output_ = AddOutput({input.type, {}, output_min, output_max}); + if (input.type == TensorType_INT16) { + output_ = AddOutput({TensorType_INT16, + {}, + 0, + 0, + output_max / (std::numeric_limits::max()), + 0}); + } else { + output_ = AddOutput({input.type, {}, output_min, output_max}); + } } else { output_ = AddOutput({input.type, {}}); } @@ -504,14 +514,15 @@ TEST(QuantizedActivationsOpTest, LeakyReluUint8) { kQuantizedTolerance * 8))); } -TEST(QuantizedActivationsOpTest, LeakyReluInt8) { +template +void QuantizedActivationsOpTestLeakyRelu() { const float kMin = -1; const float kMax = 127.f / 128.f; QuantizedActivationsOpModel m( - /*input=*/{TensorType_INT8, {5, 5}, 5 * kMin, 5 * kMax}, 0.1); + /*input=*/{tensor_type, {5, 5}, 5 * kMin, 5 * kMax}, 0.1); - m.SetInput({ + m.SetInput({ -5.0f, -4.6f, -4.2f, -3.8f, -3.4f, // Row 1 -3.0f, -2.6f, -2.2f, -1.8f, -1.4f, // Row 2 -1.0f, -0.6f, -0.2f, 0.2f, 0.6f, // Row 3 @@ -519,7 +530,12 @@ TEST(QuantizedActivationsOpTest, LeakyReluInt8) { 3.0f, 3.4f, 3.8f, 4.2f, 4.6f, // Row 5 }); m.Invoke(); - EXPECT_THAT(m.GetDequantizedOutput(), + + float kTestQuantizedTolerance = tensor_type == TensorType_INT16 + ? 
kQuantizedToleranceInt16 + : kQuantizedTolerance * 5; + + EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear( { -0.50f, -0.46f, -0.42f, -0.38f, -0.34f, // Row 1 @@ -528,7 +544,15 @@ TEST(QuantizedActivationsOpTest, LeakyReluInt8) { 1.00f, 1.40f, 1.80f, 2.20f, 2.60f, // Row 4 3.00f, 3.40f, 3.80f, 4.20f, 4.60f, // Row 5 }, - kQuantizedTolerance * 5))); + kTestQuantizedTolerance))); +} + +TEST(QuantizedActivationsOpTest, LeakyReluInt8) { + QuantizedActivationsOpTestLeakyRelu(); +} + +TEST(QuantizedActivationsOpTest, LeakyReluInt16) { + QuantizedActivationsOpTestLeakyRelu(); } TEST(QuantizedActivationsOpTest, Relu1Int8) { @@ -773,19 +797,73 @@ TEST_P(TanhOpTest, TanhInt16) { const float kMax = 32767.f / 32768.f; QuantizedActivationsOpModel m( GetRegistration(), BuiltinOperator_TANH, - /*input=*/{TensorType_INT16, {1, 2, 8, 1}, 8 * kMin, 8 * kMax}, - /*output=*/{TensorType_INT16, {1, 2, 8, 1}, kMin, kMax}); - m.SetInput({0, -6, 2, 4, // - -4, -2, 8, 1, // - 7, -8, 3, -5, // - 6, -1, -3, 5}); + /*input=*/{TensorType_INT16, {89}, 8 * kMin, 8 * kMax}, + /*output=*/{TensorType_INT16, {89}, kMin, kMax}); + m.SetInput( + {-8.0000000000, -7.8181818182, -7.6363636364, -7.4545454545, + -7.2727272727, -7.0909090909, -6.9090909091, -6.7272727273, + -6.5454545455, -6.3636363636, -6.1818181818, -6.0000000000, + -5.8181818182, -5.6363636364, -5.4545454545, -5.2727272727, + -5.0909090909, -4.9090909091, -4.7272727273, -4.5454545455, + -4.3636363636, -4.1818181818, -4.0000000000, -3.8181818182, + -3.6363636364, -3.4545454545, -3.2727272727, -3.0909090909, + -2.9090909091, -2.7272727273, -2.5454545455, -2.3636363636, + -2.1818181818, -2.0000000000, -1.8181818182, -1.6363636364, + -1.4545454545, -1.2727272727, -1.0909090909, -0.9090909091, + -0.7272727273, -0.5454545455, -0.3636363636, -0.1818181818, + 0.0000000000, 0.1818181818, 0.3636363636, 0.5454545455, + 0.7272727273, 0.9090909091, 1.0909090909, 1.2727272727, + 1.4545454545, 1.6363636364, 1.8181818182, 
2.0000000000, + 2.1818181818, 2.3636363636, 2.5454545455, 2.7272727273, + 2.9090909091, 3.0909090909, 3.2727272727, 3.4545454545, + 3.6363636364, 3.8181818182, 4.0000000000, 4.1818181818, + 4.3636363636, 4.5454545455, 4.7272727273, 4.9090909091, + 5.0909090909, 5.2727272727, 5.4545454545, 5.6363636364, + 5.8181818182, 6.0000000000, 6.1818181818, 6.3636363636, + 6.5454545455, 6.7272727273, 6.9090909091, 7.0909090909, + 7.2727272727, 7.4545454545, 7.6363636364, 7.8181818182, + 8.0000000000}); m.Invoke(); EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear( - {0.0, -0.999987, 0.964027, 0.999329, // - -0.999329, -0.96402, 0.99999, 0.76159, // - 0.999998337, -0.99999, 0.995054754, -0.999909204, // - 0.999999996, -0.76159, -0.995054754, 0.999909204}, + {-0.9999997749, -0.9999996762, -0.9999995342, -0.9999993300, + -0.9999990361, -0.9999986134, -0.9999980053, -0.9999971306, + -0.9999958722, -0.9999940619, -0.9999914578, -0.9999877117, + -0.9999823226, -0.9999745703, -0.9999634183, -0.9999473758, + -0.9999242982, -0.9998911009, -0.9998433469, -0.9997746542, + -0.9996758446, -0.9995337191, -0.9993292997, -0.9990353053, + -0.9986125310, -0.9980046622, -0.9971308601, -0.9958751909, + -0.9940716137, -0.9914827859, -0.9877703933, -0.9824541388, + -0.9748561217, -0.9640275801, -0.9486568273, -0.9269625051, + -0.8965880154, -0.8545351057, -0.7972097087, -0.7206956332, + -0.6213939966, -0.4971057414, -0.3484130125, -0.1798408185, + 0.0000000000, 0.1798408185, 0.3484130125, 0.4971057414, + 0.6213939966, 0.7206956332, 0.7972097087, 0.8545351057, + 0.8965880154, 0.9269625051, 0.9486568273, 0.9640275801, + 0.9748561217, 0.9824541388, 0.9877703933, 0.9914827859, + 0.9940716137, 0.9958751909, 0.9971308601, 0.9980046622, + 0.9986125310, 0.9990353053, 0.9993292997, 0.9995337191, + 0.9996758446, 0.9997746542, 0.9998433469, 0.9998911009, + 0.9999242982, 0.9999473758, 0.9999634183, 0.9999745703, + 0.9999823226, 0.9999877117, 0.9999914578, 0.9999940619, + 0.9999958722, 
0.9999971306, 0.9999980053, 0.9999986134, + 0.9999990361, 0.9999993300, 0.9999995342, 0.9999996762, + 0.9999997749}, + kQuantizedToleranceInt16))); +} + +TEST_P(TanhOpTest, TanhInt16General) { + const float kMin = -1; + const float kMax = 32767.f / 32768.f; + QuantizedActivationsOpModel m( + GetRegistration(), BuiltinOperator_TANH, + /*input=*/{TensorType_INT16, {6}, 11 * kMin, 11 * kMax}, + /*output=*/{TensorType_INT16, {5}, kMin, kMax}); + m.SetInput({-10, -4, 0, 6, 7.0909090909, 8}); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear( + {-0.999969, -0.99408, 0, 0.999664, 0.999939, 0.999969}, kQuantizedToleranceInt16))); } @@ -914,20 +992,74 @@ TEST_P(LogisticOpTest, SigmoidInt16) { const float kMax = 32767.f / 32768.f; QuantizedActivationsOpModel m( GetRegistration(), BuiltinOperator_LOGISTIC, - /*input=*/{TensorType_INT16, {1, 2, 6, 1}, 8 * kMin, 8 * kMax}, - /*output=*/{TensorType_INT16, {1, 2, 6, 1}, kMin, kMax}); - m.SetInput({0, -6, 2, 4, // - 3, -2, 8, 1, // - 5, -8, 7, -3}); + /*input=*/{TensorType_INT16, {89}, 8 * kMin, 8 * kMax}, + /*output=*/{TensorType_INT16, {89}, kMin, kMax}); + m.SetInput( + {-10.0000000000, -9.7727272727, -9.5454545455, -9.3181818182, + -9.0909090909, -8.8636363636, -8.6363636364, -8.4090909091, + -8.1818181818, -7.9545454545, -7.7272727273, -7.5000000000, + -7.2727272727, -7.0454545455, -6.8181818182, -6.5909090909, + -6.3636363636, -6.1363636364, -5.9090909091, -5.6818181818, + -5.4545454545, -5.2272727273, -5.0000000000, -4.7727272727, + -4.5454545455, -4.3181818182, -4.0909090909, -3.8636363636, + -3.6363636364, -3.4090909091, -3.1818181818, -2.9545454545, + -2.7272727273, -2.5000000000, -2.2727272727, -2.0454545455, + -1.8181818182, -1.5909090909, -1.3636363636, -1.1363636364, + -0.9090909091, -0.6818181818, -0.4545454545, -0.2272727273, + 0.0000000000, 0.2272727273, 0.4545454545, 0.6818181818, + 0.9090909091, 1.1363636364, 1.3636363636, 1.5909090909, + 1.8181818182, 2.0454545455, 
2.2727272727, 2.5000000000, + 2.7272727273, 2.9545454545, 3.1818181818, 3.4090909091, + 3.6363636364, 3.8636363636, 4.0909090909, 4.3181818182, + 4.5454545455, 4.7727272727, 5.0000000000, 5.2272727273, + 5.4545454545, 5.6818181818, 5.9090909091, 6.1363636364, + 6.3636363636, 6.5909090909, 6.8181818182, 7.0454545455, + 7.2727272727, 7.5000000000, 7.7272727273, 7.9545454545, + 8.1818181818, 8.4090909091, 8.6363636364, 8.8636363636, + 9.0909090909, 9.3181818182, 9.5454545455, 9.7727272727, + 10.0000000000}); m.Invoke(); - EXPECT_THAT(m.GetDequantizedOutput(), - ElementsAreArray(ArrayFloatNear( - { - 0.5, 0.002473, 0.880797, 0.982014, // - 0.952574, 0.119203, 0.9995, 0.731059, // - 0.993307, 0.0003535, 0.999089, 0.047426 // - }, - kQuantizedToleranceInt16))); + EXPECT_THAT( + m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear( + {0.0000453979, 0.0000569815, 0.0000715205, 0.0000897689, 0.0001126729, + 0.0001414198, 0.0001774998, 0.0002227827, 0.0002796147, 0.0003509396, + 0.0004404502, 0.0005527786, 0.0006937345, 0.0008706021, 0.0010925128, + 0.0013709094, 0.0017201256, 0.0021581065, 0.0027073042, 0.0033957870, + 0.0042586071, 0.0053394826, 0.0066928509, 0.0083863576, 0.0105038445, + 0.0131488902, 0.0164489307, 0.0205599431, 0.0256715863, 0.0320125562, + 0.0398556989, 0.0495221198, 0.0613831074, 0.0758581800, 0.0934070047, + 0.1145124805, 0.1396521834, 0.1692560327, 0.2036499335, 0.2429886272, + 0.2871859014, 0.3358556241, 0.3882805886, 0.4434251301, 0.5000000000, + 0.5565748699, 0.6117194114, 0.6641443759, 0.7128140986, 0.7570113728, + 0.7963500665, 0.8307439673, 0.8603478166, 0.8854875195, 0.9065929953, + 0.9241418200, 0.9386168926, 0.9504778802, 0.9601443011, 0.9679874438, + 0.9743284137, 0.9794400569, 0.9835510693, 0.9868511098, 0.9894961555, + 0.9916136424, 0.9933071491, 0.9946605174, 0.9957413929, 0.9966042130, + 0.9972926958, 0.9978418935, 0.9982798744, 0.9986290906, 0.9989074872, + 0.9991293979, 0.9993062655, 0.9994472214, 0.9995595498, 0.9996490604, + 
0.9997203853, 0.9997772173, 0.9998225002, 0.9998585802, 0.9998873271, + 0.9999102311, 0.9999284795, 0.9999430185, 0.9999546021}, + kQuantizedToleranceInt16))); +} + +TEST_P(LogisticOpTest, SigmoidInt16General) { + const float kMin = -1; + const float kMax = 32767.f / 32768.f; + QuantizedActivationsOpModel m( + GetRegistration(), BuiltinOperator_LOGISTIC, + /*input=*/{TensorType_INT16, {8}, 10 * kMin, 10 * kMax}, + /*output=*/{TensorType_INT16, {8}, kMin, kMax}); + m.SetInput({ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }); + m.Invoke(); + EXPECT_THAT( + m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({0.5, 0.00814819, 0.832031, 0.960846, // + 0.916809, 0.167969, 0.999664, 0.689972}, + kQuantizedToleranceInt16))); } TEST(FloatActivationsOpTest, Softmax4D) { diff --git a/tensorflow/lite/kernels/cpu_backend_gemm_test.cc b/tensorflow/lite/kernels/cpu_backend_gemm_test.cc index 7f148dfa9f1..110eb3a07ef 100644 --- a/tensorflow/lite/kernels/cpu_backend_gemm_test.cc +++ b/tensorflow/lite/kernels/cpu_backend_gemm_test.cc @@ -25,7 +25,7 @@ limitations under the License. #include #include -#include "ruy/ruy.h" // from @ruy +#include "ruy/reference_mul.h" // from @ruy #include "tensorflow/lite/kernels/cpu_backend_context.h" #include "tensorflow/lite/kernels/cpu_backend_gemm_params.h" @@ -353,8 +353,7 @@ void ReferenceGemm( ruy::MulParams ruy_mul_params; cpu_backend_gemm::detail::MakeRuyMulParams(params, &ruy_mul_params); - ruy::Mul(ruy_lhs, ruy_rhs, ruy_mul_params, - context->ruy_context(), &ruy_dst); + ruy::ReferenceMul(ruy_lhs, ruy_rhs, ruy_mul_params, &ruy_dst); } template 0) ? input_multiplier : 1; + for (int i = 0; i < input_size; ++i, ptr_input_data++, ptr_output_data++) { - int32_t input_data = *ptr_input_data; + int32_t input_data = (*ptr_input_data) * input_data_mul; // Scale by 3/4 to expand range [-8,8]->[-10.7,10.7] and // we do interpolation on unsigned values. 
diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h b/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h index 8c07c6f6d6c..baae65ab30e 100644 --- a/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h +++ b/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h @@ -57,12 +57,16 @@ inline void Tanh(int32_t input_zero_point, int32_t input_range_radius, } } -inline void Tanh(int32_t input_left_shift, int32_t input_size, - const int16_t* ptr_input_data, int16_t* ptr_output_data) { +inline void Tanh(int32_t input_multiplier, int32_t input_left_shift, + int32_t input_size, const int16_t* ptr_input_data, + int16_t* ptr_output_data) { // We use the LUT for sigmoid and take into account, that // tanh(x) = 2*sigmoid(2*x) - 1 + + int32_t input_data_mul = (input_multiplier > 0) ? input_multiplier : 1; + for (int i = 0; i < input_size; ++i, ptr_input_data++, ptr_output_data++) { - int32_t input_data = *ptr_input_data; + int32_t input_data = (*ptr_input_data) * input_data_mul; if (input_left_shift == 1) { input_data <<= 1; diff --git a/tensorflow/lite/kernels/internal/reference/prelu.h b/tensorflow/lite/kernels/internal/reference/prelu.h index d3d7d78a4a4..50d9ad24dd9 100644 --- a/tensorflow/lite/kernels/internal/reference/prelu.h +++ b/tensorflow/lite/kernels/internal/reference/prelu.h @@ -48,14 +48,16 @@ inline void BroadcastPrelu4DSlow( params.input_offset + input_data[input_index]; int32 output_value; if (input_value >= 0) { - output_value = input_value; + output_value = MultiplyByQuantizedMultiplier( + input_value, params.output_multiplier_1, params.output_shift_1); } else { auto alpha_index = SubscriptToIndex(desc2, b, y, x, c); const int32 alpha_value = params.alpha_offset + alpha_data[alpha_index]; + output_value = MultiplyByQuantizedMultiplier( - input_value * alpha_value, params.output_multiplier, - params.output_shift); + input_value * alpha_value, params.output_multiplier_2, + params.output_shift_2); } output_value 
+= params.output_offset; diff --git a/tensorflow/lite/kernels/internal/types.h b/tensorflow/lite/kernels/internal/types.h index cbdedd88901..52d74d1eca4 100644 --- a/tensorflow/lite/kernels/internal/types.h +++ b/tensorflow/lite/kernels/internal/types.h @@ -972,8 +972,10 @@ struct PreluParams { int32 input_offset; int32 alpha_offset; int32 output_offset; - int32 output_multiplier; - int output_shift; + int32 output_multiplier_1; + int32 output_shift_1; + int32 output_multiplier_2; + int32 output_shift_2; }; struct PoolParams { diff --git a/tensorflow/lite/kernels/kernel_util.h b/tensorflow/lite/kernels/kernel_util.h index ad068ddd3fd..5793b08616d 100644 --- a/tensorflow/lite/kernels/kernel_util.h +++ b/tensorflow/lite/kernels/kernel_util.h @@ -87,6 +87,10 @@ inline const TfLiteTensor* GetOptionalInputTensor(TfLiteContext* context, } // Determines whether tensor is constant. +// TODO(b/138199592): Introduce new query which checks for constant OR +// persistent-read-only, which would be useful for most tensor kernels that +// are potentially dynamic based on the input tensor value availability at the +// time of prepare. inline bool IsConstantTensor(const TfLiteTensor* tensor) { return tensor->allocation_type == kTfLiteMmapRo; } @@ -105,6 +109,14 @@ inline void SetTensorToDynamic(TfLiteTensor* tensor) { } } +// Sets tensor to persistent and read-only. +inline void SetTensorToPersistentRo(TfLiteTensor* tensor) { + if (tensor->allocation_type != kTfLitePersistentRo) { + tensor->allocation_type = kTfLitePersistentRo; + tensor->data.raw = nullptr; + } +} + // Determines whether it is a hybrid op - one that has float inputs and // quantized weights. 
inline bool IsHybridOp(const TfLiteTensor* input, const TfLiteTensor* weight) { diff --git a/tensorflow/lite/kernels/rank.cc b/tensorflow/lite/kernels/rank.cc index 8e27ebcc325..53fd92f1682 100644 --- a/tensorflow/lite/kernels/rank.cc +++ b/tensorflow/lite/kernels/rank.cc @@ -30,19 +30,23 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + const TfLiteTensor* input = GetInput(context, node, kInputTensor); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); output->type = kTfLiteInt32; + // By design, the input shape is always known at the time of Prepare, even + // if the preceding op that generates |input| is dynamic. Thus, we can + // always compute the rank immediately, without waiting for Eval. + SetTensorToPersistentRo(output); + // Rank produces a 0-D int32 Tensor representing the rank of input. TfLiteIntArray* output_size = TfLiteIntArrayCreate(0); - return context->ResizeTensor(context, output, output_size); -} + TF_LITE_ENSURE_STATUS(context->ResizeTensor(context, output, output_size)); -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); TF_LITE_ENSURE_EQ(context, NumDimensions(output), 0); + // Immediately propagate the known rank to the output tensor. This allows + // downstream ops that rely on the value to use it during prepare. 
if (output->type == kTfLiteInt32) { int32_t* output_data = GetTensorData(output); *output_data = NumDimensions(input); @@ -53,6 +57,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + return kTfLiteOk; +} + } // namespace rank TfLiteRegistration* Register_RANK() { diff --git a/tensorflow/lite/kernels/rank_test.cc b/tensorflow/lite/kernels/rank_test.cc index f3dc97126ba..5373a0a66fe 100644 --- a/tensorflow/lite/kernels/rank_test.cc +++ b/tensorflow/lite/kernels/rank_test.cc @@ -43,6 +43,9 @@ class RankOpModel : public SingleOpModel { std::vector GetOutput() { return ExtractVector(output_); } std::vector GetOutputShape() { return GetTensorShape(output_); } + TfLiteAllocationType GetOutputAllocationType() const { + return interpreter_->tensor(interpreter_->outputs()[0])->allocation_type; + } private: int input_; @@ -51,6 +54,13 @@ class RankOpModel : public SingleOpModel { TEST(RankOpTest, InputTypeFloat) { RankOpModel model({1, 3, 1, 3, 5}, TensorType_FLOAT32); + ASSERT_EQ(model.GetOutputAllocationType(), kTfLitePersistentRo); + + // Unlike most ops, Rank populates outputs in Prepare(). + EXPECT_THAT(model.GetOutput(), ElementsAreArray({5})); + EXPECT_TRUE(model.GetOutputShape().empty()); + + // Invoke is superfluous and shouldn't change the output. 
model.Invoke(); EXPECT_THAT(model.GetOutput(), ElementsAreArray({5})); @@ -59,7 +69,6 @@ TEST(RankOpTest, InputTypeFloat) { TEST(RankOpTest, InputTypeInt) { RankOpModel model({1, 3, 1, 3, 5}, TensorType_INT32); - model.Invoke(); EXPECT_THAT(model.GetOutput(), ElementsAreArray({5})); EXPECT_TRUE(model.GetOutputShape().empty()); @@ -67,7 +76,6 @@ TEST(RankOpTest, InputTypeInt) { TEST(RankOpTest, ScalarTensor) { RankOpModel model({}, TensorType_FLOAT32); - model.Invoke(); EXPECT_THAT(model.GetOutput(), ElementsAreArray({0})); EXPECT_TRUE(model.GetOutputShape().empty()); @@ -75,7 +83,6 @@ TEST(RankOpTest, ScalarTensor) { TEST(RankOpTest, EmptyTensor) { RankOpModel model({1, 0}, TensorType_FLOAT32); - model.Invoke(); EXPECT_THAT(model.GetOutput(), ElementsAreArray({2})); EXPECT_TRUE(model.GetOutputShape().empty()); diff --git a/tensorflow/lite/kernels/register.cc b/tensorflow/lite/kernels/register.cc index 8b6ed20c009..f3a321e325b 100644 --- a/tensorflow/lite/kernels/register.cc +++ b/tensorflow/lite/kernels/register.cc @@ -39,10 +39,10 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_RELU6, Register_RELU6(), /* min_version = */ 1, /* max_version = */ 2); AddBuiltin(BuiltinOperator_TANH, Register_TANH(), /* min_version = */ 1, - /* max_version = */ 2); + /* max_version = */ 3); AddBuiltin(BuiltinOperator_LOGISTIC, Register_LOGISTIC(), /* min_version = */ 1, - /* max_version = */ 2); + /* max_version = */ 3); AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D, Register_AVERAGE_POOL_2D(), /* min_version */ 1, /* max_version */ 3); diff --git a/tensorflow/lite/kernels/resize_bilinear_test.cc b/tensorflow/lite/kernels/resize_bilinear_test.cc index 5cbba026010..d4d414ae29c 100644 --- a/tensorflow/lite/kernels/resize_bilinear_test.cc +++ b/tensorflow/lite/kernels/resize_bilinear_test.cc @@ -190,10 +190,6 @@ TEST_P(ResizeBilinearOpTest, TwoDimensionalResizeWithTwoBatches) { TEST_P(ResizeBilinearOpTest, TwoDimensionalResizeWithTwoBatches_HalfPixelCenters) { - // 
TODO(b/147696142): Update when NNAPI delegate can support TF2 behavior. - if (SingleOpModel::GetForceUseNnapi()) { - return; - } ResizeBilinearOpModel m({TensorType_FLOAT32, {2, 2, 2, 1}}, {3, 3}, GetParam(), /**half_pixel_centers**/ true); m.SetInput({ @@ -253,10 +249,6 @@ TEST_P(ResizeBilinearOpTest, TwoDimensionalResizeWithTwoBatchesUInt8) { TEST_P(ResizeBilinearOpTest, TwoDimensionalResizeWithTwoBatchesUInt8_HalfPixelCenters) { - // TODO(b/147696142): Update when NNAPI delegate can support TF2 behavior. - if (SingleOpModel::GetForceUseNnapi()) { - return; - } ResizeBilinearOpModel m({TensorType_UINT8, {2, 2, 2, 1}}, {3, 3}, GetParam(), /**half_pixel_centers**/ true); m.SetInput({ diff --git a/tensorflow/lite/kernels/shape.cc b/tensorflow/lite/kernels/shape.cc index 88794fefac4..d979f083f70 100644 --- a/tensorflow/lite/kernels/shape.cc +++ b/tensorflow/lite/kernels/shape.cc @@ -54,19 +54,22 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { return kTfLiteError; } + // By design, the input shape is always known at the time of Prepare, even + // if the preceding op that generates |input| is dynamic. Thus, we can + // always compute the shape immediately, without waiting for Eval. + SetTensorToPersistentRo(output); + // Shape always produces a 1-dimensional output tensor, where each output // element is the length of the corresponding input tensor's dimension. 
TfLiteIntArray* output_size = TfLiteIntArrayCreate(1); output_size->data[0] = NumDimensions(input); - return context->ResizeTensor(context, output, output_size); -} + TF_LITE_ENSURE_STATUS(context->ResizeTensor(context, output, output_size)); -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); TFLITE_DCHECK_EQ(NumDimensions(output), 1); TFLITE_DCHECK_EQ(SizeOfDimension(output, 0), NumDimensions(input)); + // Immediately propagate the known shape to the output tensor. This allows + // downstream ops that rely on the value to use it during prepare. switch (output->type) { case kTfLiteInt32: ExtractShape(input, GetTensorData(output)); @@ -81,6 +84,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + return kTfLiteOk; +} + } // namespace shape TfLiteRegistration* Register_SHAPE() { diff --git a/tensorflow/lite/kernels/shape_test.cc b/tensorflow/lite/kernels/shape_test.cc index 6a7dad4d3e0..3eeb83f5000 100644 --- a/tensorflow/lite/kernels/shape_test.cc +++ b/tensorflow/lite/kernels/shape_test.cc @@ -45,6 +45,9 @@ class ShapeOpModel : public SingleOpModel { int32_t GetOutputSize() { return GetTensorSize(output_); } std::vector GetOutput() { return ExtractVector(output_); } std::vector GetOutputShape() { return GetTensorShape(output_); } + TfLiteAllocationType GetOutputAllocationType() const { + return interpreter_->tensor(interpreter_->outputs()[0])->allocation_type; + } private: int input_; @@ -54,6 +57,13 @@ class ShapeOpModel : public SingleOpModel { TEST(ShapeOpTest, OutTypeInt) { ShapeOpModel model({1, 3, 1, 3, 5}, TensorType_FLOAT32, TensorType_INT32); + ASSERT_EQ(model.GetOutputAllocationType(), kTfLitePersistentRo); + + // Unlike most ops, Shape populates outputs in Prepare(). 
+ EXPECT_THAT(model.GetOutput(), ElementsAreArray({1, 3, 1, 3, 5})); + EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({5})); + + // Invoke is superfluous and shouldn't change the output. model.Invoke(); EXPECT_THAT(model.GetOutput(), ElementsAreArray({1, 3, 1, 3, 5})); @@ -63,7 +73,6 @@ TEST(ShapeOpTest, OutTypeInt) { TEST(ShapeOpTest, OutTypeInt64) { ShapeOpModel model({1, 3, 1, 3, 5}, TensorType_FLOAT32, TensorType_INT64); - model.Invoke(); EXPECT_THAT(model.GetOutput(), ElementsAreArray({1, 3, 1, 3, 5})); EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({5})); @@ -71,7 +80,6 @@ TEST(ShapeOpTest, OutTypeInt64) { TEST(ShapeOpTest, ScalarTensor) { ShapeOpModel model({}, TensorType_FLOAT32, TensorType_INT32); - model.Invoke(); EXPECT_EQ(model.GetOutputSize(), 0); EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({0})); @@ -79,7 +87,6 @@ TEST(ShapeOpTest, ScalarTensor) { TEST(ShapeOpTest, EmptyTensor) { ShapeOpModel model({1, 0}, TensorType_FLOAT32, TensorType_INT32); - model.Invoke(); EXPECT_THAT(model.GetOutput(), ElementsAreArray({1, 0})); EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2})); diff --git a/tensorflow/lite/micro/apollo3evb/micro_time.cc b/tensorflow/lite/micro/apollo3evb/micro_time.cc new file mode 100644 index 00000000000..12c9ae5c633 --- /dev/null +++ b/tensorflow/lite/micro/apollo3evb/micro_time.cc @@ -0,0 +1,72 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +// Reference implementation of timer functions. Platforms are not required to +// implement these timer methods, but they are required to enable profiling. + +// On platforms that have a POSIX stack or C library, it can be written using +// methods from <sys/time.h> or clock() from <time.h>. + +// To add an equivalent function for your own platform, create your own +// implementation file, and place it in a subfolder named after the OS +// you're targeting. For example, see the Cortex M bare metal version in +// tensorflow/lite/micro/bluepill/micro_timer.cc or the mbed one on +// tensorflow/lite/micro/mbed/micro_timer.cc. + +#include "tensorflow/lite/micro/micro_time.h" + +// These are headers from Ambiq's Apollo3 SDK. +#include "am_bsp.h" // NOLINT +#include "am_mcu_apollo.h" // NOLINT +#include "am_util.h" // NOLINT + +namespace tflite { +namespace { + +// Select CTIMER 1 as benchmarking timer on Sparkfun Edge. This timer must not +// be used elsewhere. +constexpr int kTimerNum = 1; + +// Clock set to operate at 12MHz. +constexpr int kClocksPerSecond = 12e6; + +} // namespace + +int32_t ticks_per_second() { return kClocksPerSecond; } + +// Calling this method enables a timer that runs for eternity. The user is +// responsible for avoiding trampling on this timer's config, otherwise timing +// measurements may no longer be valid. +int32_t GetCurrentTimeTicks() { + // TODO(b/150808076): Split out initialization, initialize in interpreter. + static bool is_initialized = false; + if (!is_initialized) { + am_hal_ctimer_config_t timer_config; + // Operate as a 32-bit timer. + timer_config.ui32Link = 1; + // Set timer A to continuous mode at 12MHz. 
+ timer_config.ui32TimerAConfig = + AM_HAL_CTIMER_FN_CONTINUOUS | AM_HAL_CTIMER_HFRC_12MHZ; + + am_hal_ctimer_stop(kTimerNum, AM_HAL_CTIMER_BOTH); + am_hal_ctimer_clear(kTimerNum, AM_HAL_CTIMER_BOTH); + am_hal_ctimer_config(kTimerNum, &timer_config); + am_hal_ctimer_start(kTimerNum, AM_HAL_CTIMER_TIMERA); + is_initialized = true; + } + return CTIMERn(kTimerNum)->TMR0; +} + +} // namespace tflite diff --git a/tensorflow/lite/micro/examples/micro_speech/main_functions.cc b/tensorflow/lite/micro/examples/micro_speech/main_functions.cc index 23c63a32986..d3989c07333 100644 --- a/tensorflow/lite/micro/examples/micro_speech/main_functions.cc +++ b/tensorflow/lite/micro/examples/micro_speech/main_functions.cc @@ -74,14 +74,22 @@ void setup() { // // tflite::ops::micro::AllOpsResolver resolver; // NOLINTNEXTLINE(runtime-global-variables) - static tflite::MicroOpResolver<3> micro_op_resolver; - micro_op_resolver.AddBuiltin( - tflite::BuiltinOperator_DEPTHWISE_CONV_2D, - tflite::ops::micro::Register_DEPTHWISE_CONV_2D()); - micro_op_resolver.AddBuiltin(tflite::BuiltinOperator_FULLY_CONNECTED, - tflite::ops::micro::Register_FULLY_CONNECTED()); - micro_op_resolver.AddBuiltin(tflite::BuiltinOperator_SOFTMAX, - tflite::ops::micro::Register_SOFTMAX()); + static tflite::MicroOpResolver<3> micro_op_resolver(error_reporter); + if (micro_op_resolver.AddBuiltin( + tflite::BuiltinOperator_DEPTHWISE_CONV_2D, + tflite::ops::micro::Register_DEPTHWISE_CONV_2D()) != kTfLiteOk) { + return; + } + if (micro_op_resolver.AddBuiltin( + tflite::BuiltinOperator_FULLY_CONNECTED, + tflite::ops::micro::Register_FULLY_CONNECTED()) != kTfLiteOk) { + return; + } + if (micro_op_resolver.AddBuiltin(tflite::BuiltinOperator_SOFTMAX, + tflite::ops::micro::Register_SOFTMAX()) != + kTfLiteOk) { + return; + } // Build an interpreter to run the model with. 
static tflite::MicroInterpreter static_interpreter( diff --git a/tensorflow/lite/micro/examples/micro_speech/train/train_micro_speech_model.ipynb b/tensorflow/lite/micro/examples/micro_speech/train/train_micro_speech_model.ipynb index 40f56f8012b..2a64ecd7078 100644 --- a/tensorflow/lite/micro/examples/micro_speech/train/train_micro_speech_model.ipynb +++ b/tensorflow/lite/micro/examples/micro_speech/train/train_micro_speech_model.ipynb @@ -1,2020 +1 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "train_micro_speech_model.ipynb", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "accelerator": "GPU" - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "pO4-CY_TCZZS", - "colab_type": "text" - }, - "source": [ - "# Train a Simple Audio Recognition Model" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BaFfr7DHRmGF", - "colab_type": "text" - }, - "source": [ - "This notebook demonstrates how to train a 20 kB [Simple Audio Recognition](https://www.tensorflow.org/tutorials/sequences/audio_recognition) model to recognize keywords in speech.\n", - "\n", - "The model created in this notebook is used in the [micro_speech](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/micro/examples/micro_speech) example for [TensorFlow Lite for MicroControllers](https://www.tensorflow.org/lite/microcontrollers/overview).\n", - "\n", - "\n", - " \n", - " \n", - "
\n", - " Run in Google Colab\n", - " \n", - " View source on GitHub\n", - "
\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XaVtYN4nlCft", - "colab_type": "text" - }, - "source": [ - "**Training is much faster using GPU acceleration.** Before you proceed, ensure you are using a GPU runtime by going to **Runtime -> Change runtime type** and set **Hardware accelerator: GPU**. Training 15,000 iterations will take 1.5 - 2 hours on a GPU runtime.\n", - "\n", - "## Configure Defaults\n", - "\n", - "**MODIFY** the following constants for your specific use case." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "ludfxbNIaegy", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# A comma-delimited list of the words you want to train for.\n", - "# The options are: yes,no,up,down,left,right,on,off,stop,go\n", - "# All the other words will be used to train an \"unknown\" label and silent\n", - "# audio data with no spoken words will be used to train a \"silence\" label.\n", - "WANTED_WORDS = \"yes,no\"\n", - "\n", - "# The number of steps and learning rates can be specified as comma-separated\n", - "# lists to define the rate at each stage. 
For example,\n", - "# TRAINING_STEPS=12000,3000 and LEARNING_RATE=0.001,0.0001\n", - "# will run 12,000 training loops in total, with a rate of 0.001 for the first\n", - "# 8,000, and 0.0001 for the final 3,000.\n", - "TRAINING_STEPS = \"12000,3000\"\n", - "LEARNING_RATE = \"0.001,0.0001\"\n", - "\n", - "# Calculate the total number of steps, which is used to identify the checkpoint\n", - "# file name.\n", - "TOTAL_STEPS = str(sum(map(lambda string: int(string), TRAINING_STEPS.split(\",\"))))\n", - "\n", - "# Print the configuration to confirm it\n", - "!echo \"Training these words:\" $WANTED_WORDS\n", - "!echo \"Training steps in each stage:\" $TRAINING_STEPS\n", - "!echo \"Learning rate in each stage:\" $LEARNING_RATE\n", - "!echo \"Total number of training steps:\" $TOTAL_STEPS" - ], - "execution_count": 1, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Training these words: yes,no\n", - "Training steps in each stage: 12000,3000\n", - "Learning rate in each stage: 0.001,0.0001\n", - "Total number of training steps: 15000\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gCgeOpvY9pAi", - "colab_type": "text" - }, - "source": [ - "**DO NOT MODIFY** the following constants as they include filepaths used in this notebook and data that is shared during training and inference." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Nd1iM1o2ymvA", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Calculate the percentage of 'silence' and 'unknown' training samples required\n", - "# to ensure that we have equal number of samples for each label.\n", - "number_of_labels = WANTED_WORDS.count(',') + 1\n", - "number_of_total_labels = number_of_labels + 2 # for 'silence' and 'unknown' label\n", - "equal_percentage_of_training_samples = int(100.0/(number_of_total_labels))\n", - "SILENT_PERCENTAGE = equal_percentage_of_training_samples\n", - "UNKNOWN_PERCENTAGE = equal_percentage_of_training_samples\n", - "\n", - "# Constants which are shared during training and inference\n", - "PREPROCESS = 'micro'\n", - "WINDOW_STRIDE ='20'\n", - "MODEL_ARCHITECTURE = 'tiny_conv' # Other options include: single_fc, conv,\n", - " # low_latency_conv, low_latency_svdf, tiny_embedding_conv\n", - "QUANTIZE = '1' # For booleans, we provide 1 or 0 (instead of True or False)\n", - "\n", - "# Constants used during training only\n", - "VERBOSITY = 'WARN'\n", - "EVAL_STEP_INTERVAL = '1000'\n", - "SAVE_STEP_INTERVAL = '5000'\n", - "\n", - "# Constants for training directories and filepaths\n", - "DATASET_DIR = 'dataset/'\n", - "LOGS_DIR = 'logs/'\n", - "TRAIN_DIR = 'train/' # for training checkpoints and other files.\n", - "\n", - "# Constants for inference directories and filepaths\n", - "import os\n", - "MODELS_DIR = 'models/'\n", - "os.mkdir(MODELS_DIR)\n", - "MODEL_TF = MODELS_DIR + 'model.pb'\n", - "MODEL_TFLITE = MODELS_DIR + 'model.tflite'\n", - "MODEL_TFLITE_MICRO = MODELS_DIR + 'model.cc'" - ], - "execution_count": 2, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6rLYpvtg9P4o", - "colab_type": "text" - }, - "source": [ - "## Setup Environment\n", - "\n", - "Install Dependencies" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "ed_XpUrU5DvY", - "colab_type": "code", - "colab": {} - }, - "source": [ - 
"%tensorflow_version 1.x\n", - "import tensorflow as tf" - ], - "execution_count": 3, - "outputs": [ - { - "output_type": "stream", - "text": [ - "TensorFlow 1.x selected.\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "T9Ty5mR58E4i", - "colab_type": "text" - }, - "source": [ - "**DELETE** any old data from previous runs\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "APGx0fEh7hFF", - "colab_type": "code", - "colab": {} - }, - "source": [ - "!rm -rf {DATASET_DIR} {LOGS_DIR} {TRAIN_DIR} {MODELS_DIR}" - ], - "execution_count": 5, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GfEUlfFBizio", - "colab_type": "text" - }, - "source": [ - "Clone the TensorFlow Github Repository, which contains the relevant code required to run this tutorial." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "yZArmzT85SLq", - "colab_type": "code", - "colab": {} - }, - "source": [ - "!git clone -q https://github.com/tensorflow/tensorflow" - ], - "execution_count": 6, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nS9swHLSi7Bi", - "colab_type": "text" - }, - "source": [ - "Load TensorBoard to visualize the accuracy and loss as training proceeds.\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "q4qF1VxP3UE4", - "colab_type": "code", - "colab": {} - }, - "source": [ - "%load_ext tensorboard\n", - "%tensorboard --logdir {LOGS_DIR}" - ], - "execution_count": 7, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "x1J96Ron-O4R", - "colab_type": "text" - }, - "source": [ - "## Training\n", - "\n", - "The following script downloads the dataset and begin training." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "VJsEZx6lynbY", - "colab_type": "code", - "colab": {} - }, - "source": [ - "!python tensorflow/tensorflow/examples/speech_commands/train.py \\\n", - "--data_dir={DATASET_DIR} \\\n", - "--wanted_words={WANTED_WORDS} \\\n", - "--silence_percentage={SILENT_PERCENTAGE} \\\n", - "--unknown_percentage={UNKNOWN_PERCENTAGE} \\\n", - "--preprocess={PREPROCESS} \\\n", - "--window_stride={WINDOW_STRIDE} \\\n", - "--model_architecture={MODEL_ARCHITECTURE} \\\n", - "--quantize={QUANTIZE} \\\n", - "--how_many_training_steps={TRAINING_STEPS} \\\n", - "--learning_rate={LEARNING_RATE} \\\n", - "--train_dir={TRAIN_DIR} \\\n", - "--summaries_dir={LOGS_DIR} \\\n", - "--verbosity={VERBOSITY} \\\n", - "--eval_step_interval={EVAL_STEP_INTERVAL} \\\n", - "--save_step_interval={SAVE_STEP_INTERVAL} \\" - ], - "execution_count": 8, - "outputs": [ - { - "output_type": "stream", - "text": [ - "..\n", - "..\n", - "..\n", - "..\n", - "WARNING:tensorflow:Confusion Matrix:\n", - " [[205 0 0 1]\n", - " [ 3 162 13 28]\n", - " [ 3 9 401 6]\n", - " [ 2 22 6 375]]\n", - "W0402 00:25:28.115174 139938153863040 train.py:320] Confusion Matrix:\n", - " [[205 0 0 1]\n", - " [ 3 162 13 28]\n", - " [ 3 9 401 6]\n", - " [ 2 22 6 375]]\n", - "WARNING:tensorflow:Final test accuracy = 92.5% (N=1236)\n", - "W0402 00:25:28.115574 139938153863040 train.py:322] Final test accuracy = 92.5% (N=1236)\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XQUJLrdS-ftl", - "colab_type": "text" - }, - "source": [ - "## Generate a TensorFlow Model for Inference\n", - "\n", - "Combine relevant training results (graph, weights, etc) into a single file for inference. This process is known as freezing a model and the resulting model is known as a frozen model/graph, as it cannot be further re-trained after this process." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "xyc3_eLh9sAg", - "colab_type": "code", - "colab": {} - }, - "source": [ - "!python tensorflow/tensorflow/examples/speech_commands/freeze.py \\\n", - "--wanted_words=$WANTED_WORDS \\\n", - "--window_stride_ms=$WINDOW_STRIDE \\\n", - "--preprocess=$PREPROCESS \\\n", - "--model_architecture=$MODEL_ARCHITECTURE \\\n", - "--quantize=$QUANTIZE \\\n", - "--start_checkpoint=$TRAIN_DIR$MODEL_ARCHITECTURE'.ckpt-'$TOTAL_STEPS \\\n", - "--output_file=$MODEL_TF \\" - ], - "execution_count": 9, - "outputs": [ - { - "output_type": "stream", - "text": [ - "..\n", - "..\n", - "..\n", - "..\n", - "INFO:tensorflow:Restoring parameters from /content/train/tiny_conv.ckpt-15000\n", - "I0402 00:25:47.086113 140352379615104 saver.py:1284] Restoring parameters from /content/train/tiny_conv.ckpt-15000\n", - "INFO:tensorflow:Froze 12 variables.\n", - "I0402 00:25:47.663757 140352379615104 graph_util_impl.py:334] Froze 12 variables.\n", - "INFO:tensorflow:Converted 12 variables to const ops.\n", - "I0402 00:25:47.665771 140352379615104 graph_util_impl.py:394] Converted 12 variables to const ops.\n", - "INFO:tensorflow:Saved frozen graph to /content/models/model.pb\n", - "I0402 00:25:47.667117 140352379615104 freeze.py:186] Saved frozen graph to /content/models/model.pb\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_DBGDxVI-nKG", - "colab_type": "text" - }, - "source": [ - "## Generate a TensorFlow Lite Model\n", - "\n", - "Convert the frozen graph into a TensorFlow Lite model, which is fully quantized for use with embedded devices.\n", - "\n", - "The following cell will also print the model size, which will be under 20 kilobytes." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "lBj_AyCh1cC0", - "colab_type": "code", - "colab": {} - }, - "source": [ - "input_tensor = 'Reshape_2'\n", - "output_tensor = 'labels_softmax'\n", - "\n", - "converter = tf.lite.TFLiteConverter.from_frozen_graph(\n", - " MODEL_TF, [input_tensor], [output_tensor])\n", - "converter.inference_type = tf.uint8\n", - "converter.quantized_input_stats = {input_tensor: (0.0, 9.8077)} # (mean, standard deviation)\n", - "tflite_model = converter.convert()\n", - "\n", - "tflite_model_size = open(MODEL_TFLITE, \"wb\").write(tflite_model)\n", - "print(\"Model is %d bytes\" % tflite_model_size)\n" - ], - "execution_count": 10, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Model is 18288 bytes\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dt6Zqbxu-wIi", - "colab_type": "text" - }, - "source": [ - "## Generate a TensorFlow Lite for MicroControllers Model\n", - "Convert the TensorFlow Lite model into a C source file that can be loaded by TensorFlow Lite for Microcontrollers." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "XohZOTjR8ZyE", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Install xxd if it is not available\n", - "!apt-get update && apt-get -qq install xxd\n", - "# Convert to a C source file\n", - "!xxd -i {MODEL_TFLITE} > {MODEL_TFLITE_MICRO}\n", - "# Update variable names\n", - "REPLACE_TEXT = MODEL_TFLITE.replace('/', '_').replace('.', '_')\n", - "!sed -i 's/'{REPLACE_TEXT}'/g_model/g' {MODEL_TFLITE_MICRO}" - ], - "execution_count": 11, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2pQnN0i_-0L2", - "colab_type": "text" - }, - "source": [ - "## Deploy to a Microcontroller\n", - "\n", - "Follow the instructions in the [micro_speech](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/micro/examples/micro_speech) README.md for [TensorFlow Lite for MicroControllers](https://www.tensorflow.org/lite/microcontrollers/overview) to deploy this model on a specific microcontroller.\n", - "\n", - "**Reference Model:** If you have not modified this notebook, you can follow the instructions as is, to deploy the model. Refer to the [`micro_speech/train/models`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/micro/examples/micro_speech/train/models) directory to access the models generated in this notebook. \n", - "\n", - "**New Model:** If you have generated a new model to identify different words: (i) Update `kCategoryCount` and `kCategoryLabels` in [`micro_speech/micro_features/micro_model_settings.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/micro/examples/micro_speech/micro_features/micro_model_settings.h) and (ii) Update the values assigned to the variables defined in [`micro_speech/micro_features/model.cc`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/micro/examples/micro_speech/micro_features/model.cc) with values displayed after running the following cell." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "eoYyh0VU8pca", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Print the C source file\n", - "!cat {MODEL_TFLITE_MICRO}" - ], - "execution_count": 12, - "outputs": [ - { - "output_type": "stream", - "text": [ - "unsigned char g_model[] = {\n", - " 0x1c, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x00, 0x00, 0x12, 0x00,\n", - " 0x1c, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00,\n", - " 0x00, 0x00, 0x18, 0x00, 0x12, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,\n", - " 0x1c, 0x47, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,\n", - " 0x2c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n", - " 0xc0, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00,\n", - " 0x0f, 0x00, 0x00, 0x00, 0x54, 0x4f, 0x43, 0x4f, 0x20, 0x43, 0x6f, 0x6e,\n", - " 0x76, 0x65, 0x72, 0x74, 0x65, 0x64, 0x2e, 0x00, 0x0a, 0x00, 0x00, 0x00,\n", - " 0x60, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00,\n", - " 0x3c, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00,\n", - " 0x20, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,\n", - " 0x04, 0x00, 0x00, 0x00, 0x0e, 0xba, 0xff, 0xff, 0x38, 0x00, 0x00, 0x00,\n", - " 0xbc, 0xb9, 0xff, 0xff, 0xc0, 0xb9, 0xff, 0xff, 0x1e, 0xba, 0xff, 0xff,\n", - " 0xe0, 0x01, 0x00, 0x00, 0xcc, 0xb9, 0xff, 0xff, 0xd0, 0xb9, 0xff, 0xff,\n", - " 0x2e, 0xba, 0xff, 0xff, 0x60, 0x03, 0x00, 0x00, 0x36, 0xba, 0xff, 0xff,\n", - " 0x7c, 0x06, 0x00, 0x00, 0x3e, 0xba, 0xff, 0xff, 0x68, 0x45, 0x00, 0x00,\n", - " 0xec, 0xb9, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x31, 0x2e, 0x35, 0x2e,\n", - " 0x30, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x04, 0x00, 0x08, 0x00,\n", - " 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,\n", - " 0x13, 0x00, 0x00, 0x00, 0x6d, 0x69, 0x6e, 0x5f, 0x72, 0x75, 0x6e, 0x74,\n", - " 0x69, 0x6d, 0x65, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x00,\n", 
- " 0x10, 0xfa, 0xff, 0xff, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,\n", - " 0x08, 0x00, 0x00, 0x00, 0x2c, 0x45, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n", - " 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,\n", - " 0x08, 0x00, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00, 0x9c, 0x44, 0x00, 0x00,\n", - " 0x8c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0xdc, 0x01, 0x00, 0x00,\n", - " 0x68, 0x01, 0x00, 0x00, 0x3c, 0x02, 0x00, 0x00, 0x50, 0x05, 0x00, 0x00,\n", - " 0x8e, 0xbb, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, 0x10, 0x00, 0x00, 0x00,\n", - " 0x08, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,\n", - " 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00,\n", - " 0x28, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,\n", - " 0x52, 0x65, 0x73, 0x68, 0x61, 0x70, 0x65, 0x5f, 0x32, 0x00, 0x00, 0x00,\n", - " 0x94, 0xfa, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,\n", - " 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n", - " 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n", - " 0xc6, 0xd0, 0xd0, 0x3d, 0x01, 0x00, 0x00, 0x00, 0xf5, 0xff, 0xcf, 0x41,\n", - " 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xbc, 0xff, 0xff,\n", - " 0x00, 0x00, 0x00, 0x03, 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,\n", - " 0x1c, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n", - " 0x01, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,\n", - " 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x52, 0x65, 0x6c, 0x75,\n", - " 0x00, 0x00, 0x00, 0x00, 0x04, 0xfb, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00,\n", - " 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n", - " 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n", - " 0x01, 0x00, 0x00, 0x00, 0x09, 0xf5, 0x83, 0x3d, 0x01, 0x00, 0x00, 0x00,\n", - " 0x14, 0x71, 0x83, 0x41, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n", 
- " 0x72, 0xbc, 0xff, 0xff, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00,\n", - " 0x06, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,\n", - " 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,\n", - " 0x43, 0x6f, 0x6e, 0x76, 0x32, 0x44, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00,\n", - " 0x64, 0xbc, 0xff, 0xff, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n", - " 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n", - " 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2d, 0x95, 0x98, 0x38,\n", - " 0x20, 0x00, 0x00, 0x00, 0x27, 0xff, 0xff, 0xff, 0x97, 0xff, 0xff, 0xff,\n", - " 0x58, 0x00, 0x00, 0x00, 0x66, 0xff, 0xff, 0xff, 0x13, 0xff, 0xff, 0xff,\n", - " 0x72, 0xfe, 0xff, 0xff, 0x5d, 0xfb, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,\n", - " 0xea, 0xbc, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, 0x10, 0x00, 0x00, 0x00,\n", - " 0x05, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,\n", - " 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n", - " 0x0e, 0x00, 0x00, 0x00, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x5f, 0x73,\n", - " 0x6f, 0x66, 0x74, 0x6d, 0x61, 0x78, 0x00, 0x00, 0xec, 0xfb, 0xff, 0xff,\n", - " 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,\n", - " 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n", - " 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3b,\n", - " 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x3f, 0x01, 0x00, 0x00, 0x00,\n", - " 0x00, 0x00, 0x00, 0x00, 0x5a, 0xbd, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03,\n", - " 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,\n", - " 0x1c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n", - " 0x04, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x61, 0x64, 0x64, 0x5f,\n", - " 0x31, 0x00, 0x00, 0x00, 0x54, 0xfc, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00,\n", - " 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n", 
- " 0x01, 0x00, 0x00, 0x00, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n", - " 0x01, 0x00, 0x00, 0x00, 0x9c, 0xd2, 0xb5, 0x3d, 0x01, 0x00, 0x00, 0x00,\n", - " 0x48, 0x18, 0x1f, 0x41, 0x01, 0x00, 0x00, 0x00, 0x4a, 0x21, 0x4b, 0xc1,\n", - " 0xc2, 0xbd, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, 0x10, 0x00, 0x00, 0x00,\n", - " 0x03, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00,\n", - " 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00,\n", - " 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,\n", - " 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x5f, 0x71, 0x75, 0x61, 0x6e,\n", - " 0x74, 0x2f, 0x46, 0x61, 0x6b, 0x65, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x57,\n", - " 0x69, 0x74, 0x68, 0x4d, 0x69, 0x6e, 0x4d, 0x61, 0x78, 0x56, 0x61, 0x72,\n", - " 0x73, 0x00, 0x00, 0x00, 0xe4, 0xfc, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00,\n", - " 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n", - " 0x01, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n", - " 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x8a, 0x0f, 0x3b, 0x3a,\n", - " 0x01, 0x00, 0x00, 0x00, 0xfc, 0x0b, 0xb4, 0x3d, 0x01, 0x00, 0x00, 0x00,\n", - " 0xd9, 0x26, 0xbf, 0xbd, 0x80, 0x02, 0x00, 0x00, 0x60, 0x38, 0xab, 0xcb,\n", - " 0xfa, 0x7e, 0xa2, 0x55, 0x6e, 0x87, 0xa5, 0x9b, 0xb4, 0x66, 0x5c, 0x6f,\n", - " 0xae, 0xdb, 0xcd, 0xb6, 0xc2, 0x60, 0xa9, 0x7d, 0xd4, 0xac, 0xa6, 0x90,\n", - " 0x87, 0x6b, 0x50, 0x95, 0xde, 0xcd, 0xaa, 0xa1, 0x9c, 0x65, 0xb5, 0x6d,\n", - " 0xb0, 0xa5, 0xa5, 0x7f, 0x73, 0x95, 0x63, 0x81, 0x7a, 0xc6, 0xaf, 0x82,\n", - " 0x69, 0x89, 0xc3, 0x3c, 0x47, 0x73, 0x89, 0x4f, 0x33, 0xbc, 0x85, 0x5d,\n", - " 0x69, 0x11, 0x5b, 0xb9, 0xf1, 0x95, 0x8f, 0x5c, 0x7c, 0x59, 0x6c, 0xa0,\n", - " 0xa5, 0x7c, 0x5a, 0x7c, 0xb5, 0xa9, 0x7e, 0xa1, 0xb8, 0x65, 0xb3, 0x86,\n", - " 0xc1, 0x9f, 0x5c, 0x86, 0x7f, 0x74, 0x52, 0xa8, 0xc9, 0xc5, 0x71, 0x96,\n", - " 0x7a, 0x65, 0xc7, 0x69, 0x94, 0xa7, 0x65, 0x68, 0x69, 0x8d, 0x6d, 0x9e,\n", 
- " 0x59, 0xd4, 0x75, 0x7a, 0x4f, 0x70, 0xca, 0x48, 0x25, 0x8a, 0x69, 0x4d,\n", - " 0x2a, 0xa6, 0x76, 0x69, 0x6a, 0x02, 0x3b, 0xa2, 0xea, 0xc2, 0x73, 0x6b,\n", - " 0x86, 0x4d, 0x3a, 0xa2, 0xa2, 0x88, 0x4e, 0x6c, 0xb3, 0x83, 0x39, 0x93,\n", - " 0xa6, 0x85, 0xb8, 0x7a, 0xa8, 0x7d, 0x2e, 0x7b, 0x7f, 0x69, 0x56, 0xb5,\n", - " 0xbb, 0xae, 0x23, 0x78, 0x67, 0x5c, 0xd2, 0x82, 0x7d, 0x96, 0x46, 0x74,\n", - " 0x70, 0x72, 0x6a, 0x90, 0x43, 0xce, 0x44, 0x75, 0x4a, 0x58, 0xc7, 0x5c,\n", - " 0x34, 0x84, 0x46, 0x4b, 0x41, 0x6c, 0x62, 0x83, 0x7e, 0x01, 0x9b, 0x9b,\n", - " 0xeb, 0xf7, 0x58, 0x6f, 0x8a, 0x43, 0xb3, 0x9f, 0x9c, 0x9e, 0x55, 0xa8,\n", - " 0xaa, 0x84, 0x8f, 0x8f, 0xb0, 0x9e, 0xc8, 0x81, 0xb6, 0x80, 0xa0, 0x81,\n", - " 0x86, 0x73, 0x5d, 0xdc, 0xb9, 0xae, 0xa2, 0x6c, 0x46, 0x67, 0xfa, 0x79,\n", - " 0x89, 0xaf, 0xa0, 0x74, 0x76, 0x85, 0x72, 0xb1, 0x2a, 0xbb, 0xa0, 0x6d,\n", - " 0x4f, 0x50, 0xc9, 0x5d, 0x2f, 0xaa, 0x9c, 0x63, 0x3f, 0x59, 0x63, 0x90,\n", - " 0x73, 0x1e, 0xb3, 0x94, 0xcd, 0xff, 0x3c, 0x63, 0x9b, 0x59, 0xc5, 0xa2,\n", - " 0x9f, 0x9a, 0x53, 0xab, 0xb0, 0x74, 0xb2, 0x6f, 0x8a, 0xa7, 0xd5, 0x8d,\n", - " 0xb8, 0x7e, 0x9e, 0x78, 0x84, 0x61, 0x66, 0xe7, 0xa7, 0x9f, 0xb7, 0x45,\n", - " 0x24, 0x61, 0xfd, 0x69, 0x87, 0xb8, 0xb2, 0x7a, 0x7c, 0x58, 0x64, 0xa3,\n", - " 0x07, 0xa9, 0xaf, 0x69, 0x49, 0x2f, 0xc2, 0x46, 0x3b, 0xaf, 0x9a, 0x70,\n", - " 0x6b, 0x25, 0x5f, 0x9d, 0x82, 0x33, 0xa1, 0x54, 0xae, 0xff, 0x31, 0x5d,\n", - " 0xaf, 0x51, 0xb2, 0x82, 0x9c, 0xa9, 0x5b, 0x8c, 0xab, 0x75, 0xb3, 0x32,\n", - " 0x42, 0xbd, 0xcd, 0x77, 0xb6, 0x67, 0x9a, 0x5f, 0x6c, 0x71, 0x6e, 0xc2,\n", - " 0xac, 0x97, 0x9f, 0x4b, 0x21, 0x6a, 0xfc, 0x77, 0x83, 0xa1, 0xa3, 0x6a,\n", - " 0x7a, 0x6d, 0x5e, 0x87, 0x02, 0xa6, 0x8f, 0x7f, 0x5c, 0x2e, 0xc1, 0x51,\n", - " 0x4a, 0xa7, 0x96, 0x79, 0x83, 0x2e, 0x5a, 0x84, 0x82, 0x5c, 0x61, 0x3a,\n", - " 0x4a, 0xff, 0x2a, 0x51, 0xa4, 0x6b, 0x82, 0x5e, 0x67, 0xb3, 0x71, 0x80,\n", - " 0xad, 0x62, 0x59, 0x40, 0x26, 0xd7, 0xcf, 0x68, 0xab, 0x7c, 0x6a, 0x69,\n", 
- " 0x5b, 0x7c, 0x84, 0xbc, 0x95, 0x68, 0x77, 0x63, 0x3f, 0x85, 0xed, 0x7b,\n", - " 0x71, 0xa0, 0x76, 0x90, 0x8c, 0x6c, 0x61, 0x81, 0x16, 0x74, 0x72, 0x94,\n", - " 0x74, 0x37, 0xb5, 0x3d, 0x55, 0x96, 0x86, 0xad, 0x87, 0x39, 0x59, 0x88,\n", - " 0x5b, 0x65, 0x60, 0x33, 0x33, 0xe6, 0x2b, 0x4a, 0xb6, 0x82, 0x50, 0x56,\n", - " 0x51, 0x97, 0x71, 0x83, 0xa6, 0x60, 0x57, 0x51, 0x58, 0xe4, 0xd0, 0x87,\n", - " 0xa1, 0x78, 0x4c, 0x67, 0x72, 0x74, 0x86, 0xc6, 0x60, 0x47, 0x50, 0x96,\n", - " 0x67, 0x96, 0xdd, 0x7d, 0x63, 0x85, 0x5e, 0x98, 0xa2, 0x64, 0x5f, 0x8a,\n", - " 0x3b, 0x40, 0x54, 0xcb, 0xa0, 0x61, 0xa7, 0x44, 0x5f, 0x6d, 0x57, 0xb3,\n", - " 0xb9, 0x2e, 0x61, 0x8e, 0x54, 0x78, 0x85, 0x58, 0x43, 0xb0, 0x27, 0x5d,\n", - " 0x8a, 0x7c, 0x8a, 0x58, 0x40, 0x83, 0x82, 0x9b, 0x6c, 0x60, 0x6b, 0x72,\n", - " 0x7f, 0xde, 0xc9, 0x7d, 0x6f, 0x5f, 0x90, 0x7e, 0x7e, 0x7e, 0x8b, 0xe5,\n", - " 0x51, 0x37, 0x7a, 0xa9, 0xa2, 0xc5, 0xd3, 0x81, 0x32, 0x4b, 0x80, 0xa9,\n", - " 0xc5, 0x76, 0x56, 0x99, 0x33, 0x19, 0x72, 0xe6, 0xdb, 0x90, 0xa8, 0x50,\n", - " 0x65, 0x44, 0x77, 0xdb, 0xc7, 0x48, 0x65, 0x8d, 0x3d, 0x7f, 0xa2, 0x7c,\n", - " 0x53, 0x55, 0x26, 0x49, 0x5d, 0x7d, 0xa2, 0x6d, 0x3b, 0x5b, 0x87, 0x64,\n", - " 0x3a, 0x5b, 0x8d, 0x93, 0x7a, 0xb4, 0xca, 0x6d, 0x16, 0x5a, 0x99, 0x82,\n", - " 0x8d, 0x6a, 0x92, 0xa0, 0x39, 0x2c, 0x95, 0xc8, 0xb8, 0xf5, 0xc8, 0x66,\n", - " 0x2a, 0x45, 0x84, 0x9c, 0xc7, 0x8e, 0x61, 0x7b, 0x43, 0x28, 0x86, 0xff,\n", - " 0xd2, 0xc8, 0x9c, 0x46, 0x65, 0x33, 0x82, 0xd8, 0xcb, 0x73, 0x63, 0x80,\n", - " 0xda, 0xc0, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, 0x10, 0x00, 0x00, 0x00,\n", - " 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00,\n", - " 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa0, 0x0f, 0x00, 0x00,\n", - " 0x31, 0x00, 0x00, 0x00, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x5f,\n", - " 0x71, 0x75, 0x61, 0x6e, 0x74, 0x5f, 0x31, 0x2f, 0x46, 0x61, 0x6b, 0x65,\n", - " 0x51, 0x75, 0x61, 0x6e, 0x74, 0x57, 0x69, 0x74, 0x68, 0x4d, 0x69, 0x6e,\n", 
- " 0x4d, 0x61, 0x78, 0x56, 0x61, 0x72, 0x73, 0x2f, 0x74, 0x72, 0x61, 0x6e,\n", - " 0x73, 0x70, 0x6f, 0x73, 0x65, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,\n", - " 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00,\n", - " 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,\n", - " 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00,\n", - " 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x87, 0xff, 0xdb, 0x39,\n", - " 0x01, 0x00, 0x00, 0x00, 0xd8, 0xb2, 0x5d, 0x3d, 0x01, 0x00, 0x00, 0x00,\n", - " 0x37, 0xdc, 0x56, 0xbd, 0x80, 0x3e, 0x00, 0x00, 0x67, 0x6d, 0x74, 0x77,\n", - " 0x35, 0x66, 0x87, 0x95, 0x8e, 0x82, 0x5e, 0x70, 0x6e, 0xa7, 0x60, 0x64,\n", - " 0x86, 0x5e, 0x93, 0x7a, 0x76, 0x74, 0x71, 0x8c, 0x61, 0x71, 0x60, 0x8b,\n", - " 0x83, 0x48, 0x8b, 0x5f, 0x95, 0x99, 0x5b, 0x59, 0x49, 0x44, 0x79, 0x62,\n", - " 0x8e, 0x77, 0x71, 0x89, 0x64, 0x46, 0x8f, 0x8e, 0x80, 0x73, 0x71, 0x81,\n", - " 0x85, 0x4a, 0x73, 0x57, 0x66, 0x58, 0x75, 0x93, 0x99, 0x58, 0x8a, 0x7b,\n", - " 0x87, 0x81, 0xa1, 0x46, 0x79, 0x6c, 0x83, 0x7a, 0x92, 0x74, 0x6f, 0x6b,\n", - " 0x79, 0x77, 0x97, 0x8a, 0x95, 0x75, 0xa2, 0x49, 0x80, 0x4e, 0x7f, 0x6d,\n", - " 0xaa, 0xac, 0x6c, 0x5d, 0x57, 0x82, 0x97, 0x77, 0x6f, 0x75, 0x95, 0x73,\n", - " 0x7e, 0x51, 0x9f, 0x5b, 0x54, 0x92, 0x60, 0x72, 0x80, 0x6a, 0x92, 0x83,\n", - " 0x9b, 0x85, 0x7b, 0x4d, 0x55, 0x4d, 0xb2, 0x7d, 0x65, 0x95, 0x76, 0x42,\n", - " 0x61, 0x49, 0xa2, 0x73, 0x9f, 0x7d, 0x7c, 0x54, 0x51, 0x76, 0xa1, 0x7f,\n", - " 0x86, 0x69, 0x98, 0x59, 0x6d, 0x84, 0x9f, 0x7b, 0x86, 0x79, 0x88, 0x55,\n", - " 0x9c, 0x72, 0x95, 0x8a, 0x91, 0x7a, 0x77, 0x95, 0x7b, 0x87, 0x87, 0x85,\n", - " 0x95, 0x72, 0x77, 0x59, 0x7c, 0x80, 0x90, 0x8f, 0x8a, 0x62, 0x76, 0x9f,\n", - " 0x64, 0x84, 0x71, 0x7e, 0x7c, 0x66, 0x8e, 0x94, 0x6e, 0xaa, 0x77, 0x5c,\n", - " 0x6b, 0x63, 0x68, 0x82, 0x89, 0x46, 0x61, 0x74, 0x8e, 0x85, 0x6b, 0x57,\n", - " 0x74, 0x50, 0x87, 0x66, 0x87, 0x98, 0x59, 0x7d, 0xa2, 0x59, 0x75, 0x64,\n", 
- " 0x72, 0x8c, 0x6a, 0x92, 0x8c, 0x56, 0x88, 0x7a, 0x6e, 0x77, 0x9c, 0x82,\n", - " 0x7e, 0x5a, 0x91, 0x80, 0x9c, 0x9e, 0x60, 0x8b, 0x6d, 0x76, 0x8d, 0x68,\n", - " 0x6c, 0x70, 0x6f, 0x8b, 0x61, 0x6e, 0x86, 0x78, 0x81, 0x81, 0x77, 0x79,\n", - " 0x76, 0x69, 0x7d, 0x7b, 0x96, 0x8b, 0x95, 0x91, 0xa2, 0x7b, 0x86, 0x8d,\n", - " 0x8b, 0x89, 0x86, 0x5a, 0x5c, 0x4d, 0x96, 0x80, 0x81, 0x55, 0x80, 0x80,\n", - " 0x7a, 0x76, 0x99, 0x98, 0x61, 0x95, 0x5a, 0x78, 0x5a, 0x6c, 0x89, 0x81,\n", - " 0x98, 0x77, 0x62, 0x77, 0x93, 0x4d, 0x9f, 0x77, 0x72, 0x87, 0x95, 0x71,\n", - " 0x65, 0x72, 0xac, 0x8c, 0xa2, 0x89, 0x90, 0x7b, 0x67, 0x60, 0x8a, 0xb3,\n", - " 0x72, 0x8f, 0x5c, 0x82, 0x74, 0x76, 0x7c, 0x85, 0x78, 0x6b, 0x97, 0x6d,\n", - " 0x86, 0x82, 0x76, 0x84, 0x89, 0x89, 0x7f, 0x6a, 0x7a, 0x7f, 0x6c, 0x77,\n", - " 0x80, 0x35, 0x7d, 0x66, 0x96, 0x7e, 0x88, 0x55, 0x6b, 0x55, 0x7c, 0xa7,\n", - " 0x7f, 0x9f, 0x64, 0x8b, 0xa0, 0x81, 0x80, 0x97, 0xaf, 0x7a, 0x7d, 0x61,\n", - " 0x7a, 0x77, 0x6f, 0x8c, 0x5e, 0x69, 0x6b, 0x94, 0x70, 0x6a, 0x66, 0x5d,\n", - " 0x78, 0x6e, 0x76, 0x64, 0xa0, 0x73, 0x8f, 0xa2, 0x9d, 0x50, 0x8e, 0x52,\n", - " 0x51, 0x85, 0x78, 0x83, 0x8f, 0x94, 0x83, 0x7c, 0x9c, 0x64, 0x59, 0x7d,\n", - " 0x66, 0x6a, 0x73, 0x80, 0x6a, 0x9b, 0x92, 0x7e, 0x7a, 0x78, 0x7d, 0xa0,\n", - " 0x8a, 0x9b, 0x61, 0x9e, 0x6c, 0x64, 0x6c, 0x8e, 0x86, 0x75, 0x8a, 0x95,\n", - " 0x8e, 0x89, 0x87, 0x8a, 0x5d, 0x8b, 0x82, 0x7c, 0x60, 0x63, 0x85, 0x85,\n", - " 0x63, 0x96, 0xa3, 0x7f, 0x93, 0x78, 0x8c, 0x86, 0x7b, 0x78, 0x8e, 0x71,\n", - " 0x72, 0x8b, 0x8a, 0x5e, 0x8d, 0x75, 0x78, 0xa3, 0x84, 0x67, 0xa7, 0x54,\n", - " 0x6c, 0x80, 0x8e, 0xa8, 0x83, 0x51, 0x6e, 0x9f, 0x8b, 0x86, 0x75, 0x95,\n", - " 0x7f, 0x7a, 0x80, 0x81, 0x8d, 0x9c, 0x83, 0x8a, 0x7b, 0x8a, 0x74, 0x6f,\n", - " 0x8d, 0x96, 0x5b, 0x9c, 0x8d, 0x7b, 0x83, 0x79, 0x7f, 0x65, 0x7e, 0x87,\n", - " 0x7c, 0x5d, 0x71, 0x97, 0x77, 0x44, 0x9a, 0x7f, 0xaa, 0x56, 0x75, 0x5f,\n", - " 0x7c, 0x51, 0x8c, 0x90, 0x84, 0x9a, 0x49, 0x5d, 0x86, 0x52, 0x94, 0x95,\n", 
- " 0x5b, 0x86, 0x66, 0x7d, 0x51, 0x4f, 0x7a, 0x91, 0x6d, 0x6e, 0x72, 0x70,\n", - " 0x83, 0x4f, 0x9b, 0x9a, 0x8a, 0x77, 0x6a, 0xa1, 0x71, 0x60, 0x61, 0x98,\n", - " 0x67, 0x4e, 0x7a, 0x8a, 0x53, 0x6b, 0x99, 0xa0, 0x91, 0x46, 0x8a, 0x8b,\n", - " 0x47, 0x78, 0xa9, 0x7b, 0x71, 0x6c, 0x81, 0x68, 0x53, 0x73, 0xaf, 0x70,\n", - " 0x62, 0x6d, 0x69, 0x97, 0x70, 0x83, 0x5f, 0x7f, 0x81, 0x87, 0x65, 0x93,\n", - " 0x67, 0x87, 0x70, 0x82, 0x79, 0x9e, 0x80, 0x77, 0x6c, 0x80, 0x92, 0x81,\n", - " 0x8d, 0x8c, 0x89, 0x8b, 0x4e, 0x91, 0x77, 0x84, 0x99, 0x8c, 0x71, 0x88,\n", - " 0x57, 0x7a, 0x9a, 0x8c, 0x82, 0x9b, 0x97, 0x72, 0x69, 0xac, 0x7c, 0x62,\n", - " 0x85, 0x7d, 0x76, 0x7f, 0x59, 0x85, 0x68, 0x63, 0x94, 0x8b, 0x7b, 0x92,\n", - " 0x7b, 0x6f, 0x77, 0x98, 0x66, 0x78, 0x74, 0x99, 0x85, 0x8c, 0x94, 0x89,\n", - " 0x6c, 0x77, 0x89, 0x80, 0x79, 0x8a, 0xa6, 0x95, 0xa9, 0x86, 0x6f, 0x95,\n", - " 0x90, 0x69, 0x98, 0x85, 0xa0, 0x7f, 0x56, 0xab, 0x6f, 0x5a, 0x94, 0x8b,\n", - " 0x5a, 0x72, 0x61, 0x83, 0x54, 0x70, 0x8d, 0x8d, 0x9c, 0x5e, 0x36, 0x9b,\n", - " 0x84, 0x32, 0x6e, 0x84, 0x79, 0x72, 0x64, 0x95, 0x83, 0x58, 0x67, 0x6c,\n", - " 0x9e, 0x8d, 0x6e, 0x9e, 0x4f, 0x78, 0x71, 0x85, 0x75, 0x60, 0x4d, 0x7d,\n", - " 0x64, 0x89, 0x8e, 0x89, 0x6e, 0x92, 0x53, 0x7c, 0x86, 0x8f, 0xa9, 0xb0,\n", - " 0x8e, 0x5e, 0x76, 0x96, 0x65, 0x7c, 0x8a, 0x89, 0x75, 0x8f, 0x65, 0x94,\n", - " 0x6c, 0x6c, 0x8d, 0x6d, 0x66, 0x6a, 0x62, 0x98, 0x53, 0x8f, 0x67, 0x76,\n", - " 0x80, 0x89, 0x66, 0x60, 0x55, 0x81, 0x85, 0x61, 0x75, 0x78, 0x80, 0x92,\n", - " 0x6f, 0x79, 0x66, 0x64, 0x99, 0xa7, 0x88, 0xa1, 0x86, 0x6b, 0x94, 0x88,\n", - " 0x77, 0x83, 0x8f, 0x61, 0x72, 0x7c, 0x6f, 0x8f, 0x61, 0x56, 0x8a, 0x7b,\n", - " 0x66, 0x8b, 0x98, 0x9d, 0x82, 0x65, 0x77, 0x98, 0x55, 0x83, 0x7a, 0x8c,\n", - " 0x74, 0x79, 0x6e, 0x85, 0x82, 0x9a, 0x7d, 0x8d, 0x76, 0x72, 0x64, 0x81,\n", - " 0x9a, 0x8d, 0x9f, 0x7b, 0x7c, 0x7b, 0x7b, 0x84, 0x90, 0x6b, 0xa4, 0x84,\n", - " 0x98, 0x6f, 0x81, 0xb8, 0x6f, 0x6c, 0x87, 0x6d, 0x8c, 0x72, 0x53, 0x85,\n", 
- " 0x59, 0x4d, 0x9c, 0x94, 0x7d, 0x6f, 0x4f, 0x82, 0x5d, 0x71, 0x6e, 0x78,\n", - " 0x61, 0x61, 0x34, 0x71, 0x6a, 0x5a, 0x73, 0xa3, 0x89, 0x65, 0x4d, 0x80,\n", - " 0x5c, 0x51, 0x81, 0x8e, 0x6c, 0x53, 0x4a, 0x95, 0x3b, 0x72, 0xa7, 0x86,\n", - " 0x7f, 0x75, 0x61, 0xa3, 0x85, 0x6c, 0x99, 0x88, 0x7c, 0x64, 0x7a, 0x8d,\n", - " 0x81, 0x7b, 0x6a, 0x7b, 0x8f, 0x74, 0x6d, 0xae, 0x42, 0x67, 0x88, 0xa1,\n", - " 0x90, 0x4d, 0x7c, 0x7b, 0x62, 0x55, 0x9a, 0x80, 0x4d, 0x76, 0x5c, 0x88,\n", - " 0x60, 0x86, 0x6f, 0x65, 0x67, 0x77, 0x8a, 0x97, 0x99, 0x7c, 0x89, 0x78,\n", - " 0x92, 0xa7, 0x6a, 0x7f, 0x8e, 0x88, 0x9d, 0xa1, 0x7b, 0xb0, 0x69, 0x8c,\n", - " 0x7e, 0x51, 0x76, 0x84, 0x7d, 0x91, 0x7a, 0x88, 0x7b, 0x88, 0x92, 0x79,\n", - " 0x6d, 0x82, 0x6c, 0x8a, 0x99, 0x62, 0x82, 0x9d, 0x99, 0x97, 0x78, 0x6a,\n", - " 0x6e, 0x83, 0x64, 0x7d, 0x8c, 0x78, 0x7c, 0x7a, 0x7d, 0x7b, 0x77, 0x84,\n", - " 0x76, 0x57, 0x63, 0x85, 0x97, 0x94, 0x80, 0x92, 0x88, 0x73, 0x91, 0x91,\n", - " 0x8f, 0x6d, 0x99, 0x86, 0x91, 0x7f, 0x8b, 0x87, 0x98, 0x62, 0x84, 0x70,\n", - " 0x97, 0x7b, 0x2e, 0x9b, 0x6e, 0x2a, 0xa4, 0x9c, 0x79, 0x88, 0x54, 0x81,\n", - " 0x4f, 0x41, 0xa0, 0x85, 0xaf, 0x9a, 0x47, 0x5a, 0x7d, 0x62, 0x7a, 0x84,\n", - " 0x81, 0x6e, 0x41, 0xb4, 0x60, 0x47, 0x8f, 0x98, 0x6c, 0x3c, 0x3b, 0x73,\n", - " 0x59, 0x55, 0x7c, 0xb0, 0x6e, 0x5f, 0x61, 0x97, 0x73, 0x59, 0x9f, 0x92,\n", - " 0x89, 0x5c, 0x70, 0x96, 0x5c, 0x7c, 0x7c, 0x64, 0x7e, 0x54, 0x5c, 0x94,\n", - " 0x56, 0x73, 0x8d, 0x95, 0x59, 0x83, 0x6c, 0x99, 0x6e, 0x5e, 0x7a, 0x99,\n", - " 0x83, 0x93, 0x88, 0x76, 0x5a, 0x5a, 0xa5, 0x95, 0x5d, 0x63, 0x8f, 0x6e,\n", - " 0x74, 0x65, 0x85, 0x86, 0x98, 0x83, 0x7b, 0x8a, 0x5c, 0x5e, 0x7f, 0x88,\n", - " 0x78, 0x68, 0x8f, 0x9f, 0x94, 0x8d, 0x74, 0x7b, 0x6a, 0x91, 0x7a, 0x9a,\n", - " 0x70, 0x67, 0xb2, 0x92, 0x75, 0x4e, 0x74, 0xa3, 0x68, 0x74, 0x91, 0x80,\n", - " 0x55, 0x8e, 0x88, 0x73, 0x70, 0x81, 0xa1, 0xb8, 0x96, 0x48, 0x67, 0xb2,\n", - " 0x76, 0xa1, 0x98, 0xa9, 0x61, 0x6c, 0x5f, 0x98, 0x84, 0x92, 0xa9, 0x83,\n", 
- " 0x9e, 0x74, 0x7b, 0xa2, 0x6f, 0x72, 0x95, 0xa3, 0xb9, 0x80, 0x81, 0x7b,\n", - " 0x65, 0x6b, 0x96, 0x8b, 0xae, 0x79, 0x2b, 0x86, 0x5c, 0x2c, 0x8b, 0xa3,\n", - " 0x84, 0x74, 0x53, 0x7c, 0x54, 0x4a, 0x65, 0x89, 0xa6, 0x89, 0x47, 0x77,\n", - " 0x50, 0x6d, 0x8b, 0x94, 0x8a, 0x61, 0x32, 0x7c, 0x6f, 0x47, 0x78, 0xa2,\n", - " 0x9f, 0x42, 0x42, 0x71, 0x78, 0x76, 0x9e, 0x88, 0x70, 0x70, 0x56, 0x8a,\n", - " 0x83, 0x95, 0xa7, 0x9d, 0x9d, 0x88, 0x9a, 0x92, 0x48, 0x63, 0xaf, 0x91,\n", - " 0x6c, 0x75, 0x5d, 0x5e, 0x83, 0x86, 0xaa, 0x6f, 0x79, 0x84, 0x67, 0x79,\n", - " 0x63, 0x69, 0x8e, 0x81, 0x6a, 0x96, 0x8d, 0x86, 0x7b, 0x9f, 0xaa, 0x8e,\n", - " 0x63, 0x89, 0x9a, 0x7a, 0x5e, 0x7c, 0x87, 0x83, 0x81, 0x64, 0x7e, 0x59,\n", - " 0x6d, 0x5c, 0xa4, 0x72, 0x78, 0x85, 0x9b, 0x79, 0x85, 0x7d, 0x9c, 0x7d,\n", - " 0x9c, 0x5c, 0x66, 0x75, 0x66, 0x72, 0xb4, 0x7c, 0x83, 0x9e, 0x90, 0xae,\n", - " 0x69, 0x71, 0xb0, 0x84, 0x86, 0x50, 0x66, 0xab, 0x75, 0x96, 0xa8, 0x6c,\n", - " 0x87, 0x7b, 0x7e, 0x7c, 0x60, 0x55, 0x96, 0xb0, 0x6a, 0x79, 0x42, 0x9c,\n", - " 0x97, 0xa8, 0xb2, 0x9a, 0xa0, 0x84, 0x68, 0x90, 0x90, 0x98, 0x67, 0x9c,\n", - " 0xa3, 0x81, 0x71, 0xaa, 0x93, 0x6a, 0x84, 0x8c, 0x77, 0x79, 0x4d, 0x82,\n", - " 0x45, 0x1e, 0x7b, 0x94, 0x86, 0x86, 0x26, 0x82, 0x41, 0x6f, 0x8b, 0x86,\n", - " 0xa4, 0x80, 0x38, 0x71, 0x5e, 0x5b, 0x9a, 0x73, 0x86, 0x60, 0x5a, 0x9d,\n", - " 0x7b, 0x53, 0x89, 0xa0, 0x99, 0x76, 0x57, 0x81, 0x76, 0x5a, 0x9e, 0x85,\n", - " 0x5a, 0x7b, 0x56, 0x74, 0x71, 0x6a, 0x9c, 0x68, 0x7e, 0x76, 0x7d, 0x7f,\n", - " 0x52, 0x71, 0x85, 0xa2, 0x96, 0x63, 0x73, 0x7c, 0x7a, 0x97, 0x9f, 0x7c,\n", - " 0x77, 0x77, 0x59, 0x6b, 0x62, 0x77, 0xbc, 0x6b, 0x7c, 0x79, 0x75, 0x90,\n", - " 0x67, 0x82, 0x92, 0x9c, 0x81, 0x92, 0x84, 0x7a, 0x72, 0x5b, 0x86, 0x82,\n", - " 0x87, 0x73, 0x87, 0x7c, 0x57, 0x76, 0xa6, 0x7d, 0x7d, 0x94, 0x6a, 0x67,\n", - " 0x76, 0x89, 0x9a, 0x6d, 0x7d, 0xa4, 0x6d, 0x7e, 0x74, 0x7e, 0x8f, 0xad,\n", - " 0x99, 0x55, 0x5c, 0x82, 0x75, 0x9e, 0xae, 0x76, 0x6b, 0x93, 0x5d, 0x92,\n", 
- " 0x6e, 0x54, 0x88, 0x8f, 0x6a, 0x72, 0x64, 0x93, 0x6e, 0x63, 0x8c, 0xa7,\n", - " 0xa6, 0x7a, 0x57, 0x9f, 0x94, 0x91, 0xbd, 0xa4, 0x92, 0x7a, 0x68, 0x9d,\n", - " 0x7d, 0x6b, 0x6b, 0xbc, 0xad, 0x7a, 0x73, 0x92, 0x7b, 0x6d, 0x91, 0x6a,\n", - " 0x66, 0x8d, 0x34, 0x9b, 0x75, 0x3b, 0x93, 0x78, 0x88, 0x58, 0x1a, 0x7f,\n", - " 0x52, 0x61, 0xa3, 0xb1, 0x9c, 0x60, 0x1d, 0x90, 0x7b, 0x37, 0x9f, 0x84,\n", - " 0xa3, 0x6c, 0x2e, 0xac, 0x73, 0x62, 0x92, 0x9a, 0x94, 0x6b, 0x5c, 0x82,\n", - " 0x5f, 0x4c, 0x9a, 0x8c, 0x76, 0x69, 0x77, 0x5f, 0x5d, 0x91, 0x80, 0x9a,\n", - " 0x60, 0x4c, 0x7b, 0x57, 0x67, 0x6b, 0x92, 0x93, 0x64, 0x91, 0x55, 0x75,\n", - " 0x41, 0x82, 0x78, 0x68, 0xa2, 0x55, 0x6a, 0x69, 0x59, 0x70, 0x8a, 0x7b,\n", - " 0x70, 0x6e, 0x63, 0x83, 0x7f, 0xa4, 0x80, 0x85, 0x86, 0x93, 0x7e, 0x6f,\n", - " 0x7b, 0x94, 0xa4, 0xa7, 0x97, 0x7a, 0x87, 0x64, 0x4a, 0x97, 0x94, 0x6a,\n", - " 0x96, 0x73, 0x5e, 0x79, 0x6a, 0x99, 0x86, 0xa0, 0x93, 0xac, 0x79, 0x76,\n", - " 0x7f, 0x7b, 0xa7, 0x75, 0x8a, 0x71, 0x53, 0x87, 0x93, 0x7f, 0x9e, 0x7b,\n", - " 0x81, 0x70, 0x68, 0x8b, 0x8c, 0x9c, 0xaf, 0xa7, 0x6a, 0x9b, 0x49, 0x6d,\n", - " 0x67, 0x80, 0x8b, 0x86, 0x9f, 0x80, 0x74, 0x7a, 0x96, 0x74, 0xc8, 0x9d,\n", - " 0xa4, 0x74, 0x71, 0x6c, 0x75, 0x6a, 0x9a, 0x95, 0x97, 0x8c, 0x6e, 0x8a,\n", - " 0x85, 0x62, 0x5f, 0x7e, 0x9e, 0x6b, 0x48, 0x93, 0x44, 0x37, 0x83, 0xa2,\n", - " 0x97, 0x72, 0x25, 0x79, 0x32, 0x39, 0x68, 0x8f, 0x93, 0x61, 0x2b, 0x96,\n", - " 0x94, 0x43, 0x82, 0x6e, 0x8f, 0x6d, 0x53, 0x9b, 0x65, 0x50, 0x70, 0x9d,\n", - " 0x7d, 0x53, 0x3b, 0x86, 0x77, 0x6c, 0xa6, 0x90, 0x6b, 0x3e, 0x7b, 0x7a,\n", - " 0x50, 0x81, 0xb4, 0x76, 0xa5, 0x74, 0x8b, 0x73, 0x79, 0x69, 0xa8, 0x9a,\n", - " 0x82, 0x4a, 0x5e, 0x6c, 0x8d, 0x66, 0xa3, 0x80, 0x8d, 0x74, 0x5b, 0x7c,\n", - " 0x77, 0xaa, 0x82, 0x69, 0x5e, 0x7d, 0x7f, 0x63, 0xa3, 0x8c, 0xb3, 0x9a,\n", - " 0x81, 0x8f, 0x7b, 0x77, 0x60, 0x89, 0x6a, 0x82, 0x5a, 0x7a, 0x71, 0x61,\n", - " 0x93, 0x73, 0x8b, 0xb0, 0xa2, 0x92, 0x7c, 0x84, 0x8b, 0x72, 0x91, 0x8d,\n", 
- " 0x91, 0x80, 0x6c, 0x75, 0x7a, 0xb3, 0x95, 0x5e, 0xa5, 0x5d, 0x54, 0x8b,\n", - " 0x63, 0x91, 0xa7, 0x68, 0x96, 0x4c, 0x5a, 0x86, 0x76, 0x82, 0xb6, 0xa0,\n", - " 0x68, 0x6b, 0x53, 0x76, 0x60, 0x65, 0x90, 0xaf, 0x82, 0x66, 0x80, 0x7b,\n", - " 0x84, 0xa0, 0xb0, 0xb8, 0x81, 0x6e, 0x81, 0x8a, 0x74, 0x6e, 0x97, 0xa8,\n", - " 0x89, 0x7b, 0x7b, 0x6e, 0x63, 0x74, 0x5a, 0x7b, 0x7e, 0x84, 0x40, 0x95,\n", - " 0x73, 0x3c, 0x7c, 0x72, 0x9b, 0x92, 0x27, 0x87, 0x69, 0x5b, 0x99, 0x8a,\n", - " 0xa8, 0x65, 0x36, 0x8f, 0x86, 0x3e, 0xa1, 0x79, 0x9f, 0x4d, 0x41, 0xc5,\n", - " 0x8c, 0x6a, 0x7e, 0x7f, 0x68, 0x49, 0x5c, 0x91, 0x50, 0x6a, 0x8c, 0x81,\n", - " 0x75, 0x4c, 0x6a, 0x74, 0x8a, 0x87, 0xa0, 0x93, 0x7e, 0x6d, 0x52, 0x79,\n", - " 0x86, 0x6a, 0x68, 0x6c, 0x83, 0x67, 0x79, 0x73, 0x6f, 0x72, 0x97, 0x84,\n", - " 0x8b, 0x78, 0x64, 0x69, 0x8f, 0x92, 0x86, 0x61, 0x5d, 0x85, 0x70, 0x64,\n", - " 0x7d, 0xa3, 0x92, 0xa0, 0x72, 0x71, 0x5d, 0x63, 0x7c, 0x70, 0xaf, 0x6f,\n", - " 0x93, 0x6a, 0x7e, 0x7f, 0x64, 0xab, 0x85, 0x73, 0x8f, 0x8a, 0x7e, 0x5f,\n", - " 0x7a, 0x6f, 0xaa, 0x71, 0x97, 0x7d, 0x60, 0x7c, 0x48, 0x69, 0xa9, 0xaa,\n", - " 0x98, 0x7c, 0x61, 0x85, 0x66, 0x97, 0xa2, 0x73, 0x74, 0x65, 0x52, 0x67,\n", - " 0x79, 0x8a, 0x79, 0x71, 0x85, 0x6e, 0x6d, 0x67, 0x5e, 0x7f, 0xb9, 0x93,\n", - " 0x96, 0x53, 0x69, 0x6e, 0x7f, 0x8f, 0xab, 0x93, 0xa9, 0x70, 0x6e, 0x71,\n", - " 0x7e, 0x87, 0x98, 0x7a, 0xae, 0x90, 0x64, 0x88, 0x8a, 0x4f, 0x6d, 0x9e,\n", - " 0xac, 0x7e, 0x31, 0x92, 0x50, 0x26, 0x95, 0xb2, 0x90, 0x99, 0x0c, 0x84,\n", - " 0x40, 0x4f, 0x8f, 0x76, 0xa4, 0x46, 0x4c, 0x9d, 0x8b, 0x57, 0x81, 0x79,\n", - " 0x7b, 0x47, 0x4d, 0x9c, 0x5f, 0x3b, 0x6f, 0x90, 0x7a, 0x3f, 0x66, 0x9d,\n", - " 0x6c, 0x45, 0x8b, 0x71, 0x79, 0x62, 0x72, 0x78, 0x93, 0x95, 0x7e, 0x86,\n", - " 0x7a, 0x6b, 0x77, 0x74, 0x6b, 0x86, 0xa4, 0x7e, 0x84, 0x48, 0x78, 0x75,\n", - " 0x6e, 0x8b, 0x8e, 0x56, 0x69, 0x7b, 0x59, 0x68, 0x5d, 0x77, 0x69, 0x66,\n", - " 0x67, 0x9f, 0x75, 0x7b, 0x76, 0x64, 0xc1, 0x78, 0x7d, 0x74, 0x82, 0x73,\n", 
- " 0x73, 0x90, 0xb8, 0x82, 0x7e, 0x70, 0x7b, 0x7a, 0x64, 0xa1, 0x7e, 0x85,\n", - " 0x83, 0x81, 0x60, 0x7b, 0x91, 0x82, 0x6f, 0x95, 0xa0, 0x86, 0x6d, 0x88,\n", - " 0x75, 0x8d, 0x94, 0x90, 0x76, 0x6d, 0x6e, 0x79, 0x64, 0x74, 0xa8, 0xb1,\n", - " 0x92, 0x6e, 0x61, 0x79, 0x74, 0x91, 0x95, 0x74, 0x65, 0x74, 0x5e, 0x7f,\n", - " 0x8b, 0x60, 0x9b, 0x9f, 0x74, 0x77, 0x4c, 0x66, 0x7c, 0x80, 0x97, 0x98,\n", - " 0x9d, 0x86, 0x55, 0x8a, 0x8a, 0x79, 0x8c, 0x82, 0xb0, 0x7d, 0x63, 0x8c,\n", - " 0x5d, 0x5b, 0x82, 0x58, 0x84, 0x56, 0x51, 0x92, 0x75, 0x24, 0x97, 0x92,\n", - " 0x75, 0x6e, 0x19, 0x8e, 0x47, 0x3e, 0x7b, 0x7b, 0x87, 0x6b, 0x3f, 0xa9,\n", - " 0x59, 0x40, 0x86, 0x74, 0x69, 0x4a, 0x2d, 0xad, 0x91, 0x62, 0xb2, 0xa9,\n", - " 0x74, 0x6c, 0x47, 0x94, 0x51, 0x75, 0xb2, 0x6f, 0x75, 0x4b, 0x60, 0xa2,\n", - " 0x8e, 0x6a, 0xa4, 0x79, 0x6f, 0x57, 0x80, 0x8c, 0x6c, 0x8e, 0x9e, 0x74,\n", - " 0x70, 0x5f, 0x66, 0x80, 0x80, 0x89, 0xb5, 0x8a, 0x7a, 0x96, 0x87, 0x7a,\n", - " 0x7b, 0x85, 0x90, 0x79, 0x59, 0x6d, 0x77, 0x8c, 0x8f, 0x82, 0xb3, 0x9c,\n", - " 0x6a, 0x6a, 0x6b, 0x70, 0x77, 0x89, 0x96, 0x86, 0x94, 0x72, 0x7e, 0x72,\n", - " 0xa9, 0x93, 0x8d, 0x7a, 0x6d, 0x8f, 0x66, 0x72, 0x9a, 0x91, 0x9e, 0x98,\n", - " 0xa0, 0x8b, 0x50, 0x76, 0x5c, 0x74, 0xbc, 0x9a, 0x98, 0x73, 0x80, 0x7d,\n", - " 0x73, 0x7c, 0xc0, 0x8b, 0x86, 0x7a, 0x66, 0x86, 0x83, 0x72, 0x8f, 0x96,\n", - " 0x98, 0x56, 0x45, 0x7b, 0x77, 0x92, 0xac, 0x8a, 0xae, 0x43, 0x33, 0x73,\n", - " 0x78, 0x83, 0x98, 0x84, 0x86, 0x78, 0x54, 0x7e, 0x70, 0x5f, 0xa6, 0xa1,\n", - " 0x94, 0x81, 0x73, 0x8d, 0x83, 0x5b, 0x88, 0x71, 0xb2, 0x91, 0x50, 0x99,\n", - " 0x6b, 0x47, 0x72, 0x92, 0x87, 0x6d, 0x07, 0x99, 0x57, 0x3d, 0x8d, 0x83,\n", - " 0x9d, 0x49, 0x40, 0x9d, 0x5c, 0x57, 0x95, 0x73, 0x6e, 0x4b, 0x49, 0xab,\n", - " 0x97, 0x58, 0x8b, 0x7a, 0x7a, 0x48, 0x47, 0x8b, 0x7e, 0x5d, 0xa9, 0x6d,\n", - " 0x8a, 0x3f, 0x60, 0x82, 0x86, 0x98, 0xa9, 0x7c, 0x74, 0x59, 0x9b, 0x80,\n", - " 0x4e, 0x75, 0x9c, 0x5e, 0x75, 0x8c, 0x67, 0x7e, 0x78, 0x75, 0x87, 0x6c,\n", 
- " 0x79, 0x73, 0x63, 0x77, 0x6e, 0x7a, 0x8d, 0x73, 0x4e, 0x72, 0x4a, 0x7c,\n", - " 0x8f, 0x79, 0x70, 0x7a, 0x70, 0x73, 0x7b, 0x7a, 0x62, 0xa1, 0x7b, 0x63,\n", - " 0x9a, 0x89, 0x76, 0x64, 0x84, 0x7d, 0x9c, 0x94, 0xb0, 0x7f, 0x6c, 0x7b,\n", - " 0x8d, 0x89, 0x89, 0x7b, 0x9d, 0x99, 0x64, 0x8b, 0x5c, 0x88, 0xa6, 0x8e,\n", - " 0x81, 0x86, 0x7e, 0x85, 0x73, 0x72, 0xad, 0x5d, 0x5f, 0x7e, 0x63, 0x74,\n", - " 0x64, 0xa1, 0x9c, 0x83, 0x7c, 0x83, 0x7b, 0x7b, 0x71, 0xa0, 0x9e, 0xaf,\n", - " 0x89, 0x79, 0x4c, 0x7c, 0x8c, 0x78, 0x91, 0x87, 0x8a, 0x87, 0x5e, 0x85,\n", - " 0x7b, 0x61, 0x9c, 0x88, 0xa5, 0x8d, 0x7c, 0x9c, 0x6b, 0x47, 0x95, 0x85,\n", - " 0x81, 0x80, 0x59, 0xb2, 0x4f, 0x3d, 0xae, 0x8c, 0x8d, 0x71, 0x11, 0x95,\n", - " 0x31, 0x65, 0x9d, 0xa0, 0x8e, 0x64, 0x42, 0xb9, 0x6a, 0x5c, 0x91, 0x82,\n", - " 0x91, 0x50, 0x33, 0xb2, 0x7a, 0x54, 0xac, 0x88, 0x92, 0x61, 0x4e, 0xad,\n", - " 0x65, 0x5c, 0x91, 0xb0, 0x72, 0x65, 0x4a, 0x79, 0x68, 0x77, 0x75, 0x5f,\n", - " 0x79, 0x6d, 0x6f, 0x7c, 0x4d, 0x71, 0xb8, 0x78, 0x8a, 0x87, 0x6e, 0x72,\n", - " 0x7d, 0x79, 0x87, 0x80, 0x5a, 0x78, 0x77, 0x78, 0x80, 0x8f, 0x8c, 0x56,\n", - " 0x7a, 0x8b, 0x62, 0x82, 0x5a, 0x96, 0x82, 0x68, 0x71, 0x5d, 0x75, 0x65,\n", - " 0x93, 0xb5, 0x71, 0x82, 0x82, 0x8a, 0x4b, 0x7c, 0x62, 0x6f, 0xc1, 0x86,\n", - " 0x9d, 0x90, 0x63, 0x71, 0x86, 0x9e, 0x9f, 0x77, 0x90, 0x97, 0x68, 0x81,\n", - " 0x5a, 0x8c, 0xab, 0x5e, 0x81, 0x76, 0x83, 0x79, 0x8f, 0xa1, 0x89, 0x79,\n", - " 0x81, 0x8a, 0x7e, 0x6c, 0x65, 0x79, 0xc7, 0x89, 0x92, 0x68, 0x78, 0x70,\n", - " 0x65, 0x96, 0x9e, 0x82, 0x7d, 0x5f, 0x7b, 0x77, 0x72, 0x84, 0x7e, 0x92,\n", - " 0x97, 0x7b, 0x6e, 0x67, 0x81, 0xa1, 0x9a, 0xab, 0x8d, 0x78, 0x61, 0x78,\n", - " 0x52, 0x66, 0xaa, 0x77, 0x75, 0xa3, 0x5e, 0xa0, 0x51, 0x40, 0x68, 0xb0,\n", - " 0x9a, 0x93, 0x11, 0x82, 0x69, 0x48, 0x9c, 0x77, 0x8d, 0x62, 0x36, 0xac,\n", - " 0x6c, 0x4c, 0xa3, 0xab, 0x8f, 0x32, 0x4f, 0xa9, 0x80, 0x68, 0xab, 0x7a,\n", - " 0x90, 0x61, 0x5c, 0xa5, 0x84, 0x4c, 0x8c, 0x7a, 0x95, 0x54, 0x72, 0xa0,\n", 
- " 0x66, 0x85, 0xb3, 0x91, 0x69, 0x64, 0x68, 0x56, 0x66, 0x8d, 0xa0, 0x9f,\n", - " 0x7a, 0x88, 0x5d, 0x7d, 0x48, 0x80, 0x7f, 0x7c, 0x7c, 0x99, 0x65, 0x81,\n", - " 0x73, 0x8b, 0x8c, 0x61, 0x44, 0x60, 0x53, 0x8e, 0x64, 0x80, 0x9c, 0x74,\n", - " 0x5d, 0x70, 0x8f, 0x5a, 0x68, 0x7a, 0x82, 0xa1, 0x75, 0x7b, 0x83, 0x60,\n", - " 0x75, 0x5e, 0xa2, 0x94, 0x6a, 0x88, 0x78, 0x71, 0x95, 0x70, 0x8b, 0x86,\n", - " 0x7e, 0x94, 0x5f, 0x65, 0x5f, 0xb1, 0x97, 0x99, 0x94, 0x84, 0x88, 0x7d,\n", - " 0x50, 0x8c, 0xaa, 0x81, 0x7b, 0x7c, 0x77, 0x65, 0x5e, 0x91, 0x9c, 0x89,\n", - " 0x8c, 0x85, 0x75, 0x62, 0x7b, 0x78, 0xc3, 0x7a, 0x62, 0x8c, 0x66, 0x6f,\n", - " 0x79, 0x7a, 0x9c, 0x6d, 0x7c, 0x6b, 0x5c, 0x7d, 0x6d, 0x54, 0x93, 0x87,\n", - " 0x7a, 0x7a, 0x50, 0x85, 0x60, 0x56, 0x5e, 0x6b, 0x90, 0x7c, 0x52, 0xa5,\n", - " 0x54, 0x42, 0x7b, 0x75, 0x83, 0x8c, 0x2c, 0xa6, 0x6f, 0x62, 0x78, 0x78,\n", - " 0x86, 0x36, 0x4b, 0xaa, 0x86, 0x54, 0x92, 0x8d, 0x7f, 0x53, 0x37, 0xbe,\n", - " 0x86, 0x7a, 0x90, 0x7e, 0x8e, 0x50, 0x58, 0xa6, 0x82, 0x58, 0x73, 0x74,\n", - " 0x66, 0x5c, 0x6a, 0x7f, 0xa2, 0x69, 0xbd, 0xa9, 0x74, 0x76, 0x75, 0x6f,\n", - " 0x45, 0x6c, 0xa5, 0x79, 0x82, 0x67, 0x56, 0x7c, 0x7f, 0x81, 0x67, 0x6d,\n", - " 0x81, 0x87, 0x71, 0x69, 0x69, 0x81, 0x85, 0x84, 0x5a, 0x8c, 0x5f, 0x73,\n", - " 0x80, 0x9c, 0x9e, 0x90, 0x77, 0xa0, 0x9c, 0x6c, 0x73, 0x8a, 0x84, 0x72,\n", - " 0x87, 0xa1, 0x67, 0x64, 0x5d, 0x9b, 0x9d, 0x9b, 0x97, 0x83, 0x5f, 0x61,\n", - " 0x77, 0x91, 0xa0, 0x8f, 0x8a, 0x6c, 0x45, 0x5f, 0x6d, 0xa6, 0x9b, 0x76,\n", - " 0x86, 0x93, 0x91, 0x7d, 0x54, 0x61, 0xa4, 0x6a, 0x5b, 0x69, 0x5f, 0x6d,\n", - " 0x83, 0xaf, 0xa0, 0x78, 0x9d, 0x62, 0x65, 0x69, 0x5f, 0x78, 0xbf, 0x91,\n", - " 0x7b, 0x7b, 0x52, 0x5d, 0x70, 0x78, 0xa9, 0x87, 0x93, 0x74, 0x61, 0x74,\n", - " 0x8c, 0x61, 0x97, 0x86, 0x9b, 0x7c, 0x7d, 0x75, 0x4b, 0x64, 0xa7, 0x81,\n", - " 0x8a, 0x9c, 0x29, 0xa2, 0x5f, 0x38, 0x6a, 0xb0, 0x82, 0x53, 0x1a, 0xa7,\n", - " 0x38, 0x47, 0x97, 0x90, 0x8d, 0x41, 0x25, 0xa7, 0x65, 0x63, 0x8b, 0x79,\n", 
- " 0x8f, 0x3e, 0x21, 0xd0, 0x5e, 0x5d, 0x9d, 0x68, 0x75, 0x3e, 0x68, 0xb6,\n", - " 0x6a, 0x50, 0x9a, 0x71, 0x81, 0x45, 0x6d, 0x9a, 0x7f, 0x86, 0x9c, 0x63,\n", - " 0x7d, 0x74, 0x69, 0x7d, 0x5a, 0x6a, 0x8d, 0x72, 0x6b, 0x69, 0x4c, 0x6f,\n", - " 0x7c, 0x8e, 0xa6, 0x83, 0x70, 0x65, 0x5f, 0x78, 0x69, 0x67, 0x7f, 0x8d,\n", - " 0x58, 0x76, 0x4a, 0x85, 0x80, 0x89, 0x9f, 0x91, 0x52, 0x62, 0x72, 0x60,\n", - " 0x7b, 0x5c, 0x77, 0x6f, 0x9d, 0xa4, 0x98, 0x70, 0x6f, 0xad, 0x94, 0x9f,\n", - " 0x7b, 0x89, 0x74, 0x7e, 0x5d, 0x8d, 0xab, 0x98, 0x8f, 0x90, 0x82, 0x84,\n", - " 0x60, 0x7c, 0xb7, 0x8e, 0x79, 0x83, 0x56, 0x86, 0x87, 0x79, 0x95, 0x75,\n", - " 0x78, 0x71, 0x58, 0x73, 0x87, 0x5d, 0xc6, 0x9f, 0x75, 0x61, 0x4f, 0x71,\n", - " 0x91, 0x88, 0xb3, 0x8c, 0x7d, 0x7c, 0x6a, 0x75, 0x6d, 0x66, 0x8e, 0x94,\n", - " 0x96, 0x74, 0x59, 0x6f, 0x6d, 0x65, 0xb0, 0x8e, 0x7b, 0x89, 0x7a, 0x6a,\n", - " 0x7d, 0x57, 0x82, 0x7a, 0x61, 0x9f, 0x50, 0xab, 0x57, 0x46, 0x86, 0x8d,\n", - " 0xa3, 0x96, 0x18, 0xab, 0x51, 0x6e, 0xb3, 0x7e, 0x90, 0x6d, 0x6d, 0xc0,\n", - " 0x54, 0x35, 0x96, 0x84, 0x8e, 0x49, 0x28, 0xe4, 0x81, 0x5f, 0x9b, 0x87,\n", - " 0x8c, 0x33, 0x56, 0xb4, 0x61, 0x5e, 0x8b, 0x81, 0x99, 0x61, 0x6b, 0x96,\n", - " 0x75, 0x82, 0x9e, 0x7c, 0x90, 0x63, 0x64, 0x6b, 0x55, 0x6e, 0xb6, 0x7f,\n", - " 0x5f, 0x55, 0x65, 0x60, 0x35, 0x8a, 0x85, 0x91, 0x4d, 0x62, 0x90, 0x90,\n", - " 0x57, 0x5a, 0x9f, 0x7b, 0x4c, 0x86, 0x73, 0x83, 0x4a, 0x6d, 0xb0, 0x67,\n", - " 0x65, 0x89, 0x54, 0x68, 0x89, 0x7b, 0x72, 0x4f, 0x7a, 0x93, 0x61, 0x7e,\n", - " 0x79, 0x89, 0x8f, 0x9c, 0x7b, 0x70, 0x48, 0x67, 0x82, 0x75, 0xaa, 0x92,\n", - " 0x9a, 0x8f, 0x79, 0x8c, 0x64, 0x94, 0x98, 0x83, 0x7c, 0x8f, 0x5c, 0x77,\n", - " 0x70, 0x90, 0x91, 0x88, 0x7d, 0x51, 0x5d, 0x5d, 0x8b, 0x9f, 0xbc, 0x78,\n", - " 0x9e, 0x73, 0x67, 0x6d, 0x82, 0x8d, 0xc9, 0x86, 0x96, 0x6a, 0x5d, 0x79,\n", - " 0x7e, 0x6b, 0xb2, 0x79, 0x88, 0x85, 0x65, 0x73, 0x75, 0x6b, 0x9e, 0x7f,\n", - " 0x8e, 0x94, 0x8e, 0x7d, 0x74, 0x61, 0x97, 0x56, 0x97, 0x6b, 0x30, 0xb6,\n", 
- " 0x5f, 0x5a, 0xaa, 0xa5, 0x85, 0x5d, 0x01, 0xbc, 0x79, 0x63, 0x6e, 0x82,\n", - " 0x72, 0x26, 0x4f, 0xc8, 0x98, 0x56, 0x85, 0x9a, 0x81, 0x1f, 0x48, 0xcf,\n", - " 0x84, 0x74, 0x75, 0x87, 0xae, 0x43, 0x6f, 0xdf, 0x6a, 0x4e, 0x97, 0x5d,\n", - " 0x8f, 0x37, 0x55, 0x89, 0x7d, 0x82, 0xb1, 0x89, 0x6d, 0x52, 0x65, 0x8b,\n", - " 0x71, 0x87, 0x8d, 0x6a, 0x99, 0x5d, 0x65, 0x78, 0x67, 0x8d, 0x7b, 0x51,\n", - " 0x60, 0x8a, 0x59, 0x72, 0x78, 0x93, 0x88, 0x75, 0x46, 0x60, 0x6e, 0x79,\n", - " 0x7b, 0x9d, 0x9c, 0x8c, 0x5c, 0x7c, 0x69, 0x71, 0x60, 0x6f, 0xb0, 0x7d,\n", - " 0x4c, 0x5e, 0x88, 0x77, 0x74, 0x6a, 0x6f, 0x9a, 0xa2, 0x83, 0x48, 0x5a,\n", - " 0x6e, 0xa2, 0x8b, 0x7a, 0x65, 0x5b, 0x4b, 0x80, 0x5b, 0x8f, 0xaf, 0x8e,\n", - " 0x93, 0x4a, 0x59, 0x6e, 0x5e, 0x89, 0x91, 0x87, 0x73, 0x6a, 0x47, 0x6c,\n", - " 0x6c, 0x81, 0xad, 0x5a, 0x76, 0x51, 0x51, 0x6c, 0x80, 0x92, 0x9d, 0xae,\n", - " 0x90, 0x71, 0x6c, 0x7a, 0x7c, 0x84, 0xa7, 0x7d, 0x82, 0x7c, 0x80, 0x59,\n", - " 0x7d, 0x86, 0xa9, 0x94, 0x8e, 0x7b, 0x7c, 0x67, 0x67, 0x66, 0x8f, 0x49,\n", - " 0x5d, 0xa4, 0x4a, 0xbc, 0x5a, 0x34, 0xa7, 0xaa, 0x9e, 0x86, 0x17, 0xc0,\n", - " 0x53, 0x67, 0x76, 0xae, 0x8d, 0x37, 0x4a, 0xd6, 0x76, 0x69, 0x95, 0x7a,\n", - " 0x8a, 0x0e, 0x3f, 0xe8, 0x60, 0x4d, 0x9e, 0x90, 0xad, 0x44, 0x46, 0xc5,\n", - " 0x4c, 0x6e, 0x72, 0x8c, 0x89, 0x49, 0x51, 0xa0, 0x60, 0x84, 0x84, 0x9d,\n", - " 0xa4, 0x5a, 0x84, 0x8d, 0x69, 0x6a, 0x97, 0x78, 0x72, 0x66, 0x72, 0x9b,\n", - " 0x74, 0x7a, 0x95, 0x7c, 0x7a, 0x6e, 0x74, 0x7f, 0x65, 0x94, 0x77, 0x7e,\n", - " 0x85, 0x6d, 0x65, 0x7b, 0x63, 0x7b, 0x87, 0x49, 0x80, 0x74, 0x74, 0x85,\n", - " 0x6e, 0x78, 0xad, 0x66, 0x8a, 0x65, 0x54, 0x7c, 0x4e, 0x62, 0x97, 0x7f,\n", - " 0x82, 0x6c, 0x58, 0x79, 0x91, 0x94, 0xb3, 0x7a, 0x88, 0x82, 0x60, 0x7f,\n", - " 0x8c, 0xa7, 0x7b, 0x93, 0x77, 0x49, 0x6f, 0x6f, 0x5a, 0x8d, 0x93, 0x8b,\n", - " 0x87, 0x59, 0x7d, 0x5e, 0x83, 0x7e, 0x8c, 0x7a, 0x91, 0x4e, 0x6f, 0x89,\n", - " 0x8a, 0x87, 0x8b, 0x85, 0x8e, 0x43, 0x63, 0x8d, 0x90, 0x6c, 0xa5, 0x73,\n", 
- " 0x8a, 0x78, 0x5f, 0x73, 0x88, 0x57, 0x9e, 0x8f, 0x7f, 0x91, 0x70, 0x77,\n", - " 0x8a, 0x76, 0xa2, 0x77, 0x53, 0x86, 0x51, 0xd8, 0xa9, 0x5b, 0x9b, 0x96,\n", - " 0x7c, 0x71, 0x01, 0xd4, 0x56, 0x4a, 0x95, 0xab, 0x91, 0x54, 0x45, 0xe5,\n", - " 0x74, 0x4f, 0x87, 0x6a, 0xa2, 0x3e, 0x47, 0xff, 0x91, 0x4d, 0x94, 0x97,\n", - " 0x6d, 0x74, 0x77, 0xe0, 0x5d, 0x4e, 0x5f, 0x73, 0x70, 0x3a, 0x68, 0xb2,\n", - " 0x78, 0x61, 0x8c, 0x77, 0xa8, 0x57, 0x8c, 0x99, 0x23, 0x5a, 0x84, 0x78,\n", - " 0x9b, 0x7f, 0x5e, 0xa0, 0x49, 0x84, 0x83, 0x94, 0x99, 0x4d, 0x8d, 0x9a,\n", - " 0x86, 0x90, 0x9b, 0x51, 0x75, 0x73, 0x78, 0x89, 0x59, 0x64, 0x78, 0x91,\n", - " 0x72, 0x9c, 0x72, 0x7e, 0x65, 0x6a, 0x80, 0xaa, 0x94, 0x65, 0x6d, 0x87,\n", - " 0x73, 0x93, 0x97, 0x7d, 0x99, 0x63, 0x75, 0x89, 0x67, 0xa1, 0x90, 0x7f,\n", - " 0x88, 0x65, 0x6d, 0x8f, 0x7d, 0x62, 0x91, 0xa7, 0x8b, 0x73, 0x51, 0x88,\n", - " 0x66, 0x66, 0x99, 0xa7, 0x7c, 0x54, 0x82, 0x67, 0x64, 0x8a, 0x95, 0x7c,\n", - " 0x8a, 0x5d, 0x5e, 0x68, 0x4b, 0x75, 0x92, 0x7a, 0x9f, 0x66, 0x71, 0x8d,\n", - " 0x76, 0x72, 0x8e, 0x77, 0x76, 0x8c, 0x5b, 0x88, 0x9a, 0x92, 0x7c, 0x74,\n", - " 0x95, 0xaa, 0x71, 0x77, 0x97, 0x93, 0x9e, 0x62, 0x96, 0x6a, 0x49, 0xd8,\n", - " 0x81, 0x99, 0xae, 0x87, 0x6c, 0x76, 0x3e, 0xd9, 0x6e, 0x95, 0xa3, 0x86,\n", - " 0x60, 0x6c, 0x5c, 0xbe, 0x98, 0x8a, 0x99, 0x7c, 0x47, 0x45, 0x69, 0xeb,\n", - " 0x9d, 0x7d, 0xbb, 0x90, 0x66, 0x69, 0x70, 0xc6, 0x7b, 0x59, 0x9e, 0x87,\n", - " 0x58, 0x76, 0x7c, 0xae, 0x72, 0x7d, 0x9f, 0x92, 0x82, 0x58, 0x51, 0x7a,\n", - " 0x5d, 0x77, 0xa8, 0x7c, 0x56, 0x68, 0x88, 0x8a, 0x7e, 0x8a, 0x98, 0x68,\n", - " 0x64, 0x79, 0x6e, 0x7a, 0x60, 0x96, 0x98, 0x60, 0x60, 0x71, 0x60, 0x8e,\n", - " 0x7c, 0x8c, 0x92, 0x92, 0x77, 0x80, 0x90, 0x91, 0x81, 0x82, 0x9c, 0x80,\n", - " 0x61, 0x7f, 0x5a, 0x8e, 0x88, 0x7c, 0x8e, 0x79, 0x69, 0x8e, 0x4e, 0x7e,\n", - " 0x84, 0x9e, 0x67, 0x72, 0x5c, 0x78, 0x7b, 0x8c, 0x65, 0x7d, 0x8e, 0xa4,\n", - " 0x5e, 0x7a, 0x5c, 0x97, 0x6a, 0x81, 0xab, 0x85, 0x4d, 0x73, 0x83, 0x96,\n", 
- " 0x8b, 0x7d, 0xa6, 0x69, 0x74, 0x86, 0x73, 0x79, 0x52, 0x8c, 0xa0, 0x86,\n", - " 0x64, 0x7b, 0x84, 0x77, 0x87, 0x93, 0x7d, 0x6d, 0x98, 0x6d, 0x88, 0x5f,\n", - " 0x7c, 0x84, 0x92, 0x82, 0x81, 0x76, 0x85, 0x77, 0x98, 0x85, 0x88, 0x68,\n", - " 0x7d, 0x71, 0x3c, 0xf1, 0x83, 0x86, 0xa2, 0xb3, 0x6e, 0x77, 0x53, 0xe8,\n", - " 0xa8, 0xc7, 0xb3, 0x83, 0x93, 0x83, 0x63, 0xe8, 0x94, 0xb3, 0x86, 0x6e,\n", - " 0x75, 0x5d, 0x54, 0xf0, 0x89, 0xa7, 0x94, 0xb1, 0x7e, 0x91, 0x9a, 0xb8,\n", - " 0x91, 0x7e, 0x99, 0x50, 0x71, 0x82, 0x8a, 0x91, 0x7a, 0x8a, 0x8b, 0x80,\n", - " 0x64, 0x6a, 0x5f, 0xbe, 0x5d, 0x96, 0xb1, 0x82, 0x45, 0x71, 0x8b, 0x95,\n", - " 0x7c, 0x9b, 0x89, 0x6d, 0x5b, 0x73, 0x81, 0x90, 0x76, 0xab, 0xa6, 0x88,\n", - " 0x62, 0x7d, 0x75, 0x99, 0x7a, 0x8b, 0x6e, 0x9b, 0x83, 0x89, 0x99, 0x93,\n", - " 0x81, 0x9e, 0x8a, 0x76, 0x75, 0x7d, 0x6c, 0x93, 0x68, 0x7a, 0x8d, 0x78,\n", - " 0x88, 0x93, 0x66, 0xa5, 0x6c, 0xae, 0xb1, 0x83, 0x72, 0x8f, 0x6b, 0x7b,\n", - " 0x79, 0x9b, 0x98, 0x7c, 0x82, 0x84, 0x7d, 0x7d, 0x71, 0x7c, 0xb0, 0x81,\n", - " 0x74, 0x89, 0x72, 0x89, 0x98, 0xa0, 0x7d, 0x62, 0x2f, 0x50, 0x7d, 0x8b,\n", - " 0x4c, 0x83, 0x87, 0x89, 0x57, 0x9e, 0x92, 0x8c, 0x81, 0x7e, 0xb9, 0x95,\n", - " 0x7f, 0x76, 0x8e, 0x90, 0x9d, 0x68, 0x78, 0x95, 0x7d, 0xab, 0x84, 0x8a,\n", - " 0x64, 0x9f, 0x80, 0x94, 0x8d, 0x89, 0x76, 0x8e, 0x6f, 0x8b, 0x75, 0x7d,\n", - " 0x89, 0x74, 0x67, 0x8a, 0x7d, 0x63, 0x79, 0x6d, 0x79, 0x8a, 0x78, 0x7f,\n", - " 0x7a, 0x9b, 0x70, 0x70, 0x84, 0x86, 0x80, 0x95, 0x5a, 0x77, 0x80, 0x91,\n", - " 0x9c, 0x92, 0x76, 0x81, 0x69, 0x89, 0x78, 0xa5, 0x7a, 0x8d, 0x86, 0x64,\n", - " 0x8f, 0x8d, 0x7d, 0xa1, 0x8c, 0x7b, 0x77, 0x7e, 0x80, 0x93, 0x86, 0x68,\n", - " 0x90, 0x9c, 0x71, 0x8c, 0x68, 0x52, 0x85, 0x88, 0x89, 0x92, 0x64, 0x8f,\n", - " 0x74, 0x64, 0x7c, 0x88, 0x8d, 0x97, 0x77, 0x97, 0x91, 0xac, 0x74, 0x7f,\n", - " 0x60, 0x7e, 0x6e, 0x70, 0x86, 0x83, 0x7f, 0x81, 0x6f, 0x94, 0x62, 0xa4,\n", - " 0x86, 0x7d, 0x90, 0x7c, 0x89, 0x63, 0x7b, 0x89, 0x75, 0xa1, 0x67, 0x69,\n", 
- " 0xa6, 0x76, 0x69, 0x9c, 0x71, 0x79, 0x76, 0x7a, 0x8e, 0x78, 0x94, 0x75,\n", - " 0x5a, 0x76, 0x6b, 0x91, 0x84, 0x75, 0x72, 0x93, 0x79, 0x7e, 0x75, 0x9a,\n", - " 0x6f, 0x7a, 0x7b, 0x80, 0x5f, 0x90, 0x74, 0x7d, 0x9b, 0x76, 0x70, 0x89,\n", - " 0x8f, 0x5f, 0x7f, 0x9c, 0x93, 0x6d, 0x81, 0x7f, 0x8d, 0x7d, 0x74, 0x5d,\n", - " 0x75, 0x88, 0x7b, 0x91, 0x75, 0x6b, 0x7f, 0x8c, 0x71, 0x74, 0x87, 0x88,\n", - " 0x83, 0x75, 0x77, 0x96, 0x7f, 0x67, 0x7d, 0x95, 0x81, 0x5c, 0x71, 0x5c,\n", - " 0x6e, 0x75, 0x86, 0x92, 0x5d, 0x7a, 0x77, 0x9f, 0x6e, 0x79, 0x68, 0x60,\n", - " 0x94, 0x88, 0x88, 0x88, 0x79, 0x7e, 0x8a, 0x6d, 0x84, 0xa7, 0x5b, 0x8e,\n", - " 0x67, 0x9c, 0x7e, 0x75, 0x82, 0x96, 0x7c, 0x7b, 0x72, 0x85, 0x8c, 0xa3,\n", - " 0x96, 0x5b, 0x93, 0x67, 0x7e, 0x9f, 0x71, 0x82, 0x79, 0x8c, 0x93, 0x9d,\n", - " 0x6b, 0x90, 0x8a, 0x8a, 0x55, 0x82, 0x94, 0x74, 0x7d, 0xaa, 0x81, 0x78,\n", - " 0x8a, 0x8d, 0x83, 0x7b, 0x97, 0x92, 0x68, 0x64, 0x8c, 0x5d, 0x78, 0x9b,\n", - " 0x73, 0x95, 0x78, 0x77, 0x6f, 0x61, 0x7c, 0x9d, 0x85, 0x6e, 0x84, 0x4c,\n", - " 0x87, 0x57, 0x93, 0x68, 0x8e, 0x77, 0x78, 0x72, 0x87, 0x91, 0x5f, 0x7e,\n", - " 0xa6, 0x75, 0x66, 0x86, 0x7a, 0x7d, 0x70, 0x6f, 0x87, 0x8b, 0x74, 0x85,\n", - " 0x7d, 0x8b, 0x7f, 0x70, 0x7e, 0x82, 0x84, 0x75, 0x89, 0xa6, 0x7b, 0x7a,\n", - " 0xa5, 0x69, 0x73, 0x74, 0x82, 0x65, 0x8f, 0x98, 0x7b, 0x77, 0x84, 0x92,\n", - " 0x73, 0x8a, 0xa1, 0x93, 0x80, 0x81, 0x72, 0x8a, 0x6b, 0x75, 0x8f, 0x98,\n", - " 0x73, 0x74, 0x6f, 0x70, 0x51, 0x6a, 0x84, 0x9e, 0x78, 0x9b, 0x8c, 0x81,\n", - " 0x7e, 0x75, 0x80, 0x88, 0x73, 0x4e, 0x71, 0x74, 0x8c, 0x74, 0x6a, 0x84,\n", - " 0x7f, 0x6b, 0x78, 0xab, 0x77, 0xa2, 0x98, 0x93, 0x77, 0x75, 0x72, 0x5c,\n", - " 0x60, 0x74, 0x84, 0x67, 0x83, 0x7d, 0x7f, 0x7c, 0x5c, 0x72, 0x70, 0x7f,\n", - " 0x6c, 0x84, 0x90, 0xab, 0x97, 0x7f, 0x6b, 0x82, 0x7f, 0x78, 0x73, 0x7d,\n", - " 0x8f, 0x8e, 0x8a, 0x8f, 0x8d, 0xa3, 0x74, 0x6e, 0x5e, 0x8c, 0x94, 0x86,\n", - " 0x57, 0xb0, 0x79, 0xa8, 0x7b, 0x8d, 0x83, 0x77, 0x89, 0xb6, 0x60, 0x9d,\n", 
- " 0x77, 0x59, 0x72, 0x4d, 0x6f, 0x94, 0x71, 0x75, 0x61, 0x96, 0x86, 0x5d,\n", - " 0x84, 0x68, 0x86, 0x82, 0x8d, 0x70, 0x9a, 0x86, 0x73, 0x64, 0x74, 0x7d,\n", - " 0x80, 0x5a, 0x64, 0x81, 0xa1, 0x71, 0x77, 0x65, 0xa3, 0x76, 0xa3, 0x9d,\n", - " 0x73, 0x7b, 0x8f, 0x7b, 0x79, 0x7d, 0x6c, 0x85, 0x8e, 0x75, 0x65, 0x6a,\n", - " 0x87, 0x70, 0x68, 0x8e, 0x76, 0x5d, 0x66, 0x7c, 0x83, 0x83, 0x7e, 0x89,\n", - " 0x59, 0x8c, 0x75, 0x59, 0x87, 0x7e, 0x7f, 0x90, 0x6b, 0x7b, 0x7e, 0x6d,\n", - " 0x6e, 0x86, 0x69, 0x92, 0x83, 0x8f, 0x8a, 0x60, 0x78, 0x75, 0x61, 0x91,\n", - " 0x73, 0x66, 0x86, 0x86, 0x9f, 0x6f, 0x7b, 0x9a, 0x7c, 0x54, 0x75, 0x8e,\n", - " 0x7e, 0x72, 0x8e, 0x98, 0x94, 0x5f, 0x71, 0x7c, 0x95, 0x9f, 0x8e, 0x83,\n", - " 0x96, 0x4b, 0x8d, 0x84, 0x81, 0x7d, 0x70, 0x84, 0x70, 0x53, 0x8d, 0x84,\n", - " 0x5a, 0x91, 0x88, 0x9a, 0x8f, 0x69, 0x8b, 0x52, 0x85, 0x89, 0x6e, 0x99,\n", - " 0x79, 0x89, 0x9a, 0x82, 0x6e, 0x8b, 0x65, 0x62, 0x80, 0xa8, 0x8f, 0x8a,\n", - " 0x71, 0x61, 0x7e, 0x7d, 0x7e, 0xaa, 0x7f, 0xa0, 0x5e, 0x67, 0x90, 0x86,\n", - " 0x6d, 0xac, 0x74, 0x50, 0x61, 0x91, 0x7d, 0x69, 0x8b, 0x7f, 0x81, 0x7a,\n", - " 0x93, 0x8c, 0x72, 0x64, 0x98, 0x88, 0x91, 0x83, 0x69, 0x6d, 0x78, 0x7a,\n", - " 0x68, 0x7c, 0x76, 0x81, 0xa7, 0x88, 0x8f, 0x79, 0x7d, 0x6c, 0x8a, 0x60,\n", - " 0x88, 0x6d, 0x79, 0x9d, 0x80, 0x82, 0x66, 0x7d, 0x7e, 0x96, 0x78, 0x70,\n", - " 0x9b, 0x70, 0x7e, 0x90, 0x77, 0x94, 0x7b, 0x89, 0x78, 0x84, 0x74, 0x6d,\n", - " 0x7d, 0xa7, 0x75, 0x97, 0x85, 0x83, 0x86, 0x65, 0x75, 0x9a, 0x7c, 0x68,\n", - " 0x87, 0x82, 0x75, 0x68, 0x4c, 0x8a, 0x68, 0x93, 0x7d, 0x88, 0x84, 0x72,\n", - " 0x58, 0x81, 0x5d, 0x83, 0x89, 0x63, 0x83, 0x7d, 0x8e, 0x75, 0x8c, 0x88,\n", - " 0x7f, 0x57, 0x8c, 0x8f, 0xa6, 0x71, 0x8a, 0x95, 0x88, 0x51, 0x74, 0x8a,\n", - " 0x8a, 0x98, 0x72, 0x80, 0x8a, 0x52, 0x90, 0x66, 0x54, 0x8e, 0x7f, 0x94,\n", - " 0x81, 0x49, 0x84, 0x70, 0x5c, 0x93, 0x89, 0x6d, 0x82, 0x7f, 0x70, 0x5d,\n", - " 0x87, 0x8a, 0x71, 0x70, 0x6f, 0xa1, 0x90, 0x9f, 0x74, 0x7c, 0x8c, 0x8b,\n", 
- " 0x72, 0xbf, 0x89, 0x90, 0x5c, 0x8c, 0x75, 0x72, 0x6f, 0xb2, 0x84, 0x6d,\n", - " 0x61, 0x80, 0x7d, 0x7a, 0x66, 0xaa, 0x75, 0x71, 0x89, 0x6d, 0x69, 0x72,\n", - " 0x73, 0x98, 0x8c, 0x78, 0x5a, 0x8e, 0x8c, 0x81, 0x55, 0x81, 0x96, 0x67,\n", - " 0x6f, 0x71, 0x74, 0x7d, 0x8e, 0x66, 0x9a, 0x67, 0xaa, 0x81, 0x90, 0x79,\n", - " 0x89, 0x59, 0x86, 0x66, 0x8f, 0x7d, 0x7e, 0xa2, 0xa4, 0x99, 0x68, 0x7a,\n", - " 0x8c, 0x73, 0x85, 0x77, 0x8b, 0x74, 0x75, 0x66, 0xaa, 0x98, 0x59, 0x8b,\n", - " 0x91, 0x6c, 0x76, 0x73, 0x87, 0xa4, 0x82, 0x82, 0x63, 0x70, 0x7e, 0x73,\n", - " 0x96, 0x97, 0x6f, 0x86, 0x81, 0x6f, 0x83, 0x82, 0x7b, 0x82, 0xa3, 0xa7,\n", - " 0x95, 0x77, 0x84, 0x65, 0x9b, 0x94, 0x6e, 0xb0, 0x75, 0x66, 0x78, 0x82,\n", - " 0x9c, 0x7a, 0x5f, 0xab, 0x99, 0x2f, 0x7f, 0x68, 0xa4, 0x69, 0x8f, 0x9a,\n", - " 0x91, 0x56, 0x6e, 0x75, 0x63, 0x9b, 0x9e, 0x97, 0x95, 0x68, 0x80, 0x6a,\n", - " 0x40, 0x95, 0x53, 0x72, 0x6f, 0x6b, 0x91, 0x78, 0x7f, 0x93, 0x70, 0x8d,\n", - " 0x62, 0x83, 0x7e, 0x64, 0x5b, 0xaa, 0x70, 0x6c, 0x7e, 0x9c, 0x88, 0x76,\n", - " 0x60, 0x70, 0x66, 0x69, 0x84, 0x97, 0x9d, 0x63, 0x5e, 0x9a, 0x7e, 0x52,\n", - " 0x58, 0xb8, 0x95, 0x7c, 0x4d, 0x96, 0x8f, 0x70, 0x71, 0xbf, 0x83, 0x83,\n", - " 0x9e, 0x70, 0x6f, 0x57, 0x70, 0x9a, 0x8d, 0x6e, 0x98, 0x5a, 0x69, 0x6f,\n", - " 0x90, 0x71, 0x8a, 0x5d, 0x8e, 0x6e, 0x69, 0x7a, 0x90, 0x86, 0x89, 0x88,\n", - " 0xb6, 0x77, 0x84, 0x79, 0x76, 0x86, 0x86, 0x7c, 0xbf, 0x6d, 0x5c, 0x90,\n", - " 0xa1, 0x93, 0x72, 0x63, 0x9a, 0x82, 0x7b, 0x61, 0x91, 0x76, 0x82, 0x96,\n", - " 0xb9, 0x80, 0x77, 0x7f, 0xa0, 0x73, 0x61, 0x80, 0x83, 0xc1, 0x92, 0x67,\n", - " 0x7c, 0x81, 0x90, 0x67, 0x8b, 0xbe, 0x81, 0x91, 0x6c, 0x7e, 0x8d, 0x6c,\n", - " 0x62, 0x83, 0x7e, 0x72, 0x64, 0x8a, 0x83, 0x82, 0xaa, 0x8c, 0x74, 0xab,\n", - " 0x79, 0x85, 0x91, 0x79, 0x90, 0x68, 0x5c, 0x9a, 0x7c, 0x36, 0x80, 0x6e,\n", - " 0x93, 0x76, 0x5e, 0xa0, 0xa5, 0x63, 0x73, 0x7e, 0x8d, 0x94, 0x63, 0x99,\n", - " 0x8f, 0x6a, 0x7f, 0x57, 0x57, 0x6f, 0x6d, 0x86, 0x8e, 0x6b, 0x8d, 0x53,\n", 
- " 0x94, 0xba, 0x84, 0x6f, 0x5a, 0x7b, 0x8c, 0x5f, 0x73, 0x93, 0x8b, 0x87,\n", - " 0x6f, 0x9e, 0x8a, 0x87, 0x62, 0x97, 0x86, 0x7c, 0x69, 0xab, 0xa1, 0x95,\n", - " 0x42, 0x8c, 0x8b, 0x66, 0x68, 0x99, 0xa8, 0x74, 0x80, 0xa5, 0x7d, 0x82,\n", - " 0x55, 0xb3, 0x6f, 0x81, 0xa8, 0x9a, 0x80, 0x67, 0x62, 0x7f, 0x78, 0x93,\n", - " 0x90, 0x83, 0x83, 0x7b, 0x77, 0x73, 0x8c, 0x56, 0xa7, 0x85, 0x7b, 0x71,\n", - " 0x8f, 0x5d, 0x92, 0x69, 0xbe, 0x5e, 0x7f, 0x7f, 0x8e, 0x71, 0x84, 0x75,\n", - " 0x95, 0x69, 0x88, 0x6b, 0x96, 0x85, 0x78, 0x39, 0xc2, 0x86, 0x7c, 0x99,\n", - " 0xa1, 0x94, 0x6b, 0x86, 0xb5, 0x5e, 0x7e, 0x6e, 0x81, 0x95, 0x6a, 0x88,\n", - " 0x7b, 0x92, 0x8f, 0x68, 0x97, 0x77, 0x84, 0x73, 0x68, 0x96, 0x5a, 0x92,\n", - " 0x66, 0x74, 0x74, 0x6c, 0x7d, 0x81, 0x6c, 0x93, 0x7f, 0x72, 0x86, 0x74,\n", - " 0xbf, 0x8f, 0x53, 0xa4, 0x89, 0x76, 0xa0, 0x87, 0x97, 0x6a, 0x6b, 0xb1,\n", - " 0x91, 0x50, 0x74, 0x68, 0xa3, 0x60, 0x8d, 0xbc, 0xc1, 0x3e, 0x62, 0x59,\n", - " 0x71, 0x72, 0x6d, 0x80, 0x9f, 0x52, 0x82, 0x6b, 0x5d, 0x7f, 0x74, 0x7e,\n", - " 0x74, 0x84, 0x8a, 0x59, 0x5c, 0x85, 0x6d, 0x9c, 0x75, 0x9a, 0x88, 0x89,\n", - " 0x81, 0x9f, 0x81, 0x88, 0x6a, 0x94, 0x84, 0x5f, 0x6b, 0x9b, 0x83, 0x4f,\n", - " 0x7e, 0xca, 0x99, 0x6d, 0x45, 0x7f, 0x87, 0x71, 0x69, 0xad, 0x95, 0x53,\n", - " 0x6e, 0x9b, 0x90, 0x73, 0x5d, 0xb0, 0x8d, 0x67, 0x83, 0x82, 0xa3, 0x70,\n", - " 0x70, 0x92, 0x82, 0x9a, 0x8a, 0x69, 0x6a, 0x6e, 0x7f, 0x89, 0xa4, 0x76,\n", - " 0x97, 0x62, 0x94, 0x80, 0x87, 0x55, 0x80, 0x76, 0xb3, 0x7e, 0x7e, 0x71,\n", - " 0x94, 0x88, 0x8e, 0x74, 0xb6, 0x4d, 0x7b, 0x73, 0x90, 0x86, 0x7c, 0x66,\n", - " 0xb5, 0x80, 0x7f, 0x84, 0x87, 0x82, 0x67, 0x83, 0x97, 0x91, 0x8a, 0x78,\n", - " 0x8b, 0x83, 0x5d, 0x84, 0x82, 0x9f, 0x8c, 0x91, 0x84, 0x8b, 0x6a, 0x68,\n", - " 0x86, 0x82, 0x73, 0x77, 0x7b, 0x83, 0x6a, 0x84, 0x92, 0x93, 0x90, 0x8b,\n", - " 0x4c, 0x94, 0x98, 0x76, 0xb8, 0x7b, 0xa0, 0xa2, 0x7d, 0x3e, 0x95, 0x88,\n", - " 0xa3, 0x6f, 0x5e, 0xc8, 0x9a, 0x52, 0x81, 0x86, 0xa3, 0x79, 0x88, 0xc3,\n", 
- " 0xbd, 0x54, 0x6c, 0x5e, 0x83, 0x8a, 0x98, 0x88, 0x92, 0x66, 0x73, 0x5b,\n", - " 0x6c, 0x7f, 0x6e, 0x97, 0x8d, 0x58, 0x89, 0x6e, 0x65, 0x7a, 0x7d, 0x7c,\n", - " 0x7e, 0x89, 0x94, 0x89, 0x55, 0xb8, 0x8f, 0x82, 0x6c, 0x9c, 0x96, 0x5e,\n", - " 0x6f, 0xb2, 0x70, 0x76, 0x95, 0xc8, 0x86, 0x78, 0x49, 0xac, 0x7e, 0x6c,\n", - " 0x68, 0xb6, 0xaf, 0x89, 0x68, 0xa5, 0x72, 0x85, 0x69, 0x9c, 0x94, 0x84,\n", - " 0xa4, 0x97, 0x91, 0x61, 0x7a, 0xa3, 0x8f, 0x8e, 0x93, 0x80, 0x8d, 0x76,\n", - " 0x74, 0x84, 0x9b, 0x79, 0x97, 0x4e, 0x67, 0x87, 0x9b, 0x69, 0x85, 0x7d,\n", - " 0xb2, 0x68, 0x76, 0x63, 0xa2, 0x86, 0x97, 0x7f, 0xb5, 0x63, 0x79, 0x76,\n", - " 0x8a, 0x7c, 0x7c, 0x91, 0xb1, 0x42, 0x7d, 0x7a, 0x8c, 0x8e, 0x72, 0xab,\n", - " 0xb8, 0x76, 0xab, 0x81, 0x98, 0x85, 0x56, 0x98, 0x84, 0x9f, 0x70, 0x86,\n", - " 0x76, 0x88, 0x70, 0x8d, 0x71, 0x7b, 0x7a, 0x8d, 0x76, 0x75, 0x62, 0x80,\n", - " 0x81, 0x94, 0x82, 0x6e, 0x57, 0x8d, 0xaf, 0x84, 0xbf, 0x85, 0x82, 0xa7,\n", - " 0x80, 0x89, 0x95, 0x81, 0x91, 0x49, 0x72, 0xa1, 0xa7, 0x3f, 0x72, 0x8b,\n", - " 0x99, 0x72, 0x86, 0xb2, 0xc3, 0x61, 0x55, 0x77, 0x86, 0x77, 0x83, 0xa7,\n", - " 0x95, 0x5a, 0x68, 0x68, 0x6a, 0x63, 0x6a, 0x77, 0x93, 0x7c, 0x88, 0x62,\n", - " 0x79, 0x84, 0x8b, 0x82, 0x58, 0x8f, 0x9c, 0x56, 0x77, 0xb1, 0x65, 0x8c,\n", - " 0x76, 0x91, 0x83, 0x5b, 0x62, 0x91, 0x87, 0x68, 0x71, 0xb0, 0x87, 0x64,\n", - " 0x62, 0x91, 0x94, 0x58, 0x7f, 0xac, 0xa3, 0x84, 0x75, 0xaa, 0xa3, 0x4d,\n", - " 0x7a, 0xc2, 0x84, 0x8a, 0x6d, 0xa2, 0x76, 0x74, 0x8c, 0x9e, 0x7c, 0x71,\n", - " 0x86, 0x70, 0x6d, 0x79, 0x9a, 0x74, 0xb0, 0x8d, 0xa5, 0x7e, 0x6b, 0x63,\n", - " 0x96, 0x74, 0x99, 0x76, 0xd0, 0x62, 0x85, 0x9d, 0x8f, 0x6d, 0x83, 0x88,\n", - " 0xb0, 0x62, 0x9b, 0x87, 0x91, 0x82, 0x7a, 0x90, 0x9c, 0x61, 0x6d, 0x97,\n", - " 0x84, 0x7c, 0x74, 0x8e, 0x8b, 0x75, 0x9a, 0x7e, 0x7c, 0x7d, 0x96, 0x81,\n", - " 0x94, 0x69, 0x83, 0x6f, 0x8e, 0x7c, 0x7b, 0x7a, 0x73, 0x98, 0x74, 0x9e,\n", - " 0x72, 0x8c, 0x5f, 0x7d, 0x99, 0x79, 0x5b, 0x73, 0x65, 0x78, 0xa5, 0x7d,\n", 
- " 0xa2, 0x98, 0x91, 0x91, 0x87, 0x7b, 0x8c, 0x82, 0xb8, 0x6b, 0x82, 0xba,\n", - " 0xa5, 0x3f, 0x83, 0x7a, 0x9b, 0x73, 0x93, 0xa1, 0xbe, 0x55, 0x6b, 0x75,\n", - " 0x94, 0x7d, 0x9c, 0xa1, 0x82, 0x50, 0x75, 0x5a, 0x88, 0x6e, 0x72, 0x7f,\n", - " 0x99, 0x64, 0x72, 0x49, 0x69, 0x79, 0x6d, 0x94, 0x73, 0x79, 0x80, 0x6f,\n", - " 0x72, 0xbc, 0x9d, 0x71, 0x7a, 0x9d, 0x8a, 0x55, 0x74, 0xaa, 0xa1, 0x85,\n", - " 0x7e, 0xc4, 0xa0, 0x7e, 0x50, 0x99, 0x68, 0x8c, 0x8a, 0xb0, 0x99, 0x6c,\n", - " 0x6d, 0xaf, 0x7b, 0x7b, 0x79, 0xba, 0x8a, 0x7a, 0x9d, 0x8b, 0x67, 0x87,\n", - " 0x76, 0xa9, 0x7f, 0x7e, 0x8b, 0x7b, 0x87, 0x84, 0x82, 0x74, 0xa3, 0x91,\n", - " 0x9a, 0x6a, 0x93, 0x7e, 0x87, 0x5b, 0x95, 0x89, 0xbb, 0x5d, 0x74, 0x6c,\n", - " 0x88, 0x7e, 0x81, 0x7e, 0xb6, 0x6b, 0x91, 0x92, 0x83, 0x78, 0x79, 0x95,\n", - " 0x90, 0x5e, 0x68, 0x8f, 0xa8, 0x92, 0x66, 0x8e, 0x6b, 0x8c, 0x86, 0x80,\n", - " 0x7e, 0x7e, 0x70, 0x84, 0x7d, 0x71, 0x67, 0x94, 0x71, 0x69, 0x84, 0x8f,\n", - " 0x6c, 0x72, 0x85, 0x83, 0x69, 0x76, 0x57, 0x62, 0x83, 0x96, 0x83, 0x77,\n", - " 0x64, 0x5f, 0xae, 0x7c, 0xa7, 0x88, 0x91, 0x8c, 0x9e, 0x7f, 0xa8, 0x8a,\n", - " 0x93, 0x6f, 0x58, 0xae, 0xb4, 0x4b, 0x7f, 0x64, 0x9f, 0x5a, 0x9e, 0xb6,\n", - " 0xa6, 0x6b, 0x79, 0x84, 0x6b, 0x7c, 0x8b, 0x94, 0x85, 0x60, 0x6b, 0x55,\n", - " 0x79, 0x68, 0x77, 0x75, 0x85, 0x5c, 0x91, 0x5e, 0x5a, 0x71, 0x68, 0x7b,\n", - " 0x73, 0x91, 0x6c, 0x6e, 0x71, 0x8b, 0x76, 0x86, 0x99, 0xb8, 0x91, 0x68,\n", - " 0x51, 0xa7, 0x6f, 0x7a, 0x8a, 0xc3, 0x8e, 0x65, 0x64, 0x9e, 0x80, 0x78,\n", - " 0x6c, 0xc5, 0xa2, 0x75, 0x71, 0xa5, 0x96, 0x4f, 0x70, 0xa4, 0x7a, 0x7c,\n", - " 0x8c, 0x80, 0x89, 0x97, 0x9a, 0x9a, 0x85, 0x89, 0x92, 0x8f, 0x81, 0x6f,\n", - " 0x82, 0x6a, 0xb8, 0x74, 0x8f, 0x51, 0x7b, 0x8b, 0x8c, 0x55, 0x7e, 0x8c,\n", - " 0xb2, 0x41, 0x85, 0x77, 0x9c, 0x73, 0x75, 0x8d, 0x9f, 0x64, 0x92, 0x77,\n", - " 0xa0, 0x87, 0x5f, 0x71, 0x85, 0x68, 0x8a, 0x78, 0x91, 0x78, 0x75, 0x7a,\n", - " 0x81, 0x67, 0x96, 0x64, 0x96, 0x85, 0x7a, 0x7e, 0x83, 0x74, 0x82, 0x8f,\n", 
- " 0x98, 0x75, 0x77, 0x84, 0x7e, 0x88, 0x94, 0x7d, 0x79, 0x8c, 0x47, 0x79,\n", - " 0x96, 0x7f, 0x8e, 0x90, 0x50, 0x7f, 0xa3, 0x77, 0xa8, 0x7f, 0x65, 0x9f,\n", - " 0xb9, 0x4c, 0xa7, 0x7f, 0xaa, 0x6e, 0xa2, 0xb0, 0xb8, 0x51, 0x6b, 0x74,\n", - " 0xaa, 0x63, 0x6c, 0xa3, 0xb6, 0x5e, 0x74, 0x6a, 0x75, 0x69, 0x87, 0x7f,\n", - " 0x9d, 0x71, 0x73, 0x72, 0x70, 0x57, 0x5a, 0x7e, 0x8b, 0x64, 0x9a, 0x4d,\n", - " 0x97, 0x81, 0x7b, 0x75, 0x6e, 0x92, 0x5f, 0x67, 0x7e, 0xaa, 0x90, 0x7a,\n", - " 0x92, 0xae, 0x92, 0x68, 0x79, 0x9d, 0x4f, 0x6c, 0x79, 0xb4, 0x9c, 0x58,\n", - " 0x86, 0x8e, 0x62, 0x72, 0x71, 0xc1, 0xac, 0x7d, 0x7a, 0x94, 0x8f, 0x7b,\n", - " 0x88, 0xa8, 0x8d, 0x82, 0x75, 0x9b, 0x5f, 0x83, 0x82, 0xb3, 0x7a, 0x93,\n", - " 0x94, 0x76, 0x70, 0x7e, 0x72, 0x7e, 0x8f, 0x8c, 0xa7, 0x53, 0x72, 0x77,\n", - " 0x7a, 0x64, 0xa8, 0x83, 0xc5, 0x56, 0x71, 0x7b, 0x96, 0x73, 0x7c, 0x73,\n", - " 0x93, 0x49, 0x83, 0x99, 0xa2, 0x83, 0x74, 0x79, 0xa4, 0x61, 0x8e, 0x84,\n", - " 0x7a, 0x7d, 0x56, 0x98, 0x97, 0x6d, 0x87, 0x8c, 0x7a, 0x77, 0x6a, 0x67,\n", - " 0x8a, 0x6f, 0xa2, 0x82, 0x8d, 0x85, 0x6d, 0x8f, 0x7e, 0x74, 0x72, 0x74,\n", - " 0x91, 0x75, 0x58, 0x7f, 0x9e, 0x7c, 0x9c, 0x75, 0x61, 0x6f, 0x85, 0x7b,\n", - " 0xbe, 0x84, 0x85, 0x9b, 0x8c, 0x3b, 0x9a, 0x90, 0xab, 0x77, 0x8e, 0xa2,\n", - " 0xbd, 0x55, 0x96, 0x70, 0xa8, 0x78, 0x98, 0x9c, 0xc3, 0x67, 0x6e, 0x81,\n", - " 0x70, 0x75, 0x96, 0x9c, 0x8a, 0x5b, 0x73, 0x54, 0x69, 0x6c, 0x5d, 0x82,\n", - " 0x99, 0x5b, 0x8c, 0x6d, 0x87, 0x80, 0x67, 0x86, 0x88, 0x7c, 0x70, 0x6b,\n", - " 0x75, 0xab, 0x8e, 0x79, 0x90, 0x91, 0xaf, 0x67, 0x5c, 0xa1, 0x5c, 0x6f,\n", - " 0x75, 0xa1, 0x95, 0x5f, 0x82, 0x8f, 0x78, 0x5d, 0x7c, 0xb8, 0x8a, 0x8a,\n", - " 0x6a, 0x98, 0x6e, 0x51, 0x6b, 0xaa, 0x7d, 0x7c, 0x80, 0x94, 0x79, 0x6d,\n", - " 0xaa, 0x8a, 0x7e, 0x77, 0xa4, 0x78, 0xa5, 0x6d, 0x7c, 0x75, 0xa8, 0x6f,\n", - " 0xa6, 0x51, 0x8e, 0x80, 0x96, 0x5b, 0x9d, 0x7b, 0xb8, 0x4e, 0x6c, 0x87,\n", - " 0x95, 0x7c, 0x78, 0x71, 0xb0, 0x5a, 0x99, 0xa0, 0x90, 0x87, 0x65, 0x8b,\n", 
- " 0x98, 0x68, 0x92, 0x76, 0x82, 0x77, 0x6a, 0x8a, 0x91, 0x84, 0x87, 0x8b,\n", - " 0x87, 0x84, 0x7a, 0x81, 0x77, 0x55, 0x8e, 0x86, 0x7a, 0x74, 0x65, 0x88,\n", - " 0x62, 0x51, 0xa1, 0x91, 0x88, 0x76, 0x5f, 0x89, 0x9f, 0x86, 0x66, 0x67,\n", - " 0x64, 0x75, 0x9e, 0x74, 0xc1, 0x80, 0x58, 0xa9, 0x8f, 0x5e, 0x94, 0x88,\n", - " 0xaf, 0x6f, 0x6c, 0xa4, 0xa1, 0x4d, 0x68, 0x66, 0xc2, 0x6e, 0x89, 0x9b,\n", - " 0xa3, 0x5a, 0x63, 0x5b, 0x9c, 0x7a, 0x93, 0x76, 0x9d, 0x6d, 0x71, 0x5d,\n", - " 0x80, 0x66, 0x79, 0x80, 0x7c, 0x65, 0x74, 0x64, 0x88, 0x90, 0x79, 0x89,\n", - " 0x72, 0x88, 0x67, 0x75, 0x6a, 0x96, 0x56, 0x67, 0x88, 0xa1, 0x8c, 0x6c,\n", - " 0x55, 0xb2, 0x8a, 0x71, 0x88, 0xdc, 0x7a, 0x72, 0x94, 0x9d, 0x7c, 0x76,\n", - " 0x6a, 0xaa, 0xa8, 0x7f, 0x80, 0xa0, 0x6b, 0x6f, 0x84, 0xe0, 0x68, 0x93,\n", - " 0xa6, 0x99, 0x69, 0x68, 0x93, 0xa0, 0x93, 0x6b, 0x87, 0x8b, 0x80, 0x90,\n", - " 0x90, 0x89, 0x8f, 0x7f, 0xaf, 0x6f, 0x82, 0x6d, 0x94, 0x70, 0x97, 0x8f,\n", - " 0xb0, 0x40, 0x9b, 0x67, 0x78, 0x86, 0x90, 0x8b, 0xa7, 0x51, 0x7f, 0x79,\n", - " 0x90, 0x71, 0x6d, 0x80, 0x95, 0x63, 0x7d, 0x87, 0xa0, 0x7e, 0x7b, 0x85,\n", - " 0x8e, 0x6d, 0xa1, 0x76, 0x70, 0x7b, 0x66, 0x87, 0x90, 0x7a, 0x86, 0x88,\n", - " 0x89, 0x87, 0x6a, 0x91, 0x78, 0x74, 0x76, 0x8d, 0x7e, 0x86, 0x63, 0x90,\n", - " 0x98, 0x7d, 0x4a, 0x85, 0x4f, 0x9d, 0xa2, 0x7c, 0xb4, 0x88, 0x78, 0xb5,\n", - " 0x8f, 0x3f, 0xa7, 0x7d, 0xa4, 0x7c, 0x60, 0x9c, 0xa8, 0x41, 0x6b, 0x7f,\n", - " 0xa2, 0x7f, 0x68, 0xaa, 0xb4, 0x73, 0x56, 0x62, 0x87, 0x72, 0xa5, 0x7c,\n", - " 0x97, 0x69, 0x58, 0x6b, 0x89, 0x57, 0x51, 0x80, 0x92, 0x7a, 0x7c, 0x4c,\n", - " 0x7c, 0x7b, 0x69, 0x5f, 0x90, 0x77, 0x78, 0x67, 0x7a, 0xad, 0x79, 0x5c,\n", - " 0x9c, 0xbf, 0xa6, 0x64, 0x53, 0xb3, 0x5e, 0x59, 0x86, 0xb9, 0x94, 0x65,\n", - " 0x70, 0x9d, 0x7a, 0x80, 0x7c, 0xae, 0x9c, 0x7b, 0x66, 0xae, 0x83, 0x5f,\n", - " 0x81, 0xc5, 0x8b, 0x7e, 0x9b, 0x89, 0x84, 0x7f, 0x7c, 0xa5, 0x5c, 0x89,\n", - " 0x8a, 0x75, 0x99, 0x6d, 0x8e, 0x90, 0x9f, 0x81, 0x81, 0x6b, 0x87, 0x76,\n", 
- " 0x92, 0x6f, 0xab, 0x95, 0x95, 0x4c, 0x97, 0x72, 0x80, 0x87, 0x83, 0x87,\n", - " 0xa3, 0x59, 0xad, 0x74, 0x93, 0x7f, 0x77, 0x78, 0x8d, 0x66, 0x9b, 0x7a,\n", - " 0x7d, 0x95, 0x64, 0x7f, 0x6d, 0x5c, 0x8e, 0x94, 0x92, 0x82, 0x60, 0x8d,\n", - " 0x75, 0x55, 0x8c, 0x8b, 0x8f, 0x86, 0x7d, 0x7c, 0x74, 0x57, 0x78, 0x9d,\n", - " 0x71, 0x65, 0x66, 0x7f, 0xaa, 0x92, 0x66, 0x81, 0x5a, 0x71, 0xa6, 0x78,\n", - " 0x9d, 0x8a, 0x5a, 0x8a, 0x91, 0x59, 0xb7, 0x5c, 0xc3, 0x73, 0x89, 0x9d,\n", - " 0xa7, 0x62, 0x77, 0x72, 0x9f, 0x92, 0x6a, 0x9f, 0xaa, 0x71, 0x6b, 0x5e,\n", - " 0x7d, 0x73, 0x8d, 0x89, 0xba, 0x61, 0x73, 0x6e, 0x71, 0x8a, 0x79, 0x7c,\n", - " 0x94, 0x76, 0x76, 0x65, 0x81, 0x6f, 0x4e, 0x75, 0x6e, 0x8b, 0x7d, 0x50,\n", - " 0x56, 0xb8, 0x72, 0x67, 0x93, 0xc6, 0x88, 0x6f, 0x57, 0xb7, 0x80, 0x4c,\n", - " 0x97, 0xc4, 0xb6, 0x71, 0x72, 0x9e, 0x6f, 0x72, 0x8d, 0xa5, 0x8f, 0x89,\n", - " 0x74, 0xae, 0x78, 0x70, 0x6e, 0xbb, 0x8f, 0x73, 0x74, 0x8b, 0x5e, 0x86,\n", - " 0x8b, 0x8a, 0x72, 0x71, 0x84, 0x84, 0x77, 0xa3, 0xa6, 0x73, 0xa4, 0x7e,\n", - " 0xab, 0x5d, 0x75, 0x96, 0x94, 0x5f, 0x8b, 0x74, 0x9c, 0x63, 0x8d, 0x81,\n", - " 0x80, 0x6a, 0x91, 0x88, 0x93, 0x53, 0x80, 0x75, 0x79, 0x8d, 0x78, 0x74,\n", - " 0x7c, 0x73, 0xb2, 0x89, 0x8e, 0xab, 0x75, 0x6c, 0x7a, 0x79, 0x99, 0x77,\n", - " 0x7d, 0x89, 0x5a, 0x81, 0x7c, 0x75, 0x6a, 0x7e, 0x8c, 0x83, 0x78, 0x8e,\n", - " 0x62, 0x76, 0x77, 0x6b, 0x79, 0x66, 0x6e, 0x82, 0xa1, 0x8d, 0x52, 0x79,\n", - " 0x70, 0x7d, 0xa9, 0x6a, 0x95, 0x7f, 0x59, 0x94, 0x8f, 0x73, 0xb7, 0x85,\n", - " 0xb3, 0x80, 0x77, 0x9f, 0xb8, 0x4d, 0x82, 0x7c, 0xa0, 0xa4, 0x7b, 0x8c,\n", - " 0xa9, 0x78, 0x62, 0x6b, 0x8a, 0x93, 0x80, 0x68, 0x9b, 0x6d, 0x6b, 0x7b,\n", - " 0x84, 0x8f, 0x86, 0x70, 0x70, 0x73, 0x84, 0x4f, 0x7c, 0x75, 0x64, 0x8d,\n", - " 0x6e, 0x81, 0x7c, 0x72, 0x81, 0xb0, 0x74, 0x65, 0xa7, 0xae, 0x80, 0x70,\n", - " 0x5e, 0xa4, 0x58, 0x54, 0x8e, 0xa7, 0x96, 0x65, 0x66, 0x8b, 0x6c, 0x5d,\n", - " 0x6b, 0xbe, 0x94, 0x79, 0x80, 0xa1, 0x91, 0x78, 0x6d, 0xc2, 0x82, 0x85,\n", 
- " 0x81, 0x7d, 0x88, 0x79, 0x93, 0x96, 0x7f, 0x7e, 0x7d, 0x92, 0x75, 0xa2,\n", - " 0x9f, 0x7b, 0x92, 0x77, 0x8a, 0x7c, 0x80, 0x8b, 0x9b, 0x64, 0xa5, 0x74,\n", - " 0xa1, 0x74, 0x7f, 0x7e, 0x85, 0x78, 0x9c, 0x86, 0x9f, 0x62, 0x8f, 0x7f,\n", - " 0x8a, 0x90, 0x6d, 0x7d, 0x93, 0x61, 0x9d, 0x81, 0x9b, 0x99, 0x69, 0x87,\n", - " 0x74, 0x7d, 0x8e, 0x8e, 0x7b, 0x7c, 0x6a, 0x71, 0x7d, 0x7f, 0x74, 0x74,\n", - " 0x7b, 0x65, 0x6e, 0x91, 0x7c, 0x6e, 0x80, 0x8c, 0x8a, 0x6c, 0x6b, 0x76,\n", - " 0xad, 0x94, 0x64, 0x81, 0x69, 0x7b, 0xac, 0x76, 0x9f, 0x71, 0x85, 0x85,\n", - " 0x8b, 0x66, 0xb5, 0x87, 0xb3, 0x63, 0x8b, 0x95, 0x8e, 0x50, 0x91, 0x77,\n", - " 0xa1, 0x99, 0x64, 0x81, 0xb3, 0x63, 0x6e, 0x7a, 0x7f, 0x73, 0x7a, 0x7b,\n", - " 0x93, 0x6d, 0x75, 0x75, 0x7c, 0x7b, 0x59, 0x7c, 0x7c, 0x68, 0x67, 0x78,\n", - " 0x79, 0x75, 0x53, 0x86, 0x84, 0x84, 0x91, 0x71, 0x85, 0xb1, 0x84, 0x64,\n", - " 0x88, 0xc0, 0x94, 0x5f, 0x6f, 0x9b, 0x69, 0x67, 0x97, 0x94, 0x88, 0x6a,\n", - " 0x7e, 0x94, 0x9e, 0x7f, 0x81, 0x9c, 0xa7, 0x7f, 0x7a, 0xa2, 0x63, 0x69,\n", - " 0x82, 0xc2, 0x5e, 0x8d, 0x7c, 0x89, 0x63, 0x93, 0x84, 0xb8, 0x76, 0x89,\n", - " 0x96, 0x87, 0x79, 0x88, 0xa6, 0x8e, 0x9b, 0x93, 0x9c, 0x5d, 0x92, 0x92,\n", - " 0x82, 0x5e, 0x85, 0x88, 0xad, 0x73, 0xa4, 0x6f, 0x74, 0x8e, 0x77, 0x89,\n", - " 0x9b, 0x6e, 0x82, 0x76, 0x93, 0xae, 0x82, 0x87, 0x76, 0x6f, 0x80, 0x76,\n", - " 0x95, 0x8e, 0x5e, 0x85, 0x7b, 0x68, 0x7f, 0x7c, 0x82, 0x94, 0x80, 0x91,\n", - " 0x77, 0x71, 0x7c, 0x94, 0x80, 0x62, 0x65, 0x7c, 0x5e, 0x70, 0x76, 0x75,\n", - " 0x7b, 0x60, 0x5f, 0x69, 0xb3, 0x6e, 0x95, 0x9d, 0x5a, 0x5b, 0x9e, 0x6e,\n", - " 0xa6, 0x80, 0x5d, 0xa5, 0x83, 0x5b, 0xa4, 0x80, 0xb3, 0x79, 0x83, 0xb6,\n", - " 0xa3, 0x73, 0x84, 0x67, 0x8d, 0x8f, 0x9d, 0x78, 0xb8, 0x8a, 0x7b, 0x6c,\n", - " 0x85, 0x87, 0x6d, 0x75, 0xae, 0x75, 0x53, 0x71, 0x6b, 0x87, 0x67, 0x7b,\n", - " 0x7f, 0x86, 0x58, 0x73, 0x7d, 0x87, 0x5d, 0x7f, 0x7d, 0x63, 0x92, 0x65,\n", - " 0x7a, 0x9c, 0x6f, 0x87, 0x81, 0xa9, 0x91, 0x54, 0x66, 0x8e, 0x58, 0x6d,\n", 
- " 0x92, 0xc2, 0xa9, 0x7b, 0x6e, 0x96, 0x7c, 0x60, 0x7e, 0xa8, 0x85, 0x94,\n", - " 0x90, 0x8b, 0x77, 0x79, 0x77, 0xa7, 0x8f, 0x83, 0x80, 0x99, 0x8c, 0x80,\n", - " 0x93, 0x9c, 0x73, 0x9e, 0x75, 0x90, 0x67, 0x74, 0x99, 0x98, 0x7e, 0x76,\n", - " 0x9f, 0x82, 0x90, 0x95, 0x9d, 0x5f, 0x95, 0x98, 0x8c, 0x5f, 0x77, 0x83,\n", - " 0x7b, 0x72, 0x85, 0x7c, 0x97, 0x74, 0x81, 0x80, 0x8d, 0x89, 0x7d, 0x69,\n", - " 0x95, 0x85, 0x83, 0x5e, 0x95, 0x74, 0x54, 0x7f, 0x6c, 0x67, 0x9b, 0x83,\n", - " 0x88, 0x8e, 0x6f, 0x96, 0x81, 0x7f, 0x6e, 0x87, 0x8f, 0x6f, 0x61, 0x87,\n", - " 0x63, 0x66, 0x72, 0x77, 0x75, 0x6d, 0x59, 0x7d, 0xaa, 0x85, 0x62, 0x83,\n", - " 0x97, 0x94, 0x96, 0x89, 0x9d, 0x90, 0x7d, 0x91, 0x78, 0x57, 0xa0, 0x7f,\n", - " 0xa2, 0x62, 0x63, 0x99, 0x77, 0x71, 0x7f, 0x61, 0x99, 0x89, 0x6f, 0xa2,\n", - " 0xae, 0x92, 0x88, 0x51, 0x87, 0x7a, 0x6f, 0x89, 0xa8, 0x89, 0x64, 0x81,\n", - " 0x84, 0x79, 0x5b, 0x73, 0x82, 0x6e, 0x7e, 0x5d, 0x8f, 0x82, 0x51, 0x69,\n", - " 0x8e, 0x76, 0x8b, 0x58, 0x89, 0xb2, 0x52, 0x72, 0x7f, 0xae, 0x96, 0x5a,\n", - " 0x80, 0xa1, 0x74, 0x62, 0x8d, 0xbe, 0x87, 0x6c, 0x6d, 0xad, 0x83, 0x5a,\n", - " 0x6c, 0xa5, 0x7f, 0x7c, 0x7a, 0xa1, 0x75, 0x6d, 0x85, 0xbe, 0x91, 0x8e,\n", - " 0x96, 0x8c, 0x87, 0x74, 0x8b, 0x82, 0x96, 0x8f, 0x8f, 0x93, 0x8f, 0x8c,\n", - " 0x9a, 0x78, 0x73, 0x6e, 0x91, 0x8d, 0x7e, 0x81, 0x81, 0x52, 0x90, 0x85,\n", - " 0x77, 0x66, 0x7e, 0x75, 0x8a, 0x67, 0x72, 0x76, 0x82, 0x7b, 0x6e, 0x67,\n", - " 0x96, 0x7b, 0x75, 0x76, 0x8d, 0x76, 0x7f, 0x79, 0x84, 0x7b, 0x57, 0x81,\n", - " 0x76, 0x80, 0x67, 0x8c, 0x7c, 0x80, 0x67, 0x85, 0x79, 0x5b, 0x97, 0x74,\n", - " 0x91, 0x75, 0x82, 0x75, 0x6b, 0x94, 0x7e, 0x85, 0x8e, 0x77, 0x5d, 0x78,\n", - " 0xb5, 0x8b, 0x73, 0x7f, 0x62, 0x8f, 0xb1, 0x7d, 0xa2, 0x85, 0x6b, 0x92,\n", - " 0x75, 0x75, 0xb8, 0x7d, 0xb3, 0x67, 0x5f, 0xa6, 0x9b, 0x85, 0x9a, 0x67,\n", - " 0xbe, 0x8d, 0x92, 0x88, 0xa5, 0x7c, 0xaa, 0x5a, 0x71, 0x7b, 0x70, 0x77,\n", - " 0xa0, 0xa4, 0x5e, 0x55, 0x6b, 0x8e, 0x53, 0x89, 0x8a, 0x5a, 0x7c, 0x54,\n", 
- " 0x7c, 0x8b, 0x53, 0x77, 0x67, 0x77, 0x67, 0x5d, 0x91, 0xac, 0x78, 0x81,\n", - " 0x8e, 0xb5, 0x6d, 0x58, 0x78, 0xa6, 0x7c, 0x85, 0x87, 0xb3, 0x76, 0x5d,\n", - " 0x7c, 0x87, 0x57, 0x68, 0x82, 0x8f, 0x89, 0x76, 0x86, 0x9f, 0x6c, 0x68,\n", - " 0x7c, 0x87, 0x79, 0x9f, 0x86, 0x9e, 0x83, 0x70, 0x8d, 0xb2, 0x84, 0x71,\n", - " 0x71, 0x91, 0x9f, 0x8e, 0x83, 0x84, 0x87, 0x80, 0x94, 0x80, 0x7d, 0x8d,\n", - " 0x7c, 0x56, 0x5f, 0x80, 0x7d, 0x84, 0x61, 0x6e, 0x69, 0x80, 0x8b, 0x67,\n", - " 0xa4, 0x8b, 0x98, 0x7a, 0x8a, 0x6c, 0x77, 0x66, 0x7d, 0x6e, 0x84, 0x78,\n", - " 0x82, 0x7d, 0x61, 0x88, 0x6e, 0x53, 0x92, 0x75, 0x88, 0x77, 0x82, 0x9f,\n", - " 0x9e, 0x6f, 0x9c, 0x76, 0x91, 0x78, 0x69, 0x7f, 0x71, 0x6c, 0x6f, 0x7d,\n", - " 0x83, 0x6e, 0x3c, 0x84, 0x90, 0x8b, 0x71, 0x69, 0x75, 0x81, 0xc8, 0x84,\n", - " 0xa7, 0x8a, 0x8a, 0x90, 0x96, 0x86, 0x9e, 0x68, 0x99, 0x84, 0x8c, 0xa0,\n", - " 0x8a, 0x71, 0x7d, 0x41, 0xa1, 0x98, 0x77, 0x91, 0xaa, 0x86, 0x96, 0x5e,\n", - " 0x86, 0x76, 0xa7, 0x83, 0xac, 0x86, 0x66, 0x46, 0x6a, 0x81, 0x64, 0x77,\n", - " 0x67, 0x53, 0x80, 0x59, 0x73, 0x71, 0x63, 0x71, 0x76, 0x86, 0x62, 0x4f,\n", - " 0x83, 0xa4, 0x5d, 0x66, 0x93, 0x87, 0x87, 0x5b, 0x7f, 0x9d, 0x61, 0x9d,\n", - " 0x94, 0xa4, 0x84, 0x75, 0x67, 0xb3, 0x7b, 0x6d, 0x64, 0x98, 0x62, 0x77,\n", - " 0x7d, 0x98, 0x8e, 0x75, 0x7d, 0xa6, 0xa4, 0x8c, 0x83, 0x8b, 0x7a, 0x97,\n", - " 0x6c, 0x7f, 0x66, 0x7f, 0x8f, 0x98, 0x72, 0x6e, 0x75, 0x65, 0x80, 0x8d,\n", - " 0x88, 0x7d, 0x8c, 0x8d, 0x67, 0x68, 0xab, 0x8c, 0x8b, 0x76, 0x87, 0x69,\n", - " 0x88, 0x6c, 0x83, 0x6e, 0x88, 0x64, 0xa8, 0x67, 0xa5, 0x5b, 0x65, 0x60,\n", - " 0x6b, 0x62, 0x76, 0x78, 0x8c, 0x5b, 0x61, 0x6f, 0x66, 0x65, 0x92, 0x67,\n", - " 0x84, 0x7b, 0x80, 0x86, 0x7b, 0x6c, 0x86, 0x7a, 0x72, 0x7b, 0x4d, 0x94,\n", - " 0x80, 0x67, 0x8e, 0x8d, 0x7f, 0x79, 0x65, 0x78, 0xa3, 0x71, 0x80, 0x74,\n", - " 0xa7, 0xa8, 0x97, 0x78, 0x91, 0x77, 0x98, 0x86, 0x82, 0x64, 0xa5, 0x6e,\n", - " 0x7a, 0x5d, 0x6f, 0xad, 0x9b, 0x7a, 0x91, 0x4b, 0xa1, 0x75, 0x95, 0x76,\n", 
- " 0xac, 0x9d, 0xa3, 0x65, 0x65, 0x6a, 0x81, 0x8b, 0x9f, 0x67, 0x6b, 0x6a,\n", - " 0x60, 0x5b, 0x77, 0x96, 0x73, 0x78, 0x5a, 0x77, 0x5f, 0x68, 0x70, 0x72,\n", - " 0x78, 0x65, 0x81, 0x20, 0x86, 0x99, 0x80, 0x7a, 0xa5, 0xb1, 0x69, 0x45,\n", - " 0x7d, 0xa6, 0x7d, 0x85, 0xaa, 0xa9, 0x65, 0x60, 0x75, 0x9b, 0x61, 0x92,\n", - " 0x91, 0x8f, 0x8a, 0x81, 0x88, 0x9c, 0x81, 0x7d, 0x7b, 0x8f, 0x7e, 0x9e,\n", - " 0x82, 0x94, 0x95, 0x80, 0x73, 0xae, 0x7b, 0x7a, 0x79, 0x8c, 0x8b, 0x65,\n", - " 0x71, 0x75, 0x8d, 0x7a, 0x90, 0x83, 0x7b, 0x77, 0x71, 0x4f, 0x70, 0x95,\n", - " 0x87, 0x69, 0x97, 0x8e, 0x70, 0x92, 0x6e, 0x91, 0x9d, 0x72, 0x75, 0x82,\n", - " 0xad, 0x81, 0x78, 0x8d, 0x6f, 0x65, 0x88, 0x86, 0x8c, 0x8e, 0x59, 0x8b,\n", - " 0x67, 0x69, 0x8b, 0x78, 0x7f, 0x59, 0x73, 0x87, 0x6f, 0x86, 0x66, 0x7c,\n", - " 0x96, 0x68, 0x59, 0x78, 0x67, 0x92, 0x7b, 0x76, 0x80, 0x6e, 0x4a, 0x7b,\n", - " 0x99, 0x67, 0x72, 0x9c, 0x7a, 0x80, 0x76, 0x5f, 0x8e, 0x4f, 0x71, 0x77,\n", - " 0xab, 0x78, 0x99, 0x50, 0x83, 0x65, 0x78, 0x8c, 0xbb, 0x8d, 0x4e, 0x54,\n", - " 0x81, 0x6f, 0x7f, 0x91, 0xb9, 0x79, 0x9c, 0x65, 0x5a, 0x5a, 0x73, 0x8c,\n", - " 0x9a, 0xac, 0x99, 0x44, 0x7d, 0x4f, 0x78, 0x5a, 0x7d, 0x79, 0x57, 0x44,\n", - " 0x6f, 0x6a, 0x75, 0x7f, 0x5f, 0x6f, 0x72, 0x62, 0x7f, 0x89, 0x57, 0x91,\n", - " 0x8d, 0x83, 0x7e, 0x63, 0x8c, 0x95, 0x48, 0x78, 0xa9, 0x88, 0x84, 0x5b,\n", - " 0x8c, 0xa5, 0x65, 0x71, 0x88, 0x82, 0x7e, 0xa4, 0x8d, 0x7d, 0x7d, 0x8d,\n", - " 0x91, 0x7c, 0x73, 0x7d, 0x99, 0x89, 0x6d, 0xa1, 0x98, 0x84, 0x8b, 0x6b,\n", - " 0x89, 0x86, 0x84, 0x7e, 0x86, 0x87, 0x78, 0x8c, 0x96, 0x92, 0x5a, 0xa0,\n", - " 0x64, 0x73, 0x91, 0x88, 0x8f, 0x6b, 0x96, 0x5c, 0x99, 0x62, 0x78, 0x6c,\n", - " 0x87, 0x4d, 0x5d, 0x69, 0x7b, 0x81, 0x4a, 0x61, 0x71, 0x69, 0x7d, 0x91,\n", - " 0x67, 0x92, 0x68, 0x6f, 0x50, 0x5e, 0x61, 0x7e, 0x81, 0x70, 0x5f, 0x7b,\n", - " 0x6b, 0x55, 0x71, 0x6c, 0x70, 0x53, 0x3f, 0x80, 0x6e, 0x57, 0x96, 0x84,\n", - " 0x75, 0x51, 0x60, 0x9a, 0x7f, 0xa5, 0x80, 0x94, 0x95, 0x74, 0x7c, 0x83,\n", 
- " 0xa0, 0x93, 0x5d, 0x92, 0x83, 0x66, 0x67, 0x8a, 0x8b, 0x9b, 0x81, 0x69,\n", - " 0x73, 0x91, 0x6b, 0x79, 0x93, 0x88, 0x64, 0x68, 0x81, 0x8c, 0x6f, 0x81,\n", - " 0x6f, 0x80, 0x68, 0x5f, 0x9c, 0x95, 0x76, 0x93, 0x87, 0x68, 0x83, 0x94,\n", - " 0x8b, 0x85, 0x72, 0x7f, 0x64, 0x8c, 0x6a, 0x95, 0x8d, 0x80, 0x69, 0x6b,\n", - " 0x98, 0x86, 0x75, 0x92, 0x7a, 0x7f, 0x5b, 0x7f, 0x9b, 0x57, 0x99, 0x8d,\n", - " 0x8a, 0x7b, 0x58, 0x73, 0x88, 0x6d, 0x8a, 0x8c, 0x8e, 0x82, 0x85, 0xaa,\n", - " 0x72, 0xa6, 0x7f, 0x7a, 0x83, 0x59, 0x6d, 0x6e, 0x79, 0x83, 0x88, 0x84,\n", - " 0x74, 0x85, 0x74, 0x78, 0x80, 0x7c, 0x97, 0x86, 0x94, 0x65, 0x7e, 0x80,\n", - " 0x6f, 0x97, 0x70, 0x74, 0x92, 0x76, 0x71, 0x91, 0x85, 0x72, 0x6e, 0x84,\n", - " 0x78, 0x7e, 0x88, 0x79, 0x7f, 0x80, 0x83, 0x7a, 0x85, 0x75, 0x82, 0x81,\n", - " 0x82, 0x7b, 0x7a, 0xa0, 0x76, 0x7f, 0x75, 0xa7, 0x67, 0x8e, 0x81, 0x98,\n", - " 0xa5, 0x86, 0x77, 0x78, 0x7f, 0x97, 0x90, 0x86, 0x80, 0x6b, 0x89, 0x66,\n", - " 0x9b, 0x5c, 0x8b, 0x74, 0xac, 0x89, 0x89, 0x92, 0x92, 0xa8, 0x61, 0x85,\n", - " 0x8c, 0x86, 0x88, 0x91, 0x92, 0x66, 0x63, 0x6c, 0x7a, 0x80, 0x7d, 0x90,\n", - " 0x6f, 0x7f, 0x92, 0x94, 0x8e, 0x7a, 0x86, 0x98, 0xa1, 0x59, 0x71, 0x8c,\n", - " 0x63, 0xa3, 0x60, 0x7d, 0x88, 0x6a, 0x83, 0x6e, 0x7a, 0x94, 0x7b, 0x81,\n", - " 0x7d, 0x83, 0x77, 0x7e, 0x63, 0xab, 0x75, 0x7b, 0x71, 0x8f, 0x76, 0x6e,\n", - " 0x78, 0x7b, 0x79, 0x86, 0x69, 0x67, 0x67, 0x70, 0x6c, 0x7a, 0x6c, 0x84,\n", - " 0x74, 0xa2, 0x74, 0x77, 0x8a, 0x58, 0x7d, 0xa0, 0x65, 0x7b, 0x79, 0x71,\n", - " 0x7c, 0x3c, 0x85, 0x96, 0x59, 0x76, 0x6a, 0x94, 0xa5, 0x5b, 0x70, 0x99,\n", - " 0x7f, 0x9a, 0x69, 0x7c, 0x6f, 0x79, 0x72, 0x8b, 0x83, 0x6e, 0x73, 0x7f,\n", - " 0x6f, 0x6d, 0x7e, 0xa3, 0x72, 0x87, 0x83, 0x8c, 0x8c, 0x70, 0x77, 0x75,\n", - " 0xa4, 0x5a, 0x89, 0x7d, 0xa0, 0x97, 0x67, 0x80, 0x78, 0x7e, 0x86, 0x6a,\n", - " 0x7b, 0x9c, 0x77, 0x67, 0x7b, 0x74, 0x7f, 0xa5, 0x90, 0x94, 0x92, 0x4d,\n", - " 0x7a, 0x79, 0x9f, 0x87, 0x64, 0x6e, 0x6d, 0x59, 0x83, 0x54, 0x79, 0x82,\n", 
- " 0x6c, 0x74, 0x82, 0x98, 0x77, 0x90, 0x85, 0xa4, 0x88, 0x81, 0x71, 0x85,\n", - " 0x90, 0x8e, 0x88, 0x68, 0x51, 0x6d, 0x71, 0x7b, 0x80, 0xbc, 0xa5, 0x57,\n", - " 0x8f, 0x9f, 0x95, 0x89, 0xb1, 0x96, 0x69, 0x65, 0x61, 0x73, 0x6f, 0x6c,\n", - " 0x5b, 0x95, 0x99, 0x7f, 0x76, 0x9d, 0x7c, 0x7d, 0x8d, 0xb1, 0x8f, 0x6a,\n", - " 0x76, 0x95, 0x74, 0x7a, 0x7b, 0xae, 0x77, 0x76, 0x6d, 0x99, 0x7d, 0x80,\n", - " 0x6e, 0x89, 0x7f, 0x74, 0x6f, 0x72, 0x89, 0x8b, 0x86, 0x7b, 0x7c, 0x72,\n", - " 0x6b, 0x4f, 0x71, 0x94, 0x80, 0x96, 0x83, 0x7e, 0x75, 0x74, 0x68, 0x83,\n", - " 0x95, 0x8c, 0x85, 0x7a, 0x82, 0x74, 0x85, 0x83, 0x8c, 0x7e, 0x7a, 0xa0,\n", - " 0x8e, 0x67, 0x6b, 0x82, 0x9b, 0x66, 0x6c, 0x8a, 0x88, 0x7e, 0x74, 0x9e,\n", - " 0x88, 0x82, 0x73, 0x73, 0x79, 0x7c, 0x72, 0x6b, 0x74, 0x8b, 0xa4, 0xa4,\n", - " 0xa3, 0x73, 0x73, 0x88, 0x8d, 0x94, 0x84, 0x9a, 0x9e, 0x93, 0x6c, 0x86,\n", - " 0x7a, 0x7a, 0x7e, 0xaa, 0x66, 0x8f, 0x99, 0xa4, 0x70, 0x4c, 0x6f, 0x66,\n", - " 0x8a, 0xaa, 0x69, 0x80, 0x6a, 0x5e, 0x71, 0x8f, 0x8b, 0x84, 0x75, 0x9d,\n", - " 0x5c, 0x60, 0x61, 0x4a, 0x6f, 0x91, 0x78, 0x6e, 0x8c, 0x62, 0x88, 0x75,\n", - " 0x64, 0x7c, 0x7d, 0x92, 0x9b, 0x96, 0x62, 0x72, 0x6c, 0x6f, 0x87, 0x5d,\n", - " 0xa0, 0xa7, 0x7c, 0x58, 0x6e, 0x8c, 0x82, 0x84, 0x7f, 0x8b, 0x54, 0x77,\n", - " 0x5b, 0x9a, 0x6a, 0x78, 0x5d, 0xb9, 0x8e, 0x7d, 0x6e, 0xa1, 0x66, 0x7c,\n", - " 0x87, 0xd2, 0x7a, 0x6c, 0x82, 0xa1, 0x83, 0x59, 0x64, 0x9e, 0x65, 0x6d,\n", - " 0x77, 0x80, 0x7c, 0x9a, 0x50, 0x9f, 0x8b, 0x7a, 0x73, 0x80, 0x92, 0x6d,\n", - " 0x97, 0x7f, 0x74, 0x6a, 0x5f, 0x44, 0x7d, 0x99, 0x95, 0x91, 0x8f, 0x6a,\n", - " 0x63, 0x56, 0x89, 0x96, 0xba, 0xa6, 0x71, 0x98, 0x9d, 0x3a, 0x8f, 0x77,\n", - " 0x6d, 0x76, 0x68, 0xb4, 0x8d, 0x79, 0x7a, 0x83, 0x7f, 0x96, 0x75, 0x94,\n", - " 0x9e, 0x51, 0x83, 0x5b, 0x66, 0x73, 0xa1, 0xbc, 0x8c, 0x70, 0x88, 0x80,\n", - " 0x92, 0x60, 0x7d, 0xa9, 0x97, 0x74, 0x7d, 0x98, 0x7b, 0x78, 0x85, 0xa7,\n", - " 0x8f, 0x8c, 0x91, 0x9d, 0x6a, 0x80, 0x6c, 0x8e, 0x8e, 0x91, 0x76, 0x8b,\n", 
- " 0x79, 0x59, 0x7d, 0x9c, 0x69, 0x83, 0x8c, 0x95, 0x8e, 0x75, 0x9d, 0x83,\n", - " 0x92, 0x99, 0x8a, 0x59, 0x61, 0x54, 0x63, 0x86, 0x83, 0x86, 0x98, 0x83,\n", - " 0x73, 0x74, 0x91, 0x52, 0x60, 0x8a, 0x7c, 0x57, 0xbc, 0x9d, 0x86, 0x6b,\n", - " 0x63, 0xa2, 0x78, 0x80, 0x75, 0xb1, 0x74, 0x76, 0x69, 0x8b, 0x7e, 0x76,\n", - " 0x7b, 0xb3, 0x77, 0x5b, 0x6c, 0x8b, 0x83, 0x80, 0x7f, 0xd1, 0x7c, 0x58,\n", - " 0x6f, 0x98, 0x71, 0x57, 0x60, 0xd0, 0x84, 0x62, 0x74, 0xa6, 0x8f, 0x7b,\n", - " 0x70, 0xaa, 0x81, 0x6b, 0x7f, 0x89, 0x6a, 0x74, 0x5a, 0x8c, 0x9c, 0x77,\n", - " 0x5d, 0x84, 0x63, 0x94, 0x8e, 0x91, 0x83, 0x4a, 0x49, 0x74, 0x6b, 0x70,\n", - " 0xc0, 0xa0, 0x6a, 0x90, 0x8e, 0x5a, 0x70, 0x96, 0xab, 0x72, 0x7e, 0xba,\n", - " 0xa7, 0x46, 0x86, 0x5d, 0x90, 0x76, 0x95, 0x8d, 0xa5, 0x40, 0x82, 0x8a,\n", - " 0x7d, 0x5e, 0x73, 0x94, 0x9d, 0x58, 0x8c, 0x8b, 0x69, 0x6c, 0x9a, 0x90,\n", - " 0xaa, 0x6f, 0x85, 0x8d, 0x64, 0x58, 0x7b, 0x97, 0xa9, 0x79, 0xa5, 0xa2,\n", - " 0x5f, 0x57, 0x9a, 0xb4, 0x89, 0x70, 0x84, 0x73, 0x46, 0x6c, 0x6e, 0x87,\n", - " 0x70, 0x94, 0x8a, 0x8a, 0x69, 0x7b, 0x6c, 0x68, 0x8e, 0xa2, 0x90, 0x84,\n", - " 0x78, 0x45, 0x63, 0x78, 0x7f, 0x90, 0x9f, 0x90, 0x68, 0x43, 0x92, 0x77,\n", - " 0x78, 0x77, 0x82, 0x7d, 0x8f, 0x6a, 0x7a, 0x70, 0x76, 0x75, 0x87, 0x63,\n", - " 0xbc, 0x8e, 0x6a, 0x71, 0x51, 0x51, 0x75, 0x6b, 0x8a, 0xb4, 0x6a, 0x5b,\n", - " 0x99, 0x84, 0x76, 0x84, 0x74, 0xaf, 0x86, 0x6a, 0x53, 0x97, 0x6e, 0x8e,\n", - " 0x61, 0xc4, 0x7e, 0x5d, 0x4d, 0x96, 0x73, 0x73, 0x53, 0xc0, 0x8f, 0x68,\n", - " 0x58, 0xae, 0x81, 0x83, 0x62, 0x98, 0x7b, 0x89, 0x54, 0x86, 0x78, 0x67,\n", - " 0x70, 0x9b, 0x63, 0x5f, 0x2d, 0x77, 0x84, 0x79, 0x6b, 0xa4, 0x7b, 0x65,\n", - " 0x45, 0x65, 0x56, 0x86, 0xbb, 0x8a, 0x8e, 0x92, 0x86, 0x48, 0x7c, 0x6d,\n", - " 0xb4, 0x7d, 0x56, 0xa4, 0x86, 0x52, 0x8b, 0x6a, 0x8d, 0x5b, 0x9d, 0xa2,\n", - " 0xbf, 0x36, 0x7c, 0x99, 0x9d, 0x65, 0x75, 0xa4, 0x9f, 0x6a, 0x7c, 0x6b,\n", - " 0x6f, 0x55, 0x70, 0x7f, 0xc2, 0x38, 0x6e, 0xa4, 0x74, 0x4c, 0x75, 0xbb,\n", 
- " 0xa4, 0x75, 0x8e, 0x8f, 0x56, 0x65, 0x57, 0x92, 0x73, 0x7f, 0x7d, 0x86,\n", - " 0x65, 0x76, 0x92, 0x84, 0x70, 0xa8, 0x91, 0x5b, 0x69, 0x74, 0x8e, 0x82,\n", - " 0x78, 0x8a, 0xaa, 0x71, 0x70, 0x50, 0x85, 0x82, 0x7d, 0x94, 0xa0, 0x76,\n", - " 0x6d, 0x55, 0x86, 0x79, 0x71, 0x7f, 0x9b, 0x71, 0x8a, 0x42, 0x87, 0x64,\n", - " 0x57, 0x88, 0xa0, 0x77, 0xa8, 0x91, 0x72, 0x65, 0x7e, 0x6b, 0x7e, 0x81,\n", - " 0x8d, 0x97, 0x7e, 0x6a, 0x92, 0x88, 0x84, 0x7a, 0x61, 0xa9, 0x86, 0x59,\n", - " 0x6c, 0x87, 0x61, 0x72, 0x4f, 0xc8, 0x99, 0x6c, 0x66, 0xa3, 0x80, 0x8b,\n", - " 0x5c, 0xc0, 0x69, 0x7a, 0x6c, 0xb8, 0x8e, 0x91, 0x51, 0x9f, 0x8c, 0x85,\n", - " 0x75, 0x96, 0x8c, 0x84, 0x6b, 0xa6, 0x71, 0x62, 0x42, 0x60, 0x74, 0x72,\n", - " 0x92, 0x91, 0x70, 0x5b, 0x3d, 0x71, 0x5e, 0x91, 0xa3, 0xa5, 0x6a, 0x7c,\n", - " 0x60, 0x58, 0x82, 0x80, 0xa3, 0x73, 0x8f, 0xa0, 0xb2, 0x4b, 0x94, 0x5e,\n", - " 0x9f, 0x75, 0x4d, 0x83, 0xbc, 0x42, 0x5e, 0x80, 0x8f, 0x59, 0x53, 0xac,\n", - " 0xb2, 0x45, 0x68, 0x7d, 0x9a, 0x65, 0x8a, 0xaa, 0xa0, 0x4e, 0x77, 0x72,\n", - " 0x4d, 0x62, 0x6e, 0x98, 0x8c, 0x73, 0x92, 0x5a, 0x49, 0x55, 0x7b, 0x98,\n", - " 0x8d, 0x84, 0x80, 0x8e, 0x2e, 0x56, 0x78, 0x73, 0x7b, 0x8f, 0x9a, 0x69,\n", - " 0x73, 0x68, 0x7a, 0x88, 0x78, 0xa5, 0xb1, 0x5c, 0x8f, 0x55, 0x71, 0x99,\n", - " 0x7a, 0xa9, 0xb0, 0x75, 0x69, 0x44, 0x5f, 0x66, 0x81, 0x7d, 0x9e, 0x4f,\n", - " 0x66, 0x7f, 0x87, 0x7d, 0x5d, 0x7c, 0x95, 0x62, 0xa5, 0x86, 0x90, 0x6f,\n", - " 0x60, 0xa5, 0x6e, 0x70, 0x80, 0x96, 0x6f, 0x55, 0x77, 0x87, 0x99, 0x7b,\n", - " 0x21, 0xaa, 0x7f, 0x60, 0x63, 0xae, 0x47, 0x79, 0x44, 0xb5, 0x83, 0x6e,\n", - " 0x6d, 0x93, 0x76, 0x54, 0x4b, 0xad, 0x91, 0x6b, 0x6a, 0x9c, 0x8c, 0x83,\n", - " 0x62, 0x8a, 0x88, 0x71, 0x73, 0xa0, 0x75, 0x95, 0x54, 0x80, 0x92, 0x65,\n", - " 0x45, 0x80, 0x63, 0x9a, 0x93, 0x9b, 0x78, 0x4e, 0x4d, 0x5f, 0x69, 0x9e,\n", - " 0xbd, 0xa5, 0x75, 0x6b, 0x6e, 0x6a, 0x82, 0x97, 0xab, 0x60, 0x76, 0xb3,\n", - " 0xc1, 0x39, 0x82, 0x5b, 0x71, 0x31, 0x7b, 0x9c, 0xb5, 0x4f, 0x75, 0x79,\n", 
- " 0x6c, 0x5d, 0x80, 0xa6, 0x9c, 0x53, 0x6f, 0x85, 0x84, 0x5e, 0x7d, 0xb5,\n", - " 0x95, 0x5f, 0x7c, 0x98, 0x72, 0x7c, 0x67, 0x99, 0xbb, 0x6c, 0x73, 0x66,\n", - " 0x59, 0x5c, 0x6c, 0x9a, 0x9b, 0x72, 0x9b, 0x5f, 0x4b, 0x51, 0x63, 0x84,\n", - " 0x74, 0xa0, 0xb3, 0x6e, 0x63, 0xa0, 0x84, 0x90, 0x71, 0x91, 0xba, 0x64,\n", - " 0x6d, 0x72, 0x78, 0x83, 0x6f, 0x8e, 0xbd, 0x64, 0x69, 0x60, 0x95, 0x67,\n", - " 0x70, 0x93, 0x78, 0x4d, 0x91, 0x3f, 0x7b, 0x6d, 0x69, 0x87, 0x7d, 0x8a,\n", - " 0xa3, 0x95, 0x9d, 0x66, 0x6d, 0x8b, 0x7a, 0x75, 0x94, 0x7b, 0x89, 0x52,\n", - " 0x66, 0x65, 0x79, 0x84, 0x49, 0x9c, 0x60, 0x66, 0x3e, 0xab, 0x4a, 0x86,\n", - " 0x54, 0xcd, 0x7c, 0x83, 0x7c, 0xac, 0x8b, 0x53, 0x67, 0xbb, 0x7c, 0x6d,\n", - " 0x72, 0xb3, 0x83, 0x85, 0x4f, 0x97, 0x86, 0x60, 0x7d, 0x93, 0x70, 0x8b,\n", - " 0x64, 0x78, 0x82, 0x73, 0x54, 0x87, 0x6c, 0xaa, 0x6f, 0x97, 0x8d, 0x51,\n", - " 0x2d, 0x50, 0x75, 0xa9, 0xc2, 0x94, 0x8d, 0x6f, 0x6d, 0x71, 0x7b, 0x87,\n", - " 0x93, 0x67, 0x7d, 0xa5, 0xa2, 0x4f, 0x99, 0x83, 0x95, 0x49, 0x70, 0x9c,\n", - " 0xcf, 0x37, 0x84, 0x86, 0x94, 0x5c, 0x95, 0xa1, 0xb6, 0x73, 0x80, 0x8d,\n", - " 0x89, 0x62, 0x6f, 0xb4, 0xa1, 0x5b, 0x64, 0x91, 0x41, 0x4f, 0x53, 0xa6,\n", - " 0xae, 0x75, 0x84, 0x82, 0x58, 0x8e, 0x63, 0x95, 0xa3, 0x8d, 0x8b, 0x76,\n", - " 0x5d, 0x78, 0x80, 0x82, 0x6e, 0x9d, 0xb8, 0x7d, 0x64, 0x8a, 0x7e, 0x80,\n", - " 0x72, 0x99, 0xcf, 0x76, 0x66, 0x77, 0x7c, 0x81, 0x71, 0x6f, 0xa1, 0x6c,\n", - " 0x6b, 0x70, 0x80, 0x7c, 0x6d, 0x83, 0x8e, 0x74, 0x7a, 0x58, 0x69, 0x53,\n", - " 0x58, 0x7d, 0x7f, 0x84, 0x96, 0x9c, 0x75, 0x6e, 0x62, 0x7c, 0x88, 0x7e,\n", - " 0x7f, 0x98, 0x93, 0x61, 0x98, 0x98, 0x80, 0x83, 0x2e, 0x7d, 0x64, 0x69,\n", - " 0x50, 0xa5, 0x38, 0x96, 0x2e, 0xc5, 0x66, 0x56, 0x64, 0xaa, 0x63, 0x64,\n", - " 0x6d, 0xb3, 0x8a, 0x6c, 0x59, 0xb6, 0x69, 0x7a, 0x54, 0x91, 0x58, 0x96,\n", - " 0x6b, 0x9f, 0x6d, 0x88, 0x4a, 0x82, 0x94, 0x67, 0x38, 0x93, 0x60, 0x87,\n", - " 0x8c, 0x93, 0x8c, 0x52, 0x31, 0x43, 0x66, 0xa9, 0xb3, 0x7a, 0x88, 0x64,\n", 
- " 0x60, 0x5b, 0x80, 0x84, 0xb7, 0x5a, 0x7a, 0x9d, 0x92, 0x50, 0x89, 0x80,\n", - " 0x72, 0x51, 0x7f, 0x85, 0xae, 0x47, 0x76, 0x9a, 0x7a, 0x74, 0x6d, 0x93,\n", - " 0xbd, 0x42, 0x72, 0x6d, 0x58, 0x5e, 0x6e, 0xa4, 0xb5, 0x4e, 0x76, 0x8f,\n", - " 0x75, 0x9b, 0x5d, 0x92, 0xad, 0x77, 0x7f, 0x73, 0x62, 0x7d, 0x65, 0xaf,\n", - " 0x98, 0x87, 0x80, 0x7c, 0x61, 0x81, 0x45, 0xa0, 0x84, 0x99, 0xbb, 0x72,\n", - " 0x86, 0x8f, 0x70, 0x97, 0x6a, 0x8a, 0xd3, 0x70, 0x7c, 0x91, 0x77, 0x82,\n", - " 0x70, 0x8c, 0xd5, 0x6c, 0x7f, 0x51, 0x5f, 0x69, 0x72, 0x89, 0x9a, 0x68,\n", - " 0x79, 0x70, 0x8b, 0x80, 0x52, 0x98, 0x86, 0x7a, 0xa0, 0x7b, 0x61, 0x6e,\n", - " 0x66, 0x6f, 0x77, 0x78, 0x64, 0xac, 0x7e, 0x73, 0x5d, 0x71, 0x6f, 0x80,\n", - " 0x2e, 0xa9, 0x90, 0x5c, 0x56, 0xa1, 0x32, 0x88, 0x55, 0xb9, 0x67, 0x6f,\n", - " 0x5c, 0xa5, 0x87, 0x61, 0x6b, 0xbd, 0x77, 0x7c, 0x62, 0xae, 0x7c, 0x7a,\n", - " 0x66, 0xac, 0x7a, 0x62, 0x5c, 0x9a, 0x58, 0x89, 0x5a, 0x74, 0x72, 0x66,\n", - " 0x5c, 0x8e, 0x51, 0x8e, 0x99, 0x92, 0xa0, 0x49, 0x31, 0x55, 0x68, 0x99,\n", - " 0xba, 0x82, 0xa2, 0x7a, 0x5e, 0x6f, 0x84, 0x98, 0x96, 0x52, 0x73, 0x99,\n", - " 0xb4, 0x5e, 0x7c, 0x59, 0x7d, 0x4a, 0x7e, 0xa0, 0xbe, 0x63, 0x67, 0x8e,\n", - " 0x7f, 0x71, 0x80, 0xaf, 0x93, 0x4e, 0x78, 0x7e, 0x6d, 0x52, 0x66, 0xb3,\n", - " 0x94, 0x56, 0x84, 0x8f, 0x50, 0x6d, 0x65, 0xa8, 0xb3, 0x4b, 0x91, 0x7f,\n", - " 0x4c, 0x8d, 0x69, 0x79, 0x95, 0x8f, 0x8f, 0x7c, 0x66, 0x98, 0x75, 0x9b,\n", - " 0x73, 0x9b, 0xac, 0x79, 0x6e, 0x84, 0x69, 0x9e, 0x80, 0xa0, 0xb0, 0x6c,\n", - " 0x46, 0x8b, 0x3f, 0x7a, 0x79, 0x79, 0xb3, 0x62, 0x6b, 0x60, 0x67, 0x81,\n", - " 0x4a, 0x7e, 0xa7, 0x8c, 0x74, 0x7f, 0x67, 0x4c, 0x4b, 0x8c, 0x8e, 0x67,\n", - " 0x78, 0x9d, 0x94, 0x79, 0x75, 0x7c, 0x86, 0x7b, 0x67, 0x9f, 0xa4, 0x61,\n", - " 0x5b, 0x6e, 0x85, 0x70, 0x20, 0xa5, 0x66, 0x5e, 0x55, 0xad, 0x3e, 0x7c,\n", - " 0x2d, 0xb4, 0x78, 0x6f, 0x4c, 0xc6, 0x7e, 0x6d, 0x54, 0xb4, 0x71, 0x78,\n", - " 0x54, 0xc3, 0x66, 0x6e, 0x4a, 0xa0, 0x7b, 0x85, 0x66, 0x94, 0x75, 0x8d,\n", 
- " 0x34, 0x88, 0x71, 0x4e, 0x49, 0x8a, 0x3b, 0x9c, 0x88, 0x76, 0x7f, 0x6a,\n", - " 0x37, 0x64, 0x66, 0xb6, 0xa3, 0x82, 0x76, 0x82, 0x6d, 0x65, 0x6f, 0x8c,\n", - " 0x99, 0x5e, 0x77, 0xa1, 0x99, 0x51, 0xa1, 0x67, 0x6f, 0x4c, 0x7f, 0x9e,\n", - " 0xad, 0x40, 0x65, 0x82, 0x76, 0x66, 0x72, 0xb5, 0xb2, 0x5b, 0x71, 0x8a,\n", - " 0x76, 0x74, 0x52, 0xa0, 0x91, 0x37, 0x86, 0x72, 0x6c, 0x75, 0x62, 0xa5,\n", - " 0xb6, 0x57, 0x75, 0x90, 0x3e, 0x7f, 0x49, 0x9f, 0x8e, 0x92, 0x81, 0x87,\n", - " 0x69, 0x9e, 0x6b, 0x86, 0x8d, 0xb1, 0x9e, 0x65, 0x6f, 0x93, 0x70, 0x79,\n", - " 0x7b, 0x87, 0xbe, 0x59, 0x69, 0x7a, 0x56, 0x7a, 0x81, 0x7d, 0xb8, 0x67,\n", - " 0x67, 0x7f, 0x54, 0x8f, 0x71, 0x85, 0xa0, 0x74, 0x89, 0x5d, 0x67, 0x52,\n", - " 0x65, 0x96, 0x89, 0x84, 0x81, 0x83, 0x82, 0x9a, 0x85, 0x73, 0x78, 0x62,\n", - " 0x87, 0x98, 0x75, 0x6a, 0x73, 0x95, 0x86, 0x71, 0x11, 0x9a, 0x91, 0x66,\n", - " 0x6e, 0xa4, 0x35, 0x89, 0x47, 0xbb, 0x5e, 0x46, 0x3a, 0xa8, 0x70, 0x4a,\n", - " 0x65, 0xb9, 0x70, 0x96, 0x66, 0xcf, 0x80, 0x79, 0x60, 0xa4, 0x79, 0x70,\n", - " 0x68, 0x92, 0x7f, 0x89, 0x6b, 0x87, 0x77, 0x67, 0x5b, 0x74, 0x3f, 0x9e,\n", - " 0x94, 0x9b, 0xa1, 0x61, 0x4b, 0x66, 0x70, 0xad, 0xb7, 0x67, 0x70, 0x6c,\n", - " 0x3f, 0x5b, 0x94, 0x88, 0xb3, 0x4f, 0x97, 0x97, 0x8c, 0x55, 0xb8, 0x78,\n", - " 0x60, 0x25, 0x51, 0x91, 0xcd, 0x44, 0x6f, 0x85, 0x5c, 0x65, 0x67, 0xa5,\n", - " 0x9e, 0x5f, 0x6d, 0x85, 0x6d, 0x56, 0x80, 0xae, 0x79, 0x63, 0x4f, 0x7d,\n", - " 0x5f, 0x6b, 0x6e, 0xa7, 0x8e, 0x76, 0x8f, 0x90, 0x6e, 0x8c, 0x88, 0x92,\n", - " 0x81, 0x81, 0x96, 0x7d, 0x48, 0x6b, 0x3f, 0xa1, 0x8c, 0xa2, 0x9f, 0x7f,\n", - " 0x77, 0x97, 0x73, 0x9c, 0x67, 0x95, 0xae, 0x77, 0x7f, 0x7a, 0x52, 0x7e,\n", - " 0x91, 0x77, 0xa8, 0x54, 0x6a, 0x74, 0x52, 0x8a, 0x67, 0x8e, 0x90, 0x8d,\n", - " 0x8b, 0x52, 0x72, 0x5a, 0x73, 0x8f, 0x94, 0x87, 0x7c, 0x88, 0x89, 0x76,\n", - " 0x77, 0x88, 0x5c, 0x77, 0x8f, 0x94, 0xac, 0x58, 0x70, 0x79, 0x75, 0x8a,\n", - " 0x20, 0x9c, 0x91, 0x55, 0x55, 0xa4, 0x5b, 0x84, 0x30, 0xc6, 0x8a, 0x51,\n", 
- " 0x31, 0xc3, 0x72, 0x6b, 0x65, 0xb9, 0x79, 0x7d, 0x62, 0xad, 0x88, 0x75,\n", - " 0x37, 0xb0, 0x76, 0x8a, 0x7d, 0x85, 0x7f, 0xb4, 0x46, 0x9c, 0x83, 0x7b,\n", - " 0x79, 0x78, 0x56, 0xac, 0x8d, 0xa2, 0xa9, 0x54, 0x44, 0x5a, 0x63, 0xb2,\n", - " 0xa8, 0x72, 0xa4, 0x6b, 0x5d, 0x4d, 0x8e, 0x95, 0x9e, 0x4a, 0x98, 0x8c,\n", - " 0xb0, 0x5c, 0xa5, 0x75, 0x83, 0x3b, 0x46, 0x92, 0xa7, 0x3b, 0x6a, 0x75,\n", - " 0x59, 0x57, 0x52, 0xa1, 0xab, 0x54, 0x68, 0x7c, 0x94, 0x6e, 0x5b, 0x9a,\n", - " 0xa3, 0x5d, 0x73, 0x74, 0x5a, 0x63, 0x56, 0x9e, 0xc1, 0x71, 0x82, 0x79,\n", - " 0x49, 0x92, 0x63, 0xa6, 0x99, 0x7d, 0x71, 0x81, 0x5e, 0x90, 0x5c, 0x8b,\n", - " 0x7e, 0xb4, 0xa0, 0x8c, 0x67, 0x93, 0x4e, 0x72, 0x65, 0x83, 0xb5, 0x77,\n", - " 0x83, 0x92, 0x43, 0x67, 0x8c, 0x81, 0xb1, 0x75, 0x6a, 0x61, 0x66, 0x6f,\n", - " 0x5d, 0x7f, 0x8d, 0x7b, 0x6b, 0x68, 0x6f, 0x85, 0x6e, 0x87, 0x97, 0x89,\n", - " 0x9b, 0x81, 0x7e, 0x7e, 0x9d, 0x83, 0x6b, 0x6a, 0xa5, 0x92, 0x7e, 0x70,\n", - " 0x60, 0x8f, 0x6f, 0x8b, 0x15, 0xa6, 0x66, 0x4e, 0x61, 0xbc, 0x38, 0x67,\n", - " 0x46, 0xab, 0x84, 0x5e, 0x3a, 0xac, 0x74, 0x58, 0x76, 0xc4, 0x7a, 0x76,\n", - " 0x67, 0xc0, 0x76, 0x6f, 0x52, 0xa6, 0xa2, 0x97, 0x76, 0xa6, 0x7f, 0x99,\n", - " 0x5d, 0xa5, 0x5f, 0x60, 0x58, 0x88, 0x3f, 0x9e, 0x7d, 0x81, 0x71, 0x63,\n", - " 0x42, 0x55, 0x3e, 0xbd, 0xa9, 0x7a, 0xa5, 0x67, 0x62, 0x7a, 0x80, 0x9e,\n", - " 0xc3, 0x54, 0x7f, 0x9f, 0x93, 0x73, 0xbd, 0x79, 0x74, 0x2e, 0x54, 0x9e,\n", - " 0xaa, 0x76, 0x68, 0x80, 0x78, 0x64, 0x57, 0x93, 0xa4, 0x56, 0x75, 0x72,\n", - " 0x81, 0x7f, 0x48, 0xad, 0x89, 0x67, 0x60, 0x7e, 0x7a, 0x83, 0x6e, 0x95,\n", - " 0xb0, 0x57, 0x89, 0x91, 0x4d, 0x86, 0x78, 0x7b, 0x74, 0x8c, 0x8f, 0x8d,\n", - " 0x67, 0xa4, 0x64, 0x8d, 0x77, 0x9a, 0xa1, 0x88, 0x6e, 0x94, 0x33, 0x95,\n", - " 0x81, 0x76, 0xc6, 0x7d, 0x7d, 0x85, 0x5a, 0x6e, 0x8e, 0x69, 0x9e, 0x71,\n", - " 0x82, 0x81, 0x59, 0x5b, 0x71, 0x9a, 0x91, 0x8e, 0x80, 0x69, 0x71, 0x73,\n", - " 0x6e, 0x9a, 0x95, 0x94, 0x7b, 0x80, 0x82, 0x7e, 0x76, 0x84, 0x70, 0x72,\n", 
- " 0x9c, 0xa0, 0x77, 0x66, 0x55, 0xa1, 0x8c, 0x73, 0x35, 0xa0, 0x68, 0x4d,\n", - " 0x3b, 0xaa, 0x44, 0x6f, 0x3c, 0xc0, 0x96, 0x78, 0x33, 0xbd, 0x64, 0x5b,\n", - " 0x75, 0xd2, 0x83, 0x87, 0x59, 0xbd, 0x80, 0x80, 0x6e, 0x8e, 0x65, 0x7a,\n", - " 0x87, 0xb6, 0x8d, 0x94, 0x39, 0x95, 0x8b, 0x5d, 0x66, 0x71, 0x4e, 0x9f,\n", - " 0x96, 0x8a, 0x98, 0x47, 0x41, 0x6c, 0x4c, 0xac, 0x95, 0x81, 0x90, 0x75,\n", - " 0x59, 0x4c, 0xa2, 0x93, 0x99, 0x58, 0x7b, 0xaf, 0xa3, 0x52, 0xb0, 0x6c,\n", - " 0x5f, 0x47, 0x6e, 0x8e, 0xae, 0x3d, 0x81, 0x6d, 0x78, 0x52, 0x4f, 0x81,\n", - " 0x80, 0x68, 0x4b, 0x81, 0x74, 0x71, 0x67, 0xa7, 0x9a, 0x55, 0x84, 0x72,\n", - " 0x64, 0x6b, 0x6e, 0x9d, 0xab, 0x76, 0x79, 0x85, 0x40, 0x84, 0x80, 0x85,\n", - " 0x70, 0x91, 0x9a, 0x81, 0x5b, 0x89, 0x6b, 0x8a, 0x92, 0x8c, 0xa4, 0x7b,\n", - " 0x75, 0x89, 0x54, 0x76, 0x69, 0x69, 0xb3, 0x6c, 0x47, 0x7d, 0x4c, 0x7f,\n", - " 0x81, 0x86, 0x8f, 0x63, 0x71, 0x6a, 0x63, 0x67, 0x7c, 0x8f, 0xa0, 0x68,\n", - " 0x86, 0x58, 0x5b, 0x87, 0x6a, 0x82, 0x89, 0x78, 0x9d, 0x8d, 0xaa, 0x82,\n", - " 0x6e, 0xa4, 0x6f, 0x6d, 0x70, 0x9f, 0x7f, 0x77, 0x41, 0xa5, 0x86, 0x61,\n", - " 0x2d, 0x99, 0xa9, 0x5f, 0x5a, 0xb3, 0x51, 0x70, 0x5a, 0xce, 0x77, 0x68,\n", - " 0x2c, 0xb8, 0x90, 0x44, 0x58, 0xb9, 0x74, 0x8e, 0x70, 0xb3, 0x9a, 0x75,\n", - " 0x6d, 0xc0, 0x9e, 0x8e, 0x8d, 0xa8, 0x7b, 0xa8, 0x4a, 0x89, 0x6e, 0x7f,\n", - " 0x5d, 0x6e, 0x46, 0x91, 0x6d, 0x81, 0x89, 0x3e, 0x35, 0x69, 0x44, 0xaf,\n", - " 0x99, 0x8d, 0x94, 0x54, 0x60, 0x5b, 0xaf, 0x97, 0x92, 0x4e, 0x80, 0xae,\n", - " 0x9e, 0x62, 0xa3, 0x77, 0x6e, 0x5d, 0x71, 0xa0, 0xa6, 0x59, 0x84, 0x5d,\n", - " 0x65, 0x4a, 0x69, 0xa1, 0xa1, 0x40, 0x75, 0x65, 0x6b, 0x68, 0x60, 0xb3,\n", - " 0x92, 0x27, 0x70, 0x67, 0x9b, 0x5e, 0x50, 0xaf, 0xae, 0x64, 0x7a, 0x6e,\n", - " 0x61, 0x94, 0x3b, 0x8f, 0x86, 0x7f, 0x98, 0x88, 0x7a, 0x7f, 0x61, 0x7b,\n", - " 0x64, 0x96, 0x96, 0x79, 0x5c, 0x96, 0x52, 0x92, 0x76, 0x7e, 0xc4, 0x60,\n", - " 0x6d, 0x7b, 0x41, 0x8c, 0x7b, 0x8e, 0x9a, 0x66, 0x79, 0x95, 0x67, 0x6a,\n", 
- " 0x7a, 0x9b, 0xa9, 0x85, 0x6d, 0x66, 0x55, 0x65, 0x76, 0x8b, 0x90, 0x86,\n", - " 0x88, 0x8b, 0x8f, 0x7e, 0x83, 0x7c, 0x75, 0x5f, 0x78, 0x96, 0x76, 0x47,\n", - " 0x54, 0x9c, 0x8d, 0x7d, 0x24, 0x9f, 0x79, 0x5c, 0x55, 0xb2, 0x3b, 0x67,\n", - " 0x4e, 0xd2, 0x90, 0x79, 0x3c, 0xc3, 0x8b, 0x4a, 0x7c, 0xd7, 0x70, 0x75,\n", - " 0x5b, 0xaf, 0xa8, 0x6b, 0x59, 0xc1, 0x6d, 0x5f, 0x5d, 0x96, 0x87, 0x9a,\n", - " 0x5d, 0x7f, 0x8e, 0x6d, 0x5c, 0x75, 0x3f, 0xb6, 0x8e, 0x81, 0x7b, 0x31,\n", - " 0x47, 0x67, 0x56, 0xb6, 0x90, 0x71, 0x89, 0x63, 0x61, 0x75, 0x8d, 0x8b,\n", - " 0x97, 0x62, 0x62, 0x85, 0x9c, 0x64, 0xb7, 0x61, 0x71, 0x3f, 0x6c, 0x8b,\n", - " 0xaa, 0x43, 0x82, 0x70, 0x52, 0x52, 0x80, 0xaa, 0x9e, 0x5d, 0x90, 0x69,\n", - " 0x8a, 0x77, 0x6d, 0x9f, 0x9e, 0x5f, 0x84, 0x61, 0x87, 0x70, 0x43, 0xab,\n", - " 0x97, 0x6e, 0x84, 0x6c, 0x5d, 0x82, 0x64, 0x85, 0x83, 0x7e, 0x82, 0x7c,\n", - " 0x7b, 0x91, 0x55, 0x7e, 0x77, 0x88, 0xba, 0x71, 0x6d, 0x7b, 0x71, 0x8a,\n", - " 0x7f, 0x84, 0xb5, 0x63, 0x4a, 0x9a, 0x3c, 0x70, 0x7a, 0x99, 0xa3, 0x50,\n", - " 0x84, 0x82, 0x56, 0x4c, 0x74, 0x8e, 0xa3, 0x77, 0x8f, 0x4e, 0x5f, 0x6d,\n", - " 0x97, 0x89, 0xa0, 0x6b, 0x7c, 0x8c, 0x85, 0x82, 0x8e, 0xa1, 0x89, 0x5b,\n", - " 0x7f, 0x8b, 0x8f, 0x5e, 0x74, 0x96, 0x8a, 0x7d, 0x15, 0x7b, 0x8f, 0x88,\n", - " 0x5f, 0xa7, 0x63, 0x5b, 0x39, 0xbd, 0x96, 0x56, 0x4c, 0xb4, 0x7b, 0x53,\n", - " 0x5a, 0xaf, 0x79, 0x7b, 0x5c, 0xa6, 0xaa, 0x74, 0x5f, 0xa0, 0x76, 0x9e,\n", - " 0x71, 0x9a, 0x60, 0xa4, 0x33, 0x87, 0x66, 0x66, 0x64, 0x7d, 0x6d, 0xac,\n", - " 0x9e, 0x8c, 0x78, 0x4f, 0x3d, 0x7b, 0x53, 0xb1, 0x97, 0x8a, 0x96, 0x6e,\n", - " 0x60, 0x4b, 0xa9, 0x9e, 0x93, 0x6e, 0x93, 0xb7, 0xae, 0x46, 0xb9, 0x60,\n", - " 0x72, 0x46, 0x80, 0x95, 0xb5, 0x57, 0x82, 0x53, 0x6e, 0x4e, 0x5b, 0xa2,\n", - " 0x9a, 0x3d, 0x8b, 0x6c, 0x84, 0x65, 0x69, 0xa1, 0x8c, 0x60, 0x83, 0x74,\n", - " 0x73, 0x53, 0x5d, 0x7e, 0x7f, 0x79, 0x6e, 0x81, 0x89, 0x8f, 0x51, 0x81,\n", - " 0x99, 0x97, 0x81, 0x8a, 0x87, 0x83, 0x43, 0x90, 0x89, 0x94, 0x93, 0x7a,\n", 
- " 0x66, 0x80, 0x82, 0x82, 0x79, 0x85, 0xb0, 0x6b, 0x87, 0x7b, 0x53, 0x89,\n", - " 0x79, 0x9d, 0xab, 0x6e, 0x82, 0x84, 0x50, 0x8f, 0x7e, 0x74, 0x90, 0x74,\n", - " 0x6e, 0x65, 0x84, 0x70, 0x82, 0x7a, 0x9e, 0x6d, 0x8f, 0x62, 0xb2, 0x84,\n", - " 0x78, 0x7e, 0x72, 0x5a, 0x7a, 0x85, 0x8c, 0x4b, 0x70, 0x99, 0x87, 0x78,\n", - " 0x26, 0x95, 0xb9, 0x77, 0x4d, 0xb6, 0x51, 0x6a, 0x41, 0xbf, 0x76, 0x68,\n", - " 0x56, 0xb6, 0x80, 0x53, 0x83, 0xaf, 0x87, 0x79, 0x79, 0xb4, 0x89, 0x7d,\n", - " 0x47, 0x9d, 0xa0, 0x86, 0x89, 0xc3, 0x6d, 0x99, 0x41, 0x89, 0x9a, 0x59,\n", - " 0x54, 0x83, 0x79, 0x9d, 0x7b, 0x73, 0x88, 0x4a, 0x42, 0x64, 0x7a, 0x9f,\n", - " 0x7b, 0x6e, 0x71, 0x7b, 0x6a, 0x61, 0xae, 0xa3, 0xa0, 0x68, 0x95, 0x9d,\n", - " 0x94, 0x49, 0x8b, 0x70, 0x8a, 0x5f, 0x49, 0xbb, 0xa7, 0x4a, 0xa1, 0x59,\n", - " 0x59, 0x59, 0x6d, 0xa0, 0x9f, 0x50, 0xa0, 0x7b, 0x75, 0x49, 0x5a, 0x8c,\n", - " 0x84, 0x68, 0x78, 0x57, 0x7a, 0x6e, 0x6b, 0x87, 0x9c, 0x7b, 0x84, 0x83,\n", - " 0x79, 0x7d, 0x5a, 0x77, 0x77, 0x6f, 0x6f, 0x7c, 0x8f, 0x83, 0x40, 0x62,\n", - " 0x6a, 0x87, 0xab, 0x74, 0x86, 0x96, 0x7a, 0x7d, 0x7b, 0x81, 0x9a, 0x65,\n", - " 0x60, 0x82, 0x61, 0x73, 0x71, 0x77, 0xa7, 0x79, 0x87, 0x8c, 0x4e, 0x72,\n", - " 0x8d, 0x89, 0x94, 0x6d, 0x75, 0x6d, 0x6e, 0x82, 0x7a, 0x8d, 0xa9, 0x77,\n", - " 0x77, 0x7c, 0x74, 0xa7, 0xb7, 0x67, 0x75, 0x67, 0x7e, 0x9f, 0x73, 0x60,\n", - " 0x6c, 0x95, 0x7f, 0x62, 0x31, 0x70, 0x85, 0x7a, 0x5f, 0xc0, 0x69, 0x66,\n", - " 0x71, 0xb0, 0x81, 0x5d, 0x48, 0xc9, 0x86, 0x39, 0x93, 0xa4, 0x8e, 0x7c,\n", - " 0x5e, 0xbb, 0x98, 0x5c, 0x74, 0x9c, 0x89, 0x6d, 0x74, 0xbd, 0x8e, 0x6e,\n", - " 0x5f, 0x9a, 0x6d, 0x70, 0x57, 0x9c, 0x58, 0xb7, 0x8e, 0x94, 0xa0, 0x3f,\n", - " 0x39, 0x75, 0x6f, 0xb4, 0xa2, 0x94, 0xa9, 0x70, 0x61, 0x8a, 0x70, 0x92,\n", - " 0xa7, 0x7f, 0x7f, 0x8d, 0x7a, 0x73, 0xa1, 0x5f, 0x8a, 0x4a, 0x65, 0xaa,\n", - " 0x92, 0x6e, 0x98, 0x51, 0x81, 0x47, 0x57, 0xb8, 0x89, 0x50, 0x8a, 0x6d,\n", - " 0x8b, 0x50, 0x8a, 0x86, 0x9b, 0x7d, 0x5b, 0x4a, 0x68, 0x74, 0x53, 0x9b,\n", 
- " 0x94, 0x74, 0x7c, 0x6f, 0x62, 0x86, 0x5b, 0x8f, 0x82, 0x96, 0x6e, 0x7c,\n", - " 0x80, 0x8f, 0x47, 0x5b, 0x70, 0x95, 0x97, 0x77, 0x8d, 0x8e, 0x69, 0x62,\n", - " 0x78, 0x8f, 0xbf, 0x5e, 0x76, 0xae, 0x4d, 0x84, 0x73, 0x76, 0xab, 0x6f,\n", - " 0x7f, 0x8c, 0x4b, 0x7d, 0x96, 0x7d, 0xb3, 0x55, 0x78, 0x8d, 0x76, 0x73,\n", - " 0x8d, 0x8e, 0x98, 0x6a, 0x91, 0x86, 0x6d, 0x8c, 0x7d, 0x93, 0x97, 0x56,\n", - " 0x79, 0x8f, 0xa3, 0x7f, 0x7e, 0x82, 0xa0, 0x63, 0x3d, 0x6b, 0x88, 0x5e,\n", - " 0x61, 0xc0, 0x45, 0x5f, 0x66, 0xb0, 0x6c, 0x6d, 0x29, 0xd5, 0x95, 0x3b,\n", - " 0x77, 0xaa, 0x62, 0x70, 0x63, 0xce, 0x8c, 0x6e, 0x56, 0xaa, 0x77, 0x6e,\n", - " 0x90, 0xcc, 0x6d, 0x7e, 0x41, 0x9f, 0x88, 0x4f, 0x5d, 0xb4, 0x4c, 0x9b,\n", - " 0x80, 0x97, 0x98, 0x59, 0x4c, 0x71, 0x53, 0xb4, 0x90, 0x97, 0x93, 0x90,\n", - " 0x46, 0x63, 0xa6, 0x87, 0x9d, 0x56, 0x7f, 0xab, 0x8e, 0x68, 0xc6, 0x5d,\n", - " 0x6e, 0x58, 0x4b, 0x85, 0xa1, 0x70, 0x8a, 0x60, 0x84, 0x44, 0x68, 0x8e,\n", - " 0x9b, 0x3a, 0x8c, 0x57, 0x91, 0x4c, 0x6b, 0x9c, 0xa7, 0x64, 0x82, 0x5f,\n", - " 0x68, 0x6d, 0x4d, 0xa1, 0x6c, 0x91, 0x6c, 0x6b, 0x64, 0x97, 0x86, 0x81,\n", - " 0x8d, 0x8e, 0x80, 0x72, 0x88, 0x96, 0x5d, 0x6e, 0x7c, 0x67, 0x97, 0x69,\n", - " 0x95, 0x93, 0x61, 0x8b, 0x9b, 0x7d, 0xc8, 0x6f, 0x85, 0x80, 0x67, 0x68,\n", - " 0x90, 0x6b, 0xcc, 0x7c, 0xa3, 0xa0, 0x58, 0x81, 0x7a, 0x8d, 0x9f, 0x65,\n", - " 0x81, 0x82, 0x78, 0x6b, 0x85, 0x7b, 0x9b, 0x69, 0x86, 0x6c, 0x83, 0x6c,\n", - " 0x8e, 0x59, 0xab, 0x56, 0x7c, 0x7f, 0x7b, 0x84, 0x71, 0x63, 0x7d, 0x73,\n", - " 0x60, 0x8b, 0x7a, 0x7b, 0x5e, 0xbb, 0x4b, 0x40, 0x30, 0xcc, 0x80, 0x65,\n", - " 0x6c, 0xb7, 0x80, 0x35, 0x7d, 0xa3, 0x5c, 0x6c, 0x49, 0xa6, 0x9b, 0x7b,\n", - " 0x53, 0xba, 0x62, 0x76, 0x78, 0xa0, 0x72, 0x80, 0x78, 0x93, 0x87, 0x62,\n", - " 0x64, 0x84, 0x6f, 0xa1, 0x70, 0x90, 0x9a, 0x6b, 0x42, 0x55, 0x6d, 0xc5,\n", - " 0xa6, 0x8a, 0x79, 0x64, 0x4c, 0x72, 0x7b, 0xa9, 0xa3, 0x70, 0x84, 0x8f,\n", - " 0x63, 0x7a, 0x9c, 0x4e, 0x5a, 0x76, 0x91, 0x67, 0xaf, 0x76, 0xbf, 0x46,\n", 
- " 0x62, 0x3f, 0x7d, 0xa7, 0x8d, 0x62, 0x90, 0x5b, 0x9a, 0x44, 0x51, 0x80,\n", - " 0xa6, 0x7e, 0x8d, 0x6a, 0x73, 0x65, 0x72, 0x82, 0x99, 0xb4, 0x6a, 0x75,\n", - " 0x85, 0x90, 0x47, 0x62, 0x9e, 0x95, 0x94, 0x78, 0x89, 0x74, 0x5d, 0xa3,\n", - " 0x7f, 0x9d, 0x7d, 0x63, 0x96, 0x86, 0x8d, 0xa2, 0x95, 0xab, 0xae, 0x5d,\n", - " 0x93, 0x8d, 0x3d, 0x76, 0x9e, 0x9c, 0xc4, 0x71, 0x7d, 0xa3, 0x75, 0x7e,\n", - " 0x6d, 0x9d, 0xa3, 0x7f, 0x94, 0x89, 0x47, 0x71, 0x8b, 0x95, 0xb1, 0x72,\n", - " 0x90, 0x53, 0x7e, 0x8f, 0x8c, 0x90, 0xa1, 0x4d, 0x59, 0x62, 0x73, 0xa0,\n", - " 0x69, 0x88, 0x86, 0x71, 0x60, 0x3b, 0x81, 0x57, 0x7d, 0x86, 0x58, 0x63,\n", - " 0x7d, 0x98, 0x74, 0x67, 0x5d, 0xb0, 0x67, 0x45, 0x9b, 0xa9, 0x94, 0x68,\n", - " 0x43, 0x8b, 0x85, 0x56, 0x63, 0x96, 0x87, 0x78, 0x88, 0xbf, 0x92, 0x8d,\n", - " 0x60, 0xa8, 0x7e, 0x7e, 0x78, 0x80, 0x66, 0x92, 0x6e, 0x97, 0xab, 0x7f,\n", - " 0x4f, 0x65, 0x59, 0xb0, 0x9b, 0x6b, 0x9f, 0x70, 0x6f, 0x5c, 0xac, 0x95,\n", - " 0xa3, 0x54, 0x8e, 0xa9, 0x9e, 0x8c, 0xa5, 0x66, 0x5f, 0x5b, 0x6c, 0x83,\n", - " 0x90, 0x73, 0x85, 0x64, 0x61, 0x51, 0x4a, 0x63, 0xa1, 0x96, 0x7e, 0x4e,\n", - " 0x87, 0x60, 0x68, 0xb5, 0x9a, 0x8d, 0x75, 0x4e, 0x8a, 0x7a, 0x5f, 0x9f,\n", - " 0x74, 0x80, 0x69, 0x6d, 0x73, 0x92, 0x79, 0x7e, 0x85, 0x68, 0x83, 0x9d,\n", - " 0xb6, 0x9d, 0x6e, 0x8f, 0x78, 0x91, 0xaf, 0x8f, 0xa0, 0x9d, 0x73, 0x55,\n", - " 0x91, 0x8f, 0xb2, 0x76, 0x97, 0xab, 0x63, 0x63, 0x68, 0x7b, 0xab, 0x5c,\n", - " 0x77, 0xae, 0x4c, 0x72, 0x6e, 0x93, 0xb8, 0x51, 0x79, 0x84, 0x7d, 0x6b,\n", - " 0x7f, 0x8a, 0xba, 0x68, 0x7a, 0x43, 0x9a, 0x8d, 0x77, 0x8a, 0x6d, 0x56,\n", - " 0x79, 0x41, 0x7a, 0x4b, 0x81, 0x7a, 0x5c, 0x68, 0x58, 0x36, 0x6f, 0x6f,\n", - " 0x9f, 0xa6, 0x5f, 0x60, 0x4e, 0x67, 0x70, 0x4c, 0x69, 0x69, 0x94, 0x63,\n", - " 0x6d, 0x7b, 0x88, 0x9e, 0x6d, 0x98, 0x69, 0x68, 0x88, 0x80, 0x80, 0x7a,\n", - " 0x8e, 0x78, 0x5e, 0x8d, 0x7e, 0x91, 0x76, 0x64, 0x7e, 0x7f, 0x4e, 0xc9,\n", - " 0x79, 0x8f, 0x9c, 0x82, 0x3d, 0x62, 0x63, 0xc3, 0xb8, 0x7b, 0x72, 0x7b,\n", 
- " 0x50, 0x56, 0x95, 0x72, 0x8f, 0x6b, 0x90, 0x9d, 0x76, 0xa4, 0xa5, 0x79,\n", - " 0x54, 0x4f, 0x59, 0x85, 0xc5, 0x92, 0x97, 0x4d, 0x6f, 0x69, 0x77, 0x7f,\n", - " 0x71, 0x7c, 0x87, 0x59, 0x98, 0x61, 0x80, 0x81, 0x88, 0x6b, 0x6d, 0x7f,\n", - " 0x7f, 0x77, 0x60, 0xa2, 0x96, 0x73, 0x69, 0x86, 0x83, 0x8d, 0x60, 0x66,\n", - " 0x88, 0x8c, 0x93, 0x67, 0x98, 0x82, 0x7e, 0x91, 0x99, 0x59, 0x8e, 0x6e,\n", - " 0x90, 0xa1, 0x62, 0x8a, 0x98, 0x7b, 0xc8, 0x67, 0x85, 0x8d, 0x6c, 0xa1,\n", - " 0xa1, 0x92, 0xd0, 0x49, 0x85, 0x76, 0x89, 0x75, 0x88, 0x83, 0xa3, 0x77,\n", - " 0x85, 0x68, 0x82, 0x83, 0x7f, 0x79, 0xae, 0x85, 0x76, 0x84, 0x80, 0x9a,\n", - " 0x9d, 0x7b, 0x83, 0x90, 0x79, 0x88, 0x79, 0x9a, 0x93, 0x6c, 0x69, 0x79,\n", - " 0x5f, 0x90, 0x81, 0x7b, 0x87, 0x9d, 0x86, 0x82, 0x7a, 0x77, 0x71, 0x85,\n", - " 0x8b, 0x99, 0x8f, 0x7b, 0x58, 0x98, 0x84, 0x6e, 0x9a, 0xa1, 0x7a, 0x8c,\n", - " 0x77, 0xa8, 0x86, 0x93, 0x7b, 0x90, 0x79, 0x8a, 0x85, 0x8f, 0x84, 0x97,\n", - " 0x73, 0x83, 0x7b, 0x76, 0x8e, 0xa1, 0x89, 0x8a, 0x83, 0x9c, 0x65, 0x68,\n", - " 0x7b, 0x89, 0x92, 0x84, 0x6d, 0x90, 0x61, 0x78, 0x98, 0x8c, 0x8d, 0x87,\n", - " 0xa0, 0x99, 0x79, 0x7b, 0x69, 0xa4, 0x7a, 0x8d, 0x73, 0x71, 0x70, 0x80,\n", - " 0x82, 0x77, 0x81, 0x67, 0x75, 0x97, 0x71, 0x73, 0x85, 0x6d, 0x8e, 0x86,\n", - " 0x6e, 0x80, 0x86, 0x9e, 0x6f, 0x70, 0x67, 0x59, 0x65, 0x89, 0x67, 0x8b,\n", - " 0x7d, 0x68, 0x69, 0x7a, 0x5b, 0x7e, 0x87, 0xa1, 0x92, 0x7b, 0x64, 0x7e,\n", - " 0x76, 0x72, 0x71, 0xab, 0x7c, 0x83, 0x6f, 0xa1, 0x86, 0x76, 0x71, 0x6f,\n", - " 0x91, 0x77, 0x6c, 0x71, 0x92, 0x78, 0x70, 0x7f, 0x6e, 0x65, 0x77, 0x93,\n", - " 0x7e, 0x6c, 0x85, 0x9d, 0x78, 0x8b, 0x7c, 0x5f, 0x94, 0x86, 0x7c, 0x7f,\n", - " 0x83, 0x6e, 0x72, 0x9e, 0x6e, 0x6b, 0x8d, 0x91, 0x97, 0x8b, 0x7b, 0x72,\n", - " 0x86, 0x75, 0x7f, 0x96, 0x7d, 0x81, 0xa1, 0x55, 0xa6, 0x88, 0x96, 0x87,\n", - " 0x93, 0x68, 0x89, 0x72, 0x6f, 0x9c, 0x75, 0x7c, 0x79, 0x6c, 0x74, 0x84,\n", - " 0x7d, 0xa4, 0x86, 0x84, 0x84, 0x8d, 0x63, 0x7a, 0x63, 0xbc, 0x7e, 0x93,\n", 
- " 0x80, 0x8d, 0x71, 0x7a, 0x5f, 0x8c, 0x74, 0x96, 0x7e, 0x9b, 0x9d, 0x8d,\n", - " 0x5b, 0xa4, 0x71, 0x5e, 0x83, 0x78, 0x86, 0x7f, 0x70, 0x99, 0x87, 0x85,\n", - " 0x8e, 0x81, 0x93, 0x80, 0x89, 0xa0, 0x7a, 0x77, 0x8e, 0x73, 0x5f, 0x80,\n", - " 0x6d, 0x87, 0x5b, 0x7a, 0x85, 0x7c, 0x85, 0x63, 0x61, 0x9d, 0x6f, 0x68,\n", - " 0x77, 0x86, 0x61, 0x6d, 0x84, 0x98, 0x7c, 0x78, 0x69, 0x84, 0x91, 0x6d,\n", - " 0x81, 0xa1, 0x6c, 0x62, 0x95, 0x6d, 0x86, 0x8b, 0x95, 0x8f, 0x5e, 0x86,\n", - " 0x73, 0xa1, 0x83, 0x58, 0x5f, 0x8e, 0x76, 0x79, 0x9e, 0x92, 0x7c, 0x7b,\n", - " 0x81, 0x8b, 0x83, 0x7b, 0x78, 0x75, 0x70, 0x83, 0x70, 0x5a, 0x6a, 0x59,\n", - " 0xa3, 0x82, 0x7a, 0x91, 0x8b, 0x6e, 0x82, 0x8e, 0x70, 0x73, 0x91, 0x76,\n", - " 0xa5, 0x7f, 0x70, 0x81, 0x6f, 0x85, 0x94, 0xa6, 0x8c, 0x50, 0x76, 0x6e,\n", - " 0x64, 0x95, 0xa0, 0x64, 0x6c, 0x68, 0x8e, 0x8b, 0xa1, 0x7d, 0xa0, 0x7f,\n", - " 0x76, 0x8b, 0x7b, 0x93, 0x7b, 0x6e, 0x7e, 0x64, 0x8a, 0xa7, 0x78, 0x64,\n", - " 0x93, 0x67, 0x7d, 0x68, 0x5c, 0xa0, 0x76, 0x98, 0xaf, 0x80, 0x55, 0x96,\n", - " 0x97, 0x9c, 0x78, 0x75, 0x87, 0x85, 0x77, 0x77, 0x62, 0x93, 0x76, 0x68,\n", - " 0xa0, 0x80, 0x81, 0x7f, 0x9a, 0x68, 0x74, 0x69, 0x94, 0x77, 0x77, 0x72,\n", - " 0x90, 0x9a, 0x6f, 0x95, 0x89, 0x6b, 0x6b, 0x94, 0x7e, 0x9c, 0x6f, 0x67,\n", - " 0x8f, 0x82, 0x80, 0x92, 0x76, 0x80, 0x65, 0x9b, 0x6a, 0x7c, 0x75, 0x5a,\n", - " 0x87, 0xa1, 0x69, 0x7a, 0x79, 0x9e, 0x9a, 0x58, 0x81, 0x92, 0x72, 0x67,\n", - " 0x90, 0x80, 0x82, 0x61, 0x9f, 0x9e, 0x6a, 0x8d, 0x8d, 0x8a, 0x73, 0x81,\n", - " 0x68, 0x7f, 0x5b, 0x59, 0x98, 0x89, 0x71, 0x72, 0x58, 0x7b, 0x94, 0x5d,\n", - " 0xa9, 0x8b, 0x72, 0x7b, 0x65, 0x73, 0x5b, 0x8b, 0x7d, 0x86, 0x6e, 0x8c,\n", - " 0x66, 0x6f, 0x6b, 0x8b, 0x71, 0x80, 0x7f, 0x70, 0x70, 0x88, 0x70, 0x7e,\n", - " 0x84, 0x89, 0x7f, 0x81, 0x87, 0x77, 0x71, 0x88, 0x7f, 0x8f, 0x5e, 0x80,\n", - " 0x5d, 0xa1, 0x89, 0x77, 0x93, 0x8e, 0x55, 0x64, 0x88, 0x9a, 0x8b, 0x80,\n", - " 0x77, 0x6f, 0x91, 0x83, 0x6b, 0x9b, 0x85, 0x5c, 0x57, 0x7e, 0xa9, 0x63,\n", 
- " 0x83, 0xaa, 0x7c, 0xa1, 0x91, 0x5f, 0x68, 0x76, 0x7a, 0x97, 0x96, 0x84,\n", - " 0xca, 0x8d, 0x8c, 0x8b, 0x71, 0x81, 0x88, 0x92, 0xaa, 0x74, 0x49, 0x7a,\n", - " 0x90, 0x93, 0x7a, 0x61, 0x8c, 0x66, 0x71, 0xa0, 0xab, 0x7d, 0x86, 0x6c,\n", - " 0x9f, 0x77, 0x67, 0x6a, 0x89, 0x89, 0x88, 0x70, 0xad, 0x88, 0x69, 0x84,\n", - " 0x70, 0x8f, 0x79, 0x7c, 0x66, 0xa6, 0x71, 0x8d, 0x77, 0x99, 0x69, 0x76,\n", - " 0x79, 0x7d, 0x9c, 0x6f, 0x64, 0x8b, 0x70, 0x82, 0x69, 0xa4, 0x65, 0x6e,\n", - " 0x7f, 0x9e, 0x7e, 0x84, 0x8c, 0x9c, 0x6c, 0x5b, 0x6e, 0xa7, 0x6d, 0x7a,\n", - " 0x92, 0x78, 0x9a, 0x6f, 0x81, 0x91, 0x71, 0x7d, 0x6b, 0x99, 0x6b, 0x92,\n", - " 0x5e, 0x7e, 0x64, 0x95, 0x78, 0x90, 0x6f, 0x68, 0x8a, 0x85, 0x6f, 0x88,\n", - " 0x64, 0x66, 0x7f, 0x78, 0x7c, 0x95, 0x66, 0x6c, 0x76, 0x6a, 0x9b, 0x8f,\n", - " 0x9d, 0x78, 0x86, 0x95, 0x73, 0x66, 0x6d, 0x71, 0x8b, 0x7f, 0x6f, 0x70,\n", - " 0x64, 0x94, 0xa0, 0x83, 0x6b, 0x6d, 0x85, 0x89, 0x68, 0x92, 0x8e, 0x51,\n", - " 0x81, 0x85, 0x86, 0x6e, 0x83, 0x85, 0x8a, 0x5e, 0x68, 0xbf, 0xc4, 0xa5,\n", - " 0x8b, 0x67, 0x86, 0x59, 0x85, 0x9e, 0x96, 0x67, 0x82, 0x7c, 0x6c, 0x80,\n", - " 0x84, 0xae, 0x9d, 0x80, 0xc2, 0x58, 0x5d, 0x95, 0x85, 0x8b, 0x7f, 0x5d,\n", - " 0xc7, 0x75, 0x75, 0x87, 0xa2, 0x8c, 0x62, 0x71, 0x9c, 0x61, 0x7f, 0x9c,\n", - " 0xca, 0x8d, 0x89, 0x6e, 0x7c, 0x71, 0x81, 0x99, 0x95, 0xa4, 0x76, 0x6f,\n", - " 0x64, 0x7b, 0x6c, 0x72, 0x8b, 0x83, 0x70, 0x70, 0x8b, 0xa4, 0x69, 0x76,\n", - " 0x6e, 0x8d, 0x7a, 0x80, 0x8f, 0x9e, 0x73, 0x4b, 0x75, 0x78, 0x77, 0x7b,\n", - " 0x8e, 0x92, 0x88, 0x49, 0x54, 0x9f, 0x7a, 0x7f, 0x68, 0x9f, 0x7f, 0x57,\n", - " 0x6b, 0xad, 0x85, 0x6f, 0x81, 0xa1, 0x96, 0x6f, 0x73, 0x8d, 0x5e, 0x65,\n", - " 0x7a, 0x8c, 0x7c, 0x6a, 0x7e, 0x7a, 0x6a, 0x97, 0x59, 0x86, 0x62, 0x77,\n", - " 0x70, 0x7a, 0x68, 0x62, 0x68, 0x86, 0x7e, 0x76, 0x9a, 0x7f, 0x6c, 0x7e,\n", - " 0x8a, 0x76, 0x65, 0x8f, 0x7d, 0x65, 0x76, 0xa4, 0x95, 0x62, 0x78, 0x97,\n", - " 0x7a, 0x6e, 0x7a, 0x7a, 0x7e, 0x91, 0x8c, 0x8a, 0x91, 0x82, 0x89, 0x6d,\n", 
- " 0x87, 0x90, 0x69, 0x71, 0x96, 0xa6, 0x7c, 0x7c, 0xa8, 0xa8, 0x62, 0x77,\n", - " 0x76, 0x99, 0xdd, 0x76, 0x8a, 0x5c, 0x86, 0x6a, 0x69, 0x9c, 0xa5, 0x7d,\n", - " 0x78, 0x6a, 0x88, 0x77, 0x77, 0xae, 0x8a, 0x99, 0xcb, 0x85, 0x59, 0x84,\n", - " 0x7b, 0x97, 0x8a, 0x82, 0xc5, 0x65, 0x8c, 0x93, 0xc3, 0x8c, 0x87, 0x64,\n", - " 0x91, 0x41, 0x70, 0xa8, 0xd1, 0x8b, 0x82, 0x71, 0x9c, 0x71, 0x4e, 0x86,\n", - " 0x98, 0x86, 0x7f, 0x7e, 0x69, 0x99, 0x79, 0x78, 0x77, 0xb3, 0x6b, 0x80,\n", - " 0x84, 0x8b, 0x56, 0x73, 0x84, 0x95, 0x82, 0x94, 0x5b, 0x92, 0x83, 0x46,\n", - " 0x66, 0x89, 0x6d, 0x61, 0x99, 0xa6, 0x99, 0x3f, 0x6c, 0xab, 0x5d, 0x5f,\n", - " 0x6c, 0x8e, 0x6b, 0x4a, 0x72, 0xb6, 0x6c, 0x75, 0x78, 0xa6, 0x6f, 0x5b,\n", - " 0x56, 0x8b, 0x57, 0x74, 0x8f, 0xab, 0x53, 0x56, 0x5d, 0x63, 0x63, 0x8b,\n", - " 0x65, 0x78, 0x71, 0x67, 0x7a, 0x62, 0x8d, 0x78, 0x99, 0x76, 0x94, 0x7a,\n", - " 0xa3, 0x70, 0x55, 0x87, 0x7e, 0x7c, 0x57, 0x57, 0x6e, 0x79, 0x94, 0x8f,\n", - " 0x86, 0x80, 0x90, 0x7d, 0x7d, 0x7f, 0x7f, 0x68, 0x41, 0x86, 0x8c, 0x6f,\n", - " 0x8a, 0x7f, 0x87, 0x8a, 0x7e, 0x7f, 0x5d, 0x71, 0x91, 0x81, 0x93, 0x71,\n", - " 0x91, 0xc6, 0x70, 0x4a, 0x74, 0xa8, 0xf3, 0x72, 0xa7, 0x80, 0x7e, 0x41,\n", - " 0x84, 0xa3, 0xb6, 0x94, 0xba, 0x84, 0x70, 0x74, 0x71, 0xac, 0x9f, 0x9d,\n", - " 0xe4, 0x67, 0x6a, 0x87, 0x92, 0x8e, 0x92, 0x82, 0xdb, 0x5e, 0x9b, 0x90,\n", - " 0xd5, 0x87, 0x8d, 0x7c, 0x9c, 0x3c, 0x6c, 0xab, 0xc2, 0x86, 0x83, 0x79,\n", - " 0x6c, 0x61, 0x51, 0xa9, 0x99, 0x79, 0x72, 0x80, 0x6f, 0x85, 0x57, 0x6c,\n", - " 0x81, 0x86, 0x6e, 0x88, 0x87, 0x8d, 0x8e, 0x81, 0x67, 0x88, 0x62, 0x99,\n", - " 0x87, 0xab, 0x8f, 0x57, 0x60, 0x77, 0x64, 0x81, 0x96, 0xa3, 0x81, 0x3d,\n", - " 0x4e, 0xb9, 0x57, 0x6e, 0x99, 0xad, 0x6a, 0x3e, 0x74, 0x96, 0x7e, 0x79,\n", - " 0x65, 0xa4, 0x7c, 0x6a, 0x53, 0x87, 0x56, 0x6f, 0x5e, 0x97, 0x85, 0x42,\n", - " 0x56, 0x6b, 0x67, 0x78, 0x7d, 0xa6, 0x7c, 0x7c, 0x7d, 0x78, 0x7b, 0x84,\n", - " 0x99, 0x7b, 0x89, 0x71, 0x76, 0x8b, 0x76, 0x73, 0x7d, 0x83, 0x56, 0x4f,\n", 
- " 0x86, 0x72, 0x83, 0x88, 0x6a, 0x93, 0x69, 0x90, 0x6c, 0x73, 0x6f, 0x63,\n", - " 0x55, 0x88, 0x6b, 0x88, 0x7c, 0x86, 0x87, 0x7b, 0x6c, 0x7e, 0x60, 0x57,\n", - " 0xa8, 0x81, 0xa3, 0x72, 0xba, 0xbf, 0x66, 0x65, 0x70, 0xb9, 0xe4, 0x78,\n", - " 0x99, 0x67, 0x8c, 0x72, 0x88, 0x96, 0xb5, 0x72, 0x8a, 0x66, 0x81, 0x39,\n", - " 0x85, 0x93, 0xa0, 0x9c, 0xdf, 0x74, 0x8a, 0x6d, 0x93, 0xa1, 0x8c, 0x7a,\n", - " 0xb5, 0x4b, 0x89, 0xae, 0xba, 0x9c, 0x96, 0x9a, 0xb4, 0x33, 0x5a, 0xb1,\n", - " 0xcd, 0x88, 0x84, 0x63, 0x8c, 0x5e, 0x71, 0x6d, 0xa7, 0x8a, 0x62, 0x85,\n", - " 0x77, 0x75, 0x62, 0x79, 0x96, 0x73, 0x4f, 0x7d, 0x93, 0x8a, 0x88, 0x7e,\n", - " 0x59, 0x6c, 0x7f, 0x87, 0x6f, 0x91, 0x88, 0x59, 0x6d, 0x83, 0x70, 0x7c,\n", - " 0x7f, 0x8d, 0x7f, 0x26, 0x41, 0xcf, 0x6b, 0x6e, 0x75, 0xa3, 0x90, 0x5e,\n", - " 0x3a, 0x94, 0x61, 0x9a, 0x6f, 0x9f, 0x69, 0x7d, 0x55, 0x8c, 0x60, 0x7c,\n", - " 0x93, 0x85, 0x85, 0x4b, 0x54, 0x71, 0x60, 0x8a, 0x6d, 0x8c, 0x9c, 0x7e,\n", - " 0x5b, 0x79, 0x74, 0x7b, 0x7b, 0x9d, 0x5b, 0x65, 0x81, 0x82, 0x66, 0x89,\n", - " 0x82, 0x72, 0x77, 0x78, 0x75, 0x76, 0x6b, 0x74, 0x89, 0x73, 0x6c, 0x6b,\n", - " 0x77, 0x7e, 0x67, 0x84, 0x41, 0x90, 0x58, 0x87, 0x98, 0x60, 0x96, 0x81,\n", - " 0x6b, 0x74, 0x7d, 0x56, 0x72, 0x71, 0x9a, 0x7d, 0xc5, 0xd0, 0x88, 0x6e,\n", - " 0x4d, 0xbe, 0xef, 0x8a, 0xa7, 0x92, 0x82, 0x67, 0x7f, 0x91, 0xc5, 0x7d,\n", - " 0xad, 0x77, 0x6b, 0x4e, 0x8e, 0x99, 0x9b, 0x8e, 0xc7, 0x7f, 0x8a, 0x8e,\n", - " 0x8f, 0x87, 0x9c, 0x75, 0xb0, 0x53, 0x75, 0x97, 0xc7, 0x98, 0xa4, 0xa4,\n", - " 0x80, 0x41, 0x79, 0xc3, 0xdb, 0x86, 0x9d, 0x75, 0x7f, 0x67, 0x7a, 0x96,\n", - " 0xc3, 0x83, 0x54, 0x8e, 0x6f, 0xa8, 0x7c, 0x65, 0x78, 0x7e, 0x59, 0xa3,\n", - " 0x8a, 0x97, 0x8b, 0x82, 0x5e, 0x66, 0x82, 0x9b, 0x9e, 0x9f, 0x70, 0x49,\n", - " 0x55, 0x88, 0x8a, 0x7e, 0x90, 0xa7, 0x6b, 0x3b, 0x28, 0xc0, 0x63, 0x7e,\n", - " 0x60, 0x90, 0x7c, 0x3f, 0x54, 0x9c, 0x7d, 0x8a, 0x6a, 0xa9, 0x6f, 0x61,\n", - " 0x76, 0x86, 0x64, 0x88, 0x72, 0xa5, 0x6b, 0x4d, 0x56, 0x6c, 0x52, 0xa1,\n", 
- " 0x84, 0x69, 0x69, 0x5b, 0x71, 0x84, 0x76, 0x9b, 0x92, 0x70, 0x86, 0x8b,\n", - " 0x71, 0x68, 0x56, 0x92, 0x76, 0x8f, 0x8f, 0x72, 0x5a, 0x77, 0x6f, 0x92,\n", - " 0x72, 0x72, 0x5e, 0x7a, 0x70, 0x73, 0x60, 0x7d, 0x5a, 0x93, 0x7f, 0x6b,\n", - " 0x89, 0x6b, 0xa1, 0x85, 0x5c, 0x8d, 0x76, 0x7c, 0x6f, 0x73, 0x96, 0x6d,\n", - " 0xbb, 0xad, 0x53, 0x53, 0x5f, 0x9a, 0xe2, 0x8d, 0xa7, 0x6d, 0x8a, 0x5b,\n", - " 0x85, 0x9c, 0xb4, 0x7b, 0xb3, 0x52, 0x75, 0x7f, 0x7a, 0x8c, 0x91, 0x7e,\n", - " 0xca, 0x5f, 0x64, 0x71, 0x85, 0x9a, 0x91, 0x72, 0xbd, 0x6e, 0x9b, 0x81,\n", - " 0x8f, 0xa8, 0xac, 0x7d, 0xb4, 0x5f, 0x45, 0xc5, 0xc8, 0x7a, 0x93, 0x8e,\n", - " 0x7b, 0x41, 0x69, 0x94, 0x8b, 0x76, 0x59, 0x81, 0x73, 0x92, 0x8e, 0x63,\n", - " 0x8e, 0x74, 0x33, 0xa5, 0x9c, 0xa2, 0x88, 0x48, 0x5d, 0x8c, 0x7d, 0xa6,\n", - " 0x68, 0x9a, 0x6f, 0x58, 0x6c, 0x8f, 0x77, 0x65, 0x97, 0x9d, 0x7a, 0x37,\n", - " 0x59, 0xab, 0x6e, 0x8f, 0x7a, 0xae, 0x65, 0x3e, 0x46, 0xa9, 0x82, 0x82,\n", - " 0x9c, 0x9d, 0x62, 0x79, 0x66, 0x7f, 0x5e, 0x88, 0x9e, 0x8f, 0x84, 0x71,\n", - " 0x5d, 0x6d, 0x70, 0xa0, 0x69, 0x92, 0x7f, 0x70, 0x66, 0x6f, 0x75, 0x8c,\n", - " 0x96, 0x7a, 0x85, 0x6a, 0x5a, 0x7c, 0x72, 0x8a, 0x8d, 0x7b, 0x8b, 0x5c,\n", - " 0x76, 0x69, 0x70, 0x7f, 0x74, 0xa1, 0x71, 0x91, 0x5a, 0x8c, 0x6e, 0x83,\n", - " 0x52, 0x78, 0x71, 0x6d, 0xa9, 0x63, 0x9d, 0x81, 0x52, 0x9e, 0x5d, 0x60,\n", - " 0x76, 0x93, 0x97, 0x67, 0xce, 0xc1, 0x75, 0x5e, 0x5f, 0x8c, 0xea, 0x76,\n", - " 0xad, 0x7a, 0x7d, 0x62, 0x85, 0x92, 0xd0, 0x6a, 0xbc, 0x53, 0x55, 0x5c,\n", - " 0x6d, 0x89, 0x9e, 0x71, 0xd2, 0x8b, 0x64, 0x61, 0x85, 0x9a, 0x77, 0x75,\n", - " 0xb9, 0x67, 0x8a, 0xac, 0x90, 0x8a, 0xb4, 0x91, 0xbb, 0x58, 0x94, 0xaf,\n", - " 0xb2, 0x76, 0xa2, 0x71, 0x95, 0x5e, 0x73, 0xa5, 0x92, 0x8c, 0x52, 0x96,\n", - " 0x53, 0x95, 0x84, 0x91, 0x93, 0x7a, 0x40, 0x88, 0xab, 0xa5, 0x63, 0x70,\n", - " 0x66, 0x88, 0x7e, 0x92, 0x89, 0x84, 0x78, 0x57, 0x3d, 0x8d, 0x84, 0x77,\n", - " 0x9b, 0x87, 0x5e, 0x4e, 0x42, 0xa0, 0x76, 0x8a, 0x77, 0x90, 0x83, 0x4c,\n", 
- " 0x42, 0x9b, 0x75, 0x7a, 0x88, 0x94, 0x98, 0x69, 0x4c, 0xa2, 0x6b, 0x7b,\n", - " 0x6e, 0x9b, 0x5d, 0x5f, 0x53, 0x6a, 0x63, 0x95, 0x69, 0x8a, 0x61, 0x75,\n", - " 0x6c, 0x7a, 0x58, 0x89, 0x84, 0x8f, 0x6b, 0x5a, 0x71, 0x6f, 0x59, 0x89,\n", - " 0x7d, 0x87, 0x5f, 0x77, 0x4b, 0x61, 0x77, 0x92, 0x67, 0x8e, 0x5c, 0x6f,\n", - " 0x5b, 0x77, 0x76, 0x6b, 0x44, 0x9d, 0x9f, 0x7f, 0x8b, 0x94, 0x9e, 0x7c,\n", - " 0x62, 0x94, 0x60, 0x55, 0x77, 0x8f, 0xa6, 0x62, 0xb5, 0xb2, 0x3c, 0x61,\n", - " 0x5c, 0x99, 0xeb, 0x5b, 0x90, 0x6c, 0x7f, 0x5f, 0x75, 0xa6, 0xcf, 0x77,\n", - " 0x98, 0x5d, 0x75, 0x69, 0x7f, 0x8a, 0xa7, 0x73, 0xc8, 0x74, 0x70, 0x82,\n", - " 0x76, 0x8f, 0xa2, 0x7a, 0xa4, 0x7a, 0x66, 0x81, 0x9b, 0x8f, 0x9e, 0x8b,\n", - " 0xa1, 0x51, 0x7b, 0xba, 0xc8, 0x90, 0xab, 0x92, 0x72, 0x57, 0x5b, 0xa3,\n", - " 0xb0, 0x7f, 0x4c, 0x7d, 0x5f, 0x8e, 0x6c, 0x7d, 0x71, 0x7e, 0x4e, 0x87,\n", - " 0xb7, 0x97, 0x7a, 0x4c, 0x5f, 0x72, 0x78, 0x84, 0x82, 0x7e, 0x63, 0x65,\n", - " 0x68, 0x78, 0x73, 0x85, 0x90, 0x99, 0x80, 0x57, 0x42, 0x8b, 0x8a, 0x77,\n", - " 0x71, 0x97, 0x6d, 0x44, 0x41, 0x8f, 0x78, 0x7d, 0x95, 0x81, 0x95, 0x5f,\n", - " 0x64, 0x87, 0x66, 0x80, 0x89, 0x9a, 0x61, 0x4d, 0x68, 0x7b, 0x72, 0x73,\n", - " 0x85, 0x92, 0x77, 0x7d, 0x73, 0x77, 0x54, 0x7a, 0x77, 0x7d, 0x7d, 0x7a,\n", - " 0x6e, 0x8e, 0x4f, 0x7d, 0x80, 0x9a, 0x79, 0x8b, 0x7b, 0x68, 0x6e, 0x86,\n", - " 0x7f, 0x93, 0x7a, 0x76, 0x72, 0x85, 0x6a, 0x7b, 0x57, 0x84, 0x96, 0x9a,\n", - " 0x8f, 0x91, 0x9b, 0x72, 0x73, 0x91, 0x53, 0x66, 0x76, 0x80, 0xae, 0x63,\n", - " 0xbf, 0x99, 0x5e, 0x77, 0x73, 0x9c, 0xd8, 0x74, 0xa7, 0x79, 0x52, 0x64,\n", - " 0x82, 0x95, 0xc7, 0x4f, 0xa8, 0x4f, 0x6d, 0x42, 0x7c, 0x89, 0xab, 0x83,\n", - " 0xc0, 0x82, 0x6a, 0x5f, 0x83, 0x92, 0xa8, 0x76, 0xc1, 0x77, 0x6e, 0x7b,\n", - " 0xa3, 0x9b, 0xaf, 0x87, 0xab, 0x60, 0x8d, 0xc2, 0xd2, 0x83, 0xb2, 0x78,\n", - " 0x8d, 0x39, 0x57, 0x9c, 0x90, 0x8e, 0x6e, 0x6a, 0x74, 0x79, 0x81, 0x6d,\n", - " 0x6f, 0x8e, 0x77, 0x92, 0x93, 0x7d, 0x5f, 0x68, 0x6a, 0x6c, 0x80, 0x8f,\n", 
- " 0x99, 0x84, 0x4f, 0x64, 0x5c, 0x93, 0x7c, 0x91, 0x98, 0x82, 0x62, 0x3f,\n", - " 0x41, 0x9f, 0x5d, 0x89, 0x98, 0x89, 0x73, 0x50, 0x32, 0xa8, 0xa0, 0x7a,\n", - " 0xa0, 0x95, 0x78, 0x69, 0x74, 0x7c, 0x89, 0x7b, 0x80, 0x65, 0x56, 0x6b,\n", - " 0x69, 0x78, 0x62, 0x87, 0xaf, 0x94, 0x7a, 0x64, 0x53, 0x86, 0x45, 0x99,\n", - " 0x88, 0x79, 0x4d, 0x74, 0x59, 0x91, 0x5f, 0x7b, 0x88, 0x90, 0x80, 0x86,\n", - " 0x7d, 0x7b, 0x64, 0xa3, 0x7f, 0x74, 0x89, 0x80, 0x7d, 0x7c, 0x7a, 0x87,\n", - " 0x5f, 0x8a, 0x5a, 0x72, 0x79, 0x74, 0x8c, 0x7c, 0x86, 0x91, 0x6e, 0x5d,\n", - " 0x61, 0x8e, 0xa2, 0x68, 0xd4, 0x92, 0x67, 0x66, 0x62, 0xa1, 0xf3, 0x63,\n", - " 0x91, 0x81, 0x74, 0x5f, 0x88, 0x98, 0xbb, 0x5a, 0x9b, 0x54, 0x6a, 0x5c,\n", - " 0x75, 0x88, 0xad, 0x7c, 0xb4, 0x7c, 0x69, 0x74, 0x84, 0x76, 0x9d, 0x9a,\n", - " 0xb0, 0x91, 0x5d, 0xa3, 0xa4, 0x7f, 0xbb, 0x80, 0xa4, 0x5d, 0x83, 0xaf,\n", - " 0xb7, 0x66, 0xb0, 0x7f, 0x89, 0x4b, 0x72, 0x9e, 0x99, 0x7c, 0x66, 0x71,\n", - " 0x6a, 0x6f, 0x6d, 0x67, 0x8d, 0x6d, 0x46, 0xa5, 0x9b, 0x84, 0x7a, 0x61,\n", - " 0x64, 0x5c, 0x88, 0x89, 0x95, 0x8c, 0x70, 0x4b, 0x6c, 0x85, 0x83, 0x8b,\n", - " 0x98, 0x87, 0x6a, 0x44, 0x4d, 0x9d, 0x78, 0x71, 0x78, 0x7e, 0x91, 0x5b,\n", - " 0x3f, 0x9f, 0x80, 0x62, 0xa7, 0x95, 0x5d, 0x74, 0x65, 0x9c, 0x6d, 0x7a,\n", - " 0x98, 0x79, 0x80, 0x61, 0x49, 0x82, 0x65, 0x92, 0x80, 0x96, 0x7c, 0x72,\n", - " 0x4f, 0x76, 0x5e, 0x8d, 0x97, 0xa5, 0x72, 0x57, 0x79, 0x87, 0x67, 0x87,\n", - " 0x80, 0x84, 0x7c, 0x6f, 0x66, 0x6b, 0x70, 0x9b, 0x64, 0x90, 0x59, 0x96,\n", - " 0x7a, 0x6f, 0x75, 0x89, 0x4e, 0x8a, 0x62, 0x6e, 0x9c, 0x8c, 0x9a, 0x78,\n", - " 0x8e, 0x91, 0x3d, 0x50, 0x72, 0x92, 0x9f, 0x63, 0xda, 0x92, 0x72, 0x60,\n", - " 0x59, 0xa6, 0xd0, 0x56, 0xc1, 0x6b, 0x5e, 0x76, 0x6e, 0x81, 0xbb, 0x4b,\n", - " 0xbb, 0x59, 0x68, 0x4f, 0x77, 0x87, 0xa1, 0x73, 0xbf, 0x65, 0x56, 0x67,\n", - " 0x77, 0x84, 0x8a, 0x7e, 0xb8, 0x85, 0x66, 0xa6, 0x99, 0xa0, 0xa5, 0x73,\n", - " 0x8d, 0x4a, 0x7d, 0xab, 0xb0, 0x6a, 0x94, 0x84, 0x87, 0x4c, 0x74, 0xa3,\n", 
- " 0xb3, 0xa9, 0x62, 0x7a, 0x71, 0x7f, 0x53, 0x79, 0x7a, 0x7c, 0x5e, 0x8f,\n", - " 0xa0, 0x90, 0x5c, 0x76, 0x6c, 0x92, 0x70, 0x9c, 0xb3, 0x8b, 0x7e, 0x57,\n", - " 0x5b, 0x9d, 0x96, 0x85, 0x70, 0x93, 0x8b, 0x67, 0x4c, 0x9c, 0x6a, 0x83,\n", - " 0x84, 0x90, 0x8e, 0x60, 0x56, 0xb3, 0x87, 0x7d, 0x86, 0x88, 0x79, 0x5b,\n", - " 0x58, 0x94, 0x92, 0x8e, 0x90, 0x76, 0x58, 0x51, 0x52, 0x63, 0x57, 0x88,\n", - " 0x9b, 0x7a, 0x85, 0x6c, 0x8b, 0x87, 0x5f, 0x8b, 0x90, 0x92, 0x81, 0x64,\n", - " 0x52, 0x8b, 0x77, 0x94, 0x96, 0x98, 0x69, 0x5b, 0x79, 0x87, 0x61, 0x96,\n", - " 0x7b, 0x9a, 0x61, 0x74, 0x7e, 0x8b, 0x82, 0x92, 0x4f, 0x87, 0x7f, 0x80,\n", - " 0x74, 0x97, 0x98, 0x7a, 0x79, 0x97, 0x65, 0x67, 0x66, 0xb1, 0xb1, 0x49,\n", - " 0xd6, 0x97, 0x58, 0x47, 0x62, 0x94, 0xd5, 0x82, 0xa0, 0x60, 0x3f, 0x67,\n", - " 0x6c, 0x9d, 0xb6, 0x58, 0xb1, 0x6e, 0x58, 0x4e, 0x7c, 0x83, 0x8b, 0x83,\n", - " 0xd5, 0x62, 0x8d, 0x84, 0x84, 0x8c, 0xa9, 0x6e, 0xac, 0x7f, 0x6d, 0x88,\n", - " 0xab, 0x8b, 0xb1, 0x77, 0x9b, 0x46, 0x76, 0xa7, 0xb8, 0x7b, 0xc5, 0x6e,\n", - " 0x73, 0x62, 0x68, 0x95, 0xab, 0x7c, 0x6f, 0x74, 0x56, 0x71, 0x61, 0x83,\n", - " 0x8a, 0x73, 0x54, 0x94, 0x86, 0x91, 0x60, 0x69, 0x65, 0x6b, 0x76, 0x85,\n", - " 0xae, 0x87, 0x8f, 0x55, 0x41, 0x98, 0x68, 0x87, 0x5e, 0x7a, 0x80, 0x38,\n", - " 0x50, 0xaf, 0x93, 0x79, 0x57, 0x96, 0x7b, 0x53, 0x4e, 0xc0, 0xa0, 0x85,\n", - " 0x87, 0x95, 0x86, 0x70, 0x4c, 0x9f, 0x77, 0x7d, 0x8b, 0x7a, 0x7b, 0x6d,\n", - " 0x57, 0x74, 0x81, 0x7d, 0xa2, 0x79, 0x64, 0x6c, 0x55, 0x70, 0x3c, 0x88,\n", - " 0x8a, 0x7a, 0x58, 0x72, 0x71, 0x7d, 0x6a, 0x8d, 0x78, 0x7e, 0x95, 0x8b,\n", - " 0x84, 0x7e, 0x73, 0x7c, 0x7e, 0x67, 0x89, 0x8b, 0x6d, 0x68, 0x66, 0x73,\n", - " 0x5a, 0x93, 0x82, 0x85, 0x97, 0x6b, 0x9a, 0x72, 0x51, 0xa2, 0x4f, 0x67,\n", - " 0x67, 0x7e, 0xbb, 0x37, 0xe3, 0x9c, 0x57, 0x5b, 0x6f, 0xa0, 0xdc, 0x5c,\n", - " 0xa6, 0x7c, 0x71, 0x77, 0x72, 0x88, 0xd0, 0x4d, 0x93, 0x58, 0x74, 0x6d,\n", - " 0x8f, 0x77, 0xa3, 0x76, 0xb7, 0x76, 0x6d, 0x6d, 0x6f, 0x7b, 0xaa, 0x6d,\n", 
- " 0xaa, 0x6a, 0x72, 0x98, 0x8d, 0x98, 0xb0, 0x52, 0x76, 0x5d, 0x61, 0xb7,\n", - " 0xac, 0x90, 0xa5, 0x75, 0x7e, 0x3d, 0x5b, 0x9a, 0xbf, 0x81, 0x83, 0x7b,\n", - " 0x5c, 0x77, 0x74, 0x82, 0x8d, 0x7e, 0x4f, 0x9f, 0x8f, 0x97, 0x7c, 0x75,\n", - " 0x5b, 0x73, 0x97, 0x73, 0x85, 0x7f, 0x70, 0x5a, 0x53, 0x81, 0x81, 0x89,\n", - " 0x73, 0x8d, 0x8a, 0x5c, 0x5f, 0x84, 0x86, 0x6f, 0x76, 0x78, 0x82, 0x6d,\n", - " 0x4f, 0xbb, 0x91, 0x61, 0x7e, 0x97, 0x6c, 0x67, 0x62, 0x83, 0x61, 0x7d,\n", - " 0x89, 0x76, 0x7b, 0x67, 0x56, 0x74, 0x49, 0x7b, 0x6b, 0x8b, 0x89, 0x74,\n", - " 0x5b, 0x7f, 0x78, 0x7b, 0x80, 0x7e, 0x63, 0x71, 0x5e, 0x91, 0x81, 0x92,\n", - " 0x7b, 0x90, 0x9c, 0x7a, 0x73, 0x85, 0x79, 0x9b, 0x66, 0x93, 0x60, 0x87,\n", - " 0x79, 0x69, 0x73, 0x8b, 0x53, 0x8c, 0x8d, 0x68, 0x93, 0xa0, 0x91, 0x65,\n", - " 0x57, 0x8d, 0x71, 0x65, 0x6c, 0x7e, 0xb3, 0x4f, 0xc7, 0xaa, 0x5a, 0x77,\n", - " 0x6e, 0x85, 0xe4, 0x6c, 0xa3, 0x89, 0x69, 0x54, 0x6d, 0x99, 0xb9, 0x77,\n", - " 0xa0, 0x80, 0x85, 0x71, 0x70, 0x78, 0x99, 0x66, 0xaf, 0x8a, 0x59, 0x64,\n", - " 0x54, 0x62, 0xbf, 0x5c, 0xbd, 0x77, 0x7f, 0xab, 0x95, 0x85, 0xaa, 0x6e,\n", - " 0xaa, 0x5a, 0x7b, 0x9f, 0xc3, 0x65, 0x93, 0x64, 0x7c, 0x2d, 0x4e, 0x8f,\n", - " 0xb2, 0x5f, 0x4e, 0x61, 0x64, 0x73, 0x56, 0x75, 0x79, 0x90, 0x5c, 0x81,\n", - " 0x8a, 0x8c, 0x70, 0x64, 0x74, 0x86, 0x86, 0x82, 0xab, 0x7e, 0x62, 0x4f,\n", - " 0x51, 0x89, 0x7b, 0x88, 0x73, 0x97, 0x77, 0x75, 0x5c, 0x9e, 0x97, 0x70,\n", - " 0x5a, 0x98, 0x7a, 0x54, 0x47, 0x99, 0xab, 0x5d, 0x91, 0xa0, 0x64, 0x51,\n", - " 0x57, 0x88, 0x88, 0x85, 0x81, 0x83, 0xa1, 0x89, 0x6a, 0x88, 0x69, 0x81,\n", - " 0x92, 0x63, 0x6a, 0x71, 0x72, 0x6a, 0x75, 0x8e, 0x90, 0x9d, 0x69, 0x60,\n", - " 0x73, 0x95, 0x79, 0x7b, 0x79, 0x7f, 0x77, 0x6e, 0x69, 0x63, 0x60, 0xa0,\n", - " 0x84, 0x91, 0x80, 0x96, 0x92, 0x70, 0x69, 0x7c, 0x3f, 0x90, 0x5c, 0x79,\n", - " 0x82, 0x63, 0x8d, 0x63, 0x56, 0x8a, 0x8e, 0x7a, 0x5c, 0x8d, 0xb8, 0x4e,\n", - " 0xb6, 0x84, 0x57, 0x79, 0x59, 0x79, 0xe8, 0x7e, 0xa8, 0x71, 0x61, 0x62,\n", 
- " 0x89, 0x71, 0xb7, 0x83, 0x7b, 0x53, 0x86, 0x88, 0x74, 0x71, 0xb1, 0x61,\n", - " 0xae, 0x7e, 0x8f, 0x69, 0x6b, 0x69, 0xb2, 0x6d, 0xb1, 0x7f, 0x5c, 0x9f,\n", - " 0xaa, 0x8c, 0xbd, 0x74, 0xaa, 0x5b, 0x7f, 0xa5, 0xb0, 0x6e, 0xc1, 0x5c,\n", - " 0x94, 0x34, 0x5b, 0xa6, 0xbc, 0x49, 0x75, 0x5b, 0x6e, 0x74, 0x7a, 0x92,\n", - " 0x92, 0x79, 0x78, 0x8a, 0x9e, 0x97, 0x7c, 0x5f, 0x76, 0x86, 0x59, 0x81,\n", - " 0x83, 0x7a, 0x65, 0x5b, 0x42, 0x95, 0x84, 0x99, 0x81, 0x8d, 0x6a, 0x5e,\n", - " 0x59, 0xb7, 0x96, 0x8a, 0x77, 0x86, 0x7a, 0x67, 0x3b, 0xa8, 0xae, 0x7a,\n", - " 0xa0, 0x97, 0x6c, 0x73, 0x5b, 0x9b, 0x77, 0x84, 0x7a, 0x77, 0x75, 0x6f,\n", - " 0x7d, 0x7a, 0x71, 0x86, 0x6c, 0x6f, 0x7d, 0x71, 0x68, 0x60, 0x64, 0x86,\n", - " 0x90, 0x75, 0x6a, 0x61, 0x60, 0x87, 0x68, 0x99, 0x87, 0x7e, 0x92, 0x87,\n", - " 0x87, 0x5f, 0x60, 0x91, 0x68, 0x8c, 0x7b, 0x67, 0x79, 0x5d, 0x67, 0x77,\n", - " 0x47, 0x72, 0x76, 0x88, 0x82, 0xa2, 0x7a, 0x5d, 0x64, 0x87, 0x75, 0x78,\n", - " 0x5e, 0x6f, 0xa4, 0x52, 0xc2, 0x9d, 0x81, 0x89, 0x55, 0x86, 0xc9, 0x6f,\n", - " 0x95, 0x71, 0x9d, 0x87, 0x95, 0x74, 0xac, 0x7f, 0x95, 0x6c, 0x68, 0x66,\n", - " 0x8a, 0x5f, 0x96, 0x69, 0x95, 0x79, 0x7f, 0x71, 0x86, 0x7e, 0x98, 0x71,\n", - " 0xac, 0x8f, 0x75, 0xa5, 0xac, 0x7a, 0xca, 0x63, 0xa0, 0x63, 0x69, 0xbf,\n", - " 0xae, 0x62, 0xc9, 0x46, 0x74, 0x2c, 0x66, 0x96, 0xb7, 0x70, 0x7c, 0x6b,\n", - " 0x7b, 0x90, 0x72, 0x74, 0x8d, 0x5f, 0x63, 0x93, 0x97, 0x78, 0x79, 0x64,\n", - " 0x67, 0x84, 0x64, 0x82, 0x90, 0x83, 0x91, 0x5f, 0x72, 0x93, 0x91, 0xae,\n", - " 0x6d, 0x99, 0x5b, 0x69, 0x54, 0x9f, 0x97, 0x80, 0x80, 0xa4, 0x91, 0x66,\n", - " 0x65, 0xa4, 0xa7, 0x7b, 0x97, 0x87, 0x72, 0x68, 0x6a, 0x96, 0x7b, 0x79,\n", - " 0x69, 0x83, 0x6f, 0x85, 0x6b, 0x92, 0x7f, 0x71, 0x84, 0x87, 0x6a, 0x7b,\n", - " 0x63, 0x72, 0x5f, 0x87, 0x98, 0x7b, 0x96, 0x71, 0x62, 0x90, 0x71, 0xa3,\n", - " 0x8c, 0x77, 0x90, 0x6f, 0x83, 0x76, 0x65, 0x87, 0x72, 0x8a, 0x64, 0x87,\n", - " 0x75, 0x75, 0x6d, 0x84, 0x54, 0x89, 0x88, 0xa0, 0x87, 0x73, 0x7f, 0x6f,\n", 
- " 0x5f, 0x90, 0x5e, 0x94, 0x5d, 0x61, 0xa6, 0x56, 0xb3, 0x91, 0x95, 0x75,\n", - " 0x4d, 0x74, 0xd9, 0x87, 0x92, 0x74, 0x7f, 0x79, 0x97, 0x6e, 0x90, 0x54,\n", - " 0x84, 0x5d, 0x5f, 0x75, 0x8b, 0x84, 0xa6, 0x75, 0xb4, 0x77, 0x78, 0x85,\n", - " 0x90, 0x76, 0xbd, 0x78, 0xd1, 0xa0, 0x5d, 0x96, 0xa9, 0x7c, 0xc1, 0x61,\n", - " 0xc2, 0x71, 0x8b, 0xa5, 0xa5, 0x5b, 0xc8, 0x50, 0x7b, 0x4b, 0x93, 0x99,\n", - " 0xae, 0x72, 0x67, 0x54, 0x81, 0x89, 0x96, 0x81, 0x6e, 0x68, 0x55, 0x7f,\n", - " 0x93, 0x8c, 0x5e, 0x65, 0x6c, 0x84, 0x7f, 0x8f, 0x9e, 0x7b, 0x73, 0x7f,\n", - " 0x51, 0x63, 0x8a, 0x8b, 0x6b, 0x9b, 0x9d, 0x57, 0x68, 0x89, 0x98, 0x70,\n", - " 0x73, 0xa3, 0x7f, 0x69, 0x44, 0x89, 0xae, 0x68, 0x89, 0x80, 0x7e, 0x6d,\n", - " 0x70, 0x95, 0x85, 0x65, 0x91, 0x7f, 0x66, 0x74, 0x96, 0x72, 0x60, 0x7a,\n", - " 0x87, 0x85, 0x79, 0x54, 0x53, 0x6c, 0x88, 0x87, 0xa9, 0x90, 0x75, 0x8b,\n", - " 0x69, 0x98, 0x7d, 0x95, 0x85, 0x7a, 0x8b, 0x82, 0x87, 0x6f, 0x86, 0x7f,\n", - " 0x74, 0xab, 0x93, 0x6c, 0x8a, 0x78, 0x68, 0x81, 0x62, 0x88, 0x78, 0x91,\n", - " 0x8b, 0x55, 0xa7, 0x58, 0x64, 0x88, 0x71, 0x93, 0x7d, 0x69, 0xbc, 0x58,\n", - " 0xbe, 0x9a, 0x6f, 0x74, 0x6f, 0x7f, 0xeb, 0x9e, 0xb7, 0x60, 0x63, 0x98,\n", - " 0x82, 0x77, 0x94, 0x63, 0x80, 0x6f, 0x7d, 0x8f, 0x8b, 0x85, 0xa5, 0x62,\n", - " 0xad, 0x86, 0x5f, 0x76, 0x88, 0x74, 0xa5, 0x66, 0xa5, 0x94, 0x88, 0x9b,\n", - " 0x87, 0x9e, 0xa8, 0x5a, 0xc9, 0x81, 0x92, 0xcd, 0xb5, 0x67, 0xb9, 0x63,\n", - " 0x86, 0x65, 0x8d, 0xad, 0x98, 0x7c, 0x8a, 0x40, 0x67, 0x65, 0x60, 0x71,\n", - " 0x8e, 0x84, 0x73, 0x64, 0x98, 0x80, 0x73, 0x81, 0x48, 0x75, 0x71, 0x9e,\n", - " 0x73, 0x89, 0x89, 0x68, 0x73, 0xa6, 0x84, 0x8a, 0x7e, 0x9f, 0x78, 0x83,\n", - " 0x60, 0x77, 0xa1, 0x87, 0x76, 0xab, 0x74, 0x57, 0x6d, 0x99, 0xa5, 0x5e,\n", - " 0x9d, 0x91, 0x6d, 0x6a, 0x76, 0x9c, 0x7b, 0x66, 0x96, 0x84, 0x85, 0x6e,\n", - " 0x6c, 0x75, 0x86, 0x6a, 0x71, 0x67, 0x8a, 0x66, 0x66, 0x68, 0x73, 0x90,\n", - " 0x92, 0x68, 0x8f, 0x71, 0x82, 0x7e, 0x71, 0xad, 0x9f, 0x84, 0x9e, 0x7d,\n", 
- " 0x77, 0x6b, 0x67, 0x8f, 0x73, 0x9a, 0x91, 0x74, 0x8a, 0x74, 0x5a, 0x87,\n", - " 0x37, 0x80, 0x8c, 0x8f, 0x7f, 0x75, 0xa8, 0x49, 0x63, 0x9b, 0x67, 0x68,\n", - " 0x4f, 0x87, 0xbf, 0x59, 0x9c, 0xbe, 0x93, 0x7e, 0x6f, 0x8a, 0xea, 0x77,\n", - " 0x83, 0x7a, 0x75, 0x8e, 0x7d, 0x50, 0x95, 0x60, 0x74, 0x60, 0x6f, 0x97,\n", - " 0x72, 0x5c, 0xa3, 0x6d, 0xb9, 0x86, 0x7b, 0x89, 0x9a, 0x76, 0xc7, 0x56,\n", - " 0xba, 0x86, 0x8d, 0x93, 0xa9, 0x98, 0xbb, 0x6a, 0x97, 0x74, 0x68, 0x84,\n", - " 0xc3, 0x65, 0xb6, 0x68, 0x89, 0x58, 0x87, 0xa1, 0xac, 0x60, 0x65, 0x68,\n", - " 0x7d, 0x98, 0x67, 0x8f, 0x8e, 0x84, 0x50, 0x75, 0x83, 0x91, 0x8a, 0x90,\n", - " 0x66, 0x74, 0x96, 0x89, 0x81, 0x7a, 0x7a, 0x64, 0x7f, 0x73, 0x8f, 0x95,\n", - " 0x8c, 0x89, 0x96, 0x76, 0x7a, 0x6c, 0x89, 0x91, 0x6d, 0x84, 0x68, 0x8d,\n", - " 0x47, 0x94, 0x9a, 0x67, 0x8f, 0x89, 0x8e, 0x79, 0x73, 0xa8, 0x7f, 0x6c,\n", - " 0x80, 0x64, 0x75, 0x81, 0x96, 0x9c, 0x68, 0x65, 0x76, 0x68, 0x74, 0x72,\n", - " 0x68, 0x76, 0x62, 0x6d, 0x6e, 0x6a, 0x84, 0x65, 0x8a, 0x73, 0x76, 0x91,\n", - " 0x78, 0x7c, 0x7a, 0x88, 0x6a, 0x87, 0x60, 0x99, 0x88, 0x75, 0x7b, 0x71,\n", - " 0x81, 0x7b, 0x76, 0x7d, 0x58, 0x75, 0x65, 0xa3, 0x95, 0x7e, 0x96, 0x3e,\n", - " 0x4c, 0x97, 0x86, 0x7a, 0x62, 0x92, 0xd1, 0x72, 0x8e, 0xaa, 0x85, 0x8e,\n", - " 0x59, 0x5f, 0xec, 0x77, 0x96, 0x66, 0x91, 0x9a, 0x89, 0x6c, 0xa2, 0x69,\n", - " 0x7d, 0x6e, 0x76, 0x63, 0x82, 0x72, 0x9c, 0x72, 0xa3, 0x75, 0x85, 0x7b,\n", - " 0x6d, 0x96, 0xc2, 0x69, 0xa7, 0x6a, 0x6b, 0x83, 0xa2, 0x7d, 0xce, 0x5c,\n", - " 0x94, 0x61, 0x7d, 0xae, 0xc3, 0x6d, 0x9f, 0x3c, 0x52, 0x4d, 0x8e, 0x92,\n", - " 0xae, 0x6e, 0x70, 0x5a, 0x76, 0x84, 0x7f, 0x72, 0x92, 0x72, 0x76, 0x5e,\n", - " 0x73, 0x8e, 0x82, 0x6d, 0x72, 0x81, 0x79, 0x94, 0x81, 0x88, 0x8b, 0x81,\n", - " 0x72, 0x72, 0x69, 0x84, 0x59, 0x6e, 0x74, 0x7d, 0x66, 0x74, 0x8d, 0x7b,\n", - " 0x7d, 0x7e, 0x7a, 0x83, 0x4d, 0x7e, 0x6a, 0x5a, 0x87, 0x66, 0x84, 0xa5,\n", - " 0x50, 0x5d, 0x6a, 0x8e, 0x87, 0x74, 0x88, 0x7c, 0x7d, 0x6c, 0x93, 0x98,\n", 
- " 0x8c, 0x76, 0x7f, 0xa3, 0x6e, 0x5d, 0x7d, 0x9f, 0x7c, 0x7a, 0x98, 0x88,\n", - " 0x74, 0x73, 0x50, 0x8c, 0x78, 0x8b, 0x71, 0x77, 0x9d, 0x56, 0x71, 0x85,\n", - " 0x6b, 0x8a, 0x93, 0x82, 0x8c, 0x79, 0x68, 0x8b, 0x57, 0x7b, 0x7c, 0x8a,\n", - " 0x6c, 0x87, 0x98, 0x54, 0x63, 0x7e, 0x78, 0x6b, 0x63, 0x77, 0xc1, 0x52,\n", - " 0xcd, 0xab, 0x75, 0x8e, 0x64, 0x68, 0xce, 0x68, 0x88, 0x6d, 0x67, 0x6d,\n", - " 0x68, 0x76, 0xa7, 0x78, 0x83, 0x67, 0x65, 0x5b, 0x8f, 0x63, 0x90, 0x5b,\n", - " 0xa1, 0x6f, 0x6a, 0x88, 0x70, 0x5c, 0x78, 0x49, 0xbc, 0x85, 0x8d, 0x8e,\n", - " 0xa3, 0x90, 0x97, 0x84, 0xa2, 0x46, 0x7a, 0x8e, 0x9e, 0xb1, 0xaa, 0x53,\n", - " 0x7d, 0x6b, 0x72, 0x86, 0x8c, 0x67, 0x6b, 0x48, 0x6f, 0x9c, 0x51, 0x94,\n", - " 0x6d, 0x66, 0x8e, 0x90, 0x79, 0x81, 0x66, 0x9f, 0x82, 0x9f, 0x98, 0x97,\n", - " 0x7c, 0x86, 0x7f, 0x57, 0x57, 0x83, 0x97, 0x8f, 0x73, 0x6f, 0x75, 0x6c,\n", - " 0x56, 0x8f, 0x7f, 0x73, 0x71, 0x84, 0x7d, 0x5f, 0x69, 0x69, 0x8e, 0x67,\n", - " 0x8a, 0x7f, 0x8c, 0x5a, 0x7a, 0x67, 0x82, 0x5a, 0x7a, 0x68, 0x73, 0x58,\n", - " 0x84, 0x83, 0x8d, 0x6d, 0x83, 0x72, 0x80, 0x7a, 0x8e, 0x7a, 0x68, 0x88,\n", - " 0x65, 0x74, 0x78, 0x73, 0x83, 0x97, 0x7b, 0x84, 0x77, 0x6d, 0x95, 0x99,\n", - " 0x76, 0x69, 0x5f, 0x9b, 0x7c, 0x75, 0x91, 0x80, 0x7b, 0x73, 0x6f, 0x9f,\n", - " 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x08, 0x00, 0x07, 0x00, 0x0c, 0x00,\n", - " 0x10, 0x00, 0x14, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,\n", - " 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,\n", - " 0x28, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n", - " 0x0b, 0x00, 0x00, 0x00, 0x4d, 0x61, 0x74, 0x4d, 0x75, 0x6c, 0x5f, 0x62,\n", - " 0x69, 0x61, 0x73, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00,\n", - " 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,\n", - " 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n", - " 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n", 
- " 0xaa, 0xcc, 0xe2, 0x37, 0x10, 0x00, 0x00, 0x00, 0xd6, 0x01, 0x00, 0x00,\n", - " 0xfd, 0xfd, 0xff, 0xff, 0x53, 0xfe, 0xff, 0xff, 0x74, 0x01, 0x00, 0x00,\n", - " 0x03, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00,\n", - " 0x04, 0x00, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x09,\n", - " 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,\n", - " 0x24, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n", - " 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,\n", - " 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00,\n", - " 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x14, 0x00, 0x1c, 0x00,\n", - " 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x07, 0x00, 0x14, 0x00, 0x00, 0x00,\n", - " 0x00, 0x00, 0x18, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08,\n", - " 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,\n", - " 0x28, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,\n", - " 0x02, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n", - " 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n", - " 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x14, 0x00, 0x18, 0x00,\n", - " 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x10, 0x00, 0x00, 0x00,\n", - " 0x00, 0x00, 0x14, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,\n", - " 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,\n", - " 0x1c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,\n", - " 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n", - " 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,\n", - " 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x00, 0x00,\n", - " 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,\n", - " 0x03, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,\n", 
- " 0x04, 0x00, 0x00, 0x00, 0xfa, 0xff, 0xff, 0xff, 0x00, 0x19, 0x06, 0x00,\n", - " 0x06, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x09, 0x06, 0x00,\n", - " 0x08, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04\n", - "};\n", - "unsigned int g_model_len = 18288;\n" - ], - "name": "stdout" - } - ] - } - ] -} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"train_micro_speech_model.ipynb","provenance":[{"file_id":"https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/micro/examples/micro_speech/train/train_micro_speech_model.ipynb","timestamp":1587690382292}],"collapsed_sections":[],"toc_visible":true},"kernelspec":{"name":"python3","display_name":"Python 3"},"accelerator":"GPU"},"cells":[{"cell_type":"markdown","metadata":{"id":"pO4-CY_TCZZS","colab_type":"text"},"source":["# Train a Simple Audio Recognition Model"]},{"cell_type":"markdown","metadata":{"id":"BaFfr7DHRmGF","colab_type":"text"},"source":["This notebook demonstrates how to train a 20 kB [Simple Audio Recognition](https://www.tensorflow.org/tutorials/sequences/audio_recognition) model to recognize keywords in speech.\n","\n","The model created in this notebook is used in the [micro_speech](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/micro/examples/micro_speech) example for [TensorFlow Lite for MicroControllers](https://www.tensorflow.org/lite/microcontrollers/overview).\n","\n","\n"," \n"," \n","
\n"," Run in Google Colab\n"," \n"," View source on GitHub\n","
\n"]},{"cell_type":"markdown","metadata":{"id":"XaVtYN4nlCft","colab_type":"text"},"source":["**Training is much faster using GPU acceleration.** Before you proceed, ensure you are using a GPU runtime by going to **Runtime -> Change runtime type** and set **Hardware accelerator: GPU**. Training 15,000 iterations will take 1.5 - 2 hours on a GPU runtime.\n","\n","## Configure Defaults\n","\n","**MODIFY** the following constants for your specific use case."]},{"cell_type":"code","metadata":{"id":"ludfxbNIaegy","colab_type":"code","outputId":"1667d949-267c-4588-fe25-c0674d1dd074","executionInfo":{"status":"ok","timestamp":1588895159583,"user_tz":420,"elapsed":3711,"user":{"displayName":"Pete Warden","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg9RGhKK9hlUJPY0U8OJIEUEeTc3V08ZIBIs175=s64","userId":"17073007660171926128"}},"colab":{"base_uri":"https://localhost:8080/","height":85}},"source":["# A comma-delimited list of the words you want to train for.\n","# The options are: yes,no,up,down,left,right,on,off,stop,go\n","# All the other words will be used to train an \"unknown\" label and silent\n","# audio data with no spoken words will be used to train a \"silence\" label.\n","WANTED_WORDS = \"yes,no\"\n","\n","# The number of steps and learning rates can be specified as comma-separated\n","# lists to define the rate at each stage. 
For example,\n","# TRAINING_STEPS=12000,3000 and LEARNING_RATE=0.001,0.0001\n","# will run 15,000 training loops in total, with a rate of 0.001 for the first\n","# 12,000, and 0.0001 for the final 3,000.\n","TRAINING_STEPS = \"12000,3000\"\n","LEARNING_RATE = \"0.001,0.0001\"\n","\n","# Calculate the total number of steps, which is used to identify the checkpoint\n","# file name.\n","TOTAL_STEPS = str(sum(map(lambda string: int(string), TRAINING_STEPS.split(\",\"))))\n","\n","# Print the configuration to confirm it\n","!echo \"Training these words:\" $WANTED_WORDS\n","!echo \"Training steps in each stage:\" $TRAINING_STEPS\n","!echo \"Learning rate in each stage:\" $LEARNING_RATE\n","!echo \"Total number of training steps:\" $TOTAL_STEPS"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Training these words: yes,no\n","Training steps in each stage: 12000,3000\n","Learning rate in each stage: 0.001,0.0001\n","Total number of training steps: 15000\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"gCgeOpvY9pAi","colab_type":"text"},"source":["**DO NOT MODIFY** the following constants as they include filepaths used in this notebook and data that is shared during training and inference."]},{"cell_type":"code","metadata":{"id":"Nd1iM1o2ymvA","colab_type":"code","colab":{}},"source":["# Calculate the percentage of 'silence' and 'unknown' training samples required\n","# to ensure that we have an equal number of samples for each label.\n","number_of_labels = WANTED_WORDS.count(',') + 1\n","number_of_total_labels = number_of_labels + 2 # for 'silence' and 'unknown' label\n","equal_percentage_of_training_samples = int(100.0/(number_of_total_labels))\n","SILENT_PERCENTAGE = equal_percentage_of_training_samples\n","UNKNOWN_PERCENTAGE = equal_percentage_of_training_samples\n","\n","# Constants which are shared during training and inference\n","PREPROCESS = 'micro'\n","WINDOW_STRIDE =20\n","MODEL_ARCHITECTURE = 'tiny_conv' # Other options include: 
single_fc, conv,\n"," # low_latency_conv, low_latency_svdf, tiny_embedding_conv\n","\n","# Constants used during training only\n","VERBOSITY = 'WARN'\n","EVAL_STEP_INTERVAL = '1000'\n","SAVE_STEP_INTERVAL = '1000'\n","\n","# Constants for training directories and filepaths\n","DATASET_DIR = 'dataset/'\n","LOGS_DIR = 'logs/'\n","TRAIN_DIR = 'train/' # for training checkpoints and other files.\n","\n","# Constants for inference directories and filepaths\n","import os\n","MODELS_DIR = 'models'\n","if not os.path.exists(MODELS_DIR):\n"," os.mkdir(MODELS_DIR)\n","MODEL_TF = os.path.join(MODELS_DIR, 'model.pb')\n","MODEL_TFLITE = os.path.join(MODELS_DIR, 'model.tflite')\n","FLOAT_MODEL_TFLITE = os.path.join(MODELS_DIR, 'float_model.tflite')\n","MODEL_TFLITE_MICRO = os.path.join(MODELS_DIR, 'model.cc')\n","SAVED_MODEL = os.path.join(MODELS_DIR, 'saved_model')\n","\n","QUANT_INPUT_MIN = 0.0\n","QUANT_INPUT_MAX = 9.8077\n","QUANT_INPUT_RANGE = QUANT_INPUT_MAX - QUANT_INPUT_MIN"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"6rLYpvtg9P4o","colab_type":"text"},"source":["## Setup Environment\n","\n","Install Dependencies"]},{"cell_type":"code","metadata":{"id":"ed_XpUrU5DvY","colab_type":"code","colab":{}},"source":["%tensorflow_version 1.x\n","import tensorflow as tf"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"T9Ty5mR58E4i","colab_type":"text"},"source":["**DELETE** any old data from previous runs\n"]},{"cell_type":"code","metadata":{"id":"APGx0fEh7hFF","colab_type":"code","colab":{}},"source":["!rm -rf {DATASET_DIR} {LOGS_DIR} {TRAIN_DIR} {MODELS_DIR}"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"GfEUlfFBizio","colab_type":"text"},"source":["Clone the TensorFlow Github Repository, which contains the relevant code required to run this tutorial."]},{"cell_type":"code","metadata":{"id":"yZArmzT85SLq","colab_type":"code","colab":{}},"source":["!git clone -q --depth 1 
https://github.com/tensorflow/tensorflow"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"nS9swHLSi7Bi","colab_type":"text"},"source":["Load TensorBoard to visualize the accuracy and loss as training proceeds.\n"]},{"cell_type":"code","metadata":{"id":"q4qF1VxP3UE4","colab_type":"code","colab":{}},"source":["%load_ext tensorboard\n","%tensorboard --logdir {LOGS_DIR}"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"x1J96Ron-O4R","colab_type":"text"},"source":["## Training\n","\n","The following script downloads the dataset and begins training."]},{"cell_type":"code","metadata":{"id":"VJsEZx6lynbY","colab_type":"code","colab":{}},"source":["!python tensorflow/tensorflow/examples/speech_commands/train.py \\\n","--data_dir={DATASET_DIR} \\\n","--wanted_words={WANTED_WORDS} \\\n","--silence_percentage={SILENT_PERCENTAGE} \\\n","--unknown_percentage={UNKNOWN_PERCENTAGE} \\\n","--preprocess={PREPROCESS} \\\n","--window_stride={WINDOW_STRIDE} \\\n","--model_architecture={MODEL_ARCHITECTURE} \\\n","--how_many_training_steps={TRAINING_STEPS} \\\n","--learning_rate={LEARNING_RATE} \\\n","--train_dir={TRAIN_DIR} \\\n","--summaries_dir={LOGS_DIR} \\\n","--verbosity={VERBOSITY} \\\n","--eval_step_interval={EVAL_STEP_INTERVAL} \\\n","--save_step_interval={SAVE_STEP_INTERVAL}"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"XQUJLrdS-ftl","colab_type":"text"},"source":["## Generate a TensorFlow Model for Inference\n","\n","Combine relevant training results (graph, weights, etc) into a single file for inference. 
This process is known as freezing a model and the resulting model is known as a frozen model/graph, as it cannot be further re-trained after this process."]},{"cell_type":"code","metadata":{"id":"xyc3_eLh9sAg","colab_type":"code","colab":{}},"source":["!rm -rf {SAVED_MODEL}\n","!python tensorflow/tensorflow/examples/speech_commands/freeze.py \\\n","--wanted_words=$WANTED_WORDS \\\n","--window_stride_ms=$WINDOW_STRIDE \\\n","--preprocess=$PREPROCESS \\\n","--model_architecture=$MODEL_ARCHITECTURE \\\n","--start_checkpoint=$TRAIN_DIR$MODEL_ARCHITECTURE'.ckpt-'$TOTAL_STEPS \\\n","--save_format=saved_model \\\n","--output_file={SAVED_MODEL}"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"_DBGDxVI-nKG","colab_type":"text"},"source":["## Generate a TensorFlow Lite Model\n","\n","Convert the frozen graph into a TensorFlow Lite model, which is fully quantized for use with embedded devices.\n","\n","The following cell will also print the model size, which will be under 20 kilobytes."]},{"cell_type":"code","metadata":{"id":"RIitkqvGWmre","colab_type":"code","colab":{}},"source":["import sys\n","# We add this path so we can import the speech processing modules.\n","sys.path.append(\"/content/tensorflow/tensorflow/examples/speech_commands/\")\n","import input_data\n","import models\n","import numpy as np"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"kzqECqMxgBh4","colab_type":"code","colab":{}},"source":["SAMPLE_RATE = 16000\n","CLIP_DURATION_MS = 1000\n","WINDOW_SIZE_MS = 30.0\n","FEATURE_BIN_COUNT = 40\n","BACKGROUND_FREQUENCY = 0.8\n","BACKGROUND_VOLUME_RANGE = 0.1\n","TIME_SHIFT_MS = 100.0\n","\n","DATA_URL = 'https://storage.googleapis.com/download.tensorflow.org/data/speech_commands_v0.02.tar.gz'\n","VALIDATION_PERCENTAGE = 10\n","TESTING_PERCENTAGE = 10"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"rNQdAplJV1fz","colab_type":"code","colab":{}},"source":["model_settings = 
models.prepare_model_settings(\n"," len(input_data.prepare_words_list(WANTED_WORDS.split(','))),\n"," SAMPLE_RATE, CLIP_DURATION_MS, WINDOW_SIZE_MS,\n"," WINDOW_STRIDE, FEATURE_BIN_COUNT, PREPROCESS)\n","audio_processor = input_data.AudioProcessor(\n"," DATA_URL, DATASET_DIR,\n"," SILENT_PERCENTAGE, UNKNOWN_PERCENTAGE,\n"," WANTED_WORDS.split(','), VALIDATION_PERCENTAGE,\n"," TESTING_PERCENTAGE, model_settings, LOGS_DIR)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"lBj_AyCh1cC0","colab_type":"code","colab":{}},"source":["with tf.Session() as sess:\n"," float_converter = tf.lite.TFLiteConverter.from_saved_model(SAVED_MODEL)\n"," float_tflite_model = float_converter.convert()\n"," float_tflite_model_size = open(FLOAT_MODEL_TFLITE, \"wb\").write(float_tflite_model)\n"," print(\"Float model is %d bytes\" % float_tflite_model_size)\n","\n"," converter = tf.lite.TFLiteConverter.from_saved_model(SAVED_MODEL)\n"," converter.optimizations = [tf.lite.Optimize.DEFAULT]\n"," converter.quantized_input_stats = {\"Reshape_1\": (QUANT_INPUT_MIN, QUANT_INPUT_MAX)}\n"," def representative_dataset_gen():\n"," for i in range(100):\n"," data, _ = audio_processor.get_data(1, i*1, model_settings,\n"," BACKGROUND_FREQUENCY, \n"," BACKGROUND_VOLUME_RANGE,\n"," TIME_SHIFT_MS,\n"," 'testing',\n"," sess)\n"," flattened_data = np.array(data.flatten(), dtype=np.float32).reshape(1, 1960)\n"," yield [flattened_data]\n"," converter.representative_dataset = representative_dataset_gen\n"," tflite_model = converter.convert()\n"," tflite_model_size = open(MODEL_TFLITE, \"wb\").write(tflite_model)\n"," print(\"Quantized model is %d bytes\" % tflite_model_size)\n"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"EeLiDZTbLkzv","colab_type":"text"},"source":["# Testing the TensorFlow Lite model's accuracy\n","\n","Verify that the model we've exported is still accurate, using the TF Lite Python API and our test 
set."]},{"cell_type":"code","metadata":{"id":"wQsEteKRLryJ","colab_type":"code","outputId":"d4a7c3eb-3d74-40e6-9eb5-7d2ffc5e3b6d","executionInfo":{"status":"ok","timestamp":1588901109389,"user_tz":420,"elapsed":9673,"user":{"displayName":"Pete Warden","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg9RGhKK9hlUJPY0U8OJIEUEeTc3V08ZIBIs175=s64","userId":"17073007660171926128"}},"colab":{"base_uri":"https://localhost:8080/","height":51}},"source":["with tf.Session() as sess:\n"," test_data, test_labels = audio_processor.get_data(\n"," -1, 0, model_settings, BACKGROUND_FREQUENCY, BACKGROUND_VOLUME_RANGE,\n"," TIME_SHIFT_MS, 'testing', sess)\n","\n","float_interpreter = tf.lite.Interpreter(FLOAT_MODEL_TFLITE)\n","float_interpreter.allocate_tensors()\n","\n","float_input_index = float_interpreter.get_input_details()[0][\"index\"]\n","\n","float_output_index = float_interpreter.get_output_details()[0][\"index\"]\n","float_model_output = float_interpreter.tensor(float_output_index)\n","\n","float_correct_predictions = 0\n","for i in range(len(test_data)):\n"," current_input = test_data[i]\n"," current_label = test_labels[i]\n"," flattened_input = np.array(current_input.flatten(), dtype=np.float32).reshape(1, 1960)\n"," float_interpreter.set_tensor(float_input_index, flattened_input)\n"," float_interpreter.invoke()\n"," top_prediction = float_model_output()[0].argmax()\n"," if top_prediction == current_label:\n"," float_correct_predictions += 1\n","\n","print('Float accuracy is %f%% (N=%d)' % ((float_correct_predictions * 100) / len(test_data), len(test_data)))\n","\n","interpreter = tf.lite.Interpreter(MODEL_TFLITE)\n","interpreter.allocate_tensors()\n","\n","input_index = interpreter.get_input_details()[0][\"index\"]\n","\n","output_index = interpreter.get_output_details()[0][\"index\"]\n","model_output = interpreter.tensor(output_index)\n","\n","with tf.Session() as sess:\n"," test_data, test_labels = audio_processor.get_data(\n"," -1, 0, model_settings, 
BACKGROUND_FREQUENCY, BACKGROUND_VOLUME_RANGE,\n"," TIME_SHIFT_MS, 'testing', sess)\n","\n","correct_predictions = 0\n","for i in range(len(test_data)):\n"," current_input = test_data[i]\n"," current_label = test_labels[i]\n"," flattened_input = np.array(current_input.flatten(), dtype=np.float32).reshape(1, 1960)\n"," interpreter.set_tensor(input_index, flattened_input)\n"," interpreter.invoke()\n"," top_prediction = model_output()[0].argmax()\n"," if top_prediction == current_label:\n"," correct_predictions += 1\n","\n","print('Quantized accuracy is %f%% (N=%d)' % ((correct_predictions * 100) / len(test_data), len(test_data)))\n"],"execution_count":15,"outputs":[{"output_type":"stream","text":["Float accuracy is 91.343042% (N=1236)\n","Quantized accuracy is 90.857605% (N=1236)\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"dt6Zqbxu-wIi","colab_type":"text"},"source":["## Generate a TensorFlow Lite for MicroControllers Model\n","Convert the TensorFlow Lite model into a C source file that can be loaded by TensorFlow Lite for Microcontrollers."]},{"cell_type":"code","metadata":{"id":"XohZOTjR8ZyE","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":442},"outputId":"415d733c-86c4-4f19-9aa0-edc4112e6efb","executionInfo":{"status":"ok","timestamp":1588901187730,"user_tz":420,"elapsed":11964,"user":{"displayName":"Pete Warden","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg9RGhKK9hlUJPY0U8OJIEUEeTc3V08ZIBIs175=s64","userId":"17073007660171926128"}}},"source":["# Install xxd if it is not available\n","!apt-get update && apt-get -qq install xxd\n","# Convert to a C source file\n","!xxd -i {MODEL_TFLITE} > {MODEL_TFLITE_MICRO}\n","# Update variable names\n","REPLACE_TEXT = MODEL_TFLITE.replace('/', '_').replace('.', '_')\n","!sed -i 's/'{REPLACE_TEXT}'/g_model/g' {MODEL_TFLITE_MICRO}"],"execution_count":16,"outputs":[{"output_type":"stream","text":["Get:1 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 
kB]\n","Ign:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 InRelease\n","Get:3 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran35/ InRelease [3,626 B]\n","Hit:4 http://archive.ubuntu.com/ubuntu bionic InRelease\n","Hit:5 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic InRelease\n","Ign:6 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 InRelease\n","Hit:7 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 Release\n","Hit:8 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 Release\n","Get:9 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]\n","Get:10 http://ppa.launchpad.net/marutter/c2d4u3.5/ubuntu bionic InRelease [15.4 kB]\n","Get:11 http://security.ubuntu.com/ubuntu bionic-security/main amd64 Packages [908 kB]\n","Get:12 http://security.ubuntu.com/ubuntu bionic-security/universe amd64 Packages [844 kB]\n","Get:13 http://archive.ubuntu.com/ubuntu bionic-backports InRelease [74.6 kB]\n","Get:16 http://ppa.launchpad.net/marutter/c2d4u3.5/ubuntu bionic/main Sources [1,814 kB]\n","Get:17 http://archive.ubuntu.com/ubuntu bionic-updates/universe amd64 Packages [1,376 kB]\n","Get:18 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 Packages [1,205 kB]\n","Get:19 http://ppa.launchpad.net/marutter/c2d4u3.5/ubuntu bionic/main amd64 Packages [875 kB]\n","Fetched 7,294 kB in 3s (2,429 kB/s)\n","Reading package lists... Done\n","Selecting previously unselected package xxd.\n","(Reading database ... 
144429 files and directories currently installed.)\n","Preparing to unpack .../xxd_2%3a8.0.1453-1ubuntu1.3_amd64.deb ...\n","Unpacking xxd (2:8.0.1453-1ubuntu1.3) ...\n","Setting up xxd (2:8.0.1453-1ubuntu1.3) ...\n","Processing triggers for man-db (2.8.3-2ubuntu0.1) ...\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2pQnN0i_-0L2","colab_type":"text"},"source":["## Deploy to a Microcontroller\n","\n","Follow the instructions in the [micro_speech](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/micro/examples/micro_speech) README.md for [TensorFlow Lite for MicroControllers](https://www.tensorflow.org/lite/microcontrollers/overview) to deploy this model on a specific microcontroller.\n","\n","**Reference Model:** If you have not modified this notebook, you can follow the instructions as is, to deploy the model. Refer to the [`micro_speech/train/models`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/micro/examples/micro_speech/train/models) directory to access the models generated in this notebook. 
\n","\n","**New Model:** If you have generated a new model to identify different words: (i) Update `kCategoryCount` and `kCategoryLabels` in [`micro_speech/micro_features/micro_model_settings.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/micro/examples/micro_speech/micro_features/micro_model_settings.h) and (ii) Update the values assigned to the variables defined in [`micro_speech/micro_features/model.cc`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/micro/examples/micro_speech/micro_features/model.cc) with values displayed after running the following cell."]},{"cell_type":"code","metadata":{"id":"eoYyh0VU8pca","colab_type":"code","outputId":"dbaba37d-8a8d-4e11-d780-478971d9ee95","colab":{"base_uri":"https://localhost:8080/","height":1000},"executionInfo":{"status":"ok","timestamp":1588901241295,"user_tz":420,"elapsed":1288,"user":{"displayName":"Pete Warden","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg9RGhKK9hlUJPY0U8OJIEUEeTc3V08ZIBIs175=s64","userId":"17073007660171926128"}}},"source":["# Print the C source file\n","!cat {MODEL_TFLITE_MICRO}"],"execution_count":17,"outputs":[{"output_type":"stream","text":["unsigned char g_model[] = {\n"," 0x1c, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x00, 0x00, 0x12, 0x00,\n"," 0x1c, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00,\n"," 0x00, 0x00, 0x18, 0x00, 0x12, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,\n"," 0x64, 0x49, 0x00, 0x00, 0x34, 0x42, 0x00, 0x00, 0x1c, 0x42, 0x00, 0x00,\n"," 0x3c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n"," 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x04, 0x00, 0x08, 0x00,\n"," 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,\n"," 0x13, 0x00, 0x00, 0x00, 0x6d, 0x69, 0x6e, 0x5f, 0x72, 0x75, 0x6e, 0x74,\n"," 0x69, 0x6d, 0x65, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x00,\n"," 0x0c, 0x00, 0x00, 0x00, 0xd4, 0x41, 0x00, 0x00, 0xb4, 0x41, 0x00, 0x00,\n"," 0x24, 
0x03, 0x00, 0x00, 0xf4, 0x02, 0x00, 0x00, 0xec, 0x02, 0x00, 0x00,\n"," 0xe4, 0x02, 0x00, 0x00, 0xc4, 0x02, 0x00, 0x00, 0xbc, 0x02, 0x00, 0x00,\n"," 0x2c, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,\n"," 0x04, 0x00, 0x00, 0x00, 0xee, 0xbc, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,\n"," 0x05, 0x00, 0x00, 0x00, 0x31, 0x2e, 0x35, 0x2e, 0x30, 0x00, 0x00, 0x00,\n"," 0xd0, 0xb9, 0xff, 0xff, 0xd4, 0xb9, 0xff, 0xff, 0x0a, 0xbd, 0xff, 0xff,\n"," 0x04, 0x00, 0x00, 0x00, 0x80, 0x02, 0x00, 0x00, 0xd4, 0x3e, 0x2e, 0xa6,\n"," 0xd9, 0x4c, 0x23, 0x25, 0xd3, 0x2f, 0x09, 0xcb, 0xf6, 0x04, 0xc4, 0x1d,\n"," 0xe5, 0x46, 0xf2, 0xcf, 0xd5, 0x53, 0x0c, 0x2b, 0x28, 0x06, 0xf8, 0xe9,\n"," 0xe1, 0xdb, 0xdd, 0xf0, 0xbe, 0x0c, 0xfc, 0xa5, 0xb9, 0x1b, 0xca, 0x13,\n"," 0x0d, 0xed, 0x0b, 0xd3, 0xff, 0xc8, 0x0d, 0xee, 0x04, 0xfe, 0xe1, 0x08,\n"," 0xd9, 0xec, 0x26, 0x06, 0x0c, 0xcb, 0x1b, 0xc3, 0xf8, 0x81, 0xd5, 0xbc,\n"," 0xc8, 0x48, 0xe6, 0x46, 0x0e, 0x34, 0x09, 0x0c, 0xea, 0x23, 0xe0, 0x14,\n"," 0x17, 0xf5, 0xe0, 0x07, 0xe2, 0x3a, 0xaa, 0xea, 0x05, 0x5f, 0x26, 0x31,\n"," 0x4e, 0xf6, 0xce, 0xe6, 0x0b, 0xed, 0xa7, 0xea, 0xbe, 0x08, 0xa4, 0x1b,\n"," 0xd0, 0x50, 0x11, 0x2a, 0x16, 0xd3, 0xca, 0x11, 0xeb, 0xd8, 0xcb, 0xeb,\n"," 0xfc, 0xee, 0xa5, 0x12, 0xda, 0x19, 0xfd, 0x1e, 0x1e, 0xc1, 0xc8, 0xe7,\n"," 0xfc, 0x99, 0xae, 0xca, 0xe9, 0x57, 0x19, 0xe8, 0x1e, 0xff, 0xc4, 0xef,\n"," 0xdc, 0x0d, 0x25, 0xef, 0x1c, 0xef, 0x2e, 0xed, 0xf3, 0x39, 0xd6, 0x76,\n"," 0xe5, 0x4b, 0xb2, 0x2d, 0x4a, 0xf0, 0xf5, 0xcb, 0xc7, 0xf4, 0xbe, 0xea,\n"," 0xcb, 0xed, 0xce, 0x0a, 0xa4, 0x69, 0x1a, 0x34, 0x0a, 0xdc, 0xca, 0x37,\n"," 0xd4, 0xdf, 0x34, 0xe6, 0xf1, 0xd2, 0xb9, 0x1d, 0xb1, 0x42, 0xa3, 0x3a,\n"," 0x0f, 0xc0, 0xc3, 0x0a, 0xcf, 0xc4, 0xe7, 0xd2, 0xfa, 0x62, 0x14, 0x18,\n"," 0x49, 0xe1, 0x07, 0xe2, 0xec, 0x29, 0x4c, 0xd0, 0x53, 0xda, 0xdb, 0xe8,\n"," 0xf9, 0x2f, 0x0e, 0xf6, 0x17, 0x2a, 0x23, 0x29, 0x7d, 0xec, 0x04, 0x2b,\n"," 0x27, 0xf8, 0xb2, 0xdc, 0xbf, 0xec, 0xec, 0xb0, 0xe4, 0x62, 0x01, 0x42,\n"," 
0x28, 0xe2, 0x13, 0xe7, 0x13, 0xf3, 0xd3, 0xe1, 0xf7, 0xc3, 0xee, 0xf9,\n"," 0xc4, 0x62, 0xfc, 0x58, 0x12, 0xc5, 0x02, 0x19, 0xe3, 0xe1, 0xf0, 0xe8,\n"," 0xc4, 0x5e, 0xf9, 0xf3, 0x31, 0xce, 0xf0, 0xc0, 0xf8, 0x2e, 0x34, 0x37,\n"," 0x7f, 0xc7, 0xa1, 0xdf, 0xf3, 0x31, 0xf8, 0xed, 0x27, 0x11, 0xc9, 0x19,\n"," 0x72, 0xf3, 0x18, 0x1b, 0x2b, 0xe6, 0xef, 0xd8, 0xd1, 0xd4, 0x14, 0xf8,\n"," 0xd5, 0x51, 0x40, 0x42, 0x2d, 0xe5, 0x0b, 0x94, 0x03, 0xf4, 0xde, 0xdf,\n"," 0xf1, 0xc0, 0x08, 0xf9, 0xc4, 0x71, 0xf5, 0x75, 0x20, 0xc8, 0xf9, 0xcb,\n"," 0xe0, 0x0c, 0x81, 0xf5, 0xc2, 0x6f, 0x25, 0xe3, 0x15, 0xca, 0x40, 0xac,\n"," 0xe6, 0x37, 0x60, 0xb4, 0x30, 0xb8, 0x19, 0xdb, 0xf1, 0x22, 0x56, 0xfe,\n"," 0x02, 0xf7, 0xfb, 0x0e, 0x68, 0xe6, 0x5e, 0x81, 0x15, 0xe4, 0xc5, 0xd9,\n"," 0xc3, 0xbd, 0x42, 0xe5, 0xbe, 0x2f, 0xde, 0x3d, 0x04, 0xe3, 0x4a, 0x97,\n"," 0xdb, 0xf6, 0xb1, 0xdf, 0xe5, 0xb2, 0x4b, 0xf2, 0xbc, 0x5e, 0x22, 0x7f,\n"," 0xfd, 0xd7, 0x37, 0xda, 0xd2, 0x1a, 0x22, 0xf8, 0xbf, 0x69, 0x1b, 0x22,\n"," 0x07, 0xcc, 0x11, 0xa3, 0xf8, 0x2c, 0x35, 0xdf, 0x60, 0xc8, 0xc9, 0xd9,\n"," 0xeb, 0x0c, 0x4e, 0x2e, 0x28, 0xe4, 0x44, 0x02, 0x7f, 0xda, 0x62, 0x25,\n"," 0x14, 0xe6, 0xbd, 0xe1, 0xcf, 0x9c, 0x50, 0x17, 0xff, 0x1e, 0xc3, 0x3c,\n"," 0x25, 0xde, 0x4c, 0x14, 0xf7, 0xfc, 0x02, 0xe1, 0xdd, 0xd3, 0x3d, 0xf8,\n"," 0xef, 0x49, 0x0c, 0x7b, 0x0a, 0xff, 0x24, 0x34, 0xfe, 0x2b, 0x14, 0x0b,\n"," 0xb6, 0x4f, 0xc5, 0x23, 0xe6, 0xe2, 0x12, 0x9f, 0xeb, 0x21, 0xc9, 0x45,\n"," 0x35, 0xcc, 0xbf, 0xea, 0x01, 0xf4, 0xe0, 0x15, 0x0e, 0xe8, 0x9d, 0xff,\n"," 0x54, 0xc7, 0xec, 0x27, 0x32, 0xed, 0xe3, 0xef, 0xd6, 0xa7, 0xf5, 0xea,\n"," 0xfa, 0x09, 0xc3, 0x32, 0x1d, 0xfd, 0x05, 0x19, 0x03, 0xf6, 0x05, 0xe9,\n"," 0xed, 0xe6, 0x05, 0x64, 0xf0, 0x35, 0xdc, 0x61, 0x12, 0x1d, 0x20, 0x3c,\n"," 0x0f, 0x33, 0xf8, 0x12, 0xa1, 0x1c, 0x81, 0x1d, 0xdc, 0xe1, 0x0a, 0x99,\n"," 0xd1, 0xf7, 0x9f, 0xc9, 0x1b, 0xd8, 0x32, 0xf2, 0xee, 0xb3, 0xaf, 0x0f,\n"," 0x01, 0xdd, 0x49, 0xf8, 0x7c, 0xa6, 0xbd, 0xac, 0x36, 0xeb, 0x0f, 
0x01,\n"," 0xdb, 0xca, 0xb8, 0xb8, 0xf8, 0xf6, 0xf9, 0x27, 0x32, 0xf8, 0xde, 0xef,\n"," 0x19, 0xff, 0xf9, 0xf7, 0xf3, 0xde, 0xc7, 0x93, 0xfb, 0x1e, 0x1d, 0x50,\n"," 0xf3, 0x31, 0xc5, 0x00, 0x18, 0x27, 0xb8, 0x1a, 0x9e, 0xdf, 0xd0, 0x2c,\n"," 0xce, 0xe0, 0xa3, 0xa9, 0x9d, 0xb8, 0xaf, 0x67, 0x13, 0xd3, 0x19, 0xf7,\n"," 0xed, 0x81, 0xb1, 0x3d, 0xe9, 0xd5, 0x00, 0xf4, 0x45, 0x93, 0xcd, 0x62,\n"," 0x1e, 0xd6, 0x3a, 0x08, 0xd9, 0xb9, 0xd2, 0x1e, 0xeb, 0xe9, 0xbb, 0x1e,\n"," 0x1f, 0xf9, 0xe0, 0x20, 0xf6, 0xf2, 0x30, 0xf9, 0xfe, 0xfb, 0xe9, 0x66,\n"," 0xeb, 0xf5, 0x13, 0x40, 0xcf, 0x2d, 0xce, 0x0f, 0xe9, 0x06, 0x9a, 0x0c,\n"," 0x64, 0xbc, 0xff, 0xff, 0x9a, 0xbf, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,\n"," 0x10, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x31, 0x00, 0x00, 0x00,\n"," 0x28, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x84, 0xbc, 0xff, 0xff,\n"," 0x88, 0xbc, 0xff, 0xff, 0xbe, 0xbf, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,\n"," 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe1, 0xfe, 0xff, 0xff,\n"," 0x78, 0x00, 0x00, 0x00, 0xb1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n"," 0x43, 0xfd, 0xff, 0xff, 0xa9, 0xff, 0xff, 0xff, 0x97, 0xfc, 0xff, 0xff,\n"," 0xea, 0xbf, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x80, 0x3e, 0x00, 0x00,\n"," 0xf5, 0xf9, 0xff, 0x08, 0xea, 0x05, 0x0f, 0x0c, 0xf3, 0x0e, 0xf6, 0x0f,\n"," 0xfa, 0x01, 0x11, 0xf1, 0xf6, 0xea, 0xfc, 0x0f, 0xfc, 0xf1, 0xdd, 0x0e,\n"," 0x1c, 0xef, 0xe6, 0xff, 0x05, 0xe8, 0x03, 0x11, 0xf6, 0xf1, 0x11, 0x0c,\n"," 0xd7, 0x08, 0xf5, 0x30, 0xd9, 0x10, 0x14, 0x11, 0x10, 0x17, 0xee, 0x23,\n"," 0x0c, 0xeb, 0x00, 0x06, 0xf6, 0xf7, 0x18, 0x0e, 0x18, 0x13, 0xfe, 0xfa,\n"," 0xf3, 0xdd, 0xfa, 0xfb, 0x01, 0xfd, 0xe5, 0xe4, 0x00, 0x0d, 0xfe, 0x09,\n"," 0xe9, 0x0a, 0x10, 0x1d, 0xf8, 0xf4, 0x0a, 0x1a, 0x10, 0x12, 0x18, 0xf1,\n"," 0xfc, 0x1d, 0x00, 0x25, 0xd8, 0x08, 0xf8, 0xff, 0x06, 0x19, 0xf5, 0x0f,\n"," 0x1c, 0x17, 0x0c, 0x16, 0xf3, 0x29, 0x20, 0x32, 0xfe, 0x19, 0xfb, 0x02,\n"," 0x04, 0x15, 0xf3, 0x2b, 0x06, 0x14, 0x0e, 0xde, 0x04, 0x0e, 
0xfc, 0x2d,\n"," 0x1b, 0xdb, 0xec, 0xee, 0x00, 0xf6, 0x01, 0x33, 0x02, 0xe7, 0x06, 0xdd,\n"," 0xf9, 0x03, 0x13, 0x03, 0xf8, 0xec, 0x14, 0xe4, 0x0f, 0xfa, 0xd4, 0x22,\n"," 0x00, 0x11, 0x09, 0x02, 0x0e, 0xf4, 0x05, 0xfb, 0x04, 0x15, 0x04, 0x03,\n"," 0xff, 0x0f, 0x09, 0xf2, 0xeb, 0xfc, 0x06, 0x00, 0xe5, 0x0a, 0xf2, 0xfc,\n"," 0xfd, 0x12, 0xee, 0xe9, 0xf2, 0xfd, 0xf9, 0xf3, 0xce, 0x0f, 0xe9, 0xee,\n"," 0xff, 0x14, 0x15, 0x0b, 0xcb, 0x03, 0xf2, 0x1b, 0xdb, 0x09, 0x1d, 0x07,\n"," 0xd8, 0xde, 0xe6, 0x13, 0xd8, 0xf0, 0xe6, 0x00, 0xe7, 0xec, 0xd3, 0x00,\n"," 0xc5, 0x25, 0xdb, 0x0a, 0xde, 0x1f, 0xd9, 0x11, 0xc1, 0x06, 0x01, 0x2e,\n"," 0x09, 0x19, 0x09, 0x0f, 0xbe, 0x00, 0xf7, 0x08, 0x10, 0x12, 0xff, 0x10,\n"," 0xf4, 0x05, 0xdf, 0x16, 0xe7, 0xe6, 0xef, 0xf4, 0xdd, 0x18, 0x18, 0x16,\n"," 0xeb, 0x1a, 0xd7, 0xdb, 0xee, 0x15, 0xf1, 0x1e, 0xfc, 0x02, 0xfe, 0x0a,\n"," 0xed, 0x17, 0x1c, 0x39, 0x01, 0xde, 0x06, 0xf3, 0xdb, 0x27, 0xfc, 0x1e,\n"," 0xe4, 0x01, 0x03, 0x1d, 0xc5, 0x0d, 0xea, 0x0b, 0xfe, 0x05, 0xfc, 0x10,\n"," 0xc2, 0x06, 0x0a, 0x51, 0xf4, 0xd8, 0xe8, 0x03, 0xcd, 0x1a, 0xe7, 0x13,\n"," 0xfb, 0xfd, 0xe2, 0x2a, 0xf7, 0x0d, 0xea, 0x29, 0xfc, 0xea, 0x1c, 0x08,\n"," 0x0a, 0x13, 0xfc, 0xf8, 0x15, 0xf3, 0x06, 0xe9, 0x1d, 0x0c, 0x1c, 0x14,\n"," 0xdc, 0x17, 0x16, 0xff, 0x00, 0x06, 0x0c, 0xfe, 0x0c, 0x0a, 0xe6, 0x18,\n"," 0xef, 0xd6, 0x1d, 0xee, 0xd2, 0x1c, 0xfe, 0x0d, 0xec, 0xfc, 0xe8, 0x02,\n"," 0xf8, 0x13, 0xf9, 0x17, 0x08, 0xf8, 0xf9, 0x06, 0x04, 0x07, 0xcf, 0x07,\n"," 0xfb, 0xde, 0xf2, 0x0c, 0xe4, 0xf2, 0x1d, 0xdd, 0xd7, 0xfd, 0xec, 0xfd,\n"," 0xd8, 0xd9, 0x0a, 0xf5, 0xf4, 0x02, 0x1f, 0x0e, 0xf8, 0x1a, 0xe0, 0x06,\n"," 0x0a, 0x23, 0xf6, 0x1f, 0xea, 0x07, 0xde, 0x00, 0xf5, 0x10, 0xe7, 0x06,\n"," 0xf3, 0xe1, 0x0a, 0x2a, 0xf0, 0x00, 0x18, 0x09, 0xe8, 0xd6, 0xec, 0x00,\n"," 0xef, 0x1c, 0xf2, 0x07, 0xf1, 0xf5, 0x16, 0x13, 0xdf, 0x0f, 0xdd, 0x1b,\n"," 0x10, 0xdb, 0xfb, 0x07, 0xda, 0x17, 0xdf, 0x28, 0xf5, 0xe9, 0x07, 0x0b,\n"," 0x02, 0xf4, 0xf0, 0x0e, 0xda, 0x1e, 0x1d, 0xff, 0xde, 
0x0e, 0x1e, 0x24,\n"," 0xf5, 0xfc, 0x08, 0x1f, 0xff, 0x12, 0x09, 0x18, 0x20, 0xd8, 0x08, 0xf0,\n"," 0xef, 0x07, 0x02, 0x19, 0xe8, 0xf3, 0x02, 0x03, 0xdf, 0x22, 0x0e, 0x04,\n"," 0x0d, 0xf9, 0xea, 0x1c, 0xf1, 0x17, 0x08, 0x02, 0x0b, 0x02, 0x00, 0x22,\n"," 0xf0, 0x0e, 0xdf, 0x07, 0xea, 0x01, 0xf3, 0xef, 0xfb, 0xff, 0x07, 0xfd,\n"," 0xf7, 0xf2, 0x14, 0x1e, 0x17, 0xe7, 0x12, 0xf8, 0xee, 0xfc, 0x09, 0xe0,\n"," 0x08, 0xd5, 0x07, 0xff, 0x11, 0xf7, 0xee, 0x14, 0xfd, 0xe0, 0xda, 0x03,\n"," 0xd5, 0xcd, 0x04, 0xe5, 0xea, 0xde, 0xf7, 0x02, 0x0b, 0xfb, 0x03, 0x10,\n"," 0xf7, 0xcf, 0x0c, 0xfb, 0xee, 0x06, 0x0a, 0x12, 0x0e, 0xd7, 0xfb, 0x06,\n"," 0xf6, 0xe0, 0xfb, 0xf1, 0xec, 0xf6, 0x13, 0xf6, 0x0a, 0xea, 0x24, 0x0a,\n"," 0xfd, 0xe6, 0xf8, 0x19, 0x06, 0xe2, 0x05, 0x20, 0x08, 0xe3, 0xd8, 0x05,\n"," 0x00, 0xcd, 0xeb, 0x0f, 0xfd, 0xec, 0xf6, 0xfc, 0xe1, 0xf8, 0xf4, 0xfe,\n"," 0xdf, 0x10, 0xf8, 0x0d, 0xf3, 0xf9, 0x06, 0x06, 0xd5, 0xfb, 0x16, 0x18,\n"," 0x00, 0xfe, 0xf9, 0x17, 0x12, 0xe2, 0xfb, 0xf8, 0xe5, 0x06, 0x29, 0xdf,\n"," 0xfb, 0xfd, 0x08, 0x11, 0xf8, 0x10, 0x13, 0x03, 0xe1, 0xf9, 0xf8, 0xfd,\n"," 0x06, 0xf2, 0x11, 0xff, 0xf8, 0xfe, 0x12, 0xf5, 0xf2, 0xe1, 0x26, 0x0b,\n"," 0xe9, 0xfe, 0x04, 0xf1, 0xeb, 0xfd, 0x0c, 0x26, 0xfd, 0xfb, 0x12, 0xf8,\n"," 0xfd, 0x01, 0x03, 0x05, 0x09, 0x27, 0x28, 0xff, 0x0f, 0x0a, 0xe9, 0xff,\n"," 0x00, 0xec, 0xf7, 0xf4, 0x04, 0x03, 0x08, 0x10, 0xfe, 0xf3, 0x1f, 0xf5,\n"," 0xf0, 0xff, 0x0a, 0x20, 0x0c, 0xd4, 0xef, 0xdb, 0xf5, 0xf4, 0x1a, 0x02,\n"," 0xfe, 0xda, 0x04, 0xe4, 0x0b, 0xd9, 0x1a, 0xee, 0xfd, 0xc6, 0xf8, 0x0d,\n"," 0xec, 0xfe, 0x19, 0xe1, 0x1f, 0xc5, 0x1d, 0x02, 0xf6, 0xd6, 0x04, 0xe6,\n"," 0x06, 0xe4, 0x0c, 0xf0, 0x31, 0xe8, 0xe2, 0xec, 0x1d, 0xe8, 0x0f, 0x02,\n"," 0x2d, 0xe8, 0xf1, 0xf7, 0x0f, 0xf9, 0x13, 0xfd, 0x1f, 0xd8, 0x24, 0x17,\n"," 0xfb, 0xf8, 0x01, 0xe3, 0x14, 0xaf, 0x14, 0x01, 0x1c, 0xe5, 0x10, 0xf2,\n"," 0x16, 0xd3, 0xed, 0xe3, 0x15, 0x02, 0x27, 0xeb, 0x1e, 0x12, 0x19, 0xff,\n"," 0x16, 0xeb, 0x13, 0x11, 0xfa, 0x14, 0xf4, 0x02, 
0x11, 0x08, 0xfc, 0xf9,\n"," 0x07, 0xdc, 0x1c, 0xeb, 0x16, 0xf0, 0x1c, 0x06, 0x08, 0xfa, 0xf9, 0x11,\n"," 0xee, 0x07, 0xf3, 0x06, 0xfd, 0xfd, 0x19, 0xf9, 0xf1, 0xe2, 0x1f, 0xf2,\n"," 0x0f, 0xe9, 0x0c, 0xfb, 0x1d, 0x03, 0x02, 0xe2, 0x1c, 0x11, 0xfb, 0xf7,\n"," 0x04, 0x04, 0x18, 0xe7, 0x27, 0xe2, 0xfc, 0xf5, 0x06, 0x00, 0x08, 0xfd,\n"," 0x15, 0xdb, 0x16, 0xfe, 0x04, 0x08, 0xf8, 0xff, 0xfb, 0xeb, 0xeb, 0xfe,\n"," 0xed, 0xf4, 0xf0, 0xe4, 0xfe, 0x22, 0x09, 0x02, 0x21, 0xc8, 0x0b, 0xe4,\n"," 0xf4, 0xf2, 0x04, 0x02, 0xef, 0xce, 0x13, 0x07, 0xfa, 0xe0, 0xff, 0xf1,\n"," 0xfe, 0xd5, 0xfc, 0xdc, 0x0f, 0xf2, 0x05, 0x10, 0x00, 0xd4, 0x24, 0xea,\n"," 0x1e, 0xe3, 0x2a, 0x18, 0xf3, 0xd2, 0x01, 0xe0, 0x0e, 0xdb, 0x2a, 0xeb,\n"," 0x02, 0xdd, 0xec, 0xd7, 0x12, 0xec, 0x31, 0xfc, 0x25, 0xd9, 0x04, 0x08,\n"," 0x15, 0xd0, 0xe8, 0x14, 0x18, 0xf9, 0xfa, 0xf6, 0x24, 0xea, 0x0a, 0x06,\n"," 0x02, 0xfb, 0x05, 0xea, 0x02, 0xf0, 0x04, 0xf1, 0x1f, 0x13, 0x04, 0x17,\n"," 0x14, 0xf0, 0x0d, 0x10, 0x03, 0x05, 0x26, 0xec, 0xfe, 0xe8, 0x19, 0xe9,\n"," 0x0a, 0xee, 0xe4, 0x04, 0x2a, 0xec, 0x1b, 0x06, 0x05, 0xff, 0xd7, 0xf5,\n"," 0x1c, 0x0c, 0x20, 0xfe, 0xe3, 0xe1, 0x11, 0xdc, 0x2b, 0x03, 0x04, 0x1d,\n"," 0x1a, 0xd4, 0x1d, 0xea, 0x06, 0x04, 0x04, 0x1a, 0x1e, 0xef, 0x00, 0xe0,\n"," 0x1e, 0xf8, 0x0c, 0xfe, 0x12, 0xd8, 0x0b, 0xe5, 0xf2, 0x03, 0x21, 0x06,\n"," 0x01, 0x22, 0xef, 0xf3, 0xfb, 0xfb, 0x25, 0x17, 0x08, 0xeb, 0xf3, 0xec,\n"," 0xf4, 0x06, 0x21, 0xec, 0xe3, 0xe3, 0xe4, 0xe5, 0xf9, 0xe8, 0x0d, 0xec,\n"," 0x1c, 0xc3, 0x0b, 0xdf, 0x12, 0x05, 0xe6, 0xdd, 0xde, 0xc5, 0xe6, 0xea,\n"," 0x1a, 0xf1, 0x0f, 0xe3, 0x11, 0xcf, 0xea, 0xe5, 0xfe, 0xf6, 0x02, 0x0b,\n"," 0x0e, 0xd5, 0x03, 0xd6, 0x11, 0x02, 0x2d, 0xfc, 0xed, 0xec, 0xee, 0xfa,\n"," 0xf8, 0xf2, 0x01, 0x0e, 0x19, 0xf1, 0x14, 0x03, 0x1a, 0xf3, 0x0c, 0xf9,\n"," 0xf5, 0xf4, 0xf2, 0xdf, 0xf0, 0xd6, 0x32, 0xf6, 0x18, 0x06, 0xf3, 0x01,\n"," 0x02, 0xe8, 0x09, 0x14, 0xff, 0x0f, 0x23, 0x26, 0x05, 0xf3, 0x08, 0xf3,\n"," 0x16, 0xfb, 0xed, 0x0d, 0x13, 0xe8, 0x25, 
0xf1, 0xe9, 0xf2, 0xf5, 0x0c,\n"," 0x19, 0xf0, 0x1f, 0xfa, 0x00, 0xe4, 0xfe, 0x22, 0xf2, 0xd5, 0x14, 0xe9,\n"," 0x06, 0xe9, 0xfe, 0x13, 0x07, 0x08, 0x00, 0xfd, 0x16, 0xdb, 0xe0, 0x12,\n"," 0x07, 0x14, 0x09, 0x1c, 0x17, 0x10, 0x20, 0xd3, 0xfd, 0xe9, 0x25, 0xfb,\n"," 0x19, 0xd8, 0x0b, 0xf9, 0xf3, 0xde, 0xfe, 0x21, 0x12, 0xec, 0xf4, 0xe4,\n"," 0xf7, 0xff, 0x21, 0xef, 0x26, 0x0f, 0xf9, 0xee, 0xe6, 0x03, 0x2f, 0xf7,\n"," 0x0e, 0x10, 0xfa, 0x08, 0x0b, 0xfa, 0xe9, 0xff, 0xf9, 0xdd, 0x01, 0xe3,\n"," 0xfb, 0x01, 0xfc, 0xf4, 0x1a, 0xb9, 0xf6, 0xd5, 0x1b, 0x01, 0xfd, 0xe2,\n"," 0x03, 0xd2, 0x11, 0xf5, 0x10, 0xd9, 0x07, 0x07, 0xe1, 0xc1, 0xff, 0xd4,\n"," 0x10, 0xef, 0x23, 0x10, 0x01, 0xba, 0x09, 0xd1, 0xfd, 0xe3, 0x0d, 0xe3,\n"," 0x00, 0xcf, 0x03, 0xcd, 0xfd, 0xf9, 0xfe, 0xe9, 0x07, 0xe4, 0x04, 0xfc,\n"," 0xf1, 0x00, 0x21, 0x01, 0xf6, 0x01, 0xda, 0x14, 0xe8, 0xd9, 0x14, 0x05,\n"," 0x08, 0x01, 0x26, 0xf8, 0xfb, 0xc1, 0x2c, 0x1a, 0x06, 0xed, 0xef, 0xf5,\n"," 0xf1, 0x00, 0x0e, 0x19, 0x1f, 0x08, 0xff, 0x0c, 0x04, 0xf6, 0x25, 0x17,\n"," 0x1a, 0x0b, 0xeb, 0xe6, 0x0f, 0x10, 0x13, 0x14, 0x12, 0xfa, 0x22, 0xee,\n"," 0xe6, 0x0b, 0x2d, 0xf9, 0x1e, 0xf0, 0x04, 0x09, 0x00, 0x0f, 0x2f, 0x05,\n"," 0xe8, 0xf9, 0x03, 0xd7, 0x02, 0xea, 0x1f, 0xfd, 0x22, 0xed, 0xf1, 0xed,\n"," 0xfe, 0xdc, 0x0d, 0x0e, 0x0c, 0xf0, 0x19, 0xf1, 0x09, 0xe0, 0x2c, 0xfb,\n"," 0x02, 0xdc, 0xf3, 0xd9, 0x32, 0xf7, 0x09, 0xe3, 0x09, 0x17, 0x03, 0xf3,\n"," 0x08, 0x01, 0x1b, 0xfa, 0x06, 0xfa, 0x1f, 0x15, 0x16, 0xe7, 0x16, 0xfe,\n"," 0xfe, 0xf4, 0xe0, 0xe2, 0x12, 0x21, 0xfa, 0x15, 0x00, 0xcb, 0x07, 0xb6,\n"," 0x1b, 0xf2, 0x34, 0xfa, 0xfd, 0xba, 0x19, 0xd4, 0x2c, 0xde, 0xf2, 0x1c,\n"," 0x0c, 0xc5, 0xef, 0xe4, 0x0a, 0xfb, 0x03, 0x03, 0xf2, 0xcd, 0x01, 0xe0,\n"," 0xf2, 0xf6, 0xf5, 0x0a, 0xf6, 0xc5, 0x0d, 0xe2, 0x09, 0xdc, 0x00, 0x05,\n"," 0x10, 0xe1, 0x14, 0xf7, 0x02, 0x08, 0x14, 0x12, 0xf5, 0xf8, 0x1c, 0xe9,\n"," 0xf5, 0xf1, 0x26, 0xd8, 0x16, 0x06, 0x00, 0xf8, 0xf4, 0xe0, 0x32, 0x03,\n"," 0x07, 0x15, 0xea, 0x10, 0xf2, 0xfa, 
0x17, 0x1f, 0x07, 0x07, 0x17, 0x06,\n"," 0x06, 0xe7, 0x05, 0xfe, 0xe5, 0x1b, 0x16, 0xff, 0xf8, 0xfe, 0x2c, 0xf8,\n"," 0x00, 0x03, 0xf3, 0xf3, 0xf3, 0xf0, 0xfb, 0xdf, 0x02, 0xe5, 0x16, 0xed,\n"," 0xf9, 0x01, 0x23, 0x03, 0x16, 0xe6, 0xfe, 0xeb, 0x00, 0xf0, 0x27, 0x1b,\n"," 0xeb, 0xee, 0x03, 0xe9, 0x02, 0xd8, 0x2f, 0xe4, 0x0d, 0xde, 0x14, 0xe3,\n"," 0xfd, 0xf6, 0x13, 0x06, 0x10, 0xf4, 0xeb, 0xe5, 0x19, 0xf0, 0x17, 0xea,\n"," 0x15, 0x0d, 0xe4, 0x0b, 0x31, 0xf3, 0x13, 0x1b, 0xf9, 0xe0, 0x0b, 0xfc,\n"," 0x09, 0x03, 0x26, 0xe6, 0xeb, 0xd1, 0xd9, 0xc8, 0x00, 0xf7, 0x26, 0x0a,\n"," 0x08, 0xd4, 0xe3, 0xd6, 0x1b, 0x06, 0x1a, 0xed, 0xf4, 0xee, 0xfd, 0xe7,\n"," 0x14, 0xe1, 0x06, 0x11, 0xf9, 0xaa, 0xf6, 0xd7, 0x0c, 0xdf, 0x25, 0x17,\n"," 0x11, 0xd8, 0xfa, 0x08, 0x0e, 0xed, 0x29, 0x0c, 0xec, 0xeb, 0x0b, 0x02,\n"," 0xf3, 0xfb, 0x19, 0x1c, 0x13, 0x11, 0x10, 0xeb, 0x0d, 0xef, 0x11, 0xff,\n"," 0x14, 0xe4, 0xd9, 0x02, 0xed, 0xe6, 0x23, 0xdf, 0xfb, 0xf4, 0xef, 0xee,\n"," 0xf9, 0xf2, 0x24, 0x04, 0x03, 0x02, 0x0b, 0x0e, 0xed, 0x08, 0x19, 0xf9,\n"," 0xf2, 0x02, 0xf4, 0x02, 0xf0, 0x1b, 0x03, 0x08, 0xf7, 0xe7, 0xf9, 0xf3,\n"," 0xf7, 0x15, 0x11, 0x18, 0x18, 0x0e, 0x13, 0x13, 0x0d, 0x0e, 0x0e, 0x06,\n"," 0xfb, 0xe8, 0x13, 0x09, 0x07, 0xf2, 0x24, 0x0c, 0x22, 0xf8, 0x08, 0xef,\n"," 0xee, 0xec, 0x25, 0x09, 0x17, 0xde, 0xfb, 0xdd, 0x0d, 0xd0, 0x3c, 0x29,\n"," 0x13, 0xf5, 0xeb, 0xeb, 0xfc, 0xd2, 0x33, 0xf9, 0x05, 0xe0, 0x15, 0x04,\n"," 0x08, 0xfd, 0x14, 0x14, 0xfe, 0x0a, 0xee, 0xe7, 0x14, 0xfb, 0x15, 0xef,\n"," 0x07, 0xdf, 0x12, 0x14, 0x00, 0xf0, 0xff, 0x03, 0xf9, 0xe5, 0xf7, 0xcf,\n"," 0x07, 0xeb, 0x0b, 0xd8, 0xf4, 0xce, 0xe1, 0xaf, 0x20, 0x0b, 0xfa, 0x09,\n"," 0xf6, 0xbf, 0x18, 0xe9, 0x06, 0xcc, 0x03, 0xf4, 0x0e, 0xb8, 0x08, 0xd0,\n"," 0x07, 0xe9, 0x10, 0x17, 0x0a, 0xcf, 0x21, 0xf7, 0x03, 0xf9, 0x26, 0xe0,\n"," 0x04, 0xe8, 0x0c, 0xff, 0x0b, 0xfe, 0x16, 0x16, 0xfe, 0xda, 0x17, 0x04,\n"," 0xfd, 0x0b, 0x15, 0x0d, 0xf8, 0x08, 0xf9, 0xf3, 0x00, 0xe8, 0x07, 0x0a,\n"," 0xf4, 0xf9, 0x0e, 0xdc, 0xfb, 
0xe3, 0xfe, 0x09, 0xff, 0x07, 0xfa, 0xfd,\n"," 0xe6, 0x05, 0xf9, 0x0e, 0xf2, 0xef, 0xfe, 0xf6, 0x04, 0xee, 0x2d, 0x0e,\n"," 0x04, 0xe7, 0xec, 0xfb, 0xf1, 0x08, 0x17, 0x04, 0xf9, 0xf9, 0x15, 0xff,\n"," 0x00, 0xfc, 0x23, 0xf6, 0x00, 0x1a, 0xf4, 0x1c, 0x02, 0x04, 0x1e, 0x11,\n"," 0x00, 0xee, 0xf3, 0xe6, 0xed, 0xfa, 0x24, 0xe0, 0xfb, 0xe7, 0x10, 0xd7,\n"," 0xdc, 0xf5, 0x4c, 0xf3, 0x19, 0x01, 0xf9, 0xef, 0x00, 0xee, 0x13, 0xeb,\n"," 0xf9, 0xd7, 0x0b, 0xf1, 0xef, 0x05, 0x45, 0xf7, 0x01, 0x0b, 0xf3, 0xfa,\n"," 0x0d, 0x10, 0x18, 0x1c, 0xf5, 0xf5, 0x0a, 0xef, 0x0c, 0x19, 0x06, 0xf8,\n"," 0x06, 0xf1, 0x29, 0xd0, 0x0c, 0x07, 0x17, 0xf7, 0x18, 0xb0, 0x26, 0xcf,\n"," 0x16, 0x01, 0x03, 0xf4, 0xf0, 0xc8, 0x04, 0xe8, 0x1a, 0xf4, 0x0f, 0xeb,\n"," 0x0e, 0xb6, 0x00, 0xd3, 0x04, 0xf8, 0x26, 0xf8, 0x1a, 0xa8, 0xf9, 0xcb,\n"," 0x04, 0xeb, 0x22, 0x0a, 0x0d, 0xcd, 0xeb, 0xea, 0x03, 0xe2, 0x09, 0xed,\n"," 0x0b, 0xe3, 0x09, 0xf1, 0xf1, 0xec, 0x21, 0xee, 0x0e, 0xf4, 0x1c, 0x04,\n"," 0xee, 0xfb, 0x0d, 0x1a, 0xfc, 0xf4, 0xfe, 0xef, 0x06, 0xe0, 0x13, 0x0e,\n"," 0xfd, 0x05, 0x0b, 0x1d, 0xfd, 0xf6, 0x09, 0x1b, 0x04, 0x27, 0xf5, 0x0e,\n"," 0xf0, 0xed, 0x1e, 0xf7, 0xea, 0xfa, 0x1a, 0xf9, 0xe5, 0x07, 0x15, 0x0e,\n"," 0x00, 0xea, 0xfa, 0xe9, 0xf7, 0xec, 0x31, 0xec, 0x04, 0x09, 0x10, 0xec,\n"," 0xfd, 0xe4, 0x27, 0x00, 0x0c, 0xdc, 0xdc, 0xde, 0xed, 0xe9, 0x1f, 0xe4,\n"," 0xfa, 0x02, 0xd9, 0xfe, 0x06, 0xf1, 0x15, 0xee, 0xf1, 0xf3, 0x14, 0xe2,\n"," 0x00, 0xdb, 0x28, 0x17, 0x09, 0xdc, 0xfe, 0xea, 0xfc, 0x14, 0x20, 0x13,\n"," 0xf9, 0xed, 0xf1, 0xe8, 0xfd, 0x04, 0x3a, 0xfd, 0x00, 0x15, 0xf1, 0xee,\n"," 0x10, 0xe3, 0x0b, 0x20, 0x10, 0xeb, 0x10, 0xc3, 0x14, 0xf8, 0x03, 0x0b,\n"," 0x11, 0xc3, 0x27, 0xc5, 0x2d, 0xdb, 0x15, 0x0e, 0xf5, 0xce, 0xfa, 0xd8,\n"," 0x1c, 0xf0, 0x20, 0x04, 0xec, 0xc4, 0xf9, 0xda, 0x1c, 0xd9, 0x01, 0x05,\n"," 0x1f, 0xbb, 0xf8, 0xff, 0xef, 0x06, 0x10, 0xe3, 0x02, 0xe6, 0xdb, 0xee,\n"," 0x02, 0xfe, 0xfc, 0x15, 0xfe, 0xf0, 0xdb, 0xfb, 0xf5, 0xfc, 0x16, 0x02,\n"," 0xed, 0x01, 0x12, 0xe2, 
0x06, 0xeb, 0x10, 0x16, 0x03, 0xed, 0x1a, 0x07,\n"," 0xf0, 0xe4, 0x29, 0xf5, 0xfa, 0xe1, 0x07, 0xe8, 0xf8, 0xfd, 0xf5, 0x03,\n"," 0xfc, 0x18, 0x03, 0xe2, 0x00, 0xf7, 0x13, 0xf9, 0xe4, 0x10, 0x25, 0xfc,\n"," 0x0e, 0x1f, 0x1c, 0x12, 0x1e, 0xfd, 0x01, 0xf9, 0xef, 0x1d, 0x17, 0x1b,\n"," 0x04, 0xfd, 0x25, 0x12, 0xf5, 0x20, 0x0a, 0x02, 0x03, 0xff, 0xe6, 0xe5,\n"," 0xf4, 0x05, 0x42, 0x1a, 0x0b, 0xdc, 0xfd, 0xed, 0xf3, 0xd0, 0x43, 0xf3,\n"," 0x10, 0x09, 0x0a, 0xed, 0xff, 0xe2, 0x1b, 0x1d, 0x08, 0xe4, 0xfe, 0xf7,\n"," 0xff, 0xf9, 0x2e, 0xfa, 0xf8, 0xe7, 0xe7, 0xeb, 0xfd, 0xfe, 0x30, 0x06,\n"," 0x00, 0x1d, 0x12, 0xf4, 0x0d, 0xf4, 0x1c, 0xed, 0x01, 0xd2, 0x17, 0xb3,\n"," 0x0c, 0x0c, 0xf4, 0x1e, 0x26, 0xd8, 0xf7, 0xbd, 0x24, 0xe7, 0x11, 0x12,\n"," 0xf9, 0xb9, 0xf6, 0xde, 0x3c, 0xf7, 0xfe, 0x0c, 0x16, 0xc5, 0x14, 0xcd,\n"," 0x24, 0x06, 0xfa, 0x21, 0x03, 0xcb, 0xf7, 0xf0, 0xfc, 0xff, 0xfe, 0xf8,\n"," 0x0a, 0xed, 0xdf, 0xe4, 0x0f, 0x19, 0x10, 0x0f, 0xf9, 0xf9, 0x11, 0xf3,\n"," 0xf1, 0xf1, 0x33, 0xdc, 0x02, 0xd6, 0xde, 0xe0, 0xf9, 0xec, 0xfe, 0x09,\n"," 0xfc, 0xd4, 0xeb, 0x0b, 0xec, 0xe3, 0x10, 0x0e, 0x0d, 0x13, 0x00, 0xe6,\n"," 0xf2, 0xf2, 0x12, 0xec, 0x05, 0xf7, 0xff, 0x03, 0x02, 0x0f, 0x0c, 0x00,\n"," 0xf3, 0xfc, 0x02, 0xd9, 0xf0, 0x02, 0xef, 0xfa, 0x06, 0xda, 0x0a, 0xe4,\n"," 0xf6, 0x10, 0x14, 0x03, 0x12, 0xe6, 0x25, 0x09, 0x06, 0xf1, 0x26, 0x04,\n"," 0xfa, 0xe1, 0xdd, 0xfa, 0xef, 0x06, 0x11, 0xfd, 0xf9, 0xf8, 0xfd, 0xe8,\n"," 0xf8, 0x0b, 0x24, 0x22, 0xf9, 0xd1, 0x1a, 0xfe, 0xf0, 0xed, 0x3c, 0xfd,\n"," 0xf6, 0xfc, 0xe4, 0xf6, 0xf1, 0x05, 0x25, 0xf9, 0xee, 0x1b, 0x0d, 0xe2,\n"," 0xf8, 0xff, 0x2b, 0x16, 0xf6, 0xf4, 0x27, 0xe0, 0x02, 0x05, 0x0a, 0x11,\n"," 0x1d, 0xd1, 0xfb, 0xcb, 0x17, 0xf3, 0x23, 0xf9, 0x17, 0xb7, 0xec, 0x9f,\n"," 0x1d, 0xf2, 0x0f, 0x27, 0x10, 0xc5, 0xfa, 0xdf, 0x21, 0xe7, 0x0e, 0x01,\n"," 0x06, 0xb1, 0x02, 0xe2, 0x0e, 0xf8, 0x07, 0x04, 0x1a, 0xc1, 0x04, 0xed,\n"," 0xfe, 0xf6, 0x0c, 0x1c, 0x1d, 0xe1, 0xe5, 0xed, 0x03, 0xd7, 0xfb, 0x28,\n"," 0x00, 0xdf, 0xe9, 
0xcd, 0xef, 0x04, 0x20, 0xe9, 0x10, 0xde, 0x00, 0xee,\n"," 0xf3, 0xd0, 0x02, 0x09, 0x0b, 0x0e, 0xee, 0xf8, 0xea, 0xf3, 0x31, 0x0d,\n"," 0xf7, 0x1e, 0x0f, 0xe9, 0xe9, 0xff, 0x16, 0xda, 0x12, 0xf3, 0xec, 0x1c,\n"," 0xfd, 0x04, 0x0a, 0x09, 0x01, 0xed, 0xf9, 0x0d, 0xf9, 0x12, 0xfc, 0x08,\n"," 0xfa, 0xd6, 0x12, 0x0b, 0x02, 0xff, 0xfe, 0x06, 0x0f, 0xe3, 0xf0, 0xdb,\n"," 0xf0, 0xf5, 0x0f, 0x17, 0x0d, 0xe4, 0x2a, 0xf4, 0x13, 0xe9, 0x3a, 0x0c,\n"," 0x04, 0x11, 0xee, 0xf0, 0xf1, 0xf5, 0x31, 0x04, 0xf2, 0x04, 0x14, 0x02,\n"," 0xfd, 0xe7, 0x2a, 0xf6, 0xff, 0x17, 0xed, 0xea, 0xe1, 0xf9, 0x27, 0x20,\n"," 0x0b, 0xe6, 0x1f, 0xfe, 0x00, 0xf9, 0x10, 0x05, 0x04, 0x0e, 0xf0, 0xf7,\n"," 0x18, 0x17, 0x13, 0xf0, 0x21, 0xcd, 0xf9, 0xcd, 0x13, 0xfb, 0x05, 0xe6,\n"," 0x1b, 0xba, 0xf5, 0xb2, 0x2b, 0xd4, 0x19, 0x18, 0xf4, 0xc8, 0xee, 0xce,\n"," 0x31, 0xf4, 0xec, 0x2d, 0xfa, 0xc0, 0xeb, 0xe9, 0x0e, 0xe0, 0x2f, 0xfe,\n"," 0x17, 0xd1, 0x09, 0xfc, 0xf6, 0xdc, 0xf1, 0x00, 0x11, 0xd2, 0xf4, 0xe4,\n"," 0xfc, 0x0f, 0x02, 0x27, 0x0e, 0xdd, 0x19, 0x08, 0x03, 0xf8, 0x1f, 0xeb,\n"," 0xfa, 0x0d, 0xf1, 0x11, 0x0c, 0xe4, 0x31, 0x07, 0x02, 0xe7, 0xec, 0xf0,\n"," 0xe7, 0x02, 0x1b, 0xf0, 0xf8, 0x22, 0xfa, 0xe2, 0xfd, 0xf2, 0x13, 0x17,\n"," 0x0d, 0xf3, 0xfc, 0x01, 0xe4, 0xe2, 0x01, 0x09, 0xf4, 0xf1, 0x0c, 0x0d,\n"," 0x00, 0xf9, 0xfa, 0x07, 0x0c, 0xf4, 0xf5, 0xe9, 0xfa, 0x2f, 0x3d, 0x11,\n"," 0xef, 0x0b, 0x12, 0x04, 0xed, 0xfb, 0x17, 0x0e, 0x0d, 0xfb, 0xfb, 0xe1,\n"," 0x0e, 0xf0, 0x22, 0x13, 0x07, 0xed, 0xee, 0xda, 0xf2, 0xe8, 0x48, 0x07,\n"," 0xfc, 0xd2, 0xe3, 0xf0, 0xfa, 0xf9, 0x10, 0x0c, 0xe7, 0xeb, 0x01, 0xd3,\n"," 0xfb, 0xff, 0x3b, 0xf9, 0xf8, 0xef, 0xe9, 0xea, 0xe3, 0x01, 0x03, 0x04,\n"," 0xfb, 0xf9, 0x1a, 0x1e, 0x18, 0xf4, 0x05, 0x22, 0x21, 0xc9, 0x0c, 0xbf,\n"," 0x27, 0xfb, 0x06, 0x1d, 0x17, 0xce, 0x0e, 0xb7, 0x3c, 0xfa, 0xea, 0x0f,\n"," 0x12, 0xa6, 0xff, 0xd6, 0x25, 0xd4, 0x1e, 0xe4, 0x12, 0xaf, 0xdd, 0xd6,\n"," 0x2c, 0xfc, 0x08, 0xf5, 0x0e, 0xbb, 0x0a, 0xe2, 0x06, 0xfc, 0x27, 0x2e,\n"," 0x0f, 0xc7, 
0xf8, 0x00, 0x00, 0x04, 0x1c, 0x0b, 0x0e, 0x04, 0x17, 0x11,\n"," 0x06, 0x0c, 0x17, 0x13, 0xfb, 0xf3, 0xe0, 0xe7, 0x06, 0xdf, 0x0b, 0x11,\n"," 0x01, 0xfb, 0xef, 0x05, 0xf3, 0xc7, 0x01, 0xfc, 0xfc, 0x0b, 0x04, 0x00,\n"," 0x04, 0x13, 0x25, 0x2a, 0x05, 0xfb, 0x24, 0xf1, 0xe6, 0xfd, 0x19, 0x09,\n"," 0x01, 0xe0, 0xf2, 0xf5, 0x03, 0xfd, 0xfe, 0x06, 0x08, 0xe9, 0xde, 0x1a,\n"," 0xfd, 0x17, 0x1b, 0x11, 0x0c, 0xf7, 0x0c, 0xf6, 0xfb, 0xf6, 0x29, 0x1b,\n"," 0x1e, 0x00, 0xea, 0xe2, 0xfe, 0xeb, 0x1d, 0x22, 0xff, 0x15, 0xec, 0xcd,\n"," 0xef, 0xc4, 0x18, 0x15, 0xed, 0xed, 0x08, 0xeb, 0xf8, 0xe4, 0x35, 0x08,\n"," 0x0b, 0xe4, 0x13, 0xf4, 0xf6, 0xff, 0x12, 0xfc, 0xfc, 0x05, 0x0b, 0xf6,\n"," 0xeb, 0x07, 0x0d, 0x0f, 0xf8, 0x21, 0xf0, 0xe1, 0x1e, 0xf2, 0xf1, 0xfe,\n"," 0x2b, 0xe6, 0x2a, 0xd2, 0x15, 0xf1, 0x02, 0xfc, 0x22, 0xce, 0xe2, 0xbc,\n"," 0x35, 0xf9, 0x1e, 0x1c, 0x17, 0xaf, 0xf7, 0xfa, 0x2a, 0xea, 0x13, 0xfe,\n"," 0x08, 0xbe, 0x1b, 0xcf, 0x19, 0x16, 0x00, 0x1b, 0x1c, 0xbe, 0xe9, 0xee,\n"," 0x05, 0xe6, 0xec, 0x03, 0x26, 0xd2, 0xec, 0x0c, 0xf7, 0xeb, 0xf8, 0xf8,\n"," 0x1f, 0xde, 0xf3, 0xdd, 0x0f, 0x01, 0x26, 0xf9, 0x00, 0xf0, 0xe9, 0xe0,\n"," 0x0f, 0xc3, 0x0b, 0xe9, 0x01, 0xee, 0x03, 0xd8, 0xf4, 0xee, 0x29, 0x14,\n"," 0xf2, 0xfe, 0xf1, 0x09, 0xfc, 0x09, 0x0e, 0xfe, 0x06, 0x04, 0xfb, 0x07,\n"," 0xf0, 0xfe, 0x24, 0xfa, 0xf7, 0xf9, 0x0b, 0xfa, 0xf1, 0xf3, 0x1c, 0xf9,\n"," 0x05, 0xdb, 0x09, 0xf9, 0x10, 0xf5, 0x17, 0x2d, 0x09, 0xf9, 0xf3, 0x06,\n"," 0xfd, 0xe4, 0x07, 0xf6, 0xff, 0xfb, 0xfe, 0xf7, 0xfb, 0xf2, 0x22, 0xfe,\n"," 0xfb, 0xfb, 0x12, 0xe4, 0xf0, 0xec, 0x2a, 0x1c, 0xf8, 0xfa, 0x01, 0xd9,\n"," 0xef, 0x00, 0x1d, 0x06, 0xf8, 0xff, 0x05, 0x0b, 0xf4, 0x00, 0x38, 0x16,\n"," 0xf3, 0xf5, 0x1e, 0x07, 0xde, 0x0b, 0x32, 0x25, 0xfe, 0x03, 0x0d, 0x0a,\n"," 0x1f, 0x05, 0x28, 0x01, 0x19, 0xd3, 0xff, 0xc2, 0x0a, 0x01, 0xf6, 0x1e,\n"," 0x24, 0xda, 0xf9, 0xb2, 0x4f, 0xef, 0xf9, 0x13, 0xf5, 0xd2, 0xd7, 0xe6,\n"," 0x37, 0xf4, 0x02, 0x09, 0x05, 0xa3, 0xf7, 0xd9, 0x14, 0xf2, 0x0b, 0x05,\n"," 0x36, 
0xbd, 0x0c, 0x17, 0xfc, 0xfa, 0x22, 0x27, 0x1f, 0xc2, 0xf6, 0xf3,\n"," 0xff, 0xe6, 0x25, 0x17, 0x08, 0xd0, 0x04, 0x1a, 0xfb, 0xff, 0x08, 0x24,\n"," 0xf1, 0xf3, 0x15, 0xf4, 0xf6, 0xf2, 0x12, 0xe5, 0x01, 0xd8, 0xec, 0x17,\n"," 0x00, 0xd9, 0x08, 0x11, 0x04, 0x11, 0x02, 0xe9, 0xea, 0xe9, 0x20, 0xf4,\n"," 0x12, 0xe7, 0xe3, 0x00, 0xfe, 0x10, 0x1d, 0xeb, 0xfe, 0xe6, 0xd6, 0x05,\n"," 0xfa, 0xf3, 0x14, 0x19, 0x03, 0xdc, 0x0e, 0xe3, 0xf7, 0xfd, 0x31, 0xf3,\n"," 0x05, 0x11, 0xf5, 0xe3, 0x01, 0x05, 0x2c, 0x03, 0x15, 0xdf, 0x21, 0x0e,\n"," 0xe7, 0xfb, 0x09, 0x0c, 0xfb, 0xf9, 0x1b, 0xdc, 0xe3, 0xf3, 0x14, 0xdb,\n"," 0x02, 0xe8, 0x0a, 0xfd, 0xf7, 0xf9, 0x05, 0xdb, 0xfb, 0xe7, 0xf2, 0xfe,\n"," 0xf5, 0xe5, 0x10, 0xdd, 0x00, 0xf0, 0xe0, 0xf5, 0xf0, 0x04, 0x19, 0x24,\n"," 0xff, 0xe4, 0xf0, 0xf0, 0x23, 0x19, 0x17, 0xf6, 0x11, 0xdd, 0xdf, 0xde,\n"," 0x2a, 0xee, 0x0a, 0xfb, 0x2b, 0xc5, 0x05, 0xb4, 0x51, 0xf3, 0x09, 0x10,\n"," 0x0a, 0xb3, 0xfd, 0xe6, 0x48, 0xdf, 0x14, 0x0b, 0x1b, 0xcc, 0xd9, 0xfa,\n"," 0x15, 0xe5, 0xff, 0x24, 0x30, 0xbf, 0x05, 0x02, 0x09, 0x14, 0x25, 0x18,\n"," 0x2d, 0xc2, 0xfe, 0xf5, 0x0a, 0x17, 0xfd, 0x03, 0x15, 0xd3, 0x21, 0x11,\n"," 0x10, 0xe5, 0x02, 0xe3, 0xf7, 0x06, 0x15, 0xfa, 0xf5, 0xd3, 0x17, 0x02,\n"," 0xf9, 0x05, 0x16, 0xe0, 0x16, 0xd4, 0x0c, 0xe9, 0xf4, 0xfd, 0x28, 0x15,\n"," 0x04, 0xe2, 0x03, 0xfd, 0xf6, 0xf5, 0xfb, 0xf8, 0xf4, 0xf1, 0x10, 0xe6,\n"," 0x02, 0xfe, 0x03, 0xca, 0xe8, 0x05, 0x14, 0x02, 0xf9, 0xdc, 0xef, 0xf7,\n"," 0x09, 0x0f, 0x1e, 0x11, 0xfb, 0xfb, 0x13, 0x23, 0xf8, 0x06, 0x14, 0x12,\n"," 0x1b, 0x13, 0x2a, 0xf4, 0x04, 0xe5, 0x24, 0x1c, 0x03, 0xf8, 0x01, 0xd3,\n"," 0xe4, 0xd0, 0x3d, 0xe7, 0x0c, 0xde, 0xf1, 0xe3, 0xf1, 0xe8, 0x12, 0xf1,\n"," 0x10, 0xdb, 0xe5, 0xd3, 0xe5, 0xf7, 0x0f, 0xeb, 0xf9, 0xee, 0x18, 0xe5,\n"," 0xe9, 0x13, 0x18, 0x26, 0x14, 0x00, 0xfc, 0xf7, 0x2b, 0x0f, 0x05, 0xf5,\n"," 0x39, 0xd3, 0xf1, 0xd8, 0x29, 0xf4, 0x0f, 0x15, 0x14, 0xbc, 0x00, 0xc9,\n"," 0x3f, 0xe1, 0x05, 0x11, 0x23, 0xb4, 0xe3, 0xf6, 0x51, 0xde, 0x26, 0xf6,\n"," 
0x27, 0xb3, 0xf7, 0xdd, 0x2d, 0xf1, 0x10, 0x09, 0x3d, 0xcd, 0xea, 0xf1,\n"," 0x0c, 0x0e, 0xfe, 0x21, 0x24, 0xd6, 0xf9, 0x08, 0xff, 0xee, 0x12, 0x08,\n"," 0xfd, 0xe8, 0x19, 0xeb, 0x0b, 0xeb, 0x0f, 0x23, 0x0e, 0xd1, 0xfe, 0xf1,\n"," 0xf3, 0xd7, 0xf7, 0x1f, 0xff, 0xe5, 0xfe, 0x12, 0x05, 0xee, 0x13, 0x20,\n"," 0x22, 0xdd, 0x03, 0x19, 0x08, 0xee, 0xfd, 0x01, 0x12, 0x1a, 0xfc, 0x0c,\n"," 0xf5, 0xf4, 0xfd, 0xef, 0x05, 0xe8, 0x17, 0x08, 0xf2, 0xea, 0x08, 0x13,\n"," 0x03, 0xff, 0xf0, 0xe9, 0xfe, 0xff, 0x22, 0xfb, 0xff, 0xee, 0x0c, 0xfb,\n"," 0xff, 0x06, 0x27, 0x01, 0x08, 0xe3, 0x0c, 0xf1, 0x06, 0xe4, 0x19, 0x0d,\n"," 0x0e, 0xe1, 0xdc, 0xe8, 0xdb, 0xed, 0x2a, 0x0a, 0x06, 0xfd, 0x0e, 0xfb,\n"," 0xfb, 0x06, 0x25, 0x27, 0xfc, 0xf2, 0xf5, 0xf6, 0xef, 0xf7, 0x35, 0xf2,\n"," 0xe9, 0xea, 0x05, 0xf1, 0xdf, 0x06, 0x16, 0xf2, 0xfe, 0xde, 0xf0, 0x05,\n"," 0x2c, 0x25, 0x0a, 0x15, 0x0e, 0xc2, 0x03, 0xad, 0x3a, 0xee, 0x09, 0x27,\n"," 0x31, 0xb8, 0x20, 0xb5, 0x53, 0xd7, 0x09, 0xea, 0x0b, 0xc9, 0x04, 0xf9,\n"," 0x61, 0xda, 0xde, 0x19, 0x2d, 0xc3, 0xe7, 0xd4, 0x1b, 0xe7, 0xf9, 0x0f,\n"," 0x43, 0xc2, 0xff, 0xe6, 0x0c, 0xef, 0x13, 0xf3, 0x1b, 0xe0, 0x0b, 0x08,\n"," 0x05, 0x03, 0x09, 0x03, 0x23, 0xf4, 0xe8, 0xf5, 0x15, 0xfe, 0xee, 0xe8,\n"," 0x06, 0xe1, 0xe8, 0xf0, 0x20, 0xb3, 0xf4, 0x02, 0x06, 0xe4, 0xfa, 0x14,\n"," 0x02, 0xef, 0x13, 0x16, 0x08, 0x0f, 0x0e, 0x22, 0x0b, 0xed, 0xf3, 0x1b,\n"," 0x1d, 0x01, 0x22, 0xec, 0x01, 0xe0, 0xf5, 0x18, 0x0c, 0xd5, 0xff, 0x0e,\n"," 0x09, 0x06, 0x0b, 0xf1, 0x12, 0xe2, 0xe4, 0xd5, 0x07, 0xfb, 0xfc, 0xfe,\n"," 0xf7, 0xf7, 0x04, 0x02, 0xfe, 0xee, 0x05, 0x06, 0x04, 0xd9, 0x00, 0x06,\n"," 0xfb, 0x01, 0x28, 0x06, 0x09, 0xfe, 0x1c, 0xd7, 0xf9, 0xdc, 0x1a, 0xf3,\n"," 0xf6, 0xc9, 0xfd, 0xfe, 0x06, 0xdc, 0x09, 0xf6, 0xfe, 0xe7, 0x18, 0xf9,\n"," 0xf7, 0xe4, 0x24, 0xf5, 0xe9, 0x0a, 0x08, 0xf0, 0xf1, 0x08, 0x2c, 0xfd,\n"," 0xf9, 0xe4, 0xf9, 0x03, 0x38, 0x05, 0x0d, 0xf6, 0x1e, 0xda, 0xfc, 0xb9,\n"," 0x58, 0x01, 0xff, 0xf5, 0x33, 0xb4, 0xf7, 0xb7, 0x72, 0x12, 0x14, 
0xf7,\n"," 0xff, 0xd5, 0x06, 0xda, 0x61, 0xd0, 0x06, 0x05, 0x1e, 0xca, 0x0a, 0xfa,\n"," 0x30, 0xcf, 0xfa, 0xf2, 0x31, 0xd2, 0x0d, 0xcd, 0x2f, 0xd8, 0x13, 0x13,\n"," 0x2c, 0xcc, 0x08, 0xd6, 0x23, 0xd9, 0x12, 0x11, 0x18, 0xfa, 0x0c, 0xe3,\n"," 0x18, 0xef, 0xef, 0x00, 0x26, 0xf0, 0xf3, 0xe7, 0x1e, 0xc9, 0x0e, 0x26,\n"," 0x04, 0xeb, 0xf0, 0x0a, 0x26, 0xc9, 0xf6, 0xfb, 0x0c, 0xf1, 0x11, 0x00,\n"," 0x18, 0xec, 0x10, 0x07, 0x0e, 0x06, 0xde, 0xed, 0x0b, 0xd8, 0x13, 0xfe,\n"," 0x05, 0xfc, 0x00, 0xd0, 0x13, 0x07, 0x1f, 0xf2, 0x11, 0x13, 0x0a, 0x1d,\n"," 0x10, 0xf8, 0xfd, 0x06, 0x02, 0x06, 0xf5, 0xdf, 0x10, 0xfa, 0x11, 0xe0,\n"," 0xf7, 0xf5, 0xf9, 0xe8, 0x0d, 0xda, 0x02, 0xf3, 0xf2, 0xef, 0x0c, 0xe9,\n"," 0xfc, 0xc3, 0x18, 0x12, 0xea, 0xfb, 0x08, 0x0f, 0xf7, 0xdf, 0x23, 0x08,\n"," 0x03, 0xeb, 0xe9, 0x1e, 0xf2, 0xe2, 0x13, 0xea, 0x01, 0xf2, 0xec, 0xe8,\n"," 0xed, 0x0d, 0x15, 0xfc, 0x0f, 0xfd, 0x03, 0xfd, 0x61, 0xee, 0x12, 0xe4,\n"," 0x01, 0xd0, 0x0d, 0xc4, 0x4a, 0x10, 0x07, 0x1d, 0x2e, 0xab, 0xe3, 0xa9,\n"," 0x7f, 0xf8, 0x1f, 0xe3, 0x00, 0xe5, 0xe6, 0xcd, 0x6c, 0xc4, 0x2a, 0xfb,\n"," 0x18, 0xd8, 0xf7, 0xb7, 0x49, 0xf7, 0x19, 0xe2, 0x2e, 0xe3, 0xf5, 0xfd,\n"," 0x33, 0xfa, 0x0b, 0xfd, 0x0a, 0xdc, 0xf0, 0x0c, 0x34, 0xd0, 0x02, 0xf4,\n"," 0x22, 0xe4, 0xf8, 0xe3, 0x2f, 0xe4, 0x11, 0xe5, 0x0e, 0x0c, 0x1e, 0xe6,\n"," 0x21, 0xe8, 0x10, 0xfa, 0x07, 0xfa, 0xef, 0x03, 0x01, 0xde, 0x02, 0x08,\n"," 0x0d, 0xdc, 0x17, 0x00, 0x01, 0xe1, 0x1c, 0x0e, 0xfc, 0x02, 0x04, 0xe8,\n"," 0x07, 0xee, 0x06, 0xff, 0x09, 0xcd, 0x1a, 0xd1, 0x18, 0x2c, 0xff, 0xf4,\n"," 0xf4, 0xee, 0x19, 0xec, 0x1b, 0xf4, 0x09, 0x0e, 0x02, 0xee, 0x15, 0xe3,\n"," 0x0f, 0xe4, 0x02, 0x08, 0xfb, 0x15, 0x09, 0xf1, 0x01, 0xcd, 0x22, 0x19,\n"," 0xee, 0x04, 0x1f, 0xd7, 0x0c, 0xd5, 0x10, 0xea, 0x0c, 0x06, 0x14, 0xd1,\n"," 0xef, 0xef, 0x22, 0x22, 0xf1, 0xf1, 0xfc, 0x0d, 0xf7, 0x00, 0x0e, 0x07,\n"," 0xf4, 0x0d, 0x12, 0x01, 0xde, 0x1d, 0x04, 0xe5, 0x03, 0x15, 0xe8, 0xda,\n"," 0x62, 0x0f, 0x1a, 0xeb, 0x13, 0xd1, 0x09, 0xe7, 0x79, 0x25, 
0xfb, 0xff,\n"," 0x43, 0xa8, 0xef, 0xa4, 0x61, 0xfe, 0x15, 0x16, 0x28, 0xbc, 0x07, 0xd6,\n"," 0x59, 0xd3, 0x00, 0xf0, 0x18, 0xcb, 0x05, 0xca, 0x2f, 0x08, 0xf4, 0x2d,\n"," 0x1f, 0xe5, 0x07, 0xfb, 0x1c, 0x0e, 0x26, 0xf3, 0x3c, 0xd1, 0xe7, 0xf7,\n"," 0x0f, 0xf2, 0xfc, 0x24, 0x3a, 0xf4, 0xfa, 0xfc, 0x09, 0xe1, 0x0e, 0x00,\n"," 0x06, 0xe2, 0x04, 0xe8, 0x15, 0xdd, 0xf6, 0x06, 0x21, 0xe5, 0xfb, 0xe7,\n"," 0xfe, 0xed, 0xfb, 0x14, 0x1c, 0xdd, 0xf8, 0xf6, 0x26, 0x02, 0x02, 0xf1,\n"," 0xf7, 0xd3, 0x13, 0xeb, 0x18, 0x03, 0x12, 0xf4, 0xe5, 0xf0, 0xef, 0xe9,\n"," 0x2c, 0x0d, 0xe3, 0x19, 0x12, 0xc8, 0xdd, 0xee, 0x08, 0x0b, 0xee, 0x19,\n"," 0xf9, 0xf3, 0xf4, 0xf9, 0x0a, 0xfd, 0xf2, 0x0e, 0x15, 0xf8, 0xd6, 0x03,\n"," 0x1f, 0xe9, 0xfd, 0x04, 0x15, 0x1f, 0x21, 0xe1, 0x0c, 0xf8, 0xec, 0xf4,\n"," 0xee, 0x0c, 0xef, 0xfd, 0x0a, 0xf4, 0x06, 0x14, 0x10, 0xe1, 0xdd, 0x0b,\n"," 0x0b, 0x05, 0x0e, 0x0f, 0x01, 0xf7, 0xfd, 0xe0, 0xe2, 0x26, 0x28, 0x26,\n"," 0x10, 0x00, 0xe8, 0xfd, 0xfa, 0xec, 0xf7, 0x14, 0x08, 0xff, 0xf7, 0x0c,\n"," 0x06, 0x09, 0xf3, 0x0b, 0xf3, 0xfe, 0xec, 0xfd, 0x1a, 0xf8, 0xf1, 0xdb,\n"," 0xfe, 0x0f, 0xff, 0x0b, 0x17, 0x1f, 0xfb, 0xe7, 0x0c, 0x13, 0x10, 0xf6,\n"," 0x04, 0x11, 0xf3, 0xfd, 0xec, 0xd0, 0xf3, 0xfa, 0x01, 0xfe, 0x03, 0x07,\n"," 0x0d, 0xde, 0xf8, 0x05, 0xee, 0xf0, 0xff, 0x08, 0xff, 0xf0, 0x1d, 0x05,\n"," 0x14, 0xea, 0xfe, 0x04, 0xf1, 0x0e, 0x19, 0xfb, 0x1a, 0xff, 0xef, 0xf2,\n"," 0x02, 0xf5, 0xe7, 0x0e, 0xe4, 0x1d, 0xfa, 0x14, 0xf0, 0xde, 0xf0, 0xe4,\n"," 0xf6, 0x04, 0x07, 0xe6, 0xf1, 0x1b, 0xff, 0xfb, 0x16, 0x02, 0x01, 0x10,\n"," 0x08, 0x14, 0x08, 0x03, 0xf7, 0x01, 0x02, 0xf6, 0xf9, 0xe7, 0xe9, 0xf5,\n"," 0x05, 0x14, 0xfc, 0xe1, 0xfb, 0x20, 0x03, 0x18, 0xfa, 0xe9, 0xf0, 0x1d,\n"," 0xf9, 0xf0, 0xfb, 0xed, 0x0a, 0xd9, 0xf4, 0xeb, 0xed, 0x05, 0xf7, 0x0b,\n"," 0x0f, 0xf0, 0x0a, 0x07, 0xee, 0xdd, 0x17, 0x08, 0xfb, 0x1c, 0xf4, 0x23,\n"," 0xfd, 0x0f, 0x07, 0xdf, 0x03, 0x1f, 0xed, 0xf1, 0xfd, 0xfb, 0xdc, 0x0a,\n"," 0x18, 0xf9, 0x00, 0xea, 0xf7, 0xe8, 0xf6, 0x07, 0xee, 
0xf8, 0xec, 0xf7,\n"," 0x04, 0x0e, 0x0f, 0x00, 0x18, 0xfc, 0x09, 0x1a, 0xfb, 0x00, 0xe5, 0xff,\n"," 0x0f, 0x08, 0xeb, 0xfc, 0x0f, 0xe6, 0x14, 0x03, 0xf6, 0xfc, 0x0f, 0xfc,\n"," 0x0b, 0xf2, 0x1c, 0x06, 0xf9, 0x09, 0xf9, 0xdf, 0x14, 0xfb, 0xd6, 0xeb,\n"," 0xfb, 0xeb, 0x0d, 0x0b, 0x15, 0xe6, 0xf6, 0x04, 0x17, 0xfc, 0x10, 0xf4,\n"," 0x05, 0xf7, 0xf7, 0xf2, 0xf9, 0xf0, 0xfc, 0x10, 0x08, 0x0d, 0xe1, 0x0c,\n"," 0x06, 0x12, 0xf1, 0xfd, 0x10, 0x2a, 0xfb, 0xec, 0x0c, 0x05, 0x0b, 0x18,\n"," 0x2b, 0x0c, 0x08, 0xeb, 0x22, 0xfb, 0xfe, 0x07, 0x08, 0x17, 0x0d, 0xed,\n"," 0xe8, 0xf2, 0x0d, 0xdf, 0x14, 0xf5, 0xed, 0xe3, 0x00, 0x06, 0xfb, 0x15,\n"," 0x01, 0x03, 0xf9, 0xfe, 0x08, 0x14, 0x01, 0xf3, 0xe4, 0xfb, 0xfe, 0xde,\n"," 0x0f, 0xe8, 0xff, 0xf1, 0x03, 0xe5, 0x18, 0xff, 0xfd, 0x02, 0x10, 0xec,\n"," 0xfb, 0xf5, 0x12, 0x06, 0x0c, 0xde, 0x0f, 0x0e, 0x03, 0xf1, 0xf9, 0x02,\n"," 0xfa, 0x01, 0x07, 0xf3, 0x02, 0x0f, 0x03, 0x13, 0xf4, 0xee, 0x0a, 0x04,\n"," 0x0f, 0x1c, 0x1a, 0x03, 0x08, 0x06, 0xf6, 0x16, 0xff, 0xec, 0x14, 0xfe,\n"," 0x09, 0xf5, 0x06, 0x1d, 0xf3, 0xf0, 0x22, 0xf7, 0x28, 0xe3, 0x09, 0x28,\n"," 0xf2, 0x1a, 0x1c, 0x0e, 0x1a, 0xd5, 0xf6, 0xdd, 0x03, 0xce, 0xff, 0x03,\n"," 0xf5, 0xf2, 0x14, 0x02, 0x11, 0xd2, 0x08, 0xfa, 0xf2, 0xf7, 0xf6, 0xef,\n"," 0xf8, 0xea, 0xf3, 0xf7, 0xe7, 0x0e, 0x03, 0xf5, 0x07, 0x04, 0x21, 0xf5,\n"," 0xec, 0xf6, 0xf1, 0x0f, 0x09, 0x0a, 0x06, 0x03, 0x14, 0xee, 0x03, 0x26,\n"," 0x01, 0x0a, 0x09, 0xf8, 0x0a, 0x17, 0xf6, 0x19, 0x1c, 0xfc, 0x0f, 0xf1,\n"," 0xf8, 0x06, 0xf7, 0xd9, 0x0b, 0x0e, 0x04, 0xda, 0x03, 0xe8, 0x15, 0x0a,\n"," 0x35, 0xfe, 0x03, 0xe5, 0x07, 0xfc, 0x11, 0xfa, 0xfc, 0xf4, 0xe9, 0x06,\n"," 0xfd, 0xe4, 0x15, 0x07, 0x10, 0xef, 0xf6, 0xfc, 0x13, 0x14, 0x08, 0x09,\n"," 0x12, 0xe6, 0xfb, 0xe1, 0x17, 0x04, 0xf8, 0xfc, 0xfc, 0xf1, 0xf3, 0xee,\n"," 0x27, 0x0d, 0xf7, 0xfd, 0x0a, 0xf7, 0x14, 0x00, 0x0d, 0xff, 0xf3, 0x0a,\n"," 0xf9, 0x01, 0x04, 0xfd, 0xf2, 0xf4, 0x13, 0x16, 0xfb, 0x09, 0xe4, 0xef,\n"," 0xf8, 0xf1, 0x10, 0xff, 0x14, 0xfa, 0xda, 0xf6, 
0xff, 0xff, 0xfb, 0x10,\n"," 0x0b, 0x08, 0x0d, 0xf8, 0x04, 0x10, 0xf8, 0xf2, 0x10, 0x00, 0x16, 0x0b,\n"," 0x00, 0x00, 0x14, 0x0b, 0xee, 0xf7, 0x0e, 0x0b, 0xf8, 0xed, 0xf6, 0x0f,\n"," 0xff, 0xc1, 0xfc, 0x04, 0xf6, 0x0a, 0xfa, 0x01, 0xe3, 0xdc, 0x05, 0x07,\n"," 0x00, 0x27, 0x01, 0x06, 0xe1, 0xeb, 0x25, 0x05, 0xf1, 0x22, 0x17, 0x1a,\n"," 0x0a, 0xff, 0x15, 0x18, 0xf3, 0x0f, 0x01, 0x19, 0xfd, 0x0e, 0xec, 0x08,\n"," 0xfa, 0xfd, 0x0f, 0xeb, 0x09, 0x0e, 0xe2, 0x23, 0x07, 0xfa, 0xef, 0xfe,\n"," 0xe9, 0xfc, 0x27, 0x0d, 0x08, 0xf9, 0x0d, 0xf8, 0x1f, 0x15, 0x15, 0xd7,\n"," 0x1d, 0x1a, 0x0e, 0x12, 0x10, 0x23, 0x0d, 0xef, 0xf4, 0x04, 0xff, 0xec,\n"," 0x05, 0xfc, 0x05, 0x07, 0xf0, 0x0c, 0xfb, 0xf9, 0x07, 0xf4, 0x01, 0x0b,\n"," 0xf5, 0x02, 0x14, 0xfa, 0xe3, 0xee, 0xe5, 0x08, 0xea, 0x11, 0x08, 0x0f,\n"," 0xfc, 0xfc, 0xf4, 0xfb, 0xf6, 0x37, 0x0f, 0xea, 0xfe, 0xfe, 0xf6, 0xf5,\n"," 0x11, 0x27, 0xed, 0xe9, 0xfb, 0x09, 0xfb, 0x05, 0xeb, 0xf8, 0x00, 0xf0,\n"," 0xf1, 0x0c, 0x2b, 0x07, 0xe3, 0x0d, 0x27, 0xdc, 0x06, 0x22, 0xf3, 0x02,\n"," 0xf9, 0x0a, 0x07, 0x24, 0xfe, 0x0a, 0x17, 0x1a, 0x07, 0xf7, 0xee, 0xf3,\n"," 0x14, 0x0c, 0x04, 0x08, 0xf2, 0xec, 0xf7, 0x1d, 0xf1, 0xef, 0xf8, 0xef,\n"," 0x19, 0xe8, 0x1d, 0x1a, 0xe1, 0xd8, 0x0c, 0xee, 0xe7, 0x17, 0x16, 0xe4,\n"," 0xf4, 0xe8, 0x26, 0x08, 0x05, 0x24, 0x06, 0x0b, 0xf7, 0xe8, 0x27, 0x17,\n"," 0xe5, 0xe7, 0xeb, 0xe8, 0x0d, 0xe2, 0xf7, 0x11, 0xfd, 0xdb, 0xf9, 0x17,\n"," 0xfc, 0x15, 0x0f, 0x17, 0xe6, 0xeb, 0xf4, 0xf9, 0x03, 0x19, 0xe0, 0x1e,\n"," 0x09, 0xed, 0xfe, 0xf7, 0x2a, 0x26, 0x12, 0x1a, 0xed, 0xe9, 0x0b, 0xf5,\n"," 0x15, 0x20, 0x1c, 0x07, 0x07, 0xf7, 0x0a, 0x0d, 0x0f, 0x1e, 0x1a, 0xe6,\n"," 0x0f, 0x24, 0x03, 0x1b, 0x20, 0xfc, 0x13, 0x04, 0x0c, 0x03, 0xfe, 0xea,\n"," 0x00, 0x07, 0xec, 0x0f, 0xde, 0x16, 0x19, 0x07, 0xe7, 0xe5, 0x15, 0xfd,\n"," 0xd4, 0x1a, 0xfb, 0x01, 0x07, 0xdb, 0x04, 0xfe, 0xda, 0x20, 0xf9, 0x0f,\n"," 0xce, 0xf6, 0x19, 0x14, 0xe6, 0x2f, 0xed, 0x0b, 0x02, 0xfb, 0xd8, 0xf8,\n"," 0xec, 0x1f, 0x03, 0xfe, 0x14, 0x1e, 0xfd, 
0x00, 0xff, 0x13, 0xf4, 0xfb,\n"," 0x01, 0x08, 0xd7, 0x03, 0x03, 0xe0, 0x03, 0xef, 0xfe, 0x0a, 0xe3, 0x05,\n"," 0x03, 0x0b, 0x1e, 0xf0, 0xf1, 0x16, 0x18, 0x01, 0xfb, 0xe5, 0xf5, 0xdc,\n"," 0x03, 0xed, 0x02, 0xff, 0x0b, 0x1a, 0xf7, 0x24, 0xf9, 0xda, 0x1a, 0xe7,\n"," 0x05, 0x1d, 0xf8, 0xf1, 0xf6, 0xf2, 0xd6, 0xf0, 0xfb, 0x16, 0xf1, 0x10,\n"," 0x17, 0xf5, 0x08, 0x09, 0xf7, 0xfa, 0xed, 0x02, 0x09, 0xfc, 0xf1, 0xf2,\n"," 0xfd, 0xea, 0xfc, 0x01, 0x07, 0x06, 0x09, 0x06, 0x08, 0xfb, 0xea, 0x0c,\n"," 0x03, 0x1e, 0x0b, 0x2b, 0xe3, 0xf1, 0x0b, 0xe4, 0x1b, 0x27, 0xea, 0x1c,\n"," 0x0b, 0xfb, 0x01, 0x04, 0x1c, 0x26, 0xf2, 0xf2, 0xf6, 0xf2, 0xfb, 0xfb,\n"," 0x05, 0x2c, 0xef, 0xe9, 0xfb, 0x05, 0x10, 0x0b, 0x08, 0x05, 0x1c, 0xf1,\n"," 0xd2, 0x07, 0x0b, 0xe0, 0xf9, 0x03, 0xe7, 0xf3, 0xfa, 0x12, 0xee, 0xf3,\n"," 0xe0, 0xf8, 0x0e, 0xf0, 0xf1, 0x30, 0x17, 0x01, 0x00, 0xe0, 0x1a, 0xfe,\n"," 0xde, 0x2c, 0x03, 0x05, 0x00, 0xe5, 0xf7, 0x02, 0xfb, 0x34, 0xdd, 0x08,\n"," 0x09, 0x06, 0x1f, 0x0a, 0x00, 0x14, 0xec, 0xdd, 0xf7, 0xf0, 0xdb, 0xe9,\n"," 0xf8, 0x14, 0xff, 0xee, 0xf5, 0xf9, 0x12, 0x01, 0x0c, 0xf7, 0xfd, 0x23,\n"," 0xff, 0x0d, 0x19, 0x12, 0xfa, 0xf6, 0xf9, 0xfe, 0xe6, 0x00, 0x21, 0x0b,\n"," 0xf8, 0xfd, 0x15, 0xfb, 0xee, 0xf2, 0xfe, 0x0a, 0x12, 0x1d, 0x09, 0xee,\n"," 0xf4, 0xc4, 0xff, 0xe7, 0xfd, 0x2a, 0x22, 0x00, 0xe9, 0xff, 0xea, 0xf1,\n"," 0xfb, 0x15, 0xe0, 0x19, 0xde, 0xe6, 0xf1, 0x00, 0xee, 0xfd, 0xf5, 0x0a,\n"," 0x00, 0xfd, 0x0a, 0x0d, 0xf4, 0xf9, 0xf2, 0xe6, 0x02, 0x15, 0x1c, 0x00,\n"," 0xee, 0xfb, 0xfe, 0xed, 0xf0, 0x3e, 0xff, 0x2f, 0xf6, 0xf7, 0xf7, 0xda,\n"," 0x11, 0x22, 0x15, 0x26, 0xfc, 0xfe, 0xfb, 0xfc, 0xf6, 0x2f, 0x02, 0x14,\n"," 0x18, 0xe9, 0x14, 0x19, 0x14, 0x22, 0x02, 0xfd, 0xff, 0x1a, 0x13, 0xf9,\n"," 0xfd, 0x08, 0x06, 0xeb, 0xeb, 0x1e, 0xf0, 0xf6, 0xf4, 0x01, 0xf9, 0x0f,\n"," 0xe5, 0x03, 0xf4, 0xea, 0x02, 0xe0, 0x04, 0x09, 0xe2, 0x2d, 0xf7, 0x16,\n"," 0x04, 0xde, 0xd8, 0xf2, 0xe2, 0x46, 0xe3, 0x08, 0xe8, 0x0d, 0xf6, 0xfc,\n"," 0xfb, 0x2b, 0xf6, 0x0d, 0xe4, 0x01, 
0xfa, 0x03, 0xeb, 0x28, 0x03, 0x24,\n"," 0x1d, 0xf3, 0xff, 0xe9, 0xe7, 0x19, 0x1a, 0xe3, 0x04, 0xf7, 0xed, 0xfd,\n"," 0x02, 0x04, 0x14, 0x09, 0x09, 0x1c, 0x0b, 0x08, 0x09, 0xe8, 0x0b, 0xef,\n"," 0x04, 0x02, 0xfe, 0x19, 0xfc, 0xf4, 0x08, 0xf8, 0xef, 0xd4, 0x04, 0x13,\n"," 0xf6, 0x1c, 0x16, 0x0b, 0xe1, 0xc3, 0xe0, 0xc7, 0x0f, 0x40, 0x12, 0xff,\n"," 0xdf, 0x02, 0xf5, 0xf2, 0xfd, 0x0a, 0xfa, 0x12, 0xef, 0xe6, 0xfb, 0x0c,\n"," 0xfa, 0x0d, 0xfa, 0x18, 0xed, 0xfe, 0x21, 0xf9, 0xed, 0xf3, 0x00, 0x1f,\n"," 0xfc, 0x08, 0x1d, 0x20, 0xdd, 0x14, 0xf8, 0x0e, 0x15, 0x40, 0xeb, 0x30,\n"," 0xdb, 0x09, 0xfc, 0xf1, 0xee, 0x1d, 0x0d, 0x3a, 0x02, 0x0c, 0x0d, 0xf3,\n"," 0x2b, 0x2c, 0x0e, 0x0a, 0x04, 0xf6, 0xfe, 0xe6, 0x17, 0x21, 0xee, 0x0a,\n"," 0x11, 0x05, 0xf4, 0x19, 0x05, 0x2b, 0xe7, 0xfa, 0xfa, 0x25, 0x08, 0xd8,\n"," 0xdd, 0xf6, 0xf6, 0x22, 0xf0, 0xfa, 0x06, 0xdf, 0xe5, 0xe1, 0x09, 0xf2,\n"," 0xfc, 0x2d, 0x07, 0xfa, 0xf2, 0xe8, 0xf7, 0xee, 0xf7, 0x46, 0x03, 0xfb,\n"," 0xe9, 0xf7, 0x07, 0x01, 0x1b, 0x23, 0xf3, 0x09, 0xff, 0x07, 0xfa, 0xeb,\n"," 0xfb, 0x38, 0x05, 0xf1, 0xed, 0xf9, 0x13, 0xfd, 0xf9, 0x16, 0x04, 0x12,\n"," 0x00, 0x06, 0xf1, 0xf2, 0x0c, 0xfe, 0xf4, 0xd7, 0x08, 0x15, 0xe2, 0x11,\n"," 0x14, 0x0c, 0x02, 0xeb, 0x06, 0x21, 0x00, 0x0c, 0x14, 0x0a, 0x24, 0xfe,\n"," 0xda, 0xdb, 0x0f, 0x0a, 0xf5, 0x3a, 0x11, 0xe3, 0xed, 0xcc, 0xfb, 0xbb,\n"," 0x12, 0x27, 0x0a, 0x02, 0xe8, 0x00, 0xfe, 0xf2, 0xfe, 0x1c, 0x05, 0xfb,\n"," 0xf9, 0x0c, 0xf8, 0x1c, 0xe9, 0xfa, 0xe5, 0x10, 0xdc, 0xea, 0xdb, 0xfd,\n"," 0xe4, 0x0a, 0xe9, 0xf5, 0xe9, 0x01, 0x2a, 0x19, 0xf9, 0x10, 0xfc, 0xff,\n"," 0x06, 0x27, 0x0a, 0x4c, 0xe9, 0x03, 0xf4, 0x10, 0x25, 0x48, 0xef, 0x3f,\n"," 0xfe, 0x00, 0xf9, 0x0a, 0x21, 0x2d, 0x08, 0x18, 0x0a, 0xed, 0x06, 0xe4,\n"," 0x2d, 0x13, 0x09, 0x0c, 0x0c, 0x0f, 0x11, 0x06, 0x18, 0x18, 0xf0, 0xff,\n"," 0xf2, 0x1e, 0xf8, 0x13, 0xe6, 0xf3, 0xea, 0x1e, 0xf5, 0x18, 0xfb, 0x1c,\n"," 0xe2, 0xdb, 0x13, 0xf8, 0x03, 0x35, 0xfc, 0xf8, 0xed, 0xf1, 0x05, 0xf6,\n"," 0x0b, 0x3c, 0xfe, 0x06, 0xe1, 
0x0f, 0x03, 0x07, 0x11, 0x29, 0x16, 0x0e,\n"," 0xec, 0x01, 0xf3, 0xf3, 0x11, 0x29, 0x07, 0x04, 0x15, 0x11, 0x10, 0xf0,\n"," 0x04, 0x11, 0xf2, 0x22, 0x08, 0x0b, 0xff, 0xe8, 0x08, 0xf5, 0x00, 0xe1,\n"," 0x01, 0x09, 0x04, 0xfd, 0x03, 0xea, 0x06, 0xf6, 0x01, 0x08, 0xed, 0x0d,\n"," 0xfe, 0x0f, 0x07, 0x00, 0xe3, 0xd8, 0x02, 0x1e, 0xf3, 0x3d, 0x35, 0x0f,\n"," 0xcb, 0xe2, 0x13, 0xd6, 0x0c, 0x4e, 0x16, 0xe3, 0xe0, 0xf2, 0xf4, 0xf4,\n"," 0xf5, 0x28, 0xf8, 0xf8, 0xe8, 0x05, 0xe8, 0x12, 0xf9, 0x04, 0xee, 0x0e,\n"," 0xdf, 0xee, 0xed, 0x0d, 0xe1, 0xff, 0xfa, 0x0c, 0xfd, 0x05, 0xe7, 0x13,\n"," 0x07, 0x1c, 0xfd, 0x05, 0x0c, 0x42, 0xf3, 0x42, 0xf8, 0x17, 0xe6, 0x0d,\n"," 0x03, 0x3a, 0x27, 0x33, 0x03, 0x02, 0x1c, 0x10, 0x15, 0x31, 0xdf, 0x18,\n"," 0x00, 0xfb, 0x02, 0xe1, 0x12, 0x29, 0x23, 0x05, 0x03, 0x0c, 0x07, 0x11,\n"," 0x0e, 0x06, 0xfa, 0xf8, 0x05, 0x1d, 0x24, 0x10, 0x00, 0xfa, 0xea, 0x10,\n"," 0x06, 0xf4, 0xfd, 0x16, 0xec, 0xf0, 0x0c, 0x0e, 0x0d, 0x1b, 0xf0, 0xfd,\n"," 0xf9, 0xe4, 0x16, 0xeb, 0x01, 0x2b, 0x0b, 0xeb, 0xef, 0x10, 0xdf, 0x04,\n"," 0x14, 0x2f, 0x03, 0xe5, 0xf6, 0x11, 0x26, 0xf9, 0xf8, 0x36, 0x14, 0xf0,\n"," 0x03, 0x05, 0xf3, 0xed, 0x13, 0x0a, 0x07, 0x03, 0xfb, 0xff, 0xf6, 0xe7,\n"," 0x08, 0xf4, 0xf5, 0x08, 0x05, 0xfe, 0xe8, 0x1e, 0x05, 0x08, 0xe2, 0xfd,\n"," 0x25, 0x14, 0x1c, 0x15, 0x0b, 0x1b, 0x0e, 0x01, 0xf1, 0xe1, 0x19, 0x15,\n"," 0xec, 0x4b, 0x26, 0xfe, 0xd9, 0xe4, 0x09, 0xd5, 0xf9, 0x40, 0xfe, 0xff,\n"," 0xf0, 0x07, 0x10, 0xfa, 0x0a, 0x29, 0x08, 0x01, 0xdd, 0xf1, 0xe8, 0x12,\n"," 0xf3, 0x13, 0xf8, 0x02, 0xe2, 0xe2, 0x28, 0x11, 0xee, 0xf6, 0xd4, 0x0c,\n"," 0xee, 0x19, 0xff, 0x10, 0xf2, 0x25, 0xff, 0xfd, 0x0d, 0x19, 0x06, 0x53,\n"," 0x06, 0x06, 0xf9, 0x1a, 0x0a, 0x45, 0xe5, 0x43, 0x0a, 0x05, 0xfd, 0x0e,\n"," 0x00, 0x17, 0x02, 0x14, 0xf4, 0x12, 0x08, 0xd7, 0x14, 0x0d, 0xf3, 0x1a,\n"," 0x0b, 0xfe, 0x21, 0x1b, 0x2f, 0x1e, 0xf1, 0xf4, 0xfb, 0x21, 0x08, 0x00,\n"," 0xf3, 0xfe, 0x02, 0x0e, 0x00, 0xfb, 0x13, 0xfc, 0xee, 0xf0, 0x04, 0x04,\n"," 0x04, 0x10, 0x02, 0xfd, 
0xdc, 0xfc, 0x0c, 0xe8, 0xfa, 0x36, 0xf4, 0x0a,\n"," 0xf1, 0xfa, 0xf8, 0xea, 0x00, 0x23, 0x0c, 0x18, 0x00, 0xfb, 0x12, 0xf1,\n"," 0xff, 0x29, 0x05, 0x05, 0x05, 0x0a, 0xf6, 0xec, 0x0c, 0x12, 0x05, 0x16,\n"," 0xff, 0x11, 0x16, 0xed, 0x01, 0x0c, 0x16, 0x1a, 0xf6, 0x06, 0x09, 0x00,\n"," 0x0b, 0x21, 0xf3, 0x0b, 0xdd, 0x1a, 0x12, 0x14, 0x06, 0x2f, 0x11, 0xdc,\n"," 0xff, 0xdf, 0x1d, 0x17, 0xe4, 0x3f, 0x0c, 0xf5, 0xe6, 0xdb, 0xfa, 0xdf,\n"," 0x0c, 0x33, 0x27, 0xe8, 0xe8, 0xf1, 0x0f, 0x06, 0x05, 0x18, 0x11, 0xfb,\n"," 0xe2, 0xf9, 0xd9, 0xef, 0xf7, 0x0c, 0xf3, 0xfe, 0xce, 0xec, 0xfd, 0x04,\n"," 0xfe, 0xf5, 0xf6, 0x19, 0xe0, 0x0f, 0x0a, 0x0a, 0xed, 0x0c, 0xf9, 0x26,\n"," 0x08, 0x30, 0x00, 0x2a, 0xe7, 0xfd, 0xf4, 0xfc, 0x0c, 0x1c, 0xe9, 0x5e,\n"," 0x1c, 0x0b, 0x07, 0xf0, 0x10, 0x23, 0xea, 0x17, 0xfc, 0x01, 0x0d, 0xfb,\n"," 0x2f, 0x2f, 0xe7, 0xfb, 0x04, 0x1b, 0x0f, 0xd9, 0x14, 0x21, 0xf0, 0x0e,\n"," 0xe9, 0x1c, 0x0d, 0xdd, 0xf2, 0x0c, 0xe7, 0x09, 0x01, 0x12, 0x0b, 0xe5,\n"," 0xe4, 0xe7, 0x05, 0xdb, 0x10, 0x25, 0xf1, 0xfa, 0xfa, 0xeb, 0x18, 0x0f,\n"," 0xf7, 0x3d, 0x22, 0xf3, 0xed, 0xfa, 0x01, 0x0d, 0x1b, 0x28, 0x00, 0xe8,\n"," 0xfc, 0x0d, 0xf3, 0x00, 0x00, 0x16, 0x19, 0x05, 0x0b, 0x07, 0xfb, 0xfe,\n"," 0x18, 0x15, 0x24, 0xeb, 0xf2, 0x16, 0x0a, 0xe6, 0x13, 0x03, 0xf5, 0xff,\n"," 0x04, 0x00, 0xfe, 0x0e, 0x03, 0xf3, 0x0e, 0x0d, 0x04, 0x27, 0xeb, 0x0d,\n"," 0x09, 0x23, 0x15, 0xf3, 0xdb, 0xf0, 0xf6, 0x14, 0x06, 0x1f, 0x19, 0xfa,\n"," 0xe0, 0xe9, 0xfc, 0xd8, 0x09, 0x54, 0x21, 0xfb, 0xd8, 0x0d, 0xd8, 0x07,\n"," 0x12, 0x1f, 0x04, 0x14, 0xdd, 0x03, 0x14, 0xf1, 0xdf, 0xfa, 0x01, 0x0e,\n"," 0xe4, 0xfa, 0x0c, 0x20, 0xe6, 0x06, 0xf4, 0xfc, 0xf2, 0xf9, 0x01, 0x20,\n"," 0xed, 0x18, 0xf3, 0x07, 0xfe, 0x3c, 0xfa, 0x3f, 0xfc, 0x14, 0x0b, 0xfe,\n"," 0x0d, 0x29, 0x15, 0x30, 0x04, 0x01, 0xe6, 0x0f, 0x19, 0x23, 0xff, 0x22,\n"," 0x0d, 0xfc, 0xfe, 0x13, 0x1a, 0x26, 0xd8, 0x1a, 0x13, 0x03, 0xfd, 0xfc,\n"," 0x0c, 0x12, 0xed, 0x08, 0x18, 0x11, 0x1b, 0xfe, 0x19, 0xfa, 0xf4, 0x07,\n"," 0xf6, 0x11, 0x09, 
0xf5, 0x02, 0xdb, 0x21, 0xe0, 0x06, 0x13, 0xec, 0x06,\n"," 0xf3, 0xe5, 0xfb, 0xfb, 0xf9, 0x2d, 0xf7, 0xe4, 0xfe, 0xfb, 0xdc, 0xf9,\n"," 0x03, 0x1d, 0x0c, 0xec, 0xf1, 0x14, 0x03, 0x00, 0x04, 0x16, 0x1d, 0x00,\n"," 0x18, 0x01, 0x20, 0xf8, 0x0f, 0x26, 0x11, 0xdb, 0x06, 0x0a, 0xe9, 0xf1,\n"," 0x0b, 0xfe, 0x07, 0xf7, 0xff, 0xfe, 0x08, 0xff, 0x05, 0xf4, 0x13, 0x05,\n"," 0xe3, 0x02, 0x24, 0xfe, 0x0e, 0x0d, 0x01, 0xea, 0xec, 0xd9, 0x0e, 0x0e,\n"," 0xf2, 0x2f, 0x23, 0x11, 0xf3, 0xcd, 0x0c, 0xea, 0x1f, 0x49, 0x16, 0x04,\n"," 0xec, 0x0a, 0x18, 0xef, 0x20, 0x0e, 0x20, 0xda, 0xc3, 0xfd, 0x09, 0xeb,\n"," 0xe5, 0x20, 0xfb, 0x06, 0xe7, 0x04, 0xfc, 0x10, 0xfa, 0xf6, 0xfb, 0xee,\n"," 0xe6, 0x0b, 0xee, 0x13, 0xeb, 0x11, 0xea, 0xed, 0x20, 0x34, 0x0f, 0x2e,\n"," 0xf3, 0x1c, 0x00, 0xd4, 0x15, 0x3e, 0x12, 0x31, 0xf4, 0x06, 0xf9, 0xdd,\n"," 0x11, 0x1c, 0x23, 0x11, 0xf8, 0xfb, 0x11, 0xfb, 0x19, 0x10, 0xd8, 0x24,\n"," 0x10, 0x18, 0x0d, 0x27, 0x04, 0x0f, 0xdf, 0xf5, 0x08, 0x07, 0x12, 0xdb,\n"," 0x08, 0x01, 0x07, 0xfe, 0xf3, 0x00, 0x09, 0xf9, 0x01, 0xd3, 0x00, 0xf9,\n"," 0x05, 0x1d, 0xf9, 0xf2, 0xf4, 0xf9, 0x1a, 0xfd, 0xf2, 0x38, 0x01, 0x12,\n"," 0xef, 0xf6, 0x06, 0xfb, 0x0a, 0x1c, 0xf6, 0x10, 0x06, 0x05, 0xf2, 0x03,\n"," 0xf9, 0x07, 0x07, 0xf8, 0x0f, 0xff, 0xf3, 0xff, 0x17, 0x18, 0x08, 0x0d,\n"," 0xf2, 0xff, 0xf1, 0x03, 0x2e, 0xfb, 0xff, 0xd5, 0xf0, 0x05, 0x01, 0x0d,\n"," 0xf5, 0xf0, 0xeb, 0x05, 0x0c, 0x0d, 0xff, 0x13, 0x0c, 0x13, 0x24, 0xf1,\n"," 0xf9, 0xf1, 0x07, 0x06, 0xe9, 0x45, 0x2c, 0x0e, 0xdc, 0xe5, 0x1c, 0xea,\n"," 0x0e, 0x4e, 0x32, 0x05, 0xed, 0xfb, 0xfa, 0xf6, 0x0d, 0x15, 0xfb, 0xfb,\n"," 0xe2, 0xf7, 0xea, 0xfb, 0xf1, 0x14, 0xef, 0x07, 0xf8, 0x08, 0x1d, 0x24,\n"," 0xed, 0x06, 0xe3, 0xed, 0xf1, 0x09, 0x1f, 0x0e, 0xef, 0x1b, 0xec, 0xfb,\n"," 0x10, 0x2c, 0x08, 0x3e, 0xef, 0x0d, 0x07, 0xf4, 0x0c, 0x35, 0x18, 0x30,\n"," 0xf5, 0xf7, 0xf4, 0xf8, 0x12, 0x29, 0x00, 0x0e, 0xfe, 0x00, 0x03, 0xe6,\n"," 0x13, 0x29, 0x1f, 0x22, 0x08, 0xfc, 0x0c, 0x06, 0x1f, 0x16, 0x0d, 0x0c,\n"," 0xff, 0xf6, 
0xfd, 0x1a, 0xfc, 0x00, 0xef, 0xff, 0x09, 0x0f, 0x0c, 0x02,\n"," 0xfd, 0xe2, 0x0a, 0xf5, 0xfb, 0x1b, 0xf2, 0xdc, 0xff, 0xf7, 0x14, 0xf9,\n"," 0x17, 0x2a, 0x19, 0x1c, 0xfc, 0x0d, 0xf0, 0x02, 0x09, 0x22, 0x13, 0x05,\n"," 0x0c, 0x02, 0xff, 0x0c, 0x04, 0x08, 0x0d, 0xd3, 0x0b, 0x04, 0x12, 0xe4,\n"," 0x0a, 0x16, 0x00, 0xf1, 0x10, 0x13, 0x07, 0xe6, 0x2a, 0xf1, 0xf3, 0xdf,\n"," 0x08, 0x11, 0x0b, 0x07, 0x08, 0x0b, 0xe9, 0xef, 0xed, 0x0d, 0x06, 0x1e,\n"," 0x06, 0x1d, 0x04, 0xf9, 0xfe, 0xde, 0xf6, 0x1b, 0xea, 0x4d, 0x12, 0xfd,\n"," 0xe1, 0xec, 0x1e, 0xeb, 0xfc, 0x2f, 0x0b, 0x01, 0xdc, 0x03, 0xf7, 0xef,\n"," 0x08, 0x07, 0x16, 0x04, 0xd2, 0x07, 0x08, 0xf0, 0xe7, 0x13, 0xfd, 0x04,\n"," 0xdf, 0xf3, 0xfb, 0x25, 0xef, 0x06, 0x00, 0x07, 0xf1, 0x0d, 0x05, 0x00,\n"," 0x01, 0x1a, 0xf9, 0xf1, 0x09, 0x42, 0x19, 0x2b, 0x0b, 0x12, 0xfc, 0x16,\n"," 0x15, 0x2b, 0x19, 0x27, 0xfa, 0xfb, 0x04, 0xec, 0x15, 0x0e, 0x26, 0x26,\n"," 0x11, 0xef, 0xf9, 0xeb, 0x29, 0x23, 0xf9, 0x05, 0xf6, 0x01, 0x17, 0x14,\n"," 0x08, 0x14, 0x0a, 0x03, 0x05, 0x05, 0x10, 0x02, 0x0f, 0x0a, 0x0e, 0x0a,\n"," 0x00, 0xff, 0x02, 0x03, 0xf0, 0xec, 0xe3, 0xf2, 0xf4, 0x16, 0x08, 0xf0,\n"," 0x07, 0xda, 0x20, 0x05, 0x17, 0x34, 0x0b, 0xda, 0x02, 0xeb, 0x05, 0x14,\n"," 0xfb, 0x19, 0x10, 0xe8, 0x08, 0xfa, 0xed, 0x07, 0xfe, 0x25, 0xf3, 0x1d,\n"," 0xfd, 0xf6, 0xfa, 0xe5, 0x10, 0x12, 0x28, 0x09, 0x01, 0xfc, 0x0e, 0xde,\n"," 0x2f, 0x05, 0x18, 0xfa, 0xf4, 0xf4, 0xed, 0x1d, 0x05, 0xfc, 0x01, 0xfd,\n"," 0x10, 0x0a, 0x1c, 0x09, 0x23, 0x21, 0x0c, 0x18, 0xfe, 0xdc, 0xf7, 0x1d,\n"," 0xea, 0x3c, 0x0d, 0x0c, 0x07, 0xe7, 0xe4, 0xe0, 0x03, 0x2c, 0xf7, 0xea,\n"," 0xd2, 0x01, 0xfd, 0xe7, 0x24, 0x19, 0x04, 0xf1, 0xce, 0x02, 0xda, 0xe9,\n"," 0xf7, 0x1d, 0xf2, 0x00, 0xd7, 0x15, 0x13, 0x15, 0xf1, 0x0b, 0xf6, 0xe8,\n"," 0xf3, 0x11, 0xe5, 0x12, 0xea, 0x1f, 0xee, 0x18, 0x0c, 0x39, 0x02, 0x1c,\n"," 0x03, 0x13, 0xf6, 0x1f, 0x0b, 0x39, 0xfb, 0x1d, 0x04, 0x03, 0xfb, 0xe9,\n"," 0x12, 0x29, 0xfd, 0xfc, 0x18, 0x13, 0xff, 0x13, 0x18, 0x30, 0x0b, 0x1f,\n"," 0xf9, 
0x04, 0x02, 0xf8, 0x17, 0xfb, 0x26, 0xfe, 0x13, 0x02, 0x12, 0x1e,\n"," 0x03, 0xfa, 0xf2, 0x06, 0x04, 0xe9, 0xed, 0x22, 0xfa, 0xfa, 0xf9, 0x00,\n"," 0xfa, 0x18, 0x1b, 0xf9, 0x17, 0xf9, 0xed, 0x1c, 0xff, 0x22, 0x08, 0xfc,\n"," 0xfd, 0x0a, 0xfe, 0x0e, 0x00, 0x17, 0xf9, 0x0b, 0xfa, 0x18, 0x0f, 0xff,\n"," 0x01, 0x14, 0x27, 0xda, 0x10, 0x0a, 0xf0, 0xde, 0x10, 0x16, 0xfd, 0xef,\n"," 0xfb, 0x04, 0x04, 0xf9, 0x2d, 0xfb, 0x00, 0xfe, 0xf5, 0xff, 0xfe, 0xdf,\n"," 0x0a, 0x17, 0xfa, 0x04, 0xf6, 0x17, 0xf7, 0x11, 0xf4, 0x2a, 0xfa, 0x26,\n"," 0x09, 0xfa, 0xdf, 0x16, 0xea, 0x29, 0x03, 0xf4, 0xdc, 0xdc, 0xf2, 0xd7,\n"," 0x0a, 0x3e, 0x01, 0xfe, 0xf6, 0x04, 0x0a, 0x00, 0x0a, 0x38, 0xfb, 0xf7,\n"," 0xdc, 0x02, 0x0b, 0xff, 0xeb, 0x0e, 0xee, 0x00, 0xe8, 0x0c, 0x0c, 0x23,\n"," 0xf1, 0x00, 0xf0, 0x11, 0xec, 0x25, 0xf0, 0x0a, 0xfa, 0x23, 0xf7, 0xf2,\n"," 0x10, 0x1b, 0x09, 0x08, 0x04, 0x10, 0xed, 0x03, 0x19, 0x33, 0x16, 0x1c,\n"," 0xfb, 0x19, 0x08, 0x07, 0x07, 0x26, 0xfc, 0x11, 0x19, 0x05, 0xfb, 0xf6,\n"," 0x38, 0x0e, 0xed, 0x2a, 0x0a, 0x14, 0x13, 0xe5, 0x15, 0x01, 0x07, 0x08,\n"," 0xfc, 0x05, 0x1d, 0xf2, 0x08, 0x01, 0xde, 0xf3, 0x0d, 0xe8, 0x1b, 0xff,\n"," 0xf3, 0xf5, 0xfa, 0xea, 0x07, 0x16, 0x25, 0x01, 0x07, 0xfc, 0x09, 0x12,\n"," 0xf5, 0x12, 0xfc, 0x04, 0xf6, 0x0c, 0xef, 0xfd, 0x05, 0x2c, 0x04, 0x13,\n"," 0xf8, 0xfc, 0xec, 0x15, 0x04, 0xfb, 0x05, 0x14, 0x03, 0xea, 0x02, 0xfe,\n"," 0x0c, 0x14, 0x0d, 0x15, 0xfb, 0x07, 0xfd, 0xeb, 0x24, 0x06, 0xff, 0xeb,\n"," 0xf4, 0x07, 0xfb, 0x22, 0x07, 0xe8, 0xee, 0x0e, 0xfb, 0xfe, 0xde, 0x19,\n"," 0xf1, 0x38, 0xf6, 0x00, 0x0c, 0xf0, 0x00, 0x11, 0xfe, 0x36, 0xef, 0xd6,\n"," 0xe6, 0xed, 0x0f, 0xe7, 0xfe, 0x38, 0xf2, 0x21, 0xe0, 0x02, 0xe4, 0x06,\n"," 0x03, 0x1f, 0x08, 0xd9, 0xfd, 0xf6, 0x13, 0xfc, 0xe1, 0x0b, 0xfd, 0xfc,\n"," 0xf2, 0xff, 0x29, 0x1a, 0xef, 0x04, 0xfb, 0xf0, 0xe2, 0x1d, 0x0a, 0x16,\n"," 0xe3, 0x26, 0xdd, 0x0c, 0xf6, 0x23, 0xf0, 0x13, 0xfc, 0x0b, 0x10, 0x0e,\n"," 0x0f, 0x35, 0xfb, 0x16, 0x14, 0x20, 0x04, 0x01, 0x0a, 0x0e, 0xf1, 0x0f,\n"," 
0x08, 0xf8, 0xf9, 0xf8, 0x1c, 0x32, 0x1a, 0x14, 0x05, 0x0c, 0x10, 0xda,\n"," 0x24, 0x25, 0x13, 0x1f, 0x14, 0xfb, 0x06, 0xdf, 0x01, 0x0c, 0xfa, 0x03,\n"," 0x00, 0xe5, 0x15, 0xf4, 0xf3, 0xdf, 0x0d, 0xee, 0xfe, 0x0e, 0x0e, 0x27,\n"," 0x11, 0xe9, 0x0c, 0x11, 0xed, 0x2b, 0x03, 0x16, 0x09, 0xec, 0x06, 0xe7,\n"," 0xf2, 0x33, 0x04, 0x09, 0xf8, 0x0f, 0x0e, 0xf2, 0xfd, 0xfa, 0x04, 0xf4,\n"," 0x10, 0x0a, 0x0b, 0xfc, 0x02, 0xfc, 0xfc, 0xf8, 0x1e, 0x04, 0xe8, 0xdb,\n"," 0x10, 0xf8, 0x0a, 0x0f, 0xfb, 0xf8, 0x1f, 0xee, 0x0e, 0xde, 0xed, 0xd8,\n"," 0x09, 0x0a, 0x1c, 0x2c, 0x06, 0x35, 0x01, 0x0a, 0x0f, 0xf5, 0xf7, 0x0f,\n"," 0xf7, 0x30, 0x15, 0x08, 0xcf, 0xf9, 0x20, 0xd9, 0x05, 0x35, 0xe3, 0x04,\n"," 0xe1, 0x12, 0xf0, 0x04, 0xfa, 0x1d, 0xfd, 0xff, 0x04, 0xfb, 0x28, 0xfd,\n"," 0xed, 0x2f, 0xf5, 0x04, 0xe6, 0x04, 0xe6, 0x16, 0xe5, 0xe8, 0xf2, 0xeb,\n"," 0xf1, 0x02, 0x07, 0x08, 0xe9, 0x22, 0xed, 0xf6, 0x01, 0x3c, 0xf4, 0x17,\n"," 0xe5, 0x0e, 0x07, 0x06, 0x00, 0x34, 0xfc, 0x1a, 0x1a, 0x17, 0x0c, 0xfe,\n"," 0xf7, 0x1d, 0xe9, 0x30, 0x11, 0xf8, 0x19, 0x05, 0x1d, 0x1b, 0xda, 0xfe,\n"," 0x07, 0xf1, 0x08, 0xf2, 0x2b, 0xff, 0xef, 0x01, 0xf9, 0x04, 0x05, 0xfb,\n"," 0xf8, 0x06, 0x0d, 0x04, 0x14, 0xfb, 0x14, 0x06, 0xf2, 0xe3, 0xfe, 0x07,\n"," 0xf4, 0x0c, 0xfd, 0x1d, 0x18, 0xeb, 0x05, 0xee, 0x12, 0x24, 0x00, 0x0b,\n"," 0xff, 0xf9, 0x01, 0xec, 0xfa, 0x1e, 0x1b, 0xfe, 0x01, 0x07, 0x26, 0x06,\n"," 0x02, 0x0c, 0xf7, 0x03, 0x1c, 0xf2, 0x14, 0xdc, 0x09, 0x1f, 0xf4, 0x14,\n"," 0x0e, 0x0c, 0xf8, 0xec, 0x1c, 0x0f, 0xf8, 0xf8, 0x0a, 0xf7, 0x1b, 0xfb,\n"," 0xfe, 0x1b, 0xfa, 0xee, 0x05, 0x06, 0xef, 0x20, 0xe5, 0x4e, 0xef, 0xea,\n"," 0xf5, 0xe7, 0x06, 0x17, 0xd8, 0x1e, 0x12, 0xfa, 0xed, 0xf5, 0x01, 0xf2,\n"," 0xfe, 0x2a, 0x07, 0xfd, 0xdd, 0x01, 0xfa, 0x02, 0x12, 0x2f, 0xf5, 0x0e,\n"," 0xf2, 0xff, 0x03, 0xfc, 0xe7, 0x23, 0xd8, 0x08, 0xef, 0x00, 0xef, 0x0c,\n"," 0xe4, 0xe7, 0xf6, 0xfc, 0xcb, 0x18, 0x0d, 0x0d, 0xe9, 0x12, 0x0c, 0x00,\n"," 0xf8, 0x23, 0xea, 0x28, 0xeb, 0x26, 0xfa, 0xe5, 0x1a, 0x32, 0x1a, 
0x1b,\n"," 0x15, 0x16, 0xf1, 0x07, 0xf5, 0x2b, 0x01, 0x11, 0x12, 0x0f, 0x09, 0x00,\n"," 0x23, 0x23, 0xf4, 0xfb, 0x08, 0x0b, 0x10, 0x1f, 0x21, 0x0e, 0x0a, 0x08,\n"," 0x0a, 0xff, 0x01, 0x17, 0xf2, 0xe3, 0xdc, 0x12, 0x0f, 0x05, 0x16, 0xec,\n"," 0xf3, 0xef, 0xeb, 0xeb, 0xfb, 0x12, 0x11, 0xf8, 0x17, 0xe7, 0x0c, 0xf6,\n"," 0x08, 0x0e, 0x15, 0xe4, 0x0a, 0x00, 0xd8, 0xf8, 0xf6, 0x00, 0x08, 0x22,\n"," 0xfd, 0xfa, 0x0c, 0xfe, 0x08, 0x14, 0xfc, 0x04, 0x06, 0xfa, 0x15, 0xf5,\n"," 0x0f, 0xf6, 0xf0, 0x03, 0x03, 0xf2, 0x0e, 0xf1, 0x27, 0xf0, 0xf0, 0xf4,\n"," 0xf9, 0x07, 0xf0, 0x07, 0x02, 0x1b, 0xfc, 0x00, 0xf2, 0xfc, 0x13, 0x06,\n"," 0xef, 0x2a, 0xf2, 0xeb, 0xf9, 0xe5, 0xfa, 0x0c, 0xe6, 0x20, 0xf4, 0x03,\n"," 0xe2, 0x12, 0xfb, 0x03, 0x0b, 0x39, 0xed, 0x0f, 0xf4, 0x10, 0x04, 0xf8,\n"," 0x04, 0x27, 0xee, 0x05, 0xfe, 0x00, 0x07, 0xf3, 0xd6, 0x0e, 0xe8, 0x10,\n"," 0xd9, 0xf6, 0x04, 0x10, 0xd3, 0xf3, 0xef, 0x01, 0xf8, 0x18, 0x00, 0x1b,\n"," 0xec, 0x28, 0xe1, 0x18, 0xee, 0x38, 0x13, 0x1b, 0x0d, 0x1b, 0xf8, 0x05,\n"," 0xec, 0x20, 0xe6, 0x10, 0x0a, 0x14, 0x0a, 0xe4, 0x0a, 0x1b, 0x05, 0xef,\n"," 0x1b, 0xed, 0xf7, 0x07, 0x13, 0x0d, 0x1a, 0x06, 0x05, 0x09, 0x05, 0xff,\n"," 0x32, 0x04, 0xee, 0x10, 0x1d, 0xf5, 0x0e, 0xe8, 0x0e, 0x07, 0x1e, 0xf4,\n"," 0xff, 0x00, 0xfa, 0x0d, 0xf6, 0xdd, 0xf8, 0xfd, 0xed, 0x1a, 0x1c, 0xe0,\n"," 0x0d, 0xf7, 0xfe, 0xe6, 0xf9, 0x1a, 0x25, 0x0a, 0x11, 0x04, 0xf7, 0xe8,\n"," 0x0a, 0x09, 0x09, 0x0b, 0x03, 0x03, 0x1e, 0xe8, 0x07, 0x1a, 0xdf, 0x15,\n"," 0x03, 0xe5, 0xdb, 0xe9, 0x1c, 0xf4, 0x0e, 0xf5, 0xfb, 0x08, 0x03, 0xfe,\n"," 0x15, 0xfa, 0xfc, 0x22, 0xef, 0x01, 0xf8, 0x0a, 0xf2, 0x04, 0x0a, 0x15,\n"," 0xdf, 0x09, 0x0b, 0x18, 0xef, 0x28, 0x05, 0x05, 0xe1, 0xf2, 0x16, 0x1a,\n"," 0xd9, 0x1a, 0xfc, 0x01, 0xe2, 0x11, 0x25, 0xdc, 0xe9, 0x13, 0xf6, 0xf9,\n"," 0xd5, 0xfd, 0xfd, 0xf9, 0xd2, 0x14, 0xf3, 0x0c, 0xd5, 0x08, 0x15, 0xff,\n"," 0xe1, 0xfa, 0xe0, 0xf9, 0xfe, 0x10, 0x00, 0x15, 0xd5, 0xf0, 0x05, 0x1d,\n"," 0xf1, 0x28, 0x04, 0xf7, 0xea, 0x20, 0xf8, 0x0e, 0x08, 0x29, 
0x10, 0x0a,\n"," 0xf1, 0x21, 0x0d, 0x16, 0xfc, 0x24, 0xf0, 0x1e, 0x14, 0x09, 0xec, 0x07,\n"," 0x1a, 0x1f, 0x09, 0xf0, 0x19, 0xe5, 0x19, 0xf4, 0x04, 0x0d, 0x01, 0x00,\n"," 0xf4, 0xf8, 0x07, 0xfd, 0x18, 0x10, 0x1d, 0x01, 0x0a, 0xfc, 0x18, 0x1b,\n"," 0xf5, 0x15, 0xfa, 0xf9, 0x09, 0xe7, 0x0d, 0x0d, 0xff, 0xfe, 0xf0, 0xf2,\n"," 0xf9, 0x10, 0x0f, 0x09, 0xfa, 0xdb, 0xf2, 0xe4, 0xfa, 0x14, 0x08, 0x0b,\n"," 0x04, 0xfb, 0x12, 0xe8, 0xf7, 0x02, 0x05, 0x0d, 0xfe, 0x0d, 0xe4, 0xf9,\n"," 0xef, 0x20, 0x0d, 0xe6, 0xff, 0x00, 0xea, 0xf0, 0x09, 0x07, 0x02, 0x0a,\n"," 0x14, 0xec, 0x1e, 0xe4, 0x20, 0xf8, 0xf5, 0x05, 0x02, 0x09, 0x0a, 0x10,\n"," 0xf8, 0xff, 0x01, 0xf3, 0x16, 0x10, 0x00, 0x02, 0xda, 0x24, 0xd2, 0x00,\n"," 0x0c, 0xff, 0xdd, 0x13, 0xdc, 0x2b, 0xd6, 0xf5, 0xfd, 0x1d, 0xf7, 0x0d,\n"," 0xd9, 0x18, 0xee, 0x0e, 0xef, 0x0f, 0xf2, 0xf8, 0xd5, 0x11, 0xdd, 0x0f,\n"," 0xf2, 0xfb, 0xfc, 0x07, 0xda, 0x01, 0xe1, 0xf2, 0xf4, 0xf8, 0xf6, 0xfa,\n"," 0xd4, 0xfa, 0xee, 0xdd, 0xce, 0x06, 0xf7, 0x0c, 0xf5, 0x1a, 0xe8, 0x0a,\n"," 0xf5, 0x1e, 0xf4, 0x12, 0xf1, 0x23, 0x02, 0x09, 0xfa, 0x00, 0xea, 0x21,\n"," 0xed, 0x04, 0xf7, 0x05, 0xf9, 0x1a, 0xf7, 0x09, 0x0c, 0xf8, 0x08, 0x20,\n"," 0x12, 0x21, 0xdd, 0x08, 0x04, 0xfd, 0x17, 0x08, 0x06, 0x17, 0xec, 0x12,\n"," 0x05, 0xfb, 0x07, 0x14, 0x01, 0x1c, 0x13, 0xf4, 0x1e, 0xea, 0x09, 0xf0,\n"," 0x1a, 0x04, 0xfb, 0xfe, 0xf2, 0x1e, 0x1a, 0xfb, 0x20, 0xef, 0x10, 0xff,\n"," 0x04, 0x19, 0x09, 0x07, 0x15, 0xee, 0xda, 0xc8, 0x01, 0x22, 0xf5, 0xe3,\n"," 0x05, 0xfb, 0x18, 0xf2, 0xf1, 0x07, 0xfb, 0x05, 0xf8, 0xef, 0xf9, 0xf4,\n"," 0xfa, 0xfa, 0xf0, 0xfa, 0x00, 0xe5, 0xf1, 0x0a, 0xfc, 0xf5, 0xe9, 0x25,\n"," 0xff, 0xed, 0xf8, 0xfd, 0xec, 0xf0, 0xfc, 0x00, 0xf3, 0x0b, 0x0a, 0x1c,\n"," 0xdb, 0x03, 0x0b, 0x0b, 0xfa, 0xfc, 0xda, 0xfc, 0xef, 0x2d, 0xff, 0xf5,\n"," 0x0b, 0x04, 0xfa, 0xe2, 0xd8, 0x03, 0x0f, 0x1c, 0xf8, 0x0c, 0x1c, 0x1e,\n"," 0xe3, 0x12, 0x18, 0xeb, 0xe7, 0x18, 0xfb, 0xdd, 0xcc, 0xf2, 0xf2, 0xe3,\n"," 0xfe, 0x11, 0xed, 0x15, 0xf6, 0xff, 0xdf, 0xf9, 0xfb, 
0x04, 0x03, 0x23,\n"," 0xc6, 0x1b, 0x2e, 0xfa, 0x0a, 0x2d, 0x02, 0x08, 0xf2, 0x14, 0xeb, 0xe5,\n"," 0xfb, 0xf7, 0x03, 0x15, 0x12, 0x06, 0x1d, 0x05, 0x07, 0x10, 0x0d, 0x0c,\n"," 0x13, 0x0c, 0xf5, 0xf6, 0x14, 0x05, 0xee, 0xfa, 0xf6, 0x06, 0x0c, 0xe3,\n"," 0x06, 0xf9, 0xea, 0xf6, 0x23, 0xea, 0x0e, 0xfb, 0xf6, 0x10, 0x17, 0x11,\n"," 0x08, 0xfb, 0x25, 0xef, 0x07, 0x1e, 0xf4, 0xeb, 0xed, 0x27, 0xef, 0xf7,\n"," 0xfa, 0xe5, 0xee, 0xe9, 0x06, 0x18, 0xed, 0xea, 0x09, 0xec, 0x0e, 0x01,\n"," 0xfc, 0xe9, 0xff, 0xec, 0xee, 0xe0, 0x1f, 0xe7, 0xfb, 0x12, 0x20, 0x07,\n"," 0x16, 0xea, 0x08, 0xd3, 0x1d, 0xe8, 0xea, 0xff, 0xf1, 0xf2, 0x0f, 0xd1,\n"," 0x24, 0xfb, 0xe3, 0x1b, 0xff, 0xe4, 0xef, 0xf3, 0xfd, 0xf8, 0xfd, 0xe0,\n"," 0xf1, 0x01, 0x11, 0x0d, 0x0f, 0xf5, 0x05, 0xe3, 0xf8, 0xe3, 0xe5, 0xfd,\n"," 0x01, 0x03, 0xfc, 0x04, 0x0f, 0x08, 0xfe, 0xf8, 0xeb, 0x1b, 0x0e, 0xdc,\n"," 0x19, 0xf9, 0x11, 0xe6, 0x0e, 0xfd, 0x0d, 0x14, 0xfa, 0x06, 0x20, 0x01,\n"," 0x00, 0x0e, 0x0f, 0xe5, 0x24, 0xe0, 0x0e, 0xf3, 0xfa, 0xfe, 0xf6, 0xfb,\n"," 0x05, 0x18, 0xfe, 0xf7, 0xe9, 0x1b, 0x07, 0xe0, 0xf9, 0xf2, 0x0f, 0xc7,\n"," 0xf8, 0x08, 0x01, 0x16, 0xfa, 0xfe, 0x0a, 0xed, 0xfd, 0xfd, 0xf9, 0x14,\n"," 0xf7, 0xe0, 0x0c, 0xf9, 0xf9, 0x0d, 0x0e, 0x06, 0x03, 0x1f, 0x17, 0x00,\n"," 0xfa, 0x00, 0xfb, 0x15, 0x12, 0xe4, 0xde, 0xf7, 0x03, 0xec, 0x14, 0x22,\n"," 0x02, 0xfe, 0x0a, 0xfe, 0x06, 0xf8, 0xfe, 0xed, 0x0a, 0xed, 0x0b, 0x08,\n"," 0xe4, 0xec, 0xf8, 0xe5, 0x06, 0x07, 0xe8, 0xf4, 0xfa, 0xe0, 0xfa, 0x0f,\n"," 0x05, 0x13, 0x0a, 0xf3, 0xf2, 0x09, 0x19, 0x0c, 0xfa, 0xf1, 0xfc, 0xec,\n"," 0xe2, 0x01, 0xfa, 0xfb, 0x0b, 0xfc, 0x09, 0x06, 0x0d, 0x11, 0x04, 0xf2,\n"," 0x0a, 0xf7, 0x28, 0x0f, 0xf4, 0x17, 0xda, 0xff, 0xfb, 0xe9, 0x26, 0x02,\n"," 0xf5, 0x17, 0x00, 0xe2, 0xfe, 0x10, 0xe8, 0x05, 0x21, 0xed, 0x0b, 0x1e,\n"," 0x01, 0x00, 0xf5, 0xff, 0x29, 0xe8, 0x11, 0xf4, 0x20, 0xf9, 0x16, 0xec,\n"," 0x0c, 0xd9, 0x23, 0xf0, 0x0f, 0x27, 0xdd, 0xe9, 0x05, 0xd9, 0x0d, 0x20,\n"," 0x0e, 0x34, 0xfe, 0xda, 0x27, 0xf9, 0x1a, 0x26, 
0x17, 0x0b, 0xd8, 0xd7,\n"," 0xfe, 0x0a, 0x0c, 0xe3, 0xf8, 0x0c, 0xdd, 0xf9, 0x12, 0xef, 0xff, 0x1b,\n"," 0x01, 0xf7, 0xe9, 0xf6, 0x05, 0xf1, 0x1d, 0xdf, 0xf3, 0x05, 0xf2, 0xf2,\n"," 0x00, 0x05, 0x00, 0xf8, 0x0f, 0xdf, 0x00, 0x03, 0x00, 0x13, 0x0b, 0xf0,\n"," 0x0e, 0xec, 0x13, 0x11, 0x17, 0xf1, 0x07, 0xe7, 0x1d, 0xe1, 0xe7, 0xe9,\n"," 0x07, 0x00, 0x1e, 0x21, 0x0b, 0xe8, 0xeb, 0xf5, 0x14, 0xf0, 0x10, 0xe9,\n"," 0xf5, 0xf1, 0xf2, 0xcd, 0xf3, 0x0e, 0x10, 0xff, 0x11, 0xf0, 0x03, 0xf8,\n"," 0x1c, 0x0f, 0x13, 0xeb, 0xf1, 0xe3, 0x0e, 0xfc, 0x02, 0x0b, 0xf3, 0xf2,\n"," 0xfb, 0x01, 0x01, 0xfc, 0xee, 0xfb, 0xeb, 0xf9, 0xf4, 0x0d, 0x03, 0xfe,\n"," 0xda, 0x13, 0x15, 0xfa, 0x14, 0xf1, 0xd9, 0x09, 0xf5, 0x07, 0x04, 0x03,\n"," 0xfe, 0x15, 0xfe, 0x0b, 0x0e, 0xf9, 0x20, 0xeb, 0x11, 0xef, 0xf8, 0xf7,\n"," 0x21, 0xeb, 0x0e, 0x0f, 0x0d, 0x1e, 0x13, 0x02, 0xec, 0xef, 0xf5, 0xf4,\n"," 0xf9, 0x1a, 0xf3, 0xf8, 0x01, 0xfb, 0xfd, 0xe1, 0xfb, 0x22, 0x09, 0xee,\n"," 0xf0, 0xf0, 0x0c, 0xf9, 0x12, 0x3d, 0xfe, 0xef, 0xec, 0x02, 0x0e, 0x15,\n"," 0x0c, 0x18, 0x25, 0xf2, 0x0d, 0xf4, 0x13, 0xf6, 0x0e, 0x02, 0xf1, 0xf4,\n"," 0x0d, 0xff, 0x0f, 0xf0, 0x20, 0x00, 0xf4, 0xf2, 0x12, 0xfc, 0x2a, 0xda,\n"," 0xe8, 0xe6, 0xfa, 0xfc, 0xf7, 0x14, 0x07, 0xf2, 0x11, 0xe9, 0x00, 0x00,\n"," 0x02, 0x02, 0x06, 0x10, 0x05, 0xf6, 0x17, 0xf7, 0xf6, 0x0a, 0x06, 0xea,\n"," 0xfc, 0xf6, 0xe9, 0x01, 0x05, 0xfd, 0x01, 0x12, 0x12, 0xdd, 0xf4, 0xfb,\n"," 0x02, 0x10, 0xf6, 0x03, 0x06, 0xf5, 0x24, 0xed, 0x16, 0xf0, 0xfe, 0x1e,\n"," 0xff, 0xf8, 0x20, 0xeb, 0xf7, 0xf4, 0x01, 0x09, 0x1f, 0x03, 0x0e, 0x07,\n"," 0xe8, 0xe1, 0x04, 0xf1, 0x0e, 0xec, 0x1c, 0x07, 0x0a, 0xf3, 0xf3, 0xed,\n"," 0xfe, 0xf3, 0x0d, 0xf9, 0xeb, 0xf9, 0xff, 0xf5, 0x1f, 0xf9, 0x14, 0xff,\n"," 0xee, 0x09, 0xda, 0xfc, 0x09, 0xe2, 0x24, 0xfa, 0x00, 0x15, 0x0b, 0xf7,\n"," 0x00, 0xf8, 0x1b, 0x08, 0x09, 0xea, 0x13, 0x11, 0xf5, 0x0c, 0x11, 0x10,\n"," 0xfa, 0xf6, 0x11, 0xf5, 0xed, 0x05, 0x17, 0xff, 0xfd, 0x0f, 0x19, 0xe7,\n"," 0xf3, 0x39, 0xf7, 0x04, 0x06, 0xf6, 0x01, 
0x07, 0xf5, 0x38, 0x03, 0xfe,\n"," 0xf8, 0xfe, 0xea, 0xfe, 0xfb, 0x43, 0x0f, 0x04, 0x21, 0xe5, 0x14, 0x1a,\n"," 0xdb, 0x20, 0x24, 0xfa, 0x23, 0xdb, 0x24, 0xf8, 0xfa, 0x0f, 0x02, 0x00,\n"," 0xf0, 0xd6, 0x07, 0x0e, 0xe6, 0xeb, 0x01, 0x14, 0x08, 0x12, 0x0e, 0x16,\n"," 0xf4, 0xe5, 0x02, 0xed, 0x0c, 0x07, 0xe0, 0x13, 0xf7, 0xf6, 0xea, 0xf2,\n"," 0xf2, 0x08, 0xe6, 0xe6, 0xfc, 0xe9, 0x0d, 0x0b, 0xf3, 0xff, 0xf7, 0xe0,\n"," 0x1c, 0xed, 0x0b, 0xfe, 0x0a, 0x0f, 0xfb, 0x10, 0xf5, 0xf2, 0x1e, 0x1d,\n"," 0xdf, 0x0a, 0x25, 0x0e, 0x09, 0x11, 0xf4, 0xf4, 0xfc, 0xf1, 0x0a, 0x0b,\n"," 0x0f, 0x1b, 0x14, 0x09, 0x0f, 0xf7, 0xf9, 0x04, 0x06, 0x19, 0x0b, 0x0b,\n"," 0xea, 0xdc, 0xf9, 0x03, 0x05, 0xf6, 0x10, 0x20, 0xfc, 0xea, 0xe9, 0x0b,\n"," 0x0c, 0x1c, 0xe1, 0x11, 0x18, 0x0d, 0x09, 0x11, 0x0a, 0xf6, 0xf5, 0xf7,\n"," 0x07, 0x14, 0x1c, 0x09, 0xf3, 0x04, 0x0d, 0x02, 0xed, 0x25, 0x13, 0xf2,\n"," 0xe9, 0x07, 0xfe, 0xf3, 0xed, 0xf3, 0xdf, 0x00, 0xe9, 0x27, 0xf3, 0xf6,\n"," 0xe6, 0xd8, 0x11, 0xfc, 0x13, 0x50, 0xfd, 0x0d, 0xd8, 0xd8, 0x1e, 0x0f,\n"," 0xe4, 0x49, 0x01, 0x1c, 0xfb, 0xee, 0x20, 0x19, 0x23, 0x2a, 0xfe, 0x10,\n"," 0x02, 0xd0, 0x1d, 0x15, 0x1e, 0x3f, 0x07, 0x09, 0x02, 0xc5, 0x1c, 0x22,\n"," 0xe9, 0x0a, 0x07, 0xfe, 0x00, 0x00, 0x09, 0x04, 0x0f, 0x04, 0x09, 0x09,\n"," 0xe6, 0x14, 0x10, 0x0d, 0xe9, 0xea, 0xf6, 0x18, 0x07, 0x36, 0xe7, 0xf9,\n"," 0xe7, 0xde, 0xe4, 0xf6, 0x1d, 0x12, 0xfd, 0xee, 0xf1, 0xed, 0x05, 0x0d,\n"," 0x08, 0x10, 0xf9, 0xee, 0x09, 0xf9, 0xf9, 0x09, 0xf1, 0xff, 0xea, 0xf4,\n"," 0xeb, 0xfd, 0xf3, 0x1e, 0xff, 0xe6, 0xdb, 0x09, 0xf7, 0xf9, 0x1c, 0x15,\n"," 0x0c, 0xfb, 0xf2, 0xfc, 0x02, 0x19, 0xe5, 0x2a, 0xe6, 0xfc, 0xda, 0xec,\n"," 0xea, 0x05, 0xfd, 0x0a, 0xe2, 0xec, 0x01, 0xf4, 0x08, 0x27, 0xf8, 0x25,\n"," 0x0c, 0xf6, 0xf1, 0xec, 0xe5, 0xfb, 0x05, 0x0d, 0xed, 0xe3, 0xf0, 0xdc,\n"," 0x05, 0xe4, 0x09, 0xf6, 0xfe, 0xdf, 0xff, 0xf9, 0xe9, 0x05, 0x26, 0xfe,\n"," 0x03, 0x08, 0xfe, 0x07, 0xed, 0xfb, 0xef, 0xfa, 0xe0, 0xfd, 0xf0, 0x14,\n"," 0x0b, 0x3c, 0x06, 0x04, 0xfe, 0xe0, 
0x04, 0xfa, 0xe2, 0x4a, 0x07, 0x0d,\n"," 0xee, 0xda, 0x03, 0xf3, 0xee, 0x40, 0x07, 0x0e, 0xfd, 0xbd, 0x11, 0xfe,\n"," 0x20, 0x41, 0xdd, 0x09, 0xf6, 0xd6, 0x20, 0xfe, 0xec, 0x20, 0xf1, 0xff,\n"," 0x06, 0xc9, 0x15, 0x02, 0xfb, 0x04, 0x1f, 0xe8, 0xe5, 0xe4, 0xfc, 0xe4,\n"," 0x06, 0xfe, 0x23, 0xf3, 0xfe, 0x1b, 0xe2, 0xf5, 0xfd, 0xce, 0x17, 0xea,\n"," 0xdc, 0x16, 0xf0, 0x0f, 0xd7, 0xd9, 0x11, 0xf9, 0xf9, 0x19, 0x15, 0x10,\n"," 0x04, 0xf3, 0xef, 0xff, 0xff, 0xff, 0xf0, 0xdf, 0xfb, 0xe8, 0x1d, 0xff,\n"," 0x1a, 0xf9, 0xed, 0xfb, 0xfe, 0x03, 0xd6, 0x13, 0x09, 0x04, 0x0a, 0x10,\n"," 0x1e, 0xfb, 0xfc, 0x17, 0x04, 0xf4, 0x00, 0x0b, 0xf9, 0x05, 0x14, 0x2b,\n"," 0xe7, 0xe2, 0x0c, 0x06, 0x10, 0x29, 0x16, 0x2b, 0xdb, 0xe1, 0xf5, 0x00,\n"," 0xf0, 0xf8, 0x13, 0x2b, 0x01, 0xea, 0xf5, 0xdf, 0xe9, 0xe5, 0x00, 0x16,\n"," 0xf4, 0xf8, 0xff, 0xfb, 0xfe, 0xd8, 0x29, 0x11, 0xf2, 0xfe, 0xdd, 0xf0,\n"," 0x1d, 0xfb, 0x1a, 0x09, 0xf9, 0x0e, 0x1d, 0xff, 0xda, 0x08, 0x0f, 0x06,\n"," 0xf3, 0xec, 0xe0, 0x1b, 0xef, 0x2a, 0xf9, 0x0e, 0xef, 0xb9, 0xf2, 0x08,\n"," 0xfe, 0x58, 0xfd, 0x14, 0xfd, 0xea, 0xea, 0x17, 0xf4, 0x30, 0x1e, 0x08,\n"," 0x0c, 0xe2, 0x23, 0xe7, 0x09, 0x38, 0x05, 0x18, 0x12, 0xd1, 0x1a, 0x24,\n"," 0x07, 0x2f, 0x16, 0xe1, 0x01, 0xc5, 0x1e, 0x00, 0x08, 0x13, 0xe1, 0xdd,\n"," 0xff, 0xe5, 0xea, 0xe0, 0xe4, 0xee, 0xe8, 0xeb, 0xe1, 0x1a, 0xfe, 0xf5,\n"," 0xef, 0xbe, 0xe9, 0xff, 0xff, 0x0f, 0xec, 0xfa, 0xcc, 0xd1, 0x09, 0xfc,\n"," 0x06, 0x23, 0x0e, 0xf7, 0xe8, 0xec, 0xdb, 0x07, 0x18, 0x09, 0xdc, 0xf6,\n"," 0xec, 0xe2, 0x02, 0x15, 0x13, 0x0a, 0xea, 0xee, 0x17, 0xf8, 0xf0, 0x31,\n"," 0x04, 0xf2, 0x0a, 0x07, 0xfe, 0x05, 0x0d, 0x2f, 0xfe, 0xe2, 0xf5, 0x1f,\n"," 0x13, 0x2b, 0xe4, 0x21, 0xe0, 0xdc, 0x04, 0x0e, 0x01, 0x14, 0x25, 0x47,\n"," 0xe6, 0xf6, 0xe5, 0x05, 0x07, 0x0c, 0x12, 0x23, 0xeb, 0xe1, 0xd5, 0x18,\n"," 0xe4, 0xe5, 0xde, 0x12, 0xf0, 0x01, 0x04, 0xf2, 0x07, 0xe1, 0xed, 0xeb,\n"," 0x03, 0xf2, 0x0d, 0x0c, 0xf3, 0xf1, 0xfb, 0x14, 0xfc, 0x26, 0xe6, 0xe9,\n"," 0xee, 0xf4, 0xfe, 0x0a, 0xdf, 
0xe2, 0xfa, 0x0e, 0x0b, 0x27, 0xe9, 0xf9,\n"," 0xec, 0xc4, 0x16, 0xe3, 0x00, 0x50, 0x0b, 0x0f, 0x0d, 0xd2, 0x0d, 0x01,\n"," 0xf7, 0x35, 0xfe, 0x1f, 0x1f, 0xe1, 0x0c, 0x05, 0x19, 0x2f, 0xfd, 0xf4,\n"," 0x0d, 0xc3, 0x1d, 0xff, 0x11, 0x29, 0x04, 0xfe, 0x01, 0xc8, 0x2c, 0x02,\n"," 0xf5, 0x26, 0xf6, 0xdd, 0xfe, 0xe3, 0x0b, 0x09, 0xe6, 0xe0, 0xfc, 0xdf,\n"," 0xfa, 0x17, 0xf0, 0xfa, 0xf0, 0xd7, 0x02, 0xe9, 0x00, 0x1d, 0xd8, 0x04,\n"," 0xcb, 0xd3, 0x1a, 0xe2, 0x12, 0x0d, 0xdc, 0xf6, 0x06, 0x08, 0x0d, 0xfb,\n"," 0x27, 0x06, 0xf5, 0xdc, 0xfc, 0xdc, 0xfb, 0x2a, 0x0f, 0x16, 0xf3, 0xd9,\n"," 0xfa, 0xf7, 0xf0, 0x2d, 0x0a, 0x0a, 0xf8, 0x01, 0xd7, 0x04, 0xfe, 0x3e,\n"," 0x07, 0xd4, 0xe6, 0xf3, 0x1d, 0x19, 0x07, 0x3c, 0x01, 0xca, 0xea, 0x06,\n"," 0x0a, 0x06, 0xe9, 0x20, 0xe7, 0xdf, 0xd6, 0x03, 0x06, 0x16, 0xf9, 0x42,\n"," 0xf0, 0xf0, 0xd8, 0xfc, 0xfe, 0xee, 0x18, 0x0f, 0xf3, 0xf9, 0xf7, 0x03,\n"," 0x08, 0xf0, 0xf8, 0xf6, 0x09, 0xd9, 0xfc, 0x06, 0xed, 0x1c, 0x0f, 0x07,\n"," 0x0b, 0x00, 0xfc, 0xfb, 0xc4, 0xfe, 0xf9, 0xfb, 0xe1, 0x02, 0x0d, 0x02,\n"," 0x02, 0x2e, 0xff, 0xed, 0xef, 0x9c, 0xfb, 0x09, 0x16, 0x4d, 0x27, 0x1d,\n"," 0x0e, 0xce, 0xf8, 0x1b, 0x16, 0x4e, 0x12, 0x24, 0x14, 0xeb, 0x04, 0x0b,\n"," 0xfd, 0x20, 0x28, 0xfb, 0x18, 0xb5, 0xff, 0xf2, 0x15, 0x15, 0xff, 0xf6,\n"," 0x1d, 0xb7, 0x34, 0xe3, 0x02, 0x13, 0xfb, 0x09, 0x09, 0xdc, 0xfa, 0xf1,\n"," 0x13, 0x06, 0x22, 0xe1, 0xf2, 0x15, 0xea, 0xdb, 0xee, 0xbf, 0x07, 0xe7,\n"," 0xfe, 0x2a, 0xe4, 0xe9, 0xe8, 0xcf, 0xfa, 0xf3, 0x23, 0x16, 0x0f, 0xf9,\n"," 0xed, 0xe9, 0x09, 0x01, 0x0c, 0x00, 0xef, 0xea, 0xe6, 0xd8, 0x13, 0x11,\n"," 0x0f, 0x2b, 0xdf, 0xd6, 0xfb, 0xf8, 0xf8, 0x30, 0x0a, 0x00, 0xf2, 0xd8,\n"," 0xf7, 0x16, 0xfd, 0x45, 0x07, 0xf9, 0xd5, 0x03, 0x0d, 0x17, 0x12, 0x47,\n"," 0xf8, 0xe0, 0xed, 0xfb, 0xf9, 0x06, 0xfb, 0x36, 0xfe, 0xe9, 0x01, 0xea,\n"," 0xef, 0x13, 0x0d, 0x40, 0xe1, 0xf5, 0xe6, 0xfe, 0xff, 0xf9, 0xe9, 0x06,\n"," 0xdd, 0xf2, 0xe1, 0x02, 0x01, 0xf0, 0xeb, 0x03, 0xf8, 0x09, 0x0a, 0x09,\n"," 0xf8, 0x01, 0x01, 0x00, 
0xff, 0x09, 0xf4, 0x0e, 0xd1, 0x11, 0xe4, 0x0f,\n"," 0xe0, 0xe6, 0x05, 0x24, 0x00, 0x3e, 0xf7, 0xf4, 0xe7, 0xc9, 0xf8, 0x1c,\n"," 0x1a, 0x44, 0x05, 0x12, 0xfe, 0xc4, 0x26, 0x09, 0x16, 0x53, 0x11, 0x22,\n"," 0x16, 0xe5, 0x16, 0xf5, 0x24, 0x34, 0x08, 0x0b, 0x0a, 0xbf, 0xfb, 0xdf,\n"," 0x0f, 0x2a, 0x0b, 0xf6, 0x30, 0xa4, 0x3a, 0xf9, 0x08, 0x2f, 0x20, 0xe6,\n"," 0x01, 0xea, 0x10, 0x26, 0xf0, 0xf7, 0x05, 0xd1, 0xf2, 0x07, 0x03, 0x13,\n"," 0xeb, 0xcc, 0x07, 0xf0, 0xfe, 0x16, 0xf3, 0xfb, 0xd0, 0xd8, 0x10, 0xe4,\n"," 0x0a, 0x09, 0xdc, 0x0b, 0xe5, 0xdf, 0xed, 0x00, 0x13, 0x09, 0xf7, 0xf7,\n"," 0xf8, 0xf4, 0xf6, 0x1c, 0x28, 0xfc, 0xe5, 0xf2, 0xe6, 0x01, 0x0b, 0x41,\n"," 0x0e, 0xe8, 0xf5, 0xe0, 0x14, 0x02, 0x12, 0x38, 0x02, 0xd9, 0xd2, 0x0d,\n"," 0x04, 0x10, 0x06, 0x4f, 0x07, 0xdd, 0x03, 0xf0, 0xf9, 0x0e, 0xff, 0x3e,\n"," 0xef, 0xf4, 0xed, 0xed, 0x04, 0x11, 0xed, 0x4e, 0xdd, 0xeb, 0xe1, 0xfd,\n"," 0xff, 0xfe, 0xef, 0x0f, 0xba, 0xeb, 0xe7, 0xe3, 0x0b, 0xee, 0x0f, 0xd9,\n"," 0xf7, 0x0b, 0xff, 0xed, 0xff, 0x09, 0x03, 0xfd, 0xff, 0xf2, 0x09, 0xf0,\n"," 0xdd, 0x1d, 0xed, 0x07, 0xde, 0xe8, 0x05, 0xfd, 0x11, 0x3f, 0x04, 0xf5,\n"," 0xe4, 0x9e, 0x01, 0x26, 0x30, 0x5c, 0x08, 0x12, 0x11, 0xcc, 0x07, 0xf3,\n"," 0x24, 0x38, 0xf9, 0x1b, 0x1f, 0xd6, 0x16, 0x18, 0x1d, 0x28, 0x11, 0x05,\n"," 0x21, 0xa4, 0x09, 0xfa, 0x1d, 0x2f, 0xe5, 0xdd, 0x15, 0xa6, 0x24, 0xe2,\n"," 0xf6, 0x0c, 0xd6, 0xfb, 0x1c, 0xd9, 0x1c, 0x06, 0xf8, 0xf9, 0x1e, 0xdd,\n"," 0xf9, 0x12, 0xec, 0x10, 0xdb, 0xb6, 0x0b, 0xe4, 0xf1, 0x32, 0xd0, 0x18,\n"," 0xd2, 0xce, 0xf5, 0xf5, 0x26, 0x08, 0xf8, 0x0b, 0xfc, 0xfa, 0x11, 0xf1,\n"," 0x0b, 0x13, 0xfc, 0xfe, 0xf0, 0xf3, 0xfa, 0x16, 0x0e, 0x02, 0xd6, 0x17,\n"," 0xf3, 0x10, 0xf9, 0x47, 0x07, 0xfd, 0xe6, 0xf9, 0x00, 0x13, 0x04, 0x2b,\n"," 0xf6, 0xe5, 0xf8, 0xeb, 0x0e, 0xfd, 0x1a, 0x2e, 0xee, 0xf2, 0xf4, 0xf0,\n"," 0xfc, 0xf9, 0xeb, 0x47, 0xf7, 0xe9, 0xd5, 0xec, 0xf7, 0x0c, 0xee, 0x3c,\n"," 0xe4, 0xfb, 0xd4, 0xf3, 0x02, 0x03, 0x03, 0x22, 0xd1, 0x01, 0xeb, 0x0c,\n"," 0xf8, 0xde, 0x0e, 
0x15, 0xf8, 0x10, 0xf8, 0x04, 0xfc, 0x0f, 0x05, 0x06,\n"," 0xff, 0xe1, 0x0e, 0xe1, 0xf3, 0x14, 0xf7, 0x09, 0xda, 0xfd, 0xfe, 0xf8,\n"," 0x16, 0x2c, 0x0a, 0xfc, 0xed, 0xb0, 0x15, 0x07, 0x1a, 0x48, 0x03, 0x1f,\n"," 0x05, 0xcc, 0x14, 0xee, 0x2c, 0x47, 0x0b, 0x0d, 0x29, 0xda, 0x0b, 0x1b,\n"," 0x25, 0x31, 0x1e, 0xf8, 0x1d, 0xc5, 0x26, 0xe9, 0x14, 0x1b, 0x0c, 0xe8,\n"," 0x0d, 0x91, 0x1d, 0x07, 0x06, 0x21, 0x22, 0xd9, 0xfc, 0xd1, 0x17, 0x05,\n"," 0x03, 0xd7, 0xe2, 0xfa, 0xfa, 0x09, 0xf3, 0xf6, 0xe1, 0xc6, 0xf6, 0xe1,\n"," 0xff, 0x18, 0xdd, 0x19, 0xe8, 0xee, 0x1f, 0xeb, 0xfe, 0x1b, 0xe5, 0x11,\n"," 0xdc, 0xf7, 0x06, 0x0c, 0xf8, 0x0a, 0xee, 0xf9, 0xdb, 0x06, 0x04, 0x21,\n"," 0x0a, 0x1f, 0x05, 0x15, 0xe6, 0xfc, 0x1f, 0x30, 0x0e, 0xe6, 0xe0, 0x03,\n"," 0xff, 0x11, 0xe2, 0x43, 0xfd, 0xe1, 0xf6, 0x13, 0xf7, 0x04, 0x21, 0x59,\n"," 0x0c, 0xf8, 0xcd, 0x11, 0xf9, 0x0c, 0xf3, 0x5c, 0x00, 0xd7, 0xf3, 0x0f,\n"," 0xfa, 0x16, 0x0f, 0x45, 0xe9, 0xed, 0xec, 0xff, 0xf5, 0x0c, 0x1e, 0x1a,\n"," 0xd1, 0xfe, 0xe6, 0xfc, 0x0b, 0xfa, 0xe2, 0xef, 0xf5, 0x0b, 0x02, 0x10,\n"," 0xea, 0x10, 0xf3, 0xfe, 0xf3, 0xe8, 0x17, 0x08, 0xdd, 0x28, 0x15, 0x04,\n"," 0xf2, 0xdf, 0xfd, 0xff, 0x1d, 0x39, 0xf6, 0xf3, 0xdd, 0xc1, 0x0c, 0x09,\n"," 0x18, 0x60, 0x0c, 0x22, 0x0a, 0xe3, 0x34, 0xf7, 0x26, 0x5b, 0x0a, 0x18,\n"," 0x09, 0xd7, 0x29, 0x18, 0x2b, 0x44, 0xe1, 0x16, 0x14, 0xd0, 0x21, 0xfb,\n"," 0x15, 0x0b, 0x06, 0xe4, 0x07, 0xa5, 0x2a, 0x02, 0xf8, 0x10, 0x15, 0xe1,\n"," 0x0c, 0xe3, 0x19, 0xfc, 0x0f, 0xef, 0xf3, 0xf9, 0xed, 0x01, 0xd7, 0x05,\n"," 0xe7, 0xbe, 0xdf, 0xe8, 0xff, 0x05, 0xf0, 0x12, 0xcf, 0xcf, 0xf3, 0xf3,\n"," 0x13, 0x1e, 0xef, 0x11, 0xe1, 0x01, 0xf0, 0x0e, 0x26, 0xed, 0xef, 0xfb,\n"," 0xee, 0xf7, 0xfe, 0x17, 0x0a, 0x18, 0xd6, 0x09, 0xf3, 0x05, 0xec, 0x28,\n"," 0x0b, 0xef, 0xd2, 0xf8, 0xec, 0x05, 0x00, 0x36, 0x03, 0xf8, 0xe8, 0x1f,\n"," 0x1d, 0x00, 0x1a, 0x3c, 0xfe, 0xee, 0xea, 0xf2, 0xf6, 0x0d, 0x00, 0x3c,\n"," 0xf4, 0xe1, 0xf2, 0xf7, 0x0d, 0x10, 0x21, 0x47, 0xf7, 0x0a, 0xea, 0xfe,\n"," 0x0c, 0x07, 
0xfd, 0x26, 0xbe, 0xfb, 0xd6, 0x04, 0x11, 0xf9, 0x12, 0xfd,\n"," 0xf3, 0x17, 0xff, 0xe6, 0x0d, 0xfc, 0xf7, 0xee, 0xf9, 0xf1, 0x07, 0xf6,\n"," 0xfc, 0x0d, 0x03, 0x01, 0xdd, 0xe7, 0x01, 0x14, 0x23, 0x31, 0x09, 0xff,\n"," 0x02, 0xa4, 0x14, 0xe5, 0x22, 0x5c, 0xf9, 0x01, 0x18, 0xcb, 0x13, 0x05,\n"," 0x13, 0x50, 0x0d, 0x11, 0x12, 0xe8, 0x2f, 0xe6, 0x33, 0x37, 0x1a, 0xf4,\n"," 0x2b, 0xc0, 0x20, 0xdd, 0x12, 0x1c, 0xe8, 0xca, 0x0b, 0xa8, 0x1d, 0xe3,\n"," 0x16, 0xf8, 0xf5, 0xcf, 0x0f, 0xeb, 0xfb, 0xfc, 0xfe, 0x00, 0xf0, 0xe1,\n"," 0xe0, 0x11, 0xe1, 0x03, 0xef, 0xc2, 0xfa, 0xfc, 0xf3, 0x11, 0xf1, 0x26,\n"," 0xdb, 0xbe, 0xef, 0xee, 0x13, 0x0c, 0xf1, 0xe3, 0xdc, 0xfb, 0x00, 0xfa,\n"," 0x07, 0xe7, 0xe4, 0xe5, 0xdd, 0xfd, 0x0f, 0x17, 0x15, 0x13, 0xf8, 0xe8,\n"," 0xfb, 0x04, 0x0e, 0x33, 0x09, 0xfe, 0xcc, 0xfa, 0x00, 0x0a, 0x25, 0x33,\n"," 0x09, 0xfb, 0xd3, 0xf0, 0x00, 0x06, 0xfd, 0x39, 0x08, 0xf8, 0xd0, 0x0c,\n"," 0x0c, 0x13, 0xf5, 0x47, 0xf4, 0xe9, 0xe9, 0xf7, 0xfa, 0x1e, 0xfe, 0x50,\n"," 0xf7, 0xfe, 0xdc, 0x19, 0xf4, 0xf7, 0xfc, 0x29, 0xd2, 0xf9, 0xd7, 0xe0,\n"," 0xfb, 0xed, 0xed, 0xf7, 0xe1, 0x04, 0xe3, 0x16, 0xf5, 0xfc, 0x08, 0x03,\n"," 0xe8, 0xda, 0x00, 0x10, 0x02, 0x1a, 0xe5, 0xf9, 0xca, 0x08, 0x23, 0x00,\n"," 0x22, 0x3b, 0x11, 0x0f, 0xf0, 0xc2, 0x1d, 0xdf, 0x24, 0x49, 0x21, 0x1a,\n"," 0x08, 0xdc, 0x0c, 0x0a, 0x2d, 0x51, 0x0b, 0x27, 0x1f, 0xdd, 0x2c, 0xf5,\n"," 0x13, 0x31, 0xe1, 0xfe, 0x25, 0xc8, 0x0c, 0x28, 0x1a, 0x13, 0xfe, 0xca,\n"," 0x0c, 0xa4, 0x08, 0x1e, 0x1b, 0x11, 0x06, 0xf0, 0x08, 0xd5, 0x03, 0xec,\n"," 0x08, 0x09, 0x01, 0x09, 0xeb, 0xf5, 0xf8, 0xf7, 0xef, 0xb5, 0xda, 0xf7,\n"," 0xf2, 0x0c, 0xe7, 0x0c, 0xce, 0xcb, 0x03, 0xf8, 0x13, 0x1a, 0xfe, 0xf6,\n"," 0xe4, 0xfe, 0xf0, 0x0b, 0x11, 0xe0, 0xeb, 0x13, 0xd8, 0xfa, 0x1c, 0x06,\n"," 0x19, 0x2d, 0xdd, 0xf5, 0xe4, 0xfd, 0xe5, 0x27, 0x07, 0xfd, 0xbe, 0xdc,\n"," 0x00, 0x13, 0x1e, 0x2a, 0x26, 0xf8, 0xf6, 0x0b, 0x13, 0x1e, 0xf4, 0x37,\n"," 0x0b, 0xf6, 0xcb, 0xe6, 0xfb, 0x0f, 0xf6, 0x48, 0xfb, 0xe3, 0xd6, 0x01,\n"," 0xf5, 
0x13, 0xf1, 0x22, 0xe9, 0xfe, 0xe9, 0xf5, 0x0b, 0x11, 0xfa, 0x1c,\n"," 0xe2, 0xfb, 0xd3, 0x06, 0x0e, 0xf1, 0x0b, 0x0e, 0xf5, 0x13, 0x27, 0x10,\n"," 0xea, 0xfc, 0x18, 0x0f, 0xf3, 0xe6, 0x0e, 0x09, 0x00, 0x10, 0x02, 0xfe,\n"," 0xc1, 0xfc, 0xfa, 0x0a, 0x1a, 0x3e, 0x15, 0x01, 0xe3, 0xb5, 0x1d, 0x08,\n"," 0x21, 0x58, 0xeb, 0x14, 0xf2, 0xcb, 0x1a, 0x04, 0x2d, 0x59, 0xe0, 0x21,\n"," 0x0a, 0xf9, 0x2d, 0xe9, 0x18, 0x26, 0x03, 0xf8, 0x2d, 0xc2, 0x1a, 0x06,\n"," 0x16, 0x03, 0x19, 0xcf, 0x2a, 0xae, 0x32, 0xfc, 0x0a, 0x15, 0x1e, 0xde,\n"," 0x19, 0xd3, 0x0d, 0xed, 0x0c, 0xf4, 0x02, 0xf8, 0xdc, 0x03, 0x11, 0xff,\n"," 0xf7, 0xa7, 0x0c, 0xf5, 0x0c, 0x0b, 0xee, 0xeb, 0xd6, 0xcd, 0x0d, 0xde,\n"," 0xf8, 0x11, 0xf4, 0xfb, 0xd3, 0xff, 0xf3, 0x00, 0x17, 0xe9, 0xf6, 0xeb,\n"," 0xe6, 0xef, 0x12, 0x04, 0x1d, 0x34, 0xd9, 0xf9, 0xd6, 0xff, 0xff, 0x2b,\n"," 0xfb, 0x09, 0xef, 0xf4, 0xeb, 0x10, 0xfd, 0x30, 0x26, 0x04, 0xe2, 0x04,\n"," 0x09, 0x06, 0xe5, 0x41, 0x02, 0xff, 0xed, 0x17, 0x05, 0xfd, 0x08, 0x39,\n"," 0xfd, 0xfd, 0xce, 0xdd, 0xed, 0x03, 0x05, 0x30, 0x01, 0x12, 0xec, 0xfb,\n"," 0xff, 0x10, 0x1a, 0x21, 0xe2, 0x08, 0xeb, 0xf9, 0x07, 0xf5, 0xf0, 0xd8,\n"," 0xea, 0xfb, 0xea, 0xf4, 0x10, 0xf8, 0x1c, 0xfd, 0xfd, 0xe9, 0x16, 0xfd,\n"," 0x0d, 0x0e, 0xf9, 0x06, 0xbb, 0xed, 0x08, 0x00, 0x3d, 0x3d, 0xd7, 0x12,\n"," 0xda, 0xb9, 0x0d, 0x05, 0x10, 0x4f, 0x02, 0x1c, 0x1b, 0xd1, 0x02, 0xf0,\n"," 0x12, 0x4b, 0x11, 0xfc, 0x20, 0x0a, 0x05, 0x01, 0x0e, 0x3c, 0x05, 0xf9,\n"," 0x23, 0xcb, 0x1b, 0x15, 0x2b, 0xfe, 0x20, 0xda, 0x23, 0xa0, 0x1e, 0x12,\n"," 0x1a, 0x09, 0x14, 0xd0, 0x11, 0xcc, 0x07, 0xfa, 0x13, 0x08, 0xed, 0x11,\n"," 0xdf, 0x05, 0xf9, 0x1e, 0xfb, 0xc5, 0x12, 0xf5, 0x0d, 0x2f, 0xe5, 0xfc,\n"," 0xd0, 0xd4, 0xe9, 0x05, 0xf8, 0x17, 0x04, 0x07, 0xd6, 0xe1, 0x0c, 0xef,\n"," 0x19, 0xec, 0x01, 0xff, 0xd5, 0xf3, 0x07, 0x18, 0x0f, 0x12, 0xc2, 0xf9,\n"," 0xd8, 0x07, 0xf0, 0x2f, 0x03, 0x10, 0xe2, 0x16, 0x05, 0x14, 0x0a, 0x30,\n"," 0x21, 0xf4, 0xd6, 0x08, 0x08, 0x1e, 0x12, 0x3c, 0x01, 0x05, 0xd9, 0x0d,\n"," 
0xfb, 0x18, 0xfb, 0x2f, 0x02, 0xf2, 0xee, 0x08, 0xf3, 0x14, 0x09, 0x3d,\n"," 0x02, 0x13, 0xf4, 0xfc, 0xe7, 0x1d, 0x23, 0x43, 0xe7, 0x07, 0xd5, 0x1c,\n"," 0x0b, 0xe5, 0xf5, 0x08, 0xe6, 0xe4, 0xea, 0x08, 0xeb, 0x0e, 0x00, 0x06,\n"," 0xd8, 0x10, 0xfe, 0x06, 0x09, 0x28, 0x03, 0xf7, 0xb2, 0xe8, 0x02, 0x27,\n"," 0x22, 0x28, 0xe8, 0x04, 0xd9, 0xb5, 0x12, 0x08, 0x22, 0x4b, 0x0a, 0x29,\n"," 0x0d, 0xf0, 0x11, 0xdc, 0x24, 0x46, 0x02, 0x21, 0x18, 0xf8, 0x0d, 0x03,\n"," 0x36, 0x3c, 0x04, 0xf5, 0x1c, 0xc6, 0x1f, 0x05, 0x11, 0x0c, 0xe2, 0xcc,\n"," 0x2b, 0x92, 0x11, 0xfb, 0x22, 0x01, 0xf7, 0xd8, 0x16, 0xd5, 0xf4, 0x00,\n"," 0x20, 0xff, 0x02, 0xec, 0xe6, 0x0f, 0xef, 0x06, 0xf4, 0xb4, 0xf2, 0xd3,\n"," 0xf7, 0x17, 0x02, 0x00, 0xd3, 0xb2, 0xdb, 0x1b, 0x05, 0x24, 0xdf, 0xef,\n"," 0xce, 0xea, 0x21, 0x04, 0x01, 0xf1, 0xf1, 0xfa, 0xd7, 0xeb, 0xf6, 0x01,\n"," 0x19, 0x14, 0xeb, 0x09, 0xe5, 0x04, 0x0d, 0x16, 0x08, 0x14, 0xf1, 0x19,\n"," 0xf0, 0x18, 0x05, 0x30, 0x0e, 0xe2, 0xea, 0xeb, 0xf1, 0x1b, 0xe6, 0x28,\n"," 0x0c, 0x11, 0xd9, 0x01, 0xfd, 0x06, 0x0b, 0x38, 0x05, 0xdf, 0xd4, 0x1d,\n"," 0xe8, 0x08, 0x0e, 0x3d, 0xfb, 0x04, 0xee, 0x04, 0xf2, 0x11, 0x0e, 0x35,\n"," 0xe3, 0x13, 0xeb, 0x11, 0x05, 0x01, 0xf9, 0x07, 0xec, 0xff, 0x21, 0x26,\n"," 0xf4, 0xf0, 0xf4, 0x00, 0xc7, 0xf4, 0xf3, 0x22, 0xfe, 0x20, 0x0e, 0x0e,\n"," 0xbd, 0x01, 0xfa, 0x01, 0x25, 0x42, 0xff, 0x10, 0xdb, 0xca, 0x14, 0x1f,\n"," 0x17, 0x4f, 0xfa, 0x2e, 0x04, 0xe7, 0x19, 0xe0, 0x17, 0x40, 0xfd, 0x11,\n"," 0x02, 0xfc, 0x1e, 0xee, 0x21, 0x30, 0x1a, 0x0a, 0x27, 0xd3, 0x0f, 0x13,\n"," 0x1e, 0x05, 0x02, 0xee, 0x1f, 0x9a, 0x05, 0x1f, 0x12, 0xfd, 0x14, 0xea,\n"," 0x0c, 0xcf, 0x06, 0xea, 0x27, 0xf1, 0xfb, 0xf2, 0xe2, 0x1f, 0x04, 0xee,\n"," 0xe5, 0xbe, 0xee, 0xe7, 0xea, 0x19, 0xed, 0x01, 0xc8, 0xd8, 0x10, 0x17,\n"," 0x12, 0x16, 0xe9, 0x09, 0xd0, 0xfb, 0xf4, 0x20, 0x0c, 0x14, 0xfb, 0x03,\n"," 0xcf, 0xff, 0xf3, 0xfe, 0xfd, 0x1b, 0xe8, 0xf0, 0xdc, 0xf6, 0xd7, 0x13,\n"," 0x11, 0x07, 0xe6, 0xdf, 0xe5, 0x08, 0x05, 0x2e, 0x0c, 0xef, 0xc4, 
0xec,\n"," 0xf1, 0x0a, 0xe9, 0x14, 0xf2, 0x1f, 0xf3, 0x0d, 0xfe, 0x08, 0x29, 0x34,\n"," 0x09, 0xfb, 0xd4, 0xf1, 0xe0, 0x30, 0x06, 0x54, 0xfa, 0xfd, 0xe6, 0x16,\n"," 0xfe, 0x12, 0xe5, 0x1f, 0xea, 0x02, 0xfa, 0xe1, 0x06, 0xf7, 0xe8, 0xe9,\n"," 0xe3, 0x0d, 0x02, 0xfe, 0xe9, 0xfc, 0xfb, 0x1a, 0xeb, 0xf9, 0x06, 0x04,\n"," 0x0a, 0x11, 0x09, 0xf4, 0xbe, 0x04, 0x18, 0xf7, 0x35, 0x3a, 0xf1, 0xf6,\n"," 0xdc, 0xbf, 0x14, 0xf7, 0x16, 0x4b, 0xe8, 0x20, 0x03, 0xd6, 0x15, 0xfc,\n"," 0x1f, 0x38, 0xea, 0x0b, 0x12, 0x2e, 0x0c, 0xd5, 0x30, 0x2b, 0x00, 0x00,\n"," 0x2d, 0xc7, 0x15, 0xd6, 0x1c, 0xeb, 0xec, 0xcc, 0x2c, 0x99, 0x14, 0xf4,\n"," 0x12, 0x09, 0x1e, 0xf5, 0xf7, 0xc4, 0xf7, 0xf8, 0x0f, 0xe7, 0x0c, 0xf4,\n"," 0xf6, 0xfb, 0x00, 0x01, 0xe6, 0xce, 0xe6, 0x23, 0xe9, 0x0c, 0xf2, 0xf1,\n"," 0xc6, 0xf5, 0x1a, 0xfc, 0xf5, 0x0e, 0xfc, 0xfa, 0xc2, 0xef, 0x0a, 0x1f,\n"," 0xed, 0x1c, 0xcf, 0xfd, 0xd1, 0xfb, 0x0a, 0x07, 0x11, 0x2b, 0xe4, 0x01,\n"," 0xd9, 0x0c, 0xfd, 0x3f, 0x02, 0x09, 0xe4, 0xee, 0xea, 0x06, 0xf3, 0x2d,\n"," 0x1c, 0xe6, 0xd6, 0x1c, 0xfd, 0x0d, 0x17, 0x25, 0xf4, 0x18, 0xfe, 0xe7,\n"," 0xfd, 0xff, 0x04, 0x36, 0xfe, 0x06, 0xda, 0xee, 0xf1, 0x20, 0x02, 0x41,\n"," 0xee, 0x18, 0xdc, 0xf9, 0xf4, 0x27, 0x03, 0x2f, 0xee, 0x19, 0xe3, 0xf1,\n"," 0x10, 0xf2, 0xdf, 0xe2, 0xdf, 0x0b, 0x23, 0x09, 0x02, 0xfc, 0xe2, 0x11,\n"," 0xca, 0xf1, 0xf1, 0xf0, 0xf1, 0x10, 0xfe, 0x0c, 0xbd, 0xfc, 0x1b, 0x1a,\n"," 0x30, 0x3a, 0x04, 0x03, 0xce, 0xce, 0x21, 0x00, 0xfc, 0x4a, 0xda, 0x1b,\n"," 0x05, 0xdc, 0x07, 0xeb, 0x0b, 0x41, 0x21, 0x17, 0x0e, 0x0d, 0x1f, 0xfe,\n"," 0x2c, 0x29, 0xe6, 0x15, 0x26, 0xb9, 0x06, 0xf7, 0x22, 0x09, 0x03, 0xea,\n"," 0x30, 0x95, 0x28, 0xf6, 0x20, 0x02, 0xfc, 0xf3, 0xf6, 0xce, 0xee, 0x00,\n"," 0x2d, 0xee, 0xf2, 0xf1, 0xd6, 0x0b, 0xe3, 0x08, 0xfa, 0xe1, 0xe2, 0x0c,\n"," 0xef, 0x22, 0xf1, 0x06, 0xd3, 0xed, 0x08, 0x1b, 0xfc, 0x03, 0xec, 0x03,\n"," 0xb6, 0x03, 0xec, 0xfd, 0xfd, 0xf3, 0xd9, 0x0e, 0xd4, 0xd7, 0xd5, 0x15,\n"," 0x0c, 0x1a, 0xd9, 0xeb, 0xdd, 0x11, 0x14, 0x1c, 0x10, 0x07, 
0xe9, 0xf0,\n"," 0xdf, 0x07, 0xdb, 0x15, 0x1e, 0xe8, 0xe6, 0xe1, 0x00, 0x13, 0x12, 0x1b,\n"," 0xef, 0x0a, 0xc8, 0xfd, 0x0d, 0x0f, 0x0a, 0x40, 0x07, 0xf6, 0xcb, 0x02,\n"," 0xde, 0x16, 0x13, 0x4f, 0xfb, 0x08, 0xd3, 0xee, 0xde, 0x07, 0xe5, 0x22,\n"," 0xe7, 0xfe, 0xec, 0xea, 0x06, 0xf0, 0xfe, 0xdf, 0xd6, 0xd5, 0xfb, 0x14,\n"," 0xf9, 0xff, 0x0d, 0xfd, 0xd2, 0xeb, 0x02, 0x03, 0xf5, 0x06, 0xf8, 0xfb,\n"," 0xb0, 0xee, 0x06, 0x1a, 0x22, 0x47, 0x0d, 0xf9, 0xd0, 0xec, 0x03, 0xd5,\n"," 0x0c, 0x3f, 0x07, 0x1b, 0xf1, 0xcc, 0x03, 0xec, 0x1d, 0x47, 0xeb, 0xf6,\n"," 0x04, 0x18, 0x19, 0x09, 0x27, 0x27, 0x01, 0xeb, 0x18, 0xda, 0x10, 0xf9,\n"," 0x1f, 0xf7, 0x11, 0xe2, 0x2e, 0xa8, 0x0a, 0x05, 0x2f, 0x06, 0xf9, 0x02,\n"," 0x18, 0xc5, 0x0f, 0x20, 0x24, 0xe0, 0xf4, 0xea, 0xd5, 0xf0, 0xeb, 0xf3,\n"," 0xef, 0xd1, 0x11, 0xfd, 0xeb, 0xf8, 0xfa, 0x02, 0xcd, 0xe6, 0x11, 0xf8,\n"," 0x04, 0x01, 0xcd, 0x15, 0xd4, 0xea, 0x08, 0xe3, 0x0e, 0x1e, 0xe5, 0x0d,\n"," 0xe7, 0xe8, 0x04, 0xd6, 0x02, 0x30, 0xe6, 0x06, 0xe5, 0xf6, 0x08, 0x0e,\n"," 0x09, 0x1a, 0xf1, 0x08, 0xed, 0x13, 0xfb, 0x2b, 0x0c, 0xf1, 0xe5, 0xfc,\n"," 0x03, 0x19, 0xfb, 0x24, 0x00, 0x1a, 0xcd, 0xf8, 0x03, 0x12, 0x0c, 0x21,\n"," 0x14, 0x00, 0xc4, 0xf2, 0xe3, 0x08, 0x09, 0x2f, 0xf0, 0x04, 0xe4, 0xdd,\n"," 0xd7, 0x2d, 0x24, 0x26, 0xf3, 0x05, 0xfd, 0x1e, 0xfb, 0xf4, 0x07, 0xe8,\n"," 0xce, 0xd8, 0xe9, 0xe1, 0x09, 0x0d, 0xdc, 0x0a, 0xd5, 0x00, 0xf8, 0xd7,\n"," 0x0d, 0x05, 0xfe, 0x02, 0xbd, 0x07, 0x14, 0xe9, 0x18, 0x23, 0xe2, 0xfa,\n"," 0xbf, 0xeb, 0x19, 0x1d, 0x18, 0x39, 0xf4, 0x13, 0xe2, 0xe6, 0x08, 0x10,\n"," 0x11, 0x2b, 0xf6, 0x29, 0x08, 0x2c, 0x15, 0xfb, 0x33, 0x1f, 0x25, 0x24,\n"," 0x24, 0xe4, 0x1e, 0xfd, 0x0f, 0x0e, 0xfc, 0xe9, 0x1d, 0xb9, 0x0a, 0xe4,\n"," 0x36, 0xed, 0x10, 0xf4, 0x03, 0xd2, 0x04, 0xff, 0x14, 0xe5, 0x1f, 0xf7,\n"," 0xe4, 0x0c, 0xdf, 0x0d, 0xfc, 0xf1, 0x1a, 0xee, 0xe2, 0x0b, 0xe7, 0xe9,\n"," 0xbf, 0xee, 0x03, 0xe8, 0xeb, 0xf9, 0xf7, 0x22, 0xab, 0x0a, 0xef, 0x0a,\n"," 0x12, 0x05, 0xfc, 0xea, 0xfc, 0xee, 0x20, 0xf2, 0x01, 
0x27, 0xc8, 0xf6,\n"," 0xf1, 0x04, 0x00, 0x0a, 0x03, 0x17, 0xf7, 0xe2, 0xe2, 0x16, 0xef, 0x0e,\n"," 0x15, 0xfb, 0xd5, 0xee, 0xe3, 0x2c, 0x15, 0x13, 0xef, 0x15, 0xe6, 0xe3,\n"," 0xf1, 0x2a, 0xed, 0x32, 0xef, 0x0d, 0xea, 0xf6, 0xe6, 0x27, 0xfc, 0x3e,\n"," 0xeb, 0x09, 0xe7, 0xef, 0xf4, 0x0f, 0xf2, 0x0a, 0xfa, 0x0e, 0xda, 0xf6,\n"," 0xfe, 0xfc, 0x12, 0xe3, 0xd7, 0x03, 0x10, 0xf9, 0xfd, 0x0a, 0x28, 0x0e,\n"," 0xd1, 0xf5, 0xdc, 0xfc, 0xf4, 0x0f, 0xf3, 0x1b, 0xb8, 0x16, 0x0c, 0xf4,\n"," 0xf8, 0x24, 0xf9, 0x18, 0xc6, 0xf1, 0x0d, 0xf3, 0x22, 0x2b, 0x06, 0x2a,\n"," 0xf2, 0xfe, 0xf3, 0xe7, 0x10, 0x10, 0x02, 0x15, 0x15, 0x4c, 0x01, 0xe5,\n"," 0x0d, 0xfc, 0x17, 0x19, 0x36, 0xeb, 0x01, 0xfe, 0x0a, 0xf3, 0xfa, 0x09,\n"," 0x31, 0xa5, 0x04, 0x2c, 0x32, 0x0d, 0xe8, 0x05, 0x04, 0xc2, 0x0c, 0x1b,\n"," 0x4e, 0xe3, 0xeb, 0xf0, 0xb5, 0x06, 0xee, 0xea, 0x16, 0xea, 0xde, 0xfd,\n"," 0xcf, 0x24, 0xf0, 0x04, 0xcf, 0xeb, 0xf5, 0x07, 0xe3, 0xff, 0xde, 0x11,\n"," 0xce, 0xee, 0x13, 0x0e, 0x0a, 0xfb, 0xfb, 0xf5, 0xda, 0xed, 0x04, 0xe9,\n"," 0x17, 0x34, 0xec, 0x11, 0xe2, 0xff, 0x04, 0x07, 0x15, 0x26, 0xf7, 0xff,\n"," 0x07, 0x20, 0x1d, 0x34, 0x13, 0x0d, 0xfd, 0xf9, 0x04, 0x39, 0xf5, 0x1e,\n"," 0xfd, 0x28, 0xf5, 0x04, 0x02, 0x3a, 0xea, 0x2a, 0xe7, 0xf8, 0x03, 0xda,\n"," 0xf7, 0x27, 0xfc, 0x2b, 0xd5, 0x15, 0xf4, 0x14, 0xdf, 0x1c, 0xfc, 0x2e,\n"," 0xdc, 0x17, 0xee, 0xe7, 0xe9, 0x06, 0x0a, 0xe0, 0xd9, 0xd9, 0xee, 0x13,\n"," 0xe5, 0x1e, 0x06, 0x02, 0xe9, 0xfa, 0xfb, 0xf9, 0xed, 0x10, 0xe8, 0x0a,\n"," 0xdf, 0x0f, 0x2e, 0xee, 0x0a, 0x21, 0xfc, 0xff, 0xe9, 0xe5, 0x1b, 0xe5,\n"," 0xfb, 0x29, 0x05, 0x23, 0xfa, 0x11, 0x07, 0x09, 0xeb, 0x11, 0x1c, 0x12,\n"," 0xf9, 0x33, 0x09, 0xfa, 0x1a, 0x20, 0xdd, 0x03, 0x24, 0xf6, 0xf8, 0x04,\n"," 0x14, 0x07, 0xef, 0xf6, 0x0c, 0xd1, 0x0f, 0x0f, 0x36, 0x05, 0x06, 0xeb,\n"," 0xf6, 0xcf, 0x03, 0xfd, 0x3a, 0xe2, 0x03, 0xf6, 0xd7, 0x11, 0x0b, 0x0a,\n"," 0xf7, 0xfe, 0x00, 0x10, 0xdf, 0xf5, 0xdb, 0x1b, 0xca, 0x13, 0x02, 0xf9,\n"," 0x03, 0xef, 0x10, 0xf8, 0xe3, 0x1e, 0xe8, 0x13, 
0x16, 0x12, 0xea, 0xe9,\n"," 0xee, 0x15, 0xfd, 0xef, 0xfe, 0x11, 0xf1, 0x05, 0x00, 0x1d, 0x02, 0xf0,\n"," 0x00, 0x21, 0xe9, 0xfa, 0xdc, 0x1b, 0xef, 0x16, 0x24, 0x19, 0x0f, 0x10,\n"," 0xf2, 0x35, 0xed, 0x1b, 0x01, 0x2c, 0x1f, 0xea, 0x01, 0x3f, 0x15, 0x0e,\n"," 0xfe, 0x1d, 0xed, 0xf0, 0xf8, 0x22, 0x09, 0x01, 0x03, 0x1c, 0xfa, 0x08,\n"," 0xe5, 0x39, 0x02, 0x27, 0x15, 0x19, 0xe3, 0xff, 0xe0, 0x11, 0x0e, 0x0b,\n"," 0x01, 0x1a, 0xfb, 0xf3, 0x07, 0x01, 0xec, 0x0e, 0x06, 0xf9, 0xfb, 0x12,\n"," 0xf6, 0x12, 0x17, 0x00, 0xf5, 0x04, 0xfa, 0x15, 0x07, 0xff, 0xf3, 0xfa,\n"," 0x20, 0xf5, 0x0d, 0x0e, 0x0e, 0xf1, 0xd9, 0x03, 0x11, 0x1a, 0xfb, 0x0e,\n"," 0xed, 0xe9, 0xe5, 0xf1, 0x04, 0x14, 0x0f, 0xf3, 0x15, 0xec, 0xfd, 0x0b,\n"," 0x04, 0x0f, 0xf8, 0x1b, 0x08, 0xf4, 0xe1, 0x1c, 0x10, 0x0f, 0x06, 0xf8,\n"," 0xed, 0xee, 0x05, 0x0d, 0xff, 0x22, 0xec, 0xe8, 0xf8, 0x0c, 0xdb, 0x0e,\n"," 0x18, 0xe6, 0xf0, 0x03, 0xf2, 0xed, 0x06, 0xef, 0xf5, 0x19, 0x01, 0x12,\n"," 0xf4, 0xe4, 0x29, 0x29, 0x12, 0xdb, 0x03, 0x0e, 0x0e, 0x07, 0x1a, 0x0c,\n"," 0xed, 0x01, 0x09, 0x06, 0x00, 0xfe, 0x0b, 0xd8, 0x13, 0xf0, 0x00, 0x1c,\n"," 0xf8, 0x0c, 0xf7, 0x0c, 0x0b, 0x15, 0xf8, 0x15, 0xf0, 0x28, 0x10, 0x1e,\n"," 0xe6, 0xf0, 0xfa, 0x06, 0xec, 0xff, 0x0b, 0xfc, 0xfe, 0x03, 0x10, 0x0a,\n"," 0xea, 0xed, 0xf7, 0xff, 0xeb, 0xf6, 0xea, 0xe7, 0xf7, 0x0c, 0xe9, 0x23,\n"," 0xfe, 0xe3, 0xec, 0xd6, 0x04, 0xfa, 0x05, 0x0a, 0xf7, 0xf0, 0xf4, 0xd9,\n"," 0xf3, 0xd6, 0xf4, 0xf7, 0xf1, 0xdf, 0xfc, 0xde, 0x06, 0x10, 0x08, 0x03,\n"," 0x16, 0x03, 0x18, 0xe7, 0x0d, 0xfc, 0xf9, 0x02, 0xee, 0x04, 0xf7, 0xec,\n"," 0x15, 0x05, 0xf0, 0x0b, 0xf6, 0x1a, 0x09, 0x03, 0x23, 0xff, 0xe4, 0xf3,\n"," 0xed, 0xfc, 0xf4, 0xf7, 0x18, 0x17, 0x26, 0xdb, 0xe3, 0x0b, 0x03, 0xda,\n"," 0x26, 0xfb, 0x08, 0xf6, 0xff, 0x0f, 0x0d, 0xf8, 0xff, 0xf7, 0xf9, 0xf7,\n"," 0xe4, 0xf4, 0xf0, 0x1a, 0x02, 0x09, 0xf6, 0xfd, 0xee, 0x1a, 0x07, 0xed,\n"," 0x14, 0x03, 0xe8, 0xf7, 0x07, 0xfd, 0x1b, 0x1e, 0x35, 0xfb, 0xe6, 0xf4,\n"," 0xf6, 0x17, 0xf0, 0xed, 0xfc, 0x0f, 0xfd, 
0x11, 0xef, 0x03, 0x11, 0x07,\n"," 0x1a, 0xf7, 0xef, 0xef, 0x0b, 0x15, 0x14, 0xf8, 0x1c, 0x0d, 0x1d, 0xf7,\n"," 0x10, 0xec, 0x1f, 0x0a, 0x05, 0x11, 0x0b, 0xda, 0xe7, 0xee, 0xfd, 0xdc,\n"," 0x15, 0xf0, 0xfd, 0xeb, 0xe1, 0x16, 0xf9, 0x06, 0x02, 0xeb, 0x09, 0x03,\n"," 0x04, 0xe7, 0x19, 0x15, 0xff, 0xf0, 0x05, 0xf5, 0xf7, 0x0a, 0x11, 0xe7,\n"," 0xf8, 0x15, 0x10, 0xf8, 0xfe, 0x11, 0x05, 0x00, 0x0d, 0xee, 0xde, 0x00,\n"," 0xe5, 0x0f, 0xf0, 0x05, 0xf6, 0x1a, 0x0b, 0x08, 0x10, 0x13, 0x0c, 0xf1,\n"," 0x1e, 0xf2, 0x01, 0xfb, 0x1b, 0x0b, 0x05, 0x05, 0x1b, 0x2d, 0xde, 0x0b,\n"," 0xed, 0x11, 0xfc, 0xfe, 0x2a, 0x0d, 0xfc, 0xf8, 0xf9, 0xf4, 0x13, 0xe4,\n"," 0x1a, 0xf6, 0xf0, 0xf3, 0x1d, 0x01, 0x18, 0xdb, 0xf8, 0x0f, 0xf9, 0xf5,\n"," 0x11, 0x1b, 0x0f, 0xf9, 0x19, 0xf6, 0x05, 0xf3, 0xf5, 0x3a, 0x12, 0xdd,\n"," 0x08, 0xe4, 0x15, 0xfb, 0x01, 0x3f, 0xfd, 0x1e, 0x03, 0xf0, 0x06, 0xf9,\n"," 0xfe, 0x13, 0x03, 0x17, 0x1d, 0xea, 0xf6, 0xec, 0xe3, 0x05, 0xe0, 0x0f,\n"," 0xf4, 0xfd, 0x01, 0xea, 0x13, 0xf8, 0xe1, 0x07, 0x10, 0xed, 0xff, 0xf0,\n"," 0xfe, 0xd8, 0xfe, 0x06, 0xf0, 0xfe, 0x0b, 0x00, 0xf1, 0x24, 0xe8, 0xfc,\n"," 0x20, 0x0d, 0x13, 0xed, 0x00, 0x0e, 0x05, 0x10, 0x04, 0x0a, 0x18, 0xdc,\n"," 0xfa, 0x08, 0xed, 0x0c, 0x0f, 0x05, 0x0a, 0xe0, 0xe6, 0xe6, 0x12, 0x02,\n"," 0x05, 0xf2, 0x04, 0xee, 0x1f, 0xf6, 0xe2, 0xf2, 0xff, 0x00, 0x05, 0xf5,\n"," 0x25, 0xe4, 0xf4, 0xf7, 0x00, 0x05, 0x06, 0xeb, 0x1d, 0xf8, 0x03, 0xe7,\n"," 0x06, 0xef, 0x06, 0xeb, 0x1c, 0xfc, 0x00, 0x16, 0x06, 0xfe, 0x02, 0xfc,\n"," 0x01, 0xeb, 0xe9, 0x08, 0x00, 0x05, 0x0a, 0x14, 0x02, 0xf6, 0xdd, 0xff,\n"," 0x18, 0x1b, 0x07, 0x14, 0x00, 0x02, 0x03, 0x06, 0x0a, 0x07, 0xf1, 0x25,\n"," 0xf3, 0x02, 0x06, 0x07, 0x0c, 0x0c, 0x19, 0x07, 0x06, 0x0d, 0xf1, 0xfb,\n"," 0xec, 0x0c, 0x03, 0x09, 0xfa, 0x29, 0xf5, 0x08, 0x0a, 0xff, 0xf5, 0x00,\n"," 0xfe, 0x3e, 0x12, 0xee, 0x18, 0xe4, 0xef, 0x10, 0xe3, 0x3f, 0x08, 0x14,\n"," 0x06, 0xf7, 0x16, 0x1c, 0x21, 0x17, 0xfd, 0x10, 0xd9, 0xee, 0xf7, 0x0a,\n"," 0xf8, 0x09, 0x00, 0x11, 0x17, 0xec, 
0xe8, 0xe3, 0xfe, 0xf4, 0xe8, 0x0b,\n"," 0xf3, 0x06, 0x17, 0x04, 0x01, 0xe7, 0xe6, 0x00, 0xe0, 0x0a, 0x02, 0x04,\n"," 0x04, 0xf7, 0xf6, 0xda, 0x1f, 0x16, 0xe5, 0xfc, 0xf0, 0x1d, 0xfd, 0xfb,\n"," 0x15, 0x0c, 0xf7, 0x09, 0xeb, 0x15, 0x0a, 0xe7, 0xf6, 0x0e, 0xfb, 0xeb,\n"," 0x00, 0xee, 0xe2, 0xff, 0x05, 0x13, 0x04, 0xe9, 0x09, 0x0b, 0xeb, 0xfb,\n"," 0x02, 0x17, 0x01, 0xf3, 0x03, 0x07, 0x09, 0xe1, 0xfb, 0x03, 0x02, 0x0b,\n"," 0xe2, 0x27, 0x07, 0xe7, 0x09, 0x0e, 0x19, 0xfa, 0xf4, 0x02, 0x09, 0xfc,\n"," 0x04, 0x04, 0x07, 0x21, 0x0a, 0x09, 0xfe, 0x03, 0xf0, 0x0f, 0xf0, 0x19,\n"," 0xef, 0xf7, 0x19, 0xdb, 0x0f, 0x35, 0xe1, 0xf5, 0x24, 0xf2, 0x04, 0xe6,\n"," 0x05, 0x21, 0xea, 0x30, 0x10, 0x1d, 0xe7, 0x08, 0x01, 0x20, 0xf4, 0x24,\n"," 0x0d, 0x12, 0xfa, 0x07, 0x0e, 0x0f, 0xf1, 0x14, 0xe7, 0x10, 0x15, 0xef,\n"," 0x0c, 0xeb, 0xf6, 0x07, 0x05, 0x48, 0xce, 0xf6, 0xea, 0xe9, 0x04, 0x1d,\n"," 0xf3, 0x45, 0xea, 0xf6, 0xf9, 0xdc, 0xfb, 0x10, 0x0c, 0x25, 0xff, 0xf5,\n"," 0xfe, 0xf2, 0x1f, 0x01, 0x0b, 0x06, 0xd6, 0x1b, 0xe8, 0x03, 0x04, 0x0f,\n"," 0x0e, 0xe5, 0xf7, 0x07, 0x04, 0xfa, 0x04, 0x0d, 0x03, 0xf4, 0x00, 0xf8,\n"," 0xfc, 0xfd, 0x07, 0x07, 0x14, 0x06, 0x17, 0xec, 0x1d, 0x24, 0xef, 0x01,\n"," 0xff, 0xf5, 0xec, 0xfb, 0x19, 0x17, 0x16, 0x06, 0x06, 0xe5, 0xdb, 0x1b,\n"," 0x0e, 0x04, 0xe7, 0xe7, 0xfe, 0x07, 0xf5, 0xf1, 0xf3, 0x0c, 0x23, 0xfb,\n"," 0xf6, 0x09, 0xd6, 0xd9, 0xe5, 0x0d, 0xe1, 0xf4, 0x13, 0x08, 0xe9, 0x0e,\n"," 0x03, 0x19, 0x04, 0x0d, 0x04, 0x0e, 0xf5, 0x1e, 0xe6, 0xef, 0x0e, 0xfa,\n"," 0x07, 0xe1, 0x14, 0xf4, 0xfb, 0xfa, 0x0f, 0x0c, 0x02, 0xfc, 0xda, 0xf4,\n"," 0xf9, 0x1f, 0x0b, 0x0f, 0x09, 0x19, 0x06, 0xfb, 0x0e, 0x43, 0xfe, 0x0f,\n"," 0x13, 0x04, 0xea, 0xfe, 0x16, 0x3f, 0x14, 0x4a, 0xff, 0xf5, 0xda, 0xf7,\n"," 0x0f, 0x01, 0xed, 0x10, 0xfb, 0x27, 0xe9, 0x01, 0xfe, 0x12, 0x0c, 0x0b,\n"," 0x01, 0x25, 0x07, 0xee, 0xfe, 0x10, 0xf7, 0xf9, 0x04, 0x63, 0xd7, 0x13,\n"," 0xf4, 0xd8, 0xf3, 0x11, 0x11, 0x50, 0xe3, 0x15, 0xf2, 0xc6, 0x22, 0x13,\n"," 0x08, 0x2a, 0xe0, 0x17, 0xfe, 
0xf1, 0xe7, 0xf8, 0x2e, 0x1b, 0xed, 0x14,\n"," 0x1c, 0xf9, 0xf9, 0xf0, 0xf2, 0xf1, 0xff, 0xdc, 0xff, 0xfc, 0x0a, 0x07,\n"," 0x00, 0xf3, 0x00, 0x1d, 0x0d, 0xfa, 0xe3, 0x07, 0xfb, 0xde, 0x02, 0x1e,\n"," 0xfe, 0x18, 0xf1, 0xfe, 0x10, 0x00, 0xec, 0xfa, 0x18, 0x23, 0x21, 0xfc,\n"," 0x02, 0xf0, 0x04, 0x07, 0xf8, 0x08, 0xf4, 0xee, 0x0d, 0xe9, 0xe7, 0xe4,\n"," 0x05, 0xf5, 0x07, 0xe9, 0xf7, 0x04, 0xe9, 0xde, 0x0b, 0x20, 0x21, 0x03,\n"," 0x07, 0xec, 0xe6, 0xeb, 0xf8, 0xed, 0xf0, 0xeb, 0x06, 0x09, 0x08, 0xf4,\n"," 0x13, 0xe8, 0xf3, 0xfd, 0xfa, 0xfa, 0xfb, 0xf5, 0xfd, 0x09, 0xf8, 0x03,\n"," 0xfd, 0x11, 0xfa, 0xf9, 0xfa, 0x14, 0xe1, 0x14, 0x03, 0x11, 0xe7, 0x29,\n"," 0x1c, 0x55, 0x07, 0x17, 0x1c, 0x07, 0xf1, 0x14, 0x14, 0x28, 0x28, 0x66,\n"," 0xfd, 0x0e, 0xd3, 0x24, 0x18, 0x0a, 0x0a, 0x1c, 0xf7, 0x2d, 0xfe, 0xfb,\n"," 0x0e, 0xf6, 0x09, 0xf6, 0x0b, 0x24, 0xeb, 0xf8, 0xf6, 0x0d, 0x03, 0x08,\n"," 0x03, 0x71, 0xe8, 0xf5, 0xdd, 0xe0, 0xe9, 0x08, 0xf0, 0x52, 0xf0, 0x08,\n"," 0xe0, 0xd4, 0x0c, 0xe5, 0x20, 0x37, 0xe0, 0x03, 0xf9, 0xe9, 0x00, 0xf0,\n"," 0x10, 0x12, 0x00, 0x15, 0x10, 0xfd, 0xee, 0x03, 0x22, 0xf0, 0x0b, 0xfc,\n"," 0x08, 0xf1, 0x04, 0x11, 0xfe, 0x0c, 0xec, 0x05, 0xf4, 0xfc, 0x0a, 0xf8,\n"," 0x0d, 0xee, 0xe1, 0xe1, 0x29, 0x0f, 0x2a, 0x06, 0xfe, 0xea, 0xf0, 0xf7,\n"," 0x27, 0x0b, 0xf2, 0x07, 0xf5, 0xdb, 0xf8, 0x19, 0xf5, 0x05, 0xda, 0xf3,\n"," 0x01, 0xec, 0xea, 0x15, 0xfb, 0x1d, 0x00, 0xde, 0xeb, 0xfe, 0xf0, 0x01,\n"," 0x04, 0x03, 0xfc, 0x04, 0xf7, 0x1a, 0xf8, 0xda, 0x0c, 0xfb, 0x03, 0xeb,\n"," 0xf8, 0x08, 0xdc, 0xff, 0xed, 0xf7, 0xf5, 0xfd, 0x07, 0x06, 0xfc, 0xf6,\n"," 0x02, 0xf8, 0xf3, 0x11, 0x0e, 0xe9, 0xf1, 0x18, 0xf2, 0x0c, 0x00, 0x22,\n"," 0x10, 0xea, 0x10, 0x16, 0x24, 0x42, 0x0d, 0x26, 0x06, 0x15, 0xde, 0xe9,\n"," 0x05, 0x1d, 0xec, 0x4c, 0xfd, 0x23, 0xf1, 0x1e, 0x1c, 0xf9, 0x02, 0x19,\n"," 0xff, 0x10, 0xe3, 0xf0, 0xff, 0xf5, 0xfe, 0x03, 0x1a, 0x29, 0xcf, 0xdb,\n"," 0xe2, 0x0f, 0xf4, 0xf1, 0x0d, 0x5a, 0xfd, 0x27, 0xde, 0xe8, 0xff, 0x17,\n"," 0xdd, 0x52, 0xd9, 0x15, 
0xdf, 0xd5, 0x00, 0xc9, 0x0f, 0x39, 0x03, 0xee,\n"," 0xe5, 0xe2, 0xe8, 0xf5, 0x2e, 0x1b, 0x03, 0xf3, 0x19, 0x0c, 0xf8, 0xe9,\n"," 0x13, 0xf4, 0x00, 0xe2, 0x0f, 0x05, 0x02, 0x17, 0x06, 0xf1, 0xe9, 0x1b,\n"," 0x1c, 0x11, 0xd9, 0xef, 0x03, 0xe5, 0xeb, 0x14, 0x10, 0x05, 0xfe, 0x01,\n"," 0xfd, 0xef, 0x11, 0x1e, 0x12, 0x0a, 0x13, 0xea, 0xf2, 0xf0, 0xf4, 0x19,\n"," 0x04, 0x05, 0x01, 0xe6, 0x0c, 0xfe, 0xf4, 0x25, 0x1a, 0x12, 0x1e, 0xdd,\n"," 0xfc, 0x06, 0xd7, 0x11, 0x12, 0xfe, 0xe4, 0xe0, 0x03, 0xef, 0xe3, 0x14,\n"," 0x06, 0xf9, 0x06, 0x00, 0x0e, 0x08, 0xe2, 0x01, 0xf5, 0xfb, 0xfe, 0xf6,\n"," 0x02, 0xfc, 0xf5, 0x12, 0x00, 0xf1, 0x07, 0x01, 0x14, 0x0f, 0x06, 0xf6,\n"," 0xee, 0x38, 0x21, 0x1a, 0x18, 0xe5, 0xff, 0x0d, 0xf7, 0x46, 0xea, 0x1c,\n"," 0x07, 0xf0, 0xdc, 0xf9, 0x19, 0x13, 0x15, 0x44, 0x08, 0x1a, 0xd2, 0x05,\n"," 0x18, 0xf4, 0x17, 0x1a, 0xf9, 0x23, 0xe9, 0xff, 0x16, 0xff, 0xe9, 0x0f,\n"," 0xf6, 0x2b, 0xe8, 0xec, 0xe7, 0xf8, 0x20, 0x10, 0x15, 0x5d, 0xdb, 0x00,\n"," 0xe4, 0xe3, 0xe1, 0x2b, 0x04, 0x4e, 0xec, 0x05, 0xe3, 0xb5, 0xf7, 0xda,\n"," 0x16, 0x2c, 0xe8, 0xfd, 0x01, 0xfd, 0x10, 0xe9, 0x11, 0x17, 0xec, 0x13,\n"," 0x1d, 0x15, 0xeb, 0xf5, 0x09, 0x00, 0xf8, 0x20, 0x0e, 0xf5, 0xef, 0x0a,\n"," 0x03, 0xec, 0x13, 0x2a, 0x02, 0xfb, 0x1d, 0xea, 0xf0, 0x01, 0xea, 0xf9,\n"," 0x16, 0x0b, 0x01, 0x07, 0xfd, 0xf4, 0xe1, 0xff, 0x19, 0x04, 0x14, 0xeb,\n"," 0xf5, 0xf8, 0xfc, 0xf5, 0x0d, 0x0e, 0xde, 0xe2, 0x15, 0xff, 0xfa, 0xe5,\n"," 0x03, 0x25, 0xf6, 0xec, 0xf9, 0x06, 0xfe, 0x29, 0xee, 0xfc, 0xee, 0xe5,\n"," 0x0d, 0xea, 0xe5, 0x01, 0x01, 0xf8, 0x0d, 0xeb, 0x09, 0x00, 0xca, 0xff,\n"," 0x0c, 0x03, 0xf1, 0xef, 0xf7, 0xf1, 0xed, 0x03, 0xf2, 0xe8, 0xe9, 0xe9,\n"," 0x07, 0xfc, 0xeb, 0x1f, 0xdb, 0x19, 0x01, 0x17, 0x03, 0x0e, 0xfb, 0x11,\n"," 0x08, 0x51, 0xdc, 0x2d, 0x09, 0xef, 0xeb, 0x18, 0x07, 0x21, 0xec, 0x4b,\n"," 0xf7, 0x43, 0xd9, 0x00, 0x00, 0xee, 0xf5, 0x19, 0xe4, 0x25, 0xe3, 0xfc,\n"," 0x09, 0x05, 0xf6, 0x11, 0x07, 0x30, 0xcb, 0x0f, 0xef, 0x04, 0x01, 0x0f,\n"," 0x06, 0x4b, 0xfa, 
0xf2, 0xe7, 0xe9, 0xea, 0x10, 0x0d, 0x4f, 0xe5, 0xf2,\n"," 0xf9, 0xd3, 0x07, 0xe4, 0x22, 0x37, 0xeb, 0xed, 0xfb, 0xf5, 0xda, 0xd7,\n"," 0x16, 0x12, 0x0d, 0x11, 0x07, 0x1f, 0x11, 0xe0, 0xff, 0xf2, 0x07, 0x1d,\n"," 0xfa, 0x03, 0xfe, 0xf6, 0xf4, 0xe6, 0xde, 0xe9, 0x05, 0xed, 0xfd, 0xfa,\n"," 0xf3, 0x03, 0xe8, 0x01, 0x26, 0x20, 0xfd, 0xf3, 0x04, 0xd1, 0xff, 0x09,\n"," 0x28, 0x20, 0xfc, 0xfe, 0x02, 0xed, 0x03, 0x02, 0x0d, 0x04, 0xe5, 0xd4,\n"," 0x04, 0xf8, 0xea, 0xfb, 0xfc, 0x14, 0x1b, 0xd6, 0x0b, 0xfb, 0xf9, 0x15,\n"," 0xf5, 0xf6, 0x08, 0xd9, 0x03, 0x05, 0xed, 0x00, 0x12, 0xfe, 0xfb, 0xf6,\n"," 0x13, 0xf3, 0xd7, 0xe3, 0xed, 0xfd, 0x13, 0xfb, 0x00, 0xf2, 0xe6, 0x29,\n"," 0xfc, 0x09, 0x01, 0xdf, 0x03, 0x08, 0x04, 0xfe, 0x07, 0x25, 0xf4, 0x1d,\n"," 0x0a, 0xdb, 0xf6, 0x1a, 0x09, 0x41, 0x12, 0x2c, 0x0a, 0xf6, 0xe4, 0xf9,\n"," 0x0e, 0x13, 0x27, 0x45, 0xe1, 0x29, 0xd8, 0x05, 0x0c, 0xf0, 0x09, 0x19,\n"," 0xf5, 0x1f, 0xef, 0xef, 0x08, 0xeb, 0xf2, 0x0f, 0x0c, 0x24, 0xde, 0x06,\n"," 0xee, 0xfb, 0xf5, 0x14, 0x18, 0x30, 0xdd, 0x0f, 0xe8, 0xfb, 0x1a, 0x1b,\n"," 0x06, 0x50, 0xe0, 0xfa, 0xf5, 0xb5, 0xf2, 0xf5, 0x27, 0x35, 0xed, 0x12,\n"," 0xf3, 0x08, 0x0a, 0xd1, 0x1a, 0x15, 0xf7, 0x03, 0xf3, 0x0c, 0xec, 0xe7,\n"," 0xff, 0xfb, 0xef, 0x2a, 0xf7, 0xf5, 0x12, 0x13, 0x08, 0xe3, 0x00, 0x2a,\n"," 0x05, 0x0f, 0xfb, 0xf1, 0xed, 0x13, 0xf5, 0x02, 0x0c, 0x14, 0xf1, 0xf4,\n"," 0xfa, 0xdb, 0x00, 0x03, 0x26, 0x2e, 0x26, 0x08, 0xf5, 0xe9, 0xfd, 0xe9,\n"," 0x04, 0x20, 0x13, 0xcc, 0xfe, 0xf9, 0x02, 0x15, 0xf7, 0x05, 0xea, 0xc3,\n"," 0xee, 0xfa, 0xf8, 0x10, 0xf8, 0xf1, 0xfe, 0xdb, 0x07, 0x06, 0xdb, 0xfa,\n"," 0x08, 0x01, 0x23, 0xfb, 0x0d, 0xff, 0xdf, 0xf0, 0xfc, 0xfd, 0x03, 0xff,\n"," 0x02, 0x0b, 0xf7, 0x04, 0xea, 0xf0, 0x0a, 0x19, 0x04, 0xfa, 0xee, 0x00,\n"," 0xf5, 0x25, 0x09, 0x24, 0x09, 0xfc, 0xff, 0xff, 0x11, 0x39, 0x05, 0x2a,\n"," 0xf8, 0xf9, 0xcc, 0x28, 0x08, 0x05, 0x07, 0x4c, 0xe3, 0x27, 0xd4, 0x06,\n"," 0xf8, 0xe8, 0xf9, 0x1d, 0xee, 0x10, 0xdb, 0x06, 0xfd, 0xf2, 0x05, 0xf9,\n"," 0x16, 0x26, 
0xe3, 0xf3, 0xf8, 0x00, 0xdd, 0xf9, 0x16, 0x3b, 0xe9, 0xfa,\n"," 0xe8, 0xfd, 0xf0, 0x26, 0xf1, 0x30, 0xc5, 0xe0, 0xe6, 0xbd, 0xf1, 0xd7,\n"," 0x00, 0x24, 0xf6, 0x19, 0xea, 0xca, 0xf1, 0xf8, 0x1f, 0x16, 0xf7, 0xf2,\n"," 0xf7, 0x16, 0x00, 0xf6, 0x09, 0xe5, 0x06, 0xfb, 0x12, 0x1f, 0xfc, 0xe7,\n"," 0xf8, 0xfc, 0xed, 0x01, 0x03, 0x13, 0x07, 0xff, 0xd3, 0x17, 0xfb, 0x01,\n"," 0x12, 0x1d, 0x1c, 0xf6, 0xf1, 0xef, 0xf3, 0x02, 0x15, 0x22, 0x06, 0xed,\n"," 0xff, 0xea, 0xef, 0x11, 0x0d, 0x0d, 0xe7, 0xe4, 0xff, 0x09, 0x02, 0xf8,\n"," 0xf0, 0x00, 0x02, 0xe2, 0x0d, 0x0c, 0xf7, 0x1b, 0xfa, 0xff, 0xe3, 0xe8,\n"," 0x10, 0xe9, 0xea, 0x01, 0x0e, 0xfe, 0x1f, 0xf8, 0x0b, 0x04, 0xe7, 0xfe,\n"," 0xf9, 0x02, 0x01, 0xf5, 0x09, 0xf8, 0xfe, 0x0a, 0xfb, 0x06, 0x1b, 0xe2,\n"," 0x00, 0xef, 0xde, 0x15, 0xf8, 0x2d, 0xf1, 0x1a, 0x05, 0xff, 0xf0, 0x11,\n"," 0x00, 0x41, 0xe2, 0x26, 0x14, 0xd3, 0xde, 0xf3, 0x09, 0x0d, 0xfa, 0x28,\n"," 0xdc, 0x37, 0xc7, 0x06, 0xf3, 0xf9, 0x07, 0x27, 0xe9, 0x14, 0xd4, 0x24,\n"," 0xfa, 0x04, 0x13, 0x08, 0xf7, 0x11, 0xf0, 0x0d, 0x01, 0x03, 0x06, 0x16,\n"," 0x08, 0x47, 0xe2, 0x13, 0xe6, 0xf0, 0xdc, 0x21, 0xf4, 0x3e, 0xeb, 0x19,\n"," 0xe2, 0xcc, 0xf5, 0xf7, 0x15, 0x34, 0xde, 0x2c, 0xe0, 0xd6, 0xde, 0xd3,\n"," 0x11, 0x0f, 0x01, 0x0f, 0xf2, 0x14, 0x02, 0xee, 0x16, 0xdb, 0xe1, 0xfd,\n"," 0x01, 0x13, 0x1d, 0x09, 0x14, 0xf2, 0xd2, 0x05, 0xfe, 0x0a, 0xe9, 0x03,\n"," 0x0b, 0x13, 0xf2, 0x21, 0x35, 0x0d, 0x0a, 0xf6, 0xed, 0xf5, 0xf5, 0x0d,\n"," 0x2c, 0x2a, 0xf3, 0xec, 0xf3, 0xde, 0xef, 0x0c, 0x07, 0x06, 0x16, 0xd1,\n"," 0xf4, 0xfe, 0xe7, 0x1c, 0xf9, 0xfe, 0xf3, 0xc6, 0x04, 0x01, 0xef, 0x03,\n"," 0xeb, 0x04, 0x06, 0xd1, 0x05, 0xee, 0xf7, 0x19, 0x25, 0x09, 0x2a, 0xff,\n"," 0x20, 0x11, 0xf3, 0x02, 0x0c, 0xf7, 0x08, 0xf2, 0x00, 0xf5, 0xd9, 0x24,\n"," 0xfd, 0xfb, 0xe7, 0x06, 0x04, 0xd9, 0x0f, 0xe2, 0xf5, 0x16, 0x03, 0x07,\n"," 0xfd, 0xf3, 0xe3, 0xfa, 0xfc, 0x30, 0x27, 0x22, 0x04, 0xf3, 0xdf, 0x0b,\n"," 0x12, 0x09, 0xe5, 0x2d, 0xf9, 0x34, 0xbb, 0x13, 0xeb, 0xff, 0xe8, 0x0a,\n"," 0xf4, 
0x03, 0xea, 0xed, 0xdf, 0xf8, 0x0a, 0xfe, 0x07, 0x31, 0xe7, 0xe8,\n"," 0xfc, 0x03, 0x03, 0x03, 0x1a, 0x2a, 0xe5, 0x0a, 0xe5, 0x0d, 0x1d, 0x2a,\n"," 0xed, 0x40, 0xd3, 0x05, 0xee, 0xc5, 0xda, 0xf8, 0x12, 0x3f, 0xe6, 0xfc,\n"," 0xde, 0xe0, 0xd6, 0xc6, 0x0b, 0x0d, 0x05, 0x01, 0xe7, 0x18, 0xd7, 0xec,\n"," 0x05, 0xed, 0xfb, 0x19, 0x0d, 0xf9, 0x03, 0x02, 0x0a, 0xe9, 0xe2, 0x1e,\n"," 0x0e, 0x11, 0x05, 0xe6, 0xed, 0x05, 0xe5, 0xe0, 0x1d, 0x18, 0xfb, 0xed,\n"," 0xf1, 0xcf, 0xf7, 0x17, 0x2f, 0x20, 0x0a, 0x11, 0x02, 0xed, 0xf0, 0x01,\n"," 0x0d, 0x14, 0x09, 0xc8, 0xf0, 0x00, 0xf9, 0xf9, 0xf1, 0x01, 0xe5, 0xce,\n"," 0x02, 0xf4, 0xdb, 0x13, 0xfe, 0x07, 0xf5, 0xee, 0x05, 0xe9, 0xef, 0x25,\n"," 0x1a, 0x1a, 0x0d, 0x02, 0x18, 0x05, 0xc8, 0xe2, 0xf8, 0xf1, 0x00, 0xf9,\n"," 0x1a, 0xf7, 0xf5, 0xf0, 0xef, 0x07, 0xff, 0xf0, 0xee, 0xeb, 0xf5, 0x28,\n"," 0xd0, 0x15, 0x1b, 0x1e, 0x08, 0xdc, 0xeb, 0xfa, 0xf3, 0x3b, 0xee, 0x18,\n"," 0x03, 0xfa, 0xdb, 0x11, 0x00, 0x10, 0x21, 0x43, 0xe6, 0x39, 0xea, 0xfd,\n"," 0xf0, 0xf3, 0xef, 0x1f, 0xd8, 0x19, 0xbc, 0xfb, 0xea, 0xf5, 0xda, 0x01,\n"," 0x0e, 0x09, 0xd3, 0xf7, 0x01, 0xfa, 0xec, 0x12, 0x0a, 0x40, 0xd9, 0xec,\n"," 0xea, 0x05, 0x13, 0x17, 0xed, 0x4b, 0xc5, 0xfc, 0xf0, 0xc8, 0xf7, 0x07,\n"," 0x02, 0x2a, 0xe4, 0xef, 0xd7, 0xed, 0x04, 0xcc, 0x00, 0xf7, 0xf8, 0x0c,\n"," 0xe7, 0x1d, 0xfc, 0xe3, 0x07, 0xd8, 0xf7, 0x06, 0x00, 0x15, 0x0c, 0xff,\n"," 0x0c, 0xe6, 0xf2, 0xda, 0x1a, 0x1a, 0x0f, 0x04, 0xec, 0xf2, 0xe4, 0x15,\n"," 0x13, 0x10, 0x04, 0xf0, 0x01, 0xeb, 0x04, 0x02, 0x21, 0x29, 0x25, 0x03,\n"," 0xf9, 0xde, 0xe4, 0x07, 0x0b, 0x13, 0x13, 0xce, 0x1c, 0xfd, 0xed, 0xf3,\n"," 0x00, 0x14, 0x1b, 0xd1, 0x0b, 0xf3, 0xf0, 0x06, 0x01, 0x0a, 0x05, 0xe0,\n"," 0x16, 0xe2, 0xec, 0xeb, 0x07, 0x05, 0x03, 0xd8, 0x14, 0x02, 0xdd, 0xf8,\n"," 0x16, 0x03, 0x07, 0xda, 0x27, 0xf0, 0xf3, 0x10, 0xfc, 0x14, 0xf3, 0x0f,\n"," 0x01, 0x0f, 0xfe, 0xee, 0xe0, 0x14, 0x02, 0x22, 0xfd, 0xd8, 0xff, 0xfe,\n"," 0xe7, 0x2b, 0x21, 0x2d, 0x05, 0xfc, 0xcb, 0x07, 0xe4, 0x12, 0x17, 0x36,\n"," 
0xe4, 0x23, 0xf6, 0x19, 0xcf, 0x05, 0xd7, 0x16, 0xd4, 0xfb, 0xc2, 0x20,\n"," 0xe3, 0xfe, 0xe9, 0xf8, 0xfc, 0xfd, 0xee, 0x15, 0xf0, 0xf4, 0xe2, 0x12,\n"," 0x04, 0x39, 0xdc, 0xff, 0xf9, 0xf4, 0xf9, 0x0b, 0xf4, 0x45, 0xed, 0x0e,\n"," 0xcd, 0xda, 0x16, 0xfc, 0x15, 0x37, 0xe4, 0x26, 0xe1, 0xda, 0x22, 0xd8,\n"," 0xfc, 0x03, 0x06, 0x06, 0xec, 0x01, 0x04, 0xec, 0x1f, 0xdf, 0xfa, 0xf6,\n"," 0x1c, 0x0a, 0x22, 0xda, 0xf7, 0xea, 0x07, 0xe2, 0x0d, 0x0e, 0x04, 0xfa,\n"," 0xf1, 0x01, 0xe7, 0x10, 0x2a, 0x18, 0x0d, 0xfa, 0xf0, 0xe9, 0x03, 0xf5,\n"," 0x18, 0x24, 0x1b, 0xf0, 0xf2, 0xe0, 0xf2, 0xea, 0x1a, 0x05, 0x13, 0xde,\n"," 0x0d, 0xfb, 0xe6, 0x15, 0x0a, 0xf9, 0x0d, 0xe0, 0x00, 0x00, 0xf6, 0x12,\n"," 0xf6, 0x09, 0x06, 0xe4, 0x0c, 0xfb, 0xe7, 0xeb, 0xff, 0xfe, 0xf2, 0xde,\n"," 0x21, 0x14, 0x03, 0x04, 0x0d, 0xff, 0x21, 0xe9, 0x24, 0xf9, 0x03, 0x00,\n"," 0xf0, 0xfb, 0xff, 0xf5, 0xf6, 0x20, 0xfd, 0x25, 0xe7, 0x06, 0xf8, 0x09,\n"," 0x00, 0xdf, 0xef, 0xf0, 0xdd, 0x30, 0xde, 0x33, 0x11, 0xe9, 0x01, 0x04,\n"," 0x00, 0x13, 0xf7, 0x32, 0xf9, 0x1f, 0xf6, 0x05, 0xda, 0xfb, 0x1c, 0x1d,\n"," 0xdf, 0x18, 0xda, 0x10, 0xda, 0x04, 0x1a, 0xe0, 0x15, 0x09, 0xd7, 0x0c,\n"," 0xe6, 0x19, 0xf9, 0x0a, 0xfe, 0x47, 0xdb, 0x09, 0xdf, 0x00, 0xe8, 0x22,\n"," 0xe6, 0x4e, 0xd7, 0x0d, 0xde, 0xeb, 0xf7, 0x0d, 0x0f, 0x2d, 0xe5, 0xfd,\n"," 0xdf, 0xe7, 0x14, 0xed, 0x0c, 0x09, 0xf7, 0x11, 0x02, 0x1c, 0x0f, 0xcc,\n"," 0x1e, 0xf7, 0xf2, 0xf1, 0x09, 0x0e, 0xe2, 0xfb, 0xfd, 0xe4, 0x07, 0x07,\n"," 0x15, 0xfd, 0x28, 0xf6, 0xf2, 0xec, 0xe7, 0xf5, 0x17, 0x1e, 0xe8, 0x0a,\n"," 0xec, 0xd7, 0xe9, 0x27, 0x1f, 0x36, 0xf0, 0xee, 0xf6, 0xe0, 0xf2, 0x0e,\n"," 0x26, 0x1d, 0x0d, 0xdd, 0x02, 0xe7, 0xe0, 0x09, 0xf9, 0x0d, 0xde, 0xe2,\n"," 0xfe, 0xef, 0xee, 0x06, 0xfa, 0x1a, 0x0e, 0xd9, 0x10, 0xf9, 0x10, 0x08,\n"," 0x0f, 0xfb, 0xf8, 0xcf, 0x23, 0x13, 0xf5, 0x04, 0x07, 0x0e, 0x22, 0xfd,\n"," 0x29, 0xf3, 0xf2, 0x18, 0xf1, 0x04, 0x0b, 0x11, 0x00, 0x09, 0xed, 0x07,\n"," 0xe0, 0x27, 0x07, 0x1e, 0xe4, 0xda, 0x0e, 0xdb, 0xd6, 0x3d, 0x0f, 
0x25,\n"," 0x03, 0xdf, 0xdc, 0x20, 0xe5, 0x09, 0xea, 0x36, 0xee, 0x29, 0xda, 0x00,\n"," 0xde, 0xfc, 0x27, 0x21, 0xdb, 0x38, 0xed, 0x05, 0xd2, 0xfe, 0xf4, 0xf4,\n"," 0x07, 0xf5, 0xd3, 0xfc, 0xe1, 0x0e, 0xf7, 0x19, 0x00, 0x35, 0xe0, 0xf6,\n"," 0x01, 0xfe, 0x08, 0x27, 0xf1, 0x52, 0xec, 0x06, 0xdd, 0xd7, 0x1a, 0xfd,\n"," 0x0b, 0x2d, 0xdf, 0x02, 0xd8, 0xe9, 0xdd, 0xec, 0x0b, 0x14, 0x0a, 0x02,\n"," 0xec, 0x1d, 0x08, 0xde, 0x11, 0xec, 0x0e, 0x0b, 0x2b, 0xee, 0x0c, 0xf4,\n"," 0x0c, 0xdc, 0x03, 0x05, 0x17, 0xfb, 0xf5, 0xf9, 0xea, 0xeb, 0x06, 0x12,\n"," 0x20, 0x20, 0x0e, 0xf9, 0x00, 0xd7, 0x11, 0x03, 0x1d, 0x2b, 0x01, 0xe5,\n"," 0x01, 0xf6, 0xf9, 0x03, 0x10, 0x1d, 0x14, 0xcd, 0xfd, 0xe3, 0xe9, 0x1d,\n"," 0xfa, 0x11, 0x07, 0xd9, 0x09, 0xfd, 0xeb, 0x02, 0xfc, 0x0e, 0xe6, 0xe9,\n"," 0x0c, 0xeb, 0xeb, 0x08, 0x0e, 0xf6, 0xf7, 0xe2, 0x14, 0x09, 0x02, 0xfd,\n"," 0x09, 0x02, 0x0e, 0xe9, 0x31, 0x05, 0xfc, 0x11, 0xef, 0x05, 0x05, 0x09,\n"," 0xfc, 0xd7, 0x09, 0xee, 0xdc, 0x05, 0x0e, 0x18, 0xd2, 0xd3, 0xf1, 0x26,\n"," 0xcc, 0x3a, 0x0d, 0x2a, 0x22, 0xf7, 0xce, 0x14, 0xed, 0x1a, 0xfe, 0x54,\n"," 0xf3, 0x2f, 0xfb, 0x14, 0xe6, 0xf3, 0xe7, 0x15, 0xd3, 0x1a, 0xe2, 0x0b,\n"," 0xe0, 0xf6, 0x0a, 0xf6, 0xe8, 0xea, 0xf4, 0x17, 0xd4, 0x11, 0xfb, 0x11,\n"," 0xfd, 0x37, 0xf6, 0x0b, 0xe3, 0x00, 0x05, 0x1a, 0xdc, 0x59, 0xd7, 0x17,\n"," 0xbf, 0xe7, 0xe3, 0x2a, 0xfa, 0x30, 0xfb, 0xf3, 0xde, 0xee, 0x01, 0xfd,\n"," 0x10, 0x1b, 0x06, 0xf5, 0xee, 0xf8, 0xd9, 0xe4, 0x0a, 0xe0, 0x01, 0x13,\n"," 0x22, 0x0d, 0xda, 0xfe, 0x06, 0xeb, 0xfe, 0xe9, 0x1c, 0x1e, 0xff, 0xfb,\n"," 0xfb, 0xf9, 0xe6, 0xec, 0x14, 0x2d, 0x01, 0x09, 0xd9, 0xd9, 0x0c, 0xe7,\n"," 0x2e, 0x12, 0xef, 0xe9, 0xfe, 0xee, 0x0b, 0x13, 0x0d, 0x0d, 0xf1, 0xf2,\n"," 0xf6, 0xf5, 0xf0, 0x1e, 0xf6, 0xf7, 0x01, 0xcd, 0xfd, 0x04, 0xed, 0xfd,\n"," 0xfd, 0x22, 0x16, 0xde, 0x09, 0xee, 0xee, 0xe3, 0x19, 0x14, 0xd7, 0xee,\n"," 0x1d, 0xf6, 0x02, 0xfd, 0x21, 0x02, 0xe0, 0xfa, 0x21, 0xf4, 0xfe, 0xf8,\n"," 0xf4, 0xf1, 0xff, 0x19, 0xf5, 0x02, 0xea, 0x23, 0x01, 0x07, 
0x0e, 0x2f,\n"," 0xf2, 0xec, 0x04, 0xfb, 0xd9, 0x40, 0xee, 0x19, 0x30, 0xed, 0xf5, 0xe7,\n"," 0xe6, 0x20, 0xf6, 0x27, 0xf3, 0x2a, 0xec, 0x14, 0xff, 0x00, 0x0f, 0x05,\n"," 0xd0, 0x0d, 0xfe, 0x1e, 0xd7, 0xf8, 0xeb, 0xfe, 0xf3, 0x16, 0xe9, 0x07,\n"," 0xec, 0x10, 0x19, 0x08, 0x0e, 0x2a, 0x11, 0x08, 0xe1, 0xe9, 0x11, 0x1c,\n"," 0xf1, 0x53, 0xd4, 0xf8, 0xc7, 0xed, 0xf4, 0x03, 0xf1, 0x29, 0xfc, 0xf0,\n"," 0xc3, 0xf5, 0xf4, 0x0c, 0x21, 0x11, 0xf9, 0x0b, 0xe0, 0xfc, 0x08, 0xfc,\n"," 0x12, 0xe1, 0x18, 0x03, 0x17, 0x0f, 0xfc, 0xdb, 0x06, 0xeb, 0x05, 0x0f,\n"," 0x17, 0x0b, 0x0f, 0x16, 0xdd, 0xf1, 0xf1, 0xfd, 0x0f, 0x29, 0xec, 0x21,\n"," 0xe7, 0xe3, 0x0c, 0x09, 0x0d, 0x10, 0xe7, 0x12, 0xf6, 0xe9, 0x01, 0x0f,\n"," 0x17, 0xf0, 0xec, 0xd2, 0xff, 0x0c, 0xd5, 0x0b, 0xff, 0x09, 0xf5, 0xe1,\n"," 0xfe, 0xf5, 0x0f, 0x0e, 0x01, 0x18, 0xfa, 0xe7, 0x02, 0x0f, 0xf4, 0x1b,\n"," 0x24, 0x03, 0xfe, 0xf1, 0x16, 0x0d, 0xfa, 0xf3, 0x13, 0x17, 0x04, 0xec,\n"," 0x17, 0x00, 0xf0, 0x17, 0x01, 0xff, 0x0e, 0xfb, 0xfc, 0x23, 0x0c, 0x02,\n"," 0xe8, 0x00, 0xfe, 0x1c, 0xd8, 0xd9, 0x05, 0xda, 0xa8, 0x2a, 0xdf, 0x3f,\n"," 0x0d, 0xe6, 0xe8, 0x0d, 0xf1, 0x17, 0x15, 0x57, 0xe5, 0x24, 0xfa, 0x0b,\n"," 0xe9, 0xef, 0xfc, 0x03, 0xd0, 0x2e, 0xe7, 0x1e, 0xe8, 0x18, 0x15, 0xfe,\n"," 0xfd, 0x0b, 0xe9, 0x0d, 0xe8, 0x00, 0x00, 0x08, 0xfd, 0x27, 0xea, 0x01,\n"," 0xda, 0x01, 0xfa, 0x2a, 0xf8, 0x46, 0x03, 0x04, 0xd8, 0xed, 0xfb, 0x28,\n"," 0xed, 0x43, 0xeb, 0x17, 0xce, 0xed, 0xe0, 0xe2, 0x0b, 0x18, 0x03, 0x24,\n"," 0xdd, 0xfe, 0xee, 0xf3, 0x27, 0xef, 0x00, 0x1b, 0x1d, 0x19, 0xee, 0xf0,\n"," 0x0b, 0x03, 0xf6, 0x03, 0x12, 0x1f, 0x0a, 0x1b, 0xf0, 0xec, 0xf5, 0x02,\n"," 0x25, 0x15, 0x0d, 0x23, 0xe0, 0xe1, 0x14, 0xf5, 0x07, 0x24, 0xf8, 0x0e,\n"," 0xf3, 0x00, 0xf8, 0xf4, 0x07, 0x00, 0xfa, 0xe5, 0x06, 0xe9, 0xf7, 0x16,\n"," 0x13, 0xfb, 0x04, 0xe6, 0xf1, 0xde, 0x07, 0xfc, 0x08, 0x05, 0xf2, 0xfe,\n"," 0x0f, 0xfd, 0xea, 0xe8, 0x0e, 0x03, 0xf6, 0xe9, 0x15, 0xf5, 0xf4, 0xf1,\n"," 0x04, 0xf8, 0x06, 0xe6, 0x23, 0xe5, 0xff, 0xf1, 0xfa, 
0x00, 0xf4, 0xfb,\n"," 0xf6, 0x08, 0x02, 0xfe, 0x07, 0x08, 0xfe, 0x10, 0xef, 0xd6, 0x02, 0xed,\n"," 0xbd, 0x21, 0xff, 0x29, 0x22, 0xe3, 0xe1, 0xfa, 0xfe, 0x33, 0x00, 0x31,\n"," 0xf0, 0x1b, 0x03, 0x06, 0xf1, 0xe5, 0xe8, 0xdb, 0xe6, 0x23, 0xeb, 0x09,\n"," 0xef, 0x0b, 0x1b, 0xfc, 0xff, 0x00, 0xee, 0x0a, 0xc2, 0x14, 0xfe, 0x0e,\n"," 0xf8, 0x33, 0xe6, 0x0f, 0xeb, 0x08, 0x27, 0x24, 0xf0, 0x54, 0xea, 0xeb,\n"," 0xc4, 0xe1, 0xf3, 0xe2, 0xfd, 0x3c, 0xfe, 0xfa, 0xca, 0xec, 0xef, 0x1a,\n"," 0x08, 0x13, 0x03, 0x17, 0xf2, 0x09, 0xfa, 0xe8, 0x26, 0xee, 0x09, 0x22,\n"," 0x07, 0x0f, 0x08, 0xf8, 0x00, 0x02, 0xfe, 0xda, 0x2a, 0x0d, 0x0e, 0x23,\n"," 0xf8, 0xfa, 0xfb, 0xf5, 0x1e, 0x14, 0xf8, 0xf2, 0xdd, 0xf9, 0xf1, 0x1b,\n"," 0x0e, 0x0e, 0xf9, 0xf7, 0xe6, 0xfb, 0x0a, 0x03, 0xf6, 0x12, 0xf8, 0xff,\n"," 0xed, 0x14, 0x04, 0x12, 0x12, 0x0b, 0x05, 0xf1, 0x03, 0xe2, 0xf2, 0x18,\n"," 0xfb, 0x00, 0xf1, 0xc6, 0x0f, 0xfd, 0xeb, 0xef, 0x16, 0xf8, 0x00, 0xf5,\n"," 0x20, 0xf7, 0x04, 0xf0, 0x08, 0x06, 0x0a, 0xf9, 0x11, 0xf4, 0xf8, 0xec,\n"," 0xf2, 0x0d, 0x15, 0xe0, 0xe4, 0x07, 0xef, 0xfd, 0xf3, 0x13, 0xfa, 0x1e,\n"," 0xf2, 0xee, 0xec, 0xf3, 0xc9, 0x18, 0x13, 0x34, 0x16, 0xeb, 0xf3, 0xe0,\n"," 0xd2, 0x24, 0xf4, 0x25, 0xe4, 0xfe, 0x03, 0x04, 0xf0, 0xfc, 0xf6, 0xf1,\n"," 0xcc, 0xfe, 0xf9, 0xeb, 0xdb, 0x1c, 0xf0, 0x18, 0xf6, 0xff, 0x00, 0x0b,\n"," 0xe0, 0x14, 0xf6, 0x14, 0xff, 0x2b, 0x19, 0xd9, 0xed, 0xf8, 0x00, 0x07,\n"," 0xe2, 0x48, 0xf0, 0xf3, 0xd5, 0xeb, 0x04, 0xef, 0xee, 0x38, 0xf4, 0x20,\n"," 0xc9, 0xfe, 0xe9, 0x08, 0x0a, 0x0d, 0xf7, 0x22, 0xf8, 0x12, 0xfe, 0xe4,\n"," 0x25, 0xdb, 0x0d, 0x05, 0xfe, 0x02, 0xe6, 0x03, 0xfc, 0xf5, 0x08, 0xee,\n"," 0x16, 0x20, 0x07, 0xf3, 0xfb, 0xec, 0x00, 0xec, 0x16, 0x22, 0x00, 0x1e,\n"," 0xe0, 0xd6, 0x00, 0x1a, 0x09, 0x1a, 0xeb, 0x13, 0xfe, 0xe2, 0xf7, 0xff,\n"," 0xe6, 0xf3, 0x28, 0xf9, 0x01, 0xf8, 0xee, 0xe8, 0x07, 0x0a, 0x1d, 0xf0,\n"," 0xed, 0xde, 0x06, 0x15, 0xfd, 0xf5, 0x13, 0xcf, 0xfc, 0x00, 0x0b, 0xdc,\n"," 0x0e, 0xf9, 0xfa, 0xed, 0x12, 0xfa, 0xf7, 0x07, 
0x22, 0xe6, 0x01, 0xee,\n"," 0x0e, 0xef, 0xf5, 0x13, 0x01, 0xe9, 0xed, 0x03, 0xe7, 0xda, 0xdc, 0x19,\n"," 0xf5, 0x1d, 0x07, 0x2c, 0xee, 0xd5, 0xf1, 0xf2, 0xc6, 0x21, 0x01, 0x3a,\n"," 0x0e, 0xe0, 0x06, 0x0f, 0xdc, 0x33, 0xed, 0x30, 0xec, 0x02, 0x23, 0x06,\n"," 0x01, 0x13, 0xfe, 0xed, 0xdb, 0xfc, 0x13, 0xf9, 0xfc, 0x0d, 0x09, 0x18,\n"," 0xeb, 0x0d, 0xeb, 0x02, 0xcc, 0x10, 0x1a, 0xed, 0x05, 0x2c, 0xf6, 0x07,\n"," 0xf0, 0xf0, 0xf0, 0x17, 0xe9, 0x33, 0xeb, 0x19, 0xb8, 0xf0, 0xeb, 0xed,\n"," 0xdf, 0x1b, 0xf6, 0xfa, 0xd2, 0xf4, 0xfe, 0x18, 0xf1, 0x09, 0xf0, 0xe5,\n"," 0xd8, 0x08, 0x1a, 0x19, 0x16, 0xe1, 0x12, 0x02, 0x14, 0x1c, 0x02, 0x1e,\n"," 0x11, 0xf1, 0x08, 0x01, 0x0b, 0x1d, 0xfe, 0x21, 0xed, 0xf1, 0x03, 0x0c,\n"," 0x21, 0xfe, 0xfa, 0x18, 0xec, 0xe4, 0x02, 0x09, 0x15, 0x03, 0x0b, 0x0c,\n"," 0xfb, 0xeb, 0xfa, 0xfc, 0xf7, 0xf8, 0xf0, 0x14, 0xf5, 0xfa, 0x0a, 0x09,\n"," 0x13, 0x06, 0xea, 0xf5, 0x06, 0xfb, 0xfa, 0x0f, 0x10, 0xf9, 0xfa, 0xe7,\n"," 0xf2, 0xe2, 0x01, 0x14, 0x06, 0x02, 0xf3, 0xfe, 0x0c, 0xfb, 0xf0, 0xfa,\n"," 0x07, 0xe9, 0xea, 0xf1, 0xf5, 0xf7, 0xe5, 0xf7, 0xf8, 0xf5, 0xf9, 0xf5,\n"," 0xe7, 0xfd, 0xe7, 0xfc, 0xe2, 0x28, 0x0e, 0x00, 0xf3, 0xd9, 0x10, 0x16,\n"," 0xf4, 0x39, 0xe8, 0x28, 0xed, 0xf3, 0xdc, 0x07, 0x0f, 0x3a, 0xec, 0x09,\n"," 0xeb, 0xf7, 0x16, 0x09, 0xf9, 0x1f, 0x02, 0x26, 0xd9, 0xfb, 0x0a, 0xf5,\n"," 0xf5, 0x2b, 0xe7, 0xfb, 0xfd, 0x05, 0x1b, 0x0a, 0x05, 0x16, 0xf3, 0xfd,\n"," 0xe8, 0x23, 0x01, 0xe7, 0xfb, 0x0f, 0x0d, 0x03, 0xe0, 0x3c, 0x1f, 0xe3,\n"," 0xd2, 0x13, 0xfd, 0xeb, 0xe4, 0x11, 0xfb, 0x08, 0xde, 0xeb, 0x02, 0xf2,\n"," 0x10, 0xf8, 0xf0, 0x16, 0xe3, 0x21, 0x05, 0x14, 0x17, 0xe6, 0xf7, 0xed,\n"," 0x09, 0x19, 0x14, 0x23, 0x0b, 0xfe, 0x13, 0x26, 0x00, 0x25, 0xef, 0xee,\n"," 0x05, 0x00, 0x07, 0xf3, 0xfb, 0x15, 0xf5, 0xfc, 0xe0, 0xf4, 0xf4, 0xe4,\n"," 0xe5, 0x10, 0xf6, 0x03, 0xdc, 0xe5, 0x09, 0xf5, 0xe7, 0xf1, 0xf1, 0xdb,\n"," 0x01, 0x09, 0xfd, 0xdc, 0xdc, 0xfc, 0x12, 0xee, 0x15, 0xdf, 0x15, 0xe1,\n"," 0xf9, 0x08, 0x05, 0xde, 0x04, 0xea, 0x25, 
0x1d, 0x12, 0xd9, 0xf0, 0xdf,\n"," 0x31, 0xe1, 0xe5, 0xe5, 0xf6, 0xf1, 0x00, 0xe9, 0x07, 0xf2, 0x08, 0x0b,\n"," 0xee, 0xe1, 0xfa, 0x06, 0x76, 0xfe, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,\n"," 0x10, 0x00, 0x00, 0x00, 0xc9, 0x01, 0x00, 0x00, 0x59, 0xfe, 0xff, 0xff,\n"," 0x8f, 0xfe, 0xff, 0xff, 0x50, 0x01, 0x00, 0x00, 0x60, 0xfb, 0xff, 0xff,\n"," 0x0f, 0x00, 0x00, 0x00, 0x54, 0x4f, 0x43, 0x4f, 0x20, 0x43, 0x6f, 0x6e,\n"," 0x76, 0x65, 0x72, 0x74, 0x65, 0x64, 0x2e, 0x00, 0x01, 0x00, 0x00, 0x00,\n"," 0x04, 0x00, 0x00, 0x00, 0x58, 0xfa, 0xff, 0xff, 0xbc, 0x01, 0x00, 0x00,\n"," 0xb0, 0x01, 0x00, 0x00, 0xa4, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n"," 0x06, 0x00, 0x00, 0x00, 0x78, 0x01, 0x00, 0x00, 0x18, 0x01, 0x00, 0x00,\n"," 0xb4, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,\n"," 0x04, 0x00, 0x00, 0x00, 0xaa, 0xfe, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00,\n"," 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n"," 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,\n"," 0xce, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x09, 0x03, 0x00, 0x00, 0x00,\n"," 0x1c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n"," 0x1a, 0xff, 0xff, 0xff, 0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00,\n"," 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,\n"," 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00,\n"," 0x07, 0x00, 0x14, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08,\n"," 0x01, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,\n"," 0x04, 0x00, 0x00, 0x00, 0x28, 0xfc, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,\n"," 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,\n"," 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,\n"," 0x16, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x10, 0x00,\n"," 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x38, 0x00, 0x00, 0x00,\n"," 0x2c, 0x00, 0x00, 0x00, 0x14, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,\n"," 0x14, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x07, 0x00,\n"," 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00, 0x00,\n"," 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n"," 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n"," 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,\n"," 0x1a, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x07, 0x00, 0x14, 0x00,\n"," 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x02, 0x00, 0x00, 0x00,\n"," 0x38, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,\n"," 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,\n"," 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,\n"," 0x31, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n"," 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,\n"," 0x03, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00,\n"," 0x10, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x0a, 0x00, 0x00, 0x00,\n"," 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n"," 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n"," 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,\n"," 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,\n"," 0xac, 0x04, 0x00, 0x00, 0x44, 0x04, 0x00, 0x00, 0xc4, 0x03, 0x00, 0x00,\n"," 0x4c, 0x03, 0x00, 0x00, 0xd0, 0x02, 0x00, 0x00, 0x90, 0x02, 0x00, 0x00,\n"," 0x20, 0x02, 0x00, 0x00, 0xb4, 0x01, 0x00, 0x00, 0xe0, 0x00, 0x00, 0x00,\n"," 0x6c, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n"," 0xd4, 0xff, 0xff, 0xff, 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n"," 0x0e, 0x00, 0x00, 0x00, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x5f, 0x73,\n"," 0x6f, 0x66, 0x74, 0x6d, 0x61, 0x78, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,\n"," 0x01, 0x00, 0x00, 0x00, 0x04, 
0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,\n"," 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x00, 0x00,\n"," 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,\n"," 0x52, 0x65, 0x73, 0x68, 0x61, 0x70, 0x65, 0x5f, 0x31, 0x00, 0x00, 0x00,\n"," 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xa8, 0x07, 0x00, 0x00,\n"," 0xf2, 0xfb, 0xff, 0xff, 0x00, 0x00, 0x00, 0x09, 0x4c, 0x00, 0x00, 0x00,\n"," 0x07, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n"," 0xe4, 0xfb, 0xff, 0xff, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n"," 0x01, 0x00, 0x00, 0x00, 0x80, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,\n"," 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3b,\n"," 0x13, 0x00, 0x00, 0x00, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x5f, 0x73,\n"," 0x6f, 0x66, 0x74, 0x6d, 0x61, 0x78, 0x5f, 0x69, 0x6e, 0x74, 0x38, 0x00,\n"," 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n"," 0x00, 0x00, 0x0e, 0x00, 0x1a, 0x00, 0x08, 0x00, 0x07, 0x00, 0x0c, 0x00,\n"," 0x10, 0x00, 0x14, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09,\n"," 0xb4, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00,\n"," 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00,\n"," 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00,\n"," 0x12, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,\n"," 0x03, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n"," 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n"," 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n"," 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n"," 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n"," 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n"," 0x08, 0x00, 0x00, 0x00, 0xd6, 0x72, 0xec, 0x39, 0x57, 0x66, 0x72, 0x3a,\n"," 0x1e, 0xe6, 0x14, 0x3a, 
0x27, 0x15, 0x3a, 0x39, 0x33, 0xb7, 0x25, 0x3a,\n"," 0xf6, 0x03, 0x80, 0x3a, 0xd2, 0x73, 0x28, 0x39, 0x79, 0xbb, 0x5c, 0x3a,\n"," 0x12, 0x00, 0x00, 0x00, 0x66, 0x69, 0x72, 0x73, 0x74, 0x5f, 0x77, 0x65,\n"," 0x69, 0x67, 0x68, 0x74, 0x73, 0x2f, 0x72, 0x65, 0x61, 0x64, 0x00, 0x00,\n"," 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00,\n"," 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x32, 0xfd, 0xff, 0xff,\n"," 0x00, 0x00, 0x00, 0x09, 0x54, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,\n"," 0x28, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x24, 0xfd, 0xff, 0xff,\n"," 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n"," 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n"," 0x4a, 0xb2, 0xf3, 0x39, 0x1f, 0x00, 0x00, 0x00, 0x66, 0x69, 0x6e, 0x61,\n"," 0x6c, 0x5f, 0x66, 0x63, 0x5f, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73,\n"," 0x2f, 0x72, 0x65, 0x61, 0x64, 0x2f, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x70,\n"," 0x6f, 0x73, 0x65, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n"," 0xa0, 0x0f, 0x00, 0x00, 0x9a, 0xfd, 0xff, 0xff, 0x00, 0x00, 0x00, 0x09,\n"," 0x58, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00,\n"," 0x04, 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00,\n"," 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n"," 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n"," 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xbb, 0xb0, 0xba, 0x3d,\n"," 0x01, 0x00, 0x00, 0x00, 0xd8, 0x1c, 0x35, 0x41, 0x01, 0x00, 0x00, 0x00,\n"," 0x3b, 0xcf, 0x3e, 0xc1, 0x05, 0x00, 0x00, 0x00, 0x61, 0x64, 0x64, 0x5f,\n"," 0x31, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n"," 0x04, 0x00, 0x00, 0x00, 0x06, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x02,\n"," 0x2c, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,\n"," 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,\n"," 0x0f, 0x00, 0x00, 
0x00, 0x52, 0x65, 0x73, 0x68, 0x61, 0x70, 0x65, 0x5f,\n"," 0x32, 0x2f, 0x73, 0x68, 0x61, 0x70, 0x65, 0x00, 0x01, 0x00, 0x00, 0x00,\n"," 0x04, 0x00, 0x00, 0x00, 0x42, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x09,\n"," 0x5c, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00,\n"," 0x04, 0x00, 0x00, 0x00, 0x14, 0xff, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00,\n"," 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n"," 0x01, 0x00, 0x00, 0x00, 0x80, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,\n"," 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x50, 0x50, 0xd0, 0x3d,\n"," 0x01, 0x00, 0x00, 0x00, 0x00, 0x80, 0xcf, 0x41, 0x01, 0x00, 0x00, 0x00,\n"," 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x52, 0x65, 0x73, 0x68,\n"," 0x61, 0x70, 0x65, 0x5f, 0x32, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n"," 0x01, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,\n"," 0x01, 0x00, 0x00, 0x00, 0xba, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x09,\n"," 0x60, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00,\n"," 0x04, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00,\n"," 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n"," 0x01, 0x00, 0x00, 0x00, 0x80, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,\n"," 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x50, 0x50, 0xd0, 0x3d,\n"," 0x01, 0x00, 0x00, 0x00, 0x00, 0x80, 0xcf, 0x41, 0x01, 0x00, 0x00, 0x00,\n"," 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x52, 0x65, 0x73, 0x68,\n"," 0x61, 0x70, 0x65, 0x5f, 0x31, 0x5f, 0x69, 0x6e, 0x74, 0x38, 0x00, 0x00,\n"," 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xa8, 0x07, 0x00, 0x00,\n"," 0x2e, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x09, 0x60, 0x00, 0x00, 0x00,\n"," 0x09, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,\n"," 0x0c, 0x00, 0x14, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00,\n"," 0x0c, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,\n"," 0x14, 0x00, 
0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n"," 0x80, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,\n"," 0xbd, 0xad, 0x93, 0x3d, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x1a, 0x93, 0x41,\n"," 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n"," 0x52, 0x65, 0x6c, 0x75, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n"," 0x01, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,\n"," 0x08, 0x00, 0x00, 0x00, 0xaa, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x02,\n"," 0x44, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00,\n"," 0x04, 0x00, 0x00, 0x00, 0x9c, 0xff, 0xff, 0xff, 0x18, 0x00, 0x00, 0x00,\n"," 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n"," 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n"," 0xc4, 0x94, 0x0c, 0x38, 0x0b, 0x00, 0x00, 0x00, 0x4d, 0x61, 0x74, 0x4d,\n"," 0x75, 0x6c, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00, 0x01, 0x00, 0x00, 0x00,\n"," 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x08, 0x00,\n"," 0x07, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00, 0x0e, 0x00, 0x00, 0x00,\n"," 0x00, 0x00, 0x00, 0x02, 0xa4, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,\n"," 0x8c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,\n"," 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x00, 0x00,\n"," 0x50, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,\n"," 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n"," 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n"," 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n"," 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n"," 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n"," 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,\n"," 0x7c, 0x67, 0x40, 0x38, 0x32, 0x3f, 0xc5, 0x38, 0x5e, 0x53, 0x72, 0x38,\n"," 0x90, 
0x6b, 0x97, 0x37, 0xd6, 0xd8, 0x86, 0x38, 0xc2, 0x56, 0xd0, 0x38,\n"," 0xf3, 0x12, 0x89, 0x37, 0x92, 0x9d, 0xb3, 0x38, 0x0b, 0x00, 0x00, 0x00,\n"," 0x43, 0x6f, 0x6e, 0x76, 0x32, 0x44, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00,\n"," 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,\n"," 0x70, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,\n"," 0x28, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n"," 0xca, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x06, 0x02, 0x00, 0x00, 0x00,\n"," 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00,\n"," 0x00, 0x00, 0x00, 0x72, 0xe6, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x19,\n"," 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, 0x05, 0x00,\n"," 0x06, 0x00, 0x00, 0x00, 0x00, 0x16, 0x0a, 0x00, 0x0e, 0x00, 0x07, 0x00,\n"," 0x00, 0x00, 0x08, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09,\n"," 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x0c, 0x00, 0x07, 0x00,\n"," 0x00, 0x00, 0x08, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04,\n"," 0x03, 0x00, 0x00, 0x00\n","};\n","unsigned int g_model_len = 18952;\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/tensorflow/lite/micro/kernels/dequantize.cc b/tensorflow/lite/micro/kernels/dequantize.cc index 37fb8ffc3c6..4b87c0eb04c 100644 --- a/tensorflow/lite/micro/kernels/dequantize.cc +++ b/tensorflow/lite/micro/kernels/dequantize.cc @@ -28,7 +28,27 @@ namespace ops { namespace micro { namespace dequantize { +struct OpData { + // The scaling factor from input to output (aka the 'real multiplier') can + // be represented as a fixed point multiplier plus a left shift. 
+ int32_t output_multiplier; + int output_shift; +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + void* data = nullptr; + if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == + kTfLiteError) { + return nullptr; + } + return data; +} + TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + OpData* data = static_cast(node->user_data); + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); @@ -42,10 +62,20 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE( context, output->type == kTfLiteFloat32 || output->type == kTfLiteInt32); + if (output->type == kTfLiteInt32) { + const double effective_output_scale = + static_cast(input->params.scale) / + static_cast(output->params.scale); + QuantizeMultiplier(effective_output_scale, &data->output_multiplier, + &data->output_shift); + } return kTfLiteOk; } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + OpData* data = static_cast(node->user_data); + const TfLiteTensor* input = GetInput(context, node, 0); TfLiteTensor* output = GetOutput(context, node, 0); @@ -76,28 +106,21 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteError; } } else if (output->type == kTfLiteInt32) { - int32_t output_multiplier; - int output_shift; - const double effective_output_scale = - static_cast(input->params.scale) / - static_cast(output->params.scale); - QuantizeMultiplier(effective_output_scale, &output_multiplier, - &output_shift); int flat_size = MatchingFlatSize(GetTensorShape(input), GetTensorShape(output)); switch (input->type) { case kTfLiteInt16: { reference_ops::Requantize( - GetTensorData(input), flat_size, output_multiplier, - output_shift, input->params.zero_point, output->params.zero_point, - 
GetTensorData(output)); + GetTensorData(input), flat_size, data->output_multiplier, + data->output_shift, input->params.zero_point, + output->params.zero_point, GetTensorData(output)); break; } case kTfLiteInt8: { reference_ops::Requantize( - GetTensorData(input), flat_size, output_multiplier, - output_shift, input->params.zero_point, output->params.zero_point, - GetTensorData(output)); + GetTensorData(input), flat_size, data->output_multiplier, + data->output_shift, input->params.zero_point, + output->params.zero_point, GetTensorData(output)); break; } default: @@ -119,7 +142,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace dequantize TfLiteRegistration* Register_DEQUANTIZE() { - static TfLiteRegistration r = {/*init=*/nullptr, + static TfLiteRegistration r = {/*init=*/dequantize::Init, /*free=*/nullptr, /*prepare=*/dequantize::Prepare, /*invoke=*/dequantize::Eval, diff --git a/tensorflow/lite/micro/kernels/prelu.cc b/tensorflow/lite/micro/kernels/prelu.cc index a20d2c88225..2c575269cca 100644 --- a/tensorflow/lite/micro/kernels/prelu.cc +++ b/tensorflow/lite/micro/kernels/prelu.cc @@ -64,14 +64,20 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* input = GetInput(context, node, 0); const TfLiteTensor* alpha = GetInput(context, node, 1); TfLiteTensor* output = GetOutput(context, node, 0); - int32_t output_multiplier = 0; - int output_shift = 0; + int32_t output_multiplier_1 = 0; + int output_shift_1 = 0; + int32_t output_multiplier_2 = 0; + int output_shift_2 = 0; if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt16) { - double real_multiplier = static_cast(input->params.scale) * - static_cast(alpha->params.scale) / - static_cast(output->params.scale); - QuantizeMultiplierSmallerThanOneExp(real_multiplier, &output_multiplier, - &output_shift); + double real_multiplier_1 = static_cast(input->params.scale) * + static_cast(output->params.scale); + double real_multiplier_2 = 
static_cast(input->params.scale) * + static_cast(alpha->params.scale) / + static_cast(output->params.scale); + QuantizeMultiplier(real_multiplier_1, &output_multiplier_1, + &output_shift_1); + QuantizeMultiplier(real_multiplier_2, &output_multiplier_2, + &output_shift_2); } switch (input->type) { case kTfLiteFloat32: { @@ -86,8 +92,10 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) { op_params.input_offset = -input->params.zero_point; op_params.alpha_offset = -alpha->params.zero_point; op_params.output_offset = output->params.zero_point; - op_params.output_multiplier = output_multiplier; - op_params.output_shift = output_shift; + op_params.output_multiplier_1 = output_multiplier_1; + op_params.output_shift_1 = output_shift_1; + op_params.output_multiplier_2 = output_multiplier_2; + op_params.output_shift_2 = output_shift_2; reference_ops::BroadcastPrelu4DSlow( op_params, GetTensorShape(input), GetTensorData(input), GetTensorShape(alpha), GetTensorData(alpha), diff --git a/tensorflow/lite/micro/kernels/prelu_test.cc b/tensorflow/lite/micro/kernels/prelu_test.cc index 4b35dac5849..d6c851a2726 100644 --- a/tensorflow/lite/micro/kernels/prelu_test.cc +++ b/tensorflow/lite/micro/kernels/prelu_test.cc @@ -154,14 +154,14 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(FloatPreluActivationsOpTest) { const int output_dims_count = 12; float output_data[output_dims_count]; - tflite::testing::TestPreluFloat({4, 1, 2, 2, 3}, // input shape + tflite::testing::TestPreluFloat({1, 2, 2, 3}, // input shape { 0.0f, 0.0f, 0.0f, // Row 1, Column 1 1.0f, 1.0f, 1.0f, // Row 1, Column 2 -1.0f, -1.0f, -1.0f, // Row 2, Column 1 -2.0f, -2.0f, -2.0f, // Row 1, Column 2 }, - {3, 1, 1, 3}, // alpha shape + {1, 1, 1, 3}, // alpha shape {0.0f, 1.0f, 2.0f}, // alpha values { 0.0f, 0.0f, 0.0f, // Row 1, Column 1 @@ -169,7 +169,7 @@ TF_LITE_MICRO_TEST(FloatPreluActivationsOpTest) { 0.0f, -1.0f, -2.0f, // Row 2, Column 1 0.0f, -2.0f, -4.0f, // Row 1, Column 2 }, - {4, 1, 2, 2, 3}, 
// output shape + {1, 2, 2, 3}, // output shape output_data); } @@ -182,13 +182,13 @@ TF_LITE_MICRO_TEST(QuantizedPreluActivationsOpTest) { const int output_dims_count = 12; uint8_t output_data[output_dims_count]; tflite::testing::TestPreluQuantized( - {4, 1, 2, 2, 3}, // input shape + {1, 2, 2, 3}, // input shape {F2Q(0.0f, kMin, kMax), F2Q(0.0f, kMin, kMax), F2Q(0.0f, kMin, kMax), F2Q(0.5f, kMin, kMax), F2Q(0.5f, kMin, kMax), F2Q(0.5f, kMin, kMax), F2Q(-1.0f, kMin, kMax), F2Q(-1.0f, kMin, kMax), F2Q(-1.0f, kMin, kMax), F2Q(-0.25f, kMin, kMax), F2Q(-0.25f, kMin, kMax), F2Q(-0.25f, kMin, kMax)}, - kMin, kMax, {3, 1, 1, 3}, // alpha shape + kMin, kMax, {1, 1, 1, 3}, // alpha shape {F2Q(0.0f, kMin, kMax), F2Q(0.5f, kMin, kMax), F2Q(-0.5f, kMin, kMax)}, kMin, kMax, {F2Q(0.0f, kMin, kMax), F2Q(0.0f, kMin, kMax), F2Q(0.0f, kMin, kMax), @@ -196,7 +196,7 @@ TF_LITE_MICRO_TEST(QuantizedPreluActivationsOpTest) { F2Q(0.0f, kMin, kMax), F2Q(-0.5f, kMin, kMax), F2Q(0.5f, kMin, kMax), F2Q(0.0f, kMin, kMax), F2Q(-0.125f, kMin, kMax), F2Q(0.125f, kMin, kMax)}, - {4, 1, 2, 2, 3}, // output shape + {1, 2, 2, 3}, // output shape kMin, kMax, output_data); } diff --git a/tensorflow/lite/micro/kernels/quantize.cc b/tensorflow/lite/micro/kernels/quantize.cc index d40471df948..b5bba83beb8 100644 --- a/tensorflow/lite/micro/kernels/quantize.cc +++ b/tensorflow/lite/micro/kernels/quantize.cc @@ -26,7 +26,27 @@ namespace ops { namespace micro { namespace quantize { +struct OpData { + // The scaling factor from input to output (aka the 'real multiplier') can + // be represented as a fixed point multiplier plus a left shift. 
+ int32_t output_multiplier; + int output_shift; +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + void* data = nullptr; + if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == + kTfLiteError) { + return nullptr; + } + return data; +} + TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + OpData* data = static_cast(node->user_data); + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); @@ -48,10 +68,20 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE(context, output->type == kTfLiteUInt8 || output->type == kTfLiteInt8); + if (input->type == kTfLiteInt16 && output->type == kTfLiteInt8) { + double effective_scale = + static_cast(input->params.scale / output->params.scale); + + QuantizeMultiplier(effective_scale, &data->output_multiplier, + &data->output_shift); + } return kTfLiteOk; } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + OpData* data = static_cast(node->user_data); + const TfLiteTensor* input = GetInput(context, node, 0); TfLiteTensor* output = GetOutput(context, node, 0); @@ -79,17 +109,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } } else if (input->type == kTfLiteInt16) { size_t size = ElementCount(*input->dims); - int32_t output_multiplier; - int output_shift; - double effective_scale = - static_cast(input->params.scale / output->params.scale); switch (output->type) { case kTfLiteInt8: - QuantizeMultiplier(effective_scale, &output_multiplier, &output_shift); reference_ops::Requantize( - GetTensorData(input), size, output_multiplier, - output_shift, input->params.zero_point, output->params.zero_point, - GetTensorData(output)); + GetTensorData(input), size, data->output_multiplier, + data->output_shift, 
input->params.zero_point, + output->params.zero_point, GetTensorData(output)); break; default: TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.", @@ -113,7 +138,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { // AffineQuantize takes scale and zero point and quantizes the float value to // quantized output, in int8 or uint8 format. TfLiteRegistration* Register_QUANTIZE() { - static TfLiteRegistration r = {/*init=*/nullptr, + static TfLiteRegistration r = {/*init=*/quantize::Init, /*free=*/nullptr, /*prepare=*/quantize::Prepare, /*invoke=*/quantize::Eval, diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/fully_connected.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/fully_connected.cc index c8da67b5af8..c8bba633de7 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/fully_connected.cc @@ -152,10 +152,8 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, double real_multiplier = 0.0; TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler( context, input, filter, bias, output, &real_multiplier)); - int exponent; - xtensa::hifimini::QuantizeMultiplier(real_multiplier, - &data->output_multiplier, &exponent); - data->output_shift = -exponent; + xtensa::hifimini::QuantizeMultiplier( + real_multiplier, &data->output_multiplier, &data->output_shift); return CalculateActivationRangeQuantized(context, activation, output, &data->output_activation_min, &data->output_activation_max); @@ -200,8 +198,7 @@ TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node, op_params.weights_offset = -filter->params.zero_point; op_params.output_offset = output->params.zero_point; op_params.output_multiplier = data.output_multiplier; - // TODO(b/138810107): Figure out whether output shift should be inverted - op_params.output_shift = -data.output_shift; + op_params.output_shift = data.output_shift; op_params.quantized_activation_min = 
data.output_activation_min; op_params.quantized_activation_max = data.output_activation_max; diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/quantize.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/quantize.cc index 2177bf1c363..29b2544a625 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/quantize.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/quantize.cc @@ -108,22 +108,24 @@ struct OpData { int scale_multiplier = 0; }; -// This size will work for both the hotword (1) and ambient music (1): -constexpr int kMaxOpDataSize = 2; -static int op_data_counter = 0; -static OpData kStaticOpData[kMaxOpDataSize]; - -void Free(TfLiteContext* context, void* buffer) { op_data_counter = 0; } +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + void* data = nullptr; + if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == + kTfLiteError) { + return nullptr; + } + return data; +} TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + auto* op_data = static_cast(node->user_data); + TfLiteTensor* output = GetOutput(context, node, 0); const TfLiteTensor* input = GetInput(context, node, 0); - // TODO(b/132070898): Use statically slotted OpData structures until a - // scratch memory API is ready. - OpData* op_data = &kStaticOpData[op_data_counter++]; - node->user_data = op_data; - + // TODO(b/155682734): Fix dangerous input/output scale ratio assumptions. 
op_data->scale_multiplier = xtensa::hifimini::CreateQConstantForInt24( 0, input->params.scale / output->params.scale); @@ -131,7 +133,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - auto* op_data = reinterpret_cast(node->user_data); + TFLITE_DCHECK(node->user_data != nullptr); + auto* op_data = static_cast(node->user_data); const TfLiteTensor* input = GetInput(context, node, 0); TfLiteTensor* output = GetOutput(context, node, 0); @@ -159,8 +162,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { // AffineQuantize takes scale and zero point and quantizes the float value to // quantized output, in int8 or uint8 format. TfLiteRegistration* Register_QUANTIZE() { - static TfLiteRegistration r = {/*init=*/nullptr, - /*free=*/quantize::Free, + static TfLiteRegistration r = {/*init=*/quantize::Init, + /*free=*/nullptr, /*prepare=*/quantize::Prepare, /*invoke=*/quantize::Eval, /*profiling_string=*/nullptr, diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc index c77e9d1173c..c95fd0e40a4 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc @@ -34,9 +34,6 @@ namespace { // registrations for selective types (e.g. compile without float support), this // can be removed. Otherwise, any HiFi specific optimizations should land here. 
-// This size will work for both the hotword (1) and ambient music (0): -static SoftmaxParams kStaticOpData; - TfLiteStatus CalculateSoftmaxOpData(TfLiteContext* context, const TfLiteTensor* input, TfLiteTensor* output, @@ -47,11 +44,13 @@ TfLiteStatus CalculateSoftmaxOpData(TfLiteContext* context, TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); } else { if (output->type == kTfLiteInt16) { - TF_LITE_ENSURE_EQ(context, output->params.zero_point, -32768); + TF_LITE_ENSURE_EQ(context, output->params.zero_point, + std::numeric_limits::min()); // NOTE: Current int16 softmax output does not require symmetric scaling // - so no need to verify scale here. } else { - TF_LITE_ENSURE_EQ(context, output->params.zero_point, -128); + TF_LITE_ENSURE_EQ(context, output->params.zero_point, + std::numeric_limits::min()); TF_LITE_ENSURE(context, output->params.scale == 1.f / 256); } } @@ -71,29 +70,18 @@ TfLiteStatus CalculateSoftmaxOpData(TfLiteContext* context, return kTfLiteOk; } -TfLiteStatus SoftmaxQuantized(TfLiteContext* context, const TfLiteTensor* input, - TfLiteTensor* output, - const SoftmaxParams& op_params) { - switch (output->type) { - case kTfLiteInt16: - tflite::reference_ops::Softmax( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); - return kTfLiteOk; - case kTfLiteInt8: - tflite::reference_ops::Softmax( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); - return kTfLiteOk; - default: - TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", - TfLiteTypeGetName(output->type), output->type); - return kTfLiteError; - } -} - } // namespace +void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + void* data = nullptr; + if (context->AllocatePersistentBuffer(context, sizeof(SoftmaxParams), + &data) == kTfLiteError) { + return nullptr; + } + return data; +} + 
TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) { auto* params = static_cast(node->builtin_data); @@ -103,10 +91,8 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* output = GetOutput(context, node, 0); TF_LITE_ENSURE(context, NumDimensions(input) >= 1); - // TODO(b/132070898): Use statically slotted SoftmaxParams structures until a - // scratch memory API is ready. - SoftmaxParams* op_params = &kStaticOpData; - node->user_data = op_params; + TFLITE_DCHECK(node->user_data != nullptr); + SoftmaxParams* op_params = static_cast(node->user_data); TF_LITE_ENSURE_STATUS( CalculateSoftmaxOpData(context, input, output, params, op_params)); @@ -120,19 +106,22 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* input = GetInput(context, node, 0); TfLiteTensor* output = GetOutput(context, node, 0); - switch (input->type) { - case kTfLiteInt8: - return SoftmaxQuantized(context, input, output, *op_params); - default: - TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", - TfLiteTypeGetName(input->type), input->type); - return kTfLiteError; + if (input->type == kTfLiteInt8 && output->type == kTfLiteInt16) { + // TODO(b/155656675): Const ref params can be slow on xtensa. 
+ tflite::reference_ops::Softmax( + *op_params, GetTensorShape(input), GetTensorData(input), + GetTensorShape(output), GetTensorData(output)); + return kTfLiteOk; + } else { + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; } } } // namespace activations TfLiteRegistration* Register_SOFTMAX() { - static TfLiteRegistration r = {/*init=*/nullptr, + static TfLiteRegistration r = {/*init=*/activations::SoftmaxInit, /*free=*/nullptr, /*prepare=*/activations::SoftmaxPrepare, /*invoke=*/activations::SoftmaxEval, diff --git a/tensorflow/lite/micro/micro_allocator.cc b/tensorflow/lite/micro/micro_allocator.cc index c2b32166467..54ce3383a08 100644 --- a/tensorflow/lite/micro/micro_allocator.cc +++ b/tensorflow/lite/micro/micro_allocator.cc @@ -388,10 +388,8 @@ TfLiteStatus MicroAllocator::Init() { return kTfLiteError; } subgraph_ = (*subgraphs)[0]; - tensors_ = subgraph_->tensors(); - operators_ = subgraph_->operators(); - context_->tensors_size = tensors_->size(); + context_->tensors_size = subgraph_->tensors()->size(); context_->tensors = reinterpret_cast(memory_allocator_->AllocateFromTail( sizeof(TfLiteTensor) * context_->tensors_size, @@ -405,9 +403,9 @@ TfLiteStatus MicroAllocator::Init() { } // Initialize runtime tensors in context_ using the flatbuffer. 
- for (size_t i = 0; i < tensors_->size(); ++i) { + for (size_t i = 0; i < subgraph_->tensors()->size(); ++i) { TfLiteStatus status = internal::InitializeRuntimeTensor( - memory_allocator_, *tensors_->Get(i), model_->buffers(), + memory_allocator_, *subgraph_->tensors()->Get(i), model_->buffers(), error_reporter_, &context_->tensors[i]); if (status != kTfLiteOk) { TF_LITE_REPORT_ERROR(error_reporter_, "Failed to initialize tensor %d", @@ -472,7 +470,7 @@ TfLiteStatus MicroAllocator::AllocateNodeAndRegistrations( auto* output = reinterpret_cast( memory_allocator_->AllocateFromTail( - sizeof(NodeAndRegistration) * operators_->size(), + sizeof(NodeAndRegistration) * subgraph_->operators()->size(), alignof(NodeAndRegistration))); if (output == nullptr) { TF_LITE_REPORT_ERROR( @@ -483,8 +481,8 @@ TfLiteStatus MicroAllocator::AllocateNodeAndRegistrations( TfLiteStatus status = kTfLiteOk; auto* opcodes = model_->operator_codes(); MicroBuiltinDataAllocator builtin_data_allocator(memory_allocator_); - for (size_t i = 0; i < operators_->size(); ++i) { - const auto* op = operators_->Get(i); + for (size_t i = 0; i < subgraph_->operators()->size(); ++i) { + const auto* op = subgraph_->operators()->Get(i); size_t index = op->opcode_index(); if (index >= opcodes->size()) { TF_LITE_REPORT_ERROR(error_reporter_, @@ -567,7 +565,7 @@ TfLiteStatus MicroAllocator::FinishTensorAllocation() { AllocationInfoBuilder builder(error_reporter_, &tmp_allocator); TF_LITE_ENSURE_STATUS( - builder.Init(tensors_->size(), scratch_buffer_count_)); + builder.Init(subgraph_->tensors()->size(), scratch_buffer_count_)); TF_LITE_ENSURE_STATUS(builder.AddTensors(subgraph_, context_->tensors)); TF_LITE_ENSURE_STATUS(builder.AddScratchBuffers(scratch_buffer_handles_)); const AllocationInfo* allocation_info = builder.Finish(); @@ -606,8 +604,8 @@ TfLiteStatus MicroAllocator::FinishTensorAllocation() { // Data in variables need to be kept for the next invocation so allocating // them from the tail (persistent 
area). - if (AllocateVariables(tensors_, context_->tensors, memory_allocator_) != - kTfLiteOk) { + if (AllocateVariables(subgraph_->tensors(), context_->tensors, + memory_allocator_) != kTfLiteOk) { TF_LITE_REPORT_ERROR( error_reporter_, "Failed to allocate variables. Please increase arena size."); diff --git a/tensorflow/lite/micro/micro_allocator.h b/tensorflow/lite/micro/micro_allocator.h index b16f814071c..6a6e1e03e53 100644 --- a/tensorflow/lite/micro/micro_allocator.h +++ b/tensorflow/lite/micro/micro_allocator.h @@ -135,8 +135,6 @@ class MicroAllocator { size_t scratch_buffer_count_ = 0; const SubGraph* subgraph_; - const flatbuffers::Vector>* operators_; - const flatbuffers::Vector>* tensors_; }; } // namespace tflite diff --git a/tensorflow/lite/micro/micro_interpreter.cc b/tensorflow/lite/micro/micro_interpreter.cc index c5d35407648..2d774d0a139 100644 --- a/tensorflow/lite/micro/micro_interpreter.cc +++ b/tensorflow/lite/micro/micro_interpreter.cc @@ -87,8 +87,6 @@ MicroInterpreter::MicroInterpreter(const Model* model, return; } subgraph_ = (*subgraphs)[0]; - tensors_ = subgraph_->tensors(); - operators_ = subgraph_->operators(); context_.impl_ = static_cast(&context_helper_); context_.ReportError = context_helper_.ReportOpError; @@ -112,7 +110,7 @@ MicroInterpreter::MicroInterpreter(const Model* model, MicroInterpreter::~MicroInterpreter() { if (node_and_registrations_ != nullptr) { - for (size_t i = 0; i < operators_->size(); ++i) { + for (size_t i = 0; i < subgraph_->operators()->size(); ++i) { TfLiteNode* node = &(node_and_registrations_[i].node); const TfLiteRegistration* registration = node_and_registrations_[i].registration; @@ -171,7 +169,7 @@ TfLiteStatus MicroInterpreter::AllocateTensors() { context_.RequestScratchBufferInArena = nullptr; context_.GetScratchBuffer = nullptr; - for (size_t i = 0; i < operators_->size(); ++i) { + for (size_t i = 0; i < subgraph_->operators()->size(); ++i) { context_helper_.SetNodeIndex(i); auto* node = 
&(node_and_registrations_[i].node); auto* registration = node_and_registrations_[i].registration; @@ -195,7 +193,7 @@ TfLiteStatus MicroInterpreter::AllocateTensors() { // in Prepare stage. context_.RequestScratchBufferInArena = context_helper_.RequestScratchBufferInArena; - for (size_t i = 0; i < operators_->size(); ++i) { + for (size_t i = 0; i < subgraph_->operators()->size(); ++i) { // Set node idx to annotate the lifetime for scratch buffers. context_helper_.SetNodeIndex(i); auto* node = &(node_and_registrations_[i].node); @@ -237,7 +235,7 @@ TfLiteStatus MicroInterpreter::Invoke() { TF_LITE_ENSURE_OK(&context_, AllocateTensors()); } - for (size_t i = 0; i < operators_->size(); ++i) { + for (size_t i = 0; i < subgraph_->operators()->size(); ++i) { auto* node = &(node_and_registrations_[i].node); auto* registration = node_and_registrations_[i].registration; diff --git a/tensorflow/lite/micro/micro_interpreter.h b/tensorflow/lite/micro/micro_interpreter.h index b2046128c78..15f53b681a6 100644 --- a/tensorflow/lite/micro/micro_interpreter.h +++ b/tensorflow/lite/micro/micro_interpreter.h @@ -132,7 +132,7 @@ class MicroInterpreter { TfLiteStatus initialization_status() const { return initialization_status_; } - size_t operators_size() const { return operators_->size(); } + size_t operators_size() const { return subgraph_->operators()->size(); } // For debugging only. 
const NodeAndRegistration node_and_registration(int node_index) const { @@ -163,8 +163,6 @@ class MicroInterpreter { bool tensors_allocated_; TfLiteStatus initialization_status_; - const flatbuffers::Vector>* tensors_; - const flatbuffers::Vector>* operators_; const SubGraph* subgraph_; internal::ContextHelper context_helper_; diff --git a/tensorflow/lite/micro/micro_mutable_op_resolver.h b/tensorflow/lite/micro/micro_mutable_op_resolver.h index ac304352a57..ead9be490a3 100644 --- a/tensorflow/lite/micro/micro_mutable_op_resolver.h +++ b/tensorflow/lite/micro/micro_mutable_op_resolver.h @@ -34,12 +34,16 @@ inline int MicroOpResolverAnyVersion() { return 0; } template class MicroOpResolver : public OpResolver { public: + explicit MicroOpResolver(ErrorReporter* error_reporter = nullptr) + : error_reporter_(error_reporter) {} + const TfLiteRegistration* FindOp(tflite::BuiltinOperator op, int version) const override { for (unsigned int i = 0; i < registrations_len_; ++i) { const TfLiteRegistration& registration = registrations_[i]; if ((registration.builtin_code == op) && (registration.version == MicroOpResolverAnyVersion() || + version == MicroOpResolverAnyVersion() || registration.version == version)) { return ®istration; } @@ -53,6 +57,7 @@ class MicroOpResolver : public OpResolver { if ((registration.builtin_code == BuiltinOperator_CUSTOM) && (strcmp(registration.custom_name, op) == 0) && (registration.version == MicroOpResolverAnyVersion() || + version == MicroOpResolverAnyVersion() || registration.version == version)) { return ®istration; } @@ -60,11 +65,16 @@ class MicroOpResolver : public OpResolver { return nullptr; } - void AddBuiltin(tflite::BuiltinOperator op, TfLiteRegistration* registration, - int version = 1) { + TfLiteStatus AddBuiltin(tflite::BuiltinOperator op, + TfLiteRegistration* registration, int version = 1) { if (registrations_len_ >= tOpCount) { - // TODO(b/147748244) - Add error reporting hooks so we can report this! 
- return; + if (error_reporter_) { + TF_LITE_REPORT_ERROR(error_reporter_, + "Couldn't register builtin op #%d, resolver size " + "is too small (%d)", + op, tOpCount); + } + return kTfLiteError; } TfLiteRegistration* new_registration = ®istrations_[registrations_len_]; registrations_len_ += 1; @@ -72,20 +82,32 @@ class MicroOpResolver : public OpResolver { *new_registration = *registration; new_registration->builtin_code = op; new_registration->version = version; + + return kTfLiteOk; } - void AddBuiltin(tflite::BuiltinOperator op, TfLiteRegistration* registration, - int min_version, int max_version) { + TfLiteStatus AddBuiltin(tflite::BuiltinOperator op, + TfLiteRegistration* registration, int min_version, + int max_version) { for (int version = min_version; version <= max_version; ++version) { - AddBuiltin(op, registration, version); + TfLiteStatus add_status = AddBuiltin(op, registration, version); + if (add_status != kTfLiteOk) { + return add_status; + } } + return kTfLiteOk; } - void AddCustom(const char* name, TfLiteRegistration* registration, - int version = 1) { + TfLiteStatus AddCustom(const char* name, TfLiteRegistration* registration, + int version = 1) { if (registrations_len_ >= tOpCount) { - // TODO(b/147748244) - Add error reporting hooks so we can report this! 
- return; + if (error_reporter_) { + TF_LITE_REPORT_ERROR( + error_reporter_, + "Couldn't register custom op '%s', resolver size is too small (%d)", + name, tOpCount); + } + return kTfLiteError; } TfLiteRegistration* new_registration = ®istrations_[registrations_len_]; registrations_len_ += 1; @@ -94,13 +116,19 @@ class MicroOpResolver : public OpResolver { new_registration->builtin_code = BuiltinOperator_CUSTOM; new_registration->custom_name = name; new_registration->version = version; + + return kTfLiteOk; } - void AddCustom(const char* name, TfLiteRegistration* registration, - int min_version, int max_version) { + TfLiteStatus AddCustom(const char* name, TfLiteRegistration* registration, + int min_version, int max_version) { for (int version = min_version; version <= max_version; ++version) { - AddCustom(name, registration, version); + TfLiteStatus add_status = AddCustom(name, registration, version); + if (add_status != kTfLiteOk) { + return add_status; + } } + return kTfLiteOk; } unsigned int GetRegistrationLength() { return registrations_len_; } @@ -108,6 +136,7 @@ class MicroOpResolver : public OpResolver { private: TfLiteRegistration registrations_[tOpCount]; unsigned int registrations_len_ = 0; + ErrorReporter* error_reporter_; TF_LITE_REMOVE_VIRTUAL_DELETE }; diff --git a/tensorflow/lite/micro/micro_mutable_op_resolver_test.cc b/tensorflow/lite/micro/micro_mutable_op_resolver_test.cc index 0619591523a..61ab0e3bec9 100644 --- a/tensorflow/lite/micro/micro_mutable_op_resolver_test.cc +++ b/tensorflow/lite/micro/micro_mutable_op_resolver_test.cc @@ -14,7 +14,6 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/lite/micro/micro_mutable_op_resolver.h" - #include "tensorflow/lite/micro/testing/micro_test.h" namespace tflite { @@ -35,6 +34,22 @@ TfLiteStatus MockPrepare(TfLiteContext* context, TfLiteNode* node) { TfLiteStatus MockInvoke(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } + +class MockErrorReporter : public ErrorReporter { + public: + MockErrorReporter() : has_been_called_(false) {} + int Report(const char* format, va_list args) override { + has_been_called_ = true; + return 0; + }; + + bool HasBeenCalled() { return has_been_called_; } + + private: + bool has_been_called_; + TF_LITE_REMOVE_VIRTUAL_DELETE +}; + } // namespace } // namespace tflite @@ -52,8 +67,10 @@ TF_LITE_MICRO_TEST(TestOperations) { // We need space for 7 operators because of 2 ops, one with 3 versions, one // with 4 versions. MicroOpResolver<7> micro_op_resolver; - micro_op_resolver.AddBuiltin(BuiltinOperator_CONV_2D, &r, 1, 3); - micro_op_resolver.AddCustom("mock_custom", &r, 1, 4); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, micro_op_resolver.AddBuiltin( + BuiltinOperator_CONV_2D, &r, 1, 3)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, + micro_op_resolver.AddCustom("mock_custom", &r, 1, 4)); OpResolver* resolver = µ_op_resolver; const TfLiteRegistration* registration = @@ -96,8 +113,10 @@ TF_LITE_MICRO_TEST(TestOpRegistrationOverflow) { MicroOpResolver<4> micro_op_resolver; // Register 7 ops, but only 4 is expected because the class is created with // that limit.. 
- micro_op_resolver.AddBuiltin(BuiltinOperator_CONV_2D, &r, 0, 2); - micro_op_resolver.AddCustom("mock_custom", &r, 0, 3); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, micro_op_resolver.AddBuiltin( + BuiltinOperator_CONV_2D, &r, 0, 2)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteError, + micro_op_resolver.AddCustom("mock_custom", &r, 0, 3)); OpResolver* resolver = µ_op_resolver; TF_LITE_MICRO_EXPECT_EQ(4, micro_op_resolver.GetRegistrationLength()); @@ -137,4 +156,117 @@ TF_LITE_MICRO_TEST(TestZeroVersionRegistration) { TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(nullptr, nullptr)); } +TF_LITE_MICRO_TEST(TestZeroModelVersion) { + using tflite::MicroOpResolver; + using tflite::OpResolver; + + static TfLiteRegistration r = {tflite::MockInit, tflite::MockFree, + tflite::MockPrepare, tflite::MockInvoke}; + + MicroOpResolver<2> micro_op_resolver; + micro_op_resolver.AddCustom("mock_custom", &r, 1, 2); + TF_LITE_MICRO_EXPECT_EQ(2, micro_op_resolver.GetRegistrationLength()); + OpResolver* resolver = µ_op_resolver; + + // If the Op version in the model is 0, we should always get the first + // registration. + const TfLiteRegistration* registration = resolver->FindOp("mock_custom", 0); + TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + TF_LITE_MICRO_EXPECT_EQ(1, registration->version); + TF_LITE_MICRO_EXPECT_EQ(nullptr, registration->init(nullptr, nullptr, 0)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(nullptr, nullptr)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(nullptr, nullptr)); + + // If a non-zero version is requested, the correct version'd op should be + // returned. TODO(b/151245712): Realistically, we are better off removing + // these version checks altogether. 
+ for (int i = 1; i <= 2; ++i) { + registration = resolver->FindOp("mock_custom", i); + TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + TF_LITE_MICRO_EXPECT_EQ(i, registration->version); + TF_LITE_MICRO_EXPECT_EQ(nullptr, registration->init(nullptr, nullptr, 0)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(nullptr, nullptr)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(nullptr, nullptr)); + } + + registration = resolver->FindOp("mock_custom", 42); + TF_LITE_MICRO_EXPECT_EQ(nullptr, registration); +} + +TF_LITE_MICRO_TEST(TestBuiltinRegistrationErrorReporting) { + using tflite::BuiltinOperator_CONV_2D; + using tflite::BuiltinOperator_RELU; + using tflite::MicroOpResolver; + + static TfLiteRegistration r = {tflite::MockInit, tflite::MockFree, + tflite::MockPrepare, tflite::MockInvoke}; + + tflite::MockErrorReporter mock_reporter; + MicroOpResolver<1> micro_op_resolver(&mock_reporter); + TF_LITE_MICRO_EXPECT_EQ(false, mock_reporter.HasBeenCalled()); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, micro_op_resolver.AddBuiltin(BuiltinOperator_CONV_2D, &r)); + TF_LITE_MICRO_EXPECT_EQ(false, mock_reporter.HasBeenCalled()); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteError, micro_op_resolver.AddBuiltin(BuiltinOperator_RELU, &r)); + TF_LITE_MICRO_EXPECT_EQ(true, mock_reporter.HasBeenCalled()); +} + +TF_LITE_MICRO_TEST(TestCustomRegistrationErrorReporting) { + using tflite::BuiltinOperator_CONV_2D; + using tflite::BuiltinOperator_RELU; + using tflite::MicroOpResolver; + + static TfLiteRegistration r = {tflite::MockInit, tflite::MockFree, + tflite::MockPrepare, tflite::MockInvoke}; + + tflite::MockErrorReporter mock_reporter; + MicroOpResolver<1> micro_op_resolver(&mock_reporter); + TF_LITE_MICRO_EXPECT_EQ(false, mock_reporter.HasBeenCalled()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, + micro_op_resolver.AddCustom("mock_custom_0", &r)); + TF_LITE_MICRO_EXPECT_EQ(false, mock_reporter.HasBeenCalled()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteError, + 
micro_op_resolver.AddCustom("mock_custom_1", &r)); + TF_LITE_MICRO_EXPECT_EQ(true, mock_reporter.HasBeenCalled()); +} + +TF_LITE_MICRO_TEST(TestBuiltinVersionRegistrationErrorReporting) { + using tflite::BuiltinOperator_CONV_2D; + using tflite::BuiltinOperator_RELU; + using tflite::MicroOpResolver; + + static TfLiteRegistration r = {tflite::MockInit, tflite::MockFree, + tflite::MockPrepare, tflite::MockInvoke}; + + tflite::MockErrorReporter mock_reporter; + MicroOpResolver<2> micro_op_resolver(&mock_reporter); + TF_LITE_MICRO_EXPECT_EQ(false, mock_reporter.HasBeenCalled()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, micro_op_resolver.AddBuiltin( + BuiltinOperator_CONV_2D, &r, 1, 2)); + TF_LITE_MICRO_EXPECT_EQ(false, mock_reporter.HasBeenCalled()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteError, micro_op_resolver.AddBuiltin( + BuiltinOperator_RELU, &r, 1, 2)); + TF_LITE_MICRO_EXPECT_EQ(true, mock_reporter.HasBeenCalled()); +} + +TF_LITE_MICRO_TEST(TestCustomVersionRegistrationErrorReporting) { + using tflite::BuiltinOperator_CONV_2D; + using tflite::BuiltinOperator_RELU; + using tflite::MicroOpResolver; + + static TfLiteRegistration r = {tflite::MockInit, tflite::MockFree, + tflite::MockPrepare, tflite::MockInvoke}; + + tflite::MockErrorReporter mock_reporter; + MicroOpResolver<2> micro_op_resolver(&mock_reporter); + TF_LITE_MICRO_EXPECT_EQ(false, mock_reporter.HasBeenCalled()); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, micro_op_resolver.AddCustom("mock_custom_0", &r, 1, 2)); + TF_LITE_MICRO_EXPECT_EQ(false, mock_reporter.HasBeenCalled()); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteError, micro_op_resolver.AddCustom("mock_custom_1", &r, 1, 2)); + TF_LITE_MICRO_EXPECT_EQ(true, mock_reporter.HasBeenCalled()); +} + TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/micro_optional_debug_tools.cc b/tensorflow/lite/micro/micro_optional_debug_tools.cc index 70f16c78d79..42c42aea9f8 100644 --- a/tensorflow/lite/micro/micro_optional_debug_tools.cc +++ 
b/tensorflow/lite/micro/micro_optional_debug_tools.cc @@ -95,6 +95,8 @@ const char* AllocTypeName(TfLiteAllocationType type) { return "kTfLiteArenaRw"; case kTfLiteArenaRwPersistent: return "kTfLiteArenaRwPersistent"; + case kTfLitePersistentRo: + return "kTfLitePersistentRo"; } return "(invalid)"; } diff --git a/tensorflow/lite/micro/tools/make/Makefile b/tensorflow/lite/micro/tools/make/Makefile index 8599a27df52..1331163a410 100644 --- a/tensorflow/lite/micro/tools/make/Makefile +++ b/tensorflow/lite/micro/tools/make/Makefile @@ -86,8 +86,8 @@ else ifeq ($(BUILD_TYPE), release) CXXFLAGS += -DNDEBUG -O3 -DTF_LITE_STRIP_ERROR_STRINGS CCFLAGS += -DNDEBUG -O3 -DTF_LITE_STRIP_ERROR_STRINGS else - CXXFLAGS += -O3 - CCFLAGS += -O3 + CXXFLAGS += -DNDEBUG -O3 + CCFLAGS += -DNDEBUG -O3 endif # This library is the main target for this makefile. It will contain a minimal diff --git a/tensorflow/lite/micro/tools/make/download_and_extract.sh b/tensorflow/lite/micro/tools/make/download_and_extract.sh index 2248031f6d1..a403019d192 100755 --- a/tensorflow/lite/micro/tools/make/download_and_extract.sh +++ b/tensorflow/lite/micro/tools/make/download_and_extract.sh @@ -86,7 +86,7 @@ patch_kissfft() { # CIFAR10 test dataset. 
patch_cifar10_dataset() { xxd -l 30730 -i ${1}/test_batch.bin ${1}/../../../../examples/image_recognition_experimental/first_10_cifar_images.h - sed -i "s/unsigned char/const unsigned char/g" ${1}/../../../../examples/image_recognition_experimental/first_10_cifar_images.h + sed -i -E "s/unsigned char/const unsigned char/g" ${1}/../../../../examples/image_recognition_experimental/first_10_cifar_images.h } build_embarc_mli() { diff --git a/tensorflow/lite/optional_debug_tools.cc b/tensorflow/lite/optional_debug_tools.cc index c5ccdb98390..2e25b0a17f7 100644 --- a/tensorflow/lite/optional_debug_tools.cc +++ b/tensorflow/lite/optional_debug_tools.cc @@ -77,6 +77,8 @@ const char* AllocTypeName(TfLiteAllocationType type) { return "kTfLiteArenaRw"; case kTfLiteArenaRwPersistent: return "kTfLiteArenaRwPersistent"; + case kTfLitePersistentRo: + return "kTfLitePersistentRo"; } return "(invalid)"; } diff --git a/tensorflow/lite/python/BUILD b/tensorflow/lite/python/BUILD index 7248792523e..c1f37c81b7f 100644 --- a/tensorflow/lite/python/BUILD +++ b/tensorflow/lite/python/BUILD @@ -157,6 +157,7 @@ py_test( name = "lite_v2_test", srcs = ["lite_v2_test.py"], python_version = "PY3", + shard_count = 4, srcs_version = "PY2AND3", tags = [ "no_windows", diff --git a/tensorflow/lite/python/interpreter_wrapper/interpreter_wrapper.cc b/tensorflow/lite/python/interpreter_wrapper/interpreter_wrapper.cc index 844a9827cb6..92e7c22a702 100644 --- a/tensorflow/lite/python/interpreter_wrapper/interpreter_wrapper.cc +++ b/tensorflow/lite/python/interpreter_wrapper/interpreter_wrapper.cc @@ -609,6 +609,7 @@ PyObject* InterpreterWrapper::GetTensor(int i) const { size_t size_of_type; if (GetSizeOfType(nullptr, tensor->type, &size_of_type) != kTfLiteOk) { PyErr_SetString(PyExc_ValueError, "Unknown tensor type."); + free(data); return nullptr; } sparse_buffer_dims[0] = tensor->bytes / size_of_type; diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py index 
b2d58ec8746..99be58f4376 100644 --- a/tensorflow/lite/python/lite.py +++ b/tensorflow/lite/python/lite.py @@ -20,6 +20,8 @@ from __future__ import division from __future__ import print_function import enum +import shutil +import tempfile import warnings from absl import logging @@ -384,8 +386,13 @@ class TFLiteConverterBase(object): return True return False - def _parse_saved_model_args(self): - """Parses SavedModel arguments from the given Keras/RNN SavedModel.""" + def _parse_saved_model_args(self, always_enable_saved_model_import=False): + """Parses SavedModel arguments from the given Keras/RNN SavedModel. + + Args: + always_enable_saved_model_import: Bool. When the value is true, it enables + MLIR saved model import path regardless of checking the conditions. + """ if not self.experimental_new_converter: self.saved_model_dir = None return @@ -398,21 +405,23 @@ class TFLiteConverterBase(object): # frozen graph def path. self.saved_model_dir = None return - if not self._contains_function_with_implements_attr(saved_model_proto): + if (not always_enable_saved_model_import and + not self._contains_function_with_implements_attr(saved_model_proto)): self.saved_model_dir = None - else: + return + + if not self._saved_model_exported_names: self._saved_model_exported_names = [] - self._saved_model_version = saved_model_proto.saved_model_schema_version - if self._saved_model_version not in [1, 2]: - raise ValueError( - "SavedModel file format({0}) is not supported".format( - self._saved_model_version)) + self._saved_model_version = saved_model_proto.saved_model_schema_version + if self._saved_model_version not in [1, 2]: + raise ValueError("SavedModel file format({0}) is not supported".format( + self._saved_model_version)) class TFLiteConverterBaseV2(TFLiteConverterBase): """Converter subclass to share functionality between V2 converters.""" - def _convert(self, graph_def, input_tensors, output_tensors): + def convert(self, graph_def, input_tensors, output_tensors): 
"""Converts a TensorFlow GraphDef based on instance variables. Args: @@ -539,7 +548,7 @@ class TFLiteSavedModelConverterV2(TFLiteConverterBaseV2): self._saved_model_tags = saved_model_tags self._saved_model_exported_names = saved_model_exported_names self._trackable_obj = trackable_obj - self._parse_saved_model_args() + self._parse_saved_model_args(always_enable_saved_model_import=True) def convert(self): """Converts a TensorFlow GraphDef based on instance variables. @@ -569,7 +578,115 @@ class TFLiteSavedModelConverterV2(TFLiteConverterBaseV2): graph.get_tensor_by_name(signature_def.outputs[key].name) for key in signature_def.outputs ] - return self._convert(meta_graph.graph_def, input_tensors, output_tensors) + return super(TFLiteSavedModelConverterV2, + self).convert(meta_graph.graph_def, input_tensors, + output_tensors) + + +class TFLiteKerasModelConverterV2(TFLiteConverterBaseV2): + """Converts the given Keras model into TensorFlow Lite model.""" + + def __init__(self, keras_model, trackable_obj=None): + """Constructor for TFLiteConverter. + + Args: + keras_model: tf.Keras.Model. + trackable_obj: tf.AutoTrackable object associated with `funcs`. A + reference to this object needs to be maintained so that Variables do not + get garbage collected since functions have a weak reference to + Variables. This is only required when the tf.AutoTrackable object is not + maintained by the user (e.g. `from_saved_model`). + """ + super(TFLiteKerasModelConverterV2, self).__init__() + self._keras_model = keras_model + self._trackable_obj = trackable_obj + + def convert(self): + """Converts a keras model based on instance variables. + + Returns: + The converted data in serialized format. + + Raises: + ValueError: + Multiple concrete functions are specified. + Input shape is not specified. + Invalid quantization parameters. 
+ """ + temp_dir = tempfile.mkdtemp() + try: + self._keras_model.save(temp_dir, save_format="tf") + self.saved_model_dir = temp_dir + self._saved_model_tags = set([_tag_constants.SERVING]) + self._saved_model_exported_names = [ + _signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY + ] + self._parse_saved_model_args() + if self.saved_model_dir: + graph = _ops.Graph() + saved_model = _loader_impl.SavedModelLoader(self.saved_model_dir) + saved_model.load_graph(graph, tags=self._saved_model_tags) + meta_graph = saved_model.get_meta_graph_def_from_tags( + self._saved_model_tags) + signature_def = meta_graph.signature_def[ + _signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] + input_tensors = [ + graph.get_tensor_by_name(signature_def.inputs[key].name) + for key in signature_def.inputs + ] + output_tensors = [ + graph.get_tensor_by_name(signature_def.outputs[key].name) + for key in signature_def.outputs + ] + self._trackable_obj = _load(self.saved_model_dir, + self._saved_model_tags) + return super(TFLiteKerasModelConverterV2, + self).convert(meta_graph.graph_def, input_tensors, + output_tensors) + finally: + shutil.rmtree(temp_dir, True) + + input_signature = None + # If the model's call is not a `tf.function`, then we need to first get its + # input signature from `model_input_signature` method. We can't directly + # call `trace_model_call` because otherwise the batch dimension is set + # to None. + # Once we have better support for dynamic shapes, we can remove this. + if not isinstance(self._keras_model.call, _def_function.Function): + # Pass `keep_original_batch_size=True` will ensure that we get an input + # signature including the batch dimension specified by the user. 
+ input_signature = _saving_utils.model_input_signature( + self._keras_model, keep_original_batch_size=True) + + func = _saving_utils.trace_model_call(self._keras_model, input_signature) + concrete_func = func.get_concrete_function() + self._funcs = [concrete_func] + + frozen_func, graph_def = ( + _convert_to_constants.convert_variables_to_constants_v2_as_graph( + self._funcs[0], lower_control_flow=False)) + + input_tensors = [ + tensor for tensor in frozen_func.inputs + if tensor.dtype != _dtypes.resource + ] + output_tensors = frozen_func.outputs + + # Run a Grappler pass. + grappler_config = self._grappler_config() + # Skip running grappler when there are no optimizers to run. If not, + # grappler will run with the default optimizer set and it will lead to + # causing an unexpected behavior. + if grappler_config.graph_options.rewrite_options.optimizers: + graph_def = _run_graph_optimizations( + graph_def, + input_tensors, + output_tensors, + config=grappler_config, + graph=frozen_func.graph) + + return super(TFLiteKerasModelConverterV2, + self).convert(graph_def, input_tensors, output_tensors) class TFLiteFrozenGraphConverterV2(TFLiteConverterBaseV2): @@ -637,7 +754,8 @@ class TFLiteFrozenGraphConverterV2(TFLiteConverterBaseV2): config=grappler_config, graph=frozen_func.graph) - return self._convert(graph_def, input_tensors, output_tensors) + return super(TFLiteFrozenGraphConverterV2, + self).convert(graph_def, input_tensors, output_tensors) @_tf_export("lite.TFLiteConverter", v1=[]) @@ -761,6 +879,9 @@ class TFLiteConverterV2(TFLiteFrozenGraphConverterV2): if not signature_keys: signature_keys = saved_model.signatures + if len(signature_keys) != 1: + raise ValueError("Only support a single signature key.") + funcs = [] for key in signature_keys: if key not in saved_model.signatures: @@ -786,21 +907,7 @@ class TFLiteConverterV2(TFLiteFrozenGraphConverterV2): Returns: TFLiteConverter object. 
""" - input_signature = None - # If the model's call is not a `tf.function`, then we need to first get its - # input signature from `model_input_signature` method. We can't directly - # call `trace_model_call` because otherwise the batch dimension is set - # to None. - # Once we have better support for dynamic shapes, we can remove this. - if not isinstance(model.call, _def_function.Function): - # Pass `keep_original_batch_size=True` will ensure that we get an input - # signature including the batch dimension specified by the user. - input_signature = _saving_utils.model_input_signature( - model, keep_original_batch_size=True) - - func = _saving_utils.trace_model_call(model, input_signature) - concrete_func = func.get_concrete_function() - return cls([concrete_func]) + return TFLiteKerasModelConverterV2(model) # pylint: disable=useless-super-delegation def convert(self): @@ -960,7 +1067,7 @@ class TFLiteConverterBaseV1(TFLiteConverterBase): raise ValueError("std_dev and mean must be defined when inference_type " "or inference_input_type is QUANTIZED_UINT8 or INT8.") - def _convert(self): + def convert(self): """Converts a TensorFlow GraphDef based on instance variables. Returns: @@ -1243,8 +1350,86 @@ class TFLiteSavedModelConverter(TFLiteConverterBaseV1): self._output_tensors = result[2] self._parse_saved_model_args() + +class TFLiteKerasModelConverter(TFLiteConverterBaseV1): + """Converts the given SavedModel into TensorFlow Lite model.""" + + def __init__(self, + model_file, + input_arrays=None, + input_shapes=None, + output_arrays=None, + custom_objects=None): + """Constructor for TFLiteConverter. + + Args: + model_file: Full filepath of HDF5 file containing the tf.keras model. + input_arrays: List of input tensors to freeze graph with. Uses input + arrays from SignatureDef when none are provided. (default None) + input_shapes: Dict of strings representing input tensor names to list of + integers representing input shapes (e.g., {"foo" : [1, 16, 16, 3]}). 
+ Automatically determined when input shapes is None (e.g., {"foo" : + None}). (default None) + output_arrays: List of output tensors to freeze graph with. Uses output + arrays from SignatureDef when none are provided. (default None) + custom_objects: Dict mapping names (strings) to custom classes or + functions to be considered during model deserialization. (default None) + + Raises: + ValueError: Invalid arguments. + """ + super(TFLiteKerasModelConverter, + self).__init__(experimental_debug_info_func=None) + # Handles Keras when Eager mode is enabled. + if context.executing_eagerly(): + if input_arrays or output_arrays: + raise ValueError("`input_arrays` and `output_arrays` are unsupported " + "with Eager mode. If your model requires any of these " + "parameters, please use disable_eager_execution().") + + _keras.backend.set_learning_phase(False) + keras_model = _keras.models.load_model(model_file, custom_objects) + + function = _saving_utils.trace_model_call(keras_model) + concrete_func = function.get_concrete_function() + + frozen_func = _convert_to_constants.convert_variables_to_constants_v2( + concrete_func, lower_control_flow=False) + _set_tensor_shapes(frozen_func.inputs, input_shapes) + self._keras_model = keras_model + self._graph_def = frozen_func.graph.as_graph_def() + self._input_tensors = frozen_func.inputs + self._output_tensors = frozen_func.outputs + self._debug_info_func = _build_debug_info_func(frozen_func.graph) + return + + # Handles Keras when Eager mode is disabled. + _keras.backend.clear_session() + _keras.backend.set_learning_phase(False) + keras_model = _keras.models.load_model(model_file, custom_objects) + sess = _keras.backend.get_session() + + # Get input and output tensors. 
+ if input_arrays: + input_tensors = _get_tensors_from_tensor_names(sess.graph, input_arrays) + else: + input_tensors = keras_model.inputs + + if output_arrays: + output_tensors = _get_tensors_from_tensor_names(sess.graph, output_arrays) + else: + output_tensors = keras_model.outputs + _set_tensor_shapes(input_tensors, input_shapes) + + graph_def = _freeze_graph(sess, input_tensors, output_tensors) + self._keras_model = keras_model + self._graph_def = graph_def + self._input_tensors = input_tensors + self._output_tensors = output_tensors + self._debug_info_func = _build_debug_info_func(sess.graph) + def convert(self): - """Converts a TensorFlow GraphDef based on instance variables. + """Converts a Keras model based on instance variables. Returns: The converted data in serialized format. Either a TFLite Flatbuffer or a @@ -1255,7 +1440,28 @@ class TFLiteSavedModelConverter(TFLiteConverterBaseV1): Input shape is not specified. None value for dimension in input_tensor. """ - return self._convert() + temp_dir = tempfile.mkdtemp() + try: + self._keras_model.save(temp_dir, save_format="tf") + tag_set = set([_tag_constants.SERVING]) + signature_key = _signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY + result = _freeze_saved_model(temp_dir, None, None, None, tag_set, + signature_key) + + self.saved_model_dir = temp_dir + self._saved_model_tags = tag_set + self._saved_model_exported_names = [signature_key] + self._parse_saved_model_args() + if self.saved_model_dir: + self._graph_def = result[0] + self._input_tensors = result[1] + self._output_tensors = result[2] + self._debug_info_func = _build_debug_info_func(result[3]) + return super(TFLiteKerasModelConverter, self).convert() + finally: + shutil.rmtree(temp_dir, True) + + return super(TFLiteKerasModelConverter, self).convert() class TFLiteFrozenGraphConverter(TFLiteConverterBaseV1): @@ -1304,20 +1510,6 @@ class TFLiteFrozenGraphConverter(TFLiteConverterBaseV1): self._input_arrays_with_shape = input_arrays_with_shape 
self._output_arrays = output_arrays - def convert(self): - """Converts a TensorFlow GraphDef based on instance variables. - - Returns: - The converted data in serialized format. Either a TFLite Flatbuffer or a - Graphviz graph depending on value in `output_format`. - - Raises: - ValueError: - Input shape is not specified. - None value for dimension in input_tensor. - """ - return self._convert() - @_tf_export(v1=["lite.TFLiteConverter"]) class TFLiteConverter(TFLiteFrozenGraphConverter): @@ -1645,53 +1837,8 @@ class TFLiteConverter(TFLiteFrozenGraphConverter): Returns: TFLiteConverter class. """ - # Handles Keras when Eager mode is enabled. - if context.executing_eagerly(): - if input_arrays or output_arrays: - raise ValueError("`input_arrays` and `output_arrays` are unsupported " - "with Eager mode. If your model requires any of these " - "parameters, please use disable_eager_execution().") - - _keras.backend.set_learning_phase(False) - keras_model = _keras.models.load_model(model_file, custom_objects) - - function = _saving_utils.trace_model_call(keras_model) - concrete_func = function.get_concrete_function() - - frozen_func = _convert_to_constants.convert_variables_to_constants_v2( - concrete_func, lower_control_flow=False) - _set_tensor_shapes(frozen_func.inputs, input_shapes) - return cls( - frozen_func.graph.as_graph_def(), - frozen_func.inputs, - frozen_func.outputs, - experimental_debug_info_func=_build_debug_info_func( - frozen_func.graph)) - - # Handles Keras when Eager mode is disabled. - _keras.backend.clear_session() - _keras.backend.set_learning_phase(False) - keras_model = _keras.models.load_model(model_file, custom_objects) - sess = _keras.backend.get_session() - - # Get input and output tensors. 
- if input_arrays: - input_tensors = _get_tensors_from_tensor_names(sess.graph, input_arrays) - else: - input_tensors = keras_model.inputs - - if output_arrays: - output_tensors = _get_tensors_from_tensor_names(sess.graph, output_arrays) - else: - output_tensors = keras_model.outputs - _set_tensor_shapes(input_tensors, input_shapes) - - graph_def = _freeze_graph(sess, input_tensors, output_tensors) - return cls( - graph_def, - input_tensors, - output_tensors, - experimental_debug_info_func=_build_debug_info_func(sess.graph)) + return TFLiteKerasModelConverter(model_file, input_arrays, input_shapes, + output_arrays, custom_objects) # pylint: disable=useless-super-delegation def convert(self): diff --git a/tensorflow/lite/python/lite_test.py b/tensorflow/lite/python/lite_test.py index 530c514eb96..1bcb2ce0ee4 100644 --- a/tensorflow/lite/python/lite_test.py +++ b/tensorflow/lite/python/lite_test.py @@ -269,9 +269,7 @@ class FromSessionTest(TestModels, parameterized.TestCase): [out_tensor]) converter.inference_input_type = lite_constants.QUANTIZED_UINT8 converter.inference_type = lite_constants.FLOAT - converter.quantized_input_stats = { - 'Placeholder': (0., 1.) - } # mean, std_dev + converter.quantized_input_stats = {'Placeholder': (0., 1.)} # mean, std_dev tflite_model = converter.convert() self.assertTrue(tflite_model) @@ -1327,6 +1325,41 @@ class FromSessionTest(TestModels, parameterized.TestCase): tflite_model = converter.convert() self.assertTrue(tflite_model) + def testResizeWithShape(self): + with ops.Graph().as_default(): + # Construct a graph with a dynamically shapped input and an internal node + # that relies on the output of that input's shape. 
+ in_tensor = array_ops.placeholder( + shape=[None, None], dtype=dtypes.float32) + in_tensor2 = [[1, 2], [3, 4]] + out_tensor = array_ops.reshape(in_tensor2, array_ops.shape(in_tensor)) + sess = session.Session() + + converter = lite.TFLiteConverter.from_session(sess, [in_tensor], + [out_tensor]) + converter.experimental_new_converter = True + tflite_model = converter.convert() + + # Check values from converted model. + interpreter = Interpreter(model_content=tflite_model) + input_details = interpreter.get_input_details() + self.assertLen(input_details, 1) + self.assertTrue(([1, 1] == input_details[0]['shape']).all()) + self.assertTrue(([-1, -1] == input_details[0]['shape_signature']).all()) + + # Resize tensor and invoke. + interpreter.resize_tensor_input(0, [4]) + interpreter.allocate_tensors() + interpreter.invoke() + + # The output should be reshaped properly according to the resized input. + output_details = interpreter.get_output_details() + self.assertLen(output_details, 1) + self.assertEqual(np.int32, output_details[0]['dtype']) + self.assertTrue(([4] == output_details[0]['shape']).all()) + output_data = interpreter.get_tensor(output_details[0]['index']) + self.assertTrue(([1, 2, 3, 4] == output_data).all()) + def testResizingIntermediateDynamicTensor(self): # This is a regression test for the case where shape of dynamic output # tensors changes between invocations. 
@@ -1895,7 +1928,7 @@ class FromKerasFile(TestModels, parameterized.TestCase): input_details = interpreter.get_input_details() self.assertLen(input_details, 1) - self.assertEqual('dense_input', input_details[0]['name']) + self.assertEndsWith(input_details[0]['name'], 'dense_input') self.assertEqual(np.float32, input_details[0]['dtype']) self.assertTrue(([1, 3] == input_details[0]['shape']).all()) self.assertEqual((0., 0.), input_details[0]['quantization']) @@ -1990,7 +2023,7 @@ class FromKerasFile(TestModels, parameterized.TestCase): input_details = interpreter.get_input_details() self.assertLen(input_details, 1) - self.assertEqual('dense_input', input_details[0]['name']) + self.assertEndsWith(input_details[0]['name'], 'dense_input') self.assertTrue(([2, 3] == input_details[0]['shape']).all()) def testSequentialModelOutputArray(self): @@ -2109,12 +2142,12 @@ class FromKerasFile(TestModels, parameterized.TestCase): input_details = interpreter.get_input_details() self.assertLen(input_details, 2) - self.assertEqual('input_a', input_details[0]['name']) + self.assertEndsWith(input_details[0]['name'], 'input_a') self.assertEqual(np.float32, input_details[0]['dtype']) self.assertTrue(([1, 3] == input_details[0]['shape']).all()) self.assertEqual((0., 0.), input_details[0]['quantization']) - self.assertEqual('input_b', input_details[1]['name']) + self.assertEndsWith(input_details[1]['name'], 'input_b') self.assertEqual(np.float32, input_details[1]['dtype']) self.assertTrue(([1, 3] == input_details[1]['shape']).all()) self.assertEqual((0., 0.), input_details[1]['quantization']) @@ -2165,7 +2198,7 @@ class FromKerasFile(TestModels, parameterized.TestCase): input_details = interpreter.get_input_details() self.assertLen(input_details, 1) - self.assertEqual('dense_input', input_details[0]['name']) + self.assertEndsWith(input_details[0]['name'], 'dense_input') self.assertEqual(np.float32, input_details[0]['dtype']) self.assertTrue(([1, 3] == input_details[0]['shape']).all()) 
self.assertEqual((0., 0.), input_details[0]['quantization']) diff --git a/tensorflow/lite/python/lite_v2_test.py b/tensorflow/lite/python/lite_v2_test.py index 59f326d4b9f..4768892f359 100644 --- a/tensorflow/lite/python/lite_v2_test.py +++ b/tensorflow/lite/python/lite_v2_test.py @@ -213,9 +213,11 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): self.units = units def build(self, input_shape): - self.w = self.add_weight(shape=(input_shape[-1], self.units), - initializer='random_normal', - trainable=True) + self.w = self.add_weight( + 'weight', + shape=(input_shape[-1], self.units), + initializer='random_normal', + trainable=True) self.min_var = self.add_weight( 'min', initializer=tf.keras.initializers.Constant(-6.0), @@ -469,15 +471,10 @@ class FromSavedModelTest(lite_v2_test_util.ModelTest): save_dir = os.path.join(self.get_temp_dir(), 'saved_model') save(root, save_dir, {'add': add_func, 'sub': sub_func}) - # Ensure the converter generates. - converter = lite.TFLiteConverterV2.from_saved_model(save_dir) - self.assertLen(converter._funcs, 2) - # Try converting multiple functions. 
with self.assertRaises(ValueError) as error: - _ = converter.convert() - self.assertIn('This converter can only convert a single ConcreteFunction', - str(error.exception)) + _ = lite.TFLiteConverterV2.from_saved_model(save_dir) + self.assertIn('Only support a single signature key.', str(error.exception)) @test_util.run_v2_only def testNoConcreteFunctionModel(self): @@ -487,12 +484,9 @@ class FromSavedModelTest(lite_v2_test_util.ModelTest): save_dir = os.path.join(self.get_temp_dir(), 'saved_model') save(root, save_dir) - converter = lite.TFLiteConverterV2.from_saved_model(save_dir) - self.assertLen(converter._funcs, 0) - with self.assertRaises(ValueError) as error: - _ = converter.convert() - self.assertIn('No ConcreteFunction is specified.', str(error.exception)) + _ = lite.TFLiteConverterV2.from_saved_model(save_dir) + self.assertIn('Only support a single signature key.', str(error.exception)) @test_util.run_v2_only def testKerasSequentialModel(self): @@ -756,7 +750,10 @@ class ControlFlowTest(lite_v2_test_util.ModelTest): input_data = tf.constant( np.array(np.random.random_sample((1, 10, 10)), dtype=np.float32)) rnn_obj = rnn_layer(units=10, input_shape=(10, 10)) - model = tf.keras.models.Sequential([rnn_obj]) + model = tf.keras.models.Sequential([ + tf.keras.layers.Input(batch_size=1, shape=(10, 10), name='input'), + rnn_obj, + ]) # Convert model. 
converter = lite.TFLiteConverterV2.from_keras_model(model) @@ -795,6 +792,7 @@ class ControlFlowTest(lite_v2_test_util.ModelTest): input_data = tf.constant( np.array(np.random.random_sample((1, 10, 10)), dtype=np.float32)) model = tf.keras.models.Sequential() + model.add(tf.keras.layers.Input(batch_size=1, shape=(10, 10), name='input')) model.add( tf.keras.layers.Bidirectional( recurrent_v2.LSTM(units=10, return_sequences=True), diff --git a/tensorflow/lite/testdata/dynamic_shapes.bin b/tensorflow/lite/testdata/dynamic_shapes.bin new file mode 100644 index 00000000000..268d457131a Binary files /dev/null and b/tensorflow/lite/testdata/dynamic_shapes.bin differ diff --git a/tensorflow/lite/testing/BUILD b/tensorflow/lite/testing/BUILD index 9d50f1ad604..df85f659bf3 100644 --- a/tensorflow/lite/testing/BUILD +++ b/tensorflow/lite/testing/BUILD @@ -329,7 +329,7 @@ cc_library( "//tensorflow/core:android_tensorflow_lib", ], "//tensorflow:ios": [ - "//tensorflow/core:ios_tensorflow_lib", + "//tensorflow/core:portable_tensorflow_lib", ], }), ) @@ -368,7 +368,7 @@ cc_library( "//tensorflow/core:android_tensorflow_lib", ], "//tensorflow:ios": [ - "//tensorflow/core:ios_tensorflow_lib", + "//tensorflow/core:portable_tensorflow_lib", ], }), ) @@ -408,7 +408,7 @@ cc_library( "//tensorflow/core:android_tensorflow_lib", ], "//tensorflow:ios": [ - "//tensorflow/core:ios_tensorflow_lib", + "//tensorflow/core:portable_tensorflow_lib", ], }), ) @@ -443,7 +443,7 @@ cc_library( "//tensorflow/core:android_tensorflow_lib", ], "//tensorflow:ios": [ - "//tensorflow/core:ios_tensorflow_lib", + "//tensorflow/core:portable_tensorflow_lib", ], }), ) diff --git a/tensorflow/lite/testing/op_tests/prelu.py b/tensorflow/lite/testing/op_tests/prelu.py index f927c7a8b00..bc5875739ed 100644 --- a/tensorflow/lite/testing/op_tests/prelu.py +++ b/tensorflow/lite/testing/op_tests/prelu.py @@ -35,12 +35,33 @@ def make_prelu_tests(options): # channel. 
"input_shape": [[1, 10, 10, 3], [3, 3, 3, 3]], "shared_axes": [[1, 2], [1]], + "fully_quantize": [False], + "input_range": [(-10, 10)], }, { # 2D-3D example. Share the 2nd axis. "input_shape": [[20, 20], [20, 20, 20]], "shared_axes": [[1]], - } + "fully_quantize": [False], + "input_range": [(-10, 10)], + }, + # Quantized cases. + { + # The canonical case for image processing is having a 4D `input` + # (NHWC)and `shared_axes`=[1, 2], so the alpha parameter is per + # channel. + "input_shape": [[1, 10, 10, 3], [3, 3, 3, 3]], + "shared_axes": [[1, 2], [1]], + "fully_quantize": [True], + "input_range": [(-10, 10)], + }, + { + # 2D-3D example. Share the 2nd axis. + "input_shape": [[20, 20], [20, 20, 20]], + "shared_axes": [[1]], + "fully_quantize": [True], + "input_range": [(-10, 10)], + }, ] def build_graph(parameters): @@ -64,7 +85,8 @@ def make_prelu_tests(options): for dim in range(1, len(input_shape)): alpha_shape.append(1 if dim in shared_axes else input_shape[dim]) - alpha_values = create_tensor_data(np.float32, alpha_shape) + alpha_values = create_tensor_data( + np.float32, alpha_shape, min_value=-5, max_value=5) # There should be only 1 trainable variable tensor. 
variables = tf.compat.v1.all_variables() diff --git a/tensorflow/lite/toco/graph_transformations/hardcode_min_max.cc b/tensorflow/lite/toco/graph_transformations/hardcode_min_max.cc index 9816cc1df6a..171d522daa7 100644 --- a/tensorflow/lite/toco/graph_transformations/hardcode_min_max.cc +++ b/tensorflow/lite/toco/graph_transformations/hardcode_min_max.cc @@ -271,8 +271,8 @@ bool MinMaxApproximatelyEqual(const MinMax& minmax1, const MinMax& minmax2) { const double magnitude = std::min(minmax1.max - minmax1.min, minmax2.max - minmax2.min); const double tolerated = 1e-6 * magnitude; - return std::abs(minmax1.min - minmax2.min) < tolerated && - std::abs(minmax1.max - minmax2.max) < tolerated; + return std::abs(minmax1.min - minmax2.min) <= tolerated && + std::abs(minmax1.max - minmax2.max) <= tolerated; } // Propagates MinMax from any of the listed arrays, to all others. diff --git a/tensorflow/lite/toco/tflite/op_version.cc b/tensorflow/lite/toco/tflite/op_version.cc index 3718f83acaa..cf127a9f459 100644 --- a/tensorflow/lite/toco/tflite/op_version.cc +++ b/tensorflow/lite/toco/tflite/op_version.cc @@ -173,6 +173,7 @@ string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kSlice, 3}, "1.14.0"}, {{OperatorType::kTanh, 1}, "1.14.0"}, {{OperatorType::kTanh, 2}, "1.14.0"}, + {{OperatorType::kTanh, 3}, kPendingReleaseOpVersion}, {{OperatorType::kOneHot, 1}, "1.11.0"}, {{OperatorType::kCTCBeamSearchDecoder, 1}, "1.11.0"}, {{OperatorType::kUnpack, 1}, "1.11.0"}, @@ -180,8 +181,10 @@ string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kUnpack, 3}, "2.2.0"}, {{OperatorType::kUnpack, 4}, kPendingReleaseOpVersion}, {{OperatorType::kLeakyRelu, 1}, "1.13.1"}, + {{OperatorType::kLeakyRelu, 2}, kPendingReleaseOpVersion}, {{OperatorType::kLogistic, 1}, "1.14.0"}, {{OperatorType::kLogistic, 2}, "1.14.0"}, + {{OperatorType::kLogistic, 3}, kPendingReleaseOpVersion}, {{OperatorType::kLogSoftmax, 1}, "1.14.0"}, {{OperatorType::kLogSoftmax, 
2}, "1.14.0"}, {{OperatorType::kSquaredDifference, 1}, "1.13.1"}, diff --git a/tensorflow/lite/toco/tflite/operator.cc b/tensorflow/lite/toco/tflite/operator.cc index 57b791a1a94..917fd24c952 100644 --- a/tensorflow/lite/toco/tflite/operator.cc +++ b/tensorflow/lite/toco/tflite/operator.cc @@ -1118,6 +1118,7 @@ class ResizeBilinear GetVersioningOpSig(builtin_op(), op_signature); op_sig.options.resize.half_pixel_centers = resize_bilinear_op.half_pixel_centers; + op_sig.options.resize.align_corners = resize_bilinear_op.align_corners; return ::tflite::GetBuiltinOperatorVersion(op_sig); } }; @@ -1147,6 +1148,7 @@ class ResizeNearestNeighbor ::tflite::OpSignature op_sig = GetVersioningOpSig(builtin_op(), op_signature); op_sig.options.resize.half_pixel_centers = resize_nn_op.half_pixel_centers; + op_sig.options.resize.align_corners = resize_nn_op.align_corners; return ::tflite::GetBuiltinOperatorVersion(op_sig); } }; diff --git a/tensorflow/lite/tools/benchmark/BUILD b/tensorflow/lite/tools/benchmark/BUILD index 357072226af..f6cb71749f8 100644 --- a/tensorflow/lite/tools/benchmark/BUILD +++ b/tensorflow/lite/tools/benchmark/BUILD @@ -142,6 +142,7 @@ cc_library( ":profiling_listener", "//tensorflow/lite:framework", "//tensorflow/lite:string_util", + "//tensorflow/lite/c:common", "//tensorflow/lite/kernels:builtin_ops", "//tensorflow/lite/profiling:platform_profiler", "//tensorflow/lite/profiling:profile_summary_formatter", diff --git a/tensorflow/lite/tools/benchmark/README.md b/tensorflow/lite/tools/benchmark/README.md index a4f632c40a9..c44129cbbd3 100644 --- a/tensorflow/lite/tools/benchmark/README.md +++ b/tensorflow/lite/tools/benchmark/README.md @@ -87,6 +87,7 @@ the reported data on hexagon is in cycles, not in ms like on cpu. 
#### CoreML delegate * `use_coreml`: `bool` (default=false) +* `coreml_version`: `int` (default=0) #### External delegate * `external_delegate_path`: `string` (default="") diff --git a/tensorflow/lite/tools/benchmark/benchmark_performance_options.cc b/tensorflow/lite/tools/benchmark/benchmark_performance_options.cc index 26fed5e279f..cafef6fa133 100644 --- a/tensorflow/lite/tools/benchmark/benchmark_performance_options.cc +++ b/tensorflow/lite/tools/benchmark/benchmark_performance_options.cc @@ -334,7 +334,7 @@ void BenchmarkPerformanceOptions::Run() { // profiling listener etc. in each Run() invoke because such listeners may be // reset and become invalid in the next Run(). As a result, we record the // number of externally-added listeners here to prevent they're cleared later. - const int num_external_listners = single_option_run_->NumListeners(); + const int num_external_listeners = single_option_run_->NumListeners(); // Now perform all runs, each with different performance-affecting parameters. for (const auto& run_params : all_run_params_) { @@ -349,7 +349,7 @@ void BenchmarkPerformanceOptions::Run() { // Clear internally created listeners before each run but keep externally // created ones. - single_option_run_->RemoveListeners(num_external_listners); + single_option_run_->RemoveListeners(num_external_listeners); all_run_stats_->MarkBenchmarkStart(*single_option_run_params_); single_option_run_->Run(); diff --git a/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc b/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc index 489780e4f69..969713cce73 100644 --- a/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc +++ b/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc @@ -29,6 +29,7 @@ limitations under the License. 
#include "absl/base/attributes.h" #include "absl/strings/numbers.h" #include "ruy/profiler/profiler.h" // from @ruy +#include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/register.h" #include "tensorflow/lite/model.h" #include "tensorflow/lite/op_resolver.h" @@ -596,17 +597,20 @@ TfLiteStatus BenchmarkTfLiteModel::ResetInputsAndOutputs() { return kTfLiteOk; } -TfLiteStatus BenchmarkTfLiteModel::Init() { - TF_LITE_ENSURE_STATUS(LoadModel()); - +TfLiteStatus BenchmarkTfLiteModel::InitInterpreter() { auto resolver = GetOpResolver(); - const int32_t num_threads = params_.Get("num_threads"); tflite::InterpreterBuilder(*model_, *resolver)(&interpreter_, num_threads); if (!interpreter_) { - TFLITE_LOG(ERROR) << "Failed to construct interpreter"; + TFLITE_LOG(ERROR) << "Failed to initialize the interpreter"; return kTfLiteError; } + return kTfLiteOk; +} + +TfLiteStatus BenchmarkTfLiteModel::Init() { + TF_LITE_ENSURE_STATUS(LoadModel()); + TF_LITE_ENSURE_STATUS(InitInterpreter()); // Install profilers if necessary right after interpreter is created so that // any memory allocations inside the TFLite runtime could be recorded if the diff --git a/tensorflow/lite/tools/benchmark/benchmark_tflite_model.h b/tensorflow/lite/tools/benchmark/benchmark_tflite_model.h index b56390b3775..cc87743b531 100644 --- a/tensorflow/lite/tools/benchmark/benchmark_tflite_model.h +++ b/tensorflow/lite/tools/benchmark/benchmark_tflite_model.h @@ -74,6 +74,9 @@ class BenchmarkTfLiteModel : public BenchmarkModel { // Allow subclasses to create a customized Op resolver during init. virtual std::unique_ptr GetOpResolver() const; + // Allow subclass to initialize a customized tflite interpereter. + virtual TfLiteStatus InitInterpreter(); + // Create a BenchmarkListener that's specifically for TFLite profiling if // necessary. 
virtual std::unique_ptr MayCreateProfilingListener() const; diff --git a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h index 12ddf9945fd..ab150e87d93 100644 --- a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h +++ b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h @@ -29,6 +29,9 @@ limitations under the License. // TfLiteDelegate - allows delegation of nodes to alternative backends. // // Some abstractions in this file are created and managed by Interpreter. +// +// NOTE: The order of values in these structs are "semi-ABI stable". New values +// should be added only to the end of structs and never reordered. #ifndef TENSORFLOW_LITE_C_COMMON_H_ #define TENSORFLOW_LITE_C_COMMON_H_ @@ -318,15 +321,23 @@ typedef union TfLitePtrUnion { void* data; } TfLitePtrUnion; -// Memory allocation strategies. kTfLiteMmapRo is for read-only memory-mapped -// data (or data externally allocated). kTfLiteArenaRw is arena allocated -// data. kTfLiteDynamic is for tensors that are allocated during evaluation. +// Memory allocation strategies. +// * kTfLiteMmapRo: Read-only memory-mapped data, or data externally allocated. +// * kTfLiteArenaRw: Arena allocated with no guarantees about persistence, +// and available during eval. +// * kTfLiteArenaRwPersistent: Arena allocated but persistent across eval, and +// only available during eval. +// * kTfLiteDynamic: Allocated during eval, or for string tensors. +// * kTfLitePersistentRo: Allocated and populated during prepare. This is +// useful for tensors that can be computed during prepare and treated +// as constant inputs for downstream ops (also in prepare). typedef enum TfLiteAllocationType { kTfLiteMemNone = 0, kTfLiteMmapRo, kTfLiteArenaRw, kTfLiteArenaRwPersistent, kTfLiteDynamic, + kTfLitePersistentRo, } TfLiteAllocationType; // The delegates should use zero or positive integers to represent handles. 
diff --git a/tensorflow/lite/tools/delegates/README.md b/tensorflow/lite/tools/delegates/README.md index f0e15e9e71a..709fcffb24d 100644 --- a/tensorflow/lite/tools/delegates/README.md +++ b/tensorflow/lite/tools/delegates/README.md @@ -93,6 +93,9 @@ TFLite delegate. * `use_coreml`: `bool` (default=false) \ Whether to use the [Core ML delegate](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/experimental/delegates/coreml). This option is only available in iOS. +* `coreml_version`: `int` (default=0) \ + Target Core ML version for model conversion. The default value is 0 and it + means using the newest version that's available on the device. ### External delegate provider * `external_delegate_path`: `string` (default="") \ diff --git a/tensorflow/lite/tools/delegates/coreml_delegate_provider.cc b/tensorflow/lite/tools/delegates/coreml_delegate_provider.cc index 0d1a8ade368..c29555716a4 100644 --- a/tensorflow/lite/tools/delegates/coreml_delegate_provider.cc +++ b/tensorflow/lite/tools/delegates/coreml_delegate_provider.cc @@ -32,6 +32,7 @@ class CoreMlDelegateProvider : public DelegateProvider { CoreMlDelegateProvider() { #if defined(REAL_IPHONE_DEVICE) default_params_.AddParam("use_coreml", ToolParam::Create(true)); + default_params_.AddParam("coreml_version", ToolParam::Create(0)); #endif } std::vector CreateFlags(ToolParams* params) const final; @@ -49,6 +50,10 @@ std::vector CoreMlDelegateProvider::CreateFlags( #if defined(REAL_IPHONE_DEVICE) std::vector flags = { CreateFlag("use_coreml", params, "use Core ML"), + CreateFlag("coreml_version", params, + "Target Core ML version for model conversion. 
" + "The default value is 0 and it means using the newest " + "version that's available on the device."), }; return flags; #else @@ -71,6 +76,7 @@ TfLiteDelegatePtr CoreMlDelegateProvider::CreateTfLiteDelegate( if (params.Get("use_coreml")) { TfLiteCoreMlDelegateOptions coreml_opts = { .enabled_devices = TfLiteCoreMlDelegateAllDevices}; + coreml_opts.coreml_version = params.Get("coreml_version"); coreml_opts.max_delegated_partitions = params.Get("max_delegated_partitions"); coreml_opts.min_nodes_per_partition = diff --git a/tensorflow/lite/tools/delegates/external_delegate_provider.cc b/tensorflow/lite/tools/delegates/external_delegate_provider.cc index 95b0e42802f..193860820b1 100644 --- a/tensorflow/lite/tools/delegates/external_delegate_provider.cc +++ b/tensorflow/lite/tools/delegates/external_delegate_provider.cc @@ -119,7 +119,7 @@ std::vector ExternalDelegateProvider::CreateFlags( "The library path for the underlying external."), CreateFlag( "external_delegate_options", params, - "Comma-seperated options to be passed to the external delegate")}; + "Comma-separated options to be passed to the external delegate")}; return flags; } diff --git a/tensorflow/lite/tools/evaluation/evaluation_delegate_provider.h b/tensorflow/lite/tools/evaluation/evaluation_delegate_provider.h index 36f80469a97..9ff20d630ce 100644 --- a/tensorflow/lite/tools/evaluation/evaluation_delegate_provider.h +++ b/tensorflow/lite/tools/evaluation/evaluation_delegate_provider.h @@ -33,7 +33,7 @@ class DelegateProviders { DelegateProviders(); // Initialize delegate-related parameters from commandline arguments and - // returns true if sucessful. + // returns true if successful. bool InitFromCmdlineArgs(int* argc, const char** argv); // Get all parameters from all registered delegate providers. 
diff --git a/tensorflow/lite/tools/evaluation/evaluation_delegate_provider_test.cc b/tensorflow/lite/tools/evaluation/evaluation_delegate_provider_test.cc index c2dfa8d0360..5d0a4dfa7d3 100644 --- a/tensorflow/lite/tools/evaluation/evaluation_delegate_provider_test.cc +++ b/tensorflow/lite/tools/evaluation/evaluation_delegate_provider_test.cc @@ -66,7 +66,7 @@ TEST(EvaluationDelegateProviderTest, GetAllParamsWithTfliteInferenceParams) { TfliteInferenceParams params; params.set_delegate(TfliteInferenceParams::NONE); params.set_num_threads(4); - // The same-meaning parameter in TfliteInferenceParams takes precendence. + // The same-meaning parameter in TfliteInferenceParams takes precedence. tools::ToolParams tool_params = providers.GetAllParams(params); EXPECT_EQ(4, tool_params.Get("num_threads")); EXPECT_EQ(1, argc); diff --git a/tensorflow/lite/tools/make/Makefile b/tensorflow/lite/tools/make/Makefile index ad3832f9962..41f87fb033d 100644 --- a/tensorflow/lite/tools/make/Makefile +++ b/tensorflow/lite/tools/make/Makefile @@ -246,7 +246,7 @@ BENCHMARK_LIB_SRCS := $(filter-out \ $(BENCHMARK_ALL_SRCS)) # These target-specific makefiles should modify or replace options like -# CXXFLAGS or LIBS to work for a specific targetted architecture. All logic +# CXXFLAGS or LIBS to work for a specific targeted architecture. All logic # based on platforms or architectures should happen within these files, to # keep this main makefile focused on the sources and dependencies. 
include $(wildcard $(MAKEFILE_DIR)/targets/*_makefile.inc) diff --git a/tensorflow/lite/tools/optimize/model_utils.cc b/tensorflow/lite/tools/optimize/model_utils.cc index 26dcff222bd..ae868cf21b8 100644 --- a/tensorflow/lite/tools/optimize/model_utils.cc +++ b/tensorflow/lite/tools/optimize/model_utils.cc @@ -134,8 +134,10 @@ void SetOperatorCodeVersion(ModelT* model) { OperatorCodeT* op_code = model->operator_codes[op->opcode_index].get(); operator_property::OperatorProperty property = operator_property::GetOperatorProperty(model, subgraph_idx, op_idx); - if (property.quantizable) { - // Only update the versions of quantizable operations. + if (property.quantizable && op_code->version < property.version) { + // Only update the versions of quantizable operations if the original + // version is lesser than minimum quantized one mentioned by + // OperatorProperty. op_code->version = property.version; } } diff --git a/tensorflow/lite/tools/optimize/operator_property.cc b/tensorflow/lite/tools/optimize/operator_property.cc index 71fdad87bd2..3633cb63ace 100644 --- a/tensorflow/lite/tools/optimize/operator_property.cc +++ b/tensorflow/lite/tools/optimize/operator_property.cc @@ -815,7 +815,17 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index, property.outputs = {{0, {}}}; property.version = 2; break; + case BuiltinOperator_PRELU: + property.inputs = {{0, {}}, {1, {}}}; + property.outputs = {{0, {}}}; + property.restrict_same_input_output_scale = false; + property.version = 1; + break; case BuiltinOperator_LEAKY_RELU: + property.inputs = {{0, {}}}; + property.outputs = {{0, {}}}; + property.version = 2; + break; case BuiltinOperator_RELU: case BuiltinOperator_RELU6: property.inputs = {{0, {}}}; diff --git a/tensorflow/lite/tools/optimize/operator_property.h b/tensorflow/lite/tools/optimize/operator_property.h index 995595e7878..95b0e5000c3 100644 --- a/tensorflow/lite/tools/optimize/operator_property.h +++ 
b/tensorflow/lite/tools/optimize/operator_property.h @@ -86,7 +86,7 @@ struct OperatorProperty { bool restrict_same_input_output_scale = false; // Use same min of min and max of max for each group. - // Incompatable with restrict_same_input_output_scale and restricted_value. + // Incompatible with restrict_same_input_output_scale and restricted_value. // TODO(jianlijianli): make it compatible with other restrictions when there // is a use case. std::vector> restrict_scale = {}; diff --git a/tensorflow/lite/tools/optimize/python/modify_model_interface_lib.py b/tensorflow/lite/tools/optimize/python/modify_model_interface_lib.py index 5e4bf99ccdf..782d88cbc9b 100644 --- a/tensorflow/lite/tools/optimize/python/modify_model_interface_lib.py +++ b/tensorflow/lite/tools/optimize/python/modify_model_interface_lib.py @@ -74,6 +74,6 @@ def modify_model_interface(input_file, output_file, input_type, output_type): # Throw an exception if the return status is an error. if status != 0: raise RuntimeError( - 'Error occured when trying to modify the model input type from float ' + 'Error occurred when trying to modify the model input type from float ' 'to {input_type} and output type from float to {output_type}.'.format( input_type=input_type, output_type=output_type)) diff --git a/tensorflow/lite/tools/versioning/op_version.cc b/tensorflow/lite/tools/versioning/op_version.cc index e2a0af7a380..9022afca629 100644 --- a/tensorflow/lite/tools/versioning/op_version.cc +++ b/tensorflow/lite/tools/versioning/op_version.cc @@ -133,7 +133,7 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) { if (op_sig.input_types.size() == 2) { return 6; } - // `keep_num_dims` is supported at verison 5. + // `keep_num_dims` is supported at version 5. 
if (op_sig.options.fully_connected.keep_num_dims) { return 5; } @@ -363,13 +363,20 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) { } return 1; case BuiltinOperator_RESIZE_BILINEAR: - case BuiltinOperator_RESIZE_NEAREST_NEIGHBOR: if (op_sig.options.resize.half_pixel_centers) { return 3; } else if (op_sig.input_types.at(0) == TensorType_INT8) { return 2; } return 1; + case BuiltinOperator_RESIZE_NEAREST_NEIGHBOR: + if (op_sig.options.resize.half_pixel_centers || + op_sig.options.resize.align_corners) { + return 3; + } else if (op_sig.input_types.at(0) == TensorType_INT8) { + return 2; + } + return 1; case BuiltinOperator_MAXIMUM: case BuiltinOperator_MINIMUM: @@ -438,6 +445,18 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) { } return 1; + case BuiltinOperator_TANH: + case BuiltinOperator_LOGISTIC: + if (op_sig.input_types.at(0) == TensorType_INT16 && + op_sig.output_types.at(0) == TensorType_INT16) { + return 3; + } + + if (op_sig.input_types.at(0) == TensorType_INT8) { + return 2; + } + return 1; + case BuiltinOperator_FILL: if (op_sig.input_types.size() >= 2 && (op_sig.input_types.at(1) == TensorType_BOOL || @@ -458,6 +477,12 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) { } return 1; + case BuiltinOperator_LEAKY_RELU: + if (op_sig.input_types.at(0) == TensorType_INT16) { + return 2; + } + return 1; + case BuiltinOperator_CONCATENATION: case BuiltinOperator_SOFTMAX: // In case of int16 inputs, the version is 3. 
@@ -479,8 +504,6 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) { case BuiltinOperator_REDUCE_MAX: case BuiltinOperator_REDUCE_MIN: case BuiltinOperator_RELU6: - case BuiltinOperator_TANH: - case BuiltinOperator_LOGISTIC: case BuiltinOperator_LOG_SOFTMAX: case BuiltinOperator_TOPK_V2: case BuiltinOperator_ARG_MAX: @@ -596,6 +619,8 @@ OpSignature GetOpSignature(const OperatorCode* op_code, const Operator* op, if (resize_bilinear_option) { op_sig.options.resize.half_pixel_centers = resize_bilinear_option->half_pixel_centers(); + op_sig.options.resize.align_corners = + resize_bilinear_option->align_corners(); } } break; case BuiltinOperator_RESIZE_NEAREST_NEIGHBOR: { @@ -604,6 +629,7 @@ OpSignature GetOpSignature(const OperatorCode* op_code, const Operator* op, if (resize_nn_option) { op_sig.options.resize.half_pixel_centers = resize_nn_option->half_pixel_centers(); + op_sig.options.resize.align_corners = resize_nn_option->align_corners(); } } break; // TODO(b/150176627): Add tests for GetOpSignature. diff --git a/tensorflow/lite/tools/versioning/op_version.h b/tensorflow/lite/tools/versioning/op_version.h index fba6c943462..4b0fe8836e2 100644 --- a/tensorflow/lite/tools/versioning/op_version.h +++ b/tensorflow/lite/tools/versioning/op_version.h @@ -48,6 +48,7 @@ typedef struct { } lstm; struct { bool half_pixel_centers; + bool align_corners; } resize; struct { int32_t num_dims; diff --git a/tensorflow/lite/tools/versioning/op_version_test.cc b/tensorflow/lite/tools/versioning/op_version_test.cc index 7d9039ff848..f0d8259d764 100644 --- a/tensorflow/lite/tools/versioning/op_version_test.cc +++ b/tensorflow/lite/tools/versioning/op_version_test.cc @@ -594,4 +594,64 @@ TEST(OpVersionTEst, VersioningFillTest) { TensorType_INT32}}; EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 1); } +TEST(OpVersionTest, VersioningResizeBilinearTest) { + // Default. 
+ OpSignature fake_op_sig = { + .op = BuiltinOperator_RESIZE_BILINEAR, + .input_types = + std::vector{TensorType_FLOAT32, TensorType_INT32}, + .output_types = std::vector{TensorType_FLOAT32}, + }; + EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 1); + + // align_corners=true is still version 1. + fake_op_sig.options.resize.align_corners = true; + EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 1); + + // half_pixel_centers=true must be version 3. + fake_op_sig.options.resize.align_corners = false; + fake_op_sig.options.resize.half_pixel_centers = true; + EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 3); + + // int8 input is version 2. + fake_op_sig = { + .op = BuiltinOperator_RESIZE_BILINEAR, + .input_types = std::vector{TensorType_INT8, TensorType_INT32}, + .output_types = std::vector{TensorType_INT8}, + }; + EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 2); + + fake_op_sig.options.resize.half_pixel_centers = true; + EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 3); +} +TEST(OpVersionTest, VersioningResizeNearestNeighborTest) { + // Default. + OpSignature fake_op_sig = { + .op = BuiltinOperator_RESIZE_NEAREST_NEIGHBOR, + .input_types = + std::vector{TensorType_FLOAT32, TensorType_INT32}, + .output_types = std::vector{TensorType_FLOAT32}, + }; + EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 1); + + // align_corners=true is version 3. + fake_op_sig.options.resize.align_corners = true; + EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 3); + + // half_pixel_centers=true must be version 3. + fake_op_sig.options.resize.align_corners = false; + fake_op_sig.options.resize.half_pixel_centers = true; + EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 3); + + // int8 input is version 2. 
+ fake_op_sig = { + .op = BuiltinOperator_RESIZE_NEAREST_NEIGHBOR, + .input_types = std::vector{TensorType_INT8, TensorType_INT32}, + .output_types = std::vector{TensorType_INT8}, + }; + EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 2); + + fake_op_sig.options.resize.align_corners = true; + EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 3); +} } // namespace tflite diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files index d4df3df079e..9ca7bb4fe28 100644 --- a/tensorflow/opensource_only.files +++ b/tensorflow/opensource_only.files @@ -343,6 +343,8 @@ tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip_v1.sh tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nightly_release.sh tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nonpip.sh tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/pip.sh +tensorflow/tools/ci_build/release/ubuntu_16/libtensorflow/cpu/build.sh +tensorflow/tools/ci_build/release/ubuntu_16/libtensorflow/gpu/build.sh tensorflow/tools/ci_build/release/ubuntu_16/sanity/build.sh tensorflow/tools/ci_build/release/ubuntu_16/tpu_py37_full/nonpip.sh tensorflow/tools/ci_build/release/windows/cpu_libtensorflow/nightly.bat diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 4729ce9d743..a49e4b74def 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -230,6 +230,7 @@ py_library( "//tensorflow/python/tools:module_util", "//tensorflow/python/tools/api/generator:create_python_api", "//tensorflow/python/tpu:tpu_noestimator", + "//tensorflow/python/types", "//third_party/py/numpy", ], ) @@ -655,15 +656,15 @@ tf_python_pybind_extension( "@com_google_absl//absl/types:optional", ] + if_static( extra_deps = [ - "//tensorflow/core:eager_service_proto_cc", - "//tensorflow/core:master_proto_cc", - "//tensorflow/core:worker_proto_cc", + "//tensorflow/core/protobuf:eager_service_proto_cc", + "//tensorflow/core/protobuf:master_proto_cc", + 
"//tensorflow/core/protobuf:worker_proto_cc", "//tensorflow/core:version_lib", ], otherwise = [ - "//tensorflow/core:eager_service_proto_cc_headers_only", - "//tensorflow/core:master_proto_cc_headers_only", - "//tensorflow/core:worker_proto_cc_headers_only", + "//tensorflow/core/protobuf:eager_service_proto_cc_headers_only", + "//tensorflow/core/protobuf:master_proto_cc_headers_only", + "//tensorflow/core/protobuf:worker_proto_cc_headers_only", ], ), ) @@ -996,6 +997,8 @@ cc_library( "//tensorflow/c:c_api", "//tensorflow/c:c_api_internal", "//tensorflow/c:tf_status_helper", + "//tensorflow/c:tf_tensor_internal", + "//tensorflow/c/eager:tfe_context_internal", "//tensorflow/core:framework", "//tensorflow/core:lib", ], @@ -8047,14 +8050,14 @@ tf_python_pybind_extension( "//tensorflow/core/platform", ] + if_static( extra_deps = [ - "//tensorflow/core:eager_service_proto_cc", - "//tensorflow/core:master_proto_cc", - "//tensorflow/core:worker_proto_cc", + "//tensorflow/core/protobuf:eager_service_proto_cc", + "//tensorflow/core/protobuf:master_proto_cc", + "//tensorflow/core/protobuf:worker_proto_cc", ], otherwise = [ - "//tensorflow/core:eager_service_proto_cc_headers_only", - "//tensorflow/core:master_proto_cc_headers_only", - "//tensorflow/core:worker_proto_cc_headers_only", + "//tensorflow/core/protobuf:eager_service_proto_cc_headers_only", + "//tensorflow/core/protobuf:master_proto_cc_headers_only", + "//tensorflow/core/protobuf:worker_proto_cc_headers_only", ], ), ) diff --git a/tensorflow/python/autograph/converters/control_flow.py b/tensorflow/python/autograph/converters/control_flow.py index 0cfd7b1d4e9..a903c43bcfc 100644 --- a/tensorflow/python/autograph/converters/control_flow.py +++ b/tensorflow/python/autograph/converters/control_flow.py @@ -32,6 +32,7 @@ from tensorflow.python.autograph.pyct.static_analysis import activity from tensorflow.python.autograph.pyct.static_analysis import annos from tensorflow.python.autograph.pyct.static_analysis import 
liveness from tensorflow.python.autograph.pyct.static_analysis import reaching_definitions +from tensorflow.python.autograph.pyct.static_analysis import reaching_fndefs from tensorflow.python.autograph.utils import compat_util @@ -554,7 +555,8 @@ def transform(node, ctx): graphs = cfg.build(node) node = qual_names.resolve(node) node = activity.resolve(node, ctx, None) - node = reaching_definitions.resolve(node, ctx, graphs, AnnotatedDef) + node = reaching_definitions.resolve(node, ctx, graphs) + node = reaching_fndefs.resolve(node, ctx, graphs) node = liveness.resolve(node, ctx, graphs) node = ControlFlowTransformer(ctx).visit(node) diff --git a/tensorflow/python/autograph/converters/control_flow_deprecated_py2.py b/tensorflow/python/autograph/converters/control_flow_deprecated_py2.py index d05e63662f1..203b99517dc 100644 --- a/tensorflow/python/autograph/converters/control_flow_deprecated_py2.py +++ b/tensorflow/python/autograph/converters/control_flow_deprecated_py2.py @@ -35,6 +35,7 @@ from tensorflow.python.autograph.pyct.static_analysis import activity from tensorflow.python.autograph.pyct.static_analysis import annos from tensorflow.python.autograph.pyct.static_analysis import liveness from tensorflow.python.autograph.pyct.static_analysis import reaching_definitions +from tensorflow.python.autograph.pyct.static_analysis import reaching_fndefs # TODO(mdan): Refactor functions to make them smaller. 
@@ -630,7 +631,8 @@ def transform(node, ctx): graphs = cfg.build(node) node = qual_names.resolve(node) node = activity.resolve(node, ctx, None) - node = reaching_definitions.resolve(node, ctx, graphs, AnnotatedDef) + node = reaching_definitions.resolve(node, ctx, graphs) + node = reaching_fndefs.resolve(node, ctx, graphs) node = liveness.resolve(node, ctx, graphs) node = ControlFlowTransformer(ctx).visit(node) diff --git a/tensorflow/python/autograph/g3doc/reference/control_flow.md b/tensorflow/python/autograph/g3doc/reference/control_flow.md index 79cc0f31450..cf580af7330 100644 --- a/tensorflow/python/autograph/g3doc/reference/control_flow.md +++ b/tensorflow/python/autograph/g3doc/reference/control_flow.md @@ -164,7 +164,7 @@ after if #### Python values modified in TensorFlow control flow become Tensors If a symbol is modified in a TensorFlow control flow statement, then it becomes -a `tf.Tensor`, even if it started off as a Python promitive value. +a `tf.Tensor`, even if it started off as a Python primitive value. For example, the conditional below will run as a `tf.cond` (its condition is a `tf.Tensor`), which in turn will cause `i` to become a `tf.Tensor`. diff --git a/tensorflow/python/autograph/g3doc/reference/generated_code.md b/tensorflow/python/autograph/g3doc/reference/generated_code.md index b62911b7203..389fa53a065 100644 --- a/tensorflow/python/autograph/g3doc/reference/generated_code.md +++ b/tensorflow/python/autograph/g3doc/reference/generated_code.md @@ -66,7 +66,7 @@ print(inspect.getsourcefile(converted_f)) ``` `tf.autograph.to_code` is a shortcut to obtain the generated code, and it's -equivalent with calling `inspect.getsource(tf.autograph.to_code(f))`. +equivalent with calling `inspect.getsource(tf.autograph.to_graph(f))`. 
#### Recording diagnostic information: `tf.autograph.set_verbosity` diff --git a/tensorflow/python/autograph/g3doc/reference/limitations.md b/tensorflow/python/autograph/g3doc/reference/limitations.md index 1ced1fad486..c41350466fa 100644 --- a/tensorflow/python/autograph/g3doc/reference/limitations.md +++ b/tensorflow/python/autograph/g3doc/reference/limitations.md @@ -16,6 +16,88 @@ should not be confused with TensorFlow variables. Key Term: A TensorFlow loop variable (or loop variable for short) refers to a value (typically a `tf.Tensor`) modified by a loop. See `tf.while_loop`. +### Undefined and None values in TensorFlow + +TensorFlow does not support undefined or `None` values. All tensors must have +a value. + +Example: + +``` +x = tf.cond( + tf.random.uniform(()) > 0.5, + lambda: tf.constant(1), + lambda: None) # Error -- a Tensor cannot be None +``` + +The same restriction carries over in AutoGraph. If a variable is created inside +control flow, and used after, then it must be defined before the control flow +statement: + +``` +if tf.random.uniform(()) > 0.5: + x = tf.constant(1) +else: + x = None +tf.print(x) # Error -- x may be None here +``` + +For this reason, AutoGraph forbids variables to be defined in only one branch +of a TensorFlow conditional, if the variable is used afterwards: + +``` +del x +if tf.random.uniform(()) > 0.5: + x = tf.constant(1) +else: + pass +tf.print(x) # Error -- x may be undefined here +``` + +Note that if the variable is not used after the control flow statement, then it +is considered local to the control flow block, and is not subject to these +restrictions. + +``` +del x +if tf.random.uniform(()) > 0.5: + x = tf.constant(1) # Okay -- x does not need to be returned from the TF cond +else: + pass +``` + +Similarly, variables may not be defined inside a TensorFlow loop, unless they +are local to the loop. 
A variable is local to the loop if (1) it's not used +after the loop and (2) the value from a previous iteration is not used in the +next iteration: + +``` +del x +while tf.random.uniform(()) > 0.5: # Error -- x must be defined before the loop + x = tf.constant(1) +tf.print(x) +``` + +``` +del x +while tf.random.uniform(()) > 0.5: # Okay -- x is local to the loop + x = tf.constant(1) +``` + +Avoid these limitations by defining a default value before the control flow +statement: + +``` +x = tf.constant(0) +if tf.random.uniform(()) > 0.5: + x = tf.constant(1) +tf.print(x) # Okay -- x is either 0 or 1 +``` + +Note: `None` values and undefined symbols are allowed in Eager control flow, +because Eager execution uses Python control flow, rather than TensorFlow +control flow ops. + ### Indirect modifications and hidden side effects in TensorFlow control flow Key Point: We recommend using a functional programming style, immutable Python @@ -187,6 +269,62 @@ objects, but it does support basic collection objects such as `list`, `dict`, `tuple`, `namedtuple` and their subclasses. Design your objects as subclasses of [namedtuple](https://docs.python.org/3/library/collections.html#collections.namedtuple). +#### Variables closed over by lambda functions + +AutoGraph assumes that variables that local functions close over may be used +anywhere in the parent function, because in general it is possible to hide a +function call in almost any Python statement. For this reason, these variables +are accounted for within TensorFlow loops. + +For example, the following code correctly captures `a` in the TensorFlow loop +variables: + +``` +a = 0 +def f(): + tf.print(a) +for i in tf.range(3): + a = i +f() # Prints 2 +``` + +A consequence is that these variables must be defined before the loop (see +Undefined and None values above).
So the following code will raise an error, +even if the variable is never used after the loop: + +``` +def f(): + tf.print(a) +for i in tf.range(3): # Error -- `a` must be defined before the loop. + a = i +``` + +However, lambda functions are handled differently, for reasons of backward +compatibility. Lambda functions are assumed to be used in the statement where +they are defined, or at least in the same block. + +``` +a = 0 +foo(lambda: a) # This lambda is not expected to be called anywhere else. +for i in tf.range(3): # Okay -- `a` is local to the loop. + a = i +``` + +For that reason, the following code will not work as expected for TensorFlow +loops. + +``` +a = 0 +l = lambda: tf.print(a) +for i in tf.range(3): + a = i # `a` is considered local to the loop +l() # Prints 0! +``` + +Note that these restrictions only apply to TensorFlow loops; Python +loops correctly handle closures in all cases. + + ### Python collections in TensorFlow control flow Key Point: Use TensorFlow collection classes instead of Python collections. @@ -489,69 +627,6 @@ while tf.random.uniform(()) > 0.5: x = tf.constant((1, 2, 3)) # Error -- inconsistent shapes: (), (3,) ``` -### Undefined and None values in TensorFlow - -TensorFlow does not support undefined and `None` values. All tensors must have -a value.
- -Example: - -``` -x = tf.cond( - tf.random.uniform(()) > 0.5, - lambda: tf.constant(1), - lambda: None) # Error -- a Tensor cannot be None -``` - -The same restriction carries over in AutoGraph, but only if the symbol is used -after the conditional (otherwise AutoGraph avoids making it a return value -of the `tf.cond`): - -``` -if tf.random.uniform(()) > 0.5: - x = tf.constant(1) -else: - x = None -tf.print(x) # Error -- x may be None here -``` - -A related but less obvious restriction in AutoGraph forbids symbols to be -defined in only one branch of TensorFlow control flow, if the symbol is -used afterwards: - -``` -del x -if tf.random.uniform(()) > 0.5: - x = tf.constant(1) -else: - pass -tf.print(x) # Error -- x may be undefined here -``` - -Similarly, variables defined in a loop may not be used outside the loop, again -if the symbol is used afterwards: - -``` -del x -if tf.random.uniform(()) > 0.5: - x = tf.constant(1) -tf.print(x) # Error -- x may be undefined here -``` - -Avoid these limitations by defining a default value before the control flow -statement: - -``` -x = tf.constant() -if tf.random.uniform(()) > 0.5: - x = tf.constant(1) -tf.print(x) # Okay -- x is either 0 or 1 -``` - -Note: `None` values and undefined symbols are allowed in Eager control flow, -because Eager execution uses Python control flow, rather than TensorFlow -control flow ops. 
- ### Access to source code Key point: AutoGraph can only handle functions whose source code can be diff --git a/tensorflow/python/autograph/operators/data_structures_test.py b/tensorflow/python/autograph/operators/data_structures_test.py index c5a3a3d1cac..5d835fd3771 100644 --- a/tensorflow/python/autograph/operators/data_structures_test.py +++ b/tensorflow/python/autograph/operators/data_structures_test.py @@ -106,11 +106,12 @@ class ListTest(test.TestCase): with self.cached_session() as sess: self.assertAllEqual(self.evaluate(t), [[1, 2, 3]]) - @test_util.run_v1_only("b/117943489") + @test_util.run_deprecated_v1 def test_append_tensorarray(self): l = tensor_array_ops.TensorArray(dtypes.int32, size=0, dynamic_size=True) l1 = data_structures.list_append(l, 1) l2 = data_structures.list_append(l1, 2) + with self.cached_session() as sess: self.assertAllEqual(self.evaluate(l1.stack()), [1]) self.assertAllEqual(self.evaluate(l2.stack()), [1, 2]) diff --git a/tensorflow/python/autograph/pyct/anno.py b/tensorflow/python/autograph/pyct/anno.py index 1e8595d2061..a5f3f5b33a4 100644 --- a/tensorflow/python/autograph/pyct/anno.py +++ b/tensorflow/python/autograph/pyct/anno.py @@ -93,6 +93,9 @@ class Static(NoValue): ORIG_DEFINITIONS = ( 'The value of DEFINITIONS that applied to the original code before any' ' conversion.') + DEFINED_FNS_IN = ( + 'Local function definitions that may exist when exiting the node. See' + ' reaching_fndefs.py') DEFINED_VARS_IN = ( 'Symbols defined when entering the node. See reaching_definitions.py.') LIVE_VARS_OUT = ('Symbols live when exiting the node. 
See liveness.py.') diff --git a/tensorflow/python/autograph/pyct/static_analysis/BUILD b/tensorflow/python/autograph/pyct/static_analysis/BUILD index 3620cff3fd1..0764a3e64b4 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/BUILD +++ b/tensorflow/python/autograph/pyct/static_analysis/BUILD @@ -23,6 +23,7 @@ py_library( "annos.py", "liveness.py", "reaching_definitions.py", + "reaching_fndefs.py", ], srcs_version = "PY2AND3", visibility = ["//visibility:public"], diff --git a/tensorflow/python/autograph/pyct/static_analysis/activity.py b/tensorflow/python/autograph/pyct/static_analysis/activity.py index b9e398a8fe1..ca68bc9911c 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/activity.py +++ b/tensorflow/python/autograph/pyct/static_analysis/activity.py @@ -617,9 +617,23 @@ class ActivityAnalyzer(transformer.Base): # TODO(mdan): Do remove it, it's confusing. self._enter_scope(False) node.body = self.visit(node.body) + + # The lambda body can contain nodes of types normally not found as + # statements, and may not have the SCOPE annotation needed by the CFG. + # So we attach one if necessary. + if not anno.hasanno(node.body, anno.Static.SCOPE): + anno.setanno(node.body, anno.Static.SCOPE, self.scope) + self._exit_and_record_scope(node, NodeAnno.BODY_SCOPE) + lambda_scope = self.scope self._exit_and_record_scope(node, NodeAnno.ARGS_AND_BODY_SCOPE) + + # Exception: lambdas are assumed to be used in the place where + # they are defined. Therefore, their activity is passed on to the + # calling statement. 
+ self.scope.read.update(lambda_scope.read - lambda_scope.bound) + return node def visit_With(self, node): diff --git a/tensorflow/python/autograph/pyct/static_analysis/activity_test.py b/tensorflow/python/autograph/pyct/static_analysis/activity_test.py index 7a6bfd4b820..3a1b552190a 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/activity_test.py +++ b/tensorflow/python/autograph/pyct/static_analysis/activity_test.py @@ -393,6 +393,31 @@ class ActivityAnalyzerTest(ActivityAnalyzerTestBase): self.assertScopeIs(scope, ('x', 'y'), ('y',)) self.assertSymbolSetsAre(('x', 'y'), scope.bound, 'BOUND') + def test_nested_lambda(self): + + def test_fn(a): + return lambda x: (x * a) + + node, _ = self._parse_and_analyze(test_fn) + + fn_node = node + scope = anno.getanno(fn_node, NodeAnno.BODY_SCOPE) + self.assertScopeIs(scope, ('a',), ()) + + return_node = node.body[0] + + scope = anno.getanno(return_node, anno.Static.SCOPE) + self.assertScopeIs(scope, ('a',), ()) + + lam_def_node = return_node.value + + scope = anno.getanno(lam_def_node, NodeAnno.BODY_SCOPE) + self.assertScopeIs(scope, ('a', 'x'), ()) + + scope = anno.getanno(lam_def_node, NodeAnno.ARGS_AND_BODY_SCOPE) + self.assertScopeIs(scope, ('a', 'x'), ()) + self.assertSymbolSetsAre(('x',), scope.bound, 'BOUND') + def test_nested_function_arg_defaults(self): def test_fn(a): diff --git a/tensorflow/python/autograph/pyct/static_analysis/liveness.py b/tensorflow/python/autograph/pyct/static_analysis/liveness.py index 7d64a9377d7..5502147bf80 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/liveness.py +++ b/tensorflow/python/autograph/pyct/static_analysis/liveness.py @@ -42,9 +42,6 @@ class Analyzer(cfg.GraphVisitor): def __init__(self, graph, include_annotations): super(Analyzer, self).__init__(graph) - # This allows communicating that nodes generate extra symbols, - # e.g. those that a function definition closes over. 
- self.extra_gen = {} self.include_annotations = include_annotations def init_state(self, _): @@ -56,7 +53,7 @@ class Analyzer(cfg.GraphVisitor): if anno.hasanno(node.ast_node, anno.Static.SCOPE): node_scope = anno.getanno(node.ast_node, anno.Static.SCOPE) - gen = node_scope.read | self.extra_gen.get(node.ast_node, frozenset()) + gen = node_scope.read if not self.include_annotations: gen -= node_scope.annotations # TODO(mdan): verify whether composites' parents need to be added. @@ -69,6 +66,18 @@ class Analyzer(cfg.GraphVisitor): live_out |= self.in_[n] live_in = gen | (live_out - kill) + reaching_functions = anno.getanno( + node.ast_node, anno.Static.DEFINED_FNS_IN) + for fn_ast_node in reaching_functions: + if isinstance(fn_ast_node, gast.Lambda): + # Exception: lambda functions are assumed to be used only in the + # place where they are defined, and not later. + continue + fn_scope = anno.getanno(fn_ast_node, annos.NodeAnno.ARGS_AND_BODY_SCOPE) + # Any closure of a reaching function definition is conservatively + # considered live. + live_in |= (fn_scope.read - fn_scope.bound) + else: assert self.can_ignore(node), (node.ast_node, node) @@ -84,7 +93,7 @@ class Analyzer(cfg.GraphVisitor): return prev_live_in != live_in -class WholeTreeAnalyzer(transformer.Base): +class TreeAnnotator(transformer.Base): """Runs liveness analysis on each of the functions defined in the AST. If a function defined other local functions, those will have separate CFGs. @@ -94,7 +103,7 @@ class WholeTreeAnalyzer(transformer.Base): subfunction. 
For example: def foo(): - # baz is live here + # baz is live from here on def bar(): print(baz) @@ -103,63 +112,14 @@ class WholeTreeAnalyzer(transformer.Base): """ def __init__(self, source_info, graphs, include_annotations): - super(WholeTreeAnalyzer, self).__init__(source_info) + super(TreeAnnotator, self).__init__(source_info) self.include_annotations = include_annotations self.allow_skips = False self.graphs = graphs self.current_analyzer = None - self.analyzers = {} - - def visit_FunctionDef(self, node): - parent_analyzer = self.current_analyzer - subgraph = self.graphs[node] - - # Postorder tree processing makes this a bit complicated: - # 1. construct an analyzer object and put it on stack - # 2. recursively walk the subtree; this will initialize the analyzer's - # in_ state properly (done in a block below) - # 3. run the final analysis - analyzer = Analyzer(subgraph, self.include_annotations) - self.current_analyzer = analyzer - node = self.generic_visit(node) - analyzer.visit_reverse() - - if parent_analyzer is not None: - # Wire the state between the two subgraphs' analyzers. - child_in_state = analyzer.in_[subgraph.entry] - # Exception: symbols modified in the child function are local to it - body_scope = anno.getanno(node, annos.NodeAnno.BODY_SCOPE) - for qn in body_scope.modified: - # Note: a function modifying the symbol doesn't make that symbol - # live at the function's entry. In fact when that happens it is - # probably a case of undefined assignment, like this: - # - # bar = 0 - # def foo(): - # print(bar) # bar is undefined here! - # bar = 1 - # - # Hence we use discard and not remove below. - child_in_state.discard(qn) - parent_analyzer.extra_gen[node] = frozenset(child_in_state,) - - self.analyzers[node] = analyzer - self.current_analyzer = parent_analyzer - return node - - -class Annotator(transformer.Base): - """AST visitor that annotates each control flow block with live symbols.""" - - # Note: additional nodes may be added as needed. 
- - def __init__(self, source_info, cross_function_analyzer): - super(Annotator, self).__init__(source_info) - self.cross_function_analyzer = cross_function_analyzer - self.current_analyzer = None def visit(self, node): - node = super(Annotator, self).visit(node) + node = super(TreeAnnotator, self).visit(node) if (self.current_analyzer is not None and isinstance(node, gast.stmt) and node in self.current_analyzer.graph.index): @@ -168,14 +128,23 @@ class Annotator(transformer.Base): frozenset(self.current_analyzer.in_[cfg_node])) return node - def visit_FunctionDef(self, node): + def _analyze_function(self, node, is_lambda): parent_analyzer = self.current_analyzer - self.current_analyzer = self.cross_function_analyzer.analyzers[node] + analyzer = Analyzer(self.graphs[node], self.include_annotations) + analyzer.visit_reverse() + self.current_analyzer = analyzer node = self.generic_visit(node) + self.current_analyzer = parent_analyzer return node + def visit_Lambda(self, node): + return self._analyze_function(node, is_lambda=True) + + def visit_FunctionDef(self, node): + return self._analyze_function(node, is_lambda=False) + def _block_statement_live_out(self, node): successors = self.current_analyzer.graph.stmt_next[node] stmt_live_out = set() @@ -246,9 +215,5 @@ def resolve(node, source_info, graphs, include_annotations=True): Returns: ast.AST """ - cross_function_analyzer = WholeTreeAnalyzer( - source_info, graphs, include_annotations) - node = cross_function_analyzer.visit(node) - visitor = Annotator(source_info, cross_function_analyzer) - node = visitor.visit(node) + node = TreeAnnotator(source_info, graphs, include_annotations).visit(node) return node diff --git a/tensorflow/python/autograph/pyct/static_analysis/liveness_test.py b/tensorflow/python/autograph/pyct/static_analysis/liveness_test.py index 90bcc67301a..ecb466532e2 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/liveness_test.py +++ 
b/tensorflow/python/autograph/pyct/static_analysis/liveness_test.py @@ -26,6 +26,7 @@ from tensorflow.python.autograph.pyct import qual_names from tensorflow.python.autograph.pyct import transformer from tensorflow.python.autograph.pyct.static_analysis import activity from tensorflow.python.autograph.pyct.static_analysis import liveness +from tensorflow.python.autograph.pyct.static_analysis import reaching_fndefs from tensorflow.python.platform import test @@ -49,7 +50,8 @@ class LivenessAnalyzerTestBase(test.TestCase): ctx = transformer.Context(entity_info, namer, None) node = activity.resolve(node, ctx) graphs = cfg.build(node) - liveness.resolve(node, ctx, graphs) + node = reaching_fndefs.resolve(node, ctx, graphs) + node = liveness.resolve(node, ctx, graphs) return node def assertHasLiveOut(self, node, expected): @@ -191,6 +193,73 @@ class LivenessAnalyzerTest(LivenessAnalyzerTestBase): self.assertHasLiveOut(fn_body[0], 'a') + def test_live_out_nested_functions_defined_ahead(self): + + def test_fn(a, b): + def foo(): + return a + + if b: + a = [] + + return foo + + node = self._parse_and_analyze(test_fn) + fn_body = node.body + + self.assertHasLiveOut(fn_body[1], ('a', 'foo')) + + def test_live_out_nested_functions_defined_after(self): + + def test_fn(a, b): + if b: + a = [] + + def foo(): + return a + + return foo + + node = self._parse_and_analyze(test_fn) + fn_body = node.body + + self.assertHasLiveOut(fn_body[0], ('a',)) + + def test_live_out_lambda(self): + + def test_fn(a, b): + if b: + a = [] + + foo = lambda: a + + if b: + pass + + return foo + + node = self._parse_and_analyze(test_fn) + fn_body = node.body + + self.assertHasLiveOut(fn_body[0], ('a', 'b')) + self.assertHasLiveOut(fn_body[2], ('foo',)) + + def test_live_out_nested_functions_hidden_by_argument(self): + + def test_fn(b): + def foo(a): + return a + + if b: + a = [] # pylint:disable=unused-variable + + return foo + + node = self._parse_and_analyze(test_fn) + fn_body = node.body + + 
self.assertHasLiveOut(fn_body[1], ('foo')) + def test_live_out_nested_functions_isolation(self): def test_fn(b): diff --git a/tensorflow/python/autograph/pyct/static_analysis/reaching_fndefs.py b/tensorflow/python/autograph/pyct/static_analysis/reaching_fndefs.py new file mode 100644 index 00000000000..f650c392106 --- /dev/null +++ b/tensorflow/python/autograph/pyct/static_analysis/reaching_fndefs.py @@ -0,0 +1,182 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""An analysis that determines the reach of a function definition. + +A function definition is said to reach a statement if that function may exist +(and therefore may be called) when that statement executes. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import gast + +from tensorflow.python.autograph.pyct import anno +from tensorflow.python.autograph.pyct import cfg +from tensorflow.python.autograph.pyct import transformer + + +class Definition(object): + """Definition objects describe a unique definition of a function.""" + + def __init__(self, def_node): + self.def_node = def_node + + +class _NodeState(object): + """Abstraction for the state of the CFG walk for reaching definition analysis. + + This is a value type. Only implements the strictly necessary operators. 
+ + Attributes: + value: Dict[qual_names.QN, Set[Definition, ...]], the defined symbols and + their possible definitions + """ + + def __init__(self, init_from=None): + if init_from: + self.value = set(init_from) + else: + self.value = set() + + def __eq__(self, other): + return self.value == other.value + + def __ne__(self, other): + return self.value != other.value + + def __or__(self, other): + assert isinstance(other, _NodeState) + result = _NodeState(self.value) + result.value.update(other.value) + return result + + def __add__(self, value): + result = _NodeState(self.value) + result.value.add(value) + return result + + def __repr__(self): + return 'NodeState[%s]=%s' % (id(self), repr(self.value)) + + +class Analyzer(cfg.GraphVisitor): + """CFG visitor that determines reaching definitions at statement level.""" + + def __init__(self, graph, external_defs): + super(Analyzer, self).__init__(graph) + # This allows communicating that nodes have extra reaching definitions, + # e.g. those that a function closes over. + self.external_defs = external_defs + + def init_state(self, _): + return _NodeState() + + def visit_node(self, node): + prev_defs_out = self.out[node] + + if node is self.graph.entry: + defs_in = _NodeState(self.external_defs) + else: + defs_in = prev_defs_out + + for n in node.prev: + defs_in |= self.out[n] + + defs_out = defs_in + if isinstance(node.ast_node, (gast.Lambda, gast.FunctionDef)): + defs_out += node.ast_node + + self.in_[node] = defs_in + self.out[node] = defs_out + + return prev_defs_out != defs_out + + +class TreeAnnotator(transformer.Base): + """AST visitor that annotates each symbol name with its reaching definitions. + + Simultaneously, the visitor runs the dataflow analysis on each function node, + accounting for the effect of closures. 
For example: + + def foo(): + def f(): + pass + def g(): + # `def f` reaches here + """ + + def __init__(self, source_info, graphs): + super(TreeAnnotator, self).__init__(source_info) + self.graphs = graphs + self.allow_skips = False + self.current_analyzer = None + + def _proces_function(self, node): + parent_analyzer = self.current_analyzer + subgraph = self.graphs[node] + + if (self.current_analyzer is not None + and node in self.current_analyzer.graph.index): + cfg_node = self.current_analyzer.graph.index[node] + defined_in = self.current_analyzer.in_[cfg_node].value + else: + defined_in = () + + analyzer = Analyzer(subgraph, defined_in) + analyzer.visit_forward() + + self.current_analyzer = analyzer + node = self.generic_visit(node) + self.current_analyzer = parent_analyzer + return node + + def visit_FunctionDef(self, node): + return self._proces_function(node) + + def visit_Lambda(self, node): + return self._proces_function(node) + + def visit(self, node): + # This can happen before entering the top level function + if (self.current_analyzer is not None + and node in self.current_analyzer.graph.index): + cfg_node = self.current_analyzer.graph.index[node] + anno.setanno(node, anno.Static.DEFINED_FNS_IN, + self.current_analyzer.in_[cfg_node].value) + + extra_node = anno.getanno(node, anno.Basic.EXTRA_LOOP_TEST, default=None) + if extra_node is not None: + cfg_node = self.current_analyzer.graph.index[extra_node] + anno.setanno(extra_node, anno.Static.DEFINED_FNS_IN, + self.current_analyzer.in_[cfg_node].value) + + return super(TreeAnnotator, self).visit(node) + + +def resolve(node, source_info, graphs): + """Resolves reaching definitions for each symbol. 
+ + Args: + node: ast.AST + source_info: transformer.SourceInfo + graphs: Dict[ast.FunctionDef, cfg.Graph] + Returns: + ast.AST + """ + visitor = TreeAnnotator(source_info, graphs) + node = visitor.visit(node) + return node diff --git a/tensorflow/python/autograph/pyct/static_analysis/reaching_fndefs_test.py b/tensorflow/python/autograph/pyct/static_analysis/reaching_fndefs_test.py new file mode 100644 index 00000000000..500ac91f64f --- /dev/null +++ b/tensorflow/python/autograph/pyct/static_analysis/reaching_fndefs_test.py @@ -0,0 +1,58 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for reaching_fndefs module.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.autograph.pyct import anno +from tensorflow.python.autograph.pyct import cfg +from tensorflow.python.autograph.pyct import naming +from tensorflow.python.autograph.pyct import parser +from tensorflow.python.autograph.pyct import qual_names +from tensorflow.python.autograph.pyct import transformer +from tensorflow.python.autograph.pyct.static_analysis import activity +from tensorflow.python.autograph.pyct.static_analysis import reaching_definitions +from tensorflow.python.autograph.pyct.static_analysis import reaching_fndefs +from tensorflow.python.platform import test + + +class ReachingFndefsAnalyzerTest(test.TestCase): + + def _parse_and_analyze(self, test_fn): + # TODO(mdan): Use a custom FunctionTransformer here. + node, source = parser.parse_entity(test_fn, future_features=()) + entity_info = transformer.EntityInfo( + name=test_fn.__name__, + source_code=source, + source_file=None, + future_features=(), + namespace={}) + node = qual_names.resolve(node) + namer = naming.Namer({}) + ctx = transformer.Context(entity_info, namer, None) + node = activity.resolve(node, ctx) + graphs = cfg.build(node) + node = reaching_definitions.resolve(node, ctx, graphs) + node = reaching_fndefs.resolve(node, ctx, graphs) + return node + + def assertHasFnDefs(self, node): + anno.getanno(node, anno.Static.DEFINED_FNS_IN) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/python/autograph/utils/tensor_list_test.py b/tensorflow/python/autograph/utils/tensor_list_test.py index bbbc3bf6918..017d97bb040 100644 --- a/tensorflow/python/autograph/utils/tensor_list_test.py +++ b/tensorflow/python/autograph/utils/tensor_list_test.py @@ -34,7 +34,6 @@ class TensorListTest(test.TestCase): def _shape(self, 
shape_tuple): return constant(shape_tuple, dtypes.int32) - @test_util.run_v1_only("b/117943489") def test_dynamic_list_append(self): l = [] l = tl.dynamic_list_append(l, 1) diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py index dd8e64ac182..1c244c1b297 100644 --- a/tensorflow/python/client/session_test.py +++ b/tensorflow/python/client/session_test.py @@ -1917,6 +1917,9 @@ class SessionTest(test_util.TensorFlowTestCase): a = constant_op.constant(1) b = constant_op.constant(2) c = a + b + # Ensure if the same kernel with the same arguments is executed then its + # execution is logged. + d = a + b else: # Passing the config to the server, but not the session should still # result in logging device placement. @@ -1925,12 +1928,16 @@ class SessionTest(test_util.TensorFlowTestCase): a = constant_op.constant(1) b = constant_op.constant(2) c = a + b + d = a + b with session.Session(server.target) as sess: with CaptureStderr() as log: - sess.run(c) + c, d = sess.run([c, d]) + self.assertEqual(c, 3) + self.assertEqual(d, 3) # Ensure that we did log device placement. 
- self.assertTrue('/replica:0/task:0/device:CPU:0' in str(log), str(log)) + add_executions = [l for l in str(log).splitlines() if 'AddV2' in l] + self.assertEqual(len(add_executions), 2) @test_util.run_v1_only('b/120545219') def testLocalMasterSessionTimeout(self): diff --git a/tensorflow/python/client/tf_session_helper.cc b/tensorflow/python/client/tf_session_helper.cc index 78a1613c86c..cb960fd599a 100644 --- a/tensorflow/python/client/tf_session_helper.cc +++ b/tensorflow/python/client/tf_session_helper.cc @@ -89,7 +89,8 @@ void TF_Run_wrapper_helper(TF_DeprecatedSession* session, const char* handle, input_names.push_back(key_string); inputs_safe.emplace_back(make_safe(static_cast(nullptr))); - s = PyArrayToTF_Tensor(value, &inputs_safe.back()); + s = NdarrayToTensor(nullptr /*ctx*/, value, &inputs_safe.back(), + true /*convert_to_string*/); if (!s.ok()) { Set_TF_Status_from_Status(out_status, s); return; @@ -367,7 +368,7 @@ void TF_SessionRun_wrapper_helper(TF_Session* session, const char* handle, // cleaned up properly. // // Memory management: - // PyArrayToTF_Tensor() creates a new ndarray PyObject from the input + // NdarrayToTensor() creates a new ndarray PyObject from the input // ndarray. We manage the new ndarray's lifetime in order to keep the // underlying data buffer alive (the new ndarray also guarantees a contiguous // data buffer). 
The new ndarray's data buffer is used to create the @@ -382,7 +383,7 @@ void TF_SessionRun_wrapper_helper(TF_Session* session, const char* handle, std::vector input_vals_safe; for (PyObject* ndarray : input_ndarrays) { input_vals_safe.emplace_back(make_safe(static_cast(nullptr))); - s = PyArrayToTF_Tensor(ndarray, &input_vals_safe.back()); + s = NdarrayToTensor(nullptr, ndarray, &input_vals_safe.back(), true); if (!s.ok()) { Set_TF_Status_from_Status(out_status, s); return; diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 51597fb0596..26d291877cb 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 5, 7) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 5, 13) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None diff --git a/tensorflow/python/data/experimental/kernel_tests/cardinality_test.py b/tensorflow/python/data/experimental/kernel_tests/cardinality_test.py index ffc98b917d2..13d56a84d3c 100644 --- a/tensorflow/python/data/experimental/kernel_tests/cardinality_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/cardinality_test.py @@ -75,9 +75,8 @@ def _test_combinations(): ("FromTensors2", lambda: dataset_ops.Dataset.from_tensors((0, 1)), 1), ("FromTensorSlices1", lambda: dataset_ops.Dataset.from_tensor_slices([0, 0, 0]), 3), - ("FromTensorSlices2", - lambda: dataset_ops.Dataset.from_tensor_slices(([0, 0, 0], [1, 1, 1])), - 3), + ("FromTensorSlices2", lambda: dataset_ops.Dataset.from_tensor_slices( + ([0, 0, 0], [1, 1, 1])), 3), 
("Interleave1", lambda: dataset_ops.Dataset.range(5).interleave( lambda _: dataset_ops.Dataset.from_tensors(0), cycle_length=1), cardinality.UNKNOWN), @@ -134,6 +133,19 @@ def _test_combinations(): lambda: dataset_ops.Dataset.range(5).filter(lambda _: True).take(2), cardinality.UNKNOWN), ("Take4", lambda: dataset_ops.Dataset.range(5).repeat().take(2), 2), + ("Unbatch1", lambda: dataset_ops.Dataset.range(5).batch( + 2, drop_remainder=True).unbatch(), 4), + ("Unbatch2", lambda: dataset_ops.Dataset.range(5).batch( + 2, drop_remainder=False).unbatch(), cardinality.UNKNOWN), + ("Unbatch3", lambda: dataset_ops.Dataset.range(5).batch( + 2, drop_remainder=True).filter(lambda _: True).unbatch(), + cardinality.UNKNOWN), + ("Unbatch4", lambda: dataset_ops.Dataset.range(5).batch( + 2, drop_remainder=True).repeat().unbatch(), cardinality.INFINITE), + ("Unbatch5", lambda: dataset_ops.Dataset.zip(( + dataset_ops.Dataset.range(4).batch(2, drop_remainder=False), + dataset_ops.Dataset.range(5).batch(2, drop_remainder=True), + )).unbatch(), 4), ("Window1", lambda: dataset_ops.Dataset.range(5).window( size=2, shift=2, drop_remainder=True), 2), ("Window2", lambda: dataset_ops.Dataset.range(5).window( @@ -144,12 +156,12 @@ def _test_combinations(): (dataset_ops.Dataset.range(5), dataset_ops.Dataset.range(3))), 3), ("Zip3", lambda: dataset_ops.Dataset.zip((dataset_ops.Dataset.range( 5), dataset_ops.Dataset.range(3).repeat())), 5), - ("Zip4", lambda: dataset_ops.Dataset.zip((dataset_ops.Dataset.range( - 5).repeat(), dataset_ops.Dataset.range(3).repeat())), - cardinality.INFINITE), - ("Zip5", lambda: dataset_ops.Dataset.zip((dataset_ops.Dataset.range( - 5), dataset_ops.Dataset.range(3).filter(lambda _: True))), - cardinality.UNKNOWN), + ("Zip4", lambda: dataset_ops.Dataset.zip( + (dataset_ops.Dataset.range(5).repeat(), dataset_ops.Dataset.range(3). 
+ repeat())), cardinality.INFINITE), + ("Zip5", lambda: dataset_ops.Dataset.zip( + (dataset_ops.Dataset.range(5), dataset_ops.Dataset.range(3).filter( + lambda _: True))), cardinality.UNKNOWN), ] def reduce_fn(x, y): diff --git a/tensorflow/python/data/experimental/kernel_tests/csv_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/csv_dataset_test.py index 941ca209848..13948305aea 100644 --- a/tensorflow/python/data/experimental/kernel_tests/csv_dataset_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/csv_dataset_test.py @@ -41,9 +41,9 @@ class CsvDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): def _setup_files(self, inputs, linebreak='\n', compression_type=None): filenames = [] - for i, ip in enumerate(inputs): + for i, file_rows in enumerate(inputs): fn = os.path.join(self.get_temp_dir(), 'temp_%d.csv' % i) - contents = linebreak.join(ip).encode('utf-8') + contents = linebreak.join(file_rows).encode('utf-8') if compression_type is None: with open(fn, 'wb') as f: f.write(contents) @@ -580,6 +580,13 @@ class CsvDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): inputs, [[0, 0, 0, 0], [1, 1, 1, 0], [0, 2, 2, 2]], record_defaults=record_defaults) + def testCsvDataset_immutableParams(self): + inputs = [['a,b,c', '1,2,3', '4,5,6']] + filenames = self._setup_files(inputs) + select_cols = ['a', 'c'] + _ = readers.make_csv_dataset( + filenames, batch_size=1, select_columns=select_cols) + self.assertAllEqual(select_cols, ['a', 'c']) if __name__ == '__main__': test.main() diff --git a/tensorflow/python/data/experimental/ops/data_service_ops.py b/tensorflow/python/data/experimental/ops/data_service_ops.py index c1c23668db0..67dfadb4841 100644 --- a/tensorflow/python/data/experimental/ops/data_service_ops.py +++ b/tensorflow/python/data/experimental/ops/data_service_ops.py @@ -84,15 +84,29 @@ class _DataServiceDatasetV2(dataset_ops.DatasetSource): if task_refresh_interval_hint_ms is None: 
task_refresh_interval_hint_ms = dataset_ops.AUTOTUNE + self._dataset_id = ops.convert_to_tensor( + dataset_id, dtype=dtypes.int64, name="dataset_id") + self._processing_mode = ops.convert_to_tensor( + processing_mode, dtype=dtypes.string, name="processing_mode") + self._address = ops.convert_to_tensor( + address, dtype=dtypes.string, name="address") + self._protocol = ops.convert_to_tensor( + protocol, dtype=dtypes.string, name="protocol") + self._job_name = ops.convert_to_tensor( + job_name, dtype=dtypes.string, name="job_name") + self._max_outstanding_requests = ops.convert_to_tensor( + max_outstanding_requests, + dtype=dtypes.int64, + name="max_outstanding_requests") self._element_spec = input_dataset.element_spec variant_tensor = gen_experimental_dataset_ops.data_service_dataset( - dataset_id=dataset_id, - processing_mode=processing_mode, - address=address, - protocol=protocol, - job_name=job_name, - max_outstanding_requests=max_outstanding_requests, + dataset_id=self._dataset_id, + processing_mode=self._processing_mode, + address=self._address, + protocol=self._protocol, + job_name=self._job_name, + max_outstanding_requests=self._max_outstanding_requests, task_refresh_interval_hint_ms=task_refresh_interval_hint_ms, iteration_counter=gen_experimental_dataset_ops.dummy_iteration_counter( ), @@ -297,5 +311,8 @@ def distribute(processing_mode, Returns: Dataset: A `Dataset` of the elements produced by the data service. 
""" - return _distribute(processing_mode, service, job_name, - max_outstanding_requests) + return _distribute( + processing_mode=processing_mode, + service=service, + job_name=job_name, + max_outstanding_requests=max_outstanding_requests) diff --git a/tensorflow/python/data/experimental/ops/readers.py b/tensorflow/python/data/experimental/ops/readers.py index 8795a206bb1..b8f4c34f40e 100644 --- a/tensorflow/python/data/experimental/ops/readers.py +++ b/tensorflow/python/data/experimental/ops/readers.py @@ -183,24 +183,30 @@ def _get_sorted_col_indices(select_columns, column_names): """Transforms select_columns argument into sorted column indices.""" names_to_indices = {n: i for i, n in enumerate(column_names)} num_cols = len(column_names) - for i, v in enumerate(select_columns): + + results = [] + for v in select_columns: + # If value is already an int, check if it's valid. if isinstance(v, int): if v < 0 or v >= num_cols: raise ValueError( "Column index %d specified in select_columns out of valid range." % v) - continue - if v not in names_to_indices: + results.append(v) + # Otherwise, check that it's a valid column name and convert to the + # the relevant column index. + elif v not in names_to_indices: raise ValueError( "Value '%s' specified in select_columns not a valid column index or " "name." 
% v) - select_columns[i] = names_to_indices[v] + else: + results.append(names_to_indices[v]) # Sort and ensure there are no duplicates - result = sorted(set(select_columns)) - if len(result) != len(select_columns): + results = sorted(set(results)) + if len(results) != len(select_columns): raise ValueError("select_columns contains duplicate columns") - return result + return results def _maybe_shuffle_and_repeat( diff --git a/tensorflow/python/data/kernel_tests/data_service_ops_test.py b/tensorflow/python/data/kernel_tests/data_service_ops_test.py index eac1c674b2d..217c586caef 100644 --- a/tensorflow/python/data/kernel_tests/data_service_ops_test.py +++ b/tensorflow/python/data/kernel_tests/data_service_ops_test.py @@ -215,6 +215,21 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): val = next(iterator).numpy() self.assertEqual(i, val) + @combinations.generate(test_base.eager_only_combinations()) + def testMaxOutstandingRequests(self): + num_elements = 10 + num_workers = 3 + service = self.create_cluster(num_workers) + ds = dataset_ops.Dataset.range(num_elements) + ds = ds.apply( + data_service_ops._distribute( + "parallel_epochs", + service, + max_outstanding_requests=1, + task_refresh_interval_hint_ms=20)) + self.assertCountEqual(num_workers * list(range(num_elements)), + self.getDatasetOutput(ds)) + @combinations.generate(test_base.eager_only_combinations()) def testInsideFunction(self): num_workers = 3 diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index 5dccb47fb19..a7e62a2dc7c 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -452,6 +452,7 @@ cuda_py_test( "//tensorflow/python:array_ops", "//tensorflow/python:errors", "//tensorflow/python:framework_ops", + "//tensorflow/python:framework_test_lib", "//tensorflow/python:math_ops", "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/eager:context", @@ -1647,3 +1648,25 @@ py_test( 
"@absl_py//absl/testing:parameterized", ], ) + +cuda_py_test( + name = "strategy_common_test", + srcs = ["strategy_common_test.py"], + tags = [ + "multi_and_single_gpu", + # TODO(b/155301154): Enable this test on multi-gpu guitar once multi process + # runner can run on guitar. + "noguitar", + ], + xla_enable_strict_auto_jit = True, + deps = [ + ":combinations", + ":reduce_util", + ":strategy_combinations", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", + "//tensorflow/python/eager:def_function", + "@absl_py//absl/testing:parameterized", + ], +) diff --git a/tensorflow/python/distribute/cross_device_ops.py b/tensorflow/python/distribute/cross_device_ops.py index 0a662908323..8c8970f4aeb 100644 --- a/tensorflow/python/distribute/cross_device_ops.py +++ b/tensorflow/python/distribute/cross_device_ops.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import collections +import threading import enum import six @@ -31,7 +32,7 @@ from tensorflow.python.distribute import reduce_util from tensorflow.python.distribute import tpu_values from tensorflow.python.distribute import values as value_lib from tensorflow.python.eager import context -from tensorflow.python.eager import def_function +from tensorflow.python.eager import executor from tensorflow.python.framework import kernels from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util @@ -948,6 +949,20 @@ class CollectiveAllReduce(CrossDeviceOps): self._collective_keys = (collective_keys or cross_device_utils.CollectiveKeys()) self._communication = communication + # In a multi threaded eager program we need to ensure different groups of + # collectives don't interleave each other, otherwise there will be deadlock. + self._lock = threading.Lock() + + # Collective ops requires all devices to participate and is blocking. 
In + # eager, we need one async executor for each device to be able to launch + # them altogether. Note that async doesn't imply concurrency. Within an + # async executor operations are still executed sequentially. In graph or + # function building, the executors are not used. + self._executors = [] + for _ in range(self._num_gpus_per_worker or 1): + # If num_gpus_per_worker is zero, we assume there's only one device (CPU). + self._executors.append(executor.new_executor(enable_async=True)) + super(CollectiveAllReduce, self).__init__() @property @@ -1059,33 +1074,26 @@ class CollectiveAllReduce(CrossDeviceOps): "num_workers = %d, communication_hint = %s, num_packs = %d" % (batch_size, self._num_workers, communication, len(packs)), 10) - def batch_fn(): - """Wrapper function around batched all-reduce calls.""" - reduced_values = [] - for pack in packs: - # By placing all CollectiveReduce ops in a pack under single name scope, - # we ensure they will be picked up by the `ScopedAllocator` grappler - # optimizer and packed into a single all-reduce. - with ops.name_scope("allreduce"): - for per_replica in pack: - # Add control dependencies per device from the last gradients to the - # current set, in order to serialize NCCL launches. - if (communication == CollectiveCommunication.NCCL.value and - reduced_values): - control_inputs = [g for g in reduced_values[-1]] - else: - control_inputs = None - reduced_values.append( - cross_device_utils.build_collective_reduce( - per_replica.values, self._num_workers, - self._collective_keys, "Add", "Id", communication, - control_inputs)) - return reduced_values + reduced_values = [] + for pack in packs: + # By placing all CollectiveReduce ops in a pack under single name scope, + # we ensure they will be picked up by the `ScopedAllocator` grappler + # optimizer and packed into a single all-reduce. 
+ with self._lock, ops.name_scope("allreduce"): + for per_replica in pack: + # Add control dependencies per device from the last gradients to the + # current set, in order to serialize NCCL launches. + if (communication == CollectiveCommunication.NCCL.value and + reduced_values): + control_inputs = list(reduced_values[-1]) + else: + control_inputs = None + reduced_values.append( + cross_device_utils.build_collective_reduce( + per_replica.values, self._num_workers, + self._collective_keys, "Add", "Id", communication, + control_inputs, executors=self._executors)) - if context.executing_eagerly(): - batch_fn = def_function.function(batch_fn) - - reduced_values = batch_fn() mirrored = [] # Reverse the order of reduced value to recover the order in the input. for value in reversed(reduced_values): @@ -1134,6 +1142,12 @@ class CollectiveAllReduce(CrossDeviceOps): mirrored.append(value_lib.regroup(value, wrap_class=value_lib.Mirrored)) return mirrored + def __deepcopy__(self, memo): + # distribute_coordinator deep-copies the strategy object, so + # CollectiveAllReduce needs to support deep copy as well. + return CollectiveAllReduce(self._num_workers, self._num_gpus_per_worker, + self._collective_keys, self._communication) + def choose_the_best(devices, session_config=None): """Find the best CrossDeviceOps locally given a `tf.compat.v1.ConfigProto`. 
diff --git a/tensorflow/python/distribute/cross_device_ops_test.py b/tensorflow/python/distribute/cross_device_ops_test.py index e1aa2bea97c..09de4306199 100644 --- a/tensorflow/python/distribute/cross_device_ops_test.py +++ b/tensorflow/python/distribute/cross_device_ops_test.py @@ -19,6 +19,9 @@ from __future__ import division from __future__ import print_function import itertools +import os +import threading +import time from absl.testing import parameterized import numpy as np @@ -39,6 +42,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import kernels from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import collective_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables @@ -835,6 +839,64 @@ class CollectiveAllReduceTest(multi_worker_test_base.MultiWorkerTestBase, variable_length=variable_length, local_mode=True) + @combinations.generate( + combinations.combine( + required_gpus=2, + mode="eager", + communication=[ + CollectiveCommunication.NCCL, CollectiveCommunication.RING + ])) + def testEagerMultiThread(self, communication): + collective, devices, _ = self._get_test_objects( + None, + None, + num_gpus=2, + communication=communication, + use_strategy_object=False, + local_mode=True) + + # We would like to simulate the following sequence: + # thread-0 device0 device1 + # thread-1 device0 device1 + # If the kernel launch sequence is as-is the program will deadlock since + # NCCL requires the launch order to be same on each device. 
+ v0 = _make_per_replica([1.0 for _ in devices], devices) + v1 = _make_per_replica([2.0 for _ in devices], devices) + + # Add a delay to collective_ops.all_reduce according to the input tensors + # index in `sequence.` + sequence = [v0.values[0], v1.values[0], v1.values[1], v0.values[1]] + all_reduce = collective_ops.all_reduce + + def delayed_all_reduce(input_tensor, *args, **kwargs): + for idx, v in enumerate(sequence): + if input_tensor is v: + time.sleep(idx) + break + return all_reduce(input_tensor, *args, **kwargs) + + with test.mock.patch.object(collective_ops, "all_reduce", + delayed_all_reduce): + # We only use NCCL for batch reduce with two or more values, so we use two + # values here. + + def thread_fn(): + reduced = collective.batch_reduce(reduce_util.ReduceOp.SUM, [(v0, v0), + (v0, v0)]) + self.assertAllEqual(reduced[0].values, [2.0, 2.0]) + self.assertAllEqual(reduced[1].values, [2.0, 2.0]) + + t = threading.Thread(target=thread_fn) + t.start() + reduced = collective.batch_reduce(reduce_util.ReduceOp.SUM, [(v1, v1), + (v1, v1)]) + self.assertAllEqual(reduced[0].values, [4.0, 4.0]) + self.assertAllEqual(reduced[1].values, [4.0, 4.0]) + t.join() + if __name__ == "__main__": + # Set default inter op thread pool size to one to ensure we don't exhaust the + # thread pool with the additional executors to run collectives in eager. + os.environ["TF_NUM_INTEROP_THREADS"] = "1" test.main() diff --git a/tensorflow/python/distribute/cross_device_utils.py b/tensorflow/python/distribute/cross_device_utils.py index f9917385b59..d7be93ae2c4 100644 --- a/tensorflow/python/distribute/cross_device_utils.py +++ b/tensorflow/python/distribute/cross_device_utils.py @@ -337,10 +337,12 @@ def build_collective_reduce(input_tensors, reduction_op='Add', unary_op='Id', communication_hint='AUTO', - control_inputs=None): + control_inputs=None, + executors=None): """Build a subgraph that does one full all-reduce, using the collective Op. 
- This method must be called in graph mode or inside a tf.function. + If called in eager mode, it's required to supply a list of async executors for + each input Tensor. Args: input_tensors: tensors within a single worker graph that are to be reduced @@ -355,6 +357,7 @@ def build_collective_reduce(input_tensors, implementation. control_inputs: if not None, add control edges between control_inputs and (index-wise) corresponding collective_reduce tensors + executors: a list of async executor. Required for eager execution. Returns: An array of final tensors, one per device, computed by the full reduction. @@ -362,9 +365,11 @@ def build_collective_reduce(input_tensors, Raises: ValueError: There must be at least two tensors over all the workers. """ - assert not context.executing_eagerly(), ( - 'build_collective_reduce can only be called in graph mode or inside ' - 'tf.function') + if context.executing_eagerly(): + if (not executors or len(executors) != len(input_tensors) or + not all(e.is_async() for e in executors)): + raise ValueError( + 'collectives requires async executors for each device in eager mode') group_size = len(input_tensors) * num_workers if group_size < 2: @@ -375,15 +380,19 @@ def build_collective_reduce(input_tensors, out_tensors = [] for idx, input_tensor in enumerate(input_tensors): - with ops.device(input_tensor.device): - with ops.control_dependencies( - _control_input(input_tensors, control_inputs, idx)): - out_tensor = collective_ops.all_reduce(input_tensor, group_size, - group_key, instance_key, - reduction_op, unary_op, - subdiv_offsets, - communication_hint) - out_tensors.append(out_tensor) + if context.executing_eagerly(): + executor_scope = context.executor_scope(executors[idx]) + else: + executor_scope = ops.NullContextmanager() + with executor_scope, \ + ops.device(input_tensor.device), \ + ops.control_dependencies( + _control_input(input_tensors, control_inputs, idx)): + out_tensor = collective_ops.all_reduce(input_tensor, group_size, + 
group_key, instance_key, + reduction_op, unary_op, + subdiv_offsets, communication_hint) + out_tensors.append(out_tensor) return out_tensors diff --git a/tensorflow/python/distribute/custom_training_loop_models_test.py b/tensorflow/python/distribute/custom_training_loop_models_test.py index 3c748bd7364..48f2af0349a 100644 --- a/tensorflow/python/distribute/custom_training_loop_models_test.py +++ b/tensorflow/python/distribute/custom_training_loop_models_test.py @@ -378,6 +378,46 @@ class KerasModelsTest(test.TestCase, parameterized.TestCase): for model_v, model2_v in zip(model.variables, model2.variables): self.assertAllClose(model_v.numpy(), model2_v.numpy()) + @combinations.generate( + combinations.combine( + distribution=strategy_combinations.all_strategies, mode=["eager"])) + def test_nested_tf_functions_with_control_flow(self, distribution): + inputs = np.random.random((10, 3)).astype(np.float32) + targets = np.ones((10, 4), dtype=np.float32) + dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)).repeat() + dataset = dataset.batch(10, drop_remainder=True) + input_iterator = iter(distribution.experimental_distribute_dataset(dataset)) + + def get_model(): + x = keras.layers.Input(shape=(3,), name="input") + y = keras.layers.Dense(4, name="dense")(x) + model = keras.Model(x, y) + return model + + with distribution.scope(): + model = get_model() + optimizer = keras.optimizer_v2.gradient_descent.SGD(0.1, momentum=0.01) + + @def_function.function + def train_step(iterator): + + def step_fn(inputs): + images, targets = inputs + with backprop.GradientTape() as tape: + outputs = model(images) + loss = math_ops.reduce_sum(outputs - targets) + grads = tape.gradient(loss, model.variables) + optimizer.apply_gradients(zip(grads, model.variables)) + + distribution.run(step_fn, args=(next(iterator),)) + + @def_function.function + def train_steps(iterator): + for _ in math_ops.range(10): + train_step(iterator) + + train_steps(input_iterator) + 
@combinations.generate( combinations.combine( distribution=strategy_combinations.all_strategies, diff --git a/tensorflow/python/distribute/distribute_lib.py b/tensorflow/python/distribute/distribute_lib.py index d17a594cb5e..6baa15f59c1 100644 --- a/tensorflow/python/distribute/distribute_lib.py +++ b/tensorflow/python/distribute/distribute_lib.py @@ -1912,9 +1912,8 @@ class StrategyExtendedV2(object): def _reduce(self, reduce_op, value): # Default implementation until we have an implementation for each strategy. - return self._local_results( - self.reduce_to(reduce_op, value, - device_util.current() or "/device:CPU:0"))[0] + dst = device_util.current() or self._default_device or "/device:CPU:0" + return self._local_results(self.reduce_to(reduce_op, value, dst))[0] def reduce_to(self, reduce_op, value, destinations, experimental_hints=None): """Combine (via e.g. sum or mean) values across replicas. diff --git a/tensorflow/python/distribute/multi_process_runner_test.py b/tensorflow/python/distribute/multi_process_runner_test.py index 47c3a744419..1413777d0bc 100644 --- a/tensorflow/python/distribute/multi_process_runner_test.py +++ b/tensorflow/python/distribute/multi_process_runner_test.py @@ -160,7 +160,7 @@ class MultiProcessRunnerTest(test.TestCase): for i in range(0, 10): print( 'index {}, iteration {}'.format(self._worker_idx(), i), flush=True) - time.sleep(1) + time.sleep(5) mpr = multi_process_runner.MultiProcessRunner( proc_func, diff --git a/tensorflow/python/distribute/multi_worker_continuous_run_test.py b/tensorflow/python/distribute/multi_worker_continuous_run_test.py index 90484a12423..437255c1015 100644 --- a/tensorflow/python/distribute/multi_worker_continuous_run_test.py +++ b/tensorflow/python/distribute/multi_worker_continuous_run_test.py @@ -33,10 +33,13 @@ from tensorflow.python.eager import context from tensorflow.python.eager import def_function from tensorflow.python.eager import test from tensorflow.python.framework import config +from 
tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import variable_scope + NUM_WORKERS = 5 @@ -84,9 +87,10 @@ class MultiWorkerContinuousRunTest(test.TestCase, parameterized.TestCase): for _ in range(20): worker_step_fn(worker_id) - multi_process_runner.run( - worker_fn, - cluster_spec=test_base.create_cluster_spec(num_workers=NUM_WORKERS)) + with test_util.skip_if_error(self, errors_impl.UnavailableError): + multi_process_runner.run( + worker_fn, + cluster_spec=test_base.create_cluster_spec(num_workers=NUM_WORKERS)) @combinations.generate(combinations.combine(mode=['eager'])) def testVariableInitializationWithChangingShape(self, mode): @@ -116,9 +120,10 @@ class MultiWorkerContinuousRunTest(test.TestCase, parameterized.TestCase): for i in range(20): worker_step_fn(worker_id, num_dims=(i + 1)) - multi_process_runner.run( - worker_fn, - cluster_spec=test_base.create_cluster_spec(num_workers=NUM_WORKERS)) + with test_util.skip_if_error(self, errors_impl.UnavailableError): + multi_process_runner.run( + worker_fn, + cluster_spec=test_base.create_cluster_spec(num_workers=NUM_WORKERS)) if __name__ == '__main__': diff --git a/tensorflow/python/distribute/multi_worker_test_base.py b/tensorflow/python/distribute/multi_worker_test_base.py index a80ca831e4b..408cad2ca0a 100644 --- a/tensorflow/python/distribute/multi_worker_test_base.py +++ b/tensorflow/python/distribute/multi_worker_test_base.py @@ -50,6 +50,7 @@ from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import coordinator from tensorflow.python.training import server_lib +from tensorflow.python.util import deprecation from tensorflow.python.util import nest from tensorflow.python.util.compat import collections_abc @@ -559,6 +560,10 @@ class 
MultiWorkerMultiProcessTest(test.TestCase): return subprocess.Popen( cmd_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env) + @deprecation.deprecated( + None, '`run_multiple_tasks_in_processes` is deprecated; any new test ' + 'requiring multiple processes should use `multi_process_runner` for ' + 'better support of log printing, streaming, and more functionality.') def run_multiple_tasks_in_processes(self, cmd_args, cluster_spec): """Run `cmd_args` in a process for each task in `cluster_spec`.""" processes = {} @@ -570,6 +575,10 @@ class MultiWorkerMultiProcessTest(test.TestCase): processes[task_type].append(p) return processes + @deprecation.deprecated( + None, '`join_independent_workers` is deprecated; any new test ' + 'requiring multiple processes should use `multi_process_runner` for ' + 'better support of log printing, streaming, and more functionality.') def join_independent_workers(self, worker_processes): return_codes = [] for p in nest.flatten(worker_processes): @@ -585,6 +594,10 @@ class MultiWorkerMultiProcessTest(test.TestCase): for return_code in return_codes: self.assertEqual(return_code, 0) + @deprecation.deprecated( + None, '`stream_stderr` is deprecated; any new test ' + 'requiring multiple processes should use `multi_process_runner` for ' + 'better support of log printing, streaming, and more functionality.') def stream_stderr(self, processes, print_only_first=False): """Consume stderr of all processes and print to stdout. 
diff --git a/tensorflow/python/distribute/parallel_device/BUILD b/tensorflow/python/distribute/parallel_device/BUILD index e7526a56f66..930816d4407 100644 --- a/tensorflow/python/distribute/parallel_device/BUILD +++ b/tensorflow/python/distribute/parallel_device/BUILD @@ -1,4 +1,8 @@ +load("//tensorflow:tensorflow.bzl", "tf_custom_op_library", "tf_gen_op_wrapper_py") +load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library") + package( + default_visibility = ["//tensorflow:internal"], licenses = ["notice"], # Apache 2.0 ) @@ -13,6 +17,7 @@ py_library( srcs = ["parallel_device.py"], srcs_version = "PY2AND3", deps = [ + ":parallel_device_ops", ":saving", "//tensorflow/python:_pywrap_parallel_device", ], @@ -25,6 +30,25 @@ py_library( deps = ["//tensorflow/python:framework_ops"], ) +tf_gen_op_wrapper_py( + name = "parallel_device_ops_py", + out = "gen_parallel_device_ops.py", + deps = ["//tensorflow/c/eager/parallel_device:parallel_device_ops"], +) + +tf_custom_op_library( + name = "_parallel_device_ops.so", + srcs = ["//tensorflow/c/eager/parallel_device:parallel_device_ops_srcs"], +) + +tf_custom_op_py_library( + name = "parallel_device_ops", + dso = [":_parallel_device_ops.so"], + kernels = ["//tensorflow/c/eager/parallel_device:parallel_device_ops"], + visibility = ["//tensorflow:internal"], + deps = [":parallel_device_ops_py"], +) + py_test( name = "parallel_device_test", srcs = ["parallel_device_test.py"], diff --git a/tensorflow/python/distribute/parallel_device/parallel_device.py b/tensorflow/python/distribute/parallel_device/parallel_device.py index 982b061cdb7..2dbdc653a64 100644 --- a/tensorflow/python/distribute/parallel_device/parallel_device.py +++ b/tensorflow/python/distribute/parallel_device/parallel_device.py @@ -22,11 +22,17 @@ import contextlib import threading from tensorflow.python import _pywrap_parallel_device +from tensorflow.python.distribute.parallel_device import gen_parallel_device_ops from tensorflow.python.distribute.parallel_device 
import saving from tensorflow.python.eager import context +from tensorflow.python.framework import load_library from tensorflow.python.framework import ops +from tensorflow.python.platform import resource_loader from tensorflow.python.tpu.ops import tpu_ops +load_library.load_op_library( + resource_loader.get_path_to_datafile("_parallel_device_ops.so")) + _next_device_number = 0 _next_device_number_lock = threading.Lock() @@ -58,6 +64,8 @@ class ParallelDevice(object): device, device_info = _pywrap_parallel_device.GetParallelDeviceCapsules( self.name, self.components) context.register_custom_device(device, self.name, device_info) + with ops.device(self.name): + self._device_ids = gen_parallel_device_ops.device_id() def pack(self, tensors): """Create a tensor on the parallel device from a sequence of tensors. @@ -84,6 +92,18 @@ class ParallelDevice(object): return tpu_ops.tpu_replicated_output( parallel_tensor, num_replicas=len(self.components)) + @property + def device_ids(self): + """A parallel tensor with scalar integers numbering component devices. + + Each device ID is placed on its corresponding device, in the same order as + the `components` constructor argument. + + Returns: + A parallel tensor containing 0 on the first device, 1 on the second, etc. + """ + return self._device_ids + # TODO(allenl): Fixing saving in Python is a bit odd. 
One alternative would be # to provide a hook for the custom device to create save specs/etc., then call # that hook from the default variable implementation if the variable is on a diff --git a/tensorflow/python/distribute/parallel_device/parallel_device_test.py b/tensorflow/python/distribute/parallel_device/parallel_device_test.py index d3f3417eca9..e35eb601cc5 100644 --- a/tensorflow/python/distribute/parallel_device/parallel_device_test.py +++ b/tensorflow/python/distribute/parallel_device/parallel_device_test.py @@ -119,6 +119,12 @@ class ParallelDeviceTests(_VirtualDeviceTestCase): self.assertIn(self.device.components[0], outputs[0].backing_device) self.assertIn(self.device.components[1], outputs[1].backing_device) + def test_device_id(self): + device_ids = self.device.unpack(self.device.device_ids) + self.assertAllClose([0, 1], device_ids) + self.assertIn(self.device.components[0], device_ids[0].backing_device) + self.assertIn(self.device.components[1], device_ids[1].backing_device) + def test_collective_reduce(self): with ops.device(self.device.name): x = self.device.pack( diff --git a/tensorflow/python/distribute/strategy_common_test.py b/tensorflow/python/distribute/strategy_common_test.py new file mode 100644 index 00000000000..c277310b6a0 --- /dev/null +++ b/tensorflow/python/distribute/strategy_common_test.py @@ -0,0 +1,65 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for common methods in strategy classes.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized + +from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import reduce_util +from tensorflow.python.distribute import strategy_combinations +from tensorflow.python.eager import def_function +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test + + +class StrategyReduceTest(test.TestCase, parameterized.TestCase): + + @combinations.generate( + combinations.combine( + strategy=[strategy_combinations.multi_worker_mirrored_two_workers] + + strategy_combinations.strategies_minus_tpu, + mode=['eager'])) + def testSimpleReduce(self, strategy): + + def fn_eager(): + + def replica_fn(): + return array_ops.ones((), dtypes.float32) + + per_replica_value = strategy.run(replica_fn) + return strategy.reduce( + reduce_util.ReduceOp.SUM, value=per_replica_value, axis=None) + + fn_graph = def_function.function(fn_eager) + + # Run reduce under the strategy scope to explicitly enter + # strategy default_device scope. + with strategy.scope(): + self.assertEqual(fn_eager().numpy(), 1.0 * strategy.num_replicas_in_sync) + self.assertEqual(fn_graph().numpy(), 1.0 * strategy.num_replicas_in_sync) + + # Run reduce without a strategy scope to implicitly enter + # strategy default_device scope. 
+ self.assertEqual(fn_eager().numpy(), 1.0 * strategy.num_replicas_in_sync) + self.assertEqual(fn_graph().numpy(), 1.0 * strategy.num_replicas_in_sync) + + +if __name__ == '__main__': + combinations.main() diff --git a/tensorflow/python/distribute/tpu_strategy.py b/tensorflow/python/distribute/tpu_strategy.py index 82a4a803510..b574c523ccd 100644 --- a/tensorflow/python/distribute/tpu_strategy.py +++ b/tensorflow/python/distribute/tpu_strategy.py @@ -96,35 +96,34 @@ def validate_run_function(fn): @tf_export("distribute.experimental.TPUStrategy", v1=[]) class TPUStrategy(distribute_lib.Strategy): - """TPU distribution strategy implementation.""" + """TPU distribution strategy implementation. + + To construct a TPUStrategy object, you need to run the + initialization code as below: + + >>> resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='') + >>> tf.config.experimental_connect_to_cluster(resolver) + >>> tf.tpu.experimental.initialize_tpu_system(resolver) + >>> strategy = tf.distribute.experimental.TPUStrategy(resolver) + + While using distribution strategies, the variables created within strategy's + scope will be replicated across all the replicas and can be kept in sync + using all-reduce algorithms. + + To run TF2 programs on TPUs, you can either use `.compile` and + `.fit` APIs in `tf.keras` with TPUStrategy, or write your own customized + training loop by calling `strategy.run` directly. Note that + TPUStrategy doesn't support pure eager execution, so please make sure the + function passed into `strategy.run` is a `tf.function` or + `strategy.run` is called inside a `tf.function` if eager + behavior is enabled. + """ def __init__(self, tpu_cluster_resolver=None, device_assignment=None): """Synchronous training in TPU donuts or Pods. 
- To construct a TPUStrategy object, you need to run the - initialization code as below: - - ```python - resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=FLAGS.tpu) - tf.config.experimental_connect_to_cluster(resolver) - tf.tpu.experimental.initialize_tpu_system(resolver) - strategy = tf.distribute.experimental.TPUStrategy(resolver) - ``` - - While using distribution strategies, the variables created within strategy's - scope will be replicated across all the replicas and can be kept in sync - using all-reduce algorithms. - - To run TF2 programs on TPUs, you can either use `.compile` and - `.fit` APIs in `tf.keras` with TPUStrategy, or write your own customized - training loop by calling `strategy.run` directly. Note that - TPUStrategy doesn't support pure eager execution, so please make sure the - function passed into `strategy.run` is a `tf.function` or - `strategy.run` is called inside a `tf.function` if eager - behavior is enabled. - Args: tpu_cluster_resolver: A tf.distribute.cluster_resolver.TPUClusterResolver, which provides information about the TPU cluster. @@ -140,10 +139,6 @@ class TPUStrategy(distribute_lib.Strategy): distribute_lib.distribution_strategy_replica_gauge.get_cell( "num_replicas_per_worker").set(self.extended.num_replicas_per_host) - # TODO(b/155193424): Enable OwnedMultiDeviceIterator on TPU Pod. - if self.extended.num_hosts > 1: - self._enable_legacy_iterators = True - # TODO(cjfj): Modify `_call_for_each_replica` in `TPUExtended` such that this # can use the default implementation. # This implementation runs a single step. It does not use infeed or outfeed. @@ -213,26 +208,26 @@ class TPUStrategyV1(distribute_lib.StrategyV1): Users can pass strategy specific options to `options` argument. 
An example to enable bucketizing dynamic shapes in `TPUStrategy.run` is: - ```python - resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='') - tf.config.experimental_connect_to_cluster(resolver) - tf.tpu.experimental.initialize_tpu_system(resolver) - strategy = tf.distribute.experimental.TPUStrategy(tpu='') + >>> resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='') + >>> tf.config.experimental_connect_to_cluster(resolver) + >>> tf.tpu.experimental.initialize_tpu_system(resolver) + >>> strategy = tf.distribute.experimental.TPUStrategy(resolver) - options = tf.distribute.RunOptions() - options.experimental_bucketizing_dynamic_shape = True + >>> options = tf.distribute.RunOptions( + ... experimental_bucketizing_dynamic_shape=True) - iterator = iter(inputs) + >>> dataset = tf.data.Dataset.range( + ... strategy.num_replicas_in_sync, output_type=dtypes.float32).batch( + ... strategy.num_replicas_in_sync, drop_remainder=True) + >>> input_iterator = iter(strategy.experimental_distribute_dataset(dataset)) - @tf.function() - def step_fn(inputs): - output = tf.reduce_sum(inputs) - return output + >>> @tf.function() + ... def step_fn(inputs): + ... output = tf.reduce_sum(inputs) + ... return output - strategy.run(step_fn, args=(next(iterator),), - options=options) - ``` + >>> strategy.run(step_fn, args=(next(input_iterator),), options=options) Args: fn: The function to run. The output must be a `tf.nest` of `Tensor`s. 
diff --git a/tensorflow/python/distribute/values.py b/tensorflow/python/distribute/values.py index 4fe3d287ccc..444915aa123 100644 --- a/tensorflow/python/distribute/values.py +++ b/tensorflow/python/distribute/values.py @@ -38,6 +38,7 @@ from tensorflow.python.ops import variables as variables_lib from tensorflow.python.training.saving import saveable_object from tensorflow.python.training.saving import saveable_object_util from tensorflow.python.training.tracking import base as trackable +from tensorflow.python.types import core from tensorflow.python.util import nest from tensorflow.python.util.tf_export import tf_export @@ -422,7 +423,8 @@ class DistributedVarOp(object): return hash((self.name, self.graph, self.traceback, self.type)) -class DistributedVariable(DistributedDelegate, variables_lib.Variable): +class DistributedVariable(DistributedDelegate, variables_lib.Variable, + core.Tensor): """Holds a map from replica to variables.""" # TODO(josh11b): Support changing the set of variables if e.g. 
if new @@ -741,9 +743,6 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable): pass -ops.register_dense_tensor_like_type(DistributedVariable) - - def _validate_colocate_extended(v, extended): variable_strategy = v._distribute_strategy # pylint: disable=protected-access if variable_strategy.extended is not extended: @@ -1380,7 +1379,7 @@ def value_container(val): return val -class AggregatingVariable(variables_lib.Variable): +class AggregatingVariable(variables_lib.Variable, core.Tensor): """A wrapper around a variable that aggregates updates across replicas.""" def __init__(self, strategy, v, aggregation): @@ -1649,4 +1648,3 @@ def _tensor_conversion_aggregate(var, dtype=None, name=None, as_ref=False): ops.register_tensor_conversion_function(AggregatingVariable, _tensor_conversion_aggregate) -ops.register_dense_tensor_like_type(AggregatingVariable) diff --git a/tensorflow/python/distribute/values_test.py b/tensorflow/python/distribute/values_test.py index daa7e5563d3..67ed86b4047 100644 --- a/tensorflow/python/distribute/values_test.py +++ b/tensorflow/python/distribute/values_test.py @@ -56,6 +56,7 @@ from tensorflow.python.saved_model.model_utils import mode_keys from tensorflow.python.tpu import tpu_strategy_util from tensorflow.python.training import saver as saver_lib from tensorflow.python.training.tracking import util as trackable_utils +from tensorflow.python.types import core from tensorflow.python.util import nest @@ -623,10 +624,10 @@ class DistributedVariableTest(test.TestCase, parameterized.TestCase): v = variables_lib.Variable( 0., synchronization=synchronization, aggregation=aggregation) # In cross replica context. - self.assertTrue(ops.is_dense_tensor_like(v)) + self.assertIsInstance(v, core.Tensor) # In replica context. 
distribution.run( - lambda v: self.assertTrue(ops.is_dense_tensor_like(v)), args=(v,)) + lambda v: self.assertIsInstance(v, core.Tensor), args=(v,)) def testAssignReturnValueIsTensorLike(self, distribution, synchronization, aggregation): @@ -645,9 +646,9 @@ class DistributedVariableTest(test.TestCase, parameterized.TestCase): # values is not allowed when aggregation is SUM. See # `cross_device_ops.reduce_non_distributed_value`. delta = array_ops.identity(1.) - self.assertTrue(ops.is_dense_tensor_like(v.assign(delta))) - self.assertTrue(ops.is_dense_tensor_like(v.assign_sub(delta))) - self.assertTrue(ops.is_dense_tensor_like(v.assign_add(delta))) + self.assertIsInstance(v.assign(delta), core.Tensor) + self.assertIsInstance(v.assign_sub(delta), core.Tensor) + self.assertIsInstance(v.assign_add(delta), core.Tensor) # In cross replica context we return a PerReplica which is not Tensor like # yet. diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index 9630ce01ce9..227fca5ea6f 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -110,14 +110,12 @@ def run_benchmark(func, num_iters, execution_mode=None): class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): def __init__(self): - # TODO(b/153054118): Add tf.RandomUniform - if not context.is_tfrt_enabled(): - # used for multiply benchmarks - self._m_2 = random_ops.random_uniform([2]) + # used for multiply benchmarks + self._m_2 = random_ops.random_uniform([2]) - # used for matmul benchmarks - self._m_2_by_2 = random_ops.random_uniform((2, 2)) - self._m_100_by_784 = random_ops.random_uniform((100, 784)) + # used for matmul benchmarks + self._m_2_by_2 = random_ops.random_uniform((2, 2)) + self._m_100_by_784 = random_ops.random_uniform((100, 784)) self._num_iters_2_by_2 = 30000 self._num_iters_100_by_784 = 30000 @@ -182,22 +180,18 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): func() # 
Warmup. self._run(func, 3000) - @test_util.disable_tfrt("Scalars are not handled correctly") def benchmark_create_float_constant(self): self._benchmark_create_constant(42.0, dtype=None) - @test_util.disable_tfrt("Scalars are not handled correctly") def benchmark_create_float_constant_uncached(self): self._benchmark_create_constant(42.0, dtype=None, cached=False) - @test_util.disable_tfrt("Scalars are not handled correctly") def benchmark_create_int32_constant(self): if context.num_gpus(): return # int32 constants are always allocated on CPU. self._benchmark_create_constant(42, dtype=dtypes.int32) - @test_util.disable_tfrt("Scalars are not handled correctly") def benchmark_create_int32_constant_uncached(self): if context.num_gpus(): return # int32 constants are always allocated on CPU. @@ -213,21 +207,17 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): func() # Warmup. self._run(func, 30000) - @test_util.disable_tfrt("Scalars are not handled correctly") def benchmark_add_float_scalars(self): self._benchmark_add(42.0, 24.0) - @test_util.disable_tfrt("Scalars are not handled correctly") def benchmark_add_int32_scalars(self): self._benchmark_add(42, 24) - @test_util.disable_tfrt("Scalars are not handled correctly") def benchmark_add_float_scalar_tensor(self): tensor_a = constant_op.constant(42.0) tensor_b = constant_op.constant(24.0) self._benchmark_add(tensor_a, tensor_b) - @test_util.disable_tfrt("Scalars are not handled correctly") def benchmark_add_int32_scalar_tensor(self): tensor_a = constant_op.constant(42) tensor_b = constant_op.constant(24) @@ -319,17 +309,16 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): func = lambda: math_ops.multiply(m, m) self._run(func, num_iters) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("numpy() not supported") def benchmark_np_multiply(self): self._benchmark_np_multiply(self._m_2, 30000) - @test_util.disable_tfrt("random ops not supported") def 
benchmark_tf_multiply_CPU(self): with context.device(CPU): m = self._m_2.cpu() self._benchmark_tf_multiply(m, 30000) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("copy to GPU not supported") def benchmark_tf_multiply_GPU(self): if not context.num_gpus(): return @@ -337,13 +326,12 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): m = self._m_2.gpu() self._benchmark_tf_multiply(m, 30000) - @test_util.disable_tfrt("random ops not supported") def benchmark_tf_multiply_op_CPU(self): with context.device(CPU): m = self._m_2.cpu() self._benchmark_tf_multiply_op(m, 30000) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("copy to GPU not supported") def benchmark_tf_multiply_op_GPU(self): if not context.num_gpus(): return @@ -351,7 +339,6 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): m = self._m_2.gpu() self._benchmark_tf_multiply_op(m, 30000) - @test_util.disable_tfrt("random ops not supported") def benchmark_tf_identity(self): m = self._m_2 self._run(lambda: gen_array_ops.identity(m), 30000) @@ -360,7 +347,6 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): def benchmark_slowpath_tf_identity(self): self._run(lambda: gen_array_ops.identity(1), 30000) - @test_util.disable_tfrt("random ops not supported") def benchmark_tfe_py_execute_identity(self): m = self._m_2 ctx_handle = context.context()._handle @@ -498,19 +484,17 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._run(m.value, num_iters) # Benchmarks for A^2, A of dimension 2 by 2. 
- @test_util.disable_tfrt("random ops not supported") def benchmark_np_matmul_2_by_2(self): self._benchmark_np_matmul( self._m_2_by_2, transpose_b=False, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("random ops not supported") def benchmark_tf_matmul_2_by_2_CPU(self): with context.device(CPU): m = self._m_2_by_2.cpu() self._benchmark_tf_matmul( m, transpose_b=False, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("async not supported") def benchmark_tf_matmul_2_by_2_CPU_async(self): with context.device(CPU): m = self._m_2_by_2.cpu() @@ -520,35 +504,32 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): num_iters=self._num_iters_2_by_2, execution_mode=context.ASYNC) - @test_util.disable_tfrt("random ops not supported") def benchmark_gen_math_ops_matmul_2_by_2_CPU(self): with context.device(CPU): m = self._m_2_by_2.cpu() self._benchmark_gen_math_ops_matmul( m, transpose_b=False, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("random ops not supported") def benchmark_tfe_py_fastpath_execute_matmul_2_by_2_CPU(self): with context.device(CPU): m = self._m_2_by_2.cpu() self._benchmark_tfe_py_fastpath_execute_matmul( m, transpose_b=False, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("random ops not supported") def benchmark_tfe_py_execute_matmul_2_by_2_CPU(self): with context.device(CPU): m = self._m_2_by_2.cpu() self._benchmark_tfe_py_execute_matmul( m, transpose_b=False, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("Mutex corrupt: waiting writer with no waiters") def benchmark_defun_matmul_2_by_2_CPU(self): with context.device(CPU): m = self._m_2_by_2.cpu() self._benchmark_defun_matmul( m, transpose_b=False, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("async not supported") def 
benchmark_defun_matmul_2_by_2_CPU_async(self): with context.device(CPU): m = self._m_2_by_2.cpu() @@ -558,14 +539,14 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): num_iters=self._num_iters_2_by_2, execution_mode=context.ASYNC) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("Mutex corrupt: waiting writer with no waiters") def benchmark_defun_matmul_forward_backward_2_by_2_CPU(self): with context.device(CPU): m = self._m_2_by_2.cpu() self._benchmark_defun_matmul_forward_backward( m, transpose_b=False, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("async not supported") def benchmark_defun_matmul_forward_backward_2_by_2_CPU_async(self): with context.device(CPU): m = self._m_2_by_2.cpu() @@ -575,7 +556,7 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): num_iters=self._num_iters_2_by_2, execution_mode=context.ASYNC) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("copy to GPU not supported") def benchmark_tf_matmul_2_by_2_GPU(self): if not context.num_gpus(): return @@ -584,7 +565,7 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._benchmark_tf_matmul( m, transpose_b=False, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("async not supported") def benchmark_tf_matmul_2_by_2_GPU_async(self): if not context.num_gpus(): return @@ -596,7 +577,7 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): num_iters=self._num_iters_2_by_2, execution_mode=context.ASYNC) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("copy to GPU not supported") def benchmark_gen_math_ops_matmul_2_by_2_GPU(self): if not context.num_gpus(): return @@ -605,7 +586,7 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._benchmark_gen_math_ops_matmul( m, transpose_b=False, 
num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("copy to GPU not supported") def benchmark_tfe_py_execute_matmul_2_by_2_GPU(self): if not context.num_gpus(): return @@ -614,7 +595,7 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._benchmark_tfe_py_execute_matmul( m, transpose_b=False, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("defun not supported") def benchmark_defun_matmul_2_by_2_GPU(self): if not context.num_gpus(): return @@ -623,7 +604,7 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._benchmark_defun_matmul( m, transpose_b=False, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("async not supported") def benchmark_defun_matmul_2_by_2_GPU_async(self): if not context.num_gpus(): return @@ -635,28 +616,26 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): num_iters=self._num_iters_2_by_2, execution_mode=context.ASYNC) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("function not supported") def benchmark_nested_defun_matmul_2_by_2(self): m = self._m_2_by_2.cpu() self._benchmark_nested_defun_matmul( m, transpose_b=False, num_iters=self._num_iters_2_by_2) # Benchmarks for AA.T, A of dimension 100 by 784. 
- @test_util.disable_tfrt("random ops not supported") def benchmark_np_matmul_100_by_784(self): self._benchmark_np_matmul( self._m_100_by_784, transpose_b=True, num_iters=self._num_iters_100_by_784) - @test_util.disable_tfrt("random ops not supported") def benchmark_tf_matmul_100_by_784_CPU(self): with context.device(CPU): m = self._m_100_by_784.cpu() self._benchmark_tf_matmul( m, transpose_b=True, num_iters=self._num_iters_100_by_784) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("async not supported") def benchmark_tf_matmul_100_by_784_CPU_async(self): with context.device(CPU): m = self._m_100_by_784.cpu() @@ -666,35 +645,33 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): num_iters=self._num_iters_100_by_784, execution_mode=context.ASYNC) - @test_util.disable_tfrt("random ops not supported") def benchmark_gen_math_ops_matmul_100_by_784_CPU(self): with context.device(CPU): m = self._m_100_by_784.cpu() self._benchmark_gen_math_ops_matmul( m, transpose_b=True, num_iters=self._num_iters_100_by_784) - @test_util.disable_tfrt("random ops not supported") def benchmark_tfe_py_fastpath_execute_matmul_100_by_784_CPU(self): with context.device(CPU): m = self._m_100_by_784.cpu() self._benchmark_tfe_py_fastpath_execute_matmul( m, transpose_b=True, num_iters=self._num_iters_100_by_784) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("copy to GPU not supported") def benchmark_tfe_py_execute_matmul_100_by_784_CPU(self): with context.device(CPU): m = self._m_100_by_784.cpu() self._benchmark_tfe_py_execute_matmul( m, transpose_b=True, num_iters=self._num_iters_100_by_784) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("function not supported") def benchmark_defun_matmul_100_by_784_CPU(self): with context.device(CPU): m = self._m_100_by_784.cpu() self._benchmark_defun_matmul( m, transpose_b=True, num_iters=self._num_iters_100_by_784) - 
@test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("copy to GPU not supported") def benchmark_tf_matmul_100_by_784_GPU(self): if not context.num_gpus(): return @@ -703,7 +680,7 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._benchmark_tf_matmul( m, transpose_b=True, num_iters=self._num_iters_100_by_784) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("async not supported") def benchmark_tf_matmul_100_by_784_GPU_async(self): if not context.num_gpus(): return @@ -715,7 +692,7 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): num_iters=self._num_iters_100_by_784, execution_mode=context.ASYNC) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("copy to GPU not supported") def benchmark_gen_math_ops_matmul_100_by_784_GPU(self): if not context.num_gpus(): return @@ -724,7 +701,7 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._benchmark_gen_math_ops_matmul( m, transpose_b=True, num_iters=self._num_iters_100_by_784) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("copy to GPU not supported") def benchmark_tfe_py_execute_matmul_100_by_784_GPU(self): if not context.num_gpus(): return @@ -733,7 +710,7 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._benchmark_tfe_py_execute_matmul( m, transpose_b=True, num_iters=self._num_iters_100_by_784) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("defun not supported") def benchmark_defun_matmul_100_by_784_GPU(self): if not context.num_gpus(): return @@ -742,7 +719,7 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._benchmark_defun_matmul( m, transpose_b=True, num_iters=self._num_iters_100_by_784) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("defun not supported") def benchmark_nested_defun_matmul_100_by_784(self): m = 
self._m_100_by_784.gpu() self._benchmark_nested_defun_matmul( @@ -815,35 +792,35 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): func() self._run(func, 3000) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("defun not supported") def benchmark_forwardprop_matmul_256_by_2096_CPU(self): self._benchmark_forwardprop_matmul_CPU(shape=(256, 2096)) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("defun not supported") def benchmark_forwardprop_in_defun_matmul_256_by_2096_CPU(self): self._benchmark_forwardprop_in_defun_matmul_CPU(shape=(256, 2096)) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("defun not supported") def benchmark_forwardprop_in_defun_of_defun_matmul_256_by_2096_CPU(self): self._benchmark_forwardprop_in_defun_of_defun_matmul_CPU(shape=(256, 2096)) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("defun not supported") def benchmark_forwardprop_of_defun_matmul_256_by_2096_CPU(self): self._benchmark_forwardprop_of_defun_matmul_CPU(shape=(256, 2096)) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("defun not supported") def benchmark_forwardprop_matmul_100_by_784_CPU(self): self._benchmark_forwardprop_matmul_CPU(shape=(100, 784)) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("defun not supported") def benchmark_forwardprop_in_defun_matmul_100_by_784_CPU(self): self._benchmark_forwardprop_in_defun_matmul_CPU(shape=(100, 784)) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("defun not supported") def benchmark_forwardprop_in_defun_of_defun_matmul_100_by_784_CPU(self): self._benchmark_forwardprop_in_defun_of_defun_matmul_CPU(shape=(100, 784)) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("defun not supported") def benchmark_forwardprop_of_defun_matmul_100_by_784_CPU(self): 
self._benchmark_forwardprop_of_defun_matmul_CPU(shape=(100, 784)) @@ -988,25 +965,20 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): func = lambda: array_ops.zeros_like(m) self._run(func, 3000) - @test_util.disable_tfrt("random ops not supported") def benchmark_tf_zeros_like_CPU(self): self._benchmark_tf_zeros_like(self._m_2_by_2) - @test_util.disable_tfrt("random ops not supported") def benchmark_tf_zeros_like_GPU(self): self._benchmark_tf_zeros_like(self._m_2_by_2, device=GPU) - @test_util.disable_tfrt("random ops not supported") def benchmark_tf_zeros_like_variable_CPU(self): m = resource_variable_ops.ResourceVariable(self._m_2_by_2) self._benchmark_tf_zeros_like(m) - @test_util.disable_tfrt("random ops not supported") def benchmark_tf_zeros_like_variable_GPU(self): m = resource_variable_ops.ResourceVariable(self._m_2_by_2) self._benchmark_tf_zeros_like(m, device=GPU) - @test_util.disable_tfrt("random ops not supported") def _benchmark_tf_random_uniform_2_by_2(self, shape=(2, 2), dtype=dtypes.int32, @@ -1018,30 +990,24 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._run(func, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("random ops not supported") def benchmark_tf_random_uniform_2_by_2_integer_CPU(self): self._benchmark_tf_random_uniform_2_by_2() - @test_util.disable_tfrt("random ops not supported") def benchmark_tf_random_uniform_2_by_2_integer_GPU(self): self._benchmark_tf_random_uniform_2_by_2(device=GPU) - @test_util.disable_tfrt("random ops not supported") def benchmark_tf_random_uniform_2_by_2_float_CPU(self): self._benchmark_tf_random_uniform_2_by_2(dtype=dtypes.float32) - @test_util.disable_tfrt("random ops not supported") def benchmark_tf_random_uniform_2_by_2_float_GPU(self): self._benchmark_tf_random_uniform_2_by_2( dtype=dtypes.float32, device=GPU) - @test_util.disable_tfrt("random ops not supported") def benchmark_tf_random_uniform_2_by_2_default_setting_CPU(self): with 
context.device(CPU): func = lambda: random_ops.random_uniform((2, 2)) self._run(func, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("random ops not supported") def benchmark_tf_random_uniform_2_by_2_default_setting_GPU(self): with context.device(GPU): func = lambda: random_ops.random_uniform((2, 2)) @@ -1063,19 +1029,15 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._run(func, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("random ops not supported") def benchmark_tf_dropout_scalar_rate_2_by_2_CPU(self): self._benchmark_tf_dropout_2_by_2(is_rate_tensor=False) - @test_util.disable_tfrt("random ops not supported") def benchmark_tf_dropout_scalar_rate_2_by_2_GPU(self): self._benchmark_tf_dropout_2_by_2(is_rate_tensor=False, device=GPU) - @test_util.disable_tfrt("random ops not supported") def benchmark_tf_dropout_2_by_2_CPU(self): self._benchmark_tf_dropout_2_by_2() - @test_util.disable_tfrt("random ops not supported") def benchmark_tf_dropout_2_by_2_GPU(self): self._benchmark_tf_dropout_2_by_2(device=GPU) @@ -1088,25 +1050,25 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): func = lambda: array_ops.transpose(m, perm, conjugate) self._run(func, num_iters, execution_mode=execution_mode) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("ConvertToEagerTensorUncached error") def benchmark_tf_transpose_2_by_2_CPU(self): with context.device(CPU): m = self._m_2_by_2.cpu() self._benchmark_transpose(m, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("copy to GPU not supported") def benchmark_tf_transpose_2_by_2_GPU(self): with context.device(GPU): m = self._m_2_by_2.gpu() self._benchmark_transpose(m, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("ConvertToEagerTensorUncached error") def benchmark_tf_transpose_variable_2_by_2_CPU(self): with 
context.device(CPU): m = resource_variable_ops.ResourceVariable(self._m_2_by_2) self._benchmark_transpose(m, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("Cannot convert array to EagerTensor of dtype int32") def benchmark_tf_transpose_variable_2_by_2_GPU(self): with context.device(GPU): m = resource_variable_ops.ResourceVariable(self._m_2_by_2) @@ -1164,26 +1126,23 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): return defined(t1=t, t2=t, t3=t, t4=t, t5=t, t6=t, t7=t, t8=t) self._run(signature_computation, 30000) - @test_util.disable_tfrt("random ops not supported") def benchmark_matmul_read_variable_op_2_by_2_CPU(self): with context.device(CPU): m = resource_variable_ops.ResourceVariable(self._m_2_by_2) self._benchmark_matmul_read_variable(m, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("random ops not supported") def benchmark_matmul_read_variable_op_with_tape_2_by_2_CPU(self): with context.device(CPU): m = resource_variable_ops.ResourceVariable(self._m_2_by_2) self._benchmark_matmul_read_variable_with_tape( m, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("random ops not supported") def benchmark_read_variable_op_2_by_2_CPU(self): with context.device(CPU): m = resource_variable_ops.ResourceVariable(self._m_2_by_2) self._benchmark_read_variable(m, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("copy to GPU not supported") def benchmark_read_variable_op_2_by_2_GPU(self): if not context.num_gpus(): return @@ -1191,14 +1150,13 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): m = resource_variable_ops.ResourceVariable(self._m_2_by_2.gpu()) self._benchmark_read_variable(m, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("random ops not supported") def benchmark_read_variable_op_with_tape_2_by_2_CPU(self): with context.device(CPU): m = 
resource_variable_ops.ResourceVariable(self._m_2_by_2) self._benchmark_read_variable_with_tape( m, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("random ops not supported") + @test_util.disable_tfrt("copy to GPU not supported") def benchmark_read_variable_op_with_tape_2_by_2_GPU(self): if not context.num_gpus(): return @@ -1228,7 +1186,6 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._run(scan, 100) - @test_util.disable_tfrt("add not supported, only add_v2") def benchmark_fastpath_conversion_type_inference(self): c = constant_op.constant(1., dtype=dtypes.float32) @@ -1268,7 +1225,6 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): xs = [[[np.linspace(0, 1, 21).tolist()] * 20] * 20] self._run(lambda: constant_op.constant(xs, dtype=dtypes.float64), 10000) - @test_util.disable_tfrt("tf.fill not supported") def benchmark_list_of_zeros_to_np_array(self): values = [] for _ in range(1000): @@ -1286,11 +1242,11 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): resources.append(resource_variable_ops.ResourceVariable(self._m_2)) self._run(lambda: add_all(resources), num_iters) - @test_util.disable_tfrt("Random uniform needs fallback") + @test_util.disable_tfrt("funtion not supported") def benchmarkFunctionWithFiveResourceInputs(self): self._benchmarkFunctionWithResourceInputs(5, 1000) - @test_util.disable_tfrt("Random uniform needs fallback") + @test_util.disable_tfrt("funtion not supported") def benchmarkFunctionWithFiveHundredResourceInputs(self): self._benchmarkFunctionWithResourceInputs(500, 100) @@ -1325,15 +1281,15 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): with context.device(CPU): self._run(benchmark_fn, 10) - @test_util.disable_tfrt("VarHandleOp needs fallback") + @test_util.disable_tfrt("funtion not supported") def benchmarkTenThousandResourceReadsInCondInInnerFunc(self): self._benchmarkResourceReadsInCondInInnerFunc(10000) - @test_util.disable_tfrt("VarHandleOp 
needs fallback") + @test_util.disable_tfrt("funtion not supported") def benchmarkHundredResourceReadsInCondInInnerFunc(self): self._benchmarkResourceReadsInCondInInnerFunc(100) - @test_util.disable_tfrt("VarHandleOp needs fallback") + @test_util.disable_tfrt("funtion not supported") def benchmarkTenResourceReadsInCondInInnerFunc(self): self._benchmarkResourceReadsInCondInInnerFunc(10) diff --git a/tensorflow/python/eager/benchmarks_test_base.py b/tensorflow/python/eager/benchmarks_test_base.py index 552d844c32d..3d81d08ccbf 100644 --- a/tensorflow/python/eager/benchmarks_test_base.py +++ b/tensorflow/python/eager/benchmarks_test_base.py @@ -32,4 +32,6 @@ class MicroBenchmarksBase(test.Benchmark): "examples_per_sec": float("{0:.3f}".format(num_iters / total_time)), "us_per_example": float("{0:.3f}".format(total_time * 1e6 / num_iters)) } - self.report_benchmark(iters=num_iters, wall_time=mean_us, extras=extras) + benchmark_name = self._get_benchmark_name() + self.report_benchmark( + iters=num_iters, wall_time=mean_us, extras=extras, name=benchmark_name) diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index 182b8478420..86b3d5cf95f 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -1509,9 +1509,11 @@ class Context(object): return self.config.allow_soft_placement @soft_device_placement.setter - def soft_device_placement(self, enabled): - self._soft_device_placement = enabled + def soft_device_placement(self, enable): + if self._context_handle is not None: + pywrap_tfe.TFE_ContextSetSoftDevicePlacement(self._handle, enable) + self._soft_device_placement = enable self._thread_local_data.function_call_options = None @property @@ -1519,15 +1521,11 @@ class Context(object): return self.config.log_device_placement @log_device_placement.setter - def log_device_placement(self, enabled): - if self._log_device_placement == enabled: - return - + def log_device_placement(self, enable): if 
self._context_handle is not None: - raise RuntimeError( - "Device placement logging must be set at program startup") + pywrap_tfe.TFE_ContextSetLogDevicePlacement(self._handle, enable) - self._log_device_placement = enabled + self._log_device_placement = enable self._thread_local_data.function_call_options = None @property diff --git a/tensorflow/python/eager/core_test.py b/tensorflow/python/eager/core_test.py index 47b3966827f..c1401fc56ee 100644 --- a/tensorflow/python/eager/core_test.py +++ b/tensorflow/python/eager/core_test.py @@ -1112,5 +1112,4 @@ class EagerTensorCacheTest(test_util.TensorFlowTestCase): if __name__ == '__main__': - context.set_log_device_placement(True) test.main() diff --git a/tensorflow/python/eager/gradient_input_output_exclusions.py b/tensorflow/python/eager/gradient_input_output_exclusions.py index 983f10551ba..94962bf6135 100644 --- a/tensorflow/python/eager/gradient_input_output_exclusions.py +++ b/tensorflow/python/eager/gradient_input_output_exclusions.py @@ -36,6 +36,7 @@ from tensorflow.python.autograph.pyct import qual_names from tensorflow.python.autograph.pyct import transformer from tensorflow.python.autograph.pyct.static_analysis import activity from tensorflow.python.autograph.pyct.static_analysis import liveness +from tensorflow.python.autograph.pyct.static_analysis import reaching_fndefs from tensorflow.python.framework import op_def_registry from tensorflow.python.framework import ops @@ -208,6 +209,7 @@ def _live_tensors(f, attr_name="inputs"): graphs = cfg.build(node) node = qual_names.resolve(node) node = activity.resolve(node, ctx, None) + node = reaching_fndefs.resolve(node, ctx, graphs) node = liveness.resolve(node, ctx, graphs) op_arg_name = anno.getanno(node.args.args[0], anno.Basic.QN) diff --git a/tensorflow/python/feature_column/BUILD b/tensorflow/python/feature_column/BUILD index d67cdf9cc06..786c26c009a 100644 --- a/tensorflow/python/feature_column/BUILD +++ b/tensorflow/python/feature_column/BUILD @@ -55,8 
+55,6 @@ py_library( py_library( name = "feature_column_v2", srcs = [ - "dense_features.py", - "dense_features_v2.py", "feature_column_v2.py", "sequence_feature_column.py", "serialization.py", @@ -126,15 +124,6 @@ tf_py_test( ], ) -tf_py_test( - name = "dense_features_test", - srcs = ["dense_features_test.py"], - tags = ["no_pip"], - deps = [ - ":feature_column_test_main_lib", - ], -) - py_library( name = "feature_column_test_main_lib", srcs = ["feature_column_test.py"], @@ -177,15 +166,6 @@ tf_py_test( deps = [":feature_column_v2_test_main_lib"], ) -tf_py_test( - name = "dense_features_v2_test", - srcs = ["dense_features_v2_test.py"], - tags = ["no_pip"], - deps = [ - ":feature_column_v2_test_main_lib", - ], -) - py_library( name = "feature_column_v2_test_main_lib", srcs = ["feature_column_v2_test.py"], diff --git a/tensorflow/python/feature_column/feature_column_lib.py b/tensorflow/python/feature_column/feature_column_lib.py index afe14f55bfc..bda20ff3f2c 100644 --- a/tensorflow/python/feature_column/feature_column_lib.py +++ b/tensorflow/python/feature_column/feature_column_lib.py @@ -19,13 +19,13 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import,line-too-long,wildcard-import,g-bad-import-order -# We import dense_features_v2 first so that the V1 DenseFeatures is the default -# if users directly import feature_column_lib. -from tensorflow.python.feature_column.dense_features_v2 import * -from tensorflow.python.feature_column.dense_features import * from tensorflow.python.feature_column.feature_column import * from tensorflow.python.feature_column.feature_column_v2 import * from tensorflow.python.feature_column.sequence_feature_column import * from tensorflow.python.feature_column.serialization import * +# We import dense_features_v2 first so that the V1 DenseFeatures is the default +# if users directly import feature_column_lib. 
+from tensorflow.python.keras.feature_column.dense_features_v2 import * +from tensorflow.python.keras.feature_column.dense_features import * from tensorflow.python.keras.feature_column.sequence_feature_column import * # pylint: enable=unused-import,line-too-long diff --git a/tensorflow/python/feature_column/feature_column_v2_test.py b/tensorflow/python/feature_column/feature_column_v2_test.py index fe769850fb0..a13f38a5203 100644 --- a/tensorflow/python/feature_column/feature_column_v2_test.py +++ b/tensorflow/python/feature_column/feature_column_v2_test.py @@ -31,7 +31,6 @@ from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python.client import session from tensorflow.python.eager import backprop from tensorflow.python.eager import context -from tensorflow.python.feature_column import dense_features as df from tensorflow.python.feature_column import feature_column as fc_old from tensorflow.python.feature_column import feature_column_v2 as fc from tensorflow.python.feature_column import serialization @@ -5582,23 +5581,6 @@ class IndicatorColumnTest(test.TestCase): self.evaluate(weight_var.assign([[1.], [2.], [3.], [4.]])) self.assertAllClose([[2. 
+ 3.]], self.evaluate(predictions)) - @test_util.run_deprecated_v1 - def test_dense_features(self): - animal = fc.indicator_column( - fc.categorical_column_with_identity('animal', num_buckets=4)) - with ops.Graph().as_default(): - features = { - 'animal': - sparse_tensor.SparseTensor( - indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2]) - } - net = df.DenseFeatures([animal])(features) - - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) - - self.assertAllClose([[0., 1., 1., 0.]], self.evaluate(net)) - @test_util.run_deprecated_v1 def test_input_layer(self): animal = fc.indicator_column( @@ -6271,156 +6253,6 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): self.assertAllClose(((94.,), (29.,), (0.,), (42.,)), self.evaluate(predictions)) - @parameterized.named_parameters( - { - 'testcase_name': 'use_safe_embedding_lookup', - 'use_safe_embedding_lookup': True - }, { - 'testcase_name': 'dont_use_safe_embedding_lookup', - 'use_safe_embedding_lookup': False - }) - @test_util.run_deprecated_v1 - def test_dense_features(self, use_safe_embedding_lookup): - # Inputs. - vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - # example 2, ids [] - # example 3, ids [1] - indices=((0, 0), (1, 0), (1, 4), (3, 0)), - values=(2, 0, 1, 1), - dense_shape=(4, 5)) - - # Embedding variable. - embedding_dimension = 2 - embedding_values = ( - (1., 2.), # id 0 - (3., 5.), # id 1 - (7., 11.) # id 2 - ) - - def _initializer(shape, dtype, partition_info=None): - self.assertAllEqual((vocabulary_size, embedding_dimension), shape) - self.assertEqual(dtypes.float32, dtype) - self.assertIsNone(partition_info) - return embedding_values - - # Expected lookup result, using combiner='mean'. 
- expected_lookups = ( - # example 0, ids [2], embedding = [7, 11] - (7., 11.), - # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] - (2., 3.5), - # example 2, ids [], embedding = [0, 0] - (0., 0.), - # example 3, ids [1], embedding = [3, 5] - (3., 5.), - ) - - # Build columns. - categorical_column = fc.categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - embedding_column = fc.embedding_column( - categorical_column, - dimension=embedding_dimension, - initializer=_initializer, - use_safe_embedding_lookup=use_safe_embedding_lookup) - - # Provide sparse input and get dense result. - l = df.DenseFeatures((embedding_column,)) - dense_features = l({'aaa': sparse_input}) - - # Assert expected embedding variable and lookups. - global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertItemsEqual(('dense_features/aaa_embedding/embedding_weights:0',), - tuple([v.name for v in global_vars])) - for v in global_vars: - self.assertIsInstance(v, variables_lib.Variable) - trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) - self.assertItemsEqual(('dense_features/aaa_embedding/embedding_weights:0',), - tuple([v.name for v in trainable_vars])) - - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) - - self.assertAllEqual(embedding_values, self.evaluate(trainable_vars[0])) - self.assertAllEqual(expected_lookups, self.evaluate(dense_features)) - - if use_safe_embedding_lookup: - self.assertIn('SparseFillEmptyRows', - [x.type for x in ops.get_default_graph().get_operations()]) - else: - self.assertNotIn( - 'SparseFillEmptyRows', - [x.type for x in ops.get_default_graph().get_operations()]) - - @test_util.run_deprecated_v1 - def test_dense_features_not_trainable(self): - # Inputs. 
- vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - # example 2, ids [] - # example 3, ids [1] - indices=((0, 0), (1, 0), (1, 4), (3, 0)), - values=(2, 0, 1, 1), - dense_shape=(4, 5)) - - # Embedding variable. - embedding_dimension = 2 - embedding_values = ( - (1., 2.), # id 0 - (3., 5.), # id 1 - (7., 11.) # id 2 - ) - - def _initializer(shape, dtype, partition_info=None): - self.assertAllEqual((vocabulary_size, embedding_dimension), shape) - self.assertEqual(dtypes.float32, dtype) - self.assertIsNone(partition_info) - return embedding_values - - # Expected lookup result, using combiner='mean'. - expected_lookups = ( - # example 0, ids [2], embedding = [7, 11] - (7., 11.), - # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] - (2., 3.5), - # example 2, ids [], embedding = [0, 0] - (0., 0.), - # example 3, ids [1], embedding = [3, 5] - (3., 5.), - ) - - # Build columns. - categorical_column = fc.categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - embedding_column = fc.embedding_column( - categorical_column, - dimension=embedding_dimension, - initializer=_initializer, - trainable=False) - - # Provide sparse input and get dense result. - dense_features = df.DenseFeatures((embedding_column,))({ - 'aaa': sparse_input - }) - - # Assert expected embedding variable and lookups. 
- global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertItemsEqual(('dense_features/aaa_embedding/embedding_weights:0',), - tuple([v.name for v in global_vars])) - self.assertItemsEqual([], - ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)) - - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) - - self.assertAllEqual(embedding_values, self.evaluate(global_vars[0])) - self.assertAllEqual(expected_lookups, self.evaluate(dense_features)) - @test_util.run_deprecated_v1 def test_input_layer(self): # Inputs. @@ -7326,129 +7158,6 @@ class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase): # = [3*1 + 5*2, 3*0 +5*0] = [13, 0] self.assertAllClose([[94. + 13.], [29.]], self.evaluate(predictions)) - def _test_dense_features(self, trainable=True): - # Inputs. - vocabulary_size = 3 - sparse_input_a = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 4)), - values=(2, 0, 1), - dense_shape=(2, 5)) - sparse_input_b = sparse_tensor.SparseTensorValue( - # example 0, ids [0] - # example 1, ids [] - indices=((0, 0),), - values=(0,), - dense_shape=(2, 5)) - sparse_input_c = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 1), (1, 1), (1, 3)), - values=(2, 0, 1), - dense_shape=(2, 5)) - sparse_input_d = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [] - indices=((0, 1),), - values=(2,), - dense_shape=(2, 5)) - - # Embedding variable. - embedding_dimension = 2 - embedding_values = ( - (1., 2.), # id 0 - (3., 5.), # id 1 - (7., 11.) # id 2 - ) - - def _initializer(shape, dtype, partition_info=None): - self.assertAllEqual((vocabulary_size, embedding_dimension), shape) - self.assertEqual(dtypes.float32, dtype) - self.assertIsNone(partition_info) - return embedding_values - - # Expected lookup result, using combiner='mean'. 
- expected_lookups = ( - # example 0: - # A ids [2], embedding = [7, 11] - # B ids [0], embedding = [1, 2] - # C ids [2], embedding = [7, 11] - # D ids [2], embedding = [7, 11] - (7., 11., 1., 2., 7., 11., 7., 11.), - # example 1: - # A ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] - # B ids [], embedding = [0, 0] - # C ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] - # D ids [], embedding = [0, 0] - (2., 3.5, 0., 0., 2., 3.5, 0., 0.), - ) - - # Build columns. - categorical_column_a = fc.categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - categorical_column_b = fc.categorical_column_with_identity( - key='bbb', num_buckets=vocabulary_size) - categorical_column_c = fc.categorical_column_with_identity( - key='ccc', num_buckets=vocabulary_size) - categorical_column_d = fc.categorical_column_with_identity( - key='ddd', num_buckets=vocabulary_size) - - embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2( - [categorical_column_a, categorical_column_b], - dimension=embedding_dimension, - initializer=_initializer, - trainable=trainable) - embedding_column_c, embedding_column_d = fc.shared_embedding_columns_v2( - [categorical_column_c, categorical_column_d], - dimension=embedding_dimension, - initializer=_initializer, - trainable=trainable) - - features = { - 'aaa': sparse_input_a, - 'bbb': sparse_input_b, - 'ccc': sparse_input_c, - 'ddd': sparse_input_d - } - - # Provide sparse input and get dense result. - dense_features = df.DenseFeatures( - feature_columns=(embedding_column_b, embedding_column_a, - embedding_column_c, embedding_column_d))( - features) - - # Assert expected embedding variable and lookups. 
- global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertItemsEqual( - ['aaa_bbb_shared_embedding:0', 'ccc_ddd_shared_embedding:0'], - tuple([v.name for v in global_vars])) - for v in global_vars: - self.assertIsInstance(v, variables_lib.Variable) - trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) - if trainable: - self.assertItemsEqual( - ['aaa_bbb_shared_embedding:0', 'ccc_ddd_shared_embedding:0'], - tuple([v.name for v in trainable_vars])) - else: - self.assertItemsEqual([], tuple([v.name for v in trainable_vars])) - shared_embedding_vars = global_vars - - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) - - self.assertAllEqual(embedding_values, - self.evaluate(shared_embedding_vars[0])) - self.assertAllEqual(expected_lookups, self.evaluate(dense_features)) - - @test_util.run_deprecated_v1 - def test_dense_features(self): - self._test_dense_features() - - @test_util.run_deprecated_v1 - def test_dense_features_no_trainable(self): - self._test_dense_features(trainable=False) - @test_util.run_deprecated_v1 def test_serialization(self): diff --git a/tensorflow/python/feature_column/keras_integration_test.py b/tensorflow/python/feature_column/keras_integration_test.py index e0677e84e50..456c0204350 100644 --- a/tensorflow/python/feature_column/keras_integration_test.py +++ b/tensorflow/python/feature_column/keras_integration_test.py @@ -23,12 +23,12 @@ import numpy as np from tensorflow.python import keras from tensorflow.python import tf2 from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.feature_column import dense_features_v2 from tensorflow.python.feature_column import feature_column_lib as fc from tensorflow.python.feature_column import feature_column_v2 from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras import metrics as metrics_module from tensorflow.python.keras import testing_utils +from 
tensorflow.python.keras.feature_column import dense_features_v2 from tensorflow.python.keras.optimizer_v2 import gradient_descent from tensorflow.python.keras.premade import linear from tensorflow.python.keras.premade import wide_deep diff --git a/tensorflow/python/feature_column/sequence_feature_column_test.py b/tensorflow/python/feature_column/sequence_feature_column_test.py index 3d5d24ec03a..d0cf5ee7670 100644 --- a/tensorflow/python/feature_column/sequence_feature_column_test.py +++ b/tensorflow/python/feature_column/sequence_feature_column_test.py @@ -24,7 +24,6 @@ from absl.testing import parameterized import numpy as np from tensorflow.python.client import session -from tensorflow.python.feature_column import dense_features from tensorflow.python.feature_column import feature_column_v2 as fc from tensorflow.python.feature_column import sequence_feature_column as sfc from tensorflow.python.feature_column import serialization @@ -111,54 +110,6 @@ class ConcatenateContextInputTest(test.TestCase, parameterized.TestCase): sfc.concatenate_context_input(context_input, seq_input) -@test_util.run_all_in_graph_and_eager_modes -class DenseFeaturesTest(test.TestCase): - """Tests DenseFeatures with sequence feature columns.""" - - def test_embedding_column(self): - """Tests that error is raised for sequence embedding column.""" - vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)) - - categorical_column_a = sfc.sequence_categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - embedding_column_a = fc.embedding_column( - categorical_column_a, dimension=2) - - input_layer = dense_features.DenseFeatures([embedding_column_a]) - with self.assertRaisesRegexp( - ValueError, - r'In embedding_column: aaa_embedding\. 
categorical_column must not be ' - r'of type SequenceCategoricalColumn\.'): - _ = input_layer({'aaa': sparse_input}) - - def test_indicator_column(self): - """Tests that error is raised for sequence indicator column.""" - vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)) - - categorical_column_a = sfc.sequence_categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - indicator_column_a = fc.indicator_column(categorical_column_a) - - input_layer = dense_features.DenseFeatures([indicator_column_a]) - with self.assertRaisesRegexp( - ValueError, - r'In indicator_column: aaa_indicator\. categorical_column must not be ' - r'of type SequenceCategoricalColumn\.'): - _ = input_layer({'aaa': sparse_input}) - - def _assert_sparse_tensor_value(test_case, expected, actual): _assert_sparse_tensor_indices_shape(test_case, expected, actual) diff --git a/tensorflow/python/feature_column/serialization_test.py b/tensorflow/python/feature_column/serialization_test.py index 78b72746ac9..881ca0cca5e 100644 --- a/tensorflow/python/feature_column/serialization_test.py +++ b/tensorflow/python/feature_column/serialization_test.py @@ -20,7 +20,6 @@ from __future__ import print_function from absl.testing import parameterized -from tensorflow.python.feature_column import dense_features from tensorflow.python.feature_column import feature_column_v2 as fc from tensorflow.python.feature_column import serialization from tensorflow.python.framework import test_util @@ -114,71 +113,6 @@ class FeatureColumnSerializationTest(test.TestCase): self.assertIs(new_price.normalizer_fn, _custom_fn) -@test_util.run_all_in_graph_and_eager_modes -class DenseFeaturesSerializationTest(test.TestCase, parameterized.TestCase): - - @parameterized.named_parameters( - ('default', None, None), - ('trainable', True, 'trainable'), - ('not_trainable', False, 
'frozen')) - def test_get_config(self, trainable, name): - cols = [fc.numeric_column('a'), - fc.embedding_column(fc.categorical_column_with_identity( - key='b', num_buckets=3), dimension=2)] - orig_layer = dense_features.DenseFeatures( - cols, trainable=trainable, name=name) - config = orig_layer.get_config() - - self.assertEqual(config['name'], orig_layer.name) - self.assertEqual(config['trainable'], trainable) - self.assertLen(config['feature_columns'], 2) - self.assertEqual( - config['feature_columns'][0]['class_name'], 'NumericColumn') - self.assertEqual(config['feature_columns'][0]['config']['shape'], (1,)) - self.assertEqual( - config['feature_columns'][1]['class_name'], 'EmbeddingColumn') - - @parameterized.named_parameters( - ('default', None, None), - ('trainable', True, 'trainable'), - ('not_trainable', False, 'frozen')) - def test_from_config(self, trainable, name): - cols = [fc.numeric_column('a'), - fc.embedding_column(fc.categorical_column_with_vocabulary_list( - 'b', vocabulary_list=['1', '2', '3']), dimension=2), - fc.indicator_column(fc.categorical_column_with_hash_bucket( - key='c', hash_bucket_size=3))] - orig_layer = dense_features.DenseFeatures( - cols, trainable=trainable, name=name) - config = orig_layer.get_config() - - new_layer = dense_features.DenseFeatures.from_config(config) - - self.assertEqual(new_layer.name, orig_layer.name) - self.assertEqual(new_layer.trainable, trainable) - self.assertLen(new_layer._feature_columns, 3) - self.assertEqual(new_layer._feature_columns[0].name, 'a') - self.assertEqual(new_layer._feature_columns[1].initializer.mean, 0.0) - self.assertEqual(new_layer._feature_columns[1].categorical_column.name, 'b') - self.assertIsInstance(new_layer._feature_columns[2], fc.IndicatorColumn) - - def test_crossed_column(self): - a = fc.categorical_column_with_vocabulary_list( - 'a', vocabulary_list=['1', '2', '3']) - b = fc.categorical_column_with_vocabulary_list( - 'b', vocabulary_list=['1', '2', '3']) - ab = 
fc.crossed_column([a, b], hash_bucket_size=2) - cols = [fc.indicator_column(ab)] - - orig_layer = dense_features.DenseFeatures(cols) - config = orig_layer.get_config() - - new_layer = dense_features.DenseFeatures.from_config(config) - - self.assertLen(new_layer._feature_columns, 1) - self.assertEqual(new_layer._feature_columns[0].name, 'a_X_b_indicator') - - @test_util.run_all_in_graph_and_eager_modes class LinearModelLayerSerializationTest(test.TestCase, parameterized.TestCase): diff --git a/tensorflow/python/framework/config_test.py b/tensorflow/python/framework/config_test.py index b07bb874385..3051f1d0623 100644 --- a/tensorflow/python/framework/config_test.py +++ b/tensorflow/python/framework/config_test.py @@ -159,7 +159,6 @@ class ConfigTest(test.TestCase, parameterized.TestCase): else: self.assertFalse(config.get_soft_device_placement()) - @def_function.function def mod(): with ops.device('/device:GPU:0'): a = constant_op.constant(1.0) @@ -172,8 +171,10 @@ class ConfigTest(test.TestCase, parameterized.TestCase): config.get_soft_device_placement(), context.context().soft_device_placement) - # Since soft placement is enabled, the mod operation should work with CPU + # Since soft placement is enabled, the mod operation should fallback to CPU + # with pure eager execution as well as functions mod() + def_function.function(mod)() config.set_soft_device_placement(False) self.assertEqual(config.get_soft_device_placement(), False) @@ -182,8 +183,11 @@ class ConfigTest(test.TestCase, parameterized.TestCase): context.context().soft_device_placement) # Since soft placement is disabled, the mod operation should fail on GPU + # with pure eager execution as well as functions with self.assertRaises(errors.InvalidArgumentError): mod() + with self.assertRaises(errors.InvalidArgumentError): + def_function.function(mod)() @reset_eager def testLogDevicePlacement(self): @@ -203,12 +207,8 @@ class ConfigTest(test.TestCase, parameterized.TestCase): context.ensure_initialized() - 
with self.assertRaises(RuntimeError): - context.set_log_device_placement(True) - - # If the setting the device placement is a no-op, do not throw a runtime - # exception. - context.set_log_device_placement(False) + # Changing the device placement should not throw an exception + context.set_log_device_placement(True) @reset_eager def testEnableMlirBridge(self): diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index f43663d5396..43652d51eae 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -62,6 +62,7 @@ from tensorflow.python.framework import versions from tensorflow.python.ops import control_flow_util from tensorflow.python.platform import app from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.types import core as core_tf_types from tensorflow.python.types import internal from tensorflow.python.util import compat from tensorflow.python.util import decorator_utils @@ -213,53 +214,11 @@ def _as_graph_element(obj): return None -_TENSOR_LIKE_TYPES = tuple() - - +# Deprecated - do not use. +# This API to avoid breaking estimator and tensorflow-mesh which depend on this +# internal API. The stub should be safe to use after TF 2.3 is released. def is_dense_tensor_like(t): - """EXPERIMENTAL: Returns true if `t` implements the tensor interface. - - See `register_dense_tensor_like_type()` for the current definition of a - "tensor-like type". - - Args: - t: An object. - - Returns: - True iff `t` is an instance of one of the registered "tensor-like" types. - """ - return isinstance(t, _TENSOR_LIKE_TYPES) - - -def register_dense_tensor_like_type(tensor_type): - """EXPERIMENTAL: Registers `tensor_type` as implementing the tensor interface. - - A "tensor-like type" can represent a single dense tensor, and implements - the `name`, `dtype` and `shape` properties. - - Args: - tensor_type: A type implementing the tensor interface. 
- - Raises: - TypeError: If `tensor_type` does not implement the tensor interface. - """ - if not (hasattr(tensor_type, "name") and - isinstance(tensor_type.name, property)): - raise TypeError("Type %s does not define a `name` property" % - tensor_type.__name__) - if not (hasattr(tensor_type, "dtype") and - isinstance(tensor_type.dtype, property)): - raise TypeError("Type %s does not define a `dtype` property" % - tensor_type.__name__) - if not (hasattr(tensor_type, "shape") and - isinstance(tensor_type.shape, property)): - raise TypeError("Type %s does not define a `shape` property" % - tensor_type.__name__) - # We expect this list to be small, so choose quadratic complexity - # for registration, so that we have a tuple that can be used for - # more efficient `isinstance` checks later. - global _TENSOR_LIKE_TYPES - _TENSOR_LIKE_TYPES = tuple(list(_TENSOR_LIKE_TYPES) + [tensor_type]) + return isinstance(t, core_tf_types.Tensor) def uid(): @@ -304,7 +263,7 @@ def disable_tensor_equality(): # TODO(mdan): This object should subclass Symbol, not just Tensor. @tf_export("Tensor") -class Tensor(internal.NativeObject): +class Tensor(internal.NativeObject, core_tf_types.Tensor): """A tensor is a multidimensional array of elements represented by a `tf.Tensor` object. All elements are of a single known data type. @@ -1305,9 +1264,6 @@ class _EagerTensorBase(Tensor): EagerTensor = pywrap_tfe.TFE_Py_InitEagerTensor(_EagerTensorBase) -register_dense_tensor_like_type(Tensor) - - @tf_export(v1=["convert_to_tensor"]) def convert_to_tensor_v1(value, dtype=None, @@ -6261,10 +6217,12 @@ def add_to_collection(name, value): Args: name: The key for the collection. For example, the `GraphKeys` class contains many standard names for collections. - value: The value to add to the collection. @compatibility(eager) - Collections are only supported in eager when variables are created inside - an EagerVariableStore (e.g. as part of a layer or template). 
- @end_compatibility + value: The value to add to the collection. + + @compatibility(eager) + Collections are only supported in eager when variables are created inside + an EagerVariableStore (e.g. as part of a layer or template). + @end_compatibility """ get_default_graph().add_to_collection(name, value) @@ -6279,10 +6237,12 @@ def add_to_collections(names, value): Args: names: The key for the collections. The `GraphKeys` class contains many standard names for collections. - value: The value to add to the collections. @compatibility(eager) - Collections are only supported in eager when variables are created inside - an EagerVariableStore (e.g. as part of a layer or template). - @end_compatibility + value: The value to add to the collections. + + @compatibility(eager) + Collections are only supported in eager when variables are created inside + an EagerVariableStore (e.g. as part of a layer or template). + @end_compatibility """ get_default_graph().add_to_collections(names, value) diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index 20f58a00cfe..322df8ffac8 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -3268,56 +3268,6 @@ class DeprecatedTest(test_util.TensorFlowTestCase): test_ops.old() -class DenseTensorLikeTypeTest(test_util.TensorFlowTestCase): - - @test_util.disable_tfrt("Graph is not supported yet.") - def testSuccess(self): - op = ops.Operation( - ops._NodeDef("FloatOutput", "myop"), ops.Graph(), [], [dtypes.float32]) - t = op.outputs[0] - self.assertTrue(ops.is_dense_tensor_like(t)) - - v = variables.Variable([17]) - self.assertTrue(ops.is_dense_tensor_like(v)) - - class BadClassNoName(object): - pass - - class BadClassBadName(object): - - def name(self): - pass - - class BadClassNoDtype(object): - - @property - def name(self): - pass - - class BadClassBadDtype(object): - - @property - def name(self): - pass - - def dtype(self): - pass - - def 
testBadClass(self): - with self.assertRaisesRegexp(TypeError, "`name`"): - ops.register_dense_tensor_like_type( - DenseTensorLikeTypeTest.BadClassNoName) - with self.assertRaisesRegexp(TypeError, "`name`"): - ops.register_dense_tensor_like_type( - DenseTensorLikeTypeTest.BadClassBadName) - with self.assertRaisesRegexp(TypeError, "`dtype`"): - ops.register_dense_tensor_like_type( - DenseTensorLikeTypeTest.BadClassNoDtype) - with self.assertRaisesRegexp(TypeError, "`dtype`"): - ops.register_dense_tensor_like_type( - DenseTensorLikeTypeTest.BadClassBadDtype) - - class NameScopeTest(test_util.TensorFlowTestCase): def testStripAndPrependScope(self): diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py index 50388595c3d..968b635250a 100644 --- a/tensorflow/python/framework/tensor_util.py +++ b/tensorflow/python/framework/tensor_util.py @@ -26,6 +26,7 @@ from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape +from tensorflow.python.types import core from tensorflow.python.types import internal from tensorflow.python.util import compat from tensorflow.python.util import nest @@ -1009,7 +1010,7 @@ def is_tensor(x): # pylint: disable=invalid-name `True` if `x` is a tensor or "tensor-like", `False` if not. 
""" return (isinstance(x, internal.NativeObject) or - ops.is_dense_tensor_like(x) or + isinstance(x, core.Tensor) or getattr(x, "is_tensor_like", False)) diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index fa2a4f63e6c..d5bbd889166 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -460,6 +460,38 @@ def skip_if(condition): return real_skip_if +@contextlib.contextmanager +def skip_if_error(test_obj, error_type, messages=None): + """Context manager to skip cases not considered failures by the tests. + + Note that this does not work if used in setUpClass/tearDownClass. + Usage in setUp/tearDown works fine just like regular test methods. + + Args: + test_obj: A test object provided as `self` in the test methods; this object + is usually an instance of `unittest.TestCase`'s subclass and should have + `skipTest` method. + error_type: The error type to skip. Note that if `messages` are given, both + `error_type` and `messages` need to match for the test to be skipped. + messages: Optional, a string or list of strings. If `None`, the test will be + skipped if `error_type` matches what is raised; otherwise, the test is + skipped if any of the `messages` is contained in the message of the error + raised, and `error_type` matches the error raised. + + Yields: + Nothing. + """ + if messages: + messages = nest.flatten(messages) + try: + yield + except error_type as e: + if not messages or any([message in str(e) for message in messages]): + test_obj.skipTest("Skipping error: {}".format(str(e))) + else: + raise + + def enable_c_shapes(fn): """No-op. 
TODO(b/74620627): Remove this.""" return fn diff --git a/tensorflow/python/framework/test_util_test.py b/tensorflow/python/framework/test_util_test.py index b5cb903c666..2bd75c3919e 100644 --- a/tensorflow/python/framework/test_util_test.py +++ b/tensorflow/python/framework/test_util_test.py @@ -22,6 +22,7 @@ import collections import copy import random import threading +import unittest import weakref from absl.testing import parameterized @@ -808,6 +809,66 @@ class TestUtilTest(test_util.TensorFlowTestCase, parameterized.TestCase): self.assertEqual(tested_codepaths, set(["present", "future"])) +class SkipTestTest(test_util.TensorFlowTestCase): + + def _verify_test_in_set_up_or_tear_down(self): + with self.assertRaises(unittest.SkipTest): + with test_util.skip_if_error(self, ValueError, + ["foo bar", "test message"]): + raise ValueError("test message") + try: + with self.assertRaisesRegexp(ValueError, "foo bar"): + with test_util.skip_if_error(self, ValueError, "test message"): + raise ValueError("foo bar") + except unittest.SkipTest: + raise RuntimeError("Test is not supposed to skip.") + + def setUp(self): + super(SkipTestTest, self).setUp() + self._verify_test_in_set_up_or_tear_down() + + def tearDown(self): + super(SkipTestTest, self).tearDown() + self._verify_test_in_set_up_or_tear_down() + + def test_skip_if_error_should_skip(self): + with self.assertRaises(unittest.SkipTest): + with test_util.skip_if_error(self, ValueError, "test message"): + raise ValueError("test message") + + def test_skip_if_error_should_skip_with_list(self): + with self.assertRaises(unittest.SkipTest): + with test_util.skip_if_error(self, ValueError, + ["foo bar", "test message"]): + raise ValueError("test message") + + def test_skip_if_error_should_skip_without_expected_message(self): + with self.assertRaises(unittest.SkipTest): + with test_util.skip_if_error(self, ValueError): + raise ValueError("test message") + + def test_skip_if_error_should_skip_without_error_message(self): + with 
self.assertRaises(unittest.SkipTest): + with test_util.skip_if_error(self, ValueError): + raise ValueError() + + def test_skip_if_error_should_raise_message_mismatch(self): + try: + with self.assertRaisesRegexp(ValueError, "foo bar"): + with test_util.skip_if_error(self, ValueError, "test message"): + raise ValueError("foo bar") + except unittest.SkipTest: + raise RuntimeError("Test is not supposed to skip.") + + def test_skip_if_error_should_raise_no_message(self): + try: + with self.assertRaisesRegexp(ValueError, ""): + with test_util.skip_if_error(self, ValueError, "test message"): + raise ValueError() + except unittest.SkipTest: + raise RuntimeError("Test is not supposed to skip.") + + # Its own test case to reproduce variable sharing issues which only pop up when # setUp() is overridden and super() is not called. class GraphAndEagerNoVariableSharing(test_util.TensorFlowTestCase): diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py index 503f6cf0e92..2700fae9e29 100644 --- a/tensorflow/python/keras/backend.py +++ b/tensorflow/python/keras/backend.py @@ -393,6 +393,9 @@ def _default_learning_phase(): False, shape=(), name='keras_learning_phase') +@deprecated('2020-10-11', + 'Simply pass a True/False value to the `training` argument ' + 'of the `__call__` method of your layer or model.') @keras_export('keras.backend.set_learning_phase') def set_learning_phase(value): """Sets the learning phase to a fixed value. diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py index 6748a572805..db326ea32f0 100644 --- a/tensorflow/python/keras/callbacks.py +++ b/tensorflow/python/keras/callbacks.py @@ -307,14 +307,20 @@ class CallbackList(object): end_hook_name = hook_name begin_hook_name = 'on_{mode}_batch_begin'.format(mode=mode) - threshold_time = 0.5 * batch_time + threshold_time = 1.5 * batch_time warning_msg = ('Callbacks method `{hook}` is slow compared to ' - 'the batch time. 
Check your callbacks.') + 'the batch time (batch time: {batch_time:.4f}s vs ' + '`{hook}` time: {cbk_time:.4f}s). Check your callbacks.') if self._timing[begin_hook_name] > threshold_time: - logging.warning(warning_msg.format(hook=begin_hook_name)) + logging.warning(warning_msg.format( + hook=begin_hook_name, + batch_time=batch_time, + cbk_time=self._timing[begin_hook_name])) if self._timing[end_hook_name] > threshold_time: - logging.warning(warning_msg.format(hook=end_hook_name)) - + logging.warning(warning_msg.format( + hook=end_hook_name, + batch_time=batch_time, + cbk_time=self._timing[end_hook_name])) self._check_timing = False self._batch_start_time = None diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py index 9d15f87ed79..2f1256ee3ee 100644 --- a/tensorflow/python/keras/callbacks_test.py +++ b/tensorflow/python/keras/callbacks_test.py @@ -302,8 +302,8 @@ class KerasCallbacksTest(keras_parameterized.TestCase): epochs=10, callbacks=[SleepCallback()]) warning_msg = ('Callbacks method `on_train_batch_end` is slow compared ' - 'to the batch time. Check your callbacks.') - self.assertIn(warning_msg, warning_messages) + 'to the batch time') + self.assertIn(warning_msg, '\n'.join(warning_messages)) @keras_parameterized.run_with_all_model_types(exclude_models='functional') @keras_parameterized.run_all_keras_modes diff --git a/tensorflow/python/keras/distribute/BUILD b/tensorflow/python/keras/distribute/BUILD index d379e4f3349..87625446e2f 100644 --- a/tensorflow/python/keras/distribute/BUILD +++ b/tensorflow/python/keras/distribute/BUILD @@ -362,30 +362,6 @@ cuda_py_test( ], ) -cuda_py_test( - name = "multi_worker_callback_tf1_test", - srcs = ["multi_worker_callback_tf1_test.py"], - # TODO(b/132384649): Enable for guitar and oss tests. 
- shard_count = 24, - tags = [ - "manual", - "no_oss", - "noguitar", - "notap", - ], - deps = [ - ":distribute", - ":multi_worker_testing_utils", - "//tensorflow/python:platform", - "//tensorflow/python/distribute:collective_all_reduce_strategy", - "//tensorflow/python/distribute:combinations", - "//tensorflow/python/distribute:distribute_config", - "//tensorflow/python/distribute:distribute_coordinator", - "//tensorflow/python/distribute:multi_worker_test_base", - "//tensorflow/python/keras", - ], -) - py_test( name = "multi_worker_callback_tf2_test", srcs = ["multi_worker_callback_tf2_test.py"], @@ -454,6 +430,11 @@ py_test( srcs = ["multi_worker_tutorial_test.py"], python_version = "PY3", shard_count = 5, + tags = [ + "noasan", + "nomsan", + "notsan", + ], # TODO(b/156029134) deps = [ "//tensorflow/python:platform", "//tensorflow/python/data/ops:dataset_ops", diff --git a/tensorflow/python/keras/distribute/multi_worker_callback_tf1_test.py b/tensorflow/python/keras/distribute/multi_worker_callback_tf1_test.py deleted file mode 100644 index 95a235e7b33..00000000000 --- a/tensorflow/python/keras/distribute/multi_worker_callback_tf1_test.py +++ /dev/null @@ -1,597 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for Keras callbacks in multi-worker training with TF1.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os -import sys -import tempfile -import threading - -from absl.testing import parameterized - -from tensorflow.python import keras -from tensorflow.python.distribute import collective_all_reduce_strategy as collective_strategy -from tensorflow.python.distribute import combinations -from tensorflow.python.distribute import distribute_coordinator as dc -from tensorflow.python.distribute import mirrored_strategy -from tensorflow.python.distribute import multi_worker_test_base as test_base -from tensorflow.python.distribute import multi_worker_util -from tensorflow.python.keras import backend as K -from tensorflow.python.keras import callbacks -from tensorflow.python.keras import testing_utils -from tensorflow.python.keras.distribute import multi_worker_testing_utils -from tensorflow.python.keras.distribute import multi_worker_training_state as training_state -from tensorflow.python.platform import test - - -def get_strategy_object(strategy_cls): - if strategy_cls == mirrored_strategy.MirroredStrategy: - return strategy_cls(mirrored_strategy.all_local_devices()) - else: - # CollectiveAllReduceStrategy and ParameterServerStrategy. 
- return strategy_cls() - - -def generate_callback_test_function(custom_callable): - """Generic template for callback tests using mnist synthetic dataset.""" - - @combinations.generate( - combinations.combine( - mode=['graph'], - strategy_cls=[collective_strategy.CollectiveAllReduceStrategy], - required_gpus=[0, 1], - file_format=['h5', 'tf'])) - def test_template(self, strategy_cls, file_format): - num_workers = 2 - num_epoch = 2 - - cluster_spec = test_base.create_cluster_spec(num_workers=num_workers) - self._barrier = dc._Barrier(2) - - def _independent_worker_fn(*args, **kwargs): # pylint: disable=unused-argument - """Simulates an Independent Worker inside of a thread.""" - with test.mock.patch.object(dc, '_run_std_server', - self._make_mock_run_std_server()): - strategy = get_strategy_object(strategy_cls) - batch_size = 64 - steps = 2 - train_ds, _ = multi_worker_testing_utils.mnist_synthetic_dataset( - batch_size, steps) - with strategy.scope(): - model = multi_worker_testing_utils.get_mnist_model((28, 28, 1)) - - custom_callable( - model, - self, - train_ds, - num_epoch, - steps, - strategy, - saving_filepath=kwargs['saving_filepath'], - barrier=kwargs['barrier'], - threading_local=kwargs['threading_local']) - - # Pass saving_filepath from the parent thread to ensure every worker has the - # same filepath to save. - saving_filepath = os.path.join(self.get_temp_dir(), - 'checkpoint.' 
+ file_format) - barrier = dc._Barrier(2) - threading_local = threading.local() - threads = self.run_multiple_tasks_in_threads( - _independent_worker_fn, - cluster_spec, - saving_filepath=saving_filepath, - barrier=barrier, - threading_local=threading_local) - self.assertFalse(training_state.checkpoint_exists(saving_filepath)) - - threads_to_join = [] - strategy = get_strategy_object(strategy_cls) - if strategy.extended.experimental_between_graph: - for ts in threads.values(): - threads_to_join.extend(ts) - else: - threads_to_join = [threads['worker'][0]] - self.join_independent_workers(threads_to_join) - - return test_template - - -class KerasMultiWorkerCallbackTest(test_base.IndependentWorkerTestBase, - parameterized.TestCase): - """KerasMultiWorkerCallbackTest for TF1. - - TODO(rchao): Migrate all tests in this class to - `multi_worker_callback_tf2_test`. - """ - - # The callables of the actual testing content to be run go below. - @staticmethod - def callableForTestChiefOnlyCallback(model, test_obj, train_ds, num_epoch, - steps, strategy, saving_filepath, - **kwargs): - - class ChiefOnly(keras.callbacks.Callback): - - def __init__(self): - self._chief_worker_only = True - self.filtered_correctly = True - - def on_train_begin(self, logs): - if not multi_worker_util.is_chief(): - # Non-chief workers shouldn't run this callback. - self.filtered_correctly = False - - cb = ChiefOnly() - model.fit( - x=train_ds, epochs=num_epoch, steps_per_epoch=steps, callbacks=[cb]) - - test_obj.assertTrue(cb.filtered_correctly) - - @staticmethod - def callableForTestModelCheckpointSavesOnChiefButNotOtherwise( - model, test_obj, train_ds, num_epoch, steps, strategy, saving_filepath, - **kwargs): - - extension = os.path.splitext(saving_filepath)[1] - - # Incorporate type/index information and thread id in saving_filepath to - # ensure every worker has a unique path. 
Note that in normal use case the - # saving_filepath will be the same for all workers, but we use different - # ones here just to test out chief saves checkpoint but non-chief doesn't. - - saving_filepath = os.path.join( - test_obj.get_temp_dir(), 'checkpoint_%s_%d%s' % - (test_base.get_task_type(), test_base.get_task_index(), extension)) - - # The saving_filepath shouldn't exist at the beginning (as it's unique). - test_obj.assertFalse(training_state.checkpoint_exists(saving_filepath)) - - model.fit( - x=train_ds, - epochs=num_epoch, - steps_per_epoch=steps, - callbacks=[callbacks.ModelCheckpoint(filepath=saving_filepath)]) - - # If it's chief, the model should be saved; if not, the model shouldn't. - test_obj.assertEqual( - training_state.checkpoint_exists(saving_filepath), test_base.is_chief()) - - @staticmethod - def initialFitting(test_obj, model, train_ds, num_epoch, steps, - saving_filepath): - # The saving_filepath shouldn't exist at the beginning. - test_obj.assertFalse(training_state.checkpoint_exists(saving_filepath)) - - model.fit( - x=train_ds, - epochs=num_epoch, - steps_per_epoch=steps, - callbacks=[ - callbacks.ModelCheckpoint( - filepath=saving_filepath, save_weights_only=True) - ]) - - # The saving_filepath should exist after fitting with callback. Both chief - # and non-chief worker should both see it exists (which was saved only by - # chief). - test_obj.assertTrue(training_state.checkpoint_exists(saving_filepath)) - - history_after_one_more_epoch = model.fit( - x=train_ds, epochs=1, steps_per_epoch=steps) - - # The saving_filepath should continue to exist (if it did) after fitting - # without callback. 
- test_obj.assertTrue(training_state.checkpoint_exists(saving_filepath)) - - return saving_filepath, history_after_one_more_epoch - - @staticmethod - def callableForTestLoadWeightFromModelCheckpoint(model, test_obj, train_ds, - num_epoch, steps, strategy, - saving_filepath, **kwargs): - filepaths = [] - real_mkstemp = tempfile.mkstemp - def mocked_mkstemp(): - # Only non-chief should call tempfile.mkstemp() inside fit() in sync - # training. - assert not test_base.is_chief() - file_handle, temp_file_name = real_mkstemp() - extension = os.path.splitext(saving_filepath)[1] - temp_filepath = temp_file_name + extension - filepaths.append(temp_filepath) - return file_handle, temp_file_name - - # Mock tempfile.mkstemp() so the filepaths can be stored and verified later. - with test.mock.patch.object(tempfile, 'mkstemp', mocked_mkstemp): - saving_filepath, history_after_one_more_epoch = \ - KerasMultiWorkerCallbackTest.initialFitting( - test_obj, model, train_ds, num_epoch, steps, saving_filepath) - - with strategy.scope(): - model.load_weights(saving_filepath) - - history_after_loading_weight_and_one_more_epoch = model.fit( - x=train_ds, epochs=1, steps_per_epoch=steps) - - test_obj.assertAllClose( - history_after_one_more_epoch.history, - history_after_loading_weight_and_one_more_epoch.history, - rtol=5e-5) - - # Verify the temp files are indeed removed (no trace left behind). - for filepath in filepaths: - assert not training_state.checkpoint_exists(filepath) - - @staticmethod - def callableForTestModelRestoreCallback(model, test_obj, train_ds, num_epoch, - steps, strategy, saving_filepath, - **kwargs): - - saving_filepath, history_after_one_more_epoch = \ - KerasMultiWorkerCallbackTest.initialFitting( - test_obj, model, train_ds, num_epoch, steps, saving_filepath) - - # The model should get restored to the weights previously saved, by - # adding a ModelCheckpoint callback (which results in a - # _ModelRestoreCallback being added), with load_weights_on_restart=True. 
- history_after_model_restoring_and_one_more_epoch = model.fit( - x=train_ds, - epochs=1, - steps_per_epoch=steps, - callbacks=[ - callbacks.ModelCheckpoint( - filepath=saving_filepath, - save_weights_only=True, - load_weights_on_restart=True) - ]) - - # Asserting the history one epoch after initial fitting and one epoch after - # restoring are closed. - test_obj.assertAllClose( - history_after_one_more_epoch.history, - history_after_model_restoring_and_one_more_epoch.history, - rtol=5e-5) - - history_one_more_epoch_without_model_restoring = model.fit( - x=train_ds, epochs=1, steps_per_epoch=steps) - - # Ensuring training for another epoch gives different result. - test_obj.assertNotAllClose( - history_after_model_restoring_and_one_more_epoch.history, - history_one_more_epoch_without_model_restoring.history, - rtol=5e-5) - - @staticmethod - def callableForTestBackupModelRemoved(model, test_obj, train_ds, num_epoch, - steps, strategy, saving_filepath, - **kwargs): - - # `barrier` object needs to be passed in from parent - # thread so both threads refer to the same object. - barrier = kwargs['barrier'] - - num_epoch = 3 - - # Testing the backup filepath `multi_worker_training_state` uses. - _, backup_filepath = training_state._get_backup_filepath(saving_filepath) - - # The backup_filepath shouldn't exist at the beginning. - test_obj.assertFalse(training_state.checkpoint_exists(backup_filepath)) - - # Callback to verify that the backup file exists in the middle of training. - class BackupFilepathVerifyingCallback(callbacks.Callback): - - def on_epoch_begin(self, epoch, logs=None): - if epoch > 1: - # Asserting that after the first two epochs, the backup file should - # exist. 
- test_obj.assertTrue(training_state.checkpoint_exists(backup_filepath)) - - model.fit( - x=train_ds, - epochs=num_epoch, - steps_per_epoch=steps, - callbacks=[ - callbacks.ModelCheckpoint( - filepath=saving_filepath, save_weights_only=True), - BackupFilepathVerifyingCallback() - ]) - - # Sync on the two threads so we make sure the backup file is removed before - # we move on. - barrier.wait() - - # The back up file should not exist at successful exit of `model.fit()`. - test_obj.assertFalse(training_state.checkpoint_exists(backup_filepath)) - - @staticmethod - def callableForTestBackupModelNotRemovedIfInterrupted(model, test_obj, - train_ds, num_epoch, - steps, strategy, - saving_filepath, - **kwargs): - - # `barrier` object needs to be passed in from parent - # thread so both threads refer to the same object. - barrier = kwargs['barrier'] - - num_epoch = 4 - - # Testing the backup filepath `multi_worker_training_state` uses. - _, backup_filepath = training_state._get_backup_filepath(saving_filepath) - - # The backup_filepath shouldn't exist at the beginning. - test_obj.assertFalse(training_state.checkpoint_exists(backup_filepath)) - - # Callback to interrupt in the middle of training. - class InterruptingCallback(callbacks.Callback): - - def on_epoch_begin(self, epoch, logs=None): - if epoch == 2: - raise RuntimeError('Interrupting!') - - try: - model.fit( - x=train_ds, - epochs=num_epoch, - steps_per_epoch=steps, - callbacks=[ - callbacks.ModelCheckpoint( - filepath=saving_filepath, save_weights_only=True), - InterruptingCallback() - ]) - except RuntimeError as e: - if 'Interrupting!' not in e.message: - raise - - # Sync on the two threads. - barrier.wait() - - # The back up file should exist after interruption of `model.fit()`. 
- test_obj.assertTrue(training_state.checkpoint_exists(backup_filepath)) - - @staticmethod - def callableForTestUnmatchedModelFile(model, test_obj, train_ds, num_epoch, - steps, strategy, saving_filepath, - **kwargs): - - # The saving_filepath shouldn't exist at the beginning. - test_obj.assertFalse(training_state.checkpoint_exists(saving_filepath)) - - model.fit( - x=train_ds, - epochs=num_epoch, - steps_per_epoch=steps, - callbacks=[ - callbacks.ModelCheckpoint( - filepath=saving_filepath, save_weights_only=True) - ]) - - (train_ds, _), (_, _) = testing_utils.get_test_data( - train_samples=10, test_samples=10, input_shape=(3,), num_classes=2) - - # Switch to a model of different structure. - with strategy.scope(): - model = keras.models.Sequential() - model.add(keras.layers.Dense(5, input_dim=3, activation='relu')) - model.add(keras.layers.Dense(2, activation='softmax')) - model.compile( - loss='categorical_crossentropy', optimizer='rmsprop', metrics=['acc']) - - test_obj.assertTrue(training_state.checkpoint_exists(saving_filepath)) - - if saving_filepath.endswith('.tf'): - test_obj.skipTest('Loading mismatched TF checkpoint would cause Fatal ' - 'Python error: Aborted. Skipping.') - - # Unmatched format. Should raise ValueError. - with test_obj.assertRaisesRegexp(ValueError, 'Error loading file from'): - model.fit( - x=train_ds, - epochs=num_epoch, - batch_size=8, - callbacks=[ - callbacks.ModelCheckpoint( - filepath=saving_filepath, - save_weights_only=True, - load_weights_on_restart=True) - ]) - - @staticmethod - def callableForTestReduceLROnPlateau(model, test_obj, train_ds, num_epoch, - steps, strategy, saving_filepath, - **kwargs): - - cbks = [ - callbacks.ReduceLROnPlateau( - monitor='loss', - factor=0.1, - min_delta=1, - patience=1, - cooldown=5, - verbose=1) - ] - - # It is expected that the learning rate would drop by `factor` within - # 3 epochs with `min_delta=1`. 
- model.fit(x=train_ds, epochs=3, steps_per_epoch=steps, callbacks=cbks) - test_obj.assertAllClose( - float(K.get_value(model.optimizer.lr)), 0.0001, atol=1e-8) - - # It is expected that the learning rate would drop by another `factor` - # within 3 epochs with `min_delta=1`. - model.fit(x=train_ds, epochs=3, steps_per_epoch=steps, callbacks=cbks) - test_obj.assertAllClose( - float(K.get_value(model.optimizer.lr)), 0.00001, atol=1e-8) - - @staticmethod - def callableForTestEarlyStopping(model, test_obj, train_ds, num_epoch, steps, - strategy, saving_filepath, **kwargs): - - class EpochCounterCallback(callbacks.Callback): - - def on_epoch_begin(self, epoch, logs): - self.last_epoch = epoch - - epoch_counter_cbk = EpochCounterCallback() - cbks = [ - callbacks.EarlyStopping( - monitor='loss', min_delta=0.05, patience=1, verbose=1), - epoch_counter_cbk - ] - - # Empirically, it is expected that `model.fit()` would terminate around the - # 22th epoch. Asserting that it should have been stopped before the 50th - # epoch to avoid flakiness and be more predictable. - model.fit(x=train_ds, epochs=100, steps_per_epoch=steps, callbacks=cbks) - test_obj.assertLess(epoch_counter_cbk.last_epoch, 50) - - @staticmethod - def callableForTestLearningRateScheduler(model, test_obj, train_ds, num_epoch, - steps, strategy, saving_filepath, - **kwargs): - - cbks = [ - callbacks.LearningRateScheduler( - schedule=lambda x: 1. / (1. + x), verbose=1) - ] - - # It is expected that with `epochs=2`, the learning rate would drop to - # 1 / (1 + 2) = 0.5. - model.fit(x=train_ds, epochs=2, steps_per_epoch=steps, callbacks=cbks) - test_obj.assertAllClose( - float(K.get_value(model.optimizer.lr)), 0.5, atol=1e-8) - - # It is expected that with `epochs=4`, the learning rate would drop to - # 1 / (1 + 4) = 0.25. 
- model.fit(x=train_ds, epochs=4, steps_per_epoch=steps, callbacks=cbks) - test_obj.assertAllClose( - float(K.get_value(model.optimizer.lr)), 0.25, atol=1e-8) - - # pylint: disable=g-doc-args - @staticmethod - def callableForTestIntermediateDirForFTAreRemoved(model, test_obj, train_ds, - num_epoch, steps, strategy, - saving_filepath, **kwargs): - """Testing that the temporary directory are removed. - - Some temporary directories are created for the purpose of fault tolerance. - This test ensures that such directories should have been removed at the time - `model.fit()` finishes successfully. - """ - - # `threading_local` and `barrier` objects have to be passed in from parent - # thread so both threads refer to the same object. - threading_local = kwargs['threading_local'] - barrier = kwargs['barrier'] - - # Two threads will each has one copy of `temp_dirs_supposed_to_be_removed` - # list. - threading_local.temp_dirs_supposed_to_be_removed = [] - - callbacks_list = [ - callbacks.ModelCheckpoint( - filepath=saving_filepath, - save_weights_only=True, - load_weights_on_restart=True), - ] - - # Keep the references to the real function objects. - real_os_path_join = os.path.join - real_tempfile_mkdtemp = tempfile.mkdtemp - - # Make a `os.path.join` wrapper, which will be patched onto the real - # function, so the temporary directories can be tracked. - def wrapper_os_path_join(path, *paths): - join_result = real_os_path_join(path, *paths) - if len(paths) == 1 and paths[0] == 'backup': - threading_local.temp_dirs_supposed_to_be_removed.append(join_result) - return join_result - - # Likewise for `tempfile.mkdtemp`. - def wrapper_tempfile_mkdtemp(): - result = real_tempfile_mkdtemp() - threading_local.temp_dirs_supposed_to_be_removed.append(result) - return result - - # Now the two threads must sync here: if they are out of sync, one thread - # can go ahead and patch `os.path.join` while the other has not even - # assigned the real `os.path.join` to `real_os_path_join`. 
If this happened, - # the "real" `os.path.join` the slower thread would see is actually the - # wrapper of the other. - barrier.wait() - - # Note that `os.path.join` will respect the second patch (there are two - # patches because of the two threads). Both threads will refer to the same - # copy of `wrapper_os_path_join` because of the `barrier` preceding - # `model.fit()`. Likewise for `wrapper_tempfile_mkdtemp`. - os.path.join = wrapper_os_path_join - tempfile.mkdtemp = wrapper_tempfile_mkdtemp - - barrier.wait() - model.fit( - x=train_ds, - epochs=num_epoch, - steps_per_epoch=steps, - callbacks=callbacks_list) - - # Sync before un-patching to prevent either thread from accessing the real - # functions. Also to make sure `model.fit()` is done on both threads (so we - # can safely assert the directories are removed). - barrier.wait() - os.path.join = real_os_path_join - tempfile.mkdtemp = real_tempfile_mkdtemp - - # There should be directory (names) that are supposed to be removed. - test_obj.assertTrue(threading_local.temp_dirs_supposed_to_be_removed) - for temp_dir_supposed_to_be_removed in ( - threading_local.temp_dirs_supposed_to_be_removed): - # They should have been removed and thus don't exist. - test_obj.assertFalse(os.path.exists(temp_dir_supposed_to_be_removed)) - - # The actual testing methods go here. 
- test_chief_only_callback = generate_callback_test_function( - callableForTestChiefOnlyCallback.__func__) - test_model_checkpoint_saves_on_chief_but_not_otherwise = \ - generate_callback_test_function( - callableForTestModelCheckpointSavesOnChiefButNotOtherwise.__func__) - test_load_weight_from_model_checkpoint = generate_callback_test_function( - callableForTestLoadWeightFromModelCheckpoint.__func__) - test_model_restore_callback = generate_callback_test_function( - callableForTestModelRestoreCallback.__func__) - test_unmatched_model_file = generate_callback_test_function( - callableForTestUnmatchedModelFile.__func__) - test_reduce_lr_on_plateau = generate_callback_test_function( - callableForTestReduceLROnPlateau.__func__) - test_early_stopping = generate_callback_test_function( - callableForTestEarlyStopping.__func__) - test_learning_rate_scheduler = generate_callback_test_function( - callableForTestLearningRateScheduler.__func__) - test_intermediate_dir_for_ft_are_removed = generate_callback_test_function( - callableForTestIntermediateDirForFTAreRemoved.__func__) - test_backup_model_removed = generate_callback_test_function( - callableForTestBackupModelRemoved.__func__) - test_backup_model_not_removed_if_interrupted = \ - generate_callback_test_function( - callableForTestBackupModelNotRemovedIfInterrupted.__func__) - - -if __name__ == '__main__': - with test.mock.patch.object(sys, 'exit', os._exit): - test.main() diff --git a/tensorflow/python/keras/distribute/multi_worker_tutorial_test.py b/tensorflow/python/keras/distribute/multi_worker_tutorial_test.py index 0a9c5547f5a..1a46bcd7499 100644 --- a/tensorflow/python/keras/distribute/multi_worker_tutorial_test.py +++ b/tensorflow/python/keras/distribute/multi_worker_tutorial_test.py @@ -28,6 +28,8 @@ from tensorflow.python.distribute import collective_all_reduce_strategy from tensorflow.python.distribute import combinations from tensorflow.python.distribute import multi_process_runner from 
tensorflow.python.distribute import multi_worker_test_base +from tensorflow.python.framework import errors_impl +from tensorflow.python.framework import test_util from tensorflow.python.keras.datasets import mnist from tensorflow.python.keras.optimizer_v2 import gradient_descent from tensorflow.python.platform import test @@ -122,10 +124,11 @@ class MultiWorkerTutorialTest(parameterized.TestCase, test.TestCase): steps_per_epoch=70, callbacks=callbacks) - mpr_result = multi_process_runner.run( - proc_func, - multi_worker_test_base.create_cluster_spec(num_workers=num_workers), - list_stdout=True) + with test_util.skip_if_error(self, errors_impl.UnavailableError): + mpr_result = multi_process_runner.run( + proc_func, + multi_worker_test_base.create_cluster_spec(num_workers=num_workers), + list_stdout=True) def extract_accuracy(worker_id, input_string): match = re.match( diff --git a/tensorflow/python/keras/engine/BUILD b/tensorflow/python/keras/engine/BUILD index 203e481170f..1ff15d7e2e1 100644 --- a/tensorflow/python/keras/engine/BUILD +++ b/tensorflow/python/keras/engine/BUILD @@ -21,8 +21,8 @@ py_library( srcs = [ "__init__.py", "compile_utils.py", + "functional.py", "input_layer.py", - "network.py", "node.py", "partial_batch_padding_handler.py", "saving.py", @@ -460,9 +460,9 @@ tf_py_test( ) tf_py_test( - name = "network_test", + name = "functional_test", size = "medium", - srcs = ["network_test.py"], + srcs = ["functional_test.py"], python_version = "PY3", shard_count = 8, tags = [ diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py index 210f56ae87a..f6fa17df5c2 100644 --- a/tensorflow/python/keras/engine/base_layer.py +++ b/tensorflow/python/keras/engine/base_layer.py @@ -1006,13 +1006,23 @@ class Layer(module.Module, version_utils.LayerVersionSelector): """Whether the layer is dynamic (eager-only); set in the constructor.""" # NOTE(taylorrobie): Currently self._dynamic is read-only. 
If that changes # then this cache logic must be updated. - return self._dynamic + return self._dynamic or any(layer.dynamic + for layer in self._unique_sublayers()) + + def _unique_sublayers(self): + # Model.layers will use this as implementation, but we can't expose this + # one as the public property since it might conflict with subclass layers + # which also have user defined layers property. + self._maybe_create_attribute('_layers', []) + return list( + trackable_layer_utils.filter_empty_layer_containers(self._layers)) @property @doc_controls.do_not_doc_inheritable @trackable_layer_utils.cache_recursive_attribute('stateful') def stateful(self): - return self._stateful + return self._stateful or any( + getattr(layer, 'stateful', False) for layer in self._unique_sublayers()) @stateful.setter @trackable_layer_utils.invalidate_recursive_cache('stateful') diff --git a/tensorflow/python/keras/engine/base_layer_utils.py b/tensorflow/python/keras/engine/base_layer_utils.py index 586efda4680..c5e00d8e38e 100644 --- a/tensorflow/python/keras/engine/base_layer_utils.py +++ b/tensorflow/python/keras/engine/base_layer_utils.py @@ -676,16 +676,8 @@ def enable_v2_dtype_behavior(): float32) instead of None. In addition, layers will automatically cast floating-point inputs to the layer's dtype. 
- >>> tf.compat.v1.keras.layers.disable_v2_dtype_behavior() >>> x = tf.ones((4, 4, 4, 4), dtype='float64') >>> layer = tf.keras.layers.Conv2D(filters=4, kernel_size=2) - >>> print(layer.dtype) # None since V2 behavior is disabled - None - >>> y = layer(x) # Doesn't cast inputs since V2 dtype behavior is disabled - >>> print(y.dtype.name) - float64 - >>> tf.compat.v1.keras.layers.enable_v2_dtype_behavior() - >>> layer = tf.keras.layers.Conv2D(filters=4, kernel_size=2) >>> print(layer.dtype) # float32 since V2 dtype behavior is enabled float32 >>> y = layer(x) # Layer casts inputs since V2 dtype behavior is enabled diff --git a/tensorflow/python/keras/engine/base_layer_v1.py b/tensorflow/python/keras/engine/base_layer_v1.py index 626892752c8..24d12ae4d59 100644 --- a/tensorflow/python/keras/engine/base_layer_v1.py +++ b/tensorflow/python/keras/engine/base_layer_v1.py @@ -833,13 +833,15 @@ class Layer(base_layer.Layer): def dynamic(self): # NOTE(taylorrobie): Currently self._dynamic is read-only. If that changes # then this cache logic must be updated. 
- return self._dynamic + return self._dynamic or any(layer.dynamic + for layer in self._unique_sublayers()) @property @doc_controls.do_not_generate_docs @trackable_layer_utils.cache_recursive_attribute('stateful') def stateful(self): - return self._stateful + return self._stateful or any( + getattr(layer, 'stateful', False) for layer in self._unique_sublayers()) @stateful.setter @trackable_layer_utils.invalidate_recursive_cache('stateful') diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/functional.py similarity index 58% rename from tensorflow/python/keras/engine/network.py rename to tensorflow/python/keras/engine/functional.py index 87d1953ace5..80eb6cb27d5 100644 --- a/tensorflow/python/keras/engine/network.py +++ b/tensorflow/python/keras/engine/functional.py @@ -22,84 +22,46 @@ from __future__ import print_function import collections import copy import itertools -import json -import os -import six from six.moves import zip # pylint: disable=redefined-builtin -from tensorflow.python.eager import context from tensorflow.python.framework import composite_tensor -from tensorflow.python.framework import errors -from tensorflow.python.framework import errors_impl -from tensorflow.python.framework import func_graph from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape from tensorflow.python.keras import backend from tensorflow.python.keras.engine import base_layer from tensorflow.python.keras.engine import base_layer_utils -from tensorflow.python.keras.engine import compile_utils from tensorflow.python.keras.engine import input_layer as input_layer_module +from tensorflow.python.keras.engine import training as training_lib from tensorflow.python.keras.engine import training_utils -from tensorflow.python.keras.saving import hdf5_format -from tensorflow.python.keras.saving import save from tensorflow.python.keras.saving.saved_model import network_serialization from 
tensorflow.python.keras.utils import generic_utils -from tensorflow.python.keras.utils import layer_utils from tensorflow.python.keras.utils import tf_utils -from tensorflow.python.keras.utils.io_utils import ask_to_proceed_with_overwrite -from tensorflow.python.keras.utils.io_utils import path_to_string from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.platform import tf_logging as logging -from tensorflow.python.training import checkpoint_management -from tensorflow.python.training import py_checkpoint_reader from tensorflow.python.training.tracking import base as trackable -from tensorflow.python.training.tracking import data_structures -from tensorflow.python.training.tracking import layer_utils as trackable_layer_utils -from tensorflow.python.training.tracking import tracking -from tensorflow.python.training.tracking import util as trackable_utils -from tensorflow.python.util import deprecation from tensorflow.python.util import nest -from tensorflow.python.util import serialization from tensorflow.python.util import tf_inspect -from tensorflow.tools.docs import doc_controls -# pylint: disable=g-import-not-at-top -try: - import h5py -except ImportError: - h5py = None +# pylint: disable=g-classes-have-attributes +class Functional(training_lib.Model): + """A `Functional` model is a `Model` defined as a directed graph of layers. -try: - import yaml -except ImportError: - yaml = None -# pylint: enable=g-import-not-at-top - - -class Network(base_layer.Layer): - """A `Network` is a composition of layers. - - `Network` is the topological form of a "model". A `Model` - is simply a `Network` with added training routines. - - Two types of `Networks` exist: Graph Networks and Subclass Networks. Graph - networks are used in the Keras Functional and Sequential APIs. Subclassed - networks are used when a user subclasses the `Model` class. 
In general, - more Keras features are supported with Graph Networks than with Subclassed - Networks, specifically: + Three types of `Model` exist: subclassed `Model`, `Functional` model, + and `Sequential` (a special case of `Functional`). + In general, more Keras features are supported with `Functional` + than with subclassed `Model`s, specifically: - Model cloning (`keras.models.clone`) - Serialization (`model.get_config()/from_config`, `model.to_json()/to_yaml()` - Whole-model saving (`model.save()`) - A Graph Network can be instantiated by passing two arguments to `__init__`. - The first argument is the `keras.Input` Tensors that represent the inputs - to the Network. The second argument specifies the output Tensors that - represent the outputs of this Network. Both arguments can be a nested - structure of Tensors. + A `Functional` model can be instantiated by passing two arguments to + `__init__`. The first argument is the `keras.Input` Tensors that represent + the inputs to the model. The second argument specifies the output + tensors that represent the outputs of this model. Both arguments can be a + nested structure of tensors. Example: @@ -107,10 +69,10 @@ class Network(base_layer.Layer): inputs = {'x1': keras.Input(shape=(10,)), 'x2': keras.Input(shape=(1,))} t = keras.layers.Dense(1, activation='relu')(inputs['x1']) outputs = keras.layers.Add()([t, inputs['x2']) - network = Network(inputs, outputs) + model = keras.Model(inputs, outputs) ``` - A Graph Network constructed using the Functional API can also include raw + A `Functional` model constructed using the Functional API can also include raw TensorFlow functions, with the exception of functions that create Variables or assign ops. 
@@ -120,38 +82,14 @@ class Network(base_layer.Layer): inputs = keras.Input(shape=(10,)) x = keras.layers.Dense(1)(inputs) outputs = tf.nn.relu(x) - network = Network(inputs, outputs) + model = keras.Model(inputs, outputs) ``` - Subclassed Networks can be instantiated via `name` and (optional) `dynamic` - keyword arguments. Subclassed Networks keep track of their Layers, and their - `call` method can be overridden. Subclassed Networks are typically created - indirectly, by subclassing the `Model` class. - - Example: - - ``` - class MyModel(keras.Model): - def __init__(self): - super(MyModel, self).__init__(name='my_model', dynamic=False) - - self.layer1 = keras.layers.Dense(10, activation='relu') - - def call(self, inputs): - return self.layer1(inputs) - ``` - - Allowed args in `super().__init__`: - name: String name of the model. - dynamic: (Subclassed models only) Set this to `True` if your model should - only be run eagerly, and should not be used to generate a static - computation graph. This attribute is automatically set for Functional API - models. + Arguments: + inputs: List of input tensors (must be created via `tf.keras.Input()`). + outputs: List of outputs tensors. + name: String, optional. Name of the model. trainable: Boolean, whether the model's variables should be trainable. - dtype: (Subclassed models only) Default dtype of the model's weights ( - default of `None` means use the type of the first input). This attribute - has no effect on Functional API models, which do not have weights of their - own. """ # See tf.Module for the usage of this property. 
@@ -160,79 +98,31 @@ class Network(base_layer.Layer): _TF_MODULE_IGNORED_PROPERTIES = frozenset(itertools.chain( ('_layer_call_argspecs', '_compiled_trainable_state', '_output_mask_cache', '_output_tensor_cache', '_output_shape_cache'), - base_layer.Layer._TF_MODULE_IGNORED_PROPERTIES + training_lib.Model._TF_MODULE_IGNORED_PROPERTIES )) - def __init__(self, *args, **kwargs): # pylint: disable=super-init-not-called - # Signature detection - if (len(args) == 2 or - len(args) == 1 and 'outputs' in kwargs or - 'inputs' in kwargs and 'outputs' in kwargs): - # Graph network - self._init_graph_network(*args, **kwargs) - else: - # Subclassed network - self._init_subclassed_network(**kwargs) - - tf_utils.assert_no_legacy_layers(self.layers) - - # Several Network methods have "no_automatic_dependency_tracking" - # annotations. Since Network does automatic dependency tracking on attribute - # assignment, including for common data structures such as lists, by default - # we'd have quite a few empty dependencies which users don't care about (or - # would need some way to ignore dependencies automatically, which is confusing - # when applied to user code). Some attributes, such as _layers, would cause - # structural issues (_layers being the place where Layers assigned to tracked - # attributes are stored). - # - # Aside from these aesthetic and structural issues, useless dependencies on - # empty lists shouldn't cause issues; adding or removing them will not break - # checkpoints, but may cause "all Python objects matched" assertions to fail - # (in which case less strict assertions may be substituted if necessary). 
@trackable.no_automatic_dependency_tracking - def _base_init(self, **kwargs): - # The following are implemented as property functions: - # self.trainable_weights - # self.non_trainable_weights - # self.input_spec - # self.losses - # self.updates - - generic_utils.validate_kwargs(kwargs, {'trainable', 'dtype', 'dynamic', - 'name', 'autocast'}) - - super(Network, self).__init__(**kwargs) - - self.input_names = None - self.output_names = None - self._saved_model_inputs_spec = None - - # This is True for Sequential networks and Functional networks. - self._compute_output_and_mask_jointly = False - - # Don't reset compilation if already done. This may occur if calling - # `__init__` (or `_init_graph_network`) on an already-compiled model - # such as a Sequential model. Sequential models may need to rebuild - # themselves after compilation. - self._maybe_create_attribute('_is_compiled', False) - self._maybe_create_attribute('optimizer', None) - - self._trackable_saver = ( - trackable_utils.saver_with_op_caching(self)) + def __init__(self, inputs=None, outputs=None, name=None, trainable=True): + # generic_utils.validate_kwargs( + # kwargs, {'name', 'trainable'}, + # 'Functional models may only specify `name` and `trainable` keyword ' + # 'arguments during initialization. Got an unexpected argument:') + super(Functional, self).__init__(name=name, trainable=trainable) + self._init_graph_network(inputs, outputs) @trackable.no_automatic_dependency_tracking - def _init_graph_network(self, inputs, outputs, **kwargs): - generic_utils.validate_kwargs( - kwargs, {'name', 'trainable'}, - 'Functional models may only specify `name` and `trainable` keyword ' - 'arguments during initialization. Got an unexpected argument:') + def _init_graph_network(self, inputs, outputs): + # This method is needed for Sequential to reinitialize graph network when + # layer is added or removed. + self._is_graph_network = True + # Normalize and set self.inputs, self.outputs. 
if isinstance(inputs, list) and len(nest.flatten(inputs)) == 1: inputs = inputs[0] if isinstance(outputs, list) and len(nest.flatten(outputs)) == 1: outputs = outputs[0] - self._nested_outputs = outputs self._nested_inputs = inputs + self._nested_outputs = outputs self.inputs = nest.flatten(inputs) self.outputs = nest.flatten(outputs) @@ -247,7 +137,6 @@ class Network(base_layer.Layer): if any(not hasattr(tensor, '_keras_history') for tensor in self.outputs): base_layer_utils.create_keras_history(self._nested_outputs) - self._base_init(**kwargs) self._validate_graph_inputs_and_outputs() # A Network does not create weights of its own, thus it is already @@ -255,7 +144,6 @@ class Network(base_layer.Layer): self.built = True self._build_input_shape = nest.map_structure(lambda x: x.shape, inputs) self._compute_output_and_mask_jointly = True - self._is_graph_network = True # `_expects_training_arg` is True since the `training` argument is always # present in the signature of the `call` method of a graph network. self._expects_training_arg = True @@ -325,6 +213,7 @@ class Network(base_layer.Layer): self._compute_tensor_usage_count() self._set_save_spec(self._nested_inputs) + tf_utils.assert_no_legacy_layers(self.layers) @property def input(self): @@ -340,9 +229,7 @@ class Network(base_layer.Layer): RuntimeError: If called in Eager mode. AttributeError: If no inbound nodes are found. """ - if self._is_graph_network: - return self._nested_inputs - return super(Network, self).input + return self._nested_inputs @property def input_shape(self): @@ -360,9 +247,7 @@ class Network(base_layer.Layer): AttributeError: if the layer has no defined input_shape. RuntimeError: if called in Eager mode. """ - if self._is_graph_network: - return nest.map_structure(backend.int_shape, self.input) - return super(Network, self).input_shape + return nest.map_structure(backend.int_shape, self.input) @property def output(self): @@ -379,9 +264,7 @@ class Network(base_layer.Layer): layers. 
RuntimeError: if called in Eager mode. """ - if self._is_graph_network: - return self._nested_outputs - return super(Network, self).output + return self._nested_outputs @property def output_shape(self): @@ -398,9 +281,7 @@ class Network(base_layer.Layer): AttributeError: if the layer has no defined output shape. RuntimeError: if called in Eager mode. """ - if self._is_graph_network: - return nest.map_structure(backend.int_shape, self.output) - return super(Network, self).output_shape + return nest.map_structure(backend.int_shape, self.output) def _set_output_names(self): """Assigns unique names to the Network's outputs. @@ -421,29 +302,9 @@ class Network(base_layer.Layer): uniquified.append(proposal) self.output_names = uniquified - @trackable.no_automatic_dependency_tracking - def _init_subclassed_network(self, **kwargs): - self._base_init(**kwargs) - self._is_graph_network = False - self.inputs = None - self.outputs = None - - @property - @trackable_layer_utils.cache_recursive_attribute('dynamic') - def dynamic(self): - if self._is_graph_network: - return any(layer.dynamic for layer in self.layers) - return self._dynamic or any(layer.dynamic for layer in self.layers) - @property def _layer_checkpoint_dependencies(self): """Dictionary of layer dependencies to be included in the checkpoint.""" - # Use getattr because this function can be called from __setattr__, at which - # point the _is_graph_network attribute has not been created. 
- if (not getattr(self, '_is_graph_network', False) and - base_layer_utils.is_subclassed(self)): - return {} # Only add layer dependencies for graph networks - weight_layer_index = 0 dependencies = collections.OrderedDict() @@ -470,14 +331,14 @@ class Network(base_layer.Layer): dependencies = [ trackable.TrackableReference(name=name, ref=layer) for name, layer in self._layer_checkpoint_dependencies.items()] - dependencies.extend(super(Network, self)._checkpoint_dependencies) + dependencies.extend(super(Functional, self)._checkpoint_dependencies) return dependencies def _lookup_dependency(self, name): layer_dependencies = self._layer_checkpoint_dependencies if name in layer_dependencies: return layer_dependencies[name] - return super(Network, self)._lookup_dependency(name) + return super(Functional, self)._lookup_dependency(name) def _handle_deferred_layer_dependencies(self, layers): """Handles layer checkpoint dependencies that are added after init.""" @@ -488,263 +349,17 @@ class Network(base_layer.Layer): self._handle_deferred_dependencies(name=layer_to_name[layer], trackable=layer) - def __setattr__(self, name, value): - if not getattr(self, '_self_setattr_tracking', True): - super(Network, self).__setattr__(name, value) - return - - if all( - isinstance(v, (base_layer.Layer, - data_structures.TrackableDataStructure)) or - trackable_layer_utils.has_weights(v) for v in nest.flatten(value)): - try: - self._is_graph_network - except AttributeError: - # six.raise_from supresses the original AttributeError from being raised - six.raise_from( - RuntimeError('It looks like you are subclassing `Model` and you ' - 'forgot to call `super(YourClass, self).__init__()`.' - ' Always start with this line.'), None) - - super(Network, self).__setattr__(name, value) - - # Keep track of metric instance created in subclassed model/layer. - # We do this so that we can maintain the correct order of metrics by adding - # the instance to the `metrics` list as soon as it is created. 
- from tensorflow.python.keras import metrics as metrics_module # pylint: disable=g-import-not-at-top - if isinstance(value, metrics_module.Metric): - self._metrics.append(value) - @property - @trackable_layer_utils.cache_recursive_attribute('stateful') - def stateful(self): - return any(getattr(layer, 'stateful', False) for layer in self.layers) - - def reset_states(self): - for layer in self.layers: - if hasattr(layer, 'reset_states') and getattr(layer, 'stateful', False): - layer.reset_states() - - @property - @deprecation.deprecated( - date=None, - instructions='This property should not be used in TensorFlow 2.0, ' - 'as updates are applied automatically.') - @doc_controls.do_not_generate_docs - def state_updates(self): - """Deprecated, do NOT use! - - Returns the `updates` from all layers that are stateful. - - This is useful for separating training updates and - state updates, e.g. when we need to update a layer's internal state - during prediction. - - Returns: - A list of update ops. - """ - state_updates = [] - for layer in self.layers: - if getattr(layer, 'stateful', False): - if hasattr(layer, 'updates'): - state_updates += layer.updates - return state_updates - - @property - def weights(self): - """Returns the list of all layer variables/weights. - - Returns: - A list of variables. 
- """ - return self._dedup_weights(self._undeduplicated_weights) - - @property - def _undeduplicated_weights(self): - """Returns the undeduplicated list of all layer variables/weights.""" - self._assert_weights_created() - weights = [] - for layer in self._layers: - weights += layer.weights - weights += (self._trainable_weights + self._non_trainable_weights) - return weights - - @property - @tracking.cached_per_instance def _should_compute_mask(self): - return self._is_graph_network and super(Network, self)._should_compute_mask + return True def compute_mask(self, inputs, mask): - if not self._is_graph_network: - return None - # TODO(omalleyt): b/123540974 This function is not really safe to call # by itself because it will duplicate any updates and losses in graph # mode by `call`ing the Layers again. output_tensors = self._run_internal_graph(inputs, mask=mask) return nest.map_structure(lambda t: t._keras_mask, output_tensors) - @property - def layers(self): - return list( - trackable_layer_utils.filter_empty_layer_containers(self._layers)) - - def get_layer(self, name=None, index=None): - """Retrieves a layer based on either its name (unique) or index. - - If `name` and `index` are both provided, `index` will take precedence. - Indices are based on order of horizontal graph traversal (bottom-up). - - Arguments: - name: String, name of layer. - index: Integer, index of layer. - - Returns: - A layer instance. - - Raises: - ValueError: In case of invalid layer name or index. - """ - # TODO(fchollet): We could build a dictionary based on layer names - # since they are constant, but we have not done that yet. 
- if index is not None and name is not None: - raise ValueError('Provide only a layer name or a layer index.') - - if index is not None: - if len(self.layers) <= index: - raise ValueError('Was asked to retrieve layer at index ' + str(index) + - ' but model only has ' + str(len(self.layers)) + - ' layers.') - else: - return self.layers[index] - - if name is not None: - for layer in self.layers: - if layer.name == name: - return layer - raise ValueError('No such layer: ' + name + '.') - raise ValueError('Provide either a layer name or layer index.') - - @property - def trainable_weights(self): - self._assert_weights_created() - return self._dedup_weights( - trackable_layer_utils.gather_trainable_weights( - trainable=self.trainable, - sub_layers=self._layers, - extra_variables=self._trainable_weights)) - - @property - def non_trainable_weights(self): - self._assert_weights_created() - return self._dedup_weights( - trackable_layer_utils.gather_non_trainable_weights( - trainable=self.trainable, - sub_layers=self._layers, - extra_variables=self._non_trainable_weights + - self._trainable_weights)) - - @generic_utils.default - def build(self, input_shape): - """Builds the model based on input shapes received. - - This is to be used for subclassed models, which do not know at instantiation - time what their inputs look like. - - This method only exists for users who want to call `model.build()` in a - standalone way (as a substitute for calling the model on real data to - build it). It will never be called by the framework (and thus it will - never throw unexpected errors in an unrelated workflow). - - Args: - input_shape: Single tuple, TensorShape, or list of shapes, where shapes - are tuples, integers, or TensorShapes. - - Raises: - ValueError: - 1. In case of invalid user-provided data (not of type tuple, - list, or TensorShape). - 2. If the model requires call arguments that are agnostic - to the input shapes (positional or kwarg in call signature). - 3. 
If not all layers were properly built. - 4. If float type inputs are not supported within the layers. - - In each of these cases, the user should build their model by calling it - on real tensor data. - """ - if self._is_graph_network: - super(Network, self).build(input_shape) - return - - # If subclass network - if input_shape is None: - raise ValueError('Input shape must be defined when calling build on a ' - 'model subclass network.') - valid_types = (tuple, list, tensor_shape.TensorShape) - if not isinstance(input_shape, valid_types): - raise ValueError('Specified input shape is not one of the valid types. ' - 'Please specify a batch input shape of type tuple or ' - 'list of input shapes. User provided ' - 'input type: {}'.format(type(input_shape))) - - if input_shape and not self.inputs: - # We create placeholders for the `None`s in the shape and build the model - # in a Graph. Since tf.Variable is compatible with both eager execution - # and graph building, the variables created after building the model in - # a Graph are still valid when executing eagerly. - if context.executing_eagerly(): - graph = func_graph.FuncGraph('build_graph') - else: - graph = backend.get_graph() - with graph.as_default(): - if isinstance(input_shape, list): - x = [base_layer_utils.generate_placeholders_from_shape(shape) - for shape in input_shape] - elif isinstance(input_shape, dict): - x = { - k: base_layer_utils.generate_placeholders_from_shape(shape) - for k, shape in input_shape.items() - } - else: - x = base_layer_utils.generate_placeholders_from_shape(input_shape) - - kwargs = {} - call_signature = self._call_full_argspec - call_args = call_signature.args - # Exclude `self`, `inputs`, and any argument with a default value. 
- if len(call_args) > 2: - if call_signature.defaults: - call_args = call_args[2:-len(call_signature.defaults)] - else: - call_args = call_args[2:] - for arg in call_args: - if arg == 'training': - # Case where `training` is a positional arg with no default. - kwargs['training'] = False - else: - # Has invalid call signature with unknown positional arguments. - raise ValueError( - 'Currently, you cannot build your model if it has ' - 'positional or keyword arguments that are not ' - 'inputs to the model, but are required for its ' - '`call` method. Instead, in order to instantiate ' - 'and build your model, `call` your model on real ' - 'tensor data with all expected call arguments.') - elif len(call_args) < 2: - # Signature without `inputs`. - raise ValueError('You can only call `build` on a model if its `call` ' - 'method accepts an `inputs` argument.') - try: - self.call(x, **kwargs) - except (errors.InvalidArgumentError, TypeError): - raise ValueError('You cannot build your model by calling `build` ' - 'if your layers do not support float type inputs. ' - 'Instead, in order to instantiate and build your ' - 'model, `call` your model on real tensor data (of ' - 'the correct dtype).') - - super(Network, self).build(input_shape) - def call(self, inputs, training=None, mask=None): """Calls the model on new inputs. @@ -763,17 +378,10 @@ class Network(base_layer.Layer): A tensor if there is a single output, or a list of tensors if there are more than one outputs. """ - if not self._is_graph_network: - raise NotImplementedError('When subclassing the `Model` class, you should' - ' implement a `call` method.') - return self._run_internal_graph( inputs, training=training, mask=mask) def compute_output_shape(self, input_shape): - if not self._is_graph_network: - return super(Network, self).compute_output_shape(input_shape) - # Convert any shapes in tuple format to TensorShapes. 
input_shape = tf_utils.convert_shapes(input_shape, to_tuples=False) @@ -975,8 +583,6 @@ class Network(base_layer.Layer): return tensor def get_config(self): - if not self._is_graph_network: - raise NotImplementedError return copy.deepcopy(get_network_config(self)) @classmethod @@ -1002,373 +608,6 @@ class Network(base_layer.Layer): connect_ancillary_layers(model, created_layers) return model - def save(self, - filepath, - overwrite=True, - include_optimizer=True, - save_format=None, - signatures=None, - options=None): - """Saves the model to Tensorflow SavedModel or a single HDF5 file. - - The savefile includes: - - - The model architecture, allowing to re-instantiate the model. - - The model weights. - - The state of the optimizer, allowing to resume training - exactly where you left off. - - This allows you to save the entirety of the state of a model - in a single file. - - Saved models can be reinstantiated via `keras.models.load_model`. - The model returned by `load_model` is a compiled model ready to be used - (unless the saved model was never compiled in the first place). - - Models built with the Sequential and Functional API can be saved to both the - HDF5 and SavedModel formats. Subclassed models can only be saved with the - SavedModel format. - - Note that the model weights may have different scoped names after being - loaded. Scoped names include the model/layer names, such as - `"dense_1/kernel:0"`. It is recommended that you use the layer properties to - access specific variables, e.g. `model.get_layer("dense_1").kernel`. - - Arguments: - filepath: String, PathLike, path to SavedModel or H5 file to save the - model. - overwrite: Whether to silently overwrite any existing file at the - target location, or provide the user with a manual prompt. - include_optimizer: If True, save optimizer's state together. - save_format: Either `'tf'` or `'h5'`, indicating whether to save the - model to Tensorflow SavedModel or HDF5. 
Defaults to 'tf' in TF 2.X, - and 'h5' in TF 1.X. - signatures: Signatures to save with the SavedModel. Applicable to the - 'tf' format only. Please see the `signatures` argument in - `tf.saved_model.save` for details. - options: Optional `tf.saved_model.SaveOptions` object that specifies - options for saving to SavedModel. - - Example: - - ```python - from keras.models import load_model - - model.save('my_model.h5') # creates a HDF5 file 'my_model.h5' - del model # deletes the existing model - - # returns a compiled model - # identical to the previous one - model = load_model('my_model.h5') - ``` - """ - save.save_model(self, filepath, overwrite, include_optimizer, save_format, - signatures, options) - - def save_weights(self, filepath, overwrite=True, save_format=None): - """Saves all layer weights. - - Either saves in HDF5 or in TensorFlow format based on the `save_format` - argument. - - When saving in HDF5 format, the weight file has: - - `layer_names` (attribute), a list of strings - (ordered names of model layers). - - For every layer, a `group` named `layer.name` - - For every such layer group, a group attribute `weight_names`, - a list of strings - (ordered names of weights tensor of the layer). - - For every weight in the layer, a dataset - storing the weight value, named after the weight tensor. - - When saving in TensorFlow format, all objects referenced by the network are - saved in the same format as `tf.train.Checkpoint`, including any `Layer` - instances or `Optimizer` instances assigned to object attributes. For - networks constructed from inputs and outputs using `tf.keras.Model(inputs, - outputs)`, `Layer` instances used by the network are tracked/saved - automatically. For user-defined classes which inherit from `tf.keras.Model`, - `Layer` instances must be assigned to object attributes, typically in the - constructor. See the documentation of `tf.train.Checkpoint` and - `tf.keras.Model` for details. 
- - While the formats are the same, do not mix `save_weights` and - `tf.train.Checkpoint`. Checkpoints saved by `Model.save_weights` should be - loaded using `Model.load_weights`. Checkpoints saved using - `tf.train.Checkpoint.save` should be restored using the corresponding - `tf.train.Checkpoint.restore`. Prefer `tf.train.Checkpoint` over - `save_weights` for training checkpoints. - - The TensorFlow format matches objects and variables by starting at a root - object, `self` for `save_weights`, and greedily matching attribute - names. For `Model.save` this is the `Model`, and for `Checkpoint.save` this - is the `Checkpoint` even if the `Checkpoint` has a model attached. This - means saving a `tf.keras.Model` using `save_weights` and loading into a - `tf.train.Checkpoint` with a `Model` attached (or vice versa) will not match - the `Model`'s variables. See the [guide to training - checkpoints](https://www.tensorflow.org/guide/checkpoint) for details - on the TensorFlow format. - - Arguments: - filepath: String or PathLike, path to the file to save the weights to. - When saving in TensorFlow format, this is the prefix used for - checkpoint files (multiple files are generated). Note that the '.h5' - suffix causes weights to be saved in HDF5 format. - overwrite: Whether to silently overwrite any existing file at the - target location, or provide the user with a manual prompt. - save_format: Either 'tf' or 'h5'. A `filepath` ending in '.h5' or - '.keras' will default to HDF5 if `save_format` is `None`. Otherwise - `None` defaults to 'tf'. - - Raises: - ImportError: If h5py is not available when attempting to save in HDF5 - format. - ValueError: For invalid/unknown format arguments. 
- """ - self._assert_weights_created() - filepath = path_to_string(filepath) - filepath_is_h5 = _is_hdf5_filepath(filepath) - if save_format is None: - if filepath_is_h5: - save_format = 'h5' - else: - save_format = 'tf' - else: - user_format = save_format.lower().strip() - if user_format in ('tensorflow', 'tf'): - save_format = 'tf' - elif user_format in ('hdf5', 'h5', 'keras'): - save_format = 'h5' - else: - raise ValueError( - 'Unknown format "%s". Was expecting one of {"tf", "h5"}.' % ( - save_format,)) - if save_format == 'tf' and filepath_is_h5: - raise ValueError( - ('save_weights got save_format="tf"/"tensorflow", but the ' - 'filepath ("%s") looks like an HDF5 file. Omit the ".h5"/".keras" ' - 'when saving in TensorFlow format.') - % filepath) - - if save_format == 'h5' and h5py is None: - raise ImportError( - '`save_weights` requires h5py when saving in hdf5.') - if save_format == 'tf': - check_filepath = filepath + '.index' - else: - check_filepath = filepath - # If file exists and should not be overwritten: - if not overwrite and os.path.isfile(check_filepath): - proceed = ask_to_proceed_with_overwrite(check_filepath) - if not proceed: - return - if save_format == 'h5': - with h5py.File(filepath, 'w') as f: - hdf5_format.save_weights_to_hdf5_group(f, self.layers) - else: - if context.executing_eagerly(): - session = None - else: - session = backend.get_session() - optimizer = getattr(self, 'optimizer', None) - if (optimizer - and not isinstance(optimizer, trackable.Trackable)): - logging.warning( - ('This model was compiled with a Keras optimizer (%s) but is being ' - 'saved in TensorFlow format with `save_weights`. 
The model\'s ' - 'weights will be saved, but unlike with TensorFlow optimizers in ' - 'the TensorFlow format the optimizer\'s state will not be ' - 'saved.\n\nConsider using a TensorFlow optimizer from `tf.train`.') - % (optimizer,)) - self._trackable_saver.save(filepath, session=session) - # Record this checkpoint so it's visible from tf.train.latest_checkpoint. - checkpoint_management.update_checkpoint_state_internal( - save_dir=os.path.dirname(filepath), - model_checkpoint_path=filepath, - save_relative_paths=True, - all_model_checkpoint_paths=[filepath]) - - def load_weights(self, filepath, by_name=False, skip_mismatch=False): - """Loads all layer weights, either from a TensorFlow or an HDF5 weight file. - - If `by_name` is False weights are loaded based on the network's - topology. This means the architecture should be the same as when the weights - were saved. Note that layers that don't have weights are not taken into - account in the topological ordering, so adding or removing layers is fine as - long as they don't have weights. - - If `by_name` is True, weights are loaded into layers only if they share the - same name. This is useful for fine-tuning or transfer-learning models where - some of the layers have changed. - - Only topological loading (`by_name=False`) is supported when loading weights - from the TensorFlow format. Note that topological loading differs slightly - between TensorFlow and HDF5 formats for user-defined classes inheriting from - `tf.keras.Model`: HDF5 loads based on a flattened list of weights, while the - TensorFlow format loads based on the object-local names of attributes to - which layers are assigned in the `Model`'s constructor. - - Arguments: - filepath: String or PathLike, path to the weights file to load. For - weight files in TensorFlow format, this is the file prefix (the - same as was passed to `save_weights`). - by_name: Boolean, whether to load weights by name or by topological - order. 
Only topological loading is supported for weight files in - TensorFlow format. - skip_mismatch: Boolean, whether to skip loading of layers where there is - a mismatch in the number of weights, or a mismatch in the shape of - the weight (only valid when `by_name=True`). - - Returns: - When loading a weight file in TensorFlow format, returns the same status - object as `tf.train.Checkpoint.restore`. When graph building, restore - ops are run automatically as soon as the network is built (on first call - for user-defined classes inheriting from `Model`, immediately if it is - already built). - - When loading weights in HDF5 format, returns `None`. - - Raises: - ImportError: If h5py is not available and the weight file is in HDF5 - format. - ValueError: If `skip_mismatch` is set to `True` when `by_name` is - `False`. - """ - - if skip_mismatch and not by_name: - raise ValueError( - 'When calling model.load_weights, skip_mismatch can only be set to ' - 'True when by_name is True.') - - filepath = path_to_string(filepath) - if _is_hdf5_filepath(filepath): - save_format = 'h5' - else: - try: - py_checkpoint_reader.NewCheckpointReader(filepath) - save_format = 'tf' - except errors_impl.DataLossError: - # The checkpoint is not readable in TensorFlow format. Try HDF5. - save_format = 'h5' - if save_format == 'tf': - status = self._trackable_saver.restore(filepath) - if by_name: - raise NotImplementedError( - 'Weights may only be loaded based on topology into Models when ' - 'loading TensorFlow-formatted weights (got by_name=True to ' - 'load_weights).') - if not context.executing_eagerly(): - session = backend.get_session() - # Restore existing variables (if any) immediately, and set up a - # streaming restore for any variables created in the future. 
- trackable_utils.streaming_restore(status=status, session=session) - status.assert_nontrivial_match() - return status - if h5py is None: - raise ImportError( - '`load_weights` requires h5py when loading weights from HDF5.') - if self._is_graph_network and not self.built: - raise NotImplementedError( - 'Unable to load weights saved in HDF5 format into a subclassed ' - 'Model which has not created its variables yet. Call the Model ' - 'first, then load the weights.') - self._assert_weights_created() - with h5py.File(filepath, 'r') as f: - if 'layer_names' not in f.attrs and 'model_weights' in f: - f = f['model_weights'] - if by_name: - hdf5_format.load_weights_from_hdf5_group_by_name( - f, self.layers, skip_mismatch=skip_mismatch) - else: - hdf5_format.load_weights_from_hdf5_group(f, self.layers) - - def _updated_config(self): - """Util shared between different serialization methods. - - Returns: - Model config with Keras version information added. - """ - from tensorflow.python.keras import __version__ as keras_version # pylint: disable=g-import-not-at-top - - config = self.get_config() - model_config = { - 'class_name': self.__class__.__name__, - 'config': config, - 'keras_version': keras_version, - 'backend': backend.backend() - } - return model_config - - def to_json(self, **kwargs): - """Returns a JSON string containing the network configuration. - - To load a network from a JSON save file, use - `keras.models.model_from_json(json_string, custom_objects={})`. - - Arguments: - **kwargs: Additional keyword arguments - to be passed to `json.dumps()`. - - Returns: - A JSON string. - """ - model_config = self._updated_config() - return json.dumps( - model_config, default=serialization.get_json_type, **kwargs) - - def to_yaml(self, **kwargs): - """Returns a yaml string containing the network configuration. - - To load a network from a yaml save file, use - `keras.models.model_from_yaml(yaml_string, custom_objects={})`. 
- - `custom_objects` should be a dictionary mapping - the names of custom losses / layers / etc to the corresponding - functions / classes. - - Arguments: - **kwargs: Additional keyword arguments - to be passed to `yaml.dump()`. - - Returns: - A YAML string. - - Raises: - ImportError: if yaml module is not found. - """ - if yaml is None: - raise ImportError( - 'Requires yaml module installed (`pip install pyyaml`).') - return yaml.dump(self._updated_config(), **kwargs) - - def summary(self, line_length=None, positions=None, print_fn=None): - """Prints a string summary of the network. - - Arguments: - line_length: Total length of printed lines - (e.g. set this to adapt the display to different - terminal window sizes). - positions: Relative or absolute positions of log elements - in each line. If not provided, - defaults to `[.33, .55, .67, 1.]`. - print_fn: Print function to use. Defaults to `print`. - It will be called on each line of the summary. - You can set it to a custom function - in order to capture the string summary. - - Raises: - ValueError: if `summary()` is called before the model is built. - """ - if not self.built: - raise ValueError('This model has not yet been built. ' - 'Build the model first by calling `build()` or calling ' - '`fit()` with some data, or specify ' - 'an `input_shape` argument in the first layer(s) for ' - 'automatic build.') - layer_utils.print_summary(self, - line_length=line_length, - positions=positions, - print_fn=print_fn) - def _validate_graph_inputs_and_outputs(self): """Validates the inputs and outputs of a Graph Network.""" # Check for redundancy in inputs. @@ -1542,30 +781,9 @@ class Network(base_layer.Layer): self._tensor_usage_count = tensor_usage_count def _assert_weights_created(self): - """Asserts that all the weights for the network have been created. - - For a non-dynamic network, the weights must already be created after the - layer has been called. 
For a dynamic network, the exact list of weights can - never be known for certain since it may change at any time during execution. - - We run this check right before accessing weights or getting the Numpy value - for the current weights. Otherwise, if the layer has never been called, - the user would just get an empty list, which is misleading. - - Raises: - ValueError: if the weights of the network has not yet been created. - """ - if self.dynamic: - return - if (not self._is_graph_network and - 'build' in self.__class__.__dict__ and - not self.built): - # For any model that has customized build() method but hasn't - # been invoked yet, this will cover both sequential and subclass model. - raise ValueError('Weights for model %s have not yet been created. ' - 'Weights are created when the Model is first called on ' - 'inputs or `build()` is called with an `input_shape`.' % - self.name) + # Override the implementation in Model. + # The Functional model should always have weight created already. + return def _graph_network_add_loss(self, symbolic_loss): new_nodes, new_layers = _map_subgraph_network(self.inputs, [symbolic_loss]) @@ -1587,42 +805,11 @@ class Network(base_layer.Layer): new_layers.append(add_metric_layer) self._insert_layers(new_layers, new_nodes) - @trackable.no_automatic_dependency_tracking - def _set_save_spec(self, inputs): - if self._saved_model_inputs_spec is not None: - return # Already set. 
- - input_names = self.input_names - if not input_names: - input_names = compile_utils.create_pseudo_input_names(inputs) - - flat_inputs = nest.flatten(inputs) - specs = [] - for name, tensor in zip(input_names, flat_inputs): - specs.append( - tf_utils.get_tensor_spec(tensor, dynamic_batch=False, name=name)) - specs = nest.pack_sequence_as(inputs, specs) - - self._saved_model_inputs_spec = specs - - def _get_save_spec(self, dynamic_batch=True): - if self._saved_model_inputs_spec is None: - return None - - return nest.map_structure( - lambda t: tf_utils.get_tensor_spec(t, dynamic_batch=dynamic_batch), - self._saved_model_inputs_spec) - @property def _trackable_saved_model_saver(self): return network_serialization.NetworkSavedModelSaver(self) -def _is_hdf5_filepath(filepath): - return (filepath.endswith('.h5') or filepath.endswith('.keras') or - filepath.endswith('.hdf5')) - - def _make_node_key(layer_name, node_index): return layer_name + '_ib-' + str(node_index) @@ -1830,7 +1017,7 @@ def _map_subgraph_network(inputs, outputs): def _should_skip_first_node(layer): """Returns True if the first layer node should not be saved or loaded.""" # Networks start with a pre-existing node linking their input to output. 
- return issubclass(layer.__class__, Network) and layer._is_graph_network + return isinstance(layer, Functional) def _deserialize_keras_tensors(kwargs, layer_map): diff --git a/tensorflow/python/keras/engine/network_test.py b/tensorflow/python/keras/engine/functional_test.py similarity index 97% rename from tensorflow/python/keras/engine/network_test.py rename to tensorflow/python/keras/engine/functional_test.py index b4e8adf2c49..90fc9f2697f 100644 --- a/tensorflow/python/keras/engine/network_test.py +++ b/tensorflow/python/keras/engine/functional_test.py @@ -33,8 +33,8 @@ from tensorflow.python.keras import layers from tensorflow.python.keras import models from tensorflow.python.keras import testing_utils from tensorflow.python.keras.engine import base_layer +from tensorflow.python.keras.engine import functional from tensorflow.python.keras.engine import input_layer as input_layer_lib -from tensorflow.python.keras.engine import network as network_lib from tensorflow.python.keras.engine import sequential from tensorflow.python.keras.engine import training as training_lib from tensorflow.python.keras.utils import layer_utils @@ -89,7 +89,7 @@ class NetworkConstructionTest(keras_parameterized.TestCase): self.assertEqual(len(layer.updates), 3) - network = network_lib.Network(x2, y2) + network = functional.Functional(x2, y2) self.assertEqual(len(network.updates), 3) x3 = input_layer_lib.Input(shape=(1,)) @@ -120,7 +120,7 @@ class NetworkConstructionTest(keras_parameterized.TestCase): dense_a = layers.Dense(4, name='dense_a') dense_b = layers.Dense(2, name='dense_b') y = dense_b(dense_a(x)) - network = network_lib.Network(x, y, name='dense_network') + network = functional.Functional(x, y, name='dense_network') # test various get_layer by index self.assertEqual(network.get_layer(index=1), dense_a) @@ -251,7 +251,7 @@ class NetworkConstructionTest(keras_parameterized.TestCase): x = input_layer_lib.Input(shape=(32,)) dense = layers.Dense(2) y = dense(x) - network = 
network_lib.Network(x, y, name='dense_network') + network = functional.Functional(x, y, name='dense_network') # test basic attributes self.assertEqual(network.name, 'dense_network') @@ -740,7 +740,7 @@ class NetworkConstructionTest(keras_parameterized.TestCase): else: x = input_layer_lib.Input(shape=(32,)) y = MaskedLayer()(x) # pylint: disable=not-callable - network = network_lib.Network(x, y) + network = functional.Functional(x, y) # test callability on Input x_2 = input_layer_lib.Input(shape=(32,)) @@ -1102,7 +1102,7 @@ class NetworkConstructionTest(keras_parameterized.TestCase): def test_subclassed_error_if_init_not_called(self): - class MyNetwork(network_lib.Network): + class MyNetwork(training_lib.Model): def __init__(self): self._foo = [layers.Dense(10), layers.Dense(10)] @@ -1124,10 +1124,12 @@ class NetworkConstructionTest(keras_parameterized.TestCase): inputs = input_layer_lib.Input(shape=(32,)) outputs = layers.Dense(4)(inputs) - with self.assertRaisesRegexp(TypeError, 'unexpected argument'): + with self.assertRaisesRegexp(TypeError, + 'got an unexpected keyword argument'): model = training_lib.Model( inputs, outputs, name='m', trainable=False, dtype='int64') - with self.assertRaisesRegexp(TypeError, 'unexpected argument'): + with self.assertRaisesRegexp(TypeError, + 'got an unexpected keyword argument'): model = training_lib.Model( inputs, outputs, name='m', trainable=False, dynamic=False) @@ -1136,8 +1138,10 @@ class NetworkConstructionTest(keras_parameterized.TestCase): self.assertFalse(model.trainable) self.assertFalse(model.dynamic) + class SubclassModel(training_lib.Model): + pass # Subclassed model - model = training_lib.Model( + model = SubclassModel( name='subclassed', trainable=True, dtype='int64', dynamic=True) self.assertEqual('subclassed', model.name) self.assertTrue(model.dynamic) @@ -1150,9 +1154,9 @@ class NetworkConstructionTest(keras_parameterized.TestCase): input_tensor2 = input_layer_lib.Input(shape=[10], name='b') output_tensor1 = 
layers.Dense(units=10)(input_tensor1) - net = network_lib.Network( + net = functional.Functional( inputs=[input_tensor1, input_tensor2], outputs=[output_tensor1]) - net2 = network_lib.Network.from_config(net.get_config()) + net2 = functional.Functional.from_config(net.get_config()) self.assertLen(net2.inputs, 2) self.assertEqual('a', net2.layers[0].name) self.assertEqual('b', net2.layers[1].name) @@ -1180,8 +1184,8 @@ class DeferredModeTest(keras_parameterized.TestCase): self.assertEqual(x.shape.as_list(), [None, 2]) outputs = layers.Dense(4)(x) - network = network_lib.Network(inputs, outputs) - self.assertIsInstance(network, network_lib.Network) + network = functional.Functional(inputs, outputs) + self.assertIsInstance(network, functional.Functional) if context.executing_eagerly(): # It should be possible to call such a network on EagerTensors. @@ -1204,7 +1208,7 @@ class DeferredModeTest(keras_parameterized.TestCase): c = AddLayer()([a, input_b]) # pylint: disable=not-callable c = layers.Dense(2)(c) - network = network_lib.Network([input_a, input_b], [a, c]) + network = functional.Functional([input_a, input_b], [a, c]) if context.executing_eagerly(): a_val = constant_op.constant( np.random.random((10, 32)).astype('float32')) @@ -1484,9 +1488,9 @@ class NestedNetworkTest(keras_parameterized.TestCase): 'x2': input_layer_lib.Input(shape=(1,)) } outputs = layers.Add()([inputs['x1'], inputs['x2']]) - network = network_lib.Network(inputs, outputs) + network = functional.Functional(inputs, outputs) - network = network_lib.Network.from_config(network.get_config()) + network = functional.Functional.from_config(network.get_config()) result_tensor = network({ 'x': array_ops.ones((1, 1), 'float32'), @@ -1509,9 +1513,9 @@ class NestedNetworkTest(keras_parameterized.TestCase): 'x*x': layers.Multiply()([inputs, inputs]) } - network = network_lib.Network(inputs, outputs) + network = functional.Functional(inputs, outputs) - network = 
network_lib.Network.from_config(network.get_config()) + network = functional.Functional.from_config(network.get_config()) result_tensor = network(array_ops.ones((1, 1), 'float32')) result = self.evaluate(result_tensor) @@ -1531,7 +1535,8 @@ class NestedNetworkTest(keras_parameterized.TestCase): 'x1+x2': layers.Add()([inner_inputs['x1'], inner_inputs['x2']]), 'x1*x2': layers.Multiply()([inner_inputs['x1'], inner_inputs['x2']]) } - inner_network = network_lib.Network(inner_inputs, inner_outputs) + inner_network = functional.Functional( + inner_inputs, inner_outputs) inputs = [ input_layer_lib.Input(shape=(1,)), @@ -1539,9 +1544,9 @@ class NestedNetworkTest(keras_parameterized.TestCase): ] middle = inner_network({'x1': inputs[0], 'x2': inputs[1]}) outputs = layers.Add()([middle['x1+x2'], middle['x1*x2']]) - network = network_lib.Network(inputs, outputs) + network = functional.Functional(inputs, outputs) - network = network_lib.Network.from_config(network.get_config()) + network = functional.Functional.from_config(network.get_config()) # Computes: `(x1+x2) + (x1*x2)` result_tensor = network( @@ -1735,13 +1740,13 @@ class DTypeTest(keras_parameterized.TestCase): def test_graph_network_dtype(self): inputs = input_layer_lib.Input((10,)) outputs = layers.Dense(10)(inputs) - network = network_lib.Network(inputs, outputs) + network = functional.Functional(inputs, outputs) self.assertEqual(network.dtype, 'float32') @testing_utils.enable_v2_dtype_behavior def test_subclassed_network_dtype(self): - class IdentityNetwork(network_lib.Network): + class IdentityNetwork(training_lib.Model): def call(self, inputs): return inputs @@ -1785,11 +1790,11 @@ class CacheCorrectnessTest(keras_parameterized.TestCase): def layer_and_network_test(self): # Top level layer - network = network_lib.Network() + network = functional.Functional() layer_0 = AttrTrackingLayer() - sub_network = network_lib.Network() + sub_network = functional.Functional() layer_1 = AttrTrackingLayer(dynamic=True) layer_2 
= AttrTrackingLayer() sub_network.sub_layers = [layer_1, layer_2] @@ -1887,7 +1892,7 @@ class CacheCorrectnessTest(keras_parameterized.TestCase): x = input_layer_lib.Input(shape=(None, 32)) dense = layers.Dense(2) y = dense(x) - network = network_lib.Network(x, y, name='dense_network') + network = functional.Functional(x, y, name='dense_network') for i in range(999, 1024): self.assertEqual(network.compute_output_shape((1, i, 32)), (1, i, 2)) @@ -1895,7 +1900,7 @@ class CacheCorrectnessTest(keras_parameterized.TestCase): def test_2d_inputs_squeezed_to_1d(self): input_1d = input_layer_lib.Input(shape=()) outputs = input_1d * 2. - net = network_lib.Network(input_1d, outputs) + net = functional.Functional(input_1d, outputs) x = np.ones((10, 1)) y = net(x) @@ -1904,7 +1909,7 @@ class CacheCorrectnessTest(keras_parameterized.TestCase): def test_1d_inputs_expanded_to_2d(self): input_1d = input_layer_lib.Input(shape=(1,)) outputs = input_1d * 2. - net = network_lib.Network(input_1d, outputs) + net = functional.Functional(input_1d, outputs) x = np.ones((10,)) y = net(x) @@ -1927,14 +1932,14 @@ class CacheCorrectnessTest(keras_parameterized.TestCase): inputs = input_layer_lib.Input(10) outputs = my_layer(inputs, training=True) - network = network_lib.Network(inputs, outputs) + network = functional.Functional(inputs, outputs) # Hard-coded value passed during construction is respected. self.assertAllEqual(network(x, training=False), x) inputs = input_layer_lib.Input(10) outputs = my_layer(inputs, training=False) - network = network_lib.Network(inputs, outputs) + network = functional.Functional(inputs, outputs) network(x, training=True) # Hard-coded value passed during construction is respected. 
@@ -1942,7 +1947,7 @@ class CacheCorrectnessTest(keras_parameterized.TestCase): inputs = input_layer_lib.Input(10) outputs = my_layer(inputs, training=None) - network = network_lib.Network(inputs, outputs) + network = functional.Functional(inputs, outputs) # `None` value passed during construction is overridden. self.assertAllEqual(network(x, training=True), x) diff --git a/tensorflow/python/keras/engine/sequential.py b/tensorflow/python/keras/engine/sequential.py index 2d5abac7fd6..d07ed477ba9 100644 --- a/tensorflow/python/keras/engine/sequential.py +++ b/tensorflow/python/keras/engine/sequential.py @@ -26,8 +26,8 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util from tensorflow.python.keras import layers as layer_module from tensorflow.python.keras.engine import base_layer +from tensorflow.python.keras.engine import functional from tensorflow.python.keras.engine import input_layer -from tensorflow.python.keras.engine import training from tensorflow.python.keras.engine import training_utils from tensorflow.python.keras.saving.saved_model import model_serialization from tensorflow.python.keras.utils import generic_utils @@ -35,7 +35,6 @@ from tensorflow.python.keras.utils import layer_utils from tensorflow.python.keras.utils import tf_utils from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training.tracking import base as trackable -from tensorflow.python.training.tracking import layer_utils as trackable_layer_utils from tensorflow.python.util import nest from tensorflow.python.util import tf_inspect from tensorflow.python.util.deprecation import deprecated @@ -48,7 +47,7 @@ SINGLE_LAYER_OUTPUT_ERROR_MSG = ('All layers in a Sequential model should have ' @keras_export('keras.Sequential', 'keras.models.Sequential') -class Sequential(training.Model): +class Sequential(functional.Functional): """`Sequential` groups a linear stack of layers into a `tf.keras.Model`. 
`Sequential` provides training and inference features on this model. @@ -113,7 +112,9 @@ class Sequential(training.Model): layers: Optional list of layers to add to the model. name: Optional name for the model. """ - super(Sequential, self).__init__(name=name, autocast=False) + # Skip the init in FunctionalModel since model doesn't have input/output yet + super(functional.Functional, self).__init__( # pylint: disable=bad-super-call + name=name, autocast=False) self.supports_masking = True self._compute_output_and_mask_jointly = True self._auto_track_sub_layers = False @@ -152,11 +153,6 @@ class Sequential(training.Model): return layers[1:] return layers[:] - @property - @trackable_layer_utils.cache_recursive_attribute('dynamic') - def dynamic(self): - return any(layer.dynamic for layer in self.layers) - @trackable.no_automatic_dependency_tracking def add(self, layer): """Adds a layer instance on top of the layer stack. @@ -233,7 +229,7 @@ class Sequential(training.Model): self.built = True if set_inputs or self._graph_initialized: - self._init_graph_network(self.inputs, self.outputs, name=self.name) + self._init_graph_network(self.inputs, self.outputs) self._graph_initialized = True else: self._layers.append(layer) @@ -267,7 +263,7 @@ class Sequential(training.Model): elif self._graph_initialized: self.layers[-1]._outbound_nodes = [] self.outputs = [self.layers[-1].output] - self._init_graph_network(self.inputs, self.outputs, name=self.name) + self._init_graph_network(self.inputs, self.outputs) self.built = True @trackable.no_automatic_dependency_tracking @@ -341,7 +337,7 @@ class Sequential(training.Model): # case, we fall back to the legacy deferred behavior. # TODO(fchollet): consider raising here, as we should not be # supporting such layers. 
- self._init_graph_network(inputs, outputs, name=self.name) + self._init_graph_network(inputs, outputs) self._graph_initialized = True except: # pylint:disable=bare-except self._use_legacy_deferred_behavior = True @@ -350,7 +346,7 @@ class Sequential(training.Model): @generic_utils.default def build(self, input_shape=None): if self._graph_initialized: - self._init_graph_network(self.inputs, self.outputs, name=self.name) + self._init_graph_network(self.inputs, self.outputs) else: if input_shape is None: raise ValueError('You must provide an `input_shape` argument.') @@ -380,7 +376,7 @@ class Sequential(training.Model): if self._graph_initialized: if not self.built: - self._init_graph_network(self.inputs, self.outputs, name=self.name) + self._init_graph_network(self.inputs, self.outputs) return super(Sequential, self).call(inputs, training=training, mask=mask) outputs = inputs # handle the corner case where self.layers is empty @@ -519,6 +515,13 @@ class Sequential(training.Model): return False return True + def _assert_weights_created(self): + if self._graph_initialized: + return + # When the graph has not been initialized, use the Model's implementation to + # to check if the weights has been created. 
+ super(functional.Functional, self)._assert_weights_created() # pylint: disable=bad-super-call + def _get_shape_tuple(t): if hasattr(t, 'shape'): diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index bb68ffca2ed..52bf42a099d 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -20,6 +20,9 @@ from __future__ import print_function import copy import itertools +import json +import os +import six from tensorflow.python.autograph.lang import directives from tensorflow.python.distribute import distribute_coordinator as dc @@ -31,19 +34,31 @@ from tensorflow.python.eager import backprop from tensorflow.python.eager import context from tensorflow.python.eager import def_function from tensorflow.python.eager import monitoring +from tensorflow.python.framework import errors +from tensorflow.python.framework import errors_impl +from tensorflow.python.framework import func_graph from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import tensor_shape +from tensorflow.python.keras import backend from tensorflow.python.keras import callbacks as callbacks_module from tensorflow.python.keras import optimizers from tensorflow.python.keras.distribute import distributed_training_utils as dist_utils +from tensorflow.python.keras.engine import base_layer +from tensorflow.python.keras.engine import base_layer_utils from tensorflow.python.keras.engine import compile_utils from tensorflow.python.keras.engine import data_adapter -from tensorflow.python.keras.engine import network from tensorflow.python.keras.engine import training_utils from tensorflow.python.keras.mixed_precision.experimental import loss_scale_optimizer as lso +from tensorflow.python.keras.saving import hdf5_format +from tensorflow.python.keras.saving import save from tensorflow.python.keras.saving.saved_model import model_serialization 
+from tensorflow.python.keras.utils import generic_utils +from tensorflow.python.keras.utils import layer_utils from tensorflow.python.keras.utils import tf_utils from tensorflow.python.keras.utils import version_utils +from tensorflow.python.keras.utils.io_utils import ask_to_proceed_with_overwrite +from tensorflow.python.keras.utils.io_utils import path_to_string from tensorflow.python.keras.utils.mode_keys import ModeKeys from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops @@ -52,12 +67,33 @@ from tensorflow.python.ops import summary_ops_v2 from tensorflow.python.ops import variables from tensorflow.python.ops.ragged import ragged_concat_ops from tensorflow.python.ops.ragged import ragged_tensor +from tensorflow.python.platform import tf_logging as logging from tensorflow.python.profiler import trace +from tensorflow.python.training import checkpoint_management +from tensorflow.python.training import py_checkpoint_reader from tensorflow.python.training.tracking import base as trackable +from tensorflow.python.training.tracking import data_structures +from tensorflow.python.training.tracking import layer_utils as trackable_layer_utils +from tensorflow.python.training.tracking import util as trackable_utils from tensorflow.python.util import deprecation from tensorflow.python.util import nest +from tensorflow.python.util import serialization from tensorflow.python.util import tf_decorator from tensorflow.python.util.tf_export import keras_export +from tensorflow.tools.docs import doc_controls + + +# pylint: disable=g-import-not-at-top +try: + import h5py +except ImportError: + h5py = None + +try: + import yaml +except ImportError: + yaml = None +# pylint: enable=g-import-not-at-top _keras_api_gauge = monitoring.BoolGauge('/tensorflow/api/keras', @@ -97,8 +133,25 @@ def disable_multi_worker(method): target=method, decorator_func=_method_wrapper) +def inject_functional_model_class(cls): + from tensorflow.python.keras.engine import 
functional # pylint: disable=g-import-not-at-top + from tensorflow.python.keras.engine import training_v1 # pylint: disable=g-import-not-at-top + if cls == Model or cls == training_v1.Model: + return functional.Functional + + cls.__bases__ = tuple(inject_functional_model_class(base) + for base in cls.__bases__) + return cls + + +def is_functional_model_init_params(args, kwargs): + return (len(args) == 2 or + len(args) == 1 and 'outputs' in kwargs or + 'inputs' in kwargs and 'outputs' in kwargs) + + @keras_export('keras.Model', 'keras.models.Model') -class Model(network.Network, version_utils.ModelVersionSelector): +class Model(base_layer.Layer, version_utils.ModelVersionSelector): """`Model` groups layers into an object with training and inference features. Arguments: @@ -174,11 +227,61 @@ class Model(network.Network, version_utils.ModelVersionSelector): _TF_MODULE_IGNORED_PROPERTIES = frozenset( itertools.chain(('_train_counter', '_test_counter', '_predict_counter', '_steps_per_execution'), - network.Network._TF_MODULE_IGNORED_PROPERTIES)) # pylint: disable=protected-access + base_layer.Layer._TF_MODULE_IGNORED_PROPERTIES)) # pylint: disable=protected-access + def __new__(cls, *args, **kwargs): + # Signature detection + if is_functional_model_init_params(args, kwargs) and cls == Model: + # Functional model + from tensorflow.python.keras.engine import functional # pylint: disable=g-import-not-at-top + return functional.Functional(*args, **kwargs) + else: + return super(Model, cls).__new__(cls, *args, **kwargs) + + @trackable.no_automatic_dependency_tracking def __init__(self, *args, **kwargs): - super(Model, self).__init__(*args, **kwargs) - _keras_api_gauge.get_cell('model').set(True) + # Special case for Subclassed Functional Model, which we couldn't detect + # when __new__ is called. We only realize it is a functional model when it + # calls super.__init__ with input and output tensor. 
+ from tensorflow.python.keras.engine import functional # pylint: disable=g-import-not-at-top + if (is_functional_model_init_params(args, kwargs) and + not isinstance(self, functional.Functional)): + inject_functional_model_class(self.__class__) + functional.Functional.__init__(self, *args, **kwargs) + return + + # The following are implemented as property functions: + # self.trainable_weights + # self.non_trainable_weights + generic_utils.validate_kwargs(kwargs, {'trainable', 'dtype', 'dynamic', + 'name', 'autocast'}) + super(Model, self).__init__(**kwargs) + # By default, Model is a subclass model, which is not in graph network. + self._is_graph_network = False + + self.inputs = None + self.outputs = None + self.input_names = None + self.output_names = None + # stop_training is used by callback to stop training when error happens + self.stop_training = False + self.history = None + # These objects are used in the default `Model.compile`. They are not + # guaranteed to be set after `Model.compile` is called, as users can + # override compile with custom logic. + self.compiled_loss = None + self.compiled_metrics = None + + # This is True for Sequential networks and Functional networks. + self._compute_output_and_mask_jointly = False + + # Don't reset compilation if already done. This may occur if calling + # `__init__` (or `_init_graph_network`) on an already-compiled model + # such as a Sequential model. Sequential models may need to rebuild + # themselves after compilation. + self._maybe_create_attribute('_is_compiled', False) + self._maybe_create_attribute('optimizer', None) + # Model must be created under scope of DistStrat it will be trained with. if ds_context.has_strategy(): self._distribution_strategy = ds_context.get_strategy() @@ -186,23 +289,20 @@ class Model(network.Network, version_utils.ModelVersionSelector): self._distribution_strategy = None # Defaults to value of `tf.config.experimental_functions_run_eagerly`. 
self._run_eagerly = None - self.stop_training = False # Initialize cache attrs. self._reset_compile_cache() # Fault-tolerance handler. Set in `ModelCheckpoint`. self._training_state = None - self.history = None - - # These objects are used in the default `Model.compile`. They are not - # guaranteed to be set after `Model.compile` is called, as users can - # override compile with custom logic. - self.compiled_loss = None - self.compiled_metrics = None + self._saved_model_inputs_spec = None + self._trackable_saver = ( + trackable_utils.saver_with_op_caching(self)) self._steps_per_execution = None self._init_batch_counters() + self._base_model_initialized = True + _keras_api_gauge.get_cell('model').set(True) @trackable.no_automatic_dependency_tracking def _init_batch_counters(self): @@ -214,67 +314,153 @@ class Model(network.Network, version_utils.ModelVersionSelector): self._predict_counter = variables.Variable( 0, dtype='int64', aggregation=agg) - def get_weights(self): - """Retrieves the weights of the model. + def __setattr__(self, name, value): + if not getattr(self, '_self_setattr_tracking', True): + super(Model, self).__setattr__(name, value) + return - Returns: - A flat list of Numpy arrays. - """ - with self.distribute_strategy.scope(): - return super(Model, self).get_weights() + if all( + isinstance(v, (base_layer.Layer, + data_structures.TrackableDataStructure)) or + trackable_layer_utils.has_weights(v) for v in nest.flatten(value)): + try: + self._base_model_initialized + except AttributeError: + # six.raise_from suppresses the original AttributeError from being raised + six.raise_from( + RuntimeError('It looks like you are subclassing `Model` and you ' + 'forgot to call `super(YourClass, self).__init__()`.' + ' Always start with this line.'), None) - def load_weights(self, filepath, by_name=False, skip_mismatch=False): - """Loads all layer weights, either from a TensorFlow or an HDF5 weight file.
+ super(Model, self).__setattr__(name, value) - If `by_name` is False weights are loaded based on the network's - topology. This means the architecture should be the same as when the weights - were saved. Note that layers that don't have weights are not taken into - account in the topological ordering, so adding or removing layers is fine as - long as they don't have weights. + # Keep track of metric instance created in subclassed model/layer. + # We do this so that we can maintain the correct order of metrics by adding + # the instance to the `metrics` list as soon as it is created. + from tensorflow.python.keras import metrics as metrics_module # pylint: disable=g-import-not-at-top + if isinstance(value, metrics_module.Metric): + self._metrics.append(value) - If `by_name` is True, weights are loaded into layers only if they share the - same name. This is useful for fine-tuning or transfer-learning models where - some of the layers have changed. + @generic_utils.default + def build(self, input_shape): + """Builds the model based on input shapes received. - Only topological loading (`by_name=False`) is supported when loading weights - from the TensorFlow format. Note that topological loading differs slightly - between TensorFlow and HDF5 formats for user-defined classes inheriting from - `tf.keras.Model`: HDF5 loads based on a flattened list of weights, while the - TensorFlow format loads based on the object-local names of attributes to - which layers are assigned in the `Model`'s constructor. + This is to be used for subclassed models, which do not know at instantiation + time what their inputs look like. - Arguments: - filepath: String, path to the weights file to load. For weight files in - TensorFlow format, this is the file prefix (the same as was passed - to `save_weights`). - by_name: Boolean, whether to load weights by name or by topological - order. Only topological loading is supported for weight files in - TensorFlow format. 
- skip_mismatch: Boolean, whether to skip loading of layers where there is - a mismatch in the number of weights, or a mismatch in the shape of - the weight (only valid when `by_name=True`). + This method only exists for users who want to call `model.build()` in a + standalone way (as a substitute for calling the model on real data to + build it). It will never be called by the framework (and thus it will + never throw unexpected errors in an unrelated workflow). - Returns: - When loading a weight file in TensorFlow format, returns the same status - object as `tf.train.Checkpoint.restore`. When graph building, restore - ops are run automatically as soon as the network is built (on first call - for user-defined classes inheriting from `Model`, immediately if it is - already built). - - When loading weights in HDF5 format, returns `None`. + Args: + input_shape: Single tuple, TensorShape, or list of shapes, where shapes + are tuples, integers, or TensorShapes. Raises: - ImportError: If h5py is not available and the weight file is in HDF5 - format. - ValueError: If `skip_mismatch` is set to `True` when `by_name` is - `False`. + ValueError: + 1. In case of invalid user-provided data (not of type tuple, + list, or TensorShape). + 2. If the model requires call arguments that are agnostic + to the input shapes (positional or kwarg in call signature). + 3. If not all layers were properly built. + 4. If float type inputs are not supported within the layers. + + In each of these cases, the user should build their model by calling it + on real tensor data. 
""" - if dist_utils.is_tpu_strategy(self._distribution_strategy): - if (self._distribution_strategy.extended.steps_per_run > 1 and - (not network._is_hdf5_filepath(filepath))): # pylint: disable=protected-access - raise ValueError('Load weights is not yet supported with TPUStrategy ' - 'with steps_per_run greater than 1.') - return super(Model, self).load_weights(filepath, by_name, skip_mismatch) + if self._is_graph_network: + super(Model, self).build(input_shape) + return + + if input_shape is None: + raise ValueError('Input shape must be defined when calling build on a ' + 'model subclass network.') + valid_types = (tuple, list, tensor_shape.TensorShape) + if not isinstance(input_shape, valid_types): + raise ValueError('Specified input shape is not one of the valid types. ' + 'Please specify a batch input shape of type tuple or ' + 'list of input shapes. User provided ' + 'input type: {}'.format(type(input_shape))) + + if input_shape and not self.inputs: + # We create placeholders for the `None`s in the shape and build the model + # in a Graph. Since tf.Variable is compatible with both eager execution + # and graph building, the variables created after building the model in + # a Graph are still valid when executing eagerly. + if context.executing_eagerly(): + graph = func_graph.FuncGraph('build_graph') + else: + graph = backend.get_graph() + with graph.as_default(): + if isinstance(input_shape, list): + x = [base_layer_utils.generate_placeholders_from_shape(shape) + for shape in input_shape] + elif isinstance(input_shape, dict): + x = { + k: base_layer_utils.generate_placeholders_from_shape(shape) + for k, shape in input_shape.items() + } + else: + x = base_layer_utils.generate_placeholders_from_shape(input_shape) + + kwargs = {} + call_signature = self._call_full_argspec + call_args = call_signature.args + # Exclude `self`, `inputs`, and any argument with a default value. 
+ if len(call_args) > 2: + if call_signature.defaults: + call_args = call_args[2:-len(call_signature.defaults)] + else: + call_args = call_args[2:] + for arg in call_args: + if arg == 'training': + # Case where `training` is a positional arg with no default. + kwargs['training'] = False + else: + # Has invalid call signature with unknown positional arguments. + raise ValueError( + 'Currently, you cannot build your model if it has ' + 'positional or keyword arguments that are not ' + 'inputs to the model, but are required for its ' + '`call` method. Instead, in order to instantiate ' + 'and build your model, `call` your model on real ' + 'tensor data with all expected call arguments.') + elif len(call_args) < 2: + # Signature without `inputs`. + raise ValueError('You can only call `build` on a model if its `call` ' + 'method accepts an `inputs` argument.') + try: + self.call(x, **kwargs) + except (errors.InvalidArgumentError, TypeError): + raise ValueError('You cannot build your model by calling `build` ' + 'if your layers do not support float type inputs. ' + 'Instead, in order to instantiate and build your ' + 'model, `call` your model on real tensor data (of ' + 'the correct dtype).') + + super(Model, self).build(input_shape) + + def call(self, inputs, training=None, mask=None): + """Calls the model on new inputs. + + In this case `call` just reapplies + all ops in the graph to the new inputs + (e.g. build a new computational graph from the provided inputs). + + Arguments: + inputs: A tensor or list of tensors. + training: Boolean or boolean scalar tensor, indicating whether to run + the `Network` in training mode or inference mode. + mask: A mask or list of masks. A mask can be + either a tensor or None (no mask). + + Returns: + A tensor if there is a single output, or + a list of tensors if there is more than one output.
+ """ + raise NotImplementedError('When subclassing the `Model` class, you should ' + 'implement a `call` method.') def compile(self, optimizer='rmsprop', @@ -399,6 +585,10 @@ class Model(network.Network, version_utils.ModelVersionSelector): dtype='int64', aggregation=variables.VariableAggregationV2.ONLY_FIRST_REPLICA) + @property + def _should_compute_mask(self): + return False + @property def metrics(self): """Returns the model's metrics added using `compile`, `add_metric` APIs. @@ -1661,6 +1851,564 @@ class Model(network.Network, version_utils.ModelVersionSelector): verbose=verbose, callbacks=callbacks) + ###################################################################### + # Functions below are not training related. They are for model weights + # tracking, save/load, serialization, etc. + ###################################################################### + + @property + def trainable_weights(self): + self._assert_weights_created() + return self._dedup_weights( + trackable_layer_utils.gather_trainable_weights( + trainable=self.trainable, + sub_layers=self._layers, + extra_variables=self._trainable_weights)) + + @property + def non_trainable_weights(self): + self._assert_weights_created() + return self._dedup_weights( + trackable_layer_utils.gather_non_trainable_weights( + trainable=self.trainable, + sub_layers=self._layers, + extra_variables=self._non_trainable_weights + + self._trainable_weights)) + + def get_weights(self): + """Retrieves the weights of the model. + + Returns: + A flat list of Numpy arrays. + """ + with self.distribute_strategy.scope(): + return super(Model, self).get_weights() + + def save(self, + filepath, + overwrite=True, + include_optimizer=True, + save_format=None, + signatures=None, + options=None): + """Saves the model to Tensorflow SavedModel or a single HDF5 file. + + The savefile includes: + + - The model architecture, allowing to re-instantiate the model. + - The model weights. 
+ - The state of the optimizer, allowing to resume training + exactly where you left off. + + This allows you to save the entirety of the state of a model + in a single file. + + Saved models can be reinstantiated via `keras.models.load_model`. + The model returned by `load_model` is a compiled model ready to be used + (unless the saved model was never compiled in the first place). + + Models built with the Sequential and Functional API can be saved to both the + HDF5 and SavedModel formats. Subclassed models can only be saved with the + SavedModel format. + + Note that the model weights may have different scoped names after being + loaded. Scoped names include the model/layer names, such as + `"dense_1/kernel:0"`. It is recommended that you use the layer properties to + access specific variables, e.g. `model.get_layer("dense_1").kernel`. + + Arguments: + filepath: String, PathLike, path to SavedModel or H5 file to save the + model. + overwrite: Whether to silently overwrite any existing file at the + target location, or provide the user with a manual prompt. + include_optimizer: If True, save optimizer's state together. + save_format: Either `'tf'` or `'h5'`, indicating whether to save the + model to Tensorflow SavedModel or HDF5. Defaults to 'tf' in TF 2.X, + and 'h5' in TF 1.X. + signatures: Signatures to save with the SavedModel. Applicable to the + 'tf' format only. Please see the `signatures` argument in + `tf.saved_model.save` for details. + options: Optional `tf.saved_model.SaveOptions` object that specifies + options for saving to SavedModel. 
+ + Example: + + ```python + from keras.models import load_model + + model.save('my_model.h5') # creates a HDF5 file 'my_model.h5' + del model # deletes the existing model + + # returns a compiled model + # identical to the previous one + model = load_model('my_model.h5') + ``` + """ + save.save_model(self, filepath, overwrite, include_optimizer, save_format, + signatures, options) + + def save_weights(self, filepath, overwrite=True, save_format=None): + """Saves all layer weights. + + Either saves in HDF5 or in TensorFlow format based on the `save_format` + argument. + + When saving in HDF5 format, the weight file has: + - `layer_names` (attribute), a list of strings + (ordered names of model layers). + - For every layer, a `group` named `layer.name` + - For every such layer group, a group attribute `weight_names`, + a list of strings + (ordered names of weights tensor of the layer). + - For every weight in the layer, a dataset + storing the weight value, named after the weight tensor. + + When saving in TensorFlow format, all objects referenced by the network are + saved in the same format as `tf.train.Checkpoint`, including any `Layer` + instances or `Optimizer` instances assigned to object attributes. For + networks constructed from inputs and outputs using `tf.keras.Model(inputs, + outputs)`, `Layer` instances used by the network are tracked/saved + automatically. For user-defined classes which inherit from `tf.keras.Model`, + `Layer` instances must be assigned to object attributes, typically in the + constructor. See the documentation of `tf.train.Checkpoint` and + `tf.keras.Model` for details. + + While the formats are the same, do not mix `save_weights` and + `tf.train.Checkpoint`. Checkpoints saved by `Model.save_weights` should be + loaded using `Model.load_weights`. Checkpoints saved using + `tf.train.Checkpoint.save` should be restored using the corresponding + `tf.train.Checkpoint.restore`. 
Prefer `tf.train.Checkpoint` over + `save_weights` for training checkpoints. + + The TensorFlow format matches objects and variables by starting at a root + object, `self` for `save_weights`, and greedily matching attribute + names. For `Model.save` this is the `Model`, and for `Checkpoint.save` this + is the `Checkpoint` even if the `Checkpoint` has a model attached. This + means saving a `tf.keras.Model` using `save_weights` and loading into a + `tf.train.Checkpoint` with a `Model` attached (or vice versa) will not match + the `Model`'s variables. See the [guide to training + checkpoints](https://www.tensorflow.org/guide/checkpoint) for details + on the TensorFlow format. + + Arguments: + filepath: String or PathLike, path to the file to save the weights to. + When saving in TensorFlow format, this is the prefix used for + checkpoint files (multiple files are generated). Note that the '.h5' + suffix causes weights to be saved in HDF5 format. + overwrite: Whether to silently overwrite any existing file at the + target location, or provide the user with a manual prompt. + save_format: Either 'tf' or 'h5'. A `filepath` ending in '.h5' or + '.keras' will default to HDF5 if `save_format` is `None`. Otherwise + `None` defaults to 'tf'. + + Raises: + ImportError: If h5py is not available when attempting to save in HDF5 + format. + ValueError: For invalid/unknown format arguments. + """ + self._assert_weights_created() + filepath = path_to_string(filepath) + filepath_is_h5 = _is_hdf5_filepath(filepath) + if save_format is None: + if filepath_is_h5: + save_format = 'h5' + else: + save_format = 'tf' + else: + user_format = save_format.lower().strip() + if user_format in ('tensorflow', 'tf'): + save_format = 'tf' + elif user_format in ('hdf5', 'h5', 'keras'): + save_format = 'h5' + else: + raise ValueError( + 'Unknown format "%s". Was expecting one of {"tf", "h5"}.' 
% ( + save_format,)) + if save_format == 'tf' and filepath_is_h5: + raise ValueError( + ('save_weights got save_format="tf"/"tensorflow", but the ' + 'filepath ("%s") looks like an HDF5 file. Omit the ".h5"/".keras" ' + 'when saving in TensorFlow format.') + % filepath) + + if save_format == 'h5' and h5py is None: + raise ImportError( + '`save_weights` requires h5py when saving in hdf5.') + if save_format == 'tf': + check_filepath = filepath + '.index' + else: + check_filepath = filepath + # If file exists and should not be overwritten: + if not overwrite and os.path.isfile(check_filepath): + proceed = ask_to_proceed_with_overwrite(check_filepath) + if not proceed: + return + if save_format == 'h5': + with h5py.File(filepath, 'w') as f: + hdf5_format.save_weights_to_hdf5_group(f, self.layers) + else: + if context.executing_eagerly(): + session = None + else: + session = backend.get_session() + optimizer = getattr(self, 'optimizer', None) + if (optimizer + and not isinstance(optimizer, trackable.Trackable)): + logging.warning( + ('This model was compiled with a Keras optimizer (%s) but is being ' + 'saved in TensorFlow format with `save_weights`. The model\'s ' + 'weights will be saved, but unlike with TensorFlow optimizers in ' + 'the TensorFlow format the optimizer\'s state will not be ' + 'saved.\n\nConsider using a TensorFlow optimizer from `tf.train`.') + % (optimizer,)) + self._trackable_saver.save(filepath, session=session) + # Record this checkpoint so it's visible from tf.train.latest_checkpoint. + checkpoint_management.update_checkpoint_state_internal( + save_dir=os.path.dirname(filepath), + model_checkpoint_path=filepath, + save_relative_paths=True, + all_model_checkpoint_paths=[filepath]) + + def load_weights(self, filepath, by_name=False, skip_mismatch=False): + """Loads all layer weights, either from a TensorFlow or an HDF5 weight file. + + If `by_name` is False weights are loaded based on the network's + topology. 
This means the architecture should be the same as when the weights + were saved. Note that layers that don't have weights are not taken into + account in the topological ordering, so adding or removing layers is fine as + long as they don't have weights. + + If `by_name` is True, weights are loaded into layers only if they share the + same name. This is useful for fine-tuning or transfer-learning models where + some of the layers have changed. + + Only topological loading (`by_name=False`) is supported when loading weights + from the TensorFlow format. Note that topological loading differs slightly + between TensorFlow and HDF5 formats for user-defined classes inheriting from + `tf.keras.Model`: HDF5 loads based on a flattened list of weights, while the + TensorFlow format loads based on the object-local names of attributes to + which layers are assigned in the `Model`'s constructor. + + Arguments: + filepath: String, path to the weights file to load. For weight files in + TensorFlow format, this is the file prefix (the same as was passed + to `save_weights`). + by_name: Boolean, whether to load weights by name or by topological + order. Only topological loading is supported for weight files in + TensorFlow format. + skip_mismatch: Boolean, whether to skip loading of layers where there is + a mismatch in the number of weights, or a mismatch in the shape of + the weight (only valid when `by_name=True`). + + Returns: + When loading a weight file in TensorFlow format, returns the same status + object as `tf.train.Checkpoint.restore`. When graph building, restore + ops are run automatically as soon as the network is built (on first call + for user-defined classes inheriting from `Model`, immediately if it is + already built). + + When loading weights in HDF5 format, returns `None`. + + Raises: + ImportError: If h5py is not available and the weight file is in HDF5 + format. + ValueError: If `skip_mismatch` is set to `True` when `by_name` is + `False`. 
+ """ + if dist_utils.is_tpu_strategy(self._distribution_strategy): + if (self._distribution_strategy.extended.steps_per_run > 1 and + (not _is_hdf5_filepath(filepath))): + raise ValueError('Load weights is not yet supported with TPUStrategy ' + 'with steps_per_run greater than 1.') + if skip_mismatch and not by_name: + raise ValueError( + 'When calling model.load_weights, skip_mismatch can only be set to ' + 'True when by_name is True.') + + filepath = path_to_string(filepath) + if _is_hdf5_filepath(filepath): + save_format = 'h5' + else: + try: + py_checkpoint_reader.NewCheckpointReader(filepath) + save_format = 'tf' + except errors_impl.DataLossError: + # The checkpoint is not readable in TensorFlow format. Try HDF5. + save_format = 'h5' + if save_format == 'tf': + status = self._trackable_saver.restore(filepath) + if by_name: + raise NotImplementedError( + 'Weights may only be loaded based on topology into Models when ' + 'loading TensorFlow-formatted weights (got by_name=True to ' + 'load_weights).') + if not context.executing_eagerly(): + session = backend.get_session() + # Restore existing variables (if any) immediately, and set up a + # streaming restore for any variables created in the future. + trackable_utils.streaming_restore(status=status, session=session) + status.assert_nontrivial_match() + return status + if h5py is None: + raise ImportError( + '`load_weights` requires h5py when loading weights from HDF5.') + if not self._is_graph_network and not self.built: + raise ValueError( + 'Unable to load weights saved in HDF5 format into a subclassed ' + 'Model which has not created its variables yet. 
Call the Model ' + 'first, then load the weights.') + self._assert_weights_created() + with h5py.File(filepath, 'r') as f: + if 'layer_names' not in f.attrs and 'model_weights' in f: + f = f['model_weights'] + if by_name: + hdf5_format.load_weights_from_hdf5_group_by_name( + f, self.layers, skip_mismatch=skip_mismatch) + else: + hdf5_format.load_weights_from_hdf5_group(f, self.layers) + + def _updated_config(self): + """Util shared between different serialization methods. + + Returns: + Model config with Keras version information added. + """ + from tensorflow.python.keras import __version__ as keras_version # pylint: disable=g-import-not-at-top + + config = self.get_config() + model_config = { + 'class_name': self.__class__.__name__, + 'config': config, + 'keras_version': keras_version, + 'backend': backend.backend() + } + return model_config + + def get_config(self): + raise NotImplementedError + + @classmethod + def from_config(cls, config, custom_objects=None): + # Since only FunctionalModel produces config, the model can only + # be constructed for FunctionalModel + from tensorflow.python.keras.engine import functional # pylint: disable=g-import-not-at-top + return functional.Functional.from_config( + config, custom_objects=custom_objects) + + def to_json(self, **kwargs): + """Returns a JSON string containing the network configuration. + + To load a network from a JSON save file, use + `keras.models.model_from_json(json_string, custom_objects={})`. + + Arguments: + **kwargs: Additional keyword arguments + to be passed to `json.dumps()`. + + Returns: + A JSON string. + """ + model_config = self._updated_config() + return json.dumps( + model_config, default=serialization.get_json_type, **kwargs) + + def to_yaml(self, **kwargs): + """Returns a yaml string containing the network configuration. + + To load a network from a yaml save file, use + `keras.models.model_from_yaml(yaml_string, custom_objects={})`. 
+ + `custom_objects` should be a dictionary mapping + the names of custom losses / layers / etc to the corresponding + functions / classes. + + Arguments: + **kwargs: Additional keyword arguments + to be passed to `yaml.dump()`. + + Returns: + A YAML string. + + Raises: + ImportError: if yaml module is not found. + """ + if yaml is None: + raise ImportError( + 'Requires yaml module installed (`pip install pyyaml`).') + return yaml.dump(self._updated_config(), **kwargs) + + def reset_states(self): + for layer in self.layers: + if hasattr(layer, 'reset_states') and getattr(layer, 'stateful', False): + layer.reset_states() + + @property + @deprecation.deprecated( + date=None, + instructions='This property should not be used in TensorFlow 2.0, ' + 'as updates are applied automatically.') + @doc_controls.do_not_generate_docs + def state_updates(self): + """Deprecated, do NOT use! + + Returns the `updates` from all layers that are stateful. + + This is useful for separating training updates and + state updates, e.g. when we need to update a layer's internal state + during prediction. + + Returns: + A list of update ops. + """ + state_updates = [] + for layer in self.layers: + if getattr(layer, 'stateful', False): + if hasattr(layer, 'updates'): + state_updates += layer.updates + return state_updates + + @property + def weights(self): + """Returns the list of all layer variables/weights. + + Returns: + A list of variables. + """ + return self._dedup_weights(self._undeduplicated_weights) + + @property + def _undeduplicated_weights(self): + """Returns the undeduplicated list of all layer variables/weights.""" + self._assert_weights_created() + weights = [] + for layer in self._layers: + weights += layer.weights + weights += (self._trainable_weights + self._non_trainable_weights) + return weights + + def summary(self, line_length=None, positions=None, print_fn=None): + """Prints a string summary of the network. 
+ + Arguments: + line_length: Total length of printed lines + (e.g. set this to adapt the display to different + terminal window sizes). + positions: Relative or absolute positions of log elements + in each line. If not provided, + defaults to `[.33, .55, .67, 1.]`. + print_fn: Print function to use. Defaults to `print`. + It will be called on each line of the summary. + You can set it to a custom function + in order to capture the string summary. + + Raises: + ValueError: if `summary()` is called before the model is built. + """ + if not self.built: + raise ValueError('This model has not yet been built. ' + 'Build the model first by calling `build()` or calling ' + '`fit()` with some data, or specify ' + 'an `input_shape` argument in the first layer(s) for ' + 'automatic build.') + layer_utils.print_summary(self, + line_length=line_length, + positions=positions, + print_fn=print_fn) + + @property + def layers(self): + return self._unique_sublayers() + + def get_layer(self, name=None, index=None): + """Retrieves a layer based on either its name (unique) or index. + + Pass either `name` or `index`; passing both raises a `ValueError`. + Indices are based on order of horizontal graph traversal (bottom-up). + + Arguments: + name: String, name of layer. + index: Integer, index of layer. + + Returns: + A layer instance. + + Raises: + ValueError: In case of invalid layer name or index. + """ + # TODO(fchollet): We could build a dictionary based on layer names + # since they are constant, but we have not done that yet.
+ if index is not None and name is not None: + raise ValueError('Provide only a layer name or a layer index.') + + if index is not None: + if len(self.layers) <= index: + raise ValueError('Was asked to retrieve layer at index ' + str(index) + + ' but model only has ' + str(len(self.layers)) + + ' layers.') + else: + return self.layers[index] + + if name is not None: + for layer in self.layers: + if layer.name == name: + return layer + raise ValueError('No such layer: ' + name + '.') + raise ValueError('Provide either a layer name or layer index.') + + @trackable.no_automatic_dependency_tracking + def _set_save_spec(self, inputs): + if self._saved_model_inputs_spec is not None: + return # Already set. + + input_names = self.input_names + if not input_names: + input_names = compile_utils.create_pseudo_input_names(inputs) + + flat_inputs = nest.flatten(inputs) + specs = [] + for name, tensor in zip(input_names, flat_inputs): + specs.append( + tf_utils.get_tensor_spec(tensor, dynamic_batch=False, name=name)) + specs = nest.pack_sequence_as(inputs, specs) + + self._saved_model_inputs_spec = specs + + def _get_save_spec(self, dynamic_batch=True): + if self._saved_model_inputs_spec is None: + return None + + return nest.map_structure( + lambda t: tf_utils.get_tensor_spec(t, dynamic_batch=dynamic_batch), + self._saved_model_inputs_spec) + + def _assert_weights_created(self): + """Asserts that all the weights for the model have been created. + + For a non-dynamic model, the weights must already be created after the + layer has been called. For a dynamic model, the exact list of weights can + never be known for certain since it may change at any time during execution. + + We run this check right before accessing weights or getting the Numpy value + for the current weights. Otherwise, if the layer has never been called, + the user would just get an empty list, which is misleading. + + Raises: + ValueError: if the weights of the network have not yet been created.
+ """ + if self.dynamic: + return + + if ('build' in self.__class__.__dict__ and + self.__class__ != Model and + not self.built): + # For any model that has customized build() method but hasn't + # been invoked yet, this will cover both sequential and subclass model. + # Also make sure to exclude Model class itself which has build() defined. + raise ValueError('Weights for model %s have not yet been created. ' + 'Weights are created when the Model is first called on ' + 'inputs or `build()` is called with an `input_shape`.' % + self.name) + def _check_call_args(self, method_name): """Check that `call` has only one positional arg.""" # Always allow first arg, regardless of arg name. @@ -1990,3 +2738,8 @@ def _disallow_inside_tf_function(method_name): 'directly on `Tensor`s inside a `tf.function` like: `model(x)`.' ).format(method_name=method_name) raise RuntimeError(error_msg) + + +def _is_hdf5_filepath(filepath): + return (filepath.endswith('.h5') or filepath.endswith('.keras') or + filepath.endswith('.hdf5')) diff --git a/tensorflow/python/keras/engine/training_v1.py b/tensorflow/python/keras/engine/training_v1.py index 0a40ce3899b..c137c6e517a 100644 --- a/tensorflow/python/keras/engine/training_v1.py +++ b/tensorflow/python/keras/engine/training_v1.py @@ -43,7 +43,7 @@ from tensorflow.python.keras import losses from tensorflow.python.keras import metrics as metrics_module from tensorflow.python.keras import optimizers from tensorflow.python.keras.distribute import distributed_training_utils -from tensorflow.python.keras.engine import network +from tensorflow.python.keras.engine import base_layer from tensorflow.python.keras.engine import training as training_lib from tensorflow.python.keras.engine import training_arrays from tensorflow.python.keras.engine import training_distributed @@ -62,6 +62,7 @@ from tensorflow.python.ops.losses import util as tf_losses_utils from tensorflow.python.platform import tf_logging as logging from 
tensorflow.python.training.tracking import base as trackable from tensorflow.python.training.tracking import layer_utils as trackable_layer_utils +from tensorflow.python.types import core from tensorflow.python.util import deprecation from tensorflow.python.util import nest from tensorflow.python.util import tf_inspect @@ -180,8 +181,8 @@ class Model(training_lib.Model): self._compile_time_distribution_strategy) if strategy: with strategy.scope(): - return network.Network.get_weights(self) - return network.Network.get_weights(self) + return base_layer.Layer.get_weights(self) + return base_layer.Layer.get_weights(self) def load_weights(self, filepath, by_name=False, skip_mismatch=False): """Loads all layer weights, either from a TensorFlow or an HDF5 weight file. @@ -231,7 +232,7 @@ class Model(training_lib.Model): """ if distributed_training_utils.is_tpu_strategy(self._distribution_strategy): if (self._distribution_strategy.extended.steps_per_run > 1 and - (not network._is_hdf5_filepath(filepath))): # pylint: disable=protected-access + (not training_lib._is_hdf5_filepath(filepath))): # pylint: disable=protected-access raise ValueError('Load weights is not yet supported with TPUStrategy ' 'with steps_per_run greater than 1.') return super(Model, self).load_weights(filepath, by_name, skip_mismatch) @@ -490,6 +491,11 @@ class Model(training_lib.Model): """Returns the model's metrics added using `compile`, `add_metric` APIs.""" metrics = [] if self._is_compiled: + if not hasattr(self, '_v1_compile_was_called'): + # See b/155687393 for more details, the model is created as a v2 + # instance but converted to v1. Fallback to use base Model to retrieve + # the metrics. + return super(Model, self).metrics metrics += self._compile_metric_functions metrics.extend(self._metrics) metrics.extend(_get_metrics_from_layers(self._layers)) @@ -503,6 +509,12 @@ class Model(training_lib.Model): # losses for backward compatibility. 
metrics_names = ['loss'] if self._is_compiled: + if not hasattr(self, '_v1_compile_was_called'): + # See b/155687393 for more details, the model is created as a v2 + # instance but converted to v1. Fallback to use base Model to retrieve + # the metrics name + return super(Model, self).metrics_names + # Add output loss metric names to the metric names list. if len(self._training_endpoints) > 1: metrics_names.extend([ @@ -3143,7 +3155,7 @@ def _convert_scipy_sparse_tensor(value, expected_input): The possibly-converted 'value'. """ if issparse is not None and issparse(value): - if ops.is_dense_tensor_like(expected_input): + if isinstance(expected_input, core.Tensor): if ops.executing_eagerly_outside_functions(): # In TF2 we do not silently densify sparse matrices. raise ValueError('A SciPy sparse matrix was passed to a model ' diff --git a/tensorflow/python/keras/feature_column/BUILD b/tensorflow/python/keras/feature_column/BUILD index 650efcceb52..94097c28d73 100644 --- a/tensorflow/python/keras/feature_column/BUILD +++ b/tensorflow/python/keras/feature_column/BUILD @@ -12,11 +12,88 @@ exports_files(["LICENSE"]) py_library( name = "feature_column", + srcs = ["__init__.py"], deps = [ + ":dense_features", + ":dense_features_v2", ":sequence_feature_column", ], ) +py_library( + name = "dense_features", + srcs = [ + "dense_features.py", + ], + deps = [ + "//tensorflow/python:framework_ops", + "//tensorflow/python:tf_export", + "//tensorflow/python:util", + "//tensorflow/python/feature_column:feature_column_v2", + "//tensorflow/python/keras:backend", + ], +) + +py_library( + name = "dense_features_v2", + srcs = [ + "dense_features_v2.py", + ], + deps = [ + ":dense_features", + "//tensorflow/python:framework_ops", + "//tensorflow/python:tf_export", + "//tensorflow/python/feature_column:feature_column_v2", + ], +) + +tf_py_test( + name = "dense_features_test", + srcs = ["dense_features_test.py"], + tags = ["no_pip"], + deps = [ + ":dense_features", + 
"//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:errors", + "//tensorflow/python:framework_ops", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:lookup_ops", + "//tensorflow/python:partitioned_variables", + "//tensorflow/python:session", + "//tensorflow/python:sparse_tensor", + "//tensorflow/python:variables", + "//tensorflow/python/eager:backprop", + "//tensorflow/python/eager:context", + "//tensorflow/python/feature_column:feature_column_v2", + ], +) + +tf_py_test( + name = "dense_features_v2_test", + srcs = ["dense_features_v2_test.py"], + tags = ["no_pip"], + deps = [ + ":dense_features_v2", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:errors", + "//tensorflow/python:framework_ops", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:lookup_ops", + "//tensorflow/python:session", + "//tensorflow/python:sparse_tensor", + "//tensorflow/python:variables", + "//tensorflow/python/eager:backprop", + "//tensorflow/python/eager:context", + "//tensorflow/python/feature_column:feature_column_v2", + ], +) + py_library( name = "sequence_feature_column", srcs = ["sequence_feature_column.py"], @@ -59,6 +136,7 @@ py_test( srcs_version = "PY2AND3", tags = ["no_pip"], deps = [ + ":dense_features", ":sequence_feature_column", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_test_lib", diff --git a/tensorflow/python/keras/feature_column/__init__.py b/tensorflow/python/keras/feature_column/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tensorflow/python/feature_column/dense_features.py b/tensorflow/python/keras/feature_column/dense_features.py similarity index 97% rename from tensorflow/python/feature_column/dense_features.py rename to 
tensorflow/python/keras/feature_column/dense_features.py index 6feef185815..820f1a6b1b7 100644 --- a/tensorflow/python/feature_column/dense_features.py +++ b/tensorflow/python/keras/feature_column/dense_features.py @@ -23,7 +23,6 @@ import json from tensorflow.python.feature_column import feature_column_v2 as fc from tensorflow.python.framework import ops from tensorflow.python.keras import backend -from tensorflow.python.keras.layers import serialization as layer_serialization from tensorflow.python.util import serialization from tensorflow.python.util.tf_export import keras_export @@ -173,7 +172,3 @@ class DenseFeatures(fc._BaseFeaturesLayer): # pylint: disable=protected-access cols_to_output_tensors[column] = processed_tensors output_tensors.append(processed_tensors) return self._verify_and_concat_tensors(output_tensors) - - -layer_serialization.inject_feature_column_v1_objects( - 'DenseFeatures', DenseFeatures) diff --git a/tensorflow/python/feature_column/dense_features_test.py b/tensorflow/python/keras/feature_column/dense_features_test.py similarity index 62% rename from tensorflow/python/feature_column/dense_features_test.py rename to tensorflow/python/keras/feature_column/dense_features_test.py index 7cd523dcc14..ec07964bcbe 100644 --- a/tensorflow/python/feature_column/dense_features_test.py +++ b/tensorflow/python/keras/feature_column/dense_features_test.py @@ -18,19 +18,21 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from absl.testing import parameterized import numpy as np from tensorflow.python.client import session from tensorflow.python.eager import backprop from tensorflow.python.eager import context -from tensorflow.python.feature_column import dense_features as df from tensorflow.python.feature_column import feature_column_v2 as fc +from tensorflow.python.feature_column import sequence_feature_column as sfc from tensorflow.python.framework import constant_op from 
tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util +from tensorflow.python.keras.feature_column import dense_features as df from tensorflow.python.ops import array_ops from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import partitioned_variables @@ -676,5 +678,417 @@ class DenseFeaturesTest(test.TestCase): sess.run(net, feed_dict={features['price']: np.array(1)}) +class IndicatorColumnTest(test.TestCase): + + @test_util.run_deprecated_v1 + def test_dense_features(self): + animal = fc.indicator_column( + fc.categorical_column_with_identity('animal', num_buckets=4)) + with ops.Graph().as_default(): + features = { + 'animal': + sparse_tensor.SparseTensor( + indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2]) + } + net = df.DenseFeatures([animal])(features) + + self.evaluate(variables_lib.global_variables_initializer()) + self.evaluate(lookup_ops.tables_initializer()) + + self.assertAllClose([[0., 1., 1., 0.]], self.evaluate(net)) + + +class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + { + 'testcase_name': 'use_safe_embedding_lookup', + 'use_safe_embedding_lookup': True + }, { + 'testcase_name': 'dont_use_safe_embedding_lookup', + 'use_safe_embedding_lookup': False + }) + @test_util.run_deprecated_v1 + def test_dense_features(self, use_safe_embedding_lookup): + # Inputs. + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 4), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 5)) + + # Embedding variable. + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 11.) 
# id 2 + ) + + def _initializer(shape, dtype, partition_info=None): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values + + # Expected lookup result, using combiner='mean'. + expected_lookups = ( + # example 0, ids [2], embedding = [7, 11] + (7., 11.), + # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] + (2., 3.5), + # example 2, ids [], embedding = [0, 0] + (0., 0.), + # example 3, ids [1], embedding = [3, 5] + (3., 5.), + ) + + # Build columns. + categorical_column = fc.categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = fc.embedding_column( + categorical_column, + dimension=embedding_dimension, + initializer=_initializer, + use_safe_embedding_lookup=use_safe_embedding_lookup) + + # Provide sparse input and get dense result. + l = df.DenseFeatures((embedding_column,)) + dense_features = l({'aaa': sparse_input}) + + # Assert expected embedding variable and lookups. 
+ global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertCountEqual(('dense_features/aaa_embedding/embedding_weights:0',), + tuple([v.name for v in global_vars])) + for v in global_vars: + self.assertIsInstance(v, variables_lib.Variable) + trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) + self.assertCountEqual(('dense_features/aaa_embedding/embedding_weights:0',), + tuple([v.name for v in trainable_vars])) + + self.evaluate(variables_lib.global_variables_initializer()) + self.evaluate(lookup_ops.tables_initializer()) + + self.assertAllEqual(embedding_values, self.evaluate(trainable_vars[0])) + self.assertAllEqual(expected_lookups, self.evaluate(dense_features)) + + if use_safe_embedding_lookup: + self.assertIn('SparseFillEmptyRows', + [x.type for x in ops.get_default_graph().get_operations()]) + else: + self.assertNotIn( + 'SparseFillEmptyRows', + [x.type for x in ops.get_default_graph().get_operations()]) + + @test_util.run_deprecated_v1 + def test_dense_features_not_trainable(self): + # Inputs. + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 4), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 5)) + + # Embedding variable. + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 11.) # id 2 + ) + + def _initializer(shape, dtype, partition_info=None): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values + + # Expected lookup result, using combiner='mean'. 
+ expected_lookups = ( + # example 0, ids [2], embedding = [7, 11] + (7., 11.), + # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] + (2., 3.5), + # example 2, ids [], embedding = [0, 0] + (0., 0.), + # example 3, ids [1], embedding = [3, 5] + (3., 5.), + ) + + # Build columns. + categorical_column = fc.categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = fc.embedding_column( + categorical_column, + dimension=embedding_dimension, + initializer=_initializer, + trainable=False) + + # Provide sparse input and get dense result. + dense_features = df.DenseFeatures((embedding_column,))({ + 'aaa': sparse_input + }) + + # Assert expected embedding variable and lookups. + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertCountEqual(('dense_features/aaa_embedding/embedding_weights:0',), + tuple([v.name for v in global_vars])) + self.assertCountEqual([], + ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)) + + self.evaluate(variables_lib.global_variables_initializer()) + self.evaluate(lookup_ops.tables_initializer()) + + self.assertAllEqual(embedding_values, self.evaluate(global_vars[0])) + self.assertAllEqual(expected_lookups, self.evaluate(dense_features)) + + +class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase): + + def _test_dense_features(self, trainable=True): + # Inputs. 
+ vocabulary_size = 3 + sparse_input_a = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 4)), + values=(2, 0, 1), + dense_shape=(2, 5)) + sparse_input_b = sparse_tensor.SparseTensorValue( + # example 0, ids [0] + # example 1, ids [] + indices=((0, 0),), + values=(0,), + dense_shape=(2, 5)) + sparse_input_c = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 1), (1, 1), (1, 3)), + values=(2, 0, 1), + dense_shape=(2, 5)) + sparse_input_d = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [] + indices=((0, 1),), + values=(2,), + dense_shape=(2, 5)) + + # Embedding variable. + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 11.) # id 2 + ) + + def _initializer(shape, dtype, partition_info=None): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values + + # Expected lookup result, using combiner='mean'. + expected_lookups = ( + # example 0: + # A ids [2], embedding = [7, 11] + # B ids [0], embedding = [1, 2] + # C ids [2], embedding = [7, 11] + # D ids [2], embedding = [7, 11] + (7., 11., 1., 2., 7., 11., 7., 11.), + # example 1: + # A ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] + # B ids [], embedding = [0, 0] + # C ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] + # D ids [], embedding = [0, 0] + (2., 3.5, 0., 0., 2., 3.5, 0., 0.), + ) + + # Build columns. 
+ categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=vocabulary_size) + categorical_column_c = fc.categorical_column_with_identity( + key='ccc', num_buckets=vocabulary_size) + categorical_column_d = fc.categorical_column_with_identity( + key='ddd', num_buckets=vocabulary_size) + + embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2( + [categorical_column_a, categorical_column_b], + dimension=embedding_dimension, + initializer=_initializer, + trainable=trainable) + embedding_column_c, embedding_column_d = fc.shared_embedding_columns_v2( + [categorical_column_c, categorical_column_d], + dimension=embedding_dimension, + initializer=_initializer, + trainable=trainable) + + features = { + 'aaa': sparse_input_a, + 'bbb': sparse_input_b, + 'ccc': sparse_input_c, + 'ddd': sparse_input_d + } + + # Provide sparse input and get dense result. + dense_features = df.DenseFeatures( + feature_columns=(embedding_column_b, embedding_column_a, + embedding_column_c, embedding_column_d))( + features) + + # Assert expected embedding variable and lookups. 
+ global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertCountEqual( + ['aaa_bbb_shared_embedding:0', 'ccc_ddd_shared_embedding:0'], + tuple([v.name for v in global_vars])) + for v in global_vars: + self.assertIsInstance(v, variables_lib.Variable) + trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) + if trainable: + self.assertCountEqual( + ['aaa_bbb_shared_embedding:0', 'ccc_ddd_shared_embedding:0'], + tuple([v.name for v in trainable_vars])) + else: + self.assertCountEqual([], tuple([v.name for v in trainable_vars])) + shared_embedding_vars = global_vars + + self.evaluate(variables_lib.global_variables_initializer()) + self.evaluate(lookup_ops.tables_initializer()) + + self.assertAllEqual(embedding_values, + self.evaluate(shared_embedding_vars[0])) + self.assertAllEqual(expected_lookups, self.evaluate(dense_features)) + + @test_util.run_deprecated_v1 + def test_dense_features(self): + self._test_dense_features() + + @test_util.run_deprecated_v1 + def test_dense_features_no_trainable(self): + self._test_dense_features(trainable=False) + + +@test_util.run_all_in_graph_and_eager_modes +class DenseFeaturesSerializationTest(test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + ('default', None, None), + ('trainable', True, 'trainable'), + ('not_trainable', False, 'frozen')) + def test_get_config(self, trainable, name): + cols = [fc.numeric_column('a'), + fc.embedding_column(fc.categorical_column_with_identity( + key='b', num_buckets=3), dimension=2)] + orig_layer = df.DenseFeatures( + cols, trainable=trainable, name=name) + config = orig_layer.get_config() + + self.assertEqual(config['name'], orig_layer.name) + self.assertEqual(config['trainable'], trainable) + self.assertLen(config['feature_columns'], 2) + self.assertEqual( + config['feature_columns'][0]['class_name'], 'NumericColumn') + self.assertEqual(config['feature_columns'][0]['config']['shape'], (1,)) + self.assertEqual( + 
config['feature_columns'][1]['class_name'], 'EmbeddingColumn') + + @parameterized.named_parameters( + ('default', None, None), + ('trainable', True, 'trainable'), + ('not_trainable', False, 'frozen')) + def test_from_config(self, trainable, name): + cols = [fc.numeric_column('a'), + fc.embedding_column(fc.categorical_column_with_vocabulary_list( + 'b', vocabulary_list=['1', '2', '3']), dimension=2), + fc.indicator_column(fc.categorical_column_with_hash_bucket( + key='c', hash_bucket_size=3))] + orig_layer = df.DenseFeatures( + cols, trainable=trainable, name=name) + config = orig_layer.get_config() + + new_layer = df.DenseFeatures.from_config(config) + + self.assertEqual(new_layer.name, orig_layer.name) + self.assertEqual(new_layer.trainable, trainable) + self.assertLen(new_layer._feature_columns, 3) + self.assertEqual(new_layer._feature_columns[0].name, 'a') + self.assertEqual(new_layer._feature_columns[1].initializer.mean, 0.0) + self.assertEqual(new_layer._feature_columns[1].categorical_column.name, 'b') + self.assertIsInstance(new_layer._feature_columns[2], fc.IndicatorColumn) + + def test_crossed_column(self): + a = fc.categorical_column_with_vocabulary_list( + 'a', vocabulary_list=['1', '2', '3']) + b = fc.categorical_column_with_vocabulary_list( + 'b', vocabulary_list=['1', '2', '3']) + ab = fc.crossed_column([a, b], hash_bucket_size=2) + cols = [fc.indicator_column(ab)] + + orig_layer = df.DenseFeatures(cols) + config = orig_layer.get_config() + + new_layer = df.DenseFeatures.from_config(config) + + self.assertLen(new_layer._feature_columns, 1) + self.assertEqual(new_layer._feature_columns[0].name, 'a_X_b_indicator') + + +@test_util.run_all_in_graph_and_eager_modes +class SequenceFeatureColumnsTest(test.TestCase): + """Tests DenseFeatures with sequence feature columns.""" + + def test_embedding_column(self): + """Tests that error is raised for sequence embedding column.""" + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 
0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)) + + categorical_column_a = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column_a = fc.embedding_column( + categorical_column_a, dimension=2) + + input_layer = df.DenseFeatures([embedding_column_a]) + with self.assertRaisesRegexp( + ValueError, + r'In embedding_column: aaa_embedding\. categorical_column must not be ' + r'of type SequenceCategoricalColumn\.'): + _ = input_layer({'aaa': sparse_input}) + + def test_indicator_column(self): + """Tests that error is raised for sequence indicator column.""" + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)) + + categorical_column_a = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + indicator_column_a = fc.indicator_column(categorical_column_a) + + input_layer = df.DenseFeatures([indicator_column_a]) + with self.assertRaisesRegexp( + ValueError, + r'In indicator_column: aaa_indicator\. 
categorical_column must not be ' + r'of type SequenceCategoricalColumn\.'): + _ = input_layer({'aaa': sparse_input}) + + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/feature_column/dense_features_v2.py b/tensorflow/python/keras/feature_column/dense_features_v2.py similarity index 94% rename from tensorflow/python/feature_column/dense_features_v2.py rename to tensorflow/python/keras/feature_column/dense_features_v2.py index 405c5d63249..e4dc22f1bbe 100644 --- a/tensorflow/python/feature_column/dense_features_v2.py +++ b/tensorflow/python/keras/feature_column/dense_features_v2.py @@ -18,10 +18,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.feature_column import dense_features from tensorflow.python.feature_column import feature_column_v2 as fc from tensorflow.python.framework import ops -from tensorflow.python.keras.layers import serialization as layer_serialization +from tensorflow.python.keras.feature_column import dense_features from tensorflow.python.util.tf_export import keras_export @@ -94,7 +93,3 @@ class DenseFeatures(dense_features.DenseFeatures): # We would like to call Layer.build and not _DenseFeaturesHelper.build. 
# pylint: disable=protected-access super(fc._BaseFeaturesLayer, self).build(None) # pylint: disable=bad-super-call - - -layer_serialization.inject_feature_column_v2_objects( - 'DenseFeatures', DenseFeatures) diff --git a/tensorflow/python/feature_column/dense_features_v2_test.py b/tensorflow/python/keras/feature_column/dense_features_v2_test.py similarity index 99% rename from tensorflow/python/feature_column/dense_features_v2_test.py rename to tensorflow/python/keras/feature_column/dense_features_v2_test.py index 71cb163a7d9..95fc8b7ac1e 100644 --- a/tensorflow/python/feature_column/dense_features_v2_test.py +++ b/tensorflow/python/keras/feature_column/dense_features_v2_test.py @@ -23,7 +23,6 @@ import numpy as np from tensorflow.python.client import session from tensorflow.python.eager import backprop from tensorflow.python.eager import context -from tensorflow.python.feature_column import dense_features_v2 as df from tensorflow.python.feature_column import feature_column_v2 as fc from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -31,6 +30,7 @@ from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util +from tensorflow.python.keras.feature_column import dense_features_v2 as df from tensorflow.python.ops import array_ops from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import variables as variables_lib diff --git a/tensorflow/python/keras/feature_column/sequence_feature_column_integration_test.py b/tensorflow/python/keras/feature_column/sequence_feature_column_integration_test.py index 8784182e23b..b1100bf7b07 100644 --- a/tensorflow/python/keras/feature_column/sequence_feature_column_integration_test.py +++ b/tensorflow/python/keras/feature_column/sequence_feature_column_integration_test.py @@ -24,11 +24,11 @@ from google.protobuf import text_format from 
tensorflow.core.example import example_pb2 from tensorflow.core.example import feature_pb2 from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.feature_column import dense_features from tensorflow.python.feature_column import feature_column_v2 as fc from tensorflow.python.feature_column import sequence_feature_column as sfc from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util +from tensorflow.python.keras.feature_column import dense_features from tensorflow.python.keras.feature_column import sequence_feature_column as ksfc from tensorflow.python.keras.layers import recurrent from tensorflow.python.ops import init_ops_v2 diff --git a/tensorflow/python/keras/layers/preprocessing/BUILD b/tensorflow/python/keras/layers/preprocessing/BUILD index 78b00d6c16e..501c99fe890 100644 --- a/tensorflow/python/keras/layers/preprocessing/BUILD +++ b/tensorflow/python/keras/layers/preprocessing/BUILD @@ -110,6 +110,7 @@ py_library( ], srcs_version = "PY2AND3", deps = [ + ":table_utils", "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", "//tensorflow/python:dtypes", @@ -145,6 +146,30 @@ py_library( ], ) +py_library( + name = "table_utils", + srcs = [ + "table_utils.py", + ], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python:array_ops", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:lookup_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:string_ops", + "//tensorflow/python:tensor_shape", + "//tensorflow/python:tensor_spec", + "//tensorflow/python:util", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/keras:backend", + "//tensorflow/python/keras/engine:base_preprocessing_layer", + "//tensorflow/python/ops/ragged", + ], +) + py_library( name = "text_vectorization", srcs = [ @@ -412,6 +437,20 @@ distribute_py_test( ], ) +tf_py_test( + name = 
"table_utils_test", + srcs = ["table_utils_test.py"], + python_version = "PY3", + deps = [ + ":table_utils", + "//tensorflow/python:client_testlib", + "//tensorflow/python/keras", + "//tensorflow/python/keras/utils:generic_utils", + "//tensorflow/python/ops/ragged:ragged_string_ops", + "@absl_py//absl/testing:parameterized", + ], +) + tf_py_test( name = "text_vectorization_test", size = "medium", diff --git a/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py b/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py index 4f909b648b6..05a6e84e6cc 100644 --- a/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py +++ b/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py @@ -51,6 +51,19 @@ _RESIZE_METHODS = { 'mitchellcubic': ResizeMethod.MITCHELLCUBIC } +H_AXIS = 1 +W_AXIS = 2 + + +def check_fill_mode_and_interpolation(fill_mode, interpolation): + if fill_mode not in {'reflect', 'wrap', 'constant'}: + raise NotImplementedError( + 'Unknown `fill_mode` {}. Only `reflect`, `wrap` and ' + '`constant` are supported.'.format(fill_mode)) + if interpolation not in {'nearest', 'bilinear'}: + raise NotImplementedError('Unknown `interpolation` {}. 
Only `nearest` and ' + '`bilinear` are supported.'.format(interpolation)) + @keras_export('keras.layers.experimental.preprocessing.Resizing') class Resizing(Layer): @@ -132,9 +145,8 @@ class CenterCrop(Layer): def call(self, inputs): inputs_shape = array_ops.shape(inputs) - h_axis, w_axis = 1, 2 - img_hd = inputs_shape[h_axis] - img_wd = inputs_shape[w_axis] + img_hd = inputs_shape[H_AXIS] + img_wd = inputs_shape[W_AXIS] img_hd_diff = img_hd - self.target_height img_wd_diff = img_wd - self.target_width checks = [] @@ -230,9 +242,9 @@ class RandomCrop(Layer): def resize_and_center_cropped_inputs(): """Deterministically resize to shorter side and center crop.""" input_shape = array_ops.shape(inputs) - input_height_t = input_shape[1] - input_width_t = input_shape[2] - ratio_cond = (input_height_t / input_width_t > 1.) + input_height_t = input_shape[H_AXIS] + input_width_t = input_shape[W_AXIS] + ratio_cond = (input_height_t / input_width_t > (self.height / self.width)) # pylint: disable=g-long-lambda resized_height = tf_utils.smart_cond( ratio_cond, @@ -407,17 +419,24 @@ class RandomTranslation(Layer): """Randomly translate each image during training. Arguments: - height_factor: a positive float represented as fraction of value, or a tuple - of size 2 representing lower and upper bound for shifting vertically. When - represented as a single float, this value is used for both the upper and - lower bound. For instance, `height_factor=(0.2, 0.3)` results in an output - height varying in the range `[original - 20%, original + 30%]`. - `height_factor=0.2` results in an output height varying in the range - `[original - 20%, original + 20%]`. - width_factor: a positive float represented as fraction of value, or a tuple + height_factor: a float represented as fraction of value, or a tuple + of size 2 representing lower and upper bound for shifting vertically. + A negative value means shifting image up, while a positive value + means shifting image down. 
When represented as a single positive float, + this value is used for both the upper and lower bound. For instance, + `height_factor=(-0.2, 0.3)` results in an output shifted by a random + amount in the range [-20%, +30%]. + `height_factor=0.2` results in an output height shifted by a random + amount in the range [-20%, +20%]. + width_factor: a float represented as fraction of value, or a tuple of size 2 representing lower and upper bound for shifting horizontally. - When represented as a single float, this value is used for both the upper - and lower bound. + A negative value means shifting image left, while a positive value + means shifting image right. When represented as a single positive float, + this value is used for both the upper and lower bound. For instance, + `width_factor=(-0.2, 0.3)` results in an output shifted left by 20%, and + shifted right by 30%. + `width_factor=0.2` results in an output height shifted left or right + by 20%. fill_mode: Points outside the boundaries of the input are filled according to the given mode (one of `{'constant', 'reflect', 'wrap'}`). - *reflect*: `(d c b a | a b c d | d c b a)` @@ -440,8 +459,8 @@ class RandomTranslation(Layer): data_format='channels_last'. Raise: - ValueError: if lower bound is not between [0, 1], or upper bound is - negative. + ValueError: if either bound is not between [0, 1], or upper bound is + less than lower bound. 
""" def __init__(self, @@ -454,38 +473,34 @@ class RandomTranslation(Layer): **kwargs): self.height_factor = height_factor if isinstance(height_factor, (tuple, list)): - self.height_lower = abs(height_factor[0]) + self.height_lower = height_factor[0] self.height_upper = height_factor[1] else: - self.height_lower = self.height_upper = height_factor - if self.height_upper < 0.: - raise ValueError('`height_factor` cannot have negative values as upper ' - 'bound, got {}'.format(height_factor)) + self.height_lower = -height_factor + self.height_upper = height_factor + if self.height_upper < self.height_lower: + raise ValueError('`height_factor` cannot have upper bound less than ' + 'lower bound, got {}'.format(height_factor)) if abs(self.height_lower) > 1. or abs(self.height_upper) > 1.: raise ValueError('`height_factor` must have values between [-1, 1], ' 'got {}'.format(height_factor)) self.width_factor = width_factor if isinstance(width_factor, (tuple, list)): - self.width_lower = abs(width_factor[0]) + self.width_lower = width_factor[0] self.width_upper = width_factor[1] else: - self.width_lower = self.width_upper = width_factor - if self.width_upper < 0.: - raise ValueError('`width_factor` cannot have negative values as upper ' - 'bound, got {}'.format(width_factor)) + self.width_lower = -width_factor + self.width_upper = width_factor + if self.width_upper < self.width_lower: + raise ValueError('`width_factor` cannot have upper bound less than ' + 'lower bound, got {}'.format(width_factor)) if abs(self.width_lower) > 1. or abs(self.width_upper) > 1.: raise ValueError('`width_factor` must have values between [-1, 1], ' 'got {}'.format(width_factor)) - if fill_mode not in {'reflect', 'wrap', 'constant'}: - raise NotImplementedError( - 'Unknown `fill_mode` {}. Only `reflect`, `wrap` and ' - '`constant` are supported.'.format(fill_mode)) - if interpolation not in {'nearest', 'bilinear'}: - raise NotImplementedError( - 'Unknown `interpolation` {}. 
Only `nearest` and ' - '`bilinear` are supported.'.format(interpolation)) + check_fill_mode_and_interpolation(fill_mode, interpolation) + self.fill_mode = fill_mode self.interpolation = interpolation self.seed = seed @@ -501,22 +516,24 @@ class RandomTranslation(Layer): """Translated inputs with random ops.""" inputs_shape = array_ops.shape(inputs) batch_size = inputs_shape[0] - h_axis, w_axis = 1, 2 + h_axis, w_axis = H_AXIS, W_AXIS img_hd = math_ops.cast(inputs_shape[h_axis], dtypes.float32) img_wd = math_ops.cast(inputs_shape[w_axis], dtypes.float32) height_translate = self._rng.uniform( shape=[batch_size, 1], - minval=-self.height_lower, - maxval=self.height_upper) + minval=self.height_lower, + maxval=self.height_upper, + dtype=dtypes.float32) height_translate = height_translate * img_hd width_translate = self._rng.uniform( shape=[batch_size, 1], - minval=-self.width_lower, - maxval=self.width_upper) + minval=self.width_lower, + maxval=self.width_upper, + dtype=dtypes.float32) width_translate = width_translate * img_wd translations = math_ops.cast( - array_ops.concat([height_translate, width_translate], axis=1), - dtype=inputs.dtype) + array_ops.concat([width_translate, height_translate], axis=1), + dtype=dtypes.float32) return transform( inputs, get_translation_matrix(translations), @@ -713,9 +730,15 @@ class RandomRotation(Layer): `(samples, height, width, channels)`, data_format='channels_last'. Attributes: - factor: a positive float represented as fraction of 2pi, or a tuple of size + factor: a float represented as fraction of 2pi, or a tuple of size 2 representing lower and upper bound for rotating clockwise and - counter-clockwise. When represented as a single float, lower = upper. + counter-clockwise. A positive values means rotating counter clock-wise, + while a negative value means clock-wise. When represented as a single + float, this value is used for both the upper and lower bound. 
For + instance, `factor=(-0.2, 0.3)` results in an output + rotation by a random amount in the range `[-20% * 2pi, 30% * 2pi]`. + `factor=0.2` results in an output rotating by a random amount in the range + `[-20% * 2pi, 20% * 2pi]`. fill_mode: Points outside the boundaries of the input are filled according to the given mode (one of `{'constant', 'reflect', 'wrap'}`). - *reflect*: `(d c b a | a b c d | d c b a)` @@ -736,8 +759,8 @@ class RandomRotation(Layer): data_format='channels_last'. Raise: - ValueError: if lower bound is not between [0, 1], or upper bound is - negative. + ValueError: if either bound is not between [0, 1], or upper bound is + less than lower bound. """ def __init__(self, @@ -752,18 +775,12 @@ class RandomRotation(Layer): self.lower = factor[0] self.upper = factor[1] else: - self.lower = self.upper = factor - if self.lower < 0. or self.upper < 0.: + self.lower = -factor + self.upper = factor + if self.upper < self.lower: raise ValueError('Factor cannot have negative values, ' 'got {}'.format(factor)) - if fill_mode not in {'reflect', 'wrap', 'constant'}: - raise NotImplementedError( - 'Unknown `fill_mode` {}. Only `reflect`, `wrap` and ' - '`constant` are supported.'.format(fill_mode)) - if interpolation not in {'nearest', 'bilinear'}: - raise NotImplementedError( - 'Unknown `interpolation` {}. 
Only `nearest` and ' - '`bilinear` are supported.'.format(interpolation)) + check_fill_mode_and_interpolation(fill_mode, interpolation) self.fill_mode = fill_mode self.interpolation = interpolation self.seed = seed @@ -779,13 +796,12 @@ class RandomRotation(Layer): """Rotated inputs with random ops.""" inputs_shape = array_ops.shape(inputs) batch_size = inputs_shape[0] - h_axis, w_axis = 1, 2 - img_hd = math_ops.cast(inputs_shape[h_axis], dtypes.float32) - img_wd = math_ops.cast(inputs_shape[w_axis], dtypes.float32) + img_hd = math_ops.cast(inputs_shape[H_AXIS], dtypes.float32) + img_wd = math_ops.cast(inputs_shape[W_AXIS], dtypes.float32) min_angle = self.lower * 2. * np.pi max_angle = self.upper * 2. * np.pi angles = self._rng.uniform( - shape=[batch_size], minval=-min_angle, maxval=max_angle) + shape=[batch_size], minval=min_angle, maxval=max_angle) return transform( inputs, get_rotation_matrix(angles, img_hd, img_wd), @@ -815,16 +831,23 @@ class RandomZoom(Layer): """Randomly zoom each image during training. Arguments: - height_factor: a positive float represented as fraction of value, or a tuple - of size 2 representing lower and upper bound for zooming horizontally. - When represented as a single float, this value is used for both the - upper and lower bound. For instance, `height_factor=(0.2, 0.3)` result in - an output zoom varying in the range `[original * 20%, original * 30%]`. - width_factor: a positive float represented as fraction of value, or a tuple + height_factor: a float represented as fraction of value, or a tuple of size 2 representing lower and upper bound for zooming vertically. When represented as a single float, this value is used for both the - upper and lower bound. For instance, `width_factor=(0.2, 0.3)` result in - an output zoom varying in the range `[original * 20%, original * 30%]`. + upper and lower bound. A positive value means zooming out, while a + negative value means zooming in. 
+ For instance, `height_factor=(0.2, 0.3)` result in an output zoomed out + by a random amount in the range [+20%, +30%]. + `height_factor=(-0.3, -0.2)` result in an output zoomed in by a random + amount in the range [+20%, +30%]. + width_factor: a float represented as fraction of value, or a tuple + of size 2 representing lower and upper bound for zooming horizontally. + When represented as a single float, this value is used for both the + upper and lower bound. + For instance, `width_factor=(0.2, 0.3)` result in an output zooming out + between 20% to 30%. + `width_factor=(-0.3, -0.2)` result in an output zooming in between 20% + to 30%. fill_mode: Points outside the boundaries of the input are filled according to the given mode (one of `{'constant', 'reflect', 'wrap'}`). - *reflect*: `(d c b a | a b c d | d c b a)` @@ -863,35 +886,27 @@ class RandomZoom(Layer): self.height_lower = height_factor[0] self.height_upper = height_factor[1] else: - self.height_lower = self.height_upper = height_factor - if self.height_lower < 0. or self.height_upper < 0.: - raise ValueError('`height_factor` cannot have negative values, ' + self.height_lower = -height_factor + self.height_upper = height_factor + + if abs(self.height_lower) > 1. or abs(self.height_upper) > 1.: + raise ValueError('`height_factor` must have values between [-1, 1], ' 'got {}'.format(height_factor)) - if self.height_lower > self.height_upper: - raise ValueError('`height_factor` cannot have lower bound larger than ' - 'upper bound, got {}.'.format(height_factor)) self.width_factor = width_factor if isinstance(width_factor, (tuple, list)): self.width_lower = width_factor[0] self.width_upper = width_factor[1] else: - self.width_lower = self.width_upper = width_factor - if self.width_lower < 0. 
or self.width_upper < 0.: - raise ValueError('`width_factor` cannot have negative values, ' - 'got {}'.format(width_factor)) - if self.width_lower > self.width_upper: - raise ValueError('`width_factor` cannot have lower bound larger than ' - 'upper bound, got {}.'.format(width_factor)) + self.width_lower = -width_factor + self.width_upper = width_factor + + if self.width_lower < -1. or self.width_upper < -1.: + raise ValueError('`width_factor` must have values larger than -1, ' + 'got {}'.format(width_factor)) + + check_fill_mode_and_interpolation(fill_mode, interpolation) - if fill_mode not in {'reflect', 'wrap', 'constant'}: - raise NotImplementedError( - 'Unknown `fill_mode` {}. Only `reflect`, `wrap` and ' - '`constant` are supported.'.format(fill_mode)) - if interpolation not in {'nearest', 'bilinear'}: - raise NotImplementedError( - 'Unknown `interpolation` {}. Only `nearest` and ' - '`bilinear` are supported.'.format(interpolation)) self.fill_mode = fill_mode self.interpolation = interpolation self.seed = seed @@ -907,22 +922,19 @@ class RandomZoom(Layer): """Zoomed inputs with random ops.""" inputs_shape = array_ops.shape(inputs) batch_size = inputs_shape[0] - h_axis, w_axis = 1, 2 - img_hd = math_ops.cast(inputs_shape[h_axis], dtypes.float32) - img_wd = math_ops.cast(inputs_shape[w_axis], dtypes.float32) + img_hd = math_ops.cast(inputs_shape[H_AXIS], dtypes.float32) + img_wd = math_ops.cast(inputs_shape[W_AXIS], dtypes.float32) height_zoom = self._rng.uniform( shape=[batch_size, 1], - minval=-self.height_lower, - maxval=self.height_upper) - height_zoom = height_zoom * img_hd + minval=1. + self.height_lower, + maxval=1. + self.height_upper) width_zoom = self._rng.uniform( shape=[batch_size, 1], - minval=-self.width_lower, - maxval=self.width_upper) - width_zoom = width_zoom * img_wd + minval=1. + self.width_lower, + maxval=1. 
+ self.width_upper) zooms = math_ops.cast( - array_ops.concat([height_zoom, width_zoom], axis=1), - dtype=inputs.dtype) + array_ops.concat([width_zoom, height_zoom], axis=1), + dtype=dtypes.float32) return transform( inputs, get_zoom_matrix(zooms, img_hd, img_wd), fill_mode=self.fill_mode, @@ -974,8 +986,8 @@ def get_zoom_matrix(zooms, image_height, image_width, name=None): # [0 0 1]] # where the last entry is implicit. # Zoom matrices are always float32. - x_offset = ((image_height + 1.) / 2.0) * (zooms[:, 0, None] - 1.) - y_offset = ((image_width + 1.) / 2.0) * (zooms[:, 1, None] - 1.) + x_offset = ((image_width - 1.) / 2.0) * (1.0 - zooms[:, 0, None]) + y_offset = ((image_height - 1.) / 2.0) * (1.0 - zooms[:, 1, None]) return array_ops.concat( values=[ zooms[:, 0, None], @@ -1073,11 +1085,11 @@ class RandomHeight(Layer): factor: A positive float (fraction of original height), or a tuple of size 2 representing lower and upper bound for resizing vertically. When represented as a single float, this value is used for both the upper and - lower bound. For instance, `factor=(0.2, 0.3)` results in an output height - varying in the range `[original + 20%, original + 30%]`. `factor=(-0.2, - 0.3)` results in an output height varying in the range `[original - 20%, - original + 30%]`. `factor=0.2` results in an output height varying in the - range `[original - 20%, original + 20%]`. + lower bound. For instance, `factor=(0.2, 0.3)` results in an output with + height changed by a random amount in the range `[20%, 30%]`. + `factor=(-0.2, 0.3)` results in an output with height changed by a random + amount in the range `[-20%, +30%]. `factor=0.2` results in an output with + height changed by a random amount in the range `[-20%, +20%]`. interpolation: String, the interpolation method. Defaults to `bilinear`. 
Supports `bilinear`, `nearest`, `bicubic`, `area`, `lanczos3`, `lanczos5`, `gaussian`, `mitchellcubic` @@ -1099,12 +1111,17 @@ class RandomHeight(Layer): **kwargs): self.factor = factor if isinstance(factor, (tuple, list)): - self.height_lower = -factor[0] + self.height_lower = factor[0] self.height_upper = factor[1] else: - self.height_lower = self.height_upper = factor - if self.height_lower > 1.: - raise ValueError('`factor` cannot have abs lower bound larger than 1.0, ' + self.height_lower = -factor + self.height_upper = factor + + if self.height_upper < self.height_lower: + raise ValueError('`factor` cannot have upper bound less than ' + 'lower bound, got {}'.format(factor)) + if self.height_lower < -1. or self.height_upper < -1.: + raise ValueError('`factor` must have values larger than -1, ' 'got {}'.format(factor)) self.interpolation = interpolation self._interpolation_method = get_interpolation(interpolation) @@ -1120,12 +1137,11 @@ class RandomHeight(Layer): def random_height_inputs(): """Inputs height-adjusted with random ops.""" inputs_shape = array_ops.shape(inputs) - h_axis, w_axis = 1, 2 - img_hd = math_ops.cast(inputs_shape[h_axis], dtypes.float32) - img_wd = inputs_shape[w_axis] + img_hd = math_ops.cast(inputs_shape[H_AXIS], dtypes.float32) + img_wd = inputs_shape[W_AXIS] height_factor = self._rng.uniform( shape=[], - minval=(1.0 - self.height_lower), + minval=(1.0 + self.height_lower), maxval=(1.0 + self.height_upper)) adjusted_height = math_ops.cast(height_factor * img_hd, dtypes.int32) adjusted_size = array_ops.stack([adjusted_height, img_wd]) @@ -1163,14 +1179,14 @@ class RandomWidth(Layer): By default, this layer is inactive during inference. Arguments: - factor: A positive float (fraction of original width), or a tuple of - size 2 representing lower and upper bound for resizing horizontally. When + factor: A positive float (fraction of original height), or a tuple of size 2 + representing lower and upper bound for resizing vertically. 
When represented as a single float, this value is used for both the upper and - lower bound. For instance, `factor=(0.2, 0.3)` results in an output width - varying in the range `[original + 20%, original + 30%]`. `factor=(-0.2, - 0.3)` results in an output width varying in the range `[original - 20%, - original + 30%]`. `factor=0.2` results in an output width varying in the - range `[original - 20%, original + 20%]`. + lower bound. For instance, `factor=(0.2, 0.3)` results in an output with + width changed by a random amount in the range `[20%, 30%]`. + `factor=(-0.2, 0.3)` results in an output with width changed by a random + amount in the range `[-20%, +30%]. `factor=0.2` results in an output with + width changed by a random amount in the range `[-20%, +20%]`. interpolation: String, the interpolation method. Defaults to `bilinear`. Supports `bilinear`, `nearest`, `bicubic`, `area`, `lanczos3`, `lanczos5`, `gaussian`, `mitchellcubic` @@ -1183,7 +1199,7 @@ class RandomWidth(Layer): Output shape: 4D tensor with shape: - `(samples, random_height, width, channels)`. + `(samples, height, random_width, channels)`. """ def __init__(self, @@ -1194,12 +1210,16 @@ class RandomWidth(Layer): **kwargs): self.factor = factor if isinstance(factor, (tuple, list)): - self.width_lower = -factor[0] + self.width_lower = factor[0] self.width_upper = factor[1] else: - self.width_lower = self.width_upper = factor - if self.width_lower > 1.: - raise ValueError('`factor` cannot have abs lower bound larger than 1.0, ' + self.width_lower = -factor + self.width_upper = factor + if self.width_upper < self.width_lower: + raise ValueError('`factor` cannot have upper bound less than ' + 'lower bound, got {}'.format(factor)) + if self.width_lower < -1. 
or self.width_upper < -1.: + raise ValueError('`factor` must have values larger than -1, ' 'got {}'.format(factor)) self.interpolation = interpolation self._interpolation_method = get_interpolation(interpolation) @@ -1215,12 +1235,11 @@ class RandomWidth(Layer): def random_width_inputs(): """Inputs width-adjusted with random ops.""" inputs_shape = array_ops.shape(inputs) - h_axis, w_axis = 1, 2 - img_hd = inputs_shape[h_axis] - img_wd = math_ops.cast(inputs_shape[w_axis], dtypes.float32) + img_hd = inputs_shape[H_AXIS] + img_wd = math_ops.cast(inputs_shape[W_AXIS], dtypes.float32) width_factor = self._rng.uniform( shape=[], - minval=(1.0 - self.width_lower), + minval=(1.0 + self.width_lower), maxval=(1.0 + self.width_upper)) adjusted_width = math_ops.cast(width_factor * img_wd, dtypes.int32) adjusted_size = array_ops.stack([img_hd, adjusted_width]) diff --git a/tensorflow/python/keras/layers/preprocessing/image_preprocessing_test.py b/tensorflow/python/keras/layers/preprocessing/image_preprocessing_test.py index a741ee1c069..28c9955c9dd 100644 --- a/tensorflow/python/keras/layers/preprocessing/image_preprocessing_test.py +++ b/tensorflow/python/keras/layers/preprocessing/image_preprocessing_test.py @@ -74,6 +74,40 @@ class ResizingTest(keras_parameterized.TestCase): with CustomObjectScope({'Resizing': image_preprocessing.Resizing}): self._run_test(kwargs, expected_height, expected_width) + def test_down_sampling_numeric(self): + for dtype in (np.int64, np.float32): + with tf_test_util.use_gpu(): + input_image = np.reshape(np.arange(0, 16), (1, 4, 4, 1)).astype(dtype) + layer = image_preprocessing.Resizing( + height=2, width=2, interpolation='nearest') + output_image = layer(input_image) + # pyformat: disable + expected_output = np.asarray([ + [5, 7], + [13, 15] + ]).astype(dtype) + # pyformat: enable + expected_output = np.reshape(expected_output, (1, 2, 2, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_up_sampling_numeric(self): + for dtype 
in (np.int64, np.float32): + with tf_test_util.use_gpu(): + input_image = np.reshape(np.arange(0, 4), (1, 2, 2, 1)).astype(dtype) + layer = image_preprocessing.Resizing( + height=4, width=4, interpolation='nearest') + output_image = layer(input_image) + # pyformat: disable + expected_output = np.asarray([ + [0, 0, 1, 1], + [0, 0, 1, 1], + [2, 2, 3, 3], + [2, 2, 3, 3] + ]).astype(dtype) + # pyformat: enable + expected_output = np.reshape(expected_output, (1, 4, 4, 1)) + self.assertAllEqual(expected_output, output_image) + @parameterized.named_parameters( ('reshape_bilinear_10_by_4', {'interpolation': 'bilinear'}, 10, 4)) def test_reshaping(self, kwargs, expected_height, expected_width): @@ -223,6 +257,21 @@ class RandomCropTest(keras_parameterized.TestCase): with CustomObjectScope({'RandomCrop': image_preprocessing.RandomCrop}): self._run_test(expected_height, expected_width) + def test_random_crop_full_height(self): + self._run_test(5, 2) + + def test_random_crop_full_width(self): + self._run_test(3, 8) + + def test_random_crop_full(self): + np.random.seed(1337) + height, width = 8, 16 + inp = np.random.random((12, 8, 16, 3)) + with tf_test_util.use_gpu(): + layer = image_preprocessing.RandomCrop(height, width) + actual_output = layer(inp, training=0) + self.assertAllClose(inp, actual_output) + def test_predicting_with_mock_longer_height(self): np.random.seed(1337) height, width = 3, 3 @@ -242,8 +291,7 @@ class RandomCropTest(keras_parameterized.TestCase): with tf_test_util.use_gpu(): layer = image_preprocessing.RandomCrop(height, width) actual_output = layer(inp, training=0) - resized_inp = image_ops.resize_images_v2( - inp, size=[4, 8]) + resized_inp = image_ops.resize_images_v2(inp, size=[4, 8]) expected_output = resized_inp[:, :, 1:7, :] self.assertAllClose(expected_output, actual_output) @@ -475,21 +523,152 @@ class RandomTranslationTest(keras_parameterized.TestCase): @parameterized.named_parameters( ('random_translate_4_by_6', .4, .6), 
('random_translate_3_by_2', .3, .2), - ('random_translate_tuple_factor', (.5, .4), (.2, .3))) + ('random_translate_tuple_factor', (-.5, .4), (.2, .3))) def test_random_translation(self, height_factor, width_factor): self._run_test(height_factor, width_factor) - def test_random_translation_negative_lower(self): - mock_offset = np.random.random((12, 1)) - with test.mock.patch.object( - gen_stateful_random_ops, 'stateful_uniform', return_value=mock_offset): - with self.cached_session(use_gpu=True): - layer = image_preprocessing.RandomTranslation((-0.2, .3), .4) - layer_2 = image_preprocessing.RandomTranslation((0.2, .3), .4) - inp = np.random.random((12, 5, 8, 3)).astype(np.float32) - actual_output = layer(inp, training=1) - actual_output_2 = layer_2(inp, training=1) - self.assertAllClose(actual_output, actual_output_2) + def test_random_translation_up_numeric_reflect(self): + for dtype in (np.int64, np.float32): + with tf_test_util.use_gpu(): + input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype) + # Shifting by -.2 * 5 = 1 pixel. + layer = image_preprocessing.RandomTranslation( + height_factor=(-.2, -.2), width_factor=0.) + output_image = layer(input_image) + # pyformat: disable + expected_output = np.asarray([ + [5, 6, 7, 8, 9], + [10, 11, 12, 13, 14], + [15, 16, 17, 18, 19], + [20, 21, 22, 23, 24], + [20, 21, 22, 23, 24] + ]).astype(dtype) + # pyformat: enable + expected_output = np.reshape(expected_output, (1, 5, 5, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_random_translation_up_numeric_constant(self): + for dtype in (np.int64, np.float32): + with tf_test_util.use_gpu(): + input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype) + # Shifting by -.2 * 5 = 1 pixel. 
+ layer = image_preprocessing.RandomTranslation( + height_factor=(-.2, -.2), width_factor=0., fill_mode='constant') + output_image = layer(input_image) + # pyformat: disable + expected_output = np.asarray([ + [5, 6, 7, 8, 9], + [10, 11, 12, 13, 14], + [15, 16, 17, 18, 19], + [20, 21, 22, 23, 24], + [0, 0, 0, 0, 0] + ]).astype(dtype) + # pyformat: enable + expected_output = np.reshape(expected_output, (1, 5, 5, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_random_translation_down_numeric_reflect(self): + for dtype in (np.int64, np.float32): + with tf_test_util.use_gpu(): + input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype) + # Shifting by .2 * 5 = 1 pixel. + layer = image_preprocessing.RandomTranslation( + height_factor=(.2, .2), width_factor=0.) + output_image = layer(input_image) + # pyformat: disable + expected_output = np.asarray([ + [0, 1, 2, 3, 4], + [0, 1, 2, 3, 4], + [5, 6, 7, 8, 9], + [10, 11, 12, 13, 14], + [15, 16, 17, 18, 19] + ]).astype(dtype) + # pyformat: enable + expected_output = np.reshape(expected_output, (1, 5, 5, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_random_translation_asymmetric_size_numeric_reflect(self): + for dtype in (np.int64, np.float32): + with tf_test_util.use_gpu(): + input_image = np.reshape(np.arange(0, 16), (1, 8, 2, 1)).astype(dtype) + # Shifting by .5 * 8 = 1 pixel. + layer = image_preprocessing.RandomTranslation( + height_factor=(.5, .5), width_factor=0.) 
+ output_image = layer(input_image) + # pyformat: disable + expected_output = np.asarray([ + [6, 7], + [4, 5], + [2, 3], + [0, 1], + [0, 1], + [2, 3], + [4, 5], + [6, 7], + ]).astype(dtype) + # pyformat: enable + expected_output = np.reshape(expected_output, (1, 8, 2, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_random_translation_down_numeric_constant(self): + for dtype in (np.int64, np.float32): + with tf_test_util.use_gpu(): + input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype) + # Shifting by -.2 * 5 = 1 pixel. + layer = image_preprocessing.RandomTranslation( + height_factor=(.2, .2), width_factor=0., fill_mode='constant') + output_image = layer(input_image) + # pyformat: disable + expected_output = np.asarray([ + [0, 0, 0, 0, 0], + [0, 1, 2, 3, 4], + [5, 6, 7, 8, 9], + [10, 11, 12, 13, 14], + [15, 16, 17, 18, 19] + ]).astype(dtype) + # pyformat: enable + expected_output = np.reshape(expected_output, (1, 5, 5, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_random_translation_left_numeric_reflect(self): + for dtype in (np.int64, np.float32): + with tf_test_util.use_gpu(): + input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype) + # Shifting by .2 * 5 = 1 pixel. + layer = image_preprocessing.RandomTranslation( + height_factor=0., width_factor=(-.2, -.2)) + output_image = layer(input_image) + # pyformat: disable + expected_output = np.asarray([ + [1, 2, 3, 4, 4], + [6, 7, 8, 9, 9], + [11, 12, 13, 14, 14], + [16, 17, 18, 19, 19], + [21, 22, 23, 24, 24] + ]).astype(dtype) + # pyformat: enable + expected_output = np.reshape(expected_output, (1, 5, 5, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_random_translation_left_numeric_constant(self): + for dtype in (np.int64, np.float32): + with tf_test_util.use_gpu(): + input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype) + # Shifting by -.2 * 5 = 1 pixel. 
+ layer = image_preprocessing.RandomTranslation( + height_factor=0., width_factor=(-.2, -.2), fill_mode='constant') + output_image = layer(input_image) + # pyformat: disable + expected_output = np.asarray([ + [1, 2, 3, 4, 0], + [6, 7, 8, 9, 0], + [11, 12, 13, 14, 0], + [16, 17, 18, 19, 0], + [21, 22, 23, 24, 0] + ]).astype(dtype) + # pyformat: enable + expected_output = np.reshape(expected_output, (1, 5, 5, 1)) + self.assertAllEqual(expected_output, output_image) def test_random_translation_inference(self): with CustomObjectScope( @@ -768,7 +947,7 @@ class RandomRotationTest(keras_parameterized.TestCase): @parameterized.named_parameters(('random_rotate_4', .4), ('random_rotate_3', .3), - ('random_rotate_tuple_factor', (.5, .4))) + ('random_rotate_tuple_factor', (-.5, .4))) def test_random_rotation(self, factor): self._run_test(factor) @@ -808,22 +987,55 @@ class RandomZoomTest(keras_parameterized.TestCase): expected_output_shape=(None, orig_height, orig_width, channels)) @parameterized.named_parameters( - ('random_zoom_4_by_6', .4, .6), ('random_zoom_2_by_3', .2, .3), - ('random_zoom_tuple_factor', (.4, .5), (.2, .3))) + ('random_zoom_4_by_6', -.4, -.6), ('random_zoom_2_by_3', -.2, -.3), + ('random_zoom_tuple_factor', (-.4, -.5), (-.2, -.3))) def test_random_zoom_in(self, height_factor, width_factor): self._run_test(height_factor, width_factor) @parameterized.named_parameters( - ('random_zoom_4_by_6', 1.4, 1.6), ('random_zoom_2_by_3', 1.2, 1.3), - ('random_zoom_tuple_factor', (1.4, 1.5), (1.2, 1.3))) + ('random_zoom_4_by_6', .4, .6), ('random_zoom_2_by_3', .2, .3), + ('random_zoom_tuple_factor', (.4, .5), (.2, .3))) def test_random_zoom_out(self, height_factor, width_factor): self._run_test(height_factor, width_factor) - def test_random_zoom_invalid_factor(self): - with self.assertRaises(ValueError): - image_preprocessing.RandomZoom((.5, .4), .2) - with self.assertRaises(ValueError): - image_preprocessing.RandomZoom(.2, (.5, .4)) + def 
test_random_zoom_in_numeric(self): + for dtype in (np.int64, np.float32): + with tf_test_util.use_gpu(): + input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype(dtype) + layer = image_preprocessing.RandomZoom((-.5, -.5), (-.5, -.5), + interpolation='nearest') + output_image = layer(np.expand_dims(input_image, axis=0)) + # pyformat: disable + expected_output = np.asarray([ + [6, 7, 7, 8, 8], + [11, 12, 12, 13, 13], + [11, 12, 12, 13, 13], + [16, 17, 17, 18, 18], + [16, 17, 17, 18, 18] + ]).astype(dtype) + # pyformat: enable + expected_output = np.reshape(expected_output, (1, 5, 5, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_random_zoom_out_numeric(self): + for dtype in (np.int64, np.float32): + with tf_test_util.use_gpu(): + input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype(dtype) + layer = image_preprocessing.RandomZoom((.5, .5), (.5, .5), + fill_mode='constant', + interpolation='nearest') + output_image = layer(np.expand_dims(input_image, axis=0)) + # pyformat: disable + expected_output = np.asarray([ + [0, 0, 0, 0, 0], + [0, 6, 7, 9, 0], + [0, 11, 12, 14, 0], + [0, 21, 22, 24, 0], + [0, 0, 0, 0, 0] + ]).astype(dtype) + # pyformat: enable + expected_output = np.reshape(expected_output, (1, 5, 5, 1)) + self.assertAllEqual(expected_output, output_image) def test_random_zoom_inference(self): with CustomObjectScope( @@ -861,7 +1073,7 @@ class RandomHeightTest(keras_parameterized.TestCase): self.assertEqual(img_out.shape[3], 3) @parameterized.named_parameters(('random_height_4_by_6', (.4, .6)), - ('random_height_3_by_2', (.3, 1.2)), + ('random_height_3_by_2', (-.3, .2)), ('random_height_3', .3)) def test_random_height_basic(self, factor): self._run_test(factor) @@ -877,6 +1089,39 @@ class RandomHeightTest(keras_parameterized.TestCase): img_out = layer(img, training=True) self.assertEqual(img_out.shape[1], 3) + def test_random_height_longer_numeric(self): + for dtype in (np.int64, np.float32): + with tf_test_util.use_gpu(): + 
input_image = np.reshape(np.arange(0, 6), (2, 3, 1)).astype(dtype) + layer = image_preprocessing.RandomHeight(factor=(1., 1.)) + output_image = layer(np.expand_dims(input_image, axis=0)) + # pyformat: disable + expected_output = np.asarray([ + [0, 1, 2], + [0.75, 1.75, 2.75], + [2.25, 3.25, 4.25], + [3, 4, 5] + ]).astype(dtype) + # pyformat: enable + expected_output = np.reshape(expected_output, (1, 4, 3, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_random_height_shorter_numeric(self): + for dtype in (np.int64, np.float32): + with tf_test_util.use_gpu(): + input_image = np.reshape(np.arange(0, 8), (4, 2, 1)).astype(dtype) + layer = image_preprocessing.RandomHeight( + factor=(-.5, -.5), interpolation='nearest') + output_image = layer(np.expand_dims(input_image, axis=0)) + # pyformat: disable + expected_output = np.asarray([ + [2, 3], + [6, 7] + ]).astype(dtype) + # pyformat: enable + expected_output = np.reshape(expected_output, (1, 2, 2, 1)) + self.assertAllEqual(expected_output, output_image) + def test_random_height_invalid_factor(self): with self.assertRaises(ValueError): image_preprocessing.RandomHeight((-1.5, .4)) @@ -916,7 +1161,7 @@ class RandomWidthTest(keras_parameterized.TestCase): self.assertEqual(img_out.shape[3], 3) @parameterized.named_parameters(('random_width_4_by_6', (.4, .6)), - ('random_width_3_by_2', (.3, 1.2)), + ('random_width_3_by_2', (-.3, .2)), ('random_width_3', .3)) def test_random_width_basic(self, factor): self._run_test(factor) @@ -932,6 +1177,38 @@ class RandomWidthTest(keras_parameterized.TestCase): img_out = layer(img, training=True) self.assertEqual(img_out.shape[2], 3) + def test_random_width_longer_numeric(self): + for dtype in (np.int64, np.float32): + with tf_test_util.use_gpu(): + input_image = np.reshape(np.arange(0, 6), (3, 2, 1)).astype(dtype) + layer = image_preprocessing.RandomWidth(factor=(1., 1.)) + output_image = layer(np.expand_dims(input_image, axis=0)) + # pyformat: disable + expected_output 
= np.asarray([ + [0, 0.25, 0.75, 1], + [2, 2.25, 2.75, 3], + [4, 4.25, 4.75, 5] + ]).astype(dtype) + # pyformat: enable + expected_output = np.reshape(expected_output, (1, 3, 4, 1)) + self.assertAllEqual(expected_output, output_image) + + def test_random_width_shorter_numeric(self): + for dtype in (np.int64, np.float32): + with tf_test_util.use_gpu(): + input_image = np.reshape(np.arange(0, 8), (2, 4, 1)).astype(dtype) + layer = image_preprocessing.RandomWidth( + factor=(-.5, -.5), interpolation='nearest') + output_image = layer(np.expand_dims(input_image, axis=0)) + # pyformat: disable + expected_output = np.asarray([ + [1, 3], + [5, 7] + ]).astype(dtype) + # pyformat: enable + expected_output = np.reshape(expected_output, (1, 2, 2, 1)) + self.assertAllEqual(expected_output, output_image) + def test_random_width_invalid_factor(self): with self.assertRaises(ValueError): image_preprocessing.RandomWidth((-1.5, .4)) diff --git a/tensorflow/python/keras/layers/preprocessing/index_lookup.py b/tensorflow/python/keras/layers/preprocessing/index_lookup.py index 812eeca7ea3..d6c8a07c8ba 100644 --- a/tensorflow/python/keras/layers/preprocessing/index_lookup.py +++ b/tensorflow/python/keras/layers/preprocessing/index_lookup.py @@ -24,17 +24,11 @@ import operator import numpy as np from tensorflow.python.framework import dtypes -from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_spec from tensorflow.python.keras.engine import base_preprocessing_layer -from tensorflow.python.ops import array_ops +from tensorflow.python.keras.layers.preprocessing import table_utils from tensorflow.python.ops import lookup_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import string_ops -from tensorflow.python.ops.ragged import ragged_functional_ops -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow.python.platform import gfile from 
tensorflow.python.util import compat # The string tokens in the extracted vocabulary @@ -100,23 +94,29 @@ class IndexLookup(base_preprocessing_layer.CombinerPreprocessingLayer): reserve_zero=True, mask_zero=False, **kwargs): - allowed_dtypes = [dtypes.string, dtypes.int64] + invert = False + if invert: + allowed_dtypes = [dtypes.int32, dtypes.int64] + else: + allowed_dtypes = [dtypes.string, dtypes.int32, dtypes.int64] + if "dtype" in kwargs and kwargs["dtype"] not in allowed_dtypes: - raise ValueError( - "TextVectorization may only have a dtype of string or int64.") - elif "dtype" not in kwargs: - kwargs["dtype"] = dtypes.string + raise ValueError("TextVectorization may only have a dtype in %s." % + allowed_dtypes) + + if "dtype" not in kwargs: + kwargs["dtype"] = dtypes.int64 if invert else dtypes.string # If max_tokens is set, the value must be greater than 1 - otherwise we # are creating a 0-element vocab, which doesn't make sense. if max_tokens is not None and max_tokens <= 1: - raise ValueError("max_tokens must be greater than 1.") + raise ValueError("If set, max_tokens must be greater than 1.") - # For now, limit the num_oov_tokens to one. if num_oov_tokens < 0: raise ValueError("num_oov_tokens must be greater than 0. You passed %s" % num_oov_tokens) + self.invert = invert self.max_tokens = max_tokens self.num_oov_tokens = num_oov_tokens self.reserve_zero = reserve_zero @@ -167,91 +167,24 @@ class IndexLookup(base_preprocessing_layer.CombinerPreprocessingLayer): # counting code in the Model object doesn't throw an attribute error. 
tracked_table.shape = tensor_shape.TensorShape((0,)) - self._inverse_table = None + if self.num_oov_tokens <= 1: + oov_tokens = None + else: + oov_start = 1 if reserve_zero else 0 + oov_tokens = list(range(oov_start, self._reserved_values)) + + self._table_handler = table_utils.TableHandler( + table=self._table, + oov_tokens=oov_tokens, + use_v1_apis=self._use_v1_apis()) if vocabulary is not None: if isinstance(vocabulary, str): - vocabulary = self._get_vocabulary_from_file(vocabulary) + vocabulary = table_utils.get_vocabulary_from_file(vocabulary) + table_utils.validate_vocabulary_is_unique(vocabulary) - vocabulary_set = set(vocabulary) - if len(vocabulary) != len(vocabulary_set): - repeated_items = [ - item for item, count in collections.Counter(vocabulary).items() - if count > 1 - ] - raise ValueError("The passed vocabulary has at least one repeated " - "term. Please uniquify your dataset before passing " - "it to IndexLookup(). The repeated terms are %s" % - repeated_items) self.set_vocabulary(vocabulary) - def _get_vocabulary_from_file(self, vocabulary_path): - vocab = [] - with gfile.GFile(vocabulary_path, "r") as reader: - while True: - # Get the next line, and break if it is None. - text = reader.readline() - if not text: - break - - # Convert the raw text into UTF8 and strip whitespace. 
- if isinstance(text, str): - token = text - elif isinstance(text, bytes): - token = text.decode("utf-8", "ignore") - token = token.strip() - vocab.append(token) - return vocab - - def _get_table_data(self): - keys, values = self._table.export() - return (keys.numpy(), values.numpy()) - - def vocab_size(self): - return self._table.size().numpy() - - def _clear_table(self): - keys, _ = self._table.export() - self._table.remove(keys) - if self._inverse_table: - keys, _ = self._inverse_table.export() - self._inverse_table.remove(keys) - - def _insert_table_data(self, keys, values): - if len(values) != len(keys): - raise RuntimeError("Size mismatch between values and key arrays. " - "Keys had size %s, values had size %s." % - (len(keys), len(values))) - self._table.insert(keys, values) - if self._inverse_table: - self._inverse_table.insert(values, keys) - - def _initialize_inverse_table(self): - keys, values = self._table.export() - self._inverse_table.insert(values, keys) - - def _to_numpy(self, preprocessed_data): - """Converts preprocessed inputs into numpy arrays.""" - if isinstance(preprocessed_data, np.ndarray): - return preprocessed_data - return np.array(preprocessed_data.to_list()) - # End of V1/V2 shim points. - - def _assert_same_type(self, expected_type, values, value_name): - if dtypes.as_dtype(expected_type) != dtypes.as_dtype(values.dtype): - raise RuntimeError("Expected %s type %s, got %s" % - (value_name, expected_type, values.dtype)) - - def _convert_to_ndarray(self, x, dtype=None): - array = np.array(x) if isinstance(x, (list, tuple)) else x - if dtype not in (None, dtypes.string): - # If the dtype is an integer, we do permissive casting. This allows - # users to examine int32 data if the dtype is int64 without trouble. 
- np_dtype = dtypes.as_dtype(dtype).as_numpy_dtype - if np.can_cast(array.dtype, np_dtype): - array = array.astype(np_dtype, casting="safe") - return array - def compute_output_shape(self, input_shape): return input_shape @@ -281,10 +214,10 @@ class IndexLookup(base_preprocessing_layer.CombinerPreprocessingLayer): super(IndexLookup, self).adapt(data, reset_state) def get_vocabulary(self): - if self.vocab_size() == 0: + if self._table_handler.vocab_size() == 0: return [] - keys, values = self._get_table_data() + keys, values = self._table_handler.data() # This is required because the MutableHashTable doesn't preserve insertion # order, but we rely on the order of the array to assign indices. if self.dtype == dtypes.string: @@ -292,6 +225,9 @@ class IndexLookup(base_preprocessing_layer.CombinerPreprocessingLayer): else: return [x for _, x in sorted(zip(values, keys))] + def vocab_size(self): + return self._table_handler.vocab_size() + def get_config(self): config = { "max_tokens": self.max_tokens, @@ -329,7 +265,7 @@ class IndexLookup(base_preprocessing_layer.CombinerPreprocessingLayer): ValueError: If there are too many inputs, the inputs do not match, or input data is missing. """ - current_table_size = self.vocab_size() + current_table_size = self._table_handler.vocab_size() total_vocab_size = len(vocab) + (current_table_size if append else 0) if self.max_tokens is not None and total_vocab_size > self._max_elements: raise ValueError( @@ -338,93 +274,28 @@ class IndexLookup(base_preprocessing_layer.CombinerPreprocessingLayer): "token(s) are automatically added to the number of tokens." 
% (total_vocab_size, self.max_tokens)) - start_index = self._reserved_values + (self.vocab_size() if append else 0) + start_index = self._reserved_values + (current_table_size if append else 0) values = np.arange(start_index, len(vocab) + start_index, dtype=np.int64) - vocab = self._convert_to_ndarray(vocab, self.dtype) - self._assert_same_type(self.dtype, vocab, "vocab") + vocab = table_utils.convert_to_ndarray(vocab, self.dtype) + table_utils.assert_same_type(self.dtype, vocab, "vocab") - values = self._convert_to_ndarray(values, self._output_dtype) - self._assert_same_type(self._output_dtype, values, "values") + values = table_utils.convert_to_ndarray(values, self._output_dtype) + table_utils.assert_same_type(self._output_dtype, values, "values") - if not append and self.vocab_size() > 0: - self._clear_table() - self._insert_table_data(vocab, values) + if not append and current_table_size > 0: + self._table_handler.clear() + self._table_handler.insert(vocab, values) def _set_state_variables(self, updates): if not self.built: raise RuntimeError("_set_state_variables() must be called after build().") self.set_vocabulary(updates[_VOCAB_NAME]) - def __call__(self, inputs, invert=False, **kwargs): - if invert and not self._inverse_table: - # If the user wants to perform an inverse lookup, we need to build an - # inverse lookup table and initialize it to have the inverse of the - # forward table's vocabulary. - self._inverse_table = lookup_ops.MutableHashTable( - key_dtype=self._output_dtype, - value_dtype=self.dtype, - default_value="", - name=(self._name + "_inverse_index_table")) + def call(self, inputs): + return self._table_handler.lookup(inputs) - tracked_inverse_table = self._add_trackable( - self._inverse_table, trainable=False) - # This is a workaround for summary() on this layer. 
Because the table is - # not mutable during training, the effective number of parameters (and so - # the weight shape) is 0; we add this as an attr so that the parameter - # counting code in the Model object doesn't throw an attribute error. - tracked_inverse_table.shape = tensor_shape.TensorShape((0,)) - - # This is a workaround for saving not working yet for MutableHashTables. - # By replacing the existing function call by an explicit failure, we - # can provide a more user-friendly error message. - def fail(_): - raise NotImplementedError( - "Saving is not yet supported for IndexLookup layers.") - - self._inverse_table._list_extra_dependencies_for_serialization = fail # pylint: disable=protected-access - self._initialize_inverse_table() - - return super(IndexLookup, self).__call__(inputs, invert=invert, **kwargs) - - def replace_oov_buckets(self, inputs, lookups): - if self.num_oov_tokens <= 1: - return lookups - - if inputs.dtype.is_integer: - inputs = string_ops.as_string(inputs) - hashed_inputs = string_ops.string_to_hash_bucket_fast( - inputs, num_buckets=self.num_oov_tokens) - if self.reserve_zero: - hashed_inputs = math_ops.add(hashed_inputs, 1) - return array_ops.where(math_ops.equal(lookups, -1), hashed_inputs, lookups) - - def call(self, inputs, invert=False): - table = self._inverse_table if invert else self._table - # The table lookup ops don't natively support ragged tensors, so if we have - # a RT we need to use map_flat_values to look up every element. 
- if ragged_tensor.is_ragged(inputs): - indexed_data = ragged_functional_ops.map_flat_values(table.lookup, inputs) - if not invert: - indexed_data = ragged_functional_ops.map_flat_values( - self.replace_oov_buckets, inputs, indexed_data) - elif isinstance( - inputs, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)): - if not invert: - values = self.replace_oov_buckets(inputs.values, - table.lookup(inputs.values)) - indexed_data = sparse_tensor.SparseTensor(inputs.indices, values, - inputs.dense_shape) - else: - indexed_data = table.lookup(inputs) - if not invert: - indexed_data = self.replace_oov_buckets(inputs, indexed_data) - # (b/149446477): output does not preserve input shape. - indexed_data.set_shape(inputs.shape) - - # Composite tensors can pass tensor values through, which will cause - # errors if this is the only layer in the model. To fix this, pass - # the output through an identity op. - return array_ops.identity(indexed_data) + def _use_v1_apis(self): + return False class _IndexLookupAccumulator( diff --git a/tensorflow/python/keras/layers/preprocessing/index_lookup_test.py b/tensorflow/python/keras/layers/preprocessing/index_lookup_test.py index 54305b3d6d7..3c5b5757ec2 100644 --- a/tensorflow/python/keras/layers/preprocessing/index_lookup_test.py +++ b/tensorflow/python/keras/layers/preprocessing/index_lookup_test.py @@ -261,7 +261,7 @@ class CategoricalEncodingMultiOOVTest( vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) input_array = sparse_tensor.SparseTensor( indices=[[0, 0], [1, 2]], - values=np.array([13, 132], dtype=np.int64), + values=np.array([13, 133], dtype=np.int64), dense_shape=[3, 4]) expected_indices = [[0, 0], [1, 2]] @@ -295,7 +295,7 @@ class CategoricalEncodingMultiOOVTest( def test_ragged_int_input_multi_bucket(self): vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) - input_array = ragged_factory_ops.constant([[10, 11, 13], [13, 12, 10, 132]], + input_array = ragged_factory_ops.constant([[10, 11, 13], 
[13, 12, 10, 133]], dtype=np.int64) expected_output = [[3, 4, 6], [6, 5, 3, 2]] @@ -560,7 +560,7 @@ class IndexLookupVocabularyTest(keras_parameterized.TestCase, class InverseLookupOutputTest(keras_parameterized.TestCase, preprocessing_test_utils.PreprocessingLayerTest): - def test_inverse_output(self): + def DISABLE_test_inverse_output(self): vocab_data = ["earth", "wind", "and", "fire"] input_array = np.array([["earth", "wind", "and", "fire"], ["fire", "and", "earth", "michigan"]]) @@ -579,7 +579,7 @@ class InverseLookupOutputTest(keras_parameterized.TestCase, self.assertAllEqual(expected_ints, int_outputs) self.assertAllEqual(expected_strings, string_outputs) - def test_inverse_output_serialization(self): + def DISABLE_test_inverse_output_serialization(self): vocab_data = ["earth", "wind", "and", "fire"] input_array = np.array([["earth", "wind", "and", "fire"], ["fire", "and", "earth", "michigan"]]) diff --git a/tensorflow/python/keras/layers/preprocessing/index_lookup_v1.py b/tensorflow/python/keras/layers/preprocessing/index_lookup_v1.py index c6e0b6ed286..47fea11dd57 100644 --- a/tensorflow/python/keras/layers/preprocessing/index_lookup_v1.py +++ b/tensorflow/python/keras/layers/preprocessing/index_lookup_v1.py @@ -18,12 +18,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import numpy as np -from tensorflow.python.keras import backend as K from tensorflow.python.keras.engine import base_preprocessing_layer_v1 from tensorflow.python.keras.layers.preprocessing import index_lookup -from tensorflow.python.ops.ragged import ragged_tensor_value class IndexLookup(index_lookup.IndexLookup, @@ -59,37 +56,5 @@ class IndexLookup(index_lookup.IndexLookup, this option is set, reserve_zero must also be set. Defaults to False. 
""" - def _get_table_data(self): - keys, values = self._table.export() - np_keys = K.get_session().run(keys) - np_values = K.get_session().run(values) - return (np_keys, np_values) - - def vocab_size(self): - return K.get_session().run(self._table.size()) - - def _clear_table(self): - keys, _ = self._table.export() - K.get_session().run(self._table.remove(keys)) - if self._inverse_table: - keys, _ = self._inverse_table.export() - K.get_session().run(self._inverse_table.remove(keys)) - - def _insert_table_data(self, keys, values): - K.get_session().run(self._table.insert(keys, values)) - if self._inverse_table: - K.get_session().run(self._inverse_table.insert(values, keys)) - - def _initialize_inverse_table(self): - keys, values = self._table.export() - K.get_session().run(self._inverse_table.insert(values, keys)) - - def _to_numpy(self, data): - """Converts preprocessed inputs into numpy arrays.""" - if isinstance(data, np.ndarray): - return data - session = K.get_session() - data = session.run(data) - if isinstance(data, ragged_tensor_value.RaggedTensorValue): - data = np.array(data.to_list()) - return data + def _use_v1_apis(self): + return True diff --git a/tensorflow/python/keras/layers/preprocessing/normalization.py b/tensorflow/python/keras/layers/preprocessing/normalization.py index b087a2101c7..cf9600a63ab 100644 --- a/tensorflow/python/keras/layers/preprocessing/normalization.py +++ b/tensorflow/python/keras/layers/preprocessing/normalization.py @@ -41,7 +41,7 @@ _VARIANCE_NAME = 'variance' class Normalization(CombinerPreprocessingLayer): """Feature-wise normalization of the data. - This layer will coerce its inputs into a normal distribution centered around + This layer will coerce its inputs into a distribution centered around 0 with standard deviation 1. It accomplishes this by precomputing the mean and variance of the data, and calling (input-mean)/sqrt(var) at runtime. 
diff --git a/tensorflow/python/keras/layers/preprocessing/table_utils.py b/tensorflow/python/keras/layers/preprocessing/table_utils.py new file mode 100644 index 00000000000..88e9d95e2ed --- /dev/null +++ b/tensorflow/python/keras/layers/preprocessing/table_utils.py @@ -0,0 +1,192 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Utilities for working with tf.lookup tables in Keras.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import numpy as np + +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.keras import backend as K +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import string_ops +from tensorflow.python.ops.ragged import ragged_functional_ops +from tensorflow.python.ops.ragged import ragged_tensor +from tensorflow.python.platform import gfile + + +class TableHandler(object): + """Wrapper object that holds a lookup table and provides accessors.""" + + def __init__(self, table, oov_tokens=None, use_v1_apis=False): + self.table = table + self.use_v1_apis = use_v1_apis + if oov_tokens is None: + self.oov_tokens = oov_tokens + else: + if not isinstance(oov_tokens, (list, tuple, 
np.ndarray)): + oov_tokens = [oov_tokens] + self.oov_tokens = math_ops.cast(oov_tokens, table._value_dtype) # pylint: disable=protected-access + + def data(self): + keys, values = self.table.export() + return (self._eval(keys), self._eval(values)) + + def vocab_size(self): + return self._eval(self.table.size()) + + def clear(self): + keys, _ = self.table.export() + self._run(self.table.remove(keys)) + + def insert(self, keys, values): + if len(values) != len(keys): + raise RuntimeError("Size mismatch between values and key arrays. " + "Keys had size %s, values had size %s." % + (len(keys), len(values))) + self._run(self.table.insert(keys, values)) + + def _replace_oov_buckets(self, inputs, lookups): + """Replace the default OOV value with one of the OOV bucket values.""" + if self.oov_tokens is None: + return lookups + + num_oov_elements = self.oov_tokens.shape.num_elements() + if inputs.dtype.is_integer: + oov_indices = math_ops.floormod(inputs, num_oov_elements) + else: + oov_indices = string_ops.string_to_hash_bucket_fast( + inputs, num_buckets=num_oov_elements) + + oov_values = array_ops.gather(self.oov_tokens, oov_indices) + oov_locations = math_ops.equal(lookups, self.table._default_value) # pylint: disable=protected-access + + return array_ops.where(oov_locations, oov_values, lookups) + + def _ragged_lookup(self, inputs): + """Perform a table lookup on a ragged tensor.""" + # The table lookup ops don't natively support ragged tensors, so if we have + # a RT we need to use map_flat_values to look up every element. + indexed_data = ragged_functional_ops.map_flat_values( + self.table.lookup, inputs) + indexed_data = ragged_functional_ops.map_flat_values( + self._replace_oov_buckets, inputs, indexed_data) + # Composite tensors can pass tensor values through, which will cause + # errors if all operations in the TF graph do so. We can break this chain + # with an identity here. 
+ return array_ops.identity(indexed_data) + + def _sparse_lookup(self, inputs): + """Perform a table lookup on a sparse tensor.""" + values = self.table.lookup(inputs.values) + values = self._replace_oov_buckets(inputs.values, values) + indexed_data = sparse_tensor.SparseTensor(inputs.indices, values, + inputs.dense_shape) + # Composite tensors can pass tensor values through, which will cause + # errors if all operations in the TF graph do so. We can break this chain + # with an identity here. + return array_ops.identity(indexed_data) + + def _tensor_lookup(self, inputs): + """Perform a table lookup on a tf.tensor.""" + values = self.table.lookup(inputs) + indexed_data = self._replace_oov_buckets(inputs, values) + # (b/149446477): output does not preserve input shape. + indexed_data.set_shape(inputs.shape) + return indexed_data + + def lookup(self, inputs): + """Perform a table lookup.""" + # Sparse tensors don't play nicely with tensor conversion, so we handle + # them before attempting to convert lists or arrays to tensors. + if isinstance( + inputs, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)): + return self._sparse_lookup(inputs) + + # Try to convert lists/arrays to tensors or RaggedTensors. + inputs = ragged_tensor.convert_to_tensor_or_ragged_tensor(inputs) + + # Run the lookup operation on the converted tensor. + if ragged_tensor.is_ragged(inputs): + return self._ragged_lookup(inputs) + else: + return self._tensor_lookup(inputs) + + def _eval(self, tensor): + if self.use_v1_apis: + return K.get_session().run(tensor) + else: + return tensor.numpy() + + def _run(self, op): + if self.use_v1_apis: + K.get_session().run(op) + + +def get_vocabulary_from_file(vocabulary_path, encoding="utf-8"): + """Read a vocabulary in from a file.""" + vocab = [] + with gfile.GFile(vocabulary_path, "r") as reader: + while True: + # Get the next line, and break if it is None. 
+ text = reader.readline() + if not text: + break + + # Convert the raw text and strip whitespace. + if isinstance(text, str): + token = text + elif isinstance(text, bytes): + token = text.decode(encoding, "ignore") + token = token.strip() + vocab.append(token) + return vocab + + +def validate_vocabulary_is_unique(vocabulary): + """Validate that a vocabulary contains no repeated tokens.""" + vocabulary_set = set(vocabulary) + if len(vocabulary) != len(vocabulary_set): + repeated_items = [ + item for item, count in collections.Counter(vocabulary).items() + if count > 1 + ] + raise ValueError("The passed vocabulary has at least one repeated " + "term. Please uniquify your dataset. The repeated terms " + "are %s" % repeated_items) + + +def assert_same_type(expected_type, values, value_name): + """Assert that 'values' is of type 'expected_type'.""" + if dtypes.as_dtype(expected_type) != dtypes.as_dtype(values.dtype): + raise RuntimeError("Expected %s type %s, got %s" % + (value_name, expected_type, values.dtype)) + + +def convert_to_ndarray(x, dtype=None): + """Convert 'x' to a numpy array.""" + array = np.array(x) if isinstance(x, (list, tuple)) else x + if dtype not in (None, dtypes.string): + # If the dtype is an integer, we do permissive casting. This allows + # users to examine int32 data if the dtype is int64 without trouble. + np_dtype = dtypes.as_dtype(dtype).as_numpy_dtype + if np.can_cast(array.dtype, np_dtype): + array = array.astype(np_dtype, casting="safe") + return array + diff --git a/tensorflow/python/keras/layers/preprocessing/table_utils_test.py b/tensorflow/python/keras/layers/preprocessing/table_utils_test.py new file mode 100644 index 00000000000..60a891f6ba8 --- /dev/null +++ b/tensorflow/python/keras/layers/preprocessing/table_utils_test.py @@ -0,0 +1,243 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for Keras lookup table utils.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.eager import context +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.keras import keras_parameterized +from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils +from tensorflow.python.keras.layers.preprocessing import table_utils +from tensorflow.python.ops import lookup_ops +from tensorflow.python.ops.ragged import ragged_factory_ops +from tensorflow.python.platform import test + + +def get_table(dtype=dtypes.string, oov_tokens=None): + table = lookup_ops.MutableHashTable( + key_dtype=dtype, + value_dtype=dtypes.int64, + default_value=-7, + name="index_table") + return table_utils.TableHandler( + table, oov_tokens, use_v1_apis=(not context.executing_eagerly())) + + +@keras_parameterized.run_all_keras_modes +class CategoricalEncodingInputTest( + keras_parameterized.TestCase, + preprocessing_test_utils.PreprocessingLayerTest): + + def test_sparse_string_input(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = sparse_tensor.SparseTensor( + indices=[[0, 0], [1, 2]], + values=["fire", "michigan"], + dense_shape=[3, 4]) + + 
expected_indices = [[0, 0], [1, 2]] + expected_values = [5, 1] + expected_dense_shape = [3, 4] + + table = get_table(oov_tokens=[1]) + table.insert(vocab_data, range(2, len(vocab_data) + 2)) + output_data = table.lookup(input_array) + + self.assertAllEqual(expected_indices, output_data.indices) + self.assertAllEqual(expected_values, output_data.values) + self.assertAllEqual(expected_dense_shape, output_data.dense_shape) + + def test_sparse_int_input(self): + vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) + input_array = sparse_tensor.SparseTensor( + indices=[[0, 0], [1, 2]], + values=np.array([13, 32], dtype=np.int64), + dense_shape=[3, 4]) + + expected_indices = [[0, 0], [1, 2]] + expected_values = [5, 1] + expected_dense_shape = [3, 4] + + table = get_table(dtype=dtypes.int64, oov_tokens=[1]) + table.insert(vocab_data, range(2, len(vocab_data) + 2)) + output_data = table.lookup(input_array) + + self.assertAllEqual(expected_indices, output_data.indices) + self.assertAllEqual(expected_values, output_data.values) + self.assertAllEqual(expected_dense_shape, output_data.dense_shape) + + def test_ragged_string_input(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = ragged_factory_ops.constant( + [["earth", "wind", "fire"], ["fire", "and", "earth", "michigan"]]) + expected_output = [[2, 3, 5], [5, 4, 2, 1]] + + table = get_table(oov_tokens=[1]) + table.insert(vocab_data, range(2, len(vocab_data) + 2)) + output_data = table.lookup(input_array) + + self.assertAllEqual(expected_output, output_data) + + def test_ragged_int_input(self): + vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) + input_array = ragged_factory_ops.constant([[10, 11, 13], [13, 12, 10, 42]], + dtype=np.int64) + expected_output = [[2, 3, 5], [5, 4, 2, 1]] + + table = get_table(dtype=dtypes.int64, oov_tokens=[1]) + table.insert(vocab_data, range(2, len(vocab_data) + 2)) + output_data = table.lookup(input_array) + + self.assertAllEqual(expected_output, output_data) + + 
+@keras_parameterized.run_all_keras_modes +class CategoricalEncodingMultiOOVTest( + keras_parameterized.TestCase, + preprocessing_test_utils.PreprocessingLayerTest): + + def test_sparse_string_input_multi_bucket(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = sparse_tensor.SparseTensor( + indices=[[0, 0], [1, 2]], values=["fire", "ohio"], dense_shape=[3, 4]) + + expected_indices = [[0, 0], [1, 2]] + expected_values = [6, 2] + expected_dense_shape = [3, 4] + + table = get_table(oov_tokens=[1, 2]) + table.insert(vocab_data, range(3, len(vocab_data) + 3)) + output_data = table.lookup(input_array) + + self.assertAllEqual(expected_indices, output_data.indices) + self.assertAllEqual(expected_values, output_data.values) + self.assertAllEqual(expected_dense_shape, output_data.dense_shape) + + def test_sparse_int_input_multi_bucket(self): + vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) + input_array = sparse_tensor.SparseTensor( + indices=[[0, 0], [1, 2]], + values=np.array([13, 132], dtype=np.int64), + dense_shape=[3, 4]) + + expected_indices = [[0, 0], [1, 2]] + expected_values = [6, 1] + expected_dense_shape = [3, 4] + + table = get_table(dtype=dtypes.int64, oov_tokens=[1, 2]) + table.insert(vocab_data, range(3, len(vocab_data) + 3)) + output_data = table.lookup(input_array) + + self.assertAllEqual(expected_indices, output_data.indices) + self.assertAllEqual(expected_values, output_data.values) + self.assertAllEqual(expected_dense_shape, output_data.dense_shape) + + def test_ragged_string_input_multi_bucket(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = ragged_factory_ops.constant([["earth", "wind", "fire"], + ["fire", "and", "earth", + "ohio"]]) + expected_output = [[3, 4, 6], [6, 5, 3, 2]] + + table = get_table(oov_tokens=[1, 2]) + table.insert(vocab_data, range(3, len(vocab_data) + 3)) + output_data = table.lookup(input_array) + + self.assertAllEqual(expected_output, output_data) + + def 
test_ragged_int_input_multi_bucket(self): + vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) + input_array = ragged_factory_ops.constant([[10, 11, 13], [13, 12, 10, 132]], + dtype=np.int64) + expected_output = [[3, 4, 6], [6, 5, 3, 1]] + + table = get_table(dtype=dtypes.int64, oov_tokens=[1, 2]) + table.insert(vocab_data, range(3, len(vocab_data) + 3)) + output_data = table.lookup(input_array) + + self.assertAllEqual(expected_output, output_data) + + def test_tensor_int_input_multi_bucket(self): + vocab_data = np.array([10, 11, 12, 13], dtype=np.int64) + input_array = np.array([[13, 132], [13, 133]], dtype=np.int64) + expected_values = [[6, 1], [6, 2]] + + table = get_table(dtype=dtypes.int64, oov_tokens=[1, 2]) + table.insert(vocab_data, range(3, len(vocab_data) + 3)) + output_data = table.lookup(input_array) + + self.assertAllEqual(expected_values, output_data) + + def test_tensor_string_input_multi_bucket(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = [["earth", "wind", "fire", "michigan"], + ["fire", "and", "earth", "ohio"]] + expected_output = [[3, 4, 6, 1], [6, 5, 3, 2]] + + table = get_table(oov_tokens=[1, 2]) + table.insert(vocab_data, range(3, len(vocab_data) + 3)) + output_data = table.lookup(input_array) + + self.assertAllEqual(expected_output, output_data) + + +@keras_parameterized.run_all_keras_modes +class IndexLookupOutputTest(keras_parameterized.TestCase, + preprocessing_test_utils.PreprocessingLayerTest): + + def test_int_output_default_lookup_value(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array([["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"]]) + expected_output = [[1, 2, 3, 4], [4, 3, 1, -7]] + + table = get_table(oov_tokens=None) + table.insert(vocab_data, range(1, len(vocab_data) + 1)) + output_data = table.lookup(input_array) + + self.assertAllEqual(expected_output, output_data) + + def test_output_shape(self): + vocab_data = ["earth", "wind", "and", 
"fire"] + input_array = np.array([["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"]]) + + table = get_table() + table.insert(vocab_data, range(1, len(vocab_data) + 1)) + output_data = table.lookup(input_array) + + self.assertAllEqual(input_array.shape[1:], output_data.shape[1:]) + + def test_int_output_no_reserved_zero_default_lookup_value(self): + vocab_data = ["earth", "wind", "and", "fire"] + input_array = np.array([["earth", "wind", "and", "fire"], + ["fire", "and", "earth", "michigan"]]) + expected_output = [[0, 1, 2, 3], [3, 2, 0, -7]] + + table = get_table(oov_tokens=None) + table.insert(vocab_data, range(len(vocab_data))) + output_data = table.lookup(input_array) + + self.assertAllEqual(expected_output, output_data) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/keras/layers/serialization.py b/tensorflow/python/keras/layers/serialization.py index 67aaf1d6eb8..30be3d485df 100644 --- a/tensorflow/python/keras/layers/serialization.py +++ b/tensorflow/python/keras/layers/serialization.py @@ -64,23 +64,11 @@ ALL_V2_MODULES = ( recurrent_v2, preprocessing_normalization ) -FEATURE_COLUMN_V1_OBJECTS = {} -FEATURE_COLUMN_V2_OBJECTS = {} # ALL_OBJECTS is meant to be a global mutable. Hence we need to make it # thread-local to avoid concurrent mutations. LOCAL = threading.local() -def inject_feature_column_v1_objects(name, cls): - global FEATURE_COLUMN_V1_OBJECTS - FEATURE_COLUMN_V1_OBJECTS[name] = cls - - -def inject_feature_column_v2_objects(name, cls): - global FEATURE_COLUMN_V2_OBJECTS - FEATURE_COLUMN_V2_OBJECTS[name] = cls - - def populate_deserializable_objects(): """Populates dict ALL_OBJECTS with every built-in layer. 
""" @@ -126,7 +114,7 @@ def populate_deserializable_objects(): LOCAL.ALL_OBJECTS['Input'] = input_layer.Input LOCAL.ALL_OBJECTS['InputSpec'] = input_spec.InputSpec - LOCAL.ALL_OBJECTS['Network'] = models.Network + LOCAL.ALL_OBJECTS['Functional'] = models.Functional LOCAL.ALL_OBJECTS['Model'] = models.Model LOCAL.ALL_OBJECTS['SequenceFeatures'] = SequenceFeatures LOCAL.ALL_OBJECTS['Sequential'] = models.Sequential @@ -134,9 +122,11 @@ def populate_deserializable_objects(): LOCAL.ALL_OBJECTS['WideDeepModel'] = WideDeepModel if tf2.enabled(): - LOCAL.ALL_OBJECTS.update(FEATURE_COLUMN_V2_OBJECTS) + from tensorflow.python.keras.feature_column.dense_features_v2 import DenseFeatures # pylint: disable=g-import-not-at-top + LOCAL.ALL_OBJECTS['DenseFeatures'] = DenseFeatures else: - LOCAL.ALL_OBJECTS.update(FEATURE_COLUMN_V1_OBJECTS) + from tensorflow.python.keras.feature_column.dense_features import DenseFeatures # pylint: disable=g-import-not-at-top + LOCAL.ALL_OBJECTS['DenseFeatures'] = DenseFeatures # Merge layers, function versions. LOCAL.ALL_OBJECTS['add'] = merge.add diff --git a/tensorflow/python/keras/layers/wrappers_test.py b/tensorflow/python/keras/layers/wrappers_test.py index a3173f4d11f..bb22db25591 100644 --- a/tensorflow/python/keras/layers/wrappers_test.py +++ b/tensorflow/python/keras/layers/wrappers_test.py @@ -377,7 +377,8 @@ class TimeDistributedTest(keras_parameterized.TestCase): input_layer.compute_output_shape([None, 2, 4]).as_list(), [None, 2, 8]) - @keras_parameterized.run_all_keras_modes + @keras_parameterized.run_all_keras_modes(always_skip_v1=True) + # TODO(scottzhu): check why v1 session failed. 
def test_TimeDistributed_with_mask_first_implementation(self): np.random.seed(100) rnn_layer = keras.layers.LSTM(4, return_sequences=True, stateful=True) diff --git a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py index c43ca21ea06..29e5a68c854 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py +++ b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py @@ -23,9 +23,10 @@ from tensorflow.python.framework import ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variables +from tensorflow.python.types import core -class AutoCastVariable(variables.Variable): +class AutoCastVariable(variables.Variable, core.Tensor): """Variable that will cast itself to a different dtype in applicable contexts. This class wraps a floating-point `tf.Variable`. It emulates the variable @@ -417,7 +418,6 @@ class AutoCastVariable(variables.Variable): ops.register_tensor_conversion_function(AutoCastVariable, AutoCastVariable._dense_var_to_tensor) # pylint:disable=protected-access -ops.register_dense_tensor_like_type(AutoCastVariable) def create_autocast_variable(variable): diff --git a/tensorflow/python/keras/models.py b/tensorflow/python/keras/models.py index eaffb90e64b..9f5099e100e 100644 --- a/tensorflow/python/keras/models.py +++ b/tensorflow/python/keras/models.py @@ -23,7 +23,7 @@ from tensorflow.python.framework import ops from tensorflow.python.keras import backend as K from tensorflow.python.keras import metrics as metrics_module from tensorflow.python.keras import optimizers -from tensorflow.python.keras.engine import network +from tensorflow.python.keras.engine import functional from tensorflow.python.keras.engine import sequential from tensorflow.python.keras.engine import training from tensorflow.python.keras.engine import training_v1 
@@ -31,7 +31,6 @@ from tensorflow.python.keras.engine.base_layer import AddMetric from tensorflow.python.keras.engine.base_layer import Layer from tensorflow.python.keras.engine.input_layer import Input from tensorflow.python.keras.engine.input_layer import InputLayer -from tensorflow.python.keras.engine.network import Network from tensorflow.python.keras.saving import model_config from tensorflow.python.keras.saving import save from tensorflow.python.keras.utils import generic_utils @@ -45,6 +44,7 @@ from tensorflow.python.util.tf_export import keras_export # API entries importable from `keras.models`: Model = training.Model # pylint: disable=invalid-name Sequential = sequential.Sequential # pylint: disable=invalid-name +Functional = functional.Functional # pylint: disable=invalid-name save_model = save.save_model load_model = save.load_model model_from_config = model_config.model_from_config @@ -193,12 +193,12 @@ def _clone_functional_model(model, input_tensors=None, layer_fn=_clone_layer): if not callable(layer_fn): raise ValueError('Expected `layer_fn` argument to be a callable.') - model_config, created_layers = _clone_layers_and_model_config( + model_configs, created_layers = _clone_layers_and_model_config( model, new_input_layers, layer_fn) # Reconstruct model from the config, using the cloned layers. 
input_tensors, output_tensors, created_layers = ( - network.reconstruct_from_config(model_config, - created_layers=created_layers)) + functional.reconstruct_from_config(model_configs, + created_layers=created_layers)) metrics_names = model.metrics_names model = Model(input_tensors, output_tensors, name=model.name) # Layers not directly tied to outputs of the Model, such as loss layers @@ -209,8 +209,8 @@ def _clone_functional_model(model, input_tensors=None, layer_fn=_clone_layer): if ancillary_layers: new_nodes = nest.flatten([ layer.inbound_nodes[1:] - if network._should_skip_first_node(layer) else layer.inbound_nodes - for layer in created_layers.values() + if functional._should_skip_first_node(layer) + else layer.inbound_nodes for layer in created_layers.values() ]) _insert_ancillary_layers(model, ancillary_layers, metrics_names, new_nodes) return model @@ -244,7 +244,8 @@ def _clone_layers_and_model_config(model, input_layers, layer_fn): created_layers[layer.name] = layer_fn(layer) return {} - config = network.get_network_config(model, serialize_layer_fn=_copy_layer) + config = functional.get_network_config( + model, serialize_layer_fn=_copy_layer) return config, created_layers @@ -495,7 +496,7 @@ def _in_place_subclassed_model_reset(model): # This will not work for nested subclassed models used as layers. # This would be theoretically possible to support, but would add complexity. # Only do it if users complain. - if isinstance(layer, Network) and not layer._is_graph_network: + if isinstance(layer, training.Model) and not layer._is_graph_network: raise ValueError('We do not support the use of nested subclassed models ' 'in `model_to_estimator` at this time. 
Found nested ' 'model: %s' % layer) diff --git a/tensorflow/python/keras/optimizer_v2/rmsprop.py b/tensorflow/python/keras/optimizer_v2/rmsprop.py index 5de5e59b385..d1deaf34f45 100644 --- a/tensorflow/python/keras/optimizer_v2/rmsprop.py +++ b/tensorflow/python/keras/optimizer_v2/rmsprop.py @@ -121,16 +121,19 @@ class RMSprop(optimizer_v2.OptimizerV2): Setting this to `True` may help with training, but is slightly more expensive in terms of computation and memory. Defaults to `False`. name: Optional name prefix for the operations created when applying - gradients. Defaults to "RMSprop". @compatibility(eager) When eager - execution is enabled, `learning_rate`, `decay`, `momentum`, and - `epsilon` can each be a callable that takes no arguments and returns the - actual value to use. This can be useful for changing these values across - different invocations of optimizer functions. @end_compatibility + gradients. Defaults to "RMSprop". **kwargs: keyword arguments. Allowed to be {`clipnorm`, `clipvalue`, `lr`, `decay`}. `clipnorm` is clip gradients by norm; `clipvalue` is clip gradients by value, `decay` is included for backward compatibility to allow time inverse decay of learning rate. `lr` is included for backward compatibility, recommended to use `learning_rate` instead. + + @compatibility(eager) + When eager execution is enabled, `learning_rate`, `decay`, `momentum`, and + `epsilon` can each be a callable that takes no arguments and returns the + actual value to use. This can be useful for changing these values across + different invocations of optimizer functions. 
+ @end_compatibility """ super(RMSprop, self).__init__(name, **kwargs) self._set_hyper("learning_rate", kwargs.get("lr", learning_rate)) diff --git a/tensorflow/python/keras/preprocessing/BUILD b/tensorflow/python/keras/preprocessing/BUILD index 403bc6e4808..24260fb71db 100644 --- a/tensorflow/python/keras/preprocessing/BUILD +++ b/tensorflow/python/keras/preprocessing/BUILD @@ -85,6 +85,7 @@ tf_py_test( deps = [ ":image", "//tensorflow/python:client_testlib", + "//tensorflow/python/keras", "//third_party/py/numpy", ], ) diff --git a/tensorflow/python/keras/preprocessing/image.py b/tensorflow/python/keras/preprocessing/image.py index 3af573fa036..953962c7771 100644 --- a/tensorflow/python/keras/preprocessing/image.py +++ b/tensorflow/python/keras/preprocessing/image.py @@ -14,6 +14,7 @@ # ============================================================================== # pylint: disable=invalid-name # pylint: disable=g-import-not-at-top +# pylint: disable=g-classes-have-attributes """Set of tools for real-time data augmentation on image data. """ from __future__ import absolute_import @@ -35,6 +36,7 @@ from tensorflow.python.keras.utils import data_utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import image_ops from tensorflow.python.ops import math_ops +from tensorflow.python.platform import tf_logging from tensorflow.python.util import tf_inspect from tensorflow.python.util.tf_export import keras_export @@ -49,6 +51,7 @@ random_brightness = image.random_brightness apply_affine_transform = image.apply_affine_transform +@keras_export('keras.preprocessing.image.smart_resize', v1=[]) def smart_resize(x, size, interpolation='bilinear'): """Resize images to a target size without aspect ratio distortion. @@ -65,7 +68,7 @@ def smart_resize(x, size, interpolation='bilinear'): ``` However, if you do this, you distort the aspect ratio of your images, since - in general they do not all have the same aspect ratio. 
This is + in general they do not all have the same aspect ratio as `size`. This is fine in many cases, but not always (e.g. for GANs this can be a problem). Note that passing the argument `preserve_aspect_ratio=True` to `resize` @@ -458,6 +461,123 @@ class NumpyArrayIterator(image.NumpyArrayIterator, Iterator): **kwargs) +class DataFrameIterator(image.DataFrameIterator, Iterator): + """Iterator capable of reading images from a directory on disk as a dataframe. + + Arguments: + dataframe: Pandas dataframe containing the filepaths relative to + `directory` (or absolute paths if `directory` is None) of the images in + a string column. It should include other column/s + depending on the `class_mode`: - if `class_mode` is `"categorical"` + (default value) it must include the `y_col` column with the class/es + of each image. Values in column can be string/list/tuple if a single + class or list/tuple if multiple classes. - if `class_mode` is + `"binary"` or `"sparse"` it must include the given `y_col` column + with class values as strings. - if `class_mode` is `"raw"` or + `"multi_output"` it should contain the columns specified in `y_col`. + - if `class_mode` is `"input"` or `None` no extra column is needed. + directory: string, path to the directory to read images from. If `None`, + data in `x_col` column should be absolute paths. + image_data_generator: Instance of `ImageDataGenerator` to use for random + transformations and normalization. If None, no transformations and + normalizations are made. + x_col: string, column in `dataframe` that contains the filenames (or + absolute paths if `directory` is `None`). + y_col: string or list, column/s in `dataframe` that has the target data. + weight_col: string, column in `dataframe` that contains the sample + weights. Default: `None`. + target_size: tuple of integers, dimensions to resize input images to. + color_mode: One of `"rgb"`, `"rgba"`, `"grayscale"`. Color mode to read + images. 
+ classes: Optional list of strings, classes to use (e.g. `["dogs", + "cats"]`). If None, all classes in `y_col` will be used. + class_mode: one of "binary", "categorical", "input", "multi_output", + "raw", "sparse" or None. Default: "categorical". + Mode for yielding the targets: + - `"binary"`: 1D numpy array of binary labels, + - `"categorical"`: 2D numpy array of one-hot encoded labels. Supports + multi-label output. + - `"input"`: images identical to input images (mainly used to work + with autoencoders), + - `"multi_output"`: list with the values of the different columns, + - `"raw"`: numpy array of values in `y_col` column(s), + - `"sparse"`: 1D numpy array of integer labels, - `None`, no targets + are returned (the generator will only yield batches of image data, + which is useful to use in `model.predict_generator()`). + batch_size: Integer, size of a batch. + shuffle: Boolean, whether to shuffle the data between epochs. + seed: Random seed for data shuffling. + data_format: String, one of `channels_first`, `channels_last`. + save_to_dir: Optional directory where to save the pictures being yielded, + in a viewable format. This is useful for visualizing the random + transformations being applied, for debugging purposes. + save_prefix: String prefix to use for saving sample images (if + `save_to_dir` is set). + save_format: Format to use for saving sample images (if `save_to_dir` is + set). + subset: Subset of data (`"training"` or `"validation"`) if + validation_split is set in ImageDataGenerator. + interpolation: Interpolation method used to resample the image if the + target size is different from that of the loaded image. Supported + methods are "nearest", "bilinear", and "bicubic". If PIL version 1.1.3 + or newer is installed, "lanczos" is also supported. If PIL version 3.4.0 + or newer is installed, "box" and "hamming" are also supported. By + default, "nearest" is used. + dtype: Dtype to use for the generated arrays. 
+ validate_filenames: Boolean, whether to validate image filenames in + `x_col`. If `True`, invalid images will be ignored. Disabling this + option + can lead to speed-up in the instantiation of this class. Default: `True`. + """ + + def __init__( + self, + dataframe, + directory=None, + image_data_generator=None, + x_col='filename', + y_col='class', + weight_col=None, + target_size=(256, 256), + color_mode='rgb', + classes=None, + class_mode='categorical', + batch_size=32, + shuffle=True, + seed=None, + data_format='channels_last', + save_to_dir=None, + save_prefix='', + save_format='png', + subset=None, + interpolation='nearest', + dtype='float32', + validate_filenames=True): + super(DataFrameIterator, self).__init__( + dataframe=dataframe, + directory=directory, + image_data_generator=image_data_generator, + x_col=x_col, + y_col=y_col, + weight_col=weight_col, + target_size=target_size, + color_mode=color_mode, + classes=classes, + class_mode=class_mode, + batch_size=batch_size, + shuffle=shuffle, + seed=seed, + data_format=data_format, + save_to_dir=save_to_dir, + save_prefix=save_prefix, + save_format=save_format, + subset=subset, + interpolation=interpolation, + dtype=dtype, + validate_filenames=validate_filenames + ) + + @keras_export('keras.preprocessing.image.ImageDataGenerator') class ImageDataGenerator(image.ImageDataGenerator): """Generate batches of tensor image data with real-time data augmentation. @@ -685,6 +805,302 @@ class ImageDataGenerator(image.ImageDataGenerator): validation_split=validation_split, **kwargs) + def flow(self, + x, + y=None, + batch_size=32, + shuffle=True, + sample_weight=None, + seed=None, + save_to_dir=None, + save_prefix='', + save_format='png', + subset=None): + """Takes data & label arrays, generates batches of augmented data. + + Arguments: + x: Input data. Numpy array of rank 4 or a tuple. 
If tuple, the first + element should contain the images and the second element another numpy + array or a list of numpy arrays that gets passed to the output without + any modifications. Can be used to feed the model miscellaneous data + along with the images. In case of grayscale data, the channels axis of + the image array should have value 1, in case of RGB data, it should + have value 3, and in case of RGBA data, it should have value 4. + y: Labels. + batch_size: Int (default: 32). + shuffle: Boolean (default: True). + sample_weight: Sample weights. + seed: Int (default: None). + save_to_dir: None or str (default: None). This allows you to optionally + specify a directory to which to save the augmented pictures being + generated (useful for visualizing what you are doing). + save_prefix: Str (default: `''`). Prefix to use for filenames of saved + pictures (only relevant if `save_to_dir` is set). + save_format: one of "png", "jpeg" + (only relevant if `save_to_dir` is set). Default: "png". + subset: Subset of data (`"training"` or `"validation"`) if + `validation_split` is set in `ImageDataGenerator`. + + Returns: + An `Iterator` yielding tuples of `(x, y)` + where `x` is a numpy array of image data + (in the case of a single image input) or a list + of numpy arrays (in the case with + additional inputs) and `y` is a numpy array + of corresponding labels. If 'sample_weight' is not None, + the yielded tuples are of the form `(x, y, sample_weight)`. + If `y` is None, only the numpy array `x` is returned. 
+ """ + return NumpyArrayIterator( + x, + y, + self, + batch_size=batch_size, + shuffle=shuffle, + sample_weight=sample_weight, + seed=seed, + data_format=self.data_format, + save_to_dir=save_to_dir, + save_prefix=save_prefix, + save_format=save_format, + subset=subset) + + def flow_from_directory(self, + directory, + target_size=(256, 256), + color_mode='rgb', + classes=None, + class_mode='categorical', + batch_size=32, + shuffle=True, + seed=None, + save_to_dir=None, + save_prefix='', + save_format='png', + follow_links=False, + subset=None, + interpolation='nearest'): + """Takes the path to a directory & generates batches of augmented data. + + Arguments: + directory: string, path to the target directory. It should contain one + subdirectory per class. Any PNG, JPG, BMP, PPM or TIF images inside + each of the subdirectories directory tree will be included in the + generator. See [this script]( + https://gist.github.com/fchollet/0830affa1f7f19fd47b06d4cf89ed44d) + for more details. + target_size: Tuple of integers `(height, width)`, defaults to `(256, + 256)`. The dimensions to which all images found will be resized. + color_mode: One of "grayscale", "rgb", "rgba". Default: "rgb". Whether + the images will be converted to have 1, 3, or 4 channels. + classes: Optional list of class subdirectories + (e.g. `['dogs', 'cats']`). Default: None. If not provided, the list + of classes will be automatically inferred from the subdirectory + names/structure under `directory`, where each subdirectory will be + treated as a different class (and the order of the classes, which + will map to the label indices, will be alphanumeric). The + dictionary containing the mapping from class names to class + indices can be obtained via the attribute `class_indices`. + class_mode: One of "categorical", "binary", "sparse", + "input", or None. Default: "categorical". 
+ Determines the type of label arrays that are returned: - + "categorical" will be 2D one-hot encoded labels, - "binary" will + be 1D binary labels, - "sparse" will be 1D integer labels, - "input" + will be images identical to input images (mainly used to work with + autoencoders). - If None, no labels are returned (the generator + will only yield batches of image data, which is useful to use with + `model.predict_generator()`). Please note that in case of + class_mode None, the data still needs to reside in a subdirectory + of `directory` for it to work correctly. + batch_size: Size of the batches of data (default: 32). + shuffle: Whether to shuffle the data (default: True). If set to False, + sorts the data in alphanumeric order. + seed: Optional random seed for shuffling and transformations. + save_to_dir: None or str (default: None). This allows you to optionally + specify a directory to which to save the augmented pictures being + generated (useful for visualizing what you are doing). + save_prefix: Str. Prefix to use for filenames of saved pictures (only + relevant if `save_to_dir` is set). + save_format: One of "png", "jpeg" + (only relevant if `save_to_dir` is set). Default: "png". + follow_links: Whether to follow symlinks inside + class subdirectories (default: False). + subset: Subset of data (`"training"` or `"validation"`) if + `validation_split` is set in `ImageDataGenerator`. + interpolation: Interpolation method used to resample the image if the + target size is different from that of the loaded image. Supported + methods are `"nearest"`, `"bilinear"`, and `"bicubic"`. If PIL version + 1.1.3 or newer is installed, `"lanczos"` is also supported. If PIL + version 3.4.0 or newer is installed, `"box"` and `"hamming"` are also + supported. By default, `"nearest"` is used. 
+ + Returns: + A `DirectoryIterator` yielding tuples of `(x, y)` + where `x` is a numpy array containing a batch + of images with shape `(batch_size, *target_size, channels)` + and `y` is a numpy array of corresponding labels. + """ + return DirectoryIterator( + directory, + self, + target_size=target_size, + color_mode=color_mode, + classes=classes, + class_mode=class_mode, + data_format=self.data_format, + batch_size=batch_size, + shuffle=shuffle, + seed=seed, + save_to_dir=save_to_dir, + save_prefix=save_prefix, + save_format=save_format, + follow_links=follow_links, + subset=subset, + interpolation=interpolation) + + def flow_from_dataframe(self, + dataframe, + directory=None, + x_col='filename', + y_col='class', + weight_col=None, + target_size=(256, 256), + color_mode='rgb', + classes=None, + class_mode='categorical', + batch_size=32, + shuffle=True, + seed=None, + save_to_dir=None, + save_prefix='', + save_format='png', + subset=None, + interpolation='nearest', + validate_filenames=True, + **kwargs): + """Takes the dataframe and the path to a directory + generates batches. + + The generated batches contain augmented/normalized data. + + **A simple tutorial can be found **[here]( + http://bit.ly/keras_flow_from_dataframe). + + Arguments: + dataframe: Pandas dataframe containing the filepaths relative to + `directory` (or absolute paths if `directory` is None) of the images + in a string column. It should include other column/s + depending on the `class_mode`: - if `class_mode` is `"categorical"` + (default value) it must include the `y_col` column with the + class/es of each image. Values in column can be string/list/tuple + if a single class or list/tuple if multiple classes. - if + `class_mode` is `"binary"` or `"sparse"` it must include the given + `y_col` column with class values as strings. - if `class_mode` is + `"raw"` or `"multi_output"` it should contain the columns + specified in `y_col`. 
- if `class_mode` is `"input"` or `None` no + extra column is needed. + directory: string, path to the directory to read images from. If `None`, + data in `x_col` column should be absolute paths. + x_col: string, column in `dataframe` that contains the filenames (or + absolute paths if `directory` is `None`). + y_col: string or list, column/s in `dataframe` that has the target data. + weight_col: string, column in `dataframe` that contains the sample + weights. Default: `None`. + target_size: tuple of integers `(height, width)`, default: `(256, 256)`. + The dimensions to which all images found will be resized. + color_mode: one of "grayscale", "rgb", "rgba". Default: "rgb". Whether + the images will be converted to have 1, 3, or 4 color channels. + classes: optional list of classes (e.g. `['dogs', 'cats']`). Default is + None. If not provided, the list of classes will be automatically + inferred from the `y_col` (the order of the classes, which will map + to the label indices, will be alphanumeric). The dictionary + containing the mapping from class names to class indices can be + obtained via the attribute `class_indices`. + class_mode: one of "binary", "categorical", "input", "multi_output", + "raw", "sparse" or None. Default: "categorical". + Mode for yielding the targets: + - `"binary"`: 1D numpy array of binary labels, + - `"categorical"`: 2D numpy array of one-hot encoded labels. + Supports multi-label output. + - `"input"`: images identical to input images (mainly used to work + with autoencoders), + - `"multi_output"`: list with the values of the different columns, + - `"raw"`: numpy array of values in `y_col` column(s), + - `"sparse"`: 1D numpy array of integer labels, - `None`, no targets + are returned (the generator will only yield batches of image data, + which is useful to use in `model.predict_generator()`). + batch_size: size of the batches of data (default: 32). + shuffle: whether to shuffle the data (default: True) + seed: optional random seed for shuffling and transformations. 
+ save_to_dir: None or str (default: None). This allows you to optionally + specify a directory to which to save the augmented pictures being + generated (useful for visualizing what you are doing). + save_prefix: str. Prefix to use for filenames of saved pictures (only + relevant if `save_to_dir` is set). + save_format: one of "png", "jpeg" + (only relevant if `save_to_dir` is set). Default: "png". + subset: Subset of data (`"training"` or `"validation"`) if + `validation_split` is set in `ImageDataGenerator`. + interpolation: Interpolation method used to resample the image if the + target size is different from that of the loaded image. Supported + methods are `"nearest"`, `"bilinear"`, and `"bicubic"`. If PIL version + 1.1.3 or newer is installed, `"lanczos"` is also supported. If PIL + version 3.4.0 or newer is installed, `"box"` and `"hamming"` are also + supported. By default, `"nearest"` is used. + validate_filenames: Boolean, whether to validate image filenames in + `x_col`. If `True`, invalid images will be ignored. Disabling this + option can lead to speed-up in the execution of this function. + Defaults to `True`. + **kwargs: legacy arguments for raising deprecation warnings. + + Returns: + A `DataFrameIterator` yielding tuples of `(x, y)` + where `x` is a numpy array containing a batch + of images with shape `(batch_size, *target_size, channels)` + and `y` is a numpy array of corresponding labels. 
+ """ + if 'has_ext' in kwargs: + tf_logging.warn( + 'has_ext is deprecated, filenames in the dataframe have ' + 'to match the exact filenames in disk.') + if 'sort' in kwargs: + tf_logging.warn( + 'sort is deprecated, batches will be created in the ' + 'same order than the filenames provided if shuffle ' + 'is set to False.') + if class_mode == 'other': + tf_logging.warn( + '`class_mode` "other" is deprecated, please use ' + '`class_mode` "raw".') + class_mode = 'raw' + if 'drop_duplicates' in kwargs: + tf_logging.warn( + 'drop_duplicates is deprecated, you can drop duplicates ' + 'by using the pandas.DataFrame.drop_duplicates ' + 'method.') + + return DataFrameIterator( + dataframe, + directory, + self, + x_col=x_col, + y_col=y_col, + weight_col=weight_col, + target_size=target_size, + color_mode=color_mode, + classes=classes, + class_mode=class_mode, + data_format=self.data_format, + batch_size=batch_size, + shuffle=shuffle, + seed=seed, + save_to_dir=save_to_dir, + save_prefix=save_prefix, + save_format=save_format, + subset=subset, + interpolation=interpolation, + validate_filenames=validate_filenames) + + keras_export('keras.preprocessing.image.random_rotation')(random_rotation) keras_export('keras.preprocessing.image.random_shift')(random_shift) keras_export('keras.preprocessing.image.random_shear')(random_shear) diff --git a/tensorflow/python/keras/preprocessing/image_test.py b/tensorflow/python/keras/preprocessing/image_test.py index d7da420318f..d2f4b18f7dd 100644 --- a/tensorflow/python/keras/preprocessing/image_test.py +++ b/tensorflow/python/keras/preprocessing/image_test.py @@ -25,6 +25,9 @@ import tempfile import numpy as np from tensorflow.python.framework import test_util +from tensorflow.python.keras import keras_parameterized +from tensorflow.python.keras import layers +from tensorflow.python.keras.engine import sequential from tensorflow.python.keras.preprocessing import 
image as preprocessing_image from tensorflow.python.platform import test @@ -52,7 +55,7 @@ def _generate_test_images(): return [rgb_images, gray_images] -class TestImage(test.TestCase): +class TestImage(keras_parameterized.TestCase): @test_util.run_v2_only def test_smart_resize(self): @@ -143,8 +146,7 @@ class TestImage(test.TestCase): generator = preprocessing_image.ImageDataGenerator( data_format='unknown') - generator = preprocessing_image.ImageDataGenerator( - zoom_range=(2, 2)) + generator = preprocessing_image.ImageDataGenerator(zoom_range=(2., 2.)) def test_image_data_generator_fit(self): generator = preprocessing_image.ImageDataGenerator( @@ -319,14 +321,21 @@ class TestImage(test.TestCase): self.assertEqual( len(set(train_iterator.filenames) & set(filenames)), num_training) + model = sequential.Sequential([layers.Flatten(), layers.Dense(2)]) + model.compile(optimizer='sgd', loss='mse') + model.fit(train_iterator, epochs=1) + shutil.rmtree(tmp_folder) + @keras_parameterized.run_all_keras_modes def test_directory_iterator_with_validation_split_25_percent(self): self.directory_iterator_with_validation_split_test_helper(0.25) + @keras_parameterized.run_all_keras_modes def test_directory_iterator_with_validation_split_40_percent(self): self.directory_iterator_with_validation_split_test_helper(0.40) + @keras_parameterized.run_all_keras_modes def test_directory_iterator_with_validation_split_50_percent(self): self.directory_iterator_with_validation_split_test_helper(0.50) diff --git a/tensorflow/python/keras/saving/hdf5_format_test.py b/tensorflow/python/keras/saving/hdf5_format_test.py index cae58329005..757385a25ea 100644 --- a/tensorflow/python/keras/saving/hdf5_format_test.py +++ b/tensorflow/python/keras/saving/hdf5_format_test.py @@ -1210,7 +1210,7 @@ class TestWeightSavingAndLoadingTFFormat(test.TestCase, parameterized.TestCase): def test_incompatible_checkpoint(self): save_path = trackable.Checkpoint().save( os.path.join(self.get_temp_dir(), 'ckpt')) - m 
= keras.Model() + m = DummySubclassModel() with self.assertRaisesRegexp(AssertionError, 'Nothing to load'): m.load_weights(save_path) m.dense = keras.layers.Dense(2) @@ -1222,7 +1222,7 @@ class TestWeightSavingAndLoadingTFFormat(test.TestCase, parameterized.TestCase): @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def test_directory_passed(self): with self.cached_session(): - m = keras.Model() + m = DummySubclassModel() v = m.add_weight(name='v', shape=[]) self.evaluate(v.assign(42.)) prefix = os.path.join(self.get_temp_dir(), @@ -1235,7 +1235,7 @@ class TestWeightSavingAndLoadingTFFormat(test.TestCase, parameterized.TestCase): @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def test_relative_path(self): with self.cached_session(): - m = keras.Model() + m = DummySubclassModel() v = m.add_weight(name='v', shape=[]) os.chdir(self.get_temp_dir()) @@ -1266,7 +1266,7 @@ class TestWeightSavingAndLoadingTFFormat(test.TestCase, parameterized.TestCase): @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def test_nonexistent_prefix_directory(self): with self.cached_session(): - m = keras.Model() + m = DummySubclassModel() v = m.add_weight(name='v', shape=[]) self.evaluate(v.assign(42.)) prefix = os.path.join(self.get_temp_dir(), @@ -1276,5 +1276,10 @@ class TestWeightSavingAndLoadingTFFormat(test.TestCase, parameterized.TestCase): m.load_weights(prefix) self.assertEqual(42., self.evaluate(v)) + +class DummySubclassModel(training.Model): + pass + + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/saving/saved_model/load.py b/tensorflow/python/keras/saving/saved_model/load.py index 5ffeb0671a1..13af49e3a0d 100644 --- a/tensorflow/python/keras/saving/saved_model/load.py +++ b/tensorflow/python/keras/saving/saved_model/load.py @@ -62,9 +62,9 @@ layers_module = LazyLoader( input_layer = LazyLoader( "input_layer", globals(), "tensorflow.python.keras.engine.input_layer") -network_lib = 
LazyLoader( - "network_lib", globals(), - "tensorflow.python.keras.engine.network") +functional_lib = LazyLoader( + "functional_lib", globals(), + "tensorflow.python.keras.engine.functional") training_lib = LazyLoader( "training_lib", globals(), "tensorflow.python.keras.engine.training") @@ -142,7 +142,7 @@ def _is_graph_network(layer): # pylint: disable=protected-access if isinstance(layer, RevivedNetwork): return False - elif isinstance(layer, network_lib.Network): + elif isinstance(layer, functional_lib.Functional): return (layer._is_graph_network or isinstance(layer, models_lib.Sequential)) return False @@ -371,7 +371,8 @@ class KerasObjectLoader(tf_load.Loader): # functional or Sequential model. model_is_functional_or_sequential = ( metadata.get('is_graph_network', False) or - metadata['class_name'] == 'Sequential') + metadata['class_name'] == 'Sequential' or + metadata['class_name'] == 'Functional') if not (generic_utils.validate_config(config) and model_is_functional_or_sequential): return None # Revive as custom model. @@ -383,7 +384,8 @@ class KerasObjectLoader(tf_load.Loader): if class_name == 'Sequential': model = models_lib.Sequential(name=config['name']) else: - model = models_lib.Model(name=config['name']) + model = models_lib.Functional( + inputs=[], outputs=[], name=config['name']) # Record this model and its layers. This will later be used to reconstruct # the model. 
@@ -561,10 +563,11 @@ class KerasObjectLoader(tf_load.Loader): if not model.built and not isinstance(input_specs, dict): model.build(input_shapes) else: - (inputs, outputs, created_layers) = network_lib.reconstruct_from_config( - config, created_layers={layer.name: layer for layer in layers}) + (inputs, outputs, + created_layers) = functional_lib.reconstruct_from_config( + config, created_layers={layer.name: layer for layer in layers}) model.__init__(inputs, outputs, name=config['name']) - network_lib.connect_ancillary_layers(model, created_layers) + functional_lib.connect_ancillary_layers(model, created_layers) # Set model dtype and trainable status. _set_network_attributes_from_metadata(model) @@ -764,7 +767,7 @@ def revive_custom_object(identifier, metadata): revived_classes = { '_tf_keras_layer': (RevivedLayer, base_layer.Layer), '_tf_keras_input_layer': (RevivedInputLayer, input_layer.InputLayer), - '_tf_keras_network': (RevivedNetwork, network_lib.Network), + '_tf_keras_network': (RevivedNetwork, functional_lib.Functional), '_tf_keras_model': (RevivedNetwork, model_class), '_tf_keras_sequential': (RevivedNetwork, models_lib.Sequential), } @@ -852,7 +855,7 @@ def _revive_setter(layer, name, value): layer._track_trackable(value, name=name) layer._serialized_attributes[name] = value # pylint: enable=protected-access - elif (isinstance(layer, network_lib.Network) and + elif (isinstance(layer, functional_lib.Functional) and re.match(r'^layer(_with_weights)?-[\d+]', name) is not None): # Edges named "layer-n" or "layer_with_weights-n", which are tracked in # network._track_layers, should not be added as an attribute. 
diff --git a/tensorflow/python/keras/saving/saved_model/model_serialization.py b/tensorflow/python/keras/saving/saved_model/model_serialization.py index 412fb0b54e5..c711e82a045 100644 --- a/tensorflow/python/keras/saving/saved_model/model_serialization.py +++ b/tensorflow/python/keras/saving/saved_model/model_serialization.py @@ -20,11 +20,11 @@ from __future__ import print_function from tensorflow.python.keras.saving import saving_utils from tensorflow.python.keras.saving.saved_model import constants -from tensorflow.python.keras.saving.saved_model import network_serialization +from tensorflow.python.keras.saving.saved_model import layer_serialization from tensorflow.python.keras.saving.saved_model import save_impl -class ModelSavedModelSaver(network_serialization.NetworkSavedModelSaver): +class ModelSavedModelSaver(layer_serialization.LayerSavedModelSaver): """Model SavedModel serialization.""" @property @@ -33,6 +33,10 @@ class ModelSavedModelSaver(network_serialization.NetworkSavedModelSaver): def _python_properties_internal(self): metadata = super(ModelSavedModelSaver, self)._python_properties_internal() + # Network stateful property is dependent on the child layers. 
+ metadata.pop('stateful') + metadata['is_graph_network'] = self.obj._is_graph_network # pylint: disable=protected-access + metadata.update( saving_utils.model_metadata( self.obj, include_optimizer=True, require_config=False)) diff --git a/tensorflow/python/keras/saving/saved_model/network_serialization.py b/tensorflow/python/keras/saving/saved_model/network_serialization.py index 1c94377e3db..c98cba47155 100644 --- a/tensorflow/python/keras/saving/saved_model/network_serialization.py +++ b/tensorflow/python/keras/saving/saved_model/network_serialization.py @@ -18,22 +18,13 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.keras.saving.saved_model import layer_serialization +from tensorflow.python.keras.saving.saved_model import model_serialization -# Network serialization is pretty much the same as layer serialization. -class NetworkSavedModelSaver(layer_serialization.LayerSavedModelSaver): +# FunctionalModel serialization is pretty much the same as Model serialization. +class NetworkSavedModelSaver(model_serialization.ModelSavedModelSaver): """Network serialization.""" @property def object_identifier(self): return '_tf_keras_network' - - def _python_properties_internal(self): - metadata = super(NetworkSavedModelSaver, self)._python_properties_internal() - - # Network stateful property is dependent on the child layers. 
- metadata.pop('stateful') - - metadata['is_graph_network'] = self.obj._is_graph_network # pylint: disable=protected-access - return metadata diff --git a/tensorflow/python/keras/saving/saved_model/saved_model_test.py b/tensorflow/python/keras/saving/saved_model/saved_model_test.py index 9cbe8607a54..5e9ccc2d37a 100644 --- a/tensorflow/python/keras/saving/saved_model/saved_model_test.py +++ b/tensorflow/python/keras/saving/saved_model/saved_model_test.py @@ -39,7 +39,6 @@ from tensorflow.python.distribute import mirrored_strategy from tensorflow.python.eager import context from tensorflow.python.eager import def_function from tensorflow.python.feature_column import feature_column_v2 as fc -from tensorflow.python.feature_column.dense_features import DenseFeatures from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -48,6 +47,7 @@ from tensorflow.python.keras import combinations from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras import regularizers from tensorflow.python.keras import testing_utils +from tensorflow.python.keras.feature_column.dense_features import DenseFeatures from tensorflow.python.keras.saving.saved_model import load as keras_load from tensorflow.python.keras.saving.saved_model import save_impl as keras_save from tensorflow.python.keras.utils import generic_utils diff --git a/tensorflow/python/keras/utils/version_utils_test.py b/tensorflow/python/keras/utils/version_utils_test.py index 76e888ca553..0a3cd53f3c0 100644 --- a/tensorflow/python/keras/utils/version_utils_test.py +++ b/tensorflow/python/keras/utils/version_utils_test.py @@ -53,12 +53,12 @@ class SplitUtilsTest(keras_parameterized.TestCase): inputs = keras.Input(10) outputs = keras.layers.Dense(1)(inputs) model = keras.Model(inputs, outputs) - self._check_model_class(model.__class__) + self._check_model_class(model.__class__.__bases__[0]) 
self._check_layer_class(model) def test_sequential_model(self): model = keras.Sequential([keras.layers.Dense(1)]) - model_class = model.__class__.__bases__[0] + model_class = model.__class__.__bases__[0].__bases__[0] self._check_model_class(model_class) self._check_layer_class(model) diff --git a/tensorflow/python/keras/utils/vis_utils.py b/tensorflow/python/keras/utils/vis_utils.py index 87c436a5bd7..158f6c83748 100644 --- a/tensorflow/python/keras/utils/vis_utils.py +++ b/tensorflow/python/keras/utils/vis_utils.py @@ -55,10 +55,10 @@ def check_pydot(): def is_wrapped_model(layer): - from tensorflow.python.keras.engine import network + from tensorflow.python.keras.engine import functional from tensorflow.python.keras.layers import wrappers return (isinstance(layer, wrappers.Wrapper) and - isinstance(layer.layer, network.Network)) + isinstance(layer.layer, functional.Functional)) def add_edge(dot, src, dst): @@ -98,7 +98,7 @@ def model_to_dot(model, """ from tensorflow.python.keras.layers import wrappers from tensorflow.python.keras.engine import sequential - from tensorflow.python.keras.engine import network + from tensorflow.python.keras.engine import functional if not check_pydot(): message = ( @@ -147,7 +147,8 @@ def model_to_dot(model, class_name = layer.__class__.__name__ if isinstance(layer, wrappers.Wrapper): - if expand_nested and isinstance(layer.layer, network.Network): + if expand_nested and isinstance(layer.layer, + functional.Functional): submodel_wrapper = model_to_dot(layer.layer, show_shapes, show_layer_names, rankdir, expand_nested, @@ -162,7 +163,7 @@ def model_to_dot(model, child_class_name = layer.layer.__class__.__name__ class_name = '{}({})'.format(class_name, child_class_name) - if expand_nested and isinstance(layer, network.Network): + if expand_nested and isinstance(layer, functional.Functional): submodel_not_wrapper = model_to_dot(layer, show_shapes, show_layer_names, rankdir, expand_nested, @@ -200,7 +201,8 @@ def model_to_dot(model, 
inputlabels, outputlabels) - if not expand_nested or not isinstance(layer, network.Network): + if not expand_nested or not isinstance( + layer, functional.Functional): node = pydot.Node(layer_id, label=label) dot.add_node(node) @@ -218,16 +220,17 @@ def model_to_dot(model, add_edge(dot, inbound_layer_id, layer_id) else: # if inbound_layer is not Model or wrapped Model - if (not isinstance(inbound_layer, network.Network) and + if (not isinstance(inbound_layer, + functional.Functional) and not is_wrapped_model(inbound_layer)): # if current layer is not Model or wrapped Model - if (not isinstance(layer, network.Network) and + if (not isinstance(layer, functional.Functional) and not is_wrapped_model(layer)): assert dot.get_node(inbound_layer_id) assert dot.get_node(layer_id) add_edge(dot, inbound_layer_id, layer_id) # if current layer is Model - elif isinstance(layer, network.Network): + elif isinstance(layer, functional.Functional): add_edge(dot, inbound_layer_id, sub_n_first_node[layer.name].get_name()) # if current layer is wrapped Model @@ -236,9 +239,9 @@ def model_to_dot(model, name = sub_w_first_node[layer.layer.name].get_name() add_edge(dot, layer_id, name) # if inbound_layer is Model - elif isinstance(inbound_layer, network.Network): + elif isinstance(inbound_layer, functional.Functional): name = sub_n_last_node[inbound_layer.name].get_name() - if isinstance(layer, network.Network): + if isinstance(layer, functional.Functional): output_name = sub_n_first_node[layer.name].get_name() add_edge(dot, name, output_name) else: diff --git a/tensorflow/python/kernel_tests/check_ops_test.py b/tensorflow/python/kernel_tests/check_ops_test.py index 47f392d7438..37ee8d38f53 100644 --- a/tensorflow/python/kernel_tests/check_ops_test.py +++ b/tensorflow/python/kernel_tests/check_ops_test.py @@ -1688,8 +1688,6 @@ class AssertShapesTest(test.TestCase): rank_three_shapes, array_ops.constant(1), correct_rank=3, actual_rank=0) def test_raises_dynamic_incorrect_rank(self): - 
self.skipTest("b/134600611") - x_value = 5 rank_two_shapes = [(1, 1), (1, 3), ("a", "b"), (None, None)] with ops.Graph().as_default(): diff --git a/tensorflow/python/kernel_tests/cholesky_op_test.py b/tensorflow/python/kernel_tests/cholesky_op_test.py index e17a029c5ff..7d5f7715eb1 100644 --- a/tensorflow/python/kernel_tests/cholesky_op_test.py +++ b/tensorflow/python/kernel_tests/cholesky_op_test.py @@ -114,12 +114,14 @@ class CholeskyOpTest(test.TestCase): def testBasic(self): data = np.array([[4., -1., 2.], [-1., 6., 0], [2., 0., 5.]]) for dtype in (np.float32, np.float64): - self._verifyCholesky(data.astype(dtype)) + with self.subTest(dtype=dtype): + self._verifyCholesky(data.astype(dtype)) for dtype in (np.complex64, np.complex128): - complex_data = np.tril(1j * data, -1).astype(dtype) - complex_data += np.triu(-1j * data, 1).astype(dtype) - complex_data += data - self._verifyCholesky(complex_data) + with self.subTest(dtype=dtype): + complex_data = np.tril(1j * data, -1).astype(dtype) + complex_data += np.triu(-1j * data, 1).astype(dtype) + complex_data += data + self._verifyCholesky(complex_data) def testBatch(self): simple_array = np.array([[[1., 0.], [0., 5.]]]) # shape (1, 2, 2) @@ -131,13 +133,15 @@ class CholeskyOpTest(test.TestCase): # Generate random positive-definite matrices. matrices = np.random.rand(10, 5, 5) for i in xrange(10): - matrices[i] = np.dot(matrices[i].T, matrices[i]) + with self.subTest(i=i): + matrices[i] = np.dot(matrices[i].T, matrices[i]) self._verifyCholesky(matrices) # Generate random complex valued positive-definite matrices. 
matrices = np.random.rand(10, 5, 5) + 1j * np.random.rand(10, 5, 5) for i in xrange(10): - matrices[i] = np.dot(matrices[i].T.conj(), matrices[i]) + with self.subTest(i=i): + matrices[i] = np.dot(matrices[i].T.conj(), matrices[i]) self._verifyCholesky(matrices) @test_util.run_deprecated_v1 diff --git a/tensorflow/python/kernel_tests/cwise_ops_test.py b/tensorflow/python/kernel_tests/cwise_ops_test.py index 303d2791d07..8c84bde1431 100644 --- a/tensorflow/python/kernel_tests/cwise_ops_test.py +++ b/tensorflow/python/kernel_tests/cwise_ops_test.py @@ -97,23 +97,27 @@ class ComparisonOpTest(test.TestCase): for t in dtypes: for x in data: for y in data: - self.assertEqual(self._compareScalar(math_ops.less, x, y, t), x < y) - self.assertEqual( - self._compareScalar(math_ops.less_equal, x, y, t), x <= y) - self.assertEqual( - self._compareScalar(math_ops.greater, x, y, t), x > y) - self.assertEqual( - self._compareScalar(math_ops.greater_equal, x, y, t), x >= y) - self.assertEqual(self._compareScalar(math_ops.equal, x, y, t), x == y) - self.assertEqual( - self._compareScalar(math_ops.not_equal, x, y, t), x != y) + with self.subTest(t=t, x=x, y=y): + self.assertEqual(self._compareScalar(math_ops.less, x, y, t), x < y) + self.assertEqual( + self._compareScalar(math_ops.less_equal, x, y, t), x <= y) + self.assertEqual( + self._compareScalar(math_ops.greater, x, y, t), x > y) + self.assertEqual( + self._compareScalar(math_ops.greater_equal, x, y, t), x >= y) + self.assertEqual( + self._compareScalar(math_ops.equal, x, y, t), x == y) + self.assertEqual( + self._compareScalar(math_ops.not_equal, x, y, t), x != y) data = [-1, 0, 1, -1j, 1j, 1 + 1j, 1 - 1j] for t in [np.complex64, np.complex128]: for x in data: for y in data: - self.assertEqual(self._compareScalar(math_ops.equal, x, y, t), x == y) - self.assertEqual( - self._compareScalar(math_ops.not_equal, x, y, t), x != y) + with self.subTest(t=t, x=x, y=y): + self.assertEqual( + self._compareScalar(math_ops.equal, x, y, t), 
x == y) + self.assertEqual( + self._compareScalar(math_ops.not_equal, x, y, t), x != y) def _compare(self, x, y, np_func, tf_func): np_ans = np_func(x, y) @@ -126,22 +130,24 @@ class ComparisonOpTest(test.TestCase): x = np.linspace(-15, 15, 6).reshape(1, 3, 2) y = np.linspace(20, -10, 6).reshape(1, 3, 2) for t in [np.float16, np.float32, np.float64, np.int32, np.int64]: - xt = x.astype(t) - yt = y.astype(t) - self._compare(xt, yt, np.less, math_ops.less) - self._compare(xt, yt, np.less_equal, math_ops.less_equal) - self._compare(xt, yt, np.greater, math_ops.greater) - self._compare(xt, yt, np.greater_equal, math_ops.greater_equal) - self._compare(xt, yt, np.equal, math_ops.equal) - self._compare(xt, yt, np.not_equal, math_ops.not_equal) + with self.subTest(t=t): + xt = x.astype(t) + yt = y.astype(t) + self._compare(xt, yt, np.less, math_ops.less) + self._compare(xt, yt, np.less_equal, math_ops.less_equal) + self._compare(xt, yt, np.greater, math_ops.greater) + self._compare(xt, yt, np.greater_equal, math_ops.greater_equal) + self._compare(xt, yt, np.equal, math_ops.equal) + self._compare(xt, yt, np.not_equal, math_ops.not_equal) # Complex types do not support ordering but do support equality tests. 
for t in [np.complex64, np.complex128]: - xt = x.astype(t) - xt -= 1j * xt - yt = y.astype(t) - yt -= 1j * yt - self._compare(xt, yt, np.equal, math_ops.equal) - self._compare(xt, yt, np.not_equal, math_ops.not_equal) + with self.subTest(t=t): + xt = x.astype(t) + xt -= 1j * xt + yt = y.astype(t) + yt -= 1j * yt + self._compare(xt, yt, np.equal, math_ops.equal) + self._compare(xt, yt, np.not_equal, math_ops.not_equal) def _compareBCast(self, xs, ys, dtype, np_func, tf_func): x = np.linspace(-15, 15, np.prod(xs)).astype(dtype).reshape(xs) @@ -178,7 +184,8 @@ class ComparisonOpTest(test.TestCase): for (xs, ys) in shapes: for dtype in dtypes: - self._compareBCast(xs, ys, dtype, np_func, tf_func) + with self.subTest(xs=xs, ys=ys, dtype=dtype): + self._compareBCast(xs, ys, dtype, np_func, tf_func) def testBCastLess(self): self._testBCastByFunc(np.less, math_ops.less) @@ -209,10 +216,11 @@ class ComparisonOpTest(test.TestCase): y = np.arange(0, 10).reshape([5, 2]) for t in dtypes: for f in funcs: - with self.assertRaisesRegexp( - (ValueError, errors.InvalidArgumentError), - "Incompatible shapes|Dimensions must be equal"): - f(x.astype(t), y.astype(t)) + with self.subTest(t=t, f=f): + with self.assertRaisesRegexp( + (ValueError, errors.InvalidArgumentError), + "Incompatible shapes|Dimensions must be equal"): + f(x.astype(t), y.astype(t)) class LogicalOpTest(test.TestCase): @@ -241,23 +249,27 @@ class LogicalOpTest(test.TestCase): data = [np.array([True]), np.array([False])] for use_gpu in [True, False]: for x in data: - self._not(x, use_gpu) + with self.subTest(use_gpu=use_gpu, x=x): + self._not(x, use_gpu) for x in data: for y in data: - self._compareBinary(x, y, np.logical_and, math_ops.logical_and, - use_gpu) - self._compareBinary(x, y, np.logical_or, math_ops.logical_or, use_gpu) - self._compareBinary(x, y, np.logical_xor, math_ops.logical_xor, - use_gpu) + with self.subTest(use_gpu=use_gpu, x=x, y=y): + self._compareBinary(x, y, np.logical_and, math_ops.logical_and, 
+ use_gpu) + self._compareBinary(x, y, np.logical_or, math_ops.logical_or, + use_gpu) + self._compareBinary(x, y, np.logical_xor, math_ops.logical_xor, + use_gpu) def testTensor(self): x = np.random.randint(0, 2, 6).astype(np.bool).reshape(1, 3, 2) y = np.random.randint(0, 2, 6).astype(np.bool).reshape(1, 3, 2) for use_gpu in [True, False]: - self._not(x, use_gpu) - self._compareBinary(x, y, np.logical_and, math_ops.logical_and, use_gpu) - self._compareBinary(x, y, np.logical_or, math_ops.logical_or, use_gpu) - self._compareBinary(x, y, np.logical_xor, math_ops.logical_xor, use_gpu) + with self.subTest(use_gpu=use_gpu): + self._not(x, use_gpu) + self._compareBinary(x, y, np.logical_and, math_ops.logical_and, use_gpu) + self._compareBinary(x, y, np.logical_or, math_ops.logical_or, use_gpu) + self._compareBinary(x, y, np.logical_xor, math_ops.logical_xor, use_gpu) def testBCast(self): shapes = [ @@ -277,18 +289,22 @@ class LogicalOpTest(test.TestCase): x = np.random.randint(0, 2, np.prod(xs)).astype(np.bool).reshape(xs) y = np.random.randint(0, 2, np.prod(ys)).astype(np.bool).reshape(ys) for use_gpu in [True, False]: - self._compareBinary(x, y, np.logical_and, math_ops.logical_and, use_gpu) - self._compareBinary(x, y, np.logical_or, math_ops.logical_or, use_gpu) - self._compareBinary(x, y, np.logical_xor, math_ops.logical_xor, use_gpu) + with self.subTest(xs=xs, ys=ys, use_gpu=use_gpu): + self._compareBinary(x, y, np.logical_and, math_ops.logical_and, + use_gpu) + self._compareBinary(x, y, np.logical_or, math_ops.logical_or, use_gpu) + self._compareBinary(x, y, np.logical_xor, math_ops.logical_xor, + use_gpu) @test_util.run_deprecated_v1 def testShapeMismatch(self): x = np.random.randint(0, 2, 6).astype(np.bool).reshape(1, 3, 2) y = np.random.randint(0, 2, 6).astype(np.bool).reshape(3, 2, 1) for f in [math_ops.logical_and, math_ops.logical_or, math_ops.logical_xor]: - with self.assertRaisesWithPredicateMatch( - ValueError, lambda e: "Dimensions must" in str(e)): - 
f(x, y) + with self.subTest(f=f): + with self.assertRaisesWithPredicateMatch( + ValueError, lambda e: "Dimensions must" in str(e)): + f(x, y) @test_util.run_deprecated_v1 def testUsingAsPythonValueFails(self): @@ -389,11 +405,12 @@ class SelectOpTest(test.TestCase): np.float16, np.float32, np.float64, np.int32, np.int64, np.complex64, np.complex128 ]: - xt = x.astype(t) - yt = y.astype(t) - self._compare(fn, c, xt, yt, use_gpu=False) - if t in [np.float16, np.float32, np.float64]: - self._compare(fn, c, xt, yt, use_gpu=True) + with self.subTest(t=t): + xt = x.astype(t) + yt = y.astype(t) + self._compare(fn, c, xt, yt, use_gpu=False) + if t in [np.float16, np.float32, np.float64]: + self._compare(fn, c, xt, yt, use_gpu=True) def testScalar(self): self._testScalar(array_ops.where) @@ -404,11 +421,12 @@ class SelectOpTest(test.TestCase): np.float16, np.float32, np.float64, np.int32, np.int64, np.complex64, np.complex128 ]: - xt = x.astype(t) - yt = y.astype(t) - self._compare(fn, c, xt, yt, use_gpu=False) - if t in [np.float16, np.float32, np.float64]: - self._compare(fn, c, xt, yt, use_gpu=True) + with self.subTest(t=t): + xt = x.astype(t) + yt = y.astype(t) + self._compare(fn, c, xt, yt, use_gpu=False) + if t in [np.float16, np.float32, np.float64]: + self._compare(fn, c, xt, yt, use_gpu=True) def testScalarBroadcast(self): c = True @@ -450,11 +468,12 @@ class SelectOpTest(test.TestCase): np.float16, np.float32, np.float64, np.int32, np.int64, np.complex64, np.complex128 ]: - xt = x.astype(t) - yt = y.astype(t) - self._compare(fn, c, xt, yt, use_gpu=False) - if t in [np.float16, np.float32, np.float64]: - self._compare(fn, c, xt, yt, use_gpu=True) + with self.subTest(t=t): + xt = x.astype(t) + yt = y.astype(t) + self._compare(fn, c, xt, yt, use_gpu=False) + if t in [np.float16, np.float32, np.float64]: + self._compare(fn, c, xt, yt, use_gpu=True) def testBasic(self): self._testBasic(array_ops.where) @@ -465,11 +484,12 @@ class SelectOpTest(test.TestCase): 
np.float16, np.float32, np.float64, np.int32, np.int64, np.complex64, np.complex128 ]: - xt = x.astype(t) - yt = y.astype(t) - self._compare(fn, c, xt, yt, use_gpu=False) - if t in [np.float16, np.float32, np.float64]: - self._compare(fn, c, xt, yt, use_gpu=True) + with self.subTest(t=t): + xt = x.astype(t) + yt = y.astype(t) + self._compare(fn, c, xt, yt, use_gpu=False) + if t in [np.float16, np.float32, np.float64]: + self._compare(fn, c, xt, yt, use_gpu=True) def testBasicBroadcast(self): c0 = np.random.randint(0, 2, 6).astype(np.bool).reshape(1, 3, 2) @@ -478,53 +498,55 @@ class SelectOpTest(test.TestCase): c3 = np.random.randint(0, 2, 1).astype(np.bool).reshape(1, 1, 1) for c in [c0, c1, c2, c3]: # where_v2 only - x = np.random.rand(1, 3, 2) * 100 - y = np.random.rand(1, 1, 1) * 100 - self._testBasicBroadcast(array_ops.where_v2, c, x, y) - self._testBasicBroadcast(array_ops.where_v2, c, y, x) - x = np.random.rand(1, 3, 2) * 100 - y = np.random.rand(1, 3, 1) * 100 - self._testBasicBroadcast(array_ops.where_v2, c, x, y) - self._testBasicBroadcast(array_ops.where_v2, c, y, x) - x = np.random.rand(1, 3, 2) * 100 - y = np.random.rand(1, 1, 2) * 100 - self._testBasicBroadcast(array_ops.where_v2, c, x, y) - self._testBasicBroadcast(array_ops.where_v2, c, y, x) - x = np.random.rand(1, 3, 2) * 100 - y = np.random.rand(1, 1) * 100 - self._testBasicBroadcast(array_ops.where_v2, c, x, y) - self._testBasicBroadcast(array_ops.where_v2, c, y, x) - x = np.random.rand(1, 3, 2) * 100 - y = np.random.rand(1) * 100 - self._testBasicBroadcast(array_ops.where_v2, c, x, y) - self._testBasicBroadcast(array_ops.where_v2, c, y, x) - x = np.random.rand(1, 3, 2) * 100 - y = np.random.rand(1, 2) * 100 - self._testBasicBroadcast(array_ops.where_v2, c, x, y) - self._testBasicBroadcast(array_ops.where_v2, c, y, x) - x = np.random.rand(1, 3, 2) * 100 - y = np.random.rand(3, 2) * 100 - self._testBasicBroadcast(array_ops.where_v2, c, x, y) - self._testBasicBroadcast(array_ops.where_v2, c, y, x) 
+ with self.subTest(c=c): + x = np.random.rand(1, 3, 2) * 100 + y = np.random.rand(1, 1, 1) * 100 + self._testBasicBroadcast(array_ops.where_v2, c, x, y) + self._testBasicBroadcast(array_ops.where_v2, c, y, x) + x = np.random.rand(1, 3, 2) * 100 + y = np.random.rand(1, 3, 1) * 100 + self._testBasicBroadcast(array_ops.where_v2, c, x, y) + self._testBasicBroadcast(array_ops.where_v2, c, y, x) + x = np.random.rand(1, 3, 2) * 100 + y = np.random.rand(1, 1, 2) * 100 + self._testBasicBroadcast(array_ops.where_v2, c, x, y) + self._testBasicBroadcast(array_ops.where_v2, c, y, x) + x = np.random.rand(1, 3, 2) * 100 + y = np.random.rand(1, 1) * 100 + self._testBasicBroadcast(array_ops.where_v2, c, x, y) + self._testBasicBroadcast(array_ops.where_v2, c, y, x) + x = np.random.rand(1, 3, 2) * 100 + y = np.random.rand(1) * 100 + self._testBasicBroadcast(array_ops.where_v2, c, x, y) + self._testBasicBroadcast(array_ops.where_v2, c, y, x) + x = np.random.rand(1, 3, 2) * 100 + y = np.random.rand(1, 2) * 100 + self._testBasicBroadcast(array_ops.where_v2, c, x, y) + self._testBasicBroadcast(array_ops.where_v2, c, y, x) + x = np.random.rand(1, 3, 2) * 100 + y = np.random.rand(3, 2) * 100 + self._testBasicBroadcast(array_ops.where_v2, c, x, y) + self._testBasicBroadcast(array_ops.where_v2, c, y, x) def _testGradients(self, fn): c = np.random.randint(0, 2, 6).astype(np.bool).reshape(1, 3, 2) x = np.random.rand(1, 3, 2) * 100 y = np.random.rand(1, 3, 2) * 100 for t in [np.float16, np.float32, np.float64]: - xt = x.astype(t) - yt = y.astype(t) - if t == np.float16: - # Compare fp16 theoretical gradients to fp32 numerical gradients, - # since fp16 numerical gradients are too imprecise unless great - # care is taken with choosing the inputs and the delta. This is - # a weaker check (in particular, it does not test the op itself, - # only its gradient), but it's much better than nothing. 
- self._compareGradientX(fn, c, xt, yt, np.float) - self._compareGradientY(fn, c, xt, yt, np.float) - else: - self._compareGradientX(fn, c, xt, yt) - self._compareGradientY(fn, c, xt, yt) + with self.subTest(t=t): + xt = x.astype(t) + yt = y.astype(t) + if t == np.float16: + # Compare fp16 theoretical gradients to fp32 numerical gradients, + # since fp16 numerical gradients are too imprecise unless great + # care is taken with choosing the inputs and the delta. This is + # a weaker check (in particular, it does not test the op itself, + # only its gradient), but it's much better than nothing. + self._compareGradientX(fn, c, xt, yt, np.float) + self._compareGradientY(fn, c, xt, yt, np.float) + else: + self._compareGradientX(fn, c, xt, yt) + self._compareGradientY(fn, c, xt, yt) @test_util.run_deprecated_v1 def testGradients(self): @@ -536,27 +558,28 @@ class SelectOpTest(test.TestCase): c = np.random.randint(0, 2, 6).astype(np.bool).reshape(1, 3, 2) for t in [np.float32, np.float64]: # where_v2 only - x = np.random.rand(1, 3, 2) * 100 - y = np.random.rand(1, 1, 1) * 100 - self._compareGradientX(array_ops.where_v2, c, x.astype(t), y.astype(t)) - x = np.random.rand(1, 3, 2) * 100 - y = np.random.rand(1, 3, 1) * 100 - self._compareGradientX(array_ops.where_v2, c, x.astype(t), y.astype(t)) - x = np.random.rand(1, 3, 2) * 100 - y = np.random.rand(1, 1, 2) * 100 - self._compareGradientX(array_ops.where_v2, c, x.astype(t), y.astype(t)) - x = np.random.rand(1, 3, 2) * 100 - y = np.random.rand(1, 1) * 100 - self._compareGradientX(array_ops.where_v2, c, x.astype(t), y.astype(t)) - x = np.random.rand(1, 3, 2) * 100 - y = np.random.rand(1) * 100 - self._compareGradientX(array_ops.where_v2, c, x.astype(t), y.astype(t)) - x = np.random.rand(1, 3, 2) * 100 - y = np.random.rand(1, 2) * 100 - self._compareGradientX(array_ops.where_v2, c, x.astype(t), y.astype(t)) - x = np.random.rand(1, 3, 2) * 100 - y = np.random.rand(3, 2) * 100 - self._compareGradientX(array_ops.where_v2, c, 
x.astype(t), y.astype(t)) + with self.subTest(t=t): + x = np.random.rand(1, 3, 2) * 100 + y = np.random.rand(1, 1, 1) * 100 + self._compareGradientX(array_ops.where_v2, c, x.astype(t), y.astype(t)) + x = np.random.rand(1, 3, 2) * 100 + y = np.random.rand(1, 3, 1) * 100 + self._compareGradientX(array_ops.where_v2, c, x.astype(t), y.astype(t)) + x = np.random.rand(1, 3, 2) * 100 + y = np.random.rand(1, 1, 2) * 100 + self._compareGradientX(array_ops.where_v2, c, x.astype(t), y.astype(t)) + x = np.random.rand(1, 3, 2) * 100 + y = np.random.rand(1, 1) * 100 + self._compareGradientX(array_ops.where_v2, c, x.astype(t), y.astype(t)) + x = np.random.rand(1, 3, 2) * 100 + y = np.random.rand(1) * 100 + self._compareGradientX(array_ops.where_v2, c, x.astype(t), y.astype(t)) + x = np.random.rand(1, 3, 2) * 100 + y = np.random.rand(1, 2) * 100 + self._compareGradientX(array_ops.where_v2, c, x.astype(t), y.astype(t)) + x = np.random.rand(1, 3, 2) * 100 + y = np.random.rand(3, 2) * 100 + self._compareGradientX(array_ops.where_v2, c, x.astype(t), y.astype(t)) def _testShapeMismatch(self, fn): c = np.random.randint(0, 2, 6).astype(np.bool).reshape(1, 3, 2) @@ -566,10 +589,11 @@ class SelectOpTest(test.TestCase): np.float16, np.float32, np.float64, np.int32, np.int64, np.complex64, np.complex128 ]: - xt = x.astype(t) - yt = y.astype(t) - with self.assertRaises(ValueError): - fn(c, xt, yt) + with self.subTest(t=t): + xt = x.astype(t) + yt = y.astype(t) + with self.assertRaises(ValueError): + fn(c, xt, yt) @test_util.run_deprecated_v1 def testShapeMismatch(self): @@ -597,9 +621,10 @@ class SelectOpTest(test.TestCase): for c in False, True: for a in 7.0, np.nan: for b in 5.0, np.nan: - x = fn(c, a, b).eval() - y = a if c else b - self.assertEqual(np.isnan(x), np.isnan(y)) + with self.subTest(c=c, a=a, b=b): + x = fn(c, a, b).eval() + y = a if c else b + self.assertEqual(np.isnan(x), np.isnan(y)) @test_util.run_deprecated_v1 def testNan(self): @@ -677,11 +702,12 @@ class 
BatchSelectOpTest(test.TestCase): np.float16, np.float32, np.float64, np.int32, np.int64, np.complex64, np.complex128 ]: - xt = x.astype(t) - yt = y.astype(t) - self._compare(c, xt, yt, use_gpu=False) - if t in [np.float16, np.float32, np.float64]: - self._compare(c, xt, yt, use_gpu=True) + with self.subTest(t=t): + xt = x.astype(t) + yt = y.astype(t) + self._compare(c, xt, yt, use_gpu=False) + if t in [np.float16, np.float32, np.float64]: + self._compare(c, xt, yt, use_gpu=True) @test_util.run_deprecated_v1 def testGradients(self): @@ -689,19 +715,20 @@ class BatchSelectOpTest(test.TestCase): x = np.random.rand(16, 2, 8) * 100 y = np.random.rand(16, 2, 8) * 100 for t in [np.float16, np.float32, np.float64]: - xt = x.astype(t) - yt = y.astype(t) - if t == np.float16: - # Compare fp16 theoretical gradients to fp32 numerical gradients, - # since fp16 numerical gradients are too imprecise unless great - # care is taken with choosing the inputs and the delta. This is - # a weaker check (in particular, it does not test the op itself, - # only its gradient), but it's much better than nothing. - self._compareGradientX(c, xt, yt, np.float) - self._compareGradientY(c, xt, yt, np.float) - else: - self._compareGradientX(c, xt, yt) - self._compareGradientY(c, xt, yt) + with self.subTest(t=t): + xt = x.astype(t) + yt = y.astype(t) + if t == np.float16: + # Compare fp16 theoretical gradients to fp32 numerical gradients, + # since fp16 numerical gradients are too imprecise unless great + # care is taken with choosing the inputs and the delta. This is + # a weaker check (in particular, it does not test the op itself, + # only its gradient), but it's much better than nothing. 
+ self._compareGradientX(c, xt, yt, np.float) + self._compareGradientY(c, xt, yt, np.float) + else: + self._compareGradientX(c, xt, yt) + self._compareGradientY(c, xt, yt) @test_util.run_deprecated_v1 def testShapeMismatch(self): @@ -712,10 +739,11 @@ class BatchSelectOpTest(test.TestCase): np.float16, np.float32, np.float64, np.int32, np.int64, np.complex64, np.complex128 ]: - xt = x.astype(t) - yt = y.astype(t) - with self.assertRaises(ValueError): - array_ops.where(c, xt, yt) + with self.subTest(t=t): + xt = x.astype(t) + yt = y.astype(t) + with self.assertRaises(ValueError): + array_ops.where(c, xt, yt) class MinMaxOpTest(test.TestCase): @@ -735,23 +763,26 @@ class MinMaxOpTest(test.TestCase): y = np.random.rand(1, 3, 2) * 100. for t in [np.float16, np.float32, np.float64, np.uint8, np.int16, np.int32, np.int64]: - self._compare(x.astype(t), y.astype(t), use_gpu=False) - self._compare(x.astype(t), y.astype(t), use_gpu=True) + with self.subTest(t=t): + self._compare(x.astype(t), y.astype(t), use_gpu=False) + self._compare(x.astype(t), y.astype(t), use_gpu=True) def testDifferentShapes(self): x = np.random.rand(1, 3, 2) * 100. y = np.random.rand(2) * 100. # should broadcast for t in [np.float16, np.float32, np.float64, np.int32, np.int64]: - self._compare(x.astype(t), y.astype(t), use_gpu=False) - self._compare(x.astype(t), y.astype(t), use_gpu=True) + with self.subTest(t=t): + self._compare(x.astype(t), y.astype(t), use_gpu=False) + self._compare(x.astype(t), y.astype(t), use_gpu=True) def testScalar(self): x = np.random.rand(1, 3, 2) * 100. y = np.random.rand(1).item() * 100. 
# should broadcast # dropped np.float64, int64 because TF automatically converts to 32 bit for t in [np.float32, np.int32]: - self._compare(x.astype(t), t(y), use_gpu=False) - self._compare(x.astype(t), t(y), use_gpu=True) + with self.subTest(t=t): + self._compare(x.astype(t), t(y), use_gpu=False) + self._compare(x.astype(t), t(y), use_gpu=True) def _compareGradientX(self, func, x, y): with self.cached_session(): @@ -841,13 +872,15 @@ class MathOpsOverloadTest(test.TestCase): ] for dtype in dtypes: for np_func, tf_func in funcs: - if dtype in (dtypes_lib.complex64, - dtypes_lib.complex128) and tf_func == _FLOORDIV: - continue # floordiv makes no sense for complex - self._compareBinary(10, 5, dtype, np_func, tf_func) + with self.subTest(dtype=dtype, np_func=np_func, tf_func=tf_func): + if dtype in (dtypes_lib.complex64, + dtypes_lib.complex128) and tf_func == _FLOORDIV: + continue # floordiv makes no sense for complex + self._compareBinary(10, 5, dtype, np_func, tf_func) # Mod only works for int32 and int64. 
for dtype in [dtypes_lib.int32, dtypes_lib.int64]: - self._compareBinary(10, 3, dtype, np.mod, _MOD) + with self.subTest(dtype=dtype): + self._compareBinary(10, 3, dtype, np.mod, _MOD) def testOverloadComparisons(self): dtypes = [ @@ -865,18 +898,20 @@ class MathOpsOverloadTest(test.TestCase): ] for dtype in dtypes: for np_func, tf_func in funcs: - self._compareBinary(10, 5, dtype, np_func, tf_func) + with self.subTest(dtype=dtype, np_func=np_func, tf_func=tf_func): + self._compareBinary(10, 5, dtype, np_func, tf_func) logical_funcs = [(np.logical_and, _AND), (np.logical_or, _OR), (np.logical_xor, _XOR), (np.equal, math_ops.equal), (np.not_equal, math_ops.not_equal)] for np_func, tf_func in logical_funcs: - self._compareBinary(True, False, dtypes_lib.bool, np_func, tf_func) - self._compareBinary(True, True, dtypes_lib.bool, np_func, tf_func) - self._compareBinary(False, False, dtypes_lib.bool, np_func, tf_func) - self._compareBinary(False, True, dtypes_lib.bool, np_func, tf_func) - self._compareBinary([True, True, False, False], - [True, False, True, False], dtypes_lib.bool, np_func, - tf_func) + with self.subTest(np_func=np_func, tf_func=tf_func): + self._compareBinary(True, False, dtypes_lib.bool, np_func, tf_func) + self._compareBinary(True, True, dtypes_lib.bool, np_func, tf_func) + self._compareBinary(False, False, dtypes_lib.bool, np_func, tf_func) + self._compareBinary(False, True, dtypes_lib.bool, np_func, tf_func) + self._compareBinary([True, True, False, False], + [True, False, True, False], dtypes_lib.bool, + np_func, tf_func) self._compareUnary(True, dtypes_lib.bool, np.logical_not, _INV) self._compareUnary(False, dtypes_lib.bool, np.logical_not, _INV) self._compareUnary([True, False], dtypes_lib.bool, np.logical_not, _INV) @@ -924,16 +959,17 @@ class IsFiniteInfNanTest(test.TestCase): # It is not accurate for very large arguments, so we test for # fi.max/100 instead of fi.max here. 
for value in [fi.min, -2, -1, 0, fi.tiny, 1, 2, 1000, fi.max / 100]: - x = np.full((size,), value, dtype=dtype) - np_y = np.sqrt(x) - np_nan = np.isnan(np_y) - with test_util.use_gpu(): - tf_y = math_ops.sqrt(x) - tf_nan = math_ops.is_nan(tf_y) - if value < 0: - self.assertAllEqual(np_nan, self.evaluate(tf_nan)) - else: - self.assertAllCloseAccordingToType(np_y, self.evaluate(tf_y)) + with self.subTest(dtype=dtype, size=size, value=value): + x = np.full((size,), value, dtype=dtype) + np_y = np.sqrt(x) + np_nan = np.isnan(np_y) + with test_util.use_gpu(): + tf_y = math_ops.sqrt(x) + tf_nan = math_ops.is_nan(tf_y) + if value < 0: + self.assertAllEqual(np_nan, self.evaluate(tf_nan)) + else: + self.assertAllCloseAccordingToType(np_y, self.evaluate(tf_y)) class RoundingTest(test.TestCase): @@ -978,7 +1014,8 @@ class RoundingTest(test.TestCase): def testTypes(self): self.skipTest("b/131162241") for dtype in [np.float16, np.float32, np.float64]: - self._testDtype(dtype) + with self.subTest(dtype=dtype): + self._testDtype(dtype) class ComplexMakeRealImagTest(test.TestCase): @@ -999,19 +1036,21 @@ class ComplexMakeRealImagTest(test.TestCase): real = (np.arange(-3, 3) / 4.).reshape([1, 3, 2]).astype(np.float32) imag = (np.arange(-3, 3) / 5.).reshape([1, 3, 2]).astype(np.float32) for use_gpu in [False, True]: - self._compareMake(real, imag, use_gpu) - self._compareMake(real, 12.0, use_gpu) - self._compareMake(23.0, imag, use_gpu) + with self.subTest(use_gpu=use_gpu): + self._compareMake(real, imag, use_gpu) + self._compareMake(real, 12.0, use_gpu) + self._compareMake(23.0, imag, use_gpu) def testRealImagNumericType(self): for use_gpu in [True, False]: for value in [1., 1j, 1. 
+ 1j]: - np_real, np_imag = np.real(value), np.imag(value) - with test_util.device(use_gpu=use_gpu): - tf_real = math_ops.real(value) - tf_imag = math_ops.imag(value) - self.assertAllEqual(np_real, self.evaluate(tf_real)) - self.assertAllEqual(np_imag, self.evaluate(tf_imag)) + with self.subTest(use_gpu=use_gpu, value=value): + np_real, np_imag = np.real(value), np.imag(value) + with test_util.device(use_gpu=use_gpu): + tf_real = math_ops.real(value) + tf_imag = math_ops.imag(value) + self.assertAllEqual(np_real, self.evaluate(tf_real)) + self.assertAllEqual(np_imag, self.evaluate(tf_imag)) def _compareRealImag(self, cplx, use_gpu): np_real, np_imag = np.real(cplx), np.imag(cplx) @@ -1079,9 +1118,10 @@ class ComplexMakeRealImagTest(test.TestCase): def testRealReal(self): for dtype in (dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.float32, dtypes_lib.float64): - x = array_ops.placeholder(dtype) - y = math_ops.real(x) - self.assertEqual(x, y) + with self.subTest(dtype=dtype): + x = array_ops.placeholder(dtype) + y = math_ops.real(x) + self.assertEqual(x, y) def _compareConj(self, cplx, use_gpu): np_ans = np.conj(cplx) @@ -1110,9 +1150,10 @@ class ComplexMakeRealImagTest(test.TestCase): def testConjReal(self): for dtype in (dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.float16, dtypes_lib.float32, dtypes_lib.float64): - x = array_ops.placeholder(dtype) - y = math_ops.conj(x) - self.assertEqual(x, y) + with self.subTest(dtype=dtype): + x = array_ops.placeholder(dtype) + y = math_ops.conj(x) + self.assertEqual(x, y) @test_util.run_deprecated_v1 def testConjString(self): @@ -1146,10 +1187,11 @@ class ComplexMakeRealImagTest(test.TestCase): epsilon = 1e-3 with self.cached_session(): for args in [(x_, 0.), (0., x_)]: - z = math_ops.reduce_sum(math_ops.abs(math_ops.complex(*args))) - jacob_t, jacob_n = gradient_checker.compute_gradient( - x_, list(x.shape), z, [1], x_init_value=x, delta=epsilon) - self.assertAllClose(jacob_t, jacob_n, rtol=epsilon, atol=epsilon) + with 
self.subTest(args=args): + z = math_ops.reduce_sum(math_ops.abs(math_ops.complex(*args))) + jacob_t, jacob_n = gradient_checker.compute_gradient( + x_, list(x.shape), z, [1], x_init_value=x, delta=epsilon) + self.assertAllClose(jacob_t, jacob_n, rtol=epsilon, atol=epsilon) @test_util.run_deprecated_v1 def testGradient(self): @@ -1208,7 +1250,8 @@ class PolyvalTest(test.TestCase): np.int32, np.float32, np.float64, np.complex64, np.complex128 ]: for degree in range(5): - self._runtest(dtype, degree) + with self.subTest(dtype=dtype, degree=degree): + self._runtest(dtype, degree) def testBroadcast(self): dtype = np.float32 @@ -1216,15 +1259,16 @@ class PolyvalTest(test.TestCase): shapes = [(1,), (2, 1), (1, 2), (2, 2)] for x_shape in shapes: for coeff_shape in shapes: - x = np.random.rand(*x_shape).astype(dtype) - coeffs = [ - np.random.rand(*coeff_shape).astype(dtype) - for _ in range(degree + 1) - ] - np_val = np.polyval(coeffs, x) - with self.cached_session(): - tf_val = math_ops.polyval(coeffs, x) - self.assertAllClose(np_val, self.evaluate(tf_val)) + with self.subTest(x_shape=x_shape, coeff_shape=coeff_shape): + x = np.random.rand(*x_shape).astype(dtype) + coeffs = [ + np.random.rand(*coeff_shape).astype(dtype) + for _ in range(degree + 1) + ] + np_val = np.polyval(coeffs, x) + with self.cached_session(): + tf_val = math_ops.polyval(coeffs, x) + self.assertAllClose(np_val, self.evaluate(tf_val)) def testEmpty(self): x = np.random.rand(2, 2).astype(np.float32) diff --git a/tensorflow/python/kernel_tests/linalg_ops_test.py b/tensorflow/python/kernel_tests/linalg_ops_test.py index 20cd128783e..916d9a4b8c8 100644 --- a/tensorflow/python/kernel_tests/linalg_ops_test.py +++ b/tensorflow/python/kernel_tests/linalg_ops_test.py @@ -66,10 +66,11 @@ class CholeskySolveTest(test.TestCase): _RandomPDMatrix(n, self.rng)]).astype(np_type) chol = linalg_ops.cholesky(array) for k in range(1, 3): - rhs = self.rng.randn(2, n, k).astype(np_type) - x = linalg_ops.cholesky_solve(chol, 
rhs) - self.assertAllClose( - rhs, math_ops.matmul(array, x).eval(), atol=atol) + with self.subTest(n=n, np_type=np_type, atol=atol, k=k): + rhs = self.rng.randn(2, n, k).astype(np_type) + x = linalg_ops.cholesky_solve(chol, rhs) + self.assertAllClose( + rhs, math_ops.matmul(array, x).eval(), atol=atol) class LogdetTest(test.TestCase): @@ -82,24 +83,26 @@ class LogdetTest(test.TestCase): for n in range(1, 6): for np_dtype, atol in [(np.float32, 0.05), (np.float64, 1e-5), (np.complex64, 0.05), (np.complex128, 1e-5)]: - matrix = _RandomPDMatrix(n, self.rng, np_dtype) - _, logdet_np = np.linalg.slogdet(matrix) - with self.session(use_gpu=True): - # Create 2 x n x n matrix - # matrix = np.array( - # [_RandomPDMatrix(n, self.rng, np_dtype), - # _RandomPDMatrix(n, self.rng, np_dtype)]).astype(np_dtype) - logdet_tf = linalg.logdet(matrix) - self.assertAllClose(logdet_np, self.evaluate(logdet_tf), atol=atol) + with self.subTest(n=n, np_dtype=np_dtype, atol=atol): + matrix = _RandomPDMatrix(n, self.rng, np_dtype) + _, logdet_np = np.linalg.slogdet(matrix) + with self.session(use_gpu=True): + # Create 2 x n x n matrix + # matrix = np.array( + # [_RandomPDMatrix(n, self.rng, np_dtype), + # _RandomPDMatrix(n, self.rng, np_dtype)]).astype(np_dtype) + logdet_tf = linalg.logdet(matrix) + self.assertAllClose(logdet_np, self.evaluate(logdet_tf), atol=atol) def test_works_with_underflow_case(self): for np_dtype, atol in [(np.float32, 0.05), (np.float64, 1e-5), (np.complex64, 0.05), (np.complex128, 1e-5)]: - matrix = (np.eye(20) * 1e-6).astype(np_dtype) - _, logdet_np = np.linalg.slogdet(matrix) - with self.session(use_gpu=True): - logdet_tf = linalg.logdet(matrix) - self.assertAllClose(logdet_np, self.evaluate(logdet_tf), atol=atol) + with self.subTest(np_dtype=np_dtype, atol=atol): + matrix = (np.eye(20) * 1e-6).astype(np_dtype) + _, logdet_np = np.linalg.slogdet(matrix) + with self.session(use_gpu=True): + logdet_tf = linalg.logdet(matrix) + self.assertAllClose(logdet_np, 
self.evaluate(logdet_tf), atol=atol) class SlogdetTest(test.TestCase): @@ -112,7 +115,20 @@ class SlogdetTest(test.TestCase): for n in range(1, 6): for np_dtype, atol in [(np.float32, 0.05), (np.float64, 1e-5), (np.complex64, 0.05), (np.complex128, 1e-5)]: - matrix = _RandomPDMatrix(n, self.rng, np_dtype) + with self.subTest(n=n, np_dtype=np_dtype, atol=atol): + matrix = _RandomPDMatrix(n, self.rng, np_dtype) + sign_np, log_abs_det_np = np.linalg.slogdet(matrix) + with self.session(use_gpu=True): + sign_tf, log_abs_det_tf = linalg.slogdet(matrix) + self.assertAllClose( + log_abs_det_np, self.evaluate(log_abs_det_tf), atol=atol) + self.assertAllClose(sign_np, self.evaluate(sign_tf), atol=atol) + + def test_works_with_underflow_case(self): + for np_dtype, atol in [(np.float32, 0.05), (np.float64, 1e-5), + (np.complex64, 0.05), (np.complex128, 1e-5)]: + with self.subTest(np_dtype=np_dtype, atol=atol): + matrix = (np.eye(20) * 1e-6).astype(np_dtype) sign_np, log_abs_det_np = np.linalg.slogdet(matrix) with self.session(use_gpu=True): sign_tf, log_abs_det_tf = linalg.slogdet(matrix) @@ -120,30 +136,20 @@ class SlogdetTest(test.TestCase): log_abs_det_np, self.evaluate(log_abs_det_tf), atol=atol) self.assertAllClose(sign_np, self.evaluate(sign_tf), atol=atol) - def test_works_with_underflow_case(self): - for np_dtype, atol in [(np.float32, 0.05), (np.float64, 1e-5), - (np.complex64, 0.05), (np.complex128, 1e-5)]: - matrix = (np.eye(20) * 1e-6).astype(np_dtype) - sign_np, log_abs_det_np = np.linalg.slogdet(matrix) - with self.session(use_gpu=True): - sign_tf, log_abs_det_tf = linalg.slogdet(matrix) - self.assertAllClose( - log_abs_det_np, self.evaluate(log_abs_det_tf), atol=atol) - self.assertAllClose(sign_np, self.evaluate(sign_tf), atol=atol) - class AdjointTest(test.TestCase): def test_compare_to_numpy(self): for dtype in np.float64, np.float64, np.complex64, np.complex128: - matrix_np = np.array([[1 + 1j, 2 + 2j, 3 + 3j], [4 + 4j, 5 + 5j, - 6 + 6j]]).astype(dtype) - 
expected_transposed = np.conj(matrix_np.T) - with self.session(): - matrix = ops.convert_to_tensor(matrix_np) - transposed = linalg.adjoint(matrix) - self.assertEqual((3, 2), transposed.get_shape()) - self.assertAllEqual(expected_transposed, self.evaluate(transposed)) + with self.subTest(dtype=dtype): + matrix_np = np.array([[1 + 1j, 2 + 2j, 3 + 3j], [4 + 4j, 5 + 5j, + 6 + 6j]]).astype(dtype) + expected_transposed = np.conj(matrix_np.T) + with self.session(): + matrix = ops.convert_to_tensor(matrix_np) + transposed = linalg.adjoint(matrix) + self.assertEqual((3, 2), transposed.get_shape()) + self.assertAllEqual(expected_transposed, self.evaluate(transposed)) class EyeTest(parameterized.TestCase, test.TestCase): diff --git a/tensorflow/python/kernel_tests/lu_op_test.py b/tensorflow/python/kernel_tests/lu_op_test.py index 1c0280c3ce6..7935b66f4af 100644 --- a/tensorflow/python/kernel_tests/lu_op_test.py +++ b/tensorflow/python/kernel_tests/lu_op_test.py @@ -128,14 +128,16 @@ class LuOpTest(test.TestCase): for dtype in (np.float32, np.float64): for output_idx_type in (dtypes.int32, dtypes.int64): - self._verifyLu(data.astype(dtype), output_idx_type=output_idx_type) + with self.subTest(dtype=dtype, output_idx_type=output_idx_type): + self._verifyLu(data.astype(dtype), output_idx_type=output_idx_type) for dtype in (np.complex64, np.complex128): for output_idx_type in (dtypes.int32, dtypes.int64): - complex_data = np.tril(1j * data, -1).astype(dtype) - complex_data += np.triu(-1j * data, 1).astype(dtype) - complex_data += data - self._verifyLu(complex_data, output_idx_type=output_idx_type) + with self.subTest(dtype=dtype, output_idx_type=output_idx_type): + complex_data = np.tril(1j * data, -1).astype(dtype) + complex_data += np.triu(-1j * data, 1).astype(dtype) + complex_data += data + self._verifyLu(complex_data, output_idx_type=output_idx_type) def testPivoting(self): # This matrix triggers partial pivoting because the first diagonal entry @@ -144,38 +146,41 @@ class 
LuOpTest(test.TestCase): self._verifyLu(data.astype(np.float32)) for dtype in (np.float32, np.float64): - self._verifyLu(data.astype(dtype)) - _, p = linalg_ops.lu(data) - p_val = self.evaluate([p]) - # Make sure p_val is not the identity permutation. - self.assertNotAllClose(np.arange(3), p_val) + with self.subTest(dtype=dtype): + self._verifyLu(data.astype(dtype)) + _, p = linalg_ops.lu(data) + p_val = self.evaluate([p]) + # Make sure p_val is not the identity permutation. + self.assertNotAllClose(np.arange(3), p_val) for dtype in (np.complex64, np.complex128): - complex_data = np.tril(1j * data, -1).astype(dtype) - complex_data += np.triu(-1j * data, 1).astype(dtype) - complex_data += data - self._verifyLu(complex_data) - _, p = linalg_ops.lu(data) - p_val = self.evaluate([p]) - # Make sure p_val is not the identity permutation. - self.assertNotAllClose(np.arange(3), p_val) + with self.subTest(dtype=dtype): + complex_data = np.tril(1j * data, -1).astype(dtype) + complex_data += np.triu(-1j * data, 1).astype(dtype) + complex_data += data + self._verifyLu(complex_data) + _, p = linalg_ops.lu(data) + p_val = self.evaluate([p]) + # Make sure p_val is not the identity permutation. + self.assertNotAllClose(np.arange(3), p_val) def testInvalidMatrix(self): # LU factorization gives an error when the input is singular. # Note: A singular matrix may return without error but it won't be a valid # factorization. 
for dtype in self.float_types: - with self.assertRaises(errors.InvalidArgumentError): - self.evaluate( - linalg_ops.lu( - np.array([[1., 2., 3.], [2., 4., 6.], [2., 3., 4.]], - dtype=dtype))) - with self.assertRaises(errors.InvalidArgumentError): - self.evaluate( - linalg_ops.lu( - np.array([[[1., 2., 3.], [2., 4., 6.], [1., 2., 3.]], - [[1., 2., 3.], [3., 4., 5.], [5., 6., 7.]]], - dtype=dtype))) + with self.subTest(dtype=dtype): + with self.assertRaises(errors.InvalidArgumentError): + self.evaluate( + linalg_ops.lu( + np.array([[1., 2., 3.], [2., 4., 6.], [2., 3., 4.]], + dtype=dtype))) + with self.assertRaises(errors.InvalidArgumentError): + self.evaluate( + linalg_ops.lu( + np.array([[[1., 2., 3.], [2., 4., 6.], [1., 2., 3.]], + [[1., 2., 3.], [3., 4., 5.], [5., 6., 7.]]], + dtype=dtype))) def testBatch(self): simple_array = np.array([[[1., -1.], [2., 5.]]]) # shape (1, 2, 2) diff --git a/tensorflow/python/kernel_tests/tensor_array_ops_test.py b/tensorflow/python/kernel_tests/tensor_array_ops_test.py index 33879232fd3..5d587954858 100644 --- a/tensorflow/python/kernel_tests/tensor_array_ops_test.py +++ b/tensorflow/python/kernel_tests/tensor_array_ops_test.py @@ -1021,7 +1021,7 @@ class TensorArrayTest(test.TestCase): # self._testWhileLoopWritePackGradients( # dynamic_size=False, dtype=tf.int64) - @test_util.run_v1_only("b/117943489") + @test_util.run_deprecated_v1 def testSkipEagerWhileLoopDynamicWritePackGradients(self): self._testWhileLoopWritePackGradients( dynamic_size=True, dtype=dtypes.float32) @@ -1251,7 +1251,6 @@ class TensorArrayTest(test.TestCase): with self.assertRaises(ValueError): w1.write(4, c2) - @test_util.run_v1_only("b/117943489") def testUnpackShape(self): self._testUnpackShape() @@ -1340,11 +1339,11 @@ class TensorArrayTest(test.TestCase): grad = gradients_impl.gradients(ys=[r], xs=[x]) self.assertAllEqual(np.array([1.0, 1.0, 1.0]), self.evaluate(grad)[0]) - @test_util.run_v1_only("b/117943489") + @test_util.run_deprecated_v1 def 
testSkipEagerTensorArrayUnpackDynamic(self): self._testTensorArrayUnpackDynamic() - @test_util.run_v1_only("b/117943489") + @test_util.run_deprecated_v1 def testSkipEagerTensorArraySplitDynamic(self): with self.session(use_gpu=True) as sess: ta = tensor_array_ops.TensorArray( @@ -1422,7 +1421,7 @@ class TensorArrayTest(test.TestCase): v2_msg if control_flow_util.ENABLE_CONTROL_FLOW_V2 else v1_msg): ta.stack().eval() - @test_util.run_v1_only("b/120545219") + @test_util.run_deprecated_v1 def testSkipEagerTensorArrayEvalEmpty(self): self._testTensorArrayEvalEmpty() @@ -1445,11 +1444,11 @@ class TensorArrayTest(test.TestCase): # first dimension of zero self.assertAllEqual([0, 5], self.evaluate(concatenated).shape) - @test_util.run_v1_only("b/117943489") + @test_util.run_deprecated_v1 def testSkipEagerTensorArrayEvalEmptyWithDefault(self): self._testTensorArrayEvalEmptyWithDefault() - @test_util.run_v1_only("b/117943489") + @test_util.run_deprecated_v1 def testSkipEagerTensorArrayScatterReadAndGradients(self): with self.session(use_gpu=True) as session: ta = tensor_array_ops.TensorArray( @@ -1476,7 +1475,7 @@ class TensorArrayTest(test.TestCase): self.assertAllEqual([10.0, -10.0], read_vals[1]) self.assertAllEqual([[2.0, 3.0], [4.0, 5.0]], grad_vals[0]) - @test_util.run_v1_only("b/117943489") + @test_util.run_deprecated_v1 def testSkipEagerTensorArrayScatterPartialReadAndGradients(self): with self.session(use_gpu=True) as session: ta = tensor_array_ops.TensorArray( diff --git a/tensorflow/python/lib/core/ndarray_tensor.cc b/tensorflow/python/lib/core/ndarray_tensor.cc index 2f9972c81bf..2afd2888e8f 100644 --- a/tensorflow/python/lib/core/ndarray_tensor.cc +++ b/tensorflow/python/lib/core/ndarray_tensor.cc @@ -17,6 +17,8 @@ limitations under the License. 
#include +#include "tensorflow/c/eager/tfe_context_internal.h" +#include "tensorflow/c/tf_tensor_internal.h" #include "tensorflow/core/lib/core/coding.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" @@ -488,8 +490,9 @@ Status TF_TensorToPyArray(Safe_TF_TensorPtr tensor, PyObject** out_ndarray) { return Status::OK(); } -Status PyArrayToTF_Tensor(PyObject* ndarray, Safe_TF_TensorPtr* out_tensor) { - DCHECK(out_tensor != nullptr); +Status NdarrayToTensor(TFE_Context* ctx, PyObject* ndarray, + Safe_TF_TensorPtr* ret, bool convert_string) { + DCHECK(ret != nullptr); // Make sure we dereference this array object in case of error, etc. Safe_PyObjectPtr array_safe(make_safe( @@ -515,26 +518,52 @@ Status PyArrayToTF_Tensor(PyObject* ndarray, Safe_TF_TensorPtr* out_tensor) { if (dtype == TF_RESOURCE) { size_t size = PyArray_NBYTES(array); array_safe.release(); - *out_tensor = make_safe(TF_NewTensor(dtype, {}, 0, PyArray_DATA(array), - size, &DelayedNumpyDecref, array)); + + if (ctx) { + *ret = make_safe(new TF_Tensor{tensorflow::unwrap(ctx)->CreateTensor( + static_cast(dtype), {}, 0, PyArray_DATA(array), + size, convert_string, &DelayedNumpyDecref, array)}); + } else { + *ret = make_safe(TF_NewTensor(dtype, {}, 0, PyArray_DATA(array), size, + &DelayedNumpyDecref, array)); + } } else if (dtype != TF_STRING) { size_t size = PyArray_NBYTES(array); array_safe.release(); - *out_tensor = make_safe(TF_NewTensor(dtype, dims.data(), dims.size(), - PyArray_DATA(array), size, - &DelayedNumpyDecref, array)); + if (ctx) { + *ret = make_safe(new TF_Tensor{tensorflow::unwrap(ctx)->CreateTensor( + static_cast(dtype), dims.data(), dims.size(), + PyArray_DATA(array), size, convert_string, &DelayedNumpyDecref, + array)}); + } else { + *ret = make_safe(TF_NewTensor(dtype, dims.data(), dims.size(), + PyArray_DATA(array), size, + &DelayedNumpyDecref, array)); + } + } else { size_t size = 0; void* encoded = nullptr; 
TF_RETURN_IF_ERROR(EncodePyBytesArray(array, nelems, &size, &encoded)); - *out_tensor = make_safe(TF_NewTensor( - dtype, dims.data(), dims.size(), encoded, size, - [](void* data, size_t len, void* arg) { - delete[] reinterpret_cast(data); - }, - nullptr)); + if (ctx) { + *ret = make_safe(new TF_Tensor{tensorflow::unwrap(ctx)->CreateTensor( + static_cast(dtype), dims.data(), dims.size(), + encoded, size, convert_string, + [](void* data, size_t len, void* arg) { + delete[] reinterpret_cast(data); + }, + nullptr)}); + } else { + *ret = make_safe(TF_NewTensor( + dtype, dims.data(), dims.size(), encoded, size, + [](void* data, size_t len, void* arg) { + delete[] reinterpret_cast(data); + }, + nullptr)); + } } + return Status::OK(); } @@ -543,7 +572,8 @@ TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src, Status* status); Status NdarrayToTensor(PyObject* obj, Tensor* ret) { Safe_TF_TensorPtr tf_tensor = make_safe(static_cast(nullptr)); - Status s = PyArrayToTF_Tensor(obj, &tf_tensor); + Status s = NdarrayToTensor(nullptr /*ctx*/, obj, &tf_tensor, + false /*convert_string*/); if (!s.ok()) { return s; } diff --git a/tensorflow/python/lib/core/ndarray_tensor.h b/tensorflow/python/lib/core/ndarray_tensor.h index c5cd24cff2d..38c098417d5 100644 --- a/tensorflow/python/lib/core/ndarray_tensor.h +++ b/tensorflow/python/lib/core/ndarray_tensor.h @@ -28,15 +28,21 @@ Status TF_TensorToMaybeAliasedPyArray(Safe_TF_TensorPtr tensor, Status TF_TensorToPyArray(Safe_TF_TensorPtr tensor, PyObject** out_ndarray); -// Converts the given numpy ndarray to a (safe) TF_Tensor. The returned -// TF_Tensor in `out_tensor` may have its own Python reference to `ndarray`s -// data. After `out_tensor` is destroyed, this reference must (eventually) be -// decremented via ClearDecrefCache(). -// -// `out_tensor` must be non-null. Caller retains ownership of `ndarray`. 
-Status PyArrayToTF_Tensor(PyObject* ndarray, Safe_TF_TensorPtr* out_tensor); +// Creates a tensor in 'ret' from the input `ndarray`. The returned TF_Tensor +// in `ret` may have its own Python reference to `ndarray`s data. After `ret` +// is destroyed, this reference must (eventually) be decremented via +// ClearDecrefCache(). +// `convert_string` indicates whether it has to handle tstring conversion. +// Expected to be removed once tstring migration is done. +ABSL_MUST_USE_RESULT +Status NdarrayToTensor(TFE_Context* ctx, PyObject* ndarray, + Safe_TF_TensorPtr* ret, bool convert_string); // Creates a tensor in 'ret' from the input Ndarray. +// TODO(kkb): This is an old conversion function that does not support TFRT. +// Currently it's used for session, py_func, and an internal project. Migrate +// them. +ABSL_MUST_USE_RESULT Status NdarrayToTensor(PyObject* obj, Tensor* ret); // Creates a numpy array in 'ret' which either aliases the content of 't' or has diff --git a/tensorflow/python/lib/core/py_seq_tensor.cc b/tensorflow/python/lib/core/py_seq_tensor.cc index ecf4a92f0e7..22829f546b1 100644 --- a/tensorflow/python/lib/core/py_seq_tensor.cc +++ b/tensorflow/python/lib/core/py_seq_tensor.cc @@ -681,9 +681,11 @@ typedef Converter BoolConverter; // The two may share underlying storage so changes to one may reflect in the // other. 
TFE_TensorHandle* NumpyToTFE_TensorHandle(TFE_Context* ctx, PyObject* obj) { - tensorflow::Tensor tensor; - tensorflow::Status status = tensorflow::NdarrayToTensor(obj, &tensor); - if (!status.ok()) { + Safe_TF_TensorPtr tf_tensor = make_safe(static_cast(nullptr)); + Status status = tensorflow::NdarrayToTensor(ctx, obj, &tf_tensor, + true /*convert_string*/); + + if (TF_PREDICT_FALSE(!status.ok())) { PyErr_SetString(PyExc_ValueError, tensorflow::strings::StrCat( "Failed to convert a NumPy array to a Tensor (", @@ -692,8 +694,8 @@ TFE_TensorHandle* NumpyToTFE_TensorHandle(TFE_Context* ctx, PyObject* obj) { return nullptr; } - TensorInterface t(std::move(tensor)); - return tensorflow::wrap(tensorflow::unwrap(ctx)->CreateLocalHandle(&t)); + return tensorflow::wrap( + tensorflow::unwrap(ctx)->CreateLocalHandle(tf_tensor->tensor)); } } // namespace diff --git a/tensorflow/python/lib/core/pybind11_status.h b/tensorflow/python/lib/core/pybind11_status.h index feb974798de..3f9991c6577 100644 --- a/tensorflow/python/lib/core/pybind11_status.h +++ b/tensorflow/python/lib/core/pybind11_status.h @@ -69,6 +69,20 @@ inline void MaybeRaiseRegisteredFromStatus(const tensorflow::Status& status) { } } +inline void MaybeRaiseRegisteredFromStatusWithGIL( + const tensorflow::Status& status) { + if (!status.ok()) { + // Acquire GIL for throwing exception. 
+ pybind11::gil_scoped_acquire acquire; + + PyErr_SetObject(PyExceptionRegistry::Lookup(status.code()), + pybind11::make_tuple(pybind11::none(), pybind11::none(), + status.error_message()) + .ptr()); + throw pybind11::error_already_set(); + } +} + inline void MaybeRaiseFromTFStatus(TF_Status* status) { TF_Code code = TF_GetCode(status); if (code != TF_OK) { diff --git a/tensorflow/python/lib/io/file_io_wrapper.cc b/tensorflow/python/lib/io/file_io_wrapper.cc index de806a9c969..0a2410b69e1 100644 --- a/tensorflow/python/lib/io/file_io_wrapper.cc +++ b/tensorflow/python/lib/io/file_io_wrapper.cc @@ -42,50 +42,65 @@ PYBIND11_MODULE(_pywrap_file_io, m) { py::gil_scoped_release release; status = tensorflow::Env::Default()->FileExists(filename); } - tensorflow::MaybeRaiseRegisteredFromStatus(status); + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); }); m.def("DeleteFile", [](const std::string& filename) { - tensorflow::MaybeRaiseRegisteredFromStatus( - tensorflow::Env::Default()->DeleteFile(filename)); + py::gil_scoped_release release; + tensorflow::Status status = + tensorflow::Env::Default()->DeleteFile(filename); + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); }); m.def("ReadFileToString", [](const std::string& filename) { std::string data; + py::gil_scoped_release release; const auto status = ReadFileToString(tensorflow::Env::Default(), filename, &data); + pybind11::gil_scoped_acquire acquire; tensorflow::MaybeRaiseRegisteredFromStatus(status); return py::bytes(data); }); m.def("WriteStringToFile", [](const std::string& filename, tensorflow::StringPiece data) { - return WriteStringToFile(tensorflow::Env::Default(), filename, data); + py::gil_scoped_release release; + const auto status = + WriteStringToFile(tensorflow::Env::Default(), filename, data); + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); }); m.def("GetChildren", [](const std::string& dirname) { std::vector results; + py::gil_scoped_release release; const auto status = 
tensorflow::Env::Default()->GetChildren(dirname, &results); + pybind11::gil_scoped_acquire acquire; tensorflow::MaybeRaiseRegisteredFromStatus(status); return results; }); m.def("GetMatchingFiles", [](const std::string& pattern) { std::vector results; + py::gil_scoped_release release; const auto status = tensorflow::Env::Default()->GetMatchingPaths(pattern, &results); + pybind11::gil_scoped_acquire acquire; tensorflow::MaybeRaiseRegisteredFromStatus(status); return results; }); m.def("CreateDir", [](const std::string& dirname) { + py::gil_scoped_release release; const auto status = tensorflow::Env::Default()->CreateDir(dirname); if (tensorflow::errors::IsAlreadyExists(status)) { return; } - tensorflow::MaybeRaiseRegisteredFromStatus(status); + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); }); m.def("RecursivelyCreateDir", [](const std::string& dirname) { - tensorflow::MaybeRaiseRegisteredFromStatus( - tensorflow::Env::Default()->RecursivelyCreateDir(dirname)); + py::gil_scoped_release release; + const auto status = + tensorflow::Env::Default()->RecursivelyCreateDir(dirname); + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); }); m.def("CopyFile", [](const std::string& src, const std::string& target, bool overwrite) { + py::gil_scoped_release release; auto* env = tensorflow::Env::Default(); tensorflow::Status status; if (!overwrite && env->FileExists(target).ok()) { @@ -93,10 +108,11 @@ PYBIND11_MODULE(_pywrap_file_io, m) { } else { status = env->CopyFile(src, target); } - tensorflow::MaybeRaiseRegisteredFromStatus(status); + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); }); m.def("RenameFile", [](const std::string& src, const std::string& target, bool overwrite) { + py::gil_scoped_release release; auto* env = tensorflow::Env::Default(); tensorflow::Status status; if (!overwrite && env->FileExists(target).ok()) { @@ -104,9 +120,10 @@ PYBIND11_MODULE(_pywrap_file_io, m) { } else { status = env->RenameFile(src, target); } - 
tensorflow::MaybeRaiseRegisteredFromStatus(status); + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); }); m.def("DeleteRecursively", [](const std::string& dirname) { + py::gil_scoped_release release; tensorflow::int64 undeleted_files; tensorflow::int64 undeleted_dirs; auto status = tensorflow::Env::Default()->DeleteRecursively( @@ -115,23 +132,25 @@ PYBIND11_MODULE(_pywrap_file_io, m) { status = tensorflow::errors::PermissionDenied("could not fully delete dir"); } - tensorflow::MaybeRaiseRegisteredFromStatus(status); + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); }); m.def("IsDirectory", [](const std::string& dirname) { + py::gil_scoped_release release; const auto status = tensorflow::Env::Default()->IsDirectory(dirname); // FAILED_PRECONDITION response means path exists but isn't a dir. if (tensorflow::errors::IsFailedPrecondition(status)) { return false; } - tensorflow::MaybeRaiseRegisteredFromStatus(status); + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); return true; }); m.def("HasAtomicMove", [](const std::string& path) { + py::gil_scoped_release release; bool has_atomic_move; const auto status = tensorflow::Env::Default()->HasAtomicMove(path, &has_atomic_move); - tensorflow::MaybeRaiseRegisteredFromStatus(status); + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); return has_atomic_move; }); @@ -141,9 +160,11 @@ PYBIND11_MODULE(_pywrap_file_io, m) { .def_readonly("is_directory", &tensorflow::FileStatistics::is_directory); m.def("Stat", [](const std::string& filename) { + py::gil_scoped_release release; std::unique_ptr self( new tensorflow::FileStatistics); const auto status = tensorflow::Env::Default()->Stat(filename, self.get()); + py::gil_scoped_acquire acquire; tensorflow::MaybeRaiseRegisteredFromStatus(status); return self.release(); }); @@ -151,66 +172,83 @@ PYBIND11_MODULE(_pywrap_file_io, m) { using tensorflow::WritableFile; py::class_(m, "WritableFile") .def(py::init([](const std::string& filename, const 
std::string& mode) { + py::gil_scoped_release release; auto* env = tensorflow::Env::Default(); std::unique_ptr self; const auto status = mode.find("a") == std::string::npos ? env->NewWritableFile(filename, &self) : env->NewAppendableFile(filename, &self); + py::gil_scoped_acquire acquire; tensorflow::MaybeRaiseRegisteredFromStatus(status); return self.release(); })) .def("append", [](WritableFile* self, tensorflow::StringPiece data) { - tensorflow::MaybeRaiseRegisteredFromStatus(self->Append(data)); + const auto status = self->Append(data); + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); }) // TODO(slebedev): Make WritableFile::Tell const and change self // to be a reference. .def("tell", [](WritableFile* self) { tensorflow::int64 pos = -1; + py::gil_scoped_release release; const auto status = self->Tell(&pos); - tensorflow::MaybeRaiseRegisteredFromStatus(status); + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); return pos; }) .def("flush", [](WritableFile* self) { - tensorflow::MaybeRaiseRegisteredFromStatus(self->Flush()); + py::gil_scoped_release release; + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(self->Flush()); }) .def("close", [](WritableFile* self) { - tensorflow::MaybeRaiseRegisteredFromStatus(self->Close()); + py::gil_scoped_release release; + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(self->Close()); }); using tensorflow::io::BufferedInputStream; py::class_(m, "BufferedInputStream") .def(py::init([](const std::string& filename, size_t buffer_size) { + py::gil_scoped_release release; std::unique_ptr file; const auto status = tensorflow::Env::Default()->NewRandomAccessFile(filename, &file); - tensorflow::MaybeRaiseRegisteredFromStatus(status); + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); std::unique_ptr input_stream( new tensorflow::io::RandomAccessInputStream(file.release(), /*owns_file=*/true)); + py::gil_scoped_acquire acquire; return new BufferedInputStream(input_stream.release(), buffer_size, 
/*owns_input_stream=*/true); })) .def("read", [](BufferedInputStream* self, tensorflow::int64 bytes_to_read) { + py::gil_scoped_release release; tensorflow::tstring result; const auto status = self->ReadNBytes(bytes_to_read, &result); if (!status.ok() && !tensorflow::errors::IsOutOfRange(status)) { result.clear(); tensorflow::MaybeRaiseRegisteredFromStatus(status); } + py::gil_scoped_acquire acquire; return py::bytes(result); }) .def("readline", [](BufferedInputStream* self) { - return py::bytes(self->ReadLineAsString()); + py::gil_scoped_release release; + auto output = self->ReadLineAsString(); + py::gil_scoped_acquire acquire; + return py::bytes(output); }) .def("seek", [](BufferedInputStream* self, tensorflow::int64 pos) { - tensorflow::MaybeRaiseRegisteredFromStatus(self->Seek(pos)); + py::gil_scoped_release release; + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(self->Seek(pos)); }) - .def("tell", [](BufferedInputStream* self) { return self->Tell(); }); + .def("tell", [](BufferedInputStream* self) { + py::gil_scoped_release release; + return self->Tell(); + }); } } // namespace diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 33aac84d77f..1cb6fdbd726 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -39,6 +39,7 @@ from tensorflow.python.ops import gen_math_ops # pylint: disable=wildcard-import from tensorflow.python.ops.gen_array_ops import * from tensorflow.python.ops.gen_array_ops import reverse_v2 as reverse # pylint: disable=unused-import +from tensorflow.python.types import core from tensorflow.python.util import deprecation from tensorflow.python.util import dispatch from tensorflow.python.util import nest @@ -1381,13 +1382,13 @@ def _autopacking_helper(list_or_tuple, dtype, name): if context.executing_eagerly(): # NOTE: Fast path when all the items are tensors, this doesn't do any type # checking. 
- if all(ops.is_dense_tensor_like(elem) for elem in list_or_tuple): + if all(isinstance(elem, core.Tensor) for elem in list_or_tuple): return gen_array_ops.pack(list_or_tuple, name=name) must_pack = False converted_elems = [] with ops.name_scope(name) as scope: for i, elem in enumerate(list_or_tuple): - if ops.is_dense_tensor_like(elem): + if isinstance(elem, core.Tensor): if dtype is not None and elem.dtype.base_dtype != dtype: raise TypeError("Cannot convert a list containing a tensor of dtype " "%s to %s (Tensor is: %r)" % @@ -1396,7 +1397,7 @@ def _autopacking_helper(list_or_tuple, dtype, name): must_pack = True elif isinstance(elem, (list, tuple)): converted_elem = _autopacking_helper(elem, dtype, str(i)) - if ops.is_dense_tensor_like(converted_elem): + if isinstance(converted_elem, core.Tensor): must_pack = True converted_elems.append(converted_elem) else: @@ -1404,7 +1405,7 @@ def _autopacking_helper(list_or_tuple, dtype, name): if must_pack: elems_as_tensors = [] for i, elem in enumerate(converted_elems): - if ops.is_dense_tensor_like(elem): + if isinstance(elem, core.Tensor): elems_as_tensors.append(elem) else: # NOTE(mrry): This is inefficient, but it enables us to @@ -1429,7 +1430,7 @@ def _get_dtype_from_nested_lists(list_or_tuple): such object exists. """ for elem in list_or_tuple: - if ops.is_dense_tensor_like(elem): + if isinstance(elem, core.Tensor): return elem.dtype.base_dtype elif isinstance(elem, (list, tuple)): maybe_dtype = _get_dtype_from_nested_lists(elem) @@ -1441,7 +1442,7 @@ def _get_dtype_from_nested_lists(list_or_tuple): def _cast_nested_seqs_to_dtype(dtype): def _maybe_cast(elem): - if ops.is_dense_tensor_like(elem): + if isinstance(elem, core.Tensor): if dtype != elem.dtype.base_dtype: elem = gen_math_ops.cast(elem, dtype) return elem @@ -1455,7 +1456,7 @@ _NON_AUTOPACKABLE_TYPES.add(np.ndarray) def _should_not_autopack(v): # The condition we really want is - # ops.is_dense_tensor_like(...) 
+ # any(isinstance(elem, core.Tensor)) # but it is >5x slower due to abc.ABCMeta.__instancecheck__. # pylint: disable=unidiomatic-typecheck # TODO(slebedev): add nest.all? diff --git a/tensorflow/python/ops/check_ops.py b/tensorflow/python/ops/check_ops.py index 3085e05eaf6..cefca5defae 100644 --- a/tensorflow/python/ops/check_ops.py +++ b/tensorflow/python/ops/check_ops.py @@ -1845,7 +1845,12 @@ def assert_shapes(shapes, data=None, summarize=None, message=None, name=None): 'Specified by tensor %s dimension %d' % (tensor_name(specified_by_y), specified_at_dim)) - actual_size = sizes.actual_sizes[tensor_dim] + # This is extremely subtle. If actual_sizes is dynamic, we must + # make sure a control dependency is inserted here so that this slice + # can not execute until the rank is asserted to be enough for the + # slice to not fail. + with ops.control_dependencies(rank_assertions): + actual_size = sizes.actual_sizes[tensor_dim] if _has_known_value(actual_size) and _has_known_value(specified_size): if int(actual_size) != int(specified_size): raise ValueError( @@ -1871,12 +1876,17 @@ def assert_shapes(shapes, data=None, summarize=None, message=None, name=None): size_assertions.append( control_flow_ops.Assert(condition, data_, summarize=summarize)) else: - size = sizes.actual_sizes[tensor_dim] + # Not sure if actual_sizes is a constant, but for safety, guard + # on rank. See explanation above about actual_sizes need for safety. + with ops.control_dependencies(rank_assertions): + size = sizes.actual_sizes[tensor_dim] size_specifications[size_symbol] = (size, sizes.x, tensor_dim) - with ops.control_dependencies(rank_assertions): - shapes_assertion = control_flow_ops.group(size_assertions) - return shapes_assertion + # Ensure both assertions actually occur. 
+ with ops.control_dependencies(rank_assertions): + shapes_assertion = control_flow_ops.group(size_assertions) + + return shapes_assertion # pylint: disable=line-too-long diff --git a/tensorflow/python/ops/functional_ops.py b/tensorflow/python/ops/functional_ops.py index fe66e8ccdfb..8ec925824de 100644 --- a/tensorflow/python/ops/functional_ops.py +++ b/tensorflow/python/ops/functional_ops.py @@ -863,11 +863,24 @@ def Gradient(inputs, f, name=None): return symbolic_gradient(input=inputs, Tout=tlist, f=f, name=name) +def _GetInputDtypes(func): + """Returns the input dtypes of func, excluding dtypes for captured inputs.""" + if isinstance(func, function._DefinedFunction): # pylint: disable=protected-access + return func.declared_input_types + + # We assume that `func` is a ConcreteFunction here, but we are not able to + # verify since importing eager function library will cause cyclic dependence. + # + # ConcreteFunction.inputs includes captured inputs. + num_non_captured_inputs = len(func.inputs) - len(func.captured_inputs) + inputs_without_captured = func.inputs[:num_non_captured_inputs] + return [t.dtype for t in inputs_without_captured] + + def _LoopBodyCaptureWrapper(func): """Returns a wrapper for `func` that handles loop-carried captured inputs.""" - @function.Defun( - *func.declared_input_types, func_name="%s_Wrapper" % func.name) + @function.Defun(*_GetInputDtypes(func), func_name="%s_Wrapper" % func.name) def Wrapper(*args): """A wrapper that handles loop-carried captured inputs.""" result = func(*args) @@ -877,11 +890,11 @@ def _LoopBodyCaptureWrapper(func): if isinstance(result, ops.Operation): return extra_args # Unary functions return a single Tensor value. - elif not isinstance(result, tuple): + elif not isinstance(result, (list, tuple)): return (result,) + extra_args # N-ary functions return a tuple of Tensors. 
else: - return result + extra_args + return result + type(result)(extra_args) return Wrapper @@ -917,19 +930,23 @@ def While(input_, cond, body, name=None, hostmem=None): raise ValueError("While op 'cond' argument must be a function " "without implicitly captured inputs.") - if cond.declared_input_types != body.declared_input_types: + cond_input_types = _GetInputDtypes(cond) + body_input_types = _GetInputDtypes(body) + + if cond_input_types != body_input_types: raise ValueError( "While op 'cond' and 'body' signatures do not match. %r vs %r" % - (cond.declared_input_types, body.declared_input_types)) + (cond_input_types, body_input_types)) if body.captured_inputs: - cond_dtypes = list( - body.declared_input_types) + [t.dtype for t in body.captured_inputs] + cond_dtypes = list(body_input_types) + [ + t.dtype for t in body.captured_inputs + ] @function.Defun(*cond_dtypes, func_name="%s_Wrapper" % cond.name) def CondWrapper(*args): """A wrapper that handles loop-carried captured inputs.""" - return cond(*args[:len(body.declared_input_types)]) + return cond(*args[:len(body_input_types)]) ret = gen_functional_ops._while( input_ + body.captured_inputs, @@ -1184,8 +1201,8 @@ def partitioned_call(args, if hasattr(f, "graph"): _set_read_only_resource_inputs_attr(op, f.graph) if hasattr(f.graph, "collective_manager_ids_used"): - ops.set_int_list_attr( - op, acd.COLLECTIVE_MANAGER_IDS, f.graph.collective_manager_ids_used) + ops.set_int_list_attr(op, acd.COLLECTIVE_MANAGER_IDS, + f.graph.collective_manager_ids_used) return outputs if outputs else op diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index de5be20aa84..248c57c1ba5 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -45,6 +45,7 @@ from tensorflow.python.ops.gen_nn_ops import * # pylint: enable=wildcard-import from tensorflow.python.platform import device_context from tensorflow.python.util import deprecation +from tensorflow.python.util import dispatch 
from tensorflow.python.util.compat import collections_abc from tensorflow.python.util.deprecation import deprecated_args from tensorflow.python.util.deprecation import deprecated_argument_lookup @@ -4513,6 +4514,7 @@ def _get_noise_shape(x, noise_shape): @tf_export(v1=["nn.dropout"]) +@dispatch.add_dispatch_support @deprecation.deprecated_args(None, "Please use `rate` instead of `keep_prob`. " "Rate should be set to `rate = 1 - keep_prob`.", "keep_prob") @@ -4567,6 +4569,7 @@ def dropout(x, keep_prob=None, noise_shape=None, seed=None, name=None, @tf_export("nn.dropout", v1=[]) +@dispatch.add_dispatch_support def dropout_v2(x, rate, noise_shape=None, seed=None, name=None): """Computes dropout: randomly sets elements to zero to prevent overfitting. diff --git a/tensorflow/python/ops/parallel_for/control_flow_ops_test.py b/tensorflow/python/ops/parallel_for/control_flow_ops_test.py index 11380b2dac2..01776808525 100644 --- a/tensorflow/python/ops/parallel_for/control_flow_ops_test.py +++ b/tensorflow/python/ops/parallel_for/control_flow_ops_test.py @@ -1400,6 +1400,8 @@ class StatelessIfTest(PForTestCase): class IfTest(PForTestCase): def test_read_var(self): + self.skipTest("b/156438918") # Flaky + x = [1, 2, 3, 4, 5.] y = 2.5 z = resource_variable_ops.ResourceVariable(5.) 
diff --git a/tensorflow/python/ops/parallel_for/math_test.py b/tensorflow/python/ops/parallel_for/math_test.py index 773195283d6..8e18b9968fe 100644 --- a/tensorflow/python/ops/parallel_for/math_test.py +++ b/tensorflow/python/ops/parallel_for/math_test.py @@ -23,6 +23,7 @@ from absl.testing import parameterized from tensorflow.python.eager import backprop from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops as framework_ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import clip_ops @@ -150,72 +151,81 @@ class MathTest(PForTestCase, parameterized.TestCase): self._test_loop_fn(loop_fn, 3) def test_binary_cwise_ops(self): - logical_ops = [ - math_ops.logical_and, math_ops.logical_or, math_ops.logical_xor - ] + # Enable tensor equality to test `equal` and `not_equal` ops below. + default_equality = framework_ops.Tensor._USE_EQUALITY + framework_ops.enable_tensor_equality() + try: + logical_ops = [ + math_ops.logical_and, math_ops.logical_or, math_ops.logical_xor + ] - # Wrapper functions restricting the range of inputs of zeta and polygamma. - def safe_polygamma(x, y): - return math_ops.polygamma( - math_ops.round(clip_ops.clip_by_value(y, 1, 10)), x * x + 1) + # Wrapper functions restricting the range of inputs of zeta and polygamma. 
+ def safe_polygamma(x, y): + return math_ops.polygamma( + math_ops.round(clip_ops.clip_by_value(y, 1, 10)), x * x + 1) - def safe_zeta(x, y): - return math_ops.zeta(x * x + 1, y * y) + def safe_zeta(x, y): + return math_ops.zeta(x * x + 1, y * y) - float_ops = [ - math_ops.add, - math_ops.add_v2, - math_ops.atan2, - math_ops.complex, - math_ops.div, - math_ops.divide, - math_ops.div_no_nan, - math_ops.equal, - math_ops.floor_mod, - math_ops.greater, - math_ops.greater_equal, - math_ops.igamma, - math_ops.igammac, - math_ops.igamma_grad_a, - math_ops.less, - math_ops.less_equal, - math_ops.maximum, - math_ops.minimum, - math_ops.mod, - math_ops.multiply, - math_ops.not_equal, - math_ops.pow, - math_ops.squared_difference, - math_ops.subtract, - math_ops.truncate_mod, - safe_polygamma, - safe_zeta, - ] - # FloorDiv fails on XLA due floor's discontinuities exacerbating small - # division differences. - if not test_util.is_xla_enabled(): - float_ops += [math_ops.floor_div] - for op in logical_ops + float_ops: - x = random_ops.random_uniform([7, 3, 5]) - y = random_ops.random_uniform([3, 5]) - if op in logical_ops: - x = x > 0 - y = y > 0 + float_ops = [ + math_ops.add, + math_ops.add_v2, + math_ops.atan2, + math_ops.complex, + math_ops.div, + math_ops.divide, + math_ops.div_no_nan, + math_ops.equal, + lambda x, y: framework_ops.convert_to_tensor(x == y), + lambda x, y: framework_ops.convert_to_tensor(x != y), + math_ops.floor_mod, + math_ops.greater, + math_ops.greater_equal, + math_ops.igamma, + math_ops.igammac, + math_ops.igamma_grad_a, + math_ops.less, + math_ops.less_equal, + math_ops.maximum, + math_ops.minimum, + math_ops.mod, + math_ops.multiply, + math_ops.not_equal, + math_ops.pow, + math_ops.squared_difference, + math_ops.subtract, + math_ops.truncate_mod, + safe_polygamma, + safe_zeta, + ] + # FloorDiv fails on XLA due floor's discontinuities exacerbating small + # division differences. 
+ if not test_util.is_xla_enabled(): + float_ops += [math_ops.floor_div] + for op in logical_ops + float_ops: + x = random_ops.random_uniform([7, 3, 5]) + y = random_ops.random_uniform([3, 5]) + if op in logical_ops: + x = x > 0 + y = y > 0 - output_dtypes = [] + output_dtypes = [] - # pylint: disable=cell-var-from-loop - def loop_fn(i): - x1 = array_ops.gather(x, i) - y1 = array_ops.gather(y, i) - outputs = [op(x, y), op(x1, y), op(x, y1), op(x1, y1), op(x1, x1)] - del output_dtypes[:] - output_dtypes.extend(t.dtype for t in outputs) - return outputs + # pylint: disable=cell-var-from-loop + def loop_fn(i): + x1 = array_ops.gather(x, i) + y1 = array_ops.gather(y, i) + outputs = [op(x, y), op(x1, y), op(x, y1), op(x1, y1), op(x1, x1)] + del output_dtypes[:] + output_dtypes.extend(t.dtype for t in outputs) + return outputs - # pylint: enable=cell-var-from-loop + # pylint: enable=cell-var-from-loop - self._test_loop_fn(loop_fn, 3) + self._test_loop_fn(loop_fn, 3) + finally: + if not default_equality: + framework_ops.disable_tensor_equality() def test_approximate_equal(self): x = random_ops.random_uniform([3, 5]) diff --git a/tensorflow/python/ops/parallel_for/pfor.py b/tensorflow/python/ops/parallel_for/pfor.py index bece477e754..c4621758702 100644 --- a/tensorflow/python/ops/parallel_for/pfor.py +++ b/tensorflow/python/ops/parallel_for/pfor.py @@ -2784,8 +2784,8 @@ def _convert_equal(pfor_input): x = pfor_input.input(0)[0] y = pfor_input.input(1)[0] incompatible_shape_error = pfor_input.get_attr("incompatible_shape_error") - assert incompatible_shape_error - return wrap(math_ops.equal(x, y), True) + return wrap(gen_math_ops.equal( + x, y, incompatible_shape_error=incompatible_shape_error), True) @RegisterPFor("NotEqual") @@ -2794,8 +2794,8 @@ def _convert_not_equal(pfor_input): x = pfor_input.input(0)[0] y = pfor_input.input(1)[0] incompatible_shape_error = pfor_input.get_attr("incompatible_shape_error") - assert incompatible_shape_error - return 
wrap(math_ops.not_equal(x, y), True) + return wrap(gen_math_ops.not_equal( + x, y, incompatible_shape_error=incompatible_shape_error), True) @RegisterPFor("ApproximateEqual") diff --git a/tensorflow/python/ops/ragged/ragged_dispatch.py b/tensorflow/python/ops/ragged/ragged_dispatch.py index dd5bd782462..f13bed07ba0 100644 --- a/tensorflow/python/ops/ragged/ragged_dispatch.py +++ b/tensorflow/python/ops/ragged/ragged_dispatch.py @@ -30,6 +30,7 @@ from tensorflow.python.ops import clip_ops from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import gen_bitwise_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import string_ops from tensorflow.python.ops import variables @@ -453,6 +454,26 @@ def _ragged_dynamic_partition(data, partitions, num_partitions, name=None): num_partitions, name) return [result[i] for i in range(num_partitions)] + +def _ragged_nn_dropout_v1(x, keep_prob=None, noise_shape=None, seed=None, + name=None, rate=None): + if noise_shape is not None: + raise ValueError('noise_shape is not supported yet for RaggedTensor x') + with ops.name_scope(name, 'RaggedNNDropout', [x, rate]): + x = ragged_tensor.convert_to_tensor_or_ragged_tensor(x, name='x') + return x.with_flat_values(nn_ops.dropout(x.flat_values, keep_prob=keep_prob, + seed=seed, rate=rate)) + + +def _ragged_nn_dropout_v2(x, rate, noise_shape=None, seed=None, name=None): + if noise_shape is not None: + raise ValueError('noise_shape is not supported yet for RaggedTensor x') + with ops.name_scope(name, 'RaggedNNDropout', [x, rate]): + x = ragged_tensor.convert_to_tensor_or_ragged_tensor(x, name='x') + return x.with_flat_values(nn_ops.dropout_v2(x.flat_values, rate=rate, + seed=seed)) + + # (original_op, ragged_op, ragged_args) _RAGGED_DISPATCH_OPS = [ (array_ops.batch_gather, ragged_batch_gather_ops.batch_gather, @@ -497,6 +518,8 @@ _RAGGED_DISPATCH_OPS = [ 
(math_ops.reduce_mean, ragged_math_ops.reduce_mean, ['input_tensor']), (math_ops.reduce_any, ragged_math_ops.reduce_any, ['input_tensor']), (math_ops.reduce_all, ragged_math_ops.reduce_all, ['input_tensor']), + (nn_ops.dropout, _ragged_nn_dropout_v1, ['x']), + (nn_ops.dropout_v2, _ragged_nn_dropout_v2, ['x']), ] diff --git a/tensorflow/python/ops/ragged/ragged_dispatch_test.py b/tensorflow/python/ops/ragged/ragged_dispatch_test.py index 0ce9a6f9771..60d9f6c8713 100644 --- a/tensorflow/python/ops/ragged/ragged_dispatch_test.py +++ b/tensorflow/python/ops/ragged/ragged_dispatch_test.py @@ -32,6 +32,7 @@ from tensorflow.python.ops import clip_ops from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import gen_bitwise_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import string_ops from tensorflow.python.ops.ragged import ragged_dispatch @@ -232,6 +233,10 @@ class RaggedElementwiseOpsTest(test_util.TensorFlowTestCase, {'op': array_ops.check_numerics, 'x': ragged_factory_ops.constant_value([[-2.0, 3.0], [-3.0]]), 'message': 'check-numerics'}, + {'op': nn_ops.dropout, + 'x': ragged_factory_ops.constant_value([[-2.0, 3.0], [-3.0]]), + 'rate': 0.5, + 'seed': 1}, ] ) # pyformat: disable def testUnaryElementwiseOp(self, x, op=math_ops.abs, **extra_args): @@ -820,7 +825,8 @@ class RaggedElementwiseOpsTest(test_util.TensorFlowTestCase, 'strings.substr', 'strings.to_hash_bucket_fast', 'strings.to_hash_bucket_strong', 'strings.to_hash_bucket', 'strings.to_number', 'strings.unicode_script', 'tile', 'truncatediv', - 'truncatemod', 'zeros_like', 'dynamic_partition', 'reverse' + 'truncatemod', 'zeros_like', 'dynamic_partition', 'reverse', + 'nn.dropout', ] # Ops that should be listed as supported in v1 only. 
diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index f99f886f210..d8a7765a208 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -49,6 +49,7 @@ from tensorflow.python.ops import variables from tensorflow.python.ops.gen_resource_variable_ops import * # pylint: enable=wildcard-import from tensorflow.python.training.tracking import base as trackable +from tensorflow.python.types import core from tensorflow.python.util import compat from tensorflow.python.util.deprecation import deprecated @@ -330,7 +331,7 @@ def variable_accessed(variable): tape.variable_accessed(variable) -class BaseResourceVariable(variables.VariableV1): +class BaseResourceVariable(variables.VariableV1, core.Tensor): """A python variable from an existing handle.""" # TODO(wangpeng): Deprecate `constraint` when callers no long pass it in. @@ -1830,7 +1831,6 @@ def _dense_var_to_tensor(var, dtype=None, name=None, as_ref=False): # allowing instances of the class to be used as tensors. ops.register_tensor_conversion_function(BaseResourceVariable, _dense_var_to_tensor) -ops.register_dense_tensor_like_type(BaseResourceVariable) class _UnreadVariable(BaseResourceVariable): @@ -1955,9 +1955,6 @@ class _UnreadVariable(BaseResourceVariable): return self._parent_op -ops.register_dense_tensor_like_type(_UnreadVariable) - - @ops.RegisterGradient("ReadVariableOp") def _ReadGrad(_, grad): """Gradient for read op.""" diff --git a/tensorflow/python/ops/signal/mel_ops.py b/tensorflow/python/ops/signal/mel_ops.py index aa0769166a4..b95876bc977 100644 --- a/tensorflow/python/ops/signal/mel_ops.py +++ b/tensorflow/python/ops/signal/mel_ops.py @@ -128,8 +128,6 @@ def linear_to_mel_weight_matrix(num_mel_bins=20, # S has shape [..., num_spectrogram_bins]. # M has shape [..., num_mel_bins]. M = tf.tensordot(S, A, 1) - # tf.tensordot does not support shape inference for this case yet. 
- M.set_shape(S.shape[:-1].concatenate(A.shape[-1:])) Args: num_mel_bins: Python int. How many bands in the resulting mel spectrum. diff --git a/tensorflow/python/ops/structured/structured_tensor.py b/tensorflow/python/ops/structured/structured_tensor.py index cb02ce52438..2007b68a548 100644 --- a/tensorflow/python/ops/structured/structured_tensor.py +++ b/tensorflow/python/ops/structured/structured_tensor.py @@ -62,21 +62,22 @@ class StructuredTensor(composite_tensor.CompositeTensor): ```python >>> # A scalar StructuredTensor describing a single person. - >>> s1 = tf.structured.constant({"age": 82, "nicknames": ["Bob", "Bobby"]}) - >>> print s1.shape - () - >>> print s1["age"] - tf.Tensor(82, shape=(), dtype=int32) + >>> s1 = StructuredTensor.from_pyval( + ... {"age": 82, "nicknames": ["Bob", "Bobby"]}) + >>> s1.shape + TensorShape([]) + >>> s1["age"] + >>> # A vector StructuredTensor describing three people. - >>> s2 = stf.struct.constant([ + >>> s2 = StructuredTensor.from_pyval([ ... {"age": 12, "nicknames": ["Josaphine"]}, ... {"age": 82, "nicknames": ["Bob", "Bobby"]}, - ... {"age": 82, "nicknames": ["Elmo"]}]) - >>> print s2.shape - (3,) - >>> print s2[0]["age"] - tf.Tensor(12, shape=(), dtype=int32) + ... {"age": 42, "nicknames": ["Elmo"]}]) + >>> s2.shape + TensorShape([3]) + >>> s2[0]["age"] + ``` ### Field Paths @@ -155,11 +156,17 @@ class StructuredTensor(composite_tensor.CompositeTensor): Examples: >>> StructuredTensor.from_fields({'x': 1, 'y': [1, 2, 3]}) - (FILL THIS IN) + >>> StructuredTensor.from_fields({'foo': [1, 2], 'bar': [3, 4]}, ... shape=[2]) - (FILL THIS IN) + """ shape = tensor_shape.as_shape(shape) @@ -312,7 +319,7 @@ class StructuredTensor(composite_tensor.CompositeTensor): If `field_name` is a `string`, then it names a field directly owned by this `StructuredTensor`. 
If this `StructuredTensor` has shape `[D1...DN]`, then the returned tensor will have shape `[D1...DN, V1...VM]`, where the slice - `result[d1...dN]`contains the field value for the structure at + `result[d1...dN]` contains the field value for the structure at `self[d1...dN]`. If `field_name` is a `tuple` of `string`, then it specifies a path to a @@ -431,7 +438,8 @@ class StructuredTensor(composite_tensor.CompositeTensor): def __repr__(self): return '' % (', '.join( - '%r' % k for k in sorted(self._fields)), self._shape) + '"%s": %s' % (k, v) + for k, v in sorted(self._fields.items())), self._shape) #============================================================================= # Conversion @@ -458,9 +466,9 @@ class StructuredTensor(composite_tensor.CompositeTensor): Requires that all fields are Eager tensors. - >>> print(StructuredTensor.from_fields( - ... {'a': [1, 2, 3]}, [3]).to_pyval()) - [{b'a': 1}, {b'a': 2}, {b'a': 3}] + >>> StructuredTensor.from_fields( + ... {'a': [1, 2, 3]}, [3]).to_pyval() + [{'a': 1}, {'a': 2}, {'a': 3}] Note that `StructuredTensor.from_pyval(pyval).to_pyval() == pyval`. @@ -496,9 +504,12 @@ class StructuredTensor(composite_tensor.CompositeTensor): def from_pyval(cls, pyval, typespec=None): """Constructs a StructuredTensor from a nested Python structure. - >>> print StructuredTensor.from_pyval( + >>> StructuredTensor.from_pyval( ... {'a': [1, 2, 3], 'b': [[4, 5], [6, 7]]}) - + }, + shape=())> Note that `StructuredTensor.from_pyval(pyval).to_pyval() == pyval`. @@ -628,7 +639,9 @@ class StructuredTensor(composite_tensor.CompositeTensor): ... [{'foo': 12}, {'foo': 33}, {'foo': 99}]) >>> partition = RowPartition.from_row_lengths([2, 0, 1]) >>> st.partition_outer_dimension(partition) - + }, + shape=(3, None))> Args: row_partition: A `RowPartition`. @@ -651,7 +664,9 @@ class StructuredTensor(composite_tensor.CompositeTensor): >>> st = StructuredTensor.from_pyval( ... 
[[{'foo': 12}, {'foo': 33}], [], [{'foo': 99}]]) >>> st.merge_dims(0, 1) - + Args: outer_axis: `int`: The first dimension in the range of dimensions to @@ -1058,12 +1073,14 @@ def _partition_outer_dimension(value, row_partition): >>> partition = row_partition.RowPartition.from_row_lengths([2, 0, 1]) >>> _partition_outer_dimension(tf.constant([1, 2, 3]), partition) - [[1, 2], [], [3]] + >>> struct_value = StructuredTensor.from_pyval( ... [{'x': 1}, {'x': 2}, {'x': 3}]) >>> _partition_outer_dimension(struct_value, partition) - [[{'x': 1}, {'x': 2}], [], [{'x': 3}]]) + }, + shape=(3, None))> Args: value: Tensor, RaggedTensor, or StructuredTensor diff --git a/tensorflow/python/ops/structured/structured_tensor_test.py b/tensorflow/python/ops/structured/structured_tensor_test.py index 99f6bb6f5a3..420705b07e7 100644 --- a/tensorflow/python/ops/structured/structured_tensor_test.py +++ b/tensorflow/python/ops/structured/structured_tensor_test.py @@ -922,14 +922,25 @@ class StructuredTensorTest(test_util.TensorFlowTestCase, st = StructuredTensor.from_pyval({"a": 5, "b": {"c": [1, 2, 3]}}) self.assertAllEqual(st.field_value(("a",)), 5) self.assertAllEqual(st.field_value(("b", "c")), [1, 2, 3]) - with self.assertRaisesRegexp(KeyError, - r"Field path \('a', 'b'\) not found in .*"): + expected = "Field path \(.*a.*,.*b.*\) not found in .*" + with self.assertRaisesRegexp(KeyError, expected): st.field_value(("a", "b")) def testRepr(self): st = StructuredTensor.from_pyval({"a": 5, "b": {"c": [1, 2, 3]}}) - self.assertEqual( - repr(st), "") + if context.executing_eagerly(): + expected = ("}, shape=())>") + else: + expected = ("}, shape=())>") + self.assertEqual(repr(st), expected) def testPartitionOuterDimension2DDenseField(self): struct = structured_tensor.StructuredTensor.from_fields( diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index d65cd235ca8..81c3f9a2f70 100644 --- a/tensorflow/python/ops/variable_scope.py +++ 
b/tensorflow/python/ops/variable_scope.py @@ -42,6 +42,7 @@ from tensorflow.python.ops import init_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variables from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.types import core from tensorflow.python.util import deprecation from tensorflow.python.util import function_utils from tensorflow.python.util import tf_contextlib @@ -1000,7 +1001,7 @@ class _VariableStore(object): return initializer, initializing_from_value -class _LazyEvalTensor(object): +class _LazyEvalTensor(core.Tensor): """A Tensor-like object that only evaluates its thunk when used.""" def __init__(self, thunk): @@ -1069,8 +1070,6 @@ session.register_session_run_conversion_functions( lambda fetch: ([fetch._master_tensor], lambda fetched_vals: fetched_vals[0]) # pylint: disable=protected-access ) -ops.register_dense_tensor_like_type(_LazyEvalTensor) - # To stop regularization, use this regularizer @tf_export(v1=["no_regularizer"]) diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index 1080778e3d3..d3df0659b5a 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -47,6 +47,7 @@ from tensorflow.python.util import tf_should_use from tensorflow.python.util.deprecation import deprecated from tensorflow.python.util.deprecation import deprecated_args from tensorflow.python.util.tf_export import tf_export +from tensorflow.python.types import core def default_variable_creator(_, **kwds): @@ -264,6 +265,7 @@ class VariableMetaclass(type): @tf_export("Variable", v1=[]) +# TODO(mdan): This should subclass core.Tensor, and not all its subclasses? class Variable(six.with_metaclass(VariableMetaclass, trackable.Trackable)): """See the [variable guide](https://tensorflow.org/guide/variable). 
@@ -1551,7 +1553,7 @@ class VariableV1(Variable): # TODO(apassos): do not repeat all comments here -class RefVariable(VariableV1): +class RefVariable(VariableV1, core.Tensor): """Ref-based implementation of variables.""" def __init__( @@ -3032,7 +3034,6 @@ class PartitionedVariable(object): # allowing instances of the class to be used as tensors. ops.register_tensor_conversion_function(RefVariable, RefVariable._TensorConversionFunction) # pylint: disable=protected-access -ops.register_dense_tensor_like_type(RefVariable) @tf_export(v1=["global_variables"]) diff --git a/tensorflow/python/profiler/BUILD b/tensorflow/python/profiler/BUILD index e5ca60843e3..b6565f594c9 100644 --- a/tensorflow/python/profiler/BUILD +++ b/tensorflow/python/profiler/BUILD @@ -226,6 +226,7 @@ py_library( deps = [ "//tensorflow/python:util", "//tensorflow/python/profiler/internal:_pywrap_traceme", + "//tensorflow/python/types", "@six_archive//:six", ], ) diff --git a/tensorflow/python/saved_model/utils_impl.py b/tensorflow/python/saved_model/utils_impl.py index 42e971d050d..0f635b6bf85 100644 --- a/tensorflow/python/saved_model/utils_impl.py +++ b/tensorflow/python/saved_model/utils_impl.py @@ -178,7 +178,7 @@ def get_tensor_from_tensor_info(tensor_info, graph=None, import_scope=None): spec = struct_coder.decode_proto(spec_proto) components = [_get_tensor(component.name) for component in tensor_info.composite_tensor.components] - return spec._from_components(components) # pylint: disable=protected-access + return nest.pack_sequence_as(spec, components, expand_composites=True) else: raise ValueError("Invalid TensorInfo.encoding: %s" % encoding) diff --git a/tensorflow/python/tf_program/BUILD b/tensorflow/python/tf_program/BUILD new file mode 100644 index 00000000000..9dfb0df8a24 --- /dev/null +++ b/tensorflow/python/tf_program/BUILD @@ -0,0 +1,22 @@ +package(licenses = ["notice"]) + +py_library( + name = "pywrap_tfd", + srcs = ["pywrap_tfd.py"], + deps = [ + 
"//tensorflow/compiler/mlir/python/mlir_wrapper", + ], +) + +py_library( + name = "mlir_gen", + srcs = ["mlir_gen.py"], + visibility = ["//visibility:public"], + deps = [ + ":pywrap_tfd", + "//tensorflow/python/autograph/pyct", + "//tensorflow/python/autograph/pyct/static_analysis", + "//tensorflow/python/types", + "@gast_archive//:gast", + ], +) diff --git a/tensorflow/python/tf_program/mlir_gen.py b/tensorflow/python/tf_program/mlir_gen.py new file mode 100644 index 00000000000..8395848a53a --- /dev/null +++ b/tensorflow/python/tf_program/mlir_gen.py @@ -0,0 +1,456 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""mlir_gen: Generate mlir code from python code.""" + +# pylint: disable=invalid-name +# pylint: disable=missing-function-docstring + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import gast as ast +from tensorflow.python.autograph.pyct import anno +from tensorflow.python.autograph.pyct import cfg +from tensorflow.python.autograph.pyct import inspect_utils +from tensorflow.python.autograph.pyct import naming +from tensorflow.python.autograph.pyct import parser +from tensorflow.python.autograph.pyct import qual_names +from tensorflow.python.autograph.pyct import transformer +from tensorflow.python.autograph.pyct.static_analysis import activity +from tensorflow.python.autograph.pyct.static_analysis import annos +from tensorflow.python.autograph.pyct.static_analysis import liveness +from tensorflow.python.autograph.pyct.static_analysis import reaching_definitions +from tensorflow.python.autograph.pyct.static_analysis import reaching_fndefs +import tensorflow.python.tf_program.pywrap_tfd as tfp +from tensorflow.python.types import core + + +class SymbolTable(object): + """Symbol Table for python code.""" + + def __init__(self): + self.symbols = [] + self.enter_scope() + + def enter_scope(self): + """Enter a new scope - at function level.""" + self.symbols.append({'types': {}, 'symbols': {}}) + self.curr_table = self.symbols[len(self.symbols) - 1] + + def insert_symbol(self, name, value): + self.curr_table['symbols'][name] = value + self.curr_table['types'][name] = value.getType() + return value + + def insert_type(self, name, type_): + self.curr_table['types'][name] = type_ + + def exit_scope(self): + self.symbols.pop() + self.curr_table = self.symbols[len(self.symbols) - 1] + + def lookup(self, name): + curr_idx = len(self.symbols) - 1 + while curr_idx >= 0 and (name not in self.symbols[curr_idx]['symbols']): + 
curr_idx -= 1 + if curr_idx < 0: + return None + return self.symbols[curr_idx]['symbols'][name] + + def lookup_type(self, name): + curr_idx = len(self.symbols) - 1 + while curr_idx >= 0 and (name not in self.symbols[curr_idx]['types']): + curr_idx -= 1 + if curr_idx < 0: + return None + return self.symbols[curr_idx]['types'][name] + + def __repr__(self): + s = '\n'.join( + ' ' * idx * 2 + str(table) for idx, table in enumerate(self.symbols)) + return s + + +class ProcessType(ast.NodeVisitor): + """Visit a node and return processed type. Currently only visits annotations and gives their type. + """ + + def __init__(self, prog, ctx): + self.prog = prog + self.ctx = ctx + + def visit_Attribute(self, node): + # Supported: core.Tensor + value = self.visit(node.value) + if value is None or not hasattr(value, node.attr): + raise AttributeError(str(type(value)) + ' has no attribute ' + node.attr) + attr = getattr(value, node.attr) + + if attr == core.Tensor: + return tfp.UnrankedTensorType.get(tfp.IntegerType.get(32, self.prog.ctx)) + return attr + + def visit_Name(self, node): + if node.id == 'int': + return tfp.IntegerType.get(32, self.prog.ctx) + if node.id == 'bool': + return tfp.IntegerType.get(1, self.prog.ctx) + if node.id in self.ctx.info.namespace: + return self.ctx.info.namespace[node.id] + + +class MLIRGen(ast.NodeVisitor): + """Visit the AST and generate MLIR code. Requires liveness, reaching_definitions. + """ + + def __init__(self, ctx): + self.ctx = ctx + self.symbol_table = SymbolTable() + self.prog = tfp.TFProgram() + self.opbuilder = None + + def visit_block(self, block): + return [self.visit(item) for item in block] + + def process_type(self, node): + return ProcessType(self.prog, self.ctx).visit(node) + + def visit_Assign(self, node): + value = self.visit(node.value) + if isinstance(value, tuple): + # If it is a tuple of values, assign one to each in targets + # TODO: This currently is assuming that all elts in targets[0] are Name + # objects.
This might not be always True. + for key, val in zip(node.targets[0].elts, value): + self.symbol_table.insert_symbol(key.id, val) + else: + self.symbol_table.insert_symbol(node.targets[0].id, value) + + def visit_BinOp(self, node): + left = self.visit(node.left) + right = self.visit(node.right) + if isinstance(node.op, ast.Sub): + return tfp.Tf_SubOp.create(self.opbuilder, self.opbuilder.getUnknownLoc(), + left, right).getResult(0) + if isinstance(node.op, ast.Add): + return tfp.Tf_AddV2Op.create(self.opbuilder, + self.opbuilder.getUnknownLoc(), left, + right).getResult(0) + + def visit_BoolOp(self, node): + values = [self.visit(value) for value in node.values] + if isinstance(node.op, ast.Or): + return tfp.OrOp.create(self.opbuilder, self.opbuilder.getUnknownLoc(), + values).getResult(0) + if isinstance(node.op, ast.And): + return tfp.AndOp.create(self.opbuilder, self.opbuilder.getUnknownLoc(), + values).getResult(0) + + def visit_Call(self, node): + func = self.visit(node.func) + args = [self.visit(arg) for arg in node.args] + callop = tfp.Tf_LegacyCallOp.create(self.opbuilder, + self.opbuilder.getUnknownLoc(), + func.getType().getResults(), args, + func.getName()) + if callop.getNumResults() == 1: + return callop[0] + return tuple(callop.getResult(idx) for idx in range(callop.getNumResults())) + + def visit_Compare(self, node): + left = self.visit(node.left) + opb = self.opbuilder + for op, right in zip(node.ops, node.comparators): + if isinstance(op, ast.Eq): + left = tfp.Tf_EqualOp.create(opb, opb.getUnknownLoc(), left, + self.visit(right)).getResult(0) + elif isinstance(op, ast.Lt): + left = tfp.Tf_LessOp.create(opb, opb.getUnknownLoc(), left, + self.visit(right)).getResult(0) + elif isinstance(op, ast.LtE): + left = tfp.Tf_LessEqualOp.create(opb, opb.getUnknownLoc(), left, + self.visit(right)).getResult(0) + elif isinstance(op, ast.Gt): + left = tfp.Tf_GreaterOp.create(opb, opb.getUnknownLoc(), left, + self.visit(right)).getResult(0) + elif isinstance(op, 
ast.GtE): + left = tfp.Tf_GreaterEqualOp.create(opb, opb.getUnknownLoc(), left, + self.visit(right)).getResult(0) + elif isinstance(op, ast.NotEq): + left = tfp.Tf_NotEqualOp.create(opb, opb.getUnknownLoc(), left, + self.visit(right)).getResult(0) + else: + raise NotImplementedError('CompareOp operator not recognized') + return left + + def visit_Constant(self, node): + opb = self.opbuilder + value = None + if isinstance(node.value, int): + value = tfp.Tf_ConstOp.create( + opb, opb.getUnknownLoc(), + tfp.IntegerAttr.get( + tfp.IntegerType.get(32, self.prog.ctx), node.value)).getResult(0) + return value + + def visit_FunctionDef(self, node): + # Cache the current builder + cache_builder = self.opbuilder + inputs, outputs = [], [] + + for arg in node.args.args: + inputs.append(self.process_type(arg.annotation)) + + if node.returns: + outputs = [self.process_type(node.returns)] + + currfunc = self.prog.add_function( + self.ctx.namer.new_symbol(node.name, []), + self.prog.get_function_type(inputs, outputs)) + + # Add the function to symbol table and enter new scope + self.symbol_table.insert_symbol(node.name, currfunc) + self.symbol_table.enter_scope() + + # Add arguments to symbol table + for arg, value in zip(node.args.args, currfunc.getArguments()): + self.symbol_table.insert_symbol(arg.id, value) + self.opbuilder = tfp.OpBuilder(currfunc.getBody()) + + self.visit_block(node.body) + self.symbol_table.exit_scope() + self.opbuilder = cache_builder + + def visit_If(self, node): + cond = self.visit(node.test) + + # Create ifop + body_scope = anno.getanno(node, annos.NodeAnno.BODY_SCOPE) + orelse_scope = anno.getanno(node, annos.NodeAnno.ORELSE_SCOPE) + modified_in_cond = list(body_scope.modified | orelse_scope.modified) + outputs = [ + self.symbol_table.lookup_type(str(var)) for var in modified_in_cond + ] + ifop = tfp.IfOp.create(self.opbuilder, self.opbuilder.getUnknownLoc(), cond, + outputs) + + # Cache the builder + cache_builder = self.opbuilder + + # Visit body + 
self.opbuilder = tfp.OpBuilder(ifop.getRegion(0)) + # Enter scope to avoid values generated inside the region to come in symbol + # table + self.symbol_table.enter_scope() + for stmt in node.body: + self.visit(stmt) + retvals = [ + self.symbol_table.lookup(str(varname)) for varname in modified_in_cond + ] + tfp.ReturnOp.create(self.opbuilder, self.opbuilder.getUnknownLoc(), retvals) + self.symbol_table.exit_scope() + + # Visit orelse + self.opbuilder = tfp.OpBuilder(ifop.getRegion(1)) + self.symbol_table.enter_scope() + for stmt in node.orelse: + self.visit(stmt) + retvals = [ + self.symbol_table.lookup(str(varname)) for varname in modified_in_cond + ] + tfp.ReturnOp.create(self.opbuilder, self.opbuilder.getUnknownLoc(), retvals) + self.symbol_table.exit_scope() + + # Reset builder and enter return values in symbol table + self.opbuilder = cache_builder + for idx, var in enumerate(modified_in_cond): + self.symbol_table.insert_symbol(str(var), ifop.getResult(idx)) + + if ifop.getNumResults() == 1: + return ifop.getResult(0) + + return tuple(ifop.getResult(i) for i in range(ifop.getNumResults())) + + def visit_Name(self, node): + if self.symbol_table.lookup(node.id): + return self.symbol_table.lookup(node.id) + raise NotImplementedError('Symbol not found' + node.id) + + def visit_Return(self, node): + opb = self.opbuilder + value = self.visit(node.value) + if isinstance(value, tuple): + # For more than one return values + return tfp.ReturnOp.create(opb, opb.getUnknownLoc(), list(value)) + return tfp.ReturnOp.create(opb, opb.getUnknownLoc(), [value]) + + def visit_Tuple(self, node): + return tuple(self.visit(elt) for elt in node.elts) + + def visit_UnaryOp(self, node): + operand = self.visit(node.operand) + if isinstance(node.op, ast.USub): + return tfp.Tf_NegOp.create(self.opbuilder, self.opbuilder.getUnknownLoc(), + operand).getResult(0) + + def _get_basic_loop_vars(self, modified, live_in, live_out): + # [This is directly from + # 
tensorflow/python/autograph/converters/control_flow.py] + # The loop variables corresponding to simple symbols (e.g. `x`). + basic_loop_vars = [] + for s in modified: + if s.is_composite(): + # TODO: Raise an error when this happens for a TF loop. + continue + # Variables not live into or out of the loop are considered local to the + # loop. + if s not in live_in and s not in live_out: + continue + basic_loop_vars.append(s) + return frozenset(basic_loop_vars) + + def _get_composite_loop_vars(self, modified, live_in): + # [This is directly from + # tensorflow/python/autograph/converters/control_flow.py] + # The loop variables corresponding to composite symbols (e.g. `self.x`). + composite_loop_vars = [] + for s in modified: + if not s.is_composite(): + continue + # Mutations made to objects created inside the loop will appear as writes + # to composite symbols. Because these mutations appear as modifications + # made to composite symbols, we check whether the composite's parent is + # actually live into the loop. + # Example: + # while cond: + # x = Foo() + # x.foo = 2 * x.foo # x.foo is live into the loop, but x is not. + # + # Note that some parents might not be symbols - for example, in x['foo'], + # 'foo' is a parent, but it's a literal, not a symbol. We don't check the + # liveness of literals. 
+ support_set_symbols = tuple( + sss for sss in s.support_set if sss.is_symbol()) + if not all(sss in live_in for sss in support_set_symbols): + continue + composite_loop_vars.append(s) + return frozenset(composite_loop_vars) + + def _get_loop_vars(self, node, modified): + # [This is directly from python/autograph/converters/control_flow.py] + body_scope = anno.getanno(node, annos.NodeAnno.BODY_SCOPE) + defined_in = anno.getanno(node, anno.Static.DEFINED_VARS_IN) + live_in = anno.getanno(node, anno.Static.LIVE_VARS_IN) + live_out = anno.getanno(node, anno.Static.LIVE_VARS_OUT) + reserved_symbols = body_scope.referenced + + basic_loop_vars = self._get_basic_loop_vars(modified, live_in, live_out) + composite_loop_vars = self._get_composite_loop_vars(modified, live_in) + loop_vars = tuple(basic_loop_vars | composite_loop_vars) + + # Variable that are used or defined inside the loop, but not defined + # before entering the loop. Only simple variables must be defined. The + # composite ones will be implicitly checked at runtime. 
+ undefined_lives = basic_loop_vars - defined_in + + return loop_vars, reserved_symbols, undefined_lives + + def visit_While(self, node): + + # Create a new WhileOp + # `inputs` are initial values for loop variables + body_scope = anno.getanno(node, annos.NodeAnno.BODY_SCOPE) + loop_vars, _, _ = self._get_loop_vars(node, body_scope.modified) + inputs = [self.symbol_table.lookup(str(name)) for name in loop_vars] + types = [input_.getType() for input_ in inputs] + while_op = tfp.WhileOp.create(self.opbuilder, + self.opbuilder.getUnknownLoc(), inputs, types) + + # cache the current builder + cache_builder = self.opbuilder + + # Process cond + self.symbol_table.enter_scope() + for input_, type_ in zip(loop_vars, types): + self.symbol_table.insert_symbol( + str(input_), + while_op.getRegion(0).front().addArgument(type_)) + self.opbuilder = tfp.OpBuilder(while_op.getRegion(0)) + tfp.ReturnOp.create(self.opbuilder, self.opbuilder.getUnknownLoc(), + [self.visit(node.test)]) + self.symbol_table.exit_scope() + + # Process body + self.symbol_table.enter_scope() + for input_, type_ in zip(loop_vars, types): + self.symbol_table.insert_symbol( + str(input_), + while_op.getRegion(1).front().addArgument(type_)) + self.opbuilder = tfp.OpBuilder(while_op.getRegion(1)) + self.visit_block(node.body) + tfp.ReturnOp.create( + self.opbuilder, self.opbuilder.getUnknownLoc(), + [self.symbol_table.lookup(str(name)) for name in loop_vars]) + self.symbol_table.exit_scope() + + # Enter new values as symbols + for idx, var in enumerate(loop_vars): + self.symbol_table.insert_symbol(str(var), while_op.getResult(idx)) + + # Restore builder + self.opbuilder = cache_builder + + +def mlir_gen_internal(node, entity_info): + """Returns mlir module for unprocessed node `node`.""" + namer = naming.Namer({}) + graphs = cfg.build(node) + ctx = transformer.Context(entity_info, namer, None) + node = qual_names.resolve(node) + node = activity.resolve(node, ctx) + node = reaching_definitions.resolve(node, ctx, 
graphs) + node = reaching_fndefs.resolve(node, ctx, graphs) + node = liveness.resolve(node, ctx, graphs) + mlir_generator = MLIRGen(ctx) + mlir_generator.visit(node) + return mlir_generator.prog + + +def mlir_gen(func): + """Parse a function and return TFProgram.""" + node, source = parser.parse_entity(func, future_features=()) + entity_info = transformer.EntityInfo( + name=func.__name__, + source_code=source, + source_file=None, + future_features=(), + namespace=inspect_utils.getnamespace(func)) + return mlir_gen_internal(node, entity_info) + + +def mlir_gen_from_source(source=None, src_file=None): + """Parse a function as either a string or from a supplied file path and return a TFProgram. + """ + if source is None: + source = open(src_file).read() + node = ast.parse(source) + entity_info = transformer.EntityInfo( + name='mlir_module', + source_code=source, + source_file=None, + future_features=(), + namespace={}) + return mlir_gen_internal(node, entity_info) diff --git a/tensorflow/python/tf_program/pywrap_tfd.py b/tensorflow/python/tf_program/pywrap_tfd.py new file mode 100644 index 00000000000..0d9a236f5d3 --- /dev/null +++ b/tensorflow/python/tf_program/pywrap_tfd.py @@ -0,0 +1,159 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Intermediate between python bindings for MLIR and mlir generation for tensorflow program. + +This passes most of the mlir classes as is, but adds a few new operations and +the basic structure for a tensorflow program. +""" + +# pylint: disable=invalid-name + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.compiler.mlir.python.mlir_wrapper import mlir_wrapper as mlir + +# Class Definitions +OpBuilder = mlir.OpBuilder +Block = mlir.Block + +# Types +Type = mlir.Type +IntegerType = mlir.IntegerType +FloatType = mlir.FloatType +RankedTensorType = mlir.RankedTensorType +UnrankedTensorType = mlir.UnrankedTensorType +IntegerAttr = mlir.IntegerAttr + +# Standard Ops +ReturnOp = mlir.ReturnOp + +# TF Dialect Ops +Tf_AnyOp = mlir.Tf_AnyOp +Tf_AddV2Op = mlir.Tf_AddV2Op +Tf_ConstOp = mlir.Tf_ConstOp +Tf_EqualOp = mlir.Tf_EqualOp +Tf_GreaterEqualOp = mlir.Tf_GreaterEqualOp +Tf_GreaterOp = mlir.Tf_GreaterOp +Tf_LegacyCallOp = mlir.Tf_LegacyCallOp +Tf_LessEqualOp = mlir.Tf_LessEqualOp +Tf_LessOp = mlir.Tf_LessOp +Tf_NegOp = mlir.Tf_NegOp +Tf_NotEqualOp = mlir.Tf_NotEqualOp +Tf_SubOp = mlir.Tf_SubOp + + +class IfOp(object): + """ + tfp.If(cond) ({body}, {orelse}) : type If `cond` is true, `body` is + executed, otherwise `orelse` is executed. + """ + + @classmethod + def create(cls, opb, loc, cond, outputs): + state = mlir.OperationState(loc, "tfp.If") + state.addOperands([cond]) + state.addTypes(outputs) + state.addRegion().push_back(Block.new()) # body region + state.addRegion().push_back(Block.new()) # orelse region + return opb.createOperation(state) + + +class OrOp(object): + """ + tfp.Or(ops...) This is like tf.Any, except that the first dimension is opened + into `ops`. + + Returns a tensor of 1-bit integers which is "Logical OR" of the + corresponding elements in ops...
+ """ + + @classmethod + def create(cls, opb, loc, values): + state = mlir.OperationState(loc, "tfp.Or") + state.addTypes( + [UnrankedTensorType.get(IntegerType.get(1, opb.getContext()))]) + state.addOperands(values) + return opb.createOperation(state) + + +class AndOp(object): + """ + tfp.And(ops...) This is like tf.All, except that the first dimension is opened + to `ops`. + + Returns a tensor of 1-bit integers which is "Logical AND" of the + corresponding elements in ops... + """ + + @classmethod + def create(cls, opb, loc, values): + state = mlir.OperationState(loc, "tfp.And") + state.addTypes( + [UnrankedTensorType.get(IntegerType.get(1, opb.getContext()))]) + state.addOperands(values) + return opb.createOperation(state) + + +class WhileOp(object): + """tfp.While(init-vals, { + + ^bb1(cond-args): + cond-region + return cond + }, { + ^bb1(body-args): + body-region + }) + As long as `cond-region` returns a "true"-like value, the body-region + is executed and the arguments are replaced by its return values for the next + iteration.
+ """ + + @classmethod + def create(cls, opb, loc, inputs, outputs): + state = mlir.OperationState(loc, "tfp.While") + state.addOperands(inputs) + state.addTypes(outputs) + state.addRegion().push_back(Block.new()) # cond region + state.addRegion().push_back(Block.new()) # body region + return opb.createOperation(state) + + +class TFProgram(object): + """Python wrap for a Tensorflow Program (essentially an mlir Module).""" + + def __init__(self): + mlir.registerDialects() + self.ctx = mlir.MLIRContext() + self.builder = mlir.Builder(self.ctx) + self.module = mlir.ModuleOp.create(mlir.UnknownLoc.get(self.ctx)) + self.curr_func = None + + def add_function(self, name, func_type): + self.curr_func = mlir.FuncOp.create( + mlir.UnknownLoc.get(self.ctx), name, func_type) + self.module.push_back(self.curr_func) + return self.curr_func + + def get_function_type(self, inputs, outputs): + return self.builder.getFunctionType(inputs, outputs) + + def dump(self): + self.module.dump() + + def __str__(self): + return self.module.getAsStr() diff --git a/tensorflow/python/tf_program/tests/BUILD b/tensorflow/python/tf_program/tests/BUILD new file mode 100644 index 00000000000..1cf0fad6c93 --- /dev/null +++ b/tensorflow/python/tf_program/tests/BUILD @@ -0,0 +1,20 @@ +package(licenses = ["notice"]) + +py_test( + name = "mlir_gen_test", + size = "small", + testonly = True, + srcs = ["mlir_gen_test.py"], + python_version = "PY3", + srcs_version = "PY3", + tags = [ + "no_oss_py2", + "no_pip", + ], + deps = [ + "//tensorflow/compiler/mlir/python/mlir_wrapper:filecheck_wrapper", + "//tensorflow/python:client_testlib", + "//tensorflow/python/tf_program:mlir_gen", + "//tensorflow/python/types", + ], +) diff --git a/tensorflow/python/tf_program/tests/mlir_gen_test.py b/tensorflow/python/tf_program/tests/mlir_gen_test.py new file mode 100644 index 00000000000..5e1ca5b36e0 --- /dev/null +++ b/tensorflow/python/tf_program/tests/mlir_gen_test.py @@ -0,0 +1,247 @@ +# Copyright 2020 The TensorFlow 
Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for `mlir_gen` module""" + +# pylint: disable=missing-function-docstring +# pylint: disable=invalid-name + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.platform import test +from tensorflow.python.types import core +from tensorflow.python.tf_program.mlir_gen import mlir_gen + +import tensorflow.compiler.mlir.python.mlir_wrapper.filecheck_wrapper as fw + + +class MLIRGenTestBase(test.TestCase): + + def _check_code(self, mlir_code, exp_mlir_code): + return self.assertTrue(fw.check(str(mlir_code), exp_mlir_code)) + + +class MLIRGenTest(MLIRGenTestBase): + """MLIR Generation Tests for Tensorflow Program""" + + def test_simple(self): + + def test_fn(): + pass + + mlir_code = mlir_gen(test_fn) + mlir_code_exp = r""" + CHECK-LABEL: @test_fn + """ + self._check_code(mlir_code, mlir_code_exp) + + def test_argument(self): + + def test_fn(x: core.Tensor) -> core.Tensor: + return x + + mlir_code = mlir_gen(test_fn) + mlir_code_exp = r""" + CHECK-LABEL: @test_fn(%arg0: tensor<*xi32>) -> tensor<*xi32> { + CHECK-NEXT: return %arg0 : tensor<*xi32> + """ + self._check_code(mlir_code, mlir_code_exp) + + def test_constant(self): + + def test_fn() -> int: + return 23 + + mlir_code = mlir_gen(test_fn) + exp_mlir_code = r""" + 
CHECK-LABEL: func @test_fn() -> i32 + CHECK: %[[r0:[0-9]+]] = "tf.Const"() {value = dense<23> : tensor} : () -> tensor + CHECK: return %[[r0]] : tensor + """ + self._check_code(mlir_code, exp_mlir_code) + + def test_BoolOp(self): + + def test_fn(x: bool, y: bool) -> bool: + return x or y or x and x and y + + mlir_code = mlir_gen(test_fn) + exp_mlir_code = r""" + CHECK-LABEL: func @test_fn(%arg0: i1, %arg1: i1) -> i1 + CHECK: %[[r0:[0-9]+]] = "tfp.And"(%arg0, %arg0, %arg1) : (i1, i1, i1) -> tensor<*xi1> + CHECK: %[[r1:[0-9]+]] = "tfp.Or"(%arg0, %arg1, %[[r0]]) : (i1, i1, tensor<*xi1>) -> tensor<*xi1> + return %[[r1]] : tensor<*xi1> + """ + self._check_code(mlir_code, exp_mlir_code) + + def test_Call(self): + + def test_fn(): + + def f1(): + return 23 + + def f2(): + return f1() + + f2() + + mlir_code = mlir_gen(test_fn) + exp_mlir_code = r""" + CHECK-LABEL: func @test_fn() + CHECK: "tf.LegacyCall"() {_disable_call_shape_inference = false, f = @f2} : () -> () + CHECK: } + CHECK-LABEL: func @f1() { + CHECK: %[[r0:[0-9]+]] = "tf.Const"() {value = dense<23> : tensor} : () -> tensor + CHECK: return %[[r0]] : tensor + CHECK: } + CHECK-LABEL: func @f2() { + CHECK: "tf.LegacyCall"() {_disable_call_shape_inference = false, f = @f1} : () -> () + } + """ + self._check_code(mlir_code, exp_mlir_code) + + def test_Compare(self): + + def test_fn(x: core.Tensor, y: core.Tensor, z: core.Tensor): + return x > y < z + + mlir_code = mlir_gen(test_fn) + exp_mlir_code = r""" + CHECK-LABEL: func @test_fn(%arg0: tensor<*xi32>, %arg1: tensor<*xi32>, %arg2: tensor<*xi32>) + CHECK: %[[r0:[0-9]+]] = "tf.Greater"(%arg0, %arg1) : (tensor<*xi32>, tensor<*xi32>) -> tensor<*xi1> + CHECK: %[[r1:[0-9]+]] = "tf.Less"(%[[r0]], %arg2) : (tensor<*xi1>, tensor<*xi32>) -> tensor<*xi1> + CHECK: return %[[r1]] : tensor<*xi1> + """ + self._check_code(mlir_code, exp_mlir_code) + + def test_Assign_BinOp(self): + + def test_fn() -> int: + y = 12 + 23 - 24 + return y + + mlir_code = mlir_gen(test_fn) + 
exp_mlir_code = r""" + CHECK-LABEL: func @test_fn() -> i32 + CHECK: %[[r0:[0-9]+]] = "tf.AddV2"(%{{[0-9]+}}, %{{[0-9]+}}) : (tensor, tensor) -> tensor + CHECK: %[[r1:[0-9]+]] = "tf.Sub"(%{{[0-9]+}}, %{{[0-9]+}}) : (tensor, tensor) -> tensor + CHECK: return %[[r1]] : tensor + """ + self._check_code(mlir_code, exp_mlir_code) + + def test_if(self): + + def test_fn(x: core.Tensor) -> int: + res = 0 + if x > 0: + res = 1 + elif x < 0: + res = -1 + else: + res = 0 + return res + + mlir_code = mlir_gen(test_fn) + exp_mlir_code = r""" + CHECK-LABEL: func @test_fn(%arg0: tensor<*xi32>) -> i32 + + CHECK: %[[r1:[0-9]+]] = "tf.Greater"(%arg0, %{{[0-9]+}}) : (tensor<*xi32>, tensor) -> tensor<*xi1> + CHECK-NEXT: %[[r2:[0-9]+]] = "tfp.If"(%[[r1]]) ( { + CHECK: return %{{[0-9]+}} : tensor + CHECK-NEXT: }, { + CHECK: %[[r3:[0-9]+]] = "tf.Less"(%arg0, %{{[0-9]+}}) : (tensor<*xi32>, tensor) -> tensor<*xi1> + CHECK: %[[r4:[0-9]+]] = "tfp.If"(%[[r3]]) ( { + CHECK: %[[r5:[0-9]+]] = "tf.Neg"(%{{[0-9]+}}) : (tensor) -> tensor + CHECK: return %[[r5]] : tensor + CHECK-NEXT: }, { + CHECK: return %{{[0-9]+}} : tensor + CHECK-NEXT: }) : (tensor<*xi1>) -> tensor + CHECK: return %[[r4]] : tensor + CHECK-NEXT: }) : (tensor<*xi1>) -> tensor + CHECK-NEXT: return %[[r2]] : tensor + """ + self._check_code(mlir_code, exp_mlir_code) + + def test_while(self): + + def test_fn(x: core.Tensor) -> core.Tensor: + s = 0 + while x > 0: + s = s + x + return s + + mlir_code = mlir_gen(test_fn) + exp_mlir_code = r""" + CHECK-LABEL: func @test_fn(%arg0: tensor<*xi32>) -> tensor<*xi32> + + CHECK: %[[r1:[0-9]+]] = "tfp.While"(%0) ( { + CHECK-NEXT: ^{{[^ ]+}}(%arg1: tensor): + CHECK: %[[r2:[0-9]+]] = "tf.Greater"(%arg0, %{{[0-9]+}}) : (tensor<*xi32>, tensor) -> tensor<*xi1> + CHECK-NEXT: return %[[r2]] : tensor<*xi1> + CHECK-NEXT: }, { + CHECK-NEXT: ^{{[^ ]+}}(%arg1: tensor): + CHECK: %[[r3:[0-9]+]] = "tf.AddV2"(%arg1, %arg0) : (tensor, tensor<*xi32>) -> tensor<*xi32> + CHECK-NEXT: return %[[r3]] : tensor<*xi32> + 
CHECK-NEXT: }) : (tensor) -> tensor + CHECK-NEXT: return %[[r1]] : tensor + """ + self._check_code(mlir_code, exp_mlir_code) + + def test_fibonacci(self): + + def test_fn(x: core.Tensor) -> core.Tensor: + res, idx = 0, 2 + a, b = 0, 1 + if x == 0 or x == 1: + res = x + else: + while idx <= x: + res = a + b + a = b + b = res + idx = idx + 1 + return res + + mlir_code = mlir_gen(test_fn) + exp_mlir_code = r""" + CHECK-LABEL: @test_fn(%arg0: tensor<*xi32>) -> tensor<*xi32> + CHECK: %[[r5:[0-9]+]] = "tf.Equal"(%arg0, %{{[0-9]+}}) {incompatible_shape_error = true} : (tensor<*xi32>, tensor) -> tensor<*xi1> + CHECK: %[[r7:[0-9]+]] = "tf.Equal"(%arg0, %{{[0-9]+}}) {incompatible_shape_error = true} : (tensor<*xi32>, tensor) -> tensor<*xi1> + CHECK: %[[r8:[0-9]+]] = "tfp.Or"(%[[r5]], %[[r7]]) : (tensor<*xi1>, tensor<*xi1>) -> tensor<*xi1> + + CHECK: %[[r9:[0-9]+]]:4 = "tfp.If"(%[[r8]]) ( { + CHECK-NEXT: return %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : tensor<{{(\*x)?}}i32>, tensor<{{(\*x)?}}i32>, tensor<{{(\*x)?}}i32>, tensor<{{(\*x)?}}i32> + CHECK-NEXT: }, { + CHECK-NEXT: %[[r10:[0-9]+]]:4 = "tfp.While"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( { + CHECK-NEXT: ^{{[^ ]*}}(%arg1: tensor, %arg2: tensor, %arg3: tensor, %arg4: tensor): + CHECK-NEXT: %[[r11:[0-9]+]] = "tf.LessEqual"(%arg{{[0-9]+}}, %arg{{[0-9]+}}) : (tensor<{{(\*x)?}}i32>, tensor<{{(\*x)?}}i32>) -> tensor<*xi1> + CHECK-NEXT: return %[[r11]] : tensor<*xi1> + CHECK-NEXT: }, { + CHECK-NEXT: ^{{[^ ]*}}(%arg1: tensor, %arg2: tensor, %arg3: tensor, %arg4: tensor): + CHECK-NEXT: %[[r12:[0-9]+]] = "tf.AddV2"(%arg{{[0-9]+}}, %arg{{[0-9]+}}) : (tensor, tensor) -> tensor + CHECK: %[[r13:[0-9]+]] = "tf.AddV2"(%arg{{[0-9]+}}, %{{[0-9]+}}) : (tensor, tensor) -> tensor + CHECK-NEXT: return %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : tensor, tensor, tensor, tensor + CHECK-NEXT: }) : (tensor, tensor, tensor, tensor) -> (tensor, tensor, tensor, tensor) + CHECK-NEXT: return %[[r10]]#{{[0-9]+}}, %[[r10]]#{{[0-9]+}}, %[[r10]]#{{[0-9]+}}, 
%[[r10]]#{{[0-9]+}} : tensor, tensor, tensor, tensor + CHECK-NEXT: }) : (tensor<*xi1>) -> (tensor, tensor, tensor, tensor) + CHECK-NEXT: return %[[r9]]#{{[0-9]+}} : tensor + """ + self._check_code(mlir_code, exp_mlir_code) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/python/tfe_wrapper.cc b/tensorflow/python/tfe_wrapper.cc index ec54efa61cf..836cafbd494 100644 --- a/tensorflow/python/tfe_wrapper.cc +++ b/tensorflow/python/tfe_wrapper.cc @@ -488,6 +488,18 @@ PYBIND11_MODULE(_pywrap_tfe, m) { // NOTE: different from TFE_ContextSyncExecutors that raises potential // errors, deliberately ignore executor statuses in cleanup. }); + m.def("TFE_ContextSetSoftDevicePlacement", [](py::handle& ctx, bool enable) { + tensorflow::Safe_TF_StatusPtr status = + tensorflow::make_safe(TF_NewStatus()); + TFE_ContextSetSoftDevicePlacement(tensorflow::InputTFE_Context(ctx), enable, + status.get()); + }); + m.def("TFE_ContextSetLogDevicePlacement", [](py::handle& ctx, bool enable) { + tensorflow::Safe_TF_StatusPtr status = + tensorflow::make_safe(TF_NewStatus()); + TFE_ContextSetSoftDevicePlacement(tensorflow::InputTFE_Context(ctx), enable, + status.get()); + }); // TFE_Executor logic m.def( diff --git a/tensorflow/python/tpu/tpu.py b/tensorflow/python/tpu/tpu.py index c70a26f2b4d..28eba69b7da 100644 --- a/tensorflow/python/tpu/tpu.py +++ b/tensorflow/python/tpu/tpu.py @@ -1353,7 +1353,7 @@ def split_compile_and_replicate(computation, def custom_getter(getter, name, *args, **kwargs): """Variables on TPU have a few restrictions.""" - partitioner = kwargs["partitioner"] + partitioner = kwargs.get("partitioner", None) if partitioner is not None: kwargs["partitioner"] = None logging.warning( diff --git a/tensorflow/python/tpu/tpu_embedding.py b/tensorflow/python/tpu/tpu_embedding.py index fa07a929acc..d1848f34502 100644 --- a/tensorflow/python/tpu/tpu_embedding.py +++ b/tensorflow/python/tpu/tpu_embedding.py @@ -828,7 +828,7 @@ class TPUEmbedding(object): ... 
end_learning_rate=0.0) >>> wordpiece_table_config = TableConfig( ... vocabulary_size=119547, - ... dimension=768, + ... dimension=256, ... learning_rate_fn=learning_rate_fn) >>> wordpiece_feature_config = FeatureConfig( ... table_id='bert/embeddings/word_embeddings', @@ -846,11 +846,11 @@ class TPUEmbedding(object): ... batch_size=128, ... mode=TRAINING, ... optimization_parameters=optimization_parameters, - ... device_config=DeviceConfig( - ... num_cores=64, num_hosts=4, job_name='tpu_worker')) + ... master='') >>> with tf.Graph().as_default(): ... init_tpu_op = tf.compat.v1.tpu.initialize_system( - ... embedding_config=tpu_embedding.config_proto, job='tpu_worker') + ... embedding_config=tpu_embedding.config_proto) + ... tf.compat.v1.Session().run(init_tpu_op) """ # TODO(shizhiw): Consider adding a field to FeatureConfig that indicates that diff --git a/tensorflow/python/training/adam.py b/tensorflow/python/training/adam.py index 615ac587c21..93bacbdc0bb 100644 --- a/tensorflow/python/training/adam.py +++ b/tensorflow/python/training/adam.py @@ -92,11 +92,14 @@ class AdamOptimizer(optimizer.Optimizer): Section 2.1), not the epsilon in Algorithm 1 of the paper. use_locking: If True use locks for update operations. name: Optional name for the operations created when applying gradients. - Defaults to "Adam". @compatibility(eager) When eager execution is - enabled, `learning_rate`, `beta1`, `beta2`, and `epsilon` can each be a - callable that takes no arguments and returns the actual value to use. - This can be useful for changing these values across different - invocations of optimizer functions. @end_compatibility + Defaults to "Adam". + + @compatibility(eager) + When eager execution is enabled, `learning_rate`, `beta1`, `beta2`, and + `epsilon` can each be a callable that takes no arguments and returns the + actual value to use. This can be useful for changing these values across + different invocations of optimizer functions. 
+ @end_compatibility """ super(AdamOptimizer, self).__init__(use_locking, name) self._lr = learning_rate diff --git a/tensorflow/python/training/monitored_session.py b/tensorflow/python/training/monitored_session.py index d77278e98f4..ab63f4237da 100644 --- a/tensorflow/python/training/monitored_session.py +++ b/tensorflow/python/training/monitored_session.py @@ -1189,7 +1189,7 @@ class _WrappedSession(object): try: self._sess.close() except _PREEMPTION_ERRORS as e: - logging.warning( + logging.error( 'An error occurred when attempting to close the ' 'session. This may be due to a preemption in a ' 'connected worker or parameter server. Error: %s', e) diff --git a/tensorflow/python/types/BUILD b/tensorflow/python/types/BUILD index f35ca7fb803..e93bf5c10b3 100644 --- a/tensorflow/python/types/BUILD +++ b/tensorflow/python/types/BUILD @@ -27,6 +27,9 @@ py_strict_library( "internal.py", ], srcs_version = "PY2AND3", - visibility = ["//tensorflow:__subpackages__"], + visibility = [ + "//tensorflow:__subpackages__", + "//tensorflow:types_whitelist", + ], deps = [], ) diff --git a/tensorflow/stream_executor/gpu/BUILD b/tensorflow/stream_executor/gpu/BUILD index 5cb1642083e..9744fc82593 100644 --- a/tensorflow/stream_executor/gpu/BUILD +++ b/tensorflow/stream_executor/gpu/BUILD @@ -222,11 +222,11 @@ cc_library( hdrs = if_gpu_is_configured(["asm_compiler.h"]), copts = tf_copts(), visibility = [ + "//tensorflow/compiler/mlir/tools/kernel_gen:__subpackages__", "//tensorflow/compiler/xla/service/gpu:__subpackages__", "//tensorflow/compiler/xla/service/mlir_gpu:__subpackages__", "//tensorflow/core/kernels:__subpackages__", "//tensorflow/stream_executor:__subpackages__", - "//third_party/tf_runtime/tools/tf_kernel_gen:__subpackages__", ], deps = if_gpu_is_configured([ ":gpu_asm_opts", diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index ed780092ce1..b6066200553 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -59,7 +59,7 @@ load( # 
not contain rc or alpha, only numbers. # Also update tensorflow/core/public/version.h # and tensorflow/tools/pip_package/setup.py -VERSION = "2.1.0" +VERSION = "2.2.0" VERSION_MAJOR = VERSION.split(".")[0] # Sanitize a dependency so that it works correctly from code that includes @@ -194,10 +194,10 @@ def if_macos(a, otherwise = []): "//conditions:default": otherwise, }) -def if_ios(a): +def if_ios(a, otherwise = []): return select({ clean_dep("//tensorflow:ios"): a, - "//conditions:default": [], + "//conditions:default": otherwise, }) def if_ios_x86_64(a): diff --git a/tensorflow/tools/android/inference_interface/BUILD b/tensorflow/tools/android/inference_interface/BUILD index cbd161f05b3..fb3ab00f9bc 100644 --- a/tensorflow/tools/android/inference_interface/BUILD +++ b/tensorflow/tools/android/inference_interface/BUILD @@ -34,7 +34,7 @@ cc_library( copts = tf_copts(), visibility = ["//visibility:public"], deps = [ - "//tensorflow/core:android_tensorflow_lib_lite", + "//tensorflow/core:portable_tensorflow_lib_lite", "//tensorflow/java/src/main/native", ], alwayslink = 1, @@ -83,7 +83,7 @@ cc_binary( ], deps = [ ":android_tensorflow_inference_jni", - "//tensorflow/core:android_tensorflow_lib", + "//tensorflow/core:portable_tensorflow_lib", LINKER_SCRIPT, ], ) diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-tensor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-tensor.pbtxt index 4a30fae1da9..9315973e51d 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.-tensor.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.-tensor.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.Tensor" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "OVERLOADABLE_OPERATORS" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.-model.pbtxt index 272396239d7..d696021fcb4 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.-model.pbtxt +++ 
b/tensorflow/tools/api/golden/v1/tensorflow.keras.-model.pbtxt @@ -1,7 +1,6 @@ path: "tensorflow.keras.Model" tf_class { is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -175,7 +174,7 @@ tf_class { } member_method { name: "compute_mask" - argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "compute_output_shape" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.-sequential.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.-sequential.pbtxt index 8979491971f..b8486a27b9e 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.-sequential.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.Sequential" tf_class { is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-linear-model.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-linear-model.pbtxt index 448ea60cc0f..7bf71844fa6 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-linear-model.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-linear-model.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.experimental.LinearModel" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -176,7 +175,7 @@ tf_class { } member_method { name: "compute_mask" - argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "compute_output_shape" diff --git 
a/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-wide-deep-model.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-wide-deep-model.pbtxt index 8e1d9927434..87a7319639b 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-wide-deep-model.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-wide-deep-model.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.experimental.WideDeepModel" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -176,7 +175,7 @@ tf_class { } member_method { name: "compute_mask" - argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "compute_output_shape" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dense-features.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dense-features.pbtxt index ecda1603325..ba9156d7f95 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dense-features.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dense-features.pbtxt @@ -1,6 +1,6 @@ path: "tensorflow.keras.layers.DenseFeatures" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-model.pbtxt index 13c3416fc0c..00c9fc22def 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-model.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-model.pbtxt @@ -1,7 +1,6 @@ path: "tensorflow.keras.models.Model" tf_class { is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -175,7 +174,7 @@ tf_class { } member_method { name: "compute_mask" - argspec: "args=[\'self\', 
\'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "compute_output_shape" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt index 9218cbea99e..d3cca7311ee 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.models.Sequential" tf_class { is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-tensor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-tensor.pbtxt index 4a30fae1da9..9315973e51d 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.-tensor.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.-tensor.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.Tensor" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "OVERLOADABLE_OPERATORS" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.-model.pbtxt index 272396239d7..d696021fcb4 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.-model.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.-model.pbtxt @@ -1,7 +1,6 @@ path: "tensorflow.keras.Model" tf_class { is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -175,7 +174,7 @@ tf_class { } member_method { name: "compute_mask" - argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "compute_output_shape" diff --git 
a/tensorflow/tools/api/golden/v2/tensorflow.keras.-sequential.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.-sequential.pbtxt index 8979491971f..b8486a27b9e 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.-sequential.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.Sequential" tf_class { is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-linear-model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-linear-model.pbtxt index 448ea60cc0f..7bf71844fa6 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-linear-model.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-linear-model.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.experimental.LinearModel" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -176,7 +175,7 @@ tf_class { } member_method { name: "compute_mask" - argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "compute_output_shape" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-wide-deep-model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-wide-deep-model.pbtxt index 8e1d9927434..87a7319639b 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-wide-deep-model.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-wide-deep-model.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.keras.experimental.WideDeepModel" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -176,7 +175,7 @@ tf_class { } 
member_method { name: "compute_mask" - argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "compute_output_shape" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense-features.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense-features.pbtxt index f7137f0d09b..130a9954202 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense-features.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense-features.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.DenseFeatures" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-model.pbtxt index 13c3416fc0c..00c9fc22def 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-model.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-model.pbtxt @@ -1,7 +1,6 @@ path: "tensorflow.keras.models.Model" tf_class { is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -175,7 +174,7 @@ tf_class { } member_method { name: "compute_mask" - argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "compute_output_shape" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt index 9218cbea99e..d3cca7311ee 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt +++ 
b/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.models.Sequential" tf_class { is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" is_instance: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.preprocessing.image.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.preprocessing.image.pbtxt index 0b49aa9f3d4..e59c78cc496 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.preprocessing.image.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.preprocessing.image.pbtxt @@ -68,4 +68,8 @@ tf_module { name: "save_img" argspec: "args=[\'path\', \'x\', \'data_format\', \'file_format\', \'scale\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'True\'], " } + member_method { + name: "smart_resize" + argspec: "args=[\'x\', \'size\', \'interpolation\'], varargs=None, keywords=None, defaults=[\'bilinear\'], " + } } diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010 b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010 index df4b847b6f7..91d501109d0 100644 --- a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010 +++ b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010 @@ -75,6 +75,13 @@ RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py RUN python3.8 get-pip.py RUN python3.8 -m pip install --upgrade pip setuptools wheel +# Overwrite include paths that are generated for the multipython image. 
+RUN ln -sf "/usr/include/x86_64-linux-gnu/python3.6m" "/dt7/usr/include/x86_64-linux-gnu/python3.6m" +RUN ln -sf "/usr/include/x86_64-linux-gnu/python3.6m" "/dt8/usr/include/x86_64-linux-gnu/python3.6m" + +RUN ln -sf "/usr/include/x86_64-linux-gnu/python3.8" "/dt7/usr/include/x86_64-linux-gnu/python3.8" +RUN ln -sf "/usr/include/x86_64-linux-gnu/python3.8" "/dt8/usr/include/x86_64-linux-gnu/python3.8" + # Make apt work with python 3.6. RUN cp /usr/lib/python3/dist-packages/apt_pkg.cpython-35m-x86_64-linux-gnu.so \ /usr/lib/python3/dist-packages/apt_pkg.so diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010-multipython b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010-multipython index 54bb4b3773f..9c85091563e 100644 --- a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010-multipython +++ b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010-multipython @@ -17,7 +17,6 @@ RUN apt-get update && apt-get install -y \ flex \ g++ \ make \ - patchelf \ rpm2cpio \ unar \ wget \ @@ -56,17 +55,17 @@ RUN /install/install_bootstrap_deb_packages.sh COPY install/install_deb_packages.sh /install/ RUN /install/install_deb_packages.sh -# Install patchelf to facilitate the creation of manylinux2010 whls. -COPY install/install_patchelf.sh /install/ -RUN /install/install_patchelf.sh - -# Install additional dependencies to build Python from source. 
+# Install additional packages needed for this image: +# - dependencies to build Python from source +# - patchelf, as it is required by auditwheel RUN apt-get update && apt-get install -y \ - libncurses5-dev \ + libbz2-dev \ + libffi-dev \ libgdbm-dev \ + libncurses5-dev \ libnss3-dev \ libreadline-dev \ - libffi-dev \ + patchelf \ && \ rm -rf /var/lib/apt/lists/* @@ -87,9 +86,6 @@ RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.5" RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.6" RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.7" -# Install auditwheel to create manylinux2010 compliant binaries -RUN pip3 install auditwheel - ENV CLANG_VERSION="r42cab985fd95ba4f3f290e7bb26b93805edb447d" COPY install/install_latest_clang.sh /install/ RUN /install/install_latest_clang.sh diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.ubuntu16.04-manylinux2010 b/tensorflow/tools/ci_build/Dockerfile.rbe.ubuntu16.04-manylinux2010 index 516129ccd43..a14b9ac2a3e 100644 --- a/tensorflow/tools/ci_build/Dockerfile.rbe.ubuntu16.04-manylinux2010 +++ b/tensorflow/tools/ci_build/Dockerfile.rbe.ubuntu16.04-manylinux2010 @@ -73,13 +73,12 @@ RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py RUN python3.8 get-pip.py RUN python3.8 -m pip install --upgrade pip setuptools wheel -# TODO(klimek): Figure out a better way to get the right include paths -# forwarded when we install new packages. -RUN ln -s "/usr/include/x86_64-linux-gnu/python2.7" "/dt7/usr/include/x86_64-linux-gnu/python2.7" -RUN ln -s "/usr/include/x86_64-linux-gnu/python2.7" "/dt8/usr/include/x86_64-linux-gnu/python2.7" +# Overwrite include paths that are generated for the multipython image. 
+RUN ln -sf "/usr/include/x86_64-linux-gnu/python2.7" "/dt7/usr/include/x86_64-linux-gnu/python2.7" +RUN ln -sf "/usr/include/x86_64-linux-gnu/python2.7" "/dt8/usr/include/x86_64-linux-gnu/python2.7" -RUN ln -s "/usr/include/x86_64-linux-gnu/python3.6m" "/dt7/usr/include/x86_64-linux-gnu/python3.6m" -RUN ln -s "/usr/include/x86_64-linux-gnu/python3.6m" "/dt8/usr/include/x86_64-linux-gnu/python3.6m" +RUN ln -sf "/usr/include/x86_64-linux-gnu/python3.6m" "/dt7/usr/include/x86_64-linux-gnu/python3.6m" +RUN ln -sf "/usr/include/x86_64-linux-gnu/python3.6m" "/dt8/usr/include/x86_64-linux-gnu/python3.6m" -RUN ln -s "/usr/include/x86_64-linux-gnu/python3.8" "/dt7/usr/include/x86_64-linux-gnu/python3.8" -RUN ln -s "/usr/include/x86_64-linux-gnu/python3.8" "/dt8/usr/include/x86_64-linux-gnu/python3.8" +RUN ln -sf "/usr/include/x86_64-linux-gnu/python3.8" "/dt7/usr/include/x86_64-linux-gnu/python3.8" +RUN ln -sf "/usr/include/x86_64-linux-gnu/python3.8" "/dt8/usr/include/x86_64-linux-gnu/python3.8" \ No newline at end of file diff --git a/tensorflow/tools/ci_build/install/install_pip_packages_by_version.sh b/tensorflow/tools/ci_build/install/install_pip_packages_by_version.sh index d9953db3b5a..81e5f2b6406 100755 --- a/tensorflow/tools/ci_build/install/install_pip_packages_by_version.sh +++ b/tensorflow/tools/ci_build/install/install_pip_packages_by_version.sh @@ -26,6 +26,7 @@ if [[ ! 
-x "$(which "${PIP}")" ]]; then fi PACKAGES=( + "auditwheel" "wheel" "setuptools" "virtualenv" diff --git a/tensorflow/tools/ci_build/release/common.sh b/tensorflow/tools/ci_build/release/common.sh index a6ef52b8bea..bb40042e3af 100644 --- a/tensorflow/tools/ci_build/release/common.sh +++ b/tensorflow/tools/ci_build/release/common.sh @@ -146,6 +146,7 @@ function install_pip_deps { ${PIP_CMD} install --user --upgrade attrs ${PIP_CMD} install --user --upgrade tf-estimator-nightly ${PIP_CMD} install --user --upgrade "future>=0.17.1" + ${PIP_CMD} install --user --upgrade wrapt # LINT.ThenChange(:ubuntu_16_pip_installations) } @@ -178,6 +179,7 @@ function install_ubuntu_16_pip_deps { "${PIP_CMD}" install PyYAML==3.13 --user "${PIP_CMD}" install --user --upgrade tf-estimator-nightly "${PIP_CMD}" install --user --upgrade tb-nightly + "${PIP_CMD}" install --user --upgrade wrapt # LINT.ThenChange(:ubuntu_pip_installations) } @@ -219,6 +221,7 @@ function install_macos_pip_deps { ${SUDO_CMD} ${PIP_CMD} install --upgrade tb-nightly ${PIP_CMD} install --user --upgrade attrs ${PIP_CMD} install --user --upgrade tf-estimator-nightly + ${PIP_CMD} install --user --upgrade wrapt ${PIP_CMD} install --user --upgrade "future>=0.17.1" } diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/libtensorflow/cpu/build.sh b/tensorflow/tools/ci_build/release/ubuntu_16/libtensorflow/cpu/build.sh new file mode 100644 index 00000000000..abb85c18711 --- /dev/null +++ b/tensorflow/tools/ci_build/release/ubuntu_16/libtensorflow/cpu/build.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e + +# Source the external common scripts. +source tensorflow/tools/ci_build/release/common.sh + + +# Install latest bazel +install_bazelisk +which bazel + +# Install realpath +sudo apt-get install realpath + +./tensorflow/tools/ci_build/linux/libtensorflow.sh diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/libtensorflow/gpu/build.sh b/tensorflow/tools/ci_build/release/ubuntu_16/libtensorflow/gpu/build.sh new file mode 100644 index 00000000000..c399ed2680f --- /dev/null +++ b/tensorflow/tools/ci_build/release/ubuntu_16/libtensorflow/gpu/build.sh @@ -0,0 +1,30 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e + +# Source the external common scripts. 
+source tensorflow/tools/ci_build/release/common.sh + + +# Install latest bazel +install_bazelisk +which bazel + +# Install realpath +sudo apt-get install realpath + +export TF_NEED_CUDA=1 + +./tensorflow/tools/ci_build/linux/libtensorflow.sh diff --git a/tensorflow/tools/docs/BUILD b/tensorflow/tools/docs/BUILD index c092f21addb..c0442a5986d 100644 --- a/tensorflow/tools/docs/BUILD +++ b/tensorflow/tools/docs/BUILD @@ -2,6 +2,7 @@ # Doc generator load("//tensorflow:tensorflow.bzl", "py_test") +load("//tensorflow/python/tpu:tpu.bzl", "tpu_py_test") package( default_visibility = ["//tensorflow:__subpackages__"], @@ -22,6 +23,7 @@ py_library( py_test( name = "tf_doctest", srcs = ["tf_doctest.py"], + args = ["--module_prefix_skip=tpu.,distribute.tpu_strategy"], python_version = "PY3", tags = [ "no_oss_py2", @@ -40,6 +42,28 @@ py_test( ], ) +tpu_py_test( + name = "tf_doctest_tpu", + srcs = ["tf_doctest.py"], + args = ["--module=tpu.,distribute.tpu_strategy"], + disable_experimental = True, + disable_v3 = True, + main = "tf_doctest.py", + python_version = "PY3", + tags = [ + "no_oss", + "noasan", + "nomsan", + "notsan", + ], + deps = [ + ":tf_doctest_lib", + "//tensorflow:tensorflow_py", + "//tensorflow/python/keras/preprocessing", + "//third_party/py/numpy", + ], +) + py_test( name = "tf_doctest_test", srcs = ["tf_doctest_test.py"], diff --git a/tensorflow/tools/docs/tf_doctest.py b/tensorflow/tools/docs/tf_doctest.py index 19624659e37..fc81d33cfde 100644 --- a/tensorflow/tools/docs/tf_doctest.py +++ b/tensorflow/tools/docs/tf_doctest.py @@ -42,7 +42,9 @@ tf.keras.preprocessing = preprocessing FLAGS = flags.FLAGS -flags.DEFINE_string('module', None, 'A specific module to run doctest on.') +flags.DEFINE_list('module', [], 'A list of specific module to run doctest on.') +flags.DEFINE_list('module_prefix_skip', [], + 'A list of modules to ignore when resolving modules.') flags.DEFINE_boolean('list', None, 'List all the modules in the core package imported.') 
flags.DEFINE_string('file', None, 'A specific file to run doctest on.') @@ -50,6 +52,7 @@ flags.DEFINE_string('file', None, 'A specific file to run doctest on.') flags.mark_flags_as_mutual_exclusive(['module', 'file']) flags.mark_flags_as_mutual_exclusive(['list', 'file']) +# Both --module and --module_prefix_skip are relative to PACKAGE. PACKAGE = 'tensorflow.python.' @@ -68,23 +71,24 @@ def find_modules(): return tf_modules -def filter_on_submodules(all_modules, submodule): - """Filters all the modules based on the module flag. +def filter_on_submodules(all_modules, submodules): + """Filters all the modules based on the modules flag. The module flag has to be relative to the core package imported. - For example, if `submodule=keras.layers` then, this function will return + For example, if `module=keras.layers` then, this function will return all the modules in the submodule. Args: all_modules: All the modules in the core package. - submodule: Submodule to filter from all the modules. + submodules: Submodules to filter from all the modules. Returns: All the modules in the submodule. """ filtered_modules = [ - mod for mod in all_modules if PACKAGE + submodule in mod.__name__ + mod for mod in all_modules + if any(PACKAGE + submodule in mod.__name__ for submodule in submodules) ] return filtered_modules @@ -140,6 +144,9 @@ def load_tests(unused_loader, tests, unused_ignore): tf_modules = get_module_and_inject_docstring(FLAGS.file) for module in tf_modules: + if any(module.__name__.startswith(PACKAGE + prefix) + for prefix in FLAGS.module_prefix_skip): + continue testcase = TfTestCase() tests.addTests( doctest.DocTestSuite( diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 622480102a9..f61e00c01d5 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -50,7 +50,7 @@ DOCLINES = __doc__.split('\n') # result for pip. 
# Also update tensorflow/tensorflow.bzl and # tensorflow/core/public/version.h -_VERSION = '2.1.0' +_VERSION = '2.2.0' REQUIRED_PACKAGES = [ 'absl-py >= 0.7.0', @@ -65,7 +65,7 @@ REQUIRED_PACKAGES = [ 'opt_einsum >= 2.3.2', 'protobuf >= 3.9.2', 'tensorboard >= 2.2.0, < 2.3.0', - 'tensorflow_estimator >= 2.1.0, < 2.2.0', + 'tensorflow_estimator >= 2.2.0, < 2.3.0', 'termcolor >= 1.1.0', 'wrapt >= 1.11.1', # python3 requires wheel 0.26 diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index ab895dd6a99..24f9b962d79 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -162,8 +162,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): print("path_prefix was specified to tf_workspace but is no longer used " + "and will be removed in the future.") - TFRT_COMMIT = "0bad623e8d99ace05f7f60e9e7f8b53ec813d66a" - TFRT_SHA256 = "d002429866d2d824a80dcf6c1602a15398412bc01324200d371c55b13b9a4b27" + TFRT_COMMIT = "26fb26d716545388edb9785f8f4b3e60a4ad5092" + TFRT_SHA256 = "f7419a3eaab8b7137a4de5b428045a731d93da91ef1bce9ba91fab81ed23a676" TFRT_URLS = [ "http://mirror.tensorflow.org/github.com/tensorflow/runtime/archive/{commit}.zip".format(commit = TFRT_COMMIT), "https://github.com/tensorflow/runtime/archive/{commit}.zip".format(commit = TFRT_COMMIT), @@ -261,11 +261,11 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): name = "eigen_archive", build_file = clean_dep("//third_party:eigen.BUILD"), patch_file = clean_dep("//third_party/eigen3:gpu_packet_math.patch"), - sha256 = "d96aa8eda6dbf80e313c992a59e9e9451f420a6b9f58ef30aa41bffdc9df2f1b", # SHARED_EIGEN_SHA - strip_prefix = "eigen-1e41406c362788057b3adcd9a25b73f43e6e6492", + sha256 = "2c7c0aec4271dfca6b8a7707e2112f67c4cb3bdf7c89c0e98d3fcd39707c4468", # SHARED_EIGEN_SHA + strip_prefix = "eigen-49f1aeb60d9f759859fce0d16aa5d1ecc7168d51", urls = [ - 
"https://storage.googleapis.com/mirror.tensorflow.org/gitlab.com/libeigen/eigen/-/archive/1e41406c362788057b3adcd9a25b73f43e6e6492/eigen-1e41406c362788057b3adcd9a25b73f43e6e6492.tar.gz", - "https://gitlab.com/libeigen/eigen/-/archive/1e41406c362788057b3adcd9a25b73f43e6e6492/eigen-1e41406c362788057b3adcd9a25b73f43e6e6492.tar.gz", + "https://storage.googleapis.com/mirror.tensorflow.org/gitlab.com/libeigen/eigen/-/archive/49f1aeb60d9f759859fce0d16aa5d1ecc7168d51/eigen-49f1aeb60d9f759859fce0d16aa5d1ecc7168d51.tar.gz", + "https://gitlab.com/libeigen/eigen/-/archive/49f1aeb60d9f759859fce0d16aa5d1ecc7168d51/eigen-49f1aeb60d9f759859fce0d16aa5d1ecc7168d51.tar.gz", ], ) @@ -690,8 +690,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "307cfdf5338641e3a895857ef02dc9da35cd0eb6" - LLVM_SHA256 = "5e75125ecadee4f91e07c20bf6612d740913a677348fd33c7264ee8fe7d12b17" + LLVM_COMMIT = "1c44430e738ba83eefe6d56a245ee30649d8988d" + LLVM_SHA256 = "81ad47eaf74dfaea1befbe7b41facfd9bcee5ca3d5635325584dbabf4bf1fa5e" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl index f3b2ae6846d..303339e77f7 100755 --- a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl +++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl @@ -53,13 +53,6 @@ NVCC_PATH = '%{nvcc_path}' PREFIX_DIR = os.path.dirname(GCC_HOST_COMPILER_PATH) NVCC_VERSION = '%{cuda_version}' - -# TODO(amitpatankar): Benchmark enabling all capabilities by default. -# Environment variable for supported TF CUDA Compute Capabilities -# eg. 
export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 -CUDA_COMPUTE_ENV_VAR = 'TF_CUDA_COMPUTE_CAPABILITIES' -DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,6.0' - def Log(s): print('gpus/crosstool: {0}'.format(s)) @@ -78,7 +71,8 @@ def GetOptionValue(argv, option): """ parser = ArgumentParser() - parser.add_argument('-' + option, nargs='*', action='append') + parser.add_argument(option, nargs='*', action='append') + option = option.lstrip('-').replace('-', '_') args, _ = parser.parse_known_args(argv) if not args or not vars(args)[option]: return [] @@ -180,17 +174,17 @@ def InvokeNvcc(argv, log=False): host_compiler_options = GetHostCompilerOptions(argv) nvcc_compiler_options = GetNvccOptions(argv) - opt_option = GetOptionValue(argv, 'O') - m_options = GetOptionValue(argv, 'm') + opt_option = GetOptionValue(argv, '-O') + m_options = GetOptionValue(argv, '-m') m_options = ''.join([' -m' + m for m in m_options if m in ['32', '64']]) - include_options = GetOptionValue(argv, 'I') - out_file = GetOptionValue(argv, 'o') - depfiles = GetOptionValue(argv, 'MF') - defines = GetOptionValue(argv, 'D') + include_options = GetOptionValue(argv, '-I') + out_file = GetOptionValue(argv, '-o') + depfiles = GetOptionValue(argv, '-MF') + defines = GetOptionValue(argv, '-D') defines = ''.join([' -D' + define for define in defines]) - undefines = GetOptionValue(argv, 'U') + undefines = GetOptionValue(argv, '-U') undefines = ''.join([' -U' + define for define in undefines]) - std_options = GetOptionValue(argv, 'std') + std_options = GetOptionValue(argv, '-std') # Supported -std flags as of CUDA 9.0. Only keep last to mimic gcc/clang. nvcc_allowed_std_options = ["c++03", "c++11", "c++14"] std_options = ''.join([' -std=' + define @@ -198,7 +192,7 @@ def InvokeNvcc(argv, log=False): # The list of source files get passed after the -c option. I don't know of # any other reliable way to just get the list of source files to be compiled. 
- src_files = GetOptionValue(argv, 'c') + src_files = GetOptionValue(argv, '-c') # Pass -w through from host to nvcc, but don't do anything fancier with # warnings-related flags, since they're not necessarily the same across @@ -224,13 +218,12 @@ def InvokeNvcc(argv, log=False): srcs = ' '.join(src_files) out = ' -o ' + out_file[0] - supported_cuda_compute_capabilities = [ %{cuda_compute_capabilities} ] nvccopts = '-D_FORCE_INLINES ' - for capability in supported_cuda_compute_capabilities: - capability = capability.replace('.', '') + for capability in GetOptionValue(argv, "--cuda-gpu-arch"): + capability = capability[len('sm_'):] nvccopts += r'-gencode=arch=compute_%s,\"code=sm_%s,compute_%s\" ' % ( capability, capability, capability) - nvccopts += ' ' + nvcc_compiler_options + nvccopts += nvcc_compiler_options nvccopts += undefines nvccopts += defines nvccopts += std_options @@ -272,6 +265,7 @@ def main(): if args.x and args.x[0] == 'cuda': if args.cuda_log: Log('-x cuda') leftover = [pipes.quote(s) for s in leftover] + args.cuda_log = True if args.cuda_log: Log('using nvcc') return InvokeNvcc(leftover, log=args.cuda_log) diff --git a/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.py.tpl b/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.py.tpl index 46e8aef3606..c10fb826494 100644 --- a/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.py.tpl +++ b/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.py.tpl @@ -37,13 +37,6 @@ GCC_HOST_COMPILER_PATH = ('%{gcc_host_compiler_path}') NVCC_PATH = '%{nvcc_path}' NVCC_VERSION = '%{cuda_version}' NVCC_TEMP_DIR = "%{nvcc_tmp_dir}" -DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,6.0' - -# Taken from environment variable for supported TF CUDA Compute Capabilities -# eg. 
export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 -supported_cuda_compute_capabilities = os.environ.get( - 'TF_CUDA_COMPUTE_CAPABILITIES', - DEFAULT_CUDA_COMPUTE_CAPABILITIES).split(',') def Log(s): print('gpus/crosstool: {0}'.format(s)) @@ -53,7 +46,7 @@ def GetOptionValue(argv, option): """Extract the list of values for option from options. Args: - option: The option whose value to extract, without the leading '/'. + option: The option whose value to extract. Returns: 1. A list of values, either directly following the option, @@ -62,10 +55,11 @@ def GetOptionValue(argv, option): 2. The leftover options. """ - parser = ArgumentParser(prefix_chars='/') - parser.add_argument('/' + option, nargs='*', action='append') + parser = ArgumentParser(prefix_chars='-/') + parser.add_argument(option, nargs='*', action='append') + option = option.lstrip('-/').replace('-', '_') args, leftover = parser.parse_known_args(argv) - if args and vars(args)[option]: + if args and vars(args).get(option): return (sum(vars(args)[option], []), leftover) return ([], leftover) @@ -122,18 +116,18 @@ def InvokeNvcc(argv, log=False): nvcc_compiler_options, argv = GetNvccOptions(argv) - opt_option, argv = GetOptionValue(argv, 'O') + opt_option, argv = GetOptionValue(argv, '/O') opt = ['-g'] if (len(opt_option) > 0 and opt_option[0] != 'd'): opt = ['-O2'] - include_options, argv = GetOptionValue(argv, 'I') + include_options, argv = GetOptionValue(argv, '/I') includes = ["-I " + include for include in include_options] - defines, argv = GetOptionValue(argv, 'D') + defines, argv = GetOptionValue(argv, '/D') defines = ['-D' + define for define in defines] - undefines, argv = GetOptionValue(argv, 'U') + undefines, argv = GetOptionValue(argv, '/U') undefines = ['-U' + define for define in undefines] # The rest of the unrecognized options should be passed to host compiler @@ -142,10 +136,10 @@ def InvokeNvcc(argv, log=False): m_options = ["-m64"] nvccopts = ['-D_FORCE_INLINES'] - for capability 
in supported_cuda_compute_capabilities: - capability = capability.replace('.', '') - nvccopts += [r'-gencode=arch=compute_%s,"code=sm_%s,compute_%s"' % ( - capability, capability, capability)] + for capability in GetOptionValue(argv, "--cuda-gpu-arch"): + capability = capability[len('sm_'):] + nvccopts += r'-gencode=arch=compute_%s,\"code=sm_%s,compute_%s\" ' % ( + capability, capability, capability) nvccopts += nvcc_compiler_options nvccopts += undefines nvccopts += defines diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl index 545aeebe97a..c587f117deb 100644 --- a/third_party/gpus/cuda_configure.bzl +++ b/third_party/gpus/cuda_configure.bzl @@ -840,10 +840,7 @@ def _compute_cuda_extra_copts(repository_ctx, compute_capabilities): "--cuda-gpu-arch=sm_" + cap.replace(".", "") for cap in compute_capabilities ] - - # Capabilities are handled in the "crosstool_wrapper_driver_is_not_gcc" for nvcc - # TODO(csigg): Make this consistent with cuda clang and pass unconditionally. 
- return "if_cuda_clang(%s)" % str(capability_flags) + return str(capability_flags) def _tpl_path(repository_ctx, filename): return repository_ctx.path(Label("//third_party/gpus/%s.tpl" % filename)) @@ -1092,9 +1089,6 @@ def _create_local_cuda_repository(repository_ctx): "%{cuda_version}": cuda_config.cuda_version, "%{nvcc_path}": nvcc_path, "%{gcc_host_compiler_path}": str(cc), - "%{cuda_compute_capabilities}": ", ".join( - ["\"%s\"" % c for c in cuda_config.compute_capabilities], - ), "%{nvcc_tmp_dir}": _get_nvcc_tmp_dir_for_windows(repository_ctx), } repository_ctx.template( diff --git a/third_party/mlir/BUILD b/third_party/mlir/BUILD index 8074eb5e290..ce5468fe679 100644 --- a/third_party/mlir/BUILD +++ b/third_party/mlir/BUILD @@ -178,7 +178,7 @@ filegroup( "include/mlir/Dialect/Affine/IR/AffineOps.td", "include/mlir/Dialect/Affine/IR/AffineOpsBase.td", "include/mlir/Interfaces/LoopLikeInterface.td", - "include/mlir/Interfaces/SideEffects.td", + "include/mlir/Interfaces/SideEffectInterfaces.td", ":OpBaseTdFiles", ], ) @@ -217,7 +217,7 @@ filegroup( "include/mlir/Dialect/AVX512/AVX512.td", "include/mlir/Dialect/LLVMIR/LLVMOpBase.td", "include/mlir/IR/OpBase.td", - "include/mlir/Interfaces/SideEffects.td", + "include/mlir/Interfaces/SideEffectInterfaces.td", ], ) @@ -297,37 +297,37 @@ cc_library( ) filegroup( - name = "LoopOpsTdFiles", + name = "SCFTdFiles", srcs = [ - "include/mlir/Dialect/LoopOps/LoopOps.td", + "include/mlir/Dialect/SCF/SCFOps.td", "include/mlir/Interfaces/ControlFlowInterfaces.td", "include/mlir/Interfaces/LoopLikeInterface.td", - "include/mlir/Interfaces/SideEffects.td", + "include/mlir/Interfaces/SideEffectInterfaces.td", ":OpBaseTdFiles", ], ) gentbl( - name = "LoopOpsIncGen", + name = "SCFIncGen", strip_include_prefix = "include", tbl_outs = [ ( "-gen-op-decls", - "include/mlir/Dialect/LoopOps/LoopOps.h.inc", + "include/mlir/Dialect/SCF/SCFOps.h.inc", ), ( "-gen-op-defs", - "include/mlir/Dialect/LoopOps/LoopOps.cpp.inc", + 
"include/mlir/Dialect/SCF/SCFOps.cpp.inc", ), ( "-gen-dialect-decls", - "include/mlir/Dialect/LoopOps/LoopOpsDialect.h.inc", + "include/mlir/Dialect/SCF/SCFOpsDialect.h.inc", ), ], tblgen = ":mlir-tblgen", - td_file = "include/mlir/Dialect/LoopOps/LoopOps.td", + td_file = "include/mlir/Dialect/SCF/SCFOps.td", td_srcs = [ - ":LoopOpsTdFiles", + ":SCFTdFiles", ], ) @@ -337,30 +337,30 @@ gentbl( tbl_outs = [ ( "-gen-pass-decls", - "include/mlir/Dialect/LoopOps/Passes.h.inc", + "include/mlir/Dialect/SCF/Passes.h.inc", ), ], tblgen = ":mlir-tblgen", - td_file = "include/mlir/Dialect/LoopOps/Passes.td", + td_file = "include/mlir/Dialect/SCF/Passes.td", td_srcs = [ ":PassBaseTdFiles", ], ) cc_library( - name = "LoopOpsTransforms", + name = "SCFTransforms", srcs = glob([ - "lib/Dialect/LoopOps/Transforms/*.cpp", - "lib/Dialect/LoopOps/Transforms/*.h", + "lib/Dialect/SCF/Transforms/*.cpp", + "lib/Dialect/SCF/Transforms/*.h", ]), - hdrs = ["include/mlir/Dialect/LoopOps/Passes.h"], + hdrs = ["include/mlir/Dialect/SCF/Passes.h"], includes = ["include"], deps = [ ":Affine", ":IR", - ":LoopOps", ":LoopPassIncGen", ":Pass", + ":SCFDialect", ":StandardOps", ":Transforms", "@llvm-project//llvm:support", @@ -374,7 +374,7 @@ filegroup( "include/mlir/IR/OpAsmInterface.td", "include/mlir/Interfaces/CallInterfaces.td", "include/mlir/Interfaces/ControlFlowInterfaces.td", - "include/mlir/Interfaces/SideEffects.td", + "include/mlir/Interfaces/SideEffectInterfaces.td", "include/mlir/Interfaces/ViewLikeInterface.td", ":OpBaseTdFiles", ], @@ -521,8 +521,8 @@ cc_library( ":AffinePassIncGen", ":Analysis", ":IR", - ":LoopOps", ":Pass", + ":SCFDialect", ":StandardOps", ":Support", ":Transforms", @@ -559,8 +559,8 @@ cc_library( ":Affine", ":ConversionPassIncGen", ":IR", - ":LoopOps", ":Pass", + ":SCFDialect", ":StandardOps", ":Support", ":Transforms", @@ -588,17 +588,17 @@ cc_library( ) cc_library( - name = "LoopOps", + name = "SCFDialect", srcs = glob( [ - "lib/Dialect/LoopOps/*.cpp", - 
"lib/Dialect/LoopOps/*.h", - "lib/Dialect/LoopOps/EDSC/*.cpp", + "lib/Dialect/SCF/*.cpp", + "lib/Dialect/SCF/*.h", + "lib/Dialect/SCF/EDSC/*.cpp", ], ), hdrs = glob([ - "include/mlir/Dialect/LoopOps/*.h", - "include/mlir/Dialect/LoopOps/EDSC/*.h", + "include/mlir/Dialect/SCF/*.h", + "include/mlir/Dialect/SCF/EDSC/*.h", ]), includes = ["include"], deps = [ @@ -606,7 +606,7 @@ cc_library( ":EDSC", ":IR", ":LoopLikeInterface", - ":LoopOpsIncGen", + ":SCFIncGen", ":SideEffects", ":StandardOps", ":Support", @@ -657,6 +657,7 @@ gentbl( td_file = "include/mlir/Dialect/Shape/IR/ShapeOps.td", td_srcs = [ ":StdOpsTdFiles", + "include/mlir/Dialect/Shape/IR/ShapeBase.td", "include/mlir/Interfaces/InferTypeOpInterface.td", ], ) @@ -715,24 +716,35 @@ cc_library( ], ) +gentbl( + name = "StandardOpsTransformsPassIncGen", + strip_include_prefix = "include", + tbl_outs = [( + "-gen-pass-decls", + "include/mlir/Dialect/StandardOps/Transforms/Passes.h.inc", + )], + tblgen = ":mlir-tblgen", + td_file = "include/mlir/Dialect/StandardOps/Transforms/Passes.td", + td_srcs = [":PassBaseTdFiles"], +) + cc_library( name = "StandardOpsTransforms", - srcs = glob( - [ - "lib/Dialect/StandardOps/Transforms/*.cpp", - "lib/Dialect/StandardOps/Transforms/*.h", - ], - ), - hdrs = glob([ - "include/mlir/Dialect/StandardOps/Transforms/*.h", + srcs = glob([ + "lib/Dialect/StandardOps/Transforms/*.cpp", + "lib/Dialect/StandardOps/Transforms/*.h", ]), + hdrs = glob(["include/mlir/Dialect/StandardOps/Transforms/*.h"]), includes = ["include"], deps = [ ":Analysis", ":ControlFlowInterfaces", ":IR", + ":Pass", ":StandardOps", + ":StandardOpsTransformsPassIncGen", ":Support", + ":Transforms", "@llvm-project//llvm:support", ], ) @@ -985,7 +997,7 @@ filegroup( "include/mlir/Dialect/GPU/GPUOps.td", "include/mlir/Dialect/LLVMIR/LLVMOpBase.td", "include/mlir/IR/SymbolInterfaces.td", - "include/mlir/Interfaces/SideEffects.td", + "include/mlir/Interfaces/SideEffectInterfaces.td", ":OpBaseTdFiles", ], ) @@ -1101,9 
+1113,9 @@ cc_library( ":GPUDialect", ":GPUPassIncGen", ":IR", - ":LoopOps", ":ParallelLoopMapperAttrGen", ":Pass", + ":SCFDialect", ":StandardOps", ":Support", ":Transforms", @@ -1118,7 +1130,7 @@ filegroup( "include/mlir/Dialect/LLVMIR/LLVMOps.td", "include/mlir/IR/SymbolInterfaces.td", "include/mlir/Interfaces/ControlFlowInterfaces.td", - "include/mlir/Interfaces/SideEffects.td", + "include/mlir/Interfaces/SideEffectInterfaces.td", ":OpBaseTdFiles", ], ) @@ -1312,8 +1324,8 @@ cc_library( ":GPUDialect", ":GPUToSPIRVIncGen", ":IR", - ":LoopOps", ":Pass", + ":SCFDialect", ":SPIRVDialect", ":SPIRVLowering", ":StandardToSPIRVConversions", @@ -1405,7 +1417,7 @@ filegroup( srcs = [ "include/mlir/Dialect/LLVMIR/LLVMOpBase.td", "include/mlir/Dialect/LLVMIR/NVVMOps.td", - "include/mlir/Interfaces/SideEffects.td", + "include/mlir/Interfaces/SideEffectInterfaces.td", ":OpBaseTdFiles", ], ) @@ -1477,7 +1489,7 @@ filegroup( srcs = [ "include/mlir/Dialect/LLVMIR/LLVMOpBase.td", "include/mlir/Dialect/LLVMIR/ROCDLOps.td", - "include/mlir/Interfaces/SideEffects.td", + "include/mlir/Interfaces/SideEffectInterfaces.td", ":OpBaseTdFiles", ], ) @@ -1529,7 +1541,7 @@ filegroup( "include/mlir/IR/SymbolInterfaces.td", "include/mlir/Interfaces/CallInterfaces.td", "include/mlir/Interfaces/ControlFlowInterfaces.td", - "include/mlir/Interfaces/SideEffects.td", + "include/mlir/Interfaces/SideEffectInterfaces.td", ":OpBaseTdFiles", ] + glob(["include/mlir/Dialect/SPIRV/*.td"]), ) @@ -1871,7 +1883,7 @@ cc_library( ":ControlFlowInterfaces", ":IR", ":LoopLikeInterface", - ":LoopOps", + ":SCFDialect", ":SideEffects", ":StandardOps", ":Support", @@ -1988,8 +2000,8 @@ cc_library( ":ControlFlowInterfaces", ":IR", ":LoopLikeInterface", - ":LoopOps", ":Pass", + ":SCFDialect", ":SideEffects", ":StandardOps", ":Support", @@ -2025,8 +2037,8 @@ cc_library( ":GPUDialect", ":GPUTransforms", ":IR", - ":LoopOps", ":Pass", + ":SCFDialect", ":StandardOps", ":Support", ":TransformUtils", @@ -2049,9 +2061,9 @@ 
cc_library( ":Affine", ":ConversionPassIncGen", ":GPUDialect", - ":LoopOps", ":LoopsToGPU", ":Pass", + ":SCFDialect", ":StandardOps", ":Support", ":Transforms", @@ -2073,8 +2085,8 @@ cc_library( ":ConversionPassIncGen", ":IR", ":LLVMDialect", - ":LoopOps", ":Pass", + ":SCFDialect", ":StandardOps", ":Support", ":TransformUtils", @@ -2232,7 +2244,7 @@ gentbl( ), ], tblgen = ":mlir-tblgen", - td_file = "include/mlir/Interfaces/SideEffects.td", + td_file = "include/mlir/Interfaces/SideEffectInterfaces.td", td_srcs = [ ":OpBaseTdFiles", ], @@ -2280,7 +2292,7 @@ cc_library( ":Affine", ":CallOpInterfaces", ":IR", - ":LoopOps", + ":SCFDialect", ":StandardOps", ":Support", "@llvm-project//llvm:support", @@ -2467,10 +2479,11 @@ cc_library( ":LLVMTransforms", ":LinalgToLLVM", ":LinalgToSPIRV", - ":LoopOpsTransforms", ":NVVMDialect", ":Parser", ":Pass", + ":SCFTransforms", + ":StandardOpsTransforms", ":StandardToSPIRVConversions", ":StandardToStandard", ":Support", @@ -2553,8 +2566,6 @@ cc_library( ":LinalgToLLVM", ":LinalgToSPIRV", ":LinalgTransforms", - ":LoopOps", - ":LoopOpsTransforms", ":LoopPassIncGen", ":LoopsToGPUPass", ":NVVMDialect", @@ -2562,12 +2573,16 @@ cc_library( ":QuantOps", ":QuantPassIncGen", ":ROCDLDialect", + ":SCFDialect", + ":SCFTransforms", ":SDBM", ":SPIRVDialect", ":SPIRVLowering", ":SPIRVPassIncGen", ":Shape", ":StandardOps", + ":StandardOpsTransforms", + ":StandardOpsTransformsPassIncGen", ":StandardToSPIRVConversions", ":StandardToStandard", ":Transforms", @@ -2895,7 +2910,7 @@ filegroup( srcs = [ "include/mlir/Dialect/Quant/QuantOps.td", "include/mlir/Dialect/Quant/QuantOpsBase.td", - "include/mlir/Interfaces/SideEffects.td", + "include/mlir/Interfaces/SideEffectInterfaces.td", ":OpBaseTdFiles", ], ) @@ -3230,8 +3245,8 @@ cc_library( ":LinalgOps", ":LinalgPassIncGen", ":LinalgStructuredOpsIncGen", - ":LoopOps", ":Pass", + ":SCFDialect", ":StandardOps", ":Support", ":TransformUtils", @@ -3352,8 +3367,8 @@ cc_library( ":IR", ":LLVMDialect", 
":LLVMTransforms", - ":LoopOps", ":Pass", + ":SCFDialect", ":StandardOps", ":Support", ":Transforms", @@ -3375,7 +3390,7 @@ exports_files( "include/mlir/Interfaces/CallInterfaces.td", "include/mlir/Interfaces/ControlFlowInterfaces.h", "include/mlir/Interfaces/ControlFlowInterfaces.td", - "include/mlir/Interfaces/SideEffects.td", + "include/mlir/Interfaces/SideEffectInterfaces.td", "include/mlir/Interfaces/ViewLikeInterface.td", "include/mlir/Dialect/LLVMIR/LLVMOpBase.td", "include/mlir/Dialect/StandardOps/IR/Ops.td", diff --git a/third_party/mlir/test.BUILD b/third_party/mlir/test.BUILD index c3dd157af83..c19d312d082 100644 --- a/third_party/mlir/test.BUILD +++ b/third_party/mlir/test.BUILD @@ -77,7 +77,7 @@ gentbl( "@llvm-project//mlir:include/mlir/Interfaces/CallInterfaces.td", "@llvm-project//mlir:include/mlir/Interfaces/ControlFlowInterfaces.td", "@llvm-project//mlir:include/mlir/Interfaces/InferTypeOpInterface.td", - "@llvm-project//mlir:include/mlir/Interfaces/SideEffects.td", + "@llvm-project//mlir:include/mlir/Interfaces/SideEffectInterfaces.td", ], test = True, ) @@ -163,8 +163,8 @@ cc_library( "@llvm-project//mlir:IR", "@llvm-project//mlir:LinalgOps", "@llvm-project//mlir:LinalgTransforms", - "@llvm-project//mlir:LoopOps", "@llvm-project//mlir:Pass", + "@llvm-project//mlir:SCFDialect", "@llvm-project//mlir:StandardOps", "@llvm-project//mlir:Support", "@llvm-project//mlir:TransformUtils", diff --git a/third_party/toolchains/preconfig/generate/containers.bzl b/third_party/toolchains/preconfig/generate/containers.bzl index b1d0389a16d..8e6f48df99e 100644 --- a/third_party/toolchains/preconfig/generate/containers.bzl +++ b/third_party/toolchains/preconfig/generate/containers.bzl @@ -2,14 +2,14 @@ container_digests = { "ubuntu16.04": "sha256:b90dcf2f35f3354909f4491bdf019c110b4b4d95ef0395ebf178bc5d523a4208", "centos6": "sha256:d09c12fb26fbbe8398b4973260c75172eb67d509dae9d6f4ad54279b7d6b0494", - "ubuntu16.04-manylinux2010": 
"sha256:b5227c4069980005336dd5cf04e3122974984da3396a514a06d7db3a7ae7b2f9", + "ubuntu16.04-manylinux2010": "sha256:d5b056506e14eb216b6e27988814617a09dea77ec1ab46972072038f9df3e728", "cuda10.0-cudnn7-ubuntu14.04": "sha256:d433e1221f802dac393bc8652fabcc63aa46896cd920bb888ae0e2002fe6b756", "cuda10.0-cudnn7-centos7": "sha256:a453b7147a60928a8345689eae48916a746b3578b5e831bfa151f0529d469c88", "cuda10.0-cudnn7-centos6": "sha256:a1909ba09c703340ee0074ce63dd94fe8fea48035a25264677907a609e2375e0", "cuda10.1-cudnn7-centos6": "sha256:454b899657e87893ee5e68dc0f87df59b6a0a7418ae09cafcc3dd65ac71feca9", "cuda10.0-cudnn7-ubuntu16.04-manylinux2010": "sha256:5812d9d0ef0a3276fc5faaf4cd01f3d6e03d635893a6e2d2e04f6f01d626c432", - "cuda10.1-cudnn7-ubuntu16.04-manylinux2010": "sha256:cc7f760195d7bbe283b45ae740409751d0b74d8ffbdc2f7a3cb62c71a71fbe25", - "cuda10.1-cudnn7-ubuntu16.04-manylinux2010-multipython": "sha256:c460570b88eab3da92f06fdf30098d89be4de0f3b010ee3d39086f4d000dd3b8", + "cuda10.1-cudnn7-ubuntu16.04-manylinux2010": "sha256:1e4e888f14a3d5b127151f7970487613a46ca957babe0432786627c78c0b1a36", + "cuda10.1-cudnn7-ubuntu16.04-manylinux2010-multipython": "sha256:13aa5e700bb609521cd4365d4152d7d8f4118cae7ce174ce7d54cc529e21766a", "rocm-ubuntu16.04": "sha256:e645447dd6127325f3e97b8bf23424f637a8579d963b34fcc6772cf7cfaa0ebe", "windows-1803": "sha256:f109576c7c0c8a1783ff22b666e8923b52dbbe7933f69a1c7a7275202c304a12", }