diff --git a/.bazelrc b/.bazelrc index 95e2b369a89..590a87f5732 100644 --- a/.bazelrc +++ b/.bazelrc @@ -40,6 +40,7 @@ build:mkl -c opt # This config option is used to enable MKL-DNN open source library only, # without depending on MKL binary version. build:mkl_open_source_only --define=build_with_mkl_dnn_only=true +build:mkl_open_source_only --define=build_with_mkl_dnn_v1_only=true build:mkl_open_source_only --define=build_with_mkl=true --define=enable_mkl=true build:mkl_open_source_only --define=tensorflow_mkldnn_contraction_kernel=0 diff --git a/RELEASE.md b/RELEASE.md index c2c50c590ba..6a4c2d6486d 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,274 @@ +# Release 1.14.0 + +## Major Features and Improvements + +* This is the first 1.x release containing the compat.v2 module. This module + is required to allow libraries to publish code which works in both 1.x and + 2.x. After this release, no backwards incompatible changes are allowed in + the 2.0 Python API. +* Turn on MKL-DNN contraction kernels by default. MKL-DNN dynamically + dispatches the best kernel implementation based on CPU vector architecture. + To disable them, build with --define=tensorflow_mkldnn_contraction_kernel=0. + +## Behavioral changes + +* Set default loss reduction as `AUTO` for improving reliability of loss + scaling with distribution strategy and custom training loops. `AUTO` + indicates that the reduction option will be determined by the usage context. + For almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When used in + distribution strategy scope, outside of built-in training loops such as + `tf.keras` `compile` and `fit`, we expect reduction value to be 'None' or + 'SUM'. Using other values will raise an error. +* Wraps losses passed to the `compile` API (strings and v1 losses) which are + not instances of v2 `Loss` class in `LossWrapper` class. => All losses will + now use `SUM_OVER_BATCH_SIZE` reduction as default. 
+* Disable `run_eagerly` and distribution strategy if there are symbolic + tensors added to the model using `add_metric` or `add_loss`. +* tf.linspace(start, stop, num) now always uses "stop" as last value (for + num > 1) +* `ResourceVariable` and `Variable` no longer accepts `constraint` in the + constructor, nor expose it as a @property. +* The behavior of tf.gather is now correct when axis=None and batch_dims<0. +* Only create a GCS directory object if the object does not already exist. +* In `map_vectorization` optimization, reduce the degree of parallelism in the + vectorized map node. +* Bug fix: loss and gradients should now more reliably be correctly scaled + w.r.t. the global batch size when using a tf.distribute.Strategy. +* Updating cosine similarity loss - removed the negate sign from cosine + similarity. +* DType is no longer convertible to an int. Use dtype.as_datatype_enum instead + of int(dtype) to get the same result. +* Changed default for gradient accumulation for TPU embeddings to true. +* Callbacks now log values in eager mode when a deferred build model is used. +* Transitive dependencies on :pooling_ops were removed. Some users may need to + add explicit dependencies on :pooling_ops if they reference the operators + from that library. + +## Bug Fixes and Other Changes + +* Documentation +* Deprecations and Symbol renames. + * Remove unused StringViewVariantWrapper + * Delete unused Fingerprint64Map op registration + * SignatureDef util functions have been deprecated. + * Renamed tf.image functions to remove duplicate "image" where it is + redundant. + * tf.keras.experimental.export renamed to + tf.keras.experimental.export_saved_model + * Standardize the LayerNormalization API by replacing the args `norm_axis` + and `params_axis` with `axis`. 
+ * Tensor::UnsafeCopyFromInternal deprecated in favor of Tensor::BitcastFrom +* Keras & Python API + * Add v2 module aliases for: + * tf.initializers => tf.keras.initializers + * tf.losses => tf.keras.losses & tf.metrics => tf.keras.metrics + * tf.optimizers => tf.keras.optimizers + * Add tf.keras.layers.AbstractRNNCell as the preferred implementation of + RNN cell for TF v2. User can use it to implement RNN cell with custom + behavior. + * Adding `clear_losses` API to be able to clear losses at the end of + forward pass in a custom training loop in eager. + * Add support for passing list of lists to the `metrics` param in Keras + `compile`. + * Added top-k to precision and recall to keras metrics. + * Adding public APIs for `cumsum` and `cumprod` keras backend functions. + * Fix: model.add_loss(symbolic_tensor) should work in ambient eager. + * Add name argument to tf.string_split and tf.strings_split + * Minor change to SavedModels exported from Keras using + tf.keras.experimental.export. (SignatureDef key for evaluation mode is + now "eval" instead of "test"). This will be reverted back to "test" in + the near future. + * Updates binary cross entropy logic in Keras when input is probabilities. + Instead of converting probabilities to logits, we are using the cross + entropy formula for probabilities. + * Raw TensorFlow functions can now be used in conjunction with the Keras + Functional API during model creation. This obviates the need for users + to create Lambda layers in most cases when using the Functional API. + Like Lambda layers, TensorFlow functions that result in Variable + creation or assign ops are not supported. + * Keras training and validation curves are shown on the same plot. + * Introduce `dynamic` constructor argument in Layer and Model, which + should be set to True when using imperative control flow in the `call` + method. + * Removing of dtype in the constructor of initializers and partition_info + in call. 
+* New ops and improved op functionality + * Add OpKernels for some stateless maps + * Add v2 APIs for AUCCurve and AUCSummationMethod + enums. #tf-metrics-convergence + * Add tf.math.nextafter op. + * Add CompositeTensor base class. + * Add tf.linalg.tridiagonal_solve op. + * Add opkernel templates for common table operations. + * Added support for TFLite in TensorFlow 2.0. + * Adds summary trace API for collecting graph and profile information. + * Add batch_dims argument to tf.gather. + * Add support for `add_metric` in the graph function mode. + * Add C++ Gradient for BatchMatMulV2. + * Added tf.random.binomial + * Added gradient for SparseToDense op. + * Add legacy string flat hash map op kernels + * Add a ragged size op and register it to the op dispatcher + * Add broadcasting support to tf.matmul. + * Add ellipsis (...) support for tf.einsum() + * Added LinearOperator.adjoint and LinearOperator.H (alias). + * Added GPU implementation of tf.linalg.tridiagonal_solve. + * Added strings.byte_split + * Add RaggedTensor.placeholder() + * Add a new "result_type" parameter to tf.strings.split + * `add_update` can now be passed a zero-arg callable in order to support + turning off the update when setting `trainable=False` on a Layer of a + Model compiled with `run_eagerly=True`. + * Add variant wrapper for absl::string_view + * Add expand_composites argument to all nest.* methods. + * Add pfor converter for Squeeze. + * Bug fix for tf.tile gradient + * Expose CriticalSection in core as tf.CriticalSection. + * Update Fingerprint64Map to use aliases + * ResourceVariable support for gather_nd. + * ResourceVariable's gather op supports batch dimensions. + * Variadic reduce is supported on CPU + * Extend tf.function with basic support for CompositeTensors arguments + (such as SparseTensor and RaggedTensor). + * Add templates and interfaces for creating lookup tables + * Post-training quantization tool supports quantizing weights shared by + multiple operations. 
The models made with versions of this tool will use + INT8 types for weights and will only be executable by interpreters from + this version onwards. + * Malformed gif images could result in an access out of bounds in the + color palette of the frame. This has been fixed now + * image.resize now considers proper pixel centers and has new kernels + (incl. anti-aliasing). +* Performance + * Turn on MKL-DNN contraction kernels by default. MKL-DNN dynamically + dispatches the best kernel implementation based on CPU vector + architecture. To disable them, build with + --define=tensorflow_mkldnn_contraction_kernel=0. + * Support for multi-host ncclAllReduce in Distribution Strategy. + * Expose a flag that allows the number of threads to vary across Python + benchmarks. +* TensorFlow 2.0 Development + * Add v2 sparse categorical crossentropy metric. + * Allow non-Tensors through v2 losses. + * Add UnifiedGRU as the new GRU implementation for tf2.0. Change the + default recurrent activation function for GRU from 'hard_sigmoid' to + 'sigmoid', and 'reset_after' to True in 2.0. Historically recurrent + activation is 'hard_sigmoid' since it is faster than 'sigmoid'. With new + unified backend between CPU and GPU mode, since the CuDNN kernel is + using sigmoid, we change the default for CPU mode to sigmoid as well. + With that, the default GRU will be compatible with both CPU and GPU + kernel. This will enable users with a GPU to use CuDNN kernel by default + and get a 10x performance boost in training. Note that this is a + checkpoint-breaking change. If users want to use their 1.x pre-trained + checkpoint, please construct the layer with + GRU(recurrent_activation='hard_sigmoid', reset_after=False) to fall back + to 1.x behavior. + * TF 2.0 - Update metric name to always reflect what the user has given in + compile. Affects following cases 1. When name is given as + 'accuracy'/'crossentropy' 2. When an aliased function name is used eg. + 'mse' 3. 
Removing the `weighted` prefix from weighted metric names. + * Begin adding Go wrapper for C Eager API + * image.resize in 2.0 now supports gradients for the new resize kernels. + * removed tf.string_split from v2 API + * Expose tf.contrib.proto.* ops in tf.io (they will exist in TF2) + * "Updates the TFLiteConverter API in 2.0. Changes from_concrete_function + to from_concrete_functions." + * Enable tf.distribute.experimental.MultiWorkerMirroredStrategy working in + eager mode. + * Support both binary and -1/1 label input in v2 hinge and squared hinge + losses. +* TensorFlow Lite + * "Adds support for tflite_convert in 2.0." + * "Remove lite.OpHint, lite.experimental, and lite.constant from 2.0 API." +* tf.contrib + * Added Neural Turing Implementation as described in + https://arxiv.org/abs/1807.08518. + * Remove tf.contrib.timeseries dependency on TF distributions. +* tf.data + * Add num_parallel_reads and passing in a Dataset containing filenames + into TextLineDataset and FixedLengthRecordDataset + * Going forward we operate in TF 2.0, this change is part of the effort to + slowly convert XYZDataset to DatasetV2 type which is the official + version going to be used in TF 2.0 and motivated by some compatibility + issue found, _BigtableXYZDataset (of type DatasetV2) does not implement + the _as_variant_tensor() of DatasetV1, when moving contrib.bigtable to + tensorflow_io. Converting into DatasetV2 removes the overheads to + maintain V1 while we are moving into TF 2.0. + * Add dataset ops to the graph (or create kernels in Eager execution) + during the python Dataset object creation instead of doing it during + Iterator creation time. + * Add support for TensorArrays to tf.data Dataset. + * Switching tf.data functions to use `defun`, providing an escape hatch to + continue using the legacy `Defun`. +* Toolchains + * CUDNN_INSTALL_PATH, TENSORRT_INSTALL_PATH, NCCL_INSTALL_PATH, + NCCL_HDR_PATH are deprecated. 
Use TF_CUDA_PATHS instead which supports a + comma-separated list of base paths that are searched to find CUDA + libraries and headers. + * TF code now resides in `tensorflow_core` and `tensorflow` is just a + virtual pip package. No code changes are needed for projects using + TensorFlow, the change is transparent +* XLA + * XLA HLO graphs can be inspected with interactive_graphviz tool now. +* Estimator + * Use tf.compat.v1.estimator.inputs instead of tf.estimator.inputs + * Replace contrib references with tf.estimator.experimental.* for apis in + early_stopping.py + +## Thanks to our Contributors + +This release contains contributions from many people at Google, as well as: + +1e100, 4d55397500, a6802739, abenmao, Adam Weiss, Ag Ramesh, Alan Du, Albin Joy, +Alex, Aman Patel, Amit, Amit Kumar Jaiswal, Amit Srivastava, Andreas Eberle, +Andy Craze, Anthony Platanios, Armen Poghosov, armenpoghosov, arp95, Arpit Shah, +Ashwin Ramaswami, Aurelien Geron, AuréLien Geron, aweers, awesomealex1, Ayush +Agrawal, Ben Barsdell, Bharat Raghunathan, Bhavani Subramanian, blairhan, +BléNesi Attila, Brandon Carter, candy.dc, Chao Liu, chenchc, chie8842, Christian +Hansen, Christian Sigg, Clayne Robison, crafet, csukuangfj, ctiijima, Dan +Jarvis, Dan Lazewatsky, Daniel Ingram, Daniel Salvadori, Dave Airlie, David +Norman, Dayananda V, Dayananda-V, delock, Denis Khalikov, Deven Desai, Dheeraj +Rajaram Reddy, dmitrievanthony, Donovan Ong, Drew Szurko, Duncan Riach, Dustin +Neighly, Edward Forgacs, EFanZh, Fei Hu, Felix Lemke, Filip Matzner, fo40225, +frreiss, Gautam, gehring, Geoffrey Irving, Grzegorz George Pawelczak, Grzegorz +Pawelczak, Gyoung-Yoon Ryoo, HanGuo97, Hanton Yang, Hari Shankar, hehongliang, +Heungsub Lee, Hoeseong Kim, I-Hong Jhuo, Ilango R, Innovimax, Irene Dea, Jacky +Ko, Jakub Lipinski, Jason Zaman, jcf94, Jeffrey Poznanovic, Jens Elofsson, +Jeroen BéDorf, Jia Qingtong, Jiankang, Joe Q, Joe Quadrino, Joeran Beel, Jonas +Rauber, Jonathan, Jonathan Kyl, Joppe 
Geluykens, Joseph Friedman, jtressle, jwu, +K Yasaswi Sri Chandra Gandhi, K. Hodges, Kaixi Hou, Karl Lessard, Karl +Weinmeister, Karthik Muthuraman, Kashif Rasul, KDR, Keno Fischer, Kevin Mader, +kjopek, Koan-Sin Tan, kouml, ktaebum, Lakshay Tokas, Laurent Le Brun, Letian +Kang, Li, Guizi, Loo Rong Jie, Lucas Hendren, Lukas Geiger, Luke Han, luxupu, +Ma, Guokai, Mahmoud Abuzaina, Mandar Deshpande, manhyuk, Marco Gaido, Marek +Drozdowski, Mark Collier, Mark Ryan, mars20, Mateusz Chudyk, Matt Conley, +MattConley, mbhuiyan, mdfaijul, Melissa Grueter, Michael KäUfl, MickaëL +Schoentgen, Miguel Morin, Mihail Salnikov, Mike Arpaia, Mike Holcomb, monklof, +Moses Marin, Mshr-H, nammbash, Natalia Gimelshein, Nayana-Ibm, neargye, Neeraj +Pradhan, Nehal J Wani, Nick, Niels Ole Salscheider, Niranjan Hasabnis, nlewycky, +Nuka-137, Nutti, olicht, P Sudeepam, Palmer Lao, Pan Daoxin, Pariksheet Pinjari, +Pavel Samolysov, PENGWA, Pooya Davoodi, R S Nikhil Krishna, Rohit Gupta, Roman +Soldatow, rthadur, Ruizhe, Ryan Jiang, Samantha Andow, Sami Kama, Sana-Damani, +Saurabh Deoras, sdamani, seanshpark, Sebastien Iooss, Serv-Inc, Shahzad Lone, +Shashank Gupta, Shashi, shashvat, shashvatshahi1998, Siju, Siju Samuel, +Snease-Abq, Spencer Schaber, sremedios, srinivasan.narayanamoorthy, Steve Lang, +Steve Nesae, Sumesh Udayakumaran, Supriya Rao, Taylor Jakobson, Taylor Thornton, +Ted Chang, ThisIsPIRI, Thomas Deegan, Thomas Hagebols, tianyapiaozi, Tim Zaman, +tomguluson92, Tongxuan Liu, TungJerry, v1incent, Vagif, vcarpani, Vikram Tiwari, +Vishwak Srinivasan, Vitor-Alves, wangsiyu, wateryzephyr, WeberXie, WeijieSun, +Wen-Heng (Jack) Chung, wenxizhu, Will Battel, William D. 
Irons, wyzhao, Xin, +Yasuhiro Matsumoto, ymodak, Yong Tang, Younes Khoudli, Yuan Lin, Yves-Noel +Weweler, Zantares, zjjott, 卜居, 王振华 (Wang Zhenhua), 黄鑫 + +# Release 1.12.3 + +## Bug Fixes and Other Changes + +* Updates `png_archive` dependency to 1.6.37 to not be affected by + CVE-2019-7317, CVE-2018-13785, and CVE-2018-14048. +* Updates `sqlite` dependency to 3.28.0 to not be affected by CVE-2018-20506, + CVE-2018-20346, and CVE-2018-20505. + # Release 1.12.2 ## Bug Fixes and Other Changes diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py index b75ec148ae8..3d8d92c63e7 100644 --- a/tensorflow/api_template.__init__.py +++ b/tensorflow/api_template.__init__.py @@ -41,22 +41,30 @@ from tensorflow.python.tools import module_util as _module_util # API IMPORTS PLACEHOLDER +# WRAPPER_PLACEHOLDER + # Make sure directory containing top level submodules is in # the __path__ so that "from tensorflow.foo import bar" works. # We're using bitwise, but there's nothing special about that. -_API_MODULE = bitwise # pylint: disable=undefined-variable -_current_module = _sys.modules[__name__] +_API_MODULE = _sys.modules[__name__].bitwise _tf_api_dir = _os.path.dirname(_os.path.dirname(_API_MODULE.__file__)) +_current_module = _sys.modules[__name__] + if not hasattr(_current_module, '__path__'): __path__ = [_tf_api_dir] elif _tf_api_dir not in __path__: __path__.append(_tf_api_dir) # Hook external TensorFlow modules. 
+ +# Import compat before trying to import summary from tensorboard, so that +# reexport_tf_summary can get compat from sys.modules +_current_module.compat.v2.compat.v1 = _current_module.compat.v1 try: from tensorboard.summary._tf import summary _current_module.__path__ = ( [_module_util.get_parent_dir(summary)] + _current_module.__path__) + setattr(_current_module, "summary", summary) except ImportError: _logging.warning( "Limited tf.summary API due to missing TensorBoard installation.") @@ -65,6 +73,7 @@ try: from tensorflow_estimator.python.estimator.api._v2 import estimator _current_module.__path__ = ( [_module_util.get_parent_dir(estimator)] + _current_module.__path__) + setattr(_current_module, "estimator", estimator) except ImportError: pass @@ -72,6 +81,7 @@ try: from tensorflow.python.keras.api._v2 import keras _current_module.__path__ = ( [_module_util.get_parent_dir(keras)] + _current_module.__path__) + setattr(_current_module, "keras", keras) except ImportError: pass @@ -122,25 +132,17 @@ if _running_from_pip_package(): # pylint: disable=undefined-variable try: del python - if '__all__' in vars(): - vars()['__all__'].remove('python') - del core - if '__all__' in vars(): - vars()['__all__'].remove('core') except NameError: - # Don't fail if these modules are not available. - # For e.g. this file will be originally placed under tensorflow/_api/v1 which - # does not have 'python', 'core' directories. Then, it will be copied - # to tensorflow/ which does have these two directories. pass -# Similarly for compiler. Do it separately to make sure we do this even if the -# others don't exist. 
+try: + del core +except NameError: + pass try: del compiler - if '__all__' in vars(): - vars()['__all__'].remove('compiler') except NameError: pass +# pylint: enable=undefined-variable # Add module aliases if hasattr(_current_module, 'keras'): @@ -148,6 +150,8 @@ if hasattr(_current_module, 'keras'): metrics = keras.metrics optimizers = keras.optimizers initializers = keras.initializers - -compat.v2.compat.v1 = compat.v1 + setattr(_current_module, "losses", losses) + setattr(_current_module, "metrics", metrics) + setattr(_current_module, "optimizers", optimizers) + setattr(_current_module, "initializers", initializers) # pylint: enable=undefined-variable diff --git a/tensorflow/api_template_v1.__init__.py b/tensorflow/api_template_v1.__init__.py index 4fa92b07051..6d1c40a2428 100644 --- a/tensorflow/api_template_v1.__init__.py +++ b/tensorflow/api_template_v1.__init__.py @@ -30,10 +30,12 @@ from tensorflow.python.tools import module_util as _module_util # API IMPORTS PLACEHOLDER +# WRAPPER_PLACEHOLDER + # Make sure directory containing top level submodules is in # the __path__ so that "from tensorflow.foo import bar" works. # We're using bitwise, but there's nothing special about that. 
-_API_MODULE = bitwise # pylint: disable=undefined-variable +_API_MODULE = _sys.modules[__name__].bitwise # pylint: disable=undefined-variable _current_module = _sys.modules[__name__] _tf_api_dir = _os.path.dirname(_os.path.dirname(_API_MODULE.__file__)) if not hasattr(_current_module, '__path__'): @@ -46,6 +48,7 @@ try: from tensorflow_estimator.python.estimator.api._v1 import estimator _current_module.__path__ = ( [_module_util.get_parent_dir(estimator)] + _current_module.__path__) + setattr(_current_module, "estimator", estimator) except ImportError: pass @@ -53,6 +56,7 @@ try: from tensorflow.python.keras.api._v1 import keras _current_module.__path__ = ( [_module_util.get_parent_dir(keras)] + _current_module.__path__) + setattr(_current_module, "keras", keras) except ImportError: pass @@ -77,9 +81,8 @@ if '__all__' in vars(): from tensorflow.python.platform import flags # pylint: disable=g-import-not-at-top # The 'app' module will be imported as part of the placeholder section above. -app.flags = flags # pylint: disable=undefined-variable -if '__all__' in vars(): - vars()['__all__'].append('flags') +_current_module.app.flags = flags # pylint: disable=undefined-variable +setattr(_current_module, "flags", flags) # Load all plugin libraries from site-packages/tensorflow-plugins if we are # running under pip. @@ -122,25 +125,16 @@ if _running_from_pip_package(): # pylint: disable=undefined-variable try: del python - if '__all__' in vars(): - vars()['__all__'].remove('python') - del core - if '__all__' in vars(): - vars()['__all__'].remove('core') except NameError: - # Don't fail if these modules are not available. - # For e.g. this file will be originally placed under tensorflow/_api/v1 which - # does not have 'python', 'core' directories. Then, it will be copied - # to tensorflow/ which does have these two directories. pass -# Similarly for compiler. Do it separately to make sure we do this even if the -# others don't exist. 
+try: + del core +except NameError: + pass try: del compiler - if '__all__' in vars(): - vars()['__all__'].remove('compiler') except NameError: pass -compat.v2.compat.v1 = compat.v1 +_current_module.compat.v2.compat.v1 = _current_module.compat.v1 # pylint: enable=undefined-variable diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index 516b8256cec..d0b2f215975 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -64,6 +64,7 @@ tf_cuda_library( }) + [ "@com_google_absl//absl/memory", "//tensorflow/core/common_runtime/eager:eager_operation", + "//tensorflow/core/distributed_runtime/eager:remote_mgr", "//tensorflow/core/distributed_runtime/eager:eager_client", "//tensorflow/core/distributed_runtime/rpc/eager:grpc_eager_client", "//tensorflow/core/distributed_runtime/rpc:grpc_channel", diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 71a217c447d..22c1f219f38 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -54,6 +54,7 @@ limitations under the License. 
#include "tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.h" #include "tensorflow/core/distributed_runtime/server_lib.h" #include "tensorflow/core/distributed_runtime/worker_env.h" +#include "tensorflow/core/distributed_runtime/eager/remote_mgr.h" #endif // !IS_MOBILE_PLATFORM #include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/framework/rendezvous.h" @@ -260,12 +261,14 @@ tensorflow::Status UpdateTFE_ContextWithServerDef( TF_RETURN_IF_ERROR(r->Initialize(worker_session.get())); auto* device_mgr = grpc_server->worker_env()->device_mgr; + auto remote_mgr = + absl::make_unique(/*is_master=*/true); return ctx->context->InitializeRemoteMaster( std::move(server), grpc_server->worker_env(), worker_session, std::move(remote_eager_workers), std::move(remote_device_mgr), remote_workers, context_id, r, device_mgr, keep_alive_secs, - worker_session->cluster_flr.get()); + worker_session->cluster_flr.get(), std::move(remote_mgr)); #undef LOG_AND_RETURN_IF_ERROR } #endif // !IS_MOBILE_PLATFORM diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h index 1e288667121..2353ddc9b20 100644 --- a/tensorflow/c/eager/tape.h +++ b/tensorflow/c/eager/tape.h @@ -890,9 +890,19 @@ ForwardAccumulator::ForwardpropFromTape( // Stop the tape from recording pop_backward_tape.release()(); + if (grad.size() != in_grads.size()) { + return tensorflow::errors::Internal("Wrong number of gradients returned."); + } + std::vector targets; + std::vector used_in_grads; + // We may end up with slightly fewer elements than we reserve, but grad.size() + // should be a reasonably tight upper bound. 
+ targets.reserve(grad.size()); + used_in_grads.reserve(grad.size()); std::unordered_map sources_that_are_targets; - for (Gradient* grad_tensor : grad) { + for (int grad_index = 0; grad_index < grad.size(); ++grad_index) { + Gradient* grad_tensor = grad[grad_index]; if (grad_tensor != nullptr) { int64 tensor_id = vspace_.TensorId(grad_tensor); targets.push_back(tensor_id); @@ -900,23 +910,18 @@ ForwardAccumulator::ForwardpropFromTape( sources_that_are_targets.emplace( tensor_id, vspace_.TapeTensorFromGradient(grad_tensor)); } - } - } - if (targets.size() > in_grads.size()) { - return tensorflow::errors::Internal("Too many gradients returned."); - } - - for (int target_index = 0; target_index < targets.size(); ++target_index) { - Gradient* in_grad = in_grads[target_index]; - Gradient* grad_tensor = grad[target_index]; - if (grad_tensor != nullptr && in_grad != nullptr) { - // ComputeGradient steals a reference - vspace_.MarkAsResult(in_grad); + Gradient* in_grad = in_grads[grad_index]; + if (in_grad != nullptr) { + // ComputeGradient steals a reference + vspace_.MarkAsResult(in_grad); + } + used_in_grads.push_back(in_grad); } } return tape->ComputeGradient(vspace_, targets, sources, - sources_that_are_targets, in_grads, out_grads); + sources_that_are_targets, used_in_grads, + out_grads); } template diff --git a/tensorflow/compat_template.__init__.py b/tensorflow/compat_template.__init__.py index ad2443a0c32..b830af58832 100644 --- a/tensorflow/compat_template.__init__.py +++ b/tensorflow/compat_template.__init__.py @@ -28,12 +28,16 @@ from tensorflow.python.tools import module_util as _module_util # API IMPORTS PLACEHOLDER +# WRAPPER_PLACEHOLDER + # Hook external TensorFlow modules. 
_current_module = _sys.modules[__name__] try: from tensorboard.summary._tf import summary _current_module.__path__ = ( [_module_util.get_parent_dir(summary)] + _current_module.__path__) + # Make sure we get the correct summary module with lazy loading + setattr(_current_module, "summary", summary) except ImportError: _logging.warning( "Limited tf.compat.v2.summary API due to missing TensorBoard " @@ -43,6 +47,7 @@ try: from tensorflow_estimator.python.estimator.api._v2 import estimator _current_module.__path__ = ( [_module_util.get_parent_dir(estimator)] + _current_module.__path__) + setattr(_current_module, "estimator", estimator) except ImportError: pass @@ -50,6 +55,7 @@ try: from tensorflow.python.keras.api._v2 import keras _current_module.__path__ = ( [_module_util.get_parent_dir(keras)] + _current_module.__path__) + setattr(_current_module, "keras", keras) except ImportError: pass @@ -61,11 +67,15 @@ except ImportError: # # This make this one symbol available directly. from tensorflow.python.compat.v2_compat import enable_v2_behavior # pylint: disable=g-import-not-at-top +setattr(_current_module, "enable_v2_behavior", enable_v2_behavior) # Add module aliases -_current_module = _sys.modules[__name__] if hasattr(_current_module, 'keras'): losses = keras.losses metrics = keras.metrics optimizers = keras.optimizers initializers = keras.initializers + setattr(_current_module, "losses", losses) + setattr(_current_module, "metrics", metrics) + setattr(_current_module, "optimizers", optimizers) + setattr(_current_module, "initializers", initializers) diff --git a/tensorflow/compat_template_v1.__init__.py b/tensorflow/compat_template_v1.__init__.py index 23c722edef7..48374b766b7 100644 --- a/tensorflow/compat_template_v1.__init__.py +++ b/tensorflow/compat_template_v1.__init__.py @@ -27,12 +27,15 @@ from tensorflow.python.tools import module_util as _module_util # API IMPORTS PLACEHOLDER +# WRAPPER_PLACEHOLDER + # Hook external TensorFlow modules. 
_current_module = _sys.modules[__name__] try: from tensorflow_estimator.python.estimator.api._v1 import estimator _current_module.__path__ = ( [_module_util.get_parent_dir(estimator)] + _current_module.__path__) + setattr(_current_module, "estimator", estimator) except ImportError: pass @@ -40,9 +43,11 @@ try: from tensorflow.python.keras.api._v1 import keras _current_module.__path__ = ( [_module_util.get_parent_dir(keras)] + _current_module.__path__) + setattr(_current_module, "keras", keras) except ImportError: pass from tensorflow.python.platform import flags # pylint: disable=g-import-not-at-top -app.flags = flags # pylint: disable=undefined-variable +_current_module.app.flags = flags # pylint: disable=undefined-variable +setattr(_current_module, "flags", flags) diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl index d9f871dc2e5..79b4654d677 100644 --- a/tensorflow/compiler/aot/tfcompile.bzl +++ b/tensorflow/compiler/aot/tfcompile.bzl @@ -22,6 +22,7 @@ load( "tf_cc_test", "tf_copts", ) +load("//tensorflow:tensorflow.bzl", "tfcompile_extra_flags") def tf_library( name, @@ -180,13 +181,7 @@ def tf_library( # `find` on such an object. need_xla_data_proto = flags and flags.find("--gen_program_shape") != -1 - # Pass --target_cpu=haswell to tfcompile if compiling for Haswell (bazel - # build --cpu=haswell). We put it at the beginning of the flags list so - # that tfcompile_flags can override if if desired. 
- flags = select({ - "//tools/target_cpu:haswell": "--target_cpu=haswell ", - "//conditions:default": "", - }) + flags + flags = tfcompile_extra_flags() + flags if enable_xla_hlo_profiling: profiling_flag = "--xla_hlo_profile" diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc index 38d6bc4b5fd..6992a0165d4 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc @@ -1087,8 +1087,6 @@ Status Encapsulator::MakePrunedGraphCopyAndInline( FunctionDefToBodyHelper(*fdef, node->attrs(), library, &fbody)); InlineFunctionBodyOptions inline_opts; - inline_opts.override_device = false; - TF_RETURN_IF_ERROR(InlineFunctionBody(*library, pruned_graph->get(), node, fbody.get(), inline_opts)); } diff --git a/tensorflow/compiler/jit/xla_device_ops.h b/tensorflow/compiler/jit/xla_device_ops.h index fd46034ec99..2c8203b1c5d 100644 --- a/tensorflow/compiler/jit/xla_device_ops.h +++ b/tensorflow/compiler/jit/xla_device_ops.h @@ -183,6 +183,9 @@ class XlaAssignVariableOp : public OpKernel { REGISTER_KERNEL_BUILDER( \ Name("AnonymousIteratorV2").Device(DEVICE).HostMemory("deleter"), \ data::AnonymousIteratorHandleOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("DeleteIterator").Device(DEVICE).HostMemory("deleter"), \ + data::DeleteIteratorOp); \ REGISTER_KERNEL_BUILDER(Name("IteratorGetNext").Device(DEVICE), \ data::IteratorGetNextOp); \ REGISTER_KERNEL_BUILDER(Name("IteratorGetNextAsOptional").Device(DEVICE), \ diff --git a/tensorflow/compiler/mlir/lite/BUILD b/tensorflow/compiler/mlir/lite/BUILD index c93a02c588a..ba2afffb019 100644 --- a/tensorflow/compiler/mlir/lite/BUILD +++ b/tensorflow/compiler/mlir/lite/BUILD @@ -462,9 +462,16 @@ cc_library( alwayslink = 1, ) +filegroup( + name = "tf_tfl_translate_main", + srcs = [ + "tf_tfl_translate.cc", + ], +) + tf_cc_binary( name = "tf_tfl_translate", - srcs = ["tf_tfl_translate.cc"], + srcs = 
[":tf_tfl_translate_main"], deps = [ ":flatbuffer_translate_lib", ":tensorflow_lite", diff --git a/tensorflow/compiler/mlir/lite/emit_error_reporter.h b/tensorflow/compiler/mlir/lite/emit_error_reporter.h index e32fa8d1b4e..40e89c5dec8 100644 --- a/tensorflow/compiler/mlir/lite/emit_error_reporter.h +++ b/tensorflow/compiler/mlir/lite/emit_error_reporter.h @@ -26,11 +26,11 @@ namespace tflite { // Error reporter that reports errors via the module's emitError. class EmitErrorReporter : public ErrorReporter { public: - explicit EmitErrorReporter(mlir::Module module) : module_(module) {} + explicit EmitErrorReporter(mlir::ModuleOp module) : module_(module) {} int Report(const char* format, va_list args) override; private: - mlir::Module module_; + mlir::ModuleOp module_; }; } // namespace tflite diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_operator.cc b/tensorflow/compiler/mlir/lite/flatbuffer_operator.cc index a6120c91eaf..5a480ae8439 100644 --- a/tensorflow/compiler/mlir/lite/flatbuffer_operator.cc +++ b/tensorflow/compiler/mlir/lite/flatbuffer_operator.cc @@ -42,6 +42,13 @@ static tflite::Padding ConvertTFL_PaddingAttrForOptionWriter( .Case("VALID", tflite::Padding_VALID); } +static tflite::MirrorPadMode ConvertTFL_MirrorPaddingAttrForOptionWriter( + llvm::StringRef str, flatbuffers::FlatBufferBuilder* builder) { + return llvm::StringSwitch(str) + .Case("REFLECT", tflite::MirrorPadMode_REFLECT) + .Case("SYMMETRIC", tflite::MirrorPadMode_SYMMETRIC); +} + static tflite::TensorType ConvertDerivedTypeAttrForOptionWriter( mlir::Type type, flatbuffers::FlatBufferBuilder* builder) { switch (type.getKind()) { diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_translate.cc b/tensorflow/compiler/mlir/lite/flatbuffer_translate.cc index 23d15663fd4..186149437d5 100644 --- a/tensorflow/compiler/mlir/lite/flatbuffer_translate.cc +++ b/tensorflow/compiler/mlir/lite/flatbuffer_translate.cc @@ -77,9 +77,9 @@ using llvm::Twine; using mlir::Block; using mlir::Dialect; 
using mlir::ElementsAttr; -using mlir::Function; +using mlir::FuncOp; using mlir::MLIRContext; -using mlir::Module; +using mlir::ModuleOp; using mlir::NoneType; using mlir::openOutputFile; using mlir::Operation; @@ -167,6 +167,8 @@ static StatusOr GetTFLiteType(Type type, return tflite::TensorType_STRING; case mlir::TF::TensorFlowTypes::COMPLEX64: return tflite::TensorType_COMPLEX64; + case mlir::TF::TensorFlowTypes::UINT8: + return tflite::TensorType_UINT8; case mlir::StandardTypes::Integer: { const auto& itype = type.cast(); switch (itype.getWidth()) { @@ -243,18 +245,18 @@ static bool HasValidTFLiteType(Value* value, T& error_handler) { // TODO(hinsu): Now that translation is done by making a single pass over the // MLIR module, consider inlining these validation checks at the place where // these invariants are assumed instead of checking upfront. -static bool IsValidTFLiteMlirModule(Module module) { +static bool IsValidTFLiteMlirModule(ModuleOp module) { MLIRContext* context = module.getContext(); // Verify that module has a function named main. - Function main_fn = module.getNamedFunction("main"); + FuncOp main_fn = module.lookupSymbol("main"); if (!main_fn) { return emitError(UnknownLoc::get(context), "should have a function named 'main'"), false; } - for (auto fn : module.getOps()) { + for (auto fn : module.getOps()) { if (fn.getBlocks().size() != 1) { return fn.emitError("should have exactly one basic block"), false; } @@ -323,14 +325,14 @@ class Translator { // Translates the given MLIR module into TFLite FlatBuffer format and returns // the serialized output. Returns llvm::None on unsupported, invalid inputs or // internal error. 
- static Optional Translate(Module module, + static Optional Translate(ModuleOp module, bool emit_builtin_tflite_ops, bool emit_select_tf_ops, bool emit_custom_ops); private: enum class OpType : char { kTfliteBuiltin, kSelectTf, kCustomOp }; - explicit Translator(Module module, bool emit_builtin_tflite_ops, + explicit Translator(ModuleOp module, bool emit_builtin_tflite_ops, bool emit_select_tf_ops, bool emit_custom_ops) : module_(module), builder_(kInitialBufferSize) { // The first buffer must be empty according to the schema definition. @@ -391,11 +393,11 @@ class Translator { Operation* inst, const std::vector& operands, const std::vector& results); - Optional> BuildSubGraph(Function fn); + Optional> BuildSubGraph(FuncOp fn); // Uses the tf.entry_function attribute (if set) to initialize the op to name // mapping. - void InitializeNamesFromAttribute(Function fn); + void InitializeNamesFromAttribute(FuncOp fn); // Returns a unique name for `op`. std::string UniqueName(mlir::Operation* op); @@ -403,7 +405,7 @@ class Translator { // Returns a unique name starting with a given prefix. std::string UniqueName(llvm::StringRef prefix); - Module module_; + ModuleOp module_; flatbuffers::FlatBufferBuilder builder_; BufferOffset empty_buffer_; @@ -449,10 +451,16 @@ std::string Translator::GetName(Operation* inst) { } std::string Translator::UniqueName(llvm::StringRef prefix) { + // Keep incrementing the counter until we find a unique name. 
std::string name = prefix; - auto& val = name_to_count_[name]; - if (val) name = (prefix + llvm::Twine(val)).str(); - ++val; + int64_t& prefix_count = name_to_count_[name]; + int64_t val = prefix_count; + while (val != 0) { + name = (prefix + llvm::Twine(prefix_count)).str(); + ++prefix_count; + val = name_to_count_[name]; + } + name_to_count_[name] = 1; return name; } @@ -781,7 +789,7 @@ Optional> Translator::BuildOperator( llvm::None; } -void Translator::InitializeNamesFromAttribute(Function fn) { +void Translator::InitializeNamesFromAttribute(FuncOp fn) { auto dict_attr = fn.getAttrOfType("tf.entry_function"); if (!dict_attr) return; @@ -794,8 +802,10 @@ void Translator::InitializeNamesFromAttribute(Function fn) { fn.emitWarning() << "invalid entry function specification"; return; } - for (auto it : llvm::enumerate(fn.getArguments())) + for (auto it : llvm::enumerate(fn.getArguments())) { op_to_name_[*it.value()->user_begin()] = input_names[it.index()]; + ++name_to_count_[input_names[it.index()].str()]; + } } if (auto str = dict_attr.get("outputs").dyn_cast()) { @@ -813,18 +823,19 @@ void Translator::InitializeNamesFromAttribute(Function fn) { // ensure the name that will be assigned to the buffer is the same, or // insert an op so that we can have a buffer named such. This cannot // currently happen due to pseudo_input nodes. 
- if (auto op = it.value()->getDefiningOp()) + if (auto op = it.value()->getDefiningOp()) { op_to_name_[op] = output_names[it.index()]; - else + name_to_count_[output_names[it.index()].str()] = 1; + } else { fn.emitWarning() << "output is not due to an op and '" << output_names[it.index()] << "' may not be a named output"; + } } } } -Optional> Translator::BuildSubGraph( - Function fn) { +Optional> Translator::BuildSubGraph(FuncOp fn) { InitializeNamesFromAttribute(fn); std::vector> tensors; llvm::DenseMap tensor_index_map; @@ -927,7 +938,7 @@ Optional> Translator::BuildSubGraph( /*name=*/builder_.CreateString(fn.getName().str())); } -Optional Translator::Translate(Module module, +Optional Translator::Translate(ModuleOp module, bool emit_builtin_tflite_ops, bool emit_select_tf_ops, bool emit_custom_ops) { @@ -941,14 +952,14 @@ Optional Translator::TranslateInternal() { // Create a list of functions in the module with main function being the // first function in the list. This is required as the first subgraph in the // model is entry point for the model. 
- std::vector functions; + std::vector functions; functions.reserve(std::distance(module_.begin(), module_.end())); int subgraph_idx = 0; - Function main_fn = module_.getNamedFunction("main"); + FuncOp main_fn = module_.lookupSymbol("main"); subgraph_index_map_[main_fn.getName().str()] = subgraph_idx++; functions.push_back(main_fn); - for (auto fn : module_.getOps()) { + for (auto fn : module_.getOps()) { if (fn == main_fn) continue; subgraph_index_map_[fn.getName().str()] = subgraph_idx++; @@ -992,7 +1003,7 @@ Optional Translator::TranslateInternal() { // * Ops with variable tensors // bool tflite::MlirToFlatBufferTranslateFunction( - Module module, std::string* serialized_flatbuffer, + ModuleOp module, std::string* serialized_flatbuffer, bool emit_builtin_tflite_ops, bool emit_select_tf_ops, bool emit_custom_ops) { auto maybe_translated = Translator::Translate( @@ -1003,7 +1014,7 @@ bool tflite::MlirToFlatBufferTranslateFunction( } static mlir::LogicalResult MlirToFlatBufferFileTranslateFunction( - Module module, llvm::StringRef filename) { + ModuleOp module, llvm::StringRef filename) { std::string serialized_flatbuffer; if (tflite::MlirToFlatBufferTranslateFunction( module, &serialized_flatbuffer, emit_builtin_tflite_ops, diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_translate.h b/tensorflow/compiler/mlir/lite/flatbuffer_translate.h index 7a0c60e27b1..820b2697e43 100644 --- a/tensorflow/compiler/mlir/lite/flatbuffer_translate.h +++ b/tensorflow/compiler/mlir/lite/flatbuffer_translate.h @@ -32,7 +32,7 @@ namespace tflite { // Translates the given MLIR `module` into a FlatBuffer and stores the // serialized flatbuffer into the string. 
-bool MlirToFlatBufferTranslateFunction(mlir::Module module, +bool MlirToFlatBufferTranslateFunction(mlir::ModuleOp module, std::string *serialized_flatbuffer, bool emit_builtin_tflite_ops, bool emit_select_tf_ops, diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td index 0b89a85580e..34f588993d5 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td @@ -50,6 +50,13 @@ def TFL_Str : Type()">, "TFLite string type">, BuildableType<"getType()">; +//===----------------------------------------------------------------------===// +// TFLite dialect uint8 type - uses the TF uint8 type as implementation +//===----------------------------------------------------------------------===// +def TFL_Uint8 : Type()">, + "TFLite uint8 type">, + BuildableType<"getType()">; + //===----------------------------------------------------------------------===// // Activation function enum definitions. //===----------------------------------------------------------------------===// @@ -77,11 +84,17 @@ def TFL_AFAttr : StrEnumAttr< // These should match the padding enum in TFLite schema. def TFL_PAD_Same : StrEnumAttrCase<"SAME">; def TFL_PAD_Valid : StrEnumAttrCase<"VALID">; +def TFL_MIRRORPAD_Reflect : StrEnumAttrCase<"REFLECT">; +def TFL_MIRRORPAD_Symmetric : StrEnumAttrCase<"SYMMETRIC">; def TFL_PaddingAttr : StrEnumAttr<"Padding", "padding enum", [ TFL_PAD_Same, TFL_PAD_Valid ]>; +def TFL_MirrorPaddingAttr : StrEnumAttr<"Padding", "Mirror pad enum", [ + TFL_MIRRORPAD_Reflect, TFL_MIRRORPAD_Symmetric + ]>; + //===----------------------------------------------------------------------===// // Min-max range pair definitions. 
//===----------------------------------------------------------------------===// @@ -432,13 +445,13 @@ def TFL_ConcatenationOp : TFL_Op<"concatenation", }]; let arguments = ( - ins Variadic>:$values, + ins Variadic>:$values, I32Attr:$axis, TFL_AFAttr:$fused_activation_function ); let results = (outs - TensorOf<[F32, I64, I32, I16, I8, TFL_QI8]>:$output + TensorOf<[F32, I64, I32, I16, I8, TFL_QI8, TFL_Uint8]>:$output ); let hasOptions = 1; @@ -553,9 +566,8 @@ def TFL_LessEqualOp : TFL_Op<"less_equal", [Broadcastable, NoSideEffect]> { }]; let arguments = ( - // TODO(haoliang): missing Uint8 - ins TensorOf<[F32, I32, I64, I8]>:$lhs, - TensorOf<[F32, I32, I64, I8]>:$rhs); + ins TensorOf<[F32, I32, I64, I8, TFL_Uint8]>:$lhs, + TensorOf<[F32, I32, I64, I8, TFL_Uint8]>:$rhs); let results = (outs TFL_BoolTensor:$output); @@ -665,10 +677,9 @@ def TFL_EqualOp: TFL_Op<"equal", [Commutative, Broadcastable, }]; let arguments = ( - // TODO: missing Uint8 ins - TensorOf<[I1, F32, I32, I64, I8]>:$x, - TensorOf<[I1, F32, I32, I64, I8]>:$y + TensorOf<[I1, F32, I32, I64, I8, TFL_Uint8]>:$x, + TensorOf<[I1, F32, I32, I64, I8, TFL_Uint8]>:$y ); let results = (outs TFL_BoolTensor:$output); @@ -1066,8 +1077,7 @@ def TFL_MeanOp : TFL_Op<"mean", [NoSideEffect]> { }]; let arguments = (ins - // TODO: missing uint8 - TensorOf<[F32, I8, I32, I64]>:$input, + TensorOf<[F32, I8, I32, I64, TFL_Uint8]>:$input, TensorOf<[I32, I64]>:$axis, BoolAttr:$keep_dims ); @@ -1686,9 +1696,10 @@ def TFL_TanhOp: TFL_Op<"tanh", [ Computes element-wise Hyperbolic tangent of input }]; - let arguments = (ins AnyTensor:$x); + // TODO(haoliang): missing Uint8. 
+ let arguments = (ins TensorOf<[F32, I16, I8]>:$x); - let results = (outs AnyTensor:$y); + let results = (outs TensorOf<[F32, I16, I8]>:$y); } def TFL_TileOp: TFL_Op<"tile", [NoSideEffect, @@ -1958,6 +1969,61 @@ def TFL_StridedSliceOp: TFL_Op<"strided_slice", let hasOptions = 1; } +def TFL_CastOp : TFL_Op<"cast", [NoSideEffect, SameOperandsAndResultShape]> { + let summary = "Cast operator"; + + let description = [{ + Casts input from input type to output type. + }]; + + // TODO(b/135538711): Add complex types here. + let arguments = (ins + TensorOf<[F32, I1, I32, I64]>:$input + ); + + let results = (outs TensorOf<[F32, I1, I32, I64]>:$output); + + // TFLite's cast op does not utilize CastOptions, instead derives types + // from the TfLiteTensors. + let hasOptions = 0; +} + + +def TFL_MirrorPadOp: TFL_Op<"mirror_pad", [ + NoSideEffect, TFL_OperandHasRank<1, 2>]> { + let summary = "MirrorPad Operator. Pads a tensor with mirrored values."; + + let description = [{ + This operation pads a input with mirrored values according to the paddings + you specify. paddings is an integer tensor with shape [n, 2], + where n is the rank of input. + For each dimension D of input, paddings[D, 0] indicates how many values + to add before the contents of input in that dimension, + and paddings[D, 1] indicates how many values to add after the contents of + input in that dimension. + + Both paddings[D, 0] and paddings[D, 1] must be no greater than + input.dim_size(D) (or input.dim_size(D) - 1) + if copy_border is true (if false, respectively). + + The padded size of each dimension D of the output is: + + paddings(D, 0) + input.dim_size(D) + paddings(D, 1) + }]; + + let arguments = (ins + // TODO: add uint8 support when ready. 
+ TensorOf<[F32, I32, I64]>:$input, + TensorOf<[I32, I64]>:$pad, + TFL_MirrorPaddingAttr:$mode + ); + + let results = (outs + TensorOf<[F32, I32, I64]>:$output + ); + + let hasOptions = 1; +} //===----------------------------------------------------------------------===// // Quantization ops. diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_traits.h b/tensorflow/compiler/mlir/lite/ir/tfl_traits.h index c9174dfdd07..807c1100b71 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_traits.h +++ b/tensorflow/compiler/mlir/lite/ir/tfl_traits.h @@ -75,7 +75,7 @@ class FixedResultUniformScale { Builder builder(op->getContext()); IntegerType storage_type = builder.getIntegerType(BitWidth); const double scale = static_cast(ScaleMantissa) * - ::exp10(static_cast(ScaleExp)); + ::pow(10.0, static_cast(ScaleExp)); return UniformQuantizedType::getChecked( Sign, storage_type, result_type.getElementType(), scale, ZeroPoint, StorageTypeMin, StorageTypeMax, builder.getUnknownLoc()); diff --git a/tensorflow/compiler/mlir/lite/mlir_tflite_runner.cc b/tensorflow/compiler/mlir/lite/mlir_tflite_runner.cc index d65e17e0683..ff27ad76136 100644 --- a/tensorflow/compiler/mlir/lite/mlir_tflite_runner.cc +++ b/tensorflow/compiler/mlir/lite/mlir_tflite_runner.cc @@ -31,6 +31,7 @@ limitations under the License. #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" +#include "mlir/IR/Function.h" // TF:local_config_mlir #include "mlir/IR/MLIRContext.h" // TF:local_config_mlir #include "mlir/IR/Module.h" // TF:local_config_mlir #include "mlir/Parser.h" // TF:local_config_mlir @@ -98,7 +99,7 @@ int main(int argc, char** argv) { if (!module) return 1; // TODO(jpienaar): Expand to support inputs. 
- mlir::Function main = module->getNamedFunction("main"); + mlir::FuncOp main = module->lookupSymbol("main"); QCHECK(main) << "No 'main' function specified."; if (main.getType().getNumInputs() != 0) LOG(QFATAL) << "NYI: Only nullary functions supported."; diff --git a/tensorflow/compiler/mlir/lite/python/graphdef_to_tfl_flatbuffer.cc b/tensorflow/compiler/mlir/lite/python/graphdef_to_tfl_flatbuffer.cc index 68521016f51..cc03445bd1c 100644 --- a/tensorflow/compiler/mlir/lite/python/graphdef_to_tfl_flatbuffer.cc +++ b/tensorflow/compiler/mlir/lite/python/graphdef_to_tfl_flatbuffer.cc @@ -43,6 +43,8 @@ DataType ConvertIODataTypeToDataType(toco::IODataType dtype) { return DT_INT64; case toco::IODataType::STRING: return DT_STRING; + case toco::IODataType::BOOL: + return DT_BOOL; default: return DT_INVALID; } diff --git a/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir index 53dcc83feda..6b29869dde5 100644 --- a/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir @@ -829,3 +829,39 @@ func @slice1Tensor(%arg0: tensor<2x3x5xf32>, %arg1: tensor<3xi32>, %arg2: tensor // CHECK-LABEL: slice1Tensor // CHECK: "tfl.slice"(%arg0, %arg1, %arg2) : (tensor<2x3x5xf32>, tensor<3xi32>, tensor<3xi32>) -> tensor } + +func @mirror_pad(tensor<2x1x3xf32>, tensor<3x2xi32>) -> tensor { +^bb0(%arg0: tensor<2x1x3xf32>, %arg1: tensor<3x2xi32>): + %0 = "tf.MirrorPad"(%arg0, %arg1) { mode = "SYMMETRIC" }: (tensor<2x1x3xf32>, tensor<3x2xi32>) -> tensor + return %0#0 : tensor + + // CHECK-LABEL: mirror_pad + // CHECK: %0 = "tfl.mirror_pad"(%arg0, %arg1) {mode = "SYMMETRIC"} : (tensor<2x1x3xf32>, tensor<3x2xi32>) -> tensor + // CHECK: return %0 : tensor +} + +func @mirror_pad_reflect(tensor<2x1x3xf32>, tensor<3x2xi32>) -> tensor { +^bb0(%arg0: tensor<2x1x3xf32>, %arg1: tensor<3x2xi32>): + %0 = "tf.MirrorPad"(%arg0, %arg1) { mode = "REFLECT" }: (tensor<2x1x3xf32>, tensor<3x2xi32>) -> 
tensor + return %0#0 : tensor + + // CHECK-LABEL: mirror_pad_reflect + // CHECK: %0 = "tfl.mirror_pad"(%arg0, %arg1) {mode = "REFLECT"} : (tensor<2x1x3xf32>, tensor<3x2xi32>) -> tensor + // CHECK: return %0 : tensor +} + +func @Tanh(%arg0: tensor<1xf32>) -> tensor<1xf32> { + %2 = "tf.Tanh"(%arg0) : (tensor<1xf32>) -> tensor<1xf32> + return %2: tensor<1xf32> + +// CHECK-LABEL: Tanh +// CHECK: %0 = "tfl.tanh"(%arg0) : (tensor<1xf32>) -> tensor<1xf32> +} + +func @cast(%arg0: tensor<1x2x2x5xi32>) -> tensor<1x2x2x5xf32> { + %0 = "tf.Cast"(%arg0) : (tensor<1x2x2x5xi32>) -> tensor<1x2x2x5xf32> + return %0 : tensor<1x2x2x5xf32> + + // CHECK-LABEL: cast + // CHECK: "tfl.cast"(%arg0) : (tensor<1x2x2x5xi32>) -> tensor<1x2x2x5xf32> +} diff --git a/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/simple.mlir b/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/simple.mlir index d823216304e..eec837bc62e 100644 --- a/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/simple.mlir +++ b/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/simple.mlir @@ -1,6 +1,7 @@ // RUN: flatbuffer_translate -mlir-to-tflite-flatbuffer %s -o - | flatbuffer_to_string - | FileCheck --dump-input-on-failure %s -func @main(tensor<3x2xi32>) -> tensor<3x2xi32> { +func @main(tensor<3x2xi32>) -> tensor<3x2xi32> + attributes {tf.entry_function = {inputs = "input", outputs = "SameNameAsOutput"}} { ^bb0(%arg0: tensor<3x2xi32>): // CHECK: { // CHECK-NEXT: version: 3, @@ -14,7 +15,7 @@ func @main(tensor<3x2xi32>) -> tensor<3x2xi32> { // CHECK-NEXT: shape: [ 3, 2 ], // CHECK-NEXT: type: INT32, // CHECK-NEXT: buffer: 1, -// CHECK-NEXT: name: "Input", +// CHECK-NEXT: name: "input", // CHECK-NEXT: quantization: { // CHECK-EMPTY: // CHECK-NEXT: } @@ -38,7 +39,7 @@ func @main(tensor<3x2xi32>) -> tensor<3x2xi32> { // CHECK-NEXT: shape: [ ], // CHECK-NEXT: type: INT32, // CHECK-NEXT: buffer: 4, -// CHECK-NEXT: name: "Const2", +// CHECK-NEXT: name: "SameNameAsOutput1", // CHECK-NEXT: quantization: { // CHECK-EMPTY: 
// CHECK-NEXT: } @@ -46,7 +47,7 @@ func @main(tensor<3x2xi32>) -> tensor<3x2xi32> { // CHECK-NEXT: shape: [ ], // CHECK-NEXT: type: INT32, // CHECK-NEXT: buffer: 5, -// CHECK-NEXT: name: "add", +// CHECK-NEXT: name: "SameNameAsOutput", // CHECK-NEXT: quantization: { // CHECK-EMPTY: // CHECK-NEXT: } @@ -90,7 +91,7 @@ func @main(tensor<3x2xi32>) -> tensor<3x2xi32> { %0 = "tfl.pseudo_input" (%arg0) : (tensor<3x2xi32>) -> tensor<3x2xi32> loc("Input") %1 = "tfl.pseudo_const" () {value = dense<[[1, 2], [3, 4], [5, 6]]> : tensor<3x2xi32>} : () -> tensor<3x2xi32> loc("Const") %2 = "tfl.sub" (%0, %1) {fused_activation_function = "RELU6"} : (tensor<3x2xi32>, tensor<3x2xi32>) -> tensor<3x2xi32> loc("sub") - %3 = "std.constant" () {value = dense<10> : tensor} : () -> tensor loc("Const2") + %3 = "std.constant" () {value = dense<10> : tensor} : () -> tensor loc("SameNameAsOutput") %4 = "tfl.add" (%3, %2) {fused_activation_function = "NONE"} : (tensor, tensor<3x2xi32>) -> tensor<3x2xi32> loc("add") return %4 : tensor<3x2xi32> } diff --git a/tensorflow/compiler/mlir/lite/tests/ops.mlir b/tensorflow/compiler/mlir/lite/tests/ops.mlir index 1eb3f5f48d6..88b137efadb 100644 --- a/tensorflow/compiler/mlir/lite/tests/ops.mlir +++ b/tensorflow/compiler/mlir/lite/tests/ops.mlir @@ -817,7 +817,7 @@ func @testConcatInvalidOutputElementalType(%arg0: tensor<2xi32>, %arg1: tensor<2 // ----- func @testConcatInvalidStorageType(%arg0: tensor<2x!quant.uniform>, %arg1: tensor<2x!quant.uniform>) -> tensor<2x2x!quant.uniform> { - // expected-error @+1 {{'tfl.concatenation' op operand #0 must be tensor of 32-bit float or 64-bit integer or 32-bit integer or 16-bit integer or 8-bit integer or quantized type with 8 bits storage type values}} + // expected-error @+1 {{'tfl.concatenation' op operand #0 must be tensor of 32-bit float or 64-bit integer or 32-bit integer or 16-bit integer or 8-bit integer or quantized type with 8 bits storage type or TFLite uint8 type values}} %0 = "tfl.concatenation"(%arg0, 
%arg1) {axis = 0 : i32, fused_activation_function = "NONE"} : (tensor<2x!quant.uniform>, tensor<2x!quant.uniform>) -> tensor<2x2x!quant.uniform> return %0 : tensor<2x2x!quant.uniform> } diff --git a/tensorflow/compiler/mlir/lite/tf_tfl_translate.cc b/tensorflow/compiler/mlir/lite/tf_tfl_translate.cc index feeb91d3a04..9656abb1611 100644 --- a/tensorflow/compiler/mlir/lite/tf_tfl_translate.cc +++ b/tensorflow/compiler/mlir/lite/tf_tfl_translate.cc @@ -20,6 +20,7 @@ limitations under the License. #include "llvm/Support/SourceMgr.h" #include "llvm/Support/ToolOutputFile.h" #include "mlir/IR/Diagnostics.h" // TF:local_config_mlir +#include "mlir/IR/Function.h" // TF:local_config_mlir #include "mlir/IR/MLIRContext.h" // TF:local_config_mlir #include "mlir/IR/Module.h" // TF:local_config_mlir #include "mlir/Support/FileUtilities.h" // TF:local_config_mlir @@ -32,8 +33,9 @@ limitations under the License. #include "tensorflow/lite/schema/schema_generated.h" #include "tensorflow/stream_executor/lib/statusor.h" +using mlir::FuncOp; using mlir::MLIRContext; -using mlir::Module; +using mlir::ModuleOp; using stream_executor::port::StatusOr; using tensorflow::Status; @@ -47,7 +49,7 @@ static llvm::cl::opt print_function_result_mapping( enum TranslationStatus { kTrSuccess, kTrFailure }; static int PrintFunctionResultMapping(const std::string &result, - Module module) { + ModuleOp module) { // Build model from the resultant string to extract the return values from // their source of truth. 
auto model = @@ -83,7 +85,7 @@ static int PrintFunctionResultMapping(const std::string &result, std::cout << '\'' << subgraph_name << "' outputs:\n"; mlir::Operation *terminator = nullptr; if (subgraph->name()) { - if (auto fn = module.getNamedFunction(subgraph->name()->str())) + if (auto fn = module.lookupSymbol(subgraph->name()->str())) terminator = fn.back().getTerminator(); } i = 0; diff --git a/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc b/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc index 759f93c94a3..afad51b7218 100644 --- a/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc +++ b/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc @@ -34,7 +34,7 @@ limitations under the License. namespace tensorflow { using mlir::MLIRContext; -using mlir::Module; +using mlir::ModuleOp; using mlir::OwningModuleRef; using stream_executor::port::StatusOr; @@ -87,8 +87,8 @@ StatusOr LoadFromGraphdefOrMlirSource( context); } -bool ShouldRunQuantizePasses(mlir::Module m) { - if (mlir::Function main_fn = m.getNamedFunction("main")) { +bool ShouldRunQuantizePasses(mlir::ModuleOp m) { + if (mlir::FuncOp main_fn = m.lookupSymbol("main")) { return main_fn.getAttrOfType("tf.quantize") != mlir::Attribute(); } @@ -100,6 +100,16 @@ void AddTFToTFLConversionPasses(bool emit_builtin_tflite_ops, bool run_quantize, bool lower_tensor_list_ops, mlir::PassManager *pass_manager) { pass_manager->addPass(mlir::TFControlFlow::CreateRaiseTFControlFlowPass()); + + if (lower_tensor_list_ops) { + // Execute this pass before `CanonicalizerPass` in case some TensorList + // ops are constant folded into variant types. + // TODO(b/137125056): Move this pass after `CanonicalizerPass` after we + // handle constant ops that produce `TensorList`. + // TODO(haoliang): Add this pass by default. + pass_manager->addPass(mlir::TFL::CreateLowerStaticTensorListPass()); + } + // TODO(jpienaar): Revise post dialect constants. 
pass_manager->addPass(mlir::TF::CreateDecodeConstantPass()); // Canonicalization includes const folding, which is utilized here to optimize @@ -112,10 +122,6 @@ void AddTFToTFLConversionPasses(bool emit_builtin_tflite_ops, bool run_quantize, if (emit_builtin_tflite_ops) { // Prepare for TFLite dialect, rerun canonicalization, and then legalize to // the TFLite dialect. - // TODO(haoliang): Add this pass by default. - if (lower_tensor_list_ops) { - pass_manager->addPass(mlir::TFL::CreateLowerStaticTensorListPass()); - } pass_manager->addPass(mlir::TFL::CreatePrepareTFPass()); pass_manager->addPass(mlir::createCanonicalizerPass()); pass_manager->addPass(mlir::TFL::CreateLegalizeTFPass()); @@ -132,7 +138,7 @@ void AddTFToTFLConversionPasses(bool emit_builtin_tflite_ops, bool run_quantize, } Status ConvertTFControlFlowToTFLOrFlatbuffer( - mlir::Module module, bool export_to_mlir, bool emit_builtin_tflite_ops, + mlir::ModuleOp module, bool export_to_mlir, bool emit_builtin_tflite_ops, bool emit_select_tf_ops, bool emit_custom_ops, bool emit_quant_adaptor_ops, bool lower_tensor_list_ops, std::string *result) { mlir::StatusScopedDiagnosticHandler statusHandler(module.getContext(), diff --git a/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.h b/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.h index e4e9ce3ba00..68ab674872f 100644 --- a/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.h +++ b/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.h @@ -46,7 +46,7 @@ LoadFromGraphdefOrMlirSource( // attribute "tf.quantize" by the importer module. // TODO(fengliuai): switch to the cmd flag once the flags are moved to this // file with main method. -bool ShouldRunQuantizePasses(mlir::Module m); +bool ShouldRunQuantizePasses(mlir::ModuleOp m); // Add the MLIR passes that convert TF control flow dialect to TF Lite dialect // to a MLIR `pass_manager`. 
These passes first raise the control flow in the TF @@ -69,7 +69,7 @@ void AddTFToTFLConversionPasses(bool emit_builtin_tflite_ops, bool run_quantize, // main function, Quantization is applied. If `export_to_mlir` is true, the // result is exported in MLIR text format, otherwise exported in flat buffer. Status ConvertTFControlFlowToTFLOrFlatbuffer( - mlir::Module module, bool export_to_mlir, bool emit_builtin_tflite_ops, + mlir::ModuleOp module, bool export_to_mlir, bool emit_builtin_tflite_ops, bool emit_select_tf_ops, bool emit_custom_ops, bool emit_quant_adaptor_ops, bool lower_tensor_list_ops, std::string* result); } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td b/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td index 0e7534bb513..ab4c8cb5ee7 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td @@ -131,6 +131,7 @@ def : Pat<(TF_SinOp F32Tensor:$arg), (TFL_SinOp $arg)>; def : Pat<(TF_SliceOp $input, $begin, $size), (TFL_SliceOp $input, $begin, $size)>; def : Pat<(TF_SoftmaxOp $arg), (TFL_SoftmaxOp $arg, ConstF32Attr<"1.0">)>; def : Pat<(TF_SqueezeOp $arg, $squeeze_dims), (TFL_SqueezeOp $arg, $squeeze_dims)>; +def : Pat<(TF_TanhOp $arg), (TFL_TanhOp $arg)>; def : Pat<(TF_TransposeOp $arg, $perm), (TFL_TransposeOp $arg, $perm)>; def : Pat<(TF_ZerosLikeOp $arg), (TFL_ZerosLikeOp $arg)>; @@ -228,18 +229,25 @@ def : Pat<(TF_MeanOp $arg0, $arg1, BoolAttr:$arg2), (TFL_MeanOp $arg0, $arg1, $a def : Pat<(TF_SumOp $arg, $axes, BoolAttr:$arg2), (TFL_SumOp $arg, $axes, $arg2)>; +// TopK in TFL is always sorted so we ignore that attribute here. 
+def : Pat<(TF_TopKV2Op $input, $k, $ignored_sorted), (TFL_TopKV2Op $input, $k)>; + def : Pat<(TF_MinOp $arg0, $arg1, BoolAttr:$arg2), (TFL_ReduceMinOp $arg0, $arg1, $arg2)>; def : Pat<(TF_MaxOp $arg0, $arg1, BoolAttr:$arg2), (TFL_ReduceMaxOp $arg0, $arg1, $arg2)>; def : Pat<(TF_ProdOp $arg0, $arg1, BoolAttr:$arg2), (TFL_ReduceProdOp $arg0, $arg1, $arg2)>; +def : Pat<(TF_CastOp $arg0, BoolAttr:$arg1), (TFL_CastOp $arg0)>; + def : Pat<(TF_BatchToSpaceNDOp $input, $block_shape, $crops), (TFL_BatchToSpaceNdOp $input, $block_shape, $crops)>; def : Pat<(TF_SpaceToBatchNDOp $input, $block_shape, $paddings), (TFL_SpaceToBatchNdOp $input, $block_shape, $paddings)>; def : Pat<(TF_ResizeBilinearOp $images, $size, $align_corners, ConstBoolAttrFalse:$half_pixel_centers), (TFL_ResizeBilinearOp $images, $size, $align_corners)>; +def : Pat<(TF_MirrorPadOp $arg0, $arg1, $cst), (TFL_MirrorPadOp $arg0, $arg1, $cst)>; + def : Pat< (TF_StridedSliceOp $input, $begin, $end, $strides, $begin_mask, $end_mask, $ellipsis_mask, $new_axis_mask, $shrink_axis_mask), (TFL_StridedSliceOp $input, $begin, $end, $strides, diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc index 5be5cfec497..05a604dc461 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc @@ -72,7 +72,6 @@ DECL_CONVERT_OP(MatMul); DECL_CONVERT_OP(Pack); DECL_CONVERT_OP(Split); DECL_CONVERT_OP(SplitV); -DECL_CONVERT_OP(TopKV2); DECL_CONVERT_OP(Unpack); #undef DECL_CONVERT_OP @@ -207,14 +206,6 @@ PatternMatchResult ConvertTFSplitVOp::matchAndRewrite( return matchSuccess(); } -PatternMatchResult ConvertTFTopKV2Op::matchAndRewrite( - Operation* op, PatternRewriter& rewriter) const { - // TopK in TFL is always sorted so we ignore that attribute here. 
- rewriter.replaceOpWithNewOp(op, op->getOperand(0), - op->getOperand(1)); - return matchSuccess(); -} - PatternMatchResult ConvertTFUnpackOp::matchAndRewrite( Operation* op, PatternRewriter& rewriter) const { auto tf_unpack_op = cast(op); @@ -239,7 +230,7 @@ void LegalizeTF::runOnFunction() { populateWithGenerated(ctx, &patterns); RewriteListBuilder::build(patterns, ctx); applyPatternsGreedily(func, std::move(patterns)); } diff --git a/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc b/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc index 266aeb3de0d..39e89f53423 100644 --- a/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc +++ b/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc @@ -61,7 +61,7 @@ namespace { class TensorListPatternRewriter : public PatternRewriter { public: - explicit TensorListPatternRewriter(Function fn) + explicit TensorListPatternRewriter(FuncOp fn) : PatternRewriter(fn.getBody()) {} Operation *createOperation(const OperationState &state) override { @@ -77,7 +77,7 @@ struct LowerStaticTensorListPass void runOnModule() override; // Apply type and op changes within a function. - LogicalResult RewriteFunction(Function func, + LogicalResult RewriteFunction(FuncOp func, TensorListPatternRewriter *rewriter); // Changes the function type of `cond_func` and `body_func`, and the result @@ -276,8 +276,8 @@ LogicalResult LowerStaticTensorListPass::UpdateWhileFunctionType( auto *context = &getContext(); auto module = getModule(); - Function cond_func = module.getNamedFunction(while_op->getCond()); - Function body_func = module.getNamedFunction(while_op->getBody()); + FuncOp cond_func = module.lookupSymbol(while_op->getCond()); + FuncOp body_func = module.lookupSymbol(while_op->getBody()); if (cond_func) { // Change `cond_func`'s argument types to `unranked_argument_types`. 
@@ -327,7 +327,7 @@ LogicalResult LowerStaticTensorListPass::UpdateWhileFunctionType( } LogicalResult LowerStaticTensorListPass::RewriteFunction( - Function func, TensorListPatternRewriter *rewriter) { + FuncOp func, TensorListPatternRewriter *rewriter) { auto *context = &getContext(); for (Block &block : func) { @@ -388,7 +388,7 @@ void LowerStaticTensorListPass::runOnModule() { // have a potential issue when one function taking a `DT_VARIANT` is processed // before the function that produces the `DT_VARIANT`. We need to carefully // order the functions to be processed. - std::vector funcs_in_module; + std::vector funcs_in_module; for (auto func : getModule().getOps()) { // Always place the main function to be the first in the list. if (func.getName() == "main") { diff --git a/tensorflow/compiler/mlir/lite/transforms/post_quantize.cc b/tensorflow/compiler/mlir/lite/transforms/post_quantize.cc index f0f1d3f822c..94c19d27adc 100644 --- a/tensorflow/compiler/mlir/lite/transforms/post_quantize.cc +++ b/tensorflow/compiler/mlir/lite/transforms/post_quantize.cc @@ -48,7 +48,7 @@ class PostQuantizePass : public FunctionPass { bool emit_quant_adaptor_ops_; }; -void RemoveQuantizationAdaptorOps(Function func) { +void RemoveQuantizationAdaptorOps(FuncOp func) { mlir::OpBuilder builder(func.getBody()); auto& bb = func.getBlocks().front(); auto* terminator = bb.getTerminator(); diff --git a/tensorflow/compiler/mlir/lite/transforms/quantize.cc b/tensorflow/compiler/mlir/lite/transforms/quantize.cc index 7d98487ded2..6e7d060cca5 100644 --- a/tensorflow/compiler/mlir/lite/transforms/quantize.cc +++ b/tensorflow/compiler/mlir/lite/transforms/quantize.cc @@ -50,52 +50,19 @@ struct QuantizePass : public FunctionPass { #include "tensorflow/compiler/mlir/lite/transforms/generated_quantize.inc" -struct QuantizeConcatOp : public RewritePattern { - explicit QuantizeConcatOp(MLIRContext* context) - : RewritePattern(QuantizeOp::getOperationName(), 1, context) {} - - PatternMatchResult 
matchAndRewrite(Operation* op, - PatternRewriter& rewriter) const override; -}; - -PatternMatchResult mlir::TFL::QuantizeConcatOp::matchAndRewrite( - Operation* op, PatternRewriter& rewriter) const { - auto quantize_op = cast(op); - auto concat_op = - dyn_cast_or_null(quantize_op.input()->getDefiningOp()); - if (!concat_op) { - return matchFailure(); - } - - SmallVector values; - values.reserve(concat_op.getNumOperands()); - for (auto operand : concat_op.values()) { - if (auto opInst = - dyn_cast_or_null(operand->getDefiningOp())) { - values.push_back(opInst.input()); - } else { - return matchFailure(); - } - } - rewriter.replaceOpWithNewOp( - op, quantize_op.output()->getType(), values, - rewriter.getI32IntegerAttr(concat_op.axis().getZExtValue()), - rewriter.getStringAttr(concat_op.fused_activation_function())); - return matchSuccess(); -} - void QuantizePass::runOnFunction() { OwningRewritePatternList patterns; auto func = getFunction(); auto* ctx = func.getContext(); TFL::populateWithGenerated(ctx, &patterns); - mlir::RewriteListBuilder::build(patterns, ctx); + mlir::RewriteListBuilder>::build(patterns, ctx); applyPatternsGreedily(func, std::move(patterns)); } } // namespace // Creates an instance of the TensorFlow Lite dialect QuantizeTFL pass. -FunctionPassBase *CreateQuantizePass() { return new QuantizePass(); } +FunctionPassBase* CreateQuantizePass() { return new QuantizePass(); } static PassRegistration pass( "tfl-quantize", "Apply quantization on models in TensorFlow Lite dialect"); diff --git a/tensorflow/compiler/mlir/lite/transforms/quantize_patterns.td b/tensorflow/compiler/mlir/lite/transforms/quantize_patterns.td index 756fae3a4cd..7fcf926d89f 100644 --- a/tensorflow/compiler/mlir/lite/transforms/quantize_patterns.td +++ b/tensorflow/compiler/mlir/lite/transforms/quantize_patterns.td @@ -22,10 +22,6 @@ include "tensorflow/compiler/mlir/lite/ir/tfl_ops.td" // Quantize attribute $0 by using quantization parameter from %1. 
def QuantizeByQuantizedType : NativeCodeCall<"Quantize($0, $1.getValue())">; -// Call the generic builder of `op`. Use the result type of $0 in the new op. -class ReplaceWith : NativeCodeCall<"$_builder.create<" # op # - ">($0->getLoc(), $0->getResult(0)->getType(), $1, $2, $3)">; - // Squash tfl.dequantize and tfl.quantize pairs. // TODO(fengliuai): Compare the scale of input and output. This can also be // squashed to a requantize op if the scales are different. @@ -39,98 +35,3 @@ def : Pat<(TFL_QuantizeOp (TFL_QConstOp $qtype, (QuantizeByQuantizedType $value, $qtype))>; - -// Quantize the AddOp if both inputs are dequantized and the output is -// quantized. -def : Pat<(TFL_QuantizeOp:$q - (TFL_AddOp (TFL_DequantizeOp $lhs), (TFL_DequantizeOp $rhs), - $fused_activation_function), - $output_type), - (ReplaceWith<"TFL::AddOp"> $q, $lhs, $rhs, - $fused_activation_function)>; - -// Quantize the Conv2DOp if the input and weight are dequantized. The scale of -// the bias input is determined by the scales of input and weight operands. -def : Pat<(TFL_QuantizeOp - (TFL_Conv2DOp - (TFL_DequantizeOp $in), - (TFL_DequantizeOp $weight), - (TFL_DequantizeOp $bias), - $dilation_h_factor, - $dilation_w_factor, - $fused_activation_function, - $padding, - $stride_h, - $stride_w), - $output_type), - (TFL_Conv2DOp - $in, - $weight, - $bias, - $dilation_h_factor, - $dilation_w_factor, - $fused_activation_function, - $padding, - $stride_h, - $stride_w)>; - -// Quantize the DepthwiseConv2DOp if the input and weight are dequantized. The -// scale of the bias input is determined by the scales of input and weight -// operands. 
-def : Pat<(TFL_QuantizeOp - (TFL_DepthwiseConv2DOp - (TFL_DequantizeOp $in), - (TFL_DequantizeOp $weight), - (TFL_DequantizeOp $bias), - $dilation_h_factor, - $dilation_w_factor, - $fused_activation_function, - $padding, - $stride_h, - $stride_w, - $multiplier), - $output_type), - (TFL_DepthwiseConv2DOp - $in, - $weight, - $bias, - $dilation_h_factor, - $dilation_w_factor, - $fused_activation_function, - $padding, - $stride_h, - $stride_w, - $multiplier)>; - -// Quantize the ReshapeOp if the input is dequantized and output is quantized. -// The pre-quantize pass can guarantee both quantization parameters are the -// same. -def : Pat<(TFL_QuantizeOp (TFL_ReshapeOp (TFL_DequantizeOp $in)), $output_type), - (TFL_ReshapeOp $in)>; - -// Quantize the ReshapeOp if the input is dequantized and output is quantized. -// The pre-quantize pass has set the output quantization parameters to a -// pre-defined value. -def : Pat<(TFL_QuantizeOp (TFL_SoftmaxOp (TFL_DequantizeOp $in), $beta), - $output_type), - (TFL_SoftmaxOp $in, $beta)>; - -// Quantize the AveragePool2DOp if the input is dequantized and output is -// quantized. The pre-quantize pass can guarantee both quantization parameters -// are the same. -def : Pat<(TFL_QuantizeOp (TFL_AveragePool2DOp (TFL_DequantizeOp $in), - $filter_height, $filter_width, $fused_activation_function, - $padding, $stride_h, $stride_w), $output_type), - (TFL_AveragePool2DOp $in, - $filter_height, $filter_width, $fused_activation_function, - $padding, $stride_h, $stride_w)>; - -// Quantize the MaxPool2DOp if the input is dequantized and output is -// quantized. The pre-quantize pass can guarantee both quantization parameters -// are the same. 
-def : Pat<(TFL_QuantizeOp (TFL_MaxPool2DOp (TFL_DequantizeOp $in), - $padding, $stride_w, $tride_h, $stride_width, $stride_height, - $fused_activation_function), $output_type), - (TFL_MaxPool2DOp $in, - $padding, $stride_w, $tride_h, $stride_width, $stride_height, - $fused_activation_function)>; diff --git a/tensorflow/compiler/mlir/lite/utils/quantization_driver.cc b/tensorflow/compiler/mlir/lite/utils/quantization_driver.cc index 7ceb0f5c86e..d78aa92f36c 100644 --- a/tensorflow/compiler/mlir/lite/utils/quantization_driver.cc +++ b/tensorflow/compiler/mlir/lite/utils/quantization_driver.cc @@ -25,6 +25,7 @@ limitations under the License. #include "mlir/Dialect/QuantOps/QuantTypes.h" // TF:local_config_mlir #include "mlir/IR/Attributes.h" // TF:local_config_mlir #include "mlir/IR/Builders.h" // TF:local_config_mlir +#include "mlir/IR/Function.h" // TF:local_config_mlir #include "mlir/IR/MLIRContext.h" // TF:local_config_mlir #include "mlir/IR/Matchers.h" // TF:local_config_mlir #include "mlir/IR/Operation.h" // TF:local_config_mlir @@ -121,7 +122,7 @@ struct RequantizeState { // class QuantizationDriver { public: - explicit QuantizationDriver(Function fn) : builder_(fn.getBody()) {} + explicit QuantizationDriver(FuncOp fn) : builder_(fn.getBody()) {} // The entry point of the quantization parameters propagation. void Run(); @@ -706,7 +707,7 @@ void QuantizationDriver::Run() { } } -void ApplyQuantizationParamsPropagation(mlir::Function func) { +void ApplyQuantizationParamsPropagation(mlir::FuncOp func) { QuantizationDriver(func).Run(); } diff --git a/tensorflow/compiler/mlir/lite/utils/quantization_utils.h b/tensorflow/compiler/mlir/lite/utils/quantization_utils.h index cee00e6be38..a7b9179e5b4 100644 --- a/tensorflow/compiler/mlir/lite/utils/quantization_utils.h +++ b/tensorflow/compiler/mlir/lite/utils/quantization_utils.h @@ -20,12 +20,66 @@ limitations under the License. 
#define TENSORFLOW_COMPILER_MLIR_LITE_UTILS_QUANTIZATION_UTILS_H_ #include "mlir/Dialect/QuantOps/QuantTypes.h" // TF:local_config_mlir +#include "mlir/IR/BlockAndValueMapping.h" // TF:local_config_mlir +#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir #include "mlir/IR/StandardTypes.h" // TF:local_config_mlir #include "mlir/StandardOps/Ops.h" // TF:local_config_mlir namespace mlir { namespace TFL { +// A generic rewrite pattern which matches any N-in-1-out operations with +// quantization parameters propagated to all the operand and result values. +// The quantization parameters are annotated by the Q/DQ op pairs. Each matched +// pattern is rewritten to its quantized alternative. +// +// This pattern assumes all the matched ops are quantizable. This assumption is +// always right, except when a "Q" op is used as a requantize op. For non-"Q" +// ops, quantization parameters should be propagated to their result. +// +// This pattern only matches ops which only have one result. +template +struct GenericFullQuantizationPattern : public RewritePattern { + explicit GenericFullQuantizationPattern(MLIRContext* context) + : RewritePattern(Q::getOperationName(), 1, context) {} + + PatternMatchResult matchAndRewrite(Operation* op, + PatternRewriter& rewriter) const override { + if (op->getNumResults() != 1) { + return matchFailure(); + } + auto quantize_op = cast(op); + auto quantized_op = quantize_op.input()->getDefiningOp(); + // If it is a block argument, requantize op, or has more than one result, we + // shouldn't rewrite this op. + if (!quantized_op || llvm::isa(quantized_op) || + llvm::isa(quantized_op) || quantized_op->getNumResults() != 1) { + return matchFailure(); + } + + // Collect all the quantized inputs and "clone" the matched op by these + // inputs.
+ SmallVector inputs; + inputs.reserve(quantized_op->getNumOperands()); + for (int i = 0, e = quantized_op->getNumOperands(); i != e; ++i) { + auto* operand = quantized_op->getOperand(i); + if (auto op_inst = dyn_cast_or_null(operand->getDefiningOp())) { + inputs.push_back(op_inst.input()); + } else { + return matchFailure(); + } + } + // Use OpBuilder so we can use op name to create the new op. + OpBuilder builder(quantized_op); + OperationState new_state( + quantized_op->getLoc(), quantized_op->getName().getStringRef(), inputs, + op->getResult(0)->getType(), quantized_op->getAttrs()); + Operation* new_op = builder.createOperation(new_state); + rewriter.replaceOp(op, {new_op->getResult(0)}); + return matchSuccess(); + } +}; + // Converts the min/max/storage_type/narrow_range information to a // QuantizedType, and then returns the attribute containing the QuantizedType. TypeAttr GetQuantizedTypeAttr(Builder builder, Type input_type, FloatAttr min, @@ -62,7 +116,7 @@ quant::QuantizedType GetUniformQuantizedTypeForBias( // quantization parameters are stored as adjacent quantize and dequantize ops // and the propagation results are materialized by inserting pairs of quantize // and dequantize ops to this function. 
-void ApplyQuantizationParamsPropagation(mlir::Function func); +void ApplyQuantizationParamsPropagation(mlir::FuncOp func); } // end namespace TFL } // end namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index add4dbc9e67..464dc7ae345 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -103,6 +103,7 @@ cc_library( "transforms/generated_optimize.inc", "transforms/optimize.cc", "transforms/raise_control_flow.cc", + "translate/control_to_executor_dialect.cc", ], hdrs = [ "ir/control_flow_ops.h", @@ -280,6 +281,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:protos_all_proto_cc", "@local_config_mlir//:IR", + "@local_config_mlir//:StandardOps", ], ) @@ -471,6 +473,7 @@ cc_library( "@llvm//:support", "@local_config_mlir//:IR", "@local_config_mlir//:Parser", + "@local_config_mlir//:Pass", ], ) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc index a32bd6ceeb7..29d73a71ad9 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc @@ -694,7 +694,7 @@ void Print(NextIterationSinkOp next_iteration, OpAsmPrinter *p) { *p << next_iteration.getOperationName() << " ["; p->printOperand(next_iteration.getOperand(0)); *p << "] "; - p->printOperand(next_iteration.getOperand(1)); + p->printOperands(llvm::drop_begin(next_iteration.getOperands(), 1)); *p << " : " << next_iteration.getOperand(1)->getType(); p->printOptionalAttrDict(next_iteration.getAttrs()); } diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor_ops.td index c15bbd0cd8d..125ef1bfda6 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor_ops.td @@ -250,25 +250,6 @@ def TfExecutor_SwitchOp : 
TfExecutor_Op<"Switch", TfeControlType: $control ); - let builders = [OpBuilder< - "Builder *builder, OperationState *result, ArrayRef operands = {}", - [{ - assert(operands.size() >= 2 && "tf_executor.Switch builder expects at " - "least two operands"); - return build(builder, result, operands[0], operands[1], operands.drop_front(2)); - }]>, - OpBuilder< - "Builder *builder, OperationState *result, Value *data, Value *predicate, ArrayRef controls = {}", - [{ - Type dataTy = data->getType(); - Type controlTy = ControlType::get(builder->getContext()); - result->types = { dataTy, dataTy, controlTy }; - result->operands.push_back(data); - result->operands.push_back(predicate); - result->operands.insert(result->operands.end(), controls.begin(), controls.end()); - }]> - ]; - let verifier = ?; } @@ -311,7 +292,6 @@ def TfExecutor_SwitchNOp : Variadic:$outputs, TfeControlType: $control ); - } def TfExecutor_MergeOp : TfExecutor_Op<"Merge", [NoSideEffect, ControlOperandsAfterAllData]> { @@ -346,18 +326,6 @@ def TfExecutor_MergeOp : TfExecutor_Op<"Merge", [NoSideEffect, ControlOperandsAf TensorOf<[I32]>:$valueIndex, TfeControlType:$control ); - - let builders = [OpBuilder< - "Builder *builder, OperationState *result, ArrayRef operands", - [{ - assert(operands.size() >= 1 && "tf_executor.Merge builder expects at " - "least one operand"); - Type data_type = operands[0]->getType(); - Type control_type = ControlType::get(builder->getContext()); - result->types = { data_type, builder->getIntegerType(32), control_type}; - result->operands.append(operands.begin(), operands.end()); - }]> - ]; } def TfExecutor_EnterOp : TfExecutor_Op<"Enter", @@ -408,19 +376,6 @@ def TfExecutor_EnterOp : TfExecutor_Op<"Enter", ); let verifier = ?; - - let builders = [OpBuilder< - "Builder *builder, OperationState *result, ArrayRef operands", - [{ - assert(operands.size() >= 1 && "tf_executor.Enter builder " - "expects at least one operand"); - result->operands.append(operands.begin(), 
operands.end()); - - Type control_type = ControlType::get(builder->getContext()); - result->types.push_back(operands[0]->getType()); - result->types.push_back(control_type); - }]> - ]; } def TfExecutor_NextIterationSourceOp : TfExecutor_Op<"NextIteration.Source", [NoSideEffect]> { @@ -472,12 +427,14 @@ def TfExecutor_NextIterationSourceOp : TfExecutor_Op<"NextIteration.Source", [No ); let builders = [OpBuilder< - "Builder *builder, OperationState *result, Type resultTy, ArrayRef controlInputs = {}", + "Builder *builder, OperationState *result, Type result_type, " + "ArrayRef control_inputs = {}, ArrayRef attributes = {}", [{ - Type tokenTy = TokenType::get(builder->getContext()); - Type controlTy = ControlType::get(builder->getContext()); - result->types = { resultTy, tokenTy, controlTy }; - result->operands.append(controlInputs.begin(), controlInputs.end()); + Type token_type = TokenType::get(builder->getContext()); + Type control_type = ControlType::get(builder->getContext()); + result->types = { result_type, token_type, control_type }; + result->operands.append(control_inputs.begin(), control_inputs.end()); + result->attributes.append(attributes.begin(), attributes.end()); }]> ]; } @@ -527,6 +484,19 @@ def TfExecutor_NextIterationSinkOp : TfExecutor_Op<"NextIteration.Sink"> { // Optional extra control inputs. 
Variadic:$controlInputs ); + + let builders = [OpBuilder< + "Builder *builder, OperationState *result, Value *token, " + "ArrayRef operands, ArrayRef attributes = {}", + [{ + assert(operands.size() >= 1 && "tf_executor.NextIteration.Sink builder " + "expects at least one operand"); + result->operands.push_back(token); + result->operands.insert(result->operands.end(), operands.begin(), + operands.end()); + result->attributes.append(attributes.begin(), attributes.end()); + }]> + ]; } def TfExecutor_ExitOp : TfExecutor_Op<"Exit", @@ -590,6 +560,20 @@ def TfExecutor_ControlTriggerOp : TfExecutor_Op<"ControlTrigger", [NoSideEffect] ); let verifier = ?; + + let builders = [OpBuilder< + "Builder *builder, OperationState *result, " + "ArrayRef operands, ArrayRef attributes = {}", + [{ + assert(operands.size() >= 1 && "tf_executor.ControlTrigger builder " + "expects at least one operand"); + result->operands.insert(result->operands.end(), operands.begin(), + operands.end()); + Type control_type = ControlType::get(builder->getContext()); + result->types = {control_type}; + result->attributes.append(attributes.begin(), attributes.end()); + }]> + ]; } def TfExecutor_LoopCondOp : TfExecutor_Op<"LoopCond", [NoSideEffect]> { diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index dda278a6cf0..6b7be4435b4 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -79,6 +79,12 @@ def TF_AddNOp : TF_Op<"AddN", [Commutative, NoSideEffect]> { let summary = "Add all input tensors element wise."; let description = [{ +Inputs must be of same size and shape. 
+ + ```python + x = [9, 7, 10] + tf.math.add_n(x) ==> 26 + ``` }]; let arguments = (ins @@ -467,6 +473,15 @@ def TF_CosOp : TF_Op<"Cos", [NoSideEffect, SameOperandsAndResultType]> { let summary = "Computes cos of x element-wise."; let description = [{ +Given an input tensor, this function computes cosine of every + element in the tensor. Input range is `(-inf, inf)` and + output range is `[-1,1]`. If input lies outside the boundary, `nan` + is returned. + + ```python + x = tf.constant([-float("inf"), -9, -0.5, 1, 1.2, 200, 10000, float("inf")]) + tf.math.cos(x) ==> [nan -0.91113025 0.87758255 0.5403023 0.36235774 0.48718765 -0.95215535 nan] + ``` }]; let arguments = (ins @@ -1027,6 +1042,43 @@ Invert (flip) each bit of supported types; for example, type `uint8` value 01010 let description = [{ Flip each bit of supported types. For example, type `int8` (decimal 2) binary 00000010 becomes (decimal -3) binary 11111101. This operation is performed on each element of the tensor argument `x`. + +Example: +```python +import tensorflow as tf +from tensorflow.python.ops import bitwise_ops + +# flip 2 (00000010) to -3 (11111101) +tf.assert_equal(-3, bitwise_ops.invert(2)) + +dtype_list = [dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64, + dtypes.uint8, dtypes.uint16, dtypes.uint32, dtypes.uint64] + +inputs = [0, 5, 3, 14] +for dtype in dtype_list: + # Because of issues with negative numbers, let's test this indirectly. + # 1. invert(a) and a = 0 + # 2. 
invert(a) or a = invert(0) + input_tensor = tf.constant([0, 5, 3, 14], dtype=dtype) + not_a_and_a, not_a_or_a, not_0 = [bitwise_ops.bitwise_and( + input_tensor, bitwise_ops.invert(input_tensor)), + bitwise_ops.bitwise_or( + input_tensor, bitwise_ops.invert(input_tensor)), + bitwise_ops.invert( + tf.constant(0, dtype=dtype))] + + expected = tf.constant([0, 0, 0, 0], dtype=tf.float32) + tf.assert_equal(tf.cast(not_a_and_a, tf.float32), expected) + + expected = tf.cast([not_0] * 4, tf.float32) + tf.assert_equal(tf.cast(not_a_or_a, tf.float32), expected) + + # For unsigned dtypes let's also check the result directly. + if dtype.is_unsigned: + inverted = bitwise_ops.invert(input_tensor) + expected = tf.constant([dtype.max - x for x in inputs], dtype=tf.float32) + tf.assert_equal(tf.cast(inverted, tf.float32), tf.cast(expected, tf.float32)) +``` }]; let arguments = (ins @@ -1348,6 +1400,52 @@ def TF_MinimumOp : TF_Op<"Minimum", [Broadcastable, NoSideEffect]>, TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } +def TF_MirrorPadOp : TF_Op<"MirrorPad", [NoSideEffect]> { + let summary = "Pads a tensor with mirrored values."; + + let description = [{ +This operation pads a `input` with mirrored values according to the `paddings` +you specify. `paddings` is an integer tensor with shape `[n, 2]`, where n is +the rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates +how many values to add before the contents of `input` in that dimension, and +`paddings[D, 1]` indicates how many values to add after the contents of `input` +in that dimension. Both `paddings[D, 0]` and `paddings[D, 1]` must be no greater +than `input.dim_size(D)` (or `input.dim_size(D) - 1`) if `copy_border` is true +(if false, respectively). + +The padded size of each dimension D of the output is: + +`paddings(D, 0) + input.dim_size(D) + paddings(D, 1)` + +For example: + +``` +# 't' is [[1, 2, 3], [4, 5, 6]]. +# 'paddings' is [[1, 1]], [2, 2]]. +# 'mode' is SYMMETRIC. 
+# rank of 't' is 2. +pad(t, paddings) ==> [[2, 1, 1, 2, 3, 3, 2] + [2, 1, 1, 2, 3, 3, 2] + [5, 4, 4, 5, 6, 6, 5] + [5, 4, 4, 5, 6, 6, 5]] +``` + }]; + + let arguments = (ins + TF_Tensor:$input, + TF_I32OrI64Tensor:$paddings, + + TF_AnyStrAttrOf<["REFLECT", "SYMMETRIC"]>:$mode + ); + + let results = (outs + TF_Tensor:$output + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; + TF_DerivedOperandTypeAttr Tpaddings = TF_DerivedOperandTypeAttr<1>; +} + def TF_MulOp : TF_Op<"Mul", [Broadcastable, Commutative, NoSideEffect]>, WithBroadcastableBinOpBuilder { let summary = "Returns x * y element-wise."; @@ -2194,9 +2292,17 @@ Specifically, `y = 1 / (1 + exp(-x))`. } def TF_SinOp : TF_Op<"Sin", [NoSideEffect, SameOperandsAndResultType]> { - let summary = "Computes sin of x element-wise."; + let summary = "Computes sine of x element-wise."; let description = [{ +Given an input tensor, this function computes sine of every + element in the tensor. Input range is `(-inf, inf)` and + output range is `[-1,1]`. + + ```python + x = tf.constant([-float("inf"), -9, -0.5, 1, 1.2, 200, 10, float("inf")]) + tf.math.sin(x) ==> [nan -0.4121185 -0.47942555 0.84147096 0.9320391 -0.87329733 -0.54402107 nan] + ``` }]; let arguments = (ins @@ -2591,6 +2697,31 @@ retained with length 1. TF_DerivedOperandTypeAttr Tidx = TF_DerivedOperandTypeAttr<1>; } +def TF_TanhOp : TF_Op<"Tanh", [NoSideEffect, SameOperandsAndResultType]> { + let summary = "Computes hyperbolic tangent of `x` element-wise."; + + let description = [{ +Given an input tensor, this function computes hyperbolic tangent of every + element in the tensor. Input range is `[-inf, inf]` and + output range is `[-1,1]`. + + ```python + x = tf.constant([-float("inf"), -5, -0.5, 1, 1.2, 2, 3, float("inf")]) + tf.math.tanh(x) ==> [-1. -0.99990916 -0.46211717 0.7615942 0.8336547 0.9640276 0.9950547 1.] 
+ ``` + }]; + + let arguments = (ins + TF_FpOrComplexTensor:$x + ); + + let results = (outs + TF_FpOrComplexTensor:$y + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + def TF_TensorListFromTensorOp : TF_Op<"TensorListFromTensor", [NoSideEffect]> { let summary = [{ Creates a TensorList which, when stacked, has the value of `tensor`. diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc index c602e43ef16..c664983bc1b 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc @@ -275,18 +275,18 @@ static LogicalResult Verify(FusedBatchNormOp op) { //===----------------------------------------------------------------------===// LogicalResult IfOp::verify() { - auto thenAttr = getAttrOfType("then_branch"); + auto thenAttr = getAttrOfType("then_branch"); if (!thenAttr) return emitOpError("requires then_branch attribute"); - auto elseAttr = getAttrOfType("else_branch"); + auto elseAttr = getAttrOfType("else_branch"); if (!elseAttr) return emitOpError("requires else_branch attribute"); - auto module = getParentOfType(); - auto thenFn = module.getNamedFunction(thenAttr.getValue()); + auto module = getParentOfType(); + auto thenFn = module.lookupSymbol(thenAttr.getValue()); if (!thenFn) return emitOpError("then_branch refers to an undefined function : ") << thenAttr; - auto elseFn = module.getNamedFunction(elseAttr.getValue()); + auto elseFn = module.lookupSymbol(elseAttr.getValue()); if (!elseFn) return emitOpError("else_branch refers to an undefined function : ") << elseAttr; @@ -627,9 +627,10 @@ OpFoldResult ShapeOp::fold(ArrayRef operands) { //===----------------------------------------------------------------------===// static LogicalResult Verify(SoftmaxOp op) { - if (!IsOfRankOrUnranked(op.logits(), 2)) - return op.emitOpError("requires operand to be 2D tensor"); - + if (!IsOfRankOrUnranked(op.logits(), 1) && + 
!IsOfRankOrUnranked(op.logits(), 2)) { + return op.emitOpError("requires operand to be 1D/2D tensor"); + } return success(); } @@ -727,20 +728,20 @@ void TruncateDivOp::getCanonicalizationPatterns( //===----------------------------------------------------------------------===// LogicalResult WhileOp::verify() { - auto condAttr = getAttrOfType("cond"); + auto condAttr = getAttrOfType("cond"); if (!condAttr) return emitOpError("requires cond attribute"); - auto module = getParentOfType(); - auto condFn = module.getNamedFunction(condAttr.getValue()); + auto module = getParentOfType(); + auto condFn = module.lookupSymbol(condAttr.getValue()); auto condFuncType = condFn.getType(); // Verify that the cond function has exactly one result. if (condFuncType.getNumResults() != 1) return emitOpError("requires cond function to have exactly one result"); - auto bodyAttr = getAttrOfType("body"); + auto bodyAttr = getAttrOfType("body"); if (!bodyAttr) return emitOpError("requires body attribute"); - auto bodyFn = module.getNamedFunction(bodyAttr.getValue()); + auto bodyFn = module.lookupSymbol(bodyAttr.getValue()); auto bodyFuncType = bodyFn.getType(); SmallVector operands(getOperandTypes()); diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h index c9c65a94ea9..dbd37e6ee54 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h @@ -119,11 +119,11 @@ class IfOp : public Op::Impl, // TODO(b/132271680): This is not following Google naming style StringRef getThen() { - return getAttrOfType("then_branch").getValue(); + return getAttrOfType("then_branch").getValue(); } StringRef getElse() { - return getAttrOfType("else_branch").getValue(); + return getAttrOfType("else_branch").getValue(); } LogicalResult verify(); @@ -157,8 +157,12 @@ class WhileOp : public Op("cond").getValue(); } - StringRef getBody() { return getAttrOfType("body").getValue(); } + StringRef getCond() 
{ + return getAttrOfType("cond").getValue(); + } + StringRef getBody() { + return getAttrOfType("body").getValue(); + } LogicalResult verify(); }; diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td index 1ba3ed6db8b..b2fcb01c2d5 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td @@ -105,8 +105,10 @@ retained with length 1. // In MLIR, the 'tf.Placeholder.input' instruction is used to capture attributes // of function arguments. +// Note: The NoSideEffect trait is intentionally omitted so that the captured +// attributes are preserved even if the input is unused. def TF_PlaceholderInputOp : TF_Op<"Placeholder.input", - [NoSideEffect, SameOperandsAndResultType]> { + [SameOperandsAndResultType]> { let summary = "PlaceholderInput op"; let description = [{ diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_types.def b/tensorflow/compiler/mlir/tensorflow/ir/tf_types.def index 20a58722edf..9f1154b84f1 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_types.def +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_types.def @@ -19,6 +19,10 @@ limitations under the License.
#ifdef HANDLE_TF_TYPE // class, enumerant, name +HANDLE_TF_TYPE(Uint8, UINT8, "uint8") +HANDLE_TF_TYPE(Uint16, UINT16, "uint16") +HANDLE_TF_TYPE(Uint32, UINT32, "uint32") +HANDLE_TF_TYPE(Uint64, UINT64, "uint64") HANDLE_TF_TYPE(Qint8, QINT8, "qint8") HANDLE_TF_TYPE(Qint16, QINT16, "qint16") HANDLE_TF_TYPE(Qint32, QINT32, "qint32") diff --git a/tensorflow/compiler/mlir/tensorflow/tests/control_to_executor_dialect.mlir b/tensorflow/compiler/mlir/tensorflow/tests/control_to_executor_dialect.mlir new file mode 100644 index 00000000000..b1a9dd71fc7 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/control_to_executor_dialect.mlir @@ -0,0 +1,86 @@ +// RUN: tf-opt -tf-control-to-executor-conversion %s | FileCheck %s --dump-input=fail + + +// CHECK-LABEL: func @islands_with_control +// CHECK-SAME: (%[[ARG0:[a-z0-9]*]]: tensor<*xf32>) +func @islands_with_control(tensor<*xf32>) -> tensor<*xf32> { +^bb0(%0: tensor<*xf32>): + %1:2 = "_tf.Identity"(%0) : (tensor<*xf32>) -> (tensor<*xf32>, !_tf.control) + %2 = "_tf.Add"(%0, %0, %1#1) : (tensor<*xf32>, tensor<*xf32>, !_tf.control) -> tensor<*xf32> + return %2 : tensor<*xf32> +} + +// CHECK-NEXT: %[[GRAPH:[0-9]*]] = tf_executor.graph { +// CHECK-NEXT: %[[IDENTITY:[0-9]*]]:2 = tf_executor.island { +// CHECK-NEXT: %{{[0-9]*}} = "tf.Identity"(%[[ARG0]]) : (tensor<*xf32>) -> tensor<*xf32> +// CHECK-NEXT: tf_executor.yield %{{[0-9]*}} : tensor<*xf32> +// CHECK-NEXT: } +// CHECK-NEXT: %[[ADD:[0-9]*]]:2 = tf_executor.island(%[[IDENTITY]]#1) { +// CHECK-NEXT: %{{[0-9]*}} = "tf.Add"(%[[ARG0]], %[[ARG0]]) : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32> +// CHECK-NEXT: tf_executor.yield %{{[0-9]*}} : tensor<*xf32> +// CHECK-NEXT: } +// CHECK-NEXT: tf_executor.fetch %[[ADD]]#0 : tensor<*xf32> +// CHECK-NEXT: } +// CHECK-NEXT: return %[[GRAPH]] : tensor<*xf32> + +// CHECK-LABEL: func @LoopTest() { + +func @LoopTest() { + %0:2 = "_tf.Const"() {device = "", dtype = "tfdtype$DT_INT32", name = "Const", value = dense<1> : tensor} : () 
-> (tensor, !_tf.control) + %1:2 = "_tf.Enter"(%0#0) {T = "tfdtype$DT_INT32", device = "", frame_name = "while/while_context", is_constant = false, name = "while/Enter", parallel_iterations = 10 : i64} : (tensor) -> (tensor<*xi32>, !_tf.control) + %2 = "_tf.NoOp"() {device = "", name = "cluster/pivot"} : () -> !_tf.control + %3:2 = "_tf.NextIteration.source"() {T = "tfdtype$DT_INT32", device = "", id = 0 : i64, name = "while/NextIteration"} : () -> (tensor<*xi32>, !_tf.control) + %4:3 = "_tf.Merge"(%3#0, %1#0) {N = 2 : i64, T = "tfdtype$DT_INT32", device = "", name = "while/Merge"} : (tensor<*xi32>, tensor<*xi32>) -> (tensor<*xi32>, tensor, !_tf.control) + %5:2 = "_tf.Const"(%4#2) {device = "", dtype = "tfdtype$DT_INT32", name = "while/Less/y", value = dense<2> : tensor} : (!_tf.control) -> (tensor, !_tf.control) + %6:2 = "_tf.Less"(%4#0, %5#0) {T = "tfdtype$DT_INT32", device = "", name = "while/Less"} : (tensor<*xi32>, tensor) -> (tensor<*xi1>, !_tf.control) + %7:2 = "_tf.LoopCond"(%6#0) {device = "", name = "while/LoopCond"} : (tensor<*xi1>) -> (tensor, !_tf.control) + %8:3 = "_tf.Switch"(%4#0, %7#0) {T = "tfdtype$DT_INT32", _class = ["loc = @while/Merge"], device = "", name = "while/Switch"} : (tensor<*xi32>, tensor) -> (tensor<*xi32>, tensor<*xi32>, !_tf.control) + %9:2 = "_tf.Exit"(%8#0) {T = "tfdtype$DT_INT32", device = "", name = "while/Exit"} : (tensor<*xi32>) -> (tensor<*xi32>, !_tf.control) + %10:2 = "_tf.Identity"(%8#1) {T = "tfdtype$DT_INT32", device = "", name = "while/Identity"} : (tensor<*xi32>) -> (tensor<*xi32>, !_tf.control) + %11:2 = "_tf.Const"(%10#1) {device = "", dtype = "tfdtype$DT_INT32", name = "while/Add/y", value = dense<3> : tensor} : (!_tf.control) -> (tensor, !_tf.control) + %12:2 = "_tf.Add"(%10#0, %11#0) {T = "tfdtype$DT_INT32", device = "", name = "while/Add"} : (tensor<*xi32>, tensor) -> (tensor<*xi32>, !_tf.control) + %13 = "_tf.ControlTrigger"(%2, %12#1, %9#1) {_tpu_replicate = "cluster", device = "", name = 
"gradients/while/mul_2_Da30D05wlPU_grad/SymbolicGradient/b_sync"} : (!_tf.control, !_tf.control, !_tf.control) -> !_tf.control + %14 = "_tf.NextIteration.sink"(%12#0, %13) {T = "tfdtype$DT_INT32", device = "", id = 0 : i64, name = "while/NextIteration"} : (tensor<*xi32>, !_tf.control) -> (!_tf.control) + return +} + +// CHECK-NEXT: tf_executor.graph { +// CHECK-NEXT: %[[CONST:[0-9]*]]:2 = tf_executor.island { +// CHECK-NEXT: %{{[a-z0-9]*}} = "tf.Const"() {device = "", dtype = "tfdtype$DT_INT32", name = "Const", value = dense<1> : tensor} : () -> tensor +// CHECK-NEXT: tf_executor.yield %{{[a-z0-9]*}} : tensor +// CHECK-NEXT: } +// CHECK-NEXT: %[[ENTER:[0-9]*]]:2 = tf_executor.Enter %[[CONST]]#0 frame "while/while_context" : (tensor) -> (tensor<*xi32>, !tf_executor.control) {T = "tfdtype$DT_INT32", device = "", name = "while/Enter"} +// CHECK-NEXT: %[[NOOP:[0-9]*]] = tf_executor.island { +// CHECK-NEXT: "tf.NoOp"() {device = "", name = "cluster/pivot"} : () -> () +// CHECK-NEXT: tf_executor.yield +// CHECK-NEXT: } +// CHECK-NEXT: %[[NEXTIT_SRC:[0-9]*]]:3 = tf_executor.NextIteration.Source : tensor<*xi32> {T = "tfdtype$DT_INT32", device = "", id = 0 : i64, name = "while/NextIteration"} +// CHECK-NEXT: %[[MERGE:[0-9]*]]:3 = tf_executor.Merge %[[NEXTIT_SRC]]#0, %[[ENTER]]#0 : tensor<*xi32> {N = 2 : i64, T = "tfdtype$DT_INT32", device = "", name = "while/Merge"} +// CHECK-NEXT: %[[CONST_LESS:[0-9]*]]:2 = tf_executor.island(%[[MERGE]]#2) { +// CHECK-NEXT: %{{[a-z0-9]*}} = "tf.Const"() {device = "", dtype = "tfdtype$DT_INT32", name = "while/Less/y", value = dense<2> : tensor} : () -> tensor +// CHECK-NEXT: tf_executor.yield %{{[a-z0-9]*}} : tensor +// CHECK-NEXT: } +// CHECK-NEXT: %[[LESS:[0-9]*]]:2 = tf_executor.island { +// CHECK-NEXT: %{{[a-z0-9]*}} = "tf.Less"(%[[MERGE]]#0, %[[CONST_LESS]]#0) {T = "tfdtype$DT_INT32", device = "", name = "while/Less"} : (tensor<*xi32>, tensor) -> tensor<*xi1> +// CHECK-NEXT: tf_executor.yield %{{[a-z0-9]*}} : tensor<*xi1> +// 
CHECK-NEXT: } +// CHECK-NEXT: %[[COND:[0-9]*]]:2 = tf_executor.LoopCond %[[LESS:[0-9]*]]#0 : (tensor<*xi1>) -> (tensor, !tf_executor.control) {device = "", name = "while/LoopCond"} +// CHECK-NEXT: %[[SWITCH:[0-9]*]]:3 = tf_executor.Switch %[[MERGE]]#0, %[[COND]]#0 : tensor<*xi32> {T = "tfdtype$DT_INT32", _class = ["loc = @while/Merge"], device = "", name = "while/Switch"} +// CHECK-NEXT: %[[EXIT:[0-9]*]]:2 = tf_executor.Exit %[[SWITCH]]#0 : tensor<*xi32> {T = "tfdtype$DT_INT32", device = "", name = "while/Exit"} +// CHECK-NEXT: %[[IDENTITY:[0-9]*]]:2 = tf_executor.island { +// CHECK-NEXT: %{{[a-z0-9]*}} = "tf.Identity"(%[[SWITCH]]#1) {T = "tfdtype$DT_INT32", device = "", name = "while/Identity"} : (tensor<*xi32>) -> tensor<*xi32> +// CHECK-NEXT: tf_executor.yield %{{[a-z0-9]*}} : tensor<*xi32> +// CHECK-NEXT: } +// CHECK-NEXT: %[[CONST_ADD:[0-9]*]]:2 = tf_executor.island(%[[IDENTITY]]#1) { +// CHECK-NEXT: %{{[a-z0-9]*}} = "tf.Const"() {device = "", dtype = "tfdtype$DT_INT32", name = "while/Add/y", value = dense<3> : tensor} : () -> tensor +// CHECK-NEXT: tf_executor.yield %{{[a-z0-9]*}} : tensor +// CHECK-NEXT: } +// CHECK-NEXT: %[[ADD:[0-9]*]]:2 = tf_executor.island { +// CHECK-NEXT: %{{[0-9]*}} = "tf.Add"(%[[IDENTITY]]#0, %[[CONST_ADD]]#0) {T = "tfdtype$DT_INT32", device = "", name = "while/Add"} : (tensor<*xi32>, tensor) -> tensor<*xi32> +// CHECK-NEXT: tf_executor.yield %{{[0-9]*}} : tensor<*xi32> +// CHECK-NEXT: } +// CHECK-NEXT: %[[CT:[0-9]*]] = tf_executor.ControlTrigger %2, %12#1, %9#1 {_tpu_replicate = "cluster", device = "", name = "gradients/while/mul_2_Da30D05wlPU_grad/SymbolicGradient/b_sync"} +// CHECK-NEXT: tf_executor.NextIteration.Sink [%[[NEXTIT_SRC]]#1] %[[ADD]]#0, %[[CT]] : tensor<*xi32> {T = "tfdtype$DT_INT32", device = "", id = 0 : i64, name = "while/NextIteration"} +// CHECK-NEXT: tf_executor.fetch +// CHECK-NEXT: } +// CHECK-NEXT: return diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-custom-operation.pbtxt 
b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-custom-operation.pbtxt index 1c371586b2d..82146716fff 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-custom-operation.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-custom-operation.pbtxt @@ -2176,14 +2176,12 @@ versions { # CHECK: %73:2 = "_tf.mul_2_Da30D05wlPU0"(%58#0, %72#0, %47#1) {_tpu_replicate = "cluster", device = "", name = "while/mul_2_Da30D05wlPU"} : (tensor<*xf32>, tensor<*xf32>, !_tf.control) -> (tensor<*xf32>, !_tf.control) # CHECK: return # CHECK-NEXT: } -# CHECK-EMPTY: # CHECK: func @less_than_5_If8q4vKg9jA0(%arg0: tensor<*xf32>) -> tensor<*xi1> # CHECK-NEXT: attributes {tf._noinline = true} { # CHECK-NEXT: %0:2 = "_tf.Const"() {device = "", dtype = "tfdtype$DT_FLOAT", name = "Less/y", value = dense<5.000000e+00> : tensor} : () -> (tensor, !_tf.control) # CHECK-NEXT: %1:2 = "_tf.Less"(%arg0, %0#0) {T = "tfdtype$DT_FLOAT", device = "", name = "Less"} : (tensor<*xf32>, tensor) -> (tensor<*xi1>, !_tf.control) # CHECK-NEXT: return %1#0 : tensor<*xi1> # CHECK-NEXT: } -# CHECK-EMPTY: # CHECK: func @mul_2_Da30D05wlPU0(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>) -> tensor<*xf32> # CHECK-NEXT: attributes {tf._noinline = true} { # CHECK-NEXT: %0:2 = "_tf.Const"() {device = "", dtype = "tfdtype$DT_FLOAT", name = "mul/y", value = dense<2.000000e+00> : tensor<1x1xf32>} : () -> (tensor<1x1xf32>, !_tf.control) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-device-retval.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-device-retval.pbtxt index dcbf299119d..fcd0e62ab63 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-device-retval.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-device-retval.pbtxt @@ -91,8 +91,7 @@ versions { # CHECK-NEXT: %0:2 = "_tf.PartitionedCall"() {Tin = [], Tout = ["tfdtype$DT_INT32"], config = "", config_proto = "", device = 
"", executor_type = "", f = @foo0, name = "PartitionedCall"} : () -> (tensor<*xi32>, !_tf.control) # CHECK-NEXT: return # CHECK-NEXT: } -# CHECK-EMPTY: -# CHECK-NEXT: func @foo0() -> tensor +# CHECK: func @foo0() -> tensor # CHECK-NEXT: attributes {tf.experimental_ints_on_device = true} { # CHECK-NEXT: %0:2 = "_tf.Const"() {device = "", dtype = "tfdtype$DT_INT32", name = "Const", value = dense<5> : tensor} : () -> (tensor, !_tf.control) # CHECK-NEXT: %1:2 = "_tf.Identity"(%0#0) {T = "tfdtype$DT_INT32", device = "", name = "Identity"} : (tensor) -> (tensor, !_tf.control) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-func-attr.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-func-attr.pbtxt index ce1c9926b5c..e8b9ce86ddb 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-func-attr.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-func-attr.pbtxt @@ -157,13 +157,11 @@ versions { # CHECK-NEXT: %1:2 = "_tf.Case"(%0#0) {Tin = [], Tout = ["tfdtype$DT_FLOAT"], branches = [@foo0, @bar0], device = "", name = "Case", output_shapes = []} : (tensor) -> (tensor<*xf32>, !_tf.control) # CHECK-NEXT: return # CHECK-NEXT: } -# CHECK-EMPTY: -# CHECK-NEXT: func @foo0() -> tensor<10xf32> { +# CHECK: func @foo0() -> tensor<10xf32> { # CHECK-NEXT: %0:2 = "_tf.Const"() {device = "", dtype = "tfdtype$DT_FLOAT", name = "const_1", value = dense<1.000000e+00> : tensor<10xf32>} : () -> (tensor<10xf32>, !_tf.control) # CHECK-NEXT: return %0#0 : tensor<10xf32> # CHECK-NEXT: } -# CHECK-EMPTY: -# CHECK-NEXT: func @bar0() -> tensor<10xf32> { +# CHECK: func @bar0() -> tensor<10xf32> { # CHECK-NEXT: %0:2 = "_tf.Const"() {device = "", dtype = "tfdtype$DT_FLOAT", name = "const_2", value = dense<2.000000e+00> : tensor<10xf32>} : () -> (tensor<10xf32>, !_tf.control) # CHECK-NEXT: return %0#0 : tensor<10xf32> # CHECK-NEXT: } diff --git 
a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-defs.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-defs.pbtxt index c5c42955e58..40392a6954a 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-defs.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-defs.pbtxt @@ -526,16 +526,13 @@ versions { # CHECK-NEXT: %18:2 = "_tf.Identity"(%17#0, %6) {T = "tfdtype$DT_INT32", device = "", name = "output_1_shard_0"} : (tensor<*xi32>, !_tf.control) -> (tensor<*xi32>, !_tf.control) # CHECK-NEXT: return # CHECK-NEXT: } -# CHECK-EMPTY: -# CHECK-NEXT: func @cond_false0(%arg0: tensor<*xi32>, %arg1: tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi32>) { +# CHECK: func @cond_false0(%arg0: tensor<*xi32>, %arg1: tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi32>) { # CHECK-NEXT: %0:2 = "_tf.Identity"(%arg0) {T = "tfdtype$DT_INT32", device = "", name = "Identity"} : (tensor<*xi32>) -> (tensor<*xi32>, !_tf.control) # CHECK-NEXT: %1:2 = "_tf.Identity"(%arg1) {T = "tfdtype$DT_INT32", device = "", name = "Identity_1"} : (tensor<*xi32>) -> (tensor<*xi32>, !_tf.control) # CHECK-NEXT: return %1#0, %0#0 : tensor<*xi32>, tensor<*xi32> # CHECK-NEXT: } -# CHECK-EMPTY: -# CHECK-NEXT: func @cond_true0(%arg0: tensor<*xi32>, %arg1: tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi32>) { +# CHECK: func @cond_true0(%arg0: tensor<*xi32>, %arg1: tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi32>) { # CHECK-NEXT: %0:2 = "_tf.Identity"(%arg0) {T = "tfdtype$DT_INT32", device = "", name = "Identity"} : (tensor<*xi32>) -> (tensor<*xi32>, !_tf.control) # CHECK-NEXT: %1:2 = "_tf.Identity"(%arg1) {T = "tfdtype$DT_INT32", device = "", name = "Identity_1"} : (tensor<*xi32>) -> (tensor<*xi32>, !_tf.control) # CHECK-NEXT: return %0#0, %1#0 : tensor<*xi32>, tensor<*xi32> # CHECK-NEXT: } -# CHECK-EMPTY: diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-static-output.pbtxt 
b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-static-output.pbtxt index dc4e74cd028..41107cfbff4 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-static-output.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-static-output.pbtxt @@ -145,12 +145,10 @@ versions { #CHECK-NEXT: %2:2 = "_tf.If"(%0#0, %1#0) {Tcond = "tfdtype$DT_BOOL", Tin = ["tfdtype$DT_INT32"], Tout = ["tfdtype$DT_INT32"], device = "", else_branch = @get_zeros0, name = "If", output_shapes = [], then_branch = @identity0} : (tensor<*xi1>, tensor<*xi32>) -> (tensor<*xi32>, !_tf.control) #CHECK-NEXT: return #CHECK-NEXT: } -#CHECK-EMPTY: -#CHECK-NEXT: func @get_zeros0(%arg0: tensor<*xi32>) -> tensor<2xi32> { +#CHECK: func @get_zeros0(%arg0: tensor<*xi32>) -> tensor<2xi32> { #CHECK-NEXT: %0:2 = "_tf.Const"() {device = "", dtype = "tfdtype$DT_INT32", name = "const", value = dense<[1, 2]> : tensor<2xi32>} : () -> (tensor<2xi32>, !_tf.control) #CHECK-NEXT: return %0#0 : tensor<2xi32> #CHECK-NEXT: } -#CHECK-EMPTY: -#CHECK-NEXT: func @identity0(%arg0: tensor<*xi32>) -> tensor<*xi32> { +#CHECK: func @identity0(%arg0: tensor<*xi32>) -> tensor<*xi32> { #CHECK-NEXT: return %arg0 : tensor<*xi32> #CHECK-NEXT: } diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-functional-while-loop.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-functional-while-loop.pbtxt index 34e688a5605..456bf4951bd 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-functional-while-loop.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-functional-while-loop.pbtxt @@ -303,16 +303,14 @@ versions { # CHECK-NEXT: %3:4 = "_tf.While"(%1#0, %2#0, %0#0) {T = ["tfdtype$DT_INT32", "tfdtype$DT_INT32", "tfdtype$DT_INT32"], _lower_using_switch_merge = true, body = @while_body_60, cond = @while_cond_50, device = "", name = "while", output_shapes = ["tfshape$", "tfshape$", 
"tfshape$"], parallel_iterations = 10 : i64} : (tensor, tensor, tensor) -> (tensor, tensor, tensor, !_tf.control) # CHECK-NEXT: return %3#2 : tensor # CHECK-NEXT: } -# CHECK-EMPTY: -# CHECK-NEXT: func @while_body_60(%arg0: tensor<*xi32>, %arg1: tensor<*xi32>, %arg2: tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi32>, tensor<*xi32>) { +# CHECK: func @while_body_60(%arg0: tensor<*xi32>, %arg1: tensor<*xi32>, %arg2: tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi32>, tensor<*xi32>) { # CHECK-NEXT: %0:2 = "_tf.Const"() {device = "", dtype = "tfdtype$DT_INT32", name = "Add/y", value = dense<1> : tensor} : () -> (tensor, !_tf.control) # CHECK-NEXT: %1:2 = "_tf.Const"() {device = "", dtype = "tfdtype$DT_INT32", name = "add_1/y", value = dense<1> : tensor} : () -> (tensor, !_tf.control) # CHECK-NEXT: %2:2 = "_tf.Add"(%arg2, %0#0) {T = "tfdtype$DT_INT32", device = "", name = "Add"} : (tensor<*xi32>, tensor) -> (tensor<*xi32>, !_tf.control) # CHECK-NEXT: %3:2 = "_tf.Add"(%arg0, %1#0) {T = "tfdtype$DT_INT32", device = "", name = "add_1"} : (tensor<*xi32>, tensor) -> (tensor<*xi32>, !_tf.control) # CHECK-NEXT: return %3#0, %arg1, %2#0 : tensor<*xi32>, tensor<*xi32>, tensor<*xi32> # CHECK-NEXT: } -# CHECK-EMPTY: -# CHECK-NEXT: func @while_cond_50(%arg0: tensor<*xi32>, %arg1: tensor<*xi32>, %arg2: tensor<*xi32>) -> tensor<*xi1> { +# CHECK: func @while_cond_50(%arg0: tensor<*xi32>, %arg1: tensor<*xi32>, %arg2: tensor<*xi32>) -> tensor<*xi1> { # CHECK-NEXT: %0:2 = "_tf.Const"() {device = "", dtype = "tfdtype$DT_INT32", name = "Less/y", value = dense<10> : tensor} : () -> (tensor, !_tf.control) # CHECK-NEXT: %1:2 = "_tf.Less"(%arg2, %0#0) {T = "tfdtype$DT_INT32", device = "", name = "Less"} : (tensor<*xi32>, tensor) -> (tensor<*xi1>, !_tf.control) # CHECK-NEXT: return %1#0 : tensor<*xi1> diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-gradient-def.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-gradient-def.pbtxt index 
512ffd12eef..c1045bf19af 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-gradient-def.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-gradient-def.pbtxt @@ -279,14 +279,12 @@ versions { # CHECK-NEXT: %5:2 = "_tf.SymbolicGradient"(%0#0, %4#0) {Tin = ["tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT"], Tout = ["tfdtype$DT_FLOAT"], device = "", f = @foo0, f._disable_call_shape_inference = true, name = "gradients/foo_grad/SymbolicGradient"} : (tensor, tensor<*xf32>) -> (tensor, !_tf.control) # CHECK-NEXT: return # CHECK-NEXT: } -# CHECK-EMPTY: -# CHECK-NEXT: func @foo_grad0(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>) -> tensor<*xf32> +# CHECK: func @foo_grad0(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>) -> tensor<*xf32> # CHECK-NEXT: attributes {tf._disable_call_shape_inference = true} { # CHECK-NEXT: %0:2 = "_tf.Mul"(%arg0, %arg1) {T = "tfdtype$DT_FLOAT", device = "", name = "mul_0"} : (tensor<*xf32>, tensor<*xf32>) -> (tensor<*xf32>, !_tf.control) # CHECK-NEXT: return %0#0 : tensor<*xf32> # CHECK-NEXT: } -# CHECK-EMPTY: -# CHECK-NEXT: func @foo0(%arg0: tensor<*xf32>) -> tensor<*xf32> +# CHECK: func @foo0(%arg0: tensor<*xf32>) -> tensor<*xf32> # CHECK-NEXT: attributes {tf._disable_call_shape_inference = true, tf.gradient = @foo_grad0} { # CHECK-NEXT: %0:2 = "_tf.Exp"(%arg0) {T = "tfdtype$DT_FLOAT", device = "", name = "Exp"} : (tensor<*xf32>) -> (tensor<*xf32>, !_tf.control) # CHECK-NEXT: %1:2 = "_tf.Neg"(%arg0) {T = "tfdtype$DT_FLOAT", device = "", name = "Neg"} : (tensor<*xf32>) -> (tensor<*xf32>, !_tf.control) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-library.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-library.pbtxt index 80697690e8a..83ca4466869 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-library.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-library.pbtxt @@ -41,12 +41,10 @@ versions { # CHECK-NEXT: %1 = 
"_tf.bar0"() {device = "", name = "unnamed1"} : () -> !_tf.control # CHECK-NEXT: return # CHECK-NEXT: } -# CHECK-EMPTY: -# CHECK-NEXT: func @foo0() { +# CHECK: func @foo0() { # CHECK-NEXT: %0 = "_tf.bar0"() {device = "", name = "unnamed"} : () -> !_tf.control # CHECK-NEXT: return # CHECK-NEXT: } -# CHECK-EMPTY: -# CHECK-NEXT: func @bar0() { +# CHECK: func @bar0() { # CHECK-NEXT: return # CHECK-NEXT: } diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-uint8-return.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-uint8-return.pbtxt new file mode 100644 index 00000000000..32b816f5e39 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-uint8-return.pbtxt @@ -0,0 +1,111 @@ +# RUN: tf-mlir-translate -graphdef-to-mlir -mlir-print-debuginfo %s -o - | FileCheck %s + +node { + name: "PartitionedCall" + op: "PartitionedCall" + attr { + key: "Tin" + value { + list { + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_UINT8 + } + } + } + attr { + key: "_gradient_op_type" + value { + s: "PartitionedCall-15" + } + } + attr { + key: "config" + value { + s: "" + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003GPU\020\000\n\007\n\003CPU\020\0012\002J\0008\001" + } + } + attr { + key: "executor_type" + value { + s: "" + } + } + attr { + key: "f" + value { + func { + name: "__inference_uint_const_14" + } + } + } +} +library { + function { + signature { + name: "__inference_uint_const_14" + output_arg { + name: "identity" + type: DT_UINT8 + } + } + node_def { + name: "Const" + op: "Const" + attr { + key: "dtype" + value { + type: DT_UINT8 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_UINT8 + tensor_shape { + } + int_val: 5 + } + } + } + } + node_def { + name: "Identity" + op: "Identity" + input: "Const:output:0" + attr { + key: "T" + value { + type: DT_UINT8 + } + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + } +} +versions { + producer: 29 + 
min_consumer: 12 +} + +# CHECK: func @main +# CHECK: "_tf.PartitionedCall"() +# CHECK-SAME: Tout = ["tfdtype$DT_UINT8"] +# CHECK-SAME: f = @[[FUNCTION:[A-Za-z0-9_]*]] +# CHECK: func @[[FUNCTION]]() -> tensor +# CHECK: return {{%[0-9]*#[0-9]*}} : tensor diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir index 52a14cd60c6..5e7d733bb57 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir @@ -588,7 +588,7 @@ func @testSoftmax(tensor<8x16xf32>) -> tensor<8x16xf32> { // Test invalid tf.Softmax func @testSoftmax(tensor<8x8x8xf32>) -> tensor<8x8x8xf32> { ^bb0(%arg0: tensor<8x8x8xf32>): - // expected-error @+1 {{requires operand to be 2D tensor}} + // expected-error @+1 {{requires operand to be 1D/2D tensor}} %0 = "tf.Softmax"(%arg0) {T = "tfdtype$DT_FLOAT"} : (tensor<8x8x8xf32>) -> tensor<8x8x8xf32> return %0 : tensor<8x8x8xf32> } diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_executor_ops.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf_executor_ops.mlir index 9772386c029..510aaccb26a 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_executor_ops.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_executor_ops.mlir @@ -313,7 +313,7 @@ func @nextiteration_control(%arg0: tensor<*xf32>, %arg1: tensor) -> tensor<* %3:3 = tf_executor.NextIteration.Source : tensor<*xf32> tf_executor.NextIteration.Sink [%3#1] %3#0, %1#2 : tensor<*xf32> // CHECK: %3:3 = tf_executor.NextIteration.Source : tensor<*xf32> -// CHECK: tf_executor.NextIteration.Sink [%3#1] %3#0 : tensor<*xf32> +// CHECK: tf_executor.NextIteration.Sink [%3#1] %3#0, %1#2 : tensor<*xf32> tf_executor.fetch %3#0 : tensor<*xf32> } return %0 : tensor<*xf32> diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_cfg.cc b/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_cfg.cc index 
a585d6de9f1..af3e1e05ade 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_cfg.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_cfg.cc @@ -70,7 +70,7 @@ static Value* LowerCondition(Location loc, Value* value, OpBuilder* builder) { // that is compatible for tensor cast. // static Operation* CallFn(Location loc, - const std::function& get_arg, Function fn, + const std::function& get_arg, FuncOp fn, OpBuilder* builder) { FunctionType fn_type = fn.getType(); llvm::SmallVector operands; @@ -153,9 +153,9 @@ static LogicalResult LowerIfOp(IfOp op) { Value* cond_i1 = LowerCondition(loc, op.getCondition(), &builder); if (!cond_i1) return failure(); - auto module = op_inst->getParentOfType(); - auto then_fn = module.getNamedFunction(op.getThen()); - auto else_fn = module.getNamedFunction(op.getElse()); + auto module = op_inst->getParentOfType(); + auto then_fn = module.lookupSymbol(op.getThen()); + auto else_fn = module.lookupSymbol(op.getElse()); // Split the basic block before the 'if'. The new dest will be our merge // point. @@ -210,9 +210,9 @@ static LogicalResult LowerWhileOp(WhileOp op) { OpBuilder builder(op_inst); - auto module = op_inst->getParentOfType(); - auto cond_fn = module.getNamedFunction(op.getCond()); - auto body_fn = module.getNamedFunction(op.getBody()); + auto module = op_inst->getParentOfType(); + auto cond_fn = module.lookupSymbol(op.getCond()); + auto body_fn = module.lookupSymbol(op.getBody()); // Split the block containing the While op into two blocks. One containing // operations before the While op and other containing the rest. 
Create two diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h index d300c014aed..1202d4d432c 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h @@ -35,7 +35,6 @@ namespace TFControlFlow { FunctionPassBase *CreateRaiseTFControlFlowPass(); } // namespace TFControlFlow - } // namespace mlir #endif // TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_PASSES_H_ diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tf_graph_optimization_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tf_graph_optimization_pass.cc index c835ea64158..60f7ed35a0b 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tf_graph_optimization_pass.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tf_graph_optimization_pass.cc @@ -89,7 +89,7 @@ std::vector GraphOptPass::FindPassIds() { } void GraphOptPass::runOnModule() { - mlir::Module module_in = getModule(); + mlir::ModuleOp module_in = getModule(); mlir::MLIRContext& ctx = getContext(); // Convert MLIR to Graph diff --git a/tensorflow/compiler/mlir/tensorflow/translate/control_to_executor_dialect.cc b/tensorflow/compiler/mlir/tensorflow/translate/control_to_executor_dialect.cc new file mode 100644 index 00000000000..4d9b3ca7ab7 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/translate/control_to_executor_dialect.cc @@ -0,0 +1,248 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This transformation pass transforms MLIR TF control dialect into a combination +// of the TF and TF executor dialects. +// +// !! This code is only intended for migration purposes and will be deleted when +// !! the importer is updated to directly emit the tf_executor dialect. + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" +#include "llvm/Support/Debug.h" +#include "mlir/IR/Builders.h" // TF:local_config_mlir +#include "mlir/IR/Operation.h" // TF:local_config_mlir +#include "mlir/IR/Value.h" // TF:local_config_mlir +#include "mlir/Pass/Pass.h" // TF:local_config_mlir +#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir +#include "mlir/StandardOps/Ops.h" // TF:local_config_mlir +#include "mlir/Support/LLVM.h" // TF:local_config_mlir +#include "tensorflow/compiler/mlir/tensorflow/ir/control_flow_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" + +#define DEBUG_TYPE "tf-ctl-to-executor" + +namespace mlir { + +namespace { +// This pass checks if a function contains only operations in the TF control +// dialect and converts it to a mix of the tf_executor and tf dialects. +// Operations that exist in the tf_executor dialects are used directly, +// otherwise _tf operations are wrapped in an island and the _ prefix is +// removed. Control dependencies are moved to be handled by the island itself. 
+struct ControlToExecutorDialectConversion + : public FunctionPass { + void runOnFunction() override; + + private: + tf_executor::IslandOp CreateIslandForOp(Operation *op, OpBuilder *builder); +}; +} // end anonymous namespace + +static bool IsUnderscoredTFOp(Operation *op) { + return op->getName().getStringRef().startswith("_tf."); +} + +static bool HasOnlyTFControlOperations(FuncOp function) { + return llvm::all_of(function, [](Block &block) { + return llvm::all_of(block, [](Operation &op) { + return IsUnderscoredTFOp(&op) || isa(op); + }); + }); +} + +tf_executor::IslandOp ControlToExecutorDialectConversion::CreateIslandForOp( + Operation *op, OpBuilder *builder) { + // Create a new region for the tf_executor.island body + SmallVector operands; + for (Value *operand : op->getOperands()) + if (operand->getType().isa()) + operands.push_back(operand); + SmallVector types; + for (Type result_type : op->getResultTypes()) + if (!result_type.isa()) + types.push_back(result_type); + types.push_back(tf_executor::ControlType::get(&getContext())); + + auto island = builder->create( + op->getLoc(), types, operands, ArrayRef{}); + island.body().push_back(new Block); + + return island; +} + +void ControlToExecutorDialectConversion::runOnFunction() { + if (!HasOnlyTFControlOperations(getFunction())) { + LLVM_DEBUG(llvm::dbgs() << "Function has unsupported operation, skip " + "tf_executor dialect conversion\n"); + return; + } + if (getFunction().getBlocks().size() != 1) { + LLVM_DEBUG(llvm::dbgs() << "Expect single block function, , skip " + "tf_executor dialect conversion\n"); + return; + } + + Block &body = getFunction().getBody().front(); + OpBuilder builder(&body, body.begin()); + + // Create a new tf_executor.graph at the beginning of the function. 
+ auto graph_op = builder.create( + getFunction().getLoc(), getFunction().getType().getResults()); + graph_op.body().push_back(new Block); + builder.setInsertionPointToEnd(&graph_op.GetBody()); + llvm::StringMap frame_name_to_loop; + + // Loop over operations in the function and move them into the graph region. + for (Operation &op : llvm::make_early_inc_range(body)) { + // Skip the just-created tf_executor.graph. + if (isa(op)) continue; + + // This is the new operation that will replace the current one in the graph. + Operation *replacement = nullptr; + if (op.isKnownTerminator()) { + // This is the return of the function, we will create a fetch in the graph + // matching the operands of the returns. The return is then updated to + // take as operands the results of the tf_executor.graph operation. + SmallVector ret_vals; + for (Value *operand : op.getOperands()) ret_vals.push_back(operand); + for (auto &graph_result : llvm::enumerate(graph_op.getResults())) + op.setOperand(graph_result.index(), graph_result.value()); + builder.create(getFunction().getLoc(), ret_vals); + continue; + } + assert(IsUnderscoredTFOp(&op) && "Expected only _tf operations"); + + // The operands and types arrays are used to create the tf_executor ops. + SmallVector operands; + operands.append(op.getOperands().begin(), op.getOperands().end()); + SmallVector types; + for (Type result_type : op.getResultTypes()) { + if (result_type.isa()) + types.push_back(tf_executor::ControlType::get(&getContext())); + else + types.push_back(result_type); + } + auto loc = op.getLoc(); + + // Match the specific operation that has a tf_executor equivalent, the + // others will be wrapped in an island. 
+ + // FIXME: StringSwitch + + if (op.getName().getStringRef() == "_tf.Switch") { + replacement = builder.create( + loc, types, operands, ArrayRef{}); + } else if (op.getName().getStringRef() == "_tf.SwitchN") { + replacement = builder.create( + loc, types, operands, ArrayRef{}); + } else if (op.getName().getStringRef() == "_tf.Merge") { + replacement = builder.create( + loc, types, operands, ArrayRef{}); + } else if (op.getName().getStringRef() == "_tf.NextIteration.source") { + replacement = builder.create( + loc, op.getResult(0)->getType(), operands); + // Record a mapping of the name to the nextiteration.source so that when + // we convert the sink we can get the token. + StringAttr frame = op.getAttrOfType("name"); + assert(!frame.getValue().empty()); + frame_name_to_loop[frame.getValue()] = + cast(replacement); + // Replace the results here since the _tf source does not produce a token + // there isn't a mapping for the new result #1. + op.getResult(0)->replaceAllUsesWith(replacement->getResult(0)); + for (int i : llvm::seq(1, op.getNumResults())) + op.getResult(i)->replaceAllUsesWith(replacement->getResult(i + 1)); + replacement->setAttrs(op.getAttrList()); + op.erase(); + continue; + } else if (op.getName().getStringRef() == "_tf.NextIteration.sink") { + StringAttr frame = op.getAttrOfType("name"); + assert(!frame.getValue().empty()); + tf_executor::NextIterationSourceOp srcOp = + frame_name_to_loop[frame.getValue()]; + replacement = builder.create( + loc, srcOp.token(), operands, ArrayRef{}); + replacement->setAttrs(op.getAttrList()); + op.erase(); + continue; + } else if (op.getName().getStringRef() == "_tf.LoopCond") { + replacement = builder.create( + loc, types, operands, ArrayRef{}); + } else if (op.getName().getStringRef() == "_tf.Enter") { + replacement = builder.create( + loc, types, operands, ArrayRef{}); + } else if (op.getName().getStringRef() == "_tf.Exit") { + replacement = builder.create( + loc, types, operands, ArrayRef{}); + } else if 
(op.getName().getStringRef() == "_tf.ControlTrigger") { + replacement = + builder.create(loc, operands); + } else { + tf_executor::IslandOp island = CreateIslandForOp(&op, &builder); + replacement = island.getOperation(); + + // General case, drop the leading _ off the name and wrap in an island. + OperationState result(loc, op.getName().getStringRef().drop_front()); + + // Only the non-control operands are carried over, the island is handling + // the control input. + for (Value *operand : op.getOperands()) + if (!operand->getType().isa()) + result.operands.push_back(operand); + + // Add a result type for each non-control result we find + bool sawControlResult = false; + for (Type result_type : op.getResultTypes()) { + if (result_type.isa()) { + sawControlResult = true; + continue; + } + // We assume all control results are at the end of the result list. + assert(!sawControlResult && "all control results must be last"); + result.types.push_back(result_type); + } + + // Create the operation inside the island + OpBuilder island_builder(&island.GetBody()); + Operation *inner_op = island_builder.createOperation(result); + inner_op->setAttrs(op.getAttrList()); + + // Add the terminator for the island + SmallVector ret_vals(inner_op->getResults()); + island_builder.create(loc, ret_vals); + } + + // Copy the attributes from the original operation to the replacement and + // remap the results.
+ if (!isa(replacement)) + replacement->setAttrs(op.getAttrList()); + for (int i : llvm::seq(0, op.getNumResults())) + op.getResult(i)->replaceAllUsesWith(replacement->getResult(i)); + op.erase(); + } +} + +FunctionPassBase *CreateTFControlToExecutorDialectConversion() { + return new ControlToExecutorDialectConversion(); +} + +} // namespace mlir + +static mlir::PassRegistration pass( + "tf-control-to-executor-conversion", + "Transform from TF control dialect to TF executor dialect."); diff --git a/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc b/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc index 196837b8e3e..22c1a4dd70c 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc @@ -88,23 +88,23 @@ class Exporter { // one entry function, which is identified by name "main". This entry function // is converted to the base of the graph graph. The rest of the functions are // converted to the library functions in that graph. - static Status Convert(mlir::Module module, const ExporterConfigs& configs, + static Status Convert(mlir::ModuleOp module, const ExporterConfigs& configs, std::unique_ptr* graph, FunctionLibraryDefinition* flib_def); - // Converts a given Function to a FunctionDef and adds it to the function + // Converts a given FuncOp to a FunctionDef and adds it to the function // definition library static Status ConvertLibFunction(const ExporterConfigs& configs, const Dialect* tf_dialect, - mlir::Function function, + mlir::FuncOp function, FunctionDefLibrary* flib); - // Converts the given CFG Function to a Graph. The arguments and returns of + // Converts the given FuncOp to a Graph. The arguments and returns of // function are added to the graph with special op names kArgOp and kRetOp. // Later on, this graph can be converted a function definition and added to // another graph. 
static StatusOr> Convert( const ExporterConfigs& configs, const Dialect* tf_dialect, - mlir::Function function, FunctionDefLibrary* flib); + mlir::FuncOp function, FunctionDefLibrary* flib); private: explicit Exporter(Graph* graph, const Dialect* tf_dialect) @@ -376,11 +376,11 @@ Status Exporter::AddNextIterationNode(mlir::Operation* inst) { StatusOr> Exporter::Convert(const ExporterConfigs& confs, const Dialect* tf_dialect, - mlir::Function function, + mlir::FuncOp function, FunctionDefLibrary* flib) { if (function.getBlocks().size() != 1) { return errors::FailedPrecondition( - "Input Function must have only one basic block!"); + "Input FuncOp must have only one basic block!"); } mlir::Block& block = function.front(); @@ -433,7 +433,7 @@ StatusOr> Exporter::Convert(const ExporterConfigs& confs, mlir::Type type = arg->getType(); if (!type.isa()) { return errors::InvalidArgument( - "Functions arguments must have tensor types. Found ", + "FuncOps arguments must have tensor types. Found ", mlir::debugString(type), " in function ", function.getName().str()); } @@ -448,7 +448,9 @@ StatusOr> Exporter::Convert(const ExporterConfigs& confs, // definition library // TODO(prakalps): If two functions have cyclic dependence, this will // introduce an infinite loop. - auto func = function.getModule().getNamedFunction(op_name.ValueOrDie()); + auto func = + function.getParentOfType().lookupSymbol( + op_name.ValueOrDie()); if (func != nullptr) { TF_RETURN_IF_ERROR(ConvertLibFunction(confs, tf_dialect, func, flib)); TF_RETURN_IF_ERROR(graph->AddFunctionLibrary(*flib)); @@ -483,7 +485,7 @@ StatusOr> Exporter::Convert(const ExporterConfigs& confs, Status Exporter::ConvertLibFunction(const ExporterConfigs& configs, const Dialect* tf_dialect, - mlir::Function function, + mlir::FuncOp function, FunctionDefLibrary* flib) { // First look for the function in the current function library. If found, // nothing needs to be done. 
@@ -510,8 +512,10 @@ Status Exporter::ConvertLibFunction(const ExporterConfigs& configs, // Checks for gradient attribute. If present converts the gradient function // and populates the GradientDef. auto grad_string = mlir::TF::TensorFlowDialect::GetGradientAttrName(); - if (auto attr = function.getAttrOfType(grad_string)) { - auto grad_func = function.getModule().getNamedFunction(attr.getValue()); + if (auto attr = function.getAttrOfType(grad_string)) { + auto grad_func = + function.getParentOfType().lookupSymbol( + attr.getValue()); TF_RETURN_IF_ERROR( ConvertLibFunction(configs, tf_dialect, grad_func, flib)); GradientDef grad; @@ -531,15 +535,15 @@ Status Exporter::ConvertLibFunction(const ExporterConfigs& configs, return Status::OK(); } -Status Exporter::Convert(mlir::Module module, const ExporterConfigs& configs, +Status Exporter::Convert(mlir::ModuleOp module, const ExporterConfigs& configs, std::unique_ptr* graph, FunctionLibraryDefinition* flib_def) { mlir::Identifier entry_func_id = mlir::Identifier::get("main", module.getContext()); - absl::optional entry_func; + absl::optional entry_func; FunctionDefLibrary flib; auto tf_dialect = module.getContext()->getRegisteredDialect("tf"); - for (auto function : module.getOps()) { + for (auto function : module.getOps()) { if (function.isExternal()) return errors::FailedPrecondition("External functions not supported"); @@ -567,14 +571,14 @@ Status Exporter::Convert(mlir::Module module, const ExporterConfigs& configs, } } // namespace -Status ConvertMlirToGraph(mlir::Module module, const ExporterConfigs& confs, +Status ConvertMlirToGraph(mlir::ModuleOp module, const ExporterConfigs& confs, std::unique_ptr* graph, FunctionLibraryDefinition* flib_def) { return Exporter::Convert(module, confs, graph, flib_def); } StatusOr> ConvertMlirToGraphdef( - mlir::Module module, const ExporterConfigs& confs) { + mlir::ModuleOp module, const ExporterConfigs& confs) { FunctionLibraryDefinition flib_def(OpRegistry::Global(), 
FunctionDefLibrary()); auto graph = absl::make_unique(flib_def); diff --git a/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.h b/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.h index bfdceac9de0..93061a95239 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.h +++ b/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.h @@ -32,13 +32,13 @@ using stream_executor::port::StatusOr; // Given an MLIR module, returns a GraphDef. StatusOr> ConvertMlirToGraphdef( - mlir::Module module, const ExporterConfigs& configs); + mlir::ModuleOp module, const ExporterConfigs& configs); // Converts an MLIR module to TensorFlow graph and FunctionLibraryDefinition. // The "main" function of the module is stored in the graph and the rest of // functions are stored in the library. stream_executor::port::Status ConvertMlirToGraph( - mlir::Module module, const ExporterConfigs& confs, + mlir::ModuleOp module, const ExporterConfigs& confs, std::unique_ptr* graph, FunctionLibraryDefinition* flib_def); } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tensorflow/translate/import_graphdef.cc b/tensorflow/compiler/mlir/tensorflow/translate/import_graphdef.cc index c0553de183e..73fe028f366 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/import_graphdef.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/import_graphdef.cc @@ -30,6 +30,7 @@ limitations under the License. 
#include "llvm/Support/raw_ostream.h" #include "mlir/IR/Attributes.h" // TF:local_config_mlir #include "mlir/IR/Builders.h" // TF:local_config_mlir +#include "mlir/IR/Function.h" // TF:local_config_mlir #include "mlir/IR/Identifier.h" // TF:local_config_mlir #include "mlir/IR/Location.h" // TF:local_config_mlir #include "mlir/IR/MLIRContext.h" // TF:local_config_mlir @@ -86,7 +87,7 @@ class Importer { explicit Importer( const FunctionLibraryDefinition& flib, const GraphDebugInfo& debug_info, - const NodeSpecs& specs, mlir::Module module, + const NodeSpecs& specs, mlir::ModuleOp module, std::unordered_map* tf_name_to_mlir_name) : module_(module), context_(module.getContext()), @@ -158,8 +159,8 @@ class Importer { return ::tensorflow::ConvertTensorProto(value, builder_.get()); } - // Converts func name in graphdef to mlir::FunctionAttribute. - StatusOr ConvertFunctionCallName( + // Converts func name in graphdef to mlir::SymbolRefAttr. + StatusOr ConvertFunctionCallName( const std::string& func_name); // Converts the given non-function-call AttrValue to an MLIR Attribute.
@@ -262,7 +263,7 @@ class Importer { using NodeValueMap = absl::flat_hash_map; std::unique_ptr builder_; - mlir::Module module_; + mlir::ModuleOp module_; mlir::MLIRContext* context_; std::unordered_map* tf_name_to_mlir_name_; const FunctionLibraryDefinition& graph_flib_; @@ -611,12 +612,12 @@ Status Importer::ConvertFunctionCallAttribute( return Status::OK(); } -StatusOr Importer::ConvertFunctionCallName( +StatusOr Importer::ConvertFunctionCallName( const std::string& func_name) { TF_RETURN_IF_ERROR(ConvertLibFunction(func_name)); auto mlir_func_name = (*tf_name_to_mlir_name_)[func_name]; - auto func = module_.getNamedFunction(mlir_func_name); - return builder_->getFunctionAttr(func); + auto func = module_.lookupSymbol(mlir_func_name); + return builder_->getSymbolRefAttr(func); } StatusOr Importer::ConvertAttributeValue( @@ -721,8 +722,8 @@ Status Importer::ConvertLibFunction(const std::string& func_name) { if (!grad_func_name.empty()) { TF_RETURN_IF_ERROR(ConvertLibFunction(grad_func_name)); auto mlir_grad_func_name = (*tf_name_to_mlir_name_)[grad_func_name]; - auto grad_func = module_.getNamedFunction(mlir_grad_func_name); - auto gradient_attr = builder_->getFunctionAttr(grad_func); + auto grad_func = module_.lookupSymbol(mlir_grad_func_name); + auto gradient_attr = builder_->getSymbolRefAttr(grad_func); auto grad_string = mlir::TF::TensorFlowDialect::GetGradientAttrName(); attributes.push_back(builder_->getNamedAttr(grad_string, gradient_attr)); } @@ -1151,8 +1152,8 @@ Status Importer::Convert(llvm::StringRef func_name, const absl::InlinedVector& ret_nodes, llvm::ArrayRef attrs) { // TODO(b/122040776): Uses debug info for FunctionDef. 
- auto function = mlir::Function::create(mlir::UnknownLoc::get(context_), - func_name, func_type, attrs); + auto function = mlir::FuncOp::create(mlir::UnknownLoc::get(context_), + func_name, func_type, attrs); module_.push_back(function); builder_ = absl::make_unique(function.getBody()); @@ -1291,7 +1292,8 @@ StatusOr Importer::Convert( mlir::MLIRContext* context, const Graph& graph, const GraphDebugInfo& debug_info, const FunctionLibraryDefinition& flib_def, const NodeSpecs& specs) { - mlir::OwningModuleRef module = mlir::Module::create(context); + mlir::OwningModuleRef module = + mlir::ModuleOp::create(mlir::UnknownLoc::get(context)); std::unordered_map tf_name_to_mlir_name; Importer importer(flib_def, debug_info, specs, module.get(), &tf_name_to_mlir_name); diff --git a/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_pass.h b/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_pass.h index 6e8547f3dcd..96a66d4eab3 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_pass.h +++ b/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_pass.h @@ -16,6 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSLATE_MLIR_ROUNDTRIP_PASS_H_ #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSLATE_MLIR_ROUNDTRIP_PASS_H_ +#include "mlir/StandardOps/Ops.h" // TF:local_config_mlir #include "tensorflow/core/common_runtime/optimization_registry.h" #include "tensorflow/core/lib/core/status.h" diff --git a/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc b/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc index d2b40448e7b..5c7b1e824fe 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc @@ -19,6 +19,7 @@ limitations under the License. 
#include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include "mlir/IR/Attributes.h" // TF:local_config_mlir +#include "mlir/IR/Function.h" // TF:local_config_mlir #include "mlir/IR/Identifier.h" // TF:local_config_mlir #include "mlir/IR/MLIRContext.h" // TF:local_config_mlir #include "mlir/IR/Module.h" // TF:local_config_mlir @@ -119,7 +120,7 @@ mlir::OwningModuleRef GraphdefToSplattedMlirTranslateFunction( break; default: inst.emitWarning() - << "Skipping splat converstion for " + << "Skipping splat conversion for " << "an unsupported attribute type " << element_type; continue; } diff --git a/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate_registration.cc b/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate_registration.cc index 59d42fd1397..7d7632d7e82 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate_registration.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate_registration.cc @@ -63,7 +63,7 @@ static TranslateToMLIRRegistration GraphdefToSplattedMlirTranslate( "graphdef-to-splatted-mlir", GraphdefToSplattedMlirTranslateFunction); static LogicalResult MlirToGraphdefTranslateFunction( - Module module, llvm::StringRef output_filename) { + ModuleOp module, llvm::StringRef output_filename) { if (!module) return failure(); std::error_code error; diff --git a/tensorflow/compiler/mlir/tensorflow/translate/translate_tf_dialect_op.cc b/tensorflow/compiler/mlir/tensorflow/translate/translate_tf_dialect_op.cc index 932bc7b28c2..9c02ce2278f 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/translate_tf_dialect_op.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/translate_tf_dialect_op.cc @@ -14,6 +14,7 @@ limitations under the License. 
==============================================================================*/ #include "llvm/Support/ToolOutputFile.h" +#include "mlir/IR/Function.h" // TF:local_config_mlir #include "mlir/IR/Location.h" // TF:local_config_mlir #include "mlir/IR/MLIRContext.h" // TF:local_config_mlir #include "mlir/IR/Module.h" // TF:local_config_mlir @@ -22,8 +23,8 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/translate/export_tf_dialect_op.h" namespace mlir { -static mlir::Operation* ExtractOnlyOp(mlir::Module module) { - mlir::Function fn = module.getNamedFunction("main"); +static mlir::Operation* ExtractOnlyOp(mlir::ModuleOp module) { + mlir::FuncOp fn = module.lookupSymbol("main"); if (!fn) return nullptr; if (fn.getBlocks().size() != 1) return nullptr; @@ -38,7 +39,8 @@ static mlir::Operation* ExtractOnlyOp(mlir::Module module) { return &block.front(); } -static LogicalResult MlirToTfNodeDef(Module module, llvm::StringRef filename) { +static LogicalResult MlirToTfNodeDef(ModuleOp module, + llvm::StringRef filename) { auto* context = module.getContext(); auto file = openOutputFile(filename); diff --git a/tensorflow/compiler/mlir/tensorflow/utils/convert_type.cc b/tensorflow/compiler/mlir/tensorflow/utils/convert_type.cc index 3006cc5bc3f..0aa6b460b73 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/convert_type.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/convert_type.cc @@ -46,19 +46,15 @@ Status ConvertDataType(const DataType& dtype, Builder builder, Type* type) { *type = builder.getIntegerType(1); return Status::OK(); case DT_INT8: - case DT_UINT8: *type = builder.getIntegerType(8); return Status::OK(); case DT_INT16: - case DT_UINT16: *type = builder.getIntegerType(16); return Status::OK(); case DT_INT32: - case DT_UINT32: *type = builder.getIntegerType(32); return Status::OK(); case DT_INT64: - case DT_UINT64: *type = builder.getIntegerType(64); return Status::OK(); case DT_BFLOAT16: diff --git 
a/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc b/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc index 1067aabdf81..a2f803c0858 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc @@ -115,7 +115,7 @@ Status ConvertAttribute(const mlir::UnitAttr& attr, AttrValue* value) { return Status::OK(); } -Status ConvertAttribute(const mlir::FunctionAttr& attr, AttrValue* value) { +Status ConvertAttribute(const mlir::SymbolRefAttr& attr, AttrValue* value) { value->mutable_func()->set_name(attr.getValue()); return Status::OK(); } @@ -149,7 +149,7 @@ Status ConvertAttribute(const mlir::ArrayAttr& attr, AttrValue* value) { TensorProto tensor; TF_RETURN_IF_ERROR(ConvertToTensorProto(attr, &tensor)); *list->add_tensor() = tensor; - } else if (auto attr = a.dyn_cast()) { + } else if (auto attr = a.dyn_cast()) { AttrValue attrVal; TF_RETURN_IF_ERROR(ConvertAttribute(attr, &attrVal)); *list->add_func() = attrVal.func(); @@ -218,8 +218,8 @@ Status ConvertAttributes(const llvm::ArrayRef attrs, } AttrValue value; switch (attr.getKind()) { - case mlir::StandardAttributes::Function: { - auto func_attr = attr.cast(); + case mlir::StandardAttributes::SymbolRef: { + auto func_attr = attr.cast(); value.mutable_func()->set_name(func_attr.getValue()); func_call_attrs[string(name)] = value; continue; diff --git a/tensorflow/compiler/mlir/xla/ir/xla_ops.td b/tensorflow/compiler/mlir/xla/ir/xla_ops.td index 9c57c8e1543..a05dd9b3d1d 100644 --- a/tensorflow/compiler/mlir/xla/ir/xla_ops.td +++ b/tensorflow/compiler/mlir/xla/ir/xla_ops.td @@ -298,8 +298,8 @@ def XLA_WhileOp: XLA_Op<"while", [NoSideEffect, SameOperandsAndResultType]> { let arguments = (ins Variadic:$val, - FunctionAttr:$cond, - FunctionAttr:$body + SymbolRefAttr:$cond, + SymbolRefAttr:$body ); let results = (outs Variadic:$res); @@ -320,7 +320,7 @@ def XLA_ReduceOp: XLA_Op<"reduce", [NoSideEffect]> { let arguments = (ins 
Variadic:$operands_and_init, - FunctionAttr:$computation, + SymbolRefAttr:$computation, ElementsAttr:$dimensions ); diff --git a/tensorflow/compiler/tests/random_ops_test.py b/tensorflow/compiler/tests/random_ops_test.py index 4ac6a82145d..6e21c2a2e46 100644 --- a/tensorflow/compiler/tests/random_ops_test.py +++ b/tensorflow/compiler/tests/random_ops_test.py @@ -116,12 +116,14 @@ class RandomOpsTest(xla_test.XLATestCase): def rng(dtype): return random_ops.truncated_normal(shape=[2], dtype=dtype) - self._testRngIsNotConstant(rng, dtypes.float32) + # TODO(b/34339814): make this test work with 16 bit float types. + for dtype in self._random_types() & {np.float32, np.float64}: + self._testRngIsNotConstant(rng, dtype) def testTruncatedNormalIsInRange(self): count = 10000000 # TODO(b/34339814): make this test work with 16 bit float types. - for dtype in self._random_types() & {dtypes.float32, dtypes.float64}: + for dtype in self._random_types() & {np.float32, np.float64}: with self.session() as sess: with self.test_scope(): x = random_ops.truncated_normal(shape=[count], dtype=dtype) diff --git a/tensorflow/compiler/tf2xla/kernels/random_ops.cc b/tensorflow/compiler/tf2xla/kernels/random_ops.cc index 0b54c88fae9..ed2ba9d1c47 100644 --- a/tensorflow/compiler/tf2xla/kernels/random_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/random_ops.cc @@ -293,7 +293,7 @@ class TruncatedNormalOp : public XlaOpKernel { REGISTER_XLA_OP(Name("TruncatedNormal") .CompileTimeConstantInput("shape") - .TypeConstraint("dtype", DT_FLOAT), + .TypeConstraint("dtype", {DT_FLOAT, DT_DOUBLE}), TruncatedNormalOp); } // namespace diff --git a/tensorflow/compiler/tf2xla/resource_util.cc b/tensorflow/compiler/tf2xla/resource_util.cc index 5d72943279f..f323dd57070 100644 --- a/tensorflow/compiler/tf2xla/resource_util.cc +++ b/tensorflow/compiler/tf2xla/resource_util.cc @@ -42,63 +42,199 @@ const char kRetvalOp[] = "_Retval"; const int kMaxCallDepth = 100; +Status AnalyzeResourceUsage( + const Graph* 
graph, const absl::optional& function_name, + const int call_depth, const absl::flat_hash_set& resource_arg_indices, + FunctionLibraryRuntime* lib_runtime, + absl::flat_hash_map>* + source_to_path); + bool IsControlFlowV1Node(const Node* n) { return (n->IsEnter() || n->IsExit() || n->IsSwitch() || n->IsMerge() || n->IsNextIteration()); } -// Given an output edge, find the corresponding input edge if given edge is -// coming from a pass-through node. Otherwise, return nullptr. -StatusOr WalkBackPassThroughEdge(const Edge* e) { - const Node* n = e->src(); - - if (n->IsIdentity()) { - const Edge* ret; - TF_RETURN_IF_ERROR(n->input_edge(0, &ret)); - return ret; - } - - if (n->type_string() == kIdentityNOp) { - const Edge* ret; - TF_RETURN_IF_ERROR(n->input_edge(e->src_output(), &ret)); - return ret; - } - - // Reaching here means e is not coming from a pass through node, return empty - // vector to indicate we can no longer trace back. - return nullptr; -} - // TODO(ycao): Add this as Tensorflow Node method. 
-StatusOr> OutputEdgesByIndex(const Node* n, +StatusOr> OutputEdgesByIndex(const Node& n, int idx) { absl::InlinedVector res; - if (idx >= n->num_outputs()) { + if (idx >= n.num_outputs()) { return errors::InvalidArgument("Invalid out_edge index: ", idx, ", Node ", - n->name(), " only has ", n->num_outputs(), + n.name(), " only has ", n.num_outputs(), " outputs."); } - for (const Edge* o : n->out_edges()) { + for (const Edge* o : n.out_edges()) { if (o->src_output() == idx) res.emplace_back(o); } return res; } -bool IsStackOrTensorArraySource(const Node* n) { - const XlaResourceOpInfo* op_info = GetResourceOpInfoForOp(n->type_string()); +bool IsStackOrTensorArraySource(const Node& n) { + const XlaResourceOpInfo* op_info = GetResourceOpInfoForOp(n.type_string()); if (!op_info) return false; if (op_info->resource_kind() != XlaResourceKind::kStack && op_info->resource_kind() != XlaResourceKind::kTensorArray) return false; - return n->num_outputs() > 0 && n->output_type(0) == DataType::DT_RESOURCE; + return n.num_outputs() > 0 && n.output_type(0) == DataType::DT_RESOURCE; +} + +void PropagateFromStackOrTensorArraySourceOp( + const Node& n, const absl::optional& function_name, + absl::flat_hash_map* + user_to_source) { + ResourceUsageAnalysis::NodeInfo src_node_info(function_name, n.name(), + n.type_string()); + for (const Edge* o : n.out_edges()) { + if (o->IsControlEdge()) continue; + if (o->dst()->input_type(o->dst_input()) != DataType::DT_RESOURCE) { + continue; + } + (*user_to_source)[o] = src_node_info; + } +} + +Status PropagateFromArgOp( + const Node& n, const absl::optional& function_name, + const absl::flat_hash_set& resource_arg_indices, + absl::flat_hash_map* + user_to_source) { + TF_RET_CHECK(n.type_string() == kArgOp); + + int index; + TF_RETURN_IF_ERROR(GetNodeAttr(n.attrs(), "index", &index)); + if (!resource_arg_indices.contains(index)) return Status::OK(); + + TF_RET_CHECK(function_name.has_value()) + << "ResourceUsageAnalysis does not support 
analyzing _Arg nodes " + "carrying Stack/TensorArray resource in given graph unless they " + "are in function calls."; + + const ResourceUsageAnalysis::NodeInfo src_node_info(function_name, n.name(), + n.type_string()); + + for (const Edge* o : n.out_edges()) { + if (o->IsControlEdge()) continue; + if (o->dst()->input_type(o->dst_input()) != DataType::DT_RESOURCE) { + continue; + } + (*user_to_source)[o] = src_node_info; + } + + return Status::OK(); +} + +Status PropagateThroughCallOp( + const Node& n, const absl::optional& function_name, + const int call_depth, FunctionLibraryRuntime* lib_runtime, + absl::flat_hash_map* + user_to_source, + absl::flat_hash_map>* + source_to_path) { + if (call_depth > kMaxCallDepth) { + return errors::InvalidArgument( + "Function call stack in given graph is too deep, last function ", + "name is: ", function_name.value()); + } + // resource_arg_indices_for_call contains all indices of the input + // arguments that carry Stack/TensorArray resource handles. + absl::flat_hash_set resource_arg_indices_for_call; + for (const Edge* e : n.in_edges()) { + if (!user_to_source->contains(e)) continue; + resource_arg_indices_for_call.emplace(e->dst_input()); + } + + absl::string_view called_function_name = n.type_string(); + FunctionLibraryRuntime::Handle handle; + TF_RETURN_IF_ERROR(InstantiateFunctionCall(n.def(), lib_runtime, &handle)); + auto release_handle_on_return = gtl::MakeCleanup( + [&] { TF_CHECK_OK(lib_runtime->ReleaseHandle(handle)); }); + const FunctionBody* fbody = lib_runtime->GetFunctionBody(handle); + + // Recursively analyze called function for resource sources and users. 
+ absl::flat_hash_map> + called_function_source_to_path; + TF_RETURN_IF_ERROR(AnalyzeResourceUsage( + fbody->graph, absl::optional(called_function_name), + call_depth + 1, resource_arg_indices_for_call, lib_runtime, + &called_function_source_to_path)); + + std::unordered_map node_name_index = + fbody->graph->BuildNodeNameIndex(); + + for (auto it : called_function_source_to_path) { + ResourceUsageAnalysis::NodeInfo src_node_info = it.first; + + // If source is an _Arg, then the true source is actually corresponding + // edge that feeds into function call node with the same index. + if (src_node_info.op_ == kArgOp) { + const Node* arg_src = node_name_index[src_node_info.node_name_]; + int index; + TF_RETURN_IF_ERROR(GetNodeAttr(arg_src->attrs(), "index", &index)); + + const Edge* e; + TF_RETURN_IF_ERROR(n.input_edge(index, &e)); + const Node* true_src = e->src(); + src_node_info.function_name_ = function_name; + src_node_info.node_name_ = true_src->name(); + src_node_info.op_ = true_src->type_string(); + } + + for (const auto& dst_node_info : it.second) { + // If user is an _Retval, then the true user is actually corresponding + // edge of that _Retval. + if (dst_node_info.op_ == kRetvalOp) { + const Node* ret_user = node_name_index[dst_node_info.node_name_]; + int index; + TF_RETURN_IF_ERROR(GetNodeAttr(ret_user->attrs(), "index", &index)); + + absl::InlinedVector outs; + TF_ASSIGN_OR_RETURN(outs, OutputEdgesByIndex(n, index)); + for (const Edge* o : outs) (*user_to_source)[o] = src_node_info; + } else { + (*source_to_path)[src_node_info].emplace(dst_node_info); + } + } + } + + return Status::OK(); +} + +// Analyzes pass through values for Identity and IdentityN ops. 
+Status PropagateThroughIdentityOp( + const Node& n, + absl::flat_hash_map* + user_to_source) { + TF_RET_CHECK(n.IsIdentity() || n.type_string() == kIdentityNOp); + if (n.IsIdentity()) { + for (const Edge* o : n.out_edges()) { + if (o->IsControlEdge()) continue; + const Edge* in; + TF_RETURN_IF_ERROR(n.input_edge(0, &in)); + if (!user_to_source->contains(in)) continue; + user_to_source->emplace(std::make_pair(o, (*user_to_source)[in])); + } + } else { + for (const Edge* o : n.out_edges()) { + if (o->IsControlEdge()) continue; + const Edge* in; + TF_RETURN_IF_ERROR(n.input_edge(o->src_output(), &in)); + if (!user_to_source->contains(in)) continue; + user_to_source->emplace(std::make_pair(o, (*user_to_source)[in])); + } + } + + return Status::OK(); } Status AnalyzeResourceUsage( - const Graph* graph, FunctionLibraryRuntime* lib_runtime, - const absl::optional& function_name, const int call_depth, - const absl::flat_hash_set& resource_arg_indices, + const Graph* graph, const absl::optional& function_name, + const int call_depth, const absl::flat_hash_set& resource_arg_indices, + FunctionLibraryRuntime* lib_runtime, absl::flat_hash_map>* source_to_path) { @@ -127,120 +263,30 @@ Status AnalyzeResourceUsage( } // Record a resource source edge. - if (IsStackOrTensorArraySource(n)) { - ResourceUsageAnalysis::NodeInfo src_node_info(function_name, n->name(), - n->type_string()); - for (const Edge* o : n->out_edges()) { - if (o->IsControlEdge()) continue; - if (o->dst()->input_type(o->dst_input()) != DataType::DT_RESOURCE) { - continue; - } - user_to_source[o] = src_node_info; - } + if (IsStackOrTensorArraySource(*n)) { + PropagateFromStackOrTensorArraySourceOp(*n, function_name, + &user_to_source); continue; } // Arguments that are listed in resource_arg_indices are also considered as // resource sources. 
if (n->IsArg()) { - int index; - TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), "index", &index)); - if (!resource_arg_indices.contains(index)) continue; - - TF_RET_CHECK(function_name.has_value()) - << "ResourceUsageAnalysis does not support analyzing _Arg nodes " - "carrying Stack/TensorArray resource in given graph unless they " - "are in function calls."; - - const ResourceUsageAnalysis::NodeInfo src_node_info( - function_name, n->name(), n->type_string()); - - for (const Edge* o : n->out_edges()) { - if (o->IsControlEdge()) continue; - if (o->dst()->input_type(o->dst_input()) != DataType::DT_RESOURCE) { - continue; - } - user_to_source[o] = src_node_info; - } + TF_RETURN_IF_ERROR(PropagateFromArgOp( + *n, function_name, resource_arg_indices, &user_to_source)); continue; } + // Recursively analyze function call ops. if (IsFunctionCall(*lib_runtime->GetFunctionLibraryDefinition(), *n)) { - if (call_depth > kMaxCallDepth) { - return errors::InvalidArgument( - "Function call stack in given graph is too deep, last function ", - "name is: ", function_name.value()); - } - // resource_arg_indices_for_call contains all indices of the input - // arguments that carry Stack/TensorArray resource handles. - absl::flat_hash_set resource_arg_indices_for_call; - for (const Edge* e : n->in_edges()) { - if (!user_to_source.contains(e)) continue; - resource_arg_indices_for_call.emplace(e->dst_input()); - } - - absl::string_view called_function_name = n->type_string(); - FunctionLibraryRuntime::Handle handle; - TF_RETURN_IF_ERROR( - InstantiateFunctionCall(n->def(), lib_runtime, &handle)); - auto release_handle_on_return = gtl::MakeCleanup( - [&] { TF_CHECK_OK(lib_runtime->ReleaseHandle(handle)); }); - const FunctionBody* fbody = lib_runtime->GetFunctionBody(handle); - - // Recursively analyze called function for resource sources and users. 
- absl::flat_hash_map> - called_function_source_to_path; - TF_RETURN_IF_ERROR(AnalyzeResourceUsage( - fbody->graph, lib_runtime, - absl::optional(called_function_name), call_depth + 1, - resource_arg_indices_for_call, &called_function_source_to_path)); - - std::unordered_map node_name_index = - fbody->graph->BuildNodeNameIndex(); - - for (auto it : called_function_source_to_path) { - ResourceUsageAnalysis::NodeInfo src_node_info = it.first; - - // If source is an _Arg, then the true source is actually corresponding - // edge that feeds into function call node with the same index. - if (src_node_info.op_ == kArgOp) { - const Node* arg_src = node_name_index[src_node_info.node_name_]; - int index; - TF_RETURN_IF_ERROR(GetNodeAttr(arg_src->attrs(), "index", &index)); - - const Edge* e; - TF_RETURN_IF_ERROR(n->input_edge(index, &e)); - const Node* true_src = e->src(); - src_node_info.function_name_ = function_name; - src_node_info.node_name_ = true_src->name(); - src_node_info.op_ = true_src->type_string(); - } - - for (const auto& dst_node_info : it.second) { - // If user is an _Retval, then the true user is actually corresponding - // edge of that _Retval. 
- if (dst_node_info.op_ == kRetvalOp) { - const Node* ret_user = node_name_index[dst_node_info.node_name_]; - int index; - TF_RETURN_IF_ERROR(GetNodeAttr(ret_user->attrs(), "index", &index)); - - absl::InlinedVector outs; - TF_ASSIGN_OR_RETURN(outs, OutputEdgesByIndex(n, index)); - for (const Edge* o : outs) user_to_source[o] = src_node_info; - } else { - (*source_to_path)[src_node_info].emplace(dst_node_info); - } - } - } + TF_RETURN_IF_ERROR(PropagateThroughCallOp(*n, function_name, call_depth, + lib_runtime, &user_to_source, + source_to_path)); continue; } - for (const Edge* o : n->out_edges()) { - if (o->IsControlEdge()) continue; - TF_ASSIGN_OR_RETURN(const Edge* e, WalkBackPassThroughEdge(o)); - if (!e || !user_to_source.contains(e)) continue; - user_to_source.emplace(std::make_pair(o, user_to_source[e])); + if (n->IsIdentity() || n->type_string() == kIdentityNOp) { + TF_RETURN_IF_ERROR(PropagateThroughIdentityOp(*n, &user_to_source)); } } @@ -260,8 +306,9 @@ Status AnalyzeResourceUsage( absl::flat_hash_map>* source_to_path) { return AnalyzeResourceUsage( - graph, lib_runtime, /*function_name=*/{}, /*call_depth=*/0, - /*resource_arg_indices=*/absl::flat_hash_set(), source_to_path); + graph, /*function_name=*/{}, /*call_depth=*/0, + /*resource_arg_indices=*/absl::flat_hash_set(), lib_runtime, + source_to_path); } } // namespace tensorflow diff --git a/tensorflow/compiler/xla/g3doc/operation_semantics.md b/tensorflow/compiler/xla/g3doc/operation_semantics.md index f886a06be8b..8b425bd40a4 100644 --- a/tensorflow/compiler/xla/g3doc/operation_semantics.md +++ b/tensorflow/compiler/xla/g3doc/operation_semantics.md @@ -1367,12 +1367,12 @@ For a more intuitive description, see the "Informal Description" section below. : : : detailed description. : | `offset_dims` | `ArraySlice` | The set of dimensions in the | : : : output shape that offset into : -: : : a array sliced from operand. : +: : : an array sliced from operand. 
: | `slice_sizes` | `ArraySlice` | `slice_sizes[i]` is the | : : : bounds for the slice on : : : : dimension `i`. : | `collapsed_slice_dims` | `ArraySlice` | The set of dimensions in each | -: : : \: slice that are collapsed : +: : : slice that are collapsed : : : : away. These dimensions must : : : : have size 1. : | `start_index_map` | `ArraySlice` | A map that describes how to | @@ -1383,8 +1383,11 @@ For a more intuitive description, see the "Informal Description" section below. For convenience, we label dimensions in the output array not in `offset_dims` as `batch_dims`. -The output is an array of rank `batch_dims.size` + `operand.rank` - -`collapsed_slice_dims`.size. +The output is an array of rank `batch_dims.size` + `offset_dims.size`. + +The `operand.rank` must equal the sum of `offset_dims.size` and +`collapsed_slice_dims.size`. Also, `slice_sizes.size` has to be equal to +`operand.rank`. If `index_vector_dim` is equal to `start_indices.rank` we implicitly consider `start_indices` to have a trailing `1` dimension (i.e. if `start_indices` was of @@ -1405,61 +1408,65 @@ accounting for `collapsed_slice_dims` (i.e. we pick `adjusted_slice_sizes`[`k`] where `adjusted_slice_sizes` is `slice_sizes` with the bounds at indices `collapsed_slice_dims` removed). -Formally, the operand index `In` corresponding to an output index `Out` is -computed as follows: +Formally, the operand index `In` corresponding to a given output index `Out` is +calculated as follows: -1. Let `G` = { `Out`[`k`] for `k` in `batch_dims` }.
Use `G` to slice out a + vector `S` such that `S`[`i`] = `start_indices`[Combine(`G`, `i`)] where + Combine(A, b) inserts b at position `index_vector_dim` into A. Note that + this is well defined even if `G` is empty -- if `G` is empty then `S` = + `start_indices`. -2. Create a starting index, `S``in`, into `operand` using `S` by -scattering `S` using `start_index_map`. More precisely: -1. `S``in`[`start_index_map`[`k`]] = `S`[`k`] if `k` < -`start_index_map.size`. -2. `S``in`[`_`] = `0` otherwise. +2. Create a starting index, `S``in`, into `operand` using `S` by + scattering `S` using `start_index_map`. More precisely: -3. Create an index `O``in` into `operand` by scattering the indices -at the offset dimensions in `Out` according to the `collapsed_slice_dims` -set. More precisely: -1. `O``in`[`expand_offset_dims`(`k`)] = -`Out`[`offset_dims`[`k`]] if `k` < `offset_dims.size` -(`expand_offset_dims` is defined below). -2. `O``in`[`_`] = `0` otherwise. -4. `In` is `O``in` + `S``in` where + is element-wise -addition. + 1. `S``in`[`start_index_map`[`k`]] = `S`[`k`] if `k` < + `start_index_map.size`. -`expand_offset_dims` is the monotonic function with domain [`0`, `offset.size`) -and range [`0`, `operand.rank`) \ `collapsed_slice_dims`. So if, e.g., + 2. `S``in`[`_`] = `0` otherwise. + +3. Create an index `O``in` into `operand` by scattering the indices + at the offset dimensions in `Out` according to the `collapsed_slice_dims` + set. More precisely: + + 1. `O``in`[`remapped_offset_dims`(`k`)] = + `Out`[`offset_dims`[`k`]] if `k` < `offset_dims.size` + (`remapped_offset_dims` is defined below). + + 2. `O``in`[`_`] = `0` otherwise. + +4. `In` is `O``in` + `S``in` where + is element-wise + addition. + +`remapped_offset_dims` is a monotonic function with domain [`0`, `offset.size`) +and range [`0`, `operand.rank`) \ `collapsed_slice_dims`. 
So if, e.g., `offset.size` is `4`, `operand.rank` is `6` and `collapsed_slice_dims` is {`0`, -`2`} then `expand_offset_dims` is {`0`→`1`, `1`→`3`, `2`→`4`, `3`→`5`}. +`2`} then `remapped_offset_dims` is {`0`→`1`, `1`→`3`, `2`→`4`, `3`→`5`}. ### Informal Description and Examples Informally, every index `Out` in the output array corresponds to an element `E` in the operand array, computed as follows: -- We use the batch dimensions in `Out` to look up a starting index from -`start_indices`. +- We use the batch dimensions in `Out` to look up a starting index from + `start_indices`. -- We use `start_index_map` to map the starting index (which may have size less -than operand.rank) to a "full" starting index into operand. +- We use `start_index_map` to map the starting index (whose size may be less + than operand.rank) to a "full" starting index into the `operand`. -- We dynamic-slice out a slice with size `slice_sizes` using the full starting -index. +- We dynamic-slice out a slice with size `slice_sizes` using the full starting + index. -- We reshape the slice by collapsing the `collapsed_slice_dims` dimensions. -Since all collapsed slice dimensions have to have bound 1 this reshape is -always legal. +- We reshape the slice by collapsing the `collapsed_slice_dims` dimensions. + Since all collapsed slice dimensions must have a bound of 1, this reshape is + always legal. -- We use the offset dimensions in `Out` to index into this slice to get the -input element, `E`, corresponding to output index `Out`. +- We use the offset dimensions in `Out` to index into this slice to get the + input element, `E`, corresponding to output index `Out`. -`index_vector_dim` is set to `start_indices.rank` - `1` in all of the -examples that follow. More interesting values for `index_vector_dim` does not -change the operation fundamentally, but makes the visual representation more -cumbersome. +`index_vector_dim` is set to `start_indices.rank` - `1` in all of the examples +that follow. 
More interesting values for `index_vector_dim` do not change the +operation fundamentally, but make the visual representation more cumbersome. To get an intuition on how all of the above fits together, let's look at an example that gathers 5 slices of shape `[8,6]` from a `[16,11]` array. The @@ -1526,12 +1533,12 @@ As a final example, we use (2) and (3) to implement `tf.gather_nd`: `G``0` and `G``1` are used to slice out a starting index from the gather indices array as usual, except the starting index has only one -element, `X`. Similarly, there is only one output offset index with the value -`O``0`. However, before being used as indices into the input array, +element, `X`. Similarly, there is only one output offset index with the value +`O``0`. However, before being used as indices into the input array, these are expanded in accordance to "Gather Index Mapping" (`start_index_map` in -the formal description) and "Offset Mapping" (`expand_offset_dims` in the formal -description) into [`X`,`0`] and [`0`,`O``0`] respectively, adding up -to [`X`,`O``0`]. In other words, the output index +the formal description) and "Offset Mapping" (`remapped_offset_dims` in the +formal description) into [`X`,`0`] and [`0`,`O``0`] respectively, +adding up to [`X`,`O``0`]. In other words, the output index [`G``0`,`G``1`,`O``0`] maps to the input index [`GatherIndices`[`G``0`,`G``1`,`0`],`X`] which gives us the semantics for `tf.gather_nd`. 
diff --git a/tensorflow/compiler/xla/python/BUILD b/tensorflow/compiler/xla/python/BUILD index 5eed1aab07d..a6a1bd1830e 100644 --- a/tensorflow/compiler/xla/python/BUILD +++ b/tensorflow/compiler/xla/python/BUILD @@ -104,11 +104,49 @@ cc_library( ], ) +cc_library( + name = "event_pool", + srcs = ["event_pool.cc"], + hdrs = ["event_pool.h"], + deps = [ + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:types", + "//tensorflow/core:stream_executor", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/synchronization", + ], +) + +cc_library( + name = "semaphore", + srcs = ["semaphore.cc"], + hdrs = ["semaphore.h"], + deps = [ + "//tensorflow/compiler/xla:types", + "//tensorflow/core:lib", + "@com_google_absl//absl/synchronization", + ], +) + +tf_cc_test( + name = "semaphore_test", + srcs = ["semaphore_test.cc"], + deps = [ + ":semaphore", + "//tensorflow/compiler/xla:test", + "//tensorflow/core:lib", + "//tensorflow/core:test_main", + "@com_google_absl//absl/synchronization", + ], +) + cc_library( name = "shared_device_buffer", srcs = ["shared_device_buffer.cc"], hdrs = ["shared_device_buffer.h"], deps = [ + ":event_pool", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla/service:shaped_buffer", "//tensorflow/compiler/xla/service:transfer_manager", @@ -132,6 +170,23 @@ tf_cc_test( ], ) +cc_library( + name = "device", + srcs = ["device.cc"], + hdrs = ["device.h"], + deps = [ + ":event_pool", + ":semaphore", + ":worker_thread", + "//tensorflow/compiler/xla:status", + "//tensorflow/compiler/xla:util", + "//tensorflow/core:lib", + "//tensorflow/core:stream_executor", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/synchronization", + ], +) + cc_library( name = "local_client", srcs = [ @@ -149,9 +204,9 @@ cc_library( ], features = ["-use_header_modules"], deps = [ + ":device", ":shared_device_buffer", ":types", - ":worker_thread", 
"//tensorflow/compiler/xla:executable_run_options", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:literal_util", @@ -165,7 +220,6 @@ cc_library( "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client:xla_computation", "//tensorflow/compiler/xla/service:computation_placer", - "//tensorflow/compiler/xla/service:custom_call_target_registry", "//tensorflow/compiler/xla/service:platform_util", "//tensorflow/compiler/xla/service:shaped_buffer", "//tensorflow/core:bfc_allocator", @@ -199,10 +253,11 @@ tf_pybind_extension( ":local_client", ":shared_device_buffer", ":types", - ":worker_thread", ":xrt", + "@com_google_absl//absl/base", "@com_google_absl//absl/hash", "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", "@com_google_absl//absl/synchronization", "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", @@ -225,6 +280,7 @@ tf_pybind_extension( "//tensorflow/compiler/xla/client/lib:self_adjoint_eig", "//tensorflow/compiler/xla/client/lib:svd", "//tensorflow/compiler/xla/service:computation_placer", + "//tensorflow/compiler/xla/service:custom_call_target_registry", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/service:hlo_graph_dumper", diff --git a/tensorflow/compiler/xla/python/device.cc b/tensorflow/compiler/xla/python/device.cc new file mode 100644 index 00000000000..73df698a274 --- /dev/null +++ b/tensorflow/compiler/xla/python/device.cc @@ -0,0 +1,100 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/python/device.h" + +#include +#include + +#include "absl/memory/memory.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { + +Device::Device(se::StreamExecutor* executor, bool synchronous_deallocation, + bool asynchronous, bool allow_event_reuse) + : synchronous_deallocation_(synchronous_deallocation), + event_pool_(allow_event_reuse), + compute_semaphore_(/*capacity=*/asynchronous ? 32 : 1) { + compute_stream_ = absl::make_unique(executor); + host_to_device_stream_ = absl::make_unique(executor); + device_to_host_stream_ = absl::make_unique(executor); + callback_stream_ = absl::make_unique(executor); + compute_stream_->Init(); + host_to_device_stream_->Init(); + device_to_host_stream_->Init(); + callback_stream_->Init(); + device_to_device_streams_.reserve(kNumDeviceToDeviceStreams); + for (int i = 0; i < kNumDeviceToDeviceStreams; ++i) { + auto stream = absl::make_unique(executor); + stream->Init(); + device_to_device_streams_.push_back(std::move(stream)); + } + execute_thread_ = absl::make_unique(tensorflow::Env::Default(), + "py_xla_execute"); + callback_thread_ = absl::make_unique(tensorflow::Env::Default(), + "py_xla_callback"); +} + +Device::~Device() { + Status status = SynchronizeAllActivity(); + if (!status.ok()) { + LOG(ERROR) << "Error when closing device: " << status; + } +} + +Status Device::SynchronizeAllActivity() { + Status status; + // TODO(phawkins): in theory the call to 
SynchronizeAllActivity below should + // suffice. However on the Host platform SynchronizeAllActivity is a dummy + // implementation that doesn't actually block. To make sure activity has + // stopped, also block on the compute stream. If SynchronizeAllActivity is + // fixed, we could remove the BlockHostUntilDone call. + status.Update(compute_stream_->BlockHostUntilDone()); + bool ok = compute_stream_->parent()->SynchronizeAllActivity(); + if (!ok) { + status.Update(Unknown("SynchronizeAllActivity failed.")); + } + return status; +} + +Status Device::ThenMemcpyDeviceToDevice(se::Stream* src_stream, + se::Stream* dst_stream, + se::DeviceMemoryBase src_buffer, + se::DeviceMemoryBase dst_buffer) { + // The default implementation simply calls ThenMemcpyD2D, and assumes that + // the buffer addresses identify the devices. This does not work + // on all platforms; this method is virtual so it can be overridden. + src_stream->ThenMemcpyD2D(&dst_buffer, src_buffer, dst_buffer.size()); + return Status::OK(); +} + +void Device::ThenExecuteOnCallbackThread(se::Stream* stream, + std::function callback) const { + stream->ThenDoHostCallback([this, callback]() mutable { + callback_thread_->Schedule(std::move(callback)); + }); +} + +se::Stream* Device::GetDeviceToDeviceStream() { + absl::MutexLock lock(&mu_); + int i = next_device_to_device_stream_; + next_device_to_device_stream_ = + (next_device_to_device_stream_ + 1) % device_to_device_streams_.size(); + return device_to_device_streams_.at(i).get(); +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/python/device.h b/tensorflow/compiler/xla/python/device.h new file mode 100644 index 00000000000..f40c5df7c61 --- /dev/null +++ b/tensorflow/compiler/xla/python/device.h @@ -0,0 +1,134 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_DEVICE_H_ +#define TENSORFLOW_COMPILER_XLA_PYTHON_DEVICE_H_ + +#include +#include + +#include "absl/synchronization/mutex.h" +#include "tensorflow/compiler/xla/python/event_pool.h" +#include "tensorflow/compiler/xla/python/semaphore.h" +#include "tensorflow/compiler/xla/python/worker_thread.h" +#include "tensorflow/compiler/xla/status.h" +#include "tensorflow/core/platform/stream_executor.h" + +namespace xla { + +// Class that encapsulates state relating to a device (e.g., a GPU) on which we +// can perform computation and transfers. +class Device { + public: + // If synchronous_deallocation is true, the host must not free buffers until + // compute/transfers that use those buffers have completed. For example, this + // typically is the case for the "platform" where compute/transfers are + // operations that take place on another thread. + // + // If asynchronous is false, the host will synchronize to the device after + // each execution or transfer. This is intended for debugging only. 
+ Device(se::StreamExecutor* executor, bool synchronous_deallocation, + bool asynchronous, bool allow_event_reuse); + virtual ~Device(); + + bool synchronous_deallocation() const { return synchronous_deallocation_; } + + EventPool& event_pool() { return event_pool_; } + + se::Stream* compute_stream() const { return compute_stream_.get(); } + se::Stream* host_to_device_stream() const { + return host_to_device_stream_.get(); + } + se::Stream* device_to_host_stream() const { + return device_to_host_stream_.get(); + } + + // Returns a device to device stream. Allocates streams in a round-robin + // fashion amongst the available streams. + se::Stream* GetDeviceToDeviceStream(); + + // Enqueues a copy of `src_buffer` to `dst_buffer` onto `src_stream`. + virtual Status ThenMemcpyDeviceToDevice(se::Stream* src_stream, + se::Stream* dst_stream, + se::DeviceMemoryBase src_buffer, + se::DeviceMemoryBase dst_buffer); + + WorkerThread* execute_thread() const { return execute_thread_.get(); } + + // Enqueues a host callback on 'stream', to be executed by callback_thread_. + // ThenDoHostCallback is often constrained in what it can do, in particular, + // on GPU the callback runs on a thread belonging to the GPU runtime and + // cannot perform GPU operations itself. + void ThenExecuteOnCallbackThread(se::Stream* stream, + std::function callback) const; + + // Helpers for releasing values on a worker thread at the tail of a stream on + // a worker thread. Copies `object`, and destroys the copy when the tail of + // the stream is reached. The destruction happens either in the caller's + // thread or on the worker thread (depending on thread schedules), not a + // device callback, so it is safe if the destructor frees device resource + // (e.g., GPU objects). + // TODO(phawkins): use move-capture when we can use C++14 features. 
+ template + void ThenRelease(se::Stream* stream, T object) const { + if (callback_stream_.get() != stream) { + callback_stream_->ThenWaitFor(stream); + } + ThenExecuteOnCallbackThread(callback_stream_.get(), + [object]() { /* releases object */ }); + } + + Semaphore& compute_semaphore() { return compute_semaphore_; } + + private: + Status SynchronizeAllActivity(); + + bool synchronous_deallocation_; + + EventPool event_pool_; + + // Semaphore used to limit how many programs can be enqueued on the compute + // stream by the host ahead of the device. + Semaphore compute_semaphore_; + + std::unique_ptr compute_stream_; + std::unique_ptr host_to_device_stream_; + std::unique_ptr device_to_host_stream_; + std::vector> device_to_device_streams_; + + // Number of device-to-device streams to create in the multistream case. + static constexpr int kNumDeviceToDeviceStreams = 4; + + absl::Mutex mu_; + int next_device_to_device_stream_ GUARDED_BY(mu_) = 0; + + // Callback stream is used for running short host-side callbacks after device + // side events, without preventing the device-side stream from doing useful + // work. + std::unique_ptr callback_stream_; + + // A worker thread, used for replicated computation launches. + std::unique_ptr execute_thread_; + + // A worker thread, used for callbacks. It is necessary that this be a + // different thread to the execute thread because we acquire the compute + // semaphore during calls to Execute but release it from a callback and if + // they are the same thread we might deadlock. + std::unique_ptr callback_thread_; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_PYTHON_DEVICE_H_ diff --git a/tensorflow/compiler/xla/python/event_pool.cc b/tensorflow/compiler/xla/python/event_pool.cc new file mode 100644 index 00000000000..4edb41fd41f --- /dev/null +++ b/tensorflow/compiler/xla/python/event_pool.cc @@ -0,0 +1,52 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/python/event_pool.h" + +#include "absl/memory/memory.h" +#include "tensorflow/compiler/xla/status_macros.h" + +namespace xla { + +EventPool::Handle::~Handle() { + if (pool_ && event_) { + absl::MutexLock lock(&pool_->mu_); + pool_->free_events_.push(std::move(event_)); + } +} + +EventPool::EventPool(bool allow_reuse) : allow_reuse_(allow_reuse) {} + +StatusOr EventPool::ThenAllocateAndRecordEvent( + se::Stream* stream) { + Handle event; + + if (allow_reuse_) { + event.pool_ = this; + absl::MutexLock lock(&mu_); + if (!free_events_.empty()) { + event.event_ = std::move(free_events_.top()); + free_events_.pop(); + } + } + if (!event.event_) { + event.event_ = absl::make_unique(stream->parent()); + TF_RET_CHECK(event.event_->Init()) << "Event initialization failed"; + } + stream->ThenRecordEvent(event.event_.get()); + return event; +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/python/event_pool.h b/tensorflow/compiler/xla/python/event_pool.h new file mode 100644 index 00000000000..56787acd87e --- /dev/null +++ b/tensorflow/compiler/xla/python/event_pool.h @@ -0,0 +1,76 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_EVENT_POOL_H_ +#define TENSORFLOW_COMPILER_XLA_PYTHON_EVENT_POOL_H_ + +#include +#include + +#include "absl/synchronization/mutex.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/core/platform/stream_executor.h" + +namespace xla { + +class EventPool { + public: + class Handle { + public: + Handle() = default; + ~Handle(); + + Handle(const Handle&) = delete; + Handle(Handle&&) = default; + Handle& operator=(const Handle&) = delete; + Handle& operator=(Handle&&) = default; + + se::Event* event() const { return event_.get(); } + + private: + friend class EventPool; + + EventPool* pool_ = nullptr; + std::unique_ptr event_; + }; + + // Initializes a new EventPool. If `allow_reuse` is true, then events will be + // returned to the pool when their handles are deleted and made available to + // subsequent allocations. Reuse only works on the GPU platform. + explicit EventPool(bool allow_reuse); + + // Allocates a new (or reused) event from the pool, and records the event on + // `stream`. + // + // Reuse is only possible on GPU. Event allocation and recording are coupled + // in a single operation because on GPU it is recording an event that makes it + // a "new" event. 
According to the CUDA documentation it is safe to call + // cudaEventRecord even if that event may still be in use on the device; APIs + // such as cudaStreamWaitEvent capture the state of the event at the time of + // the host-side call and are not affected by a later host-side + // cudaEventRecord. + StatusOr ThenAllocateAndRecordEvent(se::Stream* stream); + + private: + const bool allow_reuse_; + + absl::Mutex mu_; + std::stack> free_events_ GUARDED_BY(mu_); +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_PYTHON_EVENT_POOL_H_ diff --git a/tensorflow/compiler/xla/python/local_client.cc b/tensorflow/compiler/xla/python/local_client.cc index d7f27d00f76..397562592c6 100644 --- a/tensorflow/compiler/xla/python/local_client.cc +++ b/tensorflow/compiler/xla/python/local_client.cc @@ -18,22 +18,39 @@ limitations under the License. // Asynchronous execution: // ----------------------- // -// If 'asynchronous' is set when constructing the client, computations and -// host-to-device transfers do not block the host waiting for the operation to -// complete but instead return control to the host immediately. This allows -// Python logic to overlap with device-side computation. +// Computations and host-to-device transfers do not need to block the host +// waiting for the operation to complete but instead return control to the host +// immediately. This allows Python logic to overlap with device-side +// computation. // // For a good user experience, we must be careful only to enqueue operations // that are unlikely to fail; as a rule error checking must be done eagerly // before returning control to the client. // +// The degree to which the client can enqueue operations ahead of the device +// is limited by a semaphore. There are two modes: asynchronous, where we +// allow the client to enqueue up to 32 executions ahead of the device, and +// synchronous, where we limit the client to having one enqueued operation at +// a time.
The value of 32 is arbitrary. +// +// Even in asynchronous mode, it is important that we do not permit +// unbounded queue-ahead. Firstly it is problematic when the user does something +// like the following in Python: +// %timeit run_computation() +// To the timeit logic, op() appears to be extremely cheap since it is deferring +// all of its real work and not blocking, and so the %timeit will run op() many +// (e.g., 10000) times to get better timing resolution, even though in reality +// it may be expensive. Secondly, on CPU the allocator is synchronized with the +// head of the compute stream, and we allocate buffers for all of the enqueued +// programs without any reuse (unlike GPU). This means that the memory usage +// is proportional to the queue size. +// // Multi-stream execution: // ----------------------- // -// On certain platforms (e.g., TPU), we use a multistream execution design, -// where different Streams are used for host-to-device transfers, -// device-to-host transfers, and compute. This allows us to overlap transfers on -// and off the device with computation. +// We use a multistream execution design, where different Streams are used for +// host-to-device transfers, device-to-host transfers, and compute. This allows +// us to overlap transfers on and off the device with computation. // // Synchronization between streams occurs via BufferDefinitionEvents that // describe when the contents of a logical buffer are known to be valid on @@ -66,9 +83,7 @@ limitations under the License. #include "absl/memory/memory.h" #include "absl/strings/str_format.h" -#include "absl/synchronization/blocking_counter.h" #include "absl/synchronization/mutex.h" -#include "absl/synchronization/notification.h" #include "absl/time/time.h" #include "include/pybind11/pybind11.h" #include "tensorflow/compiler/xla/client/client_library.h" @@ -78,12 +93,12 @@ limitations under the License. 
#include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/python/shared_device_buffer.h" #include "tensorflow/compiler/xla/python/types.h" -#include "tensorflow/compiler/xla/service/custom_call_target_registry.h" #include "tensorflow/compiler/xla/service/platform_util.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/common_runtime/bfc_allocator.h" +#include "tensorflow/core/common_runtime/gpu/gpu_host_allocator.h" #include "tensorflow/core/common_runtime/gpu/gpu_mem_allocator.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/lib/traceme.h" @@ -93,98 +108,6 @@ namespace xla { namespace py = pybind11; -// Registers a 'fn_capsule' as a CPU custom call target. -// 'fn_capsule' is a void* pointer encapsulated in a PyCapsule object, with name -// "xla._CPU_CUSTOM_CALL_TARGET". -Status RegisterCpuCustomCallTarget(const std::string& fn_name, - py::capsule capsule) { - static const char* const kName = "xla._CPU_CUSTOM_CALL_TARGET"; - if (absl::string_view(capsule.name()) != kName) { - return InvalidArgument( - "Argument to RegisterCpuCustomCallTargetRegistry was not a " - "xla._CPU_CUSTOM_CALL_TARGET capsule."); - } - CustomCallTargetRegistry::Global()->Register( - fn_name, static_cast(capsule), "Host"); - return Status::OK(); -} - -Device::Device(se::StreamExecutor* executor, bool use_multiple_streams, - bool synchronous_deallocation, bool asynchronous) - : use_multiple_streams_(use_multiple_streams), - synchronous_deallocation_(synchronous_deallocation), - asynchronous_(asynchronous) { - compute_stream_ = std::make_shared(executor); - compute_stream_->Init(); - if (use_multiple_streams) { - host_to_device_stream_ = std::make_shared(executor); - device_to_host_stream_ = std::make_shared(executor); - callback_stream_ = std::make_shared(executor); - host_to_device_stream_->Init(); - 
device_to_host_stream_->Init(); - callback_stream_->Init(); - device_to_device_streams_.reserve(kNumDeviceToDeviceStreams); - for (int i = 0; i < kNumDeviceToDeviceStreams; ++i) { - auto stream = std::make_shared(executor); - stream->Init(); - device_to_device_streams_.push_back(std::move(stream)); - } - } else { - callback_stream_ = host_to_device_stream_ = device_to_host_stream_ = - compute_stream_; - device_to_device_streams_.push_back(compute_stream_); - } - worker_thread_ = absl::make_unique(tensorflow::Env::Default(), - "py_xla_execute"); -} - -Device::~Device() { - Status status = SynchronizeAllActivity(); - if (!status.ok()) { - LOG(ERROR) << "Error when closing device: " << status; - } -} - -Status Device::SynchronizeAllActivity() { - Status status; - // TODO(phawkins): in theory the call to SynchronizeAllActivity below should - // suffice. However on the Host platform SynchronizeAllActivity is a dummy - // implementation that doesn't actually block. To make sure activity has - // stopped, also block on the compute stream. If SynchronizeAllActivity is - // fixed, we could remove the BlockHostUntilDone call. - status.Update(compute_stream_->BlockHostUntilDone()); - bool ok = compute_stream_->parent()->SynchronizeAllActivity(); - if (!ok) { - status.Update(Unknown("SynchronizeAllActivity failed.")); - } - return status; -} - -Status Device::ThenMemcpyDeviceToDevice(se::Stream* src_stream, - se::Stream* dst_stream, - se::DeviceMemoryBase src_buffer, - se::DeviceMemoryBase dst_buffer) { - // The default implementation simply calls ThenMemcpyD2D, and assumes that - // the buffer addresses identify the devices. This does not work - // on all platforms; this method is virtual so it can be overridden. 
- src_stream->ThenMemcpyD2D(&dst_buffer, src_buffer, dst_buffer.size()); - return Status::OK(); -} - -void Device::ThenExecuteOnWorkerThread(se::Stream* stream, - std::function callback) const { - stream->ThenDoHostCallback( - [this, callback]() { worker_thread_->Schedule(std::move(callback)); }); -} - -se::Stream* Device::GetDeviceToDeviceStream() { - absl::MutexLock lock(&mu_); - int i = next_device_to_device_stream_; - next_device_to_device_stream_ = - (next_device_to_device_stream_ + 1) % device_to_device_streams_.size(); - return device_to_device_streams_.at(i).get(); -} - static StatusOr> CreateBFCAllocator( se::Platform* platform, LocalClient* client, double memory_fraction, bool preallocate) { @@ -237,44 +160,57 @@ StatusOr> PyLocalClient::Get( options.set_platform(platform); TF_ASSIGN_OR_RETURN(LocalClient * client, ClientLibrary::GetOrCreateLocalClient(options)); + + bool gpu_platform = platform_name == "gpu"; std::unique_ptr allocator; - if (allocator_config.kind == AllocatorConfig::Kind::kBFC || - (platform_name == "gpu" && - allocator_config.kind == AllocatorConfig::Kind::kDefault)) { - if (platform_name != "gpu") { - return Unimplemented("BFCAllocator only available for GPU."); + std::unique_ptr host_memory_allocator; + if (gpu_platform) { + if (allocator_config.kind != AllocatorConfig::Kind::kPlatform) { + TF_ASSIGN_OR_RETURN( + allocator, + CreateBFCAllocator(platform, client, allocator_config.memory_fraction, + allocator_config.preallocate)); } - TF_ASSIGN_OR_RETURN( - auto bfc_allocator, - CreateBFCAllocator(platform, client, allocator_config.memory_fraction, - allocator_config.preallocate)); - allocator = std::move(bfc_allocator); + + tensorflow::SubAllocator* sub_allocator = new tensorflow::GpuHostAllocator( + client->backend().stream_executor(0).ValueOrDie(), /*numa_node=*/0, + /*alloc_visitors=*/{}, + /*free_visitors=*/{}); + // TODO(phawkins): allow the user to tune this. 
+ const int64 kGpuHostMemoryLimitBytes = 64 * (1LL << 30); + host_memory_allocator = absl::make_unique( + sub_allocator, kGpuHostMemoryLimitBytes, /*allow_growth=*/true, + /*name=*/"xla_gpu_host_bfc"); + + } else if (allocator_config.kind == AllocatorConfig::Kind::kBFC) { + return Unimplemented("BFCAllocator only available for GPU."); } std::vector> devices; devices.reserve(client->device_count()); - bool use_multiple_streams = (platform_name != "cpu"); - bool synchronous_deallocation = !use_multiple_streams; + bool synchronous_deallocation = platform_name == "cpu"; for (int i = 0; i < client->device_count(); ++i) { se::StreamExecutor* executor = client->backend().stream_executor(i).ValueOrDie(); - devices.push_back(absl::make_unique(executor, use_multiple_streams, - synchronous_deallocation, - asynchronous)); + devices.push_back(absl::make_unique( + executor, synchronous_deallocation, asynchronous, + /*allow_event_reuse=*/gpu_platform)); } - return std::make_shared(platform_name, client, - std::move(devices), - std::move(allocator), asynchronous); + return std::make_shared( + platform_name, client, std::move(devices), std::move(allocator), + std::move(host_memory_allocator)); } PyLocalClient::PyLocalClient( std::string platform_name, LocalClient* client, std::vector> devices, - std::unique_ptr allocator, bool asynchronous) + std::unique_ptr allocator, + std::unique_ptr host_memory_allocator) : platform_name_(std::move(platform_name)), client_(client), devices_(std::move(devices)), owned_allocator_(std::move(allocator)), + host_memory_allocator_(std::move(host_memory_allocator)), h2d_transfer_pool_(tensorflow::Env::Default(), "py_xla_h2d_transfer", client->device_count()) { if (owned_allocator_ != nullptr) { @@ -303,56 +239,6 @@ StatusOr PyLocalClient::TransferFromOutfeed( return LiteralToPython(std::make_shared(std::move(literal))); } -static StatusOr> TransferHostToDeviceAsync( - const PythonBufferTree& tree, int device_ordinal, - std::shared_ptr client, const 
Device& device) { - se::DeviceMemoryAllocator* allocator = client->allocator(); - TransferManager* transfer_manager = - client->client()->backend().transfer_manager(); - TF_ASSIGN_OR_RETURN( - Shape shape, transfer_manager->ChooseCompactLayoutForShape(tree.shape)); - TF_ASSIGN_OR_RETURN(ScopedShapedBuffer buffer, - transfer_manager->AllocateScopedShapedBuffer( - shape, allocator, device_ordinal)); - TF_RETURN_IF_ERROR(transfer_manager->WriteTupleIndexTablesAsync( - device.host_to_device_stream(), buffer)); - - auto it = tree.leaves.begin(); - for (const ShapeUtil::IndexedShape& indexed_shape : - ShapeUtil::GetLeafShapes(shape)) { - TF_RET_CHECK(it != tree.leaves.end()); - ShapedBuffer leaf( - indexed_shape.shape, - transfer_manager->HostShapeToDeviceShape(indexed_shape.shape), - client->client()->platform(), device_ordinal); - leaf.buffers().CopySubtreeFrom(buffer.buffers(), indexed_shape.index, {}); - if (device.use_multiple_streams() && - !transfer_manager->CanShapedBufferBeAccessedNow( - device.host_to_device_stream()->parent(), leaf)) { - device.host_to_device_stream()->ThenWaitFor(device.compute_stream()); - } - TF_RETURN_IF_ERROR(transfer_manager->TransferLiteralToDeviceAsync( - device.host_to_device_stream(), *it, leaf)); - ++it; - } - std::shared_ptr definition_event; - if (device.use_multiple_streams()) { - TF_ASSIGN_OR_RETURN(definition_event, - BufferDefinitionEvent::Create( - device.host_to_device_stream()->parent())); - definition_event->RecordOnStream(device.host_to_device_stream()); - } - std::shared_ptr device_buffer = - SharedDeviceBuffer::FromScopedShapedBuffer(std::move(buffer), - definition_event); - if (device.synchronous_deallocation()) { - device.ThenReleaseOnWorkerThread(device.host_to_device_stream(), - device_buffer); - } - return absl::make_unique(shape, std::move(device_buffer), - std::move(client)); -} - /* static */ StatusOr> PyLocalBuffer::FromPython( const py::object& argument, std::shared_ptr client, @@ -366,82 +252,83 @@ StatusOr> 
PyLocalBuffer::FromPython( // remain live until the transfer is complete. auto py_buffer_ref = client->py_ref_manager().ManageReferences(absl::MakeSpan(tree.arrays)); + tree.arrays.clear(); // We are done manipulating Python objects; release the GIL. py::gil_scoped_release gil_release; VLOG(1) << "PyLocalBuffer::FromPython: shape: " << tree.shape.ToString() << " device ordinal: " << device_ordinal; - const Device& device = client->device(device_ordinal); - TF_ASSIGN_OR_RETURN(std::unique_ptr buffer, - TransferHostToDeviceAsync(tree, device_ordinal, - std::move(client), device)); + Device* device = &client->device(device_ordinal); + TransferManager* transfer_manager = + client->client()->backend().transfer_manager(); + se::DeviceMemoryAllocator* allocator = client->allocator(); + TF_ASSIGN_OR_RETURN( + Shape shape, transfer_manager->ChooseCompactLayoutForShape(tree.shape)); + TF_ASSIGN_OR_RETURN(ScopedShapedBuffer buffer, + transfer_manager->AllocateScopedShapedBuffer( + shape, allocator, device_ordinal)); + TF_RETURN_IF_ERROR(transfer_manager->WriteTupleIndexTablesAsync( + device->host_to_device_stream(), buffer)); - device.ThenRelease(device.host_to_device_stream(), std::move(py_buffer_ref)); - return buffer; -} - -/*static */ StatusOr>> -PyLocalBuffer::FromPythonValues( - const std::vector>& arguments, - std::shared_ptr client) { - tensorflow::profiler::TraceMe traceme("PyLocalBuffer::FromPythonValues"); - int num_arguments = static_cast(arguments.size()); - std::vector> outputs(num_arguments); - if (num_arguments == 0) { - return outputs; - } - - struct H2DTransfer { - PythonBufferTree tree; - StatusOr> buffer; - PythonRefManager::ManagedPyObjects py_buffer_refs; - }; - - std::vector transfers(num_arguments); - for (int i = 0; i < num_arguments; ++i) { - TF_ASSIGN_OR_RETURN(transfers[i].tree, - GetPythonBufferTree(arguments[i].first)); - transfers[i].py_buffer_refs = client->py_ref_manager().ManageReferences( - absl::MakeSpan(transfers[i].tree.arrays)); - } - 
client->py_ref_manager().CollectGarbage(); - // We are done manipulating Python objects; release the GIL. - py::gil_scoped_release gil_release; - - auto transfer_h2d = [&](int i) -> StatusOr> { - int device_ordinal = arguments[i].second; - return TransferHostToDeviceAsync(transfers[i].tree, device_ordinal, client, - client->device(device_ordinal)); - }; - - // We perform the transfers on a thread pool in case XLA needs to do any - // host-side preprocessing of the input data. - if (num_arguments == 1) { - transfers[0].buffer = transfer_h2d(0); - } else { - absl::BlockingCounter counter(num_arguments); - for (int i = 0; i < num_arguments; ++i) { - client->h2d_transfer_pool()->Schedule([&, i]() { - transfers[i].buffer = transfer_h2d(i); - counter.DecrementCount(); - }); + std::vector> staging_buffers; + staging_buffers.reserve(tree.leaves.size()); + auto it = tree.leaves.begin(); + for (const ShapeUtil::IndexedShape& indexed_shape : + ShapeUtil::GetLeafShapes(shape)) { + TF_RET_CHECK(it != tree.leaves.end()); + ShapedBuffer leaf( + indexed_shape.shape, + transfer_manager->HostShapeToDeviceShape(indexed_shape.shape), + client->client()->platform(), device_ordinal); + leaf.buffers().CopySubtreeFrom(buffer.buffers(), indexed_shape.index, {}); + if (!transfer_manager->CanShapedBufferBeAccessedNow( + device->host_to_device_stream()->parent(), leaf)) { + device->host_to_device_stream()->ThenWaitFor(device->compute_stream()); } - counter.Wait(); + + // If applicable on the backend, stage the transfer via host memory + // allocated via the host_memory_allocator. On GPU, this is pinned memory. 
+ if (client->host_memory_allocator()) { + int64 size = it->size_bytes({}); + void* ptr = client->host_memory_allocator()->AllocateRaw( + tensorflow::Allocator::kAllocatorAlignment, size); + std::shared_ptr staging_buffer(ptr, [client](void* ptr) { + client->host_memory_allocator()->DeallocateRaw(ptr); + }); + std::memcpy(ptr, it->untyped_data({}), size); + BorrowingLiteral literal(static_cast(staging_buffer.get()), + it->shape()); + TF_RETURN_IF_ERROR(transfer_manager->TransferLiteralToDeviceAsync( + device->host_to_device_stream(), literal, leaf)); + staging_buffers.push_back(std::move(staging_buffer)); + } else { + // Otherwise, just transfer the literal. + TF_RETURN_IF_ERROR(transfer_manager->TransferLiteralToDeviceAsync( + device->host_to_device_stream(), *it, leaf)); + } + ++it; } - // Release our references once the transfers have completed. - for (int i = 0; i < num_arguments; ++i) { - int device_ordinal = arguments[i].second; - const Device& device = client->device(device_ordinal); - device.ThenRelease(device.host_to_device_stream(), - std::move(transfers[i].py_buffer_refs)); - } + auto definition_event = std::make_shared(); + TF_ASSIGN_OR_RETURN(EventPool::Handle event, + device->event_pool().ThenAllocateAndRecordEvent( + device->host_to_device_stream())); + definition_event->SetDefinitionEvent(std::move(event), + device->host_to_device_stream()); - for (int i = 0; i < num_arguments; ++i) { - TF_ASSIGN_OR_RETURN(outputs[i], std::move(transfers[i].buffer)); + std::shared_ptr device_buffer = + SharedDeviceBuffer::FromScopedShapedBuffer(std::move(buffer), + definition_event); + if (device->synchronous_deallocation()) { + device->ThenRelease(device->host_to_device_stream(), device_buffer); } - return outputs; + device->ThenRelease( + device->host_to_device_stream(), + std::make_pair(std::move(py_buffer_ref), std::move(staging_buffers))); + + return absl::make_unique(shape, std::move(device_buffer), + std::move(client)); } /* static */ StatusOr> 
PyLocalBuffer::MakeTuple( @@ -465,13 +352,9 @@ PyLocalBuffer::FromPythonValues( se::DeviceMemoryAllocator* allocator = client->allocator(); TransferManager* transfer_manager = client->client()->backend().transfer_manager(); - const Device& device = client->device(device_ordinal); - std::shared_ptr definition_event; - if (device.use_multiple_streams()) { - TF_ASSIGN_OR_RETURN(definition_event, - BufferDefinitionEvent::Create( - device.host_to_device_stream()->parent())); - } + Device& device = client->device(device_ordinal); + + auto definition_event = std::make_shared(); TF_ASSIGN_OR_RETURN( std::shared_ptr tuple_buffer, SharedDeviceBuffer::MakeTuple(device_buffers, transfer_manager, allocator, @@ -482,21 +365,22 @@ PyLocalBuffer::FromPythonValues( // TODO(phawkins): extend TransferManager so we do not need to form a full // ShapedBuffer just to write the root tuple index table. TF_ASSIGN_OR_RETURN(ShapedBuffer shaped_buffer, buffer->AsShapedBuffer()); - if (device.use_multiple_streams() && - !transfer_manager->CanShapedBufferBeAccessedNow( + if (!transfer_manager->CanShapedBufferBeAccessedNow( device.host_to_device_stream()->parent(), shaped_buffer)) { // Wait for the compute stream so that memory allocations are synchronized. 
device.host_to_device_stream()->ThenWaitFor(device.compute_stream()); } TF_RETURN_IF_ERROR(transfer_manager->WriteRootTupleIndexTable( device.host_to_device_stream(), shaped_buffer)); - if (definition_event) { - definition_event->RecordOnStream(device.host_to_device_stream()); - } + + TF_ASSIGN_OR_RETURN(EventPool::Handle event, + device.event_pool().ThenAllocateAndRecordEvent( + device.host_to_device_stream())); + definition_event->SetDefinitionEvent(std::move(event), + device.host_to_device_stream()); if (device.synchronous_deallocation()) { - device.ThenReleaseOnWorkerThread(device.host_to_device_stream(), - std::move(tuple_buffer)); + device.ThenRelease(device.host_to_device_stream(), std::move(tuple_buffer)); } return buffer; } @@ -629,8 +513,7 @@ StatusOr> PyLocalBuffer::CopyToDevice( ScopedShapedBuffer dst_buffer, transfer_manager->AllocateScopedShapedBuffer( on_host_shape_, client_->allocator(), dst_device_ordinal)); - if (dst_device.use_multiple_streams() && - !transfer_manager->CanShapedBufferBeAccessedNow( + if (!transfer_manager->CanShapedBufferBeAccessedNow( dst_device.compute_stream()->parent(), dst_buffer)) { src_device_to_device_stream->ThenWaitFor(dst_device.compute_stream()); } @@ -652,6 +535,10 @@ StatusOr> PyLocalBuffer::CopyToDevice( output_buffer)); } + // We hold on to the `src_device_buffer` until the transfer is finished. + src_device.ThenRelease(src_device_to_device_stream, + std::move(src_device_buffer)); + // Write new tuple buffers. The destination buffers have different addresses, // so we must construct tuple buffers from scratch instead of copying them. 
if (dst_buffer.on_device_shape().IsTuple()) { @@ -665,13 +552,12 @@ StatusOr> PyLocalBuffer::CopyToDevice( dst_device.host_to_device_stream()); } - std::shared_ptr definition_event; - if (dst_device.use_multiple_streams()) { - TF_ASSIGN_OR_RETURN( - definition_event, - BufferDefinitionEvent::Create(src_device_to_device_stream->parent())); - definition_event->RecordOnStream(src_device_to_device_stream); - } + auto definition_event = std::make_shared(); + TF_ASSIGN_OR_RETURN(EventPool::Handle event, + src_device.event_pool().ThenAllocateAndRecordEvent( + src_device_to_device_stream)); + definition_event->SetDefinitionEvent(std::move(event), + src_device_to_device_stream); std::shared_ptr dst_device_buffer = SharedDeviceBuffer::FromScopedShapedBuffer(std::move(dst_buffer), @@ -756,21 +642,21 @@ StatusOr> PyLocalExecutable::ExecuteHelper( << " buffer: " << argument_buffers.back().ToString(); } - const Device& device = client_->device(device_ordinal); - // The choice of where we wait in "synchronous" mode is arbitrary; the reason - // for the wait is pacing to avoid problems such as memory fragmentation, not - // for correctness. - if (!device.asynchronous()) { - TF_RETURN_IF_ERROR(device.compute_stream()->BlockHostUntilDone()); - } + Device* device = &client_->device(device_ordinal); + // The choice of where we wait is arbitrary; the reason for the wait is pacing + // to avoid problems such as memory fragmentation and running ahead too far, + // not for correctness. Placing it before the executable launch allows the + // inputs for the next executable to be fetched even if the launch is delayed. 
+ auto compute_reservation = std::make_shared( + device->compute_semaphore().ScopedAcquire(1)); for (BufferDefinitionEvent* event : events) { - event->WaitForEventOnStream(device.compute_stream()); + event->WaitForEventOnStream(device->compute_stream()); } ExecutableRunOptions options; - options.set_stream(device.compute_stream()); - options.set_host_to_device_stream(device.host_to_device_stream()); + options.set_stream(device->compute_stream()); + options.set_host_to_device_stream(device->host_to_device_stream()); options.set_allocator(client_->allocator()); options.set_intra_op_thread_pool( client_->client()->backend().eigen_intra_op_thread_pool_device()); @@ -787,24 +673,25 @@ StatusOr> PyLocalExecutable::ExecuteHelper( return result_buffer.status(); } - std::shared_ptr definition_event; - if (device.use_multiple_streams()) { - TF_ASSIGN_OR_RETURN( - definition_event, - BufferDefinitionEvent::Create(device.compute_stream()->parent())); - definition_event->RecordOnStream(device.compute_stream()); - } + auto definition_event = std::make_shared(); + TF_ASSIGN_OR_RETURN(EventPool::Handle event, + device->event_pool().ThenAllocateAndRecordEvent( + device->compute_stream())); + definition_event->SetDefinitionEvent(std::move(event), + device->compute_stream()); + Shape on_host_shape = result_buffer.ValueOrDie().on_host_shape(); std::shared_ptr out_buffer = SharedDeviceBuffer::FromScopedShapedBuffer( std::move(result_buffer.ValueOrDie()), definition_event); - if (device.synchronous_deallocation()) { + if (device->synchronous_deallocation()) { device_buffers.push_back(out_buffer); - device.ThenReleaseOnWorkerThread(device.compute_stream(), - std::move(device_buffers)); + device->ThenRelease(device->compute_stream(), std::move(device_buffers)); } - device.ThenReleaseOnWorkerThread(device.compute_stream(), executable_); + + device->ThenRelease(device->compute_stream(), + std::make_pair(executable_, compute_reservation)); return absl::make_unique(on_host_shape, 
std::move(out_buffer), client_); } @@ -853,7 +740,7 @@ PyLocalExecutable::ExecutePerReplica( for (int replica = 0; replica < num_replicas(); ++replica) { const int device_ordinal = device_assignment_(replica, 0); const Device& device = client_->device(device_ordinal); - device.worker_thread()->Schedule([&, replica] { + device.execute_thread()->Schedule([&, replica] { results[replica] = ExecuteHelper(argument_handles[replica], replica, run_id); diff --git a/tensorflow/compiler/xla/python/local_client.h b/tensorflow/compiler/xla/python/local_client.h index 4064e629c6f..8ad4c44d53f 100644 --- a/tensorflow/compiler/xla/python/local_client.h +++ b/tensorflow/compiler/xla/python/local_client.h @@ -16,7 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_XLA_PYTHON_LOCAL_CLIENT_H_ #define TENSORFLOW_COMPILER_XLA_PYTHON_LOCAL_CLIENT_H_ -#include +#include #include #include @@ -27,137 +27,19 @@ limitations under the License. #include "tensorflow/compiler/xla/client/executable_build_options.h" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/client/xla_computation.h" +#include "tensorflow/compiler/xla/python/device.h" #include "tensorflow/compiler/xla/python/python_ref_manager.h" #include "tensorflow/compiler/xla/python/shared_device_buffer.h" -#include "tensorflow/compiler/xla/python/worker_thread.h" #include "tensorflow/compiler/xla/service/computation_placer.h" #include "tensorflow/compiler/xla/service/shaped_buffer.h" #include "tensorflow/compiler/xla/shape.h" #include "tensorflow/compiler/xla/status.h" #include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/lib/core/status.h" namespace xla { -// Registers a 'fn_capsule' as a CPU custom call target. -// 'fn_capsule' is a void* pointer encapsulated in a PyCapsule object, with name -// "xla._CPU_CUSTOM_CALL_TARGET". 
-Status RegisterCpuCustomCallTarget(const std::string& fn_name, - pybind11::capsule capsule); - -// Class that encapsulates state relating to a device (e.g., a GPU) on which we -// can perform computation and transfers. -class Device { - public: - // If use_multiple_streams is true, we allocate separate streams for compute - // and transfers. If it is false, we share a single stream for compute and - // transfers. The CPU device does not support multiple streams, and this is - // a workaround until it does. - // - // If synchronous_deallocation is true, the host must not free buffers until - // compute/transfers that use those buffers have completed. For example, this - // typically is the case for the "platform" where compute/transfers are - // operations that take place on another thread. - // - // If asynchronous is false, the host will synchronize to the device after - // each execution or transfer. This is intended for debugging only. - Device(se::StreamExecutor* executor, bool use_multiple_streams, - bool synchronous_deallocation, bool asynchronous); - virtual ~Device(); - - bool use_multiple_streams() const { return use_multiple_streams_; } - bool synchronous_deallocation() const { return synchronous_deallocation_; } - bool asynchronous() const { return asynchronous_; } - se::Stream* compute_stream() const { return compute_stream_.get(); } - se::Stream* host_to_device_stream() const { - return host_to_device_stream_.get(); - } - se::Stream* device_to_host_stream() const { - return device_to_host_stream_.get(); - } - - // Returns a device to device stream. Allocates streams in a round-robin - // fashion amongst the available streams. - se::Stream* GetDeviceToDeviceStream(); - - // Enqueues a copy of `src_buffer` to `dst_buffer` onto `src_stream`. 
- virtual Status ThenMemcpyDeviceToDevice(se::Stream* src_stream, - se::Stream* dst_stream, - se::DeviceMemoryBase src_buffer, - se::DeviceMemoryBase dst_buffer); - - // A worker thread, used for replicated computation launches and callbacks. - WorkerThread* worker_thread() const { return worker_thread_.get(); } - - // Enqueues a host callback on 'stream', to be executed by worker_thread_. - // ThenDoHostCallback is often constrained in what it can do, in particular, - // on GPU the callback runs on a thread belonging to the GPU runtime and - // cannot perform GPU operations itself. - void ThenExecuteOnWorkerThread(se::Stream* stream, - std::function callback) const; - - // Helper for releasing values from a callback at the tail of a stream. - // This is only permitted if object's destructor will not free any device - // objects, since the callback may be called from a device thread pool on - // GPU. - template - void ThenRelease(se::Stream* stream, T object) const { - if (callback_stream_.get() != stream) { - callback_stream_->ThenWaitFor(stream); - } - callback_stream_->ThenDoHostCallback( - std::bind([](T& object) { /* releases object */ }, std::move(object))); - } - - // Helpers for releasing values on a worker thread at the tail of a stream on - // a worker thread. - template - void ThenReleaseOnWorkerThread(se::Stream* stream, - std::shared_ptr object) const { - // We use a non-smart pointer here because we want to ensure that the worker - // thread is the only callee of the shared_ptr destructor, and if we passed - // object by lambda capture we have a race where the worker thread might - // run and release its reference first. 
- auto* ref = new std::shared_ptr(std::move(object)); - if (callback_stream_.get() != stream) { - callback_stream_->ThenWaitFor(stream); - } - ThenExecuteOnWorkerThread(callback_stream_.get(), [ref]() { delete ref; }); - } - template - void ThenReleaseOnWorkerThread(se::Stream* stream, - std::vector> object) const { - auto* ref = new std::vector>(std::move(object)); - if (callback_stream_.get() != stream) { - callback_stream_->ThenWaitFor(stream); - } - ThenExecuteOnWorkerThread(callback_stream_.get(), [ref]() { delete ref; }); - } - - private: - Status SynchronizeAllActivity(); - - bool use_multiple_streams_; - bool synchronous_deallocation_; - bool asynchronous_; - std::shared_ptr compute_stream_; - std::shared_ptr host_to_device_stream_; - std::shared_ptr device_to_host_stream_; - std::vector> device_to_device_streams_; - - // Number of device-to-device streams to create in the multistream case. - static constexpr int kNumDeviceToDeviceStreams = 4; - - absl::Mutex mu_; - int next_device_to_device_stream_ GUARDED_BY(mu_) = 0; - - // Callback stream is used for running short host-side callbacks after device - // side events, without preventing the device-side stream from doing useful - // work. - std::shared_ptr callback_stream_; - - std::unique_ptr worker_thread_; -}; - struct AllocatorConfig { enum class Kind { kDefault, // Client picks the best option for the platform. @@ -188,10 +70,11 @@ class PyLocalClient { bool asynchronous, const AllocatorConfig& allocator_config); // `allocator` may null, in which case the platform default allocator is used. 
- explicit PyLocalClient(std::string platform_name, LocalClient* client, - std::vector> devices, - std::unique_ptr allocator, - bool asynchronous); + explicit PyLocalClient( + std::string platform_name, LocalClient* client, + std::vector> devices, + std::unique_ptr allocator, + std::unique_ptr host_memory_allocator); virtual ~PyLocalClient() = default; Status TransferToInfeed(const LiteralSlice& literal, int device_ordinal); @@ -204,6 +87,9 @@ class PyLocalClient { } LocalClient* client() const { return client_; } se::DeviceMemoryAllocator* allocator() const { return allocator_; } + tensorflow::Allocator* host_memory_allocator() const { + return host_memory_allocator_.get(); + } tensorflow::thread::ThreadPool* h2d_transfer_pool() { return &h2d_transfer_pool_; @@ -225,6 +111,11 @@ class PyLocalClient { se::DeviceMemoryAllocator* allocator_; std::unique_ptr owned_allocator_; + // Allocator to be used for staging memory transfers to devices. Optional; + // only used on GPU where it is more efficient to copy buffers to and from the + // device via a staging area of pinned memory. + std::unique_ptr host_memory_allocator_; + tensorflow::thread::ThreadPool h2d_transfer_pool_; }; @@ -241,12 +132,6 @@ class PyLocalBuffer { const pybind11::object& argument, std::shared_ptr client, int device_ordinal); - // Converts multiple (python object, device ordinal) pairs into - // PyLocalBuffers in parallel. 
- static StatusOr>> FromPythonValues( - const std::vector>& argument, - std::shared_ptr client); - static StatusOr> MakeTuple( const std::vector buffers, std::shared_ptr client, int device_ordinal); diff --git a/tensorflow/compiler/xla/python/python_ref_manager.cc b/tensorflow/compiler/xla/python/python_ref_manager.cc index 4ca6be7d09d..1e9cc58d090 100644 --- a/tensorflow/compiler/xla/python/python_ref_manager.cc +++ b/tensorflow/compiler/xla/python/python_ref_manager.cc @@ -37,9 +37,9 @@ PythonRefManager::ManagedPyObjects::~ManagedPyObjects() { } } -PythonRefManager::ManagedPyObjects PythonRefManager::ManageReferences( - absl::Span objects) { - return ManagedPyObjects(this, objects); +std::shared_ptr +PythonRefManager::ManageReferences(absl::Span objects) { + return std::make_shared(this, objects); } void PythonRefManager::CollectGarbage() { diff --git a/tensorflow/compiler/xla/python/python_ref_manager.h b/tensorflow/compiler/xla/python/python_ref_manager.h index 655f16a9921..8be19336a89 100644 --- a/tensorflow/compiler/xla/python/python_ref_manager.h +++ b/tensorflow/compiler/xla/python/python_ref_manager.h @@ -48,9 +48,9 @@ class PythonRefManager { ~ManagedPyObjects(); - ManagedPyObjects(const ManagedPyObjects& other) = default; + ManagedPyObjects(const ManagedPyObjects& other) = delete; ManagedPyObjects(ManagedPyObjects&& other) = default; - ManagedPyObjects& operator=(const ManagedPyObjects& other) = default; + ManagedPyObjects& operator=(const ManagedPyObjects& other) = delete; ManagedPyObjects& operator=(ManagedPyObjects&& other) = default; private: @@ -61,7 +61,8 @@ class PythonRefManager { // Creates a managed std::shared_ptr to an object. When the shared_ptr is // destroyed, the reference to 'object' will be added to python_garbage_, // and collected next time CollectGarbage() is called. - ManagedPyObjects ManageReferences(absl::Span objects); + std::shared_ptr ManageReferences( + absl::Span objects); // Releases the contents of python_garbage_. 
Requires that the GIL is held. // The client calls this method during API entry points where the GIL is held diff --git a/tensorflow/compiler/xla/python/semaphore.cc b/tensorflow/compiler/xla/python/semaphore.cc new file mode 100644 index 00000000000..5926618bddc --- /dev/null +++ b/tensorflow/compiler/xla/python/semaphore.cc @@ -0,0 +1,74 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/python/semaphore.h" + +#include "tensorflow/core/platform/logging.h" + +namespace xla { + +Semaphore::Semaphore(int64 capacity) : value_(capacity) { + CHECK_GE(capacity, 0); +} + +bool Semaphore::CanAcquire(CanAcquireArgs* args) { + return args->semaphore->value_ >= args->amount; +} + +void Semaphore::Acquire(int64 amount) { + CHECK_GE(amount, 0); + + CanAcquireArgs args; + args.semaphore = this; + args.amount = amount; + + mu_.LockWhen(absl::Condition(&CanAcquire, &args)); + value_ -= amount; + mu_.Unlock(); +} + +void Semaphore::Release(int64 amount) { + CHECK_GE(amount, 0); + absl::MutexLock lock(&mu_); + value_ += amount; +} + +Semaphore::ScopedReservation::~ScopedReservation() { + if (semaphore_) { + semaphore_->Release(amount_); + } +} + +Semaphore::ScopedReservation::ScopedReservation( + ScopedReservation&& other) noexcept { + semaphore_ = other.semaphore_; + amount_ = other.amount_; + other.semaphore_ = nullptr; +} 
+ +Semaphore::ScopedReservation& Semaphore::ScopedReservation::operator=( + ScopedReservation&& other) noexcept { + semaphore_ = other.semaphore_; + amount_ = other.amount_; + other.semaphore_ = nullptr; + return *this; +} + +Semaphore::ScopedReservation Semaphore::ScopedAcquire(int64 amount) { + Acquire(amount); + return ScopedReservation(this, amount); +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/python/semaphore.h b/tensorflow/compiler/xla/python/semaphore.h new file mode 100644 index 00000000000..4afd44f4cc0 --- /dev/null +++ b/tensorflow/compiler/xla/python/semaphore.h @@ -0,0 +1,67 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_SEMAPHORE_H_ +#define TENSORFLOW_COMPILER_XLA_PYTHON_SEMAPHORE_H_ + +#include "absl/synchronization/mutex.h" +#include "tensorflow/compiler/xla/types.h" + +namespace xla { + +class Semaphore { + public: + explicit Semaphore(int64 capacity); + + // Acquires `amount` units. Blocks until `amount` units are available. + void Acquire(int64 amount); + + // Returns `amount` units to the semaphore. 
+ void Release(int64 amount); + + class ScopedReservation { + public: + ScopedReservation(Semaphore* semaphore, int64 amount) + : semaphore_(semaphore), amount_(amount) {} + ~ScopedReservation(); + + ScopedReservation(const ScopedReservation&) = delete; + ScopedReservation(ScopedReservation&& other) noexcept; + ScopedReservation& operator=(const ScopedReservation&) = delete; + ScopedReservation& operator=(ScopedReservation&& other) noexcept; + + private: + Semaphore* semaphore_; + int64 amount_; + }; + // RAII version of Acquire. Releases the reservation when the + // ScopedReservation is destroyed. + ScopedReservation ScopedAcquire(int64 amount); + + private: + struct CanAcquireArgs { + Semaphore* semaphore; + int64 amount; + }; + static bool CanAcquire(CanAcquireArgs* args) + EXCLUSIVE_LOCKS_REQUIRED(args->semaphore->mu_); + + absl::Mutex mu_; + int64 value_ GUARDED_BY(mu_); +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_PYTHON_SEMAPHORE_H_ diff --git a/tensorflow/compiler/xla/python/semaphore_test.cc b/tensorflow/compiler/xla/python/semaphore_test.cc new file mode 100644 index 00000000000..5ef59618b8b --- /dev/null +++ b/tensorflow/compiler/xla/python/semaphore_test.cc @@ -0,0 +1,74 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/python/semaphore.h" + +#include "absl/synchronization/notification.h" +#include "tensorflow/compiler/xla/test.h" +#include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/platform/env.h" + +namespace xla { +namespace { + +TEST(SemaphoreTest, UnthreadedTests) { + Semaphore semaphore(2); + semaphore.Acquire(1); + semaphore.Release(1); + + semaphore.Acquire(2); + semaphore.Release(2); + + semaphore.Acquire(1); + semaphore.Acquire(1); + semaphore.Release(1); + semaphore.Acquire(1); + semaphore.Release(1); + semaphore.Acquire(1); + semaphore.Release(2); + + { + auto a = semaphore.ScopedAcquire(1); + { auto b = semaphore.ScopedAcquire(1); } + { auto c = semaphore.ScopedAcquire(1); } + } +} + +TEST(SemaphoreTest, ConcurrentTest) { + tensorflow::thread::ThreadPool pool(tensorflow::Env::Default(), "test", 2); + Semaphore semaphore(2); + semaphore.Acquire(1); + + absl::Notification a_done; + pool.Schedule([&]() { + semaphore.Acquire(2); + semaphore.Release(2); + a_done.Notify(); + }); + + absl::Notification b_done; + pool.Schedule([&]() { + semaphore.Acquire(1); + semaphore.Release(1); + b_done.Notify(); + }); + b_done.WaitForNotification(); + EXPECT_FALSE(a_done.HasBeenNotified()); + semaphore.Release(1); + a_done.WaitForNotification(); +} + +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/python/shared_device_buffer.cc b/tensorflow/compiler/xla/python/shared_device_buffer.cc index fd85145eddc..40fc8f1cc65 100644 --- a/tensorflow/compiler/xla/python/shared_device_buffer.cc +++ b/tensorflow/compiler/xla/python/shared_device_buffer.cc @@ -21,21 +21,12 @@ limitations under the License. 
namespace xla { -/*static*/ StatusOr> -BufferDefinitionEvent::Create(se::StreamExecutor* executor) { - auto event = std::make_shared(executor); - TF_RET_CHECK(event->event_.Init()) - << "Buffer definition event initialization failed"; - return event; -} - -BufferDefinitionEvent::BufferDefinitionEvent(se::StreamExecutor* executor) - : event_(executor) {} - -void BufferDefinitionEvent::RecordOnStream(se::Stream* stream) { +void BufferDefinitionEvent::SetDefinitionEvent(EventPool::Handle event, + se::Stream* stream) { absl::MutexLock lock(&mu_); + CHECK(!event_.event()); + event_ = std::move(event); CHECK(streams_defined_on_.empty()); - stream->ThenRecordEvent(&event_); streams_defined_on_.push_back(stream); } @@ -50,7 +41,7 @@ void BufferDefinitionEvent::WaitForEventOnStream(se::Stream* stream) { return; } - stream->ThenWaitFor(&event_); + stream->ThenWaitFor(event_.event()); streams_defined_on_.push_back(stream); } diff --git a/tensorflow/compiler/xla/python/shared_device_buffer.h b/tensorflow/compiler/xla/python/shared_device_buffer.h index 469237488b4..ea4aa67bff9 100644 --- a/tensorflow/compiler/xla/python/shared_device_buffer.h +++ b/tensorflow/compiler/xla/python/shared_device_buffer.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_COMPILER_XLA_PYTHON_SHARED_DEVICE_BUFFER_H_ #include "absl/container/flat_hash_set.h" +#include "tensorflow/compiler/xla/python/event_pool.h" #include "tensorflow/compiler/xla/service/shaped_buffer.h" #include "tensorflow/compiler/xla/service/transfer_manager.h" #include "tensorflow/compiler/xla/shape.h" @@ -50,14 +51,11 @@ namespace xla { // same stream causes no additional waiting. class BufferDefinitionEvent { public: - // Creates a new definition event whose event has not yet been triggered. - static StatusOr> Create( - se::StreamExecutor* executor); + BufferDefinitionEvent() = default; - explicit BufferDefinitionEvent(se::StreamExecutor* executor); - - // Records the definition event on the tail of 'stream'. 
- void RecordOnStream(se::Stream* stream); + // Sets the definition event of the buffer to 'event', which is recorded + // on 'stream'. Must be called at most once. + void SetDefinitionEvent(EventPool::Handle event, se::Stream* stream); // Adds synchronization events to 'stream' that wait for this event to be // defined on 'stream'. Does nothing if the event is already known to have @@ -68,7 +66,7 @@ class BufferDefinitionEvent { // An event that is triggered when the content of one or more buffers is // ready. If this event is nullptr, it is assumed that the buffer's content is // always defined. - se::Event event_; + EventPool::Handle event_; absl::Mutex mu_; diff --git a/tensorflow/compiler/xla/python/xla.cc b/tensorflow/compiler/xla/python/xla.cc index 5dc102f785e..172e24f801e 100644 --- a/tensorflow/compiler/xla/python/xla.cc +++ b/tensorflow/compiler/xla/python/xla.cc @@ -13,10 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include #include #include +#include "absl/base/casts.h" #include "absl/hash/hash.h" +#include "absl/strings/string_view.h" #include "absl/synchronization/mutex.h" #include "absl/types/optional.h" #include "absl/types/span.h" @@ -34,6 +37,7 @@ limitations under the License. #include "tensorflow/compiler/xla/python/local_client.h" #include "tensorflow/compiler/xla/python/types.h" #include "tensorflow/compiler/xla/python/xrt.h" +#include "tensorflow/compiler/xla/service/custom_call_target_registry.h" #include "tensorflow/compiler/xla/service/hlo_graph_dumper.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" @@ -104,6 +108,22 @@ StatusOr GetComputationHloDotGraph( RenderedGraphFormat::kDot); } +// Registers a 'fn_capsule' as a CPU custom call target. 
+// 'fn_capsule' is a void* pointer encapsulated in a PyCapsule object, with name +// "xla._CPU_CUSTOM_CALL_TARGET". +Status RegisterCpuCustomCallTarget(const std::string& fn_name, + py::capsule capsule) { + static const char* const kName = "xla._CPU_CUSTOM_CALL_TARGET"; + if (absl::string_view(capsule.name()) != kName) { + return InvalidArgument( + "Argument to RegisterCpuCustomCallTargetRegistry was not a " + "xla._CPU_CUSTOM_CALL_TARGET capsule."); + } + CustomCallTargetRegistry::Global()->Register( + fn_name, static_cast(capsule), "Host"); + return Status::OK(); +} + } // namespace PYBIND11_MODULE(xla_extension, m) { @@ -297,7 +317,6 @@ PYBIND11_MODULE(xla_extension, m) { py::class_(m, "PyLocalBuffer") .def_static("from_python", &PyLocalBuffer::FromPython) - .def_static("from_python_values", &PyLocalBuffer::FromPythonValues) .def_static("make_tuple", &PyLocalBuffer::MakeTuple) .def("copy_to_device", &PyLocalBuffer::CopyToDevice) .def("delete", &PyLocalBuffer::Delete) @@ -307,9 +326,22 @@ PYBIND11_MODULE(xla_extension, m) { .def("to_py", &PyLocalBuffer::ToPython) .def("shape", &PyLocalBuffer::on_host_shape) .def("device", &PyLocalBuffer::device_ordinal) - .def("is_deleted", [](const PyLocalBuffer& buffer) { - return buffer.DeviceBuffer() == nullptr; - }); + .def("is_deleted", + [](const PyLocalBuffer& buffer) { + return buffer.DeviceBuffer() == nullptr; + }) + .def("unsafe_buffer_pointer", + [](const PyLocalBuffer& buffer) -> StatusOr { + TF_ASSIGN_OR_RETURN(ShapedBuffer shaped_buffer, + buffer.AsShapedBuffer()); + if (shaped_buffer.on_device_shape().IsTuple()) { + return Unimplemented( + "unsafe_buffer_pointer is not implemented for tuple " + "buffers."); + } + return absl::bit_cast( + shaped_buffer.root_buffer().opaque()); + }); py::class_(m, "LocalExecutable") .def_static("Compile", &PyLocalExecutable::Compile, diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index 0af9c0db62d..7e5692fef30 100644 --- 
a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -98,10 +98,6 @@ class LocalBackend(Backend): def buffer_from_pyval(self, pyval, device=0): return _xla.PyLocalBuffer.from_python(pyval, self.client, device) - def buffers_from_pyvals(self, pyvals_and_devices): - return _xla.PyLocalBuffer.from_python_values(pyvals_and_devices, - self.client) - def make_tuple(self, c_buffers, device_ordinal): return _xla.PyLocalBuffer.make_tuple(c_buffers, self.client, device_ordinal) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 6457a49e0e8..ba1f43a6451 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -644,6 +644,7 @@ cc_library( hdrs = ["call_inliner.h"], deps = [ ":call_graph", + ":hlo", ":hlo_dce", ":hlo_pass", "//tensorflow/compiler/xla:statusor", @@ -1095,50 +1096,6 @@ tf_cc_test( ], ) -cc_library( - name = "buffer_liveness", - srcs = [ - "buffer_liveness.cc", - ], - hdrs = [ - "buffer_liveness.h", - ], - deps = [ - ":hlo", - ":hlo_ordering", - ":logical_buffer", - ":tuple_points_to_analysis", - "//tensorflow/compiler/xla:shape_util", - "//tensorflow/compiler/xla:status_macros", - "//tensorflow/compiler/xla:statusor", - "//tensorflow/compiler/xla:types", - "//tensorflow/compiler/xla:util", - "//tensorflow/core:lib", - "@com_google_absl//absl/container:flat_hash_set", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/strings:str_format", - ], -) - -tf_cc_test( - name = "buffer_liveness_test", - srcs = ["buffer_liveness_test.cc"], - deps = [ - ":buffer_liveness", - ":hlo", - ":hlo_dataflow_analysis", - ":hlo_parser", - "//tensorflow/compiler/xla:shape_util", - "//tensorflow/compiler/xla:types", - "//tensorflow/compiler/xla:util", - "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/tests:hlo_test_base", - "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/core:test", - 
"@com_google_absl//absl/memory", - ], -) - cc_library( name = "buffer_assignment", srcs = [ @@ -1148,7 +1105,6 @@ cc_library( "buffer_assignment.h", ], deps = [ - ":buffer_liveness", ":buffer_value_containers", ":heap_simulator", ":hlo", @@ -2785,7 +2741,6 @@ cc_library( srcs = ["copy_insertion.cc"], hdrs = ["copy_insertion.h"], deps = [ - ":buffer_liveness", ":dump", ":hlo", ":hlo_alias_analysis", @@ -2907,7 +2862,6 @@ cc_library( srcs = ["hlo_rematerialization.cc"], hdrs = ["hlo_rematerialization.h"], deps = [ - ":buffer_liveness", ":buffer_value", ":call_graph", ":flatten_call_graph", @@ -3626,7 +3580,6 @@ cc_library( srcs = ["reduce_precision_insertion.cc"], hdrs = ["reduce_precision_insertion.h"], deps = [ - ":buffer_liveness", ":hlo", ":hlo_pass", ":hlo_pass_pipeline", @@ -3946,7 +3899,6 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "@com_google_absl//absl/algorithm:container", - "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 0290d0cf26b..53a2a57617c 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -168,13 +168,8 @@ bool IsUnstridedSlice(const HloInstruction* hlo) { // algebraic expressions to simplified forms. Note: This only supports // simplifications that simply look at the operands of an instruction. For the // more general case a worklist based approach would be needed. -class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault { +class AlgebraicSimplifierVisitor : public DfsHloRewriteVisitor { public: - // Default visitor action is to do nothing and return OK. 
- Status DefaultAction(HloInstruction* /*hlo_instruction*/) override { - return Status::OK(); - } - Status HandleAdd(HloInstruction* add) override; Status HandleAnd(HloInstruction* logical_and) override; @@ -250,9 +245,6 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault { Status HandleMap(HloInstruction* map) override; - // Returns whether algebraic simplification has occurred. - const bool changed() const { return changed_; } - // Runs the visitor on a computation. static bool Run(HloComputation* computation, const AlgebraicSimplifierOptions& options, @@ -350,35 +342,6 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault { StatusOr TryToSinkBroadcastAfterOpWithUniqueNonScalarOperand( HloInstruction* broadcast); - // Replaces the existing HLO instruction old_instruction, with - // new_instruction, and marks the optimizer status as changed. - // Returns the Status representing the result of the replace operation. - Status ReplaceWithNewInstruction( - HloInstruction* old_instruction, - std::unique_ptr new_instruction) { - VLOG(3) << "Replacing instruction:"; - VLOG(3) << " old: " << old_instruction->ToString(); - VLOG(3) << " new: " << new_instruction->ToString(); - TF_RETURN_IF_ERROR(computation_->ReplaceWithNewInstruction( - old_instruction, std::move(new_instruction))); - changed_ = true; - return Status::OK(); - } - - // Replaces the existing HLO instruction old_instruction, with - // new_instruction, and marks the optimizer status as changed. - // Returns the Status representing the result of the replace operation. 
- Status ReplaceInstruction(HloInstruction* old_instruction, - HloInstruction* new_instruction) { - VLOG(3) << "Replacing instruction:"; - VLOG(3) << " old: " << old_instruction->ToString(); - VLOG(3) << " new: " << new_instruction->ToString(); - TF_RETURN_IF_ERROR( - computation_->ReplaceInstruction(old_instruction, new_instruction)); - changed_ = true; - return Status::OK(); - } - StatusOr OptimizeDotOfConcat(HloInstruction* dot); StatusOr OptimizeDotOfConcatHelper( const HloInstruction& dot, HloInstruction* lhs, int64 lhs_contracting_dim, @@ -445,7 +408,7 @@ bool AlgebraicSimplifierVisitor::Run(HloComputation* computation, AlgebraicSimplifier* simplifier) { AlgebraicSimplifierVisitor visitor(computation, options, simplifier); TF_CHECK_OK(computation->Accept(&visitor)); - return visitor.changed_; + return visitor.changed_ || visitor.changed(); } bool AlgebraicSimplifierVisitor::SameShape(const HloInstruction* lhs, @@ -1723,6 +1686,7 @@ AlgebraicSimplifierVisitor::OptimizeDotOfReorderContractingDims( } Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { + CHECK(computation_ == dot->parent()); HloInstruction *lhs, *rhs; CHECK(Match(dot, m::Dot(m::Op(&lhs), m::Op(&rhs)))); if (options_.is_layout_sensitive()) { @@ -2660,6 +2624,11 @@ Status AlgebraicSimplifierVisitor::HandleRemainder(HloInstruction* remainder) { HloInstruction *a, *b; CHECK(Match(remainder, m::Remainder(m::Op(&a), m::Op(&b)))); + // (A % B) % B == A % B. + if (Match(a, m::Remainder(m::Op(), m::Op().Is(b)))) { + return ReplaceInstruction(remainder, a); + } + // A % B => A & (B - 1) if B is a power of 2. 
switch (remainder->shape().element_type()) { case S8: diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 31b0e16c2a0..05d57cf3ba2 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -5503,5 +5503,20 @@ TEST_F(AlgebraicSimplifierTest, RemainderOfNPlusIotaOverflow) { ASSERT_FALSE(AlgebraicSimplifier(default_options_).Run(m.get()).ValueOrDie()); } +TEST_F(AlgebraicSimplifierTest, RepeatedRemainder) { + const char* kModuleStr = R"( + HloModule m + test { + p = s32[1000] parameter(0) + q = s32[1000] parameter(1) + r = s32[1000] remainder(p, q) + ROOT rr = s32[1000] remainder(r, q) + })"; + TF_ASSERT_OK_AND_ASSIGN(auto m, ParseAndReturnVerifiedModule(kModuleStr)); + ASSERT_TRUE(AlgebraicSimplifier(default_options_).Run(m.get()).ValueOrDie()); + EXPECT_THAT(m->entry_computation()->root_instruction(), + GmockMatch(m::Remainder(m::Parameter(), m::Parameter()))); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/batchnorm_expander.cc b/tensorflow/compiler/xla/service/batchnorm_expander.cc index d14e803be6a..131b50efc9c 100644 --- a/tensorflow/compiler/xla/service/batchnorm_expander.cc +++ b/tensorflow/compiler/xla/service/batchnorm_expander.cc @@ -46,13 +46,8 @@ using absl::optional; // BatchNormExpanderVisitor traverses the HLO computation and rewrites BatchNorm // operations into smaller operations. -class BatchNormExpanderVisitor : public DfsHloVisitorWithDefault { +class BatchNormExpanderVisitor : public DfsHloRewriteVisitor { public: - // Default visitor action is to do nothing and return OK. 
- Status DefaultAction(HloInstruction* /*hlo_instruction*/) override { - return Status::OK(); - } - Status HandleBatchNormTraining(HloInstruction* batch_norm) override; Status HandleBatchNormInference(HloInstruction* batch_norm) override; @@ -63,9 +58,6 @@ class BatchNormExpanderVisitor : public DfsHloVisitorWithDefault { static bool Run(HloComputation* computation, bool rewrite_training_op, bool rewrite_inference_op, bool rewrite_grad_op); - // Returns whether any batch norm ops were rewritten. - const bool changed() const { return changed_; } - ~BatchNormExpanderVisitor() override = default; private: @@ -133,28 +125,6 @@ class BatchNormExpanderVisitor : public DfsHloVisitorWithDefault { elements_per_feature_u32); } - // Replaces the existing HLO instruction old_instruction, with - // new_instruction, and marks the optimizer status as changed. - // Returns the Status representing the result of the replace operation. - Status ReplaceWithNewInstruction( - HloInstruction* old_instruction, - std::unique_ptr new_instruction) { - TF_RETURN_IF_ERROR(computation_->ReplaceWithNewInstruction( - old_instruction, std::move(new_instruction))); - changed_ = true; - return Status::OK(); - } - - // Replaces the existing HLO instruction old_instruction, with - // new_instruction, and marks the optimizer status as changed. - // Returns the Status representing the result of the replace operation. - Status ReplaceInstruction(HloInstruction* old_instruction, - HloInstruction* new_instruction) { - TF_RETURN_IF_ERROR( - computation_->ReplaceInstruction(old_instruction, new_instruction)); - changed_ = true; - return Status::OK(); - } // Current HloComputation instance the BatchNormExpander is // traversing. HloComputation* computation_; @@ -162,9 +132,6 @@ class BatchNormExpanderVisitor : public DfsHloVisitorWithDefault { bool rewrite_training_op_; bool rewrite_inference_op_; bool rewrite_grad_op_; - - // Whether rewrite has occurred. 
- bool changed_ = false; }; } // namespace @@ -179,7 +146,7 @@ bool BatchNormExpanderVisitor::Run(HloComputation* computation, /*rewrite_inference_op=*/rewrite_inference_op, /*rewrite_grad_op=*/rewrite_grad_op); TF_CHECK_OK(computation->Accept(&visitor)); - return visitor.changed_; + return visitor.changed(); } Status BatchNormExpanderVisitor::HandleBatchNormTraining( diff --git a/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc b/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc index 430172b474c..23d2a9225a8 100644 --- a/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc +++ b/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/bfloat16_conversion_folding.h" #include "absl/types/span.h" +#include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization.cc b/tensorflow/compiler/xla/service/bfloat16_normalization.cc index 85e1113bf77..f1ab34d6141 100644 --- a/tensorflow/compiler/xla/service/bfloat16_normalization.cc +++ b/tensorflow/compiler/xla/service/bfloat16_normalization.cc @@ -16,6 +16,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/bfloat16_normalization.h" #include "absl/types/span.h" +#include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc index 1e353c11445..3ae7235d887 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment.cc @@ -656,7 +656,7 @@ Status BufferAssignment::ComputeSummaryStats() { for (const auto& computation : module_->computations()) { if (!computation->IsFusionComputation()) { const HloInstructionSequence* sequence = - liveness_->hlo_ordering().SequentialOrder(*computation); + hlo_ordering().SequentialOrder(*computation); if (sequence == nullptr) { schedule_complete = false; } else { @@ -833,7 +833,7 @@ bool BufferAssigner::MaybeAssignBuffer(BufferAllocation* allocation, const HloValue& assigned_buffer = *CHECK_NOTNULL(dynamic_cast(buffer_offset_size.first)); for (const HloValue* new_value : hlo_buffer.values()) { - if (assignment->liveness().hlo_ordering().MayInterfere( + if (assignment->hlo_ordering().MayInterfere( assigned_buffer, *new_value, assignment->dataflow_analysis())) { VLOG(4) << "Can't assign: assignee " << assigned_buffer << " may interfere with " << new_value; @@ -917,7 +917,7 @@ Status BufferAssigner::MergeInplaceOpBuffers(BufferAssignment* assignment) { for (const HloValue* instruction_value : instruction_buffer.values()) { for (const HloValue* operand_value : operand_buffer.values()) { - if (assignment->liveness().hlo_ordering().MayInterfere( + if (assignment->hlo_ordering().MayInterfere( *instruction_value, *operand_value, assignment->dataflow_analysis())) { interfere = true; @@ -1047,8 +1047,7 @@ Status 
BufferAssigner::AssignSingleHloBuffer( for (const HloValue* hlo_value : hlo_buffer->values()) { HloComputation* computation = hlo_value->instruction()->parent(); const bool has_sequential_order = - assignment->liveness().hlo_ordering().SequentialOrder(*computation) != - nullptr; + assignment->hlo_ordering().SequentialOrder(*computation) != nullptr; all_computations_have_sequential_order &= has_sequential_order; } @@ -1125,10 +1124,9 @@ Status BufferAssigner::AssignBuffersForComputations( } } - const BufferLiveness& liveness = assignment->liveness(); for (const HloComputation* computation : computations) { const bool has_sequential_order = - liveness.hlo_ordering().SequentialOrder(*computation) != nullptr; + assignment->hlo_ordering().SequentialOrder(*computation) != nullptr; if (has_sequential_order && buffers_to_assign_sequentially != nullptr) { // Every sequential computation must get an entry in the // buffers_to_assign_sequentially map, even if we end up with an empty @@ -1197,7 +1195,7 @@ Status BufferAssigner::AssignBuffersWithSequentialOrdering( // Run the sequence of instructions through the heap simulator. The // heuristic that seems to give the best results is lazy-best-fit, with all // runs of alloc / free calls sorted in decreasing size order. - const HloOrdering& hlo_ordering = assignment->liveness().hlo_ordering(); + const HloOrdering& hlo_ordering = assignment->hlo_ordering(); // Returns a heap algorithm that chooses the best result from several // algorithms. 
@@ -1392,9 +1390,6 @@ StatusOr> BufferAssigner::CreateAssignment( BufferValue::SizeFunction buffer_size, LogicalBuffer::AlignmentFunction color_alignment, HloDataflowAnalysis::CanShareBuffer can_share_buffer) { - TF_ASSIGN_OR_RETURN(std::unique_ptr liveness, - BufferLiveness::Run(module, std::move(hlo_ordering))); - TF_ASSIGN_OR_RETURN(std::unique_ptr alias_analysis, HloAliasAnalysis::Run(module, can_share_buffer)); @@ -1408,11 +1403,11 @@ StatusOr> BufferAssigner::CreateAssignment( // Can't use absl::make_unique because BufferAssignment constructor is // private. std::unique_ptr assignment(new BufferAssignment( - module, std::move(liveness), std::move(buffer_size), + module, std::move(hlo_ordering), std::move(buffer_size), std::move(color_alignment), std::move(alias_analysis))); - TF_RETURN_IF_ERROR(colorer_(&assignment->alias_analysis(), - assignment->liveness().hlo_ordering())); + TF_RETURN_IF_ERROR( + colorer_(&assignment->alias_analysis(), assignment->hlo_ordering())); VLOG(3) << "After coloring:"; XLA_VLOG_LINES(3, assignment->alias_analysis().dataflow_analysis().ToString()); diff --git a/tensorflow/compiler/xla/service/buffer_assignment.h b/tensorflow/compiler/xla/service/buffer_assignment.h index bfc4dced907..f60ad22fa51 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.h +++ b/tensorflow/compiler/xla/service/buffer_assignment.h @@ -25,7 +25,6 @@ limitations under the License. #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" #include "absl/types/span.h" -#include "tensorflow/compiler/xla/service/buffer_liveness.h" #include "tensorflow/compiler/xla/service/heap_simulator.h" #include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/compiler/xla/service/hlo_alias_analysis.h" @@ -440,11 +439,6 @@ class BufferAssignment { bool HaveDisjointSlices(const HloInstruction* hlo_a, const HloInstruction* hlo_b) const; - // Returns the underlying points-to analysis used for this assignment. 
- const TuplePointsToAnalysis& points_to_analysis() const { - return liveness_->points_to_analysis(); - } - const HloDataflowAnalysis& dataflow_analysis() const { return alias_analysis_->dataflow_analysis(); } @@ -452,7 +446,7 @@ class BufferAssignment { HloAliasAnalysis& alias_analysis() const { return *alias_analysis_; } // Returns the BufferLiveness object used to construct this assignment. - const BufferLiveness& liveness() const { return *liveness_; } + const HloOrdering& hlo_ordering() const { return *hlo_ordering_; } string ToString() const; BufferAssignmentProto ToProto() const; @@ -483,12 +477,12 @@ class BufferAssignment { friend class BufferAssigner; BufferAssignment(const HloModule* module, - std::unique_ptr liveness, + std::unique_ptr hlo_ordering, BufferValue::SizeFunction buffer_size, LogicalBuffer::AlignmentFunction color_alignment, std::unique_ptr alias_analysis) : module_(module), - liveness_(std::move(liveness)), + hlo_ordering_(std::move(hlo_ordering)), buffer_size_(std::move(buffer_size)), color_alignment_(std::move(color_alignment)), alias_analysis_(std::move(alias_analysis)) {} @@ -540,7 +534,8 @@ class BufferAssignment { allocation_index_for_value_; const HloModule* module_; - const std::unique_ptr liveness_; + + const std::unique_ptr hlo_ordering_; // Function which returns the buffer size for a given logical buffer (shape). BufferValue::SizeFunction buffer_size_; diff --git a/tensorflow/compiler/xla/service/buffer_liveness.cc b/tensorflow/compiler/xla/service/buffer_liveness.cc deleted file mode 100644 index 3adf129a22d..00000000000 --- a/tensorflow/compiler/xla/service/buffer_liveness.cc +++ /dev/null @@ -1,179 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// Defines the data returned by the XLA buffer assignment packages. - -#include "tensorflow/compiler/xla/service/buffer_liveness.h" - -#include -#include - -#include "absl/strings/str_format.h" -#include "absl/strings/str_join.h" -#include "tensorflow/compiler/xla/service/hlo_computation.h" -#include "tensorflow/compiler/xla/service/logical_buffer.h" -#include "tensorflow/compiler/xla/shape_util.h" -#include "tensorflow/compiler/xla/status_macros.h" -#include "tensorflow/compiler/xla/statusor.h" -#include "tensorflow/compiler/xla/types.h" -#include "tensorflow/compiler/xla/util.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/platform/logging.h" - -namespace xla { - -/* static */ -StatusOr> BufferLiveness::Run( - const HloModule* module, std::unique_ptr hlo_ordering) { - std::unique_ptr liveness( - new BufferLiveness(module, std::move(hlo_ordering))); - TF_RETURN_IF_ERROR(liveness->Analyze()); - return std::move(liveness); -} - -Status BufferLiveness::Analyze() { - TF_ASSIGN_OR_RETURN(points_to_analysis_, TuplePointsToAnalysis::Run(module_)); - for (auto* computation : module_->computations()) { - if (computation->IsFusionComputation()) { - continue; - } - // Gather all instructions whose buffers might alias other instructions into - // the set aliased_buffers_. This includes those contained as a tuple - // element in other instruction's output. 
- for (const auto& instruction : computation->instructions()) { - for (const LogicalBuffer* aliased_buffer : - points_to_analysis_->GetPointsToSet(instruction) - .CreateFlattenedSet()) { - if (aliased_buffer->instruction() != instruction) { - aliased_buffers_.insert(aliased_buffer); - } - } - } - - if (computation == module_->entry_computation()) { - const HloInstruction* root = computation->root_instruction(); - maybe_live_out_buffers_ = - points_to_analysis_->GetPointsToSet(root).CreateFlattenedSet(); - } - } - - XLA_VLOG_LINES(3, ToString()); - return Status::OK(); -} - -string BufferLiveness::ToString() const { - std::vector pieces; - pieces.push_back( - absl::StrFormat("BufferLiveness(module=%s):", module_->name())); - pieces.push_back("HloOrdering:"); - pieces.push_back(hlo_ordering_->ToString()); - pieces.push_back("Aliased buffers:"); - for (const LogicalBuffer* buffer : aliased_buffers_) { - pieces.push_back(absl::StrFormat(" %s", buffer->ToString())); - } - pieces.push_back("Live out buffers:"); - for (const LogicalBuffer* buffer : maybe_live_out_buffers_) { - pieces.push_back(absl::StrFormat(" %s", buffer->ToString())); - } - return absl::StrJoin(pieces, "\n"); -} - -bool BufferLiveness::live_range_strictly_before(const LogicalBuffer& a, - const LogicalBuffer& b) const { - TF_DCHECK_OK(points_to_analysis_->VerifyBuffer(a)); - TF_DCHECK_OK(points_to_analysis_->VerifyBuffer(b)); - - if (!hlo_ordering_->ExecutesBefore(a.instruction(), b.instruction())) { - return false; - } - - for (const BufferAlias& alias : points_to_analysis_->GetBufferAliases(a)) { - // Every user of 'a' must be a predecessor of 'b' or 'b' itself. 
- for (auto user : alias.instruction()->users()) { - if (points_to_analysis().DoesNotUseOperandBuffer(alias.instruction(), - alias.index(), user)) { - continue; - } - if (user != b.instruction() && - !hlo_ordering_->ExecutesBefore(user, b.instruction())) { - return false; - } - } - - // If the root instruction aliases the buffer 'a', the live range of 'a' is - // until the end of the computation and can never be strictly before another - // buffer nested in the same computation. This is needed to prevent the root - // instruction's buffers from being reused by later instructions even when - // the root is not the last instruction in the schedule. - if (alias.instruction()->parent()->root_instruction() == - alias.instruction() && - hlo_ordering_->call_graph().InstructionIsNestedIn( - b.instruction(), alias.instruction()->parent())) { - return false; - } - } - - // If 'b' is a user of 'a' then the buffers interfere unless 'a.instruction' - // and 'b.instruction' emit the same shape/layout, and 'b.instruction' meets - // the qualifications specified in CanShareOperandBufferWithUser. - for (const BufferAlias& alias : points_to_analysis_->GetBufferAliases(a)) { - if (b.instruction()->IsUserOf(alias.instruction()) && - !points_to_analysis().CanShareOperandBufferWithUser( - alias.instruction(), alias.index(), b.instruction(), b.index())) { - return false; - } - } - return true; -} - -namespace { -bool IsEntryParameter(const HloInstruction* instruction) { - const HloComputation* computation = instruction->parent(); - return instruction->opcode() == HloOpcode::kParameter && - computation == computation->parent()->entry_computation(); -} -} // namespace - -bool BufferLiveness::MayInterfere(const LogicalBuffer& a, - const LogicalBuffer& b) const { - // Parameters live at the entry of the computation, thus always interfere with - // all other instructions inside the computation executing before them in the - // ordering. 
- const HloInstruction* a_instruction = a.instruction(); - const HloInstruction* b_instruction = b.instruction(); - if (a_instruction->opcode() == HloOpcode::kParameter && - hlo_ordering_->call_graph().InstructionIsNestedIn( - b_instruction, a_instruction->parent()) && - hlo_ordering_->ExecutesBefore(b_instruction, a_instruction)) { - return true; - } - if (b_instruction->opcode() == HloOpcode::kParameter && - hlo_ordering_->call_graph().InstructionIsNestedIn( - a_instruction, b_instruction->parent()) && - hlo_ordering_->ExecutesBefore(a_instruction, b_instruction)) { - return true; - } - // Buffers without disjoint liveness may interfere. - return !live_range_strictly_before(a, b) && !live_range_strictly_before(b, a); -} - -bool BufferLiveness::MaybeLiveOut(const LogicalBuffer& buffer) const { - // Verify that a buffer is actually defined at the given instruction/index - // (eg, its not an alias of another buffer such as occurs with a bitcast). - TF_CHECK_OK(points_to_analysis_->VerifyBuffer(buffer)); - return maybe_live_out_buffers_.count(&buffer); -} - -} // namespace xla diff --git a/tensorflow/compiler/xla/service/buffer_liveness.h b/tensorflow/compiler/xla/service/buffer_liveness.h deleted file mode 100644 index f939a426ead..00000000000 --- a/tensorflow/compiler/xla/service/buffer_liveness.h +++ /dev/null @@ -1,114 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_LIVENESS_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_LIVENESS_H_ - -#include -#include -#include - -#include "absl/container/flat_hash_set.h" -#include "tensorflow/compiler/xla/service/hlo_instruction.h" -#include "tensorflow/compiler/xla/service/hlo_module.h" -#include "tensorflow/compiler/xla/service/hlo_ordering.h" -#include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h" -#include "tensorflow/compiler/xla/statusor.h" -#include "tensorflow/compiler/xla/types.h" -#include "tensorflow/core/lib/core/status.h" - -namespace xla { - -// Class which computes liveness of the output buffers of HLOs and their -// interference. -class BufferLiveness { - public: - using Colorer = std::function; - - // Constructs a buffer liveness object for the given module assuming the given - // HLO instruction ordering. - static StatusOr> Run( - const HloModule* module, std::unique_ptr hlo_ordering); - - // Returns true if the live range of the buffer containing the output of 'a' - // may overlap with the live range of the buffer of 'b'. If instruction 'a' - // interferes with instruction 'b' then they cannot share the same buffer. - bool MayInterfere(const LogicalBuffer& a, const LogicalBuffer& b) const; - - // Returns true if the buffer for the given instruction may be live out of the - // module. That is, the instruction's buffer may be included in the output of - // the entry computation. - bool MaybeLiveOut(const LogicalBuffer& buffer) const; - - // Returns the complete set of buffers that may be live out of the module. - const PointsToSet::BufferSet& maybe_live_out_buffers() const { - return maybe_live_out_buffers_; - } - - // Returns the underlying points-to analysis used for this liveness analysis. 
- const TuplePointsToAnalysis& points_to_analysis() const { - return *points_to_analysis_; - } - - // Returns the underlying hlo ordering used for this liveness analysis. - const HloOrdering& hlo_ordering() const { return *hlo_ordering_; } - - const HloModule& module() const { return *module_; } - - string ToString() const; - - static Colorer DefaultColorer() { - return [](const BufferLiveness& buffer_liveness) { - for (LogicalBuffer::Id id = 0; - id < buffer_liveness.points_to_analysis().num_logical_buffers(); - id++) { - auto& buffer = buffer_liveness.points_to_analysis().logical_buffer(id); - buffer.set_color(LogicalBuffer::Color(0)); - } - return Status::OK(); - }; - } - - private: - explicit BufferLiveness(const HloModule* module, - std::unique_ptr hlo_ordering) - : module_(module), hlo_ordering_(std::move(hlo_ordering)) {} - - // Perform buffer liveness analysis. This method must be called prior to - // MayInterfere or MaybeLiveOut. - Status Analyze(); - - // Returns true if the live range of the buffer of 'a' is strictly before the - // live range of the buffer of 'b' (they do not overlap). - bool live_range_strictly_before(const LogicalBuffer& a, - const LogicalBuffer& b) const; - - const HloModule* module_; - std::unique_ptr hlo_ordering_; - - // Set of LogicalBuffers which are aliased in the output of other - // instructions. For example, a LogicalBuffer which is inserted into a tuple - // is considered to be aliased and will be in this set. - absl::flat_hash_set aliased_buffers_; - - // LogicalBuffers that may be live out of the entry computation. 
- PointsToSet::BufferSet maybe_live_out_buffers_; - - std::unique_ptr points_to_analysis_; -}; - -} // namespace xla - -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_LIVENESS_H_ diff --git a/tensorflow/compiler/xla/service/buffer_liveness_test.cc b/tensorflow/compiler/xla/service/buffer_liveness_test.cc deleted file mode 100644 index d4b2268fd01..00000000000 --- a/tensorflow/compiler/xla/service/buffer_liveness_test.cc +++ /dev/null @@ -1,934 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/xla/service/buffer_liveness.h" - -#include -#include - -#include "absl/memory/memory.h" -#include "tensorflow/compiler/xla/service/hlo_computation.h" -#include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h" -#include "tensorflow/compiler/xla/service/hlo_instruction.h" -#include "tensorflow/compiler/xla/service/hlo_opcode.h" -#include "tensorflow/compiler/xla/service/hlo_parser.h" -#include "tensorflow/compiler/xla/shape_util.h" -#include "tensorflow/compiler/xla/tests/hlo_test_base.h" -#include "tensorflow/compiler/xla/types.h" -#include "tensorflow/compiler/xla/xla_data.pb.h" -#include "tensorflow/core/lib/core/status_test_util.h" - -namespace xla { -namespace { - -class BufferLivenessTest : public HloTestBase { - protected: - // Returns the LogicalBuffer defined at the given instruction and - // index. 
CHECKs if no buffer is defined at that point. - const LogicalBuffer& GetBuffer(const BufferLiveness& liveness, - const HloInstruction* instruction, - const ShapeIndex& index) { - const auto& pointed_to = liveness.points_to_analysis() - .GetPointsToSet(instruction) - .element(index); - CHECK_EQ(1, pointed_to.size()); - CHECK_EQ(instruction, pointed_to[0]->instruction()); - CHECK(index == pointed_to[0]->index()); - return *pointed_to[0]; - } - - // Returns true if the top-level buffers for instructions 'a' and 'b' may - // interfere. Precondition: 'a' and 'b' are array-shaped. - bool InstructionsMayInterfere(const BufferLiveness& liveness, - HloInstruction* a, HloInstruction* b) { - EXPECT_FALSE(a->shape().IsTuple()); - EXPECT_FALSE(b->shape().IsTuple()); - return liveness.MayInterfere( - GetBuffer(liveness, /*instruction=*/a, /*index=*/{}), - GetBuffer(liveness, /*instruction=*/b, /*index=*/{})); - } - - // Returns true if the tuple elements at 'index' for instructions 'a' and 'b' - // may interfere. Precondition: 'a' and 'b' are tuple-shaped, with equal - // tuple element sub-shapes. - bool TupleElementsMayInterfere(const BufferLiveness& liveness, - HloInstruction* a, HloInstruction* b, - const ShapeIndex& index) { - // Check that top-level shapes are tuple and tuple element shapes are equal. - EXPECT_TRUE(a->shape().IsTuple()); - EXPECT_TRUE(b->shape().IsTuple()); - EXPECT_TRUE( - ShapeUtil::Compatible(ShapeUtil::GetSubshape(a->shape(), index), - ShapeUtil::GetSubshape(b->shape(), index))); - // Lookup PointsTo set for instructions 'a' and 'b'. - auto& points_to_analysis = liveness.points_to_analysis(); - const auto& points_to_a = - points_to_analysis.GetPointsToSet(a).element(index); - const auto& points_to_b = - points_to_analysis.GetPointsToSet(b).element(index); - // Make sure PointsTo sets for 'a' and 'b' are unambiguous. - EXPECT_EQ(1, points_to_a.size()); - EXPECT_EQ(points_to_a.size(), points_to_b.size()); - // Check interference. 
- return liveness.MayInterfere(*points_to_a[0], *points_to_b[0]); - } - - // Returns true if the top-level buffers for the given instruction maybe - // liveout of the entry computation. - // Precondition: instruction is array-shaped. - bool InstructionMaybeLiveOut(const BufferLiveness& liveness, - HloInstruction* instruction) { - return liveness.MaybeLiveOut( - GetBuffer(liveness, instruction, /*index=*/{})); - } - - std::unique_ptr BuildDummyComputation() { - auto builder = HloComputation::Builder(TestName() + "_dummy"); - builder.AddInstruction(HloInstruction::CreateParameter(0, vec_, "param")); - return builder.Build(); - } - - const Shape vec_ = ShapeUtil::MakeShape(xla::F32, {42}); -}; - -TEST_F(BufferLivenessTest, ElementwiseChain) { - // A simple chain of elementwise operations. No buffers should interfere. - // - // param --> negate -> exp -> log - // - auto builder = HloComputation::Builder(TestName()); - auto param = - builder.AddInstruction(HloInstruction::CreateParameter(0, vec_, "param")); - auto negate = builder.AddInstruction( - HloInstruction::CreateUnary(vec_, HloOpcode::kNegate, param)); - auto exp = builder.AddInstruction( - HloInstruction::CreateUnary(vec_, HloOpcode::kExp, negate)); - auto log = builder.AddInstruction( - HloInstruction::CreateUnary(vec_, HloOpcode::kLog, exp)); - - auto module = CreateNewVerifiedModule(); - module->AddEntryComputation(builder.Build()); - - auto liveness = - BufferLiveness::Run( - module.get(), absl::make_unique(module.get())) - .ConsumeValueOrDie(); - - EXPECT_FALSE(InstructionsMayInterfere(*liveness, param, negate)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, param, exp)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, param, log)); - - // No buffers should interfere. 
- EXPECT_FALSE(InstructionsMayInterfere(*liveness, negate, exp)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, negate, log)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, exp, negate)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, exp, log)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, log, negate)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, log, exp)); - - // Buffers should interfere with itself. - EXPECT_TRUE(InstructionsMayInterfere(*liveness, exp, exp)); - - // Only log is live out. - EXPECT_FALSE(InstructionMaybeLiveOut(*liveness, param)); - EXPECT_FALSE(InstructionMaybeLiveOut(*liveness, negate)); - EXPECT_FALSE(InstructionMaybeLiveOut(*liveness, exp)); - EXPECT_TRUE(InstructionMaybeLiveOut(*liveness, log)); -} - -TEST_F(BufferLivenessTest, MultipleEntryParameters_Sequential) { - // Two entry params, which interfere with each other. - // - // param0 --> negate ---------------\ - // param1 --> exp --> add - auto builder = HloComputation::Builder(TestName()); - auto param0 = builder.AddInstruction( - HloInstruction::CreateParameter(0, vec_, "param0")); - auto param1 = builder.AddInstruction( - HloInstruction::CreateParameter(1, vec_, "param1")); - auto negate = builder.AddInstruction( - HloInstruction::CreateUnary(vec_, HloOpcode::kNegate, param0)); - auto exp = builder.AddInstruction( - HloInstruction::CreateUnary(vec_, HloOpcode::kExp, param1)); - auto add = builder.AddInstruction( - HloInstruction::CreateBinary(vec_, HloOpcode::kAdd, negate, exp)); - - auto module = CreateNewVerifiedModule(); - HloComputation* entry = module->AddEntryComputation(builder.Build()); - - HloSchedule schedule(module.get()); - schedule.set_sequence(entry, {param0, negate, param1, exp, add}); - auto liveness = - BufferLiveness::Run(module.get(), - absl::make_unique(schedule)) - .ConsumeValueOrDie(); - - // Entry parameters interfere as if they are defined simultaneously at - // the very beginning. 
- EXPECT_TRUE(InstructionsMayInterfere(*liveness, param0, param1)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, param0, negate)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, param0, exp)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, param0, add)); - EXPECT_TRUE(InstructionsMayInterfere(*liveness, param1, param0)); - EXPECT_TRUE(InstructionsMayInterfere(*liveness, param1, negate)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, param1, exp)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, param1, add)); - - // Negate and exp still interfere. - EXPECT_TRUE(InstructionsMayInterfere(*liveness, negate, exp)); - EXPECT_TRUE(InstructionsMayInterfere(*liveness, exp, negate)); - - // But {negate, add} and {exp, add} don't interfere. - EXPECT_FALSE(InstructionsMayInterfere(*liveness, negate, add)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, add, negate)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, exp, add)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, add, exp)); -} - -TEST_F(BufferLivenessTest, EmbeddedComputationParameters) { - absl::string_view hlo_string = R"( -HloModule EmbeddedComputationParameters, is_scheduled=true - -%EmbeddedComputationParameters_embedded (embedded_param0: f32[42], embedded_param1: f32[42]) -> (f32[42], f32[42]) { - %embedded_param0 = f32[42]{0} parameter(0) - %log = f32[42]{0} log(f32[42]{0} %embedded_param0) - %add = f32[42]{0} add(f32[42]{0} %log, f32[42]{0} %log) - %embedded_param1 = f32[42]{0} parameter(1) - ROOT %tuple = (f32[42]{0}, f32[42]{0}) tuple(f32[42]{0} %add, f32[42]{0} %embedded_param1) -} - -ENTRY %EmbeddedComputationParameters (param0: f32[42], param1: f32[42]) -> (f32[42], f32[42]) { - %param0 = f32[42]{0} parameter(0) - %param1 = f32[42]{0} parameter(1) - ROOT %call = (f32[42]{0}, f32[42]{0}) call(f32[42]{0} %param0, f32[42]{0} %param1), to_apply=%EmbeddedComputationParameters_embedded -} -)"; - HloModuleConfig hlo_config; - TF_ASSERT_OK_AND_ASSIGN( - std::unique_ptr module, - 
ParseAndReturnUnverifiedModule(hlo_string, hlo_config)); - auto liveness = - BufferLiveness::Run( - module.get(), - absl::make_unique(module->schedule())) - .ConsumeValueOrDie(); - - auto embedded_log = FindInstruction(module.get(), "log"); - auto embedded_param0 = FindInstruction(module.get(), "embedded_param0"); - auto embedded_param1 = FindInstruction(module.get(), "embedded_param1"); - auto param0 = FindInstruction(module.get(), "param0"); - auto param1 = FindInstruction(module.get(), "param1"); - - // Parameters should interfere with other instructions inside the computation. - EXPECT_TRUE( - InstructionsMayInterfere(*liveness, embedded_log, embedded_param1)); - EXPECT_TRUE(InstructionsMayInterfere(*liveness, embedded_log, param0)); - EXPECT_TRUE(InstructionsMayInterfere(*liveness, embedded_log, param1)); - EXPECT_TRUE( - InstructionsMayInterfere(*liveness, embedded_param0, embedded_param1)); -} - -TEST_F(BufferLivenessTest, InterferenceWithOuterRoot) { - absl::string_view hlo_string = R"( -HloModule InterferenceWithOuterRoot, is_scheduled=true - -Emmbedded (embedded_param: f32[42]) -> f32[42] { - embedded_param = f32[42]{0} parameter(0) - multiply = f32[42]{0} multiply(embedded_param, embedded_param) - ROOT log = f32[42]{0} log(multiply) -} - -ENTRY InterferenceWithOuterRoot { - param = f32[4096,4096]{1,0} parameter(0) - ROOT add = f32[4096,4096]{1,0} add(param, param) - call = f32[42]{0} call(param), to_apply=Emmbedded -} - -)"; - HloModuleConfig hlo_config; - TF_ASSERT_OK_AND_ASSIGN( - std::unique_ptr module, - ParseAndReturnUnverifiedModule(hlo_string, hlo_config)); - auto liveness = - BufferLiveness::Run( - module.get(), - absl::make_unique(module->schedule())) - .ConsumeValueOrDie(); - - auto multiply = FindInstruction(module.get(), "multiply"); - auto add = FindInstruction(module.get(), "add"); - - EXPECT_TRUE(InstructionsMayInterfere(*liveness, multiply, add)); -} - -TEST_F(BufferLivenessTest, NonElementwiseOperand) { - // A chain of operations with 
two elementwise and one non-elementwise. The - // elementwise op should not interfere with its operand, while the - // non-elementwise op should interfere. Entry params always interfere. - // - // param --> exp -> negate -> reverse - // - auto builder = HloComputation::Builder(TestName()); - auto param = - builder.AddInstruction(HloInstruction::CreateParameter(0, vec_, "param")); - auto exp = builder.AddInstruction( - HloInstruction::CreateUnary(vec_, HloOpcode::kExp, param)); - auto negate = builder.AddInstruction( - HloInstruction::CreateUnary(vec_, HloOpcode::kNegate, exp)); - auto reverse = - builder.AddInstruction(HloInstruction::CreateReverse(vec_, negate, {0})); - - auto module = CreateNewVerifiedModule(); - module->AddEntryComputation(builder.Build()); - - auto liveness = - BufferLiveness::Run( - module.get(), absl::make_unique(module.get())) - .ConsumeValueOrDie(); - - EXPECT_FALSE(InstructionsMayInterfere(*liveness, param, exp)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, param, negate)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, param, reverse)); - - // Negate is elementwise, so doesn't interfere with its operand. - // Reverse is non-elementwise, so does interfere with its operand. - EXPECT_FALSE(InstructionsMayInterfere(*liveness, exp, negate)); - EXPECT_TRUE(InstructionsMayInterfere(*liveness, negate, reverse)); -} - -TEST_F(BufferLivenessTest, OverlappedBuffers) { - // Verify simultaneously live buffers interfere (exp and negate). 
- // - // param --> negate -> add - // \---> exp -----/ - // - auto builder = HloComputation::Builder(TestName()); - auto param = - builder.AddInstruction(HloInstruction::CreateParameter(0, vec_, "param")); - auto negate = builder.AddInstruction( - HloInstruction::CreateUnary(vec_, HloOpcode::kNegate, param)); - auto exp = builder.AddInstruction( - HloInstruction::CreateUnary(vec_, HloOpcode::kExp, param)); - auto add = builder.AddInstruction( - HloInstruction::CreateBinary(vec_, HloOpcode::kAdd, negate, exp)); - - auto module = CreateNewVerifiedModule(); - module->AddEntryComputation(builder.Build()); - - auto liveness = - BufferLiveness::Run( - module.get(), absl::make_unique(module.get())) - .ConsumeValueOrDie(); - - EXPECT_TRUE(InstructionsMayInterfere(*liveness, param, negate)); - EXPECT_TRUE(InstructionsMayInterfere(*liveness, param, exp)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, param, add)); - - // Negate and exp interfere with each other, but not with add. - EXPECT_TRUE(InstructionsMayInterfere(*liveness, negate, exp)); - EXPECT_TRUE(InstructionsMayInterfere(*liveness, exp, negate)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, negate, add)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, add, negate)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, exp, add)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, add, exp)); -} - -TEST_F(BufferLivenessTest, OverlappedBuffersSequentialOrder) { - // Identical to the test OverlappedBuffer but using a sequential ordering of - // HLO instructions. - // - // param --> negate -> add - // \---> exp -----/ - // - // Sequential order: - // param, negate, exp, add - // - // Liveness is identical to the DependencyHloOrdering. 
- auto builder = HloComputation::Builder(TestName()); - auto param = - builder.AddInstruction(HloInstruction::CreateParameter(0, vec_, "param")); - auto negate = builder.AddInstruction( - HloInstruction::CreateUnary(vec_, HloOpcode::kNegate, param)); - auto exp = builder.AddInstruction( - HloInstruction::CreateUnary(vec_, HloOpcode::kExp, param)); - auto add = builder.AddInstruction( - HloInstruction::CreateBinary(vec_, HloOpcode::kAdd, negate, exp)); - - auto module = CreateNewVerifiedModule(); - auto computation = module->AddEntryComputation(builder.Build()); - - HloSchedule schedule(module.get()); - schedule.set_sequence(computation, {param, negate, exp, add}); - auto liveness = - BufferLiveness::Run(module.get(), - absl::make_unique(schedule)) - .ConsumeValueOrDie(); - - EXPECT_TRUE(InstructionsMayInterfere(*liveness, param, negate)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, param, exp)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, param, add)); - - // Negate and exp interfere with each other, but not with add. - EXPECT_TRUE(InstructionsMayInterfere(*liveness, negate, exp)); - EXPECT_TRUE(InstructionsMayInterfere(*liveness, exp, negate)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, negate, add)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, add, negate)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, exp, add)); - EXPECT_FALSE(InstructionsMayInterfere(*liveness, add, exp)); -} - -TEST_F(BufferLivenessTest, RootInstructionIsNotLastInSequentialOrder) { - // Tests that when the root instruction is not the last instruction in the - // schedule, the live range of its buffers interfere with the buffers of the - // later instructions. - // - // Two sets of independent instructions are executed in the computation. 
- // param --> add (root) - // recv --> recv-done --> send --> send-done - // - // Sequential order: - // param, add (root), recv, recv-done, send, send-done - auto builder = HloComputation::Builder(TestName()); - auto param = - builder.AddInstruction(HloInstruction::CreateParameter(0, vec_, "param")); - auto add = builder.AddInstruction( - HloInstruction::CreateBinary(vec_, HloOpcode::kAdd, param, param)); - auto token = builder.AddInstruction(HloInstruction::CreateToken()); - auto recv = builder.AddInstruction( - HloInstruction::CreateRecv(vec_, token, /*channel_id=*/0)); - auto recv_done = builder.AddInstruction(HloInstruction::CreateRecvDone(recv)); - auto send = builder.AddInstruction( - HloInstruction::CreateSend(recv_done, token, /*channel_id=*/1)); - auto send_done = builder.AddInstruction(HloInstruction::CreateSendDone(send)); - - auto module = CreateNewVerifiedModule(); - auto computation = module->AddEntryComputation(builder.Build(add)); - - HloSchedule schedule(module.get()); - schedule.set_sequence(computation, - {param, add, token, recv, recv_done, send, send_done}); - TF_ASSERT_OK(schedule.Verify()); - auto liveness = - BufferLiveness::Run(module.get(), - absl::make_unique(schedule)) - .ConsumeValueOrDie(); - - EXPECT_FALSE(InstructionsMayInterfere(*liveness, param, add)); - // Check the root instruction (add) buffer interferes with the recv buffer. - EXPECT_TRUE( - liveness->MayInterfere(GetBuffer(*liveness, add, /*index=*/{}), - GetBuffer(*liveness, recv, /*index=*/{0}))); -} - -TEST_F(BufferLivenessTest, TupleLiveOut) { - // Verify MaybeLiveOut with nested tuples. Result of computation looks like: - // - // Tuple({Tuple({Negate(Param)}, Exp(Negate(Param)))}) - // - // All values should be live out except Param. 
- auto builder = HloComputation::Builder(TestName()); - auto param = - builder.AddInstruction(HloInstruction::CreateParameter(0, vec_, "param")); - auto negate = builder.AddInstruction( - HloInstruction::CreateUnary(vec_, HloOpcode::kNegate, param)); - auto inner_tuple = - builder.AddInstruction(HloInstruction::CreateTuple({negate})); - auto exp = builder.AddInstruction( - HloInstruction::CreateUnary(vec_, HloOpcode::kExp, negate)); - auto outer_tuple = - builder.AddInstruction(HloInstruction::CreateTuple({inner_tuple, exp})); - - auto module = CreateNewVerifiedModule(); - module->AddEntryComputation(builder.Build()); - - auto liveness = - BufferLiveness::Run( - module.get(), absl::make_unique(module.get())) - .ConsumeValueOrDie(); - - // All buffers should be live out except the param - EXPECT_FALSE(InstructionMaybeLiveOut(*liveness, param)); - EXPECT_TRUE(InstructionMaybeLiveOut(*liveness, negate)); - EXPECT_TRUE(InstructionMaybeLiveOut(*liveness, inner_tuple)); - EXPECT_TRUE(InstructionMaybeLiveOut(*liveness, exp)); - EXPECT_TRUE(InstructionMaybeLiveOut(*liveness, outer_tuple)); -} - -// bitcast liveout. - -TEST_F(BufferLivenessTest, EmbeddedComputation) { - // Test MaybeLiveOut and MayInterfere for embedded computation. 
- auto module = CreateNewVerifiedModule(); - - auto embedded_builder = HloComputation::Builder(TestName() + "_embedded"); - auto embedded_param = embedded_builder.AddInstruction( - HloInstruction::CreateParameter(0, vec_, "embedded_param")); - auto embedded_log = embedded_builder.AddInstruction( - HloInstruction::CreateUnary(vec_, HloOpcode::kLog, embedded_param)); - - auto embedded_computation = - module->AddEmbeddedComputation(embedded_builder.Build()); - - auto builder = HloComputation::Builder(TestName()); - auto param = - builder.AddInstruction(HloInstruction::CreateParameter(0, vec_, "param")); - auto call = builder.AddInstruction( - HloInstruction::CreateCall(vec_, {param}, embedded_computation)); - - module->AddEntryComputation(builder.Build()); - - auto liveness = - BufferLiveness::Run( - module.get(), absl::make_unique(module.get())) - .ConsumeValueOrDie(); - - // Buffers in different computations should always interfere. - EXPECT_TRUE(InstructionsMayInterfere(*liveness, embedded_log, call)); - EXPECT_TRUE(InstructionsMayInterfere(*liveness, embedded_param, param)); - EXPECT_FALSE( - InstructionsMayInterfere(*liveness, embedded_param, embedded_log)); - - // The only buffers for which MaybeLiveOut == true are those live out - // of the entry computation. Buffers live out of embedded computations should - // return false for this method. - EXPECT_FALSE(InstructionMaybeLiveOut(*liveness, embedded_log)); - EXPECT_TRUE(InstructionMaybeLiveOut(*liveness, call)); -} - -TEST_F(BufferLivenessTest, TupleConstantLiveOut) { - // Verify non top-level elements of a nested tuple constant are properly - // marked as liveout. Computation: - // - // GetTupleElement(0, TupleConstant({{0, 1}, {3}}) - // - // Only the array buffers containing 0 and 1 are liveout of the - // computation. The buffer containing {0, 1} is copied by GetTupleElement, and - // the buffers containing {3} and 3 are dead. 
- auto builder = HloComputation::Builder(TestName()); - Literal elements0[] = {LiteralUtil::CreateR0(0), - LiteralUtil::CreateR0(1)}; - auto inner_tuple0 = LiteralUtil::MakeTuple({&elements0[0], &elements0[1]}); - Literal element1 = LiteralUtil::CreateR0(3); - auto inner_tuple1 = LiteralUtil::MakeTuple({&element1}); - auto tuple_constant = builder.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::MakeTuple({&inner_tuple0, &inner_tuple1}))); - builder.AddInstruction(HloInstruction::CreateGetTupleElement( - inner_tuple0.shape(), tuple_constant, 0)); - - auto module = CreateNewVerifiedModule(); - module->AddEntryComputation(builder.Build()); - - auto liveness = - BufferLiveness::Run( - module.get(), absl::make_unique(module.get())) - .ConsumeValueOrDie(); - - // Only the element buffers of the tuple constant which are pointed to by - // the GetTupleElement instruction should be liveout. - EXPECT_FALSE(liveness->MaybeLiveOut( - GetBuffer(*liveness, tuple_constant, /*index=*/{}))); - EXPECT_TRUE(liveness->MaybeLiveOut( - GetBuffer(*liveness, tuple_constant, /*index=*/{0}))); - EXPECT_TRUE(liveness->MaybeLiveOut( - GetBuffer(*liveness, tuple_constant, /*index=*/{0, 0}))); - EXPECT_TRUE(liveness->MaybeLiveOut( - GetBuffer(*liveness, tuple_constant, /*index=*/{0, 1}))); - EXPECT_FALSE(liveness->MaybeLiveOut( - GetBuffer(*liveness, tuple_constant, /*index=*/{1}))); - EXPECT_FALSE(liveness->MaybeLiveOut( - GetBuffer(*liveness, tuple_constant, /*index=*/{1, 0}))); - EXPECT_FALSE(liveness->MaybeLiveOut( - GetBuffer(*liveness, tuple_constant, /*index=*/{1, 0}))); -} - -TEST_F(BufferLivenessTest, IndependentTupleElements) { - auto builder = HloComputation::Builder(TestName()); - // Create param0 Tuple. - auto tuple_param0 = builder.AddInstruction(HloInstruction::CreateParameter( - 0, - ShapeUtil::MakeTupleShape( - {ShapeUtil::MakeShape(F32, {8}), ShapeUtil::MakeShape(S32, {4})}), - "param0")); - // Create independent computations for each tuple elememt. 
- - // Tuple element0 computation: - // Add(GetTupleElement(tuple_param0, 0), const0) - auto tuple_element0_shape = - ShapeUtil::GetSubshape(tuple_param0->shape(), {0}); - auto tuple_element0 = - builder.AddInstruction(HloInstruction::CreateGetTupleElement( - tuple_element0_shape, tuple_param0, 0)); - auto const0 = builder.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR1({1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f}))); - auto add0 = builder.AddInstruction(HloInstruction::CreateBinary( - tuple_element0_shape, HloOpcode::kAdd, tuple_element0, const0)); - - // Tuple element1 computation: - // Add(GetTupleElement(tuple_param0, 1), const1) - auto tuple_element1_shape = - ShapeUtil::GetSubshape(tuple_param0->shape(), {1}); - auto tuple_element1 = - builder.AddInstruction(HloInstruction::CreateGetTupleElement( - tuple_element1_shape, tuple_param0, 1)); - auto const1 = builder.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR1({2.f, 2.f, 2.f, 2.f, 2.f, 2.f, 2.f, 2.f}))); - auto add1 = builder.AddInstruction(HloInstruction::CreateBinary( - tuple_element1_shape, HloOpcode::kAdd, tuple_element1, const1)); - - // Create output tuple. - auto tuple_root = - builder.AddInstruction(HloInstruction::CreateTuple({add0, add1})); - - auto module = CreateNewUnverifiedModule(); - module->AddEntryComputation(BuildDummyComputation()); - module->AddEmbeddedComputation(builder.Build()); - - auto liveness = - BufferLiveness::Run( - module.get(), absl::make_unique(module.get())) - .ConsumeValueOrDie(); - - // We compare tuple element pairs that are input/output to the computation: - // 1) (input_tuple_element, output_tuple_element) = ('tuple_element0', 'add0') - // 2) (input_tuple_element, output_tuple_element) = ('tuple_element1', 'add1') - - // Tuple output element 'add0' does not depend on input 'tuple_element1'. - // Tuple output element 'add1' does not depend on input 'tuple_element0'. 
- - // Both element pair does not interfere, because there is no other dependency - // on the pairs tuple input element, and so liveness can compute that all - // users of the input tuple element execute before the associated output - // tuple element. - EXPECT_FALSE( - TupleElementsMayInterfere(*liveness, tuple_param0, tuple_root, {0})); - EXPECT_FALSE( - TupleElementsMayInterfere(*liveness, tuple_param0, tuple_root, {1})); -} - -TEST_F(BufferLivenessTest, DependentTupleElements) { - auto builder = HloComputation::Builder(TestName()); - // Create param0 Tuple. - auto tuple_param0 = builder.AddInstruction(HloInstruction::CreateParameter( - 0, - ShapeUtil::MakeTupleShape( - {ShapeUtil::MakeShape(F32, {8}), ShapeUtil::MakeShape(F32, {8})}), - "param0")); - // Create dependent computations for each tuple elememt. - - // Tuple element0 computation: - // Add(GetTupleElement(tuple_param0, 0), const0) - auto tuple_element0_shape = - ShapeUtil::GetSubshape(tuple_param0->shape(), {0}); - auto tuple_element0 = - builder.AddInstruction(HloInstruction::CreateGetTupleElement( - tuple_element0_shape, tuple_param0, 0)); - auto const0 = builder.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR1({1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f}))); - auto add0 = builder.AddInstruction(HloInstruction::CreateBinary( - tuple_element0_shape, HloOpcode::kAdd, tuple_element0, const0)); - - // Tuple element1 computation: - // Add(GetTupleElement(tuple_param0, 0), GetTupleElement(tuple_param0, 1)) - auto tuple_element1_shape = - ShapeUtil::GetSubshape(tuple_param0->shape(), {1}); - auto tuple_element1 = - builder.AddInstruction(HloInstruction::CreateGetTupleElement( - tuple_element1_shape, tuple_param0, 1)); - auto add1 = builder.AddInstruction(HloInstruction::CreateBinary( - tuple_element1_shape, HloOpcode::kAdd, tuple_element0, tuple_element1)); - - // Create output tuple. 
- auto tuple_root = - builder.AddInstruction(HloInstruction::CreateTuple({add0, add1})); - - auto module = CreateNewVerifiedModule(); - module->AddEntryComputation(BuildDummyComputation()); - module->AddEmbeddedComputation(builder.Build()); - - auto liveness = - BufferLiveness::Run( - module.get(), absl::make_unique(module.get())) - .ConsumeValueOrDie(); - - // We compare tuple element pairs that are input/output to the computation: - // 1) (input_tuple_element, output_tuple_element) = ('tuple_element0', 'add0') - // 2) (input_tuple_element, output_tuple_element) = ('tuple_element1', 'add1') - - // The first tuple element pair output 'add0', has no dependency on second - // tuple element pairs input 'tuple_element1'. - - // The second tuple element pair output 'add1', has a dependency on first - // tuple element pairs input 'tuple_element0'. - - // The first tuple element pair does interfere, because liveness cannot - // compute that all references to 'tuple_element0' are executed before 'add0' - // (because of the depenency of 'add1' on 'tuple_element0'). - EXPECT_TRUE( - TupleElementsMayInterfere(*liveness, tuple_param0, tuple_root, {0})); - - // The second tuple element pair does not interfere, because there is no - // other dependency on 'tuple_element1', and so liveness can compute that - // all users execute before 'add1'. - EXPECT_FALSE( - TupleElementsMayInterfere(*liveness, tuple_param0, tuple_root, {1})); -} - -class FusedDynamicUpdateSliceLivenessTest : public BufferLivenessTest { - protected: - // Builds and runs a computation (see test case computation graphs below). - std::unique_ptr BuildModule( - const bool update_uses_tuple_element1, const bool fuse_gte0) { - auto builder = HloComputation::Builder(TestName()); - // Create param0 Tuple. 
- Shape data_shape = ShapeUtil::MakeShape(F32, {8}); - Shape update_shape = ShapeUtil::MakeShape(F32, {3}); - auto tuple_param0 = builder.AddInstruction(HloInstruction::CreateParameter( - 0, ShapeUtil::MakeTupleShape({data_shape, data_shape}), "param0")); - - auto gte0 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(data_shape, tuple_param0, 0)); - - auto gte1 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(data_shape, tuple_param0, 1)); - - auto update = builder.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR1({2.f, 2.f, 2.f}))); - HloInstruction* slice = nullptr; - if (update_uses_tuple_element1) { - // Create a slice instruction as an additional user of 'gte1'. - slice = builder.AddInstruction( - HloInstruction::CreateSlice(update_shape, gte1, {0}, {3}, {1})); - update = builder.AddInstruction(HloInstruction::CreateBinary( - update_shape, HloOpcode::kAdd, update, slice)); - } - // Create a DynamicUpdateSlice instruction of tuple element 1 with 'update'. - auto starts = builder.AddInstruction( - HloInstruction::CreateConstant(LiteralUtil::CreateR0(2))); - auto dynamic_update_slice = - builder.AddInstruction(HloInstruction::CreateDynamicUpdateSlice( - data_shape, gte1, update, {starts})); - // Create output tuple. - builder.AddInstruction( - HloInstruction::CreateTuple({gte0, dynamic_update_slice})); - // Build module and get reference to entry computation. - auto module = CreateNewVerifiedModule(); - module->AddEntryComputation(builder.Build()); - auto* computation = module->entry_computation(); - // Create fusion instruction based on number of tuple element 1 users. 
- if (update_uses_tuple_element1) { - computation->CreateFusionInstruction( - {dynamic_update_slice, starts, update, CHECK_NOTNULL(slice), gte1}, - HloInstruction::FusionKind::kLoop); - } else { - computation->CreateFusionInstruction( - {dynamic_update_slice, starts, update, gte1}, - HloInstruction::FusionKind::kLoop); - } - // Create fusion instruction for tuple element 0 (if requested). - if (fuse_gte0) { - computation->CreateFusionInstruction({gte0}, - HloInstruction::FusionKind::kLoop); - } - return module; - } - - // Returns whether buffer interference is detected between tuple-shaped - // parameter and root instructions at tuple element 1. - bool Run(const bool update_uses_tuple_element1, - const bool fuse_gte0 = false) { - auto module = BuildModule(update_uses_tuple_element1, fuse_gte0); - // Run BufferLiveness on 'module'. - auto liveness = BufferLiveness::Run( - module.get(), - absl::make_unique(module.get())) - .ConsumeValueOrDie(); - // Return whether or not buffers interference is detected between - // 'tuple_param0' and 'tuple_root' at shape index '{1}'. - auto tuple_param0 = FindInstruction(module.get(), "param0"); - auto tuple_root = module->entry_computation()->root_instruction(); - return TupleElementsMayInterfere(*liveness, tuple_param0, tuple_root, {1}); - } - bool RunWithHloDataflowAnalysis(const bool update_uses_tuple_element1, - const bool fuse_gte0 = false) { - auto module = BuildModule(update_uses_tuple_element1, fuse_gte0); - // Run BufferLiveness on 'module'. - auto dataflow = HloDataflowAnalysis::Run(*module).ConsumeValueOrDie(); - auto hlo_ordering = absl::make_unique(module.get()); - // Return whether or not buffers interference is detected between - // 'tuple_param0' and 'tuple_root' at shape index '{1}'. 
- auto tuple_param0 = FindInstruction(module.get(), "param0"); - auto tuple_root = module->entry_computation()->root_instruction(); - return hlo_ordering->MayInterfere( - dataflow->GetUniqueValueAt(tuple_param0, {1}), - dataflow->GetUniqueValueAt(tuple_root, {1}), *dataflow); - } -}; - -// Tests that live ranges of buffers Param0[1] and Tuple[1] (which alias fusion) -// do not overlap with the following computation: -// -// Param0 -// / \ -// GTE(0) Fusion -----------> FusionParam -// | | | -// | | GTE(1) Const Const -// | | \ | / -// | | DynamicUpdateSlice // fused root -// \ / -// Tuple // computation root -// -TEST_F(FusedDynamicUpdateSliceLivenessTest, NoInterference) { - EXPECT_FALSE(Run(/*update_uses_tuple_element1=*/false)); - EXPECT_FALSE( - RunWithHloDataflowAnalysis(/*update_uses_tuple_element1=*/false)); -} - -// Tests that live ranges of buffers Param0[1] and Tuple[1] (which aliases -// 'fusion1') do not overlap in the presence of another fusion instruction -// (which is a user of 'param0' at a different tuple index). -// BufferLiveness should detect no uses of Param0 at index {1} in Fusion0 -// (because Fusion0 only uses Param0 at index {0}). -// -// Param0 -// / \ -// FusionParam <----- Fusion0 Fusion1 ------> FusionParam -// | | | | -// GTE(0) | | GTE(1) Const Const -// | | \ | / -// \ / DynamicUpdateSlice -// Tuple -// -TEST_F(FusedDynamicUpdateSliceLivenessTest, NoInterferenceWithUnrelatedFusion) { - EXPECT_FALSE(Run(/*update_uses_tuple_element1=*/false, /*fuse_gte0=*/true)); - EXPECT_FALSE(RunWithHloDataflowAnalysis(/*update_uses_tuple_element1=*/false, - /*fuse_gte0=*/true)); -} - -// Tests that live ranges of buffers Param0[1] and Tuple[1] (which alias fusion) -// do overlap because GTE(1) has two users: -// 1) DynamicUpdateSlice at operand 0. -// 2) Slice at operand 0. 
-// -// Param0 -// / \ Const -// / \ / -// GTE(0) Fusion -----------> FusionParam FusionParam -// | | | | -// | | GTE(1) / -// | | | \ / -// | | | Slice / -// | | | \ / -// | | | Add Const -// | | | | | -// | | DynamicUpdateSlice // fused root -// \ / -// Tuple // computation root -// -TEST_F(FusedDynamicUpdateSliceLivenessTest, WithInterference) { - EXPECT_TRUE(Run(/*update_uses_tuple_element1=*/true)); - EXPECT_TRUE(RunWithHloDataflowAnalysis(/*update_uses_tuple_element1=*/true)); -} - -class DynamicUpdateSliceLivenessTest : public BufferLivenessTest { - protected: - // Builds and runs a computation (see test case computation graphs below). - // Runs BufferLiveness on this computation. - // Returns whether buffer interference is detected between tuple-shaped - // parameter and root instructions at tuple element 1. - bool Run(const bool tuple_element1_has_two_uses) { - auto builder = HloComputation::Builder(TestName()); - // Create param0 Tuple. - Shape data_shape = ShapeUtil::MakeShape(F32, {8}); - Shape update_shape = ShapeUtil::MakeShape(F32, {3}); - auto tuple_param0 = builder.AddInstruction(HloInstruction::CreateParameter( - 0, ShapeUtil::MakeTupleShape({data_shape, data_shape}), "param0")); - - auto gte0 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(data_shape, tuple_param0, 0)); - - auto gte1 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(data_shape, tuple_param0, 1)); - - auto update = builder.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR1({2.f, 2.f, 2.f}))); - - if (tuple_element1_has_two_uses) { - // Add 'gte0' and 'gte1' to create another user of 'gte1'. - gte0 = builder.AddInstruction(HloInstruction::CreateBinary( - data_shape, HloOpcode::kAdd, gte0, gte1)); - } - // Create a DynamicUpdateSlice instruction of tuple element 1 with 'update'. 
- auto starts = builder.AddInstruction( - HloInstruction::CreateConstant(LiteralUtil::CreateR0(2))); - auto dynamic_update_slice = - builder.AddInstruction(HloInstruction::CreateDynamicUpdateSlice( - data_shape, gte1, update, {starts})); - // Create output tuple. - auto tuple_root = builder.AddInstruction( - HloInstruction::CreateTuple({gte0, dynamic_update_slice})); - // Build module and get reference to entry computation. - auto module = CreateNewVerifiedModule(); - module->AddEntryComputation(BuildDummyComputation()); - module->AddEmbeddedComputation(builder.Build()); - // Run BufferLiveness on 'module'. - auto liveness = BufferLiveness::Run( - module.get(), - absl::make_unique(module.get())) - .ConsumeValueOrDie(); - // Return whether or not buffers interference is detected between - // 'tuple_param0' and 'tuple_root' at shape index '{1}'. - return TupleElementsMayInterfere(*liveness, tuple_param0, tuple_root, {1}); - } -}; - -// Tests that live ranges of buffers Param0[1] and Tuple[1] do not overlap in -// the following computation (because DynamicUpdateSlice (at operand 0) is the -// unique user): -// -// Parameter0 -// | | -// GTE(0) GTE(1) Const Const -// | \ | / -// | DynamicUpdateSlice -// \ / -// Tuple -// -TEST_F(DynamicUpdateSliceLivenessTest, NoInterference) { - EXPECT_FALSE(Run(/*tuple_element1_has_two_uses=*/false)); -} - -// Tests that live ranges of buffers Param0[1] and Tuple[1] do overlap because -// GTE(1) has two users: -// 1) DynamicUpdateSlice at operand 0. -// 2) Add at operand 1. 
-// -// Parameter0 -// | | -// GTE(0) GTE(1) -// | / | -// | / | -// Add | Const Const -// | | | | -// | DynamicUpdateSlice -// \ / -// Tuple -// -TEST_F(DynamicUpdateSliceLivenessTest, WithInterference) { - EXPECT_TRUE(Run(/*tuple_element1_has_two_uses=*/true)); -} - -} // namespace - -} // namespace xla diff --git a/tensorflow/compiler/xla/service/call_inliner.cc b/tensorflow/compiler/xla/service/call_inliner.cc index 1d421404440..062110af867 100644 --- a/tensorflow/compiler/xla/service/call_inliner.cc +++ b/tensorflow/compiler/xla/service/call_inliner.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include "tensorflow/compiler/xla/service/call_graph.h" +#include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" #include "tensorflow/compiler/xla/service/hlo_dce.h" #include "tensorflow/core/lib/core/errors.h" diff --git a/tensorflow/compiler/xla/service/computation_placer.cc b/tensorflow/compiler/xla/service/computation_placer.cc index dd3a6ad4067..f4dfb48168c 100644 --- a/tensorflow/compiler/xla/service/computation_placer.cc +++ b/tensorflow/compiler/xla/service/computation_placer.cc @@ -188,6 +188,8 @@ static bool InitModule() { stream_executor::host::kHostPlatformId, &CreateComputationPlacer); xla::ComputationPlacer::RegisterComputationPlacer( stream_executor::cuda::kCudaPlatformId, &CreateComputationPlacer); + xla::ComputationPlacer::RegisterComputationPlacer( + stream_executor::rocm::kROCmPlatformId, &CreateComputationPlacer); return true; } static bool module_initialized = InitModule(); diff --git a/tensorflow/compiler/xla/service/copy_insertion.h b/tensorflow/compiler/xla/service/copy_insertion.h index 72a245bc04c..703942662b1 100644 --- a/tensorflow/compiler/xla/service/copy_insertion.h +++ b/tensorflow/compiler/xla/service/copy_insertion.h @@ -16,7 +16,7 @@ limitations under the License. 
#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_COPY_INSERTION_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_COPY_INSERTION_H_ -#include "tensorflow/compiler/xla/service/buffer_liveness.h" +#include "tensorflow/compiler/xla/service/hlo_alias_analysis.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 3e3af30d3aa..37baf0e36df 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -111,7 +111,6 @@ cc_library( "//tensorflow/compiler/xla/service:batch_dot_simplification", "//tensorflow/compiler/xla/service:batchnorm_expander", "//tensorflow/compiler/xla/service:buffer_assignment", - "//tensorflow/compiler/xla/service:buffer_liveness", "//tensorflow/compiler/xla/service:call_inliner", "//tensorflow/compiler/xla/service:cholesky_expander", "//tensorflow/compiler/xla/service:conditional_simplifier", @@ -247,10 +246,10 @@ cc_library( "//tensorflow/compiler/xla/service:computation_layout", "//tensorflow/compiler/xla/service:executable", "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_dataflow_analysis", "//tensorflow/compiler/xla/service:hlo_execution_profile", "//tensorflow/compiler/xla/service:logical_buffer", "//tensorflow/compiler/xla/service:shaped_buffer", - "//tensorflow/compiler/xla/service:tuple_points_to_analysis", "//tensorflow/core:lib", "//tensorflow/core:stream_executor_no_cuda", "//tensorflow/core/profiler/lib:traceme", diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index 4b09fa93f79..e7134325907 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -50,7 +50,6 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/batch_dot_simplification.h" #include "tensorflow/compiler/xla/service/batchnorm_expander.h" #include "tensorflow/compiler/xla/service/buffer_assignment.h" -#include "tensorflow/compiler/xla/service/buffer_liveness.h" #include "tensorflow/compiler/xla/service/call_inliner.h" #include "tensorflow/compiler/xla/service/cholesky_expander.h" #include "tensorflow/compiler/xla/service/conditional_simplifier.h" diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc index b10c4006f30..476579883f3 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/cpu/cpu_executable.h" #include + #include #include #include @@ -224,19 +225,19 @@ StatusOr CpuExecutable::CreateResultShapedBuffer( // caller. TF_RETURN_IF_ERROR(result_buffer.buffers().ForEachMutableElementWithStatus( [&](const ShapeIndex& index, se::DeviceMemoryBase* device_memory) { - const auto& sources = this->GetRootPointsToSet().element(index); + const auto& sources = this->GetRootValueSet().element(index); // The points to set is unambiguous so the set should be a // singleton. - CHECK_EQ(1, sources.size()); - const LogicalBuffer* buffer_source = sources[0]; - HloInstruction* src = buffer_source->instruction(); + CHECK_EQ(1, sources.values().size()); + const HloValue* value_source = sources.values()[0]; + HloInstruction* src = value_source->instruction(); // The source for this result buffer can be a nested buffer such as // a tuple element. The source instruction should have a // non-parameter buffer assigned. 
TF_ASSIGN_OR_RETURN( const BufferAllocation::Slice slice, - this->assignment_->GetUniqueSlice(src, buffer_source->index())); + this->assignment_->GetUniqueSlice(src, value_source->index())); const BufferAllocation::Index buffer_index = slice.index(); se::OwningDeviceMemory& buffer = buffers[buffer_index]; if (!slice.allocation()->is_entry_computation_parameter()) { @@ -293,7 +294,7 @@ StatusOr CpuExecutable::ExecuteAsyncOnStreamImpl( const ServiceExecutableRunOptions* run_options, absl::Span arguments, HloExecutionProfile* hlo_execution_profile) { - if (GetRootPointsToSet().IsAmbiguous()) { + if (GetRootValueSet().IsAmbiguous()) { return Unimplemented("Points-to set of root instruction is ambiguous"); } @@ -371,8 +372,8 @@ StatusOr CpuExecutable::ExecuteAsyncOnStreamImpl( return ShapeUtil::ByteSizeOf(shape, sizeof(void*)); } -const PointsToSet& CpuExecutable::GetRootPointsToSet() const { - return assignment_->points_to_analysis().GetPointsToSet( +const InstructionValueSet& CpuExecutable::GetRootValueSet() const { + return assignment_->dataflow_analysis().GetInstructionValueSet( module().entry_computation()->root_instruction()); } diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.h b/tensorflow/compiler/xla/service/cpu/cpu_executable.h index 735a20749b9..169acdeffd4 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.h @@ -26,11 +26,11 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/cpu/simple_orc_jit.h" #include "tensorflow/compiler/xla/service/executable.h" +#include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h" #include "tensorflow/compiler/xla/service/hlo_execution_profile.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/shaped_buffer.h" -#include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/platform/macros.h" @@ -129,9 +129,9 @@ class CpuExecutable : public Executable { const ServiceExecutableRunOptions* run_options, absl::Span buffers); - // Returns the points-to set of the root instruction of the entry - // computation. Uses points-to analysis from buffer assignment. - const PointsToSet& GetRootPointsToSet() const; + // Returns the instruction value set of the root instruction of the entry + // computation. Uses dataflow analysis from buffer assignment. + const InstructionValueSet& GetRootValueSet() const; // The JIT containing compiled modules. const std::unique_ptr jit_; diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h index 7f900d9fc55..fcb68a200d9 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h @@ -20,6 +20,8 @@ limitations under the License. 
#include "absl/types/span.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/service/dfs_hlo_visitor.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" @@ -29,9 +31,6 @@ limitations under the License. namespace xla { -class HloComputation; -class HloInstruction; - // DfsHloVisitor with default action based on the HloInstruction being visited. // Users should not use this class directly, but use the type aliases // DfsHloVisitorWithDefault/ConstDfsHloVisitorWithDefault instead. @@ -246,6 +245,52 @@ using DfsHloVisitorWithDefault = DfsHloVisitorWithDefaultBase; using ConstDfsHloVisitorWithDefault = DfsHloVisitorWithDefaultBase; +// A common base class for visitors performing rewriting operation. +// +// Subclasses call ReplaceWithNewInstruction and ReplaceInstruction while +// visiting. +class DfsHloRewriteVisitor : public DfsHloVisitorWithDefault { + public: + // Default visitor action is to do nothing and return OK. + Status DefaultAction(HloInstruction* /*hlo_instruction*/) override { + return Status::OK(); + } + + bool changed() const { return changed_; } + + protected: + // Replaces the existing HLO instruction old_instruction, with + // new_instruction, and marks the optimizer status as changed. + // Returns the Status representing the result of the replace operation. 
+ Status ReplaceWithNewInstruction( + HloInstruction* old_instruction, + std::unique_ptr new_instruction) { + VLOG(3) << "Replacing instruction:"; + VLOG(3) << " old: " << old_instruction->ToString(); + VLOG(3) << " new: " << new_instruction->ToString(); + TF_RETURN_IF_ERROR(old_instruction->parent()->ReplaceWithNewInstruction( + old_instruction, std::move(new_instruction))); + changed_ = true; + return Status::OK(); + } + + // Replaces the existing HLO instruction old_instruction, with + // new_instruction, and marks the optimizer status as changed. + // Returns the Status representing the result of the replace operation. + Status ReplaceInstruction(HloInstruction* old_instruction, + HloInstruction* new_instruction) { + VLOG(3) << "Replacing instruction:"; + VLOG(3) << " old: " << old_instruction->ToString(); + VLOG(3) << " new: " << new_instruction->ToString(); + TF_RETURN_IF_ERROR(old_instruction->parent()->ReplaceInstruction( + old_instruction, new_instruction)); + changed_ = true; + return Status::OK(); + } + + bool changed_ = false; +}; + // (Const)FunctionVisitor lets you transform an // std::function into a (Const)DfsHloVisitor. // diff --git a/tensorflow/compiler/xla/service/dump.cc b/tensorflow/compiler/xla/service/dump.cc index d251c828bcd..6a4837211e8 100644 --- a/tensorflow/compiler/xla/service/dump.cc +++ b/tensorflow/compiler/xla/service/dump.cc @@ -48,22 +48,27 @@ struct CanonicalDebugOptions { // function we treat this struct's members as write-only, and read only from // `opts`. - // If dump_to is empty, default to dumping to stdout. - if (opts.xla_dump_to().empty()) { - dump_to = "-"; - } - // Did the user specifiy an explicit format for dumping? 
- bool output_format_specified = + bool output_format_other_than_url_specified = opts.xla_dump_hlo_as_text() || opts.xla_dump_hlo_as_proto() || opts.xla_dump_hlo_as_dot() || opts.xla_dump_hlo_as_html() || - opts.xla_dump_hlo_as_url() || opts.xla_dump_hlo_snapshots(); + opts.xla_dump_hlo_snapshots(); + bool output_format_specified = + output_format_other_than_url_specified || opts.xla_dump_hlo_as_url(); // If we haven't specified an output format, default to dumping as text. if (!output_format_specified) { dump_as_text = true; } + // If dump_to is empty, default to dumping to stdout, so long as some dump + // format other than dump-as-url was specified. If the user only specified + // --xla_dump_hlo_as_url, then don't dump to stdout, that is likely noise + // they don't want. + if (opts.xla_dump_to().empty() && output_format_other_than_url_specified) { + dump_to = "-"; + } + // If we specified a regular expression restricting which modules to dump, // respect that. // @@ -143,6 +148,10 @@ void DumpToFileInDirImpl(string_view filename, string_view contents, return; } + if (opts.dump_to.empty()) { + return; + } + const string& dir = opts.dump_to; VLOG(1) << "Dumping " << filename << " to " << dir; diff --git a/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc b/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc index 1af8cea369a..3925eeb7f62 100644 --- a/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc +++ b/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc @@ -760,7 +760,7 @@ Status DynamicDimensionInferenceVisitor::HandleGather(HloInstruction* hlo) { if (operand_index != 1) { return Unimplemented( "Detects a dynamic dimension on the data input of gather, which " - "is not suported: %s", + "is not supported: %s", hlo->ToString()); } // A mapping from output to input batch dim number. 
-1 means not a batch @@ -803,7 +803,7 @@ Status DynamicDimensionInferenceVisitor::HandleScatter(HloInstruction* hlo) { if (operand_index == 0) { return Unimplemented( "Detects a dynamic dimension on the data input of scatter, which " - "is not suported: %s", + "is not supported: %s", hlo->ToString()); } @@ -820,7 +820,7 @@ Status DynamicDimensionInferenceVisitor::HandleScatter(HloInstruction* hlo) { dimension)) { return Unimplemented( "Dynamic dimension of update window dims is not supported " - "is not suported: %s", + "is not supported: %s", hlo->ToString()); } // The dynamic dimension is collapsed and won't show up in the output. diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 9858fe6cfc6..92c147c1c71 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -477,11 +477,11 @@ cc_library( "//tensorflow/compiler/xla/service:executable", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_casting_utils", + "//tensorflow/compiler/xla/service:hlo_dataflow_analysis", "//tensorflow/compiler/xla/service:hlo_execution_profile", "//tensorflow/compiler/xla/service:logical_buffer", "//tensorflow/compiler/xla/service:shaped_buffer", "//tensorflow/compiler/xla/service:transfer_manager", - "//tensorflow/compiler/xla/service:tuple_points_to_analysis", "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", @@ -730,6 +730,7 @@ cc_library( "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_query", "//tensorflow/compiler/xla/service:instruction_fusion", "//tensorflow/compiler/xla/service:pattern_matcher", "//tensorflow/compiler/xla/service/llvm_ir:fused_ir_emitter", @@ -869,12 +870,14 @@ tf_cc_test( srcs = ["fusion_merger_test.cc"], deps = [ ":fusion_merger", + 
":gpu_fusible", ":instruction_fusion", "//tensorflow/compiler/xla:test_helpers", "//tensorflow/compiler/xla/service:hlo_matchers", "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "@com_google_absl//absl/types:span", ], ) @@ -927,8 +930,8 @@ tf_cc_test( ) cc_library( - name = "nvptx_constants", - hdrs = ["nvptx_constants.h"], + name = "target_constants", + hdrs = ["target_constants.h"], ) cc_library( @@ -937,8 +940,8 @@ cc_library( hdrs = ["gpu_transfer_manager.h"], deps = [ ":infeed_manager", - ":nvptx_constants", ":outfeed_manager", + ":target_constants", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_tree", @@ -985,10 +988,10 @@ cc_library( ":ir_emission_utils", ":ir_emitter", ":multi_output_fusion", - ":nvptx_constants", ":partition_assignment", ":stream_assignment", ":stream_executor_util", + ":target_constants", ":variadic_op_splitter", "//tensorflow/compiler/xla:protobuf_util", "//tensorflow/compiler/xla:status_macros", @@ -998,7 +1001,6 @@ cc_library( "//tensorflow/compiler/xla/service:algebraic_simplifier", "//tensorflow/compiler/xla/service:batchnorm_expander", "//tensorflow/compiler/xla/service:buffer_assignment", - "//tensorflow/compiler/xla/service:buffer_liveness", "//tensorflow/compiler/xla/service:call_inliner", "//tensorflow/compiler/xla/service:conditional_simplifier", "//tensorflow/compiler/xla/service:convolution_group_converter", diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.cc index 2cceb0422d0..4d61f09a7a9 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.cc @@ -14,8 +14,10 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.h" + #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" namespace xla { diff --git a/tensorflow/compiler/xla/service/gpu/fusion_merger.cc b/tensorflow/compiler/xla/service/gpu/fusion_merger.cc index 6e15ec5f939..f707a87d79e 100644 --- a/tensorflow/compiler/xla/service/gpu/fusion_merger.cc +++ b/tensorflow/compiler/xla/service/gpu/fusion_merger.cc @@ -151,6 +151,7 @@ class FusionInstructionMerger { int num_fail_expensive_fused_instruction_ = 0; int num_fail_net_bytes_transferred_ratio_ = 0; int num_fail_inefficient_fusion_emitter_ = 0; + int num_fail_fusion_too_large_ = 0; TF_DISALLOW_COPY_AND_ASSIGN(FusionInstructionMerger); }; @@ -172,7 +173,8 @@ Status FusionInstructionMerger::Run() { << " expensive_instruction: " << num_fail_expensive_fused_instruction_ << " net_bytes_transferred: " << num_fail_net_bytes_transferred_ratio_ << " inefficient_fusion_emitter: " - << num_fail_inefficient_fusion_emitter_ << " }"; + << num_fail_inefficient_fusion_emitter_ + << " fusion_too_large: " << num_fail_fusion_too_large_ << " }"; return Status::OK(); } @@ -258,6 +260,18 @@ Status FusionInstructionMerger::HandleFusion(HloInstruction* fusion) { return Status::OK(); } + // Skip 'fusion' instruction if merging it into at least one of the users + // would make the fusion too big. 
+ if (absl::c_any_of(fusion->users(), [fusion](const HloInstruction* user) { + return FusionWouldBeTooLarge(*fusion, *user); + })) { + VLOG(3) << "Not merging " << fusion->name() + << ": Contains one or more users where fusing would cause " + "the fusion to have too many parameters."; + ++num_fail_fusion_too_large_; + return Status::OK(); + } + // Merge fused instructions from 'fusion' into each user. std::vector users = fusion->users(); for (HloInstruction* user : users) { diff --git a/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc b/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc index 45f23fcdc56..50ed7448790 100644 --- a/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc +++ b/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc @@ -15,6 +15,10 @@ limitations under the License. #include "tensorflow/compiler/xla/service/gpu/fusion_merger.h" +#include + +#include "absl/types/span.h" +#include "tensorflow/compiler/xla/service/gpu/gpu_fusible.h" #include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h" #include "tensorflow/compiler/xla/service/hlo_matchers.h" #include "tensorflow/compiler/xla/service/hlo_parser.h" @@ -263,6 +267,50 @@ TEST_F(FusionMergerTest, WillNotMergeReduceUnfriendlyLayouts) { EXPECT_FALSE(FusionMerger().Run(module.get()).ValueOrDie()); } +// Check that we limit the number of operands to fusions we create. +TEST_F(FusionMergerTest, AvoidsLargeFusion) { + constexpr int64 kNumParams = kMaxOperandsAndOutputsPerFusion + 1; + + // Compute + // p0 + p1 + p2 + ... + pn, + // Use so many parameters that they do not fit into one fusion. 
+ auto module = CreateNewVerifiedModule(); + HloComputation::Builder b(TestName()); + Shape shape = ShapeUtil::MakeShape(F32, {10, 100}); + + std::vector entry_params; + + for (int64 i = 0; i < kNumParams; ++i) { + entry_params.push_back( + b.AddInstruction(HloInstruction::CreateParameter(i, shape, "p"))); + } + auto make_fusion = [&](absl::Span params) { + // Build a fusion computation for calculating the sum of all parameters. + HloComputation::Builder sub_builder("subcomp"); + HloInstruction* sum = nullptr; + for (int64 i = 0; i < params.size(); ++i) { + auto p = sub_builder.AddInstruction( + HloInstruction::CreateParameter(i, shape, "p")); + if (sum == nullptr) { + sum = p; + } else { + sum = sub_builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, sum, p)); + } + } + HloComputation* subcomp = + module->AddEmbeddedComputation(sub_builder.Build()); + return HloInstruction::CreateFusion( + shape, HloInstruction::FusionKind::kLoop, params, subcomp); + }; + auto fusion = b.AddInstruction( + make_fusion(absl::MakeSpan(entry_params) + .subspan(0, kMaxOperandsAndOutputsPerFusion))); + b.AddInstruction(make_fusion({entry_params.back(), fusion})); + module->AddEntryComputation(b.Build()); + EXPECT_FALSE(FusionMerger().Run(module.get()).ValueOrDie()); +} + // TODO(b/119692968): Remove this test once fusion emitter is fixed. TEST_F(FusionMergerTest, WillNotMergeIfFusionEmitterIsInefficient) { auto module = ParseAndReturnVerifiedModule(R"( diff --git a/tensorflow/compiler/xla/service/gpu/gemm_rewriter.cc b/tensorflow/compiler/xla/service/gpu/gemm_rewriter.cc index 36098cbfb72..df7ee3cdc69 100644 --- a/tensorflow/compiler/xla/service/gpu/gemm_rewriter.cc +++ b/tensorflow/compiler/xla/service/gpu/gemm_rewriter.cc @@ -61,7 +61,7 @@ static complex128 GetScalarConstantAsComplex(const Literal &literal) { // and provided C has no other users). // We then guide the buffer assignment to alias the buffer of the custom call // and C. 
-class GemmRewriterVisitor : public DfsHloVisitorWithDefault { +class GemmRewriterVisitor : public DfsHloRewriteVisitor { public: Status HandleDot(HloInstruction *instr) override { if (IsMatrixMultiplication(*instr)) { @@ -107,9 +107,7 @@ class GemmRewriterVisitor : public DfsHloVisitorWithDefault { config.set_alpha_real(new_alpha.real()); config.set_alpha_imag(new_alpha.imag()); TF_RETURN_IF_ERROR(existing_gemm->set_backend_config(config)); - TF_RETURN_IF_ERROR( - instr->parent()->ReplaceInstruction(instr, existing_gemm)); - changed_ = true; + TF_RETURN_IF_ERROR(ReplaceInstruction(instr, existing_gemm)); } } return Status::OK(); @@ -141,27 +139,12 @@ class GemmRewriterVisitor : public DfsHloVisitorWithDefault { } return Status::OK(); } - - Status DefaultAction(HloInstruction *) override { return Status::OK(); } - - bool IsChanged() { return changed_; } - - private: - Status ReplaceWithNewInstruction( - HloInstruction *instr, std::unique_ptr replacement) { - TF_RETURN_IF_ERROR(instr->parent()->ReplaceWithNewInstruction( - instr, std::move(replacement))); - changed_ = true; - return Status::OK(); - } - - bool changed_ = false; }; static StatusOr RunOnComputation(HloComputation *computation) { GemmRewriterVisitor visitor; TF_RETURN_IF_ERROR(computation->Accept(&visitor)); - return visitor.IsChanged(); + return visitor.changed(); } StatusOr GemmRewriter::Run(HloModule *module) { diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc index 8a65e11f809..ce559c2c1e7 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc @@ -256,7 +256,7 @@ StatusOr GpuExecutable::Execute( HloExecutionProfile* hlo_execution_profile, bool block_host_until_done) { se::DeviceMemoryAllocator* memory_allocator = run_options->allocator(); - if (GetRootPointsToSet().IsAmbiguous()) { + if (GetRootValueSet().IsAmbiguous()) { return Unimplemented("Points-to set 
of root instruction is ambiguous"); } @@ -327,20 +327,20 @@ StatusOr GpuExecutable::Execute( TF_RETURN_IF_ERROR(shaped_buffer.buffers().ForEachMutableElementWithStatus( [&buffer_allocations, &buffers_in_result, this]( const ShapeIndex& index, se::DeviceMemoryBase* device_memory) { - const auto& sources = this->GetRootPointsToSet().element(index); + const auto& sources = this->GetRootValueSet().element(index); // The points-to set is unambiguous so the set should be a // singleton. That is, we know exactly which instruction // produced the array at this element. - CHECK_EQ(1, sources.size()); - auto src_hlo = sources[0]->instruction(); + CHECK_EQ(1, sources.values().size()); + auto src_hlo = sources.values()[0]->instruction(); - VLOG(4) << "Looking at: " << sources[0]; + VLOG(4) << "Looking at: " << sources.values()[0]; // The source instruction should have a non-parameter buffer // assigned. - TF_ASSIGN_OR_RETURN( - const BufferAllocation::Slice slice, - this->assignment_->GetUniqueSlice(src_hlo, sources[0]->index())); + TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice slice, + this->assignment_->GetUniqueSlice( + src_hlo, sources.values()[0]->index())); se::DeviceMemoryBase src_base = buffer_allocations->GetDeviceAddress(slice.index()); @@ -398,8 +398,8 @@ StatusOr GpuExecutable::ExecuteAsyncOnStream( return Execute(run_options, arguments, nullptr, block_host_until_done); } -const PointsToSet& GpuExecutable::GetRootPointsToSet() const { - return assignment_->points_to_analysis().GetPointsToSet( +const InstructionValueSet& GpuExecutable::GetRootValueSet() const { + return assignment_->dataflow_analysis().GetInstructionValueSet( module().entry_computation()->root_instruction()); } diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.h b/tensorflow/compiler/xla/service/gpu/gpu_executable.h index 45ed345bbf6..fd1a17c0a92 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.h @@ -29,10 
+29,10 @@ limitations under the License. #include "tensorflow/compiler/xla/service/gpu/stream_assignment.h" #include "tensorflow/compiler/xla/service/gpu/thunk.h" #include "tensorflow/compiler/xla/service/gpu/thunk_schedule.h" +#include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h" #include "tensorflow/compiler/xla/service/hlo_execution_profile.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/shaped_buffer.h" -#include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/platform/macros.h" @@ -109,9 +109,9 @@ class GpuExecutable : public Executable { bool block_host_until_done, HloExecutionProfile* hlo_execution_profile); - // Returns the points-to set of the root instruction of the entry - // computation. Uses points-to analysis from buffer assignment. - const PointsToSet& GetRootPointsToSet() const; + // Returns the value set of the root instruction of the entry + // computation. Uses dataflow analysis from buffer assignment. + const InstructionValueSet& GetRootValueSet() const; using BufferAllocToDeviceMemoryMap = absl::flat_hash_map; diff --git a/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc b/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc index ec1a093a31c..11a829a12b4 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc @@ -23,8 +23,8 @@ limitations under the License. 
#include "llvm/IR/DataLayout.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/literal_util.h" -#include "tensorflow/compiler/xla/service/gpu/nvptx_constants.h" #include "tensorflow/compiler/xla/service/gpu/outfeed_manager.h" +#include "tensorflow/compiler/xla/service/gpu/target_constants.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/statusor.h" @@ -181,13 +181,22 @@ Status GpuTransferManager::TransferLiteralFromOutfeed( static std::unique_ptr CreateNVPTXTransferManager() { return absl::make_unique( /*id=*/stream_executor::cuda::kCudaPlatformId, - /*pointer_size=*/llvm::DataLayout(xla::gpu::kDataLayout) + /*pointer_size=*/llvm::DataLayout(xla::gpu::nvptx::kDataLayout) + .getPointerSize(0 /* default address space */)); +} + +static std::unique_ptr CreateAMDGPUTransferManager() { + return absl::make_unique( + /*id=*/stream_executor::rocm::kROCmPlatformId, + /*pointer_size=*/llvm::DataLayout(xla::gpu::amdgpu::kDataLayout) .getPointerSize(0 /* default address space */)); } static bool InitModule() { xla::TransferManager::RegisterTransferManager( stream_executor::cuda::kCudaPlatformId, &CreateNVPTXTransferManager); + xla::TransferManager::RegisterTransferManager( + stream_executor::rocm::kROCmPlatformId, &CreateAMDGPUTransferManager); return true; } static bool module_initialized = InitModule(); diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc index d862973fc6b..b3ad5719e83 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc @@ -19,6 +19,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/gpu/gpu_fusible.h" #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/hlo_query.h" #include "tensorflow/compiler/xla/service/llvm_ir/fused_ir_emitter.h" #include "tensorflow/compiler/xla/service/pattern_matcher.h" #include "tensorflow/compiler/xla/shape_util.h" @@ -27,30 +28,29 @@ limitations under the License. namespace xla { namespace gpu { -namespace { - -bool IsIEEEFloatingPointScalarConstant(const HloInstruction* constant) { - if (constant->opcode() != HloOpcode::kConstant || - !ShapeUtil::IsScalar(constant->shape())) { - return false; - } - auto type = constant->shape().element_type(); - return type == F16 || type == F32 || type == F64; -} - -} // namespace - /*static*/ bool GpuInstructionFusion::IsExpensive( const HloInstruction& instruction) { - switch (instruction.opcode()) { - // We say that floating-point division is cheap on the GPU. - case HloOpcode::kDivide: - return !ShapeUtil::ElementIsFloating(instruction.shape()) && - InstructionFusion::IsExpensive(instruction); - - default: - return InstructionFusion::IsExpensive(instruction); + // We say that floating-point division is cheap on the GPU. + if (instruction.opcode() == HloOpcode::kDivide && + ShapeUtil::ElementIsFloating(instruction.shape())) { + return false; } + // LLVM optimizes the integer division/remainder by a + // constant scalar to a few fast operations. + if ((instruction.opcode() == HloOpcode::kDivide || + instruction.opcode() == HloOpcode::kRemainder) && + ShapeUtil::ElementIsIntegral(instruction.shape())) { + auto* operand1 = instruction.operand(1); + if (hlo_query::IsScalarConstant(operand1)) { + return false; + } + // Broadcasted scalar is also being optimized. 
+ if (operand1->opcode() == HloOpcode::kBroadcast && + hlo_query::IsScalarConstant(operand1->operand(0))) { + return false; + } + } + return InstructionFusion::IsExpensive(instruction); } bool GpuInstructionFusion::ShouldFuseInexpensiveChecks(HloInstruction* consumer, diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc index f871e3780ff..ebab423588b 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc @@ -581,5 +581,62 @@ TEST_F(InstructionFusionTest, FuseReverse) { op::Reverse(op::Add(op::Parameter(), op::Parameter()))); } +TEST_F(InstructionFusionTest, GpuIsExpensiveF32) { + auto m = CreateNewVerifiedModule(); + Shape r0f32 = ShapeUtil::MakeShape(F32, {}); + HloComputation::Builder builder(TestName()); + HloInstruction* param0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, r0f32, "param0")); + + HloInstruction* one = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0(1.0f))); + HloInstruction* div = builder.AddInstruction( + HloInstruction::CreateBinary(r0f32, HloOpcode::kDivide, param0, one)); + HloInstruction* rem = builder.AddInstruction( + HloInstruction::CreateBinary(r0f32, HloOpcode::kRemainder, param0, one)); + + EXPECT_FALSE(GpuInstructionFusion::IsExpensive(*div)); + EXPECT_TRUE(GpuInstructionFusion::IsExpensive(*rem)); +} + +TEST_F(InstructionFusionTest, GpuIsExpensiveS32) { + auto m = CreateNewVerifiedModule(); + Shape r0s32 = ShapeUtil::MakeShape(S32, {}); + HloComputation::Builder builder(TestName()); + HloInstruction* param0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, r0s32, "param0")); + + HloInstruction* one = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0(1.0f))); + HloInstruction* div = builder.AddInstruction( + HloInstruction::CreateBinary(r0s32, HloOpcode::kDivide, param0, 
one)); + HloInstruction* rem = builder.AddInstruction( + HloInstruction::CreateBinary(r0s32, HloOpcode::kRemainder, param0, one)); + + EXPECT_FALSE(GpuInstructionFusion::IsExpensive(*div)); + EXPECT_FALSE(GpuInstructionFusion::IsExpensive(*rem)); +} + +TEST_F(InstructionFusionTest, GpuIsExpensiveBroadcastS32) { + auto m = CreateNewVerifiedModule(); + Shape r1s32 = ShapeUtil::MakeShape(S32, {10}); + HloComputation::Builder builder(TestName()); + HloInstruction* param0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, r1s32, "param0")); + + HloInstruction* one = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0(1.0f))); + HloInstruction* one_broad = + builder.AddInstruction(HloInstruction::CreateBroadcast(r1s32, one, {})); + + HloInstruction* div = builder.AddInstruction(HloInstruction::CreateBinary( + r1s32, HloOpcode::kDivide, param0, one_broad)); + HloInstruction* rem = builder.AddInstruction(HloInstruction::CreateBinary( + r1s32, HloOpcode::kRemainder, param0, one_broad)); + + EXPECT_FALSE(GpuInstructionFusion::IsExpensive(*div)); + EXPECT_FALSE(GpuInstructionFusion::IsExpensive(*rem)); +} + } // namespace gpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc index f90e2716869..be24b8fc05e 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc @@ -205,12 +205,12 @@ bool IrEmitter::MaybeEmitDirectAtomicOperation( } if (root_opcode == HloOpcode::kAdd) { + llvm::Triple target_triple = llvm::Triple(module_->getTargetTriple()); // NVPTX supports atomicAdd on F32 and integer types. 
- if (element_type == F32) { + if (target_triple.isNVPTX() && element_type == F32) { // F32 + F32 - llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::nvvm_atomic_load_add_f32, - {output_address, source}, - {output_address->getType()}, &b_); + AtomicRMW(llvm::AtomicRMWInst::FAdd, output_address, source, + llvm::AtomicOrdering::SequentiallyConsistent); return true; } if (is_atomic_integral) { diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc index 48df4e4f3b8..d50a0d4baa0 100644 --- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc @@ -34,7 +34,6 @@ limitations under the License. #include "tensorflow/compiler/xla/service/algebraic_simplifier.h" #include "tensorflow/compiler/xla/service/batchnorm_expander.h" #include "tensorflow/compiler/xla/service/buffer_assignment.h" -#include "tensorflow/compiler/xla/service/buffer_liveness.h" #include "tensorflow/compiler/xla/service/call_inliner.h" #include "tensorflow/compiler/xla/service/conditional_simplifier.h" #include "tensorflow/compiler/xla/service/convolution_group_converter.h" @@ -66,10 +65,10 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h" #include "tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/nvptx_backend_lib.h" #include "tensorflow/compiler/xla/service/gpu/multi_output_fusion.h" -#include "tensorflow/compiler/xla/service/gpu/nvptx_constants.h" #include "tensorflow/compiler/xla/service/gpu/partition_assignment.h" #include "tensorflow/compiler/xla/service/gpu/stream_assignment.h" #include "tensorflow/compiler/xla/service/gpu/stream_executor_util.h" +#include "tensorflow/compiler/xla/service/gpu/target_constants.h" #include "tensorflow/compiler/xla/service/gpu/thunk_schedule.h" #include "tensorflow/compiler/xla/service/gpu/variadic_op_splitter.h" #include "tensorflow/compiler/xla/service/hlo.pb.h" @@ -493,11 +492,10 @@ void WarnIfBadDriverJITVersion() { }); } - } // namespace NVPTXCompiler::NVPTXCompiler() - : pointer_size_(llvm::DataLayout(kDataLayout) + : pointer_size_(llvm::DataLayout(nvptx::kDataLayout) .getPointerSize(0 /* default address space */)) {} StatusOr> NVPTXCompiler::RunHloPasses( @@ -536,8 +534,8 @@ StatusOr> NVPTXCompiler::RunBackend( llvm::Module llvm_module(module->name().c_str(), llvm_context); // Set the target triple and the data layout. - llvm_module.setTargetTriple(kTargetTriple); - llvm_module.setDataLayout(kDataLayout); + llvm_module.setTargetTriple(nvptx::kTargetTriple); + llvm_module.setDataLayout(nvptx::kDataLayout); // Determine the HLO schedule, which is an ordering of HLO instructions. 
This // is used by buffer assignment to enable buffer reuse, and the same ordering diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_constants.h b/tensorflow/compiler/xla/service/gpu/target_constants.h similarity index 61% rename from tensorflow/compiler/xla/service/gpu/nvptx_constants.h rename to tensorflow/compiler/xla/service/gpu/target_constants.h index 67fa0020aa0..eac7b271bcd 100644 --- a/tensorflow/compiler/xla/service/gpu/nvptx_constants.h +++ b/tensorflow/compiler/xla/service/gpu/target_constants.h @@ -13,20 +13,35 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_NVPTX_CONSTANTS_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_NVPTX_CONSTANTS_H_ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_TARGET_CONSTANTS_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_TARGET_CONSTANTS_H_ namespace xla { namespace gpu { +namespace nvptx { // The triple that represents our target. constexpr char kTargetTriple[] = "nvptx64-nvidia-cuda"; // The data layout of the emitted module. Copied from computeDataLayout in // NVPTXTargetMachine.cpp. constexpr char kDataLayout[] = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"; +} // namespace nvptx + +namespace amdgpu { + +// The triple that represents our target on LLVM AMDGPU backend. +constexpr char kTargetTriple[] = "amdgcn-amd-amdhsa"; + +// The data layout of the emitted module. 
+constexpr char kDataLayout[] = + "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32" + "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" + "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"; + +} // namespace amdgpu } // namespace gpu } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_NVPTX_CONSTANTS_H_ +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_TARGET_CONSTANTS_H_ diff --git a/tensorflow/compiler/xla/service/gpu/tests/gpu_convolution_regression_test.cc b/tensorflow/compiler/xla/service/gpu/tests/gpu_convolution_regression_test.cc index 4451ab8ccb9..9e5485e3c95 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/gpu_convolution_regression_test.cc +++ b/tensorflow/compiler/xla/service/gpu/tests/gpu_convolution_regression_test.cc @@ -28,7 +28,7 @@ class GpuConvolutionRegressionTest : public HloTestBase { HloModuleConfig config; config.set_debug_options(GetDebugOptionsFromFlags()); (void)backend().compiler()->RunHloPasses( - ParseHloString(hlo_string, config).ConsumeValueOrDie(), + ParseAndReturnUnverifiedModule(hlo_string, config).ConsumeValueOrDie(), backend().default_stream_executor(), backend().memory_allocator()); } }; diff --git a/tensorflow/compiler/xla/service/gpu/tests/gpu_kernel_tiling_test.cc b/tensorflow/compiler/xla/service/gpu/tests/gpu_kernel_tiling_test.cc index 8e66d0aad6a..a12932f573b 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/gpu_kernel_tiling_test.cc +++ b/tensorflow/compiler/xla/service/gpu/tests/gpu_kernel_tiling_test.cc @@ -316,9 +316,9 @@ TEST_F(GpuKernelTilingTest, ColumnReductionWithPowerOf2OutputElementsUnrolled) { CompileAndVerifyIr(std::move(hlo_module), R"( ; CHECK-LABEL: define void @fusion -; CHECK: call float @llvm.nvvm.atomic.load.add.f32.p0f32 -; CHECK: call float @llvm.nvvm.atomic.load.add.f32.p0f32 -; CHECK-NOT: call float @llvm.nvvm.atomic.load.add.f32.p0f32 +; CHECK: atomicrmw fadd float +; CHECK: atomicrmw fadd float +; CHECK-NOT: atomicrmw fadd float ; CHECK: } )", 
/*match_optimized_ir=*/true); @@ -363,8 +363,8 @@ TEST_F(GpuKernelTilingTest, CompileAndVerifyIr(std::move(hlo_module), R"( ; CHECK-LABEL: define void @fusion -; CHECK: call float @llvm.nvvm.atomic.load.add.f32.p0f32 -; CHECK-NOT: call float @llvm.nvvm.atomic.load.add.f32.p0f32 +; CHECK: atomicrmw fadd float +; CHECK-NOT: atomicrmw fadd float ; CHECK: } )", /*match_optimized_ir=*/true); @@ -411,11 +411,11 @@ TEST_F(GpuKernelTilingTest, ColumnReductionMOFUnrolled) { CompileAndVerifyIr(std::move(hlo_module), R"( ; CHECK-LABEL: define void @fusion -; CHECK: call float @llvm.nvvm.atomic.load.add.f32.p0f32 -; CHECK: call float @llvm.nvvm.atomic.load.add.f32.p0f32 -; CHECK: call float @llvm.nvvm.atomic.load.add.f32.p0f32 -; CHECK: call float @llvm.nvvm.atomic.load.add.f32.p0f32 -; CHECK-NOT: call float @llvm.nvvm.atomic.load.add.f32.p0f32 +; CHECK: atomicrmw fadd float +; CHECK: atomicrmw fadd float +; CHECK: atomicrmw fadd float +; CHECK: atomicrmw fadd float +; CHECK-NOT: atomicrmw fadd float ; CHECK: } )", /*match_optimized_ir=*/true); @@ -446,7 +446,7 @@ TEST_F(GpuKernelTilingTest, ColumnReductionWithLayoutChangeTiled) { CompileAndVerifyIr(std::move(hlo_module), R"( ; CHECK-LABEL: define void @reduce -; CHECK: call float @llvm.nvvm.atomic.load.add.f32.p0f32 +; CHECK: atomicrmw fadd float ; CHECK: } )", /*match_optimized_ir=*/true); @@ -511,7 +511,7 @@ TEST_F(GpuKernelTilingTest, CompileAndVerifyIr(std::move(hlo_module), R"( ; CHECK-LABEL: define void @reduce -; CHECK: call float @llvm.nvvm.atomic.load.add.f32.p0f32 +; CHECK: atomicrmw fadd float ; CHECK: } )", /*match_optimized_ir=*/true); diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h index fc4aaedde15..316c3514aeb 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.h +++ b/tensorflow/compiler/xla/service/hlo_computation.h @@ -30,7 +30,6 @@ limitations under the License. 
#include "tensorflow/compiler/xla/iterator_util.h" #include "tensorflow/compiler/xla/map_util.h" #include "tensorflow/compiler/xla/service/dfs_hlo_visitor.h" -#include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" #include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/compiler/xla/service/hlo_clone_context.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 7274099ad97..9a083a46ec0 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -614,8 +614,9 @@ StatusOr> HloInstruction::CreateFromProto( if (!proto.dimensions().empty()) { inferred_dimension = proto.dimensions()[0]; } - TF_RET_CHECK(ShapeUtil::ElementsIn(shape) == - ShapeUtil::ElementsIn(operands(0)->shape())) + TF_RET_CHECK(shape.IsArray() && operands(0)->shape().IsArray() && + ShapeUtil::ElementsIn(shape) == + ShapeUtil::ElementsIn(operands(0)->shape())) << "shape: " << ShapeUtil::HumanString(shape) << " operand: " << ShapeUtil::HumanString(operands(0)->shape()); instruction = CreateReshape(shape, operands(0), inferred_dimension); diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc index e2f4c30610a..754ccc1ff9f 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.cc +++ b/tensorflow/compiler/xla/service/hlo_instructions.cc @@ -1308,10 +1308,21 @@ void HloFusionInstruction::MergeFusionInstruction( unfused_instructions.push_back(fused_instruction); } } - CHECK(unfused_instructions.front() == cloned_fusion->fused_expression_root()); + + // If there are no unfused instructions, the fused computation must consist + // only of kParameter instructions. Make the operand of the corresponding + // parameter number the new root. + HloInstruction* unfused_root = + unfused_instructions.empty() + ? 
instruction_to_merge->mutable_operand( + instruction_to_merge->fused_instructions_computation() + ->root_instruction() + ->parameter_number()) + : unfused_instructions.front(); + CHECK(unfused_root == cloned_fusion->fused_expression_root() || + unfused_instructions.empty()); // Replace instruction_to_merge use of 'this' with unfused_root. - TF_CHECK_OK( - instruction_to_merge->ReplaceUseWith(this, unfused_instructions.front())); + TF_CHECK_OK(instruction_to_merge->ReplaceUseWith(this, unfused_root)); // Fuse 'unfused_instructions' into 'this'. for (auto& instruction : unfused_instructions) { FuseInstruction(instruction); @@ -1359,7 +1370,16 @@ void HloFusionInstruction::MergeFusionInstructionIntoMultiOutput( } } - HloInstruction* unfused_root = unfused_instructions.front(); + // If there are no unfused instructions, the fused computation must consist + // only of kParameter instructions. Make the operand of the corresponding + // parameter number the new root. + HloInstruction* unfused_root = + unfused_instructions.empty() + ? instruction_to_merge->mutable_operand( + instruction_to_merge->fused_instructions_computation() + ->root_instruction() + ->parameter_number()) + : unfused_instructions.front(); TF_CHECK_OK(instruction_to_merge->ReplaceAllUsesWith(unfused_root)); TF_CHECK_OK( @@ -1369,6 +1389,9 @@ void HloFusionInstruction::MergeFusionInstructionIntoMultiOutput( } // Fuse the root instruction and generate multiple outputs. + if (unfused_instructions.empty()) { + return; + } FuseInstructionIntoMultiOutput(unfused_root); TF_CHECK_OK(unfused_root->parent()->RemoveInstruction(unfused_root)); // The rest instructions are of normal fusing. 
diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc index d049d3f8d77..da82b599a6a 100644 --- a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc +++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_memory_scheduler.h" +#include #include #include #include @@ -26,6 +27,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" #include "tensorflow/compiler/xla/service/heap_simulator.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_schedule.h" #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" @@ -403,14 +405,17 @@ StatusOr ScheduleComputationHelper( const BufferValue::SizeFunction& size_function, const MemorySchedulerAlgorithm& algorithm, const absl::flat_hash_map& - memory_by_computation) { + memory_by_computation, + int64* peak_memory) { VLOG(2) << "Computation: " << computation->name(); + if (algorithm) { return algorithm(computation, points_to_analysis, alias_analysis, - size_function, memory_by_computation); + size_function, memory_by_computation, peak_memory); } return DefaultMemoryScheduler(computation, points_to_analysis, alias_analysis, - size_function, memory_by_computation); + size_function, memory_by_computation, + peak_memory); } } // namespace @@ -421,7 +426,8 @@ StatusOr DFSMemoryScheduler( const HloAliasAnalysis& alias_analysis, const BufferValue::SizeFunction& size_function, const absl::flat_hash_map& - memory_by_computation) { + memory_by_computation, + int64* peak_memory) { // These variables are a hack to prevent overflows. 
int64 cumulative_total_size = 0; int64 total_hlos = computation->parent()->instruction_count(); @@ -485,6 +491,12 @@ StatusOr DFSMemoryScheduler( return a->name() < b->name(); })); CHECK_EQ(sequence.size(), computation->instruction_count()); + if (peak_memory) { + TF_ASSIGN_OR_RETURN( + *peak_memory, HeapSimulator::MinimumMemoryForComputation( + *computation, sequence, alias_analysis, size_function, + &memory_by_computation)); + } return sequence; } // namespace xla @@ -494,9 +506,18 @@ StatusOr ListMemoryScheduler( const HloAliasAnalysis& alias_analysis, const BufferValue::SizeFunction& size_function, const absl::flat_hash_map& - memory_by_computation) { - return ListScheduler::Run(computation, points_to_analysis, size_function, - memory_by_computation); + memory_by_computation, + int64* peak_memory) { + TF_ASSIGN_OR_RETURN(HloInstructionSequence sequence, + ListScheduler::Run(computation, points_to_analysis, + size_function, memory_by_computation)); + if (peak_memory) { + TF_ASSIGN_OR_RETURN( + *peak_memory, HeapSimulator::MinimumMemoryForComputation( + *computation, sequence, alias_analysis, size_function, + &memory_by_computation)); + } + return sequence; } StatusOr PostOrderMemoryScheduler( @@ -505,8 +526,16 @@ StatusOr PostOrderMemoryScheduler( const HloAliasAnalysis& alias_analysis, const BufferValue::SizeFunction& size_function, const absl::flat_hash_map& - memory_by_computation) { - return HloInstructionSequence(computation->MakeInstructionPostOrder()); + memory_by_computation, + int64* peak_memory) { + HloInstructionSequence sequence(computation->MakeInstructionPostOrder()); + if (peak_memory) { + TF_ASSIGN_OR_RETURN( + *peak_memory, HeapSimulator::MinimumMemoryForComputation( + *computation, sequence, alias_analysis, size_function, + &memory_by_computation)); + } + return sequence; } StatusOr DefaultMemoryScheduler( @@ -515,7 +544,8 @@ StatusOr DefaultMemoryScheduler( const HloAliasAnalysis& alias_analysis, const BufferValue::SizeFunction& size_function, 
const absl::flat_hash_map& - memory_by_computation) { + memory_by_computation, + int64* peak_memory) { // We try a few schedulers and choose whichever returns a lower min-memory, // not accounting for fragmentation. // - List is a scheduler that uses greedy heuristics. @@ -524,38 +554,33 @@ StatusOr DefaultMemoryScheduler( // - Postorder does not use any heuristics. // List wins for most of our benchmarks; postorder-based schedulers win for // some RNNs. + int64 list_memory; TF_ASSIGN_OR_RETURN( HloInstructionSequence list_sequence, ListMemoryScheduler(computation, points_to_analysis, alias_analysis, - size_function, memory_by_computation)); - TF_ASSIGN_OR_RETURN(const int64 list_memory, - HeapSimulator::MinimumMemoryForComputation( - *computation, list_sequence, alias_analysis, - size_function, &memory_by_computation)); + size_function, memory_by_computation, &list_memory)); VLOG(2) << "Min-memory list sequence: " << HumanReadableNumBytes(list_memory); + int64 dfs_memory; TF_ASSIGN_OR_RETURN( HloInstructionSequence dfs_sequence, DFSMemoryScheduler(computation, points_to_analysis, alias_analysis, - size_function, memory_by_computation)); - TF_ASSIGN_OR_RETURN(const int64 dfs_memory, - HeapSimulator::MinimumMemoryForComputation( - *computation, dfs_sequence, alias_analysis, - size_function, &memory_by_computation)); + size_function, memory_by_computation, &dfs_memory)); VLOG(2) << "Min-memory dfs sequence: " << HumanReadableNumBytes(dfs_memory); + int64 post_order_memory; TF_ASSIGN_OR_RETURN( HloInstructionSequence post_order_sequence, PostOrderMemoryScheduler(computation, points_to_analysis, alias_analysis, - size_function, memory_by_computation)); - TF_ASSIGN_OR_RETURN(const int64 post_order_memory, - HeapSimulator::MinimumMemoryForComputation( - *computation, post_order_sequence, alias_analysis, - size_function, &memory_by_computation)); + size_function, memory_by_computation, + &post_order_memory)); VLOG(2) << "Min-memory post order sequence: " << 
HumanReadableNumBytes(post_order_memory); auto min_memory = std::min({dfs_memory, post_order_memory, list_memory}); + if (peak_memory) { + *peak_memory = min_memory; + } if (min_memory == list_memory) { VLOG(2) << "Chose min-memory list sequence: " @@ -574,7 +599,7 @@ StatusOr DefaultMemoryScheduler( StatusOr ScheduleModule( HloModule* module, const BufferValue::SizeFunction& size_function, - const MemorySchedulerAlgorithm& algorithm) { + const MemorySchedulerAlgorithm& algorithm, int64* peak_memory) { HloSchedule schedule(module); TF_ASSIGN_OR_RETURN(std::unique_ptr points_to_analysis, TuplePointsToAnalysis::Run(module)); @@ -584,20 +609,25 @@ StatusOr ScheduleModule( absl::flat_hash_map memory_by_computation; for (auto* computation : module->MakeComputationPostOrder()) { if (!computation->IsFusionComputation()) { - TF_ASSIGN_OR_RETURN(HloInstructionSequence computation_sequence, - ScheduleComputationHelper( - computation, *points_to_analysis, *alias_analysis, - size_function, algorithm, memory_by_computation)); - memory_by_computation[computation] = - HeapSimulator::MinimumMemoryForComputation( - *computation, computation_sequence, *alias_analysis, - size_function, &schedule) - .ValueOrDie(); + int64 computation_peak_memory; + TF_ASSIGN_OR_RETURN( + HloInstructionSequence computation_sequence, + ScheduleComputationHelper( + computation, *points_to_analysis, *alias_analysis, size_function, + algorithm, memory_by_computation, &computation_peak_memory)); + memory_by_computation[computation] = computation_peak_memory; schedule.set_sequence(computation, std::move(computation_sequence)); } } VLOG(1) << "Module schedule:\n" << schedule; + if (peak_memory) { + *peak_memory = 0; + for (const auto& computation_and_peak : memory_by_computation) { + *peak_memory = std::max(*peak_memory, computation_and_peak.second); + } + } + TF_RETURN_IF_ERROR(schedule.Verify()); return std::move(schedule); @@ -614,7 +644,7 @@ StatusOr ScheduleComputation( absl::flat_hash_map empty_map; 
return ScheduleComputationHelper(computation, *points_to_analysis, *alias_analysis, size_function, nullptr, - empty_map); + empty_map, nullptr); } HloMemoryScheduler::HloMemoryScheduler( diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler.h b/tensorflow/compiler/xla/service/hlo_memory_scheduler.h index 7bc76dc5f7c..fd416e9413e 100644 --- a/tensorflow/compiler/xla/service/hlo_memory_scheduler.h +++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler.h @@ -35,13 +35,16 @@ namespace xla { // A memory scheduler computes an execution sequence for the HLO instructions in // 'computation' that minimizes peak memory, given a points-to analysis result // that describes buffer aliasing, together with a target-specific size function -// that maps a tensor's logical size to its padded size. +// that maps a tensor's logical size to its padded size. peak_memory (may be +// nullptr) is set to the peak memory of the resulting schedule according to the +// HeapSimulator. // // TODO(yunxing): Cleanup usage of TuplePointsToAnalysis. 
typedef std::function( HloComputation*, const TuplePointsToAnalysis&, const HloAliasAnalysis&, const LogicalBuffer::SizeFunction&, - const absl::flat_hash_map&)> + const absl::flat_hash_map&, + /*peak_memory*/ int64*)> MemorySchedulerAlgorithm; // List scheduler @@ -51,7 +54,8 @@ StatusOr ListMemoryScheduler( const HloAliasAnalysis& alias_analysis, const LogicalBuffer::SizeFunction& size_function, const absl::flat_hash_map& - memory_by_computation); + memory_by_computation, + int64* peak_memory); // DFS-order scheduler StatusOr DFSMemoryScheduler( @@ -60,7 +64,8 @@ StatusOr DFSMemoryScheduler( const HloAliasAnalysis& alias_analysis, const LogicalBuffer::SizeFunction& size_function, const absl::flat_hash_map& - memory_by_computation); + memory_by_computation, + int64* peak_memory); // Naive Post Order scheduler StatusOr PostOrderMemoryScheduler( @@ -69,25 +74,30 @@ StatusOr PostOrderMemoryScheduler( const HloAliasAnalysis& alias_analysis, const LogicalBuffer::SizeFunction& size_function, const absl::flat_hash_map& - memory_by_computation); + memory_by_computation, + int64* peak_memory); -// The default scheduling algorithm. Runs both the list scheduler -// and the DFS scheduler, and chooses whichever returns a lower min-memory, -// not accounting for fragmentation. +// The default scheduling algorithm. Runs the list scheduler, the DFS scheduler, +// and the post-order scheduler and chooses whichever returns a lower min- +// memory, not accounting for fragmentation. peak_memory (may be nullptr) is set +// to the peak memory of the resulting schedule according to the HeapSimulator. 
StatusOr DefaultMemoryScheduler( HloComputation* computation, const TuplePointsToAnalysis& points_to_analysis, const HloAliasAnalysis& alias_analysis, const LogicalBuffer::SizeFunction& size_function, const absl::flat_hash_map& - memory_by_computation); + memory_by_computation, + int64* peak_memory); -// Returns an HloSchedule which seeks to minimize the memory required for -// the computation. size_function is the function returning the number of bytes -// required for a LogicalBuffer. +// Returns an HloSchedule which seeks to minimize the memory required for the +// module. size_function is the function returning the number of bytes required +// for a LogicalBuffer. peak_memory (if not nullptr) is set to the largest peak +// memory (according to the HeapSimulator) of all computations in the module. StatusOr ScheduleModule( HloModule* module, const LogicalBuffer::SizeFunction& size_function, - const MemorySchedulerAlgorithm& algorithm = {}); + const MemorySchedulerAlgorithm& algorithm = {}, + int64* peak_memory = nullptr); // Computes the schedule for a single computation. // Currently only used by the GPU backend. diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler_test.cc b/tensorflow/compiler/xla/service/hlo_memory_scheduler_test.cc index 3ee0c114373..2b1e059c7e5 100644 --- a/tensorflow/compiler/xla/service/hlo_memory_scheduler_test.cc +++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler_test.cc @@ -24,6 +24,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_dce.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/hlo_ordering.h" #include "tensorflow/compiler/xla/service/hlo_parser.h" @@ -38,6 +39,25 @@ namespace { class HloSchedulingTest : public HloTestBase {}; +int64 PeakMemoryUseOfEntryComputation( + HloModule* module, LogicalBuffer::SizeFunction size_function) { + CHECK(module->has_entry_computation()); + CHECK(module->has_schedule()); + + std::unique_ptr alias_analysis = + HloAliasAnalysis::Run(module).ConsumeValueOrDie(); + + const HloSchedule& schedule = module->schedule(); + + HloComputation* computation = module->entry_computation(); + const HloInstructionSequence& sequence = schedule.sequence(computation); + return HeapSimulator::Run(absl::make_unique(), + *computation, sequence, *alias_analysis, + size_function) + .ValueOrDie() + .heap_size; +} + TEST_F(HloSchedulingTest, LastUseScheduledFirst) { // Tests scheduling of the following HLO code: // @@ -122,9 +142,11 @@ ENTRY root { auto size_fn = [](const BufferValue& buffer) { return ShapeUtil::ByteSizeOf(buffer.shape(), /*pointer_size=*/8); }; + int64 peak_memory; TF_ASSERT_OK_AND_ASSIGN( HloSchedule schedule, - ScheduleModule(module.get(), size_fn, ListMemoryScheduler)); + ScheduleModule(module.get(), size_fn, ListMemoryScheduler, &peak_memory)); + TF_ASSERT_OK(module->set_schedule(schedule)); // Verify that all instructions are in the sequence. 
const std::vector& sequence = schedule.sequence(module->entry_computation()).instructions(); @@ -145,6 +167,8 @@ ENTRY root { SequentialHloOrdering ordering(schedule); EXPECT_TRUE(ordering.ExecutesBefore(instructions_by_name.at("d"), instructions_by_name.at("e"))); + EXPECT_EQ(PeakMemoryUseOfEntryComputation(module.get(), size_fn), + peak_memory); } TEST_F(HloSchedulingTest, HostSendDoneSchedule) { diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index 57cd453faf7..2589de633d0 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -2344,6 +2344,20 @@ bool HloParser::ParseDenseLiteral(Literal* literal, const Shape& shape) { } elems_seen_per_dim[0] = shape.dimensions(0); lexer_.Lex(); + // Fill data with deterministic (garbage) values. Use static to avoid + // creating identical constants which could potentially got CSE'ed + // away. This is a best-effort approach to make sure replaying a HLO + // gives us same optimized HLO graph. 
+ static uint32 data = 0; + uint32* raw_data = static_cast(literal->untyped_data()); + for (int64 i = 0; i < literal->size_bytes() / 4; ++i) { + raw_data[i] = data++; + } + uint8* raw_data_int8 = static_cast(literal->untyped_data()); + static uint8 data_int8 = 0; + for (int64 i = 0; i < literal->size_bytes() % 4; ++i) { + raw_data_int8[literal->size_bytes() / 4 + i] = data_int8++; + } break; } case TokKind::kComma: @@ -4235,15 +4249,6 @@ StatusOr> ParseAndReturnUnverifiedModule( return ParseAndReturnUnverifiedModule(str, HloModuleConfig()); } -StatusOr> ParseHloString( - absl::string_view str, const HloModuleConfig& config) { - return ParseAndReturnUnverifiedModule(str, config); -} - -StatusOr> ParseHloString(absl::string_view str) { - return ParseAndReturnUnverifiedModule(str); -} - Status ParseHloString(absl::string_view str, HloModule* module) { TF_RET_CHECK(module->computation_count() == 0); HloParser parser(str); diff --git a/tensorflow/compiler/xla/service/hlo_parser.h b/tensorflow/compiler/xla/service/hlo_parser.h index 2bfabd6ab20..e4214c1e6b5 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.h +++ b/tensorflow/compiler/xla/service/hlo_parser.h @@ -16,7 +16,6 @@ limitations under the License. 
#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_PARSER_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_PARSER_H_ -#include "absl/base/macros.h" #include "absl/memory/memory.h" #include "absl/strings/string_view.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" @@ -45,13 +44,6 @@ StatusOr> ParseAndReturnUnverifiedModule( StatusOr> ParseAndReturnUnverifiedModule( absl::string_view str); -ABSL_DEPRECATED("Use ParseAndReturnUnverifiedModule instead") -StatusOr> ParseHloString( - absl::string_view str, const HloModuleConfig& config); - -ABSL_DEPRECATED("Use ParseAndReturnUnverifiedModule instead") -StatusOr> ParseHloString(absl::string_view str); - // Given a string in the HloModule::ToString() format, parses the string and // builds the HloModule in place at the given module pointer. 'module' must // point to an empty module (no computations). diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.h b/tensorflow/compiler/xla/service/hlo_rematerialization.h index 8172f0d3a15..350cf0f8e8f 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.h +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.h @@ -17,7 +17,6 @@ #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" -#include "tensorflow/compiler/xla/service/buffer_liveness.h" #include "tensorflow/compiler/xla/service/call_graph.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" diff --git a/tensorflow/compiler/xla/service/hlo_value.cc b/tensorflow/compiler/xla/service/hlo_value.cc index 18ab401bc89..78c48e036d6 100644 --- a/tensorflow/compiler/xla/service/hlo_value.cc +++ b/tensorflow/compiler/xla/service/hlo_value.cc @@ -257,6 +257,14 @@ std::ostream& operator<<(std::ostream& out, const HloValueSet& value_set) { return out; } +bool InstructionValueSet::IsAmbiguous() const { + bool ambiguous = false; + for (auto& iter : *this) { + ambiguous |= iter.second.values().size() > 1; 
+ } + return ambiguous; +} + bool InstructionValueSet::AssignUnionOf( absl::Span inputs) { CHECK_GT(inputs.size(), 0); diff --git a/tensorflow/compiler/xla/service/hlo_value.h b/tensorflow/compiler/xla/service/hlo_value.h index 1f01b0bb365..a1150ae299d 100644 --- a/tensorflow/compiler/xla/service/hlo_value.h +++ b/tensorflow/compiler/xla/service/hlo_value.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_VALUE_H_ #include + #include #include @@ -245,6 +246,10 @@ class InstructionValueSet : public ShapeTree { // this value set changed. bool AssignUnionOf(absl::Span inputs); + // Returns true if any value sets for any subshape element is not a + // singleton. + bool IsAmbiguous() const; + string ToString() const; }; diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index 1de9a66adcb..fa2631bc364 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -20,6 +20,7 @@ limitations under the License. #include "absl/container/flat_hash_map.h" #include "absl/strings/str_join.h" #include "tensorflow/compiler/xla/primitive_util.h" +#include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" #include "tensorflow/compiler/xla/service/hlo_casting_utils.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" diff --git a/tensorflow/compiler/xla/service/llvm_ir/math_ops.cc b/tensorflow/compiler/xla/service/llvm_ir/math_ops.cc index 0e115cdabf4..333a2e8f612 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/math_ops.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/math_ops.cc @@ -22,6 +22,14 @@ namespace llvm_ir { llvm::Value* EmitFastTanh(llvm::IRBuilder<>* b, llvm::Value* input) { llvm::Type* type = input->getType(); + // For small values of x, we can approximate tanh(x)=x. 
For extremely small + // values of x (|x| < 1e-37), the other approximation evaluates tanh(x) = 0. + const auto kCanUseApprox = 0.0004; + auto abs_x = + llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::fabs, {input}, {type}, b); + auto use_aprox = + b->CreateFCmpOLT(abs_x, llvm::ConstantFP::get(type, kCanUseApprox)); + // Clamp the input to [-9, 9]. llvm::Value* input_clamped = llvm_ir::EmitFloatMin( llvm_ir::EmitFloatMax(input, llvm::ConstantFP::get(type, -9.0), b), @@ -52,7 +60,8 @@ llvm::Value* EmitFastTanh(llvm::IRBuilder<>* b, llvm::Value* input) { llvm::ConstantFP::get(type, denominator_coeffs[i])); } - return b->CreateFDiv(numerator, denominator); + return b->CreateSelect(use_aprox, input, + b->CreateFDiv(numerator, denominator)); } } // namespace llvm_ir diff --git a/tensorflow/compiler/xla/service/logical_buffer_analysis.h b/tensorflow/compiler/xla/service/logical_buffer_analysis.h index 276a157a15a..5f774bb25a6 100644 --- a/tensorflow/compiler/xla/service/logical_buffer_analysis.h +++ b/tensorflow/compiler/xla/service/logical_buffer_analysis.h @@ -16,6 +16,7 @@ limitations under the License. 
#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_LOGICAL_BUFFER_ANALYSIS_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_LOGICAL_BUFFER_ANALYSIS_H_ +#include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/logical_buffer.h" diff --git a/tensorflow/compiler/xla/service/multi_output_fusion.cc b/tensorflow/compiler/xla/service/multi_output_fusion.cc index c0db149e340..582e59349e8 100644 --- a/tensorflow/compiler/xla/service/multi_output_fusion.cc +++ b/tensorflow/compiler/xla/service/multi_output_fusion.cc @@ -128,6 +128,8 @@ HloInstruction* MultiOutputFusion::Fuse(HloInstruction* instr1, remaining->MergeFusionInstructionIntoMultiOutput(fused); } else { remaining->FuseInstructionIntoMultiOutput(fused); + CHECK_EQ(0, fused->user_count()); + TF_CHECK_OK(computation()->RemoveInstruction(fused)); } return remaining; } @@ -223,7 +225,7 @@ bool MultiOutputFusion::LegalToFuse(HloInstruction* instr1, return false; } - // Fusing nodes with 0 user makes no sense and the rest of the implementation + // Fusing nodes with 0 users makes no sense and the rest of the implementation // doesn't support it either. if (instr1->user_count() == 0 || instr2->user_count() == 0) { return false; diff --git a/tensorflow/compiler/xla/service/reduce_precision_insertion.h b/tensorflow/compiler/xla/service/reduce_precision_insertion.h index 76c6a87f176..05990afb625 100644 --- a/tensorflow/compiler/xla/service/reduce_precision_insertion.h +++ b/tensorflow/compiler/xla/service/reduce_precision_insertion.h @@ -16,7 +16,6 @@ limitations under the License. 
#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_REDUCE_PRECISION_INSERTION_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_REDUCE_PRECISION_INSERTION_H_ -#include "tensorflow/compiler/xla/service/buffer_liveness.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc index 3868bbe22e4..9ff819437b3 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc @@ -708,147 +708,4 @@ bool TuplePointsToAnalysis::HasUniqueFusedUseOfOperandAt( fused_param_uses[0].first == fusion->fused_expression_root() && fused_param_uses[0].second == use_operand_index; } - -// User and operand can share buffers iff both instructions emit the same shape -// and layout, and 'user' meets one of the following qualifications: -// -// (1) Is element-wise. Or... -// (2) Is a loop fusion instruction where the only use of 'operand' at 'index' -// in the set 'user.fused_instructions' is a DynamicUpdateSlice fused root -// at operand 0. Or... -// (3) Is a kDot -> kAdd output fusion instruction where the only use of -// 'operand' at 'index' in the set 'user.fused_instructions' is a kAdd fused -// root at operand 0 or 1. Or... -// (4) The 'user' of 'operand' is DynamicUpdateSlice or While at operand index -// 0. -// (5) The 'user' of 'operand' is Sort, and it is the only user. -// (6) The 'user' of 'operand' is TriangularSolve, it is the second operand, -// and it is the only user. -// -// (2) and (3) can only be determined if points-to analysis is available. 
-bool TuplePointsToAnalysis::CanShareOperandBufferWithUser( - HloInstruction* operand, const ShapeIndex& operand_index, - HloInstruction* user, const ShapeIndex& user_index) const { - CHECK(user->IsUserOf(operand)) - << "user: " << user->ToString() << " operand: " << operand->ToString(); - const Shape& operand_subshape = - ShapeUtil::GetSubshape(operand->shape(), operand_index); - const Shape& user_subshape = - ShapeUtil::GetSubshape(user->shape(), user_index); - // Check that operand and user emit the same shape and layout. - if (!ShapeUtil::Equal(operand_subshape, user_subshape)) { - return false; - } - if (user->opcode() == HloOpcode::kFusion) { - if (user->IsLoopFusion() || user->IsInputFusion()) { - if (user->fused_expression_root()->opcode() == - HloOpcode::kDynamicUpdateSlice) { - // Loop fusion with kDynamicUpdateSlice fused root. - // - // Returns true iff there is exactly one use of 'operand' at shape index - // 'operand_index', and this singleton use is the fused root at operand - // index 0. - return HasUniqueFusedUseOfOperandAt(operand, operand_index, user, 0); - } else { - HloInstruction* fusion_param = - user->fused_parameter(user->operand_index(operand)); - return HloDataflowAnalysis::AreTransitiveUsesElementwiseOrTuple( - fusion_param); - } - } else if (user->IsOutputFusion() && - user->fused_expression_root()->opcode() == HloOpcode::kAdd) { - // Output fusion with kAdd fused root. - - // Check if one operand of kAdd fused root is kDot or kConvolution. - auto* add = user->fused_expression_root(); - auto add_operand_it = - absl::c_find_if(add->operands(), [&](HloInstruction* operand) { - return operand->opcode() == HloOpcode::kConvolution || - operand->opcode() == HloOpcode::kDot; - }); - if (add_operand_it == add->operands().end()) { - return false; - } - auto* matched_add_operand = *add_operand_it; - // Calculate operand index of 'add' operand which was not matched above. 
- const int64 other_add_operand_index = - matched_add_operand == add->operand(0) ? 1 : 0; - // Returns true iff there is exactly one use of 'operand' at shape index - // 'operand_index', and this singleton use is the fused root (at operand - // index 'other_add_operand_index'). - return HasUniqueFusedUseOfOperandAt(operand, operand_index, user, - other_add_operand_index); - } else if (user->IsCustomFusion()) { - std::vector operand_indices = user->OperandIndices(operand); - return operand_indices.size() == 1 && operand_indices[0] == 0 && - absl::c_any_of( - user->fused_instructions_computation()->instructions(), - [](const HloInstruction* hlo) { - return hlo->opcode() == HloOpcode::kScatter; - }); - } - } - if (user->opcode() == HloOpcode::kDynamicUpdateSlice || - user->opcode() == HloOpcode::kScatter || - user->opcode() == HloOpcode::kWhile) { - // We eliminated other users in BufferLiveness::live_range_strictly_before, - // so here we just need to check that the use is at operand index 0. - std::vector operand_indices = user->OperandIndices(operand); - return operand_indices.size() == 1 && operand_indices[0] == 0; - } - if (user->opcode() == HloOpcode::kSort) { - // Only valid if there are no other users. - if (operand->users().size() != 1) { - return false; - } - // If we only sort keys, the output of sort is not a tuple, so we can always - // share the buffer. - if (user->operand_count() == 1) { - return true; - } - CHECK(!user_index.empty()); - // Only share with the right tuple element buffer. - std::vector operand_indices = user->OperandIndices(operand); - return operand_indices.size() == 1 && user_index[0] == operand_indices[0]; - } - if (user->opcode() == HloOpcode::kTriangularSolve) { - // Only valid if there are no other users. 
- if (operand->users().size() != 1) { - return false; - } - std::vector operand_indices = user->OperandIndices(operand); - return operand_indices.size() == 1 && operand_indices[0] == 1; - } - if (user->opcode() == HloOpcode::kCall) { - // TODO(b/62548313): Remove when buffer assignment is module scoped and - // does not assign buffers to calls. - // Find called computation parameter associated with 'operand'. - const std::vector operand_indices = user->OperandIndices(operand); - if (operand_indices.size() > 1) { - return false; - } - CHECK_EQ(1, operand_indices.size()); - auto* param = user->to_apply()->parameter_instruction(operand_indices[0]); - // Get all uses of 'operand' at 'index' in called computation. - auto param_uses = GetAllUsesOfInstructionAtIndex(param, operand_index); - - // Return true iff: - // *) There exists exactly one use of 'operand' in called computation. - // *) The unique use is by the root instruction of called computation. - // (Note: we check the root of the called computation, because the - // root result buffer is required to alias with the Call result buffer). - // *) The root instruction of the called computation is element-wise on - // 'operand'. - auto* callee_root = user->to_apply()->root_instruction(); - return param_uses.size() == 1 && param_uses[0].first == callee_root && - callee_root->IsElementwiseOnOperand(param_uses[0].second); - } - // Loop fusions that contain transposing copies won't reach here as they have - // different layouts, which fails the check in the beginning of this function. - // - // Multi-output fusion will fail the check here as tuples are not considered - // an elementwise operation. 
- return user->IsElementwiseOnOperand(user->operand_index(operand)); -} - } // namespace xla diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h index ec0f7eeea2d..cb589326ba7 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_COMPILER_XLA_SERVICE_TUPLE_POINTS_TO_ANALYSIS_H_ #include + #include #include #include @@ -265,15 +266,6 @@ class TuplePointsToAnalysis : public DfsHloVisitorWithDefault { const ShapeIndex& index, const HloInstruction* user) const; - // Returns true if 'user' (at 'user_index') can share a buffer with its - // operand 'operand' (at 'operand_index'). Returns false otherwise. - // - // REQUIRES: 'operand' is an operand of 'user'. - bool CanShareOperandBufferWithUser(HloInstruction* operand, - const ShapeIndex& operand_index, - HloInstruction* user, - const ShapeIndex& user_index) const; - private: explicit TuplePointsToAnalysis( const HloModule* module, diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc index 4f8d1b92a98..d0515fb5825 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc @@ -928,383 +928,5 @@ TEST_F(DoesNotUseOperandBufferTest, FusedDynamicUpdateSlice) { EXPECT_FALSE( points_to_analysis_->DoesNotUseOperandBuffer(tuple, {1}, fusion)); } - -class CanShareOperandBufferWithUserTest : public PointsToAnalysisTestBase {}; - -TEST_F(CanShareOperandBufferWithUserTest, ElementWiseSameShape) { - auto builder = HloComputation::Builder(TestName()); - - Shape shape = ShapeUtil::MakeShape(F32, {8}); - auto param = builder.AddInstruction( - HloInstruction::CreateParameter(0, shape, "param")); - auto exp = builder.AddInstruction( 
- HloInstruction::CreateUnary(shape, HloOpcode::kExp, param)); - auto log = builder.AddInstruction( - HloInstruction::CreateUnary(shape, HloOpcode::kLog, exp)); - - BuildModuleAndRunAnalysis(builder.Build()); - - EXPECT_TRUE( - points_to_analysis_->CanShareOperandBufferWithUser(param, {}, exp, {})); - EXPECT_TRUE( - points_to_analysis_->CanShareOperandBufferWithUser(exp, {}, log, {})); -} - -TEST_F(CanShareOperandBufferWithUserTest, ElementWiseDifferentShape) { - auto builder = HloComputation::Builder(TestName()); - - Shape in_shape = ShapeUtil::MakeShape(F32, {8}); - Shape out_shape = ShapeUtil::MakeShape(PRED, {8}); - auto param0 = builder.AddInstruction( - HloInstruction::CreateParameter(0, in_shape, "param0")); - auto param1 = builder.AddInstruction( - HloInstruction::CreateParameter(1, in_shape, "param1")); - auto result = builder.AddInstruction(HloInstruction::CreateCompare( - out_shape, param0, param1, ComparisonDirection::kEq)); - - BuildModuleAndRunAnalysis(builder.Build()); - - EXPECT_FALSE(points_to_analysis_->CanShareOperandBufferWithUser(param0, {}, - result, {})); - EXPECT_FALSE(points_to_analysis_->CanShareOperandBufferWithUser(param1, {}, - result, {})); -} - -TEST_F(CanShareOperandBufferWithUserTest, CopyShares) { - auto builder = HloComputation::Builder(TestName()); - - Shape shape = ShapeUtil::MakeShape(F32, {8}); - auto param = builder.AddInstruction( - HloInstruction::CreateParameter(0, shape, "param")); - auto exp = builder.AddInstruction( - HloInstruction::CreateUnary(shape, HloOpcode::kExp, param)); - auto copy = builder.AddInstruction( - HloInstruction::CreateUnary(shape, HloOpcode::kCopy, exp)); - - BuildModuleAndRunAnalysis(builder.Build()); - - EXPECT_TRUE( - points_to_analysis_->CanShareOperandBufferWithUser(param, {}, exp, {})); - EXPECT_TRUE( - points_to_analysis_->CanShareOperandBufferWithUser(exp, {}, copy, {})); -} - -TEST_F(CanShareOperandBufferWithUserTest, FusedDynamicUpdateSlice) { - auto builder = 
HloComputation::Builder(TestName()); - - Shape data_shape = ShapeUtil::MakeShape(F32, {8}); - auto tuple = builder.AddInstruction(HloInstruction::CreateParameter( - 0, ShapeUtil::MakeTupleShape({data_shape, data_shape}), "tuple")); - auto gte0 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(data_shape, tuple, 0)); - auto gte1 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(data_shape, tuple, 1)); - - // Create a DynamicUpdateSlice instruction of tuple element 1. - auto starts = builder.AddInstruction( - HloInstruction::CreateConstant(LiteralUtil::CreateR0(2))); - auto update = builder.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR1({2.f, 2.f, 2.f}))); - auto dynamic_update_slice = - builder.AddInstruction(HloInstruction::CreateDynamicUpdateSlice( - data_shape, gte1, update, {starts})); - builder.AddInstruction( - HloInstruction::CreateTuple({gte0, dynamic_update_slice})); - - BuildModule(builder.Build()); - auto fusion = computation_->CreateFusionInstruction( - {dynamic_update_slice, starts, update, gte1}, - HloInstruction::FusionKind::kLoop); - RunAnalysis(); - - // The fusion instruction can share with tuple element 1. 
- EXPECT_FALSE(points_to_analysis_->CanShareOperandBufferWithUser(tuple, {0}, - fusion, {})); - EXPECT_TRUE(points_to_analysis_->CanShareOperandBufferWithUser(tuple, {1}, - fusion, {})); -} - -TEST_F(CanShareOperandBufferWithUserTest, DynamicUpdateSliceCanShare) { - auto builder = HloComputation::Builder(TestName()); - - Shape data_shape = ShapeUtil::MakeShape(F32, {8}); - Shape update_shape = ShapeUtil::MakeShape(F32, {4}); - Shape starts_shape = ShapeUtil::MakeShape(S32, {}); - auto data = builder.AddInstruction( - HloInstruction::CreateParameter(0, data_shape, "data")); - auto update = builder.AddInstruction( - HloInstruction::CreateParameter(1, update_shape, "update")); - auto starts = builder.AddInstruction( - HloInstruction::CreateParameter(2, starts_shape, "starts")); - auto dus = builder.AddInstruction(HloInstruction::CreateDynamicUpdateSlice( - data_shape, data, update, {starts})); - - BuildModuleAndRunAnalysis(builder.Build()); - - // The DynamicUpdateSlice instruction can share with the data operand, but not - // with update or starts. 
- EXPECT_TRUE( - points_to_analysis_->CanShareOperandBufferWithUser(data, {}, dus, {})); - EXPECT_FALSE( - points_to_analysis_->CanShareOperandBufferWithUser(update, {}, dus, {})); - EXPECT_FALSE( - points_to_analysis_->CanShareOperandBufferWithUser(starts, {}, dus, {})); -} - -TEST_F(CanShareOperandBufferWithUserTest, ScatterCanShare) { - const char* hlo_text = R"( - HloModule TensorFlowScatterV1 - - update_s32 (lhs: s32[], rhs: s32[]) -> s32[] { - lhs = s32[] parameter(0) - ROOT rhs = s32[] parameter(1) - } - - ENTRY main { - operand = s32[3,3] parameter(0) - indices = s32[2] parameter(1) - updates = s32[2,3] parameter(2) - ROOT scatter = s32[3,3] scatter(operand, indices, updates), - to_apply=update_s32, - update_window_dims={1}, - inserted_window_dims={0}, - scatter_dims_to_operand_dims={0}, - index_vector_dim=1 - } - )"; - TF_ASSERT_OK_AND_ASSIGN(module_, ParseAndReturnUnverifiedModule(hlo_text)); - computation_ = module_->entry_computation(); - RunAnalysis(); - - HloInstruction* operand_param = computation_->parameter_instruction(0); - HloInstruction* indices_param = computation_->parameter_instruction(1); - HloInstruction* updates_param = computation_->parameter_instruction(2); - HloInstruction* scatter = computation_->root_instruction(); - - EXPECT_TRUE(points_to_analysis_->CanShareOperandBufferWithUser( - operand_param, {}, scatter, {})); - EXPECT_FALSE(points_to_analysis_->CanShareOperandBufferWithUser( - indices_param, {}, scatter, {})); - EXPECT_FALSE(points_to_analysis_->CanShareOperandBufferWithUser( - updates_param, {}, scatter, {})); -} - -TEST_F(CanShareOperandBufferWithUserTest, SortCanShare) { - auto builder = HloComputation::Builder(TestName()); - module_ = CreateNewVerifiedModule(); - - Shape keys_shape = ShapeUtil::MakeShape(F32, {8}); - auto keys = builder.AddInstruction( - HloInstruction::CreateParameter(0, keys_shape, "keys")); - TF_ASSERT_OK_AND_ASSIGN( - auto* sort, MakeSortHlo(keys_shape, {keys}, 0, /*is_stable=*/false, - &builder, 
module_.get())); - - computation_ = module_->AddEntryComputation(builder.Build()); - RunAnalysis(); - - EXPECT_TRUE( - points_to_analysis_->CanShareOperandBufferWithUser(keys, {}, sort, {})); -} - -TEST_F(CanShareOperandBufferWithUserTest, SortCanShareWithTupleUser) { - auto builder = HloComputation::Builder(TestName()); - module_ = CreateNewVerifiedModule(); - - Shape keys_shape = ShapeUtil::MakeShape(F32, {8}); - Shape values_shape = ShapeUtil::MakeShape(F32, {8}); - auto keys = builder.AddInstruction( - HloInstruction::CreateParameter(0, keys_shape, "keys")); - auto values = builder.AddInstruction( - HloInstruction::CreateParameter(1, values_shape, "values")); - TF_ASSERT_OK_AND_ASSIGN( - auto* sort, - MakeSortHlo(ShapeUtil::MakeTupleShape({keys_shape, values_shape}), - {keys, values}, 0, /*is_stable=*/false, &builder, - module_.get())); - - computation_ = module_->AddEntryComputation(builder.Build()); - RunAnalysis(); - - // The buffer for the keys can be shared with the first tuple entry. - EXPECT_TRUE( - points_to_analysis_->CanShareOperandBufferWithUser(keys, {}, sort, {0})); - // The buffer for the values can be shared with the second tuple entry. - EXPECT_TRUE(points_to_analysis_->CanShareOperandBufferWithUser(values, {}, - sort, {1})); - // Verify that the buffers are not shared with the "wrong" tuple entry. 
- EXPECT_FALSE( - points_to_analysis_->CanShareOperandBufferWithUser(keys, {}, sort, {1})); - EXPECT_FALSE(points_to_analysis_->CanShareOperandBufferWithUser(values, {}, - sort, {0})); -} - -TEST_F(CanShareOperandBufferWithUserTest, FusedDotAdd) { - auto builder = HloComputation::Builder(TestName()); - Shape data_shape = ShapeUtil::MakeShape(F32, {2, 2}); - - auto a = builder.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR2({{1.0, 0.0}, {0.0, 1.0}}))); - auto b = builder.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR2({{2.0, 2.0}, {2.0, 2.0}}))); - - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(1); - dot_dnums.add_rhs_contracting_dimensions(0); - PrecisionConfig precision_config; - precision_config.mutable_operand_precision()->Resize( - /*new_size=*/2, PrecisionConfig::DEFAULT); - auto dot = builder.AddInstruction( - HloInstruction::CreateDot(data_shape, a, b, dot_dnums, precision_config)); - - auto one = builder.AddInstruction( - HloInstruction::CreateConstant(LiteralUtil::CreateR0(1.0))); - auto add_operand = builder.AddInstruction( - HloInstruction::CreateBroadcast(data_shape, one, {1})); - - auto add = builder.AddInstruction(HloInstruction::CreateBinary( - data_shape, HloOpcode::kAdd, dot, add_operand)); - - BuildModule(builder.Build()); - auto fusion = computation_->CreateFusionInstruction( - {add, dot}, HloInstruction::FusionKind::kOutput); - RunAnalysis(); - - // Output fused dot add should be able to share buffer with 'add_operand'. 
- EXPECT_TRUE(points_to_analysis_->CanShareOperandBufferWithUser( - add_operand, {}, fusion, {})); -} - -TEST_F(CanShareOperandBufferWithUserTest, OutputFusionCantAliasOperandBuffer) { - auto builder = HloComputation::Builder(TestName()); - Shape data_shape = ShapeUtil::MakeShape(F32, {2, 2}); - - auto one = builder.AddInstruction( - HloInstruction::CreateConstant(LiteralUtil::CreateR0(1.0))); - auto operand = builder.AddInstruction( - HloInstruction::CreateBroadcast(data_shape, one, {1})); - - auto reverse = builder.AddInstruction( - HloInstruction::CreateReverse(data_shape, operand, {0, 1})); - - auto two = builder.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR2({{2.0, 2.0}, {2.0, 2.0}}))); - - auto add = builder.AddInstruction( - HloInstruction::CreateBinary(data_shape, HloOpcode::kAdd, reverse, two)); - - BuildModule(builder.Build()); - auto fusion = computation_->CreateFusionInstruction( - {add, two, reverse}, HloInstruction::FusionKind::kOutput); - RunAnalysis(); - - // Output fused operand->reverse->add cannot alias operand buffer 'operand'. 
- EXPECT_FALSE(points_to_analysis_->CanShareOperandBufferWithUser(operand, {}, - fusion, {})); -} - -TEST_F(CanShareOperandBufferWithUserTest, WhileCanShare) { - Shape data_shape = ShapeUtil::MakeShape(F32, {8}); - - auto make_cond = [&data_shape]() { - auto builder = HloComputation::Builder(TestName() + ".Cond"); - auto data = builder.AddInstruction( - HloInstruction::CreateParameter(0, data_shape, "data")); - builder.AddInstruction(HloInstruction::CreateCompare( - ShapeUtil::MakeShape(PRED, {}), data, data, ComparisonDirection::kEq)); - return builder.Build(); - }; - - auto make_body = [&data_shape]() { - auto builder = HloComputation::Builder(TestName() + ".Body"); - auto data = builder.AddInstruction( - HloInstruction::CreateParameter(0, data_shape, "data")); - builder.AddInstruction( - HloInstruction::CreateBinary(data_shape, HloOpcode::kAdd, data, data)); - return builder.Build(); - }; - - module_ = CreateNewUnverifiedModule(); - HloComputation* cond_computation = - module_->AddEmbeddedComputation(make_cond()); - HloComputation* body_computation = - module_->AddEmbeddedComputation(make_body()); - - auto builder = HloComputation::Builder(TestName()); - auto data = builder.AddInstruction( - HloInstruction::CreateParameter(0, data_shape, "data")); - auto whil = builder.AddInstruction(HloInstruction::CreateWhile( - data_shape, cond_computation, body_computation, data)); - computation_ = module_->AddEntryComputation(builder.Build()); - - RunAnalysis(); - - // The While instruction can share with the data operand. - EXPECT_TRUE( - points_to_analysis_->CanShareOperandBufferWithUser(data, {}, whil, {})); -} - -// Tests that Call can alias operand buffer if the only use of the operand -// in the called computation is an elementwise instruction. -TEST_F(CanShareOperandBufferWithUserTest, CallToComputationWithFusionRoot) { - Shape shape = ShapeUtil::MakeShape(F32, {8}); - // Build sub-computation with fusion root. 
- auto sub_builder = HloComputation::Builder(TestName() + "_sub"); - auto sub_param = sub_builder.AddInstruction( - HloInstruction::CreateParameter(0, shape, "sub_param")); - auto one = sub_builder.AddInstruction( - HloInstruction::CreateConstant(LiteralUtil::CreateR0(1.0))); - auto ones = sub_builder.AddInstruction( - HloInstruction::CreateBroadcast(shape, one, {1})); - auto add = sub_builder.AddInstruction( - HloInstruction::CreateBinary(shape, HloOpcode::kAdd, sub_param, ones)); - - module_ = CreateNewUnverifiedModule(); - auto sub_computation = module_->AddEmbeddedComputation(sub_builder.Build()); - sub_computation->CreateFusionInstruction({add, ones}, - HloInstruction::FusionKind::kLoop); - - // Build entry-computation with kCall which calls 'sub_computation'. - auto builder = HloComputation::Builder(TestName()); - - auto param = builder.AddInstruction( - HloInstruction::CreateParameter(0, shape, "param")); - auto reverse = - builder.AddInstruction(HloInstruction::CreateReverse(shape, param, {0})); - auto call = builder.AddInstruction( - HloInstruction::CreateCall(shape, {reverse}, sub_computation)); - computation_ = module_->AddEntryComputation(builder.Build()); - - RunAnalysis(); - - EXPECT_TRUE(points_to_analysis_->CanShareOperandBufferWithUser(reverse, {}, - call, {})); -} - -TEST_F(CanShareOperandBufferWithUserTest, LoopFusionWithElementwiseOperand) { - Shape full_shape = ShapeUtil::MakeShape(F32, {16, 32}); - Shape broadcast_shape = ShapeUtil::MakeShape(F32, {16}); - - auto builder = HloComputation::Builder(TestName() + "_fusion"); - auto param0 = builder.AddInstruction( - HloInstruction::CreateParameter(0, full_shape, "full")); - auto param1 = builder.AddInstruction( - HloInstruction::CreateParameter(1, broadcast_shape, "small")); - auto broadcast = builder.AddInstruction( - HloInstruction::CreateBroadcast(full_shape, param1, {0})); - auto add = builder.AddInstruction(HloInstruction::CreateBinary( - full_shape, HloOpcode::kAdd, param0, broadcast)); - - 
BuildModule(builder.Build()); - auto fusion = computation_->CreateFusionInstruction( - {add, broadcast}, HloInstruction::FusionKind::kLoop); - RunAnalysis(); - - EXPECT_TRUE(points_to_analysis_->CanShareOperandBufferWithUser(param0, {}, - fusion, {})); - EXPECT_FALSE(points_to_analysis_->CanShareOperandBufferWithUser(param1, {}, - fusion, {})); -} - } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/tests/iota_test.cc b/tensorflow/compiler/xla/tests/iota_test.cc index 0723dd4bbdf..d7d5de5b186 100644 --- a/tensorflow/compiler/xla/tests/iota_test.cc +++ b/tensorflow/compiler/xla/tests/iota_test.cc @@ -34,7 +34,7 @@ class IotaR1Test : public ClientLibraryTestBase, public ::testing::WithParamInterface> {}; -TEST_P(IotaR1Test, DoIt) { +XLA_TEST_P(IotaR1Test, DoIt) { const auto& spec = GetParam(); const auto element_type = std::get<0>(spec); const int64 num_elements = std::get<1>(spec); @@ -63,7 +63,7 @@ class IotaR2Test : public ClientLibraryTestBase, public ::testing::WithParamInterface< std::tuple> {}; -TEST_P(IotaR2Test, DoIt) { +XLA_TEST_P(IotaR2Test, DoIt) { const auto& spec = GetParam(); const auto element_type = std::get<0>(spec); const int64 num_elements = std::get<1>(spec); @@ -95,7 +95,7 @@ class IotaR3Test : public ClientLibraryTestBase, public ::testing::WithParamInterface< std::tuple> {}; -TEST_P(IotaR3Test, DoIt) { +XLA_TEST_P(IotaR3Test, DoIt) { const auto& spec = GetParam(); const auto element_type = std::get<0>(spec); const int64 num_elements = std::get<1>(spec); diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc index 531b76c1513..c5e1dbe7432 100644 --- a/tensorflow/compiler/xla/tests/reduce_window_test.cc +++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc @@ -1653,5 +1653,25 @@ ENTRY %reduce-window (parameter.0: f16[81,8], parameter.1: f16[]) -> f16[82,8] { EXPECT_TRUE(RunAndCompare(hlo_string, absl::nullopt)); } +XLA_TEST_F(ReduceWindowTextTest, 
R4OnlyDilation) { + const string hlo_string = R"( +HloModule R4OnlyDilation +mul { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT mul = f32[] multiply(lhs, rhs) +} +ENTRY R4OnlyDilation { + operand = f32[2,2,2,2]{3,2,1,0} parameter(0) + constant = f32[] constant(1) + ROOT reduce-window = f32[3,3,3,3]{3,2,1,0} + reduce-window(operand, constant), + window={size=1x1x1x1 pad=0_0x0_0x0_0x0_0 lhs_dilate=2x2x2x2}, + to_apply=mul +} +)"; + EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0.001})); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/tests/test_utils.h b/tensorflow/compiler/xla/tests/test_utils.h index b3c8a739058..f2570ecff37 100644 --- a/tensorflow/compiler/xla/tests/test_utils.h +++ b/tensorflow/compiler/xla/tests/test_utils.h @@ -54,6 +54,29 @@ class PseudorandomGenerator { std::mt19937 generator_; }; +// Populates a floating point literal with random floating points sampled from a +// uniform-log distribution spanning approximately the entire range of the +// representable floating point. +template +void PopulateWithRandomFullRangeFloatingPointData(Literal* literal, + std::minstd_rand0* engine) { + // Generates floating points with a log-uniform distribution. This causes the + // exponent of the floating point to have a uniform distribution. + int min_exp, max_exp; + if (std::is_same()) { + min_exp = std::numeric_limits::min_exponent; + max_exp = std::numeric_limits::max_exponent; + } else { + min_exp = std::numeric_limits::min_exponent; + max_exp = std::numeric_limits::max_exponent; + } + std::uniform_real_distribution generator(min_exp - 1, max_exp - 1); + for (FloatT& value : literal->data()) { + float sign = ((*engine)() % 2 == 0) ? 1 : -1; + value = static_cast(pow(2, generator(*engine)) * sign); + } +} + // Generates fake data in a literal of the given shape, or returns an error // status if the element type is currently unhandled for fake data // generation. See below for documentation of pseudo_random. 
diff --git a/tensorflow/compiler/xla/tools/hlo_extractor.cc b/tensorflow/compiler/xla/tools/hlo_extractor.cc index f3ce5f99b0c..5d681f61ff6 100644 --- a/tensorflow/compiler/xla/tools/hlo_extractor.cc +++ b/tensorflow/compiler/xla/tools/hlo_extractor.cc @@ -21,6 +21,7 @@ limitations under the License. #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" #include "absl/memory/memory.h" +#include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" #include "tensorflow/compiler/xla/service/hlo_clone_context.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_verifier.h" diff --git a/tensorflow/compiler/xla/tools/interactive_graphviz.cc b/tensorflow/compiler/xla/tools/interactive_graphviz.cc index 5652d303f02..01555fa3cff 100644 --- a/tensorflow/compiler/xla/tools/interactive_graphviz.cc +++ b/tensorflow/compiler/xla/tools/interactive_graphviz.cc @@ -52,7 +52,7 @@ namespace xla { namespace tools { namespace { -bool ReadLine(const char *prompt, string *line) { +bool ReadLine(const char* prompt, string* line) { #if defined(PLATFORM_GOOGLE) return util::ReadLine(prompt, line); #else @@ -628,28 +628,22 @@ void InteractiveDumpGraphs(const Options& opts, const HloModule& module) { } } -void CheckFlags(const Options &opts) { - std::vector nonempty_proto_flags; +void CheckFlags(const Options& opts) { + int nonempty_flags_amount = 0; if (!opts.hlo_proto.empty()) { - nonempty_proto_flags.push_back("--hlo_proto"); + ++nonempty_flags_amount; } if (!opts.hlo_snapshot.empty()) { - nonempty_proto_flags.push_back("--hlo_snapshot"); + ++nonempty_flags_amount; } if (!opts.hlo_text.empty()) { - nonempty_proto_flags.push_back("--hlo_text"); + ++nonempty_flags_amount; } - switch (nonempty_proto_flags.size()) { - case 1: - // We're good to go. 
- break; - case 0: - LOG(FATAL) << "Need one of the following options: " - << absl::StrJoin(nonempty_proto_flags, ", "); - default: - LOG(FATAL) << "Can only specify one of " - << absl::StrJoin(nonempty_proto_flags, ", "); + if (nonempty_flags_amount == 1) { + return; } + LOG(FATAL) << "Can only specify one and only one of '--hlo_proto', " + "'--hlo_snapshot', '--hlo_text' flags."; } void RealMain(const Options& opts) { @@ -726,8 +720,7 @@ int main(int argc, char** argv) { "Platform to compile for: CPU, CUDA, etc"), tensorflow::Flag("browser", &opts.browser, "Path to web browser used to display produced graphs."), - tensorflow::Flag("help", &need_help, - "Prints this help message"), + tensorflow::Flag("help", &need_help, "Prints this help message"), }; xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); bool parse_ok = tensorflow::Flags::Parse(&argc, argv, flag_list); diff --git a/tensorflow/compiler/xla/window_util.cc b/tensorflow/compiler/xla/window_util.cc index f2e18311039..4f01325111c 100644 --- a/tensorflow/compiler/xla/window_util.cc +++ b/tensorflow/compiler/xla/window_util.cc @@ -198,12 +198,6 @@ bool HasDilation(const Window& window) { return HasBaseDilation(window) || HasWindowDilation(window); } -bool IsInactiveWindowDimension(const Window& window, int64 logical_dim) { - const WindowDimension& window_dim = window.dimensions(logical_dim); - return window_dim.size() == 1 && window_dim.stride() == 1 && - window_dim.padding_low() == 0 && window_dim.padding_high() == 0; -} - bool IsTrivialWindowDimension(const WindowDimension& window_dimension) { return window_dimension.size() == 1 && window_dimension.stride() == 1 && window_dimension.padding_low() == 0 && diff --git a/tensorflow/compiler/xla/window_util.h b/tensorflow/compiler/xla/window_util.h index e7099285c34..afb7d48f63f 100644 --- a/tensorflow/compiler/xla/window_util.h +++ b/tensorflow/compiler/xla/window_util.h @@ -58,12 +58,8 @@ bool HasDilation(const Window& window); bool 
HasWindowReversal(const Window& window); bool AllOrNoneReversed(const Window& window); -// Returns true if the given logical dimension is inactive in the sense that it -// has window bound 1, no striding and no padding. -bool IsInactiveWindowDimension(const Window& window, int64 logical_dim); - -// Returns true if the provided window dimension is trivial (inactive and has no -// dilation) +// Returns true if the provided window dimension is trivial in the sense that it +// has window bound 1, no striding, no padding and no dilation. bool IsTrivialWindowDimension(const WindowDimension& window_dimension); // Returns the new bound after dilation. diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py b/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py index 5ffbb906708..ca1e968fae1 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py @@ -123,7 +123,7 @@ class GradientBoostedDecisionTreeClassifier(estimator.Estimator): learner_config.num_classes = n_classes elif learner_config.num_classes != n_classes: raise ValueError("n_classes (%d) doesn't match learner_config (%d)." 
% - (learner_config.num_classes, n_classes)) + (n_classes, learner_config.num_classes)) super(GradientBoostedDecisionTreeClassifier, self).__init__( model_fn=model.model_builder, params={ diff --git a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py index eedc9633e2f..a386e811d3f 100644 --- a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py +++ b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py @@ -18,7 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import unittest import numpy as np from tensorflow.contrib.image.ops import gen_image_ops @@ -32,9 +31,10 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.platform import googletest -_DTYPES = set( - [dtypes.uint8, dtypes.int32, dtypes.int64, - dtypes.float16, dtypes.float32, dtypes.float64]) +_DTYPES = set([ + dtypes.uint8, dtypes.int32, dtypes.int64, dtypes.float16, dtypes.float32, + dtypes.float64 +]) class ImageOpsTest(test_util.TensorFlowTestCase): @@ -49,29 +49,35 @@ class ImageOpsTest(test_util.TensorFlowTestCase): image_ops.rotate(image, angle).eval(), np.zeros(shape, dtype.as_numpy_dtype())) - # TODO(b/133773834) Re-enable these tests. - @unittest.skip("Skipping because of b/133773834.") def test_rotate_even(self): for dtype in _DTYPES: with self.cached_session(): image = array_ops.reshape( math_ops.cast(math_ops.range(36), dtype), (6, 6)) image_rep = array_ops.tile(image[None, :, :, None], [3, 1, 1, 1]) - angles = constant_op.constant([0.0, np.pi / 4.0, np.pi / 2.0], + angles = constant_op.constant([0.0, np.pi / 2.0, np.pi * 3. 
/ 2.], dtypes.float32) image_rotated = image_ops.rotate(image_rep, angles) + # pyformat: disable self.assertAllEqual(image_rotated[:, :, :, 0].eval(), - [[[0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11], + [[[0, 1, 2, 3, 4, 5], + [6, 7, 8, 9, 10, 11], [12, 13, 14, 15, 16, 17], [18, 19, 20, 21, 22, 23], [24, 25, 26, 27, 28, 29], [30, 31, 32, 33, 34, 35]], - [[0, 3, 4, 11, 17, 0], [2, 3, 9, 16, 23, 23], - [1, 8, 15, 21, 22, 29], [6, 13, 20, 21, 27, 34], - [12, 18, 19, 26, 33, 33], [0, 18, 24, 31, 32, 0]], - [[5, 11, 17, 23, 29, 35], [4, 10, 16, 22, 28, 34], - [3, 9, 15, 21, 27, 33], [2, 8, 14, 20, 26, 32], - [1, 7, 13, 19, 25, 31], [0, 6, 12, 18, 24, 30]]]) + [[5, 11, 17, 23, 29, 35], + [4, 10, 16, 22, 28, 34], + [3, 9, 15, 21, 27, 33], + [2, 8, 14, 20, 26, 32], + [1, 7, 13, 19, 25, 31], + [0, 6, 12, 18, 24, 30]], + [[30, 24, 18, 12, 6, 0], + [31, 25, 19, 13, 7, 1], + [32, 26, 20, 14, 8, 2], + [33, 27, 21, 15, 9, 3], + [34, 28, 22, 16, 10, 4], + [35, 29, 23, 17, 11, 5]]]) def test_rotate_odd(self): for dtype in _DTYPES: diff --git a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py index a703dc66e92..35352b32ec8 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py @@ -625,14 +625,14 @@ class DynamicRnnEstimator(estimator.Estimator): optimizer: The type of optimizer to use. Either a subclass of `Optimizer`, an instance of an `Optimizer`, a callback that returns an optimizer, or a string. Strings must be one of 'Adagrad', 'Adam', - 'Ftrl', 'Momentum', 'RMSProp' or 'SGD. See `layers.optimize_loss` for + 'Ftrl', 'Momentum', 'RMSProp' or 'SGD'. See `layers.optimize_loss` for more details. learning_rate: Learning rate. This argument has no effect if `optimizer` is an instance of an `Optimizer`. 
predict_probabilities: A boolean indicating whether to predict probabilities for all classes. Used only if `problem_type` is `ProblemType.CLASSIFICATION` - momentum: Momentum value. Only used if `optimizer_type` is 'Momentum'. + momentum: Momentum value. Only used if `optimizer` is 'Momentum'. gradient_clipping_norm: Parameter used for gradient clipping. If `None`, then no clipping is performed. dropout_keep_probabilities: a list of dropout probabilities or `None`. diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile index f90af791aa4..43d0ef4c80d 100644 --- a/tensorflow/contrib/makefile/Makefile +++ b/tensorflow/contrib/makefile/Makefile @@ -909,7 +909,13 @@ $(HOST_OBJDIR)%.pb.o: $(HOST_GENDIR)%.pb.cc $(PROTO_TEXT_OBJS) : $(PROTO_TEXT_PB_H_FILES) # Ensures we link CoreFoundation as it is used for time library when building -# for Mac. +# for Mac and iOS +ifeq ($(TARGET),OSX) + ifeq ($(HOST_ARCH),x86_64) + HOST_LDOPTS += -framework CoreFoundation + LIBS += -framework CoreFoundation + endif +endif ifeq ($(TARGET),IOS) ifeq ($(IOS_ARCH),X86_64) HOST_LDOPTS += -framework CoreFoundation diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 621bbcd3576..1a92570917c 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -3631,11 +3631,15 @@ tf_cuda_library( srcs = [ "common_runtime/gpu/gpu_id.h", ], - hdrs = ["common_runtime/gpu/gpu_mem_allocator.h"], + hdrs = [ + "common_runtime/gpu/gpu_host_allocator.h", + "common_runtime/gpu/gpu_mem_allocator.h", + ], features = ["parse_headers"], visibility = ["//visibility:public"], deps = [ ":allocator", + ":lib", ":lib_internal", ":stream_executor", ], @@ -4336,7 +4340,7 @@ tf_cc_tests_gpu( "common_runtime/ring_reducer_test.cc", ], linkstatic = tf_kernel_tests_linkstatic(), - tags = tf_cuda_tests_tags(), + tags = ["no_cuda_on_cpu_tap"], deps = [ ":all_kernels", ":core", @@ -4365,7 +4369,7 @@ tf_cc_tests_gpu( "common_runtime/ring_gatherer_test.cc", ], linkstatic = 
tf_kernel_tests_linkstatic(), - tags = tf_cuda_tests_tags(), + tags = ["no_cuda_on_cpu_tap"], deps = [ ":all_kernels", ":core", @@ -4394,7 +4398,7 @@ tf_cc_tests_gpu( "common_runtime/hierarchical_tree_broadcaster_test.cc", ], linkstatic = tf_kernel_tests_linkstatic(), - tags = tf_cuda_tests_tags(), + tags = ["no_cuda_on_cpu_tap"], deps = [ ":all_kernels", ":core", diff --git a/tensorflow/core/api_def/base_api/api_def_LeakyReluGrad.pbtxt b/tensorflow/core/api_def/base_api/api_def_LeakyReluGrad.pbtxt index e4275266020..022f950f630 100644 --- a/tensorflow/core/api_def/base_api/api_def_LeakyReluGrad.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_LeakyReluGrad.pbtxt @@ -17,7 +17,7 @@ END out_arg { name: "backprops" description: < 0) + alpha * gradients * (featurs <= 0)`. +`gradients * (features > 0) + alpha * gradients * (features <= 0)`. END } summary: "Computes rectified linear gradients for a LeakyRelu operation." diff --git a/tensorflow/core/api_def/base_api/api_def_MatchingFiles.pbtxt b/tensorflow/core/api_def/base_api/api_def_MatchingFiles.pbtxt index 97fd39f6478..56027301fba 100644 --- a/tensorflow/core/api_def/base_api/api_def_MatchingFiles.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_MatchingFiles.pbtxt @@ -16,6 +16,6 @@ END description: < remote_device_manager, const std::vector& remote_contexts, uint64 context_id, Rendezvous* r, DeviceMgr* local_device_mgr, int keep_alive_secs, - DistributedFunctionLibraryRuntime* cluster_flr) { + DistributedFunctionLibraryRuntime* cluster_flr, + std::unique_ptr> + remote_mgr) { mutex_lock l(remote_state_mu_); if (!remote_contexts_.empty()) { @@ -673,6 +675,7 @@ Status EagerContext::InitializeRemoteMaster( } server_ = std::move(server); + remote_mgr_ = std::move(remote_mgr); worker_env_ = worker_env; worker_session_ = worker_session; remote_eager_workers_ = std::move(remote_eager_workers); @@ -749,7 +752,9 @@ Status EagerContext::InitializeRemoteWorker( std::unique_ptr remote_eager_workers, const DeviceMgr* 
remote_device_mgr, const std::vector& remote_contexts, uint64 context_id, - std::function rendezvous_creator) { + std::function rendezvous_creator, + std::unique_ptr> + remote_mgr) { mutex_lock l(remote_state_mu_); if (remote_device_manager_ != nullptr || server_ != nullptr || @@ -764,6 +769,7 @@ Status EagerContext::InitializeRemoteWorker( rendezvous_creator_ = std::move(rendezvous_creator); remote_eager_workers_ = std::move(remote_eager_workers); + remote_mgr_ = std::move(remote_mgr); remote_unowned_device_manager_ = remote_device_mgr; InitDeviceMapAndAsync(); @@ -773,11 +779,6 @@ Status EagerContext::InitializeRemoteWorker( return Status::OK(); } - -tensorflow::uint64 EagerContext::NextId() { - tensorflow::mutex_lock l(next_id_mutex_); - return next_id_++; -} #endif // !IS_MOBILE_PLATFORM } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/eager/context.h b/tensorflow/core/common_runtime/eager/context.h index 3d77c96d69b..e67b5805270 100644 --- a/tensorflow/core/common_runtime/eager/context.h +++ b/tensorflow/core/common_runtime/eager/context.h @@ -41,6 +41,7 @@ limitations under the License. #include "tensorflow/core/util/device_name_utils.h" #if !defined(IS_MOBILE_PLATFORM) #include "tensorflow/core/distributed_runtime/eager/eager_client.h" +#include "tensorflow/core/distributed_runtime/eager/remote_tensor_handle.h" #include "tensorflow/core/distributed_runtime/rendezvous_mgr_interface.h" #include "tensorflow/core/distributed_runtime/server_lib.h" #include "tensorflow/core/distributed_runtime/worker_cache.h" @@ -64,6 +65,13 @@ limitations under the License. namespace tensorflow { +namespace eager { +// We need this forward declaration because we have circular dependency: +// Context -> RemoteMgr -> TensorHandle -> Context. +// TODO(fishx): Remove this once we remove Context dependency in TensorHandle. +class RemoteMgr; +} // namespace eager + // LINT.IfChange // Note: Keep in sync with exported copy of enum in eager/c_api.h. 
enum ContextDevicePlacementPolicy { @@ -173,8 +181,8 @@ class EagerContext : public core::RefCounted { GraphCollector* GetGraphCollector() { return &graph_collector_; } - void ExecutorAdd(std::unique_ptr node) { - executor_.Add(std::move(node)); + Status ExecutorAdd(std::unique_ptr node) { + return executor_.Add(std::move(node)); } Status AddFunctionDef(const FunctionDef& fdef); @@ -280,7 +288,9 @@ class EagerContext : public core::RefCounted { std::unique_ptr remote_device_manager, const std::vector& remote_contexts, uint64 context_id, Rendezvous* r, DeviceMgr* local_device_mgr, int keep_alive_secs, - DistributedFunctionLibraryRuntime* cluster_flr); + DistributedFunctionLibraryRuntime* cluster_flr, + std::unique_ptr> + remote_mgr); // Similar with InitializeRemoteMaster but this context will not kill remote // contexts in shutdown. @@ -288,20 +298,25 @@ class EagerContext : public core::RefCounted { std::unique_ptr remote_eager_workers, const DeviceMgr* remote_device_mgr, const std::vector& remote_contexts, uint64 context_id, - std::function rendezvous_creator); + std::function rendezvous_creator, + std::unique_ptr> + remote_mgr); Status StoreCollectiveOpsServer( std::unique_ptr server, DeviceMgr* device_mgr, CollectiveExecutorMgrInterface* rpc_collective_executor_mgr); + // TODO(fishx): Remove the custom deleter once we remove forward declaration. + const std::unique_ptr>& + RemoteMgr() { + return remote_mgr_; + } + // If true, then tensors should be shipped across processes via the // EagerService.SendTensor RPC. If false, _Send/_Recv ops should be used // instead (which in-turn use WorkerService.RecvTensor RPCs). bool UseSendTensorRPC() { return use_send_tensor_rpc_; } - - // Helper function to create monotonically increasing ids unique to this - // context. 
- uint64 NextId(); #endif // IS_MOBILE_PLATFORM bool PinSmallOpsToCPU() { return pin_small_ops_to_cpu_; } @@ -432,8 +447,8 @@ class EagerContext : public core::RefCounted { condition_variable keep_alive_thread_cv_; bool shutting_down_ GUARDED_BY(keep_alive_thread_shutdown_mu_) = false; - mutex next_id_mutex_; - uint64 next_id_ GUARDED_BY(next_id_mutex_) = 1; + std::unique_ptr> + remote_mgr_; #endif // IS_MOBILE_PLATFORM bool use_send_tensor_rpc_; diff --git a/tensorflow/core/common_runtime/eager/eager_executor.cc b/tensorflow/core/common_runtime/eager/eager_executor.cc index 6e1ae00130c..ae3369dfbc0 100644 --- a/tensorflow/core/common_runtime/eager/eager_executor.cc +++ b/tensorflow/core/common_runtime/eager/eager_executor.cc @@ -32,21 +32,32 @@ void EagerExecutor::EnableAsync() { } } -void EagerExecutor::Add(std::unique_ptr node) { - tensorflow::mutex_lock l(node_queue_mutex_); - DCHECK(thread_) << "EnableAsync should have been called before Add"; - if (!status_.ok()) { - // node will be automatically deleted - return; +Status EagerExecutor::Add(std::unique_ptr node) { + Status status; + + // If we are unable to add the node to the queue, we must call Abort. However, + // we want to do that outside of the scope of the lock since the Abort may + // try to call EagerExecutor::Add() + { + tensorflow::mutex_lock l(node_queue_mutex_); + DCHECK(thread_) << "EnableAsync should have been called before Add"; + status = status_; + if (status.ok()) { + node_queue_.push(std::move(node)); + + // If there were no previous nodes pending, wake the run thread to start + // processing requests again. + if (node_queue_.size() == 1) { + nodes_pending_.notify_all(); + } + + return Status::OK(); + } } - node_queue_.push(std::move(node)); - - // If there were no previous nodes pending, wake the run thread to start - // processing requests again. 
- if (node_queue_.size() == 1) { - nodes_pending_.notify_all(); - } + // Node needs to be aborted since it was not added to the queue + node->Abort(status); + return status; } tensorflow::Status EagerExecutor::WaitForAllPendingNodes() { diff --git a/tensorflow/core/common_runtime/eager/eager_executor.h b/tensorflow/core/common_runtime/eager/eager_executor.h index aea09ec1bb1..9a5aee313b6 100644 --- a/tensorflow/core/common_runtime/eager/eager_executor.h +++ b/tensorflow/core/common_runtime/eager/eager_executor.h @@ -42,6 +42,10 @@ namespace tensorflow { class EagerNode { public: EagerNode() {} + // Nodes should not do any work in their destructor. This is because if the + // node is being destructed by the EagerExecutor, then the node queue lock may + // be held. Instead opt for calling clean-up code as part of Run() or Abort(), + // since one of those is guaranteed to be run. virtual ~EagerNode() {} // Runs the computation corresponding to this node and blocks till the @@ -74,8 +78,7 @@ class EagerExecutor { void EnableAsync(); // Schedules `node` for execution. - // Note that Add must be called in monotonically increasing order of node->id. - void Add(std::unique_ptr node); + Status Add(std::unique_ptr node); // Blocks till all currently pending ops are done. Status WaitForAllPendingNodes(); diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc index a206d4068e8..d39c7af8727 100644 --- a/tensorflow/core/common_runtime/eager/execute.cc +++ b/tensorflow/core/common_runtime/eager/execute.cc @@ -43,6 +43,7 @@ limitations under the License. 
#include "tensorflow/core/profiler/lib/traceme.h" #include "tensorflow/core/util/device_name_utils.h" #if !defined(IS_MOBILE_PLATFORM) +#include "tensorflow/core/distributed_runtime/eager/remote_mgr.h" #include "tensorflow/core/distributed_runtime/eager/eager_client.h" #include "tensorflow/core/distributed_runtime/eager/remote_execute_node.h" #endif // IS_MOBILE_PLATFORM @@ -214,6 +215,8 @@ Status ValidateInputTypeAndPlacement( EagerContext* ctx, EagerOperation* op, const core::RefCountPtr& kernel, RunMetadata* run_metadata) { + profiler::TraceMe activity("ValidateInputTypeAndPlacement", + profiler::TraceMeLevel::kInfo); if (kernel->num_inputs() != op->Inputs().size()) { return errors::InvalidArgument("expected ", kernel->num_inputs(), " inputs, got ", op->Inputs().size()); @@ -463,8 +466,7 @@ Status ShouldCompileWithXLA(const EagerOperation* op, const EagerContext* ctx, // runtime. In this case, we don't select a device because running // a function with explicitly requested device has different behavior than // running without an explicitly requested device. -Status EagerLocalExecute(EagerOperation* op, - gtl::InlinedVector* retvals, +Status EagerLocalExecute(EagerOperation* op, TensorHandle** retvals, int* num_retvals) { profiler::TraceMe activity( [&] { return absl::StrCat("EagerLocalExecute: ", op->Name()); }, @@ -487,6 +489,8 @@ Status EagerLocalExecute(EagerOperation* op, std::unordered_map input_resource_variable_dtypes_and_shapes; if (is_multi_device_function) { + profiler::TraceMe activity("EagerCopyToDeviceAndAddCacheKey", + profiler::TraceMeLevel::kInfo); input_dev_ptrs.reserve(op->Inputs().size()); // All inputs need to be on local devices. 
// TODO(b/122851476): This is a limitation of the current code base (but @@ -628,13 +632,13 @@ Status EagerLocalExecute(EagerOperation* op, ctx->AddKernelToCache(cache_key, kernel.get()); } const DataTypeVector& output_dtypes = kernel->output_dtypes(); - const int output_dtypes_size = static_cast(output_dtypes.size()); - if (output_dtypes_size > *num_retvals) { - return errors::InvalidArgument("Expecting ", output_dtypes.size(), + const size_t num_outputs = static_cast(output_dtypes.size()); + if (num_outputs > *num_retvals) { + return errors::InvalidArgument("Expecting ", num_outputs, " outputs, but *num_retvals is ", *num_retvals); } - *num_retvals = output_dtypes_size; + *num_retvals = num_outputs; TF_RETURN_IF_ERROR(ValidateInputTypeAndPlacement( ctx, op, kernel, ctx->ShouldStoreStepStats() ? ctx->RunMetadataProto() : nullptr)); @@ -658,32 +662,33 @@ Status EagerLocalExecute(EagerOperation* op, maybe_stats->set_scheduled_nanos(now_nanos); // TODO(apassos) track referenced tensors } - retvals->resize(*num_retvals); - if (ctx->Async()) { - // Note that for async mode, execution order will make sure that all - // input handles are ready before executing them. - // TODO(agarwal): Consider executing "cheap" kernels inline for - // performance. - for (int i = 0; i < *num_retvals; ++i) { - TF_RETURN_IF_ERROR(TensorHandle::CreateAsyncLocalHandle( - /* d= */ kernel->OutputDevice(i), - /* op_device= */ kernel->device(), - /* resource_device= */ kernel->OutputResourceDevice(i), - output_dtypes[i], ctx, &(*retvals)[i])); - } - std::unique_ptr node(new ExecuteNode( - ctx, op->Inputs(), std::move(kernel), maybe_stats.release(), - maybe_step_stats, graph_collector, output_dtypes, *retvals)); - ctx->ExecutorAdd(std::move(node)); - return Status::OK(); - } else { - // Execute checks if retvals[i] is nullptr or not to figure if it needs to - // allocate it. 
- return EagerKernelExecute(ctx, op->Inputs(), kernel.get(), - maybe_stats.get(), maybe_step_stats, - graph_collector, retvals->data(), *num_retvals); + for (int i = 0; i < num_outputs; ++i) { + TF_RETURN_IF_ERROR(TensorHandle::CreateAsyncLocalHandle( + /* d= */ kernel->OutputDevice(i), + /* op_device= */ kernel->device(), + /* resource_device= */ kernel->OutputResourceDevice(i), + output_dtypes[i], ctx, &retvals[i])); } + + std::unique_ptr node( + new ExecuteNode(ctx, op->Inputs(), std::move(kernel), + maybe_stats.release(), maybe_step_stats, graph_collector, + output_dtypes, {retvals, num_outputs})); + // Note that for async mode, execution order will make sure that all + // input handles are ready before executing them. + // TODO(b/137118203): Consider executing "cheap" kernels inline for + // performance. + Status s = ctx->Async() ? ctx->ExecutorAdd(std::move(node)) : node->Run(); + // Since the operation failed, we need to Unref any outputs that were + // allocated. + if (!s.ok()) { + for (int i = 0; i < num_outputs; ++i) { + retvals[i]->Unref(); + } + } + + return s; } #if !defined(IS_MOBILE_PLATFORM) @@ -706,7 +711,7 @@ Status EagerRemoteSendTensor(EagerContext* ctx, TensorHandle* h, eager::SendTensorResponse response; request.set_context_id(context_id); - request.set_op_id(ctx->NextId()); + request.set_op_id(ctx->RemoteMgr()->NextOpId()); request.set_device_name(recv_device->name()); // AsProtoTensorContent doesn't work when the tensor is on the GPU, hence @@ -745,38 +750,10 @@ Status EagerRemoteSendTensor(EagerContext* ctx, TensorHandle* h, return status; } -Status AddRemoteInput(eager::Operation* remote_op, TensorHandle* input, - Device* input_device) { - tensorflow::int64 op_id; - int32 output_num; - TF_RETURN_IF_ERROR(input->RemoteAddress(input_device, &op_id, &output_num)); - - auto* remote_op_input = remote_op->add_inputs(); - remote_op_input->set_op_id(op_id); - remote_op_input->set_output_num(output_num); - - return Status::OK(); -} - -Status 
EnqueueAndWait(eager::EagerClient* eager_client, - const std::unique_ptr& request, - eager::EnqueueResponse* response) { - Notification n; - Status status; - eager_client->EnqueueAsync(request.get(), response, - [&n, &status](const Status& s) { - status = s; - n.Notify(); - }); - n.WaitForNotification(); - - return status; -} - void PrepareRemoteOp(eager::Operation* remote_op, EagerOperation* op) { EagerContext* ctx = op->EagerContext(); - remote_op->set_id(ctx->NextId()); + remote_op->set_id(ctx->RemoteMgr()->NextOpId()); remote_op->set_name(op->Name()); op->Attrs().FillAttrValueMap(remote_op->mutable_attrs()); @@ -801,40 +778,43 @@ Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals, TF_RETURN_IF_ERROR(ctx->GetClient(op->GetDeviceName(), &eager_client)); std::unique_ptr request(new eager::EnqueueRequest); - eager::EnqueueResponse response; - request->set_context_id(context_id); eager::Operation* remote_op = request->add_queue()->mutable_operation(); - for (int i = 0; i < op->Inputs().size(); i++) { - tensorflow::TensorHandle* input = op->Inputs()[i]; - tensorflow::Device* input_device = input->device(); - if (op->Device() != input_device && - // If the expected and actual devices are on the same task, don't - // explicitly copy, and instead depend on the copy to happen locally - // when the op is executed on the device. - !ctx->OnSameTask(op->Device(), input_device)) { - tensorflow::Device* remote_cpu_device; - TF_RETURN_IF_ERROR( - ctx->CPUDeviceOnTask(op->Device(), &remote_cpu_device)); - // TODO(b/110044833): It's possible the same tensor gets copied to the - // remote device repeatedly. - // Always copy to the remote CPU so that the actual device can be - // correctly determined after the kernel is selected/instantiated, since - // the op might have its inputs on host memory. 
- TensorHandle* handle = nullptr; - TF_RETURN_IF_ERROR( - MaybeCopyInputToExpectedDevice(op, op->Device(), i, remote_cpu_device, - /* run_metadata= */ nullptr, &handle)); - op->UpdateInput(i, handle); - input = handle; - input_device = remote_cpu_device; - // Unref handle since it has a ref as an input now - handle->Unref(); - } + { + profiler::TraceMe activity("CopyInputToExpectedDevice", + profiler::TraceMeLevel::kInfo); + for (int i = 0; i < op->Inputs().size(); i++) { + tensorflow::TensorHandle* input = op->Inputs()[i]; + tensorflow::Device* input_device = input->device(); + if (op->Device() != input_device && + // If the expected and actual devices are on the same task, don't + // explicitly copy, and instead depend on the copy to happen locally + // when the op is executed on the device. + !ctx->OnSameTask(op->Device(), input_device)) { + tensorflow::Device* remote_cpu_device; + TF_RETURN_IF_ERROR( + ctx->CPUDeviceOnTask(op->Device(), &remote_cpu_device)); + // TODO(b/110044833): It's possible the same tensor gets copied to the + // remote device repeatedly. + // Always copy to the remote CPU so that the actual device can be + // correctly determined after the kernel is selected/instantiated, since + // the op might have its inputs on host memory. 
+ TensorHandle* handle = nullptr; + TF_RETURN_IF_ERROR(MaybeCopyInputToExpectedDevice( + op, op->Device(), i, remote_cpu_device, + /* run_metadata= */ nullptr, &handle)); + op->UpdateInput(i, handle); + input = handle; + input_device = remote_cpu_device; + // Unref handle since it has a ref as an input now + handle->Unref(); + } - TF_RETURN_IF_ERROR(AddRemoteInput(remote_op, input, input_device)); + TF_RETURN_IF_ERROR(ctx->RemoteMgr()->SerializeRemoteTensorHandle( + input, remote_op->add_inputs(), input_device)); + } } PrepareRemoteOp(remote_op, op); @@ -842,10 +822,12 @@ Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals, DataTypeVector output_dtypes; TF_RETURN_IF_ERROR(GetOutputDTypes(op, &output_dtypes)); - if (*num_retvals != output_dtypes.size()) { + const size_t num_outputs = static_cast(output_dtypes.size()); + if (num_outputs != *num_retvals) { return errors::InvalidArgument( "num_retvals does not match expected output dtypes"); } + *num_retvals = num_outputs; tensorflow::Device* op_device = op->Device(); @@ -854,42 +836,36 @@ Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals, << " (is async?: " << is_async << ")."; const tensorflow::uint64 id = remote_op->id(); - if (is_async) { - for (int i = 0; i < *num_retvals; i++) { - // TODO(nareshmodi): Change the callback to instead add the decref to a - // list of pending decrefs that we can send as a batch with the next - // execute. + for (int i = 0; i < num_outputs; ++i) { + // TODO(nareshmodi): Change the callback to instead add the decref to a + // list of pending decrefs that we can send as a batch with the next + // execute. - // The device_ and resource_device_ of this TensorHandle might be - // incorrect. It is pretty hard to make it correct because for - // multi-device functions, we don't know the output device until the - // function is instantiated. Luckily, we don't need to know the correct - // remote device here. We just need to know that it is remote. 
If we need - // to copy this tensor to this process, the remote end will know the - // correct device of this handle. - TF_RETURN_IF_ERROR(TensorHandle::CreateUnshapedRemoteHandle( - id, i, eager_client, context_id, output_dtypes[i], op_device, - output_dtypes[i] == DT_RESOURCE ? op_device : nullptr, ctx, - &retvals[i])); - } + // The device_ and resource_device_ of this TensorHandle might be + // incorrect. It is pretty hard to make it correct because for + // multi-device functions, we don't know the output device until the + // function is instantiated. Luckily, we don't need to know the correct + // remote device here. We just need to know that it is remote. If we need + // to copy this tensor to this process, the remote end will know the + // correct device of this handle. + TF_RETURN_IF_ERROR(TensorHandle::CreateUnshapedRemoteHandle( + id, i, eager_client, context_id, output_dtypes[i], op_device, ctx, + &retvals[i])); + } - // TODO(gjn): If the retval TensorHandle is simply going to be used as a - // mirror then there should be no need to call SetRemoteShape - std::unique_ptr node(new eager::RemoteExecuteNode( - std::move(request), eager_client, op->Inputs(), retvals, *num_retvals)); - ctx->ExecutorAdd(std::move(node)); - } else { - TF_RETURN_IF_ERROR(EnqueueAndWait(eager_client, request, &response)); - - for (int i = 0; i < *num_retvals; i++) { - TF_RETURN_IF_ERROR(TensorHandle::CreateRemoteHandle( - id, i, response.queue_response(0).shape(i), eager_client, context_id, - output_dtypes[i], op_device, - output_dtypes[i] == DT_RESOURCE ? op_device : nullptr, ctx, - &retvals[i])); + std::unique_ptr node( + new eager::RemoteExecuteNode(std::move(request), op_device, eager_client, + op->Inputs(), {retvals, num_outputs})); + Status s = is_async ? ctx->ExecutorAdd(std::move(node)) : node->Run(); + // Since the operation failed, we need to Unref any outputs that were + // allocated. 
+ if (!s.ok()) { + for (int i = 0; i < num_outputs; ++i) { + retvals[i]->Unref(); } } - return Status::OK(); + + return s; } #endif // IS_MOBILE_PLATFORM @@ -1019,10 +995,9 @@ Status EagerExecute(EagerOperation* op, if (op_is_local) { if (out_op) { - return EagerLocalExecute(out_op.get(), retvals, num_retvals); - } else { - return EagerLocalExecute(op, retvals, num_retvals); + op = out_op.get(); } + return EagerLocalExecute(op, retvals->data(), num_retvals); } if (op->EagerContext()->LogDevicePlacement() || VLOG_IS_ON(1)) { @@ -1039,19 +1014,20 @@ Status EagerExecute(EagerOperation* op, "Eager's remote execution is not available on mobile devices."); #else // !IS_MOBILE_PLATFORM if (out_op) { - return EagerRemoteExecute(out_op.get(), retvals->data(), num_retvals); - } else { - return EagerRemoteExecute(op, retvals->data(), num_retvals); + op = out_op.get(); } + return EagerRemoteExecute(op, retvals->data(), num_retvals); #endif // !IS_MOBILE_PLATFORM } +// TODO(gjn): Consider moving into ExecuteNode class Status EagerKernelExecute(EagerContext* ctx, const gtl::InlinedVector& op_inputs, - KernelAndDevice* kernel, NodeExecStats* maybe_stats, + const core::RefCountPtr& kernel, + NodeExecStats* maybe_stats, StepStats* maybe_step_stats, GraphCollector* graph_collector, - TensorHandle** retvals, int num_retvals) { + absl::Span retvals) { profiler::TraceMe activity("EagerKernelExecute", profiler::TraceMeLevel::kInfo); std::vector outputs(1); @@ -1154,21 +1130,12 @@ Status EagerKernelExecute(EagerContext* ctx, } } } - DCHECK_EQ(num_retvals, outputs.size()); - for (int i = 0; i < num_retvals; ++i) { - if (retvals[i] == nullptr) { - TF_RETURN_IF_ERROR(TensorHandle::CreateLocalHandle( - outputs[i], /* d= */ kernel->OutputDevice(i), - /* op_device= */ kernel->device(), ctx, &retvals[i])); - } else { - // In the async case, the retval is not a nullptr, and its device is - // already set since all TensorHandles always have their device set - // (potentially to nullptr) during 
construction. - DCHECK_EQ(kernel->device(), retvals[i]->op_device()); - DCHECK_EQ(kernel->OutputDevice(i), retvals[i]->device()); + DCHECK_EQ(retvals.size(), outputs.size()); + for (int i = 0; i < retvals.size(); ++i) { + DCHECK_EQ(kernel->device(), retvals[i]->op_device()); + DCHECK_EQ(kernel->OutputDevice(i), retvals[i]->device()); - TF_RETURN_IF_ERROR(retvals[i]->SetTensor(outputs[i])); - } + TF_RETURN_IF_ERROR(retvals[i]->SetTensor(outputs[i])); } return Status::OK(); } @@ -1178,23 +1145,21 @@ namespace { Status LocalEagerCopyToDevice(TensorHandle* h, EagerContext* ctx, Device* dstd, TensorHandle** result) { TF_RETURN_IF_ERROR(ctx->GetStatus()); - if (ctx->Async()) { - TF_RETURN_IF_ERROR(TensorHandle::CreateAsyncLocalHandle( - dstd, dstd, nullptr, h->dtype, ctx, result)); - // Note that `h` may not be currently ready. However execution order will - // make sure that `h` is ready before the copy is actually done. - std::unique_ptr node( - new CopyToDeviceNode(h, *result, dstd, ctx)); - // Note that calling Add makes `node` accessible by the EagerExecutor - // thread. So further accesses need to be thread-safe. - ctx->ExecutorAdd(std::move(node)); - return Status::OK(); - } else { - tensorflow::Tensor tensor; - TF_RETURN_IF_ERROR(h->CopyToDevice(ctx, dstd, &tensor)); - return TensorHandle::CreateLocalHandle(tensor, dstd, ctx, result); - return Status::OK(); + Device* resource_device = (h->dtype == DT_RESOURCE) ? dstd : nullptr; + TF_RETURN_IF_ERROR(TensorHandle::CreateAsyncLocalHandle( + dstd, dstd, resource_device, h->dtype, ctx, result)); + + // Note that `h` may not be currently ready. However execution order will + // make sure that `h` is ready before the copy is actually done. + std::unique_ptr node(new CopyToDeviceNode(h, *result, dstd, ctx)); + Status s = ctx->Async() ? ctx->ExecutorAdd(std::move(node)) : node->Run(); + // Since the operation failed, we need to Unref any outputs that were + // allocated. 
+ if (!s.ok()) { + (*result)->Unref(); } + + return s; } #if !defined(IS_MOBILE_PLATFORM) @@ -1259,25 +1224,23 @@ Status ExecuteSend(EagerContext* ctx, Device* device, TensorHandle* h, } else { eager::EagerClient* eager_client; uint64 context_id = ctx->GetContextId(); - TF_RETURN_IF_ERROR( - ctx->GetClient(device, &eager_client)); + TF_RETURN_IF_ERROR(ctx->GetClient(device, &eager_client)); std::unique_ptr request(new eager::EnqueueRequest); - eager::EnqueueResponse response; - request->set_context_id(context_id); auto* remote_op = request->add_queue()->mutable_operation(); - TF_RETURN_IF_ERROR(AddRemoteInput(remote_op, h, h->device())); + TF_RETURN_IF_ERROR(ctx->RemoteMgr()->SerializeRemoteTensorHandle( + h, remote_op->add_inputs(), h->device())); PrepareRemoteOp(remote_op, &op); + std::unique_ptr node(new eager::RemoteExecuteNode( + std::move(request), nullptr, eager_client, op.Inputs(), {nullptr, 0})); if (ctx->Async()) { - std::unique_ptr node(new eager::RemoteExecuteNode( - std::move(request), eager_client, op.Inputs(), nullptr, 0)); - ctx->ExecutorAdd(std::move(node)); + TF_RETURN_IF_ERROR(ctx->ExecutorAdd(std::move(node))); } else { - TF_RETURN_IF_ERROR(EnqueueAndWait(eager_client, request, &response)); + TF_RETURN_IF_ERROR(node->Run()); } } @@ -1328,8 +1291,7 @@ Status ExecuteRecv(EagerContext* ctx, Device* device, DataType dtype, } else { eager::EagerClient* eager_client; uint64 context_id = ctx->GetContextId(); - TF_RETURN_IF_ERROR( - ctx->GetClient(device, &eager_client)); + TF_RETURN_IF_ERROR(ctx->GetClient(device, &eager_client)); std::unique_ptr request(new eager::EnqueueRequest); eager::EnqueueResponse response; @@ -1340,30 +1302,24 @@ Status ExecuteRecv(EagerContext* ctx, Device* device, DataType dtype, PrepareRemoteOp(remote_op, &op); const uint64 id = remote_op->id(); - if (ctx->Async()) { - TF_RETURN_IF_ERROR(TensorHandle::CreateUnshapedRemoteHandle( - id, 0, eager_client, context_id, dtype, device, - dtype == DT_RESOURCE ? 
device : nullptr, ctx, result)); - - std::unique_ptr node(new eager::RemoteExecuteNode( - std::move(request), eager_client, op.Inputs(), result, 1)); - ctx->ExecutorAdd(std::move(node)); + auto tensor_handle_data = absl::make_unique( + id, 0, eager_client, context_id, ctx); + if (mirror_dst != nullptr) { + TF_RETURN_IF_ERROR(mirror_dst->AddUnshapedRemoteMirror( + std::move(tensor_handle_data), device)); + mirror_dst->Ref(); + *result = mirror_dst; } else { - TF_RETURN_IF_ERROR(EnqueueAndWait(eager_client, request, &response)); + TF_RETURN_IF_ERROR(TensorHandle::CreateUnshapedRemoteHandle( + std::move(tensor_handle_data), dtype, device, ctx, result)); + } - auto tensor_handle_data = absl::make_unique( - id, 0, response.queue_response(0).shape(0), eager_client, context_id, - ctx); - if (mirror_dst != nullptr) { - TF_RETURN_IF_ERROR( - mirror_dst->AddRemoteMirror(std::move(tensor_handle_data), device)); - mirror_dst->Ref(); - *result = mirror_dst; - } else { - TF_RETURN_IF_ERROR(TensorHandle::CreateRemoteHandle( - std::move(tensor_handle_data), dtype, device, - dtype == DT_RESOURCE ? 
device : nullptr, ctx, result)); - } + std::unique_ptr node(new eager::RemoteExecuteNode( + std::move(request), device, eager_client, op.Inputs(), {result, 1})); + if (ctx->Async()) { + TF_RETURN_IF_ERROR(ctx->ExecutorAdd(std::move(node))); + } else { + TF_RETURN_IF_ERROR(node->Run()); } } @@ -1385,8 +1341,6 @@ string GetUniqueWireID() { Status EagerCopyToDevice(TensorHandle* h, EagerContext* ctx, Device* device, bool mirror, TensorHandle** result) { - profiler::TraceMe activity("EagerCopyToDevice", - profiler::TraceMeLevel::kInfo); Device* send_device = h->DeviceOrHostCPU(ctx); bool sender_is_local = ctx->IsLocal(send_device); diff --git a/tensorflow/core/common_runtime/eager/execute.h b/tensorflow/core/common_runtime/eager/execute.h index d6fba09ed79..0e7e1641adf 100644 --- a/tensorflow/core/common_runtime/eager/execute.h +++ b/tensorflow/core/common_runtime/eager/execute.h @@ -15,6 +15,7 @@ limitations under the License. #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_EXECUTE_H_ #define TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_EXECUTE_H_ +#include "absl/types/span.h" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/eager/context.h" #include "tensorflow/core/common_runtime/eager/eager_operation.h" @@ -45,10 +46,11 @@ Status EagerExecute( // `kernel->device()`, with the inputs op_inputs, in the context 'ctx'. Status EagerKernelExecute(EagerContext* ctx, const gtl::InlinedVector& op_inputs, - KernelAndDevice* kernel, NodeExecStats* maybe_stats, + const core::RefCountPtr& kernel, + NodeExecStats* maybe_stats, StepStats* maybe_step_stats, GraphCollector* graph_collector, - TensorHandle** retvals, int num_retvals); + absl::Span retvals); // Low-level utility to copy a tensor handle from one device to another. If // successful, result TensorHandle will be populated. 
If the caller requests for diff --git a/tensorflow/core/common_runtime/eager/execute_node.h b/tensorflow/core/common_runtime/eager/execute_node.h index 3bbd291488a..0e6e5241a56 100644 --- a/tensorflow/core/common_runtime/eager/execute_node.h +++ b/tensorflow/core/common_runtime/eager/execute_node.h @@ -15,6 +15,7 @@ limitations under the License. #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_EXECUTE_NODE_H_ #define TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_EXECUTE_NODE_H_ +#include "absl/types/span.h" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/eager/context.h" #include "tensorflow/core/common_runtime/eager/eager_executor.h" @@ -32,70 +33,75 @@ namespace tensorflow { class ExecuteNode : public EagerNode { public: ExecuteNode(EagerContext* ctx, - const tensorflow::gtl::InlinedVector& inputs, + const gtl::InlinedVector& inputs, core::RefCountPtr kernel, NodeExecStats* maybe_stats, StepStats* maybe_step_stats, GraphCollector* graph_collector, const DataTypeVector& output_dtypes, - const tensorflow::gtl::InlinedVector& retvals) + absl::Span retvals) : EagerNode(), ctx_(ctx), inputs_(inputs), kernel_(std::move(kernel)), maybe_stats_(maybe_stats), maybe_step_stats_(maybe_step_stats), - graph_collector_(graph_collector), - retvals_(retvals) { - for (auto handle : inputs_) { + graph_collector_(graph_collector) { + // Copy the output handles, since the container for them might get + // destroyed. + for (auto handle : retvals) { handle->Ref(); + retvals_.push_back(handle); } - for (auto handle : retvals_) { - handle->Ref(); - } - } - ~ExecuteNode() override { + // This is required to ensure that the tensor handles stay alive across the + // execution. 
for (auto handle : inputs_) { - handle->Unref(); - } - for (auto handle : retvals_) { - handle->Unref(); + handle->Ref(); } } Status Run() override { const Status status = EagerKernelExecute( - ctx_, inputs_, kernel_.get(), maybe_stats_.get(), maybe_step_stats_, - graph_collector_, retvals_.begin(), retvals_.size()); - if (status.ok()) { - // If status is ok, EagerKernelExecute would have called SetTensor on - // all the output handles. + ctx_, inputs_, kernel_, maybe_stats_.get(), maybe_step_stats_, + graph_collector_, absl::MakeSpan(retvals_)); + if (!status.ok()) { + Abort(status); return status; - } else { - Status s = - Status(status.code(), - strings::StrCat("Got error, \"", status.error_message(), - "\" while executing kernel ", - kernel_->kernel()->def().DebugString())); - Abort(s); - return s; } + + // If status is ok, EagerKernelExecute would have called SetTensor on + // all the output handles. + + for (auto handle : retvals_) { + handle->Unref(); + } + + for (auto handle : inputs_) { + handle->Unref(); + } + + return status; } void Abort(Status status) override { for (auto handle : retvals_) { handle->Poison(status); + handle->Unref(); + } + + for (auto handle : inputs_) { + handle->Unref(); } } private: - tensorflow::EagerContext* ctx_; - tensorflow::gtl::InlinedVector inputs_; - core::RefCountPtr kernel_; + EagerContext* ctx_; + gtl::InlinedVector inputs_; + core::RefCountPtr kernel_; std::unique_ptr maybe_stats_; StepStats* maybe_step_stats_; - tensorflow::GraphCollector* graph_collector_; - tensorflow::gtl::InlinedVector retvals_; + GraphCollector* graph_collector_; + gtl::InlinedVector retvals_; }; } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.cc b/tensorflow/core/common_runtime/eager/kernel_and_device.cc index a9d0774fa0f..77271d257fd 100644 --- a/tensorflow/core/common_runtime/eager/kernel_and_device.cc +++ b/tensorflow/core/common_runtime/eager/kernel_and_device.cc @@ -45,6 +45,18 @@ limitations 
under the License. namespace tensorflow { +std::function)>* KernelAndDevice::get_runner() + const { + if (runner_) { + return runner_; + } else { + static auto* default_runner = + new std::function)>( + [](std::function f) { f(); }); + return default_runner; + } +} + KernelAndDeviceFunc::~KernelAndDeviceFunc() { if (handle_ != kInvalidHandle) { Status status = pflr_->ReleaseHandle(handle_); @@ -274,7 +286,7 @@ Status KernelAndDeviceOp::Run(ScopedStepContainer* step_container, params.stats_collector = step_stats_collector.get(); params.graph_collector = graph_collector; } - params.runner = runner_ != nullptr ? runner_ : &default_runner_; + params.runner = get_runner(); params.step_container = step_container; params.collective_executor = @@ -360,7 +372,7 @@ Status KernelAndDeviceFunc::Run( step_stats_collector.reset(new StepStatsCollector(step_stats)); } opts.stats_collector = step_stats_collector.get(); - opts.runner = (runner_ == nullptr) ? &default_runner_ : runner_; + opts.runner = get_runner(); Notification done; Status status; diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.h b/tensorflow/core/common_runtime/eager/kernel_and_device.h index 876594e27b1..edec07dec86 100644 --- a/tensorflow/core/common_runtime/eager/kernel_and_device.h +++ b/tensorflow/core/common_runtime/eager/kernel_and_device.h @@ -70,9 +70,8 @@ class KernelAndDevice : public core::RefCounted { : device_(flr == nullptr ? nullptr : flr->device()), host_cpu_device_(host_cpu_device), flr_(flr), - runner_(runner), - default_runner_([](std::function f) { f(); }), - collective_executor_(std::move(collective_executor)) {} + collective_executor_(std::move(collective_executor)), + runner_(runner) {} // Not thread safe. virtual ~KernelAndDevice() {} @@ -114,6 +113,8 @@ class KernelAndDevice : public core::RefCounted { virtual const string& name() const = 0; protected: + std::function)>* get_runner() const; + // TODO(apassos) Consider a shared cancellation manager. 
Note that this // cancellation manager is not useful to actually cancel anything, and is // provided here only for the few kernels which can't handle one being @@ -122,9 +123,10 @@ class KernelAndDevice : public core::RefCounted { Device* const device_; // can be null Device* const host_cpu_device_; // non-null FunctionLibraryRuntime* const flr_; // can be null - std::function)>* const runner_; - std::function)> default_runner_; const std::unique_ptr collective_executor_; + + private: + std::function)>* const runner_; // can be null }; // Represents an op kernel and the device it will be run on. diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.cc b/tensorflow/core/common_runtime/eager/tensor_handle.cc index a6a2feef60a..8f68ee4bb99 100644 --- a/tensorflow/core/common_runtime/eager/tensor_handle.cc +++ b/tensorflow/core/common_runtime/eager/tensor_handle.cc @@ -209,25 +209,29 @@ TensorHandle::TensorHandle(std::unique_ptr t, } Status TensorHandle::CreateUnshapedRemoteHandle( - int64 op_id, int32 output_num, eager::EagerClient* eager_client, - uint64 context_id, DataType dtype, Device* d, Device* resource_device, - EagerContext* ctx, TensorHandle** h) { - DCHECK(dtype == DT_RESOURCE ? 
resource_device != nullptr - : resource_device == nullptr); + std::unique_ptr t, DataType dtype, + Device* d, EagerContext* ctx, TensorHandle** h) { + *h = new TensorHandle(std::move(t), dtype, d, ctx); + return Status::OK(); +} + +Status TensorHandle::CreateUnshapedRemoteHandle( + int64 op_id, int32 output_num, eager::EagerClient* eager_client, + uint64 context_id, DataType dtype, Device* device, EagerContext* ctx, + TensorHandle** h) { *h = new TensorHandle(absl::make_unique( op_id, output_num, eager_client, context_id, ctx), - dtype, d, resource_device, ctx); + dtype, device, ctx); return Status::OK(); } TensorHandle::TensorHandle(std::unique_ptr t, - DataType dtype, Device* d, Device* resource_device, - EagerContext* ctx) + DataType dtype, Device* device, EagerContext* ctx) : dtype(dtype), - device_(d), - op_device_(d), - resource_device_(resource_device), + device_(device), + op_device_(device), + resource_device_(dtype == DT_RESOURCE ? device : nullptr), remote_op_id_(t->op_id()), remote_output_num_(t->output_num()), remote_eager_client_(t->eager_client()), @@ -303,7 +307,7 @@ Status TensorHandle::NumElements(int64* num_elements) { Status TensorHandle::RemoteAddress(Device* d, int64* op_id, int32* output_num) const { if (d != device_) { - mutex_lock l(remote_mirrors_mutex_); + tf_shared_lock l(remote_mirrors_mutex_); auto mirror = remote_mirrors_.find(d); if (mirror != remote_mirrors_.end()) { *op_id = mirror->second->op_id(); @@ -311,6 +315,13 @@ Status TensorHandle::RemoteAddress(Device* d, int64* op_id, return Status::OK(); } + auto unshaped_mirror = unshaped_remote_mirrors_.find(d); + if (unshaped_mirror != unshaped_remote_mirrors_.end()) { + *op_id = unshaped_mirror->second->op_id(); + *output_num = unshaped_mirror->second->output_num(); + return Status::OK(); + } + return errors::FailedPrecondition( "Could not find remote mirror for specified device"); } @@ -321,15 +332,36 @@ Status TensorHandle::RemoteAddress(Device* d, int64* op_id, } bool 
TensorHandle::HasRemoteMirror(Device* d) { - mutex_lock l(remote_mirrors_mutex_); + tf_shared_lock l(remote_mirrors_mutex_); auto mirror = remote_mirrors_.find(d); if (mirror != remote_mirrors_.end()) { return true; } + auto unshaped_mirror = unshaped_remote_mirrors_.find(d); + if (unshaped_mirror != unshaped_remote_mirrors_.end()) { + return true; + } + return false; } +Status TensorHandle::AddUnshapedRemoteMirror( + std::unique_ptr t, Device* d) { + mutex_lock l(remote_mirrors_mutex_); + if (remote_mirrors_.find(d) != remote_mirrors_.end()) { + return errors::Internal("Attempted to duplicate a remote mirror."); + } + + auto ret = unshaped_remote_mirrors_.insert(std::make_pair(d, std::move(t))); + if (!ret.second) { + return errors::Internal( + "Attempted to duplicate an unshaped remote mirror."); + } + + return Status::OK(); +} + Status TensorHandle::AddRemoteMirror(std::unique_ptr t, Device* d) { mutex_lock l(remote_mirrors_mutex_); @@ -341,7 +373,33 @@ Status TensorHandle::AddRemoteMirror(std::unique_ptr t, return Status::OK(); } -Status TensorHandle::SetRemoteShape(const TensorShape& shape) { +Status TensorHandle::SetRemoteShape(const TensorShape& shape, + tensorflow::Device* d) { + VLOG(3) << "SetRemoteShape on TensorHandle: " << this << " device: " << d; + + if (d != device_) { + mutex_lock l(remote_mirrors_mutex_); + if (remote_mirrors_.find(d) != remote_mirrors_.end()) { + return errors::Internal( + "Attempted to set remote shape for existing mirror."); + } + + auto elem = unshaped_remote_mirrors_.find(d); + if (elem == unshaped_remote_mirrors_.end()) { + return errors::Internal( + "Attempted to set remote shape for non-waiting mirror."); + } + + auto& data = elem->second; + data->ReleaseRemoteTensorHandle(); + remote_mirrors_[d] = absl::make_unique( + data->op_id(), data->output_num(), shape, data->eager_client(), + data->context_id(), data->ctx()); + unshaped_remote_mirrors_.erase(elem); + + return Status::OK(); + } + DCHECK(is_remote_) << "SeRemoteShape 
is only called on remote handles."; DCHECK(!is_ready_notification_.HasBeenNotified()) << "SetRemoteShape is only called on non-ready handles."; @@ -365,6 +423,12 @@ Status TensorHandle::SetTensor(const tensorflow::Tensor& tensor) { DCHECK(!is_ready_notification_.HasBeenNotified()) << "SetTensor is only called on non-ready handles."; + VLOG(3) << "SetTensor on TensorHandle: " << this; + + if (tensor.dtype() == DT_RESOURCE) { + auto& resource_handle = tensor.flat()(0); + handle_dtypes_and_shapes_ = resource_handle.dtypes_and_shapes(); + } tensor_handle_data_ = absl::make_unique(tensor); is_poisoned_ = Status::OK(); is_ready_notification_.Notify(); diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.h b/tensorflow/core/common_runtime/eager/tensor_handle.h index 4dc6b6f1f5f..58d10192dc0 100644 --- a/tensorflow/core/common_runtime/eager/tensor_handle.h +++ b/tensorflow/core/common_runtime/eager/tensor_handle.h @@ -82,8 +82,7 @@ class TensorHandle : public core::RefCounted { TensorHandle(std::unique_ptr t, DataType dtype, Device* d, Device* resource_device, EagerContext* ctx); TensorHandle(std::unique_ptr t, - DataType dtype, Device* d, Device* resource_device, - EagerContext* ctx); + DataType dtype, Device* device, EagerContext* ctx); #endif // IS_MOBILE_PLATFORM public: @@ -112,8 +111,11 @@ class TensorHandle : public core::RefCounted { static Status CreateUnshapedRemoteHandle(int64 op_id, int32 output_num, eager::EagerClient* eager_client, uint64 context_id, DataType dtype, - Device* d, Device* resource_device, - EagerContext* ctx, TensorHandle** h); + Device* device, EagerContext* ctx, + TensorHandle** h); + static Status CreateUnshapedRemoteHandle( + std::unique_ptr t, DataType dtype, + Device* device, EagerContext* ctx, TensorHandle** h); #endif // IS_MOBILE_PLATFORM // Symbolic tensor constructor. 
@@ -139,8 +141,9 @@ class TensorHandle : public core::RefCounted { #if !defined(IS_MOBILE_PLATFORM) bool HasRemoteMirror(Device* d); - // TODO(gjn): Add Unshaped remote mirrors once EagerRemoteSendTensor supports - // async execution and EagerRemoteExecute is mirror-aware. + + Status AddUnshapedRemoteMirror( + std::unique_ptr t, Device* d); Status AddRemoteMirror(std::unique_ptr t, Device* d); // Return the op_id and output num if the handle refers to a remote tensor. @@ -152,7 +155,7 @@ class TensorHandle : public core::RefCounted { // queried. // This method or Poison must be called exactly once for remote tensors that // were created without a known shape. - Status SetRemoteShape(const TensorShape& shape); + Status SetRemoteShape(const TensorShape& shape, tensorflow::Device* d); #endif // Sets the `tensor` for this async non-ready handle making it ready. @@ -224,6 +227,13 @@ class TensorHandle : public core::RefCounted { #if !defined(IS_MOBILE_PLATFORM) mutable mutex remote_mirrors_mutex_; + // TODO(gjn): Unshaped remote mirrors are long expected to be long-lived. + // Consider replacing the unshaped_remote_mirrors_ map with something more + // efficient. + std::map> + unshaped_remote_mirrors_ GUARDED_BY(remote_mirrors_mutex_); + // TODO(gjn): Is std::map the most optimal choice here? Perhaps this should be + // a fixed size map. 
std::map> remote_mirrors_ GUARDED_BY(remote_mirrors_mutex_); diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index d643af669c2..065a6782811 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -1816,8 +1816,7 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_nsec) { if (completed) ScheduleFinish(); }; nodestats::SetOpStart(stats); - if (TF_PREDICT_FALSE( - MightTrace(item, event_collector_, trace_using_annotations_))) { + { profiler::TraceMe activity( [&] { return strings::StrCat( @@ -1827,8 +1826,6 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_nsec) { }, profiler::GetTFTraceMeLevel(op_kernel->IsExpensive())); device->ComputeAsync(async, &state->ctx, done); - } else { - device->ComputeAsync(async, &state->ctx, done); } } else { // Synchronous computes. diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc index 8e66e10cc3f..89f97ff165d 100644 --- a/tensorflow/core/common_runtime/function.cc +++ b/tensorflow/core/common_runtime/function.cc @@ -109,10 +109,6 @@ static Node* AddIdentity(StringPiece name, Graph* g, Endpoint input) { NodeDef ndef; ndef.set_name(g->NewName(absl::StrCat(kNodeLabel, "/", name))); ndef.set_op("Identity"); - // NOTE(skyewm): we explicitly set the device here to address a multi-GPU - // performance issue where this Identity would be placed alone on a GPU, - // causing unnecessary device traffic. See b/122483225 for details. 
- ndef.set_device(input.node->def().device()); ndef.add_input(input.name()); AddNodeAttr("T", BaseType(input.dtype()), &ndef); Status s; @@ -1393,12 +1389,14 @@ bool RemoveListArrayConverter(Graph* g) { } gtl::InlinedVector identity_nodes(n->num_inputs(), nullptr); - const auto no_op = [&](StringPiece name) { + const auto no_op = [&](StringPiece name) -> Node* { return AddNoOp(absl::StrCat(n->name(), "/", name), g); }; - const auto identity = [&](StringPiece name, Endpoint input) { - return AddIdentity(absl::StrCat(n->name(), "/", name), g, input); + const auto identity = [&](StringPiece name, Endpoint input) -> Node* { + Node* node = AddIdentity(absl::StrCat(n->name(), "/", name), g, input); + node->set_requested_device(input.node->def().device()); + return node; }; // Process input edges first. @@ -1495,6 +1493,152 @@ Status InstantiateFunctionCall(const NodeDef& call_def, namespace { +std::vector InputDevices(const Node& caller) { + std::vector input_devices(caller.in_edges().size()); + for (const Edge* edge : caller.in_edges()) { + if (edge->IsControlEdge()) continue; + const string& input_device = edge->src()->has_assigned_device_name() + ? edge->src()->assigned_device_name() + : edge->src()->requested_device(); + input_devices[edge->dst_input()] = input_device; + } + return input_devices; +} + +// Place input nodes on the same device as the corresponding caller input +// node. Do not specify any placement for all other nodes. 
+class DefaultFunctionBodyPlacer : public InlinedFunctionBodyPlacer { + public: + explicit DefaultFunctionBodyPlacer(const Node& caller) + : input_devices_(InputDevices(caller)) {} + + absl::optional InputNodeDevice(int input_index) const override { + return input_devices_[input_index]; + } + absl::optional OutputNodeDevice(int output_index) const override { + return absl::nullopt; + } + absl::optional ControlNodeDevice() const override { + return absl::nullopt; + } + absl::optional BodyNodeDevice(const NodeDef& ndef) const override { + return absl::nullopt; + } + + private: + const std::vector input_devices_; +}; + +// Place all nodes on the same device as caller node. +class SingleDeviceFunctionBodyPlacer : public InlinedFunctionBodyPlacer { + public: + explicit SingleDeviceFunctionBodyPlacer(const Node& caller) + : caller_device_(caller.def().device()) {} + + absl::optional InputNodeDevice(int input_index) const override { + return caller_device_; + } + absl::optional OutputNodeDevice(int output_index) const override { + return caller_device_; + } + absl::optional ControlNodeDevice() const override { + return caller_device_; + } + absl::optional BodyNodeDevice(const NodeDef& ndef) const override { + return caller_device_; + } + + private: + const string caller_device_; +}; + +// Place input nodes on the same device as the corresponding caller input +// node. Do not place output node. Place control nodes on the same device as +// caller node. For all function body nodes overrides job, replica and task +// parts of the device assignment to match function caller node. 
+class MultiDeviceFunctionBodyPlacer : public InlinedFunctionBodyPlacer { + public: + explicit MultiDeviceFunctionBodyPlacer(const Node& caller) + : caller_device_(caller.def().device()), + input_devices_(InputDevices(caller)) { + has_parsed_caller_device_ = + DeviceNameUtils::ParseFullName(caller_device_, &caller_parsed_device_); + } + + absl::optional InputNodeDevice(int input_index) const override { + return input_devices_[input_index]; + } + absl::optional OutputNodeDevice(int output_index) const override { + return absl::nullopt; + } + absl::optional ControlNodeDevice() const override { + return caller_device_; + } + absl::optional BodyNodeDevice(const NodeDef& ndef) const override { + // TODO(ezhulenev): If function would have been instantiated as a + // multi-device function and executed via FunctionLibraryRuntime, it could + // be potentially placed on any available device. However there are multiple + // tests relying on this assumption. Fix them, and remove this line. + if (ndef.device().empty()) return caller_device_; + + if (!has_parsed_caller_device_) return ndef.device(); + + DeviceNameUtils::ParsedName ndef_parsed_device; + if (!DeviceNameUtils::ParseFullName(ndef.device(), &ndef_parsed_device)) + return ndef.device(); + + if (caller_parsed_device_.has_job) { + ndef_parsed_device.has_job = caller_parsed_device_.has_job; + ndef_parsed_device.job = caller_parsed_device_.job; + } + + if (caller_parsed_device_.has_replica) { + ndef_parsed_device.has_replica = caller_parsed_device_.has_replica; + ndef_parsed_device.replica = caller_parsed_device_.replica; + } + + if (caller_parsed_device_.has_task) { + ndef_parsed_device.has_task = caller_parsed_device_.has_task; + ndef_parsed_device.task = caller_parsed_device_.task; + } + return DeviceNameUtils::ParsedNameToString(ndef_parsed_device); + } + + private: + string caller_device_; + bool has_parsed_caller_device_; + DeviceNameUtils::ParsedName caller_parsed_device_; + std::vector input_devices_; +}; + +} // 
namespace + +std::unique_ptr +InlinedFunctionBodyPlacer::DefaultPlacer(const Graph& graph, + const Node& caller) { + VLOG(3) << "Create default placer for inlined function body: " + << SummarizeNode(caller); + return absl::make_unique(caller); +} + +std::unique_ptr +InlinedFunctionBodyPlacer::SingleDevicePlacer(const Graph& graph, + const Node& caller) { + VLOG(3) << "Create single device placer for inlined function body: " + << SummarizeNode(caller); + return absl::make_unique(caller); +} + +std::unique_ptr +InlinedFunctionBodyPlacer::MultiDevicePlacer(const Graph& graph, + const Node& caller) { + VLOG(3) << "Create multi device placer for inlined function body: " + << SummarizeNode(caller); + return absl::make_unique(caller); +} + +namespace { + Status ValidateNoInline(const FunctionBody* fbody) { const auto attr = AttrSlice(&fbody->fdef.attr()); bool noinline = false; @@ -1547,13 +1691,12 @@ string InlineFunctionBodyOptions::DebugString() const { return absl::StrCat( "disable_inlining=", true_false(disable_inlining), ", ignore_noinline=", true_false(ignore_noinline), - ", override_device=", true_false(ignore_noinline), - ", initialize_empty_device=", true_false(initialize_empty_device), ", inline_impl_selection_group_functions=", true_false(inline_impl_selection_group_functions), ", keep_caller_node=", keep_caller_node_str(), ", output_control_src=", output_control_src == OutputControlSrc::kDataOutputs ? "DataOutputs" - : "ControlOutputs"); + : "ControlOutputs", + ", inlined_function_body_placer=", inlined_function_body_placer.name); } Status ValidateInlining(const Node* node, const FunctionBody* fbody, @@ -1711,6 +1854,11 @@ Status InlineFunctionBody(const FunctionLibraryDefinition& flib_def, Graph* g, return errors::Internal("Inlining mismatch: ", validation.error_message()); } + // Placer is responsible for assigning devices for all nodes that we will add + // to the graph. 
+ const std::unique_ptr placer = + options.inlined_function_body_placer.get(*g, *caller); + // We can't possibly introduce a duplicate control edge during function // inlining, so we skip this check in calls to the 'g->AddControlEdge(...)'. static constexpr bool kDoNotCheckDuplicates = true; @@ -1720,15 +1868,29 @@ Status InlineFunctionBody(const FunctionLibraryDefinition& flib_def, Graph* g, // control nodes and inlined function inputs and outputs. // Add a NoOp node for function control inputs/outputs. - const auto no_op = [&](StringPiece name) { + const auto no_op = [&](StringPiece name) -> Node* { Node* node = AddNoOp(absl::StrCat(caller->name(), "/", name), g); - node->set_requested_device(caller->def().device()); + const absl::optional device = placer->ControlNodeDevice(); + if (device.has_value()) node->set_requested_device(*device); return node; }; - // Add an Identity node for function data inputs/outputs. - const auto identity = [&](StringPiece name, Endpoint input) { - return AddIdentity(absl::StrCat(caller->name(), "/", name), g, input); + // Add an Identity node for function input. + const auto input_identity = [&](StringPiece name, Endpoint input, + int index) -> Node* { + Node* node = AddIdentity(absl::StrCat(caller->name(), "/", name), g, input); + const absl::optional device = placer->InputNodeDevice(index); + if (device.has_value()) node->set_requested_device(*device); + return node; + }; + + // Add an Identity node for function output. 
+ const auto output_identity = [&](StringPiece name, Endpoint input, + int index) -> Node* { + Node* node = AddIdentity(absl::StrCat(caller->name(), "/", name), g, input); + const absl::optional device = placer->OutputNodeDevice(index); + if (device.has_value()) node->set_requested_device(*device); + return node; }; // ------------------------------------------------------------------------ // @@ -1760,12 +1922,11 @@ Status InlineFunctionBody(const FunctionLibraryDefinition& flib_def, Graph* g, for (Node* n : fbody->graph->op_nodes()) { NodeDef ndef = n->def(); - if (options.override_device) { - ndef.set_device(caller->def().device()); - } - if (options.initialize_empty_device && ndef.device().empty()) { - ndef.set_device(caller->def().device()); - } + // Maybe override requested node device assignment. + const absl::optional device = placer->BodyNodeDevice(ndef); + if (device.has_value()) ndef.set_device(*device); + + // Add inlined function name to inlined node debug information. PropagateDebugInfoToNode(fbody->fdef.signature().name(), {n}, &ndef); // Add the function node name as a prefix: @@ -1777,9 +1938,6 @@ Status InlineFunctionBody(const FunctionLibraryDefinition& flib_def, Graph* g, Status added_node; Node* clone = g->AddNode(ndef, &added_node); - if (options.override_device && !caller->assigned_device_name().empty()) { - clone->set_assigned_device_name(caller->assigned_device_name()); - } TF_CHECK_OK(added_node); node_map[n->id()] = clone; @@ -1827,7 +1985,7 @@ Status InlineFunctionBody(const FunctionLibraryDefinition& flib_def, Graph* g, // The added identity nodes depend on "input_control_node". 
for (std::size_t i = 0; i < fbody->arg_nodes.size(); ++i) { Node* arg = node_map[fbody->arg_nodes[i]->id()]; - Node* n = identity("input", inputs[i]); + Node* n = input_identity("input", inputs[i], i); if (input_control_node) { g->AddControlEdge(input_control_node, n, kDoNotCheckDuplicates); } @@ -1871,7 +2029,7 @@ Status InlineFunctionBody(const FunctionLibraryDefinition& flib_def, Graph* g, } } CHECK(data.node != nullptr); - Node* n = identity("output", data); + Node* n = output_identity("output", data, i); outputs[i] = n; for (const Edge* e : ret->in_edges()) { if (e->IsControlEdge()) { diff --git a/tensorflow/core/common_runtime/function.h b/tensorflow/core/common_runtime/function.h index e0bd3704214..715652088f1 100644 --- a/tensorflow/core/common_runtime/function.h +++ b/tensorflow/core/common_runtime/function.h @@ -19,6 +19,7 @@ limitations under the License. #include #include +#include "absl/types/optional.h" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/graph_optimizer.h" @@ -158,6 +159,50 @@ void ToGraphDef(const Graph* g, GraphDef* gdef, bool pretty = false); // TODO(zhifengc): Asks math expert to say the comment again. std::unique_ptr SymbolicGradient(const FunctionBody& f); +// Optionally override device assignment for nodes added to the graph for +// inlined functions: +// (1) Identity nodes added in place of function input arguments. +// (2) Identity nodes added in place of function return values. +// (3) Special NoOp nodes that enforce side-effects execution order. +// (4) All nodes inside function body specified in FunctionDef. 
+class InlinedFunctionBodyPlacer { + public: + virtual ~InlinedFunctionBodyPlacer() = default; + + virtual absl::optional InputNodeDevice(int input_index) const = 0; + virtual absl::optional OutputNodeDevice(int output_index) const = 0; + virtual absl::optional ControlNodeDevice() const = 0; + virtual absl::optional BodyNodeDevice(const NodeDef& ndef) const = 0; + + // Place input nodes on the same device as the corresponding caller input + // node. Do not specify any placement for all other nodes. + static std::unique_ptr DefaultPlacer( + const Graph& graph, const Node& caller); + + // Place all nodes on the same device as caller node. + static std::unique_ptr SingleDevicePlacer( + const Graph& graph, const Node& caller); + + // Place input nodes on the same device as the corresponding caller input + // node. Do not place output node. Place control nodes on the same device as + // caller node. For all function body nodes overrides job, replica and task + // parts of the device assignment to match function caller node. + static std::unique_ptr MultiDevicePlacer( + const Graph& graph, const Node& caller); + + using Factory = std::function( + const Graph&, const Node&)>; + + struct Config { + string name; + Factory get; + }; + + static Config Default() { return {"default", DefaultPlacer}; } + static Config SingleDevice() { return {"single_device", SingleDevicePlacer}; } + static Config MultiDevice() { return {"multi_device", MultiDevicePlacer}; } +}; + struct InlineFunctionBodyOptions { // All nodes that have incoming control edge *from* the function call node, // will be forwarded to the "output control node". There are two options for @@ -198,16 +243,6 @@ struct InlineFunctionBodyOptions { bool disable_inlining = false; // Ignore '_noinline' function attribute. bool ignore_noinline = false; - // If 'true' function inlining will override explicitly specified devices - // inside function body with the caller node device. 
- bool override_device = false; - // If 'true' function inlining will fill an empty device annotation inside - // function body with the caller node device. - // TODO(ezhulenev): Remove this flag. This is mostly legacy-compatibility - // mode. We should never explicitly define devices when we inline multi-device - // functions. However we do that in 'lower_function_call_op.cc' and - // 'function_optimizer' for now. - bool initialize_empty_device = false; // If 'true' function inlining will inline functions in implementation // selection group. Normally those functions should not be inlined; they will // be handled by Grappler. @@ -219,6 +254,11 @@ struct InlineFunctionBodyOptions { // Control returns were added to Tensorflow v2 with automatic control // dependencies tracking in Eager mode. OutputControlSource output_control_src = OutputControlSource::kDataOutputs; + // Inlined function body placer decides what requested device assignments + // should be added to the nodes added to the graph. See documentation above + // for available strategies. + InlinedFunctionBodyPlacer::Config inlined_function_body_placer = + InlinedFunctionBodyPlacer::Default(); // A human-readable debug string for this options. 
string DebugString() const; diff --git a/tensorflow/core/common_runtime/function_test.cc b/tensorflow/core/common_runtime/function_test.cc index 46691aa42f1..5bfd9f5eba3 100644 --- a/tensorflow/core/common_runtime/function_test.cc +++ b/tensorflow/core/common_runtime/function_test.cc @@ -1062,6 +1062,124 @@ TEST_F(FunctionLibraryRuntimeTest, ExpandInlineFunctionsAndKeepCallerNode) { } } +TEST_F(FunctionLibraryRuntimeTest, ExpandInlineFunctionsAndPlaceInlinedNodes) { + using test::function::NDef; + using KeepCallerNode = InlineFunctionBodyOptions::KeepCallerNode; + + const string arg_device = "/job:arg/replica:0/task:0/device:GPU"; + const string call_device = "/job:call/replica:0/task:1/device:GPU"; + const string body_device = "/job:body/replica:0/task:1/device:CPU"; + + const FunctionDef func = FDH::Create( + "AddFunc", {"i: float"}, {"o: float"}, {}, + {{{"ret"}, "Add", {"i", "i"}, {{"T", DT_FLOAT}}, {}, body_device}}, + /*ret_def=*/{{"o", "ret:z:0"}}); + Init({func}); + + // Construct a graph: + // a = Arg[dtype=DT_FLOAT, _device=arg_device] + // b = AddFunc[_device=call_device](a) + auto construct_graph = [&](std::unique_ptr* g) -> Status { + Scope s = Scope::NewRootScope(); + TF_RETURN_IF_ERROR(s.graph()->AddFunctionLibrary(fdef_lib_)); + auto a = ops::_Arg(s.WithOpName("a").WithDevice(arg_device), DT_FLOAT, 0); + auto b = test::function::Call(&s, "b", "AddFunc", {a}); + TF_RETURN_IF_ERROR(s.ToGraph(g->get())); + for (Node* node : (*g)->op_nodes()) { + if (node->name() == "b") node->set_requested_device(call_device); + } + return Status::OK(); + }; + + const string input_node = "Func/b/input/_0"; + const string output_node = "Func/b/output/_1"; + const string output_control_node = "Func/b/output_control_node/_2"; + + // Construct expected graph after function inlining. 
+ auto expected_graph = [&](const std::vector& placed) -> GraphDef { + return test::function::GDef( + { + NDef("a", "_Arg", {}, {{"T", DT_FLOAT}, {"index", 0}}, placed[0]), + NDef(input_node, "Identity", {"a"}, {{"T", DT_FLOAT}}, placed[1]), + NDef("b/ret", "Add", {input_node, input_node}, {{"T", DT_FLOAT}}, + placed[2]), + NDef(output_node, "Identity", {"b/ret"}, {{"T", DT_FLOAT}}, + placed[3]), + NDef(output_control_node, "NoOp", {"^" + output_node}, {}, + placed[4]), + }, + {func}); + }; + + ExpandInlineFunctionsOptions opts; + opts.native_options.keep_caller_node = KeepCallerNode::kDoNotKeep; + + // Place only input nodes to match input device. + { + opts.native_options.inlined_function_body_placer = + InlinedFunctionBodyPlacer::Default(); + + auto g = absl::make_unique(OpRegistry::Global()); + TF_ASSERT_OK(construct_graph(&g)); + + ExpandInlineFunctions(flr0_, g.get(), opts); + GraphDef expected = expected_graph({/*a*/ arg_device, // + /*input*/ arg_device, // + /*body*/ body_device, // + /*output*/ "", // + /*control_output*/ ""} // + ); + + GraphDef actual; + g->ToGraphDef(&actual); + TF_EXPECT_GRAPH_EQ(expected, actual); + } + + // Place all nodes on the call node device. + { + opts.native_options.inlined_function_body_placer = + InlinedFunctionBodyPlacer::SingleDevice(); + + auto g = absl::make_unique(OpRegistry::Global()); + TF_ASSERT_OK(construct_graph(&g)); + + ExpandInlineFunctions(flr0_, g.get(), opts); + GraphDef expected = expected_graph({/*a*/ arg_device, // + /*input*/ call_device, // + /*body*/ call_device, // + /*output*/ call_device, // + /*control_output*/ call_device} // + ); + + GraphDef actual; + g->ToGraphDef(&actual); + TF_EXPECT_GRAPH_EQ(expected, actual); + } + + // Multi device function placement. 
+ { + opts.native_options.inlined_function_body_placer = + InlinedFunctionBodyPlacer::MultiDevice(); + + auto g = absl::make_unique(OpRegistry::Global()); + TF_ASSERT_OK(construct_graph(&g)); + + const string merged_device = "/job:call/replica:0/task:1/device:CPU:*"; + + ExpandInlineFunctions(flr0_, g.get(), opts); + GraphDef expected = expected_graph({/*a*/ arg_device, // + /*input*/ arg_device, // + /*body*/ merged_device, // + /*output*/ "", // + /*control_output*/ call_device} // + ); + + GraphDef actual; + g->ToGraphDef(&actual); + TF_EXPECT_GRAPH_EQ(expected, actual); + } +} + TEST_F(FunctionLibraryRuntimeTest, PruneBody) { auto T = DT_INT32; FunctionDef stateful_func = FDH::Define( @@ -1285,8 +1403,8 @@ TEST_F(FunctionLibraryRuntimeTest, ControlDeps) { ASSERT_TRUE(g != nullptr); OptimizeGraph(flr0_, &g); - // NOTE: We can remove func0, func1, func2, func9 with a control edge n8->n5. - // But we don't have a pass doing that. + // NOTE: We can remove func0, func1, func2, func9 with a control edge + // n8->n5. But we don't have a pass doing that. { Scope s = Scope::NewRootScope(); auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); diff --git a/tensorflow/core/common_runtime/graph_optimizer.cc b/tensorflow/core/common_runtime/graph_optimizer.cc index 2171a9cb157..774a5067481 100644 --- a/tensorflow/core/common_runtime/graph_optimizer.cc +++ b/tensorflow/core/common_runtime/graph_optimizer.cc @@ -90,7 +90,8 @@ void GraphOptimizer::Optimize( } if (opts_.do_function_inlining()) { ExpandInlineFunctionsOptions expand_inline_opts; - expand_inline_opts.native_options.override_device = true; + expand_inline_opts.native_options.inlined_function_body_placer = + InlinedFunctionBodyPlacer::SingleDevice(); if (!inline_multi_device_functions) { // GraphOptimizer is running: // (1) After partitioning when executing with a Session API. 
diff --git a/tensorflow/core/common_runtime/lower_function_call_op.cc b/tensorflow/core/common_runtime/lower_function_call_op.cc index 4df335af783..87b024636fc 100644 --- a/tensorflow/core/common_runtime/lower_function_call_op.cc +++ b/tensorflow/core/common_runtime/lower_function_call_op.cc @@ -59,15 +59,16 @@ Status RewriteFunctionCallNode(Node* n, Graph* g, // belong to different devices. This type of functions was added in // Tensorflow 2.0 Eager mode, and it has control outputs to represent // side-effects that must always execute (see `control_ret` in FunctionDef). - inline_options.override_device = false; - inline_options.initialize_empty_device = true; inline_options.output_control_src = OutputControlSrc::kControlOutputs; + inline_options.inlined_function_body_placer = + InlinedFunctionBodyPlacer::MultiDevice(); } else { // Native function call (node.type_string() is the function name). These // functions are always executed on a single-device, which is the device of // the function call node. - inline_options.override_device = true; inline_options.output_control_src = OutputControlSrc::kDataOutputs; + inline_options.inlined_function_body_placer = + InlinedFunctionBodyPlacer::SingleDevice(); } const FunctionDef* fdef; diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h index b467e7b311e..5ec76f59671 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h @@ -253,6 +253,7 @@ class MklCPUAllocator : public Allocator { auto l_stats = large_size_allocator_->GetStats(); // Combine statistics from small-size and large-size allocator. 
+ mutex_lock l(mutex_); stats_.num_allocs = l_stats->num_allocs + s_stats->num_allocs; stats_.bytes_in_use = l_stats->bytes_in_use + s_stats->bytes_in_use; stats_.peak_bytes_in_use = diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.cc b/tensorflow/core/common_runtime/process_function_library_runtime.cc index 42c5d59b400..6d8cd093531 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime.cc +++ b/tensorflow/core/common_runtime/process_function_library_runtime.cc @@ -566,13 +566,15 @@ Status ProcessFunctionLibraryRuntime::InstantiateMultiDevice( VLOG(1) << "Instantiating MultiDevice function \"" << function_name << "\" on default device \"" << options.target << "\""; if (VLOG_IS_ON(3)) { + int index = 0; VLOG(3) << "Requested input devices:"; for (const string& device : options.input_devices) { - VLOG(3) << " " << device; + VLOG(3) << " " << device << " for input at index " << index++; } + index = 0; VLOG(3) << "Requested output devices:"; for (const string& device : options.output_devices) { - VLOG(3) << " " << device; + VLOG(3) << " " << device << " for output at index " << index++; } } diff --git a/tensorflow/core/distributed_runtime/eager/BUILD b/tensorflow/core/distributed_runtime/eager/BUILD index 22d5c50bd4b..bffe4dcca83 100644 --- a/tensorflow/core/distributed_runtime/eager/BUILD +++ b/tensorflow/core/distributed_runtime/eager/BUILD @@ -52,6 +52,7 @@ cc_library( "//tensorflow/core:protos_all_cc", "//tensorflow/core/common_runtime/eager:eager_executor", "//tensorflow/core/common_runtime/eager:tensor_handle", + "@com_google_absl//absl/types:span", ], ) @@ -62,6 +63,7 @@ cc_library( "eager_service_impl.h", ], deps = [ + ":remote_mgr", ":remote_tensor_handle", "//tensorflow:grpc++", "//tensorflow/c:c_api_internal", @@ -92,6 +94,7 @@ tf_cc_test( srcs = ["eager_service_impl_test.cc"], deps = [ ":eager_service_impl", + ":remote_mgr", "//tensorflow/c:c_api", "//tensorflow/c:c_api_internal", 
"//tensorflow/core:eager_service_proto_cc", @@ -109,6 +112,24 @@ tf_cc_test( ], ) +cc_library( + name = "remote_mgr", + srcs = [ + "remote_mgr.cc", + ], + hdrs = [ + "remote_mgr.h", + ], + visibility = ["//tensorflow:internal"], + deps = [ + ":remote_tensor_handle", + "//tensorflow/core:eager_service_proto_cc", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/common_runtime/eager:tensor_handle", + ], +) + cc_library( name = "remote_tensor_handle_data", srcs = ["remote_tensor_handle_data.cc"], diff --git a/tensorflow/core/distributed_runtime/eager/eager_service_impl.cc b/tensorflow/core/distributed_runtime/eager/eager_service_impl.cc index 6db300bc944..ae2fd939bdb 100644 --- a/tensorflow/core/distributed_runtime/eager/eager_service_impl.cc +++ b/tensorflow/core/distributed_runtime/eager/eager_service_impl.cc @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/eager/execute.h" #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/process_util.h" +#include "tensorflow/core/distributed_runtime/eager/remote_mgr.h" #include "tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.h" #include "tensorflow/core/distributed_runtime/server_lib.h" #include "tensorflow/core/distributed_runtime/session_mgr.h" @@ -142,9 +143,12 @@ Status EagerServiceImpl::CreateContext(const CreateContextRequest* request, return s; } + auto remote_mgr = + absl::make_unique(/*is_master=*/false); s = ctx->InitializeRemoteWorker( std::move(remote_eager_workers), worker_session->remote_device_mgr(), - remote_workers, request->context_id(), std::move(rendezvous_creator)); + remote_workers, request->context_id(), std::move(rendezvous_creator), + std::move(remote_mgr)); if (!s.ok()) { delete ctx; return s; @@ -209,9 +213,9 @@ Status EagerServiceImpl::ExecuteOp(const Operation& operation, profiler::TraceMeLevel::kVerbose); for (const auto& remote_handle : operation.inputs()) { 
tensorflow::TensorHandle* handle; - TF_RETURN_IF_ERROR(server_context->GetTensorHandle( - RemoteTensorHandleInternal(remote_handle), &handle)); - + TF_RETURN_IF_ERROR( + server_context->Context()->RemoteMgr()->DeserializeRemoteTensorHandle( + remote_handle, &handle)); op->AddInput(handle); } } @@ -225,10 +229,13 @@ Status EagerServiceImpl::ExecuteOp(const Operation& operation, TF_RETURN_IF_ERROR(GetNumRetvals(server_context->Context(), operation.name(), operation.attrs(), &num_retvals)); - tensorflow::gtl::InlinedVector retvals; + tensorflow::gtl::InlinedVector retvals( + num_retvals); TF_RETURN_IF_ERROR(EagerExecute(op.get(), &retvals, &num_retvals)); + retvals.resize(num_retvals); - server_context->AddOperationOutputs(retvals, operation.id()); + server_context->Context()->RemoteMgr()->AddOperationOutputs(retvals, + operation.id()); for (auto* handle : retvals) { TF_RETURN_IF_ERROR(TensorHandleShape(handle, queue_response->add_shape())); @@ -253,7 +260,7 @@ Status EagerServiceImpl::Enqueue(const EnqueueRequest* request, if (item.has_operation()) { TF_RETURN_IF_ERROR(ExecuteOp(item.operation(), context, queue_response)); } else { - TF_RETURN_IF_ERROR(context->DeleteTensorHandle( + TF_RETURN_IF_ERROR(context->Context()->RemoteMgr()->DeleteTensorHandle( RemoteTensorHandleInternal(item.handle_to_decref()))); } } @@ -341,7 +348,8 @@ Status EagerServiceImpl::SendTensor(const SendTensorRequest* request, tensor_handle->Unref(); } - context->AddOperationOutputs(tensors, request->op_id()); + context->Context()->RemoteMgr()->AddOperationOutputs(tensors, + request->op_id()); return Status::OK(); } diff --git a/tensorflow/core/distributed_runtime/eager/eager_service_impl.h b/tensorflow/core/distributed_runtime/eager/eager_service_impl.h index a33c678e789..b64c0ffb28c 100644 --- a/tensorflow/core/distributed_runtime/eager/eager_service_impl.h +++ b/tensorflow/core/distributed_runtime/eager/eager_service_impl.h @@ -16,7 +16,6 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_EAGER_EAGER_SERVICE_IMPL_H_ #define TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_EAGER_EAGER_SERVICE_IMPL_H_ -#include #include "tensorflow/core/common_runtime/eager/context.h" #include "tensorflow/core/common_runtime/eager/tensor_handle.h" @@ -112,56 +111,12 @@ class EagerServiceImpl { RecordAccess(); } ~ServerContext() { - for (const auto& entry : tensors_) { - entry.second->Unref(); - } ctx_->Unref(); } tensorflow::EagerContext* Context() const { return ctx_; } - void AddOperationOutputs( - const gtl::ArraySlice& handles, - int64 operation_id) { - mutex_lock l(tensors_mu_); - for (int i = 0; i < handles.size(); i++) { - // TODO(nareshmodi): Correctly handle operation_id not being unique. - tensors_.emplace(RemoteTensorHandleInternal(operation_id, i), - handles[i]); - } - } - - Status GetTensorHandle(const RemoteTensorHandleInternal& remote_handle, - tensorflow::TensorHandle** handle) { - mutex_lock l(tensors_mu_); - auto iter = tensors_.find(remote_handle); - if (iter == tensors_.end()) { - return errors::InvalidArgument( - "Unable to find the relevant tensor remote_handle: Op ID: ", - remote_handle.op_id, ", Output num: ", remote_handle.output_num); - } - - *handle = iter->second; - - return Status::OK(); - } - - Status DeleteTensorHandle(const RemoteTensorHandleInternal& remote_handle) { - mutex_lock l(tensors_mu_); - auto iter = tensors_.find(remote_handle); - if (iter == tensors_.end()) { - return errors::InvalidArgument( - "Unable to find the relevant tensor remote_handle: Op ID: ", - remote_handle.op_id, ", Output num: ", remote_handle.output_num); - } - - iter->second->Unref(); - tensors_.erase(iter); - - return Status::OK(); - } - void RecordAccess() { mutex_lock l(last_accessed_mu_); last_accessed_micros_ = env_->env->NowMicros(); @@ -175,18 +130,9 @@ class EagerServiceImpl { } private: - using RemoteTensorHandleMap = - gtl::FlatMap; - // The context for this execution. 
tensorflow::EagerContext* ctx_; - // The state related to the context for this execution. - mutex tensors_mu_; - RemoteTensorHandleMap tensors_ GUARDED_BY(tensors_mu_); - const WorkerEnv* const env_; // Not owned. mutex last_accessed_mu_; diff --git a/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc b/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc index c3b3cb42a51..39384c127ed 100644 --- a/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc +++ b/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/c/c_api_internal.h" #include "tensorflow/core/common_runtime/eager/tensor_handle.h" +#include "tensorflow/core/distributed_runtime/eager/remote_mgr.h" #include "tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.h" #include "tensorflow/core/distributed_runtime/session_mgr.h" #include "tensorflow/core/distributed_runtime/test_utils.h" @@ -48,7 +49,8 @@ class TestEagerServiceImpl : public EagerServiceImpl { TF_RETURN_IF_ERROR(GetServerContext(context_id, &context)); core::ScopedUnref context_unref(context); - return context->GetTensorHandle(remote_handle, handle); + return context->Context()->RemoteMgr()->GetTensorHandle(remote_handle, + handle); } }; diff --git a/tensorflow/core/distributed_runtime/eager/remote_execute_node.h b/tensorflow/core/distributed_runtime/eager/remote_execute_node.h index c32894acbbc..761efff0796 100644 --- a/tensorflow/core/distributed_runtime/eager/remote_execute_node.h +++ b/tensorflow/core/distributed_runtime/eager/remote_execute_node.h @@ -16,6 +16,7 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_EAGER_REMOTE_EXECUTE_NODE_H_ #define TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_EAGER_REMOTE_EXECUTE_NODE_H_ +#include "absl/types/span.h" #include "tensorflow/core/common_runtime/eager/eager_executor.h" #include "tensorflow/core/common_runtime/eager/tensor_handle.h" #include "tensorflow/core/distributed_runtime/eager/eager_client.h" @@ -26,26 +27,27 @@ namespace eager { // RemoteExecuteNode is an implementation of EagerNode which enqueues // an operation via RPC in a remote EagerService. -class RemoteExecuteNode : public tensorflow::EagerNode { +class RemoteExecuteNode : public EagerNode { public: - RemoteExecuteNode(std::unique_ptr request, + RemoteExecuteNode(std::unique_ptr request, Device* device, EagerClient* eager_client, const gtl::InlinedVector& inputs, - TensorHandle** retvals, int num_retvals) - : tensorflow::EagerNode(), + absl::Span retvals) + : EagerNode(), request_(std::move(request)), + device_(device), eager_client_(eager_client), inputs_(inputs) { // Copy the output handles, since the container for them might get // destroyed. - for (int i = 0; i < num_retvals; i++) { - retvals_.push_back(retvals[i]); - retvals_[i]->Ref(); + for (auto handle : retvals) { + handle->Ref(); + retvals_.push_back(handle); } // This is required to ensure that the tensor handles stay alive across the // execution. 
- for (auto* handle : inputs_) { + for (auto handle : inputs_) { handle->Ref(); } } @@ -67,15 +69,15 @@ class RemoteExecuteNode : public tensorflow::EagerNode { } for (int i = 0; i < retvals_.size(); i++) { - Status s = - retvals_[i]->SetRemoteShape(response.queue_response(0).shape(i)); + Status s = retvals_[i]->SetRemoteShape( + response.queue_response(0).shape(i), device_); if (!s.ok()) { retvals_[i]->Poison(s); } retvals_[i]->Unref(); } - for (auto* handle : inputs_) { + for (auto handle : inputs_) { handle->Unref(); } @@ -83,18 +85,19 @@ class RemoteExecuteNode : public tensorflow::EagerNode { } void Abort(Status status) override { - for (int i = 0; i < retvals_.size(); i++) { - retvals_[i]->Poison(status); - retvals_[i]->Unref(); + for (auto handle : retvals_) { + handle->Poison(status); + handle->Unref(); } - for (auto* handle : inputs_) { + for (auto handle : inputs_) { handle->Unref(); } } private: std::unique_ptr request_; + Device* device_; // Not owned EagerClient* eager_client_; // Not owned, and must outlive this node. gtl::InlinedVector inputs_; gtl::InlinedVector retvals_; diff --git a/tensorflow/core/distributed_runtime/eager/remote_mgr.cc b/tensorflow/core/distributed_runtime/eager/remote_mgr.cc new file mode 100644 index 00000000000..a7e00272029 --- /dev/null +++ b/tensorflow/core/distributed_runtime/eager/remote_mgr.cc @@ -0,0 +1,89 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/distributed_runtime/eager/remote_mgr.h" + +#include "tensorflow/core/distributed_runtime/eager/remote_tensor_handle.h" +#include "tensorflow/core/lib/core/errors.h" + +namespace tensorflow { +namespace eager { + +void RemoteMgr::AddOperationOutputs( + const gtl::ArraySlice handles, + int64 operation_id) { + mutex_lock l(remote_tensor_handle_mu_); + for (int i = 0; i < handles.size(); i++) { + // TODO(nareshmodi): Correctly handle operation_id not being unique. + remote_tensor_handle_map_.emplace( + RemoteTensorHandleInternal(operation_id, i), handles[i]); + } +} + +Status RemoteMgr::GetTensorHandle( + const RemoteTensorHandleInternal& remote_handle, + tensorflow::TensorHandle** handle) { + tf_shared_lock l(remote_tensor_handle_mu_); + auto iter = remote_tensor_handle_map_.find(remote_handle); + if (iter == remote_tensor_handle_map_.end()) { + return errors::InvalidArgument( + "Unable to find the relevant tensor remote_handle: Op ID: ", + remote_handle.op_id, ", Output num: ", remote_handle.output_num); + } + + *handle = iter->second; + + return Status::OK(); +} + +Status RemoteMgr::DeleteTensorHandle( + const RemoteTensorHandleInternal& remote_handle) { + mutex_lock l(remote_tensor_handle_mu_); + auto iter = remote_tensor_handle_map_.find(remote_handle); + if (iter == remote_tensor_handle_map_.end()) { + return errors::InvalidArgument( + "Unable to find the relevant tensor remote_handle: Op ID: ", + remote_handle.op_id, ", Output num: ", remote_handle.output_num); + } + + iter->second->Unref(); + remote_tensor_handle_map_.erase(iter); + + return Status::OK(); +} + +Status RemoteMgr::SerializeRemoteTensorHandle(TensorHandle* in, + RemoteTensorHandle* out, + Device* device) { + // TODO(fishx): support serializing local tensor handle. 
+ int64 op_id; + int32 output_num; + TF_RETURN_IF_ERROR(in->RemoteAddress(device, &op_id, &output_num)); + out->Clear(); + out->set_op_id(op_id); + out->set_output_num(output_num); + return Status::OK(); +} + +Status RemoteMgr::DeserializeRemoteTensorHandle(const RemoteTensorHandle& in, + TensorHandle** out) { + // TODO(fishx): support the case when the remote tensor handle does not exist + // in the map. + TF_RETURN_IF_ERROR(GetTensorHandle(RemoteTensorHandleInternal(in), out)); + return Status::OK(); +} + +} // namespace eager +} // namespace tensorflow diff --git a/tensorflow/core/distributed_runtime/eager/remote_mgr.h b/tensorflow/core/distributed_runtime/eager/remote_mgr.h new file mode 100644 index 00000000000..7b4a9bfa84f --- /dev/null +++ b/tensorflow/core/distributed_runtime/eager/remote_mgr.h @@ -0,0 +1,85 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_EAGER_REMOTE_MGR_H_ +#define TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_EAGER_REMOTE_MGR_H_ + +#include + +#include "tensorflow/core/common_runtime/eager/tensor_handle.h" +#include "tensorflow/core/distributed_runtime/eager/remote_tensor_handle.h" +#include "tensorflow/core/platform/mutex.h" + +namespace tensorflow { +namespace eager { + +// This class manages the state required to set up an eager cluster.
+// TODO(fishx): Move remote state from context to this class. +class RemoteMgr { + public: + explicit RemoteMgr(bool is_master) : is_master_(is_master) {} + + ~RemoteMgr() { + for (const auto& entry : remote_tensor_handle_map_) { + entry.second->Unref(); + } + } + + bool IsMaster() const { return is_master_; } + + void AddOperationOutputs( + const gtl::ArraySlice handles, + int64 operation_id); + + Status GetTensorHandle(const RemoteTensorHandleInternal& remote_handle, + tensorflow::TensorHandle** handle); + + Status DeleteTensorHandle(const RemoteTensorHandleInternal& remote_handle); + + // Helper function to create monotonically increasing ids unique to this + // context. + uint64 NextOpId() { + DCHECK(is_master_); + mutex_lock l(next_id_mutex_); + return next_op_id_++; + } + + Status SerializeRemoteTensorHandle(TensorHandle* in, RemoteTensorHandle* out, + Device* device); + + Status DeserializeRemoteTensorHandle(const RemoteTensorHandle& in, + TensorHandle** out); + + private: + bool is_master_; + + using RemoteTensorHandleMap = + gtl::FlatMap; + mutex remote_tensor_handle_mu_; + // This map maintains the TensorHandles that are required by remote workers + // in the cluster.
+ RemoteTensorHandleMap remote_tensor_handle_map_ + GUARDED_BY(remote_tensor_handle_mu_); + + mutex next_id_mutex_; + uint64 next_op_id_ GUARDED_BY(next_id_mutex_) = 1; +}; + +} // namespace eager +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_EAGER_REMOTE_MGR_H_ diff --git a/tensorflow/core/distributed_runtime/eager/remote_tensor_handle_data.cc b/tensorflow/core/distributed_runtime/eager/remote_tensor_handle_data.cc index 2e245680a44..d3a7c60af31 100644 --- a/tensorflow/core/distributed_runtime/eager/remote_tensor_handle_data.cc +++ b/tensorflow/core/distributed_runtime/eager/remote_tensor_handle_data.cc @@ -27,8 +27,6 @@ void DestoryRemoteTensorHandle(EagerContext* ctx, eager::EagerClient* eager_client, uint64 context_id, uint64 op_id, int output_num) { - auto cleanup = gtl::MakeCleanup([ctx]() { ctx->Unref(); }); - if (ctx->GetContextId() != context_id) { // This means that this tensor was pointing to a remote device, which // has been changed out from under us. Simply return since there is @@ -43,18 +41,13 @@ void DestoryRemoteTensorHandle(EagerContext* ctx, handle_to_decref->set_op_id(op_id); handle_to_decref->set_output_num(output_num); - if (ctx->Async()) { - ctx->ExecutorAdd(absl::make_unique( - std::move(request), eager_client)); - } else { - eager::EnqueueRequest* actual_request = request.release(); - eager::EnqueueResponse* response = new eager::EnqueueResponse; - eager_client->EnqueueAsync( - actual_request, response, - [actual_request, response](const tensorflow::Status& s) { - delete actual_request; - delete response; - }); + std::unique_ptr node( + absl::make_unique(std::move(request), + eager_client)); + Status s = ctx->Async() ? 
ctx->ExecutorAdd(std::move(node)) : node->Run(); + if (!s.ok()) { + LOG(ERROR) << "Unable to destroy remote tensor handles: " + << s.error_message(); } } @@ -80,6 +73,7 @@ RemoteTensorHandleData::RemoteTensorHandleData(int64 op_id, int output_num, RemoteTensorHandleData::~RemoteTensorHandleData() { DestoryRemoteTensorHandle(ctx_, eager_client_, context_id_, op_id_, output_num_); + ctx_->Unref(); } Status RemoteTensorHandleData::Tensor(const tensorflow::Tensor** t) const { @@ -143,6 +137,7 @@ UnshapedRemoteTensorHandleData::~UnshapedRemoteTensorHandleData() { DestoryRemoteTensorHandle(ctx_, eager_client_, context_id_, op_id_, output_num_); } + ctx_->Unref(); } Status UnshapedRemoteTensorHandleData::Tensor( diff --git a/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_client.cc b/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_client.cc index 33c2b17e113..b3164f0956e 100644 --- a/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_client.cc +++ b/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_client.cc @@ -64,7 +64,7 @@ class GrpcEagerClient : public EagerClient { enqueue_dispatchers_.erase(request->context_id()); } else { LOG(ERROR) << "Remote EagerContext with id " << request->context_id() - << " does not seems to exist."; + << " does not seem to exist."; } } diff --git a/tensorflow/core/framework/model.cc b/tensorflow/core/framework/model.cc index 6dff1ffe56e..a6b61148437 100644 --- a/tensorflow/core/framework/model.cc +++ b/tensorflow/core/framework/model.cc @@ -651,6 +651,10 @@ void Model::Optimize(AutotuneAlgorithm algorithm, int64 cpu_budget) { switch (algorithm) { case AutotuneAlgorithm::HILL_CLIMB: OptimizeHillClimb(cpu_budget); + break; + case AutotuneAlgorithm::GRADIENT_DESCENT: + OptimizeGradientDescent(cpu_budget); + break; } } @@ -715,13 +719,106 @@ std::map> Model::CollectTunableParameters( return parameters; } +void Model::OptimizeGradientDescent(int64 cpu_budget) { + std::shared_ptr snapshot; + { + tf_shared_lock 
lock(mu_); + snapshot = output_->Snapshot(nullptr); + } + VLOG(2) << "Starting optimization of tunable parameters with GradientDescent"; + const double processing_time = TotalProcessingTime(snapshot); + auto parameters = CollectTunableParameters(snapshot); + for (auto& pair : parameters) { + pair.second->value = pair.second->min; + } + // Gradient descent step size. + constexpr double kDescentStep = 0.7L; + + // Optimization is stopped once the `OutputTime` improvement is smaller than + // this value. + constexpr double kOptimizationPrecision = 100.0L; + + // Penalizing step for the parameters after we overoptimize (output time < + // processing time / cpu budget) the objective. + constexpr double kParametersPenalty = 0.05L; + + // Maximum number of iterations for optimization. + constexpr int64 kMaxIterations = 100; + + double output_time = 0; + double new_output_time; + double new_value; + for (int i = 0; i < kMaxIterations; ++i) { + std::map gradient; + new_output_time = OutputTime(snapshot, &gradient); + if (std::abs(output_time - new_output_time) < kOptimizationPrecision || + new_output_time < processing_time / cpu_budget) { + break; + } + double max_abs_derivative = 1.0; + for (auto& pair : parameters) { + if (pair.second->value != pair.second->max) { + max_abs_derivative = + std::max(max_abs_derivative, std::abs(gradient[pair.first])); + } + } + // Maximizes parameters on early stages of the model. + if (max_abs_derivative < kOptimizationPrecision) { + for (auto& pair : parameters) { + pair.second->value = pair.second->max; + } + break; + } + for (auto& pair : parameters) { + new_value = pair.second->value - + kDescentStep * gradient[pair.first] / max_abs_derivative; + // Projection on a feasible interval. 
+ if (new_value > pair.second->max) { + pair.second->value = pair.second->max; + } else if (new_value < pair.second->min) { + pair.second->value = pair.second->min; + } else { + pair.second->value = new_value; + } + } + output_time = new_output_time; + } + // Penalize parameters if we overoptimized the objective. + for (int i = 0; + i < kMaxIterations && new_output_time < processing_time / cpu_budget; + ++i) { + for (auto& pair : parameters) { + new_value = pair.second->value - kParametersPenalty; + // Projection on a feasible interval. + if (new_value > pair.second->max) { + pair.second->value = pair.second->max; + } else if (new_value < pair.second->min) { + pair.second->value = pair.second->min; + } else { + pair.second->value = new_value; + } + } + new_output_time = OutputTime(snapshot, /*gradient=*/nullptr); + } + VLOG(2) << "Number of tunable parameters: " << parameters.size(); + for (auto& pair : parameters) { + pair.second->value = std::round(pair.second->value); + auto& parameter = pair.second; + VLOG(2) << "Setting tunable parameter " << pair.first << " to " + << parameter->value; + mutex_lock l(*parameter->state->mu); + parameter->state->value = parameter->value; + parameter->state->cond_var->notify_all(); + } +} + void Model::OptimizeHillClimb(int64 cpu_budget) { std::shared_ptr snapshot; { tf_shared_lock lock(mu_); snapshot = output_->Snapshot(nullptr); } - VLOG(2) << "Starting optimization of tunable parameters"; + VLOG(2) << "Starting optimization of tunable parameters with HillClimb"; const double processing_time = TotalProcessingTime(snapshot); auto parameters = CollectTunableParameters(snapshot); for (auto& pair : parameters) { diff --git a/tensorflow/core/framework/model.h b/tensorflow/core/framework/model.h index 3ae279350b4..2687cc6e534 100644 --- a/tensorflow/core/framework/model.h +++ b/tensorflow/core/framework/model.h @@ -40,6 +40,7 @@ constexpr int64 kAutotune = -1; enum class AutotuneAlgorithm { HILL_CLIMB = 0, + GRADIENT_DESCENT = 1, }; 
// Represents thread-safe state that can be shared between an input pipeline and @@ -543,6 +544,15 @@ class Model { // element divided by CPU budget. void OptimizeHillClimb(int64 cpu_budget); + // This optimization algorithm starts by setting all tunable parallelism + // parameters to the minimum value. It then improves current parameters by + // making a step in the direction opposite to the gradient of `OutputTime` and + // projecting resulting values on the feasible intervals. Improvement step is + // repeated until either the output time improvement is smaller than threshold + // value or the output time is less than the processing time needed to produce + // an element divided by CPU budget. + void OptimizeGradientDescent(int64 cpu_budget); + // Collects the output time and if `gradient` is not `nullptr`, the output // time gradient w.r.t. tunable parameters of the subtree rooted in the given // node and the last input time. diff --git a/tensorflow/core/framework/node_def_util.cc b/tensorflow/core/framework/node_def_util.cc index 681ce8bba5c..656202df631 100644 --- a/tensorflow/core/framework/node_def_util.cc +++ b/tensorflow/core/framework/node_def_util.cc @@ -814,9 +814,14 @@ Status AddPrefixAndSuffixToNode(StringPiece prefix, StringPiece suffix, } // Update colocation constraints. 
- auto class_attr = node_def->mutable_attr()->find("_class"); + constexpr char kClassAttr[] = "_class"; + auto class_attr = node_def->mutable_attr()->find(kClassAttr); if (class_attr != node_def->mutable_attr()->end()) { - class_attr->second.set_s(strings::StrCat(prefix, class_attr->second.s())); + AttrValue new_value; + new_value.mutable_list()->add_s( + strings::StrCat(prefix, class_attr->second.s())); + node_def->mutable_attr()->erase(kClassAttr); + node_def->mutable_attr()->insert({kClassAttr, new_value}); } return Status::OK(); diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc index 1a38df27dc5..44e94b83c7a 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler.cc +++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc @@ -807,6 +807,12 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) { // Update graph_costs_ and per-op costs. const NodeDef* node = ready_nodes_->GetCurrNode(); auto& node_state = node_map_[node]; + // TODO(dyoon, andiryxu): Consider revisiting node execution w.r.t. Switch and + // Merge -- it can create a loop which may include loop-carried dependencies, + // diverge-merge, and other complex execution patterns. + bool previously_executed_merge = + IsMerge(*node) && (node_state.time_finished != Costs::Duration::max()); + // If there is annotation in the graph about execution times, we use that // number, otherwise, we assume the node is executed once. node_state.execution_count = node->attr().count(kExecutionCount) == 0 @@ -876,8 +882,15 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) { << ", scheduled: " << node_state.time_scheduled.count() << ", finished: " << node_state.time_finished.count(); - // Checks outputs, and adds ready nodes to queue. - AddOutputNodesToReadyQueue(node, curr_time); + if (previously_executed_merge) { + // Skip AddOutputNodesToReadyQueue; this is due to Switch-Merge.
+ VLOG(1) << "node [ " << node->name() << ", " << node->op() << " ] " + << "is executed more than once. " + << "Skip scheduling its output nodes."; + } else { + // Checks outputs, and adds ready nodes to queue. + AddOutputNodesToReadyQueue(node, curr_time); + } // Increment num_outputs_executed of the input nodes and maybe update memory. for (const auto& input_port : node_state.inputs) { diff --git a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc index 21ca9ca4fc6..588bfce5e90 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc +++ b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc @@ -625,10 +625,10 @@ class TestVirtualScheduler : public VirtualScheduler { public: TestVirtualScheduler(const bool use_static_shapes, const bool use_aggressive_shape_inference, - Cluster* cluster) + ReadyNodeManager* ready_node_manager, Cluster* cluster) : VirtualScheduler( use_static_shapes, use_aggressive_shape_inference, cluster, - &ready_node_manager_, + ready_node_manager, absl::make_unique(cluster->GetDevices())) { enable_mem_usage_tracking(); } @@ -638,9 +638,6 @@ class TestVirtualScheduler : public VirtualScheduler { FRIEND_TEST(VirtualSchedulerTest, ComplexDependency); FRIEND_TEST(VirtualSchedulerTest, Variable); FRIEND_TEST(VirtualSchedulerTest, InterDeviceTransfer); - - protected: - FirstReadyManager ready_node_manager_; }; class VirtualSchedulerTest : public ::testing::Test { @@ -659,7 +656,8 @@ class VirtualSchedulerTest : public ::testing::Test { cluster_ = absl::make_unique(devices); scheduler_ = absl::make_unique( /*use_static_shapes=*/true, - /*use_aggressive_shape_inference=*/true, cluster_.get()); + /*use_aggressive_shape_inference=*/true, &first_ready_manager_, + cluster_.get()); } DeviceProperties GetDummyCPUDevice() { @@ -689,12 +687,10 @@ class VirtualSchedulerTest : public ::testing::Test { auto c0 = ops::Conv2D(s.WithOpName("c0"), x, f, strides, "SAME"); auto c1 = 
ops::Conv2D(s.WithOpName("c1"), y, f, strides, "SAME"); auto c2 = ops::Conv2D(s.WithOpName("c2"), z, f, strides, "SAME"); - GraphDef def; - TF_CHECK_OK(s.ToGraphDef(&def)); - grappler_item_.reset(new GrapplerItem); + grappler_item_ = absl::make_unique(); + TF_CHECK_OK(s.ToGraphDef(&grappler_item_->graph)); grappler_item_->id = "test_conv2d_graph"; - grappler_item_->graph = def; grappler_item_->fetch = {"c0", "c1"}; dependency_["c0"] = {"x", "f"}; @@ -710,12 +706,11 @@ class VirtualSchedulerTest : public ::testing::Test { {kernel_, kernel_, depth_in_, depth_out_}, DT_FLOAT); std::vector strides = {1, 1, 1, 1}; auto y = ops::Conv2D(s.WithOpName("y"), x, f, strides, "SAME"); - GraphDef def; - TF_CHECK_OK(s.ToGraphDef(&def)); - grappler_item_.reset(new GrapplerItem); + grappler_item_ = absl::make_unique(); + TF_CHECK_OK(s.ToGraphDef(&grappler_item_->graph)); grappler_item_->id = "test_conv2d_var_graph"; - grappler_item_->graph = def; + grappler_item_->fetch = {"y"}; dependency_["y"] = {"x", "f"}; @@ -740,12 +735,9 @@ class VirtualSchedulerTest : public ::testing::Test { auto abcd = ops::MatMul(s.WithOpName("abcd"), abc, d); auto abcde = ops::MatMul(s.WithOpName("abcde"), abcd, e); - GraphDef def; - TF_CHECK_OK(s.ToGraphDef(&def)); - - grappler_item_.reset(new GrapplerItem); + grappler_item_ = absl::make_unique(); + TF_CHECK_OK(s.ToGraphDef(&grappler_item_->graph)); grappler_item_->id = "test_matmul_sequence_graph"; - grappler_item_->graph = def; grappler_item_->fetch = {"abcde"}; dependency_["ab"] = {"a", "b"}; @@ -763,12 +755,10 @@ class VirtualSchedulerTest : public ::testing::Test { auto w = ops::RandomUniform(s.WithOpName("w"), {10, 10, 10, 10}, DT_FLOAT); OutputList input_tensors = {x, y, z, w}; auto out = ops::AddN(s.WithOpName("out"), input_tensors); - GraphDef def; - TF_CHECK_OK(s.ToGraphDef(&def)); - grappler_item_.reset(new GrapplerItem); + grappler_item_ = absl::make_unique(); + TF_CHECK_OK(s.ToGraphDef(&grappler_item_->graph)); grappler_item_->id = 
"test_addn_graph"; - grappler_item_->graph = def; grappler_item_->fetch = {"out"}; dependency_["out"] = {"x", "y", "z", "w"}; @@ -780,12 +770,10 @@ class VirtualSchedulerTest : public ::testing::Test { auto unnecessary = ops::Placeholder(s.WithOpName("unnecessary"), DT_FLOAT); auto x = ops::Placeholder(s.WithOpName("x"), DT_FLOAT); - GraphDef def; - TF_CHECK_OK(s.ToGraphDef(&def)); + grappler_item_ = absl::make_unique(); + TF_CHECK_OK(s.ToGraphDef(&grappler_item_->graph)); - grappler_item_.reset(new GrapplerItem); grappler_item_->id = "test_extra_placeholders"; - grappler_item_->graph = def; grappler_item_->fetch = {"x"}; // Grappler Item Builder puts all placeholder nodes into the feed @@ -804,17 +792,62 @@ class VirtualSchedulerTest : public ::testing::Test { } auto out = ops::NoOp(s.WithControlDependencies(input_tensors).WithOpName("out")); - GraphDef def; - TF_CHECK_OK(s.ToGraphDef(&def)); - grappler_item_.reset(new GrapplerItem); + grappler_item_ = absl::make_unique(); + TF_CHECK_OK(s.ToGraphDef(&grappler_item_->graph)); + grappler_item_->id = "test_control_dependency_graph"; - grappler_item_->graph = def; grappler_item_->fetch = {"out"}; dependency_["out"] = input_noop_names; } + void CreateGrapplerItemWithAddFromOneTensor() { + Scope s = Scope::NewRootScope().WithDevice(kCPU0); + auto x = tensorflow::ops::RandomUniform( + s.WithOpName("x"), {batch_size_, width_, height_, depth_in_}, DT_FLOAT); + + auto y = tensorflow::ops::Add(s.WithOpName("y"), x, x); + Output fetch = ops::Identity(s.WithOpName("fetch"), y); + + grappler_item_ = absl::make_unique(); + TF_CHECK_OK(s.ToGraphDef(&grappler_item_->graph)); + + grappler_item_->id = "test_add_from_one_tensor"; + grappler_item_->fetch = {"fetch"}; + + dependency_["fetch"] = {"y"}; + dependency_["y"] = {"x"}; + } + + void CreateGrapplerItemWithSwitchMergeInput() { + // sw = Switch(x, pred) + // a = Add(S:1, b) + // m = Merge(sw:0, a) + // y = Add(m, z) + + Scope s = Scope::NewRootScope().WithDevice(kCPU0); + auto x 
= ops::RandomUniform( + s.WithOpName("x"), {batch_size_, width_, height_, depth_in_}, DT_FLOAT); + auto pred = ops::Const(s.WithOpName("pred"), false, {}); + auto sw = ops::Switch(s.WithOpName("switch"), x, pred); + auto b = ops::RandomUniform( + s.WithOpName("b"), {batch_size_, width_, height_, depth_in_}, DT_FLOAT); + auto a = ops::Add(s.WithOpName("a"), sw.output_true, b); + auto m = ops::Merge(s.WithOpName("m"), {sw.output_false, a.z}); + auto z = ops::RandomUniform( + s.WithOpName("z"), {batch_size_, width_, height_, depth_in_}, DT_FLOAT); + auto y = ops::Add(s.WithOpName("y"), m.output, z); + + grappler_item_ = absl::make_unique(); + TF_CHECK_OK(s.ToGraphDef(&grappler_item_->graph)); + + grappler_item_->id = "test_add_merge_switch"; + grappler_item_->fetch = {"y"}; + + dependency_["y"] = {"m", "z"}; + } + // FusedBN [an op with multiple outputs] with multiple consumers (including // control dependency). void CreateGrapplerItemWithBatchNorm() { @@ -846,12 +879,10 @@ class VirtualSchedulerTest : public ::testing::Test { }; auto z4 = ops::NoOp(s.WithControlDependencies(batch_var).WithOpName("z4")); - GraphDef def; - TF_CHECK_OK(s.ToGraphDef(&def)); + grappler_item_ = absl::make_unique(); + TF_CHECK_OK(s.ToGraphDef(&grappler_item_->graph)); - grappler_item_.reset(new GrapplerItem); grappler_item_->id = "test_complex_dependency_graph"; - grappler_item_->graph = def; grappler_item_->fetch = {"z1", "z2", "z3", "z4"}; dependency_["bn"] = {"x", "scale", "offset", "mean", "var"}; @@ -975,7 +1006,8 @@ versions { } )EOF"; - grappler_item_.reset(new GrapplerItem); + grappler_item_ = absl::make_unique(); + CHECK(protobuf::TextFormat::ParseFromString(gdef_ascii, &grappler_item_->graph)); grappler_item_->id = "test_graph"; @@ -1032,7 +1064,7 @@ versions { } )EOF"; - grappler_item_.reset(new GrapplerItem); + grappler_item_ = absl::make_unique(); CHECK(protobuf::TextFormat::ParseFromString(gdef_ascii, &grappler_item_->graph)); grappler_item_->id = "test_graph"; @@ -1428,7 
+1460,7 @@ versions { } )EOF"; - grappler_item_.reset(new GrapplerItem); + grappler_item_ = absl::make_unique<GrapplerItem>(); CHECK(protobuf::TextFormat::ParseFromString(gdef_ascii, &grappler_item_->graph)); grappler_item_->id = "test_graph"; @@ -2095,7 +2127,7 @@ versions { producer: 27 })EOF"; - grappler_item_.reset(new GrapplerItem); + grappler_item_ = absl::make_unique<GrapplerItem>(); CHECK(protobuf::TextFormat::ParseFromString(gdef_ascii, &grappler_item_->graph)); grappler_item_->id = "test_graph"; @@ -2136,12 +2168,9 @@ versions { .WithControlDependencies(y) .WithDevice(kCPU1)); - GraphDef def; - TF_CHECK_OK(s.ToGraphDef(&def)); - - grappler_item_.reset(new GrapplerItem); + grappler_item_ = absl::make_unique<GrapplerItem>(); + TF_CHECK_OK(s.ToGraphDef(&grappler_item_->graph)); grappler_item_->id = "test_conv2d_graph"; - grappler_item_->graph = def; grappler_item_->fetch = {"y1", "y2", "batch_mean1", "batch_var1", "control_dep"}; @@ -2280,6 +2309,8 @@ versions { // cluster_ and scheduler_ are initialized in the c'tor. std::unique_ptr<VirtualCluster> cluster_; std::unique_ptr<TestVirtualScheduler> scheduler_; + FirstReadyManager first_ready_manager_; + CompositeNodeManager composite_node_manager_; // grappler_item_ will be initialized differently for each test case. std::unique_ptr<GrapplerItem> grappler_item_; @@ -2933,6 +2964,51 @@ TEST_F(VirtualSchedulerTest, GraphWihtOnlyRecv) { EXPECT_GT(ops_executed.count("Recv"), 0); } +TEST_F(VirtualSchedulerTest, AddMergeSwitch) { + // Override scheduler_ with CompositeNodeManager. + scheduler_ = absl::make_unique<TestVirtualScheduler>( + /*use_static_shapes=*/true, + /*use_aggressive_shape_inference=*/true, &composite_node_manager_, + cluster_.get()); + CreateGrapplerItemWithSwitchMergeInput(); + InitScheduler(); + + // pred --+ z --+ + // | | + // V V + // x -> Switch --------> Merge ---> Add --> y + // | ^ + // | | + // +-----> Add -----+ + // ^ + // | + // b --------------+ + + // Run the scheduler. 
The current VirtualScheduler, w/o annotation, triggers + // both outputs of Switch; Merge is ready as soon as one of its inputs is + // ready. If we just used a num_inputs_ready counter, the final Add could + // become ready, making it possible to skip scheduling z. (Need to use + // CompositeNodeManager to test this case). + auto ops_executed = RunScheduler(""); + + EXPECT_GT(ops_executed.count("z"), 0); +} + +TEST_F(VirtualSchedulerTest, AddFromOneTensor) { + CreateGrapplerItemWithAddFromOneTensor(); + InitScheduler(); + + // x -+----> Add --> y + // | ^ + // | | + // +-------+ + + // Run the scheduler. + auto ops_executed = RunScheduler(""); + EXPECT_GT(ops_executed.count("y"), 0); + EXPECT_GT(ops_executed.count("x"), 0); +} + } // namespace } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index cb6f77efd1a..273460050fc 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -190,6 +190,13 @@ bool GetElementUnexhaustive(const Tensor& t, int i, const std::set& dtypes, } } +bool NodeIsOnCpu(const NodeDef& node) { + string task; + string device; + return DeviceNameUtils::SplitDeviceName(node.device(), &task, &device) && + absl::StrContains(device, DEVICE_CPU); +} + // Graph optimizer context extension specific to ArithmeticOptimizer. 
struct ArithmeticOptimizerContext { explicit ArithmeticOptimizerContext(SetVector* nodes_to_simplify) @@ -2361,13 +2368,7 @@ class ReplaceMulWithSquare : public ArithmeticOptimizerStage { const DataType type = GetDataTypeFromAttr(*node, "T"); bool is_complex = (type == DT_COMPLEX64) || (type == DT_COMPLEX128); - string task; - string device; - bool is_on_cpu = - DeviceNameUtils::SplitDeviceName(node->device(), &task, &device) && - absl::StrContains(device, DEVICE_CPU); - - if (!is_complex || is_on_cpu) { + if (!is_complex || NodeIsOnCpu(*node)) { NodeDef* new_square_node = AddCopyNode(optimized_node_name, node); new_square_node->set_op("Square"); for (int i = 1; i < new_square_node->input_size(); ++i) { @@ -2528,6 +2529,30 @@ class ConvertPowStage : public ArithmeticOptimizerStage { node->set_input(1, AsControlDependency(y->name())); AddToOptimizationQueue(node); AddToOptimizationQueue(y); + } else if (curr == complex128(3, 0)) { + // TODO(courbet): Use 'Cube' when it's added to TF ops. 
+ if (NodeIsOnCpu(*node)) { + // We create an inner square node: inner_square = square(x) + const NodeScopeAndName scope_and_name = + ParseNodeScopeAndName(node->name()); + const string inner_square_name = + OptimizedNodeName(scope_and_name, "_inner"); + NodeDef* inner_square_node = ctx().node_map->GetNode(inner_square_name); + if (inner_square_node == nullptr) { + inner_square_node = AddCopyNode(inner_square_name, node); + inner_square_node->set_op("Square"); + inner_square_node->mutable_input()->RemoveLast(); + } + ctx().node_map->AddOutput(x->name(), inner_square_node->name()); + // We modify `node`: node = mul(x, inner_square); + node->set_op("Mul"); + node->set_input(1, inner_square_node->name()); + node->add_input(AsControlDependency(y->name())); + + AddToOptimizationQueue(node); + AddToOptimizationQueue(inner_square_node); + AddToOptimizationQueue(y); + } } else if (curr == complex128(1, 0) && ShapesSymbolicallyEqual(value_props.shape(), output_shape)) { // Pow could be used to broadcast, so make sure the shapes of the two @@ -2985,17 +3010,6 @@ class UnaryOpsComposition : public ArithmeticOptimizerStage { DrivesControlDependency(node)); } - // UnaryOpsComposition is defined only for CPU. 
- bool NodeIsOnCpu(const NodeDef& node) const { - using absl::StartsWith; - - string task; - string device; - - return DeviceNameUtils::SplitDeviceName(node.device(), &task, &device) && - StartsWith(device, DEVICE_CPU); - } - bool NodeIsAlreadyFused(const NodeDef& node) const { return fused_nodes_.count(node.name()) > 0; } diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index d9ce9f66b7a..ae3da034212 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -2728,6 +2728,7 @@ TEST_F(ArithmeticOptimizerTest, ConvertPow) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); auto x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2}); auto y2 = ops::Const(s.WithOpName("y2"), {2.0f, 2.0f}, {1, 2}); + auto y3 = ops::Const(s.WithOpName("y3"), {3.0f, 3.0f}, {1, 2}); auto y1 = ops::Const(s.WithOpName("y1"), {1.0f, 1.0f}, {1, 2}); auto yPoint5 = ops::Const(s.WithOpName("y.5"), {0.5f, 0.5f}, {1, 2}); auto y0 = ops::Const(s.WithOpName("y0"), {0.0f, 0.0f}, {1, 2}); @@ -2738,6 +2739,8 @@ TEST_F(ArithmeticOptimizerTest, ConvertPow) { auto ones = ops::Const(s.WithOpName("ones"), {1.0f, 1.0f, 1.0f}, {1, 3}); auto zeros = ops::Const(s.WithOpName("zeros"), {0.0f, 0.0f, 0.0f}, {1, 3}); Output out2 = ops::Pow(s.WithOpName("out2"), x, y2); + Output out3 = + ops::Pow(s.WithOpName("out3").WithDevice("/device:CPU:0"), x, y3); Output out1 = ops::Pow(s.WithOpName("out1"), x, y1); Output outPoint5 = ops::Pow(s.WithOpName("out.5"), x, yPoint5); Output out0 = ops::Pow(s.WithOpName("out0"), x, y0); @@ -2748,18 +2751,18 @@ TEST_F(ArithmeticOptimizerTest, ConvertPow) { Output out_bcast2 = ops::Pow(s.WithOpName("out_bcast2"), z, zeros); GrapplerItem item; - item.fetch = {"out2", "out1", "out.5", "out0", "out_.5", - "out_1", "out", "out_bcast1", "out_bcast2"}; + item.fetch = {"out2", "out3", "out1", 
"out.5", "out0", + "out_.5", "out_1", "out", "out_bcast1", "out_bcast2"}; TF_CHECK_OK(s.ToGraphDef(&item.graph)); auto tensors_expected = EvaluateNodes(item.graph, item.fetch); - ASSERT_EQ(tensors_expected.size(), 9); + ASSERT_EQ(tensors_expected.size(), 10); GraphDef got; ArithmeticOptimizer optimizer; EnableOnlyConvertPow(&optimizer); OptimizeAndPrune(&optimizer, &item, &got); auto tensors = EvaluateNodes(got, item.fetch); - ASSERT_EQ(tensors.size(), 9); + ASSERT_EQ(tensors.size(), 10); for (int i = 0; i < tensors.size(); ++i) { EXPECT_EQ(tensors[i].NumElements(), tensors_expected[i].NumElements()); @@ -2773,6 +2776,12 @@ TEST_F(ArithmeticOptimizerTest, ConvertPow) { AddNode("ones", "Const", {}, {}, &want); AddNode("zeros", "Const", {}, {}, &want); AddNode("out2", "Square", {"x"}, {}, &want); + AddNode("ArithmeticOptimizer/ConvertPow__inner_out3", "Square", {"x"}, {}, + &want) + ->set_device("/device:CPU:0"); + AddNode("out3", "Mul", {"x", "ArithmeticOptimizer/ConvertPow__inner_out3"}, + {}, &want) + ->set_device("/device:CPU:0"); AddNode("out1", "Identity", {"x"}, {}, &want); AddNode("out.5", "Sqrt", {"x"}, {}, &want); AddNode("out0", "Const", {AsControlDependency("x")}, {}, &want); diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 2a593be9635..54ef5567197 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1090,7 +1090,9 @@ Status CreateConstantTensorAttrValue(DataType type, double value, SET_TENSOR_VAL_CASE(DT_QUINT8, int32, int); SET_TENSOR_VAL_CASE(DT_BOOL, bool, bool); default: - return errors::InvalidArgument("Unsupported type: ", type); + return errors::InvalidArgument( + "Unsupported type in CreateConstantTensorAttrValue: ", + DataTypeString(type)); } return Status::OK(); } @@ -1888,13 +1890,15 @@ Status ConstantFolding::ReplaceOperationWithConstant( double value, const GraphProperties& 
properties, const TensorShapeProto& shape, NodeDef* node, GraphDef* graph) { const DataType dtype = GetDataTypeFromNodeOrProps(*node, properties); - if (dtype == DT_INVALID) { + AttrValue tensor_attr; + Status s = CreateConstantTensorAttrValue(dtype, value, shape, &tensor_attr); + if (!s.ok()) { + // Fail gracefully without mutating the graph. + VLOG(1) << "Failed to replace node " << node->name() << " of type " + << DataTypeString(dtype) << " with constant tensor of value " + << value; return Status::OK(); } - - AttrValue tensor_attr; - TF_RETURN_IF_ERROR( - CreateConstantTensorAttrValue(dtype, value, shape, &tensor_attr)); return ReplaceOperationWithConstantTensor(dtype, tensor_attr.mutable_tensor(), node, graph); } diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD index 7a31127d8f9..8fffe36e84d 100644 --- a/tensorflow/core/grappler/optimizers/data/BUILD +++ b/tensorflow/core/grappler/optimizers/data/BUILD @@ -17,6 +17,7 @@ cc_library( ":filter_fusion", ":filter_with_random_uniform_fusion", ":hoist_random_uniform", + ":inject_prefetch", ":latency_all_edges", ":make_sloppy", ":map_and_batch_fusion", @@ -288,6 +289,26 @@ tf_cc_test( ] + tf_protos_all(), ) +cc_library( + name = "inject_prefetch", + srcs = ["inject_prefetch.cc"], + hdrs = ["inject_prefetch.h"], + deps = [ + ":graph_utils", + ":optimizer_base", + "//tensorflow/core/grappler:mutable_graph_view", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:op_types", + "//tensorflow/core/grappler:utils", + "//tensorflow/core/grappler/clusters:cluster", + "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry", + "//tensorflow/core:lib_internal", + ] + tf_protos_all(), + alwayslink = 1, +) + cc_library( name = "latency_all_edges", srcs = ["latency_all_edges.cc"], diff --git a/tensorflow/core/grappler/optimizers/data/inject_prefetch.cc 
b/tensorflow/core/grappler/optimizers/data/inject_prefetch.cc new file mode 100644 index 00000000000..4050d099d74 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/inject_prefetch.cc @@ -0,0 +1,93 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/data/inject_prefetch.h" + +#include "tensorflow/core/framework/model.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/grappler/clusters/cluster.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/mutable_graph_view.h" +#include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h" +#include "tensorflow/core/grappler/optimizers/data/graph_utils.h" +#include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/platform/protobuf.h" + +namespace tensorflow { +namespace grappler { +namespace { + +constexpr std::array<const char*, 4> kAsyncDatasetOps = { + "ParallelMapDataset", "ParallelInterleaveDatasetV2", "MapAndBatchDataset", + "ExperimentalMapAndBatchDataset"}; + +} // namespace + +Status InjectPrefetch::OptimizeAndCollectStats(Cluster* cluster, + const GrapplerItem& item, + GraphDef* output, + OptimizationStats* stats) { + *output = item.graph; + MutableGraphView graph(output); + + std::vector<const NodeDef*> async_datasets; + 
for (const NodeDef& node : item.graph.node()) { + for (const auto& async_dataset_op : kAsyncDatasetOps) { + if (node.op() == async_dataset_op) { + async_datasets.push_back(&node); + break; + } + } + } + + if (async_datasets.empty()) return Status::OK(); + + // Add a single const node with value kAutotune; each injected prefetch below uses it as its `buffer_size` input. + NodeDef* autotune_value = + graph_utils::AddScalarConstNode(data::model::kAutotune, &graph); + + for (const NodeDef* async_dataset_node : async_datasets) { + NodeDef prefetch_node; + graph_utils::SetUniqueGraphNodeName( + strings::StrCat("autotune/prefetch_", async_dataset_node->name()), + graph.graph(), &prefetch_node); + prefetch_node.set_op("PrefetchDataset"); + // `input_dataset` input + *prefetch_node.mutable_input()->Add() = async_dataset_node->name(); + // `buffer_size` input + *prefetch_node.mutable_input()->Add() = autotune_value->name(); + + for (const auto& attr_name : {"output_types", "output_shapes"}) { + graph_utils::CopyAttribute(attr_name, *async_dataset_node, + &prefetch_node); + } + + auto* added_node = graph.AddNode(std::move(prefetch_node)); + TF_RETURN_IF_ERROR( + graph.UpdateFanouts(async_dataset_node->name(), added_node->name())); + } + return Status::OK(); +} + +void InjectPrefetch::Feedback(Cluster* cluster, const GrapplerItem& item, + const GraphDef& optimize_output, double result) { + // no-op +} + +REGISTER_GRAPH_OPTIMIZER_AS(InjectPrefetch, "inject_prefetch"); + +} // namespace grappler +} // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/data/inject_prefetch.h b/tensorflow/core/grappler/optimizers/data/inject_prefetch.h new file mode 100644 index 00000000000..8f51dab4d9f --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/inject_prefetch.h @@ -0,0 +1,51 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_INJECT_PREFETCH_H_ +#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_INJECT_PREFETCH_H_ + +#include "tensorflow/core/grappler/optimizers/data/optimizer_base.h" + +namespace tensorflow { +namespace grappler { + +// This optimization adds `Prefetch(AUTOTUNE)` after all asynchronous tf.data +// transformations. This reduces the problem of tuning buffer sizes of these +// asynchronous transformations to tuning buffer sizes of the prefetch +// transformation. +class InjectPrefetch : public TFDataOptimizerBase { + public: + InjectPrefetch() = default; + ~InjectPrefetch() override = default; + + string name() const override { return "inject_prefetch"; } + + Status Init( + const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override { + return Status::OK(); + } + + Status OptimizeAndCollectStats(Cluster* cluster, const GrapplerItem& item, + GraphDef* output, + OptimizationStats* stats) override; + + void Feedback(Cluster* cluster, const GrapplerItem& item, + const GraphDef& optimize_output, double result) override; +}; + +} // namespace grappler +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_INJECT_PREFETCH_H_ diff --git a/tensorflow/core/grappler/optimizers/data/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/data/meta_optimizer.cc index 9e6b7f2bdef..b364296d9a9 100644 --- a/tensorflow/core/grappler/optimizers/data/meta_optimizer.cc +++ 
b/tensorflow/core/grappler/optimizers/data/meta_optimizer.cc @@ -36,7 +36,7 @@ using ConfigMap = std::map; // tf.data optimizations, in the order we want to perform them. -constexpr std::array kTFDataOptimizations = { +constexpr std::array kTFDataOptimizations = { "noop_elimination", "shuffle_and_repeat_fusion", "map_fusion", @@ -50,7 +50,8 @@ constexpr std::array kTFDataOptimizations = { "latency_all_edges", "make_sloppy", "parallel_batch", - "slack"}; + "slack", + "inject_prefetch"}; // Standard grappler optimizations, in the order we want to perform them. constexpr std::array kGrapplerOptimizations = { diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc index 6d04c203d74..59a550ee793 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -1190,19 +1190,20 @@ Status InlineFunctionCalls(const GrapplerItem& item, // `PartitionedCall` is a TF-2.0 function call mechanism for multi-device // functions: - // a) Function can be multi-device, and we can't override device placements. + // a) Function can be multi-device. // b) Automatic control dependencies tracking guarantees that all function // side-effectful nodes will have a path to one of the control outputs. // Control outputs and control edges between side-effectful (stateful) // nodes are used to explicitly mark the nodes that must execute, and to // define their execution order. 
if (n->IsPartitionedCall() || force_inline_as_multi_device) { - inline_options.override_device = false; - inline_options.initialize_empty_device = true; inline_options.output_control_src = OutputControlSource::kControlOutputs; + inline_options.inlined_function_body_placer = + InlinedFunctionBodyPlacer::MultiDevice(); } else { - inline_options.override_device = true; inline_options.output_control_src = OutputControlSource::kDataOutputs; + inline_options.inlined_function_body_placer = + InlinedFunctionBodyPlacer::SingleDevice(); } if (fetch_nodes.contains(n->name())) { diff --git a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc index 14553997b19..5849bae857f 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc @@ -520,14 +520,16 @@ TEST_F(FunctionOptimizerTest, InlineIndirectFunctionSimpleFunction) { {NDef("a", "Placeholder", {}, {{"dtype", DT_FLOAT}}, kDevice), NDef("b", "Placeholder", {}, {{"dtype", DT_FLOAT}}, kDevice), - // Function body nodes are not placed, however function input nodes - // must copy device assignment from input arguments. + // Function body nodes copy only job/task/replica parts of device + // assignment, and function input nodes must copy full device + // assignment from input arguments. Optimized graph is not fully + // placed. NDef(input_x, "Identity", {"a"}, {{"T", DT_FLOAT}}, kDevice), NDef(input_y, "Identity", {"b"}, {{"T", DT_FLOAT}}, kDevice), - // TODO(ezhulenev): Currently inlined function body "implicitly placed" - // with a 'inline_options.initialize_empty_device' flag. + // NOTE(ezhulenev): Currently multi-device function inlining placer + // strategy will override all empty devices with function call device. 
NDef("c/mul", "Mul", {input_x, input_y}, {{"T", DT_FLOAT}}, kDevice), - NDef(output_z, "Identity", {"c/mul"}, {{"T", DT_FLOAT}}, kDevice), + NDef(output_z, "Identity", {"c/mul"}, {{"T", DT_FLOAT}}), NDef("d", "Identity", {output_z}, {{"T", DT_FLOAT}}, kDevice)}, // Function library. @@ -780,7 +782,7 @@ TEST_F(FunctionOptimizerTest, InlineIndirectFunctionWithDevicePlacement) { NDef(input_x, "Identity", {"a"}, {{"T", DT_FLOAT}}, cpu0), NDef(input_y, "Identity", {"b"}, {{"T", DT_FLOAT}}, cpu1), NDef("c/mul", "Mul", {input_x, input_y}, {{"T", DT_FLOAT}}, cpu1), - NDef(output_z, "Identity", {"c/mul"}, {{"T", DT_FLOAT}}, cpu1), + NDef(output_z, "Identity", {"c/mul"}, {{"T", DT_FLOAT}}, cpu0), NDef("d", "Identity", {output_z}, {{"T", DT_FLOAT}}, cpu0)}, // Function library. diff --git a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_test.cc index 9636d417ec9..a48fde74c09 100644 --- a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_test.cc @@ -472,7 +472,6 @@ TEST_F(GenericLayoutOptimizerTest, DoNotPruneNonAddedCancellableTransposes) { utils::GraphView graph_view(&output, &status); TF_ASSERT_OK(status); - LOG(INFO) << graph_view.graph()->DebugString(); auto* input_node = graph_view.GetNode("input"); ASSERT_NE(input_node, nullptr); diff --git a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc index e9953f057aa..f2f784a4abb 100644 --- a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc +++ b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc @@ -473,7 +473,7 @@ Status Transposer::UpdateEdge( return Status::OK(); } -bool Transposer::IsFanoutPortDimsN(const utils::MutableNodeView& node, int port, +bool Transposer::IsFanoutPortRankN(const 
utils::MutableNodeView& node, int port, int n) const { const auto* output_shape_attr = node.GetAttr(kAttrOutputShape); if (output_shape_attr == nullptr || @@ -484,26 +484,68 @@ bool Transposer::IsFanoutPortDimsN(const utils::MutableNodeView& node, int port, return !shape.unknown_rank() && shape.dim_size() == n; } -bool Transposer::IsFanoutPortsDimsN(const utils::MutableNodeView& node, +bool Transposer::IsFanoutPortsRankN(const utils::MutableNodeView& node, absl::Span<const int> ports, int n) const { - for (auto port : ports) { - if (!IsFanoutPortDimsN(node, port, n)) { + for (const auto& port : ports) { + if (!IsFanoutPortRankN(node, port, n)) { return false; } } return true; } -bool Transposer::IsFaninPortDimsN(const utils::MutableNodeView& node, int port, +bool Transposer::IsFaninPortRankN(const utils::MutableNodeView& node, int port, int n) const { if (port < node.NumRegularFanins() && port >= 0) { const auto& regular_fanin = node.GetRegularFanin(port); - return IsFanoutPortDimsN(*regular_fanin.node_view(), regular_fanin.index(), + return IsFanoutPortRankN(*regular_fanin.node_view(), regular_fanin.index(), n); } return false; } +bool Transposer::IsFaninPortDimsNIfConst(const utils::MutableNodeView& node, + int port, + absl::Span<const int> dims) const { + if (port < node.NumRegularFanins() && port >= 0) { + const auto& regular_fanin = node.GetRegularFanin(port); + const auto* fanin_node_view = regular_fanin.node_view(); + if (!IsConstant(*fanin_node_view->node())) { + return true; + } + // Fanin is a Const: parse its value tensor and require its shape to equal `dims` exactly. 
+ const auto* value_attr = fanin_node_view->GetAttr(kAttrValue); + if (value_attr == nullptr) { + return false; + } + Tensor tensor; + if (!tensor.FromProto(value_attr->tensor())) { + return false; + } + if (tensor.dims() != dims.size()) { + return false; + } + for (int i = 0; i < dims.size(); ++i) { + if (tensor.dim_size(i) != dims[i]) { + return false; + } + } + return true; + } + return false; +} + +bool Transposer::IsFaninPortsDimsNIfConst(const utils::MutableNodeView& node, + absl::Span<const int> ports, + absl::Span<const int> dims) const { + for (const auto& port : ports) { + if (!IsFaninPortDimsNIfConst(node, port, dims)) { + return false; + } + } + return true; +} + bool Transposer::CanProcessNode(const TransposeContext& context, const utils::MutableNodeView& node) const { return !context.nodes_to_preserve.contains(node.GetName()) && @@ -637,7 +679,7 @@ bool LayoutSensitiveOpTransposer::ShouldNotProcess( Status DefaultLayoutSensitiveOpTransposer::TransposeNode( TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsDefaultLayoutSensitiveOp(*node->node())); - if (!ShouldProcess(*context, *node) || !IsFanoutPortDimsN(*node, 0, 4)) { + if (!ShouldProcess(*context, *node) || !IsFanoutPortRankN(*node, 0, 4)) { return Status::OK(); } const NodeDef* node_def = node->node(); @@ -654,7 +696,7 @@ Status DefaultLayoutSensitiveOpTransposer::TransposeNode( Status BiasAddGradTransposer::TransposeNode(TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsBiasAddGrad(*node->node())); - if (!ShouldProcess(*context, *node) || !IsFaninPortDimsN(*node, 0, 4)) { + if (!ShouldProcess(*context, *node) || !IsFaninPortRankN(*node, 0, 4)) { return Status::OK(); } TF_RETURN_IF_ERROR(UpdateNode(context, node)); @@ -668,7 +710,7 @@ Status BiasAddGradTransposer::TransposeNode(TransposeContext* context, Status Conv2DBackpropFilterTransposer::TransposeNode( TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsConv2DBackpropFilter(*node->node())); - if 
(!ShouldProcess(*context, *node) || !IsFanoutPortDimsN(*node, 0, 4) || + if (!ShouldProcess(*context, *node) || !IsFanoutPortRankN(*node, 0, 4) || ShouldNotProcess(*context, *node)) { return Status::OK(); } @@ -684,7 +726,7 @@ Status Conv2DBackpropFilterTransposer::TransposeNode( Status Conv2DBackpropInputTransposer::TransposeNode( TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsConv2DBackpropInput(*node->node())); - if (!ShouldProcess(*context, *node) || !IsFanoutPortDimsN(*node, 0, 4) || + if (!ShouldProcess(*context, *node) || !IsFanoutPortRankN(*node, 0, 4) || ShouldNotProcess(*context, *node)) { return Status::OK(); } @@ -708,7 +750,7 @@ bool FusedBatchNormGradTransposer::IsTraining( Status FusedBatchNormGradTransposer::TransposeNode( TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsFusedBatchNormGrad(*node->node())); - if (!ShouldProcess(*context, *node) || !IsFanoutPortDimsN(*node, 0, 4) || + if (!ShouldProcess(*context, *node) || !IsFanoutPortRankN(*node, 0, 4) || !IsTraining(*node) || ShouldNotProcess(*context, *node)) { return Status::OK(); } @@ -728,7 +770,7 @@ Status MaxPoolV2Transposer::TransposeNode(TransposeContext* context, const auto& data_fanin = node->GetRegularFanin(0); auto* data_fanin_node = data_fanin.node_view(); if (!ShouldProcess(*context, *node) || - !IsFanoutPortDimsN(*data_fanin_node, data_fanin.index(), 4)) { + !IsFanoutPortRankN(*data_fanin_node, data_fanin.index(), 4)) { return Status::OK(); } TF_RETURN_IF_ERROR(UpdateNode(context, node)); @@ -742,7 +784,7 @@ Status MaxPoolV2Transposer::TransposeNode(TransposeContext* context, Status MaxPoolGradTransposer::TransposeNode(TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsMaxPoolGrad(*node->node())); - if (!ShouldProcess(*context, *node) || !IsFanoutPortDimsN(*node, 0, 4)) { + if (!ShouldProcess(*context, *node) || !IsFanoutPortRankN(*node, 0, 4)) { return Status::OK(); } TF_RETURN_IF_ERROR(UpdateNode(context, node)); @@ -755,7 
+797,7 @@ Status MaxPoolGradTransposer::TransposeNode(TransposeContext* context, Status MaxPoolGradV2Transposer::TransposeNode(TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsMaxPoolGradV2(*node->node())); - if (!ShouldProcess(*context, *node) || !IsFanoutPortDimsN(*node, 0, 4)) { + if (!ShouldProcess(*context, *node) || !IsFanoutPortRankN(*node, 0, 4)) { return Status::OK(); } TF_RETURN_IF_ERROR(UpdateNode(context, node)); @@ -862,7 +904,7 @@ std::vector LayoutAgnosticOpTransposer::GetVariadic4DFaninPorts( const auto& regular_fanin = node.GetRegularFanin(i); auto* regular_fanin_node = regular_fanin.node_view(); int regular_fanin_port = regular_fanin.index(); - if (IsFanoutPortDimsN(*regular_fanin_node, regular_fanin_port, 4) && + if (IsFanoutPortRankN(*regular_fanin_node, regular_fanin_port, 4) && ((IsAfterDstToSrcTransform(context, *regular_fanin_node) && IsLayoutAgnosticOp(*regular_fanin_node->node())) || IsLayoutOptimizerAddedDstToSrcTranspose(context, @@ -876,7 +918,7 @@ std::vector LayoutAgnosticOpTransposer::GetVariadic4DFaninPorts( Status DefaultLayoutAgnosticOpTransposer::TransposeNode( TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsDefaultLayoutAgnosticOp(*node->node())); - if (!ShouldProcess(*context, *node) || !IsFanoutPortDimsN(*node, 0, 4) || + if (!ShouldProcess(*context, *node) || !IsFanoutPortRankN(*node, 0, 4) || !IsAfterDstToSrcTransform(*context, *node)) { return Status::OK(); } @@ -888,7 +930,7 @@ Status DefaultLayoutAgnosticOpTransposer::TransposeNode( Status AddNTransposer::TransposeNode(TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsAddN(*node->node())); - if (!ShouldProcess(*context, *node) || !IsFanoutPortDimsN(*node, 0, 4) || + if (!ShouldProcess(*context, *node) || !IsFanoutPortRankN(*node, 0, 4) || !IsAfterDstToSrcTransform(*context, *node)) { return Status::OK(); } @@ -900,7 +942,7 @@ Status AddNTransposer::TransposeNode(TransposeContext* context, bool 
BinaryOpTransposer::IsNDOperateWithMD(const utils::MutableNodeView& node, int n, int m) { - return IsFaninPortDimsN(node, 0, n) && IsFaninPortDimsN(node, 1, m); + return IsFaninPortRankN(node, 0, n) && IsFaninPortRankN(node, 1, m); } bool BinaryOpTransposer::IsFaninShapeSupported( @@ -913,10 +955,10 @@ bool BinaryOpTransposer::IsFaninShapeSupported( std::vector BinaryOpTransposer::Get4DDataFaninPorts( const utils::MutableNodeView& node) { std::vector values; - if (IsFaninPortDimsN(node, 0, 4)) { + if (IsFaninPortRankN(node, 0, 4)) { values.push_back(0); } - if (IsFaninPortDimsN(node, 1, 4)) { + if (IsFaninPortRankN(node, 1, 4)) { values.push_back(1); } return values; @@ -1039,7 +1081,7 @@ Status BinaryOpTransposer::TransposeNode(TransposeContext* context, Status ConcatOpTransposer::TransposeNode(TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsConcat(*node->node())); - if (!ShouldProcess(*context, *node) || !IsFanoutPortDimsN(*node, 0, 4) || + if (!ShouldProcess(*context, *node) || !IsFanoutPortRankN(*node, 0, 4) || !IsAfterDstToSrcTransform(*context, *node)) { return Status::OK(); } @@ -1061,7 +1103,8 @@ Status ConcatOpTransposer::TransposeNode(TransposeContext* context, Status FillOpTransposer::TransposeNode(TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsFill(*node->node())); - if (!ShouldProcess(*context, *node) || !IsFanoutPortDimsN(*node, 0, 4) || + if (!ShouldProcess(*context, *node) || !IsFanoutPortRankN(*node, 0, 4) || + !IsFaninPortDimsNIfConst(*node, 0, {4}) || !IsAfterDstToSrcTransform(*context, *node)) { return Status::OK(); } @@ -1089,7 +1132,7 @@ bool MergeTransposer::IsEveryFaninAfterDstToSrcTransform( const TransposeContext& context, const utils::MutableNodeView& node) const { for (const auto& regular_fanin : node.GetRegularFanins()) { auto* regular_fanin_node = regular_fanin.node_view(); - if (IsFanoutPortDimsN(*regular_fanin_node, regular_fanin.index(), 4) && + if (IsFanoutPortRankN(*regular_fanin_node, 
regular_fanin.index(), 4) && ((IsAfterDstToSrcTransform(context, *regular_fanin_node) && IsLayoutAgnosticOp(*regular_fanin_node->node())) || IsLayoutOptimizerAddedDstToSrcTranspose(context, @@ -1104,7 +1147,7 @@ bool MergeTransposer::IsEveryFaninAfterDstToSrcTransform( Status MergeTransposer::TransposeNode(TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsMerge(*node->node())); - if (!ShouldProcess(*context, *node) || !IsFanoutPortDimsN(*node, 0, 4) || + if (!ShouldProcess(*context, *node) || !IsFanoutPortRankN(*node, 0, 4) || !IsEveryFaninAfterDstToSrcTransform(*context, *node)) { return Status::OK(); } @@ -1118,7 +1161,8 @@ Status PadTransposer::TransposeNode(TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsMirrorPad(*node->node()) || IsMirrorPadGrad(*node->node()) || IsPad(*node->node())); - if (!ShouldProcess(*context, *node) || !IsFanoutPortDimsN(*node, 0, 4) || + if (!ShouldProcess(*context, *node) || !IsFanoutPortRankN(*node, 0, 4) || + !IsFaninPortDimsNIfConst(*node, 1, {4, 2}) || !IsAfterDstToSrcTransform(*context, *node)) { return Status::OK(); } @@ -1175,7 +1219,7 @@ bool ReduceTransposer::IsReduceAxisSupported( Status ReduceTransposer::TransposeNode(TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsReduceOp(*node->node())); - if (!ShouldProcess(*context, *node) || !IsFaninPortDimsN(*node, 0, 4) || + if (!ShouldProcess(*context, *node) || !IsFaninPortRankN(*node, 0, 4) || !IsReduceAxisSupported(*context, *node) || !IsAfterDstToSrcTransform(*context, *node)) { return Status::OK(); @@ -1193,7 +1237,7 @@ Status ReduceTransposer::TransposeNode(TransposeContext* context, Status ReverseV2Transposer::TransposeNode(TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsReverseV2(*node->node())); - if (!ShouldProcess(*context, *node) || !IsFanoutPortDimsN(*node, 0, 4) || + if (!ShouldProcess(*context, *node) || !IsFanoutPortRankN(*node, 0, 4) || !IsAfterDstToSrcTransform(*context, *node)) { 
return Status::OK(); } @@ -1206,15 +1250,15 @@ Status ReverseV2Transposer::TransposeNode(TransposeContext* context, bool SelectTransposer::IsFaninScalarVector4D( const utils::MutableNodeView& fanin, int port) { - return IsFanoutPortDimsN(fanin, port, 0) || - IsFanoutPortDimsN(fanin, port, 1) || IsFanoutPortDimsN(fanin, port, 4); + return IsFanoutPortRankN(fanin, port, 0) || + IsFanoutPortRankN(fanin, port, 1) || IsFanoutPortRankN(fanin, port, 4); } std::vector SelectTransposer::GetFaninPorts( const utils::MutableNodeView& fanin, int port) { // Input 0 could be a scalar, a vector with size matching the first dimension // of input 1 and 2, or must have the same shape as input 1 and 2. - if (IsFanoutPortDimsN(fanin, port, 4)) { + if (IsFanoutPortRankN(fanin, port, 4)) { return {0, 1, 2}; } return {1, 2}; @@ -1225,7 +1269,7 @@ Status SelectTransposer::TransposeNode(TransposeContext* context, DCHECK(IsSelect(*node->node())); const auto& regular_fanin_0 = node->GetRegularFanin(0); auto* regular_fanin_0_node = regular_fanin_0.node_view(); - if (!ShouldProcess(*context, *node) || !IsFanoutPortDimsN(*node, 0, 4) || + if (!ShouldProcess(*context, *node) || !IsFanoutPortRankN(*node, 0, 4) || !IsFaninScalarVector4D(*regular_fanin_0_node, regular_fanin_0.index()) || !IsAfterDstToSrcTransform(*context, *node)) { return Status::OK(); @@ -1240,7 +1284,7 @@ Status SelectTransposer::TransposeNode(TransposeContext* context, Status ShapeTransposer::TransposeNode(TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsShape(*node->node())); - if (!ShouldProcess(*context, *node) || !IsFaninPortDimsN(*node, 0, 4) || + if (!ShouldProcess(*context, *node) || !IsFaninPortRankN(*node, 0, 4) || !IsAfterDstToSrcTransform(*context, *node)) { return Status::OK(); } @@ -1267,7 +1311,8 @@ Status ShapeNTransposer::TransposeNode(TransposeContext* context, Status SliceTransposer::TransposeNode(TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsSlice(*node->node())); - if 
(!ShouldProcess(*context, *node) || !IsFanoutPortDimsN(*node, 0, 4) || + if (!ShouldProcess(*context, *node) || !IsFanoutPortRankN(*node, 0, 4) || + !IsFaninPortsDimsNIfConst(*node, {1, 2}, {4}) || !IsAfterDstToSrcTransform(*context, *node)) { return Status::OK(); } @@ -1282,7 +1327,7 @@ Status SplitTransposer::TransposeNode(TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsSplit(*node->node())); const auto ports = GetDataFanoutPorts(*node); - if (!ShouldProcess(*context, *node) || !IsFanoutPortsDimsN(*node, ports, 4) || + if (!ShouldProcess(*context, *node) || !IsFanoutPortsRankN(*node, ports, 4) || !IsAfterDstToSrcTransform(*context, *node)) { return Status::OK(); } @@ -1298,7 +1343,7 @@ Status SplitVTransposer::TransposeNode(TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsSplitV(*node->node())); const auto ports = GetDataFanoutPorts(*node); - if (!ShouldProcess(*context, *node) || !IsFanoutPortsDimsN(*node, ports, 4) || + if (!ShouldProcess(*context, *node) || !IsFanoutPortsRankN(*node, ports, 4) || !IsAfterDstToSrcTransform(*context, *node)) { return Status::OK(); } @@ -1356,8 +1401,8 @@ bool SqueezeTransposer::IsAlongNHW(const utils::MutableNodeView& node) const { bool SqueezeTransposer::IsDimsSupported( const utils::MutableNodeView& node) const { - return (IsFanoutPortDimsN(node, 0, 2) && IsAlongHW(node)) || - (IsFanoutPortDimsN(node, 0, 1) && IsAlongNHW(node)); + return (IsFanoutPortRankN(node, 0, 2) && IsAlongHW(node)) || + (IsFanoutPortRankN(node, 0, 1) && IsAlongNHW(node)); } Status SqueezeTransposer::UpdateSqueezeDims(TransposeContext* context, @@ -1444,7 +1489,8 @@ Status StridedSliceTransposer::PermuteMask(TransposeContext* context, Status StridedSliceTransposer::TransposeNode(TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsStridedSlice(*node->node())); - if (!ShouldProcess(*context, *node) || !IsFanoutPortDimsN(*node, 0, 4) || + if (!ShouldProcess(*context, *node) || !IsFanoutPortRankN(*node, 
0, 4) || + !IsFaninPortsDimsNIfConst(*node, {1, 2, 3}, {4}) || !HasOnlyBeginEndMask(*node) || !IsAfterDstToSrcTransform(*context, *node)) { return Status::OK(); @@ -1461,7 +1507,7 @@ Status StridedSliceTransposer::TransposeNode(TransposeContext* context, Status SwitchTransposer::TransposeNode(TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsSwitch(*node->node())); - if (!ShouldProcess(*context, *node) || !IsFaninPortDimsN(*node, 0, 4) || + if (!ShouldProcess(*context, *node) || !IsFaninPortRankN(*node, 0, 4) || !IsAfterDstToSrcTransform(*context, *node)) { return Status::OK(); } @@ -1474,7 +1520,7 @@ Status SwitchTransposer::TransposeNode(TransposeContext* context, Status TernaryOpTransposer::TransposeNode(TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsTernaryOp(*node->node())); - if (!ShouldProcess(*context, *node) || !IsFanoutPortDimsN(*node, 0, 4) || + if (!ShouldProcess(*context, *node) || !IsFanoutPortRankN(*node, 0, 4) || !IsAfterDstToSrcTransform(*context, *node)) { return Status::OK(); } @@ -1487,7 +1533,8 @@ Status TernaryOpTransposer::TransposeNode(TransposeContext* context, Status TileTransposer::TransposeNode(TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsTile(*node->node())); - if (!ShouldProcess(*context, *node) || !IsFanoutPortDimsN(*node, 0, 4) || + if (!ShouldProcess(*context, *node) || !IsFanoutPortRankN(*node, 0, 4) || + !IsFaninPortDimsNIfConst(*node, 1, {4}) || !IsAfterDstToSrcTransform(*context, *node)) { return Status::OK(); } @@ -1501,7 +1548,7 @@ Status TileTransposer::TransposeNode(TransposeContext* context, Status UnaryGradTransposer::TransposeNode(TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsUnaryGrad(*node->node())); - if (!ShouldProcess(*context, *node) || !IsFanoutPortDimsN(*node, 0, 4) || + if (!ShouldProcess(*context, *node) || !IsFanoutPortRankN(*node, 0, 4) || !IsAfterDstToSrcTransform(*context, *node)) { return Status::OK(); } @@ -1514,15 +1561,12 
@@ Status UnaryGradTransposer::TransposeNode(TransposeContext* context, // Utils. bool IsDefaultLayoutSensitiveOp(const NodeDef& node) { - std::set default_layout_sensitive_ops = {"AvgPool", - "BiasAdd", - "Conv2D", - "DepthToSpace", - "FusedBatchNorm", - "FusedBatchNormV2", - "FusedConv2DBiasActivation", - "MaxPool", - "SpaceToDepth"}; + std::set default_layout_sensitive_ops = { + "AvgPool", "BiasAdd", + "Conv2D", "DepthToSpace", + "FusedBatchNorm", "FusedBatchNormV2", + "FusedBatchNormV3", "FusedConv2DBiasActivation", + "MaxPool", "SpaceToDepth"}; return default_layout_sensitive_ops.find(node.op()) != default_layout_sensitive_ops.end(); } diff --git a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.h b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.h index ac5c7dc100f..2fb98102683 100644 --- a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.h +++ b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.h @@ -139,12 +139,21 @@ class Transposer { utils::MutationNewNode* added_node); protected: - bool IsFanoutPortDimsN(const utils::MutableNodeView& node, int port, + bool IsFanoutPortRankN(const utils::MutableNodeView& node, int port, int n) const; - bool IsFanoutPortsDimsN(const utils::MutableNodeView& node, + bool IsFanoutPortsRankN(const utils::MutableNodeView& node, absl::Span ports, int n) const; - bool IsFaninPortDimsN(const utils::MutableNodeView& node, int port, + bool IsFaninPortRankN(const utils::MutableNodeView& node, int port, int n) const; + + // Checks if fanin at specified port(s) has dimensions `dims` iff fanin is a + // Const. If fanin is not a Const, no dimensions will be checked and this will + // return true. 
+ bool IsFaninPortDimsNIfConst(const utils::MutableNodeView& node, int port, + absl::Span dims) const; + bool IsFaninPortsDimsNIfConst(const utils::MutableNodeView& node, + absl::Span ports, + absl::Span dims) const; bool CanProcessNode(const TransposeContext& context, const utils::MutableNodeView& node) const; string GetDeviceName(const VirtualPlacer* virtual_placer, diff --git a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer_test.cc b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer_test.cc index 6c811a1552f..083a92614dd 100644 --- a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer_test.cc +++ b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer_test.cc @@ -3219,9 +3219,9 @@ TEST_F(TransposerTest, StridedSliceTransposer) { auto attrs = ops::StridedSlice::Attrs().BeginMask(0xB).EndMask(0x7); - auto begin = ops::Const(scope.WithOpName("begin"), {2, 0, 2}, {3}); - auto end = ops::Const(scope.WithOpName("end"), {34, 4, 3}, {3}); - auto strides = ops::Const(scope.WithOpName("strides"), {7, 2, 1}, {3}); + auto begin = ops::Const(scope.WithOpName("begin"), {2, 0, 2, 1}, {4}); + auto end = ops::Const(scope.WithOpName("end"), {34, 4, 3, 1}, {4}); + auto strides = ops::Const(scope.WithOpName("strides"), {7, 2, 1, 1}, {4}); auto strided_slice_op = ops::StridedSlice( scope.WithOpName("stridedslice").WithDevice("/device:GPU:0"), conv2d, @@ -3309,9 +3309,9 @@ TEST_F(TransposerTest, StridedSliceTransposerEllipsisMaskPresent) { auto attrs = ops::StridedSlice::Attrs().BeginMask(0xB).EndMask(0x7).EllipsisMask(0x2); - auto begin = ops::Const(scope.WithOpName("begin"), {2, 0, 2}, {3}); - auto end = ops::Const(scope.WithOpName("end"), {34, 4, 3}, {3}); - auto strides = ops::Const(scope.WithOpName("strides"), {7, 2, 1}, {3}); + auto begin = ops::Const(scope.WithOpName("begin"), {2, 0, 2, 1}, {4}); + auto end = ops::Const(scope.WithOpName("end"), {34, 4, 3, 1}, {4}); + auto strides = 
ops::Const(scope.WithOpName("strides"), {7, 2, 1, 1}, {4}); auto strided_slice_op = ops::StridedSlice( scope.WithOpName("stridedslice").WithDevice("/device:GPU:0"), conv2d, @@ -3350,6 +3350,90 @@ TEST_F(TransposerTest, StridedSliceTransposerEllipsisMaskPresent) { updated_stridedslice_node->GetName(), 0); } +TEST_F(TransposerTest, StridedSliceTransposerConstFaninBadRank) { +#if !GOOGLE_CUDA + GTEST_SKIP() << "CUDA is not enabled"; +#endif // !GOOGLE_CUDA + GrapplerItem item; + Scope scope = Scope::NewRootScope(); + + auto input = + ops::RandomUniform(scope.WithOpName("input"), + {kBatchSize, kHeight, kWidth, kDepthIn}, DT_FLOAT); + auto filter = + ops::RandomUniform(scope.WithOpName("filter"), + {kHeight, kWidth, kDepthIn, kDepthOut}, DT_FLOAT); + Output conv2d = ops::Conv2D( + scope.WithOpName("conv2d").WithDevice("/device:GPU:0"), input, filter, + {1, 2, 4, 1}, "SAME", ops::Conv2D::DataFormat(kSrcFormat)); + + auto attrs = ops::StridedSlice::Attrs().BeginMask(0xB).EndMask(0x7); + + auto begin = ops::Const(scope.WithOpName("begin"), {2, 0, 2}, {3}); + auto end = ops::Const(scope.WithOpName("end"), {34, 4, 3}, {3}); + auto strides = ops::Const(scope.WithOpName("strides"), {7, 2, 1}, {3}); + + auto strided_slice_op = ops::StridedSlice( + scope.WithOpName("stridedslice").WithDevice("/device:GPU:0"), conv2d, + begin, end, strides, attrs); + auto z = ops::Identity(scope.WithOpName("z"), strided_slice_op); + TF_ASSERT_OK(scope.ToGraphDef(&item.graph)); + + TransposeContext context; + TF_ASSERT_OK(TransposeContext::InitializeTransposeContext( + item, virtual_cluster_.get(), kSrcFormat, kDstFormat, kGPU, &context)); + + DefaultLayoutSensitiveOpTransposer conv2d_transposer; + auto* c2d = context.graph_view->GetNode("conv2d"); + ASSERT_NE(c2d, nullptr); + TF_ASSERT_OK(conv2d_transposer.TransposeNode(&context, c2d)); + + StridedSliceTransposer stridedslice_transposer; + auto* stridedslice = context.graph_view->GetNode("stridedslice"); + ASSERT_NE(stridedslice, nullptr); + 
TF_ASSERT_OK(stridedslice_transposer.TransposeNode(&context, stridedslice)); + + auto* input_transpose_node = context.graph_view->GetNode( + "stridedslice-0-TransposeNHWCToNCHW-LayoutOptimizer"); + ASSERT_EQ(input_transpose_node, nullptr); + + auto* begin_node = context.graph_view->GetNode( + "stridedslice-1-DataFormatVecPermuteNHWCToNCHW-LayoutOptimizer"); + ASSERT_EQ(begin_node, nullptr); + auto* end_node = context.graph_view->GetNode( + "stridedslice-2-DataFormatVecPermuteNHWCToNCHW-LayoutOptimizer"); + ASSERT_EQ(end_node, nullptr); + auto* strides_node = context.graph_view->GetNode( + "stridedslice-3-DataFormatVecPermuteNHWCToNCHW-LayoutOptimizer"); + ASSERT_EQ(strides_node, nullptr); + + auto* updated_stridedslice_node = context.graph_view->GetNode("stridedslice"); + ASSERT_NE(updated_stridedslice_node, nullptr); + ASSERT_EQ(updated_stridedslice_node->NumRegularFanins(), 4); + VerifyRegularFaninMatch(updated_stridedslice_node, 0, + "conv2d-0-0-TransposeNCHWToNHWC-LayoutOptimizer", 0); + VerifyRegularFaninMatch(updated_stridedslice_node, 1, "begin", 0); + VerifyRegularFaninMatch(updated_stridedslice_node, 2, "end", 0); + VerifyRegularFaninMatch(updated_stridedslice_node, 3, "strides", 0); + const auto* begin_mask_attr = + updated_stridedslice_node->GetAttr("begin_mask"); + ASSERT_NE(begin_mask_attr, nullptr); + EXPECT_EQ(begin_mask_attr->i(), 0xB); + const auto* end_mask_attr = updated_stridedslice_node->GetAttr("end_mask"); + ASSERT_NE(end_mask_attr, nullptr); + EXPECT_EQ(end_mask_attr->i(), 0x7); + + auto* output_transpose_node = context.graph_view->GetNode( + "stridedslice-0-0-TransposeNCHWToNHWC-LayoutOptimizer"); + ASSERT_EQ(output_transpose_node, nullptr); + + auto* z_output_node = context.graph_view->GetNode("z"); + ASSERT_NE(z_output_node, nullptr); + ASSERT_EQ(z_output_node->NumRegularFanins(), 1); + VerifyRegularFaninMatch(z_output_node, 0, + updated_stridedslice_node->GetName(), 0); +} + TEST_F(TransposerTest, ReduceTransposerKeepDims) { #if 
!GOOGLE_CUDA GTEST_SKIP() << "CUDA is not enabled"; diff --git a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc index 13fb883217a..29bc154eb0e 100644 --- a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc @@ -40,6 +40,9 @@ namespace tensorflow { namespace grappler { namespace { + +const char kScopedAllocatorAttrName[] = "_scoped_allocator"; + // Node names often have some kind of name_scope prefix, with slashes, // and a _nn numeric suffix. Returns true if the main part of the node_name // matches op_name, i.e. it looks from the name like this node is @@ -161,27 +164,51 @@ Status RemoveEdge(const string& input_edge_name, const string& from_node_name, return Status::OK(); } -// If `input` to `op` is an Exit node, rewrite the graph to insert an identity -// op between `input` and `op`. Return the input to op in `new_input`. -// `edge_name` gives the name of the edge from `input` to `op`, and -// `edge_position` is the output position of this edge on `input`. +// In certain cases, we would like to insert an identity op between `input` and +// `op` to ensure correctness. We currently do this in 2 cases: when `input` is +// Exit node, or when `input` is already marked for allocation with another +// scoped allocator op. // -// (loop) is rewritten to (loop) +// If `input` is an Exit node, we add an identity to avoid the case when Exit +// has inputs from different frames. +// +// If `input` has kScopedAllocatorAttrName attribute, this means that it was +// previously marked for allocation with a different scope id. Since there can +// be only one scope id per output, we insert an identity between the input and +// op. This will ensure that the identity becomes the new input to op, and this +// identity can be marked with a new scope id different from `input`. 
+// +// If the graph is rewritten, this function will perform the following change: +// +// input input // | | -// Exit Exit -// | | -// (collective op) Identity +// op Identity // | -// (collective op) -// This avoids the case when Exit has inputs from different frames. -Status MaybeRewriteExitNode(ScopedAllocatorOptimizer* sa_opti, - int64 invocation_count, GraphDef* graph, - NodeMap* node_map, const DataType& dtype, - NodeDef* input, const string& edge_name, - int edge_position, NodeDef* op, - NodeDef** new_input) { - if (!IsExit(*input)) { +// op +// +// This function returns the input to op in `new_input`, and the output index +// from input to op in `new_output_index`. +// `edge_name` gives the name of the edge from `input` to `op`, and +// `output_index` is the output index of this edge on `input`. +Status MaybeRewriteInput(ScopedAllocatorOptimizer* sa_opti, + int64 invocation_count, GraphDef* graph, + NodeMap* node_map, const DataType& dtype, + NodeDef* input, const string& edge_name, + int output_index, NodeDef* op, NodeDef** new_input, + int* new_output_index) { + bool rewrite = false; + if (IsExit(*input)) { + rewrite = true; + } else { + AttrSlice input_attrs = AttrSlice(*input); + std::vector scopes; + Status sa_status = + GetNodeAttr(input_attrs, kScopedAllocatorAttrName, &scopes); + rewrite = sa_status.ok(); + } + if (!rewrite) { *new_input = input; + *new_output_index = output_index; return Status::OK(); } @@ -197,15 +224,21 @@ Status MaybeRewriteExitNode(ScopedAllocatorOptimizer* sa_opti, NodeDefBuilder identity_builder(identity_name, "Identity"); identity_builder.Device(op->device()); identity_builder.Attr("T", dtype); - // Connect `input` to `identity`. - identity_builder.Input(NodeDefBuilder::NodeOut(input->name(), 0, dtype)); + // Connect output at `output_index` from `input` to `identity`. 
+ identity_builder.Input( + NodeDefBuilder::NodeOut(input->name(), output_index, dtype)); LOG_WARNING_AND_RETURN_IF_ERROR(identity_builder.Finalize(identity)); node_map->AddNode(identity_name, identity); node_map->AddOutput(input->name(), identity_name); - // Connect `identity` to `op`. + // Connect `identity` to `op`. Both output index at `identity` and input + // index at `op` are 0. node_map->AddOutput(identity_name, op->name()); - *op->mutable_input(edge_position) = strings::StrCat(identity_name, ":", 0); + *op->mutable_input(0) = strings::StrCat(identity_name, ":", 0); *new_input = identity; + *new_output_index = 0; + VLOG(1) << "Rewrite input " << edge_name << " op " << op->name() + << " old output index " << output_index << " with identity " + << identity_name << " new output index 0"; return Status::OK(); } @@ -219,7 +252,7 @@ Status GetInputs(ScopedAllocatorOptimizer* sa_opti, int64 invocation_count, VLOG(1) << "Getinputs"; for (NodeDef* n : ops) { NodeDef* inode = nullptr; - int position = 0; + int output_index = 0; VLOG(2) << "for node " << n->name(); for (const auto& input_name : n->input()) { if (!IsControlInput(input_name)) { @@ -227,16 +260,18 @@ Status GetInputs(ScopedAllocatorOptimizer* sa_opti, int64 invocation_count, return errors::Internal("Found more than one input for node ", n->name()); } - ParseNodeName(input_name, &position); + ParseNodeName(input_name, &output_index); inode = node_map->GetNode(input_name); if (inode == nullptr) { return errors::Internal("Did not find node ", input_name); } - VLOG(2) << "inode " << inode->DebugString(); - LOG_WARNING_AND_RETURN_IF_ERROR(MaybeRewriteExitNode( + VLOG(2) << "inode " << inode->DebugString() << " output_index " + << output_index; + LOG_WARNING_AND_RETURN_IF_ERROR(MaybeRewriteInput( sa_opti, invocation_count, graph, node_map, dtype, inode, - input_name, position, n, &inode)); - VLOG(2) << "inode after rewrite " << inode->DebugString(); + input_name, output_index, n, &inode, &output_index)); + 
VLOG(2) << "inode after rewrite " << inode->DebugString() + << " output_index " << output_index; } } AttrSlice inode_attrs = AttrSlice(*inode); @@ -247,7 +282,7 @@ Status GetInputs(ScopedAllocatorOptimizer* sa_opti, int64 invocation_count, return errors::Internal("ScopedAllocatorOptimizer expected input type ", dtype, " but found ", inode_dtype); } - inputs->emplace_back(inode, position, n); + inputs->emplace_back(inode, output_index, n); } return Status::OK(); } @@ -274,19 +309,24 @@ class UnaryElementwiseRewriter : public ScopedAllocatorOptimizer::Rewriter { ~UnaryElementwiseRewriter() override {} // Return non-OK if any input is already committed to a ScopedAllocator. + // + // We insert an identity to ensure that inputs are not committed to different + // scope ids in `MaybeRewriteInput`, so this function is basically a sanity + // check. Status CheckExistingScopedAllocator(const std::vector& inputs) { for (const InputDesc& nd : inputs) { VLOG(2) << "get attrs for " << nd.from_node_def->name(); AttrSlice n_attrs = AttrSlice(*nd.from_node_def); - int sa_id; - Status ss = GetNodeAttr(n_attrs, "sa_id", &sa_id); + std::vector scope_ids; + Status ss = GetNodeAttr(n_attrs, kScopedAllocatorAttrName, &scope_ids); if (ss.ok()) { - LOG(INFO) << "Abandoning PARewriter because input " - << nd.from_node_def->name() << " is already assigned " - << "to ScopedAllocator " << sa_id; + LOG(INFO) << "Abandoning ScopedAllocatorOptimizer because input " + << nd.from_node_def->name() << " output " << scope_ids[0] + << " is already assigned to scope_id " << scope_ids[1]; return errors::Internal( - "Abandoning PARewriter because input ", nd.from_node_def->name(), - " is already assigned to ScopedAllocator ", sa_id); + "Abandoning ScopedAllocatorOptimizer because input ", + nd.from_node_def->name(), " output ", scope_ids[0], " is already ", + "assigned to scope_id ", scope_ids[1]); } } return Status::OK(); @@ -397,7 +437,7 @@ class UnaryElementwiseRewriter : public 
ScopedAllocatorOptimizer::Rewriter { nd.from_node_def->add_input(strings::StrCat("^", sa_name)); // This attribute says: allocate output_slot from // ScopedAllocator instance sa_id + 1 + i. - ScopedAllocatorOptimizer::ExtendNodeAttr("_scoped_allocator", + ScopedAllocatorOptimizer::ExtendNodeAttr(kScopedAllocatorAttrName, {nd.output_slot, sa_id + 1 + i}, nd.from_node_def); node_map->AddOutput(sa_name, nd.from_node_def->name()); @@ -535,7 +575,7 @@ class UnaryElementwiseRewriter : public ScopedAllocatorOptimizer::Rewriter { // maintained by NodeMap in the loop. std::set output_nodes = node_map->GetOutputs(old_op->name()); VLOG(3) << "old_op " << old_op->name() << " had " << output_nodes.size() - << " outputs. Moving them to the PASplit node."; + << " outputs. Moving them to the ScopedAllocatorSplit node."; if (VLOG_IS_ON(2)) { for (NodeDef* n : output_nodes) { VLOG(3) << " output: " << n->name(); @@ -908,7 +948,8 @@ Status ScopedAllocatorOptimizer::ProcessGraphDef( VLOG(1) << "Processing " << op_name << " set size " << it.second.size(); Rewriter* rewriter = GetRewriter(op_name); if (!rewriter) { - LOG(ERROR) << "Failed to find PARewriter for op_name " << op_name; + LOG(ERROR) << "Failed to find Rewriter in ScopedAllocatorOptimizer " + << "for op_name " << op_name; continue; } rewriter->SetGraphProperties(graph_properties); diff --git a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer_test.cc b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer_test.cc index a6b9257cd25..5fd8a12e7e1 100644 --- a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer_test.cc @@ -50,7 +50,7 @@ class ScopedAllocatorOptimizerTest : public ::testing::Test { std::vector EvaluateNodes(const GraphDef& graph, const std::vector& fetch) { SessionOptions options; - std::unique_ptr session(NewSession(options)); + std::unique_ptr session(NewSession(options)); 
TF_CHECK_OK(session->Create(graph)); RunOptions run_options; std::vector output_tensors; @@ -78,7 +78,7 @@ class ScopedAllocatorOptimizerTest : public ::testing::Test { r1 r2 */ void BuildAbsGraph(GraphDef* graph_def, bool forward) { - tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Scope s = Scope::NewRootScope(); s = s.WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"); Output a = @@ -104,6 +104,55 @@ class ScopedAllocatorOptimizerTest : public ::testing::Test { TF_CHECK_OK(s.ToGraphDef(graph_def)); } + // Constructs the following graph. + // + // We have 2 different name scopes in this graph. s3, a3, a4, r3, and r4 are + // all under "sub" scope. All other nodes are in the root scope. + // + // The intention is to test that ScopedAllocatorOptimizer works well with a + // graph that has multiple name scopes. In particular, it should work when a + // node (in this case s2) is an input to two nodes in different name scopes + // (a2 and sub/a3) which may be scope allocated. + /* + a b c a b + \ / \ / \ / + s1 s2------ sub/s3 + | | | | + a1 a2 sub/a4 sub/a3 + | | | | + r1 r2 sub/r4 sub/r3 + */ + void BuildGraphWithMultipleScopes(GraphDef* graph_def) { + Scope root_scope = Scope::NewRootScope(); + root_scope = + root_scope.WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"); + + Output a = ops::Const(root_scope.WithOpName("a"), + {1.0, 0.0, 0.0, -1.0}, {2, 2}); + Output b = ops::Const(root_scope.WithOpName("b"), + {1.0, -2.0, 3.0, 4.0}, {2, 2}); + Output c = ops::Const(root_scope.WithOpName("c"), + {-5.0, -2.0, 0.0, -2.0}, {2, 2}); + + // Root scope ops. + Output s1 = ops::Add(root_scope.WithOpName("s1"), a, b); + Output s2 = ops::Add(root_scope.WithOpName("s2"), b, c); + Output a1 = ops::Abs(root_scope.WithOpName("a1"), s1); + Output a2 = ops::Abs(root_scope.WithOpName("a2"), s2); + Output r1 = ops::Reshape(root_scope.WithOpName("r1"), a1, {1, 4}); + Output r2 = ops::Reshape(root_scope.WithOpName("r2"), a2, {4, 1}); + + // Sub scope ops. 
+ Scope sub_scope = root_scope.NewSubScope("sub"); + Output s3 = ops::Add(sub_scope.WithOpName("s3"), a, b); + Output a3 = ops::Abs(sub_scope.WithOpName("a3"), s3); + Output a4 = ops::Abs(sub_scope.WithOpName("a4"), s2); + Output r3 = ops::Reshape(sub_scope.WithOpName("r3"), a3, {1, 4}); + Output r4 = ops::Reshape(sub_scope.WithOpName("r4"), a4, {4, 1}); + + TF_CHECK_OK(root_scope.ToGraphDef(graph_def)); + } + void SetShapes(GraphDef* graph_def) { TensorShapeProto shape_proto; shape_proto.add_dim()->set_size(2); @@ -116,12 +165,11 @@ class ScopedAllocatorOptimizerTest : public ::testing::Test { } } - // Constructs a graph by calling BuildAbsGraph, then executes it and returns - // r1, r2, and scoped_allocator_1_2_Abs:0. - void BuildAndExecuteAbsGraph(bool forward, std::vector* outputs) { - GrapplerItem item; - BuildAbsGraph(&item.graph, forward); - + // Invokes ScopedAllocatorOptimizer on `graph_def`, then executes it and + // returns the outputs specified by `output_names` in `outputs`. + void ExecuteGraph(const GraphDef& graph_def, + const std::vector& output_names, + std::vector* outputs) { // Turn off all optimization except the ScopedAllocatorOptimizer // to avoid anything that would alter the expected graph input/output, // e.g. by constant folding away all calculations. @@ -136,36 +184,22 @@ class ScopedAllocatorOptimizerTest : public ::testing::Test { rwcfg->clear_optimizers(); (*rwcfg->add_optimizers()) = "scoped_allocator"; rwcfg->mutable_scoped_allocator_opts()->add_enable_op("Abs"); - std::unique_ptr session(CreateSession(item.graph, config)); + std::unique_ptr session(CreateSession(graph_def, config)); - // Request two targets: one fetch output and one non-fetched output.
- std::vector output_names = {"r1:0", "r2:0"}; - if (!forward) { - output_names.push_back("scoped_allocator_1_2_Abs:0"); - } std::vector> inputs; std::vector target_nodes = {}; Status s = session->Run(inputs, output_names, target_nodes, outputs); TF_ASSERT_OK(s); - ASSERT_EQ(outputs->size(), forward ? 2 : 3); + ASSERT_EQ(outputs->size(), output_names.size()); } - // Validates that output[0] matches expected0 and outputs[1] matches - // expected1. + // Validates that outputs match expected. void ValidateValues(const std::vector& outputs, - const std::vector& expected0, - const std::vector& expected1) { - for (int oi = 0; oi < outputs.size(); ++oi) { - if (oi == 0) { - ASSERT_EQ(expected0.size(), outputs[oi].NumElements()); - for (int i = 0; i < expected0.size(); ++i) { - EXPECT_EQ(expected0[i], outputs[oi].flat()(i)); - } - } else if (oi == 1) { - ASSERT_EQ(expected1.size(), outputs[oi].NumElements()); - for (int i = 0; i < expected1.size(); ++i) { - EXPECT_EQ(expected1[i], outputs[oi].flat()(i)); - } + const std::vector>& expected) { + for (int i = 0; i < expected.size(); ++i) { + EXPECT_EQ(expected[i].size(), outputs[i].NumElements()); + for (int j = 0; j < expected[i].size(); ++j) { + EXPECT_EQ(expected[i][j], outputs[i].flat()(j)); } } } @@ -230,13 +264,29 @@ TEST_F(ScopedAllocatorOptimizerTest, UnaryRewriteOnly) { TEST_F(ScopedAllocatorOptimizerTest, UnaryExecute) { // Builds the same graph as UnaryRewriteOnly but also executes it and // validates the output. 
+ GraphDef graph_def; + BuildAbsGraph(&graph_def, /*forward=*/false); + SetShapes(&graph_def); std::vector outputs; - BuildAndExecuteAbsGraph(false, &outputs); + ExecuteGraph(graph_def, + /*output_names=*/{"r1:0", "r2:0", "scoped_allocator_1_2_Abs:0"}, + &outputs); // a + b == 2, -2, 3, 3 // b + c == -4, -4, 3, 2 - std::vector expected_r1({2, 2, 3, 3}); - std::vector expected_r2({4, 4, 3, 2}); - ValidateValues(outputs, expected_r1, expected_r2); + ValidateValues(outputs, /*expected=*/{{2, 2, 3, 3}, {4, 4, 3, 2}}); +} + +TEST_F(ScopedAllocatorOptimizerTest, MultipleScopes) { + GraphDef graph_def; + BuildGraphWithMultipleScopes(&graph_def); + SetShapes(&graph_def); + std::vector outputs; + ExecuteGraph(graph_def, + /*output_names=*/{"r1:0", "r2:0", "sub/r3:0", "sub/r4:0"}, + &outputs); + ValidateValues( + outputs, + /*expected=*/{{2, 2, 3, 3}, {4, 4, 3, 2}, {2, 2, 3, 3}, {4, 4, 3, 2}}); } // Tests static ScopedAllocatorOptimizer::ExtendNodeAttr. @@ -264,13 +314,14 @@ TEST_F(ScopedAllocatorOptimizerTest, Extend) { TEST_F(ScopedAllocatorOptimizerTest, ForwardInputToOutput) { // Test that kernels that forward the input to output using `set_output` work // well with scoped allocator optimization. 
+ GraphDef graph_def; + BuildAbsGraph(&graph_def, /*forward=*/true); + SetShapes(&graph_def); std::vector outputs; - BuildAndExecuteAbsGraph(true, &outputs); + ExecuteGraph(graph_def, /*output_names=*/{"r1:0", "r2:0"}, &outputs); // a + b == 2, -2, 3, 3 // b + c == -4, -4, 3, 2 - std::vector expected_r1({2, 2, 3, 3}); - std::vector expected_r2({4, 4, 3, 2}); - ValidateValues(outputs, expected_r1, expected_r2); + ValidateValues(outputs, /*expected=*/{{2, 2, 3, 3}, {4, 4, 3, 2}}); } } // namespace diff --git a/tensorflow/core/grappler/utils/graph_view.cc b/tensorflow/core/grappler/utils/graph_view.cc index 0f210529840..0dccee582ee 100644 --- a/tensorflow/core/grappler/utils/graph_view.cc +++ b/tensorflow/core/grappler/utils/graph_view.cc @@ -15,6 +15,8 @@ limitations under the License. #include "tensorflow/core/grappler/utils/graph_view.h" +#include + #include "absl/container/flat_hash_set.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" @@ -22,6 +24,7 @@ limitations under the License. #include "tensorflow/core/graph/tensor_id.h" #include "tensorflow/core/grappler/op_types.h" #include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/grappler/utils/graph_view_internal.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/map_util.h" @@ -264,21 +267,24 @@ void Mutation::AddMutation( node->update_index_ = updated_nodes_.size(); updated_nodes_.emplace_back(graph_view_, node->node_index_); mutate_fn(&updated_nodes_.back()); - } else { + } else if (!removed_nodes_[node->node_index_]) { auto& diff = updated_nodes_[node->update_index_]; - if (!diff.removed) { - mutate_fn(&diff); - } + mutate_fn(&diff); } } void Mutation::RemoveNode(MutableNodeView* node) { - AddMutation(node, [](MutableNodeViewDiff* diff) { - // Clear existing MutableNodeViewDiff as when node is removed no change to - // its internal state matter. 
- internal::Reset(diff); - internal::SetRemoved(diff, true); - }); + auto& update_index = node->update_index_; + if (update_index != internal::kMissingIndex) { + if (update_index < updated_nodes_.size() - 1) { + graph_view_->nodes_[updated_nodes_.back().node_index].update_index_ = + update_index; + std::swap(updated_nodes_[update_index], updated_nodes_.back()); + } + updated_nodes_.pop_back(); + update_index = internal::kMissingIndex; + } + removed_nodes_[node->node_index_] = true; } void Mutation::UpdateNodeName(MutableNodeView* node, absl::string_view name) { @@ -408,6 +414,7 @@ void Mutation::RemoveNodeAttr(const MutationNewNode& node, void Mutation::ResetInternal() { std::vector().swap(updated_nodes_); + std::vector(graph_view_->NumNodes()).swap(removed_nodes_); std::vector().swap(new_nodes_); } @@ -469,6 +476,7 @@ MutableGraphView::MutableGraphView(GraphDef* graph, Status* status) return; } AddFaninsInternal(&fanins); + mutation_.ResetInternal(); *status = Status::OK(); } @@ -591,16 +599,23 @@ Status MutableGraphView::GetNodeNamesAndPartitionUpdatedNodes( std::vector* renamed_nodes, std::vector* inplace_nodes, std::vector* empty_diff_node_indices) { + // For all nodes to be removed and renamed, mark their original names as + // missing and put associated node index in graph. for (const auto& diff : mutation_.updated_nodes_) { - // For all nodes to be removed and renamed, mark their original names as - // missing and put associated node index in graph. 
- if (diff.removed || diff.update_name) { + if (diff.update_name) { const int index = diff.node_index; const string& node_name = nodes_[index].GetName(); node_names->emplace(node_name, index); } } + for (int i = 0; i < mutation_.removed_nodes_.size(); ++i) { + if (mutation_.removed_nodes_[i]) { + const string& node_name = nodes_[i].GetName(); + node_names->emplace(node_name, i); + } + } + auto name_conflict = [](const absl::string_view node_name) { return errors::InvalidArgument(kMutableGraphViewApplyError, "multiple nodes with the name: '", node_name, @@ -617,8 +632,6 @@ Status MutableGraphView::GetNodeNamesAndPartitionUpdatedNodes( if (internal::IsEmpty(&diff)) { empty_diff_node_indices->emplace_back(diff.node_index); continue; - } else if (diff.removed) { - continue; } // Get name of updated node after potential mutation. const string& node_name = @@ -699,16 +712,15 @@ Status MutableGraphView::RemovedOrMissingNodeFanoutsWellFormed( // Check all fanouts of a single port. MutableNodeView* fanout_view = regular_fanout.node_view(); if (fanout_view->update_index_ == internal::kMissingIndex) { - if (!overwritten_nodes[fanout_view->node_index_]) { + if (mutation_.removed_nodes_[fanout_view->node_index_]) { + // Fanout node will be removed, this can be ignored. + continue; + } else if (!overwritten_nodes[fanout_view->node_index_]) { // Fanout is not updated or removed/overwritten. return bad_fanout(fanout_view->GetName(), node_name_state.first); } } else { auto& diff = mutation_.updated_nodes_[fanout_view->update_index_]; - if (diff.removed) { - // Fanout node will be removed, this can be ignored. 
- continue; - } const int last_index = fanout_view->NumRegularFanins() - diff.num_regular_inputs_to_remove - 1; if (regular_fanout.index() > last_index) { @@ -726,16 +738,15 @@ Status MutableGraphView::RemovedOrMissingNodeFanoutsWellFormed( for (const auto& controlled_fanout : node_view.GetControlledFanouts()) { MutableNodeView* fanout_view = controlled_fanout.node_view(); if (fanout_view->update_index_ == internal::kMissingIndex) { - if (!overwritten_nodes[fanout_view->node_index_]) { + if (mutation_.removed_nodes_[fanout_view->node_index_]) { + // Fanout node will be removed, this can be ignored. + continue; + } else if (!overwritten_nodes[fanout_view->node_index_]) { // Fanout is not updated or removed/overwritten. return bad_fanout(fanout_view->GetName(), node_name_state.first); } } else { auto& diff = mutation_.updated_nodes_[fanout_view->update_index_]; - if (diff.removed) { - // Fanout node will be removed, this can be ignored. - continue; - } // Check if controlling fanin is removed. if (diff.controlling_inputs_to_remove.find( controlled_fanout.fanin_index_) == @@ -789,7 +800,7 @@ Status MutableGraphView::CheckNodeNamesAndFanins( Status MutableGraphView::CheckKernelRegisteredForNodes() { Status s; for (auto& diff : mutation_.updated_nodes_) { - if (internal::IsEmpty(&diff) || diff.removed) { + if (internal::IsEmpty(&diff)) { continue; } @@ -903,10 +914,8 @@ void MutableGraphView::FixRenamedNodes( nodes_[renamed.overwritten_node_index_]; ReplaceNodeFanouts(&renamed_node, &node_to_overwrite); node_index_by_name_.erase(node_to_overwrite.GetName()); - if (node_to_overwrite.update_index_ != internal::kMissingIndex && - mutation_.updated_nodes_[node_to_overwrite.update_index_].removed) { - (*overwritten_name_removed_nodes)[node_to_overwrite.update_index_] = - true; + if (mutation_.removed_nodes_[node_to_overwrite.node_index_]) { + (*overwritten_name_removed_nodes)[node_to_overwrite.node_index_] = true; } } else { // No existing fanouts. 
@@ -939,15 +948,7 @@ void MutableGraphView::AddNewNodes( node_def->mutable_device()->swap(*new_node.node.mutable_device()); node_def->mutable_input()->Clear(); node_def->mutable_attr()->swap(*new_node.node.mutable_attr()); - if (node_view.update_index_ != internal::kMissingIndex) { - // The only case for this to occur is if a node is explicitly marked for - // removal. In that case, unlink it from it's associated - // MutableNodeViewDiff. - mutation_.updated_nodes_[node_view.update_index_].node_index = - internal::kMissingIndex; - mutation_.updated_nodes_[node_view.update_index_].removed = false; - node_view.update_index_ = internal::kMissingIndex; - } + mutation_.removed_nodes_[node_index] = false; } else { // New node. auto* new_node_def = graph_->add_node(); @@ -1169,8 +1170,7 @@ inline void MutableGraphView::AddControllingFaninInternal( void MutableGraphView::ApplyNodeUpdates() { for (auto& diff : mutation_.updated_nodes_) { - if (diff.removed || diff.node_index == internal::kMissingIndex || - internal::IsEmpty(&diff)) { + if (internal::IsEmpty(&diff)) { continue; } MutableNodeView& node_view = nodes_[diff.node_index]; @@ -1299,12 +1299,11 @@ void MutableGraphView::RemoveNodesInternal( std::vector node_indices_to_remove; node_indices_to_remove.reserve(mutation_.updated_nodes_.size() + overwritten_nodes.size()); - for (int i = 0; i < mutation_.updated_nodes_.size(); ++i) { - const auto& diff = mutation_.updated_nodes_[i]; - if (diff.removed) { - auto& node = nodes_[diff.node_index]; + for (int i = 0; i < mutation_.removed_nodes_.size(); ++i) { + if (mutation_.removed_nodes_[i]) { + auto& node = nodes_[i]; RemoveAllFaninFanoutInternal(&node); - node_indices_to_remove.push_back(diff.node_index); + node_indices_to_remove.push_back(i); if (!overwritten_name_removed_nodes[i]) { node_index_by_name_.erase(node.GetName()); } @@ -1645,8 +1644,7 @@ Status MutableGraphView::ApplyMutationInternal() { // Node name and associated fanouts. 
absl::flat_hash_map renamed_fanouts; // Removed nodes where name was overwritten by a renamed node. - std::vector overwritten_name_removed_nodes( - mutation_.updated_nodes_.size()); + std::vector overwritten_name_removed_nodes(nodes_.size()); // Fix renaming of existing nodes by swapping fanouts and rehashing names. // This will also overwrite removed or unmodified nodes. FixRenamedNodes(&renamed_nodes, &renamed_fanouts, diff --git a/tensorflow/core/grappler/utils/graph_view.h b/tensorflow/core/grappler/utils/graph_view.h index d5186cf08e3..456c68a30e9 100644 --- a/tensorflow/core/grappler/utils/graph_view.h +++ b/tensorflow/core/grappler/utils/graph_view.h @@ -359,6 +359,7 @@ class Mutation { MutableGraphView* graph_view_ = nullptr; int mutation_counter_ = 0; std::vector updated_nodes_; + std::vector removed_nodes_; using MutationNewNodeHolder = internal::NewNode; std::vector new_nodes_; diff --git a/tensorflow/core/grappler/utils/graph_view_internal.h b/tensorflow/core/grappler/utils/graph_view_internal.h index b1756a465fe..837c05ecdbd 100644 --- a/tensorflow/core/grappler/utils/graph_view_internal.h +++ b/tensorflow/core/grappler/utils/graph_view_internal.h @@ -364,7 +364,6 @@ struct NodeViewDiff { GraphViewT* graph_view; int node_index; - bool removed = false; string name; bool update_name = false; string op; @@ -393,12 +392,6 @@ struct NodeViewDiff { AttrValueMap processed_attrs; }; -// Sets node for removal via diff. -template -inline void SetRemoved(NodeViewDiff* diff, bool removed) { - diff->removed = removed; -} - // Updates node name. If `name` is the same as the name in the original node, // the field will be cleared in the diff. 
template @@ -629,8 +622,8 @@ template inline bool IsEmpty(NodeViewDiff* diff) { ResizeByTrimmingEndForValue(&diff->regular_inputs_to_remove, false); ResizeByTrimmingEndForValue(&diff->regular_inputs_to_add, EmptyTensorId()); - return !diff->removed && !diff->update_name && !diff->update_op && - !diff->update_device && diff->regular_inputs_to_add.empty() && + return !diff->update_name && !diff->update_op && !diff->update_device && + diff->regular_inputs_to_add.empty() && diff->regular_inputs_to_update.empty() && diff->regular_inputs_to_remove.empty() && diff->controlling_inputs_to_add.empty() && @@ -641,7 +634,6 @@ inline bool IsEmpty(NodeViewDiff* diff) { // Resets and clears existing diff. template inline void Reset(NodeViewDiff* diff) { - diff->removed = false; diff->name.clear(); diff->update_name = false; diff->op.clear(); diff --git a/tensorflow/core/grappler/utils/graph_view_internal_test.cc b/tensorflow/core/grappler/utils/graph_view_internal_test.cc index cb959aea16b..865badc71d6 100644 --- a/tensorflow/core/grappler/utils/graph_view_internal_test.cc +++ b/tensorflow/core/grappler/utils/graph_view_internal_test.cc @@ -52,30 +52,6 @@ absl::flat_hash_map GetUpdatedNodeNames( using MutableNodeViewDiff = NodeViewDiff; -TEST(MutableNodeViewDiffTest, SetRemoved) { - GraphDef graph = SimpleTestGraphForMutation(); - - Status s; - MutableGraphView graph_view(&graph, &s); - TF_ASSERT_OK(s); - auto updated_node_names = GetUpdatedNodeNames(&graph_view); - - MutableNodeView* d_node = graph_view.GetNode("d"); - ASSERT_NE(d_node, nullptr); - - MutableNodeViewDiff diff(&graph_view, d_node->node_index()); - EXPECT_TRUE(IsEmpty(&diff)); - EXPECT_TRUE(IsWellFormed(&diff, updated_node_names)); - - SetRemoved(&diff, true); - EXPECT_FALSE(IsEmpty(&diff)); - EXPECT_TRUE(IsWellFormed(&diff, updated_node_names)); - - SetRemoved(&diff, false); - EXPECT_TRUE(IsEmpty(&diff)); - EXPECT_TRUE(IsWellFormed(&diff, updated_node_names)); -} - TEST(MutableNodeViewDiffTest, UpdateName) { 
GraphDef graph = SimpleTestGraphForMutation(); diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 364a05bd901..572afde42d9 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -34,7 +34,6 @@ load( ) load( "//third_party/mkl:build_defs.bzl", - "if_mkl", "if_mkl_ml", "mkl_deps", ) @@ -1097,7 +1096,9 @@ tf_kernel_library( tf_kernel_library( name = "identity_n_op", prefix = "identity_n_op", - deps = ARRAY_DEPS, + deps = ARRAY_DEPS + [ + "//tensorflow/core:core_cpu_internal", + ], ) tf_kernel_library( @@ -4348,6 +4349,7 @@ tf_kernel_library( prefix = "fused_batch_norm_op", deps = NN_DEPS + [ ":fill_functor", + ":redux_functor", ] + if_cuda([ "//tensorflow/core:stream_executor", ]), @@ -7462,8 +7464,10 @@ tf_mkl_kernel_library( ], deps = [ ":bounds_check", + ":fill_functor", ":matmul_op", ":ops_util", + "//third_party/eigen3", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:lib", diff --git a/tensorflow/core/kernels/bias_op.cc b/tensorflow/core/kernels/bias_op.cc index 08979666d2f..00b920c5f45 100644 --- a/tensorflow/core/kernels/bias_op.cc +++ b/tensorflow/core/kernels/bias_op.cc @@ -27,9 +27,11 @@ limitations under the License. 
#include "tensorflow/core/kernels/redux_functor.h" #include "tensorflow/core/util/tensor_format.h" -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #include "tensorflow/core/kernels/bias_op_gpu.h" #include "tensorflow/core/platform/stream_executor.h" +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if GOOGLE_CUDA #include "tensorflow/stream_executor/cuda/cuda_stream.h" #endif // GOOGLE_CUDA @@ -273,7 +275,7 @@ class BiasGradOp : public OpKernel { using AccumT = typename AccumulatorType::type; if (data_format_ == FORMAT_NCHW) { const functor::ReduceMiddleDimensions< - T, AccumT, Eigen::internal::scalar_sum_op, + T, AccumT, T, Eigen::internal::scalar_sum_op, Eigen::internal::SumReducer> redux; Eigen::DSizes three_dims(batch, channel, @@ -282,7 +284,7 @@ class BiasGradOp : public OpKernel { output, 1); } else { const functor::ReduceOuterDimensions< - T, AccumT, Eigen::internal::scalar_sum_op> + T, AccumT, T, Eigen::internal::scalar_sum_op> redux; Eigen::DSizes two_dims(batch * height * width * depth, @@ -318,7 +320,7 @@ REGISTER_KERNEL(double); #undef REGISTER_KERNEL #endif // TENSORFLOW_USE_SYCL -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM template class BiasOp : public BinaryOp { public: @@ -617,6 +619,6 @@ class BiasGradOp : public OpKernel { TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM } // namespace tensorflow diff --git a/tensorflow/core/kernels/bias_op_gpu.cu.cc b/tensorflow/core/kernels/bias_op_gpu.cu.cc index 764d087f45d..843aedc3e0f 100644 --- a/tensorflow/core/kernels/bias_op_gpu.cu.cc +++ b/tensorflow/core/kernels/bias_op_gpu.cu.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define EIGEN_USE_GPU @@ -53,7 +53,7 @@ struct AccumulatorType { template __global__ void BiasNHWCKernel(int32 nthreads, const T* input, const T* bias, T* output, int32 bias_size) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { + GPU_1D_KERNEL_LOOP(index, nthreads) { int32 bias_offset = index % bias_size; output[index] = ldg(input + index) + ldg(bias + bias_offset); } @@ -62,7 +62,7 @@ __global__ void BiasNHWCKernel(int32 nthreads, const T* input, const T* bias, template __global__ void BiasNCHWKernel(int32 nthreads, const T* input, const T* bias, T* output, int32 bias_size, int32 image_size) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { + GPU_1D_KERNEL_LOOP(index, nthreads) { int32 index2 = index / image_size; int32 bias_offset = index2 % bias_size; output[index] = ldg(input + index) + ldg(bias + bias_offset); @@ -99,9 +99,9 @@ void BiasGPU::compute(const GPUDevice& d, const T* input, const T* bias, template __global__ void BiasGradNHWC_Naive(int32 nthreads, const T* output_backprop, T* bias_backprop, int32 bias_size) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { + GPU_1D_KERNEL_LOOP(index, nthreads) { int32 bias_offset = index % bias_size; - CudaAtomicAdd(bias_backprop + bias_offset, ldg(output_backprop + index)); + GpuAtomicAdd(bias_backprop + bias_offset, ldg(output_backprop + index)); } } @@ -110,14 +110,13 @@ template __global__ void BiasGradNCHW_Naive(int32 nthreads, const T* output_backprop, T* bias_backprop, int32 bias_size, int32 image_size) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { + GPU_1D_KERNEL_LOOP(index, nthreads) { int32 index2 = index / image_size; int32 bias_offset = index2 % bias_size; - CudaAtomicAdd(bias_backprop + bias_offset, ldg(output_backprop + index)); + GpuAtomicAdd(bias_backprop + bias_offset, ldg(output_backprop + index)); } } -extern __shared__ char s_buf[]; template __global__ void 
BiasGradNHWC_SharedAtomics(int32 nthreads, @@ -134,12 +133,12 @@ __global__ void BiasGradNHWC_SharedAtomics(int32 nthreads, for (int32 index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { int32 bias_offset = index % bias_size; - CudaAtomicAdd(s_data + bias_offset, AccT(ldg(output_backprop + index))); + GpuAtomicAdd(s_data + bias_offset, AccT(ldg(output_backprop + index))); } __syncthreads(); for (int32 index = threadIdx.x; index < bias_size; index += blockDim.x) { - CudaAtomicAdd(bias_backprop + index, T(s_data[index])); + GpuAtomicAdd(bias_backprop + index, T(s_data[index])); } } @@ -175,21 +174,34 @@ __global__ void BiasGradNCHW_SharedAtomics(const T* output_backprop, // Write the accumulated sum in this thread to the shared memory. Each thread // shifts their write location to avoid bank conflict. int bias_offset = threadIdx.x % 32; - CudaAtomicAdd(s_data + bias_offset, sum); + GpuAtomicAdd(s_data + bias_offset, sum); __syncthreads(); // Accumulate the results in the shared memory into the first element. // No syncthreads is needed since this is only in the same warp. 
int32 thread_index = threadIdx.x; +#if GOOGLE_CUDA if (thread_index < 32) { AccT data = s_data[thread_index]; for (int32 delta = warpSize / 2; delta > 0; delta /= 2) { - data += CudaShuffleXorSync(kCudaWarpAll, data, delta); + data += GpuShuffleXorSync(kCudaWarpAll, data, delta); } if (thread_index == 0) { - CudaAtomicAdd(bias_backprop + bias_index, T(data)); + GpuAtomicAdd(bias_backprop + bias_index, T(data)); } } +#elif TENSORFLOW_USE_ROCM + if (thread_index < 16) s_data[thread_index] += s_data[thread_index + 16]; + if (thread_index < 8) s_data[thread_index] += s_data[thread_index + 8]; + if (thread_index < 4) s_data[thread_index] += s_data[thread_index + 4]; + if (thread_index < 2) s_data[thread_index] += s_data[thread_index + 2]; + if (thread_index < 1) s_data[thread_index] += s_data[thread_index + 1]; + + // The first thread writes out the accumulated result to the global location. + if (thread_index == 0) { + GpuAtomicAdd(bias_backprop + bias_index, T(s_data[0])); + } +#endif } template @@ -252,8 +264,8 @@ void BiasGradGPU::DoRowReduction(OpKernelContext* context, T* output, const T* input, int rows, int cols) { typedef const Eigen::array::Tensor::Index, 1>& ReductionAxes; Constants constants; - cub::Sum op; - functor::ReduceImpl( + gpuprim::Sum op; + functor::ReduceImpl( context, output, input, 2, rows, cols, 1, 1, constants.kOne, op); } @@ -262,8 +274,8 @@ void BiasGradGPU::DoColReduction(OpKernelContext* context, T* output, const T* input, int rows, int cols) { typedef const Eigen::array::Tensor::Index, 1>& ReductionAxes; Constants constants; - cub::Sum op; - functor::ReduceImpl( + gpuprim::Sum op; + functor::ReduceImpl( context, output, input, 2, rows, cols, 1, 1, constants.kZero, op); } @@ -275,4 +287,4 @@ TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPECS); } // end namespace tensorflow -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/tensorflow/core/kernels/cholesky_op.cc b/tensorflow/core/kernels/cholesky_op.cc index 
bcd42dc8d7a..744436c06e2 100644 --- a/tensorflow/core/kernels/cholesky_op.cc +++ b/tensorflow/core/kernels/cholesky_op.cc @@ -144,8 +144,8 @@ class CholeskyOpGpu : public AsyncOpKernel { const int64 batch_size = input_reshaped.dimension(0); std::vector dev_info; dev_info.push_back(solver->GetDeviceLapackInfo(batch_size, "potrf")); - // TODO(rmlarsen): Parallelize over batches if it turns out to be - // an important use case. + // TODO(rmlarsen): Use PotrfBatched for factoring many small matrices in + // parallel. for (int batch = 0; batch < batch_size; ++batch) { OP_REQUIRES_OK_ASYNC(context, solver->Potrf(CUBLAS_FILL_MODE_UPPER, n, diff --git a/tensorflow/core/kernels/cuda_solvers.cc b/tensorflow/core/kernels/cuda_solvers.cc index 6e26cc1d541..9abf5439571 100644 --- a/tensorflow/core/kernels/cuda_solvers.cc +++ b/tensorflow/core/kernels/cuda_solvers.cc @@ -387,6 +387,43 @@ static inline Status PotrfImpl(BufSizeFnT bufsize, SolverFnT solver, TF_CALL_LAPACK_TYPES(POTRF_INSTANCE); +#if CUDA_VERSION >= 9020 +template +static inline Status PotrfBatchedImpl( + SolverFnT solver, CudaSolver* cuda_solver, OpKernelContext* context, + cusolverDnHandle_t cusolver_dn_handle, cublasFillMode_t uplo, int n, + const Scalar* const host_a_dev_ptrs[], int lda, + DeviceLapackInfo* dev_lapack_info, int batch_size) { + mutex_lock lock(handle_map_mutex); + using CudaScalar = typename CUDAComplexT::type; + ScratchSpace dev_a_dev_ptrs = + cuda_solver->GetScratchSpace(sizeof(CudaScalar*) * batch_size, "", + /* on_host */ false); + if (!CopyHostToDevice(context, dev_a_dev_ptrs.mutable_data() /* dest */, + host_a_dev_ptrs /* source */, dev_a_dev_ptrs.bytes())) { + return errors::Internal("PotrfBatched: failed to copy pointers to device"); + } + TF_RETURN_IF_CUSOLVER_ERROR( + solver(cusolver_dn_handle, uplo, n, + reinterpret_cast(dev_a_dev_ptrs.mutable_data()), lda, + dev_lapack_info->mutable_data(), batch_size)); + return Status::OK(); +} + +#define POTRF_BATCHED_INSTANCE(Scalar, type_prefix) 
\ + template <> \ + Status CudaSolver::PotrfBatched( \ + cublasFillMode_t uplo, int n, const Scalar* const host_a_dev_ptrs[], \ + int lda, DeviceLapackInfo* dev_lapack_info, int batch_size) { \ + return PotrfBatchedImpl(DN_SOLVER_FN(potrfBatched, type_prefix), this, \ + context_, cusolver_dn_handle_, uplo, n, \ + host_a_dev_ptrs, lda, dev_lapack_info, \ + batch_size); \ + } + +TF_CALL_LAPACK_TYPES(POTRF_BATCHED_INSTANCE); +#endif // CUDA_VERSION >= 9020 + template static inline Status GetrfImpl(BufSizeFnT bufsize, SolverFnT solver, CudaSolver* cuda_solver, diff --git a/tensorflow/core/kernels/cuda_solvers.h b/tensorflow/core/kernels/cuda_solvers.h index e4ae1d93227..9679fad09ac 100644 --- a/tensorflow/core/kernels/cuda_solvers.h +++ b/tensorflow/core/kernels/cuda_solvers.h @@ -208,13 +208,25 @@ class CudaSolver { const Scalar* B, int ldb, Scalar* C, int ldc) const TF_MUST_USE_RESULT; - // Computes the Cholesky factorization A = L * L^T for a single matrix. + // Computes the Cholesky factorization A = L * L^H for a single matrix. // Returns Status::OK() if the kernel was launched successfully. See: // http://docs.nvidia.com/cuda/cusolver/#cuds-lt-t-gt-potrf template Status Potrf(cublasFillMode_t uplo, int n, Scalar* dev_A, int lda, int* dev_lapack_info) TF_MUST_USE_RESULT; +#if CUDA_VERSION >= 9020 + // Computes the Cholesky factorization A = L * L^H for a batch of small + // matrices. + // Returns Status::OK() if the kernel was launched successfully. See: + // http://docs.nvidia.com/cuda/cusolver/index.html#cuds-lt-t-gt-potrfBatched + template + Status PotrfBatched(cublasFillMode_t uplo, int n, + const Scalar* const host_a_dev_ptrs[], int lda, + DeviceLapackInfo* dev_lapack_info, + int batch_size) TF_MUST_USE_RESULT; +#endif // CUDA_VERSION >= 9020 + // LU factorization. // Computes LU factorization with partial pivoting P * A = L * U. 
// See: http://docs.nvidia.com/cuda/cusolver/#cuds-lt-t-gt-getrf @@ -230,7 +242,7 @@ class CudaSolver { int* dev_lapack_info) const TF_MUST_USE_RESULT; // Computes partially pivoted LU factorizations for a batch of small matrices. - // Returns Status::OK() if the kernel was launched successfully.See: + // Returns Status::OK() if the kernel was launched successfully. See: // http://docs.nvidia.com/cuda/cublas/index.html#cublas-lt-t-gt-getrfbatched template Status GetrfBatched(int n, const Scalar* const host_a_dev_ptrs[], int lda, diff --git a/tensorflow/core/kernels/cwise_ops_test.cc b/tensorflow/core/kernels/cwise_ops_test.cc index d6ce0f1cfa5..739ccf7730a 100644 --- a/tensorflow/core/kernels/cwise_ops_test.cc +++ b/tensorflow/core/kernels/cwise_ops_test.cc @@ -147,6 +147,67 @@ BM_BINARY_SCALAR(sycl, DivNoNan); #undef BM_BINARY_SCALAR +// Three implementations of x^3. +Graph* CubeWithPow3(int num) { + Graph* g = new Graph(OpRegistry::Global()); + Tensor lhs(DT_FLOAT, TensorShape({64, 64, num / (64 * 64)})); + lhs.flat().setRandom(); + Tensor rhs(DT_FLOAT, TensorShape({})); + rhs.flat().setConstant(3); + test::graph::Binary(g, "Pow", test::graph::Constant(g, lhs), + test::graph::Constant(g, rhs)); + return g; +} + +Graph* CubeWithTwoMuls(int num) { + Graph* g = new Graph(OpRegistry::Global()); + Tensor lhs(DT_FLOAT, TensorShape({64, 64, num / (64 * 64)})); + lhs.flat().setRandom(); + auto* x = test::graph::Constant(g, lhs); + auto* inner = test::graph::Binary(g, "Mul", x, x); + test::graph::Binary(g, "Mul", x, inner); + return g; +} + +Graph* CubeWithMulSquare(int num) { + Graph* g = new Graph(OpRegistry::Global()); + Tensor lhs(DT_FLOAT, TensorShape({64, 64, num / (64 * 64)})); + lhs.flat().setRandom(); + auto* x = test::graph::Constant(g, lhs); + auto* inner = test::graph::Unary(g, "Square", x); + test::graph::Binary(g, "Mul", test::graph::Constant(g, lhs), inner); + return g; +} + +#define BM_CUBE(DEVICE, Impl) \ + void BM_##DEVICE##_Cube_##Impl(int iters, int 
num) { \ + const int64 tot = static_cast(iters) * num; \ + testing::UseRealTime(); \ + testing::ItemsProcessed(tot); \ + testing::BytesProcessed(tot * sizeof(float)); \ + test::Benchmark(#DEVICE, Impl(num)).Run(iters); \ + } \ + BENCHMARK(BM_##DEVICE##_Cube_##Impl) \ + ->Arg(1 << 12) /* must >= 4096 */ \ + ->Arg(1 << 16) \ + ->Arg(1 << 20); + +BM_CUBE(cpu, CubeWithPow3); +BM_CUBE(cpu, CubeWithTwoMuls); +BM_CUBE(cpu, CubeWithMulSquare); +#if GOOGLE_CUDA +BM_CUBE(gpu, CubeWithPow3); +BM_CUBE(gpu, CubeWithTwoMuls); +BM_CUBE(gpu, CubeWithMulSquare); +#endif // GOOGLE_CUDA +#ifdef TENSORFLOW_USE_SYCL +BM_CUBE(sycl, CubeWithPow3); +BM_CUBE(sycl, CubeWithTwoMuls); +BM_CUBE(sycl, CubeWithMulSquare); +#endif // TENSORFLOW_USE_SYCL + +#undef BM_CUBE + template Graph* BiasAdd(int rows, int cols, DataType type) { Graph* g = new Graph(OpRegistry::Global()); diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD index 119f61d29ca..cee4ec2221f 100644 --- a/tensorflow/core/kernels/data/BUILD +++ b/tensorflow/core/kernels/data/BUILD @@ -984,6 +984,39 @@ tf_kernel_library( ], ) +tf_kernel_library( + name = "text_line_dataset_op", + srcs = ["text_line_dataset_op.cc"], + hdrs = ["text_line_dataset_op.h"], + deps = [ + ":name_utils", + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:dataset_ops_op_lib", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + ], +) + +tf_cc_test( + name = "text_line_dataset_op_test", + size = "small", + srcs = ["text_line_dataset_op_test.cc"], + deps = [ + ":dataset_test_base", + ":dataset_utils", + ":iterator_ops", + ":text_line_dataset_op", + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:dataset_ops_op_lib", + "//tensorflow/core:framework", + "//tensorflow/core:lib_internal", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + tf_kernel_library( name = "iterator_ops", srcs = ["iterator_ops.cc"], @@ -1063,7 
+1096,9 @@ tf_kernel_library( tf_kernel_library( name = "cache_dataset_ops", srcs = ["cache_dataset_ops.cc"], + hdrs = ["cache_dataset_ops.h"], deps = [ + ":name_utils", "//tensorflow/core:dataset_ops_op_lib", "//tensorflow/core:framework", "//tensorflow/core:lib", @@ -1072,6 +1107,23 @@ tf_kernel_library( ], ) +tf_cc_test( + name = "cache_dataset_ops_test", + srcs = ["cache_dataset_ops_test.cc"], + deps = [ + ":cache_dataset_ops", + ":dataset_test_base", + ":dataset_utils", + ":iterator_ops", + ":tensor_slice_dataset_op", + "//tensorflow/core:framework", + "//tensorflow/core:ptr_util", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + tf_kernel_library( name = "optimize_dataset_op", srcs = ["optimize_dataset_op.cc"], @@ -1165,6 +1217,7 @@ tf_kernel_library( ":take_dataset_op", ":tensor_dataset_op", ":tensor_slice_dataset_op", + ":text_line_dataset_op", ":window_dataset_op", ":zip_dataset_op", "//tensorflow/core:array_ops_op_lib", diff --git a/tensorflow/core/kernels/data/cache_dataset_ops.cc b/tensorflow/core/kernels/data/cache_dataset_ops.cc index 341a02cc259..9b1fed90463 100644 --- a/tensorflow/core/kernels/data/cache_dataset_ops.cc +++ b/tensorflow/core/kernels/data/cache_dataset_ops.cc @@ -12,10 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/framework/dataset.h" +#include "tensorflow/core/kernels/data/cache_dataset_ops.h" + #include "tensorflow/core/framework/partial_tensor_shape.h" #include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/data/name_utils.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/env.h" @@ -23,117 +25,245 @@ limitations under the License. namespace tensorflow { namespace data { -namespace { // See documentation in ../../ops/dataset_ops.cc for a high-level description of // the following op. -class CacheDatasetOp : public UnaryDatasetOpKernel { +/* static */ constexpr const char* const CacheDatasetOp::kDatasetType; +/* static */ constexpr const char* const CacheDatasetOp::kInputDataset; +/* static */ constexpr const char* const CacheDatasetOp::kFileName; +/* static */ constexpr const char* const CacheDatasetOp::kOutputTypes; +/* static */ constexpr const char* const CacheDatasetOp::kOutputShapes; + +constexpr char kKeyStrFormat[] = "%%%zuzu_%%%zuzu"; +constexpr char kPaddingSizeStrFormat[] = "%zu"; +constexpr char kFileDatasetPrefix[] = "File"; +constexpr char kMode[] = "Mode"; +constexpr char kLockFileSuffix[] = ".lockfile"; +constexpr char kIterationCompleted[] = "iteration_completed"; +constexpr char kCurIndex[] = "cur_index"; +constexpr char kShardId[] = "shard_id"; +constexpr char kCreatedAt[] = "Created at"; +constexpr char kMemoryDatasetPrefix[] = "Memory"; +constexpr char kMemoryCache[] = "MemoryCache"; +constexpr char kTFData[] = "tf_data"; +constexpr char kCacheClaimed[] = "cache_claimed"; +constexpr char kCacheSize[] = "cache_size"; +constexpr char kCache[] = "cache"; +constexpr char kSizeSuffix[] = ".size"; +constexpr char kCacheCompleted[] = "cache_completed"; +constexpr char kIndex[] = "index"; 
+constexpr char kImpl[] = "Impl"; + +class CacheDatasetOp::FileDataset : public DatasetBase { public: - explicit CacheDatasetOp(OpKernelConstruction* ctx) - : UnaryDatasetOpKernel(ctx) {} + explicit FileDataset(OpKernelContext* ctx, const DatasetBase* input, + string filename, Env* env) + : DatasetBase(DatasetContext(ctx)), + input_(input), + filename_(std::move(filename)), + env_(env), + num_tensors_(input->output_dtypes().size()), + tensor_index_padding_size_(StringPaddingSize(num_tensors_)), + item_index_padding_size_(StringPaddingSize(kMaxItems)), + tensor_format_string_(strings::Printf(kKeyStrFormat, + item_index_padding_size_, + tensor_index_padding_size_)) { + input_->Ref(); + DCHECK_EQ(item_index_padding_size_, 7); + } - void MakeDataset(OpKernelContext* ctx, DatasetBase* input, - DatasetBase** output) override { - // Parse out the filenames tensor. - string filename; - OP_REQUIRES_OK(ctx, - ParseScalarArgument(ctx, "filename", &filename)); + ~FileDataset() override { input_->Unref(); } - if (filename.empty()) { - *output = new MemoryDataset(ctx, input); - } else { - *output = new FileDataset(ctx, input, filename, ctx->env()); - } + std::unique_ptr MakeIteratorInternal( + const string& prefix) const override { + name_utils::IteratorPrefixParams params; + params.dataset_prefix = kFileDatasetPrefix; + return absl::make_unique(FileIterator::Params{ + this, name_utils::IteratorPrefix(kDatasetType, prefix, params)}); + } + + const DataTypeVector& output_dtypes() const override { + return input_->output_dtypes(); + } + + const std::vector& output_shapes() const override { + return input_->output_shapes(); + } + + string DebugString() const override { + name_utils::DatasetDebugStringParams params; + params.dataset_prefix = kFileDatasetPrefix; + return name_utils::DatasetDebugString(kDatasetType, params); + } + + int64 Cardinality() const override { return input_->Cardinality(); } + + protected: + Status AsGraphDefInternal(SerializationContext* ctx, + 
DatasetGraphDefBuilder* b, + Node** output) const override { + Node* input_graph = nullptr; + TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph)); + Node* filename = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(filename_, &filename)); + TF_RETURN_IF_ERROR(b->AddDataset(this, {input_graph, filename}, output)); + return Status::OK(); } private: - class FileDataset : public DatasetBase { + static size_t StringPaddingSize(size_t num_tensors) { + return strings::Printf(kPaddingSizeStrFormat, num_tensors - 1).size(); + } + + string FormatName(size_t item_index, size_t tensor_index) const { + return strings::Printf(tensor_format_string_.c_str(), item_index, + tensor_index); + } + + class FileIterator : public DatasetIterator { public: - explicit FileDataset(OpKernelContext* ctx, const DatasetBase* input, - string filename, Env* env) - : DatasetBase(DatasetContext(ctx)), - input_(input), - filename_(std::move(filename)), - env_(env), - num_tensors_(input->output_dtypes().size()), - tensor_index_padding_size_(StringPaddingSize(num_tensors_)), - item_index_padding_size_(StringPaddingSize(kMaxItems)), - tensor_format_string_(strings::Printf("%%%zuzu_%%%zuzu", - item_index_padding_size_, - tensor_index_padding_size_)) { - input_->Ref(); - DCHECK_EQ(item_index_padding_size_, 7); + explicit FileIterator(const Params& params) + : DatasetIterator(params) { + if (params.dataset->env_ + ->FileExists(MetaFilename(params.dataset->filename_)) + .ok()) { + mode_ = Mode::read; + } else { + mode_ = Mode::write; + } + InitializeIterator(); } - ~FileDataset() override { input_->Unref(); } - - std::unique_ptr MakeIteratorInternal( - const string& prefix) const override { - return absl::make_unique( - FileIterator::Params{this, strings::StrCat(prefix, "::FileCache")}); + Status Initialize(IteratorContext* ctx) override { + mutex_lock l(mu_); + return iterator_->Initialize(ctx); } - const DataTypeVector& output_dtypes() const override { - return input_->output_dtypes(); + Status 
GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override { + mutex_lock l(mu_); + return iterator_->GetNext(ctx, out_tensors, end_of_sequence); } - const std::vector& output_shapes() const override { - return input_->output_shapes(); - } - - string DebugString() const override { - return "CacheDatasetOp::FileDataset"; - } - - int64 Cardinality() const override { return input_->Cardinality(); } - protected: - Status AsGraphDefInternal(SerializationContext* ctx, - DatasetGraphDefBuilder* b, - Node** output) const override { - Node* input_graph = nullptr; - TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph)); - Node* filename = nullptr; - TF_RETURN_IF_ERROR(b->AddScalar(filename_, &filename)); - TF_RETURN_IF_ERROR(b->AddDataset(this, {input_graph, filename}, output)); - return Status::OK(); + std::shared_ptr CreateNode( + IteratorContext* ctx, model::Node::Args args) const override { + return model::MakeKnownRatioNode(std::move(args), + /*ratio=*/1); + } + + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + TF_RETURN_IF_ERROR(writer->WriteScalar(full_name(kMode), mode_)); + return SaveInput(writer, iterator_); + } + Status RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + { + int64 temp; + TF_RETURN_IF_ERROR(reader->ReadScalar(full_name(kMode), &temp)); + mode_ = static_cast(temp); + } + if (mode_ == Mode::write && + dataset() + ->env_->FileExists(MetaFilename(dataset()->filename_)) + .ok()) { + // This could happen if the cache was completely written after the + // checkpoint was saved. + LOG(WARNING) + << "It looks like the cache was already completely written(" + << MetaFilename(dataset()->filename_) + << ") after the last checkpoint was saved. Attempting to read " + << "the cache instead of continuing to write. 
If this is a " + << "mistake, please remove the above file and try running again."; + mode_ = Mode::read; + } + InitializeIterator(); + TF_RETURN_IF_ERROR(iterator_->Initialize(ctx)); + return RestoreInput(ctx, reader, iterator_); } private: - static size_t StringPaddingSize(size_t num_tensors) { - return strings::Printf("%zu", num_tensors - 1).size(); - } - - string FormatName(size_t item_index, size_t tensor_index) const { - return strings::Printf(tensor_format_string_.c_str(), item_index, - tensor_index); - } - - class FileIterator : public DatasetIterator { + // FileWriterIterator passes through and caches items from the input + // FileDataset. + // + // This iterator is used when the cache directory is not found on disk. It + // creates the cache directory, and passes on the underlying iterator's + // elements. + // + // Caching is performed by writing the input tensors to disk using the + // `BundleWriter`. Note that the cache gets fully flushed to disk only + // after the input iterator has been fully exhausted. If the program + // exits, before completion of an epoch, the cached state would be lost. + // To ensure that the partial cache persists across sessions, one should + // checkpoint the input pipeline. On each call to `SaveInternal` the + // partial cache gets flushed to disk in files with prefix + // _ where shard_id is unique for each checkpoint. + // When all elements have been produced, these shards get coalesced. 
+ class FileWriterIterator : public DatasetIterator { public: - explicit FileIterator(const Params& params) - : DatasetIterator(params) { - if (params.dataset->env_ - ->FileExists(MetaFilename(params.dataset->filename_)) - .ok()) { - mode_ = Mode::read; - } else { - mode_ = Mode::write; - } - InitializeIterator(); - } + explicit FileWriterIterator(const Params& params) + : DatasetIterator(params), + cur_index_(0), + shard_id_(0), + filename_( + strings::StrCat(params.dataset->filename_, "_", shard_id_)), + lockfile_(strings::StrCat(filename_, kLockFileSuffix)), + lockfile_created_(false), + iteration_completed_(false) {} Status Initialize(IteratorContext* ctx) override { - mutex_lock l(mu_); - return iterator_->Initialize(ctx); + return dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_); } Status GetNextInternal(IteratorContext* ctx, std::vector* out_tensors, bool* end_of_sequence) override { mutex_lock l(mu_); - return iterator_->GetNext(ctx, out_tensors, end_of_sequence); + *end_of_sequence = false; + TF_RETURN_IF_ERROR(EnsureLockFileExists(end_of_sequence)); + if (*end_of_sequence) { + return Status::OK(); + } + TF_RETURN_IF_ERROR(writer_->status()); + if (cur_index_ >= kMaxItems) { + // As a courtesy, close the [truncated] cache file. + Status s = Finish(); + if (!s.ok()) { + LOG(ERROR) << s; + } + return errors::InvalidArgument( + "Upstream iterator is producing more than ", kMaxItems, + " items, which is more than the cache limit."); + } + + TF_RETURN_IF_ERROR( + input_impl_->GetNext(ctx, out_tensors, end_of_sequence)); + if (*end_of_sequence && out_tensors->empty()) { + TF_RETURN_IF_ERROR(Finish()); + cur_index_++; + return Status::OK(); + } + if (out_tensors->size() != dataset()->num_tensors_) { + return errors::Internal( + "Upstream iterator returned invalid number of tensors. 
" + "Expected ", + dataset()->num_tensors_, " got: ", out_tensors->size()); + } + size_t tensor_index = 0; + for (const Tensor& t : *out_tensors) { + DCHECK_LT(tensor_index, dataset()->num_tensors_); + string key = dataset()->FormatName(cur_index_, tensor_index++); + TF_RETURN_IF_ERROR(writer_->Add(key, t)); + } + if (*end_of_sequence) { + TF_RETURN_IF_ERROR(Finish()); + } + cur_index_++; + return Status::OK(); } protected: @@ -145,578 +275,219 @@ class CacheDatasetOp : public UnaryDatasetOpKernel { Status SaveInternal(IteratorStateWriter* writer) override { mutex_lock l(mu_); - TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("mode"), mode_)); - return SaveInput(writer, iterator_); - } - Status RestoreInternal(IteratorContext* ctx, - IteratorStateReader* reader) override { - mutex_lock l(mu_); - { - int64 temp; - TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("mode"), &temp)); - mode_ = static_cast(temp); - } - if (mode_ == Mode::write && - dataset() - ->env_->FileExists(MetaFilename(dataset()->filename_)) - .ok()) { - // This could happen if the cache was completely written after the - // checkpoint was saved. - LOG(WARNING) - << "It looks like the cache was already completely written(" - << MetaFilename(dataset()->filename_) - << ") after the last checkpoint was saved. Attempting to read " - << "the cache instead of continuing to write. If this is a " - << "mistake, please remove the above file and try running again."; - mode_ = Mode::read; - } - InitializeIterator(); - TF_RETURN_IF_ERROR(iterator_->Initialize(ctx)); - return RestoreInput(ctx, reader, iterator_); - } - - private: - // FileWriterIterator passes through and caches items from the input - // FileDataset. - // - // This iterator is used when the cache directory is not found on disk. It - // creates the cache directory, and passes on the underlying iterator's - // elements. - // - // Caching is performed by writing the input tensors to disk using the - // `BundleWriter`. 
Note that the cache gets fully flushed to disk only - // after the input iterator has been fully exhausted. If the program - // exits, before completion of an epoch, the cached state would be lost. - // To ensure that the partial cache persists across sessions, one should - // checkpoint the input pipeline. On each call to `SaveInternal` the - // partial cache gets flushed to disk in files with prefix - // _ where shard_id is unique for each checkpoint. - // When all elements have been produced, these shards get coalesced. - class FileWriterIterator : public DatasetIterator { - public: - explicit FileWriterIterator(const Params& params) - : DatasetIterator(params), - cur_index_(0), - shard_id_(0), - filename_( - strings::StrCat(params.dataset->filename_, "_", shard_id_)), - lockfile_(strings::StrCat(filename_, ".lockfile")), - lockfile_created_(false), - iteration_completed_(false) {} - - Status Initialize(IteratorContext* ctx) override { - return dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_); - } - - Status GetNextInternal(IteratorContext* ctx, - std::vector* out_tensors, - bool* end_of_sequence) override { - mutex_lock l(mu_); - *end_of_sequence = false; - TF_RETURN_IF_ERROR(EnsureLockFileExists(end_of_sequence)); - if (*end_of_sequence) { - return Status::OK(); - } - TF_RETURN_IF_ERROR(writer_->status()); - if (cur_index_ >= kMaxItems) { - // As a courtesy, close the [truncated] cache file. 
- Status s = Finish(); - if (!s.ok()) { - LOG(ERROR) << s; - } - return errors::InvalidArgument( - "Upstream iterator is producing more than ", kMaxItems, - " items, which is more than the cache limit."); - } - + if (iteration_completed_) { TF_RETURN_IF_ERROR( - input_impl_->GetNext(ctx, out_tensors, end_of_sequence)); - if (*end_of_sequence && out_tensors->empty()) { - TF_RETURN_IF_ERROR(Finish()); - cur_index_++; - return Status::OK(); - } - if (out_tensors->size() != dataset()->num_tensors_) { - return errors::Internal( - "Upstream iterator returned invalid number of tensors. " - "Expected ", - dataset()->num_tensors_, " got: ", out_tensors->size()); - } - size_t tensor_index = 0; - for (const Tensor& t : *out_tensors) { - DCHECK_LT(tensor_index, dataset()->num_tensors_); - string key = dataset()->FormatName(cur_index_, tensor_index++); - TF_RETURN_IF_ERROR(writer_->Add(key, t)); - } - if (*end_of_sequence) { - TF_RETURN_IF_ERROR(Finish()); - } - cur_index_++; + writer->WriteScalar(full_name(kIterationCompleted), "")); return Status::OK(); } - protected: - std::shared_ptr CreateNode( - IteratorContext* ctx, model::Node::Args args) const override { - return model::MakeKnownRatioNode(std::move(args), - /*ratio=*/1); - } - - Status SaveInternal(IteratorStateWriter* writer) override { - mutex_lock l(mu_); - if (iteration_completed_) { - TF_RETURN_IF_ERROR( - writer->WriteScalar(full_name("iteration_completed"), "")); - return Status::OK(); - } - - // lockfile is created on the first call to GetNextInternal. The - // absence of a lockfile means that GetNextInternal was not called - // and hence nothing was written to cache. So we don't need to worry - // about flushing the current shard. This ensures that we never write - // empty shards. - if (lockfile_created_) { - // Flush the current bundle. - TF_RETURN_IF_ERROR(writer_->Finish()); - - // Note: We do not delete the lockfile here. 
We keep lockfiles of - // all shards around until the entire cache has been written to - // prevent concurrent iterators from corrupting any of the shards. - - // Start caching to a new shard. - shard_id_++; - filename_ = strings::StrCat(dataset()->filename_, "_", shard_id_); - lockfile_ = strings::StrCat(filename_, ".lockfile"); - lockfile_created_ = false; - } - TF_RETURN_IF_ERROR(SaveInput(writer, input_impl_)); - TF_RETURN_IF_ERROR( - writer->WriteScalar(full_name("cur_index"), cur_index_)); - TF_RETURN_IF_ERROR( - writer->WriteScalar(full_name("shard_id"), shard_id_)); - return Status::OK(); - } - - Status RestoreInternal(IteratorContext* ctx, - IteratorStateReader* reader) override { - mutex_lock l(mu_); - if (reader->Contains(full_name("iteration_completed"))) { - iteration_completed_ = true; - return Status::OK(); - } - - TF_RETURN_IF_ERROR(RestoreInput(ctx, reader, input_impl_)); - int64 temp; - // TODO(b/78048575): Update this when saving size_t tensors directly - // is supported. - { - TF_RETURN_IF_ERROR( - reader->ReadScalar(full_name("cur_index"), &temp)); - cur_index_ = static_cast(temp); - if (cur_index_ != temp) { - return errors::Internal("Invalid value for cur_index ", temp); - } - } - // TODO(b/78048575): Update this when saving size_t tensors directly - // is supported. 
- { - TF_RETURN_IF_ERROR( - reader->ReadScalar(full_name("shard_id"), &temp)); - shard_id_ = static_cast(temp); - if (shard_id_ != temp) { - return errors::Internal("Invalid value for shard_id ", temp); - } - } - filename_ = strings::StrCat(dataset()->filename_, "_", shard_id_); - lockfile_ = strings::StrCat(filename_, ".lockfile"); - writer_ = absl::make_unique(dataset()->env_, filename_); - return Status::OK(); - } - - private: - Status EnsureLockFileExists(bool* end_of_sequence) - EXCLUSIVE_LOCKS_REQUIRED(mu_) { - if (iteration_completed_) { - *end_of_sequence = true; - return Status::OK(); - } - if (lockfile_created_ && !iteration_completed_) return Status::OK(); - - // Perform rudimentary locking to help catch concurrent writes to the - // same cache files. - - // 1. Check that a checkpoint for the shard has not already been - // written. - if (dataset()->env_->FileExists(MetaFilename(filename_)).ok()) { - return errors::AlreadyExists("Existing cache files found: \n", - MetaFilename(filename_), "\n", - DataFilename(filename_, 0, 1), "\n", - "To continue delete the above files."); - } - - // 2. Check that there isn't a concurrent iterator that is writing - // to cache. - if (dataset()->env_->FileExists(lockfile_).ok()) { - // Attempt to read the contents of the lockfile. - char contents_scratch[151] = {0}; // Initialize all to 0. - StringPiece contents; - std::unique_ptr file; - if (dataset()->env_->NewRandomAccessFile(lockfile_, &file).ok()) { - file->Read(0, 150, &contents, contents_scratch).IgnoreError(); - } - return errors::AlreadyExists( - "There appears to be a concurrent caching iterator running - " - "cache lockfile already exists ('", - lockfile_, - "'). If you are sure no other running TF computations are " - "using this cache prefix, delete the lockfile and " - "re-initialize the iterator. Lockfile contents: ", - contents); - } - // Create the file, and write some basic contents. 
- std::unique_ptr lockfile; - TF_RETURN_IF_ERROR( - dataset()->env_->NewWritableFile(lockfile_, &lockfile)); - TF_RETURN_IF_ERROR(lockfile->Append( - strings::StrCat("Created at: ", dataset()->env_->NowSeconds()))); - - // At this point we know that - // 1. There is no conflicting checkpoint with prefix `filename_`. - // 2. There is no concurrent session that is trying to write a ckpt - // to filename. - // So it is safe to create a BundleWriter here. Note that it is - // unsafe to initialize the BundleWriter anywhere the above - // conditions are not met since BundleWriter's constructor creates - // new temp files which can delete the temp files created by a - // BundleWriter in another Session. - writer_ = absl::make_unique(dataset()->env_, filename_); - lockfile_created_ = true; - return Status::OK(); - } - - Status Finish() EXCLUSIVE_LOCKS_REQUIRED(mu_) { - iteration_completed_ = true; + // lockfile is created on the first call to GetNextInternal. The + // absence of a lockfile means that GetNextInternal was not called + // and hence nothing was written to cache. So we don't need to worry + // about flushing the current shard. This ensures that we never write + // empty shards. + if (lockfile_created_) { // Flush the current bundle. TF_RETURN_IF_ERROR(writer_->Finish()); - // Merge all the bundles. - // Currently there are `shard_id_ + 1` bundles, one for each - // checkpoint. Each bundle has prefix _ where `id` is an - // integer starting at 0 an incremented by 1 for each new checkpoint. - // We merge all these bundles into a bundle with prefix so - // that the next call to `MakeIterator` can build a - // `FileReaderIterator`. - { - std::vector prefixes; - prefixes.reserve(shard_id_ + 1); - for (size_t i = 0; i <= shard_id_; ++i) { - prefixes.emplace_back( - strings::StrCat(dataset()->filename_, "_", i)); - } - TF_RETURN_IF_ERROR( - MergeBundles(dataset()->env_, prefixes, dataset()->filename_)); - } - // Delete all lockfiles. 
- for (size_t i = 0; i <= shard_id_; ++i) { - TF_RETURN_IF_ERROR(dataset()->env_->DeleteFile( - strings::StrCat(dataset()->filename_, "_", i, ".lockfile"))); - } + + // Note: We do not delete the lockfile here. We keep lockfiles of + // all shards around until the entire cache has been written to + // prevent concurrent iterators from corrupting any of the shards. + + // Start caching to a new shard. + shard_id_++; + filename_ = strings::StrCat(dataset()->filename_, "_", shard_id_); + lockfile_ = strings::StrCat(filename_, kLockFileSuffix); + lockfile_created_ = false; + } + TF_RETURN_IF_ERROR(SaveInput(writer, input_impl_)); + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name(kCurIndex), cur_index_)); + TF_RETURN_IF_ERROR(writer->WriteScalar(full_name(kShardId), shard_id_)); + return Status::OK(); + } + + Status RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + if (reader->Contains(full_name(kIterationCompleted))) { + iteration_completed_ = true; return Status::OK(); } - mutex mu_; - size_t cur_index_ GUARDED_BY(mu_); - // Index of the current shard. This gets incremented whenever a new - // cache shard is saved. - size_t shard_id_ GUARDED_BY(mu_); - std::unique_ptr input_impl_ GUARDED_BY(mu_); - // The current prefix for the cache file. This is equal to - // `StrCat(dataset()->filename_, "_", shard_id_)`. 
- string filename_; - std::unique_ptr writer_ GUARDED_BY(mu_); - string lockfile_ GUARDED_BY(mu_); - bool lockfile_created_ GUARDED_BY(mu_); - bool iteration_completed_ GUARDED_BY(mu_); - }; // FileWriterIterator - - class FileReaderIterator : public DatasetIterator { - public: - explicit FileReaderIterator(const Params& params) - : DatasetIterator(params), - cur_index_(0), - reader_(dataset()->env_, dataset()->filename_), - iterator_restored_(false) {} - - Status GetNextInternal(IteratorContext* ctx, - std::vector* out_tensors, - bool* end_of_sequence) override { - mutex_lock l(mu_); - *end_of_sequence = false; - TF_RETURN_IF_ERROR(reader_.status()); - if (!reader_.Valid()) { - *end_of_sequence = true; - return Status::OK(); + TF_RETURN_IF_ERROR(RestoreInput(ctx, reader, input_impl_)); + int64 temp; + // TODO(b/78048575): Update this when saving size_t tensors directly + // is supported. + { + TF_RETURN_IF_ERROR(reader->ReadScalar(full_name(kCurIndex), &temp)); + cur_index_ = static_cast(temp); + if (cur_index_ != temp) { + return errors::Internal("Invalid value for cur_index ", temp); } - out_tensors->clear(); - out_tensors->resize(dataset()->num_tensors_); - - for (size_t i = 0; i < dataset()->num_tensors_; ++i) { - // When the iterator is restored from the checkpoint, `reader_` is - // already pointing at `key` so we do not need to skip the header - // entry. - if (!iterator_restored_) { - reader_.Next(); // The first entry in the table is a header. - } else { - iterator_restored_ = false; - } - if (!reader_.Valid()) { - out_tensors->clear(); - *end_of_sequence = true; - return Status::OK(); - } - StringPiece key = reader_.key(); - DCHECK_EQ(key, dataset()->FormatName(cur_index_, i)); - TF_RETURN_IF_ERROR(reader_.ReadCurrent(&(*out_tensors)[i])); - TF_RETURN_IF_ERROR(reader_.status()); + } + // TODO(b/78048575): Update this when saving size_t tensors directly + // is supported. 
+ { + TF_RETURN_IF_ERROR(reader->ReadScalar(full_name(kShardId), &temp)); + shard_id_ = static_cast(temp); + if (shard_id_ != temp) { + return errors::Internal("Invalid value for shard_id ", temp); } - cur_index_++; - return Status::OK(); } - - protected: - std::shared_ptr CreateNode( - IteratorContext* ctx, model::Node::Args args) const override { - return model::MakeKnownRatioNode(std::move(args), - /*ratio=*/1); - } - - Status SaveInternal(IteratorStateWriter* writer) override { - mutex_lock l(mu_); - TF_RETURN_IF_ERROR( - writer->WriteScalar(full_name("cur_index"), cur_index_)); - return Status::OK(); - } - - Status RestoreInternal( - IteratorContext* ctx, - IteratorStateReader* iterator_state_reader) override { - mutex_lock l(mu_); - { - // TODO(b/78048575): Update this when saving size_t tensors directly - // is supported. - int64 temp; - TF_RETURN_IF_ERROR(iterator_state_reader->ReadScalar( - full_name("cur_index"), &temp)); - cur_index_ = static_cast(temp); - if (cur_index_ != temp) { - return errors::Internal("Invalid value for cur_index ", temp); - } - } - if (!reader_.Valid()) { - return errors::Internal("Error initializing BundleReader."); - } - reader_.Seek(dataset()->FormatName(cur_index_, 0)); - iterator_restored_ = true; - return Status::OK(); - } - - private: - mutex mu_; - size_t cur_index_ GUARDED_BY(mu_); - BundleReader reader_ GUARDED_BY(mu_); - bool iterator_restored_ GUARDED_BY(mu_); - }; // FileReaderIterator - - void InitializeIterator() EXCLUSIVE_LOCKS_REQUIRED(mu_) { - // We intentionally use the same prefix for both `FileReaderIterator` - // and `FileWriterIterator`. Since at any time there will be at most - // one of them alive, there should be no conflicts. This allows both - // iterators to use a common key for `cur_index`. We leverage this - // in the corner case when this iterator is restored from an old - // checkpoint in `write` mode and the cache has been completely - // flushed to disk since then. 
In that case we simply build a - // `FileReaderIterator` and seek to the `cur_index`. - switch (mode_) { - case Mode::read: - iterator_ = absl::make_unique( - FileReaderIterator::Params{dataset(), - strings::StrCat(prefix(), "Impl")}); - break; - case Mode::write: - iterator_ = absl::make_unique( - FileWriterIterator::Params{dataset(), - strings::StrCat(prefix(), "Impl")}); - } - } - - mutex mu_; - enum Mode { read, write }; - Mode mode_ GUARDED_BY(mu_); - std::unique_ptr iterator_ GUARDED_BY(mu_); - }; // FileIterator - - const DatasetBase* const input_; - const string filename_; - Env* const env_; - const size_t num_tensors_; - const size_t tensor_index_padding_size_; - static const size_t kMaxItems = 10000000; // 10 million - const size_t item_index_padding_size_; - const string tensor_format_string_; - }; // FileDataset - - class MemoryDataset : public DatasetBase { - public: - explicit MemoryDataset(OpKernelContext* ctx, const DatasetBase* input) - : DatasetBase(DatasetContext(ctx)), input_(input) { - input->Ref(); - } - - ~MemoryDataset() override { input_->Unref(); } - - std::unique_ptr MakeIteratorInternal( - const string& prefix) const override { - return absl::make_unique(MemoryIterator::Params{ - this, strings::StrCat(prefix, "::MemoryCache")}); - } - - const DataTypeVector& output_dtypes() const override { - return input_->output_dtypes(); - } - - const std::vector& output_shapes() const override { - return input_->output_shapes(); - } - - string DebugString() const override { - return "CacheDatasetOp::MemoryDataset"; - } - - int64 Cardinality() const override { return input_->Cardinality(); } - - protected: - Status AsGraphDefInternal(SerializationContext* ctx, - DatasetGraphDefBuilder* b, - Node** output) const override { - Node* input_node = nullptr; - TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_node)); - Node* filename_node = nullptr; - TF_RETURN_IF_ERROR(b->AddScalar(string(""), &filename_node)); - TF_RETURN_IF_ERROR( - 
b->AddDataset(this, {input_node, filename_node}, output)); - return Status::OK(); - } - - private: - // A thread-safe data structure for caching dataset elements. - // - // The expected use is that a single `MemoryWriterIterator` populates the - // cache with dataset elements. Once all elements are cached, the cache can - // be used by one or more `MemoryReaderIterator`s. - class MemoryCache : public ResourceBase { - public: - MemoryCache() = default; - - string DebugString() const override { - return "CacheDataset::MemoryCache"; - } - - // Marks the cache as completed. - void Complete() { - mutex_lock l(mu_); - completed_ = true; - } - - // Returns whether the cache is claimed. - bool IsClaimed() { - tf_shared_lock l(mu_); - return claimed_; - } - - // Returns whether the cache is completed. - bool IsCompleted() { - tf_shared_lock l(mu_); - return completed_; - } - - // Attempts to claim the cache, returning whether the cache was claimed. - bool MaybeClaim() { - mutex_lock l(mu_); - if (!claimed_) { - claimed_ = true; - return true; - } - return false; - } - - // Resets the cache. - void Reset() { - mutex_lock l(mu_); - claimed_ = false; - completed_ = false; - cache_.clear(); - } - - // Returns the element at the given index. - const std::vector& at(int64 index) { - tf_shared_lock l(mu_); - DCHECK(index < cache_.size()); - return cache_[index]; - } - - // Adds the element to the cache. - void emplace_back(std::vector element) { - mutex_lock l(mu_); - cache_.emplace_back(std::move(element)); - } - - // Returns the size of the cache. - size_t size() { - tf_shared_lock l(mu_); - return cache_.size(); + filename_ = strings::StrCat(dataset()->filename_, "_", shard_id_); + lockfile_ = strings::StrCat(filename_, kLockFileSuffix); + writer_ = absl::make_unique(dataset()->env_, filename_); + return Status::OK(); } private: - mutex mu_; - // Determines whether a writer has claimed the cache. 
- bool claimed_ GUARDED_BY(mu_) = false; - // Determines whether all elements of the dataset have been cached. - bool completed_ GUARDED_BY(mu_) = false; - std::vector> cache_ GUARDED_BY(mu_); - }; - - class MemoryIterator : public DatasetIterator { - public: - explicit MemoryIterator(const Params& params) - : DatasetIterator(params) {} - - ~MemoryIterator() override { cache_->Unref(); } - - Status Initialize(IteratorContext* ctx) override { - mutex_lock l(mu_); - // Use the resource manager in the iterator context to get / create - // a cache. - ResourceMgr* mgr = ctx->resource_mgr(); - const string name = strings::StrCat( - prefix(), "::", dataset()->node_name(), "::MemoryCache"); - TF_RETURN_IF_ERROR(mgr->LookupOrCreate( - "tf_data", name, &cache_, [](MemoryCache** cache) { - *cache = new MemoryCache(); - return Status::OK(); - })); - mode_ = cache_->MaybeClaim() ? Mode::write : Mode::read; - InitializeIterator(); - if (mode_ == Mode::read && !cache_->IsCompleted()) { - return errors::Internal( - "Cache should only be read after it has been completed."); + Status EnsureLockFileExists(bool* end_of_sequence) + EXCLUSIVE_LOCKS_REQUIRED(mu_) { + if (iteration_completed_) { + *end_of_sequence = true; + return Status::OK(); } - return iterator_->Initialize(ctx); + if (lockfile_created_ && !iteration_completed_) return Status::OK(); + + // Perform rudimentary locking to help catch concurrent writes to the + // same cache files. + + // 1. Check that a checkpoint for the shard has not already been + // written. + if (dataset()->env_->FileExists(MetaFilename(filename_)).ok()) { + return errors::AlreadyExists("Existing cache files found: \n", + MetaFilename(filename_), "\n", + DataFilename(filename_, 0, 1), "\n", + "To continue delete the above files."); + } + + // 2. Check that there isn't a concurrent iterator that is writing + // to cache. + if (dataset()->env_->FileExists(lockfile_).ok()) { + // Attempt to read the contents of the lockfile. 
+ char contents_scratch[151] = {0}; // Initialize all to 0. + StringPiece contents; + std::unique_ptr file; + if (dataset()->env_->NewRandomAccessFile(lockfile_, &file).ok()) { + file->Read(0, 150, &contents, contents_scratch).IgnoreError(); + } + return errors::AlreadyExists( + "There appears to be a concurrent caching iterator running - " + "cache lockfile already exists ('", + lockfile_, + "'). If you are sure no other running TF computations are " + "using this cache prefix, delete the lockfile and " + "re-initialize the iterator. Lockfile contents: ", + contents); + } + // Create the file, and write some basic contents. + std::unique_ptr lockfile; + TF_RETURN_IF_ERROR( + dataset()->env_->NewWritableFile(lockfile_, &lockfile)); + TF_RETURN_IF_ERROR(lockfile->Append( + strings::StrCat(kCreatedAt, ": ", dataset()->env_->NowSeconds()))); + + // At this point we know that + // 1. There is no conflicting checkpoint with prefix `filename_`. + // 2. There is no concurrent session that is trying to write a ckpt + // to filename. + // So it is safe to create a BundleWriter here. Note that it is + // unsafe to initialize the BundleWriter anywhere the above + // conditions are not met since BundleWriter's constructor creates + // new temp files which can delete the temp files created by a + // BundleWriter in another Session. + writer_ = absl::make_unique(dataset()->env_, filename_); + lockfile_created_ = true; + return Status::OK(); } + Status Finish() EXCLUSIVE_LOCKS_REQUIRED(mu_) { + iteration_completed_ = true; + // Flush the current bundle. + TF_RETURN_IF_ERROR(writer_->Finish()); + // Merge all the bundles. + // Currently there are `shard_id_ + 1` bundles, one for each + // checkpoint. Each bundle has prefix _ where `id` is an + // integer starting at 0 an incremented by 1 for each new checkpoint. + // We merge all these bundles into a bundle with prefix so + // that the next call to `MakeIterator` can build a + // `FileReaderIterator`. 
+ { + std::vector prefixes; + prefixes.reserve(shard_id_ + 1); + for (size_t i = 0; i <= shard_id_; ++i) { + prefixes.emplace_back( + strings::StrCat(dataset()->filename_, "_", i)); + } + TF_RETURN_IF_ERROR( + MergeBundles(dataset()->env_, prefixes, dataset()->filename_)); + } + // Delete all lockfiles. + for (size_t i = 0; i <= shard_id_; ++i) { + TF_RETURN_IF_ERROR(dataset()->env_->DeleteFile( + strings::StrCat(dataset()->filename_, "_", i, kLockFileSuffix))); + } + return Status::OK(); + } + + mutex mu_; + size_t cur_index_ GUARDED_BY(mu_); + // Index of the current shard. This gets incremented whenever a new + // cache shard is saved. + size_t shard_id_ GUARDED_BY(mu_); + std::unique_ptr input_impl_ GUARDED_BY(mu_); + // The current prefix for the cache file. This is equal to + // `StrCat(dataset()->filename_, "_", shard_id_)`. + string filename_; + std::unique_ptr writer_ GUARDED_BY(mu_); + string lockfile_ GUARDED_BY(mu_); + bool lockfile_created_ GUARDED_BY(mu_); + bool iteration_completed_ GUARDED_BY(mu_); + }; // FileWriterIterator + + class FileReaderIterator : public DatasetIterator { + public: + explicit FileReaderIterator(const Params& params) + : DatasetIterator(params), + cur_index_(0), + reader_(dataset()->env_, dataset()->filename_), + iterator_restored_(false) {} + Status GetNextInternal(IteratorContext* ctx, std::vector* out_tensors, bool* end_of_sequence) override { mutex_lock l(mu_); - return iterator_->GetNext(ctx, out_tensors, end_of_sequence); + *end_of_sequence = false; + TF_RETURN_IF_ERROR(reader_.status()); + if (!reader_.Valid()) { + *end_of_sequence = true; + return Status::OK(); + } + out_tensors->clear(); + out_tensors->resize(dataset()->num_tensors_); + + for (size_t i = 0; i < dataset()->num_tensors_; ++i) { + // When the iterator is restored from the checkpoint, `reader_` is + // already pointing at `key` so we do not need to skip the header + // entry. 
+ if (!iterator_restored_) { + reader_.Next(); // The first entry in the table is a header. + } else { + iterator_restored_ = false; + } + if (!reader_.Valid()) { + out_tensors->clear(); + *end_of_sequence = true; + return Status::OK(); + } + StringPiece key = reader_.key(); + DCHECK_EQ(key, dataset()->FormatName(cur_index_, i)); + TF_RETURN_IF_ERROR(reader_.ReadCurrent(&(*out_tensors)[i])); + TF_RETURN_IF_ERROR(reader_.status()); + } + cur_index_++; + return Status::OK(); } protected: @@ -728,240 +499,494 @@ class CacheDatasetOp : public UnaryDatasetOpKernel { Status SaveInternal(IteratorStateWriter* writer) override { mutex_lock l(mu_); - TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("mode"), mode_)); - if (cache_->IsClaimed()) { + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name(kCurIndex), cur_index_)); + return Status::OK(); + } + + Status RestoreInternal( + IteratorContext* ctx, + IteratorStateReader* iterator_state_reader) override { + mutex_lock l(mu_); + { + // TODO(b/78048575): Update this when saving size_t tensors directly + // is supported. 
+ int64 temp; TF_RETURN_IF_ERROR( - writer->WriteScalar(full_name("cache_claimed"), "")); - size_t cache_size = cache_->size(); - TF_RETURN_IF_ERROR( - writer->WriteScalar(full_name("cache_size"), cache_size)); - for (size_t i = 0; i < cache_size; i++) { - auto& element = cache_->at(i); - TF_RETURN_IF_ERROR(writer->WriteScalar( - full_name(strings::StrCat("cache[", i, "].size")), - element.size())); - for (size_t j = 0; j < element.size(); ++j) { - TF_RETURN_IF_ERROR(writer->WriteTensor( - full_name(strings::StrCat("cache[", i, "][", j, "]")), - element[j])); - } - } - if (cache_->IsCompleted()) { - TF_RETURN_IF_ERROR( - writer->WriteScalar(full_name("cache_completed"), "")); + iterator_state_reader->ReadScalar(full_name(kCurIndex), &temp)); + cur_index_ = static_cast(temp); + if (cur_index_ != temp) { + return errors::Internal("Invalid value for cur_index ", temp); } } - return SaveInput(writer, iterator_); + if (!reader_.Valid()) { + return errors::Internal("Error initializing BundleReader."); + } + reader_.Seek(dataset()->FormatName(cur_index_, 0)); + iterator_restored_ = true; + return Status::OK(); + } + + private: + mutex mu_; + size_t cur_index_ GUARDED_BY(mu_); + BundleReader reader_ GUARDED_BY(mu_); + bool iterator_restored_ GUARDED_BY(mu_); + }; // FileReaderIterator + + void InitializeIterator() EXCLUSIVE_LOCKS_REQUIRED(mu_) { + // We intentionally use the same prefix for both `FileReaderIterator` + // and `FileWriterIterator`. Since at any time there will be at most + // one of them alive, there should be no conflicts. This allows both + // iterators to use a common key for `cur_index`. We leverage this + // in the corner case when this iterator is restored from an old + // checkpoint in `write` mode and the cache has been completely + // flushed to disk since then. In that case we simply build a + // `FileReaderIterator` and seek to the `cur_index`. 
+ switch (mode_) { + case Mode::read: + iterator_ = + absl::make_unique(FileReaderIterator::Params{ + dataset(), strings::StrCat(prefix(), kImpl)}); + break; + case Mode::write: + iterator_ = + absl::make_unique(FileWriterIterator::Params{ + dataset(), strings::StrCat(prefix(), kImpl)}); + } + } + + mutex mu_; + enum Mode { read, write }; + Mode mode_ GUARDED_BY(mu_); + std::unique_ptr iterator_ GUARDED_BY(mu_); + }; // FileIterator + + const DatasetBase* const input_; + const string filename_; + Env* const env_; + const size_t num_tensors_; + const size_t tensor_index_padding_size_; + static const size_t kMaxItems = 10000000; // 10 million + const size_t item_index_padding_size_; + const string tensor_format_string_; +}; // FileDataset + +class CacheDatasetOp::MemoryDataset : public DatasetBase { + public: + explicit MemoryDataset(OpKernelContext* ctx, const DatasetBase* input) + : DatasetBase(DatasetContext(ctx)), input_(input) { + input->Ref(); + } + + ~MemoryDataset() override { input_->Unref(); } + + std::unique_ptr MakeIteratorInternal( + const string& prefix) const override { + name_utils::IteratorPrefixParams params; + params.dataset_prefix = kMemoryDatasetPrefix; + return absl::make_unique(MemoryIterator::Params{ + this, name_utils::IteratorPrefix(kDatasetType, prefix, params)}); + } + + const DataTypeVector& output_dtypes() const override { + return input_->output_dtypes(); + } + + const std::vector& output_shapes() const override { + return input_->output_shapes(); + } + + string DebugString() const override { + name_utils::DatasetDebugStringParams params; + params.dataset_prefix = kMemoryDatasetPrefix; + return name_utils::DatasetDebugString(kDatasetType, params); + } + + int64 Cardinality() const override { return input_->Cardinality(); } + + protected: + Status AsGraphDefInternal(SerializationContext* ctx, + DatasetGraphDefBuilder* b, + Node** output) const override { + Node* input_node = nullptr; + TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, 
&input_node)); + Node* filename_node = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(string(""), &filename_node)); + TF_RETURN_IF_ERROR( + b->AddDataset(this, {input_node, filename_node}, output)); + return Status::OK(); + } + + private: + // A thread-safe data structure for caching dataset elements. + // + // The expected use is that a single `MemoryWriterIterator` populates the + // cache with dataset elements. Once all elements are cached, the cache can + // be used by one or more `MemoryReaderIterator`s. + class MemoryCache : public ResourceBase { + public: + MemoryCache() = default; + + string DebugString() const override { return "CacheDataset::MemoryCache"; } + + // Marks the cache as completed. + void Complete() { + mutex_lock l(mu_); + completed_ = true; + } + + // Returns whether the cache is claimed. + bool IsClaimed() { + tf_shared_lock l(mu_); + return claimed_; + } + + // Returns whether the cache is completed. + bool IsCompleted() { + tf_shared_lock l(mu_); + return completed_; + } + + // Attempts to claim the cache, returning whether the cache was claimed. + bool MaybeClaim() { + mutex_lock l(mu_); + if (!claimed_) { + claimed_ = true; + return true; + } + return false; + } + + // Resets the cache. + void Reset() { + mutex_lock l(mu_); + claimed_ = false; + completed_ = false; + cache_.clear(); + } + + // Returns the element at the given index. + const std::vector& at(int64 index) { + tf_shared_lock l(mu_); + DCHECK(index < cache_.size()); + return cache_[index]; + } + + // Adds the element to the cache. + void emplace_back(std::vector element) { + mutex_lock l(mu_); + cache_.emplace_back(std::move(element)); + } + + // Returns the size of the cache. + size_t size() { + tf_shared_lock l(mu_); + return cache_.size(); + } + + private: + mutex mu_; + // Determines whether a writer has claimed the cache. + bool claimed_ GUARDED_BY(mu_) = false; + // Determines whether all elements of the dataset have been cached. 
+ bool completed_ GUARDED_BY(mu_) = false; + std::vector> cache_ GUARDED_BY(mu_); + }; + + class MemoryIterator : public DatasetIterator { + public: + explicit MemoryIterator(const Params& params) + : DatasetIterator(params) {} + + ~MemoryIterator() override { cache_->Unref(); } + + Status Initialize(IteratorContext* ctx) override { + mutex_lock l(mu_); + // Use the resource manager in the iterator context to get / create + // a cache. + ResourceMgr* mgr = ctx->resource_mgr(); + const string name = strings::StrCat(prefix(), name_utils::kDelimiter, + dataset()->node_name(), + name_utils::kDelimiter, kMemoryCache); + TF_RETURN_IF_ERROR(mgr->LookupOrCreate( + kTFData, name, &cache_, [](MemoryCache** cache) { + *cache = new MemoryCache(); + return Status::OK(); + })); + mode_ = cache_->MaybeClaim() ? Mode::write : Mode::read; + InitializeIterator(); + if (mode_ == Mode::read && !cache_->IsCompleted()) { + return errors::Internal( + "Cache should only be read after it has been completed."); + } + return iterator_->Initialize(ctx); + } + + Status GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override { + mutex_lock l(mu_); + return iterator_->GetNext(ctx, out_tensors, end_of_sequence); + } + + protected: + std::shared_ptr CreateNode( + IteratorContext* ctx, model::Node::Args args) const override { + return model::MakeKnownRatioNode(std::move(args), + /*ratio=*/1); + } + + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + TF_RETURN_IF_ERROR(writer->WriteScalar(full_name(kMode), mode_)); + if (cache_->IsClaimed()) { + TF_RETURN_IF_ERROR(writer->WriteScalar(full_name(kCacheClaimed), "")); + size_t cache_size = cache_->size(); + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name(kCacheSize), cache_size)); + for (size_t i = 0; i < cache_size; i++) { + auto& element = cache_->at(i); + TF_RETURN_IF_ERROR(writer->WriteScalar( + full_name(strings::StrCat(kCache, "[", i, "]", kSizeSuffix)), + 
element.size())); + for (size_t j = 0; j < element.size(); ++j) { + TF_RETURN_IF_ERROR(writer->WriteTensor( + full_name(strings::StrCat(kCache, "[", i, "][", j, "]")), + element[j])); + } + } + if (cache_->IsCompleted()) { + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name(kCacheCompleted), "")); + } + } + return SaveInput(writer, iterator_); + } + + Status RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + iterator_.reset(); + cache_->Reset(); + { + int64 temp; + TF_RETURN_IF_ERROR(reader->ReadScalar(full_name(kMode), &temp)); + mode_ = static_cast(temp); + } + if (reader->Contains(full_name(kCacheClaimed))) { + CHECK(cache_->MaybeClaim()); + size_t cache_size; + { + int64 temp; + TF_RETURN_IF_ERROR(reader->ReadScalar(full_name(kCacheSize), &temp)); + cache_size = static_cast(temp); + } + for (size_t i = 0; i < cache_size; ++i) { + std::vector element; + size_t element_size; + { + int64 temp; + TF_RETURN_IF_ERROR(reader->ReadScalar( + full_name(strings::StrCat(kCache, "[", i, "]", kSizeSuffix)), + &temp)); + element_size = static_cast(temp); + } + element.reserve(element_size); + for (size_t j = 0; j < element_size; ++j) { + element.emplace_back(); + TF_RETURN_IF_ERROR(reader->ReadTensor( + full_name(strings::StrCat(kCache, "[", i, "][", j, "]")), + &element.back())); + } + cache_->emplace_back(std::move(element)); + } + if (reader->Contains(full_name(kCacheCompleted))) { + cache_->Complete(); + } + } + InitializeIterator(); + TF_RETURN_IF_ERROR(iterator_->Initialize(ctx)); + return RestoreInput(ctx, reader, iterator_); + } + + private: + class MemoryWriterIterator : public DatasetIterator { + public: + explicit MemoryWriterIterator(const Params& params, MemoryCache* cache) + : DatasetIterator(params), cache_(cache) { + CHECK(cache_); + } + + ~MemoryWriterIterator() override { + mutex_lock l(mu_); + if (cache_->size() > 0 && !cache_->IsCompleted()) { + LOG(WARNING) + << "The calling iterator did not fully read 
the dataset being " + "cached. In order to avoid unexpected truncation of the " + "dataset, the partially cached contents of the dataset " + "will be discarded. This can happen if you have an input " + "pipeline similar to `dataset.cache().take(k).repeat()`. " + "You should use `dataset.take(k).cache().repeat()` instead."; + cache_->Reset(); + } + } + + Status Initialize(IteratorContext* ctx) override { + return dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_); + } + + Status GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override { + mutex_lock l(mu_); + TF_RETURN_IF_ERROR( + input_impl_->GetNext(ctx, out_tensors, end_of_sequence)); + if (*end_of_sequence) { + cache_->Complete(); + return Status::OK(); + } + RecordBufferEnqueue(ctx, *out_tensors); + cache_->emplace_back(*out_tensors); + return Status::OK(); + } + + protected: + std::shared_ptr CreateNode( + IteratorContext* ctx, model::Node::Args args) const override { + return model::MakeKnownRatioNode(std::move(args), + /*ratio=*/1); + } + + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + return SaveInput(writer, input_impl_); } Status RestoreInternal(IteratorContext* ctx, IteratorStateReader* reader) override { mutex_lock l(mu_); - iterator_.reset(); - cache_->Reset(); - { - int64 temp; - TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("mode"), &temp)); - mode_ = static_cast(temp); - } - if (reader->Contains(full_name("cache_claimed"))) { - CHECK(cache_->MaybeClaim()); - size_t cache_size; - { - int64 temp; - TF_RETURN_IF_ERROR( - reader->ReadScalar(full_name("cache_size"), &temp)); - cache_size = static_cast(temp); - } - for (size_t i = 0; i < cache_size; ++i) { - std::vector element; - size_t element_size; - { - int64 temp; - TF_RETURN_IF_ERROR(reader->ReadScalar( - full_name(strings::StrCat("cache[", i, "].size")), &temp)); - element_size = static_cast(temp); - } - element.reserve(element_size); - for (size_t j = 0; j 
< element_size; ++j) { - element.emplace_back(); - TF_RETURN_IF_ERROR(reader->ReadTensor( - full_name(strings::StrCat("cache[", i, "][", j, "]")), - &element.back())); - } - cache_->emplace_back(std::move(element)); - } - if (reader->Contains(full_name("cache_completed"))) { - cache_->Complete(); - } - } - InitializeIterator(); - TF_RETURN_IF_ERROR(iterator_->Initialize(ctx)); - return RestoreInput(ctx, reader, iterator_); + return RestoreInput(ctx, reader, input_impl_); } private: - class MemoryWriterIterator : public DatasetIterator { - public: - explicit MemoryWriterIterator(const Params& params, MemoryCache* cache) - : DatasetIterator(params), cache_(cache) { - CHECK(cache_); - } + mutex mu_; + std::unique_ptr input_impl_ GUARDED_BY(mu_); + MemoryCache* const cache_ GUARDED_BY(mu_); // not owned. + }; // MemoryWriterIterator - ~MemoryWriterIterator() override { - mutex_lock l(mu_); - if (cache_->size() > 0 && !cache_->IsCompleted()) { - LOG(WARNING) - << "The calling iterator did not fully read the dataset being " - "cached. In order to avoid unexpected truncation of the " - "dataset, the partially cached contents of the dataset " - "will be discarded. This can happen if you have an input " - "pipeline similar to `dataset.cache().take(k).repeat()`. " - "You should use `dataset.take(k).cache().repeat()` instead."; - cache_->Reset(); - } - } + class MemoryReaderIterator : public DatasetIterator { + public: + explicit MemoryReaderIterator(const Params& params, MemoryCache* cache) + : DatasetIterator(params), cache_(cache), index_(0) { + CHECK(cache); + } - Status Initialize(IteratorContext* ctx) override { - return dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_); + Status Initialize(IteratorContext* ctx) override { + // The memory allocated for the cache is owned by the parent + // dataset but performance modeling uses the iterator abstraction and + // thus we record the memory allocated for the cache here. 
The caveat + // is that this is incorrect if there are concurrent instances of this + // iterator. + tf_shared_lock l(mu_); + for (size_t i = 0; i < cache_->size(); ++i) { + RecordBufferEnqueue(ctx, cache_->at(i)); } + return Status::OK(); + } - Status GetNextInternal(IteratorContext* ctx, - std::vector* out_tensors, - bool* end_of_sequence) override { - mutex_lock l(mu_); - TF_RETURN_IF_ERROR( - input_impl_->GetNext(ctx, out_tensors, end_of_sequence)); - if (*end_of_sequence) { - cache_->Complete(); - return Status::OK(); - } - RecordBufferEnqueue(ctx, *out_tensors); - cache_->emplace_back(*out_tensors); + Status GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override { + mutex_lock l(mu_); + if (index_ < cache_->size()) { + const std::vector& cache_tensors = cache_->at(index_); + out_tensors->insert(out_tensors->begin(), cache_tensors.begin(), + cache_tensors.end()); + index_++; + *end_of_sequence = false; return Status::OK(); - } - - protected: - std::shared_ptr CreateNode( - IteratorContext* ctx, model::Node::Args args) const override { - return model::MakeKnownRatioNode(std::move(args), - /*ratio=*/1); - } - - Status SaveInternal(IteratorStateWriter* writer) override { - mutex_lock l(mu_); - return SaveInput(writer, input_impl_); - } - - Status RestoreInternal(IteratorContext* ctx, - IteratorStateReader* reader) override { - mutex_lock l(mu_); - return RestoreInput(ctx, reader, input_impl_); - } - - private: - mutex mu_; - std::unique_ptr input_impl_ GUARDED_BY(mu_); - MemoryCache* const cache_ GUARDED_BY(mu_); // not owned. 
- }; // MemoryWriterIterator - - class MemoryReaderIterator : public DatasetIterator { - public: - explicit MemoryReaderIterator(const Params& params, MemoryCache* cache) - : DatasetIterator(params), cache_(cache), index_(0) { - CHECK(cache); - } - - Status Initialize(IteratorContext* ctx) override { - // The memory allocated for the cache is owned by the parent - // dataset but performance modeling uses the iterator abstraction and - // thus we record the memory allocated for the cache here. The caveat - // is that this is incorrect if there are concurrent instances of this - // iterator. - tf_shared_lock l(mu_); - for (size_t i = 0; i < cache_->size(); ++i) { - RecordBufferEnqueue(ctx, cache_->at(i)); - } + } else { + *end_of_sequence = true; return Status::OK(); } - - Status GetNextInternal(IteratorContext* ctx, - std::vector* out_tensors, - bool* end_of_sequence) override { - mutex_lock l(mu_); - if (index_ < cache_->size()) { - const std::vector& cache_tensors = cache_->at(index_); - out_tensors->insert(out_tensors->begin(), cache_tensors.begin(), - cache_tensors.end()); - index_++; - *end_of_sequence = false; - return Status::OK(); - } else { - *end_of_sequence = true; - return Status::OK(); - } - } - - protected: - std::shared_ptr CreateNode( - IteratorContext* ctx, model::Node::Args args) const override { - return model::MakeKnownRatioNode(std::move(args), - /*ratio=*/1); - } - - Status SaveInternal(IteratorStateWriter* writer) override { - mutex_lock l(mu_); - TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("index"), index_)); - return Status::OK(); - } - - Status RestoreInternal(IteratorContext* ctx, - IteratorStateReader* reader) override { - mutex_lock l(mu_); - { - int64 temp; - TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("index"), &temp)); - index_ = static_cast(temp); - } - return Status::OK(); - } - - private: - mutex mu_; - MemoryCache* const cache_ GUARDED_BY(mu_); // not owned. 
- size_t index_ GUARDED_BY(mu_); - }; // MemoryReaderIterator - - void InitializeIterator() EXCLUSIVE_LOCKS_REQUIRED(mu_) { - switch (mode_) { - case Mode::read: - iterator_ = absl::make_unique( - MemoryReaderIterator::Params{dataset(), - strings::StrCat(prefix(), "Impl")}, - cache_); - break; - case Mode::write: - iterator_ = absl::make_unique( - MemoryWriterIterator::Params{dataset(), - strings::StrCat(prefix(), "Impl")}, - cache_); - } } + protected: + std::shared_ptr CreateNode( + IteratorContext* ctx, model::Node::Args args) const override { + return model::MakeKnownRatioNode(std::move(args), + /*ratio=*/1); + } + + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + TF_RETURN_IF_ERROR(writer->WriteScalar(full_name(kIndex), index_)); + return Status::OK(); + } + + Status RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + { + int64 temp; + TF_RETURN_IF_ERROR(reader->ReadScalar(full_name(kIndex), &temp)); + index_ = static_cast(temp); + } + return Status::OK(); + } + + private: mutex mu_; - MemoryCache* cache_ GUARDED_BY(mu_); // not owned. - enum Mode { read, write }; - Mode mode_ GUARDED_BY(mu_); - std::unique_ptr iterator_ GUARDED_BY(mu_); - }; // MemoryIterator + MemoryCache* const cache_ GUARDED_BY(mu_); // not owned. + size_t index_ GUARDED_BY(mu_); + }; // MemoryReaderIterator - const DatasetBase* const input_; - }; // MemoryDataset -}; // CacheDatasetOp + void InitializeIterator() EXCLUSIVE_LOCKS_REQUIRED(mu_) { + switch (mode_) { + case Mode::read: + iterator_ = absl::make_unique( + MemoryReaderIterator::Params{dataset(), + strings::StrCat(prefix(), kImpl)}, + cache_); + break; + case Mode::write: + iterator_ = absl::make_unique( + MemoryWriterIterator::Params{dataset(), + strings::StrCat(prefix(), kImpl)}, + cache_); + } + } + mutex mu_; + MemoryCache* cache_ GUARDED_BY(mu_); // not owned. 
+ enum Mode { read, write }; + Mode mode_ GUARDED_BY(mu_); + std::unique_ptr iterator_ GUARDED_BY(mu_); + }; // MemoryIterator + + const DatasetBase* const input_; +}; // MemoryDataset + +CacheDatasetOp::CacheDatasetOp(OpKernelConstruction* ctx) + : UnaryDatasetOpKernel(ctx) {} + +void CacheDatasetOp::MakeDataset(OpKernelContext* ctx, DatasetBase* input, + DatasetBase** output) { + // Parse out the filenames tensor. + string filename; + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, kFileName, &filename)); + + if (filename.empty()) { + *output = new MemoryDataset(ctx, input); + } else { + *output = new FileDataset(ctx, input, filename, ctx->env()); + } +} + +namespace { REGISTER_KERNEL_BUILDER(Name("CacheDataset").Device(DEVICE_CPU), CacheDatasetOp); - } // namespace } // namespace data } // namespace tensorflow diff --git a/tensorflow/core/kernels/data/cache_dataset_ops.h b/tensorflow/core/kernels/data/cache_dataset_ops.h new file mode 100644 index 00000000000..af023a60075 --- /dev/null +++ b/tensorflow/core/kernels/data/cache_dataset_ops.h @@ -0,0 +1,45 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_CORE_KERNELS_DATA_CACHE_DATASET_OP_H_ +#define TENSORFLOW_CORE_KERNELS_DATA_CACHE_DATASET_OP_H_ + +#include "tensorflow/core/framework/dataset.h" + +namespace tensorflow { +namespace data { + +class CacheDatasetOp : public UnaryDatasetOpKernel { + public: + static constexpr const char* const kDatasetType = "Cache"; + static constexpr const char* const kInputDataset = "input_dataset"; + static constexpr const char* const kFileName = "filename"; + static constexpr const char* const kOutputTypes = "output_types"; + static constexpr const char* const kOutputShapes = "output_shapes"; + + explicit CacheDatasetOp(OpKernelConstruction* ctx); + + protected: + void MakeDataset(OpKernelContext* ctx, DatasetBase* input, + DatasetBase** output) override; + + private: + class FileDataset; + class MemoryDataset; +}; + +} // namespace data +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_KERNELS_DATA_CACHE_DATASET_OP_H_ diff --git a/tensorflow/core/kernels/data/cache_dataset_ops_test.cc b/tensorflow/core/kernels/data/cache_dataset_ops_test.cc new file mode 100644 index 00000000000..812d719946f --- /dev/null +++ b/tensorflow/core/kernels/data/cache_dataset_ops_test.cc @@ -0,0 +1,533 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/core/kernels/data/cache_dataset_ops.h" + +#include "tensorflow/core/kernels/data/dataset_test_base.h" + +namespace tensorflow { +namespace data { +namespace { + +constexpr char kNodeName[] = "cache_dataset"; +constexpr char kIteratorPrefix[] = "Iterator"; +constexpr char kFileDatasetPrefix[] = "File"; +constexpr char kMemoryDatasetPrefix[] = "Memory"; + +class CacheDatasetOpTest : public DatasetOpsTestBase { + protected: + // Creates `TensorSliceDataset` variant tensor from the input vector of + // tensors. + Status CreateTensorSliceDatasetTensor( + std::vector* const tensor_vector, Tensor* dataset_tensor) { + DatasetBase* tensor_slice_dataset; + TF_RETURN_IF_ERROR(CreateTensorSliceDataset( + "tensor_slice_node", tensor_vector, &tensor_slice_dataset)); + TF_RETURN_IF_ERROR( + StoreDatasetInVariantTensor(tensor_slice_dataset, dataset_tensor)); + return Status::OK(); + } + + // Create a new `CacheDataset` op kernel. + Status CreateCacheDatasetOpKernel( + const DataTypeVector& output_types, + const std::vector& output_shapes, + std::unique_ptr* cache_dataset_op_kernel) { + NodeDef node_def = test::function::NDef( + kNodeName, name_utils::OpName(CacheDatasetOp::kDatasetType), + {CacheDatasetOp::kInputDataset, CacheDatasetOp::kFileName}, + {{CacheDatasetOp::kOutputTypes, output_types}, + {CacheDatasetOp::kOutputShapes, output_shapes}}); + TF_RETURN_IF_ERROR(CreateOpKernel(node_def, cache_dataset_op_kernel)); + return Status::OK(); + } + + // Create a new `CacheDataset` op kernel context. 
+ Status CreateCacheDatasetContext( + OpKernel* const op_kernel, + gtl::InlinedVector* const inputs, + std::unique_ptr* context) { + TF_RETURN_IF_ERROR(CheckOpKernelInput(*op_kernel, *inputs)); + TF_RETURN_IF_ERROR(CreateOpKernelContext(op_kernel, inputs, context)); + return Status::OK(); + } +}; + +struct TestCase { + std::vector input_tensors; + string file_name; + std::vector expected_outputs; + DataTypeVector expected_output_dtypes; + std::vector expected_output_shapes; + int64 expected_cardinality; + std::vector breakpoints; +}; + +// Test case 1: cache data in file. +TestCase TestCase1() { + return { + /*input_tensors*/ {DatasetOpsTestBase::CreateTensor( + TensorShape{3, 3, 1}, {0, 1, 2, 3, 4, 5, 6, 7, 8})}, + /*file_name*/ absl::StrCat(testing::TmpDir(), "/cache_data"), + /*expected_outputs*/ + {DatasetOpsTestBase::CreateTensor(TensorShape{3, 1}, {0, 1, 2}), + DatasetOpsTestBase::CreateTensor(TensorShape{3, 1}, {3, 4, 5}), + DatasetOpsTestBase::CreateTensor(TensorShape{3, 1}, {6, 7, 8})}, + /*expected_output_dtypes*/ {DT_INT64}, + /*expected_output_shapes*/ {PartialTensorShape({3, 1})}, + /*expected_cardinality*/ 3, + /*breakpoints*/ {0, 4, 11}}; +} + +// Test case 2: cache empty data in file. +TestCase TestCase2() { + return {/*input_tensors*/ { + DatasetOpsTestBase::CreateTensor(TensorShape{0}, {})}, + /*file_name*/ absl::StrCat(testing::TmpDir(), "/empty_cache_data"), + /*expected_outputs*/ {}, + /*expected_output_dtypes*/ {DT_INT64}, + /*expected_output_shapes*/ {PartialTensorShape({})}, + /*expected_cardinality*/ 0, + /*breakpoints*/ {0, 4, 11}}; +} + +// Test case 3: cache data in memory. 
+TestCase TestCase3() { + return { + /*input_tensors*/ {DatasetOpsTestBase::CreateTensor( + TensorShape{3, 3, 1}, {0, 1, 2, 3, 4, 5, 6, 7, 8})}, + /*file_name*/ "", + /*expected_outputs*/ + {DatasetOpsTestBase::CreateTensor(TensorShape{3, 1}, {0, 1, 2}), + DatasetOpsTestBase::CreateTensor(TensorShape{3, 1}, {3, 4, 5}), + DatasetOpsTestBase::CreateTensor(TensorShape{3, 1}, {6, 7, 8})}, + /*expected_output_dtypes*/ {DT_INT64}, + /*expected_output_shapes*/ {PartialTensorShape({3, 1})}, + /*expected_cardinality*/ 3, + /*breakpoints*/ {0, 4, 11}}; +} + +// Test case 4: cache empty data in memory. +TestCase TestCase4() { + return {/*input_tensors*/ { + DatasetOpsTestBase::CreateTensor(TensorShape{0}, {})}, + /*file_name*/ "", + /*expected_outputs*/ {}, + /*expected_output_dtypes*/ {DT_INT64}, + /*expected_output_shapes*/ {PartialTensorShape({})}, + /*expected_cardinality*/ 0, + /*breakpoints*/ {0, 4, 11}}; +} + +class ParameterizedCacheDatasetOpTest + : public CacheDatasetOpTest, + public ::testing::WithParamInterface {}; + +TEST_P(ParameterizedCacheDatasetOpTest, GetNext) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + std::unique_ptr cache_dataset_kernel; + TF_ASSERT_OK(CreateCacheDatasetOpKernel(test_case.expected_output_dtypes, + test_case.expected_output_shapes, + &cache_dataset_kernel)); + Tensor tensor_slice_dataset_tensor(DT_VARIANT, TensorShape({})); + std::vector inputs_for_tensor_slice_dataset = test_case.input_tensors; + TF_ASSERT_OK(CreateTensorSliceDatasetTensor(&inputs_for_tensor_slice_dataset, + &tensor_slice_dataset_tensor)); + Tensor file_name = CreateTensor(TensorShape{}, {test_case.file_name}); + gtl::InlinedVector inputs( + {TensorValue(&tensor_slice_dataset_tensor), TensorValue(&file_name)}); + std::unique_ptr cache_dataset_context; + TF_ASSERT_OK(CreateCacheDatasetContext(cache_dataset_kernel.get(), &inputs, + 
&cache_dataset_context)); + DatasetBase* cache_dataset; + TF_ASSERT_OK(CreateDataset(cache_dataset_kernel.get(), + cache_dataset_context.get(), &cache_dataset)); + core::ScopedUnref scoped_unref(cache_dataset); + + std::unique_ptr iterator_ctx; + TF_ASSERT_OK( + CreateIteratorContext(cache_dataset_context.get(), &iterator_ctx)); + std::unique_ptr iterator; + TF_ASSERT_OK(cache_dataset->MakeIterator(iterator_ctx.get(), kIteratorPrefix, + &iterator)); + + // Test the write mode. + bool end_of_sequence = false; + std::vector out_tensors; + while (!end_of_sequence) { + std::vector next; + TF_EXPECT_OK( + iterator->GetNext(iterator_ctx.get(), &next, &end_of_sequence)); + out_tensors.insert(out_tensors.end(), next.begin(), next.end()); + } + TF_EXPECT_OK(ExpectEqual(out_tensors, test_case.expected_outputs, + /*compare_order*/ true)); + + // Test the read mode. + TF_ASSERT_OK(cache_dataset->MakeIterator(iterator_ctx.get(), kIteratorPrefix, + &iterator)); + end_of_sequence = false; + out_tensors.clear(); + while (!end_of_sequence) { + std::vector next; + TF_EXPECT_OK( + iterator->GetNext(iterator_ctx.get(), &next, &end_of_sequence)); + out_tensors.insert(out_tensors.end(), next.begin(), next.end()); + } + TF_EXPECT_OK(ExpectEqual(out_tensors, test_case.expected_outputs, + /*compare_order*/ true)); +} + +TEST_F(CacheDatasetOpTest, DatasetNodeName) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = TestCase1(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + std::unique_ptr cache_dataset_kernel; + TF_ASSERT_OK(CreateCacheDatasetOpKernel(test_case.expected_output_dtypes, + test_case.expected_output_shapes, + &cache_dataset_kernel)); + Tensor tensor_slice_dataset_tensor(DT_VARIANT, TensorShape({})); + std::vector inputs_for_tensor_slice_dataset = test_case.input_tensors; + TF_ASSERT_OK(CreateTensorSliceDatasetTensor(&inputs_for_tensor_slice_dataset, + &tensor_slice_dataset_tensor)); + Tensor file_name = 
CreateTensor(TensorShape{}, {test_case.file_name}); + gtl::InlinedVector inputs( + {TensorValue(&tensor_slice_dataset_tensor), TensorValue(&file_name)}); + std::unique_ptr cache_dataset_context; + TF_ASSERT_OK(CreateCacheDatasetContext(cache_dataset_kernel.get(), &inputs, + &cache_dataset_context)); + DatasetBase* cache_dataset; + TF_ASSERT_OK(CreateDataset(cache_dataset_kernel.get(), + cache_dataset_context.get(), &cache_dataset)); + core::ScopedUnref scoped_unref(cache_dataset); + + EXPECT_EQ(cache_dataset->node_name(), kNodeName); +} + +TEST_P(ParameterizedCacheDatasetOpTest, DatasetTypeString) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + std::unique_ptr cache_dataset_kernel; + TF_ASSERT_OK(CreateCacheDatasetOpKernel(test_case.expected_output_dtypes, + test_case.expected_output_shapes, + &cache_dataset_kernel)); + Tensor tensor_slice_dataset_tensor(DT_VARIANT, TensorShape({})); + std::vector inputs_for_tensor_slice_dataset = test_case.input_tensors; + TF_ASSERT_OK(CreateTensorSliceDatasetTensor(&inputs_for_tensor_slice_dataset, + &tensor_slice_dataset_tensor)); + Tensor file_name = CreateTensor(TensorShape{}, {test_case.file_name}); + gtl::InlinedVector inputs( + {TensorValue(&tensor_slice_dataset_tensor), TensorValue(&file_name)}); + std::unique_ptr cache_dataset_context; + TF_ASSERT_OK(CreateCacheDatasetContext(cache_dataset_kernel.get(), &inputs, + &cache_dataset_context)); + DatasetBase* cache_dataset; + TF_ASSERT_OK(CreateDataset(cache_dataset_kernel.get(), + cache_dataset_context.get(), &cache_dataset)); + core::ScopedUnref scoped_unref(cache_dataset); + + EXPECT_EQ(cache_dataset->type_string(), + name_utils::OpName(CacheDatasetOp::kDatasetType)); +} + +TEST_P(ParameterizedCacheDatasetOpTest, DatasetOutputDtypes) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); 
+ TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + std::unique_ptr cache_dataset_kernel; + TF_ASSERT_OK(CreateCacheDatasetOpKernel(test_case.expected_output_dtypes, + test_case.expected_output_shapes, + &cache_dataset_kernel)); + Tensor tensor_slice_dataset_tensor(DT_VARIANT, TensorShape({})); + std::vector inputs_for_tensor_slice_dataset = test_case.input_tensors; + TF_ASSERT_OK(CreateTensorSliceDatasetTensor(&inputs_for_tensor_slice_dataset, + &tensor_slice_dataset_tensor)); + Tensor file_name = CreateTensor(TensorShape{}, {test_case.file_name}); + gtl::InlinedVector inputs( + {TensorValue(&tensor_slice_dataset_tensor), TensorValue(&file_name)}); + std::unique_ptr cache_dataset_context; + TF_ASSERT_OK(CreateCacheDatasetContext(cache_dataset_kernel.get(), &inputs, + &cache_dataset_context)); + DatasetBase* cache_dataset; + TF_ASSERT_OK(CreateDataset(cache_dataset_kernel.get(), + cache_dataset_context.get(), &cache_dataset)); + core::ScopedUnref scoped_unref(cache_dataset); + + TF_EXPECT_OK(VerifyTypesMatch(cache_dataset->output_dtypes(), + test_case.expected_output_dtypes)); +} + +TEST_P(ParameterizedCacheDatasetOpTest, DatasetOutputShapes) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + std::unique_ptr cache_dataset_kernel; + TF_ASSERT_OK(CreateCacheDatasetOpKernel(test_case.expected_output_dtypes, + test_case.expected_output_shapes, + &cache_dataset_kernel)); + Tensor tensor_slice_dataset_tensor(DT_VARIANT, TensorShape({})); + std::vector inputs_for_tensor_slice_dataset = test_case.input_tensors; + TF_ASSERT_OK(CreateTensorSliceDatasetTensor(&inputs_for_tensor_slice_dataset, + &tensor_slice_dataset_tensor)); + Tensor file_name = CreateTensor(TensorShape{}, {test_case.file_name}); + gtl::InlinedVector inputs( + {TensorValue(&tensor_slice_dataset_tensor), TensorValue(&file_name)}); + std::unique_ptr cache_dataset_context; + 
TF_ASSERT_OK(CreateCacheDatasetContext(cache_dataset_kernel.get(), &inputs, + &cache_dataset_context)); + DatasetBase* cache_dataset; + TF_ASSERT_OK(CreateDataset(cache_dataset_kernel.get(), + cache_dataset_context.get(), &cache_dataset)); + core::ScopedUnref scoped_unref(cache_dataset); + + TF_EXPECT_OK(VerifyShapesCompatible(cache_dataset->output_shapes(), + test_case.expected_output_shapes)); +} + +TEST_P(ParameterizedCacheDatasetOpTest, Cardinality) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + std::unique_ptr cache_dataset_kernel; + TF_ASSERT_OK(CreateCacheDatasetOpKernel(test_case.expected_output_dtypes, + test_case.expected_output_shapes, + &cache_dataset_kernel)); + Tensor tensor_slice_dataset_tensor(DT_VARIANT, TensorShape({})); + std::vector inputs_for_tensor_slice_dataset = test_case.input_tensors; + TF_ASSERT_OK(CreateTensorSliceDatasetTensor(&inputs_for_tensor_slice_dataset, + &tensor_slice_dataset_tensor)); + Tensor file_name = CreateTensor(TensorShape{}, {test_case.file_name}); + gtl::InlinedVector inputs( + {TensorValue(&tensor_slice_dataset_tensor), TensorValue(&file_name)}); + std::unique_ptr cache_dataset_context; + TF_ASSERT_OK(CreateCacheDatasetContext(cache_dataset_kernel.get(), &inputs, + &cache_dataset_context)); + DatasetBase* cache_dataset; + TF_ASSERT_OK(CreateDataset(cache_dataset_kernel.get(), + cache_dataset_context.get(), &cache_dataset)); + core::ScopedUnref scoped_unref(cache_dataset); + + EXPECT_EQ(cache_dataset->Cardinality(), test_case.expected_cardinality); +} + +TEST_P(ParameterizedCacheDatasetOpTest, DatasetSave) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + std::unique_ptr cache_dataset_kernel; + TF_ASSERT_OK(CreateCacheDatasetOpKernel(test_case.expected_output_dtypes, 
+ test_case.expected_output_shapes, + &cache_dataset_kernel)); + Tensor tensor_slice_dataset_tensor(DT_VARIANT, TensorShape({})); + std::vector inputs_for_tensor_slice_dataset = test_case.input_tensors; + TF_ASSERT_OK(CreateTensorSliceDatasetTensor(&inputs_for_tensor_slice_dataset, + &tensor_slice_dataset_tensor)); + Tensor file_name = CreateTensor(TensorShape{}, {test_case.file_name}); + gtl::InlinedVector inputs( + {TensorValue(&tensor_slice_dataset_tensor), TensorValue(&file_name)}); + std::unique_ptr cache_dataset_context; + TF_ASSERT_OK(CreateCacheDatasetContext(cache_dataset_kernel.get(), &inputs, + &cache_dataset_context)); + DatasetBase* cache_dataset; + TF_ASSERT_OK(CreateDataset(cache_dataset_kernel.get(), + cache_dataset_context.get(), &cache_dataset)); + core::ScopedUnref scoped_unref(cache_dataset); + + std::unique_ptr serialization_context; + TF_ASSERT_OK(CreateSerializationContext(&serialization_context)); + VariantTensorData data; + VariantTensorDataWriter writer(&data); + TF_ASSERT_OK(cache_dataset->Save(serialization_context.get(), &writer)); + TF_ASSERT_OK(writer.Flush()); +} + +TEST_P(ParameterizedCacheDatasetOpTest, IteratorOutputShapes) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + std::unique_ptr cache_dataset_kernel; + TF_ASSERT_OK(CreateCacheDatasetOpKernel(test_case.expected_output_dtypes, + test_case.expected_output_shapes, + &cache_dataset_kernel)); + Tensor tensor_slice_dataset_tensor(DT_VARIANT, TensorShape({})); + std::vector inputs_for_tensor_slice_dataset = test_case.input_tensors; + TF_ASSERT_OK(CreateTensorSliceDatasetTensor(&inputs_for_tensor_slice_dataset, + &tensor_slice_dataset_tensor)); + Tensor file_name = CreateTensor(TensorShape{}, {test_case.file_name}); + gtl::InlinedVector inputs( + {TensorValue(&tensor_slice_dataset_tensor), TensorValue(&file_name)}); + std::unique_ptr 
cache_dataset_context; + TF_ASSERT_OK(CreateCacheDatasetContext(cache_dataset_kernel.get(), &inputs, + &cache_dataset_context)); + DatasetBase* cache_dataset; + TF_ASSERT_OK(CreateDataset(cache_dataset_kernel.get(), + cache_dataset_context.get(), &cache_dataset)); + core::ScopedUnref scoped_unref(cache_dataset); + + std::unique_ptr iterator_ctx; + TF_ASSERT_OK( + CreateIteratorContext(cache_dataset_context.get(), &iterator_ctx)); + std::unique_ptr iterator; + TF_ASSERT_OK(cache_dataset->MakeIterator(iterator_ctx.get(), kIteratorPrefix, + &iterator)); + + TF_EXPECT_OK(VerifyShapesCompatible(iterator->output_shapes(), + test_case.expected_output_shapes)); +} + +TEST_P(ParameterizedCacheDatasetOpTest, IteratorOutputPrefix) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + std::unique_ptr cache_dataset_kernel; + TF_ASSERT_OK(CreateCacheDatasetOpKernel(test_case.expected_output_dtypes, + test_case.expected_output_shapes, + &cache_dataset_kernel)); + Tensor tensor_slice_dataset_tensor(DT_VARIANT, TensorShape({})); + std::vector inputs_for_tensor_slice_dataset = test_case.input_tensors; + TF_ASSERT_OK(CreateTensorSliceDatasetTensor(&inputs_for_tensor_slice_dataset, + &tensor_slice_dataset_tensor)); + Tensor file_name = CreateTensor(TensorShape{}, {test_case.file_name}); + gtl::InlinedVector inputs( + {TensorValue(&tensor_slice_dataset_tensor), TensorValue(&file_name)}); + std::unique_ptr cache_dataset_context; + TF_ASSERT_OK(CreateCacheDatasetContext(cache_dataset_kernel.get(), &inputs, + &cache_dataset_context)); + DatasetBase* cache_dataset; + TF_ASSERT_OK(CreateDataset(cache_dataset_kernel.get(), + cache_dataset_context.get(), &cache_dataset)); + core::ScopedUnref scoped_unref(cache_dataset); + + std::unique_ptr iterator_ctx; + TF_ASSERT_OK( + CreateIteratorContext(cache_dataset_context.get(), &iterator_ctx)); + std::unique_ptr iterator; +
TF_ASSERT_OK(cache_dataset->MakeIterator(iterator_ctx.get(), kIteratorPrefix, + &iterator)); + + name_utils::IteratorPrefixParams params; + params.dataset_prefix = + test_case.file_name.empty() ? kMemoryDatasetPrefix : kFileDatasetPrefix; + EXPECT_EQ(iterator->prefix(), + name_utils::IteratorPrefix(CacheDatasetOp::kDatasetType, + kIteratorPrefix, params)); +} + +TEST_P(ParameterizedCacheDatasetOpTest, Roundtrip) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + std::unique_ptr cache_dataset_kernel; + TF_ASSERT_OK(CreateCacheDatasetOpKernel(test_case.expected_output_dtypes, + test_case.expected_output_shapes, + &cache_dataset_kernel)); + Tensor tensor_slice_dataset_tensor(DT_VARIANT, TensorShape({})); + std::vector inputs_for_tensor_slice_dataset = test_case.input_tensors; + TF_ASSERT_OK(CreateTensorSliceDatasetTensor(&inputs_for_tensor_slice_dataset, + &tensor_slice_dataset_tensor)); + Tensor file_name = CreateTensor(TensorShape{}, {test_case.file_name}); + gtl::InlinedVector inputs( + {TensorValue(&tensor_slice_dataset_tensor), TensorValue(&file_name)}); + std::unique_ptr cache_dataset_context; + TF_ASSERT_OK(CreateCacheDatasetContext(cache_dataset_kernel.get(), &inputs, + &cache_dataset_context)); + DatasetBase* cache_dataset; + TF_ASSERT_OK(CreateDataset(cache_dataset_kernel.get(), + cache_dataset_context.get(), &cache_dataset)); + core::ScopedUnref scoped_unref(cache_dataset); + + std::unique_ptr iterator_ctx; + TF_ASSERT_OK( + CreateIteratorContext(cache_dataset_context.get(), &iterator_ctx)); + std::unique_ptr iterator; + TF_ASSERT_OK(cache_dataset->MakeIterator(iterator_ctx.get(), kIteratorPrefix, + &iterator)); + + bool end_of_sequence = false; + std::vector out_tensors; + // For MemoryIterator in the read mode, the cache needs to be completed before + // it has been read. 
+ if (test_case.file_name.empty()) { + while (!end_of_sequence) { + TF_EXPECT_OK(iterator->GetNext(iterator_ctx.get(), &out_tensors, + &end_of_sequence)); + } + end_of_sequence = false; + out_tensors.clear(); + TF_ASSERT_OK(cache_dataset->MakeIterator(iterator_ctx.get(), + kIteratorPrefix, &iterator)); + } + + std::unique_ptr serialization_ctx; + TF_ASSERT_OK(CreateSerializationContext(&serialization_ctx)); + int cur_iteration = 0; + auto expected_outputs_it = test_case.expected_outputs.begin(); + for (int breakpoint : test_case.breakpoints) { + VariantTensorData data; + VariantTensorDataWriter writer(&data); + TF_EXPECT_OK(iterator->Save(serialization_ctx.get(), &writer)); + TF_EXPECT_OK(writer.Flush()); + VariantTensorDataReader reader(&data); + TF_EXPECT_OK(RestoreIterator(iterator_ctx.get(), &reader, kIteratorPrefix, + *cache_dataset, &iterator)); + + while (cur_iteration <= breakpoint) { + out_tensors.clear(); + TF_EXPECT_OK(iterator->GetNext(iterator_ctx.get(), &out_tensors, + &end_of_sequence)); + if (!end_of_sequence) { + EXPECT_LT(expected_outputs_it, test_case.expected_outputs.end()); + TF_EXPECT_OK(ExpectEqual(out_tensors.back(), *expected_outputs_it)); + expected_outputs_it++; + } + cur_iteration++; + } + + if (breakpoint >= test_case.expected_cardinality) { + EXPECT_TRUE(end_of_sequence); + EXPECT_EQ(expected_outputs_it, test_case.expected_outputs.end()); + } else { + EXPECT_FALSE(end_of_sequence); + } + } +} + +INSTANTIATE_TEST_SUITE_P( + CacheDatasetOpTest, ParameterizedCacheDatasetOpTest, + ::testing::ValuesIn(std::vector({TestCase1(), TestCase2(), + TestCase3(), TestCase4()}))); + +} // namespace +} // namespace data +} // namespace tensorflow diff --git a/tensorflow/core/kernels/data/dataset_test_base.cc b/tensorflow/core/kernels/data/dataset_test_base.cc index 274b99cee3c..8bfa0b18ba8 100644 --- a/tensorflow/core/kernels/data/dataset_test_base.cc +++ b/tensorflow/core/kernels/data/dataset_test_base.cc @@ -22,6 +22,68 @@ limitations under the 
License. namespace tensorflow { namespace data { +string ToString(CompressionType compression_type) { + switch (compression_type) { + case CompressionType::ZLIB: + return "ZLIB"; + case CompressionType::GZIP: + return "GZIP"; + case CompressionType::RAW: + return "RAW"; + case CompressionType::UNCOMPRESSED: + return ""; + } +} + +io::ZlibCompressionOptions GetZlibCompressionOptions( + CompressionType compression_type) { + switch (compression_type) { + case CompressionType::ZLIB: + return io::ZlibCompressionOptions::DEFAULT(); + case CompressionType::GZIP: + return io::ZlibCompressionOptions::GZIP(); + case CompressionType::RAW: + return io::ZlibCompressionOptions::RAW(); + case CompressionType::UNCOMPRESSED: + LOG(WARNING) << "ZlibCompressionOptions does not have an option for " + << ToString(compression_type); + return io::ZlibCompressionOptions::DEFAULT(); + } +} + +Status WriteDataToFile(const string& filename, const char* data) { + return WriteDataToFile(filename, data, CompressionParams()); +} + +Status WriteDataToFile(const string& filename, const char* data, + const CompressionParams& params) { + Env* env = Env::Default(); + std::unique_ptr file_writer; + TF_RETURN_IF_ERROR(env->NewWritableFile(filename, &file_writer)); + if (params.compression_type == CompressionType::UNCOMPRESSED) { + TF_RETURN_IF_ERROR(file_writer->Append(data)); + } else if (params.compression_type == CompressionType::ZLIB || + params.compression_type == CompressionType::GZIP || + params.compression_type == CompressionType::RAW) { + auto zlib_compression_options = + GetZlibCompressionOptions(params.compression_type); + io::ZlibOutputBuffer out(file_writer.get(), params.input_buffer_size, + params.output_buffer_size, + zlib_compression_options); + TF_RETURN_IF_ERROR(out.Init()); + TF_RETURN_IF_ERROR(out.Append(data)); + TF_RETURN_IF_ERROR(out.Close()); + } else { + return tensorflow::errors::InvalidArgument( + "Unsupported compression_type: ", ToString(params.compression_type)); + } + + 
TF_RETURN_IF_ERROR(file_writer->Flush()); + TF_RETURN_IF_ERROR(file_writer->Close()); + + return Status::OK(); +} + template Status IsEqual(const Tensor& t1, const Tensor& t2) { if (t1.dtype() != t2.dtype()) { diff --git a/tensorflow/core/kernels/data/dataset_test_base.h b/tensorflow/core/kernels/data/dataset_test_base.h index edd12667724..6d99250c54c 100644 --- a/tensorflow/core/kernels/data/dataset_test_base.h +++ b/tensorflow/core/kernels/data/dataset_test_base.h @@ -34,6 +34,9 @@ limitations under the License. #include "tensorflow/core/kernels/data/name_utils.h" #include "tensorflow/core/kernels/data/range_dataset_op.h" #include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/lib/io/zlib_compression_options.h" +#include "tensorflow/core/lib/io/zlib_outputbuffer.h" +#include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/util/ptr_util.h" @@ -41,6 +44,33 @@ limitations under the License. namespace tensorflow { namespace data { +enum class CompressionType { ZLIB = 0, GZIP = 1, RAW = 2, UNCOMPRESSED = 3 }; + +// Returns a string representation for the given compression type. +string ToString(CompressionType compression_type); + +// Gets the specified zlib compression options according to the compression +// type. Note that `CompressionType::UNCOMPRESSED` is not supported because +// `ZlibCompressionOptions` does not have an option. +io::ZlibCompressionOptions GetZlibCompressionOptions( + CompressionType compression_type); + +// Used to specify parameters when writing data into files with compression. +// `input_buffer_size` and `output_buffer_size` specify the input and output +// buffer size when ZLIB and GZIP compression is used. 
+struct CompressionParams { + CompressionType compression_type = CompressionType::UNCOMPRESSED; + int32 input_buffer_size = 0; + int32 output_buffer_size = 0; +}; + +// Writes the input data into the file without compression. +Status WriteDataToFile(const string& filename, const char* data); + +// Writes the input data into the file with the specified compression. +Status WriteDataToFile(const string& filename, const char* data, + const CompressionParams& params); + // Helpful functions to test Dataset op kernels. class DatasetOpsTestBase : public ::testing::Test { public: diff --git a/tensorflow/core/kernels/data/experimental/snapshot_dataset_op.cc b/tensorflow/core/kernels/data/experimental/snapshot_dataset_op.cc index d35786c6b32..2e89af6abb3 100644 --- a/tensorflow/core/kernels/data/experimental/snapshot_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/snapshot_dataset_op.cc @@ -91,12 +91,7 @@ class SnapshotWriter { TF_RETURN_IF_ERROR(dest_->Append(StringPiece(header, sizeof(header)))); - // TODO(frankchn): Remove after ZlibOutputBuffer Cord support is added. - if (compression_type_.empty()) { - return dest_->Append(data); - } else { - return dest_->Append(data.ToString()); - } + return dest_->Append(data); } #endif // PLATFORM_GOOGLE diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc index f28386ccd0b..a5a49b2c7eb 100644 --- a/tensorflow/core/kernels/data/iterator_ops.cc +++ b/tensorflow/core/kernels/data/iterator_ops.cc @@ -617,7 +617,14 @@ void DeleteIteratorOp::Compute(OpKernelContext* ctx) { // The iterator resource is guaranteed to exist because the variant tensor // wrapping the deleter is provided as an unused input to this op, which // guarantees that it has not run yet. 
- OP_REQUIRES_OK(ctx, ctx->resource_manager()->Delete(handle)); + Status s = ctx->resource_manager()->Delete(handle); + if (errors::IsNotFound(s)) { + // TODO(b/135948230): Investigate why the above statement is not true and + // then get rid of the special case. + ctx->SetStatus(Status::OK()); + return; + } + ctx->SetStatus(s); } namespace { diff --git a/tensorflow/core/kernels/data/name_utils.cc b/tensorflow/core/kernels/data/name_utils.cc index 391f45014c8..b6404892fdb 100644 --- a/tensorflow/core/kernels/data/name_utils.cc +++ b/tensorflow/core/kernels/data/name_utils.cc @@ -65,10 +65,11 @@ string IteratorPrefix(const string& dataset_type, const string& prefix) { string IteratorPrefix(const string& dataset_type, const string& prefix, const IteratorPrefixParams& params) { if (params.op_version == 1) { - return strings::StrCat(prefix, kDelimiter, dataset_type); + return strings::StrCat(prefix, kDelimiter, params.dataset_prefix, + dataset_type); } - return strings::StrCat(prefix, kDelimiter, dataset_type, kVersion, - params.op_version); + return strings::StrCat(prefix, kDelimiter, params.dataset_prefix, + dataset_type, kVersion, params.op_version); } } // namespace name_utils diff --git a/tensorflow/core/kernels/data/name_utils.h b/tensorflow/core/kernels/data/name_utils.h index 0efa825ec5e..5171b8e05e3 100644 --- a/tensorflow/core/kernels/data/name_utils.h +++ b/tensorflow/core/kernels/data/name_utils.h @@ -44,6 +44,7 @@ struct DatasetDebugStringParams { struct IteratorPrefixParams { int op_version = 1; + string dataset_prefix = ""; }; // Merge the given args in the format of "(arg1, arg2, ..., argn)".
diff --git a/tensorflow/core/kernels/data/reader_dataset_ops.cc b/tensorflow/core/kernels/data/reader_dataset_ops.cc index 9ab687c0d7d..9b6e987ea6a 100644 --- a/tensorflow/core/kernels/data/reader_dataset_ops.cc +++ b/tensorflow/core/kernels/data/reader_dataset_ops.cc @@ -30,249 +30,6 @@ namespace { // See documentation in ../../ops/dataset_ops.cc for a high-level // description of the following ops. -constexpr char kTextLineDatasetName[] = "TextLine"; - -class TextLineDatasetOp : public DatasetOpKernel { - public: - using DatasetOpKernel::DatasetOpKernel; - - void MakeDataset(OpKernelContext* ctx, DatasetBase** output) override { - const Tensor* filenames_tensor; - OP_REQUIRES_OK(ctx, ctx->input("filenames", &filenames_tensor)); - OP_REQUIRES( - ctx, filenames_tensor->dims() <= 1, - errors::InvalidArgument("`filenames` must be a scalar or a vector.")); - - string compression_type; - OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "compression_type", - &compression_type)); - - int64 buffer_size = -1; - OP_REQUIRES_OK( - ctx, ParseScalarArgument(ctx, "buffer_size", &buffer_size)); - OP_REQUIRES( - ctx, buffer_size >= 0, - errors::InvalidArgument("`buffer_size` must be >= 0 (0 == default)")); - - io::ZlibCompressionOptions zlib_compression_options = - io::ZlibCompressionOptions::DEFAULT(); - if (compression_type == "ZLIB") { - zlib_compression_options = io::ZlibCompressionOptions::DEFAULT(); - } else if (compression_type == "GZIP") { - zlib_compression_options = io::ZlibCompressionOptions::GZIP(); - } else { - OP_REQUIRES(ctx, compression_type.empty(), - errors::InvalidArgument("Unsupported compression_type.")); - } - - if (buffer_size != 0) { - // Set the override size. 
- zlib_compression_options.input_buffer_size = buffer_size; - } - - std::vector filenames; - filenames.reserve(filenames_tensor->NumElements()); - for (int i = 0; i < filenames_tensor->NumElements(); ++i) { - filenames.push_back(filenames_tensor->flat()(i)); - } - - *output = new Dataset(ctx, std::move(filenames), compression_type, - zlib_compression_options); - } - - private: - class Dataset : public DatasetBase { - public: - Dataset(OpKernelContext* ctx, std::vector filenames, - const string& compression_type, - const io::ZlibCompressionOptions& options) - : DatasetBase(DatasetContext(ctx)), - filenames_(std::move(filenames)), - compression_type_(compression_type), - use_compression_(!compression_type.empty()), - options_(options) {} - - std::unique_ptr MakeIteratorInternal( - const string& prefix) const override { - return absl::make_unique(Iterator::Params{ - this, strings::StrCat(prefix, "::", kTextLineDatasetName)}); - } - - const DataTypeVector& output_dtypes() const override { - static DataTypeVector* dtypes = new DataTypeVector({DT_STRING}); - return *dtypes; - } - - const std::vector& output_shapes() const override { - static std::vector* shapes = - new std::vector({{}}); - return *shapes; - } - - string DebugString() const override { return "TextLineDatasetOp::Dataset"; } - - protected: - Status AsGraphDefInternal(SerializationContext* ctx, - DatasetGraphDefBuilder* b, - Node** output) const override { - Node* filenames = nullptr; - Node* compression_type = nullptr; - Node* buffer_size = nullptr; - TF_RETURN_IF_ERROR(b->AddVector(filenames_, &filenames)); - TF_RETURN_IF_ERROR(b->AddScalar(compression_type_, &compression_type)); - TF_RETURN_IF_ERROR( - b->AddScalar(options_.input_buffer_size, &buffer_size)); - TF_RETURN_IF_ERROR(b->AddDataset( - this, {filenames, compression_type, buffer_size}, output)); - return Status::OK(); - } - - private: - class Iterator : public DatasetIterator { - public: - explicit Iterator(const Params& params) - : 
DatasetIterator(params) {} - - Status GetNextInternal(IteratorContext* ctx, - std::vector* out_tensors, - bool* end_of_sequence) override { - mutex_lock l(mu_); - do { - // We are currently processing a file, so try to read the next line. - if (buffered_input_stream_) { - string line_contents; - Status s = buffered_input_stream_->ReadLine(&line_contents); - - if (s.ok()) { - // Produce the line as output. - metrics::RecordTFDataBytesRead(kTextLineDatasetName, - line_contents.size()); - out_tensors->emplace_back(ctx->allocator({}), DT_STRING, - TensorShape({})); - out_tensors->back().scalar()() = std::move(line_contents); - *end_of_sequence = false; - return Status::OK(); - } else if (!errors::IsOutOfRange(s)) { - // Report non-EOF errors to the caller. - return s; - } - // We have reached the end of the current file, so maybe - // move on to next file. - ResetStreamsLocked(); - ++current_file_index_; - } - - // Iteration ends when there are no more files to process. - if (current_file_index_ == dataset()->filenames_.size()) { - *end_of_sequence = true; - return Status::OK(); - } - - TF_RETURN_IF_ERROR(SetupStreamsLocked(ctx->env())); - } while (true); - } - - protected: - std::shared_ptr CreateNode( - IteratorContext* ctx, model::Node::Args args) const override { - return model::MakeSourceNode(std::move(args)); - } - - Status SaveInternal(IteratorStateWriter* writer) override { - mutex_lock l(mu_); - TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("current_file_index"), - current_file_index_)); - - // `buffered_input_stream_` is empty if - // 1. GetNext has not been called even once. - // 2. All files have been read and iterator has been exhausted. 
- if (buffered_input_stream_) { - TF_RETURN_IF_ERROR(writer->WriteScalar( - full_name("current_pos"), buffered_input_stream_->Tell())); - } - return Status::OK(); - } - - Status RestoreInternal(IteratorContext* ctx, - IteratorStateReader* reader) override { - mutex_lock l(mu_); - ResetStreamsLocked(); - int64 current_file_index; - TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("current_file_index"), - &current_file_index)); - current_file_index_ = size_t(current_file_index); - // The key "current_pos" is written only if the iterator was saved - // with an open file. - if (reader->Contains(full_name("current_pos"))) { - int64 current_pos; - TF_RETURN_IF_ERROR( - reader->ReadScalar(full_name("current_pos"), &current_pos)); - - TF_RETURN_IF_ERROR(SetupStreamsLocked(ctx->env())); - TF_RETURN_IF_ERROR(buffered_input_stream_->Seek(current_pos)); - } - return Status::OK(); - } - - private: - // Sets up reader streams to read from the file at `current_file_index_`. - Status SetupStreamsLocked(Env* env) EXCLUSIVE_LOCKS_REQUIRED(mu_) { - if (current_file_index_ >= dataset()->filenames_.size()) { - return errors::InvalidArgument( - "current_file_index_:", current_file_index_, - " >= filenames_.size():", dataset()->filenames_.size()); - } - - // Actually move on to next file. - TF_RETURN_IF_ERROR(env->NewRandomAccessFile( - dataset()->filenames_[current_file_index_], &file_)); - input_stream_ = - absl::make_unique(file_.get(), false); - - if (dataset()->use_compression_) { - zlib_input_stream_ = absl::make_unique( - input_stream_.get(), dataset()->options_.input_buffer_size, - dataset()->options_.input_buffer_size, dataset()->options_); - buffered_input_stream_ = absl::make_unique( - zlib_input_stream_.get(), dataset()->options_.input_buffer_size, - false); - } else { - buffered_input_stream_ = absl::make_unique( - input_stream_.get(), dataset()->options_.input_buffer_size, - false); - } - return Status::OK(); - } - - // Resets all reader streams. 
- void ResetStreamsLocked() EXCLUSIVE_LOCKS_REQUIRED(mu_) { - input_stream_.reset(); - zlib_input_stream_.reset(); - buffered_input_stream_.reset(); - file_.reset(); - } - - mutex mu_; - std::unique_ptr input_stream_ - GUARDED_BY(mu_); - std::unique_ptr zlib_input_stream_ GUARDED_BY(mu_); - std::unique_ptr buffered_input_stream_ - GUARDED_BY(mu_); - size_t current_file_index_ GUARDED_BY(mu_) = 0; - std::unique_ptr file_ - GUARDED_BY(mu_); // must outlive input_stream_ - }; - - const std::vector filenames_; - const string compression_type_; - const bool use_compression_; - const io::ZlibCompressionOptions options_; - }; -}; - -REGISTER_KERNEL_BUILDER(Name("TextLineDataset").Device(DEVICE_CPU), - TextLineDatasetOp); - constexpr char kFixedLengthRecordDatasetName[] = "FixedLengthRecord"; class FixedLengthRecordDatasetOp : public DatasetOpKernel { diff --git a/tensorflow/core/kernels/data/text_line_dataset_op.cc b/tensorflow/core/kernels/data/text_line_dataset_op.cc new file mode 100644 index 00000000000..b8302b890c8 --- /dev/null +++ b/tensorflow/core/kernels/data/text_line_dataset_op.cc @@ -0,0 +1,281 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/core/kernels/data/text_line_dataset_op.h" + +#include "tensorflow/core/common_runtime/metrics.h" +#include "tensorflow/core/framework/partial_tensor_shape.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/data/name_utils.h" +#include "tensorflow/core/lib/io/buffered_inputstream.h" +#include "tensorflow/core/lib/io/inputbuffer.h" +#include "tensorflow/core/lib/io/random_inputstream.h" +#include "tensorflow/core/lib/io/zlib_compression_options.h" +#include "tensorflow/core/lib/io/zlib_inputstream.h" + +namespace tensorflow { +namespace data { + +/* static */ constexpr const char* const TextLineDatasetOp::kDatasetType; +/* static */ constexpr const char* const TextLineDatasetOp::kFileNames; +/* static */ constexpr const char* const TextLineDatasetOp::kCompressionType; +/* static */ constexpr const char* const TextLineDatasetOp::kBufferSize; + +constexpr char kZLIB[] = "ZLIB"; +constexpr char kGZIP[] = "GZIP"; +constexpr char kCurrentFileIndex[] = "current_file_index"; +constexpr char kCurrentPos[] = "current_pos"; + +class TextLineDatasetOp::Dataset : public DatasetBase { + public: + Dataset(OpKernelContext* ctx, std::vector filenames, + const string& compression_type, + const io::ZlibCompressionOptions& options) + : DatasetBase(DatasetContext(ctx)), + filenames_(std::move(filenames)), + compression_type_(compression_type), + use_compression_(!compression_type.empty()), + options_(options) {} + + std::unique_ptr MakeIteratorInternal( + const string& prefix) const override { + return absl::make_unique(Iterator::Params{ + this, + name_utils::IteratorPrefix(TextLineDatasetOp::kDatasetType, prefix)}); + } + + const DataTypeVector& output_dtypes() const override { + static DataTypeVector* dtypes = new DataTypeVector({DT_STRING}); + return *dtypes; + } + + const std::vector& output_shapes() const override { + static std::vector* shapes = 
+ new std::vector({{}}); + return *shapes; + } + + string DebugString() const override { + return name_utils::DatasetDebugString(kDatasetType); + } + + protected: + Status AsGraphDefInternal(SerializationContext* ctx, + DatasetGraphDefBuilder* b, + Node** output) const override { + Node* filenames = nullptr; + Node* compression_type = nullptr; + Node* buffer_size = nullptr; + TF_RETURN_IF_ERROR(b->AddVector(filenames_, &filenames)); + TF_RETURN_IF_ERROR(b->AddScalar(compression_type_, &compression_type)); + TF_RETURN_IF_ERROR(b->AddScalar(options_.input_buffer_size, &buffer_size)); + TF_RETURN_IF_ERROR(b->AddDataset( + this, {filenames, compression_type, buffer_size}, output)); + return Status::OK(); + } + + private: + class Iterator : public DatasetIterator { + public: + explicit Iterator(const Params& params) + : DatasetIterator(params) {} + + Status GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override { + mutex_lock l(mu_); + do { + // We are currently processing a file, so try to read the next line. + if (buffered_input_stream_) { + string line_contents; + Status s = buffered_input_stream_->ReadLine(&line_contents); + + if (s.ok()) { + // Produce the line as output. + metrics::RecordTFDataBytesRead( + name_utils::OpName(TextLineDatasetOp::kDatasetType), + line_contents.size()); + out_tensors->emplace_back(ctx->allocator({}), DT_STRING, + TensorShape({})); + out_tensors->back().scalar()() = std::move(line_contents); + *end_of_sequence = false; + return Status::OK(); + } else if (!errors::IsOutOfRange(s)) { + // Report non-EOF errors to the caller. + return s; + } + // We have reached the end of the current file, so maybe + // move on to next file. + ResetStreamsLocked(); + ++current_file_index_; + } + + // Iteration ends when there are no more files to process. 
+ if (current_file_index_ == dataset()->filenames_.size()) { + *end_of_sequence = true; + return Status::OK(); + } + + TF_RETURN_IF_ERROR(SetupStreamsLocked(ctx->env())); + } while (true); + } + + protected: + std::shared_ptr CreateNode( + IteratorContext* ctx, model::Node::Args args) const override { + return model::MakeSourceNode(std::move(args)); + } + + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + TF_RETURN_IF_ERROR(writer->WriteScalar(full_name(kCurrentFileIndex), + current_file_index_)); + // `buffered_input_stream_` is empty if + // 1. GetNext has not been called even once. + // 2. All files have been read and iterator has been exhausted. + if (buffered_input_stream_) { + TF_RETURN_IF_ERROR(writer->WriteScalar(full_name(kCurrentPos), + buffered_input_stream_->Tell())); + } + return Status::OK(); + } + + Status RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + ResetStreamsLocked(); + int64 current_file_index; + TF_RETURN_IF_ERROR(reader->ReadScalar(full_name(kCurrentFileIndex), + &current_file_index)); + current_file_index_ = size_t(current_file_index); + // The key "current_pos" is written only if the iterator was saved + // with an open file. + if (reader->Contains(full_name(kCurrentPos))) { + int64 current_pos; + TF_RETURN_IF_ERROR( + reader->ReadScalar(full_name(kCurrentPos), &current_pos)); + + TF_RETURN_IF_ERROR(SetupStreamsLocked(ctx->env())); + TF_RETURN_IF_ERROR(buffered_input_stream_->Seek(current_pos)); + } + return Status::OK(); + } + + private: + // Sets up reader streams to read from the file at `current_file_index_`. + Status SetupStreamsLocked(Env* env) EXCLUSIVE_LOCKS_REQUIRED(mu_) { + if (current_file_index_ >= dataset()->filenames_.size()) { + return errors::InvalidArgument( + "current_file_index_:", current_file_index_, + " >= filenames_.size():", dataset()->filenames_.size()); + } + + // Actually move on to next file. 
+ TF_RETURN_IF_ERROR(env->NewRandomAccessFile( + dataset()->filenames_[current_file_index_], &file_)); + input_stream_ = + absl::make_unique(file_.get(), false); + + if (dataset()->use_compression_) { + zlib_input_stream_ = absl::make_unique( + input_stream_.get(), dataset()->options_.input_buffer_size, + dataset()->options_.input_buffer_size, dataset()->options_); + buffered_input_stream_ = absl::make_unique( + zlib_input_stream_.get(), dataset()->options_.input_buffer_size, + false); + } else { + buffered_input_stream_ = absl::make_unique( + input_stream_.get(), dataset()->options_.input_buffer_size, false); + } + return Status::OK(); + } + + // Resets all reader streams. + void ResetStreamsLocked() EXCLUSIVE_LOCKS_REQUIRED(mu_) { + input_stream_.reset(); + zlib_input_stream_.reset(); + buffered_input_stream_.reset(); + file_.reset(); + } + + mutex mu_; + std::unique_ptr input_stream_ GUARDED_BY(mu_); + std::unique_ptr zlib_input_stream_ GUARDED_BY(mu_); + std::unique_ptr buffered_input_stream_ + GUARDED_BY(mu_); + size_t current_file_index_ GUARDED_BY(mu_) = 0; + std::unique_ptr file_ + GUARDED_BY(mu_); // must outlive input_stream_ + }; + + const std::vector filenames_; + const string compression_type_; + const bool use_compression_; + const io::ZlibCompressionOptions options_; +}; + +TextLineDatasetOp::TextLineDatasetOp(OpKernelConstruction* ctx) + : DatasetOpKernel(ctx) {} + +void TextLineDatasetOp::MakeDataset(OpKernelContext* ctx, + DatasetBase** output) { + const Tensor* filenames_tensor; + OP_REQUIRES_OK(ctx, ctx->input(kFileNames, &filenames_tensor)); + OP_REQUIRES( + ctx, filenames_tensor->dims() <= 1, + errors::InvalidArgument("`filenames` must be a scalar or a vector.")); + + string compression_type; + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, kCompressionType, + &compression_type)); + + int64 buffer_size = -1; + OP_REQUIRES_OK(ctx, + ParseScalarArgument(ctx, kBufferSize, &buffer_size)); + OP_REQUIRES( + ctx, buffer_size >= 0, + 
errors::InvalidArgument("`buffer_size` must be >= 0 (0 == default)")); + + io::ZlibCompressionOptions zlib_compression_options = + io::ZlibCompressionOptions::DEFAULT(); + if (compression_type == kZLIB) { + zlib_compression_options = io::ZlibCompressionOptions::DEFAULT(); + } else if (compression_type == kGZIP) { + zlib_compression_options = io::ZlibCompressionOptions::GZIP(); + } else { + OP_REQUIRES(ctx, compression_type.empty(), + errors::InvalidArgument("Unsupported compression_type.")); + } + + if (buffer_size != 0) { + // Set the override size. + zlib_compression_options.input_buffer_size = buffer_size; + } + + std::vector filenames; + filenames.reserve(filenames_tensor->NumElements()); + for (int i = 0; i < filenames_tensor->NumElements(); ++i) { + filenames.push_back(filenames_tensor->flat()(i)); + } + + *output = new Dataset(ctx, std::move(filenames), compression_type, + zlib_compression_options); +} + +namespace { +REGISTER_KERNEL_BUILDER(Name("TextLineDataset").Device(DEVICE_CPU), + TextLineDatasetOp); +} // namespace +} // namespace data +} // namespace tensorflow diff --git a/tensorflow/core/kernels/data/text_line_dataset_op.h b/tensorflow/core/kernels/data/text_line_dataset_op.h new file mode 100644 index 00000000000..3621b57ada2 --- /dev/null +++ b/tensorflow/core/kernels/data/text_line_dataset_op.h @@ -0,0 +1,42 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_CORE_KERNELS_DATA_TEXT_LINE_DATASET_OP_H_ +#define TENSORFLOW_CORE_KERNELS_DATA_TEXT_LINE_DATASET_OP_H_ + +#include "tensorflow/core/framework/dataset.h" + +namespace tensorflow { +namespace data { + +class TextLineDatasetOp : public DatasetOpKernel { + public: + static constexpr const char* const kDatasetType = "TextLine"; + static constexpr const char* const kFileNames = "filenames"; + static constexpr const char* const kCompressionType = "compression_type"; + static constexpr const char* const kBufferSize = "buffer_size"; + + explicit TextLineDatasetOp(OpKernelConstruction* ctx); + + protected: + void MakeDataset(OpKernelContext* ctx, DatasetBase** output) override; + + private: + class Dataset; +}; + +} // namespace data +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_KERNELS_DATA_TEXT_LINE_DATASET_OP_H_ diff --git a/tensorflow/core/kernels/data/text_line_dataset_op_test.cc b/tensorflow/core/kernels/data/text_line_dataset_op_test.cc new file mode 100644 index 00000000000..d5909c857e6 --- /dev/null +++ b/tensorflow/core/kernels/data/text_line_dataset_op_test.cc @@ -0,0 +1,624 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/core/kernels/data/text_line_dataset_op.h" + +#include "tensorflow/core/kernels/data/dataset_test_base.h" + +namespace tensorflow { +namespace data { +namespace { + +constexpr char kNodeName[] = "text_line_dataset"; +constexpr char kIteratorPrefix[] = "Iterator"; + +class TextLineDatasetOpTest : public DatasetOpsTestBase { + protected: + // Create a new `TextLineDataset` op kernel. + Status CreateTextLineDatasetOpKernel( + std::unique_ptr* text_line_dataset_op_kernel) { + NodeDef node_def = test::function::NDef( + kNodeName, name_utils::OpName(TextLineDatasetOp::kDatasetType), + {TextLineDatasetOp::kFileNames, TextLineDatasetOp::kCompressionType, + TextLineDatasetOp::kBufferSize}, + {}); + TF_RETURN_IF_ERROR(CreateOpKernel(node_def, text_line_dataset_op_kernel)); + return Status::OK(); + } + + // Create a new `TextLineDataset` op kernel context + Status CreateTextLineDatasetContext( + OpKernel* const op_kernel, + gtl::InlinedVector* const inputs, + std::unique_ptr* context) { + TF_RETURN_IF_ERROR(CheckOpKernelInput(*op_kernel, *inputs)); + TF_RETURN_IF_ERROR(CreateOpKernelContext(op_kernel, inputs, context)); + return Status::OK(); + } +}; + +struct TestCase { + std::vector filenames; + std::vector texts; + CompressionType compression_type; + int64 buffer_size; + std::vector expected_outputs; + DataTypeVector expected_output_dtypes; + std::vector expected_output_shapes; + int64 expected_cardinality; + std::vector breakpoints; +}; + +Status CreateTestFiles(const TestCase& test_case) { + if (test_case.filenames.size() != test_case.texts.size()) { + return tensorflow::errors::InvalidArgument( + "The number of files does not match with the contents"); + } + if (test_case.compression_type == CompressionType::UNCOMPRESSED) { + for (int i = 0; i < test_case.filenames.size(); ++i) { + TF_RETURN_IF_ERROR( + WriteDataToFile(test_case.filenames[i], 
test_case.texts[i].data())); + } + } else { + CompressionParams params; + params.compression_type = test_case.compression_type; + params.input_buffer_size = test_case.buffer_size; + params.output_buffer_size = test_case.buffer_size; + for (int i = 0; i < test_case.filenames.size(); ++i) { + TF_RETURN_IF_ERROR(WriteDataToFile(test_case.filenames[i], + test_case.texts[i].data(), params)); + } + } + return Status::OK(); +} + +// Test case 1: multiple text files with ZLIB compression. +TestCase TestCase1() { + return { + /*filenames*/ {absl::StrCat(testing::TmpDir(), "/text_line_ZLIB_1"), + absl::StrCat(testing::TmpDir(), "/text_line_ZLIB_2")}, + /*texts*/ + {absl::StrCat("hello world\n", "11223334455\n"), + absl::StrCat("abcd, EFgH\n", " \n", "$%^&*()\n")}, + /*compression_type*/ CompressionType::ZLIB, + /*buffer_size*/ 10, + /*expected_outputs*/ + {DatasetOpsTestBase::CreateTensor(TensorShape({}), + {"hello world"}), + DatasetOpsTestBase::CreateTensor(TensorShape({}), + {"11223334455"}), + DatasetOpsTestBase::CreateTensor(TensorShape({}), + {"abcd, EFgH"}), + DatasetOpsTestBase::CreateTensor(TensorShape({}), + {" "}), + DatasetOpsTestBase::CreateTensor(TensorShape({}), {"$%^&*()"})}, + /*expected_output_dtypes*/ {DT_STRING}, + /*expected_output_shapes*/ {PartialTensorShape({})}, + /*expected_cardinality*/ kUnknownCardinality, + /*breakpoints*/ {0, 2, 6}}; +} + +// Test case 2: multiple text files with GZIP compression. 
+TestCase TestCase2() { + return { + /*filenames*/ {absl::StrCat(testing::TmpDir(), "/text_line_GZIP_1"), + absl::StrCat(testing::TmpDir(), "/text_line_GZIP_2")}, + /*texts*/ + {absl::StrCat("hello world\n", "11223334455\n"), + absl::StrCat("abcd, EFgH\n", " \n", "$%^&*()\n")}, + /*compression_type*/ CompressionType::GZIP, + /*buffer_size*/ 10, + /*expected_outputs*/ + {DatasetOpsTestBase::CreateTensor(TensorShape({}), + {"hello world"}), + DatasetOpsTestBase::CreateTensor(TensorShape({}), + {"11223334455"}), + DatasetOpsTestBase::CreateTensor(TensorShape({}), + {"abcd, EFgH"}), + DatasetOpsTestBase::CreateTensor(TensorShape({}), + {" "}), + DatasetOpsTestBase::CreateTensor(TensorShape({}), {"$%^&*()"})}, + /*expected_output_dtypes*/ {DT_STRING}, + /*expected_output_shapes*/ {PartialTensorShape({})}, + /*expected_cardinality*/ kUnknownCardinality, + /*breakpoints*/ {0, 2, 6}}; +} + +// Test case 3: multiple text files without compression. +TestCase TestCase3() { + return { + /*filenames*/ { + absl::StrCat(testing::TmpDir(), "/text_line_UNCOMPRESSED_1"), + absl::StrCat(testing::TmpDir(), "/text_line_UNCOMPRESSED_2")}, + /*texts*/ + {absl::StrCat("hello world\n", "11223334455\n"), + absl::StrCat("abcd, EFgH\n", " \n", "$%^&*()\n")}, + /*compression_type*/ CompressionType::UNCOMPRESSED, + /*buffer_size*/ 10, + /*expected_outputs*/ + {DatasetOpsTestBase::CreateTensor(TensorShape({}), + {"hello world"}), + DatasetOpsTestBase::CreateTensor(TensorShape({}), + {"11223334455"}), + DatasetOpsTestBase::CreateTensor(TensorShape({}), + {"abcd, EFgH"}), + DatasetOpsTestBase::CreateTensor(TensorShape({}), + {" "}), + DatasetOpsTestBase::CreateTensor(TensorShape({}), {"$%^&*()"})}, + /*expected_output_dtypes*/ {DT_STRING}, + /*expected_output_shapes*/ {PartialTensorShape({})}, + /*expected_cardinality*/ kUnknownCardinality, + /*breakpoints*/ {0, 2, 6}}; +} + +class ParameterizedTextLineDatasetOpTest + : public TextLineDatasetOpTest, + public ::testing::WithParamInterface {}; + 
+TEST_P(ParameterizedTextLineDatasetOpTest, GetNext) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + TF_ASSERT_OK(CreateTestFiles(test_case)); + + std::unique_ptr text_line_dataset_kernel; + TF_ASSERT_OK(CreateTextLineDatasetOpKernel(&text_line_dataset_kernel)); + + int64 num_files = test_case.filenames.size(); + Tensor filenames = + CreateTensor(TensorShape({num_files}), test_case.filenames); + Tensor compression_type = CreateTensor( + TensorShape({}), {ToString(test_case.compression_type)}); + Tensor buffer_size = + CreateTensor(TensorShape({}), {test_case.buffer_size}); + gtl::InlinedVector inputs{TensorValue(&filenames), + TensorValue(&compression_type), + TensorValue(&buffer_size)}; + std::unique_ptr text_line_dataset_context; + TF_ASSERT_OK(CreateTextLineDatasetContext( + text_line_dataset_kernel.get(), &inputs, &text_line_dataset_context)); + + DatasetBase* text_line_dataset; + TF_ASSERT_OK(CreateDataset(text_line_dataset_kernel.get(), + text_line_dataset_context.get(), + &text_line_dataset)); + core::ScopedUnref scoped_unref(text_line_dataset); + + std::unique_ptr iterator_ctx; + TF_ASSERT_OK( + CreateIteratorContext(text_line_dataset_context.get(), &iterator_ctx)); + std::unique_ptr iterator; + TF_ASSERT_OK(text_line_dataset->MakeIterator(iterator_ctx.get(), + kIteratorPrefix, &iterator)); + bool end_of_sequence = false; + std::vector out_tensors; + while (!end_of_sequence) { + std::vector next; + TF_EXPECT_OK( + iterator->GetNext(iterator_ctx.get(), &next, &end_of_sequence)); + out_tensors.insert(out_tensors.end(), next.begin(), next.end()); + } + + TF_EXPECT_OK(ExpectEqual(out_tensors, test_case.expected_outputs, + /*compare_order*/ true)); +} + +TEST_F(TextLineDatasetOpTest, DatasetNodeName) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = TestCase1(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + 
TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + TF_ASSERT_OK(CreateTestFiles(test_case)); + + std::unique_ptr text_line_dataset_kernel; + TF_ASSERT_OK(CreateTextLineDatasetOpKernel(&text_line_dataset_kernel)); + + int64 num_files = test_case.filenames.size(); + Tensor filenames = + CreateTensor(TensorShape({num_files}), test_case.filenames); + Tensor compression_type = CreateTensor( + TensorShape({}), {ToString(test_case.compression_type)}); + Tensor buffer_size = + CreateTensor(TensorShape({}), {test_case.buffer_size}); + gtl::InlinedVector inputs{TensorValue(&filenames), + TensorValue(&compression_type), + TensorValue(&buffer_size)}; + std::unique_ptr text_line_dataset_context; + TF_ASSERT_OK(CreateTextLineDatasetContext( + text_line_dataset_kernel.get(), &inputs, &text_line_dataset_context)); + + DatasetBase* text_line_dataset; + TF_ASSERT_OK(CreateDataset(text_line_dataset_kernel.get(), + text_line_dataset_context.get(), + &text_line_dataset)); + core::ScopedUnref scoped_unref(text_line_dataset); + EXPECT_EQ(text_line_dataset->node_name(), kNodeName); +} + +TEST_F(TextLineDatasetOpTest, DatasetTypeString) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = TestCase1(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + TF_ASSERT_OK(CreateTestFiles(test_case)); + + std::unique_ptr text_line_dataset_kernel; + TF_ASSERT_OK(CreateTextLineDatasetOpKernel(&text_line_dataset_kernel)); + + int64 num_files = test_case.filenames.size(); + Tensor filenames = + CreateTensor(TensorShape({num_files}), test_case.filenames); + Tensor compression_type = CreateTensor( + TensorShape({}), {ToString(test_case.compression_type)}); + Tensor buffer_size = + CreateTensor(TensorShape({}), {test_case.buffer_size}); + gtl::InlinedVector inputs{TensorValue(&filenames), + TensorValue(&compression_type), + TensorValue(&buffer_size)}; + std::unique_ptr text_line_dataset_context; + 
TF_ASSERT_OK(CreateTextLineDatasetContext( + text_line_dataset_kernel.get(), &inputs, &text_line_dataset_context)); + + DatasetBase* text_line_dataset; + TF_ASSERT_OK(CreateDataset(text_line_dataset_kernel.get(), + text_line_dataset_context.get(), + &text_line_dataset)); + core::ScopedUnref scoped_unref(text_line_dataset); + EXPECT_EQ(text_line_dataset->type_string(), + name_utils::OpName(TextLineDatasetOp::kDatasetType)); +} + +TEST_P(ParameterizedTextLineDatasetOpTest, DatasetOutputDtypes) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + TF_ASSERT_OK(CreateTestFiles(test_case)); + + std::unique_ptr text_line_dataset_kernel; + TF_ASSERT_OK(CreateTextLineDatasetOpKernel(&text_line_dataset_kernel)); + + int64 num_files = test_case.filenames.size(); + Tensor filenames = + CreateTensor(TensorShape({num_files}), test_case.filenames); + Tensor compression_type = CreateTensor( + TensorShape({}), {ToString(test_case.compression_type)}); + Tensor buffer_size = + CreateTensor(TensorShape({}), {test_case.buffer_size}); + gtl::InlinedVector inputs{TensorValue(&filenames), + TensorValue(&compression_type), + TensorValue(&buffer_size)}; + std::unique_ptr text_line_dataset_context; + TF_ASSERT_OK(CreateTextLineDatasetContext( + text_line_dataset_kernel.get(), &inputs, &text_line_dataset_context)); + + DatasetBase* text_line_dataset; + TF_ASSERT_OK(CreateDataset(text_line_dataset_kernel.get(), + text_line_dataset_context.get(), + &text_line_dataset)); + core::ScopedUnref scoped_unref(text_line_dataset); + TF_EXPECT_OK(VerifyTypesMatch(text_line_dataset->output_dtypes(), + test_case.expected_output_dtypes)); +} + +TEST_P(ParameterizedTextLineDatasetOpTest, DatasetOutputShapes) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + 
TF_ASSERT_OK(CreateTestFiles(test_case)); + + std::unique_ptr text_line_dataset_kernel; + TF_ASSERT_OK(CreateTextLineDatasetOpKernel(&text_line_dataset_kernel)); + + int64 num_files = test_case.filenames.size(); + Tensor filenames = + CreateTensor(TensorShape({num_files}), test_case.filenames); + Tensor compression_type = CreateTensor( + TensorShape({}), {ToString(test_case.compression_type)}); + Tensor buffer_size = + CreateTensor(TensorShape({}), {test_case.buffer_size}); + gtl::InlinedVector inputs{TensorValue(&filenames), + TensorValue(&compression_type), + TensorValue(&buffer_size)}; + std::unique_ptr text_line_dataset_context; + TF_ASSERT_OK(CreateTextLineDatasetContext( + text_line_dataset_kernel.get(), &inputs, &text_line_dataset_context)); + + DatasetBase* text_line_dataset; + TF_ASSERT_OK(CreateDataset(text_line_dataset_kernel.get(), + text_line_dataset_context.get(), + &text_line_dataset)); + core::ScopedUnref scoped_unref(text_line_dataset); + TF_EXPECT_OK(VerifyShapesCompatible(text_line_dataset->output_shapes(), + test_case.expected_output_shapes)); +} + +TEST_P(ParameterizedTextLineDatasetOpTest, Cardinality) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + TF_ASSERT_OK(CreateTestFiles(test_case)); + + std::unique_ptr text_line_dataset_kernel; + TF_ASSERT_OK(CreateTextLineDatasetOpKernel(&text_line_dataset_kernel)); + + int64 num_files = test_case.filenames.size(); + Tensor filenames = + CreateTensor(TensorShape({num_files}), test_case.filenames); + Tensor compression_type = CreateTensor( + TensorShape({}), {ToString(test_case.compression_type)}); + Tensor buffer_size = + CreateTensor(TensorShape({}), {test_case.buffer_size}); + gtl::InlinedVector inputs{TensorValue(&filenames), + TensorValue(&compression_type), + TensorValue(&buffer_size)}; + std::unique_ptr text_line_dataset_context; + 
TF_ASSERT_OK(CreateTextLineDatasetContext( + text_line_dataset_kernel.get(), &inputs, &text_line_dataset_context)); + + DatasetBase* text_line_dataset; + TF_ASSERT_OK(CreateDataset(text_line_dataset_kernel.get(), + text_line_dataset_context.get(), + &text_line_dataset)); + core::ScopedUnref scoped_unref(text_line_dataset); + EXPECT_EQ(text_line_dataset->Cardinality(), test_case.expected_cardinality); +} + +TEST_P(ParameterizedTextLineDatasetOpTest, DatasetSave) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + TF_ASSERT_OK(CreateTestFiles(test_case)); + + std::unique_ptr text_line_dataset_kernel; + TF_ASSERT_OK(CreateTextLineDatasetOpKernel(&text_line_dataset_kernel)); + + int64 num_files = test_case.filenames.size(); + Tensor filenames = + CreateTensor(TensorShape({num_files}), test_case.filenames); + Tensor compression_type = CreateTensor( + TensorShape({}), {ToString(test_case.compression_type)}); + Tensor buffer_size = + CreateTensor(TensorShape({}), {test_case.buffer_size}); + gtl::InlinedVector inputs{TensorValue(&filenames), + TensorValue(&compression_type), + TensorValue(&buffer_size)}; + std::unique_ptr text_line_dataset_context; + TF_ASSERT_OK(CreateTextLineDatasetContext( + text_line_dataset_kernel.get(), &inputs, &text_line_dataset_context)); + + DatasetBase* text_line_dataset; + TF_ASSERT_OK(CreateDataset(text_line_dataset_kernel.get(), + text_line_dataset_context.get(), + &text_line_dataset)); + core::ScopedUnref scoped_unref(text_line_dataset); + + std::unique_ptr serialization_context; + TF_ASSERT_OK(CreateSerializationContext(&serialization_context)); + VariantTensorData data; + VariantTensorDataWriter writer(&data); + TF_ASSERT_OK(text_line_dataset->Save(serialization_context.get(), &writer)); + TF_ASSERT_OK(writer.Flush()); +} + +TEST_P(ParameterizedTextLineDatasetOpTest, IteratorOutputDtypes) { + int thread_num = 2, 
cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + TF_ASSERT_OK(CreateTestFiles(test_case)); + + std::unique_ptr text_line_dataset_kernel; + TF_ASSERT_OK(CreateTextLineDatasetOpKernel(&text_line_dataset_kernel)); + + int64 num_files = test_case.filenames.size(); + Tensor filenames = + CreateTensor(TensorShape({num_files}), test_case.filenames); + Tensor compression_type = CreateTensor( + TensorShape({}), {ToString(test_case.compression_type)}); + Tensor buffer_size = + CreateTensor(TensorShape({}), {test_case.buffer_size}); + gtl::InlinedVector inputs{TensorValue(&filenames), + TensorValue(&compression_type), + TensorValue(&buffer_size)}; + std::unique_ptr text_line_dataset_context; + TF_ASSERT_OK(CreateTextLineDatasetContext( + text_line_dataset_kernel.get(), &inputs, &text_line_dataset_context)); + + DatasetBase* text_line_dataset; + TF_ASSERT_OK(CreateDataset(text_line_dataset_kernel.get(), + text_line_dataset_context.get(), + &text_line_dataset)); + core::ScopedUnref scoped_unref(text_line_dataset); + + std::unique_ptr iterator_ctx; + TF_ASSERT_OK( + CreateIteratorContext(text_line_dataset_context.get(), &iterator_ctx)); + std::unique_ptr iterator; + TF_ASSERT_OK(text_line_dataset->MakeIterator(iterator_ctx.get(), + kIteratorPrefix, &iterator)); + + TF_EXPECT_OK(VerifyTypesMatch(iterator->output_dtypes(), + test_case.expected_output_dtypes)); +} + +TEST_P(ParameterizedTextLineDatasetOpTest, IteratorOutputShapes) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + TF_ASSERT_OK(CreateTestFiles(test_case)); + + std::unique_ptr text_line_dataset_kernel; + TF_ASSERT_OK(CreateTextLineDatasetOpKernel(&text_line_dataset_kernel)); + + int64 num_files = test_case.filenames.size(); + Tensor filenames = + CreateTensor(TensorShape({num_files}), 
test_case.filenames); + Tensor compression_type = CreateTensor( + TensorShape({}), {ToString(test_case.compression_type)}); + Tensor buffer_size = + CreateTensor(TensorShape({}), {test_case.buffer_size}); + gtl::InlinedVector inputs{TensorValue(&filenames), + TensorValue(&compression_type), + TensorValue(&buffer_size)}; + std::unique_ptr text_line_dataset_context; + TF_ASSERT_OK(CreateTextLineDatasetContext( + text_line_dataset_kernel.get(), &inputs, &text_line_dataset_context)); + + DatasetBase* text_line_dataset; + TF_ASSERT_OK(CreateDataset(text_line_dataset_kernel.get(), + text_line_dataset_context.get(), + &text_line_dataset)); + core::ScopedUnref scoped_unref(text_line_dataset); + + std::unique_ptr iterator_ctx; + TF_ASSERT_OK( + CreateIteratorContext(text_line_dataset_context.get(), &iterator_ctx)); + std::unique_ptr iterator; + TF_ASSERT_OK(text_line_dataset->MakeIterator(iterator_ctx.get(), + kIteratorPrefix, &iterator)); + + TF_EXPECT_OK(VerifyShapesCompatible(iterator->output_shapes(), + test_case.expected_output_shapes)); +} + +TEST_P(ParameterizedTextLineDatasetOpTest, IteratorOutputPrefix) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + TF_ASSERT_OK(CreateTestFiles(test_case)); + + std::unique_ptr text_line_dataset_kernel; + TF_ASSERT_OK(CreateTextLineDatasetOpKernel(&text_line_dataset_kernel)); + + int64 num_files = test_case.filenames.size(); + Tensor filenames = + CreateTensor(TensorShape({num_files}), test_case.filenames); + Tensor compression_type = CreateTensor( + TensorShape({}), {ToString(test_case.compression_type)}); + Tensor buffer_size = + CreateTensor(TensorShape({}), {test_case.buffer_size}); + gtl::InlinedVector inputs{TensorValue(&filenames), + TensorValue(&compression_type), + TensorValue(&buffer_size)}; + std::unique_ptr text_line_dataset_context; + TF_ASSERT_OK(CreateTextLineDatasetContext( + 
text_line_dataset_kernel.get(), &inputs, &text_line_dataset_context)); + + DatasetBase* text_line_dataset; + TF_ASSERT_OK(CreateDataset(text_line_dataset_kernel.get(), + text_line_dataset_context.get(), + &text_line_dataset)); + core::ScopedUnref scoped_unref(text_line_dataset); + + std::unique_ptr iterator_ctx; + TF_ASSERT_OK( + CreateIteratorContext(text_line_dataset_context.get(), &iterator_ctx)); + std::unique_ptr iterator; + TF_ASSERT_OK(text_line_dataset->MakeIterator(iterator_ctx.get(), + kIteratorPrefix, &iterator)); + + EXPECT_EQ(iterator->prefix(), + name_utils::IteratorPrefix(TextLineDatasetOp::kDatasetType, + kIteratorPrefix)); +} + +TEST_P(ParameterizedTextLineDatasetOpTest, Roundtrip) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + TF_ASSERT_OK(CreateTestFiles(test_case)); + + std::unique_ptr text_line_dataset_kernel; + TF_ASSERT_OK(CreateTextLineDatasetOpKernel(&text_line_dataset_kernel)); + + int64 num_files = test_case.filenames.size(); + Tensor filenames = + CreateTensor(TensorShape({num_files}), test_case.filenames); + Tensor compression_type = CreateTensor( + TensorShape({}), {ToString(test_case.compression_type)}); + Tensor buffer_size = + CreateTensor(TensorShape({}), {test_case.buffer_size}); + gtl::InlinedVector inputs{TensorValue(&filenames), + TensorValue(&compression_type), + TensorValue(&buffer_size)}; + std::unique_ptr text_line_dataset_context; + TF_ASSERT_OK(CreateTextLineDatasetContext( + text_line_dataset_kernel.get(), &inputs, &text_line_dataset_context)); + + DatasetBase* text_line_dataset; + TF_ASSERT_OK(CreateDataset(text_line_dataset_kernel.get(), + text_line_dataset_context.get(), + &text_line_dataset)); + core::ScopedUnref scoped_unref(text_line_dataset); + + std::unique_ptr iterator_ctx; + TF_ASSERT_OK( + CreateIteratorContext(text_line_dataset_context.get(), &iterator_ctx)); + std::unique_ptr 
iterator; + TF_ASSERT_OK(text_line_dataset->MakeIterator(iterator_ctx.get(), + kIteratorPrefix, &iterator)); + + std::unique_ptr serialization_ctx; + TF_ASSERT_OK(CreateSerializationContext(&serialization_ctx)); + + bool end_of_sequence = false; + std::vector out_tensors; + int cur_iteration = 0; + const std::vector& breakpoints = test_case.breakpoints; + for (int breakpoint : breakpoints) { + VariantTensorData data; + VariantTensorDataWriter writer(&data); + TF_EXPECT_OK(iterator->Save(serialization_ctx.get(), &writer)); + TF_EXPECT_OK(writer.Flush()); + VariantTensorDataReader reader(&data); + TF_EXPECT_OK(RestoreIterator(iterator_ctx.get(), &reader, kIteratorPrefix, + *text_line_dataset, &iterator)); + + while (cur_iteration <= breakpoint) { + std::vector next; + TF_EXPECT_OK( + iterator->GetNext(iterator_ctx.get(), &next, &end_of_sequence)); + out_tensors.insert(out_tensors.end(), next.begin(), next.end()); + cur_iteration++; + } + } + + TF_EXPECT_OK(ExpectEqual(out_tensors, test_case.expected_outputs, + /*compare_order*/ true)); +} + +INSTANTIATE_TEST_SUITE_P(TextLineDatasetOpTest, + ParameterizedTextLineDatasetOpTest, + ::testing::ValuesIn(std::vector( + {TestCase1(), TestCase2(), TestCase3()}))); + +} // namespace +} // namespace data +} // namespace tensorflow diff --git a/tensorflow/core/kernels/fused_batch_norm_op.cc b/tensorflow/core/kernels/fused_batch_norm_op.cc index 4179f17deee..70bd659be66 100644 --- a/tensorflow/core/kernels/fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/fused_batch_norm_op.cc @@ -33,6 +33,7 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/kernels/fill_functor.h" #include "tensorflow/core/kernels/fused_batch_norm_op.h" +#include "tensorflow/core/kernels/redux_functor.h" #include "tensorflow/core/util/env_var.h" #include "tensorflow/core/util/tensor_format.h" @@ -358,7 +359,6 @@ struct FusedBatchNormGrad { typename TTypes::ConstVec mean(mean_input.vec()); typename TTypes::ConstVec variance(variance_input.vec()); typename TTypes::Tensor x_backprop(x_backprop_output->tensor()); - typename TTypes::Vec scale_backprop(scale_backprop_output->vec()); typename TTypes::Vec offset_backprop(offset_backprop_output->vec()); // Note: the following formulas are used to compute the gradients for @@ -378,12 +378,10 @@ struct FusedBatchNormGrad { #if !defined(EIGEN_HAS_INDEX_LIST) Eigen::DSizes one_by_depth(1, depth); - Eigen::array reduce_dims({0}); Eigen::array bcast_spec({rest_size, 1}); #else Eigen::IndexList, Eigen::Index> one_by_depth; one_by_depth.set(1, depth); - Eigen::IndexList> reduce_dims; Eigen::IndexList> bcast_spec; bcast_spec.set(0, rest_size); #endif @@ -391,41 +389,182 @@ struct FusedBatchNormGrad { auto x_rest_by_depth = x.reshape(rest_by_depth).template cast(); U rest_size_inv = static_cast(1.0f / static_cast(rest_size)); + // Eigen is notoriously bad at reducing outer dimension, so we materialize + // all temporary tensors that require reduction, and then use Eigen redux + // functor, that is optimized for this particular task. + // + // All reductions are of this type: [rest_size, depth] -> [depth]. + using ScalarSum = Eigen::internal::scalar_sum_op; + const functor::ReduceOuterDimensions redux_sum_t; + const functor::ReduceOuterDimensions redux_sum_u; + + auto scratch_dtype = DataTypeToEnum::value; + + // Allocate a temporary workspace of [depth] shape. 
+ Tensor scratch_one_by_depth; + OP_REQUIRES_OK(context, context->allocate_temp(scratch_dtype, {depth}, + &scratch_one_by_depth)); + + // Maybe allocate a temporary workspace of [rest_size, depth] shape. + Tensor scratch_rest_by_depth; + if (std::is_same::value) { + OP_REQUIRES(context, + scratch_rest_by_depth.CopyFrom(*x_backprop_output, + {rest_size, depth}), + errors::Internal("Failed to copy a tensor")); + } else { + OP_REQUIRES_OK(context, + context->allocate_temp(scratch_dtype, {rest_size, depth}, + &scratch_rest_by_depth)); + } + + typename TTypes::Tensor scratch_tensor( + scratch_rest_by_depth.tensor()); + typename TTypes::Vec scratch_vector(scratch_one_by_depth.vec()); + auto x_mean_rest_by_depth = mean.reshape(one_by_depth).broadcast(bcast_spec); - auto x_centered = (x_rest_by_depth - x_mean_rest_by_depth).eval(); + auto x_centered = (x_rest_by_depth - x_mean_rest_by_depth); auto coef0 = (variance + epsilon).rsqrt(); auto coef0_rest_by_depth = - coef0.eval().reshape(one_by_depth).broadcast(bcast_spec); + coef0.reshape(one_by_depth).broadcast(bcast_spec); auto x_scaled = x_centered * coef0_rest_by_depth; auto y_backprop_rest_by_depth = - y_backprop.eval().reshape(rest_by_depth).template cast(); - scale_backprop.device(d) = - (y_backprop_rest_by_depth * x_scaled).sum(reduce_dims); - auto y_backprop_sum = y_backprop_rest_by_depth.sum(reduce_dims); - offset_backprop.device(d) = y_backprop_sum; + y_backprop.reshape(rest_by_depth).template cast(); - auto y_backprop_sum_one_by_depth = - y_backprop_sum.eval().reshape(one_by_depth); + // Compute `scale_backprop_output`: + // scale_backprop = + // (y_backprop_rest_by_depth * x_scaled).sum(reduce_dims) + scratch_tensor.device(d) = y_backprop_rest_by_depth * x_scaled; + redux_sum_u(d, rest_by_depth, scratch_rest_by_depth, scale_backprop_output); + + // Compute 'offset_backprop_output': + // offset_backprop = + // y_backprop_rest_by_depth.sum(reduce_dims) + redux_sum_t(d, rest_by_depth, y_backprop_input, 
offset_backprop_output); + auto y_backprop_sum = offset_backprop; + + auto y_backprop_sum_one_by_depth = y_backprop_sum.reshape(one_by_depth); auto y_backprop_mean_one_by_depth = y_backprop_sum_one_by_depth * rest_size_inv; auto y_backprop_mean_rest_by_depth = y_backprop_mean_one_by_depth.broadcast(bcast_spec); auto y_backprop_centered = y_backprop_rest_by_depth - y_backprop_mean_rest_by_depth; - auto coef1 = - (scale * coef0).eval().reshape(one_by_depth).broadcast(bcast_spec); - auto coef2 = (coef0.square() * - (y_backprop_rest_by_depth * x_centered).mean(reduce_dims)) - .eval() + + // Compute expression: + // y_backprop_centered_mean = + // (y_backprop_rest_by_depth * x_centered).mean(reduce_dims) + scratch_tensor.device(d) = y_backprop_rest_by_depth * x_centered; + redux_sum_u(d, rest_by_depth, scratch_rest_by_depth, &scratch_one_by_depth); + auto y_backprop_centered_mean = scratch_vector / static_cast(rest_size); + + auto coef1 = (scale * coef0).reshape(one_by_depth).broadcast(bcast_spec); + auto coef2 = (coef0.square() * y_backprop_centered_mean) .reshape(one_by_depth) + .eval() .broadcast(bcast_spec); + x_backprop.reshape(rest_by_depth).device(d) = (coef1 * (y_backprop_centered - x_centered * coef2)).template cast(); } }; +template +struct FusedBatchNormFreezeGrad { + void operator()(OpKernelContext* context, const Tensor& y_backprop_input, + const Tensor& x_input, const Tensor& scale_input, + const Tensor& pop_mean_input, + const Tensor& pop_variance_input, U epsilon, + Tensor* x_backprop_output, Tensor* scale_backprop_output, + Tensor* offset_backprop_output) { + typename TTypes::ConstTensor y_backprop( + y_backprop_input.tensor()); + typename TTypes::ConstTensor input(x_input.tensor()); + typename TTypes::ConstVec scale(scale_input.vec()); + typename TTypes::ConstVec pop_mean(pop_mean_input.vec()); + typename TTypes::ConstVec pop_var(pop_variance_input.vec()); + typename TTypes::Tensor x_backprop(x_backprop_output->tensor()); + typename TTypes::Vec 
scale_backprop(scale_backprop_output->vec()); + + const int depth = pop_mean.dimension(0); + const int rest_size = input.size() / depth; + + const CPUDevice& d = context->eigen_device(); + + // Allocate two temporary workspaces of [depth] shape. + Tensor scratch1_vec, scratch2_vec; + OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum::value, + {depth}, &scratch1_vec)); + OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum::value, + {depth}, &scratch2_vec)); + + // Maybe allocate a temporary workspace of [rest_size, depth] shape. + Tensor scratch3_tensor; + if (std::is_same::value) { + OP_REQUIRES( + context, + scratch3_tensor.CopyFrom(*x_backprop_output, {rest_size, depth}), + errors::Internal("Failed to copy a tensor")); + } else { + OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum::value, + {rest_size, depth}, + &scratch3_tensor)); + } + + typename TTypes::Vec scratch1(scratch1_vec.vec()); + typename TTypes::Vec scratch2(scratch2_vec.vec()); + typename TTypes::Tensor scratch3(scratch3_tensor.tensor()); + + Eigen::DSizes rest_by_depth(rest_size, depth); +#if !defined(EIGEN_HAS_INDEX_LIST) + Eigen::DSizes one_by_depth(1, depth); + Eigen::array rest_by_one({rest_size, 1}); +#else + Eigen::IndexList, Eigen::Index> one_by_depth; + one_by_depth.set(1, depth); + Eigen::IndexList> rest_by_one; + rest_by_one.set(0, rest_size); +#endif + + // Sum reduction along the 0th dimension using custom CPU functor. + using ScalarSum = Eigen::internal::scalar_sum_op; + const functor::ReduceOuterDimensions redux_sum_t; + const functor::ReduceOuterDimensions redux_sum_u; + + // offset_backprop = sum(y_backprop) + // scale_backprop = y_backprop * ((x - pop_mean) * rsqrt(pop_var + epsilon)) + // x_backprop = y_backprop * (scale * rsqrt(pop_var + epsilon)) + + // NOTE: DEFAULT DEVICE comment is added to expression assignments that + // we don't want to be executed in a thread pool. 
+ + auto y_backprop_rest_by_depth = + y_backprop.reshape(rest_by_depth).template cast(); + auto input_rest_by_depth = input.reshape(rest_by_depth).template cast(); + + // offset_backprop = sum(y_backprop) + redux_sum_t(d, rest_by_depth, y_backprop_input, offset_backprop_output); + + // scratch1 = rsqrt(pop_var + epsilon) + scratch1 = (pop_var + pop_var.constant(epsilon)).rsqrt(); // DEFAULT DEVICE + + // scratch2 = sum(y_backprop * (x - mean)) + scratch3.device(d) = + y_backprop_rest_by_depth * + (input_rest_by_depth - + pop_mean.reshape(one_by_depth).broadcast(rest_by_one)); + redux_sum_u(d, rest_by_depth, scratch3_tensor, &scratch2_vec); + + x_backprop.reshape(rest_by_depth).device(d) = + (y_backprop_rest_by_depth * + ((scratch1 * scale).reshape(one_by_depth).broadcast(rest_by_one))) + .template cast(); + scale_backprop = scratch2 * scratch1; // DEFAULT DEVICE + } +}; + #if !GOOGLE_CUDA && !TENSORFLOW_USE_ROCM namespace { // See implementation under GOOGLE_CUDA #ifdef below. @@ -827,12 +966,11 @@ struct FusedBatchNormGrad { #define DECLARE_GPU_SPEC(T, U) \ template <> \ void FusedBatchNormFreezeGrad::operator()( \ - const GPUDevice& d, const Tensor& y_backprop_input, \ + OpKernelContext* context, const Tensor& y_backprop_input, \ const Tensor& x_input, const Tensor& scale_input, \ const Tensor& mean_input, const Tensor& variance_input, U epsilon, \ Tensor* x_backprop_output, Tensor* scale_backprop_output, \ - Tensor* offset_backprop_output, typename TTypes::Vec scratch1, \ - typename TTypes::Vec scratch2); \ + Tensor* offset_backprop_output); \ extern template struct FusedBatchNormFreezeGrad; \ template <> \ void FusedBatchNormInferenceFunctor::operator()( \ @@ -1152,18 +1290,10 @@ class FusedBatchNormGradOpBase : public OpKernel { << "The implementation of FusedBatchNormGrad with is_training=False " "only support " << "NHWC tensor format for now."; - Tensor scratch1, scratch2; - OP_REQUIRES_OK(context, - context->allocate_temp(DataTypeToEnum::value, - 
scale_offset_shape, &scratch1)); - OP_REQUIRES_OK(context, - context->allocate_temp(DataTypeToEnum::value, - scale_offset_shape, &scratch2)); functor::FusedBatchNormFreezeGrad()( - context->eigen_device(), y_backprop, x, scale, - saved_mean_or_pop_mean, saved_maybe_inv_var_or_pop_var, epsilon_, - x_backprop, scale_backprop, offset_backprop, scratch1.vec(), - scratch2.vec()); + context, y_backprop, x, scale, saved_mean_or_pop_mean, + saved_maybe_inv_var_or_pop_var, epsilon_, x_backprop, scale_backprop, + offset_backprop); } } diff --git a/tensorflow/core/kernels/fused_batch_norm_op.cu.cc b/tensorflow/core/kernels/fused_batch_norm_op.cu.cc index ff088bd6f88..0d2c1c4015d 100644 --- a/tensorflow/core/kernels/fused_batch_norm_op.cu.cc +++ b/tensorflow/core/kernels/fused_batch_norm_op.cu.cc @@ -26,6 +26,83 @@ typedef Eigen::GpuDevice GPUDevice; namespace functor { +// TODO(ezhulenev): Use CUB reductions on GPU. +template +struct FusedBatchNormFreezeGrad { + void operator()(OpKernelContext* context, const Tensor& y_backprop_input, + const Tensor& x_input, const Tensor& scale_input, + const Tensor& pop_mean_input, + const Tensor& pop_variance_input, U epsilon, + Tensor* x_backprop_output, Tensor* scale_backprop_output, + Tensor* offset_backprop_output) { + typename TTypes::ConstTensor y_backprop( + y_backprop_input.tensor()); + typename TTypes::ConstTensor input(x_input.tensor()); + typename TTypes::ConstVec scale(scale_input.vec()); + typename TTypes::ConstVec pop_mean(pop_mean_input.vec()); + typename TTypes::ConstVec pop_var(pop_variance_input.vec()); + typename TTypes::Tensor x_backprop(x_backprop_output->tensor()); + typename TTypes::Vec scale_backprop(scale_backprop_output->vec()); + typename TTypes::Vec offset_backprop(offset_backprop_output->vec()); + + const int depth = pop_mean.dimension(0); + const int rest_size = input.size() / depth; + + // Allocate two temporary workspaces of [depth] shape. 
+ Tensor scratch1_vec, scratch2_vec; + OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum::value, + {depth}, &scratch1_vec)); + OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum::value, + {depth}, &scratch2_vec)); + + typename TTypes::Vec scratch1(scratch1_vec.vec()); + typename TTypes::Vec scratch2(scratch2_vec.vec()); + + const GPUDevice& d = context->eigen_device(); + + Eigen::DSizes rest_by_depth(rest_size, depth); +#if !defined(EIGEN_HAS_INDEX_LIST) + Eigen::DSizes one_by_depth(1, depth); + Eigen::array reduction_axis{0}; + Eigen::array rest_by_one({rest_size, 1}); +#else + Eigen::IndexList, Eigen::Index> one_by_depth; + one_by_depth.set(1, depth); + Eigen::IndexList > reduction_axis; + Eigen::IndexList > rest_by_one; + rest_by_one.set(0, rest_size); +#endif + + // offset_backprop = sum(y_backprop) + // scale_backprop = y_backprop * ((x - pop_mean) * rsqrt(pop_var + epsilon)) + // x_backprop = y_backprop * (scale * rsqrt(pop_var + epsilon)) + + auto y_backprop_rest_by_depth = + y_backprop.reshape(rest_by_depth).template cast(); + auto input_rest_by_depth = input.reshape(rest_by_depth).template cast(); + + offset_backprop.device(d) = y_backprop_rest_by_depth.sum(reduction_axis); + + // scratch1 = rsqrt(pop_var + epsilon) + scratch1.device(d) = (pop_var + pop_var.constant(epsilon)).rsqrt(); + + // scratch2 = sum(y_backprop * (x - mean)) + scratch2.device(d) = + (y_backprop_rest_by_depth * + (input_rest_by_depth - + pop_mean.reshape(one_by_depth).broadcast(rest_by_one))) + .sum(reduction_axis); + + x_backprop.reshape(rest_by_depth).device(d) = + (y_backprop_rest_by_depth * ((scratch1 * scale) + .eval() + .reshape(one_by_depth) + .broadcast(rest_by_one))) + .template cast(); + scale_backprop.device(d) = scratch2 * scratch1; + } +}; + template struct FusedBatchNormFreezeGrad; template struct FusedBatchNormFreezeGrad; diff --git a/tensorflow/core/kernels/fused_batch_norm_op.h b/tensorflow/core/kernels/fused_batch_norm_op.h index 
2cb19e15ddb..4936192377c 100644 --- a/tensorflow/core/kernels/fused_batch_norm_op.h +++ b/tensorflow/core/kernels/fused_batch_norm_op.h @@ -85,71 +85,15 @@ struct FusedBatchNormInferenceFunctor { #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM // Functor used by FusedBatchNormGradOp to do the computations when -// is_training=False. Both CPU and GPU will use this functor. +// is_training=False. template struct FusedBatchNormFreezeGrad { - void operator()(const Device& d, const Tensor& y_backprop_input, + void operator()(OpKernelContext* context, const Tensor& y_backprop_input, const Tensor& x_input, const Tensor& scale_input, const Tensor& pop_mean_input, const Tensor& pop_variance_input, U epsilon, Tensor* x_backprop_output, Tensor* scale_backprop_output, - Tensor* offset_backprop_output, - typename TTypes::Vec scratch1, - typename TTypes::Vec scratch2) { - typename TTypes::ConstTensor y_backprop( - y_backprop_input.tensor()); - typename TTypes::ConstTensor input(x_input.tensor()); - typename TTypes::ConstVec scale(scale_input.vec()); - typename TTypes::ConstVec pop_mean(pop_mean_input.vec()); - typename TTypes::ConstVec pop_var(pop_variance_input.vec()); - typename TTypes::Tensor x_backprop(x_backprop_output->tensor()); - typename TTypes::Vec scale_backprop(scale_backprop_output->vec()); - typename TTypes::Vec offset_backprop(offset_backprop_output->vec()); - - const int depth = pop_mean.dimension(0); - const int rest_size = input.size() / depth; - - Eigen::DSizes rest_by_depth(rest_size, depth); -#if !defined(EIGEN_HAS_INDEX_LIST) - Eigen::DSizes one_by_depth(1, depth); - Eigen::array reduction_axis{0}; - Eigen::array rest_by_one({rest_size, 1}); -#else - Eigen::IndexList, Eigen::Index> one_by_depth; - one_by_depth.set(1, depth); - Eigen::IndexList > reduction_axis; - Eigen::IndexList > rest_by_one; - rest_by_one.set(0, rest_size); -#endif - - // offset_backprop = sum(y_backprop) - // scale_backprop = y_backprop * ((x - pop_mean) * rsqrt(pop_var + epsilon)) - // 
x_backprop = y_backprop * (scale * rsqrt(pop_var + epsilon)) - - auto y_backprop_rest_by_depth = - y_backprop.reshape(rest_by_depth).template cast(); - auto input_rest_by_depth = input.reshape(rest_by_depth).template cast(); - - offset_backprop.device(d) = y_backprop_rest_by_depth.sum(reduction_axis); - - // scratch1 = rsqrt(pop_var + epsilon) - scratch1.device(d) = (pop_var + pop_var.constant(epsilon)).rsqrt(); - - // scratch2 = sum(y_backprop * (x - mean)) - scratch2.device(d) = - (y_backprop_rest_by_depth * - (input_rest_by_depth - - pop_mean.reshape(one_by_depth).broadcast(rest_by_one))) - .sum(reduction_axis); - - x_backprop.reshape(rest_by_depth).device(d) = - (y_backprop_rest_by_depth * ((scratch1 * scale) - .eval() - .reshape(one_by_depth) - .broadcast(rest_by_one))) - .template cast(); - scale_backprop.device(d) = scratch2 * scratch1; - } + Tensor* offset_backprop_output) {} }; } // namespace functor diff --git a/tensorflow/core/kernels/fused_batch_norm_op_test.cc b/tensorflow/core/kernels/fused_batch_norm_op_test.cc index f765a3ee43d..5297d3ee138 100644 --- a/tensorflow/core/kernels/fused_batch_norm_op_test.cc +++ b/tensorflow/core/kernels/fused_batch_norm_op_test.cc @@ -269,6 +269,22 @@ BM_FusedBatchNorm(64, 14, 14, 256, fp16, true, NCHW, gpu); BENCHMARK(BM_NAME(FusedBatchNormGrad, N, H, W, C, T, IS_TRAINING, FORMAT, \ DEVICE)); +#define BM_FusedBatchNormGradResnetShapes(T, IS_TRAINING, FORMAT, DEVICE) \ + BM_FusedBatchNormGrad(64, 56, 56, 64, T, IS_TRAINING, FORMAT, DEVICE); \ + BM_FusedBatchNormGrad(64, 56, 56, 128, T, IS_TRAINING, FORMAT, DEVICE); \ + BM_FusedBatchNormGrad(64, 56, 56, 256, T, IS_TRAINING, FORMAT, DEVICE); \ + \ + BM_FusedBatchNormGrad(64, 28, 28, 128, T, IS_TRAINING, FORMAT, DEVICE); \ + BM_FusedBatchNormGrad(64, 28, 28, 256, T, IS_TRAINING, FORMAT, DEVICE); \ + BM_FusedBatchNormGrad(64, 28, 28, 512, T, IS_TRAINING, FORMAT, DEVICE); \ + \ + BM_FusedBatchNormGrad(64, 14, 14, 128, T, IS_TRAINING, FORMAT, DEVICE); \ + 
BM_FusedBatchNormGrad(64, 14, 14, 256, T, IS_TRAINING, FORMAT, DEVICE); \ + BM_FusedBatchNormGrad(64, 14, 14, 1024, T, IS_TRAINING, FORMAT, DEVICE) + +BM_FusedBatchNormGradResnetShapes(fp32, true, NHWC, cpu); +BM_FusedBatchNormGradResnetShapes(fp32, false, NHWC, cpu); + #ifdef GOOGLE_CUDA BM_FusedBatchNormGrad(64, 14, 14, 256, fp32, true, NHWC, gpu); BM_FusedBatchNormGrad(64, 14, 14, 256, fp16, true, NHWC, gpu); diff --git a/tensorflow/core/kernels/gather_nd_op_cpu_impl.h b/tensorflow/core/kernels/gather_nd_op_cpu_impl.h index c3d2f701398..ec3865cc3ee 100644 --- a/tensorflow/core/kernels/gather_nd_op_cpu_impl.h +++ b/tensorflow/core/kernels/gather_nd_op_cpu_impl.h @@ -101,37 +101,21 @@ struct GatherNdSlice { typename TTypes::ConstMatrix Tindices, typename TTypes::Matrix Tout) { std::atomic error_loc(-1); - - const Eigen::DenseIndex batch_size = Tindices.dimension(0); -#if !defined(EIGEN_HAS_INDEX_LIST) - Eigen::Tensor::Dimensions reshape_dims{{ 1 }}; - Eigen::array broadcast_dims{{ batch_size }}; -#else - Eigen::IndexList > reshape_dims; - Eigen::IndexList broadcast_dims; - broadcast_dims.set(0, batch_size); -#endif + const Eigen::Index batch_size = Tindices.dimension(0); generator::GatherNdSliceGenerator gather_nd_generator( slice_size, Tindices, Tparams, Tout, &error_loc); -#if defined(INTEL_MKL) && defined(ENABLE_MKL) -// Eigen implementation below is not highly performant. gather_nd_generator -// does not seem to be called in parallel, leading to very poor performance. -// Additionally, since it uses scalar (Tscratch) to invoke 'generate', it -// needs to go through redundant operations like 'reshape', 'broadcast' and -// 'sum'. OpenMP loop below essentially does same thing as Eigen code, but -// is considerably more efficient. 
-#pragma omp parallel for - for (Eigen::DenseIndex i = 0; i < batch_size; i++) { - const Eigen::array loc{i}; - gather_nd_generator(loc); - } -#else // INTEL_MKL && ENABLE_MKL - Tscratch.device(d) = Tscratch.reshape(reshape_dims) - .broadcast(broadcast_dims) - .generate(gather_nd_generator) - .sum(); -#endif // INTEL_MKL && ENABLE_MKL + auto compute_shard = [&](Eigen::Index begin, Eigen::Index end) { + for (Eigen::Index i = begin; i < end; ++i) { + const Eigen::array loc{i}; + gather_nd_generator(loc); + } + }; + Eigen::Index bytes_moved = sizeof(T) * (slice_size + IXDIM); + auto cost = Eigen::TensorOpCost(bytes_moved /* bytes loaded */, + bytes_moved /* bytes stored */, + slice_size + IXDIM /* compute cycles */); + d.parallelFor(batch_size, cost, compute_shard); // error_loc() returns -1 if there's no out-of-bounds index, // otherwise it returns the location of an OOB index in Tindices. diff --git a/tensorflow/core/kernels/histogram_op.cc b/tensorflow/core/kernels/histogram_op.cc index 75f896b407d..7b66c879fbf 100644 --- a/tensorflow/core/kernels/histogram_op.cc +++ b/tensorflow/core/kernels/histogram_op.cc @@ -48,18 +48,22 @@ struct HistogramFixedWidthFunctor { const double step = static_cast(value_range(1) - value_range(0)) / static_cast(nbins); + const double nbins_minus_1 = static_cast(nbins - 1); // The calculation is done by finding the slot of each value in `values`. // With [a, b]: // step = (b - a) / nbins // (x - a) / step // , then the entries are mapped to output. 
+ + // Bug fix: Switch the order of cwiseMin and int32-casting to avoid + // producing a negative index when casting an big int64 number to int32 index_to_bin.device(d) = ((values.cwiseMax(value_range(0)) - values.constant(value_range(0))) .template cast() / step) - .template cast() - .cwiseMin(nbins - 1); + .cwiseMin(nbins_minus_1) + .template cast(); out.setZero(); for (int32 i = 0; i < index_to_bin.size(); i++) { diff --git a/tensorflow/core/kernels/identity_n_op.cc b/tensorflow/core/kernels/identity_n_op.cc index 9746b1fab3e..746a29bf5aa 100644 --- a/tensorflow/core/kernels/identity_n_op.cc +++ b/tensorflow/core/kernels/identity_n_op.cc @@ -16,6 +16,7 @@ limitations under the License. // See docs in ../ops/array_ops.cc. #include "tensorflow/core/kernels/identity_n_op.h" +#include "tensorflow/core/common_runtime/input_colocation_exemption_registry.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" @@ -24,5 +25,10 @@ limitations under the License. namespace tensorflow { REGISTER_KERNEL_BUILDER(Name("IdentityN").Device(DEVICE_DEFAULT), IdentityNOp); +// Do not worry about colocating IdentityN op with its resource inputs since +// it just forwards it's inputs anyway. This is needed because we create +// IdentityN nodes to club "all" outputs of functional ops while lowering to +// make the original functional op fetchable. 
+REGISTER_INPUT_COLOCATION_EXEMPTION("IdentityN"); } // namespace tensorflow diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc index 6c50ad0ccea..14bfc9a5ffa 100644 --- a/tensorflow/core/kernels/mkl_concat_op.cc +++ b/tensorflow/core/kernels/mkl_concat_op.cc @@ -247,7 +247,9 @@ class MklConcatOp : public OpKernel { ConstMatrixVector; explicit MklConcatOp(OpKernelConstruction* c) - : OpKernel(c), eigen_concat_op_(c) {} + : OpKernel(c), + eigen_concat_op_(c), + data_format_(TensorFormat::FORMAT_NCHW) {} void Compute(OpKernelContext* context) override { try { diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h index c12a4ff0f0c..e9be11a4ded 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.h +++ b/tensorflow/core/kernels/mkl_conv_ops.h @@ -411,15 +411,18 @@ class MklDnnConvUtil { input_cols, filter_cols, dilation_cols, stride_cols, padding_type, &out_cols, &pad_left, &pad_right)); } else { - OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose( - input_planes, filter_planes, stride_planes, - padding_, &out_planes, &pad_D1, &pad_D2)); - OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose( - input_rows, filter_rows, stride_rows, - padding_, &out_rows, &pad_top, &pad_bottom)); - OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose( - input_cols, filter_cols, stride_cols, - padding_, &out_cols, &pad_left, &pad_right)); + OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerboseV2( + input_planes, filter_planes, dilation_planes, + stride_planes, padding_, &out_planes, + &pad_D1, &pad_D2)); + OP_REQUIRES_OK(context_, + GetWindowedOutputSizeVerboseV2( + input_rows, filter_rows, dilation_rows, stride_rows, + padding_, &out_rows, &pad_top, &pad_bottom)); + OP_REQUIRES_OK(context_, + GetWindowedOutputSizeVerboseV2( + input_cols, filter_cols, dilation_cols, stride_cols, + padding_, &out_cols, &pad_left, &pad_right)); } if (is_conv2d) { @@ -559,8 +562,6 @@ class MklConvBackpropCommonOp : 
public OpKernel { OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_)); int stride_n = GetTensorDim(strides_, data_format_, 'N'); int stride_c = GetTensorDim(strides_, data_format_, 'C'); - const int64 stride_h = GetTensorDim(strides_, data_format_, 'H'); - const int64 stride_w = GetTensorDim(strides_, data_format_, 'W'); OP_REQUIRES( context, (stride_n == 1 && stride_c == 1), errors::InvalidArgument("Current implementation does not yet support " diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc index 455522baf5b..9c6c2b1ea37 100644 --- a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc @@ -510,6 +510,7 @@ class MklFusedBatchNormOp : public OpKernel { OP_REQUIRES(context, FormatFromString(tensor_format, &tensor_format_), errors::InvalidArgument("Invalid data format")); OP_REQUIRES_OK(context, context->GetAttr("is_training", &is_training_)); + depth_ = 0; } void Compute(OpKernelContext* context) override { @@ -834,6 +835,7 @@ class MklFusedBatchNormGradOp : public OpKernel { OP_REQUIRES(context, FormatFromString(tensor_format, &tensor_format_), errors::InvalidArgument("Invalid data format")); OP_REQUIRES_OK(context, context->GetAttr("is_training", &is_training_)); + depth_ = 0; } void Compute(OpKernelContext* context) override { @@ -1046,7 +1048,7 @@ class MklFusedBatchNormGradOp : public OpKernel { private: float epsilon_; TensorFormat tensor_format_; - int depth_; // batch normalization is done for per channel. + size_t depth_; // batch normalization is done for per channel. 
bool is_training_; engine cpu_engine = engine(engine::cpu, 0); diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.h b/tensorflow/core/kernels/mkl_pooling_ops_common.h index ec440a0aedf..a24bd71d53c 100644 --- a/tensorflow/core/kernels/mkl_pooling_ops_common.h +++ b/tensorflow/core/kernels/mkl_pooling_ops_common.h @@ -464,7 +464,8 @@ class MklPoolingOpBase : public OpKernel { // We may not get this attribute for this node if it does not go through // graph rewrite pass. So we do not check for error while retrieving this // attribute value. - context->GetAttr("workspace_enabled", &this->workspace_enabled_); + TF_CHECK_OK( + context->GetAttr("workspace_enabled", &this->workspace_enabled_)); } void Compute(OpKernelContext* context) override = 0; diff --git a/tensorflow/core/kernels/mkl_qmatmul_op.cc b/tensorflow/core/kernels/mkl_qmatmul_op.cc index eb4d519ef72..fc571602b35 100644 --- a/tensorflow/core/kernels/mkl_qmatmul_op.cc +++ b/tensorflow/core/kernels/mkl_qmatmul_op.cc @@ -222,10 +222,10 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive { bias_mem(nullptr), dst_mem(nullptr), fwd_desc(nullptr), + fwd_pd(nullptr), src_md(nullptr), weight_md(nullptr), bias_md(nullptr), - fwd_pd(nullptr), matmul_fwd(nullptr), fwd_stream(nullptr) {} }; @@ -719,6 +719,7 @@ class MklDnnQuantizedMatMulOp : public OpKernel { context->CtxFailure( errors::InvalidArgument("Quantization mode must be" "either MIN_FIRST or SCALED.")); + return nullptr; } } } diff --git a/tensorflow/core/kernels/mkl_qmatmul_op_test.cc b/tensorflow/core/kernels/mkl_qmatmul_op_test.cc index e4db9653cdd..f018e12313a 100644 --- a/tensorflow/core/kernels/mkl_qmatmul_op_test.cc +++ b/tensorflow/core/kernels/mkl_qmatmul_op_test.cc @@ -51,7 +51,7 @@ class ConvMklToTF : public OpsTestBase { .Input(FakeInput(dtype)) // Input .Input(FakeInput(DT_UINT8)) // MKL second tensor .Attr("T", dtype) - .Attr("_kernel", "MklOp") + .Attr("_kernel", "MklLayoutDependentOp") .Finalize(node_def())); TF_EXPECT_OK(InitOp()); 
AddInputFromArray(first.shape(), first.flat()); @@ -444,10 +444,22 @@ TEST_F(QuantizedMatMulTest, Small_withBiasAndReluAndReq) { // After Bias addition // 74+10=84, 80-20=60, 86+30=116, 92-40=52, // 173+10=183, 188-20=168, 203+30=233, 218-40=178 - // After Relu and Requantize + // After Relu // 84, 60, 116, 52, 183, 168, 233, 178 + // After Requantize + // requantscale = scale_int32 / scale_eightbit / static_cast(1 << 23) + // requantscale = 2^31/255/2^23 ~= 1.00392 + // 84 * 1.00392 ~= 84.329 ~= 84 + // 60 * 1.00392 ~= 60.235 ~= 60 + // 116 * 1.00392 ~= 116.454 ~= 116 + // 52 * 1.00392 ~= 52.203 ~= 52 + // 183 * 1.00392 ~= 183.717 ~= 184 + // 168 * 1.00392 ~= 168.658 ~= 169 + // 233 * 1.00392 ~= 233.913 ~= 234 + // 178 * 1.00392 ~= 178.698 ~= 179 + Tensor expected(allocator(), DT_QUINT8, TensorShape({2, 4})); - test::FillValues(&expected, {84, 60, 116, 52, 183, 168, 233, 178}); + test::FillValues(&expected, {84, 60, 116, 52, 184, 169, 234, 179}); const Tensor& output = *GetOutput(0); const Tensor& mkl_shape_tensor = *GetOutput(3); diff --git a/tensorflow/core/kernels/redux_functor.h b/tensorflow/core/kernels/redux_functor.h index 24dc876ef8e..30038c62dbd 100644 --- a/tensorflow/core/kernels/redux_functor.h +++ b/tensorflow/core/kernels/redux_functor.h @@ -35,16 +35,18 @@ namespace functor { // input: [D1, D2, ... , DN] // -> // output: [Di, ... , DN] where i belongs to set [1,N] -template +template struct ReduceOuterDimensions { - ReduceOuterDimensions(){}; + ReduceOuterDimensions() {} + template void operator()(const CPUDevice& device, const Eigen::DSizes& input_dims, const Tensor& input, Tensor* output) const { // Compute inner and outer dim after reshaping into 2d tensor. 
const int num_output_dims = output->dims(); - auto output_dims = output->template flat().dimensions(); + auto output_dims = output->template flat().dimensions(); Eigen::Index inner_dim = 1, outer_dim = 1; for (int i = 0; i < num_dims - num_output_dims; ++i) @@ -54,8 +56,8 @@ struct ReduceOuterDimensions { if (1 == outer_dim) { // Nothing to do but passing input to output. - output->template flat() = - input.template flat().reshape(output_dims); + output->template flat() = + input.template flat().reshape(output_dims); return; } @@ -63,13 +65,15 @@ struct ReduceOuterDimensions { const Eigen::Index num_threads = device.numThreads(); // If the inner dim parallelism is large enough - if (inner_dim > num_threads * 16) { + // TODO(ezhulenev): There seems to be no benefits in going this route. Check + // if this can be improved, or use better heuristic? + if (inner_dim > num_threads * 32) { // Do not create more blocks than there are threads in a pool. const Eigen::Index num_blocks = num_threads; // Block size along the outer dimension. const Eigen::Index inner_block_size = Eigen::divup(inner_dim, num_blocks); - const T* input_data = input.template flat().data(); + const InputT* input_data = input.template flat().data(); // Allocate temporary buffer for partial reductions. 
Eigen::Tensor buffer( @@ -82,7 +86,7 @@ struct ReduceOuterDimensions { Eigen::Unaligned>; using Input = Eigen::TensorMap< - Eigen::Tensor, + Eigen::Tensor, Eigen::Unaligned>; const auto compute = [inner_dim, outer_dim, num_blocks, inner_block_size, @@ -94,7 +98,7 @@ struct ReduceOuterDimensions { inner_dim_limit = std::min(inner_dim, inner_dim_limit); Eigen::Index my_job_len = inner_dim_limit - inner_dim_start; - const T* my_job_start = input_data + inner_dim_start; + const InputT* my_job_start = input_data + inner_dim_start; Buffer buf(buffer_data + inner_dim_start, my_job_len); for (Eigen::Index i = 0; i < outer_dim; ++i) { @@ -107,7 +111,7 @@ struct ReduceOuterDimensions { // Compute cost of reducing a single block. const Eigen::Index compute_size = outer_dim * inner_block_size; - const Eigen::Index compute_input_bytes = compute_size * sizeof(T); + const Eigen::Index compute_input_bytes = compute_size * sizeof(InputT); const Eigen::TensorOpCost cost( compute_input_bytes, 0, // We'll be mostly writing to L1, assume store cost is 0 @@ -116,8 +120,8 @@ struct ReduceOuterDimensions { device.parallelFor(num_blocks, cost, compute); // Write final result to the output. - output->template flat() = - buffer.template cast().reshape(output_dims); + output->template flat() = + buffer.template cast().reshape(output_dims); } else { // Compute block size along the outer dimension for efficiency. const Eigen::Index parallel_cell_size = inner_dim; @@ -136,7 +140,7 @@ struct ReduceOuterDimensions { // Block size along the outer dimension. const Eigen::Index outer_block_size = Eigen::divup(outer_dim, num_blocks); - const T* input_data = input.template flat().data(); + const InputT* input_data = input.template flat().data(); // Allocate temporary buffer for partial reductions. 
Tensor buffer(DataTypeToEnum::v(), {num_blocks, inner_dim}); @@ -148,7 +152,7 @@ struct ReduceOuterDimensions { Eigen::Unaligned>; using Input = Eigen::TensorMap< - Eigen::Tensor, + Eigen::Tensor, Eigen::Unaligned>; const auto compute = [inner_dim, num_blocks, outer_block_size, @@ -170,7 +174,7 @@ struct ReduceOuterDimensions { // Compute cost of reducing a single block. const Eigen::Index compute_size = outer_block_size * inner_dim; - const Eigen::Index compute_input_bytes = compute_size * sizeof(T); + const Eigen::Index compute_input_bytes = compute_size * sizeof(InputT); const Eigen::TensorOpCost cost( compute_input_bytes, 0, // We'll be mostly writing to L1, assume store cost is 0 @@ -187,7 +191,8 @@ struct ReduceOuterDimensions { const decltype(buf)>(buf0, buf); } // Write final result to the output. - output->template flat() = buf0.template cast().reshape(output_dims); + output->template flat() = + buf0.template cast().reshape(output_dims); } } }; @@ -197,9 +202,11 @@ struct ReduceOuterDimensions { // input: [D1, D2, ... , DN] // -> // output: [Di, ... , Dj] where i & j belongs to set [1,N]. -template +template struct ReduceMiddleDimensions { - ReduceMiddleDimensions(){}; + ReduceMiddleDimensions() {} + template void operator()(const CPUDevice& device, const Eigen::DSizes& input_dims, @@ -207,7 +214,7 @@ struct ReduceMiddleDimensions { const int axis_begin_dim) const { // Compute dims after reshaping into 3d tensor. const int num_output_dims = output->dims(); - auto output_dims = output->template flat().dimensions(); + auto output_dims = output->template flat().dimensions(); Eigen::Index inner_dim = 1, middle_dim = 1, outer_dim = 1; for (int i = 0; i < axis_begin_dim; ++i) outer_dim *= input_dims[i]; @@ -218,12 +225,12 @@ struct ReduceMiddleDimensions { if ((1 == inner_dim * outer_dim)) { // Nothing to do. 
- output->template flat() = - input.template flat().reshape(output_dims); + output->template flat() = + input.template flat().reshape(output_dims); return; } else if (1 == inner_dim) { // Equivalent to ReduceOuterDimensions. - const ReduceOuterDimensions redux; + const ReduceOuterDimensions redux; redux(device, input_dims, input, output); return; } @@ -247,7 +254,7 @@ struct ReduceMiddleDimensions { const Eigen::Index outer_block_size = Eigen::divup(total_workload, num_blocks); - const T* input_data = input.template flat().data(); + const InputT* input_data = input.template flat().data(); // Allocate temporary buffer for partial reductions. Eigen::Tensor buffer(num_blocks, middle_dim); @@ -255,7 +262,7 @@ struct ReduceMiddleDimensions { AccumT* buffer_data = buffer.data(); using Buffer = Eigen::TensorMap>; - using Input = Eigen::TensorMap>; + using Input = Eigen::TensorMap>; Eigen::array reduction_axis = {0}; Reducer reducer; @@ -301,7 +308,7 @@ struct ReduceMiddleDimensions { // Compute cost of reducing a single block. const Eigen::Index compute_size = outer_block_size * inner_dim; - const Eigen::Index compute_input_bytes = compute_size * sizeof(T); + const Eigen::Index compute_input_bytes = compute_size * sizeof(InputT); const Eigen::TensorOpCost cost( compute_input_bytes, 0, // We'll be mostly writing to L1, assume store cost is 0 @@ -322,7 +329,8 @@ struct ReduceMiddleDimensions { } // Write final result to the output. - output->template flat() = buf0.template cast().reshape(output_dims); + output->template flat() = + buf0.template cast().reshape(output_dims); } }; diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc index 5948db34c81..967d4a4734e 100644 --- a/tensorflow/core/kernels/resource_variable_ops.cc +++ b/tensorflow/core/kernels/resource_variable_ops.cc @@ -47,7 +47,7 @@ limitations under the License. 
#define EIGEN_USE_THREADS -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define EIGEN_USE_GPU #endif @@ -260,7 +260,7 @@ void VarHandleOp::Compute(OpKernelContext* ctx) { REGISTER_KERNEL_BUILDER(Name("VarHandleOp").Device(DEVICE_CPU), VarHandleOp); -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER_KERNEL_BUILDER( Name("ReadVariableOp").Device(DEVICE_GPU).HostMemory("resource"), ReadVariableOp); @@ -295,7 +295,7 @@ REGISTER_KERNEL_BUILDER(Name("_VarHandlesOp") DT_DOUBLE, DT_BOOL, DT_VARIANT}), ResourceHandlesOp); -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM template class VariableShapeOp : public OpKernel { @@ -324,7 +324,7 @@ REGISTER_KERNEL_BUILDER( Name("VariableShape").Device(DEVICE_CPU).TypeConstraint("out_type"), VariableShapeOp); -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER_KERNEL_BUILDER(Name("VariableShape") .Device(DEVICE_GPU) @@ -339,7 +339,7 @@ REGISTER_KERNEL_BUILDER(Name("VariableShape") .HostMemory("input"), VariableShapeOp); -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM DestroyResourceOp::DestroyResourceOp(OpKernelConstruction* ctx) : OpKernel(ctx) { @@ -515,7 +515,7 @@ TF_CALL_ALL_TYPES(REGISTER_KERNELS); TF_CALL_QUANTIZED_TYPES(REGISTER_KERNELS); #undef REGISTER_KERNELS -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define REGISTER_GPU_KERNELS(type) \ REGISTER_KERNEL_BUILDER(Name("AssignVariableOp") \ .Device(DEVICE_GPU) \ @@ -527,7 +527,7 @@ TF_CALL_GPU_ALL_TYPES(REGISTER_GPU_KERNELS); TF_CALL_int64(REGISTER_GPU_KERNELS); TF_CALL_variant(REGISTER_GPU_KERNELS); #undef REGISTER_GPU_KERNELS -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM template class AssignUpdateVariableOp : public OpKernel { @@ -575,7 +575,7 @@ class AssignUpdateVariableOp : public OpKernel { TF_CALL_NUMBER_TYPES(REGISTER_KERNELS); #undef REGISTER_KERNELS -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define REGISTER_GPU_KERNELS(type) \ 
REGISTER_KERNEL_BUILDER(Name("AssignAddVariableOp") \ .Device(DEVICE_GPU) \ @@ -591,7 +591,7 @@ TF_CALL_NUMBER_TYPES(REGISTER_KERNELS); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); TF_CALL_int64(REGISTER_GPU_KERNELS); #undef REGISTER_GPU_KERNELS -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM class VarIsInitializedOp : public OpKernel { public: @@ -616,13 +616,13 @@ class VarIsInitializedOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("VarIsInitializedOp").Device(DEVICE_CPU), VarIsInitializedOp); -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER_KERNEL_BUILDER(Name("VarIsInitializedOp") .Device(DEVICE_GPU) .HostMemory("resource") .HostMemory("is_initialized"), IsResourceInitialized); -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM template class ResourceGatherOp : public OpKernel { @@ -762,7 +762,7 @@ TF_CALL_ALL_TYPES(REGISTER_GATHER_CPU); TF_CALL_QUANTIZED_TYPES(REGISTER_GATHER_CPU); // Registers GPU kernels. -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define REGISTER_GATHER_GPU(type) REGISTER_GATHER_ALL_INDICES(GPU, type) TF_CALL_GPU_NUMBER_TYPES(REGISTER_GATHER_GPU); @@ -784,7 +784,7 @@ REGISTER_KERNEL_BUILDER(Name("ResourceGather") .TypeConstraint("Tindices"), ResourceGatherOp) -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM #undef REGISTER_GATHER_CPU #undef REGISTER_GATHER_GPU @@ -834,12 +834,12 @@ class ResourceGatherNdOp : public OpKernel { TF_CALL_ALL_TYPES(REGISTER_GATHER_ND_CPU); // Registers GPU kernels. -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define REGISTER_GATHER_ND_GPU(type) REGISTER_GATHER_ND_ALL_INDICES(GPU, type) TF_CALL_GPU_NUMBER_TYPES(REGISTER_GATHER_ND_GPU); -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM #undef REGISTER_GATHER_ND_CPU #undef REGISTER_GATHER_ND_GPU @@ -958,7 +958,7 @@ REGISTER_SCATTER_KERNEL(Variant, CPU, "ResourceScatterUpdate", scatter_op::UpdateOp::ASSIGN); // Registers GPU kernels. 
-#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define REGISTER_SCATTER_ARITHMETIC_GPU(type) \ REGISTER_SCATTER_ARITHMETIC(type, GPU); #define REGISTER_SCATTER_MINMAX_GPU(type) REGISTER_SCATTER_MINMAX(type, GPU); @@ -992,7 +992,7 @@ REGISTER_KERNEL_BUILDER(Name("ResourceScatterUpdate") ResourceScatterUpdateOp) -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM #undef REGISTER_SCATTER_ARITHMETIC #undef REGISTER_SCATTER_ARITHMETIC_CPU diff --git a/tensorflow/core/kernels/topk_op.cc b/tensorflow/core/kernels/topk_op.cc index f51deb20196..02b99e44880 100644 --- a/tensorflow/core/kernels/topk_op.cc +++ b/tensorflow/core/kernels/topk_op.cc @@ -123,12 +123,14 @@ struct TopKFunctor { input.maximum(/*dims=*/reduce_on_cols).eval().reshape(rows_by_one); // Get the indices of the maximum values. for (int r = 0; r < num_rows; ++r) { + indices(r, 0) = 0; for (int c = 0; c < num_cols; ++c) { if (values(r, 0) == input(r, c)) { indices(r, 0) = c; break; } } + values(r, 0) = input(r, indices(r, 0)); } return Status::OK(); diff --git a/tensorflow/core/lib/io/zlib_outputbuffer.cc b/tensorflow/core/lib/io/zlib_outputbuffer.cc index 6355e54aaa2..3b3b4745508 100644 --- a/tensorflow/core/lib/io/zlib_outputbuffer.cc +++ b/tensorflow/core/lib/io/zlib_outputbuffer.cc @@ -190,6 +190,17 @@ Status ZlibOutputBuffer::Append(StringPiece data) { return Status::OK(); } +#if defined(PLATFORM_GOOGLE) +Status ZlibOutputBuffer::Append(const absl::Cord& cord) { + absl::CordReader reader(cord); + absl::string_view fragment; + while (reader.ReadFragment(&fragment)) { + TF_RETURN_IF_ERROR(Append(fragment)); + } + return Status::OK(); +} +#endif + Status ZlibOutputBuffer::Flush() { TF_RETURN_IF_ERROR(DeflateBuffered(Z_PARTIAL_FLUSH)); TF_RETURN_IF_ERROR(FlushOutputBufferToFile()); diff --git a/tensorflow/core/lib/io/zlib_outputbuffer.h b/tensorflow/core/lib/io/zlib_outputbuffer.h index 68f05963226..1eabb2c7b7b 100644 --- a/tensorflow/core/lib/io/zlib_outputbuffer.h +++ 
b/tensorflow/core/lib/io/zlib_outputbuffer.h @@ -65,6 +65,10 @@ class ZlibOutputBuffer : public WritableFile { // To immediately write contents to file call `Flush()`. Status Append(StringPiece data) override; +#if defined(PLATFORM_GOOGLE) + Status Append(const absl::Cord& cord) override; +#endif + // Deflates any cached input and writes all output to file. Status Flush() override; diff --git a/tensorflow/core/platform/default/device_tracer.cc b/tensorflow/core/platform/default/device_tracer.cc index 38cdb65c566..04e6282edbe 100644 --- a/tensorflow/core/platform/default/device_tracer.cc +++ b/tensorflow/core/platform/default/device_tracer.cc @@ -33,6 +33,7 @@ limitations under the License. #include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/lib/strings/stringprintf.h" +#include "tensorflow/core/platform/abi.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/mem.h" @@ -586,7 +587,7 @@ class CudaEventCollector { auto elapsed_us = GetElapsedTimeUs(record.start_event, record.stop_event); auto stats = absl::make_unique(); - std::string node_name = record.kernel_name; + std::string node_name = port::MaybeAbiDemangle(record.kernel_name); // Sometimes CUPTI returns invalid characters. See b/129892466. if (!IsAscii(node_name)) { node_name = ""; diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index c6f515fee13..b1976d7382a 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 90 // Updated: 2019/7/8 +#define TF_GRAPH_DEF_VERSION 94 // Updated: 2019/7/12 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). 
// diff --git a/tensorflow/core/util/batch_util.cc b/tensorflow/core/util/batch_util.cc index 8b694d9cc32..e1c32cd0069 100644 --- a/tensorflow/core/util/batch_util.cc +++ b/tensorflow/core/util/batch_util.cc @@ -42,42 +42,56 @@ Status ValidateInput(const Tensor& parent, const Tensor& element, int64 index) { } template -Status HandleElementToSlice(Tensor element, Tensor* parent, int64 index, +Status HandleElementToSlice(T* src, T* dest, int64 num_values, bool /* can_move */) { - parent->flat_outer_dims().chip(index, 0) = element.flat(); + static_assert(is_simple_type::value, "Memcpy requires a simple type."); + memcpy(dest, src, num_values * sizeof(T)); return Status::OK(); } template <> -Status HandleElementToSlice(Tensor element, Tensor* parent, int64 index, +Status HandleElementToSlice(string* src, string* dest, int64 num_values, bool can_move) { - auto parent_as_matrix = parent->flat_outer_dims(); - auto element_flat = element.flat(); if (can_move) { - for (int64 i = 0; i < element.NumElements(); ++i) { - parent_as_matrix(index, i) = std::move(element_flat(i)); + for (int64 i = 0; i < num_values; ++i) { + *dest++ = std::move(*src++); } } else { - parent_as_matrix.chip(index, 0) = element_flat; + std::copy_n(src, num_values, dest); } return Status::OK(); } template <> -Status HandleElementToSlice(Tensor element, Tensor* parent, - int64 index, bool can_move) { - auto parent_as_matrix = parent->flat_outer_dims(); - auto element_flat = element.flat(); +Status HandleElementToSlice(Variant* src, Variant* dest, + int64 num_values, bool can_move) { if (can_move) { - for (int64 i = 0; i < element.NumElements(); ++i) { - parent_as_matrix(index, i) = std::move(element_flat(i)); + for (int64 i = 0; i < num_values; ++i) { + *dest++ = std::move(*src++); } } else { - parent_as_matrix.chip(index, 0) = element_flat; + std::copy_n(src, num_values, dest); } return Status::OK(); } +template <> +Status HandleElementToSlice(ResourceHandle* src, + ResourceHandle* dest, + int64 
num_values, + bool /* can_move */) { + std::copy_n(src, num_values, dest); + return Status::OK(); +} + +template <> +Status HandleElementToSlice(Eigen::half* src, Eigen::half* dest, + int64 num_values, + bool /* can_move */) { + std::copy_n(src, num_values, dest); + return Status::OK(); +} + // TODO(b/78245576): Consider removing this overload. template void HandleSliceToElement(const Tensor& parent, Tensor* element, int64 index) { @@ -123,12 +137,13 @@ void HandleSliceToElement(Tensor* parent, Tensor* element, int64 index, // Copies element into the index^th slice of parent (in the 0th dimension). Status CopyElementToSlice(Tensor element, Tensor* parent, int64 index) { TF_RETURN_IF_ERROR(ValidateInput(*parent, element, index)); - + const int64 num_values = element.NumElements(); bool can_move = element.RefCountIsOne(); -#define HANDLE_TYPE(T) \ - case DataTypeToEnum::value: { \ - return HandleElementToSlice(std::move(element), parent, index, \ - can_move); \ +#define HANDLE_TYPE(T) \ + case DataTypeToEnum::value: { \ + T* src = element.base(); \ + T* dest = parent->base() + (num_values * index); \ + return HandleElementToSlice(src, dest, num_values, can_move); \ } switch (element.dtype()) { diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 1b62dad8878..36ae80a6d2a 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -525,8 +525,8 @@ inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor, TensorShape output_shape = mkl_shape.GetTfShape(); // Allocate output tensor. 
- context->allocate_temp(DataTypeToEnum::v(), output_shape, - &output_tensor); + TF_CHECK_OK(context->allocate_temp(DataTypeToEnum::v(), output_shape, + &output_tensor)); auto cpu_engine = engine(engine::cpu, 0); MklDnnData input(&cpu_engine); @@ -576,7 +576,7 @@ inline const Tensor& MklGetInput(OpKernelContext* ctext, int n) { inline void GetMklInputList(OpKernelContext* ctext, StringPiece name, OpInputList* input_tensors) { CHECK_NOTNULL(input_tensors); - ctext->input_list(name, input_tensors); + TF_CHECK_OK(ctext->input_list(name, input_tensors)); } inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name, diff --git a/tensorflow/core/util/tensor_bundle/BUILD b/tensorflow/core/util/tensor_bundle/BUILD index 916856a7a8d..6782e518d4f 100644 --- a/tensorflow/core/util/tensor_bundle/BUILD +++ b/tensorflow/core/util/tensor_bundle/BUILD @@ -18,6 +18,8 @@ package( filegroup( name = "mobile_srcs", srcs = [ + "byte_swap.cc", + "byte_swap.h", "naming.cc", "naming.h", "tensor_bundle.cc", @@ -32,8 +34,14 @@ alias( cc_library( name = "tensor_bundle", - srcs = ["tensor_bundle.cc"], - hdrs = ["tensor_bundle.h"], + srcs = [ + "byte_swap.cc", + "tensor_bundle.cc", + ], + hdrs = [ + "byte_swap.h", + "tensor_bundle.h", + ], copts = tf_copts() + if_not_windows(["-Wno-sign-compare"]), deps = [ ":naming", diff --git a/tensorflow/core/util/tensor_bundle/byte_swap.cc b/tensorflow/core/util/tensor_bundle/byte_swap.cc new file mode 100644 index 00000000000..89a808ab2cf --- /dev/null +++ b/tensorflow/core/util/tensor_bundle/byte_swap.cc @@ -0,0 +1,118 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/util/tensor_bundle/byte_swap.h" + +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { + +Status ByteSwapArray(char* array, size_t bytes_per_elem, int array_len) { + if (bytes_per_elem == 1) { + // No-op + return Status::OK(); + } else if (bytes_per_elem == 2) { + auto array_16 = reinterpret_cast(array); + for (int i = 0; i < array_len; i++) { + array_16[i] = BYTE_SWAP_16(array_16[i]); + } + return Status::OK(); + } else if (bytes_per_elem == 4) { + auto array_32 = reinterpret_cast(array); + for (int i = 0; i < array_len; i++) { + array_32[i] = BYTE_SWAP_32(array_32[i]); + } + return Status::OK(); + } else if (bytes_per_elem == 8) { + auto array_64 = reinterpret_cast(array); + for (int i = 0; i < array_len; i++) { + array_64[i] = BYTE_SWAP_64(array_64[i]); + } + return Status::OK(); + } else { + return errors::Unimplemented("Byte-swapping of ", bytes_per_elem, + "-byte values not supported."); + } +} + +Status ByteSwapTensor(Tensor* t) { + size_t bytes_per_elem = 0; + int array_len = t->NumElements(); + + switch (t->dtype()) { + // Types that don't need byte-swapping + case DT_STRING: + case DT_QINT8: + case DT_QUINT8: + case DT_BOOL: + case DT_UINT8: + case DT_INT8: + return Status::OK(); + + // 16-bit types + case DT_BFLOAT16: + case DT_HALF: + case DT_QINT16: + case DT_QUINT16: + case DT_UINT16: + case DT_INT16: + bytes_per_elem = 2; + break; + + // 32-bit types + case DT_FLOAT: + case DT_INT32: + case DT_QINT32: + case DT_UINT32: + 
bytes_per_elem = 4; + break; + + // 64-bit types + case DT_INT64: + case DT_DOUBLE: + case DT_UINT64: + bytes_per_elem = 8; + break; + + // Complex types need special handling + case DT_COMPLEX64: + bytes_per_elem = 4; + array_len *= 2; + break; + + case DT_COMPLEX128: + bytes_per_elem = 8; + array_len *= 2; + break; + + // Types that ought to be supported in the future + case DT_RESOURCE: + case DT_VARIANT: + return errors::Unimplemented( + "Byte-swapping not yet implemented for tensors with dtype ", + t->dtype()); + + // Byte-swapping shouldn't make sense for other dtypes. + default: + return errors::Unimplemented( + "Byte-swapping not supported for tensors with dtype ", t->dtype()); + } + + char* backing_buffer = const_cast((t->tensor_data().data())); + TF_RETURN_IF_ERROR(ByteSwapArray(backing_buffer, bytes_per_elem, array_len)); + return Status::OK(); +} + +} // namespace tensorflow diff --git a/tensorflow/core/util/tensor_bundle/byte_swap.h b/tensorflow/core/util/tensor_bundle/byte_swap.h new file mode 100644 index 00000000000..ea59e644ec0 --- /dev/null +++ b/tensorflow/core/util/tensor_bundle/byte_swap.h @@ -0,0 +1,113 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CORE_UTIL_TENSOR_BUNDLE_BYTE_SWAP_H_ +#define TENSORFLOW_CORE_UTIL_TENSOR_BUNDLE_BYTE_SWAP_H_ + +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/byte_order.h" + +// Define basic byte swapping operations. +// These operations must be macros to use compiler intrinsics. +// Note that the code here is written for portability, not speed. Byte swapping +// only happens when importing a checkpoint from one hardware architecture onto +// a different architecture. If these operations become part of a fast path, +// then the function ByteSwapArray() below should be rewritten to use +// architecture-appropriate SIMD instructions that swap multiple words at once. + +#if defined(__linux__) + +// Use the GNU byte swap macros when available. See bswap(3) for more info. +#include +#define BYTE_SWAP_16(x) bswap_16(x) +#define BYTE_SWAP_32(x) bswap_32(x) +#define BYTE_SWAP_64(x) bswap_64(x) + +#elif defined(PLATFORM_WINDOWS) + +// On Windows, byte-swapping is in winsock.h, and winsock2.h has a version +// of htonl that can byte-swap 64-bit values. +#include +#define BYTE_SWAP_16(x) htons(x) +#define BYTE_SWAP_32(x) htonl(x) +// At the moment the 64-bit and 128-bit byte-swapping routines in Winsock2 are +// disabled in TensorFlow's standard Windows build environment, so we use +// htonl() instead of "#define BYTE_SWAP_64(x) htonll (x)". +#define BYTE_SWAP_64(x) \ + ((uint64_t(htonl((x)&0x00000000ffffffffUL)) << 32) | \ + (htonl(((x)&0xffffffff00000000UL) >> 32))) + +#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + +// On non-Linux, non-Windows, but little-endian, environments, use htonl/s, +// which byte-swap when the host byte order is little-endian. POSIX doesn't +// define a 64-bit version of these library functions, so we roll our own.
+#include +#define BYTE_SWAP_16(x) htons(x) +#define BYTE_SWAP_32(x) htonl(x) +#define BYTE_SWAP_64(x) \ + ((uint64_t(htonl((x)&0x00000000ffffffffUL)) << 32) | \ + (htonl(((x)&0xffffffff00000000UL) >> 32))) + +#else // not defined(__linux__) and not defined(PLATFORM_WINDOWS) + // and (__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__) + +// Fall back on a non-optimized implementation on other big-endian targets. +// This code swaps one byte at a time and is probably an order of magnitude +// slower. + +#define BYTE_SWAP_16(x) ((((x)&0x00ff) << 8) | (((x)&0xff00) >> 8)) + +#define BYTE_SWAP_32(x) \ + ((((x)&0x000000ffU) << 24) | (((x)&0x0000ff00U) << 8) | \ + (((x)&0x00ff0000U) >> 8) | (((x)&0xff000000U) >> 24)) + +#define BYTE_SWAP_64(x) \ + ((((x)&0x00000000000000ffUL) << 56) | (((x)&0x000000000000ff00UL) << 40) | \ + (((x)&0x0000000000ff0000UL) << 24) | (((x)&0x00000000ff000000UL) << 8) | \ + (((x)&0x000000ff00000000UL) >> 8) | (((x)&0x0000ff0000000000UL) >> 24) | \ + (((x)&0x00ff000000000000UL) >> 40) | (((x)&0xff00000000000000UL) >> 56)) + +#endif // defined(__linux__) + +namespace tensorflow { + +// Byte-swap an entire array of atomic C/C++ types in place. +// +// Note: When calling this function on arrays of std::complex<> types, +// multiply the number of elements by 2 and divide the bytes per element by 2. +// +// Args: +// array: Pointer to the beginning of the array +// bytes_per_elem: Number of bytes in each element of the array +// array_len: Number of elements in the array +// +// Returns: Status::OK() on success, an Unimplemented error Status otherwise +// +Status ByteSwapArray(char *array, size_t bytes_per_elem, int array_len); + +// Byte-swap a tensor's backing buffer in place. +// +// Args: +// t: Tensor to be modified IN PLACE. Any tensors that share a backing +// buffer with this one will also end up byte-swapped. +// Returns: Status::OK() on success, an Unimplemented error Status otherwise +// TODO(frreiss): Should this be a member of the Tensor class?
+Status ByteSwapTensor(Tensor *t); + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_UTIL_TENSOR_BUNDLE_BYTE_SWAP_H_ diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle.cc b/tensorflow/core/util/tensor_bundle/tensor_bundle.cc index 111ccdc48f4..0756b47f220 100644 --- a/tensorflow/core/util/tensor_bundle/tensor_bundle.cc +++ b/tensorflow/core/util/tensor_bundle/tensor_bundle.cc @@ -43,6 +43,7 @@ limitations under the License. #include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/util/saved_tensor_slice_util.h" +#include "tensorflow/core/util/tensor_bundle/byte_swap.h" #include "tensorflow/core/util/tensor_slice_util.h" namespace tensorflow { @@ -69,7 +70,7 @@ namespace { // bytes) and string bytes, and stores it into "actual_crc32c". Status ReadStringTensor(io::InputBuffer* buffered_file, size_t num_elements, size_t offset, size_t size, string* destination, - uint32* actual_crc32c) { + uint32* actual_crc32c, bool need_to_swap_bytes) { if (size == 0) return Status::OK(); CHECK_GT(size, 0); @@ -81,14 +82,22 @@ Status ReadStringTensor(io::InputBuffer* buffered_file, size_t num_elements, if (string_lengths[i] <= UINT32_MAX) { // We need to do this because older checkpoints only used uint32s and we // should still support them. 
- const uint32 elem_size_uint32 = static_cast(string_lengths[i]); + uint32 elem_size_uint32 = static_cast(string_lengths[i]); + if (need_to_swap_bytes) { + // Checksum would have been computed on the source machine's byte order + elem_size_uint32 = BYTE_SWAP_32(elem_size_uint32); + } *actual_crc32c = crc32c::Extend( *actual_crc32c, reinterpret_cast(&elem_size_uint32), sizeof(uint32)); } else { - *actual_crc32c = crc32c::Extend( - *actual_crc32c, reinterpret_cast(&string_lengths[i]), - sizeof(uint64)); + uint64 length = string_lengths[i]; + if (need_to_swap_bytes) { + length = BYTE_SWAP_64(length); + } + *actual_crc32c = + crc32c::Extend(*actual_crc32c, reinterpret_cast(&length), + sizeof(uint64)); } } if (offset + size < buffered_file->Tell()) { @@ -97,20 +106,23 @@ Status ReadStringTensor(io::InputBuffer* buffered_file, size_t num_elements, } // Reads the length-checksum. - uint32 length_checksum = 0; + uint32 raw_length_checksum = 0; // Bytes in file + uint32 length_checksum = 0; // In-memory representation size_t unused_bytes_read = 0; TF_RETURN_IF_ERROR(buffered_file->ReadNBytes( - sizeof(uint32), reinterpret_cast(&length_checksum), + sizeof(uint32), reinterpret_cast(&raw_length_checksum), &unused_bytes_read)); + length_checksum = need_to_swap_bytes ? BYTE_SWAP_32(raw_length_checksum) + : raw_length_checksum; if (crc32c::Unmask(length_checksum) != *actual_crc32c) { return errors::DataLoss( "The length checksum does not match: expected ", strings::Printf("%08u", crc32c::Unmask(length_checksum)), " but actual is ", strings::Printf("%08u", *actual_crc32c)); } - *actual_crc32c = - crc32c::Extend(*actual_crc32c, reinterpret_cast(&length_checksum), - sizeof(uint32)); + *actual_crc32c = crc32c::Extend(*actual_crc32c, + reinterpret_cast(&raw_length_checksum), + sizeof(uint32)); // Reads the actual string bytes. 
for (size_t i = 0; i < num_elements; ++i) { @@ -719,7 +731,8 @@ BundleReader::BundleReader(Env* env, StringPiece prefix) prefix_(prefix), metadata_(nullptr), table_(nullptr), - iter_(nullptr) { + iter_(nullptr), + need_to_swap_bytes_(false) { const string filename = MetaFilename(prefix_); uint64 file_size; status_ = env_->GetFileSize(filename, &file_size); @@ -751,9 +764,7 @@ BundleReader::BundleReader(Env* env, StringPiece prefix) if ((header.endianness() == BundleHeaderProto::BIG && port::kLittleEndian) || (header.endianness() == BundleHeaderProto::LITTLE && !port::kLittleEndian)) { - status_ = errors::Unimplemented( - "Reading a bundle with different endianness from the reader"); - return; + need_to_swap_bytes_ = true; } status_ = CheckVersions(header.version(), kTensorBundleVersion, kTensorBundleMinProducer, "Checkpoint", "checkpoint"); @@ -852,8 +863,20 @@ Status BundleReader::GetValue(const BundleEntryProto& entry, Tensor* val) { TF_RETURN_IF_ERROR(buffered_file->ReadNBytes(entry.size(), backing_buffer, &unused_bytes_read)); } + // Note that we compute the checksum *before* byte-swapping. The checksum + // should be on the bytes in the order they appear in the file. actual_crc32c = crc32c::Value(backing_buffer, entry.size()); + if (need_to_swap_bytes_) { + TF_RETURN_IF_ERROR(ByteSwapTensor(ret)); + } } else if (entry.dtype() == DT_VARIANT) { + if (need_to_swap_bytes_) { + return errors::Unimplemented( + "TensorBundle at ", prefix_, + " is of a different endianness than this machine's hardware, and " + "the bundle contains a variant (arbitrary C++ type) tensor. " + "Byte-swapping of variant tensors is not currently implemented."); + } // Relies on io::InputBuffer's buffering, because we issue many neighboring // reads for a single string tensor. TF_RETURN_IF_ERROR(ReadVariantTensor(buffered_file, ret, entry.offset(), @@ -863,7 +886,7 @@ Status BundleReader::GetValue(const BundleEntryProto& entry, Tensor* val) { // reads for a single string tensor. 
TF_RETURN_IF_ERROR(ReadStringTensor( buffered_file, ret->NumElements(), entry.offset(), entry.size(), - GetStringBackingBuffer(*ret), &actual_crc32c)); + GetStringBackingBuffer(*ret), &actual_crc32c, need_to_swap_bytes_)); } if (crc32c::Unmask(entry.crc32c()) != actual_crc32c) { return errors::DataLoss( diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle.h b/tensorflow/core/util/tensor_bundle/tensor_bundle.h index 3a2ffbb4952..0320878df8d 100644 --- a/tensorflow/core/util/tensor_bundle/tensor_bundle.h +++ b/tensorflow/core/util/tensor_bundle/tensor_bundle.h @@ -300,6 +300,10 @@ class BundleReader { // the header entry in the metadata table. int num_shards_; + // Flag that this class sets to true when the endianness of the target bundle + // differs from that of the current system's processor architecture. + bool need_to_swap_bytes_; + friend class TensorBundleAlignmentTest; // For testing data alignment. TF_DISALLOW_COPY_AND_ASSIGN(BundleReader); diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc b/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc index 082a69dc168..7cd4b82c815 100644 --- a/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc +++ b/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc @@ -19,6 +19,7 @@ limitations under the License. #include #include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/tensor_util.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/framework/variant.h" #include "tensorflow/core/framework/variant_op_registry.h" @@ -30,15 +31,19 @@ limitations under the License. 
#include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" +#include "tensorflow/core/util/tensor_bundle/byte_swap.h" namespace tensorflow { namespace { +// Prepend the current test case's working temporary directory to `prefix` string Prefix(const string& prefix) { return strings::StrCat(testing::TmpDir(), "/", prefix); } +// Construct a data input directory by prepending the test data root +// directory to `prefix` string TestdataPrefix(const string& prefix) { return strings::StrCat(testing::TensorFlowSrcRoot(), "/core/util/tensor_bundle/testdata/", prefix); @@ -56,6 +61,14 @@ Tensor Constant_2x3(T v) { return Constant(v, TensorShape({2, 3})); } +Tensor ByteSwap(Tensor t) { + Tensor ret = tensor::DeepCopy(t); + TF_EXPECT_OK(ByteSwapTensor(&ret)); + return ret; +} + +// Assert that `reader` has a tensor under `key` matching `expected_val` in +// terms of shape, dtype, and value template void Expect(BundleReader* reader, const string& key, const Tensor& expected_val) { @@ -122,9 +135,12 @@ std::vector AllTensorKeys(BundleReader* reader) { Status FlipEndiannessBit(const string& prefix) { Env* env = Env::Default(); const string metadata_tmp_path = Prefix("some_tmp_path"); - std::unique_ptr file; - TF_RETURN_IF_ERROR(env->NewWritableFile(metadata_tmp_path, &file)); - table::TableBuilder builder(table::Options(), file.get()); + std::unique_ptr metadata_file; + TF_RETURN_IF_ERROR(env->NewWritableFile(metadata_tmp_path, &metadata_file)); + // We create the builder lazily in case we run into an exception earlier, in + // which case we'd forget to call Finish() and TableBuilder's destructor + // would complain. + std::unique_ptr builder; // Reads the existing metadata file, and fills the builder. 
{ @@ -151,15 +167,18 @@ Status FlipEndiannessBit(const string& prefix) { } else { header.set_endianness(BundleHeaderProto::LITTLE); } - builder.Add(iter->key(), header.SerializeAsString()); + builder.reset( + new table::TableBuilder(table::Options(), metadata_file.get())); + builder->Add(iter->key(), header.SerializeAsString()); iter->Next(); // Adds the non-header entries unmodified. - for (; iter->Valid(); iter->Next()) builder.Add(iter->key(), iter->value()); + for (; iter->Valid(); iter->Next()) + builder->Add(iter->key(), iter->value()); } - TF_RETURN_IF_ERROR(builder.Finish()); + TF_RETURN_IF_ERROR(builder->Finish()); TF_RETURN_IF_ERROR(env->RenameFile(metadata_tmp_path, MetaFilename(prefix))); - return file->Close(); + return metadata_file->Close(); } template @@ -259,6 +278,207 @@ void TestBasic() { } } +// Type-specific subroutine of SwapBytes test below +template +void TestByteSwap(const T* forward, const T* swapped, int array_len) { + auto bytes_per_elem = sizeof(T); + + // Convert the entire array at once + std::unique_ptr forward_copy(new T[array_len]); + std::memcpy(forward_copy.get(), forward, array_len * bytes_per_elem); + TF_EXPECT_OK(ByteSwapArray(reinterpret_cast(forward_copy.get()), + bytes_per_elem, array_len)); + for (int i = 0; i < array_len; i++) { + EXPECT_EQ(forward_copy.get()[i], swapped[i]); + } + + // Then the array wrapped in a tensor + auto shape = TensorShape({array_len}); + auto dtype = DataTypeToEnum::value; + Tensor forward_tensor(dtype, shape); + Tensor swapped_tensor(dtype, shape); + std::memcpy(const_cast(forward_tensor.tensor_data().data()), forward, + array_len * bytes_per_elem); + std::memcpy(const_cast(swapped_tensor.tensor_data().data()), swapped, + array_len * bytes_per_elem); + TF_EXPECT_OK(ByteSwapTensor(&forward_tensor)); + test::ExpectTensorEqual(forward_tensor, swapped_tensor); +} + +// Unit test of the byte-swapping operations that TensorBundle uses. 
+TEST(TensorBundleTest, SwapBytes) { + // A bug in the compiler on MacOS causes ByteSwap() and FlipEndiannessBit() + // to be removed from the executable if they are only called from templated + // functions. As a workaround, we make some dummy calls here. + // TODO(frreiss): Remove this workaround when the compiler bug is fixed. + ByteSwap(Constant_2x3(42)); + EXPECT_NE(Status::OK(), FlipEndiannessBit(Prefix("not_a_valid_prefix"))); + + // Test patterns, manually swapped so that we aren't relying on the + // correctness of our own byte-swapping macros when testing those macros. + // At least one of the entries in each list has the sign bit set when + // interpreted as a signed int. + const int arr_len_16 = 4; + const uint16_t forward_16[] = {0x1de5, 0xd017, 0xf1ea, 0xc0a1}; + const uint16_t swapped_16[] = {0xe51d, 0x17d0, 0xeaf1, 0xa1c0}; + const int arr_len_32 = 2; + const uint32_t forward_32[] = {0x0ddba115, 0xf01dab1e}; + const uint32_t swapped_32[] = {0x15a1db0d, 0x1eab1df0}; + const int arr_len_64 = 2; + const uint64_t forward_64[] = {0xf005ba11caba1000, 0x5ca1ab1ecab005e5}; + const uint64_t swapped_64[] = {0x0010baca11ba05f0, 0xe505b0ca1eaba15c}; + + // 16-bit types + TestByteSwap(forward_16, swapped_16, arr_len_16); + TestByteSwap(reinterpret_cast(forward_16), + reinterpret_cast(swapped_16), arr_len_16); + TestByteSwap(reinterpret_cast(forward_16), + reinterpret_cast(swapped_16), arr_len_16); + + // 32-bit types + TestByteSwap(forward_32, swapped_32, arr_len_32); + TestByteSwap(reinterpret_cast(forward_32), + reinterpret_cast(swapped_32), arr_len_32); + TestByteSwap(reinterpret_cast(forward_32), + reinterpret_cast(swapped_32), arr_len_32); + + // 64-bit types + // Cast to uint64*/int64* to make DataTypeToEnum happy + TestByteSwap(reinterpret_cast(forward_64), + reinterpret_cast(swapped_64), arr_len_64); + TestByteSwap(reinterpret_cast(forward_64), + reinterpret_cast(swapped_64), arr_len_64); + TestByteSwap(reinterpret_cast(forward_64), + 
reinterpret_cast(swapped_64), arr_len_64); + + // Complex types. + // Logic for complex number handling is only in ByteSwapTensor, so don't test + // ByteSwapArray + const float* forward_float = reinterpret_cast(forward_32); + const float* swapped_float = reinterpret_cast(swapped_32); + const double* forward_double = reinterpret_cast(forward_64); + const double* swapped_double = reinterpret_cast(swapped_64); + Tensor forward_complex64 = Constant_2x3( + std::complex(forward_float[0], forward_float[1])); + Tensor swapped_complex64 = Constant_2x3( + std::complex(swapped_float[0], swapped_float[1])); + Tensor forward_complex128 = Constant_2x3( + std::complex(forward_double[0], forward_double[1])); + Tensor swapped_complex128 = Constant_2x3( + std::complex(swapped_double[0], swapped_double[1])); + + TF_EXPECT_OK(ByteSwapTensor(&forward_complex64)); + test::ExpectTensorEqual(forward_complex64, swapped_complex64); + + TF_EXPECT_OK(ByteSwapTensor(&forward_complex128)); + test::ExpectTensorEqual(forward_complex128, swapped_complex128); +} + +// Basic test of alternate-endianness support. Generates a bundle in +// the opposite of the current system's endianness and attempts to +// read the bundle back in. Does not exercise sharding or access to +// nonaligned tensors. Does cover the major access types exercised +// in TestBasic. +template +void TestEndianness() { + { + // Write out a TensorBundle in the opposite of this host's endianness. 
+ BundleWriter writer(Env::Default(), Prefix("foo")); + TF_EXPECT_OK(writer.Add("foo_003", ByteSwap(Constant_2x3(T(3))))); + TF_EXPECT_OK(writer.Add("foo_000", ByteSwap(Constant_2x3(T(0))))); + TF_EXPECT_OK(writer.Add("foo_002", ByteSwap(Constant_2x3(T(2))))); + TF_EXPECT_OK(writer.Add("foo_001", ByteSwap(Constant_2x3(T(1))))); + TF_ASSERT_OK(writer.Finish()); + TF_ASSERT_OK(FlipEndiannessBit(Prefix("foo"))); + } + { + BundleReader reader(Env::Default(), Prefix("foo")); + TF_ASSERT_OK(reader.status()); + EXPECT_EQ( + AllTensorKeys(&reader), + std::vector({"foo_000", "foo_001", "foo_002", "foo_003"})); + Expect(&reader, "foo_000", Constant_2x3(T(0))); + Expect(&reader, "foo_001", Constant_2x3(T(1))); + Expect(&reader, "foo_002", Constant_2x3(T(2))); + Expect(&reader, "foo_003", Constant_2x3(T(3))); + } + { + BundleReader reader(Env::Default(), Prefix("foo")); + TF_ASSERT_OK(reader.status()); + ExpectNext(&reader, Constant_2x3(T(0))); + ExpectNext(&reader, Constant_2x3(T(1))); + ExpectNext(&reader, Constant_2x3(T(2))); + ExpectNext(&reader, Constant_2x3(T(3))); + EXPECT_TRUE(reader.Valid()); + reader.Next(); + EXPECT_FALSE(reader.Valid()); + } + { + BundleWriter writer(Env::Default(), Prefix("bar")); + TF_EXPECT_OK(writer.Add("bar_003", ByteSwap(Constant_2x3(T(3))))); + TF_EXPECT_OK(writer.Add("bar_000", ByteSwap(Constant_2x3(T(0))))); + TF_EXPECT_OK(writer.Add("bar_002", ByteSwap(Constant_2x3(T(2))))); + TF_EXPECT_OK(writer.Add("bar_001", ByteSwap(Constant_2x3(T(1))))); + TF_ASSERT_OK(writer.Finish()); + TF_ASSERT_OK(FlipEndiannessBit(Prefix("bar"))); + } + { + BundleReader reader(Env::Default(), Prefix("bar")); + TF_ASSERT_OK(reader.status()); + EXPECT_EQ( + AllTensorKeys(&reader), + std::vector({"bar_000", "bar_001", "bar_002", "bar_003"})); + Expect(&reader, "bar_003", Constant_2x3(T(3))); + Expect(&reader, "bar_002", Constant_2x3(T(2))); + Expect(&reader, "bar_001", Constant_2x3(T(1))); + Expect(&reader, "bar_000", Constant_2x3(T(0))); + } + { + BundleReader 
reader(Env::Default(), Prefix("bar")); + TF_ASSERT_OK(reader.status()); + ExpectNext(&reader, Constant_2x3(T(0))); + ExpectNext(&reader, Constant_2x3(T(1))); + ExpectNext(&reader, Constant_2x3(T(2))); + ExpectNext(&reader, Constant_2x3(T(3))); + EXPECT_TRUE(reader.Valid()); + reader.Next(); + EXPECT_FALSE(reader.Valid()); + } + TF_ASSERT_OK(MergeBundles(Env::Default(), {Prefix("foo"), Prefix("bar")}, + Prefix("merged"))); + { + BundleReader reader(Env::Default(), Prefix("merged")); + TF_ASSERT_OK(reader.status()); + EXPECT_EQ( + AllTensorKeys(&reader), + std::vector({"bar_000", "bar_001", "bar_002", "bar_003", + "foo_000", "foo_001", "foo_002", "foo_003"})); + Expect(&reader, "bar_000", Constant_2x3(T(0))); + Expect(&reader, "bar_001", Constant_2x3(T(1))); + Expect(&reader, "bar_002", Constant_2x3(T(2))); + Expect(&reader, "bar_003", Constant_2x3(T(3))); + Expect(&reader, "foo_000", Constant_2x3(T(0))); + Expect(&reader, "foo_001", Constant_2x3(T(1))); + Expect(&reader, "foo_002", Constant_2x3(T(2))); + Expect(&reader, "foo_003", Constant_2x3(T(3))); + } + { + BundleReader reader(Env::Default(), Prefix("merged")); + TF_ASSERT_OK(reader.status()); + ExpectNext(&reader, Constant_2x3(T(0))); + ExpectNext(&reader, Constant_2x3(T(1))); + ExpectNext(&reader, Constant_2x3(T(2))); + ExpectNext(&reader, Constant_2x3(T(3))); + ExpectNext(&reader, Constant_2x3(T(0))); + ExpectNext(&reader, Constant_2x3(T(1))); + ExpectNext(&reader, Constant_2x3(T(2))); + ExpectNext(&reader, Constant_2x3(T(3))); + EXPECT_TRUE(reader.Valid()); + reader.Next(); + EXPECT_FALSE(reader.Valid()); + } +} + template void TestNonStandardShapes() { { @@ -321,6 +541,23 @@ TEST(TensorBundleTest, Basic) { TestBasic(); } +TEST(TensorBundleTest, Endianness) { + TestEndianness(); + TestEndianness(); + TestEndianness(); + TestEndianness(); + TestEndianness(); + TestEndianness(); + TestEndianness(); + TestEndianness(); + TestEndianness(); + TestEndianness(); + TestEndianness(); + TestEndianness(); + 
TestEndianness(); + TestEndianness(); +} + TEST(TensorBundleTest, PartitionedVariables) { const TensorShape kFullShape({5, 10}); // Adds two slices. @@ -730,20 +967,6 @@ TEST(TensorBundleTest, Checksum) { } } -TEST(TensorBundleTest, Endianness) { - BundleWriter writer(Env::Default(), Prefix("end")); - TF_EXPECT_OK(writer.Add("key", Constant_2x3(1.0))); - TF_ASSERT_OK(writer.Finish()); - - // Flips the endianness bit. - TF_ASSERT_OK(FlipEndiannessBit(Prefix("end"))); - - BundleReader reader(Env::Default(), Prefix("end")); - EXPECT_TRUE(errors::IsUnimplemented(reader.status())); - EXPECT_TRUE(absl::StrContains(reader.status().ToString(), - "different endianness from the reader")); -} - TEST(TensorBundleTest, TruncatedTensorContents) { Env* env = Env::Default(); BundleWriter writer(env, Prefix("end")); diff --git a/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py b/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py index 5c52a2c8461..d701444b1ab 100644 --- a/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py +++ b/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py @@ -153,7 +153,7 @@ def run_training(): tf.local_variables_initializer()) # Create a session for running operations in the Graph. - with tf.Session() as sess: + with tf.compat.v1.Session() as sess: # Initialize the variables (the trained variables and the # epoch counter). 
sess.run(init_op) diff --git a/tensorflow/examples/label_image/label_image.py b/tensorflow/examples/label_image/label_image.py index fe5e0fc684a..f675ec35ec8 100644 --- a/tensorflow/examples/label_image/label_image.py +++ b/tensorflow/examples/label_image/label_image.py @@ -58,7 +58,7 @@ def read_tensor_from_image_file(file_name, dims_expander = tf.expand_dims(float_caster, 0) resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width]) normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std]) - sess = tf.Session() + sess = tf.compat.v1.Session() result = sess.run(normalized) return result @@ -128,7 +128,7 @@ if __name__ == "__main__": input_operation = graph.get_operation_by_name(input_name) output_operation = graph.get_operation_by_name(output_name) - with tf.Session(graph=graph) as sess: + with tf.compat.v1.Session(graph=graph) as sess: results = sess.run(output_operation.outputs[0], { input_operation.outputs[0]: t }) diff --git a/tensorflow/examples/speech_commands/input_data.py b/tensorflow/examples/speech_commands/input_data.py index 983c0a3bafa..6c2ce3f13eb 100644 --- a/tensorflow/examples/speech_commands/input_data.py +++ b/tensorflow/examples/speech_commands/input_data.py @@ -122,7 +122,7 @@ def load_wav_file(filename): Returns: Numpy array holding the sample data as floats between -1.0 and 1.0. """ - with tf.Session(graph=tf.Graph()) as sess: + with tf.compat.v1.Session(graph=tf.Graph()) as sess: wav_filename_placeholder = tf.placeholder(tf.string, []) wav_loader = io_ops.read_file(wav_filename_placeholder) wav_decoder = contrib_audio.decode_wav(wav_loader, desired_channels=1) @@ -139,7 +139,7 @@ def save_wav_file(filename, wav_data, sample_rate): wav_data: 2D array of float PCM-encoded audio data. sample_rate: Samples per second to encode in the file. 
""" - with tf.Session(graph=tf.Graph()) as sess: + with tf.compat.v1.Session(graph=tf.Graph()) as sess: wav_filename_placeholder = tf.placeholder(tf.string, []) sample_rate_placeholder = tf.placeholder(tf.int32, []) wav_data_placeholder = tf.placeholder(tf.float32, [None, 1]) @@ -349,7 +349,7 @@ class AudioProcessor(object): background_dir = os.path.join(self.data_dir, BACKGROUND_NOISE_DIR_NAME) if not os.path.exists(background_dir): return self.background_data - with tf.Session(graph=tf.Graph()) as sess: + with tf.compat.v1.Session(graph=tf.Graph()) as sess: wav_filename_placeholder = tf.placeholder(tf.string, []) wav_loader = io_ops.read_file(wav_filename_placeholder) wav_decoder = contrib_audio.decode_wav(wav_loader, desired_channels=1) @@ -654,7 +654,7 @@ class AudioProcessor(object): words_list = self.words_list data = np.zeros((sample_count, desired_samples)) labels = [] - with tf.Session(graph=tf.Graph()) as sess: + with tf.compat.v1.Session(graph=tf.Graph()) as sess: wav_filename_placeholder = tf.placeholder(tf.string, []) wav_loader = io_ops.read_file(wav_filename_placeholder) wav_decoder = contrib_audio.decode_wav( diff --git a/tensorflow/examples/speech_commands/label_wav.py b/tensorflow/examples/speech_commands/label_wav.py index eb8323454c2..5af16691e82 100644 --- a/tensorflow/examples/speech_commands/label_wav.py +++ b/tensorflow/examples/speech_commands/label_wav.py @@ -59,7 +59,7 @@ def load_labels(filename): def run_graph(wav_data, labels, input_layer_name, output_layer_name, num_top_predictions): """Runs the audio data through the graph and prints predictions.""" - with tf.Session() as sess: + with tf.compat.v1.Session() as sess: # Feed the audio data as input to the graph. 
# predictions will contain a two-dimensional array, where one # dimension represents the input image count, and the other has diff --git a/tensorflow/examples/speech_commands/label_wav_dir.py b/tensorflow/examples/speech_commands/label_wav_dir.py index 2e1890c3e86..eb6fb757c10 100644 --- a/tensorflow/examples/speech_commands/label_wav_dir.py +++ b/tensorflow/examples/speech_commands/label_wav_dir.py @@ -60,7 +60,7 @@ def load_labels(filename): def run_graph(wav_dir, labels, input_layer_name, output_layer_name, num_top_predictions): """Runs the audio data through the graph and prints predictions.""" - with tf.Session() as sess: + with tf.compat.v1.Session() as sess: # Feed the audio data as input to the graph. # predictions will contain a two-dimensional array, where one # dimension represents the input image count, and the other has diff --git a/tensorflow/examples/speech_commands/label_wav_test.py b/tensorflow/examples/speech_commands/label_wav_test.py index 77a88f98e16..3c833d66735 100644 --- a/tensorflow/examples/speech_commands/label_wav_test.py +++ b/tensorflow/examples/speech_commands/label_wav_test.py @@ -48,7 +48,7 @@ class LabelWavTest(test.TestCase): input_name = "test_input" output_name = "test_output" graph_filename = os.path.join(tmp_dir, "test_graph.pb") - with tf.Session() as sess: + with tf.compat.v1.Session() as sess: tf.placeholder(tf.string, name=input_name) tf.zeros([1, 3], name=output_name) with open(graph_filename, "wb") as f: diff --git a/tensorflow/examples/tutorials/mnist/fully_connected_feed.py b/tensorflow/examples/tutorials/mnist/fully_connected_feed.py index 35ca1b2f7f3..e61cbab6ef4 100644 --- a/tensorflow/examples/tutorials/mnist/fully_connected_feed.py +++ b/tensorflow/examples/tutorials/mnist/fully_connected_feed.py @@ -149,7 +149,7 @@ def run_training(): saver = tf.train.Saver() # Create a session for running Ops on the Graph. 
- sess = tf.Session() + sess = tf.compat.v1.Session() # Instantiate a SummaryWriter to output summaries and the Graph. summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph) diff --git a/tensorflow/examples/tutorials/mnist/mnist_softmax_xla.py b/tensorflow/examples/tutorials/mnist/mnist_softmax_xla.py index 2945660dad5..28fdc2059f8 100644 --- a/tensorflow/examples/tutorials/mnist/mnist_softmax_xla.py +++ b/tensorflow/examples/tutorials/mnist/mnist_softmax_xla.py @@ -64,7 +64,7 @@ def main(_): config.graph_options.optimizer_options.global_jit_level = jit_level run_metadata = tf.RunMetadata() - sess = tf.Session(config=config) + sess = tf.compat.v1.Session(config=config) tf.global_variables_initializer().run(session=sess) # Train train_loops = 1000 diff --git a/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py b/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py index 3485e7afbf1..efe35ca096f 100644 --- a/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py +++ b/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py @@ -103,7 +103,7 @@ def train(): with tf.name_scope('dropout'): keep_prob = tf.placeholder(tf.float32) tf.summary.scalar('dropout_keep_probability', keep_prob) - dropped = tf.nn.dropout(hidden1, keep_prob) + dropped = tf.nn.dropout(hidden1, rate=(1 - keep_prob)) # Do not apply softmax activation yet, see below. y = nn_layer(dropped, 500, 10, 'layer2', act=tf.identity) diff --git a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py index ebfaacb8a2c..380cd2be515 100644 --- a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py +++ b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py @@ -226,7 +226,7 @@ def word2vec_basic(log_dir): # Step 5: Begin training. num_steps = 100001 - with tf.Session(graph=graph) as session: + with tf.compat.v1.Session(graph=graph) as session: # Open a writer to write summaries. 
writer = tf.summary.FileWriter(log_dir, session.graph) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 43c5df0fb5f..75a20af6ef3 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -40809,7 +40809,7 @@ func UniqueDataset(scope *Scope, input_dataset tf.Output, output_types []tf.Data // // Note that this routine only supports wildcard characters in the // basename portion of the pattern, not in the directory portion. -// Note also that the order of filenames returned can be non-deterministic. +// Note also that the order of filenames returned is deterministic. // // Arguments: // pattern: Shell wildcard pattern(s). Scalar or vector of type string. diff --git a/tensorflow/java/src/main/java/org/tensorflow/EagerSession.java b/tensorflow/java/src/main/java/org/tensorflow/EagerSession.java index d3bb43a8958..972e9cc1064 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/EagerSession.java +++ b/tensorflow/java/src/main/java/org/tensorflow/EagerSession.java @@ -179,7 +179,12 @@ public final class EagerSession implements ExecutionEnvironment, AutoCloseable { /** Builds an eager session with the selected options. */ public EagerSession build() { - return new EagerSession(this); + return new EagerSession(this, new ReferenceQueue()); + } + + // For garbage-collection tests only + EagerSession buildForGcTest(ReferenceQueue gcQueue) { + return new EagerSession(this, gcQueue); } private boolean async; @@ -344,6 +349,10 @@ public final class EagerSession implements ExecutionEnvironment, AutoCloseable { return nativeHandle; } + ResourceCleanupStrategy resourceCleanupStrategy() { + return resourceCleanupStrategy; + } + /** * A reference to one or more allocated native resources. 
* @@ -412,6 +421,10 @@ public final class EagerSession implements ExecutionEnvironment, AutoCloseable { */ private static class NativeResourceCollector { + NativeResourceCollector(ReferenceQueue garbageQueue) { + this.garbageQueue = garbageQueue; + } + void attach(NativeReference nativeRef) { synchronized (nativeRefs) { nativeRefs.put(nativeRef, null); @@ -484,17 +497,18 @@ public final class EagerSession implements ExecutionEnvironment, AutoCloseable { private final ExecutorService cleanupService = Executors.newSingleThreadExecutor(); private final Map nativeRefs = new IdentityHashMap<>(); - private final ReferenceQueue garbageQueue = new ReferenceQueue<>(); + private final ReferenceQueue garbageQueue; private volatile boolean cleanupInBackground = false; } private static volatile EagerSession defaultSession = null; - private final NativeResourceCollector nativeResources = new NativeResourceCollector(); + private final NativeResourceCollector nativeResources; private final ResourceCleanupStrategy resourceCleanupStrategy; private long nativeHandle; - private EagerSession(Options options) { + private EagerSession(Options options, ReferenceQueue garbageQueue) { + this.nativeResources = new NativeResourceCollector(garbageQueue); this.nativeHandle = allocate(options.async, options.devicePlacementPolicy.code, options.config); this.resourceCleanupStrategy = options.resourceCleanupStrategy; @@ -509,11 +523,6 @@ public final class EagerSession implements ExecutionEnvironment, AutoCloseable { } } - // For tests - ResourceCleanupStrategy resourceCleanupStrategy() { - return resourceCleanupStrategy; - } - private static native long allocate(boolean async, int devicePlacementPolicy, byte[] config); private static native void delete(long handle); diff --git a/tensorflow/java/src/test/java/org/tensorflow/EagerSessionTest.java b/tensorflow/java/src/test/java/org/tensorflow/EagerSessionTest.java index 7db1cecb943..5d6cdc06aa3 100644 --- 
a/tensorflow/java/src/test/java/org/tensorflow/EagerSessionTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/EagerSessionTest.java @@ -21,8 +21,13 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import java.lang.ref.Reference; +import java.lang.ref.ReferenceQueue; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; -import org.junit.Ignore; + import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -40,74 +45,67 @@ public class EagerSessionTest { @Test public void cleanupResourceOnSessionClose() { - AtomicBoolean deleted = new AtomicBoolean(); - + TestReference ref; try (EagerSession s = EagerSession.options() .resourceCleanupStrategy(ResourceCleanupStrategy.ON_SESSION_CLOSE) .build()) { + ref = new TestReference(s, new Object()); + assertFalse(ref.isDeleted()); - new TestReference(s, new Object(), deleted); - - assertFalse(deleted.get()); - runGC(); - assertFalse(deleted.get()); - + // check that reaching safe point did not release resources buildOp(s); - assertFalse(deleted.get()); // reaching safe point did not release resources + assertFalse(ref.isDeleted()); } - assertTrue(deleted.get()); + assertTrue(ref.isDeleted()); } - // TODO(b/135541743): Re-enable once fixed. 
- // Disabled due to flakiness with -c opt --config=cuda - @Ignore + @Test public void cleanupResourceOnSafePoints() { - AtomicBoolean deleted = new AtomicBoolean(); - + TestGarbageCollectorQueue gcQueue = new TestGarbageCollectorQueue(); try (EagerSession s = EagerSession.options() .resourceCleanupStrategy(ResourceCleanupStrategy.ON_SAFE_POINTS) - .build()) { + .buildForGcTest(gcQueue)) { - new TestReference(s, new Object(), deleted); + TestReference ref = new TestReference(s, new Object()); + assertFalse(ref.isDeleted()); - assertFalse(deleted.get()); - runGC(); - assertFalse(deleted.get()); - - buildOp(s); - assertTrue(deleted.get()); // reaching safe point released resources + // garbage collecting the reference won't release until we reached safe point + gcQueue.collect(ref); + assertFalse(ref.isDeleted()); + buildOp(s); // safe point + assertTrue(ref.isDeleted()); + assertTrue(gcQueue.isEmpty()); } } @Test public void cleanupResourceInBackground() { - AtomicBoolean deleted = new AtomicBoolean(); - + TestGarbageCollectorQueue gcQueue = new TestGarbageCollectorQueue(); try (EagerSession s = EagerSession.options() .resourceCleanupStrategy(ResourceCleanupStrategy.IN_BACKGROUND) - .build()) { + .buildForGcTest(gcQueue)) { - new TestReference(s, new Object(), deleted); + TestReference ref = new TestReference(s, new Object()); + assertFalse(ref.isDeleted()); - assertFalse(deleted.get()); - runGC(); + gcQueue.collect(ref); sleep(50); // allow some time to the background thread for cleaning up resources - assertTrue(deleted.get()); + assertTrue(ref.isDeleted()); + assertTrue(gcQueue.isEmpty()); } } @Test public void clearedResourcesAreNotCleanedUp() { - AtomicBoolean deleted = new AtomicBoolean(); - + TestReference ref; try (EagerSession s = EagerSession.create()) { - TestReference ref = new TestReference(s, new Object(), deleted); + ref = new TestReference(s, new Object()); ref.clear(); } - assertFalse(deleted.get()); + assertFalse(ref.isDeleted()); } @Test @@ -127,7 
+125,7 @@ public class EagerSessionTest { EagerSession s = EagerSession.create(); s.close(); try { - new TestReference(s, new Object(), new AtomicBoolean()); + new TestReference(s, new Object()); fail(); } catch (IllegalStateException e) { // ok @@ -158,9 +156,8 @@ public class EagerSessionTest { private static class TestReference extends EagerSession.NativeReference { - TestReference(EagerSession session, Object referent, AtomicBoolean deleted) { + TestReference(EagerSession session, Object referent) { super(session, referent); - this.deleted = deleted; } @Override @@ -170,7 +167,40 @@ public class EagerSessionTest { } } - private final AtomicBoolean deleted; + boolean isDeleted() { + return deleted.get(); + } + + private final AtomicBoolean deleted = new AtomicBoolean(); + } + + private static class TestGarbageCollectorQueue extends ReferenceQueue { + + @Override + public Reference poll() { + return garbage.poll(); + } + + @Override + public Reference remove() throws InterruptedException { + return garbage.take(); + } + + @Override + public Reference remove(long timeout) + throws IllegalArgumentException, InterruptedException { + return garbage.poll(timeout, TimeUnit.MILLISECONDS); + } + + void collect(TestReference ref) { + garbage.add(ref); + } + + boolean isEmpty() { + return garbage.isEmpty(); + } + + private final BlockingQueue garbage = new LinkedBlockingQueue<>(); } private static void buildOp(EagerSession s) { @@ -182,14 +212,6 @@ public class EagerSessionTest { } } - private static void runGC() { - // Warning: There is no way to force the garbage collector to run, so here we simply to our best - // to get it triggered but it might be sufficient on some platforms. Adjust accordingly if some - // cleanup tests start to fail. 
- System.gc(); - System.runFinalization(); - } - private static void sleep(int millis) { try { Thread.sleep(millis); diff --git a/tensorflow/lite/build_def.bzl b/tensorflow/lite/build_def.bzl index 9bdf0547d45..3c9337121f6 100644 --- a/tensorflow/lite/build_def.bzl +++ b/tensorflow/lite/build_def.bzl @@ -235,6 +235,7 @@ def generated_test_models(): "arg_min_max", "avg_pool", "batch_to_space_nd", + "cast", "ceil", "concat", "constant", diff --git a/tensorflow/lite/delegates/gpu/api.cc b/tensorflow/lite/delegates/gpu/api.cc index fb11cbc8ecf..ac649658c34 100644 --- a/tensorflow/lite/delegates/gpu/api.cc +++ b/tensorflow/lite/delegates/gpu/api.cc @@ -62,11 +62,9 @@ ObjectType GetType(const TensorObject& object) { return absl::visit(ObjectTypeGetter{}, object); } -bool IsValid(const TensorObject& object) { - return absl::visit(ObjectValidityChecker{DataType::UNKNOWN}, object); -} +bool IsValid(const TensorObjectDef& def) { return IsValid(def.object_def); } -bool IsCompatible(const TensorObjectDef& def, const TensorObject& object) { +bool IsValid(const TensorObjectDef& def, const TensorObject& object) { return GetType(object) == def.object_def.object_type && absl::visit(ObjectValidityChecker{def.object_def.data_type}, object); } diff --git a/tensorflow/lite/delegates/gpu/api.h b/tensorflow/lite/delegates/gpu/api.h index ca0707b6935..00e80956c43 100644 --- a/tensorflow/lite/delegates/gpu/api.h +++ b/tensorflow/lite/delegates/gpu/api.h @@ -168,17 +168,17 @@ struct TensorObjectDef { } }; +// @return true if tensor object def is defined. +bool IsValid(const TensorObjectDef& def); + using TensorObject = absl::variant; // @return true if object is set and corresponding values are defined. -bool IsValid(const TensorObject& object); +bool IsValid(const TensorObjectDef& def, const TensorObject& object); ObjectType GetType(const TensorObject& object); -// @return true if object representation corresponds to the given definition. 
-bool IsCompatible(const TensorObjectDef& def, const TensorObject& object); - // @return true if corresponding object is set for the given type bool IsObjectPresent(ObjectType type, const TensorObject& obj); diff --git a/tensorflow/lite/delegates/gpu/common/model_builder.cc b/tensorflow/lite/delegates/gpu/common/model_builder.cc index 253f792b0fd..986cbe5d5b7 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder.cc +++ b/tensorflow/lite/delegates/gpu/common/model_builder.cc @@ -816,6 +816,24 @@ class DepthwiseConvolutionOperationParser : public TFLiteOperationParser { } }; +class HardSwishOperationParser : public TFLiteOperationParser { + public: + Status IsSupported(const TfLiteContext* context, + const TfLiteNode* tflite_node, + const TfLiteRegistration*) final { + return CheckInputsOutputs(context, tflite_node, /*inputs=*/1, + /*outputs=*/1); + } + + Status Parse(const TfLiteNode*, const TfLiteRegistration*, + GraphFloat32* graph, ObjectReader* reader) final { + Node* node = graph->NewNode(); + node->operation.type = ToString(OperationType::HARD_SWISH); + RETURN_IF_ERROR(reader->AddInput(node, 0)); + return reader->AddOutputs(node); + } +}; + class ReshapeOperationParser : public TFLiteOperationParser { public: Status IsSupported(const TfLiteContext* context, @@ -1473,9 +1491,9 @@ Status ExtractTensorShape(const TfLiteTensor& tflite_tensor, BHWC* bhwc) { *bhwc = BHWC(dims->data[0], dims->data[1], dims->data[2], dims->data[3]); return OkStatus(); default: - return InvalidArgumentError( - absl::StrCat("Tensor \"", tflite_tensor.name ?: "nullptr", - "\" has bad input dims size: ", dims->size, ".")); + return InvalidArgumentError(absl::StrCat( + "Tensor \"", tflite_tensor.name ? 
tflite_tensor.name : "nullptr", + "\" has bad input dims size: ", dims->size, ".")); } } @@ -2003,6 +2021,8 @@ std::unique_ptr NewOperationParser( return make_unique(OperationType::DIV); case kTfLiteBuiltinFullyConnected: return make_unique(); + case kTfLiteBuiltinHardSwish: + return make_unique(); case kTfLiteBuiltinLogistic: return make_unique(OperationType::SIGMOID); case kTfLiteBuiltinLog: diff --git a/tensorflow/lite/delegates/gpu/common/operations.cc b/tensorflow/lite/delegates/gpu/common/operations.cc index f7f9d1b7351..eb1f01804df 100644 --- a/tensorflow/lite/delegates/gpu/common/operations.cc +++ b/tensorflow/lite/delegates/gpu/common/operations.cc @@ -46,50 +46,58 @@ Padding2D& Padding2D::operator-(const Padding2D& value) { std::string ToString(enum OperationType op) { switch (op) { - case OperationType::UNKNOWN: - break; case OperationType::ABS: return "abs"; case OperationType::ADD: return "add"; case OperationType::APPLY_MASK: return "apply_mask"; - case OperationType::BATCH_TO_SPACE: - return "batch_to_space"; - case OperationType::POOLING_2D: - return "pooling_2d"; - case OperationType::MAX_UNPOOLING_2D: - return "max_unpooling"; case OperationType::BATCH_NORMALIZATION: return "batch_normalization"; + case OperationType::BATCH_TO_SPACE: + return "batch_to_space"; case OperationType::CONCAT: return "concat"; case OperationType::CONST: return "const"; case OperationType::CONVOLUTION_2D: return "convolution_2d"; + case OperationType::CONVOLUTION_TRANSPOSED: + return "convolution_transposed"; case OperationType::COS: return "cos"; case OperationType::DEPTHWISE_CONVOLUTION: return "depthwise_convolution"; case OperationType::DIV: return "div"; + case OperationType::FULLY_CONNECTED: + return "fully_connected"; + case OperationType::HARD_SWISH: + return "hard_swish"; case OperationType::LOG: return "log"; + case OperationType::LSTM: + return "lstm"; + case OperationType::MAX_UNPOOLING_2D: + return "max_unpooling"; case OperationType::MUL: return "mul"; + 
case OperationType::MULTIPLY_SCALAR: + return "multiply_scalar"; case OperationType::PAD: return "pad"; + case OperationType::POOLING_2D: + return "pooling_2d"; case OperationType::POW: return "pow"; case OperationType::PRELU: return "prelu"; case OperationType::RELU: return "relu"; - case OperationType::RESIZE: - return "resize"; case OperationType::RESHAPE: return "reshape"; + case OperationType::RESIZE: + return "resize"; case OperationType::RSQRT: return "rsqrt"; case OperationType::SIGMOID: @@ -110,18 +118,12 @@ std::string ToString(enum OperationType op) { return "squared_diff"; case OperationType::SUB: return "subtract"; - case OperationType::UPSAMPLE_2D: - return "upsample_2d"; - case OperationType::CONVOLUTION_TRANSPOSED: - return "convolution_transposed"; - case OperationType::MULTIPLY_SCALAR: - return "multiply_scalar"; - case OperationType::FULLY_CONNECTED: - return "fully_connected"; case OperationType::TANH: return "tanh"; - case OperationType::LSTM: - return "lstm"; + case OperationType::UPSAMPLE_2D: + return "upsample_2d"; + default: + break; } return "unknown_operation"; } @@ -140,6 +142,7 @@ OperationType OperationTypeFromString(const std::string& name) { {"cos", OperationType::COS}, {"depthwise_convolution", OperationType::DEPTHWISE_CONVOLUTION}, {"fully_connected", OperationType::FULLY_CONNECTED}, + {"hard_swish", OperationType::HARD_SWISH}, {"log", OperationType::LOG}, {"lstm", OperationType::LSTM}, {"max_unpooling", OperationType::MAX_UNPOOLING_2D}, diff --git a/tensorflow/lite/delegates/gpu/common/operations.h b/tensorflow/lite/delegates/gpu/common/operations.h index ef825376b31..5e564f6763c 100644 --- a/tensorflow/lite/delegates/gpu/common/operations.h +++ b/tensorflow/lite/delegates/gpu/common/operations.h @@ -46,14 +46,15 @@ enum class OperationType { DEPTHWISE_CONVOLUTION, DIV, FULLY_CONNECTED, + HARD_SWISH, LOG, LSTM, MAX_UNPOOLING_2D, MUL, MULTIPLY_SCALAR, + PAD, POOLING_2D, POW, - PAD, PRELU, RELU, RESHAPE, diff --git 
a/tensorflow/lite/delegates/gpu/gl/BUILD b/tensorflow/lite/delegates/gpu/gl/BUILD index 87e1f014523..b3385ea1fa6 100644 --- a/tensorflow/lite/delegates/gpu/gl/BUILD +++ b/tensorflow/lite/delegates/gpu/gl/BUILD @@ -12,6 +12,7 @@ cc_library( hdrs = ["api.h"], deps = [ ":command_queue", + ":common_cc_fbs", ":compiler", ":compiler_options", ":gl_call", @@ -23,6 +24,7 @@ cc_library( ":runtime", ":runtime_options", ":stats", + ":variable", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "//tensorflow/lite/delegates/gpu/common:model", @@ -221,7 +223,7 @@ cc_library( ":gl_errors", ":gl_shader", ":portable", - ":uniform_parameter", + ":variable", "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:types", "@com_google_absl//absl/types:variant", @@ -298,7 +300,7 @@ cc_library( ":compiler_options", ":gpu_info", ":object", - ":uniform_parameter", + ":variable", "//tensorflow/lite/delegates/gpu/common:model", "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:types", @@ -360,13 +362,12 @@ cc_library( ":portable", ":runtime_options", ":stats", - ":uniform_parameter", + ":variable", "//tensorflow/lite/delegates/gpu/common:data_type", "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/gl/runtime:shared_buffer", "@com_google_absl//absl/strings", - "@com_google_absl//absl/types:variant", ], ) @@ -383,7 +384,7 @@ cc_library( ":common_cc_fbs", ":compiled_model_cc_fbs", ":object", - ":uniform_parameter", + ":variable", "//tensorflow/lite/delegates/gpu/common:data_type", "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:types", @@ -405,7 +406,7 @@ cc_test( deps = [ ":object", ":serialization", - ":uniform_parameter", + ":variable", "//tensorflow/lite/delegates/gpu/common:shape", "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:types", @@ 
-423,8 +424,8 @@ cc_library( ) cc_library( - name = "uniform_parameter", - hdrs = ["uniform_parameter.h"], + name = "variable", + hdrs = ["variable.h"], deps = [ "//tensorflow/lite/delegates/gpu/common:types", "@com_google_absl//absl/types:variant", diff --git a/tensorflow/lite/delegates/gpu/gl/api.cc b/tensorflow/lite/delegates/gpu/gl/api.cc index 60c29abb2e5..2767bc399c6 100644 --- a/tensorflow/lite/delegates/gpu/gl/api.cc +++ b/tensorflow/lite/delegates/gpu/gl/api.cc @@ -35,6 +35,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/gl/object.h" #include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h" #include "tensorflow/lite/delegates/gpu/gl/runtime.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" #ifndef TFLITE_GPU_BINARY_RELEASE #include "tensorflow/lite/delegates/gpu/gl/serialization.h" @@ -168,7 +169,7 @@ class InferenceContextWithBatchImpl : public InferenceContext { struct ProgramParameters { // A list of uniform parameters to be set. - std::vector parameters; + std::vector parameters; // A list of objects to bind to opengl program. 
std::vector objects; @@ -277,7 +278,7 @@ class CompiledModelImpl #ifndef TFLITE_GPU_BINARY_RELEASE // Called on deserialization - Status OnProgram(const std::vector& parameters, + Status OnProgram(const std::vector& parameters, const std::vector& objects, const uint3& workgroup_size, const uint3& num_workgroups, size_t partial_shader_index) final { diff --git a/tensorflow/lite/delegates/gpu/gl/compiler/BUILD b/tensorflow/lite/delegates/gpu/gl/compiler/BUILD index 551f128556f..6ff34577844 100644 --- a/tensorflow/lite/delegates/gpu/gl/compiler/BUILD +++ b/tensorflow/lite/delegates/gpu/gl/compiler/BUILD @@ -35,7 +35,7 @@ cc_library( deps = [ ":preprocessor", "//tensorflow/lite/delegates/gpu/common:types", - "//tensorflow/lite/delegates/gpu/gl:uniform_parameter", + "//tensorflow/lite/delegates/gpu/gl:variable", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/types:variant", @@ -82,6 +82,7 @@ cc_test( ":object_accessor", ":parameter_accessor", "//tensorflow/lite/delegates/gpu/common:types", + "//tensorflow/lite/delegates/gpu/gl:variable", "@com_google_absl//absl/types:variant", "@com_google_googletest//:gtest_main", ], @@ -94,7 +95,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:model", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/gl:object", - "//tensorflow/lite/delegates/gpu/gl:uniform_parameter", + "//tensorflow/lite/delegates/gpu/gl:variable", ], ) @@ -113,6 +114,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/gl:compiler_options", "//tensorflow/lite/delegates/gpu/gl:gpu_info", "//tensorflow/lite/delegates/gpu/gl:object", + "//tensorflow/lite/delegates/gpu/gl:variable", "@com_google_absl//absl/strings", ], ) @@ -175,7 +177,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/gl:node_shader", "//tensorflow/lite/delegates/gpu/gl:object", - "//tensorflow/lite/delegates/gpu/gl:uniform_parameter", + 
"//tensorflow/lite/delegates/gpu/gl:variable", "@com_google_absl//absl/strings", ], ) diff --git a/tensorflow/lite/delegates/gpu/gl/compiler/object_accessor.cc b/tensorflow/lite/delegates/gpu/gl/compiler/object_accessor.cc index bd11be6be62..fff15b455c1 100644 --- a/tensorflow/lite/delegates/gpu/gl/compiler/object_accessor.cc +++ b/tensorflow/lite/delegates/gpu/gl/compiler/object_accessor.cc @@ -360,9 +360,37 @@ struct TextureSamplerTypeGetter { return (*this)(uint2()); } - std::string operator()(const uint2&) const { return "sampler2D"; } + std::string operator()(const uint2&) const { + switch (type) { + case DataType::FLOAT16: + case DataType::FLOAT32: + return "sampler2D"; + case DataType::INT32: + case DataType::INT16: + return "isampler2D"; + case DataType::UINT32: + case DataType::UINT16: + return "usampler2D"; + default: + return "unknown_sampler2D"; + } + } - std::string operator()(const uint3&) const { return "sampler2DArray"; } + std::string operator()(const uint3&) const { + switch (type) { + case DataType::FLOAT16: + case DataType::FLOAT32: + return "sampler2DArray"; + case DataType::INT32: + case DataType::INT16: + return "isampler2DArray"; + case DataType::UINT32: + case DataType::UINT16: + return "usampler2DArray"; + default: + return "unknown_sampler2DArray"; + } + } DataType type; }; diff --git a/tensorflow/lite/delegates/gpu/gl/compiler/object_accessor_test.cc b/tensorflow/lite/delegates/gpu/gl/compiler/object_accessor_test.cc index 2ee6d9de461..0b04210a00d 100644 --- a/tensorflow/lite/delegates/gpu/gl/compiler/object_accessor_test.cc +++ b/tensorflow/lite/delegates/gpu/gl/compiler/object_accessor_test.cc @@ -23,6 +23,7 @@ limitations under the License. 
#include "absl/types/variant.h" #include "tensorflow/lite/delegates/gpu/common/types.h" #include "tensorflow/lite/delegates/gpu/gl/compiler/parameter_accessor.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -34,11 +35,11 @@ struct ParameterComparator { const T* v = absl::get_if(&p.value); return v && t == *v; } - const UniformParameter& p; + const Variable& p; }; // partially equal -bool operator==(const UniformParameter& l, const UniformParameter& r) { +bool operator==(const Variable& l, const Variable& r) { return l.name == r.name && absl::visit(ParameterComparator{l}, r.value); } @@ -83,8 +84,8 @@ TEST(Preprocessor, ReadFromBufferByIndex) { EXPECT_EQ(accessor.Rewrite("obj[x,y + 5,z]", &result), RewriteStatus::SUCCESS); EXPECT_THAT(parameters.GetUniformParameters(), - testing::UnorderedElementsAre(UniformParameter{"obj_w", 1}, - UniformParameter{"obj_h", 2})); + testing::UnorderedElementsAre(Variable{"obj_w", 1}, + Variable{"obj_h", 2})); ASSERT_EQ(result, "obj.data[x + $obj_w$ * (y + 5 + $obj_h$ * (z))]"); } @@ -132,8 +133,8 @@ TEST(Preprocessor, WriteToBufferByIndex) { EXPECT_EQ(accessor.Rewrite(" obj[i,j,k] =value", &result), RewriteStatus::SUCCESS); EXPECT_THAT(parameters.GetUniformParameters(), - testing::UnorderedElementsAre(UniformParameter{"obj_w", 1}, - UniformParameter{"obj_h", 2})); + testing::UnorderedElementsAre(Variable{"obj_w", 1}, + Variable{"obj_h", 2})); ASSERT_EQ(result, "obj.data[i + $obj_w$ * (j + $obj_h$ * (k))] = value"); } diff --git a/tensorflow/lite/delegates/gpu/gl/compiler/parameter_accessor.cc b/tensorflow/lite/delegates/gpu/gl/compiler/parameter_accessor.cc index f3f442bc218..55d7152c0e4 100644 --- a/tensorflow/lite/delegates/gpu/gl/compiler/parameter_accessor.cc +++ b/tensorflow/lite/delegates/gpu/gl/compiler/parameter_accessor.cc @@ -68,7 +68,7 @@ struct UniformTypeGetter { }; // Returns GLSL uniform type of the given parameter. 
-std::string GetUniformType(const UniformParameter::ValueType& value) { +std::string GetUniformType(const Variable::ValueType& value) { return absl::visit(UniformTypeGetter(), value); } @@ -138,7 +138,7 @@ struct ConstGenerator { }; // Appends string representation of a parameter value. -void GetValue(const UniformParameter::ValueType& value, std::string* result) { +void GetValue(const Variable::ValueType& value, std::string* result) { absl::visit(ConstGenerator{result}, value); } @@ -155,11 +155,11 @@ struct UniformDeclarationGenerator { param.name, "[", v.size(), "];\n"); } - const UniformParameter& param; + const Variable& param; std::string* result; }; -void GenerateUniformDeclaration(const UniformParameter& parameter, +void GenerateUniformDeclaration(const Variable& parameter, std::string* result) { absl::visit(UniformDeclarationGenerator{parameter, result}, parameter.value); } @@ -176,7 +176,7 @@ struct VariableLengthGetter { }; // Returns true if value is a vector -bool IsVariableLength(const UniformParameter::ValueType& value) { +bool IsVariableLength(const Variable::ValueType& value) { return absl::visit(VariableLengthGetter(), value); } @@ -222,7 +222,7 @@ struct FieldAccessor { }; // Appends formatted value of the given field. -void GetValue(const UniformParameter::ValueType& value, Field field, +void GetValue(const Variable::ValueType& value, Field field, std::string* result) { absl::visit(FieldAccessor{field, result}, value); } @@ -262,7 +262,7 @@ struct FieldChecker { }; // Returns true if field has field access and field is not out of bounds. 
-bool HasField(const UniformParameter::ValueType& value, Field field) { +bool HasField(const Variable::ValueType& value, Field field) { return absl::visit(FieldChecker{field}, value); } @@ -322,7 +322,7 @@ RewriteStatus ParameterAccessor::Rewrite(absl::string_view input, return RewriteStatus::SUCCESS; } -bool ParameterAccessor::AddParameter(UniformParameter param) { +bool ParameterAccessor::AddParameter(Variable param) { std::string name = param.name; return name_to_param_.insert({name, std::move(param)}).second; } @@ -353,8 +353,8 @@ std::string ParameterAccessor::GetUniformDeclarations() const { return declarations; } -std::vector ParameterAccessor::GetUniformParameters() const { - std::vector params; +std::vector ParameterAccessor::GetUniformParameters() const { + std::vector params; if (!inline_values_) { for (auto& param : name_to_param_) { params.push_back(param.second); diff --git a/tensorflow/lite/delegates/gpu/gl/compiler/parameter_accessor.h b/tensorflow/lite/delegates/gpu/gl/compiler/parameter_accessor.h index e6efed0124f..3dacc34d21f 100644 --- a/tensorflow/lite/delegates/gpu/gl/compiler/parameter_accessor.h +++ b/tensorflow/lite/delegates/gpu/gl/compiler/parameter_accessor.h @@ -21,7 +21,7 @@ limitations under the License. #include #include "tensorflow/lite/delegates/gpu/gl/compiler/preprocessor.h" -#include "tensorflow/lite/delegates/gpu/gl/uniform_parameter.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -51,7 +51,7 @@ class ParameterAccessor : public InlineRewrite { RewriteStatus Rewrite(absl::string_view input, std::string* output) final; // Return true if parameter was successfully added. - bool AddParameter(UniformParameter param); + bool AddParameter(Variable param); // Returns const parameters that need to be inlined in the a shader's code. 
std::string GetConstDeclarations() const; @@ -60,14 +60,14 @@ class ParameterAccessor : public InlineRewrite { std::string GetUniformDeclarations() const; // Returns a collection of uniform parameters. - std::vector GetUniformParameters() const; + std::vector GetUniformParameters() const; private: const bool inline_values_; // Unique parameter index used for obfuscation. uint32_t unique_param_index_ = 0; - std::unordered_map name_to_param_; + std::unordered_map name_to_param_; }; // Implementation details below. diff --git a/tensorflow/lite/delegates/gpu/gl/compiler/parameter_accessor_test.cc b/tensorflow/lite/delegates/gpu/gl/compiler/parameter_accessor_test.cc index 96182751b9b..d8c634e8c85 100644 --- a/tensorflow/lite/delegates/gpu/gl/compiler/parameter_accessor_test.cc +++ b/tensorflow/lite/delegates/gpu/gl/compiler/parameter_accessor_test.cc @@ -36,7 +36,7 @@ TEST(Preprocessor, CornerCases) { TEST(Preprocessor, Value) { ParameterAccessor accessor(true); - ASSERT_TRUE(accessor.AddParameter(UniformParameter{"var", int32_t(1)})); + ASSERT_TRUE(accessor.AddParameter({"var", int32_t(1)})); std::string result; EXPECT_EQ(accessor.Rewrite("var", &result), RewriteStatus::SUCCESS); ASSERT_EQ(result, "1"); @@ -44,7 +44,7 @@ TEST(Preprocessor, Value) { TEST(Preprocessor, ValueVec) { ParameterAccessor accessor(true); - ASSERT_TRUE(accessor.AddParameter(UniformParameter{"var", int2(1, 2)})); + ASSERT_TRUE(accessor.AddParameter({"var", int2(1, 2)})); std::string result; EXPECT_EQ(accessor.Rewrite("var", &result), RewriteStatus::SUCCESS); ASSERT_EQ(result, "ivec2(1,2)"); @@ -52,8 +52,7 @@ TEST(Preprocessor, ValueVec) { TEST(Preprocessor, Field) { ParameterAccessor accessor(true); - ASSERT_TRUE( - accessor.AddParameter(UniformParameter{"var", float2(1.0, 2.1234567)})); + ASSERT_TRUE(accessor.AddParameter({"var", float2(1.0, 2.1234567)})); std::string result; EXPECT_EQ(accessor.Rewrite("var.y", &result), RewriteStatus::SUCCESS); ASSERT_EQ(result, "2.123456717f"); @@ -61,8 +60,8 
@@ TEST(Preprocessor, Field) { TEST(Preprocessor, FieldFail) { ParameterAccessor accessor(true); - ASSERT_TRUE(accessor.AddParameter(UniformParameter{"var", 1.0f})); - ASSERT_TRUE(accessor.AddParameter(UniformParameter{"vec", float2(1.0, 1.0)})); + ASSERT_TRUE(accessor.AddParameter({"var", 1.0f})); + ASSERT_TRUE(accessor.AddParameter({"vec", float2(1.0, 1.0)})); std::string result; EXPECT_EQ(accessor.Rewrite("var.y", &result), RewriteStatus::ERROR); ASSERT_EQ(result, "INVALID_ACCESS_BY_FIELD"); @@ -76,7 +75,7 @@ TEST(Preprocessor, Variable) { ParameterAccessor accessor(true); std::vector v; v.push_back(int2(1, 2)); - ASSERT_TRUE(accessor.AddParameter(UniformParameter{"var", v})); + ASSERT_TRUE(accessor.AddParameter({"var", v})); std::string result; EXPECT_EQ(accessor.Rewrite("var[i].y", &result), RewriteStatus::SUCCESS); ASSERT_EQ(result, "var[i].y"); @@ -86,7 +85,7 @@ TEST(Preprocessor, Variable) { TEST(Preprocessor, InlineVariableFail) { ParameterAccessor accessor(true); - ASSERT_TRUE(accessor.AddParameter(UniformParameter{"var", 1})); + ASSERT_TRUE(accessor.AddParameter({"var", 1})); std::string result; EXPECT_EQ(accessor.Rewrite("var[i]", &result), RewriteStatus::ERROR); ASSERT_EQ(result, "INVALID_ACCESS_BY_INDEX"); diff --git a/tensorflow/lite/delegates/gpu/gl/compiler/rename.cc b/tensorflow/lite/delegates/gpu/gl/compiler/rename.cc index 1c81ebff6b2..e8d1d786b0e 100644 --- a/tensorflow/lite/delegates/gpu/gl/compiler/rename.cc +++ b/tensorflow/lite/delegates/gpu/gl/compiler/rename.cc @@ -28,7 +28,7 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/gl/compiler/parameter_accessor.h" #include "tensorflow/lite/delegates/gpu/gl/compiler/preprocessor.h" #include "tensorflow/lite/delegates/gpu/gl/object.h" -#include "tensorflow/lite/delegates/gpu/gl/uniform_parameter.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -66,15 +66,15 @@ class ParameterRewriter : public InlineRewrite { } // Return true if parameter was successfully added. - bool AddParameter(UniformParameter param) { + bool AddParameter(Variable param) { std::string old_name = param.name; param.name = name_func_(old_name); return name_to_param_.insert({old_name, std::move(param)}).second; } // Returns a collection of uniform parameters with updated names. - std::vector GetUniformParameters() const { - std::vector params; + std::vector GetUniformParameters() const { + std::vector params; params.reserve(name_to_param_.size()); for (auto& param : name_to_param_) { params.push_back(param.second); @@ -86,7 +86,7 @@ class ParameterRewriter : public InlineRewrite { const std::string inline_delimiter_; const NameFunctor name_func_; - std::unordered_map name_to_param_; + std::unordered_map name_to_param_; }; // Rewrites names of all objects according to returned values from the diff --git a/tensorflow/lite/delegates/gpu/gl/compiler/shader_code.h b/tensorflow/lite/delegates/gpu/gl/compiler/shader_code.h index 8d6d52b002a..3f3db3796d1 100644 --- a/tensorflow/lite/delegates/gpu/gl/compiler/shader_code.h +++ b/tensorflow/lite/delegates/gpu/gl/compiler/shader_code.h @@ -22,7 +22,7 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/common/model.h" #include "tensorflow/lite/delegates/gpu/common/types.h" #include "tensorflow/lite/delegates/gpu/gl/object.h" -#include "tensorflow/lite/delegates/gpu/gl/uniform_parameter.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -30,7 +30,7 @@ namespace gl { struct ShaderCode { ShaderCode() = default; - ShaderCode(const std::vector& in_parameters, + ShaderCode(const std::vector& in_parameters, const std::vector& in_objects, const uint3& in_workload, const uint3& in_recommended_workgroup, const std::string& in_source_code, @@ -43,7 +43,7 @@ struct ShaderCode { node_indices(in_node_indices) {} // A list of uniform parameters to be set. - std::vector parameters; + std::vector parameters; // A list of objects to bind to opengl program. std::vector objects; diff --git a/tensorflow/lite/delegates/gpu/gl/compiler/shader_codegen.cc b/tensorflow/lite/delegates/gpu/gl/compiler/shader_codegen.cc index 0b84b8413a8..30da5472565 100644 --- a/tensorflow/lite/delegates/gpu/gl/compiler/shader_codegen.cc +++ b/tensorflow/lite/delegates/gpu/gl/compiler/shader_codegen.cc @@ -20,6 +20,7 @@ limitations under the License. 
#include "absl/strings/str_cat.h" #include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/gl/compiler/preprocessor.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -42,8 +43,8 @@ Status ShaderCodegen::Build(CompiledNodeAttributes attr, return OkStatus(); }; - auto add_parameter = [&](UniformParameter&& param) { - if (!parameters.AddParameter(std::forward(param))) { + auto add_parameter = [&](Variable&& param) { + if (!parameters.AddParameter(std::forward(param))) { return InternalError("There is a parameter with the same name"); } return OkStatus(); diff --git a/tensorflow/lite/delegates/gpu/gl/converters/BUILD b/tensorflow/lite/delegates/gpu/gl/converters/BUILD index 1d40b0e0f86..06c78dcab0b 100644 --- a/tensorflow/lite/delegates/gpu/gl/converters/BUILD +++ b/tensorflow/lite/delegates/gpu/gl/converters/BUILD @@ -30,7 +30,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/gl:gl_buffer", "//tensorflow/lite/delegates/gpu/gl:gl_program", "//tensorflow/lite/delegates/gpu/gl:gl_shader", - "//tensorflow/lite/delegates/gpu/gl:uniform_parameter", + "//tensorflow/lite/delegates/gpu/gl:variable", ], ) @@ -75,7 +75,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/gl:gl_buffer", "//tensorflow/lite/delegates/gpu/gl:gl_program", "//tensorflow/lite/delegates/gpu/gl:gl_shader", - "//tensorflow/lite/delegates/gpu/gl:uniform_parameter", + "//tensorflow/lite/delegates/gpu/gl:variable", ], ) diff --git a/tensorflow/lite/delegates/gpu/gl/converters/bhwc_to_phwc4.cc b/tensorflow/lite/delegates/gpu/gl/converters/bhwc_to_phwc4.cc index d48d9544025..8b42dedc332 100644 --- a/tensorflow/lite/delegates/gpu/gl/converters/bhwc_to_phwc4.cc +++ b/tensorflow/lite/delegates/gpu/gl/converters/bhwc_to_phwc4.cc @@ -25,7 +25,7 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/gl/converters/util.h" #include "tensorflow/lite/delegates/gpu/gl/gl_program.h" #include "tensorflow/lite/delegates/gpu/gl/gl_shader.h" -#include "tensorflow/lite/delegates/gpu/gl/uniform_parameter.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -89,10 +89,10 @@ Status ConverterBhwcToPhwc4::Convert(const BHWC& shape, const GlBuffer& source, uint3 workload = uint3(shape.w, shape.h, shape.c); uint3 num_workgroups = IntegralDivideRoundUp(workload, workgroup_size_); - RETURN_IF_ERROR(program_.SetParameter(UniformParameter{ - "sizes_", - int4(static_cast(workload.x), static_cast(workload.y), - static_cast(workload.z), static_cast(shape.c))})); + RETURN_IF_ERROR(program_.SetParameter( + {"sizes_", + int4(static_cast(workload.x), static_cast(workload.y), + static_cast(workload.z), static_cast(shape.c))})); RETURN_IF_ERROR(source.BindToIndex(0)); RETURN_IF_ERROR(destination->BindToIndex(1)); if (command_queue) { diff --git a/tensorflow/lite/delegates/gpu/gl/converters/phwc4_to_bhwc.cc b/tensorflow/lite/delegates/gpu/gl/converters/phwc4_to_bhwc.cc index 65f19d4513d..c63fee9f8bd 100644 --- a/tensorflow/lite/delegates/gpu/gl/converters/phwc4_to_bhwc.cc +++ b/tensorflow/lite/delegates/gpu/gl/converters/phwc4_to_bhwc.cc @@ -25,7 +25,7 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/gl/converters/util.h" #include "tensorflow/lite/delegates/gpu/gl/gl_program.h" #include "tensorflow/lite/delegates/gpu/gl/gl_shader.h" -#include "tensorflow/lite/delegates/gpu/gl/uniform_parameter.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -85,10 +85,10 @@ Status ConverterPhwc4ToBhwc::Convert(const BHWC& shape, const GlBuffer& source, // TODO(akulik): simply pass workload as soon as UniformParameter // supports uint3 - RETURN_IF_ERROR(program_.SetParameter(UniformParameter{ - "sizes_", - int4(static_cast(workload.x), static_cast(workload.y), - static_cast(workload.z), 0)})); + RETURN_IF_ERROR(program_.SetParameter( + {"sizes_", + int4(static_cast(workload.x), static_cast(workload.y), + static_cast(workload.z), 0)})); RETURN_IF_ERROR(source.BindToIndex(0)); RETURN_IF_ERROR(destination->BindToIndex(1)); if (command_queue) { diff --git a/tensorflow/lite/delegates/gpu/gl/egl_context.cc b/tensorflow/lite/delegates/gpu/gl/egl_context.cc index 6ceaf36a928..146a1921e11 100644 --- a/tensorflow/lite/delegates/gpu/gl/egl_context.cc +++ b/tensorflow/lite/delegates/gpu/gl/egl_context.cc @@ -15,6 +15,8 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/gl/egl_context.h" +#include + #include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/gl/gl_call.h" #include "tensorflow/lite/delegates/gpu/gl/gl_errors.h" @@ -54,7 +56,7 @@ Status CreateContext(EGLDisplay display, EGLContext shared_context, } bool HasExtension(EGLDisplay display, const char* name) { - return strstr(eglQueryString(display, EGL_EXTENSIONS), name); + return std::strstr(eglQueryString(display, EGL_EXTENSIONS), name); } } // namespace diff --git a/tensorflow/lite/delegates/gpu/gl/gl_program.cc b/tensorflow/lite/delegates/gpu/gl/gl_program.cc index 9b0cf3c07db..8e631288181 100644 --- a/tensorflow/lite/delegates/gpu/gl/gl_program.cc +++ b/tensorflow/lite/delegates/gpu/gl/gl_program.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/common/types.h" #include "tensorflow/lite/delegates/gpu/gl/gl_call.h" #include "tensorflow/lite/delegates/gpu/gl/gl_errors.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -180,7 +181,7 @@ GlProgram& GlProgram::operator=(GlProgram&& program) { GlProgram::~GlProgram() { Invalidate(); } -Status GlProgram::SetParameter(const UniformParameter& param) { +Status GlProgram::SetParameter(const Variable& param) { GLint uniform_location; RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glGetUniformLocation, &uniform_location, id_, param.name.c_str())); diff --git a/tensorflow/lite/delegates/gpu/gl/gl_program.h b/tensorflow/lite/delegates/gpu/gl/gl_program.h index ff176344d19..dfd6bde4c59 100644 --- a/tensorflow/lite/delegates/gpu/gl/gl_program.h +++ b/tensorflow/lite/delegates/gpu/gl/gl_program.h @@ -23,7 +23,7 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/common/types.h" #include "tensorflow/lite/delegates/gpu/gl/gl_shader.h" #include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h" -#include "tensorflow/lite/delegates/gpu/gl/uniform_parameter.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -61,7 +61,7 @@ class GlProgram { // into this program. Status GetBinary(BinaryShader* binary_shader); - Status SetParameter(const UniformParameter& param); + Status SetParameter(const Variable& param); // Executes program Status Dispatch(const uint3& workgroups) const; diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/BUILD b/tensorflow/lite/delegates/gpu/gl/kernels/BUILD index 97b96129a15..50d204c5348 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/gl/kernels/BUILD @@ -49,6 +49,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/gl:node_shader", + "//tensorflow/lite/delegates/gpu/gl:variable", "@com_google_absl//absl/memory", ], ) @@ -85,6 +86,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/gl:node_shader", + "//tensorflow/lite/delegates/gpu/gl:variable", "//tensorflow/lite/delegates/gpu/gl/workgroups:ideal_workgroup_picker", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", @@ -123,6 +125,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/gl:node_shader", + "//tensorflow/lite/delegates/gpu/gl:variable", "//tensorflow/lite/delegates/gpu/gl/workgroups:ideal_workgroup_picker", "@com_google_absl//absl/memory", ], @@ -191,6 +194,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/gl:node_shader", + 
"//tensorflow/lite/delegates/gpu/gl:variable", "@com_google_absl//absl/memory", ], ) @@ -257,6 +261,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/gl:node_shader", + "//tensorflow/lite/delegates/gpu/gl:variable", "@com_google_absl//absl/memory", ], ) @@ -324,6 +329,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/gl:node_shader", + "//tensorflow/lite/delegates/gpu/gl:variable", "@com_google_absl//absl/memory", ], ) @@ -354,10 +360,10 @@ cc_library( hdrs = ["pooling.h"], deps = [ "//tensorflow/lite/delegates/gpu/common:operations", - "//tensorflow/lite/delegates/gpu/common:shape", "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/gl:node_shader", + "//tensorflow/lite/delegates/gpu/gl:variable", "@com_google_absl//absl/memory", ], ) @@ -427,6 +433,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/gl:node_shader", + "//tensorflow/lite/delegates/gpu/gl:variable", "@com_google_absl//absl/memory", ], ) @@ -493,6 +500,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/gl:node_shader", + "//tensorflow/lite/delegates/gpu/gl:variable", "@com_google_absl//absl/memory", ], ) @@ -528,6 +536,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/gl:node_shader", + "//tensorflow/lite/delegates/gpu/gl:variable", "@com_google_absl//absl/memory", ], ) @@ -588,6 +597,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/gl:node_shader", + 
"//tensorflow/lite/delegates/gpu/gl:variable", "@com_google_absl//absl/memory", ], ) @@ -621,6 +631,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/gl:node_shader", + "//tensorflow/lite/delegates/gpu/gl:variable", "@com_google_absl//absl/memory", ], ) diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/concat.cc b/tensorflow/lite/delegates/gpu/gl/kernels/concat.cc index d3aeabcdfc3..c6cdb078a6d 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/concat.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/concat.cc @@ -24,6 +24,7 @@ limitations under the License. #include "absl/memory/memory.h" #include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/common/types.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -349,7 +350,7 @@ class FlatConcatByHeight : public NodeShader { GeneratedCode* generated_code) const final { auto inputs = ctx.graph->FindInputs(ctx.node->id); std::string code; - std::vector params; + std::vector params; for (int i = 0, shift = 0; i < inputs.size(); shift += inputs[i]->tensor.shape.h, i++) { code += "if ("; @@ -415,7 +416,7 @@ class FlatConcatByWidth : public NodeShader { GeneratedCode* generated_code) const final { auto inputs = ctx.graph->FindInputs(ctx.node->id); std::string code; - std::vector params; + std::vector params; for (int i = 0, shift = 0; i < inputs.size(); shift += inputs[i]->tensor.shape.w, i++) { code += "if ("; diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/conv.cc b/tensorflow/lite/delegates/gpu/gl/kernels/conv.cc index 2c19fcc24e4..0314b959e64 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/conv.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/conv.cc @@ -27,6 +27,7 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/common/types.h" #include "tensorflow/lite/delegates/gpu/common/util.h" #include "tensorflow/lite/delegates/gpu/gl/node_shader.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" #include "tensorflow/lite/delegates/gpu/gl/workgroups/ideal_workgroup_picker.h" namespace tflite { @@ -50,7 +51,7 @@ class Convolution : public NodeShader { h * attr.dilations.h - attr.padding.prepended.h); } } - std::vector parameters = { + std::vector parameters = { {"input_data_0_h", input->tensor.shape.h}, {"input_data_0_w", input->tensor.shape.w}, {"offsets_count", offsets_count}, @@ -158,7 +159,7 @@ class Convolution1x1 : public NodeShader { int multiplier = SelectMultiplier(input->tensor.shape.w, ctx); - std::vector parameters = { + std::vector parameters = { {"src_depth", IntegralDivideRoundUp(input->tensor.shape.c, 4)}, }; diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/depthwise_conv.cc b/tensorflow/lite/delegates/gpu/gl/kernels/depthwise_conv.cc index ac381e1ca08..c82723954b9 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/depthwise_conv.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/depthwise_conv.cc @@ -26,6 +26,7 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/common/types.h" #include "tensorflow/lite/delegates/gpu/common/util.h" #include "tensorflow/lite/delegates/gpu/gl/node_shader.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" #include "tensorflow/lite/delegates/gpu/gl/workgroups/ideal_workgroup_picker.h" namespace tflite { @@ -49,7 +50,7 @@ class DepthwiseConvolution : public NodeShader { h * attr.dilations.h - attr.padding.prepended.h); } } - std::vector parameters = { + std::vector parameters = { {"input_data_0_h", input->tensor.shape.h}, {"input_data_0_w", input->tensor.shape.w}, {"offsets_count", offsets_count}, diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc b/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc index 37ee322ac8a..8ad2679e62e 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc @@ -34,60 +34,56 @@ class ElementwiseOneArgument : public NodeShader { GeneratedCode* generated_code) const final { std::string source; switch (operation_type_) { - case OperationType::ABS: { + case OperationType::ABS: source = "value_0 = abs(value_0);"; break; - } - case OperationType::SIN: { - source = "value_0 = sin(value_0);"; - break; - } - case OperationType::COS: { + case OperationType::COS: source = "value_0 = cos(value_0);"; break; - } - case OperationType::LOG: { + case OperationType::HARD_SWISH: + source = + "value_0 *= clamp(value_0 / 6.0 + vec4(0.5), vec4(0.0), " + "vec4(1.0));"; + break; + case OperationType::LOG: source = R"( - const float nan = normalize(vec4(0,0,0,0)).x; - value_0.x = value_0.x > 0.0 ? log(value_0.x) : nan; - value_0.y = value_0.y > 0.0 ? log(value_0.y) : nan; - value_0.z = value_0.z > 0.0 ? log(value_0.z) : nan; - value_0.w = value_0.w > 0.0 ? log(value_0.w) : nan; - )"; + const float nan = normalize(vec4(0, 0, 0, 0)).x; + value_0.x = value_0.x > 0.0 ? log(value_0.x) : nan; + value_0.y = value_0.y > 0.0 ? 
log(value_0.y) : nan; + value_0.z = value_0.z > 0.0 ? log(value_0.z) : nan; + value_0.w = value_0.w > 0.0 ? log(value_0.w) : nan; + )"; break; - } - case OperationType::SQRT: { + case OperationType::RSQRT: source = R"( - const float nan = normalize(vec4(0,0,0,0)).x; - value_0.x = value_0.x >= 0.0 ? sqrt(value_0.x) : nan; - value_0.y = value_0.y >= 0.0 ? sqrt(value_0.y) : nan; - value_0.z = value_0.z >= 0.0 ? sqrt(value_0.z) : nan; - value_0.w = value_0.w >= 0.0 ? sqrt(value_0.w) : nan; - )"; + const float nan = normalize(vec4(0, 0, 0, 0)).x; + value_0.x = value_0.x >= 0.0 ? 1.0 / sqrt(value_0.x) : nan; + value_0.y = value_0.y >= 0.0 ? 1.0 / sqrt(value_0.y) : nan; + value_0.z = value_0.z >= 0.0 ? 1.0 / sqrt(value_0.z) : nan; + value_0.w = value_0.w >= 0.0 ? 1.0 / sqrt(value_0.w) : nan; + )"; break; - } - case OperationType::RSQRT: { - source = R"( - const float nan = normalize(vec4(0,0,0,0)).x; - value_0.x = value_0.x >= 0.0 ? 1.0 / sqrt(value_0.x) : nan; - value_0.y = value_0.y >= 0.0 ? 1.0 / sqrt(value_0.y) : nan; - value_0.z = value_0.z >= 0.0 ? 1.0 / sqrt(value_0.z) : nan; - value_0.w = value_0.w >= 0.0 ? 1.0 / sqrt(value_0.w) : nan; - )"; - break; - } - case OperationType::SQUARE: { - source = "value_0 = value_0 * value_0;"; - break; - } - case OperationType::SIGMOID: { + case OperationType::SIGMOID: source = "value_0 = 1.0 / (1.0 + exp(-1.0 * value_0));"; break; - } - case OperationType::TANH: { + case OperationType::SIN: + source = "value_0 = sin(value_0);"; + break; + case OperationType::SQRT: + source = R"( + const float nan = normalize(vec4(0, 0, 0, 0)).x; + value_0.x = value_0.x >= 0.0 ? sqrt(value_0.x) : nan; + value_0.y = value_0.y >= 0.0 ? sqrt(value_0.y) : nan; + value_0.z = value_0.z >= 0.0 ? sqrt(value_0.z) : nan; + value_0.w = value_0.w >= 0.0 ? 
sqrt(value_0.w) : nan; + )"; + break; + case OperationType::SQUARE: + source = "value_0 = value_0 * value_0;"; + break; + case OperationType::TANH: source = "value_0 = tanh(value_0);"; break; - } default: return InvalidArgumentError("Incorrect elementwise operation type."); } @@ -183,19 +179,20 @@ std::unique_ptr NewElementwiseNodeShader( OperationType operation_type) { switch (operation_type) { case OperationType::ABS: - case OperationType::SIN: case OperationType::COS: case OperationType::LOG: - case OperationType::SQRT: + case OperationType::HARD_SWISH: case OperationType::RSQRT: - case OperationType::SQUARE: case OperationType::SIGMOID: + case OperationType::SIN: + case OperationType::SQRT: + case OperationType::SQUARE: case OperationType::TANH: return absl::make_unique(operation_type); - case OperationType::SUB: case OperationType::DIV: case OperationType::POW: case OperationType::SQUARED_DIFF: + case OperationType::SUB: return absl::make_unique(operation_type); default: return nullptr; diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/elementwise_test.cc b/tensorflow/lite/delegates/gpu/gl/kernels/elementwise_test.cc index a0d088dbe48..6743664f7e2 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/elementwise_test.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/elementwise_test.cc @@ -28,139 +28,45 @@ namespace gpu { namespace gl { namespace { -class ElementwiseOneArgumentTest : public ::testing::Test { - public: - ElementwiseOneArgumentTest() = default; - ~ElementwiseOneArgumentTest() override = default; +TensorRef GetTensorRef(int ref, const BHWC& shape) { + TensorRef tensor_ref; + tensor_ref.type = DataType::FLOAT32; + tensor_ref.ref = ref; + tensor_ref.shape = shape; + return tensor_ref; +} - TensorRef GetTensorRef(int ref) { - TensorRef tensor_ref; - tensor_ref.type = DataType::FLOAT32; - tensor_ref.ref = ref; - tensor_ref.shape = BHWC(1, 2, 2, 1); - return tensor_ref; - } -}; - -TEST_F(ElementwiseOneArgumentTest, Abs) { +TEST(ElementwiseTest, 
Abs) { OperationType op_type = OperationType::ABS; - SingleOpModel model({ToString(op_type), {}}, {GetTensorRef(0)}, - {GetTensorRef(1)}); + const BHWC shape(1, 2, 2, 1); + SingleOpModel model({/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(1, shape)}); ASSERT_TRUE(model.PopulateTensor(0, {0.0, -6.2, 2.0, 4.0})); ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); EXPECT_THAT(model.GetOutput(0), Pointwise(FloatNear(1e-6), {0.0, 6.2, 2.0, 4.0})); } -TEST_F(ElementwiseOneArgumentTest, Sin) { - OperationType op_type = OperationType::SIN; - SingleOpModel model({ToString(op_type), {}}, {GetTensorRef(0)}, - {GetTensorRef(1)}); - ASSERT_TRUE(model.PopulateTensor(0, {0.0, 3.1415926, -3.1415926, 1.0})); - ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); - EXPECT_THAT(model.GetOutput(0), - Pointwise(FloatNear(1e-6), {0.0, 0.0, 0.0, 0.841471})); -} - -TEST_F(ElementwiseOneArgumentTest, Cos) { +TEST(ElementwiseTest, Cos) { OperationType op_type = OperationType::COS; - SingleOpModel model({ToString(op_type), {}}, {GetTensorRef(0)}, - {GetTensorRef(1)}); + const BHWC shape(1, 2, 2, 1); + SingleOpModel model({/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(1, shape)}); ASSERT_TRUE(model.PopulateTensor(0, {0.0, 3.1415926, -3.1415926, 1})); ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); EXPECT_THAT(model.GetOutput(0), Pointwise(FloatNear(1e-6), {1.0, -1.0, -1.0, 0.540302})); } -TEST_F(ElementwiseOneArgumentTest, Log) { - OperationType op_type = OperationType::LOG; - SingleOpModel model({ToString(op_type), {}}, {GetTensorRef(0)}, - {GetTensorRef(1)}); - ASSERT_TRUE(model.PopulateTensor(0, {1.0, 3.1415926, 1.0, 1.0})); - ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); - EXPECT_THAT(model.GetOutput(0), - Pointwise(FloatNear(1e-6), {0.0, 1.14473, 0.0, 0.0})); -} - -TEST_F(ElementwiseOneArgumentTest, Sqrt) 
{ - OperationType op_type = OperationType::SQRT; - SingleOpModel model({ToString(op_type), {}}, {GetTensorRef(0)}, - {GetTensorRef(1)}); - ASSERT_TRUE(model.PopulateTensor(0, {0.0, 1.0, 2.0, 4.0})); - ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); - EXPECT_THAT(model.GetOutput(0), - Pointwise(FloatNear(1e-6), {0.0, 1.0, 1.414213, 2.0})); -} - -TEST_F(ElementwiseOneArgumentTest, Rsqrt) { - OperationType op_type = OperationType::RSQRT; - SingleOpModel model({ToString(op_type), {}}, {GetTensorRef(0)}, - {GetTensorRef(1)}); - ASSERT_TRUE(model.PopulateTensor(0, {1.0, 2.0, 4.0, 9.0})); - ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); - EXPECT_THAT(model.GetOutput(0), - Pointwise(FloatNear(1e-6), {1.0, 0.707106, 0.5, 0.333333})); -} - -TEST_F(ElementwiseOneArgumentTest, Square) { - OperationType op_type = OperationType::SQUARE; - SingleOpModel model({ToString(op_type), {}}, {GetTensorRef(0)}, - {GetTensorRef(1)}); - ASSERT_TRUE(model.PopulateTensor(0, {1.0, 2.0, 0.5, -3.0})); - ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); - EXPECT_THAT(model.GetOutput(0), - Pointwise(FloatNear(1e-6), {1.0, 4.0, 0.25, 9.0})); -} - -TEST_F(ElementwiseOneArgumentTest, Sigmoid) { - OperationType op_type = OperationType::SIGMOID; - SingleOpModel model({ToString(op_type), {}}, {GetTensorRef(0)}, - {GetTensorRef(1)}); - ASSERT_TRUE(model.PopulateTensor(0, {0.0, -6.0, 2.0, 4.0})); - ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); - EXPECT_THAT(model.GetOutput(0), - Pointwise(FloatNear(1e-6), {0.5, 0.002473, 0.880797, 0.982014})); -} - -TEST_F(ElementwiseOneArgumentTest, Tanh) { - OperationType op_type = OperationType::TANH; - SingleOpModel model({ToString(op_type), {}}, {GetTensorRef(0)}, - {GetTensorRef(1)}); - ASSERT_TRUE(model.PopulateTensor(0, {0.0, -6.0, 2.0, 4.0})); - ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); - EXPECT_THAT(model.GetOutput(0), - Pointwise(FloatNear(1e-6), {0.0, -0.999987, 0.964027, 0.999329})); -} - 
-class ElementwiseTwoArgumentsTest : public ::testing::Test { - public: - ElementwiseTwoArgumentsTest() = default; - ~ElementwiseTwoArgumentsTest() override = default; - - TensorRef GetTensorRef(int ref) { - TensorRef tensor_ref; - tensor_ref.type = DataType::FLOAT32; - tensor_ref.ref = ref; - tensor_ref.shape = BHWC(1, 2, 2, 1); - return tensor_ref; - } -}; - -TEST_F(ElementwiseTwoArgumentsTest, Sub) { - OperationType op_type = OperationType::SUB; - SingleOpModel model({ToString(op_type), {}}, - {GetTensorRef(0), GetTensorRef(1)}, {GetTensorRef(2)}); - ASSERT_TRUE(model.PopulateTensor(0, {0.0, -6.2, 2.0, 4.0})); - ASSERT_TRUE(model.PopulateTensor(1, {1.0, 2.0, 3.0, 4.0})); - ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); - EXPECT_THAT(model.GetOutput(0), - Pointwise(FloatNear(1e-6), {-1.0, -8.2, -1.0, 0.0})); -} - -TEST_F(ElementwiseTwoArgumentsTest, Div) { +TEST(ElementwiseTest, Div) { OperationType op_type = OperationType::DIV; - SingleOpModel model({ToString(op_type), {}}, - {GetTensorRef(0), GetTensorRef(1)}, {GetTensorRef(2)}); + const BHWC shape(1, 2, 2, 1); + SingleOpModel model( + {/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape), GetTensorRef(1, shape)}, + /*outputs=*/{GetTensorRef(2, shape)}); ASSERT_TRUE(model.PopulateTensor(0, {0.0, -6.2, 2.0, 4.0})); ASSERT_TRUE(model.PopulateTensor(1, {1.0, 2.0, -0.5, 4.0})); ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); @@ -168,10 +74,39 @@ TEST_F(ElementwiseTwoArgumentsTest, Div) { Pointwise(FloatNear(1e-6), {0.0, -3.1, -4.0, 1.0})); } -TEST_F(ElementwiseTwoArgumentsTest, Pow) { +TEST(ElementwiseTest, HardSwish) { + OperationType op_type = OperationType::HARD_SWISH; + const BHWC shape(1, 1, 1, 7); + SingleOpModel model({/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(1, shape)}); + ASSERT_TRUE( + model.PopulateTensor(0, {-4.5f, -3.0f, -1.5f, 0.0f, 1.5f, 3.0f, 4.5f})); + 
ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6f), + {0.0f, 0.0f, -0.375f, 0.0f, 1.125f, 3.f, 4.5f})); +} + +TEST(ElementwiseTest, Log) { + OperationType op_type = OperationType::LOG; + const BHWC shape(1, 2, 2, 1); + SingleOpModel model({/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(1, shape)}); + ASSERT_TRUE(model.PopulateTensor(0, {1.0, 3.1415926, 1.0, 1.0})); + ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), {0.0, 1.14473, 0.0, 0.0})); +} + +TEST(ElementwiseTest, Pow) { OperationType op_type = OperationType::POW; - SingleOpModel model({ToString(op_type), {}}, - {GetTensorRef(0), GetTensorRef(1)}, {GetTensorRef(2)}); + const BHWC shape(1, 2, 2, 1); + SingleOpModel model( + {/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape), GetTensorRef(1, shape)}, + /*outputs=*/{GetTensorRef(2, shape)}); ASSERT_TRUE(model.PopulateTensor(0, {0.0, 1.0, 2.0, 4.0})); ASSERT_TRUE(model.PopulateTensor(1, {1.0, 2.0, 3.0, 4.0})); ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); @@ -179,10 +114,73 @@ TEST_F(ElementwiseTwoArgumentsTest, Pow) { Pointwise(FloatNear(1e-6), {0.0, 1.0, 8.0, 256.0})); } -TEST_F(ElementwiseTwoArgumentsTest, SquaredDiff) { +TEST(ElementwiseTest, Rsqrt) { + OperationType op_type = OperationType::RSQRT; + const BHWC shape(1, 2, 2, 1); + SingleOpModel model({/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(1, shape)}); + ASSERT_TRUE(model.PopulateTensor(0, {1.0, 2.0, 4.0, 9.0})); + ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), {1.0, 0.707106, 0.5, 0.333333})); +} + +TEST(ElementwiseTest, Sigmoid) { + OperationType op_type = OperationType::SIGMOID; + const BHWC 
shape(1, 2, 2, 1); + SingleOpModel model({/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(1, shape)}); + ASSERT_TRUE(model.PopulateTensor(0, {0.0, -6.0, 2.0, 4.0})); + ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), {0.5, 0.002473, 0.880797, 0.982014})); +} + +TEST(ElementwiseTest, Sin) { + OperationType op_type = OperationType::SIN; + const BHWC shape(1, 2, 2, 1); + SingleOpModel model({/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(1, shape)}); + ASSERT_TRUE(model.PopulateTensor(0, {0.0, 3.1415926, -3.1415926, 1.0})); + ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), {0.0, 0.0, 0.0, 0.841471})); +} + +TEST(ElementwiseTest, Sqrt) { + OperationType op_type = OperationType::SQRT; + const BHWC shape(1, 2, 2, 1); + SingleOpModel model({/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(1, shape)}); + ASSERT_TRUE(model.PopulateTensor(0, {0.0, 1.0, 2.0, 4.0})); + ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), {0.0, 1.0, 1.414213, 2.0})); +} + +TEST(ElementwiseTest, Square) { + OperationType op_type = OperationType::SQUARE; + const BHWC shape(1, 2, 2, 1); + SingleOpModel model({/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(1, shape)}); + ASSERT_TRUE(model.PopulateTensor(0, {1.0, 2.0, 0.5, -3.0})); + ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), {1.0, 4.0, 0.25, 9.0})); +} + +TEST(ElementwiseTest, SquaredDiff) { OperationType op_type = OperationType::SQUARED_DIFF; - SingleOpModel model({ToString(op_type), 
{}}, - {GetTensorRef(0), GetTensorRef(1)}, {GetTensorRef(2)}); + const BHWC shape(1, 2, 2, 1); + SingleOpModel model( + {/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape), GetTensorRef(1, shape)}, + /*outputs=*/{GetTensorRef(2, shape)}); ASSERT_TRUE(model.PopulateTensor(0, {0.0, 2.0, 2.0, 4.0})); ASSERT_TRUE(model.PopulateTensor(1, {1.0, 1.0, 5.0, 4.0})); ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); @@ -190,6 +188,32 @@ TEST_F(ElementwiseTwoArgumentsTest, SquaredDiff) { Pointwise(FloatNear(1e-6), {1.0, 1.0, 9.0, 0.0})); } +TEST(ElementwiseTest, Sub) { + OperationType op_type = OperationType::SUB; + const BHWC shape(1, 2, 2, 1); + SingleOpModel model( + {/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape), GetTensorRef(1, shape)}, + /*outputs=*/{GetTensorRef(2, shape)}); + ASSERT_TRUE(model.PopulateTensor(0, {0.0, -6.2, 2.0, 4.0})); + ASSERT_TRUE(model.PopulateTensor(1, {1.0, 2.0, 3.0, 4.0})); + ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), {-1.0, -8.2, -1.0, 0.0})); +} + +TEST(ElementwiseTest, Tanh) { + OperationType op_type = OperationType::TANH; + const BHWC shape(1, 2, 2, 1); + SingleOpModel model({/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(1, shape)}); + ASSERT_TRUE(model.PopulateTensor(0, {0.0, -6.0, 2.0, 4.0})); + ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), {0.0, -0.999987, 0.964027, 0.999329})); +} + } // namespace } // namespace gl } // namespace gpu diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/fully_connected.cc b/tensorflow/lite/delegates/gpu/gl/kernels/fully_connected.cc index 487db2b5d86..f6c7526b5eb 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/fully_connected.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/fully_connected.cc 
@@ -25,6 +25,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/common/convert.h" #include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/common/types.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -39,7 +40,7 @@ class FullyConnectedBuffers : public NodeShader { ctx.node->operation.attributes); // TODO(akulik): check that input has h,w == 1,1 - std::vector parameters = { + std::vector parameters = { {"src_depth", IntegralDivideRoundUp(attr.weights.shape.i, 4)}, }; diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/max_unpooling.cc b/tensorflow/lite/delegates/gpu/gl/kernels/max_unpooling.cc index 610679df2ca..fd9302cb00c 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/max_unpooling.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/max_unpooling.cc @@ -24,6 +24,7 @@ limitations under the License. #include "absl/memory/memory.h" #include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/common/types.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -36,7 +37,7 @@ class MaxUnpooling : public NodeShader { GeneratedCode* generated_code) const final { auto attr = absl::any_cast( ctx.node->operation.attributes); - std::vector parameters = { + std::vector parameters = { {"stride", int2(attr.strides.w, attr.strides.h)}, {"offset", int2(attr.padding.prepended.w, attr.padding.prepended.h)}, {"window_h", attr.kernel.h}, diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/pad.cc b/tensorflow/lite/delegates/gpu/gl/kernels/pad.cc index 6d6662c9a54..a27835bbf36 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/pad.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/pad.cc @@ -25,6 +25,7 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/common/operations.h" #include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/common/types.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -46,7 +47,7 @@ class Pad : public NodeShader { attr.prepended.h < 0 || attr.prepended.w < 0 || attr.prepended.c < 0) { return UnimplementedError("Negative padding is not supported."); } - std::vector parameters = { + std::vector parameters = { {"input_data_0_h", input->tensor.shape.h}, {"input_data_0_w", input->tensor.shape.w}, {"prepended", diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/pooling.cc b/tensorflow/lite/delegates/gpu/gl/kernels/pooling.cc index 291c423fe0b..ace3e801c54 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/pooling.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/pooling.cc @@ -24,6 +24,7 @@ limitations under the License. #include "absl/memory/memory.h" #include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/common/types.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -40,7 +41,7 @@ Status GenerateMaxPoolingCode(const Pooling2DAttributes& attr, return InvalidArgumentError("Padding is bigger than kernel."); } - std::vector parameters = { + std::vector parameters = { {"input_data_0_h", input->tensor.shape.h}, {"input_data_0_w", input->tensor.shape.w}, {"stride", int2(attr.strides.w, attr.strides.h)}, @@ -100,7 +101,7 @@ Status GenerateAveragePoolingCode(const Pooling2DAttributes& attr, GeneratedCode* generated_code) { auto input = ctx.graph->FindInputs(ctx.node->id)[0]; - std::vector parameters = { + std::vector parameters = { {"input_data_0_h", input->tensor.shape.h}, {"input_data_0_w", input->tensor.shape.w}, {"stride", int2(attr.strides.w, attr.strides.h)}, diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc b/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc 
index 2201d0018dd..7c93ebd1caf 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc @@ -60,10 +60,10 @@ class Registry : public NodeShader { using Type = OperationType; using NewShaderFunc = std::function()>; - auto insert_op = [&](Type type, NewShaderFunc func) { + const auto insert_op = [&](Type type, NewShaderFunc func) { shaders_[ToString(type)].push_back(func()); }; - auto insert_elementwise_op = [&](Type operation_type) { + const auto insert_elementwise_op = [&](Type operation_type) { shaders_[ToString(operation_type)].push_back( NewElementwiseNodeShader(operation_type)); }; @@ -82,26 +82,27 @@ class Registry : public NodeShader { insert_op(Type::MULTIPLY_SCALAR, NewMultiplyScalarNodeShader); insert_op(Type::PAD, NewPadNodeShader); insert_op(Type::POOLING_2D, NewPoolingNodeShader); + insert_op(Type::PRELU, NewPReLUNodeShader); insert_op(Type::RELU, NewReLUNodeShader); insert_op(Type::RESHAPE, NewReshapeNodeShader); - insert_op(Type::PRELU, NewPReLUNodeShader); insert_op(Type::SLICE, NewSliceNodeShader); insert_op(Type::SOFT_MAX, NewSoftMaxNodeShader); insert_op(Type::UPSAMPLE_2D, NewUpsamplingNodeShader); insert_elementwise_op(Type::ABS); insert_elementwise_op(Type::COS); + insert_elementwise_op(Type::DIV); + insert_elementwise_op(Type::HARD_SWISH); insert_elementwise_op(Type::LOG); + insert_elementwise_op(Type::POW); insert_elementwise_op(Type::RSQRT); insert_elementwise_op(Type::SIGMOID); insert_elementwise_op(Type::SIN); insert_elementwise_op(Type::SQRT); insert_elementwise_op(Type::SQUARE); - insert_elementwise_op(Type::TANH); - insert_elementwise_op(Type::SUB); - insert_elementwise_op(Type::DIV); - insert_elementwise_op(Type::POW); insert_elementwise_op(Type::SQUARED_DIFF); + insert_elementwise_op(Type::SUB); + insert_elementwise_op(Type::TANH); #ifndef TFLITE_GPU_BINARY_RELEASE insert_op(Type::MAX_UNPOOLING_2D, NewMaxUnpoolingNodeShader); diff --git 
a/tensorflow/lite/delegates/gpu/gl/kernels/relu.cc b/tensorflow/lite/delegates/gpu/gl/kernels/relu.cc index c00b9f616f5..aa5c6e855bc 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/relu.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/relu.cc @@ -24,6 +24,7 @@ limitations under the License. #include "absl/memory/memory.h" #include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/common/types.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -36,7 +37,7 @@ class ReLU : public NodeShader { GeneratedCode* generated_code) const final { auto attr = absl::any_cast(ctx.node->operation.attributes); // clamp(value, min(0, alpha * value), clip) - std::vector params; + std::vector params; std::string min; if (attr.alpha == 0) { min = "vec4(0.0)"; diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/slice.cc b/tensorflow/lite/delegates/gpu/gl/kernels/slice.cc index 66f9abb6b90..678aa7a00ee 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/slice.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/slice.cc @@ -24,6 +24,7 @@ limitations under the License. 
#include "absl/memory/memory.h" #include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/common/types.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -43,7 +44,7 @@ class Slice : public NodeShader { const int4 heights(attr.starts.h, attr.strides.h, attr.ends.h, 0); const int4 widths(attr.starts.w, attr.strides.w, attr.ends.w, 0); - std::vector parameters = { + std::vector parameters = { {"channels", channels}, {"heights", heights}, {"widths", widths}, diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/softmax.cc b/tensorflow/lite/delegates/gpu/gl/kernels/softmax.cc index 000f2b00c5a..04c80937676 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/softmax.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/softmax.cc @@ -26,6 +26,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/common/types.h" #include "tensorflow/lite/delegates/gpu/common/util.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -53,7 +54,7 @@ class SoftMax : public NodeShader { for (int i = 0; i < reminder; ++i) { mask[i] = 1.0f; } - std::vector parameters = { + std::vector parameters = { {"src_depth", IntegralDivideRoundUp(output->tensor.shape.c, 4)}, {"mask", mask}, }; diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/transpose_conv.cc b/tensorflow/lite/delegates/gpu/gl/kernels/transpose_conv.cc index f3719c5751c..4682765421a 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/transpose_conv.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/transpose_conv.cc @@ -26,6 +26,7 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/common/types.h" #include "tensorflow/lite/delegates/gpu/common/util.h" #include "tensorflow/lite/delegates/gpu/gl/node_shader.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -43,7 +44,7 @@ class ConvolutionTransposedBuffers : public NodeShader { const int32_t inner_size_w = (weights.w - 1) / attr.stride.w + 1; const int32_t inner_size_h = (weights.h - 1) / attr.stride.h + 1; - std::vector parameters = { + std::vector parameters = { {"input_data_0_h", input->tensor.shape.h}, {"input_data_0_w", input->tensor.shape.w}, {"src_depth", IntegralDivideRoundUp(weights.i, 4)}, diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/upsampling_bilinear.cc b/tensorflow/lite/delegates/gpu/gl/kernels/upsampling_bilinear.cc index baca806b79a..a30e5ad8e17 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/upsampling_bilinear.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/upsampling_bilinear.cc @@ -24,6 +24,7 @@ limitations under the License. #include "absl/memory/memory.h" #include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/common/types.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -69,7 +70,7 @@ class UpsamplingBilinear : public NodeShader { }; return OkStatus(); } - std::vector parameters = { + std::vector parameters = { {"input_data_0_h", input->tensor.shape.h}, {"input_data_0_w", input->tensor.shape.w}, {"scale_factor", diff --git a/tensorflow/lite/delegates/gpu/gl/node_shader.h b/tensorflow/lite/delegates/gpu/gl/node_shader.h index 20491272e35..710d4b6d5e8 100644 --- a/tensorflow/lite/delegates/gpu/gl/node_shader.h +++ b/tensorflow/lite/delegates/gpu/gl/node_shader.h @@ -27,7 +27,7 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/gl/compiler_options.h" #include "tensorflow/lite/delegates/gpu/gl/gpu_info.h" #include "tensorflow/lite/delegates/gpu/gl/object.h" -#include "tensorflow/lite/delegates/gpu/gl/uniform_parameter.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -58,7 +58,7 @@ enum class IOStructure { struct GeneratedCode { // A list of parameters to be set as uniform or hardcoded in a shader. - std::vector parameters; + std::vector parameters; // A list of objects to bind before shader could be executed. std::vector> objects; diff --git a/tensorflow/lite/delegates/gpu/gl/object.h b/tensorflow/lite/delegates/gpu/gl/object.h index 75e9443764d..3340caca8f3 100644 --- a/tensorflow/lite/delegates/gpu/gl/object.h +++ b/tensorflow/lite/delegates/gpu/gl/object.h @@ -85,8 +85,7 @@ inline const ObjectData* GetData(const Object& object) { inline size_t ByteSizeOf(const Object& object); // @return object that references an object created externally. 
-template -inline Object MakeObjectRef(ObjectRef unique_id, const SizeT& size, +inline Object MakeObjectRef(ObjectRef unique_id, const ObjectSize& size, AccessType access_type) { return Object{access_type, DataType::FLOAT32, ObjectType::UNKNOWN, 0, size, unique_id}; @@ -122,8 +121,8 @@ inline size_t ByteSizeOf(const Object& object) { return SizeOf(object.data_type) * /* vec4 */ 4 * NumElements(object.size); } -template -Object MakeReadonlyObject(const SizeT& size, const std::vector& data) { +inline Object MakeReadonlyObject(const ObjectSize& size, + const std::vector& data) { return Object{AccessType::READ, DataType::FLOAT32, ObjectType::UNKNOWN, @@ -132,8 +131,8 @@ Object MakeReadonlyObject(const SizeT& size, const std::vector& data) { internal_object::ToBytesVector(data, 16)}; } -template -Object MakeReadonlyTexture(const SizeT& size, const std::vector& data) { +inline Object MakeReadonlyTexture(const ObjectSize& size, + const std::vector& data) { return Object{AccessType::READ, DataType::FLOAT32, ObjectType::TEXTURE, @@ -142,8 +141,8 @@ Object MakeReadonlyTexture(const SizeT& size, const std::vector& data) { internal_object::ToBytesVector(data, 16)}; } -template -Object MakeReadonlyBuffer(const SizeT& size, const std::vector& data) { +inline Object MakeReadonlyBuffer(const ObjectSize& size, + const std::vector& data) { return Object{AccessType::READ, DataType::FLOAT32, ObjectType::BUFFER, @@ -153,15 +152,18 @@ Object MakeReadonlyBuffer(const SizeT& size, const std::vector& data) { } inline Object MakeReadonlyObject(const std::vector& data) { - return MakeReadonlyObject(IntegralDivideRoundUp(data.size(), 4U), data); + return MakeReadonlyObject( + IntegralDivideRoundUp(static_cast(data.size()), 4U), data); } inline Object MakeReadonlyTexture(const std::vector& data) { - return MakeReadonlyTexture(IntegralDivideRoundUp(data.size(), 4U), data); + return MakeReadonlyTexture( + IntegralDivideRoundUp(static_cast(data.size()), 4U), data); } inline Object 
MakeReadonlyBuffer(const std::vector& data) { - return MakeReadonlyBuffer(IntegralDivideRoundUp(data.size(), 4U), data); + return MakeReadonlyBuffer( + IntegralDivideRoundUp(static_cast(data.size()), 4U), data); } // TODO(akulik): find better place for functions below. diff --git a/tensorflow/lite/delegates/gpu/gl/runtime.cc b/tensorflow/lite/delegates/gpu/gl/runtime.cc index c09512c55e7..7249ac40ce2 100644 --- a/tensorflow/lite/delegates/gpu/gl/runtime.cc +++ b/tensorflow/lite/delegates/gpu/gl/runtime.cc @@ -29,6 +29,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/gl/gl_program.h" #include "tensorflow/lite/delegates/gpu/gl/gl_texture.h" #include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -187,7 +188,7 @@ Runtime::Runtime(const RuntimeOptions& options, const GpuInfo& gpu_info, } Status Runtime::AddProgram(const GlShader& shader, - const std::vector& parameters, + const std::vector& parameters, const std::vector& objects, const uint3& num_workgroups) { GlProgram program; @@ -542,7 +543,8 @@ Status Runtime::AssignInternalObjects(std::vector* shared_objects) { shared_object.object = shared_ref; if (shared_object.object_type == ObjectType::BUFFER) { // Make a buffer linear. 
- shared_object.size = NumElements(object.size); + shared_object.size = + static_cast(NumElements(object.size)); } shared_objects->push_back(std::move(shared_object)); is_used_shared_object.push_back(false); @@ -551,8 +553,8 @@ Status Runtime::AssignInternalObjects(std::vector* shared_objects) { Object& shared_object = (*shared_objects)[shared_ref]; switch (object.object_type) { case ObjectType::BUFFER: - shared_object.size = std::max(NumElements(object.size), - NumElements(shared_object.size)); + shared_object.size = std::max( + NumElements(object.size), NumElements(shared_object.size)); break; case ObjectType::TEXTURE: { if (!FitSize(object.size, shared_object.size, diff --git a/tensorflow/lite/delegates/gpu/gl/runtime.h b/tensorflow/lite/delegates/gpu/gl/runtime.h index 6761d730628..23fff931c2a 100644 --- a/tensorflow/lite/delegates/gpu/gl/runtime.h +++ b/tensorflow/lite/delegates/gpu/gl/runtime.h @@ -30,7 +30,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/gl/runtime/shared_buffer.h" #include "tensorflow/lite/delegates/gpu/gl/runtime_options.h" #include "tensorflow/lite/delegates/gpu/gl/stats.h" -#include "tensorflow/lite/delegates/gpu/gl/uniform_parameter.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -45,7 +45,7 @@ class Runtime { // Takes parameters and objects and prepares GL program. Status AddProgram(const GlShader& shader, - const std::vector& parameters, + const std::vector& parameters, const std::vector& objects, const uint3& num_workgroups); diff --git a/tensorflow/lite/delegates/gpu/gl/serialization.cc b/tensorflow/lite/delegates/gpu/gl/serialization.cc index 0b950884239..200ca1fbb01 100644 --- a/tensorflow/lite/delegates/gpu/gl/serialization.cc +++ b/tensorflow/lite/delegates/gpu/gl/serialization.cc @@ -19,6 +19,7 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/common/data_type.h" #include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/common/types.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -276,16 +277,16 @@ void SerializedCompiledModelBuilder::AddShader(const std::string& shader_src) { } void SerializedCompiledModelBuilder::AddProgram( - const std::vector& parameters, - const std::vector& objects, const uint3& workgroup_size, - const uint3& num_workgroups, size_t shader_index) { + const std::vector& parameters, const std::vector& objects, + const uint3& workgroup_size, const uint3& num_workgroups, + size_t shader_index) { Offset fb_workgroups = Encode(num_workgroups, &builder_); Offset fb_workgroup_size = Encode(workgroup_size, &builder_); Offset>> fb_params; { std::vector> offsets; - for (const UniformParameter& param : parameters) { + for (const Variable& param : parameters) { auto name = builder_.CreateString(param.name); auto data = absl::visit(ParameterValueGetter{&builder_}, param.value); data::UniformParameterBuilder builder(builder_); @@ -344,7 +345,7 @@ absl::Span SerializedCompiledModelBuilder::Finalize( namespace { Status ParseParameter(const data::UniformParameter& fb_parameter, - UniformParameter* parameter) { + Variable* parameter) { parameter->name = fb_parameter.name()->str(); switch (fb_parameter.type()) { case data::ParameterType::INT32: { @@ -539,13 +540,13 @@ Status DeserializeCompiledModel(absl::Span serialized, RETURN_IF_ERROR( handler->OnShader(absl::MakeSpan(shader->c_str(), shader->size()))); } - std::vector parameters; + std::vector parameters; std::vector objects; for (auto program : *model->programs()) { parameters.clear(); objects.clear(); for (auto fb_parameter : *program->parameters()) { - UniformParameter parameter; + Variable parameter; RETURN_IF_ERROR(ParseParameter(*fb_parameter, ¶meter)); parameters.push_back(std::move(parameter)); } diff --git 
a/tensorflow/lite/delegates/gpu/gl/serialization.h b/tensorflow/lite/delegates/gpu/gl/serialization.h index 5c981731ae2..96c0a0b1073 100644 --- a/tensorflow/lite/delegates/gpu/gl/serialization.h +++ b/tensorflow/lite/delegates/gpu/gl/serialization.h @@ -27,7 +27,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/common/types.h" #include "tensorflow/lite/delegates/gpu/gl/compiled_model_generated.h" #include "tensorflow/lite/delegates/gpu/gl/object.h" -#include "tensorflow/lite/delegates/gpu/gl/uniform_parameter.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -46,7 +46,7 @@ class SerializedCompiledModelBuilder { void AddShader(const std::string& shader_src); - void AddProgram(const std::vector& parameters, + void AddProgram(const std::vector& parameters, const std::vector& objects, const uint3& workgroup_size, const uint3& num_workgroups, size_t shader_index); @@ -69,7 +69,7 @@ class DeserializationHandler { virtual Status OnShader(absl::Span shader_src) = 0; - virtual Status OnProgram(const std::vector& parameters, + virtual Status OnProgram(const std::vector& parameters, const std::vector& objects, const uint3& workgroup_size, const uint3& num_workgroups, diff --git a/tensorflow/lite/delegates/gpu/gl/serialization_test.cc b/tensorflow/lite/delegates/gpu/gl/serialization_test.cc index 6256d970f29..38db44122b4 100644 --- a/tensorflow/lite/delegates/gpu/gl/serialization_test.cc +++ b/tensorflow/lite/delegates/gpu/gl/serialization_test.cc @@ -17,6 +17,7 @@ limitations under the License. #include #include + #include #include #include @@ -28,7 +29,7 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/common/types.h" #include "tensorflow/lite/delegates/gpu/gl/object.h" -#include "tensorflow/lite/delegates/gpu/gl/uniform_parameter.h" +#include "tensorflow/lite/delegates/gpu/gl/variable.h" namespace tflite { namespace gpu { @@ -36,7 +37,7 @@ namespace gl { namespace { struct ProgramDesc { - std::vector parameters; + std::vector parameters; std::vector objects; uint3 workgroup_size; uint3 num_workgroups; @@ -49,7 +50,7 @@ struct Handler : public DeserializationHandler { return OkStatus(); } - Status OnProgram(const std::vector& parameters, + Status OnProgram(const std::vector& parameters, const std::vector& objects, const uint3& workgroup_size, const uint3& num_workgroups, size_t shader_index) final { @@ -107,10 +108,10 @@ struct ParameterComparator { auto v = absl::get(a.value); return value.x == v.x && value.y == v.y && value.z == v.z && value.w == v.w; } - UniformParameter a; + Variable a; }; -bool Eq(const UniformParameter& a, const UniformParameter& b) { +bool Eq(const Variable& a, const Variable& b) { return a.name == b.name && absl::visit(ParameterComparator{a}, b.value); } @@ -138,17 +139,17 @@ TEST(Smoke, Read) { builder.AddShader(shader1); builder.AddShader(shader2); - std::vector parameters; - parameters.push_back(UniformParameter{"1", int32_t(1)}); - parameters.push_back(UniformParameter{"2", int2(1, 2)}); - parameters.push_back(UniformParameter{"3", int4(1, 2, 3, 4)}); - parameters.push_back(UniformParameter{"4", uint32_t(10)}); - parameters.push_back(UniformParameter{"5", uint4(10, 20, 30, 40)}); - parameters.push_back(UniformParameter{"6", -2.0f}); - parameters.push_back(UniformParameter{"7", float2(1, -1)}); - parameters.push_back(UniformParameter{"8", float4(1, -1, 2, -2)}); - parameters.push_back(UniformParameter{ - "9", std::vector{int2(1, 2), int2(3, 4), int2(5, 6)}}); + std::vector parameters; + parameters.push_back({"1", int32_t(1)}); + 
parameters.push_back({"2", int2(1, 2)}); + parameters.push_back({"3", int4(1, 2, 3, 4)}); + parameters.push_back({"4", uint32_t(10)}); + parameters.push_back({"5", uint4(10, 20, 30, 40)}); + parameters.push_back({"6", -2.0f}); + parameters.push_back({"7", float2(1, -1)}); + parameters.push_back({"8", float4(1, -1, 2, -2)}); + parameters.push_back( + {"9", std::vector{int2(1, 2), int2(3, 4), int2(5, 6)}}); std::vector objects; objects.push_back(MakeReadonlyBuffer(std::vector{1, 2, 3, 4})); diff --git a/tensorflow/lite/delegates/gpu/gl/uniform_parameter.h b/tensorflow/lite/delegates/gpu/gl/variable.h similarity index 83% rename from tensorflow/lite/delegates/gpu/gl/uniform_parameter.h rename to tensorflow/lite/delegates/gpu/gl/variable.h index 90e2c237f90..f2f3979b631 100644 --- a/tensorflow/lite/delegates/gpu/gl/uniform_parameter.h +++ b/tensorflow/lite/delegates/gpu/gl/variable.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_LITE_DELEGATES_GPU_GL_UNIFORM_PARAMETER_H_ -#define TENSORFLOW_LITE_DELEGATES_GPU_GL_UNIFORM_PARAMETER_H_ +#ifndef TENSORFLOW_LITE_DELEGATES_GPU_GL_VARIABLE_H_ +#define TENSORFLOW_LITE_DELEGATES_GPU_GL_VARIABLE_H_ #include #include @@ -27,7 +27,7 @@ namespace tflite { namespace gpu { namespace gl { -struct UniformParameter { +struct Variable { using ValueType = absl::variant>; @@ -39,4 +39,4 @@ struct UniformParameter { } // namespace gpu } // namespace tflite -#endif // TENSORFLOW_LITE_DELEGATES_GPU_GL_UNIFORM_PARAMETER_H_ +#endif // TENSORFLOW_LITE_DELEGATES_GPU_GL_VARIABLE_H_ diff --git a/tensorflow/lite/delegates/gpu/metal/api.cc b/tensorflow/lite/delegates/gpu/metal/api.cc index 3588cd97169..856939eb9b2 100644 --- a/tensorflow/lite/delegates/gpu/metal/api.cc +++ b/tensorflow/lite/delegates/gpu/metal/api.cc @@ -30,6 +30,7 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/metal/kernels/depthwise_conv.h" #include "tensorflow/lite/delegates/gpu/metal/kernels/elementwise.h" #include "tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.h" +#include "tensorflow/lite/delegates/gpu/metal/kernels/hard_swish.h" #include "tensorflow/lite/delegates/gpu/metal/kernels/max_unpooling.h" #include "tensorflow/lite/delegates/gpu/metal/kernels/mul.h" #include "tensorflow/lite/delegates/gpu/metal/kernels/padding.h" @@ -172,6 +173,9 @@ Status Compile(const GraphFloat32& graph, const RuntimeOptions& options, node->operation.attributes), options); break; + case OperationType::HARD_SWISH: + tasks = HardSwish(node_id, inputs[0], outputs[0], options); + break; case OperationType::MAX_UNPOOLING_2D: tasks = MaxUnpooling(node_id, inputs[0], inputs[1], outputs[0], absl::any_cast( diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD index 48ff3632a02..c1b57bd4fc0 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD @@ -12,6 +12,7 @@ cc_library( ":depthwise_conv", ":elementwise", ":fully_connected", + ":hard_swish", ":max_unpooling", ":mul", ":padding", @@ -122,6 +123,18 @@ cc_library( ], ) +cc_library( + name = "hard_swish", + srcs = ["hard_swish.cc"], + hdrs = ["hard_swish.h"], + deps = [ + "//tensorflow/lite/delegates/gpu/common:model", + "//tensorflow/lite/delegates/gpu/common:types", + "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", + "//tensorflow/lite/delegates/gpu/metal:runtime_options", + ], +) + cc_library( name = "max_unpooling", srcs = ["max_unpooling.cc"], diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/hard_swish.cc b/tensorflow/lite/delegates/gpu/metal/kernels/hard_swish.cc new file mode 100644 index 00000000000..fbf2be92627 --- /dev/null +++ b/tensorflow/lite/delegates/gpu/metal/kernels/hard_swish.cc @@ -0,0 +1,47 @@ +/* Copyright 2019 The TensorFlow 
Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/delegates/gpu/metal/kernels/hard_swish.h" + +#include +#include + +#include "tensorflow/lite/delegates/gpu/common/model.h" +#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h" +#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h" + +namespace tflite { +namespace gpu { +namespace metal { + +std::vector HardSwish(int id, ValueId input_id, + ValueId output_id, + const RuntimeOptions& options) { + auto desc = std::make_shared(); + desc->id = id; + desc->is_linkable = true; + desc->shader_source = R"( + FLT4 linkable$0(FLT4 value, int linear_index, uint3 gid) { + return value * clamp(value / 6.0f + FLT4(0.5f), FLT4(0.0f), FLT4(1.0f)); + } + )"; + desc->input_buffers = {{input_id}}; + desc->output_buffer = {output_id}; + return {desc}; +} + +} // namespace metal +} // namespace gpu +} // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/hard_swish.h b/tensorflow/lite/delegates/gpu/metal/kernels/hard_swish.h new file mode 100644 index 00000000000..fa040ebcb97 --- /dev/null +++ b/tensorflow/lite/delegates/gpu/metal/kernels/hard_swish.h @@ -0,0 +1,37 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_DELEGATES_GPU_METAL_KERNELS_HARD_SWISH_H_ +#define TENSORFLOW_LITE_DELEGATES_GPU_METAL_KERNELS_HARD_SWISH_H_ + +#include + +#include "tensorflow/lite/delegates/gpu/common/model.h" +#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h" +#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h" + +namespace tflite { +namespace gpu { +namespace metal { + +std::vector HardSwish(int id, ValueId input_id, + ValueId output_id, + const RuntimeOptions& options); + +} // namespace metal +} // namespace gpu +} // namespace tflite + +#endif // TENSORFLOW_LITE_DELEGATES_GPU_METAL_KERNELS_HARD_SWISH_H_ diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc index f23a9873486..6d17fa260a8 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc @@ -123,13 +123,12 @@ bool IsFloatOrUint8Operator(const TfLiteContext* context, // Check if the operation requires explict conversion from int8 to uint8 values. 
bool NeedInt8Conversion(const TfLiteContext* context, int builtin_code, const TfLiteNode* node) { + const int input_id = node->inputs->data[0]; + const TfLiteType input_type = context->tensors[input_id].type; switch (builtin_code) { case kTfLiteBuiltinConv2d: case kTfLiteBuiltinDepthwiseConv2d: - case kTfLiteBuiltinFullyConnected: - case kTfLiteBuiltinL2Normalization: { - const int input_id = node->inputs->data[0]; - const TfLiteType input_type = context->tensors[input_id].type; + case kTfLiteBuiltinFullyConnected: { if (input_type == kTfLiteInt8) { const int weights_id = node->inputs->data[1]; const auto& weights_tensor = context->tensors[weights_id]; @@ -141,6 +140,11 @@ bool NeedInt8Conversion(const TfLiteContext* context, int builtin_code, } return false; } + case kTfLiteBuiltinL2Normalization: + case kTfLiteBuiltinSub: + case kTfLiteBuiltinTanh: { + return input_type == kTfLiteInt8; + } default: return false; } @@ -1379,23 +1383,34 @@ class NNAPIDelegateKernel { break; case kTfLiteBuiltinTanh: // TODO(miaowang): add additional checks for the parameters. - if (version == 1 && - context->tensors[node->inputs->data[0]].type == kTfLiteFloat32) { - // NNAPI only support float tanh. - return BasicMappingFn; + if (version == 1) { + const TfLiteType input_type = + context->tensors[node->inputs->data[0]].type; + if (IsFloat(input_type) || + (IsQuantized(input_type) && + android_sdk_version >= kMinSdkVersionForNNAPI12)) { + // NNAPI supports float tanh; quantized tanh requires NNAPI 1.2+. + return BasicMappingFn; + } } break; case kTfLiteBuiltinSub: - if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI11 && - context->tensors[node->inputs->data[0]].type == kTfLiteFloat32) { - // NNAPI only support float sub.
- return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - auto builtin = reinterpret_cast( - mapping_args.node->builtin_data); - mapping_args.builder->AddScalarInt32Operand(builtin->activation); - return ANEURALNETWORKS_SUB; - }; + if (version == 1) { + const TfLiteType input_type = + context->tensors[node->inputs->data[0]].type; + if ((android_sdk_version >= kMinSdkVersionForNNAPI11 && + IsFloat(input_type)) || + (android_sdk_version >= kMinSdkVersionForNNAPI12 && + IsQuantized(input_type))) { + // NNAPI supports float sub from NNAPI 1.1 and quantized sub from NNAPI 1.2. + return [](const NNAPIOpMappingArgs& mapping_args) + -> ANeuralNetworksOperationType { + auto builtin = reinterpret_cast( + mapping_args.node->builtin_data); + mapping_args.builder->AddScalarInt32Operand(builtin->activation); + return ANEURALNETWORKS_SUB; + }; + } } break; case kTfLiteBuiltinDiv: @@ -1714,6 +1729,21 @@ class NNAPIDelegateKernel { return BasicMappingFn; } } break; + case kTfLiteBuiltinCast: { + const TfLiteType input_type = + context->tensors[node->inputs->data[0]].type; + const TfLiteType output_type = + context->tensors[node->outputs->data[0]].type; + auto is_supported_tensor_type = [](const TfLiteType& type) { + return (type == kTfLiteFloat32 || type == kTfLiteInt32 || + type == kTfLiteUInt8); + }; + if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + is_supported_tensor_type(input_type) && + is_supported_tensor_type(output_type)) { + return BasicMappingFn; + } + } break; case kTfLiteBuiltinPrelu: if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12) { if (!IsFloatOrUint8Operator(context, node)) { @@ -1953,6 +1983,20 @@ class NNAPIDelegateKernel { }; } } break; + case kTfLiteBuiltinLogSoftmax: { + const auto input_type = context->tensors[node->inputs->data[0]].type; + if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + input_type == kTfLiteFloat32) { + return [](const NNAPIOpMappingArgs& mapping_args) + ->
ANeuralNetworksOperationType { + // Scaling and axis are hardcoded to respectively 1 and -1 + // in TFLite. + mapping_args.builder->AddScalarFloat32Operand(1); + mapping_args.builder->AddScalarInt32Operand(-1); + return ANEURALNETWORKS_LOG_SOFTMAX; + }; + } + } break; default: // All other operators are not mapped. return nullptr; @@ -2341,7 +2385,8 @@ class NNAPIDelegateKernel { const auto input_index = node->inputs->data[input_pos]; if (need_int8_conversion && (input_pos == 0 || - reg->builtin_code == kTfLiteBuiltinFullyConnected)) { + reg->builtin_code == kTfLiteBuiltinFullyConnected || + reg->builtin_code == kTfLiteBuiltinSub)) { // Only selected inputs require int8 conversion. TF_LITE_ENSURE_STATUS(builder.AddTensorInput( input_index, hybrid_op, diff --git a/tensorflow/lite/experimental/micro/README.md b/tensorflow/lite/experimental/micro/README.md index 78362debfcf..6100d3af365 100644 --- a/tensorflow/lite/experimental/micro/README.md +++ b/tensorflow/lite/experimental/micro/README.md @@ -10,7 +10,7 @@ detection model, takes up a total of 22KB. ## Table of Contents - [Getting Started](#getting-started) - + * [Examples](#examples) * [Getting Started with Portable Reference Code](#getting-started-with-portable-reference-code) * [Building Portable Reference Code using Make](#building-portable-reference-code-using-make) * [Building for the "Blue Pill" STM32F103 using Make](#building-for-the-blue-pill-stm32f103-using-make) @@ -39,6 +39,32 @@ detection model, takes up a total of 22KB. # Getting Started +## Examples + +The fastest way to learn how TensorFlow Lite for Microcontrollers works is by +exploring and running our examples, which include application code and trained +TensorFlow models. 
+ +The following examples are available: + +- [hello_world](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/experimental/micro/examples/hello_world) + * Uses a very simple model, trained to reproduce a sine wave, to control an + LED or animation + * Application code for Arduino, SparkFun Edge, and STM32F746 + * Colab walkthrough of model training and conversion + +- [micro_speech](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/experimental/micro/examples/micro_speech) + * Uses a 20KB model to recognize keywords in spoken audio + * Application code for Arduino, SparkFun Edge, and STM32F746 + * Python scripts for model training and conversion + +- [micro_vision](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/experimental/micro/examples/micro_vision) + * Uses a 250KB model to recognize presence or absence of a person in images + captured by a camera + * Application code for SparkFun Edge + +## Pre-generated Project Files + One of the challenges of embedded software development is that there are a lot of different architectures, devices, operating systems, and build systems. We aim to support as many of the popular combinations as we can, and make it as diff --git a/tensorflow/lite/experimental/micro/arduino/debug_log.cc b/tensorflow/lite/experimental/micro/arduino/debug_log.cc index 94d8d832dd6..4d18f6f97e9 100644 --- a/tensorflow/lite/experimental/micro/arduino/debug_log.cc +++ b/tensorflow/lite/experimental/micro/arduino/debug_log.cc @@ -1,4 +1,4 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -32,9 +32,6 @@ extern "C" void DebugLog(const char* s) { static bool is_initialized = false; if (!is_initialized) { DEBUG_SERIAL_OBJECT.begin(9600); - // Wait for serial port to connect.
Only needed for some models apparently? - while (!DEBUG_SERIAL_OBJECT) { - } is_initialized = true; } DEBUG_SERIAL_OBJECT.println(s); diff --git a/tensorflow/lite/experimental/micro/examples/hello_world/BUILD b/tensorflow/lite/experimental/micro/examples/hello_world/BUILD new file mode 100644 index 00000000000..f3340492333 --- /dev/null +++ b/tensorflow/lite/experimental/micro/examples/hello_world/BUILD @@ -0,0 +1,77 @@ +# Description: +# TensorFlow Lite for Microcontrollers "hello world" example. + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +load( + "//tensorflow/lite/experimental/micro/testing:micro_test.bzl", + "tflite_micro_cc_test", +) + +cc_library( + name = "sine_model_data", + srcs = [ + "sine_model_data.cc", + ], + hdrs = [ + "sine_model_data.h", + ], +) + +tflite_micro_cc_test( + name = "hello_world_test", + srcs = [ + "hello_world_test.cc", + ], + deps = [ + "//tensorflow/lite:schema_fbs_version", + "//tensorflow/lite/experimental/micro:micro_framework", + "//tensorflow/lite/experimental/micro/examples/hello_world:sine_model_data", + "//tensorflow/lite/experimental/micro/kernels:all_ops_resolver", + "//tensorflow/lite/experimental/micro/kernels:micro_ops", + "//tensorflow/lite/experimental/micro/testing:micro_test", + "//tensorflow/lite/schema:schema_fbs", + ], +) + +cc_library( + name = "output_handler", + srcs = [ + "output_handler.cc", + ], + hdrs = [ + "output_handler.h", + ], + deps = [ + "//tensorflow/lite/c:c_api_internal", + "//tensorflow/lite/experimental/micro:micro_framework", + ], +) + +cc_library( + name = "constants", + srcs = [ + "constants.cc", + ], + hdrs = [ + "constants.h", + ], +) + +cc_binary( + name = "hello_world", + srcs = [ + "main.cc", + ], + deps = [ + ":constants", + ":output_handler", + "//tensorflow/lite:schema_fbs_version", + "//tensorflow/lite/experimental/micro:micro_framework", + "//tensorflow/lite/experimental/micro/examples/hello_world:sine_model_data", + 
"//tensorflow/lite/experimental/micro/kernels:all_ops_resolver", + "//tensorflow/lite/schema:schema_fbs", + ], +) diff --git a/tensorflow/lite/experimental/micro/examples/hello_world/Makefile.inc b/tensorflow/lite/experimental/micro/examples/hello_world/Makefile.inc new file mode 100644 index 00000000000..59a233c15ec --- /dev/null +++ b/tensorflow/lite/experimental/micro/examples/hello_world/Makefile.inc @@ -0,0 +1,39 @@ +HELLO_WORLD_TEST_SRCS := \ +tensorflow/lite/experimental/micro/examples/hello_world/hello_world_test.cc \ +tensorflow/lite/experimental/micro/examples/hello_world/sine_model_data.cc + +HELLO_WORLD_TEST_HDRS := \ +tensorflow/lite/experimental/micro/examples/hello_world/sine_model_data.h + +OUTPUT_HANDLER_TEST_SRCS := \ +tensorflow/lite/experimental/micro/examples/hello_world/output_handler_test.cc \ +tensorflow/lite/experimental/micro/examples/hello_world/output_handler.cc + +OUTPUT_HANDLER_TEST_HDRS := \ +tensorflow/lite/experimental/micro/examples/hello_world/output_handler.h + +HELLO_WORLD_SRCS := \ +tensorflow/lite/experimental/micro/examples/hello_world/main.cc \ +tensorflow/lite/experimental/micro/examples/hello_world/sine_model_data.cc \ +tensorflow/lite/experimental/micro/examples/hello_world/output_handler.cc \ +tensorflow/lite/experimental/micro/examples/hello_world/constants.cc + +HELLO_WORLD_HDRS := \ +tensorflow/lite/experimental/micro/examples/hello_world/sine_model_data.h \ +tensorflow/lite/experimental/micro/examples/hello_world/output_handler.h \ +tensorflow/lite/experimental/micro/examples/hello_world/constants.h + +#Find any platform-specific rules for this example. +include $(wildcard tensorflow/lite/experimental/micro/examples/hello_world/*/Makefile.inc) + +# Tests loading and running the sine model. +$(eval $(call microlite_test,hello_world_test,\ +$(HELLO_WORLD_TEST_SRCS),$(HELLO_WORLD_TEST_HDRS))) + +# Tests producing an output. 
+$(eval $(call microlite_test,output_handler_test,\ +$(OUTPUT_HANDLER_TEST_SRCS),$(OUTPUT_HANDLER_TEST_HDRS))) + +# Builds a standalone binary. +$(eval $(call microlite_test,hello_world,\ +$(HELLO_WORLD_SRCS),$(HELLO_WORLD_HDRS))) diff --git a/tensorflow/lite/experimental/micro/examples/hello_world/README.md b/tensorflow/lite/experimental/micro/examples/hello_world/README.md new file mode 100644 index 00000000000..1de9730848c --- /dev/null +++ b/tensorflow/lite/experimental/micro/examples/hello_world/README.md @@ -0,0 +1,363 @@ +# Hello World example + +This example is designed to demonstrate the absolute basics of using [TensorFlow +Lite for Microcontrollers](https://www.tensorflow.org/lite/microcontrollers/overview). +It includes the full end-to-end workflow of training a model, converting it for +use with TensorFlow Lite, and running inference on a microcontroller. + +The sample is built around a model trained to replicate a `sine` function. It +contains implementations for several platforms. In each case, the model is used +to generate a pattern of data that is used to either blink LEDs or control an +animation. + +![Animation of example running on STM32F746](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/experimental/micro/examples/hello_world/images/STM32F746.gif) + +## Table of contents + +- [Getting started](#getting-started) +- [Deploy to Arduino](#deploy-to-arduino) +- [Deploy to SparkFun Edge](#deploy-to-sparkfun-edge) +- [Deploy to STM32F746](#deploy-to-stm32f746) + +## Getting started + +### Understand the model + +The sample comes with a pre-trained model. The code used to train and convert +the model is available as a tutorial in [create_sine_model.ipynb](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/experimental/micro/examples/hello_world/create_sine_model.ipynb). 
+ +Walk through this tutorial to understand what the model does, +how it works, and how it was converted for use with TensorFlow Lite for +Microcontrollers. + +### Build the code + +To compile and test this example on a desktop Linux or MacOS machine, download +[the TensorFlow source code](https://github.com/tensorflow/tensorflow), `cd` +into the source directory from a terminal, and then run the following command: + +``` +make -f tensorflow/lite/experimental/micro/tools/make/Makefile test_hello_world_test +``` + +This will take a few minutes, and downloads frameworks the code uses like +[CMSIS](https://developer.arm.com/embedded/cmsis) and +[flatbuffers](https://google.github.io/flatbuffers/). Once that process has +finished, you should see a series of files get compiled, followed by some +logging output from a test, which should conclude with `~~~ALL TESTS PASSED~~~`. + +If you see this, it means that a small program has been built and run that loads +the trained TensorFlow model, runs some example inputs through it, and gets the +expected outputs. + +To understand how TensorFlow Lite does this, you can look at the source in +[hello_world_test.cc](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/experimental/micro/examples/hello_world/hello_world_test.cc). +It's a fairly small amount of code that creates an interpreter, gets a handle to +a model that's been compiled into the program, and then invokes the interpreter +with the model and sample inputs. + +## Deploy to Arduino + +The following instructions will help you build and deploy this sample +to [Arduino](https://www.arduino.cc/) devices. 
+ +![Animation of example running on Arduino MKRZERO](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/experimental/micro/examples/hello_world/images/arduino_mkrzero.gif) + +The sample has been tested with the following devices: + +- [Arduino MKRZERO](https://store.arduino.cc/usa/arduino-mkrzero) + +The sample will use PWM to fade an LED on and off according to the model's +output. In the code, the `LED_BUILTIN` constant is used to specify the board's +built-in LED as the one being controlled. However, on some boards, this built-in +LED is not attached to a pin with PWM capabilities. In this case, the LED will +blink instead of fading. + +### Obtain and import the library + +To use this sample application with Arduino, we've created an Arduino library +that includes it as an example that you can open in the Arduino IDE. + +Download the current nightly build of the library: [hello_world.zip](https://storage.googleapis.com/tensorflow-nightly/github/tensorflow/tensorflow/lite/experimental/micro/tools/make/gen/arduino_x86_64/prj/hello_world/hello_world.zip) + +Next, import this zip file into the Arduino IDE by going to `Sketch -> Include Library -> Add .ZIP Library...`. + +#### Building the library + +If you need to build the library from source (for example, if you're making +modifications to the code), run this command to generate a zip file containing +the required source files: + +``` +make -f tensorflow/lite/experimental/micro/tools/make/Makefile TARGET=arduino TAGS="" generate_hello_world_arduino_library_zip +``` + +A zip file will be created at the following location: + +``` +tensorflow/lite/experimental/micro/tools/make/gen/arduino_x86_64/prj/hello_world/hello_world.zip +``` + +You can then import this zip file into the Arduino IDE by going to `Sketch -> Include Library -> Add .ZIP Library...`. + +### Load and run the example + +Once the library has been added, go to `File -> Examples`. 
You should see an +example near the bottom of the list named `TensorFlowLite:hello_world`. Select +it and click `hello_world` to load the example. + +Use the Arduino IDE to build and upload the example. Once it is running, you +should see the built-in LED on your device flashing. + +The Arduino IDE includes a plotter that we can use to display the sine wave +graphically. To view it, go to `Tools -> Serial Plotter`. You will see one +datapoint being logged for each inference cycle, expressed as a number between 0 +and 255. + +## Deploy to SparkFun Edge + +The following instructions will help you build and deploy this sample on the +[SparkFun Edge development board](https://sparkfun.com/products/15170). + +![Animation of example running on SparkFun Edge](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/experimental/micro/examples/hello_world/images/sparkfun_edge.gif) + +If you're new to using this board, we recommend walking through the +[AI on a microcontroller with TensorFlow Lite and SparkFun Edge](https://codelabs.developers.google.com/codelabs/sparkfun-tensorflow) +codelab to get an understanding of the workflow. + +### Compile the binary + +The following command will download the required dependencies and then compile a +binary for the SparkFun Edge: + +``` +make -f tensorflow/lite/experimental/micro/tools/make/Makefile TARGET=sparkfun_edge hello_world_bin +``` + +The binary will be created in the following location: + +``` +tensorflow/lite/experimental/micro/tools/make/gen/sparkfun_edge_cortex-m4/bin/hello_world.bin +``` + +### Sign the binary + +The binary must be signed with cryptographic keys to be deployed to the device. +We'll now run some commands that will sign our binary so it can be flashed to +the SparkFun Edge. The scripts we are using come from the Ambiq SDK, which is +downloaded when the `Makefile` is run. 
+ +Enter the following command to set up some dummy cryptographic keys we can use +for development: + +``` +cp tensorflow/lite/experimental/micro/tools/make/downloads/AmbiqSuite-Rel2.0.0/tools/apollo3_scripts/keys_info0.py \ +tensorflow/lite/experimental/micro/tools/make/downloads/AmbiqSuite-Rel2.0.0/tools/apollo3_scripts/keys_info.py +``` + +Next, run the following command to create a signed binary: + +``` +python3 tensorflow/lite/experimental/micro/tools/make/downloads/AmbiqSuite-Rel2.0.0/tools/apollo3_scripts/create_cust_image_blob.py \ +--bin tensorflow/lite/experimental/micro/tools/make/gen/sparkfun_edge_cortex-m4/bin/hello_world.bin \ +--load-address 0xC000 \ +--magic-num 0xCB \ +-o main_nonsecure_ota \ +--version 0x0 +``` + +This will create the file `main_nonsecure_ota.bin`. We'll now run another +command to create a final version of the file that can be used to flash our +device with the bootloader script we will use in the next step: + +``` +python3 tensorflow/lite/experimental/micro/tools/make/downloads/AmbiqSuite-Rel2.0.0/tools/apollo3_scripts/create_cust_wireupdate_blob.py \ +--load-address 0x20000 \ +--bin main_nonsecure_ota.bin \ +-i 6 \ +-o main_nonsecure_wire \ +--options 0x1 +``` + +You should now have a file called `main_nonsecure_wire.bin` in the directory +where you ran the commands. This is the file we'll be flashing to the device. + +### Flash the binary + +Next, attach the board to your computer via a USB-to-serial adapter. + +**Note:** If you're using the [SparkFun Serial Basic Breakout](https://www.sparkfun.com/products/15096), +you should [install the latest drivers](https://learn.sparkfun.com/tutorials/sparkfun-serial-basic-ch340c-hookup-guide#drivers-if-you-need-them) +before you continue. 
+ +Once connected, assign the USB device name to an environment variable: + +``` +export DEVICENAME=put your device name here +``` + +Set another variable with the baud rate: + +``` +export BAUD_RATE=921600 +``` + +Now, hold the button marked `14` on the device. While still holding the button, +hit the button marked `RST`. Continue holding the button marked `14` while +running the following command: + +``` +python3 tensorflow/lite/experimental/micro/tools/make/downloads/AmbiqSuite-Rel2.0.0/tools/apollo3_scripts/uart_wired_update.py \ +-b ${BAUD_RATE} ${DEVICENAME} \ +-r 1 \ +-f main_nonsecure_wire.bin \ +-i 6 +``` + +You should see a long stream of output as the binary is flashed to the device. +Once you see the following lines, flashing is complete: + +``` +Sending Reset Command. +Done. +``` + +If you don't see these lines, flashing may have failed. Try running through the +steps in [Flash the binary](#flash-the-binary) again (you can skip over setting +the environment variables). If you continue to run into problems, follow the +[AI on a microcontroller with TensorFlow Lite and SparkFun Edge](https://codelabs.developers.google.com/codelabs/sparkfun-tensorflow) +codelab, which includes more comprehensive instructions for the flashing +process. + +The binary should now be deployed to the device. Hit the button marked `RST` to +reboot the board. You should see the device's four LEDs flashing in sequence. + +Debug information is logged by the board while the program is running. To view +it, establish a serial connection to the board using a baud rate of `115200`. +On OSX and Linux, the following command should work: + +``` +screen ${DEVICENAME} 115200 +``` + +You will see a lot of output flying past! To stop the scrolling, hit `Ctrl+A`, +immediately followed by `Esc`. 
You can then use the arrow keys to explore the +output, which will contain the results of running inference on various `x` +values: + +``` +x_value: 1.1843798*2^2, y_value: -1.9542645*2^-1 +``` + +To stop viewing the debug output with `screen`, hit `Ctrl+A`, immediately +followed by the `K` key, then hit the `Y` key. + + +## Deploy to STM32F746 + +The following instructions will help you build and deploy the sample to the +[STM32F7 discovery kit](https://os.mbed.com/platforms/ST-Discovery-F746NG/) +using [ARM Mbed](https://github.com/ARMmbed/mbed-cli). + +![Animation of example running on STM32F746](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/experimental/micro/examples/hello_world/images/STM32F746.gif) + +Before we begin, you'll need the following: + +- STM32F7 discovery kit board +- Mini-USB cable +- ARM Mbed CLI ([installation instructions](https://os.mbed.com/docs/mbed-os/v5.12/tools/installation-and-setup.html)) +- Python 2.7 and pip + +Since Mbed requires a special folder structure for projects, we'll first run a +command to generate a subfolder containing the required source files in this +structure: + +``` +make -f tensorflow/lite/experimental/micro/tools/make/Makefile TARGET=mbed TAGS="CMSIS disco_f746ng" generate_hello_world_mbed_project +``` + +This will result in the creation of a new folder: + +``` +tensorflow/lite/experimental/micro/tools/make/gen/mbed_cortex-m4/prj/hello_world/mbed +``` + +This folder contains all of the example's dependencies structured in the correct +way for Mbed to be able to build it. + +Change into the directory and run the following commands, making sure you are +using Python 2.7.15. + +First, tell Mbed that the current directory is the root of an Mbed project: + +``` +mbed config root . +``` + +Next, tell Mbed to download the dependencies and prepare to build: + +``` +mbed deploy +``` + +By default, Mbed will build the project using C++98. However, TensorFlow Lite +requires C++11. 
Run the following Python snippet to modify the Mbed +configuration files so that it uses C++11: + +``` +python -c 'import fileinput, glob; +for filename in glob.glob("mbed-os/tools/profiles/*.json"): + for line in fileinput.input(filename, inplace=True): + print line.replace("\"-std=gnu++98\"","\"-std=c++11\", \"-fpermissive\"")' + +``` + +Finally, run the following command to compile: + +``` +mbed compile -m DISCO_F746NG -t GCC_ARM +``` + +This should result in a binary at the following path: + +``` +./BUILD/DISCO_F746NG/GCC_ARM/mbed.bin +``` + +To deploy, plug in your STM board and copy the file to it. On MacOS, you can do +this with the following command: + +``` +cp ./BUILD/DISCO_F746NG/GCC_ARM/mbed.bin /Volumes/DIS_F746NG/ +``` + +Copying the file will initiate the flashing process. Once this is complete, you +should see an animation on the device's screen. + + +``` +screen /dev/tty.usbmodem14403 9600 +``` + +In addition to this animation, debug information is logged by the board while +the program is running. To view it, establish a serial connection to the board +using a baud rate of `9600`. On OSX and Linux, the following command should +work, replacing `/dev/tty.devicename` with the name of your device as it appears +in `/dev`: + +``` +screen /dev/tty.devicename 9600 +``` + +You will see a lot of output flying past! To stop the scrolling, hit `Ctrl+A`, +immediately followed by `Esc`. You can then use the arrow keys to explore the +output, which will contain the results of running inference on various `x` +values: + +``` +x_value: 1.1843798*2^2, y_value: -1.9542645*2^-1 +``` + +To stop viewing the debug output with `screen`, hit `Ctrl+A`, immediately +followed by the `K` key, then hit the `Y` key. 
+ diff --git a/tensorflow/lite/experimental/micro/examples/hello_world/arduino/constants.cc b/tensorflow/lite/experimental/micro/examples/hello_world/arduino/constants.cc new file mode 100644 index 00000000000..a4aaf74bd75 --- /dev/null +++ b/tensorflow/lite/experimental/micro/examples/hello_world/arduino/constants.cc @@ -0,0 +1,19 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/experimental/micro/examples/hello_world/constants.h" + +// This is tuned so that a full cycle takes ~4 seconds on an Arduino MKRZERO. +const int kInferencesPerCycle = 1000; diff --git a/tensorflow/lite/experimental/micro/examples/hello_world/arduino/output_handler.cc b/tensorflow/lite/experimental/micro/examples/hello_world/arduino/output_handler.cc new file mode 100644 index 00000000000..3dbbf247348 --- /dev/null +++ b/tensorflow/lite/experimental/micro/examples/hello_world/arduino/output_handler.cc @@ -0,0 +1,47 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/experimental/micro/examples/hello_world/output_handler.h" + +#include "Arduino.h" +#include "tensorflow/lite/experimental/micro/examples/hello_world/constants.h" + +// The pin of the Arduino's built-in LED +int led = LED_BUILTIN; + +// Track whether the function has run at least once +bool initialized = false; + +// Sets the brightness of the built-in LED to represent the current y value +void HandleOutput(tflite::ErrorReporter* error_reporter, float x_value, + float y_value) { + // Do this only once + if (!initialized) { + // Set the LED pin to output + pinMode(led, OUTPUT); + initialized = true; + } + + // Calculate the brightness of the LED such that y=-1 is fully off + // and y=1 is fully on. The LED's brightness can range from 0-255. + int brightness = (int)(127.5f * (y_value + 1)); + + // Set the brightness of the LED. If the specified pin does not support PWM, + // this will result in the LED being on when brightness > 127, off otherwise. + analogWrite(led, brightness); + + // Log the current brightness value for display in the Arduino plotter + error_reporter->Report("%d\n", brightness); +} diff --git a/tensorflow/lite/experimental/micro/examples/hello_world/constants.cc b/tensorflow/lite/experimental/micro/examples/hello_world/constants.cc new file mode 100644 index 00000000000..7ac490c9c94 --- /dev/null +++ b/tensorflow/lite/experimental/micro/examples/hello_world/constants.cc @@ -0,0 +1,19 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/experimental/micro/examples/hello_world/constants.h" + +// This is a small number so that it's easy to read the logs +const int kInferencesPerCycle = 20; diff --git a/tensorflow/lite/experimental/micro/examples/hello_world/constants.h b/tensorflow/lite/experimental/micro/examples/hello_world/constants.h new file mode 100644 index 00000000000..61ca7df307d --- /dev/null +++ b/tensorflow/lite/experimental/micro/examples/hello_world/constants.h @@ -0,0 +1,32 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_HELLO_WORLD_CONSTANTS_H_ +#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_HELLO_WORLD_CONSTANTS_H_ + +// This constant represents the range of x values our model was trained on, +// which is from 0 to (2 * Pi). We approximate Pi to avoid requiring additional +// libraries. +const float kXrange = 2.f * 3.14159265359f; + +// This constant determines the number of inferences to perform across the range +// of x values defined above. Since each inference takes time, the higher this +// number, the more time it will take to run through the entire range. The value +// of this constant can be tuned so that one full cycle takes a desired amount +// of time. Since different devices take different amounts of time to perform +// inference, this value should be defined per-device. +extern const int kInferencesPerCycle; + +#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_HELLO_WORLD_CONSTANTS_H_ diff --git a/tensorflow/lite/experimental/micro/examples/hello_world/create_sine_model.ipynb b/tensorflow/lite/experimental/micro/examples/hello_world/create_sine_model.ipynb new file mode 100644 index 00000000000..2a5cbc994c4 --- /dev/null +++ b/tensorflow/lite/experimental/micro/examples/hello_world/create_sine_model.ipynb @@ -0,0 +1,1335 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "create_sine_model.ipynb", + "version": "0.3.2", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "python2", + "display_name": "Python 2" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "sblS7n3zWCWV", + "colab_type": "text" + }, + "source": [ + "**Copyright 2019 The TensorFlow Authors.**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "0rvUzWmoWMH5", + "colab_type": "code", + "colab": {} + }, + "source": [ + "#@title Licensed 
under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aCZBFzjClURz", + "colab_type": "text" + }, + "source": [ + "# Create and convert a TensorFlow model\n", + "This notebook is designed to demonstrate the process of creating a TensorFlow model and converting it to use with TensorFlow Lite. The model created in this notebook is used in the [hello_world](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/experimental/micro/examples/hello_world) sample for [TensorFlow Lite for Microcontrollers](https://www.tensorflow.org/lite/microcontrollers/overview).\n", + "\n", + "\n", + " \n", + " \n", + "
\n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + "
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dh4AXGuHWeu1", + "colab_type": "text" + }, + "source": [ + "## Import dependencies\n", + "Our first task is to import the dependencies we need. Run the following cell to do so:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "53PBJBv1jEtJ", + "colab_type": "code", + "outputId": "9b035753-60e5-43db-a78d-284ea9de9513", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 479 + } + }, + "source": [ + "# TensorFlow is an open source machine learning library\n", + "# Note: The following line is temporary to use v2\n", + "!pip install tensorflow==2.0.0-beta0\n", + "import tensorflow as tf\n", + "# Numpy is a math library\n", + "import numpy as np\n", + "# Matplotlib is a graphing library\n", + "import matplotlib.pyplot as plt\n", + "# math is Python's math library\n", + "import math" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "p-PuBEb6CMeo", + "colab_type": "text" + }, + "source": [ + "## Generate data\n", + "Deep learning networks learn to model patterns in underlying data. In this notebook, we're going to train a network to model data generated by a [sine](https://en.wikipedia.org/wiki/Sine) function. This will result in a model that can take a value, `x`, and predict its sine, `y`.\n", + "\n", + "In a real world application, if you needed the sine of `x`, you could just calculate it directly. 
However, by training a model to do this, we can demonstrate the basic principles of machine learning.\n", + "\n", + "In the [hello_world](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/experimental/micro/examples/hello_world) sample for [TensorFlow Lite for Microcontrollers](https://www.tensorflow.org/lite/microcontrollers/overview), we'll use this model to control LEDs that light up in a sequence.\n", + "\n", + "The code in the following cell will generate a set of random `x` values, calculate their sine values, and display them on a graph:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "uKjg7QeMDsDx", + "colab_type": "code", + "outputId": "b17a43c6-eba1-4cc7-8807-14fcf5918d01", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 269 + } + }, + "source": [ + "# We'll generate this many sample datapoints\n", + "SAMPLES = 1000\n", + "\n", + "# Set a \"seed\" value, so we get the same random numbers each time we run this\n", + "# notebook\n", + "np.random.seed(1337)\n", + "\n", + "# Generate a uniformly distributed set of random numbers in the range from\n", + "# 0 to 2π, which covers a complete sine wave oscillation\n", + "x_values = np.random.uniform(low=0, high=2*math.pi, size=SAMPLES)\n", + "\n", + "# Shuffle the values to guarantee they're not in order\n", + "np.random.shuffle(x_values)\n", + "\n", + "# Calculate the corresponding sine values\n", + "y_values = np.sin(x_values)\n", + "\n", + "# Plot our data. The 'b.' 
argument tells the library to print blue dots.\n", + "plt.plot(x_values, y_values, 'b.')\n", + "plt.show()" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYYAAAD8CAYAAABzTgP2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi40LCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcv7US4rQAAIABJREFUeJzt3X2UVPWd5/H3F1pU1ASRjhLhgDNy\nJpJJgrOVZioa4yQGNJsjzE7iqvRKcpwpH+Im2TkrrZNzNg8ziTSZGcnOEUNHozCgxjUjYtZZMEYH\nZyyBZgYThSgswRFWpBWZaFSQ5rt/3NtD3apb/VQPt27V53VOna77rVvd3/ahvv17NndHRERkwJik\nExARkcaiwiAiIhEqDCIiEqHCICIiESoMIiISocIgIiIRKgwiIhKhwiAiIhEqDCIiEtGWdAKjMWnS\nJJ8+fXrSaYiIpMqWLVtedff2oe5LZWGYPn06vb29SachIpIqZvbicO5TV5KIiESoMIiISIQKg4iI\nRKgwiIhIhAqDiIhEVKUwmNkPzWy/mT1b5nUzs/9pZjvN7Odm9nsFry00sx3hY2E18hERkdGrVovh\nbuDiQV6/BJgRPnLA7QBmNhH4OjAb6AC+bmanViknGYXZs6GtDU45BcaPB7PgMXYsnHsu5PNJZygi\ntVaVwuDuG4ADg9wyD1jpgaeBCWY2GZgLPOruB9z9deBRBi8wUkX5PHziE0EROP74oABs2gT9/fDm\nm/D228fuPXoUtm6Fj33sWKE45RTo6koufxGpjXqNMZwJvFRwvSeMlYuXMLOcmfWaWW9fX1/NEm0V\nM2cGH/IbNgRF4PDhkb3/6NHgfUuWwJgxMGkS9PTUJlcRqa/UDD67e4+7Z9w9094+5IpuidHVBe97\nHxx3HGzfXr3v6w6vvQbXXBO0PDo7q/e9RaT+6lUY9gJTC66nhLFycamifB6mTg3+uu/rgyNHhn6P\nGZx44sh/1uHDsHp1ME6hbiaRdKpXYVgLXBXOTvp94N/c/WVgHTDHzE4NB53nhDGpgp4e+OAHgy6j\nPXsGv7etLegSMoOOjqCr6K23gtbAwGPOnGBsAYL7BtPfHxQijUOIpE+1pqveC+SB3zGzPWZ2tZld\na2bXhrc8AuwCdgI/AK4HcPcDwJ8Dm8PHt8KYVKirK+ja2bZt8PsmTYKnnoJ33w0+zI8ehY0b4+9d\nty5obbgH9y1fDhMnDl4kBsYh1L0kkh7m7knnMGKZTMa1u2p5c+fC+vXlXx8/Pvjrf9EiyGar8zN7\neuDLX4ZDh8rf094ODz1UvZ8pIiNjZlvcPTPUfakZfJah5fPBh365ovC+9wXF4De/gQcfrO4HdC4H\n77wDCxbAuHHx9/T1Bd1amr0k0thUGJpET0/woVu49qDQokXwyivQ3V3bPFatCloNy5eXv+faa1Uc\nRBpZKg/qkajp0+HFMsdvTJwIt9wS/EVfTwM/75prSl9zh+uui94nIo1DLYaUO+20wYvCa68l9+Gb\nywUD2xMmlL529GhQNDQoLdJ4VBhSbPZsOFBmDte0aUFRSFo2C6+/HnRlxVm9WsVBpNGoMKTU3LnB\nvkZxFi2C3bvrms6QuruD1sNJJ5W+ds892pxPpJGoMKTQ7NnxM49OPDH48K31APNoZbPwpS+Vxt3h\nggvUchBpFBp8TplyA80dHeUXpjWSgaK1bFmw+G3AkSNBt9KOHen4PUSamVoMKVKuKMy
Zk64P0+5u\neOONoJgV27QpaBGJSHJUGFKis7N8S2FdSneXuvrq+PimTcG24CKSDBWGFOjsDLpZik2blq6WQrFc\nLlgIN7AxX6Ht2zXmIJIUFYYGN3dufFGYMKHxZh6NRi4XjDfEWb1aK6RFkqDC0MDy+fjZR2PHwiOP\n1D+fWsnlyq9zuOYaTWUVqTcVhgZ22WWlsZNPhiefbL4dSru7y++v9JnP1DcXkVanwtCgpk+PP1zn\nr/6q+YrCgFwumGFV7OBBmDy5/vmItKpqHdRzsZk9b2Y7zeymmNdvNbOt4eMFMztY8Fp/wWtrq5FP\n2s2dGz8DacGC5t90bt26+Gms+/ZpGqtIvVRcGMxsLHAbcAkwE7jCzCKTDd39v7n7LHefBfwN8HcF\nL7898Jq7X1ppPmlXblyhoyPY0roVbNwYzLgqtmmTjgkVqYdqtBg6gJ3uvsvdDwP3AfMGuf8K4N4q\n/NymtGRJaSzt01JHY/fu+H2V7rqr7qmItJxqFIYzgZcKrveEsRJmNg04C/hZQfgEM+s1s6fNbH4V\n8kmtmTNhzZpobMaM5piWOhqPPloae+01zVISqbV6Dz5fDjzg7v0FsWnhGaRXAkvN7Lfj3mhmubCA\n9Pb19dUj17qaOTNY1FVo7FhYsSKZfBpBNls6U+no0fhWlYhUTzUKw15gasH1lDAW53KKupHcfW/4\ndRfwBHBu3BvdvcfdM+6eaW9vrzTnhtLVVVoUIFj41awzkIZrYHX0mIL/Utes0ViDSC1VozBsBmaY\n2VlmNo7gw79kdpGZfQA4FcgXxE41s+PD55OA84BtVcgpNXp64v8CPuec5p+BNFy5HGQy0diSJSoO\nIrVScWFw9yPADcA6YDtwv7s/Z2bfMrPCWUaXA/e5uxfEzgF6zewZ4HFgsbu3VGG4+ebS2EknwbaW\n+qcwtLgN91QcRGrDop/T6ZDJZLy3tzfpNCrW0xNs+VBs+XK1FuJ0dcW3rp56Sl1uIsNhZlvCMd1B\naeVzgm65pTQ2Z46KQjnd3fF7Ki1cWP9cRJqZCkNCOjtLp6GefXZ6z1aol+7uYLuQQjt2qEtJpJpU\nGBLQ01O6lbYZrFyZTD5pEzcu873vaYtukWpRYUjAl79cGrvxRvWTD1fcZnuHDgXjNSoOIpVTYaiz\nuXODD7FCY8YEXSQyfOvWwQUXlMbjxm1EZGRUGOqoqyt+g7yLLqp/Ls1g8eKgC67Q7t1qNYhUSoWh\nTvJ5+O53S+MTJmjAebSyWZgXs12jWg0ilVFhqJOVK6F4yYhZcx3RmYRFi+JbDZqlJDJ6KgwJmTYN\n/umfNOBcqWwWvv/90viSJdqFVWS0VBjqoKsLfvKTYJDZDMaNg3vvVVGollwu6JIrdv319c9FpBmo\nMNTYwDYOe/YEW0Z//OPwxBMqCtUWt1p869ZgFpiIjIwKQ43dfXf0etcuFYVa6O4OzsQutn69ZimJ\njJQKQw11dcH+/dHYb/1WMrm0glWrYPLk0rhmKYmMjApDjcSds2AWzL2X2vnGN0pjL71UGhOR8lQY\namTp0tLY97+vbqRay+WCzQgL9fdrrEFkJFQYauTFF6PX06drO+16iduM8Gc/q38eImlVlcJgZheb\n2fNmttPMbop5/Qtm1mdmW8PHHxe8ttDMdoSPpthZf+ZMeOutaCxuR1CpjWwWOjqisSNHYPbsZPIR\nSZuKC4OZjQVuAy4BZgJXmNnMmFt/5O6zwscd4XsnAl8HZgMdwNfN7NRKc0pSTw9s3x6NHXecWgv1\ntnEjjB8fjW3aFJyDISKDq0aLoQPY6e673P0wcB8Qs4NNrLnAo+5+wN1fBx4FLq5CTomJaxn8wR/U\nPw+BG24oja1erRXRIkOpRmE4Eyic97EnjBX7IzP7uZk9YGZTR/jeVOjshAMHorHx47VJXlK6u+E9\n7ymN60AkkcHVa/D5YWC6u3+YoFWwYqTfwMxyZtZ
rZr19fX1VT7BS+XzpqWwAt95a/1zkmLgdbfft\nq38eImlSjcKwF5hacD0ljP07d3/N3QeOp7kD+A/DfW/B9+hx94y7Z9rb26uQdnXdVDLkDjNmaGwh\nablc6Q6sDz+s1dAig6lGYdgMzDCzs8xsHHA5sLbwBjMrXI96KTAwPLsOmGNmp4aDznPCWKr09MCG\nDaXxFSNuF0ktdHcHx34O6O+Ha6/VWINIORUXBnc/AtxA8IG+Hbjf3Z8zs2+Z2aXhbV82s+fM7Bng\ny8AXwvceAP6coLhsBr4VxlLlO98pjS1frsVsjeSqq6KtBne47rrk8hFpZObFp8ekQCaT8d7e3qTT\nAI7tnlpo0SKd4dyITjopur7khBPg7beTy0ek3sxsi7tnhrpPK58rVNxXPWGCikKj+sM/jF6/845O\nehOJo8JQga4uOHgwGvvwh5PJRYa2ahWccUY09pd/qbEGkWIqDKOUz5d2IYF2T2103/xm9Pro0fgZ\nZSKtTIVhlOIWSV1wgQacG93A9NVCGzaoS0mkkArDKP30p9FrnbWQHt3dpWdEa12DyDEqDKPQ1QU7\nd0Zj8+aptZAmxYXh4EEVB5EBKgyjEPcBUtw9IY0tbrPD667TQLQIqDCMWNxMpFmz1FpIm1wuODyp\n0NGjcP31iaQj0lBUGEYgn4/flG3ZsvrnIpWLazVs3aozG0RUGEbgiSeCrRQKTZ+u1kJa5XLB1iVj\niv4vePjhZPIRaRQqDCNw4YXBaWyFdGRnuuVypYsSTzklmVxEGoUKwzB1dsJnPxucxjZ/fnCm8PLl\n2la7GSxbFt1gb+9edSdJa1NhGIbOzuAQngMHYP36YDO2jRtVFJpFNgsf/Wg0tnq1pq9K61JhGIZ7\n741er1mTTB5SO1dfXRq788765yHSCFQYhjB3bjCNsZD6oJtPLgdz5kRjmzZpXYO0JhWGIRRvfQGl\nG7FJc7jwwtLYwoV1T0MkcVUpDGZ2sZk9b2Y7zaxkr0oz+1Mz22ZmPzezx8xsWsFr/Wa2NXysLX5v\nkrq6SlsL48drbKFZXXhhdBAaYMcOjTVI66m4MJjZWOA24BJgJnCFmc0suu1fgIy7fxh4ACjcsPpt\nd58VPi6lgdx1V2ns1lvrn4fURzYLV15ZGteUZGk11WgxdAA73X2Xux8G7gPmFd7g7o+7+8Chik8D\nU6rwc2sqn4e+vmjs7LPVWmh2q1bBaadFYwcOaFtuaS3VKAxnAi8VXO8JY+VcDfx9wfUJZtZrZk+b\n2fxybzKzXHhfb1/xJ3YNXHZZaSzuDAZpPt/5Tmnsnnvqn4dIUuo6+GxmnUAGKNxxaFp4OPWVwFIz\n++2497p7j7tn3D3T3t5e0zy7umDPnmisvV1bX7SKuBlKL7+sGUrSOqpRGPYCUwuup4SxCDO7CPga\ncKm7HxqIu/ve8Osu4Ang3CrkVJG4vw6/+MX65yHJWbcuOJFvQH9//FGuIs2oGoVhMzDDzM4ys3HA\n5UBkdpGZnQssJygK+wvip5rZ8eHzScB5wLYq5DRqPT2lrYWOjuDUL2ktixdDW9ux6zVrNENJWkPF\nhcHdjwA3AOuA7cD97v6cmX3LzAZmGX0XOBn4X0XTUs8Bes3sGeBxYLG7J1YY8nm49tpo7Mwzg+0v\npPVkszBpUjQWN/4g0mzahr5laO7+CPBIUex/FDy/qMz7ngI+VI0cqmHlytJttaW1Fa9j2bcvmTxE\n6kkrnwv8+MelsQUL6p+HNI4vfCF6fehQsE2KSDNTYQjNnl26bmHBAo0ttLrubjj++Ghs/XrNUJLm\npsIQ2rw5em0WLHYS+dznSmM3lWz8ItI8VBgI/vorHlvQDqoyYNUqmDgxGnvySbUapHmpMBA/P/27\n3y2NSeu65ZbotbtWwkvzavnCkM/D2qI9XS+4QHsiSVQuB4sWRXdf/cEP1GqQ5tTyhWHlyuiUxDFj\ngoVNIsW6u+H
jHz923d8P11+fXD4itdLyheHpp6PXl16qPZGkvHfeiV5v3arV0NJ8WrowTJ8e/I89\nYMyYoLtApJy4s6G//vX65yFSSy1bGObOhRdfjMbe/361FmRwuRzMmhWN7dun8xqkubRsYXj88dJY\n3OldIsWWLSuNqTtJmklLFoZ8Ht59NxqbMEGrnGV4stnSVsPBgyoO0jxasjDErVp95JHSmEg5ca2G\npUvrn4dILbRcYejqgg0bjl2bwfLlGluQkclmSycqbN+uVoM0B/MU7jOdyWS8t7d3VO89/XTYv//Y\n9fveB6+8UqXEpOVMnhzdinvmTHjuueTyERmMmW0Jj1IeVFVaDGZ2sZk9b2Y7zayko8bMjjezH4Wv\nbzSz6QWv3RzGnzezmm5onM9HiwLABz5Qy58oze7UU6PXOq9BmkHFhcHMxgK3AZcAM4ErzGxm0W1X\nA6+7+9nArUB3+N6ZBEeBfhC4GFgWfr+aiBtb0CpnqcRXvxq9PnAAOjuTyUWkWqrRYugAdrr7Lnc/\nDNwHzCu6Zx6wInz+APApM7Mwfp+7H3L3XwE7w+9Xdfl8sCNmoXPO0diCVCaXC7ojC61erT2UpPry\n+WAzx3r8t1WNwnAm8FLB9Z4wFntPeEb0vwGnDfO9VRF3bGfxX3sio1F8yhvAddfVPQ1pYvk8XHgh\nfO1rwddaF4fUzEoys5yZ9ZpZb1/xUWujoB1UpVq6u6Gt6PT0Z55Rq0GqZ8kSOHw4+OP28OHab/le\njcKwF5hacD0ljMXeY2ZtwHuB14b5XgDcvcfdM+6eaW9vH3GSV10F48YF01PHjdPYglTXySeXxuLO\n+RAZqXwe1qwpjdVSNQrDZmCGmZ1lZuMIBpOLTjhgLbAwfP454GcezJNdC1wezlo6C5gBbKpCTiWy\nWXjiCfj2t4OvGluQaoprfa5dq1aDVC7uD4xXX63tz2wb+pbBufsRM7sBWAeMBX7o7s+Z2beAXndf\nC9wJ/K2Z7QQOEBQPwvvuB7YBR4AvuXt/pTmVk82qIEhtdHfD+vXR3XqPHg2a/PpvTipR+N/UgAUL\navszW26Bm0it5PNw/vnRg5/mz4cHH0wuJ0m3rq7SFsOMGfDCC6P7fnVd4CYiQcvg9tuDcz0GrFmj\nLblldHp6SouCGaxYEX9/NakwiFRRLhe0GgotWaI9lGTkvve90ti8efXpmlRhEKmy4uM/Ae68s/55\nSHrl87BtW2m8XidMqjCIVFnc8Z8nnFD/PCS94mYizZ9fv4kMKgwiVZbLBQsoC736qqauyvDErVsw\nq+959CoMIjWweHF0NfS2bcHYg4qDDCWutVCvsYUBKgwiNZDNwh//cTR29Kj2UJKhPf109LrerQVQ\nYRCpmauuKo398pf1z0PSo6ur9EyPG2+s/yJJFQaRGslmg8VIhQ4d0tRVKe/226PXJ58crKqvNxUG\nkRqKW4x03XUaa5BSXV3wxhvR2KRJyeSiwiBSQ9lsMM2w0NGj2nlVovL5+P8mbr65/rmACoNIzS1a\nFN0mA7TzqkTFHTs8a1ZyZ8aoMIjU2MAeSmbHYmo1SKHNm0tjy5bVP48BKgwidZDLBXPRC61Zo1aD\nBGMLb78djc2alex27SoMInUSNxf9+uvrn4c0lrhZakm2FkCFQaRuslk47rho7LnnkslFGkM+DwcP\nRmNnnJH84U4VFQYzm2hmj5rZjvDrqTH3zDKzvJk9Z2Y/N7P/XPDa3Wb2KzPbGj5mVZKPSKM77bTo\n9bvv6ryGVrZyZWnsm9+sfx7FKm0x3AQ85u4zgMfC62JvAVe5+weBi4GlZjah4PUb3X1W+Ig5xE6k\necT9T6/zGlpTPg8/+MGx64GtL5KaiVSo0sIwDxhYwrMCmF98g7u/4O47wuf/D9gPtFf4c0VSKW7n\nVYAf/7j+uUiybroJ+gtOuP/4x5NZ5Ryn0sJwuru/HD7fB5w+2M1m1gGMA/5vQ
fjbYRfTrWZ2fIX5\niDS8xYth7NhorF1/KrWUfB6efDIaizvgKSlDFgYz+6mZPRvziEy+c3cHfJDvMxn4W+CL7j5wXPrN\nwAeAjwITgbK9rWaWM7NeM+vt6+sb+jcTaVDZLPzJn0Rj99+vqaut5KabwIs+LeMOeErKkIXB3S9y\n99+NeTwEvBJ+4A988O+P+x5m9h7gfwNfc/enC773yx44BNwFdAySR4+7Z9w9064/ryTlrroqel7D\nkSPxA5HSfOJaC9OmNcbYwoBKu5LWAgvD5wuBh4pvMLNxwIPASnd/oOi1gaJiBOMTz1aYj0gqZLNw\n223HupTcg4FIDUI3vyeeKI392Z/VPY1BVVoYFgOfNrMdwEXhNWaWMbM7wnsuAy4AvhAzLXW1mf0C\n+AUwCfiLCvMRSY1cLuhSGtgqo78frr1WXUrN7sILgzPAzYI9tBplJlIh8+KOrhTIZDLe29ubdBoi\nFcvn4bzzov3NF1wA//APyeUktdPTE8xAmzULJkwIikQ9F7OZ2RZ3zwx1X9tQN4hI7WSz8N73Rle/\n6pS35tTVdWzjxPXrYfny5Fc4l6MtMUQS9uEPR69PPFHdSc0m7ryF730vmVyGQ4VBJGHF6xpefDFY\n7KTi0Dzizlto5F58FQaRhGWzwfTFadOOxfr7tfNqM9m1qzT21a/WP4/hUmEQaQDZbHR7BICtW9Vq\naAb5fOnZzXPmNN5MpEIqDCIN4sorS2Of/3z985Dqyefh/PODIg/BFNUFC2DdumTzGooKg0iD6O4u\n3UNp714tekuzhQuDY1wHuMMHP5hcPsOlwiDSQD71qdLY9derSymNenpgx45ozCxYu9DoVBhEGsi6\nddBRtGNYf7/2UUqjO+8sjV15ZeOuXSikwiDSYDZuDFbGFtq2LZlcZHTyedi0KRo75xxYtSqZfEZK\nhUGkAY0bF71upL36ZWjFi9kApk6tfx6jpcIg0oCK9+Z/4w0NQqfJCy+Uxv7oj+qfx2ipMIg0oFwu\n2Etn5szgevt2uOYa6OxMNi8ZWmdnadffggWNvW6hmAqDSIPK5eDkk6Ox1avVcmhknZ3Bv6NC8+en\nZ2xhgAqDSAN7//tLY428+Vory+dLiwIE5y2kjQqDSAOL+1DZtk3rGhpR3JTiWbPSMT21WEWFwcwm\nmtmjZrYj/Hpqmfv6C05vW1sQP8vMNprZTjP7UXgMqIiEstlgrKHYZZfVPxcZ3NNPl8aWLat/HtVQ\naYvhJuAxd58BPBZex3nb3WeFj0sL4t3Are5+NvA6cHX820VaVy4H7e3R2J49wcEv0hi6uo7thzRg\n/vx0thag8sIwD1gRPl8BzB/uG83MgE8CD4zm/SKt5ItfLI0tXVr/PKRU3CE8ZukcWxhQaWE43d1f\nDp/vA04vc98JZtZrZk+b2cCH/2nAQXc/El7vAc4s94PMLBd+j96+vr4K0xZJl+7u0kVvhw+r1dAI\nFi4sjd14Y3pbCzCMwmBmPzWzZ2Me8wrvc3cHyp1JNC08gPpKYKmZ/fZIE3X3HnfPuHumvbhdLdIC\n4g52ufvuuqchBbq6SjfKmzAhKORpNmRhcPeL3P13Yx4PAa+Y2WSA8Ov+Mt9jb/h1F/AEcC7wGjDB\nzNrC26YAeyv+jUSaVHd36QZ7+/drXUNS4rqQIF0L2cqptCtpLTDQkFoIPFR8g5mdambHh88nAecB\n28IWxuPA5wZ7v4gcs3EjnHFGNHbLLcnk0uriZoadfXb6WwtQeWFYDHzazHYAF4XXmFnGzO4I7zkH\n6DWzZwgKwWJ3H1gw3gX8qZntJBhziNmoVkQK/f7vR69371arod7y+WBmWLFm2R7dgj/c0yWTyXhv\nb2/SaYgkIp+H884LTgMb0NERtCakPqZMCU7XK4699FIy+QyXmW0Jx3sHpZXPIimTzQazXgpt3qwZ\nSvWSz5cWBYD7769/LrWiwiCSQt3dwQKqA
e7BQKi6lGrvpphlvB0d6Z6eWkyFQSSlFi2CMUX/B8cd\nJynV09UFGzZEY83YjafCIJJS2Sycf3409qtfqdVQS8X/bCdMaL6iACoMIqm2eDG0tR277usLDvRR\ncai+nh44eDAamzAhmVxqTYVBJMWy2aBrY8qUaPwb30gknaaVz8P115fGb765/rnUgwqDSMpls5Ap\nmoD48sswfXoi6TSlJUugvz8aW7SoOVY5x1FhEGkCcTt5vvgizJ1b/1yaTU8PrFkTjc2f3xwrnMtR\nYRBpAtlscOB8scceq38uzSSfh2uvjcbSvqX2cKgwiDSJVatKB0P7+4MD6mV0Vq6MrjAHOOec5lqz\nEEeFQaSJPPJIaWz1ap0RXU1f+UrSGdSeCoNIE8lmgwPoizXL5m711NkJ99xzbBHhmDHNPeBcSIVB\npMnEHUD/k59obcNITJ4ctLR+/Ws4ejQotv/4j8094FxIhUGkyWSzsHw5jB17LLZnjxa+Ddfs2bBv\nXzT2r//a/OMKhVQYRJpQLgdPPqmFbyPV1QWbNpXGL7mk/rkkSYVBpEmVW/imtQ3xenrij+o8+eRg\nxlcrqagwmNlEM3vUzHaEX0+NuecPzGxrweMdM5sfvna3mf2q4LWYYTMRGa24+fbr1+vshjjFZ1wM\nWL++vnk0gkpbDDcBj7n7DOCx8DrC3R9391nuPgv4JPAWUPiP+saB1919a4X5iEiBcrOUlizRFNZC\nnZ3BQHOx5ctba2xhQKWFYR6wIny+Apg/yL0AnwP+3t3fqvDnisgwxc1SgvjD7FtRPh/MQCq2YEFr\nTE2NU2lhON3dXw6f7wNOH+L+y4F7i2LfNrOfm9mtZnZ8uTeaWc7Mes2st6+vr4KURVpLNhvfpbRn\nj8YbIH5coaOj9cYVCg1ZGMzsp2b2bMxjXuF97u6Al/k2mNlk4EPAuoLwzcAHgI8CE4GyPZ/u3uPu\nGXfPtLe3D5W2iBTo7oY5c0rj69e39hTWnh546KFo7CMfac7Dd0aibagb3P2icq+Z2StmNtndXw4/\n+PcP8q0uAx5093cLvvdAa+OQmd0F/Pdh5i0iI7RuXTBHv3g65pe+BB/6UOv1pefzcN110b2QxoyB\n229PLqdGUWlX0lpgYfh8IfDQIPdeQVE3UlhMMDMjGJ94tsJ8RGQQGzfC+PHR2JEj8LGPtdZgdE9P\nsHX20aPR+KWXtl6BjFNpYVgMfNrMdgAXhdeYWcbM7hi4ycymA1OBfyh6/2oz+wXwC2AS8BcV5iMi\nQ7jhhvh43AllzainJ1gFvr+of2NgLyQB8+I9ZVMgk8l4b29v0mmIpNbMmbB9ezR20knw5pvJ5FNP\nkyeXbnlhBt//fvPPQjKzLe6eGeo+rXwWaUHbtsG0adHYb34TfGg282B0XFGA1igKI6HCINKidu8u\nXfy2b1/zbrZ32mnxRaGV1yuUo8Ig0sKWLQu6UYpde21zbZvR1QUHDpTGP/KR1l6vUI4Kg0gLy2bh\nyitL4+7Bwq9mKA7lNscDTU0tR4VBpMWtWhW/+A3grrvqm0u1DcxAKnbiifDUU5qaWo4Kg4iwbl2w\nDUSxvr7gTIc0rnEoVxQmTIDPqs3qAAAHPElEQVS33lJRGIwKg4gAweK3BQuOnXE8YO/e9C2AK1cU\nQAPNw6HCICL/btWq8v3uF16YjtlKnZ3li0JHR+uc21wJFQYRicjl4ruVDh8OPnBnz65/TsM1d278\nFtoQ/E6tvjnecKkwiEiJjRtLF8AN2LSp8YpDPg/nnlv+tLUFC1QURkKFQURi7d4d33KAoDi0tzfG\nuENPTzAGsrXM+Y/Ll2utwkipMIhIWRs3Bh+sx8ccofXqq8EHcpJrHQYbZD7ttGBKqgabR06FQUQG\nlcvB44+Xf33JkqBw1LtAzJ1bviiMHQsPP6wpqaOlwiAiQ8pmg5ZDOYcPBwWis7O2eeTzQReWWfnx\nhEmT4
MknVRQqocIgIsOSywVdMyefXP6e1avhlFOq33oYGFz+2MeCLqxyOjqCRXkqCpWpqDCY2efN\n7DkzO2pmZff4NrOLzex5M9tpZjcVxM8ys41h/EdmNq6SfESktrJZeOONYJbP2LHx97z5ZtB6GDsW\nPvGJygaoe3rgve8dfHAZ4Oyzg6KlmUfVUWmL4VngPwEbyt1gZmOB24BLgJnAFWY2M3y5G7jV3c8G\nXgeurjAfEamDVauCI0HLzVqC4NjMDRuCD/XjjgsekyYNvkiuszMYNJ4+HdragjGEX/+6/P1jxgRF\nascOtRKqqaLC4O7b3f35IW7rAHa6+y53PwzcB8wLz3n+JPBAeN8KgnOfRSQlBmYtnXHG4PcdORI8\nXnst+LA3O/Y48USYOjV4vnp1sD32iy9Cf//g33PixOAeTUWtvnqMMZwJvFRwvSeMnQYcdPcjRXER\nSZFcDl5+OTgvua1t5O9/5x3Ys2f497e1BT/rtddG/rNkeIYsDGb2UzN7NuYxrx4JFuSRM7NeM+vt\n6+ur548WkWHo7oZ33w228I47/KdSbW1Bt9G772q/o1obsr67+0UV/oy9wNSC6ylh7DVggpm1ha2G\ngXi5PHqAHoBMJuMV5iQiNbJuXfC1qwuWLg0+yMeMGbprqNjYscHj859Xd1G91aMraTMwI5yBNA64\nHFjr7g48DnwuvG8h8FAd8hGROujuhkOHgkHoI0eCsYiJE0tbEyecEJz50NYG48fDzJnBvUeOBO9X\nUag/Cz6fR/lmsz8E/gZoBw4CW919rpm9H7jD3T8T3vcZYCkwFvihu387jP8WwWD0ROBfgE53PzTU\nz81kMt7b2zvqvEVEWpGZbXH3sksL/v2+SgpDUlQYRERGbriFQSufRUQkQoVBREQiVBhERCRChUFE\nRCJUGEREJCKVs5LMrA94cZRvnwQMsnFvw0t7/pD+3yHt+UP6f4e05w/J/A7T3L19qJtSWRgqYWa9\nw5mu1ajSnj+k/3dIe/6Q/t8h7flDY/8O6koSEZEIFQYREYloxcIwyDEhqZD2/CH9v0Pa84f0/w5p\nzx8a+HdouTEGEREZXCu2GEREZBAtUxjM7GIze97MdprZTUnnM1Jm9kMz229mzyady2iY2VQze9zM\ntpnZc2b2laRzGikzO8HMNpnZM+Hv8M2kcxoNMxtrZv9iZj9JOpfRMLPdZvYLM9tqZqnbTdPMJpjZ\nA2b2SzPbbmYNd1p1S3QlmdlY4AXg0wRHiG4GrnD3bYkmNgJmdgHwJrDS3X836XxGyswmA5Pd/Z/N\n7BRgCzA/Zf8ODDjJ3d80s+OAfwS+4u5PJ5zaiJjZnwIZ4D3u/tmk8xkpM9sNZNw9lesYzGwF8KS7\n3xGeUTPe3Q8mnVehVmkxdAA73X2Xux8mOAOirkeTVsrdNwAHks5jtNz9ZXf/5/D5G8B2UnbGtwfe\nDC+PCx+p+svKzKYA/xG4I+lcWpGZvRe4ALgTwN0PN1pRgNYpDGcCLxVc7yFlH0rNxMymA+cCG5PN\nZOTCbpitwH7gUXdP2++wFFgEHE06kQo4sN7MtphZLulkRugsoA+4K+zOu8PMTko6qWKtUhikQZjZ\nycCPga+6+6+Tzmek3L3f3WcRnFHeYWap6dYzs88C+919S9K5VOh8d/894BLgS2E3a1q0Ab8H3O7u\n5wK/ARpuzLNVCsNeYGrB9ZQwJnUU9sv/GFjt7n+XdD6VCJv/jwMXJ53LCJwHXBr20d8HfNLMUnei\nsrvvDb/uBx4k6CpOiz3AnoKW5gMEhaKhtEph2AzMMLOzwsGey4G1CefUUsKB2zuB7e7+10nnMxpm\n1m5mE8LnJxJMZvhlslkNn7vf7O5T3H06wf8DP3P3zoTTGhEzOymcvEDYBTMHSM1MPXffB7xkZr8T\nhj4FNNwEjLakE6gHdz9iZjcA64CxwA/d/bmE0xoRM7sXuBCYZGZ7gK+
7+53JZjUi5wH/BfhF2EcP\n8Gfu/kiCOY3UZGBFOMttDHC/u6dyymeKnQ48GPydQRtwj7v/n2RTGrH/CqwO/0jdBXwx4XxKtMR0\nVRERGb5W6UoSEZFhUmEQEZEIFQYREYlQYRARkQgVBhERiVBhEBGRCBUGERGJUGEQEZGI/w/w1xWP\nb+vxVQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iWOlC7W_FYvA", + "colab_type": "text" + }, + "source": [ + "## Add some noise\n", + "Since it was generated directly by the sine function, our data fits a nice, smooth curve.\n", + "\n", + "However, machine learning models are good at extracting underlying meaning from messy, real world data. To demonstrate this, we can add some noise to our data to approximate something more life-like.\n", + "\n", + "In the following cell, we'll add some random noise to each value, then draw a new graph:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "i0FJe3Y-Gkac", + "colab_type": "code", + "outputId": "60b19cdd-c69c-469e-9446-b738a79c1f51", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 269 + } + }, + "source": [ + "# Add a small random number to each y value\n", + "y_values += 0.1 * np.random.randn(*y_values.shape)\n", + "\n", + "# Plot our data\n", + "plt.plot(x_values, y_values, 'b.')\n", + "plt.show()" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX8AAAD8CAYAAACfF6SlAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi40LCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcv7US4rQAAIABJREFUeJztnX+YVOV597/3mdkdeNNa0tGWKFIS\njUlsuMIKEqc2uqlEg41K3bfVxPddC8j6A4jEq1Jtk5S3MZIQo5ugIqvAyzaaNC0JQoJFMW6kYRoE\nwdKgxh9NEH9Usr7UpGGX3Znn/ePeu89zzpyzO7MzuzNz5v5c116zM/OcmTP74/vc5/5JxhgoiqIo\njYVX7RNQFEVRxh8Vf0VRlAZExV9RFKUBUfFXFEVpQFT8FUVRGhAVf0VRlAZExV9RFKUBUfFXFEVp\nQFT8FUVRGpBktU8gihNPPNFMmzat2qehKIpSV+zdu/cXxpiTRlpXs+I/bdo07Nmzp9qnoSiKUlcQ\n0c+LWaduH0VRlAZExV9RFKUBUfFXFEVpQFT8FUVRGhAVf0VRlAZExV9RFKUBUfFvQLJZYOVKvlUU\npTGp2Tx/ZWzo6gKWLAFyOSCVAh5/HMhkijs2mwV6eoDW1uKPURSlNlHxbyCyWWDxYmBwkO/397OY\nFyPk2SxwwQXA8eNAc3Npm4aiKLWHun0aiJ4eIJ+394nYig8S5hbq6WHhz+X4tqdnbM9VUZSxRS3/\nmCIumnQa6O1lkW9tBZJJFm8AMAY4cMBvwUe5hVpb2eIXyz9s01AUpX5Q8Y8h4qLp72dL3/OskC9Y\nAKxdy8KfzwM33MDH9PbyRhHlFspk+Hj1+StKPFDxrzOKCbqKi0ZcPPk83+/u5vtELP4AW/iyAXge\n3xcSCb+FL5uAoij1j4p/HVFM0DWbBQ4dYveOWPeex0K+YQNb9SL8ggi+MbxOjrn7bhV7RYkrKv51\nRFjQ1RVnd3NIJICODqClhV06hw4B99/vt+xdxDXU2WljBCr8ihJfVPxriJFcOlFBVznu0CG7OQDA\n1KnA9On8XEuLPTaR4CsAcQsRAbNmASefDDz4IHDkCLB7NzB3rm4EihJXyAR9ADXCrFmzTCMNcyk2\njz6YxZNOA8uW8XHi6snl+DU6O+1ziQRw8cX8GpMnAyecANx5J28ATU18OzAQfm6pFPDEE7oBKEo9\nQER7jTGzRlqnln+NMJJLR5DHZKOQIK1Y8YsWscWfTgObNgF9fXZD2LyZ1xDxRnHTTcCkSWzly3Nh\n9PcDq1YBs2fbqwCt9lWU+kbFv0YoJo8+zL1jDG8ARHxcezuvveACK/xBjGEr/847gXvuAb7//ZHP\nb+tW/mpuBpYutVcNbi2AbgiKUj+o+NcIYXn0rpgC/mBucug3J+4d1ze/ciWvG8mjl88D69ZFu3sA\n3lSIbByhvx+44w57pSG1AO75FeO20g1CUapLRcSfiNYD+ASAN40xHwx5ngB8DcDFAH4N4M+NMU9X\n4r3jhJtHH4wBXH21P5jrundc4ZdUz0TCpnqKgBvj3xCSSWDv3ujz8TwOBO/b528L4b4GEb9fd7ff\nbdXdXSjy2h9IUWqHSln+/xfA3QC6I56fC+C9Q18fBrBm6FYJIZsFVqywFbrSjsF1C7nuHXnMDfAm\nk8Cll7JLZ3CQNwOx4JNJYP584I03wn39slmkUsBZZwFu3F0CxLkcr/E8TiH1PP8GITUFrshHxTX0\nakBRxp+KiL8x5kkimjbMkssAdBtOLfoXIppERO8yxrxeifePE2GtGUTs29v9IrlypV3X388BXvfq\n4Ne/5ufkCuCSS/ixtjauAbj++vBzkDjC0qXA22+zyLvk83w8YGsH3PoBCUAbw+e1YgV/tbbaIjKp\nHtarAUWpDuPV1fMUAK849w8PPeaDiDqIaA8R7Tly5Mg4nVpt4
bZm8Dxgzhy26MWvfuutVhyPHvW3\ncJgxgwU0keDbtjZ7P5kEtm1jcV22jEW3vZ2t+zDyeQ7qilXvbgC5HPD001w7kBzBfMjngUcfZYE/\ncMC+Ti7H97VbqKJUh5oK+BpjugB0AZznX+XTqQrBrJ+2Nr8rZ/58Fu0DBzjwKngep20+/rjt4TN9\nug0iuxW+fX02dfPGG60A/+u/2s3E82yDN2OAyy7jbB+x8HfvBp55hgvBhksTFfr6+MpkYMCmni5e\nzNlGwSwndQMpytgzXuL/KoBTnftThh5TAgSzfnp6rGsnl+OOnOvX+3P7AX8Tto0bWUw3buTXuvVW\nFtQNG2x66ObNhaItcYLJk4Ff/pKrfQF+nzPO4CDzfffZ9ceP89qJE/kc3YByWKbRjBnAjh32fj7P\nwWr38wLqBlKU8WC83D5bALQTcw6A/1R/fzSZjHXvpNOFmTYDA4U9ev7sz1hA3awb8bdns/xa8+cX\n+u9dcjm+Glizhls8uPT0cIDYxRv66+nsBG67jTeGiRN5I0ok/GtPPx342tf8nyWVstb9rbfyYxLo\nVjeQoowtlUr1/CaAVgAnEtFhAH8DoAkAjDH3AdgGTvN8EZzqOb8S79sI9Pb6WzAT2Wwb2QCIgL//\ne9uV0+3ouWMH8MMfsvC3tAATJgDHjoW/lzG82QBspT/6qH1u717gqaf86+VKZMIEDg5v3gycfTZw\n5pnsmtq8GXjoIeD114EXXvAf63m8aUi2T3c3X9FIzyEJdOvQGEUZGyqV7fPJEZ43ABZX4r0aBfF7\nHz3qd6F85CPAVVcBjzzCPngRfDfDZtZQV489e2wmkIh0Zyfn7a9bx0JL5Pfz9/by92+/7T+fqG6g\nxtgYgrBrF4v/l7/McYjPfjb82N5em+3jViNLoHvFCk0FVZSxoqYCvoq1giVP3gs45n70I14zOMgW\n/sKFbNEvW2ZjA3v28NVBMmkDrCLS+/bZGgHAf6znseWfzbIVHobnFRaLBRkc5M8gbquwtYkEP9fd\n7Rd+qS9whV9jAIoyBhhjavJr5syZptHYtcuYiRONIRJ5Ncbz+MtKrv9r9mw+btcuYy680K5NJIy5\n7jr+SiTs+mTSmFSKH5s4kY9bu9aYpiY+trmZXzPqPRMJY5Yv59ch4turripcd9119vMEnyPi15f3\nlMebmuxxwu232/NPJPi+oijRANhjitDY8Qr4KkUgKZeuFZxIsAskit27gY9+lC3otja2miXPv72d\nLXvX8pZAqqR8dnez+yWft9XEu3fbthBBjGFXzpNPAl/8It9+4xvA8uV2TTJpC9KkOlmQKwdpIe0G\ngFta+DjXspfUV/lMGgNQlMqg4l9DuELX1GRz7d30yDDEp79sGfv0v/AFdo8AnEvvCqwbPDaGff/p\nNL+vK/aeB3zsY/y68+bxOcm0r3S60Ac/bx4/JxtW8PM0N/Oa4bKN9uxhF082ax+T1Ff5TOryUZTK\noD7/KuMOZ9m3D7joIs6dB4CurpH964IxbGVLk7fubq7CdQO1nsd5/A8/bF9zcNDm2ruxhuZm63fv\n6PCfpxSdBfv2yHzgwUG+f+uthTULDz/sP2+36ZxceQRnGejgeEWpPCr+Y8xwmSrBPj5CUxMPT5c+\nOGGIBS2tF4xhMU6n+b1cd4s0YLvkEq7I3bbN3yxOzi2TKewfJMjz0i5a3EerVnG/IGktEZxHEBTu\n5mb+vAB/vkWLbNB5uFkGiqJUFhX/MSSYqRLsu+/28XEZGOCrgLvvtm6bRKKw734yyWtkTq9Y1+46\nIs6937+fU0O3bwdWr+bXBwp97CNZ2W77Cc+zVcKPPsp+/0mThk/JnDsX2LKFN6tk0g6Y18HxijK+\nqPiPIW7Tsv5+4IYbbEtkEWuxhMMs/OnTgWuu4e9bWoBPf9pazeJe2beP3TIimAcOFPbsP+ssLtIS\na723l6t4R4PbfuIb3wAOH
rTPPfQQ8O1v8/crV/pnDQCF+fwDA8CSJXxensd9fnQAjKKMDyr+Y4hr\nJQPW/378ODdKO/dcroz96lf9xzU1sah+5CMslKlUYdYOwPfXr/db7729trc+EdcBtLfbfj+VcKvI\ne/3d3/kfP3wYOP98mzkkraFTKTuMxs1k8jx7lZLP2xbT0i4a4LiHXP24IyMVRSkPFf8xxLWSg0PS\njxyJ7oaZy3EKpSC9+sOqbHM5f4C0tZVFUoS+pYWfl8reKEqxrkWQw84n6JoKDqORK5f3vx+YMsXf\nQiK4AWSzfGUg3UWPHbMZTboBKEqZFFMMUI2vuBV57drFxVVRxVrDfTU1cSFWMllYLCWFWsH3uv12\nPmbiRC6OSqW4gMst7nLXy7qw13NZu9ZfmEXERWGplL84zS1Sc4vJ3IKzqC+3QC2s2Ky5efhzVJRG\nBlrkVXvMnQtMm1baMZ7HQd2ODvbdu5x2WrgbRLpk9vb6M3OkG2iwW2axA1Wy2cK6gUSCLfEnngCu\nvdbWC3ge9yC67TZ7jlJMNhLSMG7DBo5ZBGsDBga026eilIu6fcYI140CcBWuuDw8j90ezc2chSNM\nngyccw5nw4hIXnopB35XruTX2r3brr/5Zr/wB103bsxBOn3mcoV+/+AAmWBMQF730CG/eAeDtJkM\nu5kkiPud7/Bm4bqkPC+6SVywAG1wkFNBg7OG3dkFiqKMDhX/EinGNx5M8bzoIiv8gBXQadP84v+L\nXxTm4W/ZwkPY83l+reXL+RiZwztS8VVwUEpUDr+7LrihyGdJJvlLGs7dc48/OAsUtopw4xGZDHDv\nvZz1FLYBGGMrieXzSt2BuyFec436/BWlXFT8SyCswyRQKJrBFM+tWwtf6+BBf5okwOt7e4EFC9jt\nIVWvInrHj3Me/fbthecjFnVQdIN5+1GiGZXf734WgC3xqVNtGqcMihFGuoro6OArmYULgWefLXw/\nY/i5qVPtsYcOcQaUVB67XUkVRRkdKv4lEPSNd3fbFEp3vq4rgG6//JFw3Rkyb1cgKpxxe+iQPR9J\nq3TXVYKgmIvwRrVZHu4qArAtq198Mfz9PM+mrgY3t5kzeWNQq19RykfFvwSCQghY8ZUgpczNFQE8\nehS4666Re/QkEtZ/ns36g5xNTTZfH7CCKFO7gPAK4koQJubBFg/F9uIJG9wiwWFx+dx9d/gVVC7H\nk8QOHOArB90AFKU8VPxLIMyHvnGjFTNpriZNzQAWO6lglcKnILNns3ADLKyHDvnFceFCW5HrCi9g\n3TBjWf0aFPORXDthZLN2Pq/72WS6WNimlU77f27uz1fFX1HKQ8W/RIJCKN0w168vzKRxp1SJdRtE\nLHbAH1iVtshBH3eYG2a8hXAk106QYAM7z+PPuGBB9PlnsxzAlo3TDQIfPcpB9GDQW9s/KErxqPiX\nSVBsXH/1+vVW8IN+f2mvPHeu9d+LOBrDohZm0ZcqvGNFKW2W3QZ2RDxj2K3S7eriCmYR8+AxiYQN\nAh89aucFP/oo8OCDwI9/bIPB2v5BUYpDxb9MghlAYqVLf/swiIC/+AsebiLHuoHhfJ7z5YNplEK9\n9bdvbWVLXwLT+/ez715iIq6YA/y502kbD3CvcC66yP/abhsMdQkpSvGo+I8C183gunbc6tjdu4cP\n8K5eDbz9tvXfuwFeIraE4xLYzGQ4E0rSVwcHufgrLAi+bh1/7mXLrNXvXiW0tfn7AQmyUaTTY/95\nFCUOqPiXSLDoyQ3iJhLhw1SEU04BXn+9sNmZZO4Q2bm2O3YAO3fGx43hdhZ1axKCrRuefpo3VNdN\n1Ntrn5eroXXruFGdmw6by3GX1H37qhMLUZR6Qnv7lEgw11+6WBJxALO3t7CzJcAi//nPFw5Yl/m0\nPT3cH2fOHBvcHK7PTr3hzuK9+277c5gwATjvPLtONtLhhrZ3dHAM4OST/YVw8jNbu7ZwFrC
iKH7U\n8i+RqAEsTU3W39/UZC17CewuX84C6E7dksCwkMlwOqRM44pbDxs3VuH+HIDCuEnUOEmAA8TXXhv9\nPpoSqigjo+JfImLBLlvmb7J2+ukcxOzt5efuuMNao9u2sfjL8SJIUe0ixBUSdInEiaiU2SASGHZ7\nE4XNQSACPvQhO8lMZwEryvCo+I+CTIbbK7vif/AgW6MSeHSvCqQFcdAKjWqlPDhoA6ONZL1KTGDD\nBvv53boAIttULohkEREVBokVRSlEff6jpL3dtnhwkbbJrkAlk5zHH/RBiwvJ9W2HPdYIhMVS3NTX\ngQF2tUmrh6i5CJJB5AaJFUUpRC3/UZLJsGBJde/AgD9t8f3vB844g7/fto0btUnfn5GaoNVCEdd4\nEzZ7wLX83SupfJ67m4YhdQGNsmkqymhR8S8D8Vu3t/MmsG6dddk89xzw7//Og8vFWi22CVq9FXFV\ngqjZA+k01zw89ph/cw276rrwQr5ta2u8n5+ilIqK/xCl9IcJrnU3gRUrOEc/n2c3xdNP+/v0qEUa\nTdTsAckMkgyqpiZO8wxeETz6KD+2c6e/QE7aSAOa/68o/00xg36r8TWeA9xLHWDe1MSDxVMpY+bN\n40Hjcoy8lgwzJ+J17hqldHbt4p/hvHl2EH3UMPhEggfYy3GplA5/VxoHjOcAdyL6OBE9T0QvEtEt\nIc//OREdIaL9Q1/XVOJ9K0WpA8wlGNnfz2mH993HM3plqpV06QRYcgYGuCmZWpyjJ5PhttazZ1s3\nWtgoSPH5p9Pc/lqqhQUd/q4oTNluHyJKALgHwMcAHAbwFBFtMcYEhhTi740xS8p9v7FguP70rssA\niJ7K1d/P63p6Cvv6EKm7p1IEe/wDfH/OHPb19/bymk9/2j93WKqum5r0d6EoQGV8/rMBvGiMeRkA\niOhbAC4DEBT/miUq6yab9ffpkfRNovCmbevWhW8Ol1yiVn8lyGa5d8/AgM3nB3jDXrHC/oyvv543\nY4DXzpvH37/2mo6BVBShEuJ/CoBXnPuHAXw4ZF0bEZ0H4KcAPmOMeSW4gIg6AHQAwNSpUytwasUT\nlmEjbRYEEfaowSyy1vNYmGT4iFT3KuXhunCkp89ll9mZCFJhfTBgdrz1Fo+AlAD8Sy9xqmgjpdIq\nSpDxyvbZCuCbxph+IroWwEYAfxRcZIzpAtAFALNmzRqmIfL40Nrq79MjDDeQnYiblslownTa+phV\naEpjpAwsY4CtW4FHHuHfkTG2wtqlr8/2YsrneX6A5/HvKS5dUxWlVCoR8H0VwKnO/SlDj/03xphe\nY8zQhTgeADCzAu875kgh1wc+UPwxnsfC39HBorVsGfC5z2mXyVKRvkfuz6693bp6BOnkKVdickUg\nLbKbm9nVE9wQ4tY1VVFKpRLi/xSA9xLRu4moGcCVALa4C4joXc7dSwE8W4H3HRcyGfblp1J8P5Gw\nxUTCeef5m7BJa4Fis4iUQsJ+dpkMD6x38bzCBnhEwL33Al/8Ih/X0QHcdJN/rVYCK41O2W4fY8wg\nES0BsB1AAsB6Y8xPiOhvwfmmWwB8moguBTAI4C0Af17u+1aaKBeDPP71r/OQkDfeAL73Pft8IgFc\ndRX7lIPZQsNlESnDE/Wzk6Ew/f0s5Jdcwj59d5yjMXagC8AB4PXr+ftEArjiCuDIEWDGDL9LTgfB\nK40EmeFmDVaRWbNmmT179ozLe4W1VhYxcKd2Sc5+MI3zi19kwQj26Zf2BL29KiijYbgNWXoq5XI2\nldONxYjLJ/g7k2C8TBIT339nJ7vogn8DilJvENFeY8yskdZpewewwEhA8Ngx4JprgAce8LseRFiC\ne6VYpSP16VchKZ2oHkcSi3ELvf7wDwutfzcW4D4ezODq62PXXpibSVHiirZ0BlvnrtV48CBw/vn8\nuNteuamJv0+lOHf8uut49GKxffqVyhFsff3bv124Juy
iNuqxffv4tRqtlbbSuKjlj/De7wMD/Hhn\nJ3eVbGsrHMEYhfr6x55gYV5wCtiUKcDhw9HHn3468MEPAg8/bDOEFi3iNhzqolMaARV/sIUfrNpt\nauLHly1jl9Djj3NwUWbxDkdUxbBSWVy30IEDtrCuqYlTRJcu9Vdnu1d3N9/Mm/n27fz79TygpYUz\ngxSlEWg48Q8GEbu6gBtu8Au/5wF3382Wf1+ffW7zZi4oCnP1BGnEnvzVIpvlTdoYDv6uXs0i/tJL\nwFe+Yh9ftoxHPba1WZHv7ORmfbkcPz99Oj+uG7cSdxpK/Lu6gCVL+B9dMjzkvov4gN94o9BH3N/P\nIqEzYmsHibHk83wF19vLG8Kdd9rf3+Agt3TYvt1/bG+vdfscP87uI5klrMF6Jc40TMA3rB3zpk0s\nCkGSSc7+2Lw5/LV277YtnJXqEzb3uKfH7+ZJJMJjL8FjAQ3WK41Bw1j+3d1+oU8kuMjn0UftY1Om\n8O2JJ7J7wCUYE9B0wNohKsaSSll//t1382MrV/rHRLa2Fo6PdC1/DdYrcaUhxD+btRWeAAu/+PQl\nEEhks0PCskSi8vuV2iBsBGRQ1KX2wp0H0NQEzJ/vH++owXqlEWgIt48UBAEs8osW2cZrqVRh068o\nPI8nSUXl9yu1QzCw393Nwftcjl1/UrjX3w+sXetvvJfJALfeyt+vXKnuPSWeNITlH8y7P+EE4KKL\n2O1z9tk88HukLhduGwAV/dpGKqzF5XPFFcC3vhX9O5ZqYJnE1trKqaNucoAGfpW40RDi77oAjh7l\nfu6A398fBREPDJk9W90A9YLbriOfBx58MHotkZ0KtmEDx4WSSb6Vq8X+fo3vKPGjIcQfsD7hiy4q\n/hgiYMKE4gq7lNqhtbWwqMtF2jobYwfAnHYa8PzzfEww9TcqU0hR6pmG8Pm7zJhR3LpkErj2Wr3c\nr0cyGeCeeziYG4zneB7/bl3yeeC558I3C0kO0L8BJW40jOUvTJpk0zaJeErXb/wG+3zd9M5PfAJY\ns6Z656mUR0eH7cUkbbXdW7f1AxAdD5DkAO31r8SNWPfzD/uHddstJxL8Tz84GJ7KqX7e+JLNcuxn\nyxZr8Tc12b8Huf/DH/L32qJbqReK7ecfW7dP2AxY4aKLgDPOAE4+uXA4i5DLaXVnXBGj4K23rPAT\n8axfGfcI2Ftt0a3Ekdi6faL+YVtb/Zf7Yeh81/giRoHbsA/gOMAJJ9hGcAAbBnLl6KYKp9O2Uliv\nAJR6JZbin80Chw7ZwJ7ncZ+e3bv9U5xckkme4NXSomMX44wYBcGrvdNPB7761cIRnek0H9PZaeMF\nOu5RiQOxE/+gT//cc3m83+7d0cfMnq3FW42CWPFSByCcdBJn/Lice26h0IddUerfjVKPxM7n7/5z\nDg4C//ZvIx+zcGHhP3A2q6X9cUQK/m67DbjqKr4qJAJ+/GN/CmhTE3DmmYVCH9ZBVFHqkdhZ/kHL\n7q23Rj4mOMZRB7DHGyn4W7mShT+fZ0Nh0SK7pr2dbzdutG0i0mmd0qbEh9hZ/vLPOWdOeIHPvHn+\nx8OsN/fqoa+vcD6sEg+CVnx7O9d2rFljRf3ss23657JlPBBIhV+JA7HN889meeBKf799LJXibpyA\nFXS3la97rJsVJMfpP3v8iCreCvv7AdgdlM/rFaEydpRbUFhsnn/s3D5CJsN92teutdW88+fbH+Zw\nP9RMBliwwB47OKiBvbgSNWtZrv6CSEGgBnuVsWA8Xc6xc/u4tLTY1D1jOI+7WNrbuambBvYaE2kO\nFySZ1L8JZexw506MdUFhrMW/t9d2cASAu+4qPntHYgdf+IJe3seV4TK6Mhng3nsL40Of+QxbZkuX\n8j+mZoMplUImDorBmkyOrYERW7cPwD+4RML2asnlgBUr+KsYMY9yCSj1T/DyWoq4XD+rNIeT+FBL\nCwd9+/p4FgQRxwA
WLAiPHSlKKQQnDrpu6rEg1pa/tPZNJPh+Pg/s2FHY60dpPNyMrv5+ntr1uc9x\nkPf66/0jHSUDqLeX17quxOPHC8dAKspoEGNVjApJNx4rYi3+AFtu7qV7Pq/NuRR/mqfn8SYgG0GY\nmEvLENeNKLgBYEUpB/n7Cvs7qzSxdPu4qVLd3f5+PkTanEvxF2tJvx5p9hbM5nFdRMF/ymSS12sA\nWCmXnh6bTTYeGYaxE/+gL/fss/3Pv//9/I++dClvCk1NmrLXqLgxHfHtr1/PVwAi5tksx4iCvYAA\nvmL4xCeAX/8aaGvTvyGlPILdY8famKiI+BPRxwF8DUACwAPGmC8Fnk8B6AYwE0AvgCuMMT+rxHsH\ncYd39/UBL7/sf/7884F9+2wO9/Hj/E+v/7iNjWwE7e32qvHAAWDxYt4MomohH3mErbSdO3kDOXAA\n2LSJx4VOmqRXlsrIdHXx30xbG1+NjldHgbLFn4gSAO4B8DEAhwE8RURbjDEHnWULAfw/Y8zpRHQl\ngC8DuKLc9w4jnbYWmjHA4cP+51taWPwVxcV1Fd56K99fssRmioWRz/PVo8SRVq3i1uGAZgMpxdHV\nxbPCAf6bWb6c+0kdP863tV7kNRvAi8aYl40xxwF8C8BlgTWXAdg49P0/AriAaGxCGsHcfhfP4+fb\n27llAxHfjnVUXaltwqa+dXcPL/xCImGLvl57zf+cZgMpI7Fpk//+hg3AsWPjU+RVCbfPKQBece4f\nBvDhqDXGmEEi+k8AaQC/cBcRUQeADgCYOnXqqE4mmNtvX5uFXi7Dn3hCG3QpTLBHv/j+g64ezwNO\nPZWzfozhYO9nPmPdOwcOhM+N0HYQShjZLLumXY4csd97XgMVeRljugB0AdzYbTSvIbn911/vn8/6\nsY/5i7u0gEsRgoE2wBbbAPz3I8bDN7/Jrp077uA1q1fbS3P5exKf/9tvsyU3OKjZQIqfYPPIMFpa\naj/b51UApzr3pww9FrbmMBElAfwWOPA7JnR08O2SJfwPmkoVX9WrNB7BHv2A7eMvBkQiwVXAAHDn\nnfbx/n5/Smhvr/9vzQ0g699f4xHVobOnJ3qkrLBw4RieGCoj/k8BeC8RvRss8lcC+FRgzRYAVwPI\nAvifAH5gxriXtJTm6z+eUgzBK8HHH2cR37GDhT6fZ2Hv6fGnfBKxG6irK3y2r15hNi7DdehsbeVk\nANfyb27mv6H9+znzR4zYsaJs8R/y4S8BsB2c6rneGPMTIvpbAHuMMVsArAPwd0T0IoC3wBvEmKP/\neMpoyWRY/HfuLMy7TqVsn39jWPg9z24S6t9XgOHnPWcy7DJct467B5955vhnhFXE52+M2QZgW+Cx\nzzvf9wH400q8l6KMF1EjGyUV6Q70AAAfCUlEQVQX+/77bWwgn+cNwPPUv68wwVhSOs2xSID9+W6h\n6Ze+ZF2H4+WtqKmAr6LUGu7Vo/uPOXVqYTaQZABJbCCsfch4/nMr1SXYQmTpUuvmkStFwNaITJ7s\nrzAf61byKv6KUgRB/+3SpfwP7Hb4NIb/cfftC/f/j+eUJqU2EONh5Up/gDfYKmTLFv9j4+E6VPFX\nlCIItoC+6y7r6hHhB/ixgwft2r4+tupmz+bAcJQPWIk3YQFegahwMxgP12HsWzorSiVwW0ATce6+\nBHiD/7g/+pFtI24M1wV89rN8Sa9jIOPLSJPhenqAefMKOxCceGLh+qVLx94wUPFXlCIQ/+2iRXzf\ndfcEMQY4/XT/Y/k8W/zz5+to0DgS1iIkSCbDV4BBLryw8LH9+yt/jkFi6fbRoJoyFmQynOXjVv8G\nIWKr//nn/Y9LFpA2eIsPrs4Ml9bprk2n/e1nPA/4zd/0B4ABzvMfa2In/hpUU6rJyScDr7/uby1y\n2WVs8aXTvHl0d+smUO+EzYCO6sUfXHvFFcBDD/EVoucBTz/tf+0zz+QC1bEmduI/0
g6sKOXQ3s6+\n+4EBO/7R5bXX/K6gZJLb9AL+Xi4bNnBzQf3brE+COtPbG14TElzb38/9oeRvZHAQeOopvk/Et889\nx5uFpnqWyHhPw1EaCwncueMf3WpfV/gTCeCP/5i/D/ZyUcOkvgnTmaiOAu5aYwoTBOQK4D3v4eFT\n41UlHruArwTmNKimjBWZDA986ejgv7HbbgPWrOEyfcnkmTePrf6tW9mKS6c51U9Qw6S+KUVnZO0l\nlxQKP8DCn0oBN9/Mt+OVDRY7yx/Qnj7K+OH+rb30EvCd7wCXX849/rdutbn++/axJScj+tTnX/+U\nqjPf+17hYzNmAL/zO7aR23g2o4yl+CvKeNPVxcVcAN8uX86Wv8z/Xb+eBX/NGnuMZqU1DsFusABb\n+M8+y0OAZAb0eBqusXP7KEo1CI7j27+fc/qloGdw0D+Sr5i8cKV+CRZ8tbayMSB4HruBBgZsIHgs\nRzaGoZa/olSAtjYewO3eB/xtH9Jp+7xmpcWXsDTQ3l7g4ouBhx+2mT2AvRoI/n2MByr+ilIBZPDG\nunWc6y++Wyne8TwWALfYR7PS4kkwtVPaOCeT/LuWsZ6TJxf+fYwnKv6KUiGmT2f/7d69wPbtbPGl\nUv5+7mIRJpPA3LksABr8jRfptG345/r5BwfZSJg6ldc88oitCE+lxt8AUPFXlAoRVfgjGT779tnn\nczl2AUyYwOKv1D/ZrH/IT7CBWz4PnHACd3f9m7+xdR8yH3q8DQAVf0WpEBLUy+f5Np3mzJ+tW+2g\nF3leCsLcAfBKfSKiv369v2WzW7Ur3HUXXwG4j8l86PFGxV9RKogb4F282DbwAvj7WbPY2n/ySbtu\nvAN9SuWQ4G5fX3iH16lTueVHLmfbgQTXNTVVJ+aj4q8oFaKnx/5zu60chHwe2LOHRUAsQiJ2Byn1\nibj6RNCDlv5f/ZUN/h89ypY/wIJf7ZiPir+iVAjp4dLfX1jQ46b2BSeArVunQd9ao9gCvKCrb+FC\n9uvv32+rdoULLrBXAF//uv+5aqDirygVQnq4rFgB7NhhN4AzzwRuvNE/wNu1DgcGbFBY2z9Un1Lb\nwrtWf9TvTa4Q8nleVw0ffxCt8FWUCpLJsPi7TdxefJEv/RcssFcAQb/vk08C558P3Hcff7W2atVv\ntQgrwBturbj6crnwtdksZ/jU2ghPFX9FqTCZjL+1g4hCezsHe72Q/7pnn/XHCQYGxr/cX2Hcec0i\n1FHzecPWushVxP338waxaFHtdBtWt4+ijAHt7cDGjYX93sUt9Nhjfuu/VjJAFPt7Ep8/EO0GCq4N\nirp7FQFw9k8tCD+g4q8oY0KUKIhb6Ac/8KeBErGwVDsDRGHc7prXXw8cO8bfHzvGcZlMxt+qA+Dq\nbrkvGVwtLbXbxkPFX1HGiKj2vJkMcNNNwB138P1kkuMBKvi1RzYLPPCA/7F161jUZYqbm9kVTPVs\nbuZ1kv1TS79f9fkryjiTzXI5v2R+rF5t+/yH+ZWj/M3K2CMBXZfBQd4A+vrCRzK6HD/Ouf2PP86b\nQC39DtXyV5RxprvbpnzmciwkgK0ITiY5+0dcCxdcwBam5wH33FP9/PBGorXVVuYKngc8/XR4RW8Q\nIj52vObyloKKv6JUmd27ufJXrMjBQeCWW4CPf5xTBMW1kM8DS5bYiU9KdZg8GXj9dXvfdfUE3T6f\n+hSP9lSfv6IoaG9na99N7Qy6D3bu5C8i/3OSNqriPz709BRa+K++6r/v1m64az0P+P3f5yu6WhzX\nqeKvKONMJsNtAO67zz4mQz0EEZGgmEjfd53/Oz60tvLPvL+f7wc3aSHMNSS/q/Gcy1sKZQV8iei3\niegxInph6PadEetyRLR/6GtLOe+pKHGgvR2YOJFFIpnkgO/atcCUKeHriYA5czhwCOj83/FCUnZv\nu41/R2EFevk8u+IEz7O/q1oUfaHcbJ9bADxuj
HkvgMeH7odxzBgzY+jr0jLfU1HqHhGVSy8FzjqL\nH5s+HXjzzfD1nmdTBbu7OdOkmPYDSvlkMmzB9/YCn/xk+JpnnrHfex7XctSy8APlu30uA9A69P1G\nAD0A/rLM11SUhuDAAWDzZv5+927gvPMK0woFYzhV8KWXbKsAgK8aaimIGEe6ujjQnstx5XXQRQcM\nX61dq5Rr+f+uMUbi3m8A+N2IdROIaA8R/QsRzYt6MSLqGFq358iRI2WemqLUNps2+e9LgDeMfJ79\nznfc4d8g5s+vfQuzXgirp+jq4grfgQGbrulm9pxySuHvzJj6uBob0fInoh0AJoc89dfuHWOMIaKo\nPe/3jDGvEtF7APyAiA4YY14KLjLGdAHoAoBZs2bVyf6pKKOjrQ149FF73xjgne8EwuwezysMKiYS\nXGm6cqUGfsslrI0zwBZ/sII3meTfQ3Mz8PnP8xWZTPKq1jD20TCi+Btj5kQ9R0T/QUTvMsa8TkTv\nAhDqsTTGvDp0+zIR9QBoAVAg/orSSHR0sBvnjjuswIQJvwR729qAT3/aZp7kcsANN/D3UX3n454V\nVKnPF2zj3N0NvPyyv/8SwOK+ejX7/9Npvu3s9N+vm5+1MWbUXwC+AuCWoe9vAbAqZM07AaSGvj8R\nwAsAzhzptWfOnGkUpRHYtcuYCy80xvMkU9z/lUrxGmOMue668DWJhDG33174us3NxhDx7a5d/HX7\n7fb16pldu4yZOJE/+8SJ5X0m97VSKWOSyfCfM2DM7NnGrF1bufeuNAD2mCL0u9yA75cAfJuIFgL4\nOYA/AwAimgXgOmPMNQA+AGAtEeXBMYYvGWMOlvm+ihIbpNPnzp1sdSYSwDnn8FXASSfxJDDpGNnS\nUlhFCoRXj7ptJI4fB1atArZvL35CVa0TNnRltJ8nk2ELft064D/+A/j5z+1z06YBP/uZvb97N7B3\nL/8OarFtQ7GUJf7GmF4AF4Q8vgfANUPf7wIwPbhGURSL2wJaBn0PDvKQl507/f7kD32Iu0QK06YB\nDz00svi89lrlxLIWkEEqo2mdEHQXZbN+l5rLxImFG24+z5u0tOKuBx9/EK3wVZQaQYT4vPP8vmYR\nHbEyzzmHrwQk+Pvaa+Gv194ObNhgxXHhQj6uFvvMjIaRBqlE4QZ3k0nOmALsVVKQF14Ib9X89a/X\nmY8/gIq/otQQPT2FOeSSV+55LDrt7fz42rX+2bEiQK5V+8QTfnGcPj1eAeDRtE5w3UW5HP8cm5p4\nI5B+S55nvfyyEScSwLnnshsuDrMXVPwVpYZwe8l4Hg99mTQpPJMkOCYS4Lz0xYt5s0il2DJubbV5\n57XaZ6bSDJcFJO4iSc+UDXTRIrumpaXQDWQMd1q99dZx+ADjgIq/otQQxboywtZls5yXLpZqfz8H\nfd1Not6DvMUQlrPvfmb52XV3A+vX25z9lhb/Brtvn7/5HhG32M5m4/EzVPFXlBqjVOtcMoEOHSrs\nLAnEK8g7HGLtuzMQ+vs5kyqs187UqXbE4owZ/L27YbS0FL7H/ffzZhqHTVTFX1HqEHfCl8QDkkn2\nS0tm0D33sI/ftfzT6XhWBLvWPmDjJvk88NhjnDElgh382REBO3ZYF5BsGO95j7+Pj2yscdlEVfwV\npQ5wfdgAi5M7PDyf52Cl9JlJJOzEr85O7iMUZt3Wu4AJbhA3iDF+wZauqO7MBLdfTz7Pm0FYPYUE\n3es9UwpQ8VeUmieYmigZKGK1homYZAABLPj9/X7rNi7WqxAM4rp4Hm+Ghw5xQHz9+vDOm0TAaadx\nW4ewoS3Sp78e2jUXQ7ldPRVFGUOyWWvli99eOkwSRXcBleCkWLkyA1hcQnGxXgUJ4l57LWc5eR6n\nby5fzj2UiNhf7wbEAf/Pzxjg8sv5+DBSqfgIP6CWv6LULGG+6WB3z6je8fk8568HXRdE7Mu++eb4\niJgggfL2d
n8W1MqVLPi5nN38ZOMMuonefhu4+mrg4EHgySft4/Pm8UYSp5+Zir+i1Cjix5aALmDd\nNuKbFjFLJOx9sfJlvYsx7NZYtszGBOKGfCZxe4lLKKx2ws3lTya5InpwkNcvX86ZQG1ttjjOff16\nR8VfUWoUt3eNWPsi8IDdBN73PuD88zk1cdMm/4yAMEbTjKyeWkOH5fl3dtppXKtX22D39OnsGhPu\nv9+61yZN4kZ4I9UN1Csq/opSo7iFXOm0zdRJJGxrAmO4+dvzz7NPeunSaPFvarKujjCff5TAjyR+\ntbYxuJk/btqmXBH19bHgi5tIzrmry7rW3J9PJbuH1hIq/opSw7jiJK6HdNoOcRFca95l9mxu6CaV\nq0DpAj+c+NWSVSybUDpt3TyS5+85qS3GsHvH7c+TzfLmKt06Ozvtc+V0D61lVPwVpU6QjWDlyuj8\n85NP9j/+6qt86/ajkUInt9hrOIEP+szTafta5VrFlbpqCG5CS5dym+vDh23vHgnySqrrqlW8OUrv\nI4mvEPFm6f68RtM9tNZR8VeUOsNt/pZIAJ/5DGepvPEGPy8zZo1h8b/2Wn68o4Nvw6z14axbKRRb\nvJhf1w0Wl9tTvxR30nAbhbsJ9fX5R2O6SOzEGGDzZmDLFv5ZdnYO/zmCQeQ4bAAq/opSZwQtUQD4\n6Edt1ornAe94B/CrX9ljNm2y4t/T4+99I69z9dX8fFi74t7e8MlVpVrFroAX605KJICLLwa2bbPx\niuBG0dpqYyFusZtABEyYAJx9tj+FUz5Pb+/wn6OW3FuVQsVfUeoQNxawcqV/EEk+7xd+gNMVARax\n3bv9bSGOHvULW3t7oZU90pVBMUIYFNDhrO1gz/3Nm+1zYe6lTAZYsMDOOHBJJLhds7RpdhE30NGj\nw3+OOAZ9VfwVpc5xffJhELGbRsS3r88+53mcy+4KW1gbaGD4K4NiCArocNa2a8kHP0tUptIbb9hG\nbO4GsGgRsGaNLfaS15FxmMaw//+00+zVUZA4Bn1V/BWlzslkeGLXNddwZWoY4qs+ftwvjIkEXxXI\n8PjmZrsuajOQSWJRuFk3vb3+26CAhlnb2Sy/p9QxSCFbUxOPXJT3l4A1wLdy9RNseXHCCXaN+/7y\nWQXXNRYkjkFfFX9FiQGZDPDAAyxMMopQRH7CBCuSEgwWZG0whuCKPVC8yyOsJYU7fL6zc/i5t+7V\niZx/sKFa0H109dV+t1fQ7XPXXdyeISjgBw6wC0wQ11gUcZuCpuKvKDFBUja7uzmPfWCALfulS63g\nzZ/vn04FWIvXFbbgZrB+vc2Bb22NzrxxUyaBwuHzvb3Dj0GUYLTbYjnYUC3oPpIsJ0Es/2CH02BR\nl9xu2sTCH2X1xxUVf0WJEbIBSMtnALjzThZCCbI2NVmLHwi3eF2RzGatoBKxxRw2FyCb5U6i0nba\nTbUsppNoNgv80z/5jyPyF1wBNh4gm9HkyYWtrV2Syehq5nSan5s+Pfq84oqKv6LEjKieQGJ5//CH\nwC23cIO3T31qZItXNhOZI7BpU2FMQObhDg6yEF96KXDGGexyGRzk8wiKuEs2y/2J3E0J4Pd0C64E\nEftcjn36iYS/VbPLggXh1cyuaymV4rhJnNw6I6HirygxI9gTaOlSO+Xr0CG23J96ioV79WrOchnO\nDx8MlM6YAfzgB7ab6IYNhYHk73+fb2XTiBJxobu7UPiB8KuFnh67NpfjDeamm/hWNjr3+GCAOuha\nAuywexV/RVFqlmJaIojbRlw2YrVL8zJJh+zv5z5BuRwL+b33Fl4JZDK8gXznO8CHP8wbhrRLOOcc\n4Ec/KnS15HLA1q328TDXy0iceSYHsV33k2xowdm6kybxFY08v28fPxeWlppOR89BaCRU/BWljii1\n0tS1koHwlgeS/ZPL8UYQ7PP/l3/JefAA8OKL9nFjgH/+Z/6eyA6PN8bvhiH
iQDPgT890N7D2dmDd\nOnuuTU2Fwi9ZRDKQ5oUX7GfavZtfS4LJslGE/fyWLAmPC4yUwho3VPwVpY4otdL06NFwwQfsLIBn\nn7WP5XLs/li1CnjtNRbUO+6Ifv1gcPaee9i9c/So3TCMYb+8266BiDeHZJI3hpYW7j4qmTuTJxd+\nbndgvQi/sHkz996XgrThOpQG3UunnAL8wz80lssHUPFXlLqilErTbJYzfQQi7m2zfz8LbyIBXHIJ\n8NOf+nP/3RYJbh78SAwO2lTOlSuta4aIXUYi3m4Fbi5n308KuSSQu3GjFW63k2gUbkvrYO8it0Np\nsHL4qqsaT/gBHeCuKHWFBHO/8IXiXD6uZZ5McsbN6tU2C+hrXwsf9RiGuHa8CNXwPA4oZ7O286jn\n8eu99JK/6Cvs/USs3e6cCxcCf/In3JNnJD+9MbxZuVc7+bx/48hkuN2De86TJg3/unFFxV9R6oxM\nhq3rkaxVV4CTSeDuu/mYYIfOKLeQCxG3ht65k/3tUdx/P7tcAN6c5syxG4DncWaRWzMw3GYiU8o2\nby7Mzgkjn+e1rpvK8wqzjNrbgYkT+b1TqXj06RkNKv6KElPkKuG227iNcUeHLcRKJPiruZk3hjAS\nCb4lYneMZM5cfnn4WnHXHDvG/v5MhitzUykrtJdf7i/GuvJK3iCKsb6DPXuikM1MWkqE9eYv9uop\nzpTl8yeiPwWwAsAHAMw2xuyJWPdxAF8DkADwgDHmS+W8r6IoxRGs1JVAqOcBM2eyW2XfvvBWyOIX\nlzTRAwf4tcKE+owzOHYgbN7MaaUdHf5WET09ftfPN7/Jt0Hr303lBHjzmDkT2LOnMI//4ouB733P\nX+RFBMyaBZx1Fp+3DGmXDSxufXpGhTFm1F9g0X8fgB4AsyLWJAC8BOA9AJoBPAPgzJFee+bMmUZR\nlPLZtcuY22835rrrjEkkJBnTGCJjJk40Zu1aviWyz4V9eR6v3bXLmFTK/1zYsSecwOuFtWuNmTw5\n+rXPO8+Y0083ZvlyY+bN8z8/ezYfn0rxezU18efZtct+xnnz+PN5njHNzbzW8/yvk0rZY+IKgD2m\nCP0uy/I3xjwLADT89dhsAC8aY14eWvstAJcBiGg+qyhKpQhOxEombbaNMf6++itWADt2RMcA8nng\n+uu5N/4TT7A1/fTTXC0c5o9/+22OEzz5JPBf/+UfyBLGOefwVUU6zdW6Lnv2AM88468Ydgu4Mhng\nu9+1+f2HDnH8IfhZ4jKIpRKMR6rnKQBece4fBvDhsIVE1AGgAwCmTp069memKDHHrQsAbKbL+vV2\nJKIUWq1YYfv6J5PA3LnAz37Goutm5CxezIK+Zg2L7XnnRffVAYAHHxz5PPN5jhN4HrtsgkNcJBNI\nGBy07RiCFc/y2MaN/toAID6DWCrBiOJPRDsATA556q+NMQ9X8mSMMV0AugBg1qxZWoCtKGUSrAsQ\na7m9vbBFRNhsYMncccnn/S2Sb7oJ+MpX+LlkEpg2rbAIazjcGICkg460ToiqeA72Nxqu3UOjMqL4\nG2PmlPkerwI41bk/ZegxRVHGmKgJVK6FLC0X3EBoNsttm48d4/VEVpTdDJpslmsHXPE+ejT6fCZN\nYqv+l7+0j8lr5/O2WZy4d1zc+5J9NFzFswZ1h2c83D5PAXgvEb0bLPpXAvjUOLyvoiiIFsEoqzms\nvXJzM3DjjVwd3NYW3S4hlwOOHIk+ly9/mXv4uJXDJ54I/MEf2PuPPMK3RMDUqcArrxS2kVi40J5D\n3Gbrjhflpnr+CYDVAE4C8H0i2m+MuYiITgandF5sjBkkoiUAtoMzf9YbY35S9pkrilIWUVbzqlWF\n/W/mzuXK4P5+bucMcBpna2u4OyaK3l4Wblf833yTg8Fh/v7Dh9mVJMNpJHdfmrDFcbbueFFuts93\nAXw35PHXAFzs3N8GYFs576UoSmUJTsS
S8Yxbt/rXybQstzfPDTewH729nQe3jJTJA7CrxhXor3yF\n2z64LqMgxvAwlqlT7SD4oMire2d0aGM3RWlgxI+fy3ExlLR+cLnySv9aWb92LWfUdHayq8bNxgm+\nvjH+4zs6uHV02LB391ix8lXcK4+Kv6I0KOKvl7YMixdzS+ZUyp8i+eCDNhALhNcJPPGEP7PmjTds\nW+auLlslHAzIBjNyJAU1kWCLPyj8xQyyUYpDxV9RGhTX7QPwbVTBl1jmw9UJhIlxV1d0h02X6dP5\naiAsBdWd4BU1OF43hNJR8VeUBiWT4U6fixezMEsKp1vwJVcAnjdynUCQbJaHvYs7J9hh053O5Xl8\n1dHRET5sPWwYvfTuD3sNZWRU/BWlgRGh3LTJn8IZdMkEA61RdQKCK+wi/MEOm+50LgkiB0dIuhlJ\n8jpE9ooj+BpLlhS+hhKOir+iNDBSzHX8OFv6rnC6ufxhFv5wdQIrVvivGubM4cfc15A0UUFGSAbX\nuHn8nZ2FG1FwmLv27ikOFX9FaWCGq5AdaVh8dzdP25LAb9AN4+blB4Uf4PuXXDJ8muhIefyZDLt6\nlizhz9DIw1lKRcVfURqY4WYCj7QxbNhgUzOlTkCOCbP4wwKzy5cD27Zx1pG0bAgyUh6/pI1q0Lc0\nVPwVpYEZzrIeaWOQTp5EnJYZ1m6hrY3XHjgQnqmTyfDz5Qq3FnqVjoq/ojQ4UcJZysYQ1m7BTc0M\ny9TRBmzVRcVfUZRIRrMxyDErVw6fqaNUFxV/RVFGRXA+cHAjKCZTR6keKv6KopRFMQNVVPBrDxV/\nRVHKQgeq1CdetU9AUZT6Rtw7iYT68+sJtfwVRSkLde/UJyr+iqKUjbp36g91+yiKojQgKv6KoigN\niIq/oihKA6LiryiK0oCo+CuKojQgKv6KoigNCBlpyF1jENERAD8f5eEnAvhFBU+nGtT7Z6j38wfq\n/zPU+/kD9f8ZqnH+v2eMOWmkRTUr/uVARHuMMbOqfR7lUO+fod7PH6j/z1Dv5w/U/2eo5fNXt4+i\nKEoDouKvKIrSgMRV/LuqfQIVoN4/Q72fP1D/n6Hezx+o/89Qs+cfS5+/oiiKMjxxtfwVRVGUYYid\n+BPRx4noeSJ6kYhuqfb5lAoRrSeiN4no36p9LqOBiE4loieI6CAR/YSIbqz2OZUKEU0got1E9MzQ\nZ/g/1T6n0UBECSLaR0Tfq/a5jAYi+hkRHSCi/US0p9rnUypENImI/pGIniOiZ4mopvqexsrtQ0QJ\nAD8F8DEAhwE8BeCTxpiDVT2xEiCi8wD8CkC3MeaD1T6fUiGidwF4lzHmaSL6TQB7Acyrs98BAXiH\nMeZXRNQE4J8B3GiM+Zcqn1pJENFNAGYBOMEY84lqn0+pENHPAMwyxtRlnj8RbQSw0xjzABE1A/gf\nxpij1T4vIW6W/2wALxpjXjbGHAfwLQCXVfmcSsIY8ySAt6p9HqPFGPO6Mebpoe9/CeBZAKdU96xK\nwzC/GrrbNPRVV1YSEU0B8McAHqj2uTQiRPRbAM4DsA4AjDHHa0n4gfiJ/ykAXnHuH0adCU+cIKJp\nAFoA/Li6Z1I6Qy6T/QDeBPCYMabePkMngOUA8tU+kTIwAB4lor1E1FHtkymRdwM4AmDDkOvtASJ6\nR7VPyiVu4q/UCET0GwA2AVhmjHm72udTKsaYnDFmBoApAGYTUd244IjoEwDeNMbsrfa5lMkfGmPO\nAjAXwOIhl2i9kARwFoA1xpgWAP8FoKZikHET/1cBnOrcnzL0mDKODPnJNwF40BjznWqfTzkMXao/\nAeDj1T6XEjgXwKVDPvNvAfgjIvpGdU+pdIwxrw7dvgngu2C3br1wGMBh54rxH8GbQc0QN/F/CsB7\niejdQwGWKwFsqfI5NRRDwdJ1AJ41xtxZ7fMZDUR0EhFNGvp+IjiB4LnqnlXxGGNuNcZMMcZMA/8P\n/MA
Y87+qfFolQUTvGEoYwJC75EIAdZMBZ4x5A8ArRPS+oYcuAFBTSQ+xGuBujBkkoiUAtgNIAFhv\njPlJlU+rJIjomwBaAZxIRIcB/I0xZl11z6okzgXwvwEcGPKZA8BfGWO2VfGcSuVdADYOZY95AL5t\njKnLdMk65ncBfJdtCSQBPGSM+afqnlLJLAXw4JAh+jKA+VU+Hx+xSvVUFEVRiiNubh9FURSlCFT8\nFUVRGhAVf0VRlAZExV9RFKUBUfFXFEVpQFT8FUVRGhAVf0VRlAZExV9RFKUB+f8FvkT+M2urzAAA\nAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Up8Xk_pMH4Rt", + "colab_type": "text" + }, + "source": [ + "## Split our data\n", + "We now have a noisy dataset that approximates real world data. We'll be using this to train our model.\n", + "\n", + "To evaluate the accuracy of the model we train, we'll need to compare its predictions to real data and check how well they match up. This evaluation happens during training (where it is referred to as validation) and after training (referred to as testing). It's important in both cases that we use fresh data that was not already used to train the model.\n", + "\n", + "To ensure we have data to use for evaluation, we'll set some aside before we begin training. We'll reserve 20% of our data for validation, and another 20% for testing. The remaining 60% will be used to train the model. This is a typical split used when training models.\n", + "\n", + "The following code will split our data and then plot each set as a different color:\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "nNYko5L1keqZ", + "colab_type": "code", + "outputId": "b9f9c57b-b6aa-4817-8ab4-4a2201732b9a", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 269 + } + }, + "source": [ + "# We'll use 60% of our data for training and 20% for testing. The remaining 20%\n", + "# will be used for validation. Calculate the indices of each section.\n", + "TRAIN_SPLIT = int(0.6 * SAMPLES)\n", + "TEST_SPLIT = int(0.2 * SAMPLES + TRAIN_SPLIT)\n", + "\n", + "# Use np.split to chop our data into three parts.\n", + "# The second argument to np.split is an array of indices where the data will be\n", + "# split. 
We provide two indices, so the data will be divided into three chunks.\n", + "x_train, x_test, x_validate = np.split(x_values, [TRAIN_SPLIT, TEST_SPLIT])\n", + "y_train, y_test, y_validate = np.split(y_values, [TRAIN_SPLIT, TEST_SPLIT])\n", + "\n", + "# Double check that our splits add up correctly\n", + "assert (x_train.size + x_validate.size + x_test.size) == SAMPLES\n", + "\n", + "# Plot the data in each partition in different colors:\n", + "plt.plot(x_train, y_train, 'b.', label=\"Train\")\n", + "plt.plot(x_test, y_test, 'r.', label=\"Test\")\n", + "plt.plot(x_validate, y_validate, 'y.', label=\"Validate\")\n", + "plt.legend()\n", + "plt.show()\n" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAD8CAYAAACfF6SlAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi40LCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcv7US4rQAAIABJREFUeJzsvXt8FNX9//+cmd1JEDUpUctHEbR4\ngWBCEvAyRXQwCl6r/eEV26WgpFoQsaiVfj62fIoV64VGBRWoIPl+VD7thxatN5CVEcShKBJuiwiI\nUFRaTU2ol+zszpzfH2c3uwlBbgmX5Dwfjzxwd2d2zq6zr/M+7/O+aEIIFAqFQtG+0A/2ABQKhUJx\n4FHir1AoFO0QJf4KhULRDlHir1AoFO0QJf4KhULRDlHir1AoFO0QJf4KhULRDlHir1AoFO0QJf4K\nhULRDgkd7AHsimOOOUacdNJJB3sYCoVCcVixfPnyz4UQx+7uuENW/E866STefffdgz0MhUKhOKzQ\nNG3Lnhyn3D4KhULRDlHir1AoFO0QJf4KhULRDjlkff4KhaJ9kUgk2LZtG/X19Qd7KIcFubm5dOnS\nhXA4vE/nK/FXKBSHBNu2beOoo47ipJNOQtO0gz2cQxohBDU1NWzbto2TTz55n95DuX0UCsUhQX19\nPQUFBUr49wBN0ygoKNivVZIS/3ZIXZ3Lli0TqatzD/ZQFIpGKOHfc/b3u1Jun3bGkiUu9fXlGIaH\nrpv07h0lL8/ao3Pr6lxqax3y8+09PkehUByaKMu/HeG6MH26A3iATxB41NY6e3RuXZ3LypXlbN58\nLytXlqtVg6LNUVNTQ0lJCSUlJXTu3JkTTjih4bHneXv0HsOGDWP9+vWtPNKWQVn+7QjHgeXLbW64\nwQACwCA/397pONeVx9o2WCkDv7bWIQgaTxrK+le0JQoKCqiurgZg/PjxHHnkkdx5552NjhFCIIRA\n15u3m2fOnNnq42wplOXfVnFdmDiR1dNcJk6UD20bwmHpK5TuQo01axqfNm0anH8+/Nd/QXm5PA8g\nP99G103AQNfNZicNheJAk7rNG+7T1mDjxo0UFhZy44030qtXLz799FMqKiro27cvvXr14je/+U3D\nseeeey7V1dUkk0ny8/O555576N27N5Zl8c9//rP1BrkPKMu/LeK6UF6OiHt0D
0xe1qNMyLGIRuHR\nRx0SiSS6LvB9j7lzqwDo0sVh2zabkSMtkkn5NvG4XAFYFuTlWfTuHVU+f8UhQ+o2x/PANCEazaxU\nW5r333+fqqoq+vbtC8ADDzxAp06dSCaTDBgwgKuvvprCwsJG59TV1XH++efzwAMP8POf/5wZM2Zw\nzz33tM4A9wFl+R9m7JGl4zjgeWiBTxiP/oGD58GGKpfi6q0Q6AgBui4YNGg68fgANm++l/r6cnr0\nyLyxYcjVQpq8PItu3cYp4VccEqRuc3xf/us4rXet7t27Nwg/wPPPP09ZWRllZWWsW7eOWCy20zkd\nOnTgkksuAaBPnz589NFHrTfAfUBZ/ocRe2LpuC5s2GpzY8hEFx6JwGSxbnOu4XLjzHKMpMcx/y2o\n+T5oGhiGj4YPgGF49OnjEItZ6DpMntx6lpRCsb/YtvwdpH8P2YZKS9OxY8eG/96wYQOPPvooy5Yt\nIz8/nx/96EfNxtubptnw34ZhkEwvqQ8RlPgfRjRn6WSLc2ZysJhhRJlV4bCj1OayGoshWydiTJcn\n5/wL0hHCGqT2fnV03WTECJvTT2+82atQHIpYljSAmgYntDY7duzgqKOO4uijj+bTTz9l3rx5XHzx\nxQfm4i2IEv9DiN3F0e/K0klH52zdmpkc3sLiua4WdhHgwI7SzMmdFxpsvySJ0AK0JJw6WSNxUV/q\n/1nG5pq5DOwzhiMTx/PJJ5eQSNQoH7/ikMWyDryRUlZWRmFhIT169KBbt27069fvwA6ghdCEEAd7\nDM3St29f0Z6auaTj6IPg25Ov0kJ/eYFLUY3D6gKbs8dYeB6EQiCEFH/ThMpKGDNGTgjnGi7PX1rF\nf7AdOnemzjqa2jcmkb88IG9DGD8Q1J2aYPUkgcisVhHoaFoOpaV7ngymUOwL69ato2fPngd7GIcV\nzX1nmqYtF0L03cUpDSjL/xBhT+PoLQssMs7/HrpJmR9lSSCPHTECunaFggKYMwfq6+Fs4fKKX07u\n3DgQIDSdI57O4fM7ppA3pAaWLUOb+wL/LhGIMA0+ISFA0wKSSY/58x02brQaltfN5QIoFIrDByX+\nhwjpOPq05f9tyVdDtjp0S/l3QsLjAt1hqWZhmhCJyGPLy6XwCwE2DiYeOgEC0EQACY9Zk2q4ZopN\nr5f/Gw1BXjVoCRpZ/smkTjJpMmGCTSwmVxTP3eby3iSHNwK7IYRUTQgKxeGFEv9DhObi6LPFFDKR\nPvMMm2jIxMBDM02uqbTpUJMR3YkT5XFpj56DTRIDIxXVIwCfEG8ENhcsrWLrNR5mHXh58OXjJdSc\navIJx/PWhkvIz69h5UqbNWukmpfFXS5+uJzLA497MBkYj+I4VqPxfVvMtZogFIpDgxYRf03TZgCX\nA/8UQpzRzOsa8ChwKfA18BMhxHstce22RF6e1eDqaRrWOXRo483cZ0dEiXSVPv+XajLumLo6l3PP\ndSgutlm50qJHD5fupQ5/XHkpP177ApoQ+Gg8wzC+7g3cOJ2PNCFdPQHkJNby4J1vsnatDPfs2xey\nS5Wcj4MpPHR8BB625rB1q0VVVeNIpA1VLlYTlT+QSTkKheLbaSnL/xlgMlC1i9cvAU5N/Z0NPJn6\nV9EMrgvjx8sM2yCQYgmNI31OjVi4WI3E9PXXXXxfbhpPmmSyaVMlJ588Bk3z0ESIHXeGyVvlQ8jk\niGERKm+oIvB9KfwCMMAgwe1Dx/BYVSWbNlmUlUH2vvvCwCYIm2i+h9BkDsGS6aDrcHbgch4OdRRw\n48wxkGys8rsKVVWrAYXiwNMi4i+EWKRp2knfcsiVQJWQoUVLNU3L1zTtP4QQn7bE9dsSaes4Lfy6\nToMvPxJpLJITJ0L37i7FxQ6rVtls3OjQrZvcNAaPvn3n8MUXqcca1I67grw/fI0xeDCRCov166v4\nNPv/QACaDqeWLePR0gFs+2QhrmuRXTbcx
WJAEOV/KhzexGbJdAvfhzN9l9cpx8Qj8HX0wAcRUNc9\nTu0H48kvHI9tWxQXu/Tq5bB2rY1tW2o1oFAcJA6Uz/8E4O9Zj7elnmsk/pqmVQAVAF27dj1AQzu0\nSFvHaeG/8EIYPDgj+uPGZY4999xpnHnmKDTNJ5HIoWPHSoTIbBofe+xg6uoWy8eEyJ/4CqzyYfFi\nKCqic2GE7dtnIgIPkYSjNgi+PB0wQA883njD4X/+R7p/giCzh/BFD3ihJ/TpA6GZ0pJPbyqH8PEJ\nCNDZfrnGxtsDAuN1jJWLOeWUSiZNGoMI4uiBQWEwmScWVXxr4ppCcaCoqamhvLwcgO3bt2MYBsce\neywAy5Yta5Sx+23MmDGDSy+9lM6dO7faWFuCQ2rDVwgxDZgGMs7/IA/noNA0kWvw4EysfigEw4bJ\nFUC3btPw/VsJhQIADCNO16415OdHWbXKobraJhy26N27SG4iv7iVvFXTwffx6z2WP+gQPWscJ5yw\nkNpah6//VsCIVbex9iGPQECAybvv2vi+FP0rr4S//hVOP93l4YfLCYc9EgmTO++M8tvfWo02lXUE\nX/SC9aNBM0DTBL4f57PP5gBxND1ABAG100dy+feLmGBajRLXlBtIcTDYk5LOe8KMGTMoKytT4p/i\nY+DErMddUs8pmtA0Zd1xMq6d6mqbqVMt/vY3l0mTRiHrMkg0Tdbmj8UsBg2ystwoFpZlQV8XPzSL\nwPdICJPb59osnQtgpf7glVAR09+qIncQPPdWhLVr5fNBAKedJnMIcnOrMM16dF0ghMegQQ6TJlks\ni1s8I4ZTIaaiI6grDkAXaJqcPILA4NgdJdQlFhBooCchf3lAt9MdolGr2agm5QZS7JYDZCnMmjWL\nKVOm4Hke3//+95k8eTJBEDBs2DCqq6sRQlBRUcF3v/tdqqurue666+jQocNerRgONAdK/F8ERmma\nNhu50Vun/P27JjtlPQhcSkszlvbYsVF69XIQItnIF69pd/DEE1ajEg9lcZf4eAfG22BZPDssyvqp\nDm8Im6Xs/EN5y7f4c0eLcf3BuQ8KC11KSuSk89ln8JOfVBGPP42miQZBr662qayEmhroVxBBHzML\nPI+8NQbJhIYuEgihs/6VO7CffJyOpwTU9ob8lZD3YQ7Ydubzui7OeIeyuM2SwFJuIMW3c4A2jNas\nWcNf/vIX3n77bUKhEBUVFcyePZvu3bvz+eefs3r1agBqa2vJz8/n8ccfZ/LkyZSUlLT4WFqSlgr1\nfB6wgWM0TdsG/BoIAwghngJeQYZ5bkSGeg5rieu2B7p0cdj8YRy0AEScsjK5WappISCROkpjxox8\n/ud/ZBnmUAjOES7zg3I6LPDw3zR5dliU+lKL3+dafPNN89cSQmYGA9x4o8txx8lJx/dDhEKCIEgQ\nCklrPgg0XnppOI8+apGbC7fdBr/fANf8fijf96BT3wjbN8DatQ6vvWZzo1/F1sH1dKqGbs8hNzSe\nrATLkjWNVlWRf/sMzl/pMz8wGahHec+0WrVSo+IwZ3eVDluIBQsW8M477zSUdP7mm2848cQTGTRo\nEOvXr2f06NFcdtllDBw4sMWv3Zq0VLTPDbt5XQAjW+Ja7YbUcjb/hFr04wKCEIREwFXFtdxyC+Tl\nXcbnn/8VEPh+DsuXS/98z54uI0Y4FFdvpcMsWdM/iHusn+rw+1yLykpYsQKefhqSSVJCLi+p69KC\nBzj5ZIdEwkPXfTQt7V6S2zBBoOF5ucyfH0EImUn80ksujzwiJ4t3MelbGCFiWUycaMnVy6Sn2RIS\nbE1CyR2Q9z5QU5OpaeTXo98v6D0Wjn7f474LHXLGWw0rArUJoNiJA1TTWQjB8OHDmTBhwk6vrVq1\nildffZUpU6YwZ84cpk2b1ipjaA0OqQ1fBVLoqqpg5kxIJsnTdU4ZBBtuB6HDMX0eIZGo5LPPfDQt\nxH/8x
zA+/zzCpk0WZ5zh8tBD5eTmeujFIXZUGxxZDQlh8oawqa+Xwv/AAy5Dhkh3Tk6OxZgxMrRU\n16Xl77pw++02999vEgp5BEEI8GXtf83g5Zdv5rXXIsRiGSEuKXEIhz0MwyeZ9Fi1yqF/f4vLC1xO\nHDiGUDgBGogwbB8ER2zI4f0Cm9pVDr7voWmCIAS1ZRp5m03s8bbcilCxoIpdcYBqOl944YVcffXV\n3H777RxzzDHU1NTw1Vdf0aFDB3Jzc7nmmms49dRTufnmmwE46qij+Pe//90qY2lJlPgfSqSFLl2U\nB0AIEvk6QgvAgEDISBldFySTgu3bP6SkRP4GPvjAITc3VRwOqH10BLXPdeV30wuwfQeEvMSKFeWA\nR+/esnooWIwaJVfPt90GJSWwcqXF2LFRSkoc6uoKGD16NJrmAwa9e0d47DEr1QwGrrsOVqywSSRM\nhPBIJk3WrbPpH3IpGlOOecs3jWJ639HO4ia/EmMUXH76Vno/FEIPQUgPkX/WMPhZJPNDPkBLe8Vh\nygGo6VxUVMSvf/1rLrzwQoIgIBwO89RTT2EYBjfddBNCCDRN43e/+x0Aw4YN4+abb1Ybvoq9IC10\nKeEXmkbSyGHzsbfhJ34PwscPwgghMIwkhhHg+wtYsWIxq1dHKSuz8f2s4nDFEbaug0minBAeHib3\nnTE09bpPENSzfXsVNTUWQZDJJl62TA5n3TqLWMxiyJCJGIbs+wtJzj/fYdEiq5HB9YtfWEyZUkn/\n/nNw3cHcfbcFjiwy1HmetPaFCUnf5IF5lRwVwPygHHONR81Yg8qSEfwjJ0LOCKvxb/lAtmtSKFKM\nHz++0eMhQ4YwZMiQnY5bsWLFTs9de+21XHvtta01tBZDif+hRJbQ+brB08FwZiUjLH3Uose8qxoi\nbwCGDh1Pnz4LMAxZcnnZModf/GIcr78epVs3pyHs89WRE/lVIJOvBB7fWQmJhIFp+oDg009nct55\nEUzTarTgyE4w27rVRggTkJPKtm02ixY1Xmn/8pcuK1aMQQiPs85aTGFhESA/T956j5JxBuvHDWfE\nfTKE9B4mNiSFdYoBsa7M0i1mz27i2TlY7ZoUijaOEv+DTWozc3WBjROHs2cN5fTt8EIswq3TLAIB\nCIjFrEY+9lmzxlNcvLjBzfLeezaeB4sWWdi2xZsPuBz93kT+4RfgYSLw8HUTcUqEefPg8sunpmL1\nk3TpImPts7YaME1ZX0hqrUVdnaw4um2bzUUXWTu54GtrHcBLuYZS/QiscQ3CnWfbRB2LVFQcDjYe\nJprm4Wsmi4TdsPLYybNzMNo1KRRtHCX+rcy3BqqkfPwi7nFcD4PTH9H4dzjJ8gKT7mURjKczkTjZ\nnIPLgHUOs+6qxCyTJZfXr5f1/AsKYJzt8oon6+wMxOQOrZLj9BqOusIm/xKL2U/BwIGzCIU8QiHZ\nO6BbNzm+pvWD0qQrjj73XGMX/Msvu3zwgcP3vlfQfD+CLOG2kRNGadzFxmGsUck9I2rYUWqzYoyF\noTw7CsUBQ4l/K9I0UOX11126dMnq0Zvy8WuBz9clAaEw6IYgmfQIAofJky1GjpQTgGFAIiGFP0o5\nOXgE603evy3Kl7dk/O+OA99PZOrsaJrHJWfWcF31OPy/gjkPKistNmyQm7nFxY378+7OyM52wRcV\nuZx7rgzv/Oork44dKzn55G/v+fubS1xuf7GckPAgZPLljyqhi8Prr6dXLcrIVygOBEr8W5HsQJXu\n3V3q6202b06gaWFKSqQrBNMkiHscUZ3OiE02RMtEIi49e2ZCMkePhgFxKeyG8CHpcfQKh6KKzCbp\n6tXworAbXD1ayGRHmY2/PGOt19TAuHGZsg57Q7YLPggy4Z2IOF++NIduV43H/cRq1GcY224oP31H\nvYMhPAx86k6Ns7J+FMFmH03TGTp0CscfX7HTNVWYv0LR8ijxb0WyreS
LL67CMGRh/iDweOKJKpYt\ne5Kht0X55yNVHBPbzo6xsL6kM6/FIpxzDrzzTjmhkAzJNIwoQlgNvnKBrNEzdIbNxKzIyJoaWKZb\nlAdRBmgOPW6yOTViYc5quYCZ9LUmTbLp08cEESecDCh7eQHJ3y/m7iBKEMDtohyhe2g5JhuGRvE8\nizeEzX+mfP21fTQCQ2YpCxHw/vuj2Ly5iH79Mgo/bRoNq5+cHBXmr1C0FPrBHkBbJm0lT5gAV1zR\n+LV//hPmzoXZL6+m/LrpXFA4l6Gxufz6uRl0XAU7djhoWqah+8aNDr4PdYXwmyFDeaRwBOVEecuX\nVnYa25Yi+Y5hUZk7jtJSsJyJ/K3SZcQI2RGsOVxX9gdw3d1/riVLXKZOncj778PYsVE2zbyQM8bq\nfGdtgEh4nOs7nCfkCkUL5HLjfBxMU05MA/Uoz/WcwCdXXNfofYXwmTrVIZ0k6bowapTcgO7Rw+WH\nP5zI9OnuHo1RodhbBgwYwLx58xo9V1lZya233rrLc4488kgAPvnkE66++upmj7Ftm3ezOyI1Q2Vl\nJV9//fVejnj/UJZ/K5P2odfVRaiunonvy+ic+fMjFBa6jHh4FH8P+3ycgN5j4chYgv6Bw4vVmaQp\nTTM55RSbM85weeCBTJG3Z++KYG5qbMlnu2UuL5BJVngehSGTdUJOFrNmNbag9yaJdskSl6+/LufH\nP/a4/nqTu+6KEts4nrxNi/E1uRpxkAPyMDF02We4W8SmshR+9jNY4lt8ATyWdy+GQGb+ChDCYPly\nm83PuQxa4bABG9+3KCzMlI5IJExGjowyZYqlVgCKFuWGG25g9uzZDBo0qOG52bNn8+CDD+723OOP\nP57/+7//2+drV1ZW8qMf/Ygjjjhin99jb1GW/wFE04axZs1PueOOhcRiFiUlDlo4KTN3Q1BbAgnC\nOMgY/bFjo1RVTSA3N0q/fhbDh2d87KGQx+DBTrNCbVmy6UtRjdMoNKdfwmmUKJumuSTa5nBdmD7d\nQdczY+jTx+G6SgtjYZRtP53ApWYUXYMLdId3bqxEu29Cw2xSU5OJXrqipApDy7SQDAKNysrJHB2D\neX45J069lxtnltM/5FJa6hAOxzEMn3A4Tq9ezi7HqGhf1NW5bNkykbq6/V8OXn311bz88st4qb6p\nH330EZ988gmlpaWUl5dTVlZGUVERL7zwwk7nfvTRR5xxhmxf/s0333D99dfTs2dPfvjDH/JNViXF\nW2+9lb59+9KrVy9+/etfA/DYY4/xySefMGDAAAYMGADA/PnzsSyLsrIyrrnmGr788sv9/nxNUZZ/\na5G1S1lXKEsq+L7HaaeZ6HqEwkIoooBQQhAIWd/+n9vP43+veoBlL1oQyNj+006TNXu2bJlIaWkB\nX32VKaFw0UV2I+HfaWM0e9MhZLJE2Bj+zn7/3SXRpt9361ZYvtzm+uvlGIQwGTHCbsgF6GZZPF7q\n0mNUOSHfQ/uzCSMzs5Nty+Qx34ce1dvRE7IjgSbgn5X9eeWViobkLz21oT1rhMOCvgXoeoAQoOsB\nX35ZoMJBFZmigKnw4t69o7uMMtsTOnXqxFlnncWrr77KlVdeyezZs7n22mvp0KEDf/nLXzj66KP5\n/PPPOeecc/jBD36All1TPYsnn3ySI444gnXr1rFq1SrKysoaXvvtb39Lp06d8H2f8vJyVq1axejR\no5k0aRILFy7kmGOO4fPPP+e+++5jwYIFdOzYkd/97ndMmjSJX/3qV/v82ZpDif9eskeRJ038KNtf\nG0QQ1GMYsgHKZcVV9Kp2+PHXy/h4CnzeH76zSOMPiy6myxSL0CuZpu0bN7osX16OrssbvGPHSj78\nsIZTTrHp189qGE9BQabjV8Z1k/EBGbbNRKxmx/5tSbTZHyUUAiEs7rorSp8+DiNG2I02ZyG12gg8\nCHauxWNZ8MQT0vXzTawzxWOhrgT
yqiEWK0QAiw0bdFO+R8pddOHxDps360CAEDo//3mNcvkoqK11\nCILMvlhtrbNf4g8Z109a/J9++mmEEPzyl79k0aJF6LrOxx9/zD/+8Y9ddupatGgRo0ePBqC4uJji\n4uKG1/74xz8ybdo0kskkn376KbFYrNHrAEuXLiUWi9GvXz8APM+Tv+UWRon/XtCcb7yw0JVtErNj\n27P8KHXd42xP/rWhAQo+3FU9nbxYQC2CTY9AEIa6YsGGLQV0qIHhw2HqVOkHLy52ECJzg598cg39\n+4/baTxpi3qnLNmswP30w+bYVXx/tksIZDevrl0tCgosFi2S192bWjwVFVBUBH+4KcKw2EyOinkk\nMKkiAsDbwuK5m6JEujoN5+a/uBW9KExAEsMwKS5u/J6K9kl+vt18YuF+cOWVV3LHHXfw3nvv8fXX\nX9OnTx+eeeYZPvvsM5YvX044HOakk06ivr5+r9978+bNPPzww7zzzjt85zvf4Sc/+Umz7yOE4KKL\nLuL555/f78/zbSjx3wua+sbffdclkShvaJDee/Uw8vpGGglgbR8NoQdoSL/2Ca8FdIoJNKTVG4QB\nA3yhkdOnpkErp8t2u1SnNn6ln13e4NlumPR4pEtE1udvySzZploekRq96w3i3dTiSVes/n8bLWIs\nxMbBIdNZTNfh1EhqJkrNbnmeR1GRwZvDRnBsn8h+W3eKtkFenkXv3tGdja/94Mgjj2TAgAEMHz6c\nG26QbUrq6uo47rjjCIfDLFy4kC1btnzre5x33nk899xzXHDBBaxZs4ZVq1YBsGPHDjp27EheXh7/\n+Mc/ePXVV7FTP9R0GehjjjmGc845h5EjR7Jx40ZOOeUUvvrqKz7++GNOO+20/f582Sjx3wuaCmFJ\niaxFDz5B0qd22VTyfpEKpUkJYP4Jtej+7/E1EJ5B5/mJ9B4nedVIv7cAQcZ/7ro0tGiMxSzGjYvy\n2GMyGzcWsxqEN921C+R40u0UWzIZqjktnzhxN1WWd7GMaFqxeikWf9MsdB10IT/P5MlZp2bNtkdW\nwztjuvL7XEvF+isaSJcdaUluuOEGfvjDHzJ79mwAbrzxRq644gqKioro27cvPXr0+Nbzb731VoYN\nG0bPnj3p2bMnffr0AaB3796UlpbSo0cPTjzxxAa3DkBFRQUXX3wxxx9/PAsXLuSZZ57hhhtuIB6P\nA3Dfffe1uPgjhDgk//r06SMORd5+W4j775f/1ta+Ld58s4NY+IYm3nwVUVuIEIYhD0gf3KGDqD1D\nF5sjITHljBtFAA1/s7hRPFB4i/jLo7eI2tq3G9581i1vC8OQwY+aJsQtt2Suf//9ouE1w5Cvpcdz\nIL+DDh3k9Tt02LNr19a+LZ555n5xxhlvi3Rgp6bJ86dObf4zrJr6tvDCHURSM8RXdBDn8Hajr1fR\ntojFYgd7CIcdzX1nwLtiDzT2oIv8rv4OVfFvSm3t2+KjRbeI2lJzJzX86Jb7ha9JpfZ1Q7zGQJFA\nFwJEAl3cw/3CNFOHZylqMqeDON98u1lx3RfhbQ2yJ8Hd0TBJLjTEq692EGec8bYwTTlx7er89Ofs\np78t/tO4X5wXznwfd98txMCBctJIv/9HH90vJ1DFYYsS/71nf8RfuX32k7w8i1jI4s2zI5x/tkO3\niA2WjMIZN8PmNREiTEAiCPF/DKY/ixvKK3/nBzaPXyK9G8dvdegalxmxuvCYVeHwXNedC50dKuXt\n96bKcnZUhml6jBjhcOaZmSStuiXTqN04h/xTBpPXT9b2SXt8lgQWSw2LETfBxV2hthbSOTfz58OK\nFS7XXSc7k7VEuJ9C0V5Q4r+fZCJuLEzTIhqRUTWOI8sSpJueg2ANRdxOJVdrczjlzsH0vyrjv39V\ns3ktMAnjkQhMdpTajNu5xhlw+JW3l1EYJsmkzE945hmb3Fz5HQ06YRpfHvdTgi6g/3s+vZdAXr8K\
nLi9w+UZzeEO3ec+0iKTqF2UlXwKyDIbvy6Szlgr3Uxw8RKolomL3iHTnpX1Eif++kBXsX1WV6YCV\nnR371VcuP7lhPF+vSNIpJgjwGUoVEWaRi4f++GKW7CjC8yx8H97SLC4kyvk4LNJsjpxjMb7o8BL5\nXZGXZ7F6dZRlyxzee8/m/fdTTRpzAAAgAElEQVRlqWoh4Lgb5tD9J6T6E8NHy+fQWy+iaEw5ZwQe\n9xom71dGKUp9EYMHS4s/TToaStNk0tm2bbI3geLwIzc3l5qaGgoKCtQEsBuEENTU1JCbm7vP76HE\nf2/JCq73QyaxQFbbBBmtUlAAI0e63H9/OeEBcdb8KKDXWJ0OMZMTToAOnzYtdmY1RO68p1n8LSH7\n6eoLYPHitlPFsm9fi1/8IvNZ0zkJC6oHc2pifkOW80MzBvPbdQ7dUn0OwponE8dSoaAVqdXQ00/D\nihUyNHTBAlmtbt68CB98YDF8OA0rBcXhQ5cuXdi2bRufffbZwR7KYUFubi5dunTZ5/OV+O8t2cH+\ngUc/4bAIC02TyVk1NdCrV7oGT0BC6Py55EKe3Tiex38F2phMbeVuEZtoVucskK0TFyxoJlnrMCd7\nryI7G/mFTRX0fwHCO+awoHowc9ZXMNByiewmUQxg1app/PCHI9G0AM/L4dVXI3ieTJBrWrxOcegT\nDoc5+eSTD/Yw2g1K/PeWVLC/iHvEA5OFqQqW4XAmAepPf7Lx/RCaFiAIs+Xk8Ux0LIosoKjxbq3l\nulg4gHw8fjzEHZd+CYclho1ttx31yt6rKCrKnvQqKC+vaND6UyMWRHa9qz1tGjz6qMujj45C15No\nGoRCcUpKHGIxq5ELTom/QtE8Svz3lpQJ+8IYh98ty2SmnnIKBIFs0/joowXE43IzxvcFr7wCl12W\ndf631FK2gKhWjoaH0EwMouxLx61Dnaab1tGozPxNI6dEi4LVkGoG1lBKY+lSm5ISB12Twi8E6JoG\n2BiGfKx6ASsU344S/33BsphXZrF0WfaTLl9+Kds0go5hCDRNYBh+qgRxM/Xnd1FL2Uh6kKpq2Z7M\n11kpj9jMmVLAk0np/tJ16N3bZdIkGdL5ox+ZzJ10m6yIiswOPqkSPnwZAk3uKVRWtpuvTaHYJ1Q9\n/30kEpHWZZqBA6swTQ8Z2umjaRrJpEEyabJmjc3Wrc10yUrXizCMjKna3HPtgKbzYCKRqf0fBFBY\n6BD4ccBH1+P8uMSheCycPANKbocTXw6wcVK5w3LvRaFQ7Bpl+e8jliUFq6oKZszI1OJJU1NzBUFw\nFitXyno8q1c3swm5q4ytQyGL6wDTqPVAaGfLP7GigNCPglRUUMB3N+eSF4P8mJxuk+g42Oh6u5oz\nFYp9Ron/fpD2W0ci8O67ETRtJuDheSbjx9/Npk0WQ4dKa3aviqAdbllcLUDTeRAykUFz5kD312s4\nY6zOv0sCjl6ps7ljIcexlDAeAQYjeYKjB1pciMwFaGdfn0Kx12j7myXWWvTt21fsrulxS1JX10xd\n/r08tq7OZe5ch4cftlmzRlar7NsXqqul+O+uP66ieVwXxtkur3jlhPHQwiaTLovy4ovQP9i5JHRO\nzs49itObySr+X9HW0TRtuRCi726PU+K/d+3glixxqa8vxzA8wGTBgig1NZnyA03LFqfr6w8bpoRn\nf3Bd2FDl0mO7w92v2Lzlyy8y3WQmG8OACRNkH2PXhQEDIFUZF9NsV3voinbInop/i2z4app2saZp\n6zVN26hp2j3NvP4TTdM+0zStOvV3c0tct6Vorh1cc6QbmIM81vfjHH30eBYtchkwQL5uWalIE1zu\nYSJnC5dEArp2VYKzP1gWRJ60iJ41jrd8q8GV1pS0z7+gQPYdqKrKtMQEuZGsmr8rFC3g89c0zQCm\nABcB24B3NE17UQgRa3Lo/wohRu3v9VqDb2sH57qyY1dJiUN1t
d3QwBzi6HpAnz4LKC5ezJ13RtlQ\nBZbj0GVZAa+LMZh4eJgM0qJtKlnrYFJQIAU+COTKCuTjCy+Uvv6aGnnM6NGZzeNQSIo+yGQ8tRms\nULTMhu9ZwEYhxIcAmqbNBq4Emor/Icuu2sG5bqZOj+d59OhhAlHuuivK6MgYupctwzAChPC4oncV\nQ56eBYHHxWgIAgwCBB6PXOFwljL79xvXhdtuk0KupeL5QVr648dnVla33ppx8yQScNVV8r8/+QRu\nukmtwBQKaBnxPwH4e9bjbcDZzRw3WNO084APgDuEEH9veoCmaRVABUDXrl1bYGh7TnPt4Bwnu06P\njxAexcUOW2bbDFq0nM0l0voMkiHyq4GEdAfpuk5gGPiBhm6anHW3fUA/S1sl24UjhLT+r7wS7rzT\n5fjjHZYssVm0yCLWxOz417/gnXegLO6y7T2HuZts1uXv3CtBoWhPHKhQz78Czwsh4pqm/RSYBVzQ\n9CAhxDRgGsgN3wM0tl1i27JOTyJhIoSsRV9dbXNXjwf5aKSP0EETUDP5bObGItzGLDTNw8gx0VMN\ndVcX2LzkWNgoodlbsipnN/vdCQEffuiSSJTz4Yce8bjJc89FWbeu8cH19VL45wflmIGH96DJw3qU\nCTmqH7Ci/dIS4v8xcGLW4y6p5xoQQmTnW/4BeLAFrtvqWBZMmWIxaVIU05Q+/1jM4oghnxCEAQNE\nEoy8epZiUU6UC3WHayttiiqs5kr3KKHZQ5r77iIRmD698UZvUZGT2qvxCYXkymztWgvDkCuDcFi6\nera952AGHiF8BB79A4elnqUifxTtlpYQ/3eAUzVNOxkp+tcDQ7IP0DTtP4QQn6Ye/gBY1wLXPSBY\nFvz85xYDBljE49LP/OV3b6JzYllDDfrPj74JTYOlwuIdLI6ogSKaL92jhGbPaO67GzcORoyAp57K\nHLd6tY3vy836IBmiqHorWzSXyBMWNTWZVcPcTTbJh0004ZEQJot1W2UCK9o1+y3+QoikpmmjgHmA\nAcwQQqzVNO03yEbCLwKjNU37AZAE/gX8ZH+v2+LsysfguliOwzuP2TyxwmL7dnjm8SLmzAvxVUmS\nvDUhvjOyiNzcncvPZ5csUEKzd+zqu4tEZJmMeFxu+n7vexZ//nOUvB1V3FE9gwti07n5jBl81XM4\n+cURYjGLW2+FGTMsziTKBYbDd6+z6fiZxVM3yr2Cujq5yb87N5NC0ZZQSV7QvI8hO2Mr1bWrXERZ\nlLD4hZjIBO4lhI+vGRi/nYBrj2skHGkhKSigkQWq2HO+ZT5uqKnk+zKUc2xiImN7/BefDQzYfgmI\nsAZaLj//eZTqaqtRWKhhwOmnuzz0UDm5uTK81zCiXHSRpVx0isOePU3yUrV9ABwHEZdtA5PfeDx/\ns8Mpf7CwmnbtwuFNYeFg42Ei8NBTZuluyvQrIdkHdlXiKF1ULzvR64SKAtZcGRCYgAZogiDwGNSz\nikErZAmIIlYzWMxhTmIwXxXXEA6nk/U8qqudhn7KykWnaA8o8QdWF9h0D0zCeCQweSJms/x8WD7Z\npqih1KTJEmFj+LAiZDH5kijXdXboFrF3Ugnl6299mrqFjr2ghmRYR9cDaeULjaQXYkz1DArw8dEw\nSYKAgcznP1fe3SiKa+ZMu1HegHLRKdo6SvyBl2osXiSKTVaRsIR8nsooNXMcCgbbTCyystwQFrvq\nsKV8/a1P0yqg775rc9RROYSEh+8brF07nOQMuCA2nRA+OnJBALIE9M3xama8FWXLFof33rNZv95i\nxAhZhkO56BTtASX+SL/83zQLBNg4ACwPWxQUwNljLOJxCy0KV1wBd9+9e2HYVZl+RcuS7RY6cjXM\nu3so/yqG12IRbrnF4rkNLqOYhcBD0zX0IAnISeB7dw3msiKL8vJMFFdpaaY5vELR1ml3G75NyzGv\nnubyx585bPcLeBRZjyepm
2x4MspLNRYv/afLeSKzIsjJgYULlaAfUqQ2WUTcI2mYvD85SlGFxS9+\nAW895HK+cHjbtHl6zGq6V8+RRYBSKj9tGowcKXMC0qWgQU3cisMXteHbDHVLprGyfhSB4aPrOfQ2\nKukxagy/9j0CNHQCQgRowqNoRRXHbq/iDjGDED4eJuVEIQ7/GONApa2U4VAhtcmiBT5hzaOoxsF1\nLSZNgqSweBsLPQl/zLcYN6+xaV9TkykV4XkyiijdS1ht1ivaMu2nh6/rUjt9JAEJICAI4tRunMNX\np8f5eIjPl4U+AQYJDAgZ1C19mvgRT1FfKLNCw3hEqCJKOZcvuxd/QHkzTXkVB4Vm+h47TqYHMMiX\nmtt7aXoq7LxZr1C0RdqN5b+lyiHv3QD9emRmrmYQ/l4Jqx6aTxAGPSH4ovJaTln7GUedV89HP16U\neh6Kx0LOOhMEmMjJwFdhPIcOzWyy2Eg3TjwuY/snT5Y9FpjoNOoTadk20WhmIx8aW/5qs17RVmkX\n4u+6MG6GzSteDr3Gxvl3X53vVEymtksNwWYdCAg0nV4F/8tJ2wRboKF2TyBgfslZVMYqEcDQ1Aai\nrpTh0KJJUsBO8wFZCXu6gQg0DJFEC4ewhg3DymqzpjbrFe2BdiH+jgNv+bLw2gXrHE4/zybSz4I6\nF13PSdWF0elU7RMiIL9aZ2vSQIiAZNLkv6sriSF78v6yb5Q7ypqP71ccOjTNDt5yq8OJ9R668MEP\nZB4YAhH30aZOleZ+ysGfnkfq6ly2bNmzvs4KxeFGuxD/tF/3Hc9ipWlx29EwaBDceCMcddRQ3noL\ntr1WyszYGGoK43xRovOXx39OTV5+QyXPdGPw6yotuinRP6RJZ1inXT7XXQdbZtvMEzKRz8dABnx6\n6Ai545ve7U3NGEsCGno1766vs0JxONIuxD/bBVBbCw8+CIWFLscdV0447HHRRSZjX4swrLCSikdG\nQdjn0sTjjB0ra8NfdRWcdZZyAxwuOI4U/iCQf88+C6RKbqcT+TTgp70e5LzSv9KpWpD3gQEzZ0Iy\niR8yefXaoQwY2rivsxJ/RVuiXYg/ZFzCgwbBObgMKxlPOBxvaMNYUuJwIlvRwkl0QyCER2mpw+bN\n1h4ldikOHWw70+c3m6VYLMVC06BXL5fvPjSPzWHBlsDgiGmXcuaf/4oW+Ajf46jlkBgiyz9oWuO+\nzgpFW6D9hHqmuLXEJUo5V1cvIJwI8JM6yaRJsrqAO6pnEEoISIKhhTj7bFvFeR+GyCY8spGL3uQO\n13VZBbR3b9meUzcCklrA3JzOfBOYJDDwMJkbizB2bJSqqgnk5iqXj6Lt0W4s/zRX5TsEmscRsYDi\nu3Q23d6XDzqWMbxkBcc855M7Fr4o0YifPIwB96kf/OFKRQUUFTUuq53971NPpdtzxhFCY1VtKRcS\n4fzs+k4xOO88i3792H1PSYXiMKNNl3doWsoBaFRvua7YYMXDGgFJfC9E8VhBp5hPApNLzSgTHUv9\nztsorgux2DS+971RBIFPIpHDuHFR1qyx6Jt0sXF4O2zzwJtWozBRlfarONRp9+Ud6upcVq4sT/V3\nzURruFh8MaiS4tDT/P3aL/HF+xhGQBCC35eMQIt1xcHmHV/1d22rpI34Tp1qECLAMAJ03eOxxxxq\nXoKLHy4nHHigmxhEVY1uRZukzYp/ba1s7J0drRGLWYyzXf73lNtY/4gnM3h18H3p939pZYS1qXj+\nHJXD1SZJL/zq66FnT5tHHjEJhaSB0HFzAd88NJ6QiGMQIBJextWTVaN7dYHNSxOVB0hxeNMmxd91\nZX33oiIT8CAIsfm/t7LsC5frE1V8U+I1ZPCKJLy34kKefXY8/ftbjB6t2i62ZdJGvBAQi1mMHRul\npMThNK+AcX8Zg54S/iQ6gWbyzXkF1B7vkP96JXmLalhdYHP2GNXuUXH40+bEP+PStygujvL
bn1ZR\n9tgMCmLTOYeZ6CT5ulrW7AkEBMkw69aN58knlX+/PZA24tN5AGkGHrMCI/AahH8BF/LhiMGc4Y8h\n2JxyHf4syktPWMoDpGgTtDnxz3bPJhLwzdoP8U5O8HGJ4Ohqn7wY5Meg91ioLYGF1Zdxzu07C78K\n7mibZCf8gUvfvuWEQh4JLcSOpQZHVkMCk4nh8dw3xMH3G7sObdtSXdoUbYI2J/5py657d5eHHion\nx6xnkyYgSFXovFMjb60gLwZHx6CeznxR0/g9VAP2tk064W/LFofNm1PiDux4bAQ7nuvKm9g8ELEo\nLISVK02CwMP3TbZts+nXTxV+U7QN2pz4py27Dz5wyM31ACGbthoQ6Dp1v/4BR1//V0QQ4GEy24ww\n0W78Htmrh/p6WfJF/cjbHvn5NrpuNkSE5RdHyOtvEUm9XlcH//rXUBYvhvnzI2zaZFFZqfaEFG2D\nNhvnX1fnUl09ACHiDc9pWg4lJQvJi8n6/m9ic2qkeZePbcsJAFCtG9swzeaCpJ5fsaIc3/dIJEzG\njo0Si1mEw3KvQK0IFa3F/rqc232cf16eRefOw/j006lI01+jc+dh8gduQTcrY+E1xbJg+HCYOlVG\nhSSTamOvrZKXZzVbuiEdKmwYfkPtp1jMIpnMFAFV94SipTmQLuc2Xdvn888j1Nfn4id1/PoQX/2t\ndI/PjUQgN7dRZ0BFOyI/3yYITJJJg2TSpLraBmRdIHVPKFqLqirpaj4QbUTbrOUPsGiRxarnKplQ\nPJJO1T5HbRgDpxbt0VTaTGdARRvj25bXeXkWHTpEmTrVYfly2dPBNGHMGKiuhpKSzA9T3RuKlsB1\nYcYMubIEaWi0poHRpsXftuGbX9Vw0hpBiIBA93DGO+SM37OY/iadARVtiKbL679VuhTVOI1mgn79\nLHTdoqoKzjsPSkul+NfXw/z5oGmycujw4XKlqO4Vxf7gONLiB3lvDRvWuvdUmxZ/y4Ijp9gEPzNJ\n+h5eYPJfC2zeW6w269o72RFdZXGXHqPKIfDwQybPDos2BAJkGwATJ8rksLRllvb9N+kCqVDsE7YN\n5xou/QKHJWGbSKR1b6Y27fMH+LLI4iI9yr1MoJwoSwKr1X1pikOfdD6IYcAFukPIlzNBEPdYP9Wh\nvFyuDtLU1bmce+5EzjjD3em9sjeAFYp9xcIlqpUzgXuJauWymmwr0ibF33Wllea6cgNlUcLiAcY1\ndHEyTVnTPX2Mov2R3tOZMAGumWKj5Zj4mkECkzeE3UjM0xViff9eJk0qp7Awc9OoDWBFi+E4GEkP\nXfgYyda3Jtqc26epL/fMMxu/3qOH9Ns+9ZRLr14Of/qTzZQpqq5PeyTj0rGgKMq2KoehM2Q57wYx\nd11qPxhP0C0OBOjEebQkwv/G7mKGXsFvL3c562uHgsE2ReomUuwPTarHtrY10SLir2naxcCjgAH8\nQQjxQJPXc4AqoA9QA1wnhPioJa7dlOzm3fX18OGHjV8//3yIx13uv182b08kTN59N4qlfrjtG8ui\nm2UxMZIVAbR6GowcSX4PH/0hgR/SMJIBpdUbKeennMYmfv7q49JKW2xCUZRPuq3ms8/msGPHYN56\nq0JFiil2y+ppLjVzUgZENErdu1XUlkB+IeS14nX3W/w1TTOAKcBFwDbgHU3TXhRCxLIOuwn4Qghx\niqZp1wO/A67b32s3R0FBplqjELBtW+PXS0uhZ08Hz2ucwAPqF9quScV9WraNNc6Sj0eNgmSSvDVw\nxliNzSXfoXv1v8iPybTBq4I/oyc8CGRQ9iexB/kgPhcAIebz9tswb3wRs4Y7dIvYahZQ7MTqaS7d\nf1pOTzy8+Sbvzarky96zCHwPfeWshiZUrUFL+PzPAjYKIT4UQnjAbODKJsdcCcxK/ff/AeWapmkt\ncO2dqKmRYVIAhYUuQ4ZMbPDR6rp8vbjYxjBMhDAIhUy
Ki+3WGIricCHtK7z3XtI7vVuqHIKkjLsT\nwJExg/nP3UxeSvgB/sz/R9IwG5z+n53ySaO3vaL/07zilXPi1Mz7KhTZ1MxxMPEI4RPGY8eKpwmS\n9WRXkm0tWsLtcwLw96zH24Czd3WMECKpaVodUAB8nn2QpmkVQAVA165d92kwti1/i6ed5vLIIxnX\nzl13Rdm0ycK2ZQJPaWm02ZouinZIkzaNW1K+/1dEDiZxAgxGMpkZegXixO6cuXUOfxKDmRmq4PQ7\nruKqfAdsm2O7reaLD5Y1vG1y8fGYLEcXqvi/YmdcF2LfK+DEIdCpGo6I6fSav4J1gwRBCNBD5Ofb\nrXb9Q2rDVwgxDZgGsrDbvryHZcGUKfDWWw7hsHTtaJrHnXc6nHZaZmN3VzVdFO2QJhttb2Lzlm9R\nTpQBOLyp2SzVLHJyYMDzFcydW8HTD4PwYcjjFtGovK+OT7kO0z7/I7sUoeXMg6Qq/q9ojOvCyJEu\nD9w/mr+HfT5OQNGdPt9ZK3uN/KtEY0vOMPIuaD2Nagnx/xg4Metxl9RzzR2zTdO0EHIfo0kV/Zaj\nogJ69bKpr5dtHEMhk6uusslrzd0TxeFLk1oep2JhzoJlcYulgYUGhAyorJSHT5qU2VeKxzMGvdw2\nqMC2K+jfH/r3ByKqRkh7ZlclRBwHevVyCIU9WW5ewI7ego5rQxwR0wjHTL6cuqvSky1DS4j/O8Cp\nmqadjBT564EhTY55ERgKuMDVwBuilWtJ9+tnUVenXDuKPSQrlddCzgXjx8OCBVLog0DuFzlO4/aP\nmgZbt8K0aTKEeKdqjKpGSLvl2yp02jb86U82yYSJKeLoSchfF+bvdz/O36trZORPReveN/st/ikf\n/ihgHjLUc4YQYq2mab8B3hVCvAg8Dfw/TdM2Av9CThCtjnLtKPYVy5Liv3jxzmHXOTnS4gcZUTZt\nmgwmSE8Syr2vgJ22khrdE5YFt9xiMfuPC7nm7Cq+70HelAh5lkX3AzS+NtvMRaFoCZpbtqczx6dP\nzxTi6tXLpazMYcUKm02bLFXnR7GT5V9ZCStWyNdKS+G222Sf8XA4a2Jogebh7b6Zi0LREmR7bbJ/\nl127Zgq8FRa6PPxwOTk5HkOHmuTmRgGLiRN3/g23wG9bcZiQvZVUUCDFPt0dML1SBPncyy+7HJ+s\nIv/2GeSt8g9Iqzgl/grFHtDUinvuNpf/1B2iwuZ7ZQ5mOI6mBRhGnCBwuOgiaydf74Hs0qQ4NEgb\nDxMnSis/Tfa+UWGhy4DzB7DZi6PfL6N98ta3vu9Qib9CsQc0LQF92e/LuTLw+C/d5DfVtxFKBAQC\n9GTA6hcKGo6tr4cHH4SzzpIbw7vyASvaNrYt3Ttpyz+bq8qqMPR4Q9TPFyUaeZsPk9o+CkVbJzsV\nIEIVoWQ9mhAYmkf/NdWcMVbn3yUBR1brzF9fg65LkRcC5s6FF1+UFUBDqV+cCvtve3ybS8+y5GsP\nPggvvJBxGQKcsg30BCnjAdZXn0n1bZVc1cqWgRJ/hWIPSPtvN1S5DJkue+0JICFC/B+D6R9bzFEx\njwQmjm5z5ZUupulQXS1bQAaBnAxGjJD7Bcrn37bYE5eeZckV4AsvNH7+6xMinD52Bl+XJDiiOsxV\nsUqO7mJxVSuPuU2Kv9pUU7QGlgXHVzng+2iAj8ZMhvEHKlhDETYOizSb436wmp/9bBRB4JNI5DB2\nbJT335dlolW7x7ZDts58W1hn9rEFBbL8TDIpn9d1WHOUxfPvO/SPOTjYLMVi6uDWH3+bE3+1qaZo\nTd7E5mpMBNLKr0JmYS7FYikWAy9yue22kWhaEsMAXY8zfrzDxo0WBQUyRLSqSk0ChzvNhXHuqhR/\n02N/8xuXzz+vQgjYvLmUnj1reKvQ5oE14wAoLISiotb/DG1O/Hc3AysU+8OpEYtLZ0Tpl3BYpNss\n9RvfXMce6yBEJpR
D1w0GDrTp0kUKQnrDb+ZMWLhQ3ZuHK011pqamUYWQnUo5pI/t3t3lrLMGoOsy\nS1BWINZ56KEc7rorypo1Fu+/LyeL1jZc21wbx+zerGpTTdHSWBZMdCyO/O04fvyERYcOcumu6/KH\nvGKFTSKRgxA6YNCp02WAFIDsUD/V8/fwpjmdsSwYN25nwc4+9srSKnQtjqZlSs9DQG6ux+DBTkP8\n/4G4P9pkhq/y+SsOFNm+3HRtn+Jil8rKKoSYiRBJdN3EMKJccIHVYPnn5CjL/3Bnb3TGdWHxgy4/\n2WCzbpKHCGf6Qmiajq7nYBjRZvND9pZ2neGramkpDhTZ99qmTfDnP8NFF1l06+aweXMS8PF9D01z\ncByLqip5rPL5H/7src58+ZJDp6RPyR3wyUCNdzgTv6PNBadWk3/KYPL6Wbt0HbUGbVL8FYoDzbRp\nMoYb5L+9etl07WqSTHokkyZjx9pMmQJPPpk5p67OVVVn2wmOA28ENr8kxJGxgJNjJo8bN/F4aIzs\nAW0uhmgRlmUdMKNAib9C0QLMmdP48bPPWlx+eZRP/1bF0SvgiPcbBx/U1bmsXFlOEHjoutmqvVoV\nB56mE7ttw7wQ4Elnj6ELfnXFCvQXZQ9oEffQDnB0ihJ/haIFGDwY5s9v/NgCuj87CxOP0cxiU4Es\n+AZQW+sQBB7ZvVqV+LcNmk7shhFl0SKLBy91MF/w0YXA0HwE8E1gEsYjEZhsKrA5ABGeDSjxVyha\ngIoK+e/TT8Pxx8s47SLHQegeWuBj6B5FNY5sZ+Q45J9XgK6bDQLRmr1aFQeW7Ind9z3+8AeHZ5+1\nmBeyiZpmQ1vP1ztHmKZH6B84LNZtLquxlPgrFIcjRUWwejUsXw7z5sHfKm2KcmTmj2aaMiQole2T\nFwrR+85LqB3UmfziiLL62xDbttn4vomue8TjJsuX2wQBLE5aPFsRJdLVYXWBzV9etViqgavL/tAP\n2Qd2nEr8FYoWomniz0s1FkXRKFuqHN7E5vwVDt3SB/g+efe/QN6kXIhG0t4gxeGM67KlyuGe6Tb/\nOj1Kaals7hOLyf+5QQBYsPhkGD0aqqvlaUaqP/SBjv5S4q9QtBC2Lat2BoH8t6AAfvigxV//aiEE\n9A9BNGRiBPXU9RTUlgjyV8XJU2nohzf/f3tnHx9Vde7779p7ZgfbSoKhFpSCgmgBQ8JLbfdBcWtU\nfK32cNvbak8QPNAqaKNolbanNz21pfU1rdIWVLjMtZyeY6lagQo4soXiVkFICAQU0YKgVJs2AV8y\ne2bvdf9YM5lJSIAYNG/r+/nwSWayZ2bt5MNvrfWs5/k9mdZuCxcyyA9YiUVpbZzf1c7JKeRS3d5O\nPrmUVMpn7lyL2bPjTS41ILsAACAASURBVKZ/dXWf/LC1+Gs0x5BMzWQYwsyZWQMvUNv+2ePjfHfs\nXbx55ROEUTCSIcXHF5LfOcPVdJSMcU9jI0iJCUTxcXB5AZvBg+Gtt9Rmb/x4F9NUZwGRiE9JiUtt\nrU002jlOBFr8NZpjhOtmPfxPP92juDhr6QxqQnhgo039iLO4Nu9PIEJCIagPN2vx765kYn3pWT8U\ngqS0cHEA+P731VmQ68Kkkwt5LzAITUkkYnHqqQ7f+U7nFfxp8ddojhEZD5dhwzzuvruUaNQnmVTb\n++3b1f/us0KPkRv3IK8xESLESEkKZj8Cv9Ylv12Jo7ZucByCiAWhj4hEMK6byqq+ZfStUrbMmSww\nGw9Ky2kYFlA/zqBgeiXOnZ3799bir9EcIzINX1591aVPH7W9N2jkjqtjfPhZmyU3eqzwS7G2+bx3\ni6RhNBRUQX5tEmIxPLT9Q1egPbbwHjZzZJwJuKwXDnPLbK6yObQRS3qHkL81JH+7gDPqYMLHfCNH\nQIu/RnMMsW0YOdKhenOEMBVgpiRfWbqQ/HllTJrm0me+jyEDCrZCwVb1moaR8OagT
dxwg0dVlVKZ\nhQu1HXln0R5beNeFvwQ2z0kbM2j9Ws+DnXscrolYmLRi+N9J9DhLZ42ms8nPtymumcqpiwXFsyF/\ni1KFIWUORh9L+T+jRP+Vcqi6H961NzJ3bikjR3qAsn/Wls+dQ2t2zZ4Hc+eqr0e6NpfMLmLaQzal\nMs7u6T/pMh2m9Mpfo/kYyB9fRv7ti5u3dsrEhSoqaHhrNdV3S0ILECBE2CUyQDTZP1Mm5g9th4Fa\nXttS03N3EX/BZslgmzmdr/uAFn+N5uOhLVWwbaiooH7+s4TRVM7eW3SJDBCNIteu+frrYehQj8uL\nY5xQDTtjZdi23ayXA0AYeuze7bJ3r8Ojj6oXjxnTdnvHzkaLv0bzcdGW4bttU7dzHoSzwAgQRoQB\nA6YxYEAZjqMVvyvheeB5Hvfdcx5WNIGRhKI5C6lZ4FJabpNIwBe+4DFpUoz331/E66+nSCQs1q5V\nBVyWpZr8VFWlzf660J9Xi79G8wnjeTBnehGXDr+OA2Phkm+XccYZdtvphbo1XafhulBU5BKJ+mBC\nKOHAmUl2PeLS2GgzYoTHvfeWYlmNCCERgmbhO9+H++9XNR7r1qmc/67yJ9Tir9F8wuyMZVM+66SJ\n9zKsB+Y4cHbK5faIwy/Wppt6ZE4MEwl1UDxvXjZ5XPOx4zjw+987pJIWlkxgpOD4LVHuq3WQEkpK\nXKJRH8OQSAlhKEilLKqqHED16Q2C5n15tfhrNL2Uc3Gx8Hl/ZMAr9wYUWPNp/GARj50uKawN8FMW\n/3lHHPdim6v3uAxJJJR6hCHMmtW1lo+9gG3bbG6evYYrSmL0q4YH6stYH6rff1WVcvCU0icITFat\nmsbTT5c1VXVffbVq7alj/hqNhiFlDqlHLP5R0kgYlWBIDOnzQQl8rlYi8THWuSz5B7w/Zg+3jBD0\n2wYC1DKyKy0feziuq5wbamvtJkG//PIFzL2pgrVrJ7N8+Qxmz45TXNzcwRPURm3UKOXx1BWjdlr8\nNZpPGttmyXVx9q+LUZJchCFTCBnhU1WSJAFJLHaNKGyyiNh0tcGY2XDCDonIywPH0ccAnxCOA3l5\nKuoGcOmlC7jllm8D8MUvrmIou5j351+wfbtNEGRfZxjqdZm/T1f8G3VI/IUQJwD/DZwC/BX4upTy\nn61cFwA16Yd7pJRf6cjnajTdneFlNt9ZbDPstjLGjXOZPt3hne/C0p+4/L+9DkPTsWTTDEhJuHfM\ndC4aNBinwsHDPmr7AU3HyM3YLSyEgwdVs2YhAAnXTryHDcuvorHEbvLnNwy44AKoqOjaf5eOrvzv\nAOJSyp8LIe5IP769les+lFKWdPCzNJoeQ0ZUli+HE09Uz71XZHPzOzY+UNhQQxgKpDRIpSyW15Rx\nykwbx4bY9U0Owl3uELEnYttpYzbX5aVTS3ifVZC27u6/VnIeLj+vtvkyHg4u6w2Higq7y/9NOir+\nV0LauxQWAy6ti79Go2lBGHqcfbYK7Rw8aPHkk3GCwObrIxcwY9YshBEQygjz5lWydatNeTn06ePx\n/vsuI0ao+HIk0rUOEXsiNQs8vjCrlEjgMy5qcd9F11B69n/Rf62k//I+rMHhS9IjTikWPqG0sIjT\n1duzddTb53NSyrfT3+8HPtfGdX2EEBuFEC8IIQ4xvMsghJiRvm7ju+++28GhaTRdm9dey4Z2IhGf\n995z+Rfh8bOSGzCjSQxTYhgphg/fTBgqq+iBA0uZMuU/uPfeUkaN8pg6Va/6jxUNDR67d8+loSFr\n4FOzwGP/9RWIZAIRKqe3+mWjmHn7X/jtip8y5eQ4LwobJ53BFSEgKv1uYcx0xJW/EOIZYEArP/pB\n7gMppRRCyDbeZoiUcp8QYijwrBCiRkq5q+VFUsoFwAKA8ePHt/VeGk2P4LTTHA4eVGmCqZTF5s0O\nP+4Xo7AqYG8A0gAhJJMmLWT1anU2EIn4CBEgp
c/YsS5jxtjMnasPfjtKQ4NHdXUpYehjGBbFxXHy\na+ELs0oZESYwCUlhEAiL9RGHDYFNtWVT+SNYXg5rGx18qZq2G3ldLKezDY4o/lLKC9r6mRDib0KI\ngVLKt4UQA4F32niPfemvrwshXGAMcIj4azS9iQkTbGKxOKtXqzTBbdts3iVG/rsw4M/w9hUgDIhG\nA2691eW00xySSYtUyicMI/Tvv4cHH/SabARaO/jt6VlBx+r+6utdwlD1YAhDny1bXII7YWLKx0gL\n/zNcwM+MCr71gM2kOnUAXFenmq/X1dnsKoxTVHcMBvMJ0dGwz5+AKenvpwBPtrxACNFPCJGX/r4/\nqoVBbQc/V6PpEZSV2dxwwxxOPtnGMGAxZSTI48RVYPgQpAwMw+KqqxwmTLCpqYmzYsV0pJRcdtlD\n3HVXKWec4TUd/ObieTDH8XjvB3OZ43h4Xuuhje5Kpvj5P/5DfW1pt9weCgocDMMCTMBi/o2F7Fi1\nB19GSGGSIsobDOULqRpOfGQulxd6lJerzy4vV3pfNMOGOXO6hfBDxw98fw78jxDiOmA38HUAIcR4\n4DtSyn8HRgDzhRAharL5uZRSi79GkyZt9Mm6dbDBt7nYXMPt/V1eeqSQARfU4fsOr75qU1cHhYU2\n777rEokEmKYK/5SUuLzxhn1IpKHJRgIf37f40/JKksny5qGN/O4hVK3RnqYrRyI/38Y041RVuexb\nWciC6nIsfFKY1JxyBSP+uoLpLMAkJHjJIHw5j7EyzvrQ7rYZVx0SfyllHVDayvMbgX9Pf/88UNSR\nz9Foejq5+eT19TZX3m+TSoH8g8oplzJbOPTVr6rwT+asoKHBaTXkc27OIaTEZ8SJS/lnTmijvt7t\n1uKfaaTyUawTWoaL1C7CJpGwuYO5TfYb/ygJ+az/FtHdAaYMkUCEkCD0Od90eUHYXc624WjRFb4a\nTRchI94TJ0IqlX1eplMfMuZgffva3H57nDPPdKmqcnjtNZsf/ODQ9xtS5hAssgh8H8OyOGXcZBqC\ndU0r/4IC52O/p4+TIzVSaYvcHr2RCEydqp73ffXVxaFupMkr9waEUYkQm3hvCJz4Z0G/WkkKA2FZ\nfO1XDsfVdZsQ/yFo8ddouhCuq0Q+F8NQzxmGWuGWlQHYzJ9vI6VqIZgbdsiuam3sNVl1zLdtihuK\nqK93KShwuvWqP8NHsU7IDRcFAcyfD9GomgiSSXgBm1+NncaF1nwwJFKm2H8Z/O3iKB8uvZkRFDCk\nzKHItrt1SEOLv0bThcj1kjEMuOUWKCjIZpbkrjIXLz405LFggTISC0P1PvG4zcgbVDZLQYOKbfcE\n0T8SDQ1em5NcJlyUqZKWUk0C06dnr7n0W2UEyUWEYUI56gmQkYARdxQwZMicT/RePi60+Gs0XYij\nDWW0dp3nKcfnTMgokYCNGz2SydIec8h7NLSas59zz5nfXSwGCxcq4bcsuGGM1yxVs+GBqeyp/y11\n6ZcKTP70J4fx47tnmKclWvw1mi7G0YYybDxsXGpqHOa6Nnv2cIizZEmJSxD0nEPew5KOd9Wfvacp\nZz8IfJ54wuX00w/12hk8ONti8foSj6LyFm55Y8fwz4OAACHhyQdv5lfL2q6p6G5o8ddouiPpU0uZ\n8BkWWiw34myI2JhmNjNo3jwYPdqhutpqWgVHo4Xs3j23x8T8m8g5xe07ShD+AmTEIJmyuOceh127\nsoKd2xwtDOHMMz3qTqygfliCgq0hMuHzXIWL+UMI+xhASBgKxLADxySttKugxV+j6QbkpiYCJCpc\nzk34iDAgis85ocvzSVtZDaMOgYuKsvnrb7zhMnRoIa+91nPy/JuRc4pbsAXGzIa6kgg/qKpka63d\n7FA8FsvG+0eO9NJ9ExJsSYaMvs3A2mrxw2ccDu6He+6JKEsNQzJp0iJWrSpj165Dayq6I1r8NZou\nTsvURCnhi
ymHVaFFnvBJSgsXp+nwErINvwAuvNBm2DC49toKxo1LoFayPSwE1OIUt18tfKZWMpQ6\nDENNhnv2qAPxhQuzv6eLLophWY0YhiQQBlWTL+CHtRWqTeMWWLFiGldcMR8hJJaV4tZbWw8hdUe0\n+Gs0XRjPU9W/uW18AdZLmwtFHEe4PCsdXmhhHyyEErtYTLmBZla3UoYIYfSIPP9m5JziikWLkMkU\nmBZfutlhxgFYtAgeekiFwzLnIqNGeVx22UKEUM3Xk6ko+4dWsCnPhg/VNatWlTFp0mKiUZ9oVNls\n5Od33m0eS7T4azRdlJaxaSGUeGUE7AVsPGnTmv1tGKr8dSHgG9/IWEeHSGnwz39ewIknVvScVX+G\nzEl5WRnCdYk6DlfZNtvnqgyoIMiehwgB48e7GEaAEBAEgpUrp/LBBzZTpkBtLaxdq3r3zp4dp6LC\n5aKLetY5iRZ/jaaLkgljZwq8IJuXLoR6PiNmppl9nLtDkBKqqprbQfzoRxXs2mX3iIyVVrHTeVCu\n6jSViQgNG+YxbpzL+ec77NtnM3Fi1iU1lbKIx8vYsUNNFJYF3/ueygSaPNnmootUrQTQYyYALf4a\nTRcl17sms9rPCDxkJ4EzzoBzz4UxY2DpUti3z6O4WFk/1NbaTavXkhKX6mplHd2yKviIdCNv6Nwz\nkkxa5urVHo2NpZimOuy+8kp12N3QEGfLFpft2x1s22br1qxRXEEBrFx55LqB7ooWf42mi9KyeXh5\nuRIl08xaE0gJ27fDK6+oit7f/tZj4InnYUZ9UkmLW25dw7ZtagLYuVNlA5lm60ZobVbFtqamORNA\nV5sXcu0bEgl1ZvLDH7qYZqbeoZH9+2NN1c7nnGNzzjnqMDgTWsv9/bT0+u8pB+Va/DWaLkxuwVdR\nUXYiuOGG5tc1mb7Vx4ienAATLJngzhkx3uljN1lDQOtCfdjV7WG8k48wL3yiZCahwkI1lsxZyerV\n8PbbDpWVJoYRAJL9+xcxYEBZ0z16nppcw1BNjpWV2fvIeP33FEO8DFr8NZpuQmYimDs3G/rJkFmt\njngH/nY6hBKMFLy/HJiseozkvg+eB3PdplngsKvbdPxJJnxShsWOQqfJ0KyjnvrHatfQchL67W89\n/v73GG++qTJ2ampsli9XaZsgCcMU1dUxhgxROx3XtZvOV4RQPkoZ8vNtiovjPcoQD7T4azTdjlzz\nN9OEm2+GQYM8iotjwH5Ouz+C/+mA4zZHub22jBdWqdfNmJF+g1aW6wUjD7O6tW1qKuM8NtPl2cBh\nU7lNvEiJdUc99Q+7a2gxMxxuoshMQmec4TFpUoxBgx5h8OAkY8bAxRcv5JZbXFatKuOSSxYDPkFg\nIsQi3ngjhWFYTJwYx7LsNu8jP1+FzpYs6TrhrY6ixV+j6Wa0NHUbOdKjquo8wjDB28C+mVHefPhK\ntpcM4ABArToIbhJ/183GRBIJcF1qmUN1tToUHj360NXtsjqbn0mbIAQzZ4XfXk/9XAE/7K4hd2Yw\nTfZfOo05K8r4S9C6t47jwOjRHj/7Wakq2hJqayQERCJJxoxx+eMf57B5cyWwlA8//BQTJjxFZqcz\nZIhLPG63eR9dKbx1rNDir9F0Q3LPAnbvdpHSz/7QSHHSdcsYZEic5GJmz44zebK6uKHBo/60lyj4\nQkh+LRCG7KovTAubjWWpFFBoLuiHW+EfrRFdSwGtrDzMrqGF6f7nnpjPChZTSpwNvn1IeMm24Ze/\ndEmlfISQICFTAGGKCF/6ksP113skk+UEgU8qZRIEkXSOv8VzzzmUlbV9H8eyZWRXQYu/RtPNKShw\nEMJS3vNAGBoYRpgu6vIZM8alqMjOHur2b8S4F4pnQ/4Ogzer6poJWyzWvFdAZjKYMkV9PZxIHo6W\nAlpXd5hdg+PQMNqkfkRAQRXk10qi+JwvXKqtQ711PA9WrnRwHIuI0YhISU5
4Aax6GHDqdTg32uze\nPZc33vAxzQDDgD17prNq1eCmlNjGxpzdUQs6Et7qqmjx12i6Ofn5NiUla3jssRjbtsHOnWOYNau8\nqairutrBdeGkk9KHukISRuAfJYJP78yjcLKDtS4rbMBhJwPVSewwpGM7NYUOy+psLi9UPvmXFzr8\npEVcvbVdg+fBxo1QdI/qomL4kuLbDD6z0+KMqQ7x9OfPnZsVYcdRO5fHH49z1dgYN21ayAm1AUks\nVn2vjKtonrVjmhbPPVfGkiXZD28WGmvBR20Z2ZXR4q/R9ADy821GjbKZOVO1Ijx+N5w/einx6sns\nel2tlAsKHAwihKkAIwV9qwyuT1byRexmwgbNxR7aEfJoYTX9V1HJMFmONHyK8ixerIyzrM5uU0Az\noaHJk11GjkxhmpKwj0H9rReQf3oFZemD39zw0ZQp2f67maK2ZynDwcXF4eX7bZ67Cmy7edbOl79s\ns2hR9rMnTz787/ijtIzsymjx12h6CLathHlnzOOaReUYtT7XmutYfnMRrmsDNsU1U/nnC/PpVyX5\nVC30p65pxZsrbC0ng4ULsznwjnOYFM10bCdjNf1VuRQL9Rjfp6jOpWhO2wqaOYv2NxXCNQZSSMxI\nHgVXVUD6ELpl+Gj/fvXakSM9SkpcDhwopCh/M303A7VZh1Pbbt7GMrPKX7pUCX9bq/6eihZ/jaYH\nYdtguy6kfAgDIvhsus/lZ1JlybxYWcaIxxYjkz5JlBX01FZWvLmrXM+jqU+AEFBTk602zs188TzY\nucfhmoiFIX2SocUfmcxE1mEYPsaRguWeR8nTLtPCQn5ZW07j7ICGcQYnfLuyWfaR42S9jEwTBgxQ\nDVkyzqWGESJCML4FU2cv5H+/5uI4zSecTDXzqFEOdXU2Rd25E/tHRIu/RtPTyDmdTBkWzwYOQboC\neFmdTdFzcbxYjE194fpRR47hu64yO5NSfV26tPnKe2fM46SYy5yFDqkUNIopXPEVePH0Mv7v/Tbb\nUkWUGi5fq3Qoaitu4nmkzi3lwqRPKQKDkE/VhuRvF5gj6mBC88uFyDZe79sXxo3LOpciAVMVun1Y\nkmTxRJchOZ/bdPAdJAgSJlWPPsiPfzyDNWt6VljnSGjx12h6GjmnkzsKHTbcaCOSWY//9SEE31zM\nqNDHMBazfn2ctWvbjsO3zHS55hqPgQNdXn7ZofBVuGZRKcL3eVpGAEmEALnc4lXKSKXgeWnzorQ5\nrg7aWmDvjrmcnPSJEJDCIMQkiWh1t+C66lwDlPjffz88/LADWEACRAgpVeHcb3uU/HnNX6+qmdV1\nZiTkpyUz2VNbRCzWM5q0HC1a/DWabsZRWSKk4zbvedlVciqlzMs++MBlyhQfw1AFTqsejtF3U4zl\n2yEMy5gwwT7krW68Ef74R5g2zeOUU0q5dkqCa68xGfLHyzAf8kEGRFE+0iaSMPA58JSLlOq9IpHD\nR3yew+F/YSFR4ajvUsnEkXX828PZm8yEaiZOdDAMu8m2Oghg3z6bK69Uh7nRaCHJXZsp2A758w7N\nS9271yH0TQwjVBNEVYiDSz29SPnR4q/RdCvaW2nqujB8uMfo0VmL540bHb75TUv1ppURrvvHw7x5\nT4owCokPF9HQsKZZjD0W89i718WyHN55J0aQakQYEkSIeP8pkjKCIUCaqseklAGBabEm5QBq8pk6\nFWyyfkINI2nmlTO8zOaSR+L8SzKdoRO1mfYwZPQ4azyXAASx2BXceef3qK1Vk8BLL4Hj2NgZw7nd\nsOQ95eff0jHivPNsvjr8QX5aMpN+VSHH1eaxPuLwiyOlsPYwtPhrNN2I9laaTjp5AWfdPQuiAclk\nHrfeGmfbNuXvP3asy9cb90D+fMIoYIIR+uzfH6O6Osa+fTBo0BgGDixn2jQ/XREbQrqCVgRw/MuS\nh8Op7GEwzwuHB+dBUZ3L8nqH5+9SA5M
SJvXNzloNo02q7xOEpACLmpo4Th7ErnP57/0OA7C5bkDz\n+2gK1aR3FwMGPMF9963glltcamttnnhCee9nCtLamiAzIaM9tUWsrv13AJ49uYxfPNa7Qj6gxV+j\n6Va0q9LU8yh8diYH/i2lhF0kmDHD5bbbbHbsUP7+k2/26PvnhRhJn1BCgGDv3ocxjBQDBkAiYWKa\nUmXQCCW8QgAhfO5pQYoIu6+Gp6octm+3WVYHRXNU60TDUBk5Z57pUXBcBQ3DEuRvDdnvhIRSgoBU\nyuftF2MM+91ijjN8ZkctSmWcpwKbxYuzwh2NFja7NSEgGk1y0UUxSkrUruaVV+ympvUtrIuahN1x\n4GzTY2VQioWPj8XAa8p6nfCDFn+NplvRrkpT16Xg5RDjGyrzxRQmU6Y49OkDM2eq3cM3fmkzPuny\njQfvYkT5UwgjQIgwJ7UzIAyjhKFAygimqcI6ST/CMzsv4fR7/8yF0Ydwkou5/fY4e/aoIqyM82im\neTx5Caq/HHLaPMH+SUr4VcvJCH03k60FSDTyv4mRAM5rdFl1XyHJGzcj5SLI6VasuphJLrnkYUxT\nkkxaxG6rpPSlOt463SEM1S8mDJW/f+7v79HpLnm/9TEJMAyfqwpc6GXxftDir9F0O4660tRxyP9J\nHsW3JagfZ1Aw/UHy81Vjl0yvX9+H9dJmSP5ZjBRPql7BaVM0CaRSFg888AD9+tVx1lnK/Oz++10e\ne8yhpMRlRPQpTDNACJ/iYpeHHsqu2ONxePVVlz59fCAk7GPw7rShSOt1VPhGsHr1VLwdZZSzUIkx\nkut4hKks5IMRKbZeF5JKCQyjeQODTA/jSCRQP5MJ/nP0TE5ZIhltWNjE8bAxjObe/ABDyhxYrLZP\nR6w96MFo8ddoeirpbUK+65Kf3iY0NHicfbaybd6yxSYSUTuAqioHkVTLcRHA8dvh4HHH8+Cye1ix\nYgbRqOoelp8PH35oU1urPiLTGB4sNm92+GLgcd6HLuvucvje4zYjRzpUV2f7BBzofxuN75cTiSjf\noXHjxnDmQpeVCy7l8uefxERiksIEDpZIwigYhkyv9AUgmxrZBIHakUQiAaQMTqgKiBAiQ59zcXnR\nsMnLa0Xbe6JRz0egQ+IvhPgaUAGMAM6SUm5s47qLgV8CJvCwlPLnHflcjUZzlORsE3JbNd5zj8Wj\nj8b58pdtNm+G+fNtVs2+lWsvugu/H/zjSxBE3mPmzHKkhBNOqCMMHcCmoEC9dW5jeN93KNgOK0nH\n0p+wqFkQp2hGcz+dX//aZsmSIkaPVjYMN91UTjTqI/8zwt9vinJCbUBABMOQfKYqhZEMSUoDYUTo\n27eEgwc3IkRIEAiefvo61qwpY84clyd/WohdW04ynSq6VjiMHw9jx6qK5J0xj3Nx1ao/8zvppaKf\noaMr/63AvwLz27pACGEC84ALgb3ABiHEn6SUtR38bI1GcxRk8uMbG/c0a9WYTLqUl9tUVkKfPrDM\nuIrSSfcTsZJIAYaQRGSC8vJZCBGQSJi89daDOM6Mpk5iGSO1fxEeP5IVWCSIEAIJ3r+tghoqKJqh\n/HQWLIANv/K4fL+Lu9Vh6NUuhqHGI0zYdNN03r5nMCf8q8Orr0L9Ey67ZhcSKakjL8/huusASpHS\nRwiLU04pY948ld45aBA8eFcRB55ycaXDxoiNqFbuoGeFHvH0pBQssjDX9IBOLMeADom/lHI7ZLZj\nbXIW8JqU8vX0tb8HrgS0+Gs0HzO5q30hTISIEIYqlr9pk9PMV//VV10ifdIZPUAQCKQ0MM1UOvQS\nsmPHTPLyilizxiYWg02bILLBY7UsxSKBSUiAgUnI+APP4H97Hc+treS49+vY8EQhj1LelGUztaqS\nZNICmcCUBp87bgxV02aQKoQf3g9JbKUStSpzaMkSmHx6JU7RUp7bNpmZv8mmZ9o22I/beJ7Np10o\n2gM
PPaTOBRxcLFT1cNBTOrEcAz6JmP/JwJs5j/cCX2rtQiHEDGAGwODBgz/+kWk0PZzcxuxSwsCB\n03nnncHMnq1SI3N99XPj80JEiEansmrVGM47bxZSJtNdr0Ieesjl29+2+c1vVNHUnye6WCmfCCEp\nDF5nKP1H7uJgScjxVY2cufkGDpSE/HykgVUrVVwen6G1dSyaXclPS2bSvzrA2lbOTUYRLwibIGh+\nH2EIYxMeD9WUY9X4XMM6/hArwm6lt2/GZG7x4nSqZ+jgp6uHe/MBb0uOKP5CiGeAAa386AdSyieP\n5WCklAuABQDjx4+XR7hco9EcgdwGJoZhMWBAGWecYTNv3qHnnfn5zePztbU2990H1dVw000zESIk\nlcrj5ZedZj18T7ylkIMr4ECxoO+2COuH/CvDvnMXYRREIEEGyAgYyYBRs0361prsvUxw6sQn+Py6\nkxj6XxJDhiTxOSd0eV60vir/6pkx/ja6kROqJJ+qVYe6nme3WtCVe6ZbWGjzh83x5jF/zZHFX0p5\nQQc/Yx/w+ZzHg9LPaTSaj5mWgp6xbchdIWc6YuX63Tc0eGzYMJehQx2WLZvBX/9a1FRMtWtXThtF\nz6P/6hupvjtQ9XoEXwAACitJREFUVcKE9K13SUUFhikJhQohYahag2XOFTAaBs94guG8BF+EvUaE\nzy8zSYYW6wyHc0yPs1Muz0qHF9L596NGeRTfvYjdUcmbSSiaYzKkzGGJ23bFc/MzXZvemMt/OD6J\nsM8GYLgQ4lSU6H8DuPoT+FyNRkPzBia5tOUT1NDg8fLLpYwapbKCZs+Os3OnzaWX2rzzjjJ5axJV\n16V+ZLLJHkLKFP36vdRUBdx0HKisgLjq6kuoSi0lDLOGc3/76gDyi8bzSvEAZlXX8LV7yjFIEAiD\nuwbP4z/enEFxsYsZVZXKoSE48Ktp9LNtHHpeb91PCqMjLxZCfFUIsRc1pS4XQqxMP3+SEGIFgJQy\nBcwCVgLbgf+RUm7r2LA1Gk1Hac0nCGDVKhcpVaPzSMSnpMTlkkvggQfg/Wc89s6cS80CT13sOHym\nOoqRRIk96nC2KQVE1XJBCANWQv7aOoYPn9wk/AI44fm9VJ/9BB98dgEDnVkcHNaIkCERmeKOvbM4\nJ+KxZYujDocxMcw+FIxWLmyZ8M5PfnJkkztNczqa7fM48Hgrz78FXJrzeAWwoiOfpdFoji0Zn5sJ\noct608Fx1OHpnXc6/OIXVlMD+JoahwkT1IHrqrAUK/QJb7CIbY4zvMxm3Wku4ewYZ1y0ln6X1apq\nnszKX4JMe+uf+KwFv3Y46SSl0O++eDefXbiLZF/ZFDJKoRrL59dKNZHIgMXTXJYMnsPxx8cZNKh5\n+Ap0yv5HRVf4ajS9FBuPuCgFfILA4pUa1Vx969Zs8VZ1tUNJiVJWR2RTJpOBzyvzXb6z2Kay0uam\nP9uMrfT43c6J7ClPIQWkklFenXcZqQJo3DyAH+0sYy42NnDSSTM4aUARxEtpGJbASIYEwiCZyuOh\nqhv5MfcTEQFGXh5DyhxU218dtz+WaPHXaHorrouR9BEyIAx8HpvpMmieskTYsUMVbwFs26ZCOWeb\nDiEWQeiTlBbPymydwJo14Lo27xWupWR7jFcGwIv7y6gdpIq7wlC9R7MU+xz7ieLjC9kS1jH7Voct\nr9h41lUsnnZods5RNbLRHBVa/DWa3orjkDItCJUlwrOhw2Xpgq+KCnjmGZq6ZYUhrBc2v5+uUian\nLHTYEDSvE7BzVudnoao7M8KfeY/C5s7MeNi42DgGnDOBnBRUu6nvbkbwCwvbbhyvJ4T2o8Vfo+mt\n2DY7Hozz2EyXZ0OHTXk2dztKQCsqYN26rC++YSjBHV6mRHlu2ZEF1/NUs/fM4W5Lh81MttHYhMeH\nhstn5jnYM5o3VcnNSDIMdTidcSPNHFCXlqpxGoaaPGbM+Fh+Wz0OL
f4aTS+maIZS2vOXuhROhiI7\nWweQLZJSop0r9G3VCWTIiHYikRX+lg6brquE/5nwPKKhj7zBgqI1zd4oNyMp8z5CZNM6XTc7QYUh\nzJoFRUV6B3A0aPHXaHoznkdReXppvc6Comy+pLJ88A4pEMt5aevtEj2PRIXL2ITD+lB56l9wgdpN\n5Iqy40ChiJFHAgHIIAGxWLOLWnYuq6w8dCLKdAwDNUlo656jQ4u/RtObOUxT4FxTOMOwKC6ON5sA\nNm70mDzZZdOmbAtFGzUjnJvwWRVaXGTE2ZRnHyL8oB6fcgXwhHrcmj3kkaz3bVuFembNUrfQqn+/\nplW0+Gs0vZnDNAXONYULQ5/6erdJ/BsaPIqKShk50ueaayy+//04jmM3TSYiDDjO8LnzApe8CrvN\ng9mB3yuDFQtVV/VoFMrKDhnikfL4Z8xQoR596Ns+tPhrNL2ZwyytW5rCFRQ4TT+rr3cBVQVsGD6/\n/KWLbduA0zSZCMvCmVwI7lxqahxKy+1DQ0R2esLooHLrQq/2o8Vfo+nttKGcbZnCQfOJwTQtRo92\nsu+Ve1Kczs38gmExNoizPrQPMWDTyt05aPHXaDRt0pYp3OEmhiYxnzu36TwhIn3ON1xeELY2YOsi\naPHXaDQfiWYTQ2sB/ZzzBGFZfK3S4bg6HZfvKmjx12g0HaOtnM8W5wlFtk1RZ49V04QWf41G0zEO\nky6q4/ldlw75+Ws0Gk1TeMc0dUeVboRe+Ws0mo5xpEosTZdEi79Go+k4OrzT7dBhH41Go+mFaPHX\naDSaXogWf41Go+mFaPHXaDSaXogWf41Go+mFaPHXaDSaXoiQUnb2GFpFCPEusPsjvrw/8PdjOJzO\noLvfQ3cfP3T/e+ju44fufw+dMf4hUsrPHumiLiv+HUEIsVFKOb6zx9ERuvs9dPfxQ/e/h+4+fuj+\n99CVx6/DPhqNRtML0eKv0Wg0vZCeKv4LOnsAx4Dufg/dffzQ/e+hu48fuv89dNnx98iYv0aj0WgO\nT09d+Ws0Go3mMPQ48RdCXCyEeEUI8ZoQ4o7OHk97EUIsFEK8I4TY2tlj+SgIIT4vhFgjhKgVQmwT\nQny3s8fUXoQQfYQQLwkhqtP38OPOHtNHQQhhCiE2CyGWdfZYPgpCiL8KIWqEEFVCiI2dPZ72IoQo\nEEL8QQixQwixXQjRpWxPe1TYRwhhAq8CFwJ7gQ3AN6WUtZ06sHYghJgIvAfEpJRndvZ42osQYiAw\nUEq5SQhxPPAycFU3+xsI4NNSyveEEFHgL8B3pZQvdPLQ2oUQ4hZgPNBXSnl5Z4+nvQgh/gqMl1J2\nyzx/IcRiYJ2U8mEhhAV8SkpZ39njytDTVv5nAa9JKV+XUvrA74ErO3lM7UJKuRb4R2eP46MipXxb\nSrkp/f1BYDtwcueOqn1IxXvph9H0v261ShJCDAIuAx7u7LH0RoQQ+cBE4BEAKaXflYQfep74nwy8\nmfN4L91MeHoSQohTgDHAi507kvaTDplUAe8Aq6WU3e0eKoHvAWFnD6QDSGCVEOJlIcSMzh5MOzkV\neBdYlA69PSyE+HRnDyqXnib+mi6CEOIzwFKgXEp5oLPH016klIGUsgQYBJwlhOg2ITghxOXAO1LK\nlzt7LB3kbCnlWOASYGY6JNpdiABjgd9IKccA7wNd6gyyp4n/PuDzOY8HpZ/TfIKk4+RLgd9JKf/Y\n2ePpCOmt+hrg4s4eSzuYAHwlHTP/PXC+EOLRzh1S+5FS7kt/fQd4HBXW7S7sBfbm7Bj/gJoMugw9\nTfw3AMOFEKemD1i+Afypk8fUq0gflj4CbJdS3tfZ4/koCCE+K4QoSH9/HCqBYEfnjurokVLOkVIO\nklKegvo/8KyU8ludPKx2IYT4dDphgHS45CKg22TASSn3A28KIc5IP1UKdKmkhx7VwF1KmRJCzAJW\nAiawUEq5rZOH1S6EEP8FOEB/I
cRe4P9IKR/p3FG1iwnAvwE16Zg5wPellCs6cUztZSCwOJ09ZgD/\nI6XslumS3ZjPAY+rtQQRYImU8unOHVK7uRH4XXoh+jowtZPH04weleqp0Wg0mqOjp4V9NBqNRnMU\naPHXaDSaXogWf41Go+mFaPHXaDSaXogWf41Go+mFaPHXaDSaXogWf41Go+mFaPHXaDSaXsj/B5mj\nDFi6aXY5AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "t5McVnHmNiDw", + "colab_type": "text" + }, + "source": [ + "## Design a model\n", + "We're going to build a model that will take an input value (in this case, `x`) and use it to predict a numeric output value (the sine of `x`). This type of problem is called a _regression_.\n", + "\n", + "To achieve this, we're going to create a simple neural network. It will use _layers_ of _neurons_ to attempt to learn any patterns underlying the training data, so it can make predictions.\n", + "\n", + "To begin with, we'll define two layers. The first layer takes a single input (our `x` value) and runs it through 16 neurons. Based on this input, each neuron will become _activated_ to a certain degree based on its internal state (its _weight_ and _bias_ values). A neuron's degree of activation is expressed as a number.\n", + "\n", + "The activation numbers from our first layer will be fed as inputs to our second layer, which is a single neuron. It will apply its own weights and bias to these inputs and calculate its own activation, which will be output as our `y` value.\n", + "\n", + "**Note:** To learn more about how neural networks function, you can explore the [Learn TensorFlow](https://codelabs.developers.google.com/codelabs/tensorflow-lab1-helloworld) codelabs.\n", + "\n", + "The code in the following cell defines our model using [Keras](https://www.tensorflow.org/guide/keras), TensorFlow's high-level API for creating deep learning networks. 
Once the network is defined, we _compile_ it, specifying parameters that determine how it will be trained:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "gD60bE8cXQId", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# We'll use Keras to create a simple model architecture\n", + "from tensorflow.keras import layers\n", + "model_1 = tf.keras.Sequential()\n", + "\n", + "# First layer takes a scalar input and feeds it through 16 \"neurons\". The\n", + "# neurons decide whether to activate based on the 'relu' activation function.\n", + "model_1.add(layers.Dense(16, activation='relu', input_shape=(1,)))\n", + "\n", + "# Final layer is a single neuron, since we want to output a single value\n", + "model_1.add(layers.Dense(1))\n", + "\n", + "# Compile the model using a standard optimizer and loss function for regression\n", + "model_1.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O0idLyRLQeGj", + "colab_type": "text" + }, + "source": [ + "## Train the model\n", + "Once we've defined the model, we can use our data to _train_ it. Training involves passing an `x` value into the neural network, checking how far the network's output deviates from the expected `y` value, and adjusting the neurons' weights and biases so that the output is more likely to be correct the next time.\n", + "\n", + "Training runs this process on the full dataset multiple times, and each full run-through is known as an _epoch_. The number of epochs to run during training is a parameter we can set.\n", + "\n", + "During each epoch, data is run through the network in multiple _batches_. In each batch, several pieces of data are passed into the network, producing output values. The correctness of these outputs is measured in aggregate, and the network's weights and biases are adjusted accordingly, once per batch. 
The _batch size_ is also a parameter we can set.\n", + "\n", + "The code in the following cell uses the `x` and `y` values from our training data to train the model. It runs for 1000 _epochs_, with 16 pieces of data in each _batch_. We also pass in some data to use for _validation_. As you will see when you run the cell, training can take a while to complete:\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "p8hQKr4cVOdE", + "colab_type": "code", + "outputId": "3f1a7904-ffcd-4bb7-8bbb-bcd85a132128", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "# Train the model on our training data while validating on our validation set\n", + "history_1 = model_1.fit(x_train, y_train, epochs=1000, batch_size=16,\n", + " validation_data=(x_validate, y_validate))" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Train on 600 samples, validate on 200 samples\n", + "Epoch 1/1000\n", + "600/600 [==============================] - 0s 412us/sample - loss: 0.5016 - mae: 0.6297 - val_loss: 0.4922 - val_mae: 0.6235\n", + "Epoch 2/1000\n", + "600/600 [==============================] - 0s 105us/sample - loss: 0.3905 - mae: 0.5436 - val_loss: 0.4262 - val_mae: 0.5641\n", + "...\n", + "Epoch 998/1000\n", + "600/600 [==============================] - 0s 109us/sample - loss: 0.1535 - mae: 0.3068 - val_loss: 0.1507 - val_mae: 0.3113\n", + "Epoch 999/1000\n", + "600/600 [==============================] - 0s 100us/sample - loss: 0.1545 - mae: 0.3077 - val_loss: 0.1499 - val_mae: 0.3103\n", + "Epoch 1000/1000\n", + "600/600 [==============================] - 0s 132us/sample - loss: 0.1530 - mae: 0.3045 - val_loss: 0.1542 - val_mae: 0.3143\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cRE8KpEqVfaS", + "colab_type": "text" + }, + "source": [ + "## Check the training metrics\n", + "During training, the model's performance is constantly being measured against 
both our training data and the validation data that we set aside earlier. Training produces a log of data that tells us how the model's performance changed over the course of the training process.\n", + "\n", + "The following cells will display some of that data in a graphical form:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "CmvA-ksoln8r", + "colab_type": "code", + "outputId": "1b834831-81e8-4548-dd8c-f5edf2c3ff43", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 295 + } + }, + "source": [ + "# Draw a graph of the loss, which is the distance between\n", + "# the predicted and actual values during training and validation.\n", + "loss = history_1.history['loss']\n", + "val_loss = history_1.history['val_loss']\n", + "\n", + "epochs = range(1, len(loss) + 1)\n", + "\n", + "plt.plot(epochs, loss, 'g.', label='Training loss')\n", + "plt.plot(epochs, val_loss, 'b', label='Validation loss')\n", + "plt.title('Training and validation loss')\n", + "plt.xlabel('Epochs')\n", + "plt.ylabel('Loss')\n", + "plt.legend()\n", + "plt.show()" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYsAAAEWCAYAAACXGLsWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi40LCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcv7US4rQAAIABJREFUeJzt3Xd8FHX6wPHPk5AQamhRWiBRUHqN\nYA6BIIjYQJTzQFHh9FB/Kp7lFMspopzlPAse56l32FCxIIoKogIRPKIUpRcJECDUEDoB0p7fHzNJ\nNstuNm0JhOf9eu0rM9/5zsx3djb7zLfsjKgqxhhjTFFCKroAxhhjTn0WLIwxxgRkwcIYY0xAFiyM\nMcYEZMHCGGNMQBYsjDHGBGTBwpwUIhIqIodFpFl55q1IItJCRMp97LmI9BORFI/5dSLSszh5S7Gv\n/4jII6Vdv4jtPi0ib5f3dk3FqVLRBTCnJhE57DFbHTgO5Ljzt6nq+yXZnqrmADXLO++ZQFXPL4/t\niMitwHBVTfDY9q3lsW1T+VmwMD6pav6XtXvlequqfu8vv4hUUdXsk1E2Y8zJZ81QplTcZoaPRORD\nETkEDBeReBH5SUT2i8gOEZkgImFu/ioioiIS485PdpfPFJFDIpIkIrElzesuv0xEfhORAyLyqoj8\nT0RG+Cl3ccp4m4gki8g+EZngsW6oiLwkIukishEYUMT786iITPFKmygiL7rTt4rIGvd4NrhX/f62\nlSoiCe50dRF5zy3bKqCrV97HRGSju91VIjLQTW8P/BPo6Tbx7fF4b8d6rH+7e+zpIvK5iDQqznsT\niIgMdsuzX0TmiMj5HsseEZHtInJQRNZ6HOuFIvKLm75LRP5e3P2ZIFBVe9mryBeQAvTzSnsayASu\nwrnoqAZcAHTHqbGeA/wG3OXmrwIoEOPOTwb2AHFAGPARMLkUec8CDgGD3GX3AVnACD/HUpwyfgFE\nAjHA3rxjB+4CVgFNgfrAPOdfyOd+zgEOAzU8tr0biHPnr3LzCHAxcBTo4C7rB6R4bCsVSHCnXwAS\ngbpAc2C1V97rgEbuObneLcPZ7rJbgUSvck4GxrrT/d0ydgIigH8Bc4rz3vg4/qeBt93p1m45LnbP\n0SPAOne6LbAZaOjmjQXOcacXAcPc6VpA94r+XziTX1azMGXxo6p+qaq5qnpUVRep6s+qmq2qG4E3\ngN5FrP+pqi5W1SzgfZwvqZLmvRJYqqpfuMtewgksPhWzjM+o6gFVTcH5Ys7b13XAS6qaqqrpwLNF\n7GcjsBIniAFcAuxT1cXu8i9VdaM65gCzAZ+d2F6uA55W1X2quhmntuC5349VdYd7Tj7ACfRxxdgu\nwA3Af1R1qaoeA8YAvUWkqUcef+9NUYYC01V1jnuOnsUJON2BbJzA1NZtytzkvnfgBP2WIlJfVQ+p\n6s/FPA4TBBYsTFls9ZwRkVYi8rWI7BSRg8A4oEER6+/0mM6g6E5tf3kbe5ZDVRXnStynYpaxWPvC\nuSIuygfAMHf6enc+rxxXisjPIrJXRPbjXNUX9V7laVRUGURkhIgsc5t79gOtirldcI4vf3uqehDY\nBzTxyFOSc+Zvu7k456iJqq4D7sc5D7vdZs2GbtaRQBtgnYgsFJHLi3kcJggsWJiy8B42+jrO1XQL\nVa0NPI7TzBJMO3CahQAQEaHwl5u3spRxBxDtMR9oaO/HQD8RaYJTw/jALWM14FPgGZwmojrAt8Us\nx05/ZRCRc4DXgDuA+u5213psN9Aw3+04TVt526uF09y1rRjlKsl2Q3DO2TYAVZ2sqj1wmqBCcd4X\nVHWdqg7FaWr8BzBVRCLKWBZTShYsTHmqBRwAjohIa+C2k7DPr4AuInKViFQB7gGiglTGj4E/i0gT\nEakPPFRUZlXdCfwIvA2sU9X17qKqQDiQBuSIyJVA3xKU4RERq
SPO71Du8lhWEycgpOHEzT/h1Czy\n7AKa5nXo+/AhcIuIdBCRqjhf2vNV1W9NrQRlHigiCe6+/4LTz/SziLQWkT7u/o66r1ycA7hRRBq4\nNZED7rHllrEsppQsWJjydD9wM84Xwes4HdFBpaq7gD8ALwLpwLnArzi/CynvMr6G07ewAqfz9dNi\nrPMBTod1fhOUqu4H7gWm4XQSD8EJesXxBE4NJwWYCbzrsd3lwKvAQjfP+YBnO/93wHpgl4h4Nifl\nrf8NTnPQNHf9Zjj9GGWiqqtw3vPXcALZAGCg239RFXgep59pJ05N5lF31cuBNeKMtnsB+IOqZpa1\nPKZ0xGniNaZyEJFQnGaPIao6v6LLY0xlYTULc9oTkQFus0xV4K84o2gWVnCxjKlULFiYyuAiYCNO\nE8elwGBV9dcMZYwpBWuGMsYYE5DVLIwxxgRUaW4k2KBBA42JianoYhhjzGllyZIle1S1qOHmQCUK\nFjExMSxevLiii2GMMacVEQl0JwLAmqGMMcYUgwULY4wxAVmwMMYYE1Cl6bMwxpxcWVlZpKamcuzY\nsYouiimGiIgImjZtSliYv1uDFc2ChTGmVFJTU6lVqxYxMTE4N/s1pypVJT09ndTUVGJjYwOv4IM1\nQxljSuXYsWPUr1/fAsVpQESoX79+mWqBQQ0W7j171rnP7B3jY/kIEUkTkaXu61aPZTeLyHr3dXMw\ny5m0NYln5j9D0takYO7GmErHAsXpo6znKmjNUO7dPyfiPE4yFVgkItNVdbVX1o9U9S6vdevh3Io5\nDuce9kvcdfeVdzmTtibR992+ZOZkEh4azuybZhMfHV/euzHGmNNaMGsW3YBk9znDmcAUCp5HHMil\nwHequtcNEN/h3AO/3CWmJJKZk0mO5pCZk0liSmIwdmOMKWfp6el06tSJTp060bBhQ5o0aZI/n5lZ\nvMdejBw5knXr1hWZZ+LEibz//vvlUWQuuugili5dWi7bOtmC2cHdhMLPCk7FeUC7t2tFpBfwG3Cv\nqm71s+4Jj8oUkVHAKIBmzQI94dK3hJgEwkPD82sWCTEJpdqOMebkql+/fv4X79ixY6lZsyYPPPBA\noTyqiqoSEuL7uvitt94KuJ8777yz7IWtBCq6g/tLIEZVO+DUHt4pycqq+oaqxqlqXFRUwFub+BQf\nHc/sm2bzVJ+nrAnKmCA7Gf2DycnJtGnThhtuuIG2bduyY8cORo0aRVxcHG3btmXcuHH5efOu9LOz\ns6lTpw5jxoyhY8eOxMfHs3v3bgAee+wxXn755fz8Y8aMoVu3bpx//vksWLAAgCNHjnDttdfSpk0b\nhgwZQlxcXMAaxOTJk2nfvj3t2rXjkUceASA7O5sbb7wxP33ChAkAvPTSS7Rp04YOHTowfPjwcn/P\niiOYNYttFH6wfP4D2vOoarrH7H9wHq+Yt26C17qJ5V5CV3x0vAUJY4LsZPYPrl27lnfffZe4uDgA\nnn32WerVq0d2djZ9+vRhyJAhtGnTptA6Bw4coHfv3jz77LPcd999TJo0iTFjThiXg6qycOFCpk+f\nzrhx4/jmm2949dVXadiwIVOnTmXZsmV06dKlyPKlpqby2GOPsXjxYiIjI+nXrx9fffUVUVFR7Nmz\nhxUrVgCwf/9+AJ5//nk2b95MeHh4ftrJFsyaxSKgpYjEikg4MBSY7plBRBp5zA4E1rjTs4D+IlJX\nROoC/d20cpeZCfPmwbZtgfMaY0rvZPYPnnvuufmBAuDDDz+kS5cudOnShTVr1rB6tfc4G6hWrRqX\nXXYZAF27diUlJcXntq+55poT8vz4448MHToUgI4dO9K2bdsiy/fzzz9z8cUX06BBA8LCwrj++uuZ\nN28eLVq0YN26dYwePZpZs2YRGRkJQNu2bRk+fDjvv/9+qX9UV1ZBCxaqmg3chfMlvwb4WFVXicg4\nERnoZhstIqtEZBkwGhjhr
rsXeAon4CwCxrlp5W7/fujdGz7/PBhbN8bkyesfDJXQoPcP1qhRI396\n/fr1vPLKK8yZM4fly5czYMAAn783CA8Pz58ODQ0lOzvb57arVq0aME9p1a9fn+XLl9OzZ08mTpzI\nbbfdBsCsWbO4/fbbWbRoEd26dSMnJ6dc91scQf0Ft6rOAGZ4pT3uMf0w8LCfdScBk4JZPoDQUOdv\nbm6w92TMmS2vfzAxJZGEmIST1vR78OBBatWqRe3atdmxYwezZs1iwIDyHVzZo0cPPv74Y3r27MmK\nFSt81lw8de/enQceeID09HQiIyOZMmUKDzzwAGlpaURERPD73/+eli1bcuutt5KTk0NqaioXX3wx\nF110EdHR0WRkZFCrVq1yPYZAzvjbfeQNkqiAQG3MGaci+ge7dOlCmzZtaNWqFc2bN6dHjx7lvo+7\n776bm266iTZt2uS/8pqQfGnatClPPfUUCQkJqCpXXXUVV1xxBb/88gu33HILqoqI8Nxzz5Gdnc31\n11/PoUOHyM3N5YEHHjjpgQIq0TO44+LitDQPPzp4ECIj4R//gPvuC0LBjKmk1qxZQ+vWrSu6GKeE\n7OxssrOziYiIYP369fTv35/169dTpcqpdT3u65yJyBJVjfOzSr5T60gqgNUsjDFldfjwYfr27Ut2\ndjaqyuuvv37KBYqyqlxHUwrWZ2GMKas6deqwZMmSii5GUFX0j/IqnNUsjDEmsDM+WFjNwhhjAjvj\ng4XVLIwxJjALFu47YDULY4zx74wPFuAEDKtZGHN66dOnD7NmFb4L0Msvv8wdd9xR5Ho1a9YEYPv2\n7QwZMsRnnoSEBAINxX/55ZfJyMjIn7/88svL5b5NY8eO5YUXXijzdsqbBQucfgurWRhzehk2bBhT\npkwplDZlyhSGDRtWrPUbN27Mp59+Wur9eweLGTNmUKdOnVJv71RnwQKrWRhzOhoyZAhff/11/oOO\nUlJS2L59Oz179sz/3UOXLl1o3749X3zxxQnrp6Sk0K5dOwCOHj3K0KFDad26NYMHD+bo0aP5+e64\n447825s/8cQTAEyYMIHt27fTp08f+vTpA0BMTAx79uwB4MUXX6Rdu3a0a9cu//bmKSkptG7dmj/9\n6U+0bduW/v37F9qPL0uXLuXCCy+kQ4cODB48mH379uXvP++W5Xk3MPzhhx/yH/7UuXNnDh06VOr3\n1pcz/ncWYDULY8rqz3+G8n4AXKdO4H7P+lSvXj26devGzJkzGTRoEFOmTOG6665DRIiIiGDatGnU\nrl2bPXv2cOGFFzJw4EC/z6F+7bXXqF69OmvWrGH58uWFbjE+fvx46tWrR05ODn379mX58uWMHj2a\nF198kblz59KgQYNC21qyZAlvvfUWP//8M6pK9+7d6d27N3Xr1mX9+vV8+OGHvPnmm1x33XVMnTq1\nyOdT3HTTTbz66qv07t2bxx9/nCeffJKXX36ZZ599lk2bNlG1atX8pq8XXniBiRMn0qNHDw4fPkxE\nREQJ3u3ArGaB1SyMOV15NkV5NkGpKo888ggdOnSgX79+bNu2jV27dvndzrx58/K/tDt06ECHDh3y\nl3388cd06dKFzp07s2rVqoA3Cfzxxx8ZPHgwNWrUoGbNmlxzzTXMnz8fgNjYWDp16gQUfRt0cJ6v\nsX//fnr37g3AzTffzLx58/LLeMMNNzB58uT8X4r36NGD++67jwkTJrB///5y/wW51SywmoUxZVVU\nDSCYBg0axL333ssvv/xCRkYGXbt2BeD9998nLS2NJUuWEBYWRkxMjM/bkgeyadMmXnjhBRYtWkTd\nunUZMWJEqbaTJ+/25uDc4jxQM5Q/X3/9NfPmzePLL79k/PjxrFixgjFjxnDFFVcwY8YMevTowaxZ\ns2jVqlWpy+rNahZYzcKY01XNmjXp06cPf/zjHwt1bB84cICzzjqLsLAw5s6dy+bNm4vcTq9
evfjg\ngw8AWLlyJcuXLwec25vXqFGDyMhIdu3axcyZM/PXqVWrls9+gZ49e/L555+TkZHBkSNHmDZtGj17\n9izxsUVGRlK3bt38Wsl7771H7969yc3NZevWrfTp04fnnnuOAwcOcPjwYTZs2ED79u156KGHuOCC\nC1i7dm2J91kUq1ng1CwsWBhzeho2bBiDBw8uNDLqhhtu4KqrrqJ9+/bExcUFvMK+4447GDlyJK1b\nt6Z169b5NZSOHTvSuXNnWrVqRXR0dKHbm48aNYoBAwbQuHFj5s6dm5/epUsXRowYQbdu3QC49dZb\n6dy5c5FNTv6888473H777WRkZHDOOefw1ltvkZOTw/Dhwzlw4ACqyujRo6lTpw5//etfmTt3LiEh\nIbRt2zb/qX/l5Yy/RTnA2WfD4MHw73+Xc6GMqcTsFuWnn7LcotyaobCahTHGBGLBAuvgNsaYQIIa\nLERkgIisE5FkERlTRL5rRURFJM6djxGRoyKy1H0FtYHIOriNKZ3K0ox9JijruQpaB7eIhAITgUuA\nVGCRiExX1dVe+WoB9wA/e21ig6p2Clb5PFnNwpiSi4iIID09nfr16/v9sZs5Nagq6enpZfqhXjBH\nQ3UDklV1I4CITAEGAd6/aHkKeA74SxDLUiSrWRhTck2bNiU1NZW0tLSKLoophoiICJo2bVrq9YMZ\nLJoAWz3mU4HunhlEpAsQrapfi4h3sIgVkV+Bg8BjqjrfewciMgoYBdCsWbNSF9RqFsaUXFhYGLGx\nsRVdDHOSVFgHt4iEAC8C9/tYvANopqqdgfuAD0SktncmVX1DVeNUNS4qKqrUZbGahTHGFC2YwWIb\nEO0x39RNy1MLaAckikgKcCEwXUTiVPW4qqYDqOoSYANwXrAKajULY4wpWjCDxSKgpYjEikg4MBSY\nnrdQVQ+oagNVjVHVGOAnYKCqLhaRKLeDHBE5B2gJbAxWQa1mYYwxRQtan4WqZovIXcAsIBSYpKqr\nRGQcsFhVpxexei9gnIhkAbnA7aq6N1hltZqFMcYULaj3hlLVGcAMr7TH/eRN8JieCkwNZtk8Wc3C\nGGOKZr/gxmoWxhgTiAULrGZhjDGBWLDAahbGGBOIBQusZmGMMYFYsMBqFsYYE4gFC6xmYYwxgViw\nwGoWxhgTiAULrGZhjDGBWLDAahbGGBOIBQusZmGMMYFYsMBqFsYYE4gFC6xmYYwxgViwwKlZWLAw\nxhj/LFjg1CysGcoYY/yzYIHVLIwxJhALFlgHtzHGBGLBAuvgNsaYQCxYYDULY4wJxIIFVrMwxphA\nghosRGSAiKwTkWQRGVNEvmtFREUkziPtYXe9dSJyaTDLaTULY4wpWpVgbVhEQoGJwCVAKrBIRKar\n6mqvfLWAe4CfPdLaAEOBtkBj4HsROU9Vg3L9bzULY4wpWjBrFt2AZFXdqKqZwBRgkI98TwHPAcc8\n0gYBU1T1uKpuApLd7QWF1SyMMaZowQwWTYCtHvOpblo+EekCRKvq1yVd111/lIgsFpHFaWlppS6o\n1SyMMaZoFdbBLSIhwIvA/aXdhqq+oapxqhoXFRVV6rJYzcIYY4oWtD4LYBsQ7THf1E3LUwtoBySK\nCEBDYLqIDCzGuuXKahbGGFO0YNYsFgEtRSRWRMJxOqyn5y1U1QOq2kBVY1Q1BvgJGKiqi918Q0Wk\nqojEAi2BhcEq6K6M7RzPziRpa1KwdmGMMae1oAULVc0G7gJmAWuAj1V1lYiMc2sPRa27CvgYWA18\nA9wZrJFQSVuT+Hj1h2Rl59L33b4WMIwxxodgNkOhqjOAGV5pj/vJm+A1Px4YH7TCuRJTEslBIDeE\nzJxMElMSiY+OD/ZujTHmtHLG/4I7ISaB0BABDSU8NJyEmISKLpIxxpxyzvhgER8dz02drgcNZfZN\ns61WYYwxPpzxwQKgWV1n4FX3JhYojDHGFwsWOL+zAPu
thTHG+GPBgoJgkZ1dseUwxphTlQULoIo7\nJsx+mGeMMb5ZsKAgWFjNwhhjfLNggQULY4wJxIIFFiyMMSYQCxZYsDDGmEAsWGDBwhhjArFggQUL\nY4wJxIIFFiyMMSYQCxZYsDDGmEAsWGDBwhhjArFggQULY4wJxIIFFiyMMSYQCxZYsDDGmEAsWGDB\nwhhjAglqsBCRASKyTkSSRWSMj+W3i8gKEVkqIj+KSBs3PUZEjrrpS0Xk38EspwULY4wpWpVgbVhE\nQoGJwCVAKrBIRKar6mqPbB+o6r/d/AOBF4EB7rINqtopWOXzZMHCGGOKFsyaRTcgWVU3qmomMAUY\n5JlBVQ96zNYANIjl8Wt1+nIAVuxYUxG7N8aYU14wg0UTYKvHfKqbVoiI3CkiG4DngdEei2JF5FcR\n+UFEevragYiMEpHFIrI4LS2tVIVM2prEnTNvA+DR758gaWtSqbZjjDGVWYV3cKvqRFU9F3gIeMxN\n3gE0U9XOwH3AByJS28e6b6hqnKrGRUVFlWr/iSmJZHEUgOxsJTElsVTbMcaYyiyYwWIbEO0x39RN\n82cKcDWAqh5X1XR3egmwATgvGIVMiEkgrIoAEEoECTEJwdiNMcac1oIZLBYBLUUkVkTCgaHAdM8M\nItLSY/YKYL2bHuV2kCMi5wAtgY3BKGR8dDzvXjsJgEd+9zjx0fHB2I0xxpzWgjYaSlWzReQuYBYQ\nCkxS1VUiMg5YrKrTgbtEpB+QBewDbnZX7wWME5EsIBe4XVX3BqusF0R3BiA2smWAnMYYc2YKWrAA\nUNUZwAyvtMc9pu/xs95UYGowy+bJhs4aY0zRitUMJSLnikhVdzpBREaLSJ3gFu3ksWBhjDFFK26f\nxVQgR0RaAG/gdFx/ELRSnWQWLIwxpmjFDRa5qpoNDAZeVdW/AI2CV6yTy4KFMcYUrbjBIktEhuF0\nQH/lpoUFp0gnnwULY4wpWnGDxUggHhivqptEJBZ4L3jFOrksWBhjTNGKNRrKvfnfaAARqQvUUtXn\nglmwkykvWGRlVWw5jDHmVFXc0VCJIlJbROoBvwBvisiLwS3ayRMa6vy1moUxxvhW3GaoSPcOsdcA\n76pqd6Bf8Ip1coWEOC8LFsYY41txg0UVEWkEXEdBB3elUqWKBQtjjPGnuMFiHM5tOzao6iL3fk3r\ng1esk8+ChTHG+FfcDu5PgE885jcC1warUBUhLMyChTHG+FPcDu6mIjJNRHa7r6ki0jTYhTuZrGZh\njDH+FbcZ6i2c24s3dl9fummVhgULY4zxr7jBIkpV31LVbPf1NlC6R9OdoixYGGOMf8UNFukiMlxE\nQt3XcCA9mAU72XLkGL9uW27P4DbGGB+KGyz+iDNsdifO87GHACOCVKaTLmlrErsytvHr9hX0fbev\nBQxjjPFSrGChqptVdaCqRqnqWap6NZVoNFRiSiIakoXmhJKZk0liSmJFF8kYY04pZXkG933lVooK\nlhCTgITkgIYRHhpOQkxCRRfJGGNOKWV5rKqUWykqWHx0PC0aHKFKvTD+e9Ns4qPjK7pIxhhzSilL\nzUIDZRCRASKyTkSSRWSMj+W3i8gKEVkqIj+KSBuPZQ+7660TkUvLUM5iqV2tBudEnmeBwhhjfCiy\nZiEih/AdFASoFmDdUGAicAmQCiwSkenu7c7zfKCq/3bzDwReBAa4QWMo0Bbndx3fi8h5qppTvMMq\nORs6a4wx/hUZLFS1Vhm23Q1Idm8NgohMAQYB+cHCvZNtnhoUBKZBwBRVPQ5sEpFkd3tBG6ZkwcIY\nY/wrS59FIE2ArR7zqUB370wicidOZ3k4cLHHuj95rdvEx7qjgFEAzZo1K1NhLVgYY4x/ZemzKBeq\nOlFVzwUeAh4r4bpvqGqcqsZFRZXtB+UWLIwxxr9gBottQLTHfFM3zZ8pwNWlXLfMLFgYY4x/wQwW\ni4CWIhIrIuE4Hdb
TPTOISEuP2SsoeEbGdGCoiFQVkVigJbAwiGW1YGGMMUUIWp+FqmaLyF04D00K\nBSap6ioRGQcsVtXpwF0i0g/IAvYBN7vrrhKRj3E6w7OBO4M5EgosWBhjTFGC2cGNqs4AZnilPe4x\nfU8R644HxgevdIVZsDDGGP8qvIP7VGHBwhhj/LNg4bJgYYwx/lmwcFmwMMYY/yxYuPYe38W+jIP2\nLAtjjPHBggXOw49mbPiSg0cz7OFHxhjjgwULnIcf5cpxyKliDz8yxhgfLFjgPPwoJFQh1x5+ZIwx\nvliwwHn40R86XEMY1ZltDz8yxpgTBPVHeaeTmPqN0RwsUBhjjA9Ws3CFhTlDZ3NzK7okxhhz6rFg\n4QoPd/5mZVVsOYwx5lRkwcJlwcIYY/yzYOEKC3P+ZmZWbDmMMeZUZMHCZTULY4zxz4KFy2oWxhjj\nnwULV17NwoKFMcacyIKFy5qhjDHGPwsWLmuGMsYY/yxYuDYeWAvAkq3LK7gkxhhz6glqsBCRASKy\nTkSSRWSMj+X3ichqEVkuIrNFpLnHshwRWeq+pgeznElbk3jsh4cAuH36aLtFuTHGeAlasBCRUGAi\ncBnQBhgmIm28sv0KxKlqB+BT4HmPZUdVtZP7GhiscoJzi/JsyQAgOzPUblFujDFeglmz6AYkq+pG\nVc0EpgCDPDOo6lxVzXBnfwKaBrE8fiXEJFClqvNM1Sq5Ne0W5cYY4yWYwaIJsNVjPtVN8+cWYKbH\nfISILBaRn0Tkal8riMgoN8/itLS0Uhc0Pjqe169+GYAnL3re7jxrjDFeTolblIvIcCAO6O2R3FxV\nt4nIOcAcEVmhqhs811PVN4A3AOLi4rQsZbgwtqOz05rnl2UzxhhTKQWzZrENiPaYb+qmFSIi/YBH\ngYGqejwvXVW3uX83AolA5yCWlWrVnL9HjwZzL8YYc3oKZrBYBLQUkVgRCQeGAoVGNYlIZ+B1nECx\n2yO9rohUdacbAD2A1UEsqwULY4wpQtCaoVQ1W0TuAmYBocAkVV0lIuOAxao6Hfg7UBP4REQAtrgj\nn1oDr4tILk5Ae1ZVLVgYY0wFCWqfharOAGZ4pT3uMd3Pz3oLgPbBLJs3CxbGGOOf/YLbVaWK87Jg\nYYwxJ7Jg4aF6dThypKJLYYwxpx4LFq6krUlI1YMk79gdOLMxxpxhLFjgBIq+7/blgGzhm1UL7N5Q\nxhjjxYIFzr2hMnMyIfwgucdq2b2hjDHGiwULnHtDhYeGQ8RB5Hik3RvKGGO8WLDAuTfU7Jtm0z66\nGdFV29q9oYwxxosFC1d8dDxdm7chN7NaRRfFGGNOORYsPFSrZr+zMMYYXyxYeKhe3YKFMcb4YsHC\nQ3pWKkePKgu22NBZY4zxZMEY0jgoAAAd/klEQVTClbQ1icmr30RV6DvpcvuthTHGeLBg4UpMSSQn\n9DAAmcftOdzGGOPJgoXLeQ53FgDhufZbC2OM8WTBwhUfHc+YhLsBePfKT+y3FsYY48GChYdOzVsC\n0LJWlwouiTHGnFosWHjYkb0KgPlrVlVwSYwx5tRiwcKVtDWJ++ePAOCB6X+z0VDGGOPBgoUrMSWR\nrKo7Acg6HGmjoYwxxkNQg4WIDBCRdSKSLCJjfCy/T0RWi8hyEZktIs09lt0sIuvd183BLCe4d56t\ndQiA0GNn2WgoY4zxELRgISKhwETgMqANMExE2nhl+xWIU9UOwKfA8+669YAngO5AN+AJEakbrLKC\nMxpqzh9nEl4tkyGxo2w0lDHGeAhmzaIbkKyqG1U1E5gCDPLMoKpzVTXDnf0JaOpOXwp8p6p7VXUf\n8B0wIIhlzVet1lEO7qtyMnZljDGnjWAGiybAVo/5VDfNn1uAmSVZV0RGichiEVmclpZWpsLmP1o1\nZCPfrFhsHdzGGOPhlOjgFpHhQBzw95Ksp6pvqGqcqsZFRUWVqQz5j1atlk5uRh3r4
DbGGA/BDBbb\ngGiP+aZuWiEi0g94FBioqsdLsm55ynu0qlTfixytbx3cxhjjIZjBYhHQUkRiRSQcGApM98wgIp2B\n13ECxW6PRbOA/iJS1+3Y7u+mBU3eo1VbN2tI1czGwdyVMcacdoIWLFQ1G7gL50t+DfCxqq4SkXEi\nMtDN9negJvCJiCwVkenuunuBp3ACziJgnJsWdL9lJHHsUHUufruf9VsYY4wrqMN+VHUGMMMr7XGP\n6X5FrDsJmBS80p0oMSWRnGppoKFkHq5FYkqiDaE1xhhOkQ7uU0VCTAKhjVYCELKju/VbGGOMy4KF\nl5Cz1jgTe2MrtiDGGHMKsWDhoeD+ULlk743myyWLKrpIxhhzSrBg4aF+9fpoSDZU2wtJ9/PM4NEV\nXSRjjDklWLDwkJ6RToiEQPU9FV0UU0mtXQs33gjZ2RVdEmNKxoKFh4SYBKqGVoWGy/PTcnOdvxkZ\nMGkSqFZQ4UylcOONMHky/PprRZfEmJKxYOEhPjqelwe8TEjbqflpc9YuBGDMGLjlFpgV1J8GmsrO\nLjbM6cqChZdfd/xKbqtP8ufvfGQLANu3O/OHDlVEqYwxpmJZsPCy8/BOCFE4/wsAfvtiSKFfcn/i\nxpHcXEhOrogSGlMgIwOeeebM6gP54gtISSn/7ebklP82i+Nf/4LvvquYfZeEBQsvDWs2dCa6vpGf\n9tbnG1i3zpn+5BNYuRKeegpatoT16wuvn5EB48ZBZmbJ9vvee3DNNWUo+Blq1Sr46KPSr//llzB1\nauB8p6onn4RHHnE+PyXx8cfwpz8Fp0zBdvXV0LVr+W5zwQKoUgXeead8t1scd94J/fuf/P2WlAUL\nLzd1vIkQQqD+uvy0N0cPZ+XKgjy//QbffutM79pVkL5smdOv8cQT8N//+t7+3LkgAnu8BlzddBNM\nm1ZOB3EGadcOhg4tmN/itBqSng779wdef+BAGDIkOGU7Gfbtc/4eP150Pm9/+AP85z/lX55gyxtw\nsrcc7xT34Yfw9tvO9Pffl992KxsLFl7io+Np1aAV1N3oN8+11zpXIgChoc7fnBzo1AmmTHHmDx/2\nve7zzzt/F/n5vd/IkU7NZPdu38v9OXgQGjSAOXNKtl5FUg3cnLB/f/H7iWbPhubNndpfgwZQv/6J\n+1u7tvjlGzMGbr+9+PlLIiMjcB5vubnw7LNw4EBBWl7TSZVK+HDHl15yLqyOHi1IK2mNvTiuvx7e\nfNOZFin/7VcWFix8OK/+eU6/xY1+73OY73//g88+O/Gf1V/7Z96H0d+omLffdmomt93mzE+bBhdd\nVHBF5c+yZc7V9F//GrDIJ0VyMmRlFZ3nX/+C2Niih5HWrQtNinq+ois7G5YscabzmqW837N33oHW\nrZ2/IvDHP/re1ldfOV9Kzz0Hr78eeN9563z9dfHyAiQkFPR/FdeMGfDww3DffQVpeZ+zvIsWEadZ\no7iOHIE+fZzmvLJIT3fOU945KI6UlKKDZt6FlWcNMRjBorzs2FHRJQguCxY+PNjjQQSB5vMC5v3L\nX+Cep1afkO4vWIS473jeF5mqcwXlLe/q8ZprnIDkr6aSx7OGA87VeKB1fMnIgEsvhTVrSr4uwPLl\nThBo2RIeeKDovImJzt9AAwWKU7M4erTgPfXXB5H3hfivfzl/33rrxDw//ABXXVU46L77btH7zsx0\n1rnyysJXwYF8/nnx8wIcO+b89fzy9A4WUHB8xTFnjnMeAp2r4mxn+3ans91Tdrb/C6PYWBg0yAkw\nG92K/OrVTsCb4XGvas+g79nc5mu7WVmBL6yK4mubqvDgg0Vf1CxeDI0bFzRn5dm/P/Dn53RhwcKH\n+Oh4/vfH/1G/Zm24rXPA/KnbTvx0PjnnKeo9V4/oF6Op9bdaVB11MZH3JPDD5kQAJv78GoOnDKbf\nC/cUulLMM3du4fkjR3zvW9XpP8n7kOd9edSu7
f+KPCcH7r0XfvnlxGXz5zvbu/tu3+sG0rFjwZVt\noN+k5JXZs+q/ahX89FPJf4+QkRH4S6JOHeevry/08ePhm28g71HueVe1AJ9+WjD9wgtOTdLTgw8W\nTD/0UMH01Klwxx3w5z/DaB93jinuMf7yi/MebXWfSv/ZZ05t4NChwsGiNFfdJe3r8LZoUeHOde9j\nCgvz/VnKq3V+/z3ExcG55zp9gR9+6KR79t95ltFzev78E7cbHu6/xpiW5pyrko4cO3wY/v536N3b\nf568CxHvZuARI+Dmm0+8+Jowwemo93VRGRLi9H2Cs94HH5SsvEGjqpXi1bVrVy1vC7YsUBkryoN1\n1fk38POKSD8x7ZxZyuX/p0TsVUb2KEg/7wvn77V/UMai3NrN73Zr//mi/OlGf75aGw78p5739wv0\nrN/N1AGv3ay9JvXSc255TEG1Ra+F+XlH3rM1f/pv8/6mC7YsKHRcM2YU7MPzWP8272/6z49WKqhe\ndFHp3jPP8rdsWXTea6918j32mO/1o6JOLKdn+VevLlj+/vuqY8ac+B56mjDBSYuN9X8uP/nkxLQb\nbzyxfJ569ChIHzzY97HkrdO1q//yff656qRJqjk5ql9+qZqb66T/6U9O3ksvLbzukiWq113nTL/3\nnurevb6364vn+5a37dLwft88jz8z0395Dhwo4v8J1dtuU23UqOA850lOLsjz1VeFt5mTU7DstddU\np00rvPz6651l3un+znWeXbucZRER/t+Hd95x8gwfXji9fXsnfdky3/s8cqRg+tZbVRcuLPyeFXU+\n8z4fZQUs1mJ8x1bCbrHyk1fDuHnazawfUxsW/R/88ifYd27hjMfqnbjyxv7OC2Cqx6WBuJdeU6dA\n1BpI8lGtcB18ueDSacfLzqXWzi9vBw3lmwUDYKxAahcAkpdF5ed965Wm+dOPzHkEgFAJJTTEaavI\nXT0QcBrMqzxRnarhQka223i88TtgDj/+CBEjryTsvB9oVLMRmTmZiAjNIptx8NhBth3aRpPaTUBh\nT8Yeru9wvbvH5/L3vWnvZmJfSaBORB2OZx/n/Abnc98FDxFaRflhcyJTpz4MwNNPQ9crl3B240yg\n4GFTeVf5AHd8dQcAnRt15t+vxAPtGTGiYPkNN/h+D+/46g46N+pMekY6u9KHArGkbs8CwnzmX7R+\nA1D4/LZo4XvbeTyv6DccXE3S1gPFfmhWv35OLaZ2bedKE5wa0q23OreXGTmy4Grdu6bmWbPIyirc\n7Ji0NYk5GxOpvnUQf76hDatXQ2QkNG3qv1mnLH7/e+fvuj3rSNq6l7iG8YwZ4z9/oOa6Pcd2cjiz\nBlCLDz5wOqFbt/Zd3gULnJp3jx4FaXc4H5f89w4KrtBXbPuNNfOnkhCTQLfGhc+TqlNLaN3aaRpr\n0aKgX8VXzVXVOUd5zcue+4OCWoy/Zum8UZXgjE7zbsbKs32708zlKSwMLrvMGf59Moh6H91pKi4u\nThcvXhyUbT8z/xkem/sYuerxadnfDA42gakfwoHmTlqH92DV7yEnIijlOMETAotvg6//DaHHIafq\niXnGerTxHI6Cb/8B+2NgS08n7S9RUGMPHGwE1fbBlGmwYcCJ6+eGwFevwbnfQbP5UGuXkya5IMDO\n9lB/PYz3+BaoegBG9nLutZUVASiMPwYXvgT9xsDTHv/5t1wI0T/DWD+fx7xyZIfBR5/B+iuL9x49\nWA+yakBkKvz4IHz/XOB1vFTt/RI1Ln+a2lVrk/LnTQA0fKERDWs25Hj2cTY8M5XMba2dzB3fgcEj\n4Pk0yGhQaDu1/labw//8Ad1edNNmzf5/5/C3f6HaBR+itVM5NvsvPvM1aLOCw7npHFubQM0BzxLR\nbiZ7XvgBABkbgv50F3wzgaiRd5D21msAXP3hYNbNb8eafz0FQIsbXyL5vXup2+5nat/yB0QkP7hH\n1Yji4LGDr
E9PJmvBnTTsuIwaTTYTVSMKFFbuXsneMemFC9VqGuE3DKXvgfeY+cJ1+ck3T36A7Go7\nSUxJpPbhriA5rHn8K/9vQvdXnP+lw+43pOTQ8O9Nqb2vF7+Nd0YwNL3t/6jSemb+OWn394tZ+ZfC\n7UBXfziYhjUbsubzq/jhv5c7idfcAB2cyBEV1py0R1Py84fV207W3saE1NhL7pF6XPXq/Szdtpqt\nz8503vOnY6hbO5yj+2sjAlE5nfhlbMEY5OiuK7nygc9JTP+Afcf2see5BWSnxdJ//JOsrPomUTWi\niK0Tw+fDAo+Tb/3PNqy5q6A/tNek3qRlpBFVI4p6EfXyt9Hsrlt5dEQ3RnUdFXCbvojIElWNC5TP\nahbFkHeDwcycTMJDw7m7+918tPIj9h5dSZUHu5N5pBqZx4UqtfaSdemjZM+7HxbfDioQmuV8WQXD\nS1vgYLQz7StQAGyJh8ZLYG8LeGseHPUaT/rWDxD3b/hmAkRuLgh83t5YDDs7wy/uB7LuBqeG1ekt\nGHAP/Hs5tPX6ddzxSPj3MieojT8K1d2qwk/3wnavX1UdbApZS4s+3uRLYPK3Refx9rw7IH+sQHbp\ngvjxH+7lePwT7M0uGLqz8/BO59f+AMc9AlyIeynpFSgADmUeAg3c+3o42/nxxNFFw4rMt2d1ewhz\nOrMOfzOGwxva5y/T+Q/BIeeLNi9QAHy+7nPYWvAZSN7u/FBo39F97Duw2UnMioCke1nzu39AlUzY\n0RFmPMuWJUvhjs6s2bgfdnSF83z82EGFzLSmzHyzb6Hkd4a/4EzU3sq2g9FwXoDL4ZDsglo4gIY6\n7/mSs/OTUtPTYc/2/PmVqScOd/f5pawFXbVpa84vtChrv3Peco84rQVfLlkEYQUXQHv+tpA9D56d\nf1Gz1WvTW5e047Vh7WDsX0GBvY0A+HbdPJg+n+2tprFswP3+j9vDmj2FOzrmbZlXkH68Zn76ln/+\nh9tqRjJv8zwmXzO5WNsujaAGCxEZALwChAL/UdVnvZb3Al4GOgBDVfVTj2U5wAp3douqDgxmWYsS\nHx3P7Jtmk5iSSEJMAvHR8TzXz/8VatLWJBJTXmT/8f3MWT+fs6s1p0HKbSzfsZK1yyLJOh5GiISQ\ns+ccqJdMTs/H4Z+/QUgW1NoGB2KKLpDkgIYWBIqiTFpQ9PI9bZxAAb4DxViFS+91AoWnvKa4pSOh\npTtmdMMlvvex0f3iyChoKmNLr8J5DjSHxCeLLmtJA4W3rGqlX3feX2GBxxX+8ZpQ1W33yfbY7q+3\nQJSfcage/+BFCvHogQ3JhNxw/3k9L0TWX1EwnfgEdH3zxPyLbndqg/llql0wvSXe2V5qd5j7NCy8\nG7q8CfV/c5bvbg/J/WGy2x52vcf+8gl8NvnEi5I8eZ/Z367yf0xQuIx5Vg8u+KwC7D23cO10ge8a\n2AnyLhoONCk4ljze7/WxOoWCCxlnwe7WgfehwNxxBa0MR+vC/lj46T4oZrDgO6+hZYpzAbo9Dqoe\nLLzsnTm8HxFHr+a9Sl3DCKg4HRuleeEEiA3AOUA4sAxo45UnBidQvAsM8Vp2uCT7C0YH98myYMsC\nveqd67TVhLba8V9dNOS+5srDNZT7muh5j16nLR64WRnVRbnlQqXTJK11++Uaes5crdr6W61Sf4tS\nO7Wgk67uegVVaf2Zhpwz208HYk6RnYun5Gvw8LKtf8GrZVu/2bzC8+0+UFp9psS/UP7HetH48tmO\nZAXOc863zt9zZxakdXulcJ4ubxRMR24qmK637sTtnf+5cvbSspe92ytKra1l346/V42dziCU4uS9\n/rLy3fclD5RuvbvOU6occabbv1d4Waupyli0/7v9S/z9QzE7uANmKO0Lp6dylsf8w8DDfvK+fSYH\nC295I5M8RzH5SvO3jq9REuvWqb7w9mq9/4PXdMGWBfrrr6qPPKI6ZMR2rV5
vn0Zf/p7yu+eVyE0q\n50/XGg23FfowhjVepY2ufkURJ9DUar1AQbVK1MYTP9Qd3lHO/Sbwh7/hkhPTRvRS2k4J3peEvU6P\nV/XdFV+G0+nV5mNlLPr64tdL/H1T3GARtA5uERkCDFDVW935G4HuqnqXj7xvA19p4WaobGApkA08\nq6on/IRJREYBowCaNWvWdfPmzcE4lDOG03xW0NTmT06OM64/K8sZkXHsmDMevE4dqFkTko85I3H2\nrWtHVtWddKh3IXt2hzHv11Q6dj+I1kylfe1exLQ8yjPvLkRzhWZHB5Fw2V5GzO/B8ezjcLQeXWpf\nRvamC1n70U3ExEJ2Rk169XJGCQG06LyDTc2eICfpbkJjFzCg+WDWrs1lw/KGtLjic+rntuXnmS0B\n6Dl4DUt+aETj9r+R/EM3QmunERaZxsjnp/HasEf9Hmv1jjPJWHbZCelh9VMJb76EI78MAkBCs7nx\nrUdZ8G1DkiffC8DwR+cxeXyvE9YFnKaqtLZ+9xtacy+aWZ3cTKcZI7zJajK3tXEWXvBP6Pgu/Gdh\nfv7IVr9yZPP5ZB+t7pT77FQydjU9YbulFV7zIGFNV3BkbY8TF3Z50xklWAJnjW3H7injYXNPOOpj\nNKGner/B3vPgd8/Dggdh4C3Q9iNCX00h57DbNxSaCTleTUiSg5y1Bt3VLnCB6q+F9FYlOoZTSbO7\n/8ijN19Yqiao4nZwn8rBoomqbhORc4A5QF9V3eBvf8EcDWVOnuIGrNLm90UVfkp1ttO7eQLdm8QX\n+kX0/zYnMfXH5Qzq3pHOZ11IrVoFPyQ8ftwZslq7thM4wRlKqwpVq8K2bc49qo4dc34w9nPqz8xc\nuoSB3ToTHx2PiDPscs8eiIhwgu2xY852Q0OdwFy7trO/nBznYVyL987OP95du6BePed2M7m5zisr\nC6pXd349PHu2Mzx39cGC96lb43hSU2HdoYW8My2Vo7ubEBG1nQtbnI+EKPPTphEReYjOMoIhCa35\n6CPnvmf9+jk/otsb/ivL9s9j2VJh5vHHyQ07gCwczTUdL6FDtSupVs35UVndurBpE+wKS+Kdpe8i\nAlc3H0lc427Ur++cu9nJP1B7x0CGD2jDuAmb2J6zgo5xR/j24xh2bq1JnYRJjLiiDXWPxrEp9Bt0\nXwzUSaFPbAKta8WzcKHzPvfq5QwtDo1eyEffr6ddg86MHtaGI0fgnx+uQ1t9xvmhlxIT0YXDh51h\ntFdc4axXp45Tlv/+9BGZGRG0rNWZ9CMH6BzVncu6dOTxf2xhe+RUdv8WQ9927Rn1+xZERzu3eGnf\nHlJTnXOflLyaTz7PIPxILN071GfcOOdcrtyfxISvv+WzBcvIafEF8svtXNLiYr7fMJuc3S0J7fw+\nF2Y/SEyDRpx3cRIcaI7W2srG6UM5q1pTdhzaRvKunVzVvybNa51Pt27O3QciI53PxYMPlu2eVqdC\nsIgHxqrqpe78wwCq+oyPvG/jFSxKshwsWBhzsiVtTaLvu33zRwnOvml2qQP2mcD7wqY8LnTKw6kQ\nLKoAvwF9gW3AIuB6VT1hqIh3MBCRukCGqh4XkQZAEjBIVU+8CZPLgoUxJ9+p8oVnSq/Cf2ehqtki\nchcwC2dk1CRVXSUi43A6VKaLyAXANKAucJWIPKmqbYHWwOsikotz/6pniwoUxpiKER8db0HiDGG/\n4DbGmDNYcWsWdtdZY4wxAVmwMMYYE5AFC2OMMQFZsDDGGBOQBQtjjDEBVZrRUCKSBpT2fh8NgD3l\nWJzTgR3zmcGO+cxQlmNurqpRgTJVmmBRFiKyuDhDxyoTO+Yzgx3zmeFkHLM1QxljjAnIgoUxxpiA\nLFg43qjoAlQAO+Yzgx3zmSHox2x9FsYYYwKymoUxxpiALFgYY4wJ6IwPFiIyQETWiUiyiIyp6PKU\nFxGJFpG5IrJaRFaJyD1uej0R+U5E1rt
/67rpIiIT3PdhuYh0qdgjKB0RCRWRX0XkK3c+VkR+do/r\nIxEJd9OruvPJ7vKYiix3aYlIHRH5VETWisgaEYk/A87xve5neqWIfCgiEZXxPIvIJBHZLSIrPdJK\nfG5F5GY3/3oRubm05Tmjg4WIhAITgcuANsAwEWlTsaUqN9nA/araBrgQuNM9tjHAbFVtCcx258F5\nD1q6r1HAaye/yOXiHmCNx/xzwEuq2gLYB9zipt8C7HPTX3LznY5eAb5R1VZAR5xjr7TnWESaAKOB\nOFVth/OsnKFUzvP8NjDAK61E51ZE6gFPAN2BbsATeQGmxFT1jH0B8cAsj/mHgYcrulxBOtYvgEuA\ndUAjN60RsM6dfh0Y5pE/P9/p8gKauv9AFwNfAYLzq9Yq3ucb56Fc8e50FTefVPQxlPB4I4FN3uWu\n5Oe4CbAVqOeet6+ASyvreQZigJWlPbfAMOB1j/RC+UryOqNrFhR88PKkummVilv17gz8DJytqjvc\nRTuBs93pyvBevAw8COS68/WB/aqa7c57HlP+8brLD7j5TyexQBrwltv09h8RqUElPsequg14AdgC\n7MA5b0uo3OfZU0nPbbmd8zM9WFR6IlITmAr8WVUPei5T51KjUoydFpErgd2quqSiy3ISVQG6AK+p\namfgCAXNEkDlOscAbhPKIJxA2RiowYlNNWeEk31uz/RgsQ2I9phv6qZVCiIShhMo3lfVz9zkXSLS\nyF3eCNjtpp/u70UPYKCIpABTcJqiXgHqiEjes+Y9jyn/eN3lkUD6ySxwOUgFUlX1Z3f+U5zgUVnP\nMUA/YJOqpqlqFvAZzrmvzOfZU0nPbbmd8zM9WCwCWrojKcJxOsqmV3CZyoWICPBfYI2qvuixaDqQ\nNyLiZpy+jLz0m9xRFRcCBzyqu6c8VX1YVZuqagzOeZyjqjcAc4Ehbjbv4817H4a4+U+rK3BV3Qls\nFZHz3aS+wGoq6Tl2bQEuFJHq7mc875gr7Xn2UtJzOwvoLyJ13VpZfzet5Cq6A6eiX8DlwG/ABuDR\nii5POR7XRThV1OXAUvd1OU577WxgPfA9UM/NLzgjwzYAK3BGm1T4cZTy2BOAr9zpc4CFQDLwCVDV\nTY9w55Pd5edUdLlLeaydgMXuef4cqFvZzzHwJLAWWAm8B1StjOcZ+BCnXyYLpxZ5S2nOLfBH9/iT\ngZGlLY/d7sMYY0xAZ3ozlDHGmGKwYGGMMSYgCxbGGGMCsmBhjDEmIAsWxhhjArJgYUwAIpIjIks9\nXuV2d2IRifG8q6gxp6oqgbMYc8Y7qqqdKroQxlQkq1kYU0oikiIiz4vIChFZKCIt3PQYEZnjPldg\ntog0c9PPFpFpIrLMff3O3VSoiLzpPqPhWxGp5uYfLc7zSJaLyJQKOkxjAAsWxhRHNa9mqD94LDug\nqu2Bf+Lc9RbgVeAdVe0AvA9McNMnAD+oakecezitctNbAhNVtS2wH7jWTR8DdHa3c3uwDs6Y4rBf\ncBsTgIgcVtWaPtJTgItVdaN708adqlpfRPbgPHMgy03foaoNRCQNaKqqxz22EQN8p87DbBCRh4Aw\nVX1aRL4BDuPcxuNzVT0c5EM1xi+rWRhTNupnuiSOe0znUNCXeAXO/X66AIs87qpqzElnwcKYsvmD\nx98kd3oBzp1vAW4A5rvTs4E7IP9Z4ZH+NioiIUC0qs4FHsK5tfYJtRtjTha7UjEmsGoistRj/htV\nzRs+W1dEluPUDoa5aXfjPL3uLzhPshvppt8DvCEit+DUIO7AuauoL6HAZDegCDBBVfeX2xEZU0LW\nZ2FMKbl9FnGquqeiy2JMsFkzlDHGmICsZmGMMSYgq1kYY4wJyIKFMcaYgCxYGGOMCciChTHGmIAs\nWBhjjAno/wGVkooxFkdVNgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iOFBSbPcYCN4", + "colab_type": "text" + }, + "source": [ + "## Look closer at the data\n", + "The graph shows the _loss_ (or the difference between the model's predictions and the actual data) for each epoch. There are several ways to calculate loss, and the method we have used is _mean squared error_. There is a distinct loss value given for the training and the validation data.\n", + "\n", + "As we can see, the amount of loss rapidly decreases over the first 25 epochs, before flattening out. This means that the model is improving and producing more accurate predictions!\n", + "\n", + "Our goal is to stop training when either the model is no longer improving, or when the _training loss_ is less than the _validation loss_, which would mean that the model has learned to predict the training data so well that it can no longer generalize to new data.\n", + "\n", + "To make the flatter part of the graph more readable, let's skip the first 50 epochs:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Zo0RYroFZYIV", + "colab_type": "code", + "outputId": "e6841332-0541-44bb-a186-ae5b46781e51", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 295 + } + }, + "source": [ + "# Exclude the first few epochs so the graph is easier to read\n", + "SKIP = 50\n", + "\n", + "plt.plot(epochs[SKIP:], loss[SKIP:], 'g.', label='Training loss')\n", + "plt.plot(epochs[SKIP:], val_loss[SKIP:], 'b.', label='Validation loss')\n", + "plt.title('Training and validation loss')\n", + "plt.xlabel('Epochs')\n", + "plt.ylabel('Loss')\n", + "plt.legend()\n", + "plt.show()" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAZIAAAEWCAYAAABMoxE0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi40LCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcv7US4rQAAIABJREFUeJzsnXl4lNXZuO9nZhJQQbGRFpFAcKkC\nRhYjGgEJov1A0Wqx1q3giqJYqbV1aVWq9odrxQUVKiKpWvWTT9xArUDYDPsiRURRIomCQiooAknm\nfZ/fH2dmMjOZJJNkJpkk576uXJl3O+95t+c5z3LOEVXFYrFYLJb64mnqClgsFouleWMVicVisVga\nhFUkFovFYmkQVpFYLBaLpUFYRWKxWCyWBmEVicVisVgahFUkliZHRLwiskdEuiZy36ZERI4WkYTn\n1ovIGSJSFLa8SUQGxbNvPc71rIjcUd/jayj3PhF5PtHlWpoOX1NXwNL8EJE9YYsHAmWAE1i+VlVf\nrEt5quoA7RK9b2tAVY9NRDkicjVwmarmhZV9dSLKtrR8rCKx1BlVDQnyQIv3alX9oLr9RcSnqv7G\nqJvFYml8rGvLknACrotXRORfIvIDcJmI5IrIUhHZJSLbRORxEUkL7O8TERWRrMDyC4Htc0TkBxEp\nFJHudd03sH24iHwqIrtF5AkRWSIil1dT73jqeK2IbBaR70Tk8bBjvSLyqIiUisgXwLAa7s+fReTl\nqHWTReTvgd9Xi8jGwPV8HrAWqiurRETyAr8PFJF/Buq2ATgxat+/iMgXgXI3iMi5gfXZwJPAoIDb\ncGfYvZ0Qdvx1gWsvFZFZInJ4PPemNkTk/EB9donIPBE5NmzbHSLytYh8LyKfhF3rKSKyOrD+GxF5\nKN7zWZKAqto/+1fvP6AIOCNq3X1AOXAOprFyAHAScDLGCj4S+BQYF9jfByiQFVh+AdgJ5ABpwCvA\nC/XY96fAD8AvA9tuBiqAy6u5lnjq+AZwCJAF/Dd47cA4YAPQBcgAFprPK+Z5jgT2AAeFlf0tkBNY\nPiewjwCnA/uAEwLbzgCKwsoqAfICvx8GCoBDgW7Ax1H7XggcHngmlwTq8LPAtquBgqh6vgBMCPz+\nRaCOfYC2wFPAvHjuTYzrvw94PvC7R6Aepwee0R3ApsDvXsCXQKfAvt2BIwO/VwAXB363B05u6m+h\nNf9Zi8SSLBar6luq6qrqPlVdoarLVNWvql8AU4HBNRz/mqquVNUK4EWMAKvrviOAtar6RmDboxil\nE5M46zhRVXerahFGaAfPdSHwqKqWqGopcH8N5/kC+A9GwQGcCXynqisD299S1S/UMA+YC8QMqEdx\nIXCfqn6nql9irIzw876qqtsCz+QlTCMgJ45yAS4FnlXVtaq6H7gNGCwiXcL2qe7e1MRFwJuqOi/w\njO7HKKOTAT9GafUKuEe3BO4dmAbBMSKSoao/qOqyOK/DkgSsIrEki+LwBRE5TkTeEZHtIvI9cA9w\nWA3Hbw/7vZeaA+zV7ds5vB6qqpgWfEzirGNc58K0pGviJeDiwO9LAsvBeowQkWUi8l8R2YWxBmq6\nV0EOr6kOInK5iKwLuJB2AcfFWS6Y6wuVp6rfA98BR4TtU5dnVl25LuYZHaGqm4A/YJ7DtwFXaafA\nrlcAPYFNIrJcRM6K8zosScAqEkuyiE59nYJphR+tqgcDd2FcN8lkG8bVBICICJGCL5qG1HEbkBm2\nXFt68qvAGSJyBMYyeSlQxwOA14CJGLdTB+D9OOuxvbo6iMiRwNPAWCAjUO4nYeXWlqr8NcZdFiyv\nPcaF9lUc9apLuR7MM/sKQFVfUNUBGLeWF3NfUNVNqnoRxn35CDBTRNo2sC6WemIViaWxaA/sBn4U\nkR7AtY1wzreBfiJyjoj4gJuAjkmq46vAeBE5QkQygFtr2llVt
wOLgeeBTar6WWBTGyAd2AE4IjIC\nGFqHOtwhIh3E9LMZF7atHUZZ7MDo1GswFkmQb4AuweSCGPwLuEpEThCRNhiBvkhVq7Xw6lDnc0Uk\nL3DuP2LiWstEpIeIDAmcb1/gz8VcwG9F5LCABbM7cG1uA+tiqSdWkVgaiz8AozFCYgomKJ5UVPUb\n4DfA34FS4ChgDabfS6Lr+DQmlrEeEwh+LY5jXsIEz0NuLVXdBfweeB0TsL4AoxDj4W6MZVQEzAHy\nw8r9CHgCWB7Y51ggPK7wb+Az4BsRCXdRBY9/F+Niej1wfFdM3KRBqOoGzD1/GqPkhgHnBuIlbYAH\nMXGt7RgL6M+BQ88CNorJCnwY+I2qlje0Ppb6IcZtbLG0fETEi3GlXKCqi5q6PhZLS8FaJJYWjYgM\nC7h62gB3YrJ9ljdxtSyWFoVVJJaWzkDgC4zb5H+A81W1OteWxWKpB9a1ZbFYLJYGYS0Si8VisTSI\nVjFo42GHHaZZWVlNXQ2LxWJpVqxatWqnqtaUMg+0EkWSlZXFypUrm7oaFovF0qwQkdpGaACsa8ti\nsVgsDcQqEovFYrE0CKtILBaLxdIgWkWMxGKxNC4VFRWUlJSwf//+pq6KJQ7atm1Lly5dSEurbqi1\nmrGKxGKxJJySkhLat29PVlYWZtBlS6qiqpSWllJSUkL37t1rPyAG1rVlsVgSzv79+8nIyLBKpBkg\nImRkZDTIerSKxGJpJAoLYeJE8781YJVI86Ghz8q6tiyWRqCwEIYOhfJySE+HuXMhN7epa2WxJIak\nWiSBkVc3ichmEbktxvbTRGS1iPhF5IKobQ+KyAYR2Sgijwdmt0NEThSR9YEyQ+stllSmoMAoEccx\n/wsKmrpGLZvS0lL69OlDnz596NSpE0cccURoubw8vmlLrrjiCjZt2lTjPpMnT+bFF19MRJUZOHAg\na9euTUhZjU3SLJLA3A+TgTMxczCvEJE3VfXjsN22ApcDt0QdeyowADghsGoxMBgowEyAcw1mUp7Z\nmIlw5iTrOiyWRJCXZyyRoEWSl9fUNWrZZGRkhITyhAkTaNeuHbfcEiFmUFVUFY8ndnt6+vTptZ7n\nhhtuaHhlWwDJtEj6A5tV9YvAzGUvY+amDqGqRYGZ26KnyFSgLWbK0TZAGmbmtsOBg1V1qZphi/OB\n85J4DRZLQsjNNe6se++1bq3qKCwuZOKiiRQWJy+ItHnzZnr27Mmll15Kr1692LZtG2PGjCEnJ4de\nvXpxzz33hPYNWgh+v58OHTpw22230bt3b3Jzc/n2228B+Mtf/sKkSZNC+992223079+fY489lg8/\n/BCAH3/8kZEjR9KzZ08uuOACcnJyarU8XnjhBbKzszn++OO54447APD7/fz2t78NrX/88ccBePTR\nR+nZsycnnHACl112WcLvWTwkM0ZyBFActlwCnBzPgapaKCLzMVN6CvCkqm4UkZxAOeFlHhGrDBEZ\nA4wB6Nq1a91rb7EkmNxcq0Cqo7C4kKH5Qyl3ykn3pjN31FxyM5Nzsz755BPy8/PJyckB4P777+cn\nP/kJfr+fIUOGcMEFF9CzZ8+IY3bv3s3gwYO5//77ufnmm3nuuee47bYq3npUleXLl/Pmm29yzz33\n8O677/LEE0/QqVMnZs6cybp16+jXr1+N9SspKeEvf/kLK1eu5JBDDuGMM87g7bffpmPHjuzcuZP1\n69cDsGvXLgAefPBBvvzyS9LT00PrGpuUzNoSkaOBHkAXjKI4XUQG1aUMVZ2qqjmqmtOxY62DV1os\nliakoKiAcqccRx3KnXIKigqSdq6jjjoqpEQA/vWvf9GvXz/69evHxo0b+fjjj6scc8ABBzB8+HAA\nTjzxRIqKimKW/atf/arKPosXL+aiiy4CoHfv3vTq1avG+i1btozTTz+dww47jLS0NC655BIWLlzI\n0UcfzaZNm/jd737He++9x
yGHHAJAr169uOyyy3jxxRfr3aGwoSRTkXwFZIYtdwmsi4fzgaWqukdV\n92BiILmB47vUs0yLxZKi5GXlke5Nxyte0r3p5GXlJe1cBx10UOj3Z599xmOPPca8efP46KOPGDZs\nWMz+FOnp6aHfXq8Xv98fs+w2bdrUuk99ycjI4KOPPmLQoEFMnjyZa6+9FoD33nuP6667jhUrVtC/\nf38cx0noeeMhmYpkBXCMiHQXkXTgIuDNOI/dCgwWEZ+IpGEC7RtVdRvwvYicEsjWGgW8kYzKWyyW\nxiM3M5e5o+Zy75B7k+rWiub777+nffv2HHzwwWzbto333nsv4ecYMGAAr776KgDr16+PafGEc/LJ\nJzN//nxKS0vx+/28/PLLDB48mB07dqCq/PrXv+aee+5h9erVOI5DSUkJp59+Og8++CA7d+5k7969\nCb+G2khajERV/SIyDngP8ALPqeoGEbkHWKmqb4rIScDrwKHAOSLyV1XtBbwGnA6sxwTe31XVtwJF\nXw88DxyAsVRsxpbF0gLIzcxtNAUSpF+/fvTs2ZPjjjuObt26MWDAgISf48Ybb2TUqFH07Nkz9Bd0\nS8WiS5cu3HvvveTl5aGqnHPOOZx99tmsXr2aq666ClVFRHjggQfw+/1ccskl/PDDD7iuyy233EL7\n9u0Tfg210SrmbM/JyVE7sZXF0nhs3LiRHj16NHU1UgK/34/f76dt27Z89tln/OIXv+Czzz7D50ut\n/uCxnpmIrFLVnGoOCZFaV2KxWCwtjD179jB06FD8fj+qypQpU1JOiTSUlnU1FovFkmJ06NCBVatW\nNXU1kkpKpv9aLBaLpflgFYnFYrFYGoRVJBaLxWJpEFaRWCwWi6VBWEVisVhaHEOGDKnSuXDSpEmM\nHTu2xuPatWsHwNdff80FF1wQc5+8vDxq604wadKkiI6BZ511VkLGwZowYQIPP/xwg8tJNFaRWCyW\nFsfFF1/Myy+/HLHu5Zdf5uKLL47r+M6dO/Paa6/V+/zRimT27Nl06NCh3uWlOlaRWCyWlCCRUxFf\ncMEFvPPOO6FJrIqKivj6668ZNGhQqF9Hv379yM7O5o03qo6yVFRUxPHHHw/Avn37uOiii+jRowfn\nn38++/btC+03duzY0BD0d999NwCPP/44X3/9NUOGDGHIkCEAZGVlsXPnTgD+/ve/c/zxx3P88ceH\nhqAvKiqiR48eXHPNNfTq1Ytf/OIXEeeJxdq1aznllFM44YQTOP/88/nuu+9C5w8OKx8cLHLBggWh\nib369u3LDz/8UO97G5Pg5C4t+e/EE09Ui8XSeHz88cd12v/DD1UPOEDV6zX/P/yw4XU4++yzddas\nWaqqOnHiRP3DH/6gqqoVFRW6e/duVVXdsWOHHnXUUeq6rqqqHnTQQaqqumXLFu3Vq5eqqj7yyCN6\nxRVXqKrqunXr1Ov16ooVK1RVtbS0VFVV/X6/Dh48WNetW6eqqt26ddMdO3aE6hJcXrlypR5//PG6\nZ88e/eGHH7Rnz566evVq3bJli3q9Xl2zZo2qqv7617/Wf/7zn1Wu6e6779aHHnpIVVWzs7O1oKBA\nVVXvvPNOvemmm1RV9fDDD9f9+/erqup3332nqqojRozQxYsXq6rqDz/8oBUVFVXKjvXMMMNZ1Spj\nrUVisVianGRMRRzu3gp3a6kqd9xxByeccAJnnHEGX331Fd9880215SxcuDA0YdQJJ5zACSecENr2\n6quv0q9fP/r27cuGDRtqHZBx8eLFnH/++Rx00EG0a9eOX/3qVyxatAiA7t2706dPH6DmoerBzI+y\na9cuBg8eDMDo0aNZuHBhqI6XXnopL7zwQqgH/YABA7j55pt5/PHH2bVrV8J71ltFYrFYmpzgVMRe\nb+KmIv7lL3/J3LlzWb16NXv37uXEE08E4MUXX2THjh2sWrWKtWvX8rOf/Szm0PG1sWXLFh5
++GHm\nzp3LRx99xNlnn12vcoIEh6CHhg1D/84773DDDTewevVqTjrpJPx+P7fddhvPPvss+/btY8CAAXzy\nySf1rmcsrCKxWCxNTjKmIm7Xrh1DhgzhyiuvjAiy7969m5/+9KekpaUxf/58vvzyyxrLOe2003jp\npZcA+M9//sNHH30EmCHoDzroIA455BC++eYb5sypHIi8ffv2MeMQgwYNYtasWezdu5cff/yR119/\nnUGD6jRnHwCHHHIIhx56aMia+ec//8ngwYNxXZfi4mKGDBnCAw88wO7du9mzZw+ff/452dnZ3Hrr\nrZx00kkJVyR2rC2LxZISJGMq4osvvpjzzz8/IoPr0ksv5ZxzziE7O5ucnByOO+64GssYO3YsV1xx\nBT169KBHjx4hy6Z379707duX4447jszMzIgh6MeMGcOwYcPo3Lkz8+fPD63v168fl19+Of379wfg\n6quvpm/fvjW6sapjxowZXHfddezdu5cjjzyS6dOn4zgOl112Gbt370ZV+d3vfkeHDh248847mT9/\nPh6Ph169eoVme0wUdhh5i8WScOww8s2Phgwjb11bFovFYmkQVpFYLBaLpUFYRWKxWJJCa3CbtxQa\n+qysIrFYLAmnbdu2lJaWWmXSDFBVSktLadu2bb3LsFlbFosl4XTp0oWSkhJ27NjR1FWxxEHbtm3p\n0qVLvY+3isRisSSctLQ0unfv3tTVsDQS1rVlsVgslgaRVEUiIsNEZJOIbBaR22JsP01EVouIX0Qu\nCFs/RETWhv3tF5HzAtueF5EtYdv6JPMaLBaLxVIzSXNtiYgXmAycCZQAK0TkTVUNH9VsK3A5cEv4\nsao6H+gTKOcnwGbg/bBd/qiq9Z8swGKxWCwJI5kxkv7AZlX9AkBEXgZ+CYQUiaoWBba5NZRzATBH\nVffWsI/FYrFYmohkuraOAIrDlksC6+rKRcC/otb9TUQ+EpFHRaRNrINEZIyIrBSRlTZzxGKxWJJH\nSgfbReRwIBsIn3z5duA44CTgJ8CtsY5V1amqmqOqOR07dkx6XS0Wi6W1kkxF8hWQGbbcJbCuLlwI\nvK6qFcEVqrotMHlXGTAd40KzWCwWSxORTEWyAjhGRLqLSDrGRfVmHcu4mCi3VsBKQUQEOA/4TwLq\narFYLJZ6kjRFoqp+YBzGLbUReFVVN4jIPSJyLoCInCQiJcCvgSkisiF4vIhkYSyaBVFFvygi64H1\nwGHAfcm6BovFYrHUjp2PxGKxWCwxsfORWCwWi6VRsIrEYrFYLA3CKhKLxWKxNAirSCwWiyXFKCyE\niRPN/+aAHUbeYmnGFBZCQQHk5UFublPXxpIICgth6FAoL4f0dJg7N/WfrVUkFkszpTkKnETTEhVp\nQYF5po5j/hcUpP61WUVisTRTmqPASSQtVZHm5ZnrCV5XXl5T16h2rCKxWJopzVHgJJKWqkhzc41S\nbE6WllUkFkszpTkKnETSkhVpbm7zep5WkVgszZjmJnASSWtXpKmEVSQWi6XZkixF2hKD+MnEKhKL\nxWIJIxjELysDjwcmT4YxY5q6VqmN7ZDYSDS3DkYWS2uloMAoEdcFvx/GjbPfbW1Yi6QRaKlpipbm\ni3XdVE9enrFEXNcsO07LyQhLFtYiaQRipSlaLE1FsGFz553mv21tR5Kba9xZaWlGobRp07IywpKB\nVSR1pD4uqmCaotfb8tIUG5tEuAhbu5vRNmxqZ8wYWLAA7rvPehDiwbq26kB9XVQ2TTExJMJFaN2M\nLbv/RSJpzanVdcUqkjrQkJ609qVsOInoydxSe0PXBduwsSQaq0jqgG3JNS2JuP/2GRpsw8aSSKwi\nqQO2Jde0JOL+22dosSQeUdWmrkPSycnJ0ZUrVzZ1NSwWi6VZISKrVDWntv1s1pbFYrFYGkRSFYmI\nDBORTSKyWURui7H9NBFZLSJ+EbkgbP0QEVkb9rdfRM4
LbOsuIssCZb4iIunJvIa60tpTSy0WS+sj\naYpERLzAZGA40BO4WER6Ru22FbgceCl8parOV9U+qtoHOB3YC7wf2PwA8KiqHg18B1yVrGuoK7aj\nl8ViaY0k0yLpD2xW1S9UtRx4Gfhl+A6qWqSqHwFuDeVcAMxR1b0iIhjF8lpg2wzgvMRXvX7Yjl4W\ni6U1kkxFcgRQHLZcElhXVy4C/hX4nQHsUlV/bWWKyBgRWSkiK3fs2FGP09Yd24PdYrG0RlI6/VdE\nDgeygffqeqyqTgWmgsnaSnDVYmJTSy2W5o8d0LLuJFORfAVkhi13CayrCxcCr6tqRWC5FOggIr6A\nVVKfMpOK7ehlsTRf7BA69SOZrq0VwDGBLKt0jIvqzTqWcTGVbi3UdHqZj4mbAIwG3khAXWvFZmNZ\nLC0fG+esH0mzSFTVLyLjMG4pL/Ccqm4QkXuAlar6poicBLwOHAqcIyJ/VdVeACKShbFoFkQVfSvw\nsojcB6wBpiXrGoLYVorF0jqwQ+jUj6TGSFR1NjA7at1dYb9XYNxTsY4tIkYgXVW/wGSENRp2oL/U\nJejPzsiA0lLr17Y0DBvnrB8pHWxPFWwrJTUJn1vbdSsnIbIWo6Uh2Dhn3bFDpMRBsJVy771WSKUS\nQUsxOCWq61q/dkvFxihTG2uRxIltpaQeQUsx3CKxFmPLw8YoUx+rSBqRROSn2xz3SsL92YmKkdj7\nm3rYGGXqYxVJI2GniU0OibQU7f1NTWyMMvWxMZJGIhH56TbHPbnY+5ua2Bhl6mMtkkYivFXl9cLW\nraYFXJePwrbMkkuq39/W7HazMcrUxs6Q2IgUFkJ+PkyfDn5//dwnrVmYNAapen+t283SFMQ7Q6K1\nSBqR3FwjpPz++gcObcssuaTq/bUBZ0sqY2MkjYwdat5SH+x7Y0llrEXSyLTmIRjicRsl2rWUqq6q\nutKa3xtL6mNjJJZGIR4ff6LjAKkYV2gpis3SOog3RmJdW3Fgh2doOPGk1iY6/TbV0nmDiu3OO83/\nZLxP9l21NAXWtVULqdiqbY7Ek1qb6PTb6PIyMoyQbSprINkBc/uuWpoKq0hqoTGyZVqDuyMeH3+i\n4wDRQ6iMH9+0QjbZ/VRSIbOrNbzLlqpYRVILyf74W1MrMp7U2kSn3wbLmzix6YVssgPm9X1XEyX8\nW9O7bInEKpJaSPbHnwqtyNZAqvRaT2Y/lfq8q4kU/vZdbr1YRRIHyfz4U0XAtXSS1SBINVdOXd/V\nRAp/+y63XqwiaWJs/4DGI9ENgpbgykmk8LfvcuvFKpIUIFWH5Whp1GQ91MeyaAmunGQkODS3e2Bp\nOHEpEhE5CihR1TIRyQNOAPJVdVcyK2exxEttiqAm66G+lkVLceVY4W9pKPF2SJwJOCJyNDAVyARe\nqu0gERkmIptEZLOI3BZj+2kislpE/CJyQdS2riLyvohsFJGPRSQrsP55EdkiImsDf33ivAZLEkiF\nDnDxdPSrqXNifTsu2nkyEkMqvEOWhhGva8tVVb+InA88oapPiMiamg4QES8wGTgTKAFWiMibqvpx\n2G5bgcuBW2IUkQ/8TVX/LSLtADds2x9V9bU4654QUi2omgqkSowgHhdTTdZDQywL25pvGKnyDiWD\n1iQz4lUkFSJyMTAaOCewLq2WY/oDm1X1CwAReRn4JRBSJKpaFNgWriQQkZ6AT1X/HdhvT5z1TAot\n+WVvCKkSI4hHEdQUC4jeBk3bA741UdM71JwFcWuTGfEqkiuA6zAWwhYR6Q78s5ZjjgCKw5ZLgJPj\nPN/PgV0i8n9Ad+AD4DZVdQLb/yYidwFzA+vLogsQkTHAGICuXbvGedrYpIrATDVSJUYQb8C4Jush\nuK21CYCmprp3qLk/h9YmM+JSJAF31O8ARORQoL2qPpDkeg0C+mLcX69gXGDTgNuB7UA6Jl5zK3BP\njDpPDWwnJyenQUM
cp4rATDVSKd0zUS6m1iYAmprq3qHm/hxam8yIN2urADg3sP8q4FsRWaKqN9dw\n2FeYoHyQLoF18VACrA1zi80CTgGmqeq2wD5lIjKd2PGVhJJKAjORJMJ10NJiBK1NAKQCsd6h5v4c\nWqrMqI54XVuHqOr3InI1Ju33bhH5qJZjVgDHBNxgXwEXAZfEeb4VQAcR6aiqO4DTgZUAInK4qm4T\nEQHOA/4TZ5kNoqUJzObuOkgWrU0ApCqp/hziaYS1NJlRE/EqEp+IHA5cCPw5ngMCWV7jgPcAL/Cc\nqm4QkXuAlar6poicBLwOHAqcIyJ/VdVequqIyC3A3IDCWAX8I1D0iyLSERBgLSZ2k9KkYtCwubsO\nkklrEgCpTKo+B9sIq0q8iuQejEJYoqorRORI4LPaDlLV2cDsqHV3hf1egXF5xTr235iOj9HrT4+z\nzilBqr50iXAdpKKCtLQumuIdbGgjrCV+N/EG2/8X+N+w5S+AkcmqVEsiVVv+DXUdpKqCbApaomBo\nCup6H5vqHWxII6ylfjfxBtu7AE8AAwKrFgE3qWpJsiqWitRHYKRy0LAhroNUVZCNSWEh5OfD9Ong\n97cswdDY1EfANtU72JBGWEv9buJ1bU3HDIny68DyZYF1ZyajUqlIfVsSqR40rC+prCAbg+D7sH8/\naCC5vCUJhsamPgK2Kd/B+jbCWup3E68i6aiq08OWnxeR8cmoUKpSUABlZeC65n9dBEaygoZN6VJp\nqQoyXoKCL6hERFqWYGhs6iNgm+M72BzrHA/xKpJSEbkM+Fdg+WKgNDlVSk0yMowSAfM/I6Np65MK\nvtZUzappDMIFn9cLV14Jo0a13vtRE/GmytZHwNb0DqZq7Kq+302qXg/Er0iuxMRIHgUU+BDT07zV\nUFoKHo9RIh6PWW5KWqqvtSmpy4famC3LVBYgtVGXBk8iGyap0NBKJKl+PfFmbX2J6dkeIuDampSM\nSqUieXnQpk3ifZv1FRIt1dfaVNTnQ61N8CVCAaS6AKmNpmrwNNeGVnXvTKpfT0NmSLyZVqRIEtkC\nDb4sGRkwfnz9hERL9bU2FYn8UBOZzRWrXuvXw8yZMHIkjBlTvzo2Fk3V4GmODa2aGg2pfj0NUSSS\nsFo0ExJhek+dCuPGGcEQdJW5bv2EV2uOUSSC8NZfbR9qvNZForO5ouu1axfccYfZ9v775n8qK5Om\navA0x4ZWTY2ZVL+ehiiSBo2o2xwoLC6koKiAjNIRlG7MToglcsMNppUKRtD4fMnL+GnOvvVkE6v1\nV92HWhf3UqKzuaIFyIQJkdtnzmx6RVLbe9ZUDZ7GcD0mktoaM6nccKxRkYjID8RWGAIckJQapQiF\nxYUMzR9KWVE/3Bk34XGVNul2zFsRAAAgAElEQVRSrRCJ56UsKKjM/AKjRJ580gTuE/0yN3fferKJ\n1fq7/fbY96gubq9kZHOFC5CRIystkeByU9Jc37NUrHddrI5UU4I1KhJVbd9YFUk1CooKKHfKcbcM\nAn86rkq1QiTelzIYsC8rM26tJ59MXmsy1YNzQZrqg6iLz7ku+ybbBRF8X1IlRtJc3rNoUrXe8Vgd\nqagEG+LaatHkZeWR7k2nrPsiXF85HtdLerrEFCLxvpT1FTItbWiWIE35QcR6FtXd57o+t2S7IMaM\naXoFEiQZ71ljNC6aw/dRHamoBK0iqYbczFzmjppLQVEBu059n7VLOzByeAa5udlV9q1ri7UuD72h\nQ7Pk58d/rsamqT+I8GdR231OZf90U5JoC6yxGhepHryuiVRUglaR1EBuZi7rv13P3SUX4ndPYt5T\nQwHIPnEPBUUF5GXlkZuZm9SXsqHCdsYMc9yMGalhAoeTl2fiCK5r/jflB9HUSq05k0glW1BQ/6GI\n6kpzbRykohK0iqQGpq6ayth3xuJu7Q8z/o3fSef6BS7ey3+Bc8Ri0r3pzB01N6RMk
vFAG9L6aA7C\nUSTyf3Uk292Riq281kiqDUWUqqSaErSKpBoKiwu5YfYNuOpCUR446aA+HL8f9/NT0c4LKCvqx4T7\nyphwefLiHQ1pfaS6cCwoMKnQquZ/dYquMdwdjdnKS7WMm1Qi1YYissSHVSTVUFBUgOM6ZiGrALzl\n4Adw0QN2QHEu7oz3+UAPYNE/4xNuDYl31EfgpKIJHE5Q0ZWVGYukutZnY1lWjdHKS8WMm1QiWUMR\nWZKLp6krkKrkZeXh9XjNQuZSGHYTeFxQD7z7GKz7LTjpuI5JC86f9SUTF02ksLiw2jJjCcT6UFgI\nEyea/7WRm1t9/4imJjcXJk2qjJOMHx/7moIKx+ttHOFSl/tbVxL1DrRUgo2fe++1ShaS+y4mEmuR\nVENuZi6Tz5rM9e9cj6MO7DvMKBH1gV9gz8/AW464gi8Nnts1Gv+8RXg8HiafNZkxJ1bNz4zX1VST\n66OltWhLS2sfJqahllVdXEnJvr/VvQPW3VVJoi3D5npvm9O3bhVJDWT/NBufx4fjOMa95fGD4wU8\n8NlZ+EbczNXH3sr2jq8w68cFALiuy7jZ48j+aTa5mZFPvTaBGM9gf80hgF4X4nVv1Ve41PVjTPb9\nra7/SnMRGM2N5nxvm9O3bl1bNVBQVIDfDQyMlbkU+k4HXEDA9TKiy+WMGvc1s/ffGXGcow4FRQUx\nywy6miDSZA2+8FOmGKFaneujsd08ySZe91aQupr6dXUlNcb9jXY3WndX8mjO97Y5fetJtUhEZBjw\nGOAFnlXV+6O2n4YZiv4E4CJVfS1sW1fgWSATM97XWapaJCLdgZeBDGAV8FtVLU9G/UO92/1luLjQ\nOx/WjgYnDbwVLOCv/HfuHiqcisprQmjjbUNeVl5o0Mdgf5PwQSDHX5Id0UoKvvC1DfaX6gH0+hCP\newvq17qsa+ZaU9zf6DpmZBhl2VKeb1OS6pmLNdGcvnVRTc4gviLiBT4FzgRKgBXAxar6cdg+WcDB\nwC3Am1GKpAD4m6r+W0TaAa6q7hWRV4H/U9WXReQZYJ2qPl1TXXJycnTlypX1uo7C4kLGvzue5V8v\nNyuKTzHpwAfshO39zLre+ZC5FA8ezj3uXIYfPZw129Ywfe10KpwKRIQBXQewrGQZftePLL4Dd95f\ncR3B6zWBxby8SiHZkqdujeWvjldBTJwId95ZOQT/GWeY0XDjiXuk+seYiDlqLLFpDs8/VRGRVaqa\nU+t+SVQkucAEVf2fwPLtAKo6Mca+zwNvBxWJiPQEpqrqwKj9BNgBdFJVf/Q5qqMhigTg/FfOZ9Yn\nsypXrLwa3nnKBN4BvGVw+RDIXIrP40MQKtyK2IUBFOfi/WcBOOkRwqKlv/A1KYx4rj14fLDns8dj\nUkVbkrANV5bBRkbQFWqxNDbxKpJkxkiOAIrDlksC6+Lh58AuEfk/EVkjIg8FLJwMYJeq+msrU0TG\niMhKEVm5Y8eOel4CTJ21nree7WUsETD/Z08OKBExf06asVIAv+uvXokUnwKLbgMUz+gzueYPX0YI\nwVRO1U0ENfmr47n2oKl/xhmVndaam9+7NpqTX9xiCZKqwXYfMAjj8joJOBK4vC4FqOpUVc1R1ZyO\nHTvWqxKFhTDuouNw5k6AGXOhOJeee64H9WKUiJo/b4XJ6qqJ4lNMGfPuhRlzcdSh64iXamx9N4f8\n8bqQCCGZm2vcWW3atExha/tRWBpKU8iOZAbbv8IEyoN0CayLhxJgrap+ASAis4BTgOeADiLiC1gl\ndSmzzhQUgOP3gQo4im/rGdx0fR/Gv+llf5mLqgPHvgUDHjJZXVF4xYtHPMZCCRtmBUehaDAZB8bO\nda3RBRQVwK8rDT2+ISQqeFjfcpqL6zDVxlGKh+Zyb1s6TZXunExFsgI4JpBl9RVwEXBJHY7tICId\nVXUHcDqwUlVVROYDF2Ayt0YDbyS+6oa8PGiTL
pSVK14fPHn9rxlzXja8tJ4bnvpf/F3/jWQu48wj\nz+TAtPN4Y9MbaNiEkhkHZHB538v5fv/3TPvqQyoWlBsl4q3A7TaPcbNXAVC6tzRCsFeXPx6ctbHc\nKY8YMDJeGnp8IkiUkKxrOanWn6CugjeVBXWq3dt4SOX72RCaqu9J0hRJIBg+DngPk/77nKpuEJF7\nMErhTRE5CXgdOBQ4R0T+qqq9VNURkVuAuYEA+yrgH4GibwVeFpH7gDXAtGRdQ2XLV8jLSyM3N5vC\n4kJm/jAB/wAz36kC73/xPqd1O61SiQQyu77NKuDBvQ/ypwF/4qpze7L950+ybMkBbMt4CTKXUuHC\n2HfGIkiEYK8uZTE4a6OjDuVOOQVFBXVSBA09vjkTzwfWWMKlroI31QV1c+o4B8m/n02ppJoq3Tmp\n/UhUdTYwO2rdXWG/V2DcU7GO/Temf0n0+i+A/omtaXyE5nH3l1XZtmbbGvMjGAtx0s1Aj6OH8tCS\nh/CIB6/HS0WfCgizWlw1Y2bv29KH8X/5hknXVe+6CfZrCVoUeVl5dap/rOMbU3g2ZQuwtg+sMYV1\nXQVvqgvq5tZXI5n3s6mVflP1PbFDpNRA9Esx+pHPzDzuxf0r+5LsOwyyCvghGCOpEgvJQzOX4qiD\n67gRrq8QxafAjA9Y7qQz5FWHxx/zUloKGT3WU+B/G4or3V6je48GYFTvUVCSy8QX6jAkfdisj3lZ\neVCS2ygvfVN/XFD7B9aYwrqugjfVBXVN97apGxCxSOb9TAWl3xQxNqtIaiD6paBoMN5dA3FmzAZ/\nOuAFcU0/ktFDTcA9q8BYIk7VbK6YSgQilE9ZmcO4ceC4ius5Cs/od2iTdS+Thk1i/LvjQ9ZEX//1\njL+kHkPSZ+aGlNLEFxrnpU/Ux9VQoVTTB9aYwrqurcZktDITLeBj3dtENyDi7WtU2z7JbLU39D1K\nRcUbD1aR1ED0SzHqvG4wawZT3LZoMHNaveCkIUWn4+22En/mUqNUghZLoH9Jz37f88nOT0KuLA8e\nM+wKhA0IKYCL3/GgroCbhrtlEOWZS5n58cyI+MbMOaUNFs6NJTwTcZ5ooXTjjbB2LYwcCWPGNLyO\nje0SCAreYKpmbees6/410VgWYiJb5/HUuS7XlaxWe0Peo1Sw3OuLVSQ1sH49ZGWZca9uuin4ULsx\n44nI3tVen4e+B40kr8tJTPrqN5QH3VxhsZLDut5DG+8Wyp1yRMQolFgGiriopxzwgqcCshbg8/jo\nc3gf5hfNR1HSvemMHJ7Bon9CWbni8fnJ6PEJkF2n62ss4ZmI84QLpf374cEHzfr3Tc5DwpRJY364\nyQ66h7duofJ3Y7lfEtlQiafOqeBWgvq/R6lS//pgFUk1TJ0K115buTx2rPk/ZkylUMzIgDlz4K23\nvKx8ux/r/92PJ15ayRrfUyzceCofh8VKFi308sfbbqRDmw5kHJjBjXNupMKpwCMenKI8cH2AF9SF\nPs/CIVuNpZK5lHJHeLTwURzXwePxMGnYJMacWJmG7HSbx/gNq8k+sWo6b039RhrTjG6okA4XSqqV\ng1sCzJyZGEXS2CQz6B6udLxe0xgKTk0waVLjWKKJbKjEo5SSZWGHj4NWWpq876Wu9U8lN5hVJNUw\nc2bksuvCuHGQnR3pZrjhBvNhg7FSSjdm8/TtT1OYAYNeL8epqABvBZo1j0cLV7HgcjNviaqiqJk0\n64CdJtaC38RVeuebAgNuMc1cGhp2RVQo3Wsmsi7NeBsd+P9w1aHc8Uak8xYWF5K/Lp/pa6fjd/1V\n+o1MnbXe9Nr3+2iTLilvRocLpV27Ki0SMO6thtBUH2Qyg+7hSscNeFBVzbrS0sZz4yWy31BtdU5W\nLKmxxnerS/1TzQ1mFUk1jBxZ6TYJ4jhm4qlwF0HwIwXT8gt+3Lm58NQrmxg7+WXcbvMgcymOeigo\nKmDr7q2V4
3EVn2Km7nU9ZirfYTeZ9VEpxMGe8x7xhNJ+w4e5B1j+9XKmrpoaGnm43CkPBfjLnXLy\n1+WbYewPzOCGp0rwl98FajpcFhRIwl/EZAZ0jzrKKPuGxkiaIiAcpDbBEV1WXQRNuNKJtkjCy2tO\nxFPnRF9XUCEHv/PapjpoKPHWP9XcYFaRVENQOE2aBJs2md8+X+XshV4vnHVW5YRMXi88+WTkwxxz\nXjZkFnL9Mz7cRbfjOXIxW3dvZfue7WaH4lOg4G6jMPABfpNOHCOFOKhIMg7IYPy74+l8cGf+dOqf\nuPHkG3loyUO46jLrk1mRoxRHMW3NNFx1ERGcbv3Bexs4ptd+Xl5aQu9fsltMY8Ykxp3V2AHhaKoT\nHNWVFb1/dYorWukErzUV3CDNiaBCDrdIUiEFO9VSwq0iqYGgsAp+rFu3mtiJ6xrBMysgs0XM35w5\nsGZN1Dwiq8bA81ejjuJfUMYUPZO0bivxlAzAnfF+WBqxHzzllenC1aQQb/9xO9t/3A5fwxufvIGI\nVJ9WHMAjHlx1jRsN8KgHX9cV+EefiXyZx+8vOYnc3PPqfH9qir+kWoupOho7IJzIsmpTXNFKJxXv\nf6oTrpCTHSOpb71SoT5WkcRB8IMMKpFoVI2VElQs06fD/Pkm62vsWHBdATzgb4NuOQ1/5lJyym9h\nhdsGxQv44cgPIO+vlYM/BlOIsxbEHBASTL+UeOaTCaYcQ2AGR18bftX+IV4q+ho3az6PljzE929f\nxajeoyJmcgy60ILusPAxwWobtyvVWkzV0dgB4USUFd6waQ7KurmTqm7AmurV2HE/q0jqQGlp5TwY\n4YhEZhGVlZlZ7latCu4bHHLeAwfsxCterjr/KNa9CmVlJhgfVCKC4BEPAwam85MzPuGtT5fjKJUz\nMwYyuWokfBbHQM97yVxGmjeNs44+C0py+dcfr0YrvOD9MxWjhzLFncKMdTOYNGwSN059iYrPT0W6\n/xFv1+U4roOLGxoTbP7o+RHjdu337yd/XX6EImmKFlN9RzZOdEA4P79ux8X66GO5piZOjJxB0es1\n7lZIbWWdaqRStlMyaIpAvFUkdSAvz2RshE+H27evcWm98UakMlm+PPxIxSgTP7Lvpzx51pOMOTGb\nNX/PZ8rMTWjWvIhgukc8LNm6BMC4o6LH7xp2U0hBVFEqwX1DLjMHvOX0vvUWso7dxpzNcygv6GmU\niPrAL7BuFJq5lHKnnEn/u4zy52aDk456y3HDAv2KUuaUkb8un1G9R+H1eHEcB0WZvnZ6yKIJEi6g\nEzl8fayyUmFk4yAzZph3ZMaMhvUNCc8ODO4jUjm/PcA110DXro0jFFuCAE61bKdk0BRuZatI6kCs\nVnZhoWkh1o4DvnI83ReyZtvxFBYXMmrEMcz473Xs9+8PRTlcdXE1akyu8OC7n8AMjZ4qGV0R++ID\nNBSwX7v0ENalPWPKzZoHnjvB8QIeWHMF0ucFpOtyPln5s2oD/dEcfejRfLzzY6AyKyyW8K5OyEe4\n0Epy40t7rKasmkY2bsw5WJLRNyR8H4+nMgsrPT0qHpdEGiqAG2vY/NqOay6xu3Dqei+awq1sFUkd\nCT7I/PxKF0awk1w4Ho/5UwXHUfA4MOwmnC6LmbJqCTPWzWDuqLnMHTU3or+H1+M1c747FZFDqASD\n7xDovOiJLeiD+/ohFMQPBOxDyilzKfSdDivHmH1cLz1+uI5NugzNml8l0O/BE+qNn+ZNo+/hfcmb\nkUe5Ux46raL8Y/U/6Ht4X8acGJlOFUvIAyGF4P1qIJI/F3+F13SYe2k9a3xPAVSxcgqKCigr6oe7\nZRBl3ReFFEZeVh5ejxfXcfF6vKH4TmNYKuGKKi8vt/rYRgyFVp+OdpMm1R70TbTyrE0A19bxtTGG\nzY/nuOYSuwtS30zAxnYrW0VSRwoLzcMpD8hQr9f893iMvzro7lqzBlavhpU
rATxmlsV9ZspfRUMt\n+K6HdGVU71GM6j0qIsA9oWACH2z5wATKM5fiufwXuGsvhdVXBuaLDyinsIwuwCiJ8LG+qnOB9c6H\ntaPNfPPeCooOnWHcaMHj140K7aooAzMHst+/n7bbh/DYwwdQflC/yDKLT8EpyuPardN5ceCL9Dys\nZ0gJZByYEcocExEyDsyIUC7O6t/AfgHMkC/XT34FZ+AzAExfO535o+eHhFNG6QjcGTeBPx3XV07G\n2Z+HqiCYMlSV/HVGyyd7DpZYimru3KrWVXUKLRkd7ZKhPGsM/tdyvmRYafU9LtWynWqjvveisRME\nrCKpIwUFUFFRuRzs1e7zwRNPVKYL/+53JugORsmkpXsYPqwDc/a3CVke0b3Obx90e6jcCXkTWLR1\nUejjnHTdKGZO/TkfrEnHRQDHWBWx3E6ZS2sPyIcrnKwC9naK2n/taOPiWjsaHT2UhSwMxF/uCsRq\nRla61aJiOAsZysLMZ5i+djqPD3+c8e+Ox+/6TU9+12H8u+OZNGySGR5m60mw5gqCCQnicXC6zQtV\no8wpY0LBBEa2f5jSjdls3ZqNx1VcFTyul9KN2XAe5K/LD3XArPjyRJ5ZcCjPHXU7T4y5JK45WOrb\ngo+lqG4flFvVPVWDQov+6KfOWs/MOaWMHJ5h+iLF2Aeqd3kkQ3nWJIDDz1fmN89rQt6EWq2u6uqf\n0WM9Ht9xKD58aS5bO7xIYfExtV5DvNZGlb44CbLeCgsrvRR9+1ZajVB/xdVcLCirSOpIXh6kpVVa\nJEFUzYsD5mUKKhGAnBy46iovpaV/YvjxwynNeJutu7fyj9X/wNl6EvuLTif/J5+ROzYsUB01d0hu\nZi7ZlxMaqNGVssqhVMIQaulXEp39FUvhVNchMt7160ZBUR5lWQuY+fFMyor6oVsGGfdaIKi/Ztsa\nY20V5QVcdUY5dhzwNtszCyPq+/7b5/H+mmPwoPi8QppP8APp6UJGj/WMffsppq2ZZq47TKmVLyhn\nTZ/XmDRsEjM/nsnIniNZv6od4y6qiBgahi71b8FXN9lYtHCKd1KyqbPWc+2FR4G/B+9PL4dX14eU\nSTjRY2ldeWVlvKShE6BVR3Wt3PARFlxcPtjyAYu2LqrR6qrOZVNYXMj4DUNxftsPKcrD7b6If+xY\nwoz82p9LfayNulpv1cX1INJTAZVeivBRBeoaW2ouFpRVJHUkN9c81Px82L7dZGwFLZTly80HEk3n\nzpUpm+np2dx414Gs3rgedgnMfhh10pm+ROh7+HpKM94mo3QEpRuzycvLJc+XS8ELQB7QpZDRj3wG\nRYPZ3vEVZn0isOg2JGshZw5ux8ieI1mzbQ0Lv1wYCoJHsPLqmgP1YATx7q5mWHs3ECc5YCcsus38\nj9VRMnysMI9jLAzXZ/ZtPxv3+bMiMs6cfR159/si3PZulflbth/1QGRdZswFfxvAg4vgYDKVOPhL\nPj7oacZ+9FBEP5lopfbxyo5M2/lLHHUo+LIAZ8GfcMrvjhgahoGRLfjwoWSi+85UabmW5DL6+42Q\ntYBRI44BYOzbY5m2ZlrI2gy65sIVWnXCauacUvD3CCRWKDPnlJJ9YtXzhrs8HEeZMgVmzAgoRiLr\nVF0CRE2t8Lq00oONnnB3bG1WV7TLJn/WlxT4X2Lr7q1m8rguS5AuH+JiXJXxWlbx9vwP1aMOSRrh\nSic6rjd6dKSnAkxmXXBdcJyz+gT36+qmaszkkiBWkdSD8Ac7dSpcf31lT/c5c8yQ88GhU9LSoFOn\nsCHQy5QH/5wJ2g04OxDvECoqlBue+l/cbvNwZ9yExzWt72BrxpfmoKNuxzliMekHp3NjxkuQ/wH4\n01FfOSNHfE72T/cw/t3xMacCpvgUo0TcNEAQx8txX9/P5m5nRo77FXRRefxw4rPQabUZCywq9bjL\nCZ/T/qjdfLp2IE74WGHHzIZN54YE+b/
fOjhmxlnRgnIYvcWct88M8793fjUZaF5AEVHS04WD+8/i\nkeILQj31w6/R+/2RiA8cx8GXJizx/D+cwPWVO+XQbS54bwdHUY+f5WlPMPzAjFALPuhyDCY7CILX\n4+Xm3Jt5YtkToX2u7HNlYHKxbMrLu5GePoq+h69n/IaTA1l4xioMpksDoYnJFm1dRPZPjZURLagO\nPGY5ePsbxeqroM8puxgy46JQizmolPLyzDthXKseVIWycpf8fE8g/djUaVQfKCyJsgZqaYXH20qP\nFljR7thYllDQ/bN9e2UfGF+aw7Pf/RZn3mK8Hi8+jw9cQoknQYVcnbVXXZ2CM4CWlRnrYPJkM+hq\n+L2IOf10NQOehisd9/MBUC6oW2mFRHsqoi0SX5pTxU0Xb0ZWrE7C1V1/U6TBW0XSQEpLIzsolpXB\nww9Xjr81fjx8/715oczw52omwwoIxyDicYwS2TLIBJJVqAgbtdVV4PMBaOcFlDvlrF3aAY97QESs\noCBjomnN4eLBQ07nHDq370zR+sNZW3CuEfaBWIQqbJ47iN//+lXe2nsHG3dujGzNu2qGst93WKTb\nat9hMOh+SgDZKXiK7kCCPfS1Atp9E2FhaI//hS8HmmVRYw2FucA8665AnTTUU9VVJ1kL8aS5uH4H\nnw/OvnAH9M7nkeI7KvvXBN10ADPm4jjpiMfhhLNW06bvK6zwLol8YJmFodiQZhUw68elvPWOlz+c\n+ge+3/89q7etZuW2laGMOUXxu34e/vBhwKRnO47DM6uewbP4J2hZL9T1UF5urInyI8pjuhajW775\n6/KZsW5GZQwsbAZM3xVv8/Pvr+HnOdtYKu9Q5piGQVApBd1FV/z9RZ557seABejFlQo+3rGF8vJe\nlS39/Mp+LUHXSoG/5s6k8cRYqhNY0e7YiGOiElXS0uCci7ez6Yg/s/GARQD4XT8jjh1B/yP6xxSa\nNQnK6G2jv99IWVm3UL+bsWMrOw9XjuAbNf00JpswvDGwf0tfRv1hI78a3qOywXHUEnSxi79C8aXB\nqFFeRo2qjJEc3O1z1m4pZuTwDLJ/lk3+rC95btdo/rFjcchNZ9ystY/AXVhcyJAZQ8y74fHhEU/M\nEb3jfXbJwCqSBhIrZhJULI5jlAqY1okIuCoE4wFgBLsInHPhDt7LWs3+b3qh4uIRxecLt0hAj1qC\nI17Sven06Z7JfI+AQpt0MX7aLpGtq0nDJkFJLoMvr4AywSivyvNWVPh55KVVuAM/MZUMdzP5/MZl\ntXk4oBFpxEEUxc2ajy/tbly/F1+ah+G/+YG3+vwPzpaBlXGYn/2nMovs3ccqXWOA6/cZxappVVKZ\n07NW8fgrm1hTeDDbO77CnP13Uf5jeZVYCN5yY9UEFJ66yrp9b4L3sSrPSxA0KjbkqMNDSx7C5/FF\n9OIPVwiqis/jCw3/D5hRnT1/xiNtSU/3mMnGNlTGCoLWDEBGmNWT7k0HiAhQ37vg3pDw8hyxhM8y\nl7LxRz/6Y6RSWr1tNYXFheRm5jJqxDE8u/M0/L3zQwp1ifjwpRWgePD4/GzfU0p5eacIFxKDtppE\nB43dmTRmKz18kqwuhUwomECZU4arboQyCp/KOZroRJUKv8tb2yfjHj09Yr9O7TpFJJ7EKyijt23v\n+ArIHwi+764bvJdCWVmlmyli+ulFEyNGzab4FHTGv9nsT+fBmS6X3jaHXsM+NHMKcQZ8fir+7otY\nn/5bxpw4JtLiO6KcRRvSmXviXLqOKMCZvziiIfHsUx1rHIE7aIW8+/m7ocZEhVsRejdjNQKSFR+r\nDatIGkgwZjJ+fHRvdkP4XBDmhyAeJSPzv/y35DCjCNrAn244nOHfLGPcvcfhx4fHIzzxRLgp7oUu\nEykoKmDXkgt59K6jQqMQT5oUbMlUbRFOfAEcfzCY7YcjVsL2PuB6wVuB020uIcsokMnl+XIobttv\nYc4
T4LQJXgmc/FiVmIpkLuXCB6eyY0OvQJbRnxj79haeWXV/5U4Bwd2nUx/W/uwMKBpslMw3x0fO\nwxKVynxFnyvI7r3HuIt+3E+1nTSD/WuqGehSEHweH66aPiaqWunOCz4WKtcJQuf2nel4UEfWf7M+\ndNzvc3/P0uKlLNy6MOJ+adEQbry0P2POOw8yTRykz+F9+HTnp7y56U2mrJqC1+NlxDEj6NSuE30P\n78uabWuMYnIUF5eSH0oq6ysSynKLZuW2lQzNH8qkYZMo3VvKb3r9hhfdF0PPxUXodP1lbF13JP7u\nBcz2pOFLmwt48aU5PLdrNM7qxaHrVJQKpyIi0yoYz5m2ehqdD+7M+lXt+N3FDuXlgjfNj/72T7hd\nloTqF1RGfQ/vGxFTiqay0RW4Lk85TrcPCLfMveLl4LYHM3HRxJjl1CQow7d5PV7m7L8LPetzeOcJ\n875TaZGLR8nL84SODQrtjCg350//+1tK/OmAsdL/9cAAFo8YTMHeifg7L0I7L8ABxs1eBsCabWtY\nvW11SMkGlV10vQHz7QVG4BavsrXDyyG3V2FxYZW+WqH3I/DcYjUCarMKk4XEM+hfvQsXGQY8hmkK\nP6uq90dtPw2YBJwAXMpip5UAACAASURBVKSqr4Vtc4D1gcWtqnpuYP3zwGBgd2Db5aq6tqZ65OTk\n6ErToSNpFBbCwIGxB3WEynGRKiqMZeLxVPZUnjzZpA1PnAh33mnWe71w771w++1Vz3PaacZKAXP8\nffdV3S98/6FDTaaXeCsY8Oe7KCwpxP/FQLxHLsLbdXkoHfmso8+iU7tOAEyZdCg6917MowPTb6UC\nrsgz7qEoBKGtry1zR80FYOBzAys7VAboeVhPNu7cGGlR+NNNbOWsGyDn2dC+aZ40zj7mbD4t/TR2\n4kDIIknDm+Yy4M93GwEfVFJRCq/HYT0Y3G1wSIhv37Odol1FrPtmXewst5DbbAGezGW4uHjFa6yw\nYHA/zLWW1m0Vv8/9vZnJUh18Hh8VTkWVstO96SG/v9fjpXO7zhTtLorY57xjz+OtT9+KiAGF+sgY\nWwOvx1utsglHEE5yfke/st9D1gL+seNKHHVCZYRbYD6Pj6v6XkXfw/tyw5QX8H8xwIzTVjQEnXeP\nUdpSAaffBYPur3KeoLIOulxijVYwddZ6rv3bEkCrxMQ84glZfB7x4PP4uLLPlRzc9mAKthTQNq0t\nPQ/rWUVhTV01NUJ5byrdxD7/Pr7c9WXlu7ZulHEBOj7wuPQY/RTT/noy679dz7TV01izfU1oBtLf\n9PoNO37cwcieI6E4l2tHHhuKLSJ+rvvjV4wa9zWnPX8aftcfqr9XvBHPzCMe2njbhNxPU1dNZdrq\nabRNawuKeV8D75C3+2LILAx5Eqatnsbyr6u2TD14OO6w40LfkQcPZxx5BiN7jqw9MaQeiMgqVc2p\ndb9kKRIR8QKfAmcCJcAK4GJV/ThsnyzgYOAW4M0oRbJHVdvFKPd54O3wfWujMRQJwK23Rs7cF8Tj\ngaefNr/HjTNKIHjbwxVGdErkpEmmYyNUpnZOnAh/+UulwkpLgwULwvpDxOojUVg12FpT4K6wuJC8\n+26n/Nn3wE0PXIX5iHpfMpPy3L9Suq+Ub3/8NuI6BeGkzifRuX1n3tj0RhUhF+wh76hjssDm3RsS\nTt6hf+Wcqzbw333/ZcfeHXxa+qnpYxKeqhxN4CP0dF9EWreVVDjGojjsoMOq1A2Mcgr6l0UEVQ19\n+IKQ5k2jz8/6sHyZp9qJxSLOHbHPGXi6LovMIItBuEIILmtUi/yps5/i+neujxBKl2Zfyv9t/D/K\nnXJzD12nViUSJCiUzzr6LN757B0q3Aq8YuJCBVsKIgSWIHhKBuA8/15kgsW7j4U6rwbvR5VU8zDl\n279zf9Y99Egoqyno/x/79lieWfVMrfelJtI8aaGZRh9c8iCzNlU/B
0/w+l11TdbixpHQYyaek54L\nvQuxCLolJ581mRenH8jCpy8MZTv2v/0OJl3zGyavmMyL61+s9rxHH3o0v+r5Kzq06cCGHRt4af1L\ntU/5EFDw0RYzmHfDI54q24LPIai4bjz5Rh758JGQUg/v0FtX4lUkyXRt9Qc2q+oXgQq9DPwSCCkS\nVS0KbKv562smPPCAmblv2jSjAGJZHK4bOZyKiBnRFSJzxjMy4MYbK2MvwaHp8/KMKyyYiRI+mVa8\nkyFF+7GjX7LczFyeGP4E10/34gSfjDj40l02HDgZ/86NMa9f0ZitqPDt5/78XNPaDovHeNJcBgwy\nH/Syr5YZH3XxybUL84DLzAXKAjLXK15O7XJqSGCGE7Ec9T0rSteDu9LW1xaKTq19vLGiIVX2cWNY\natEEBVSwLtGC5aLjL2La6mlVMtLap7cPDaezfc923vz0zVqnEAgKZ3drf8qL8phVVACZ5ryOOjy2\n9DGGHz28yn1wtgysmmAR1nk1NIhn8SmVFiBEPK/lfWZAmYIaazh/1lYK/C9VsS4FQURC1khwnLma\n+kNVuBVc/871rP92fdXMvRgMzBzI4g8d3GD24Zen4f7sP7jVddotPgUtysOfVcC42eN48oonWVrx\nCyo+PxXNms8K7zJOe35ylfsfXefPv/ucB5fEaFlWgyB4PFWVm1e8XNPvGgCmrJpS7Tldddnn38dD\nSx6qkjWYbBdXMhXJEUBx2HIJcHIdjm8rIisxSaP3q2p4s+NvInIXMBe4TVWr5LuKyBhgDEDXrl3r\nWvd6Ez0ZVngHrK1bK1Meg9kjrmviK9mBPmfBY6IDk8Ec9Ntvr6GHcUHiBqQr3ZiNupWjFnuOms+I\na9fwxo+LI/aTklPRLYOR7gvQLh/WWKaiDD9mOJ3adWKKTkFD2VMLWagfwqawnaNjILGEeZAwF5On\n2yrmbJ4Ts0VXG5u/28zm7zZDVnm18RYwH++g01wWLgjfZ35c5zj32HPp1K5TzFY5UG0Ld/ue7aGU\n1Ihx2KgUxgA+j49TjjiF/f795HXP4++vFuKf8W5MhVzulNOpXaeQ7z5EVN+eCIuwKK9yvxkfxEx2\niI5ZuVLBP/57Ge68JVUsGA1kz9F1OaoaSrX+fv/3VaaLDmftN2vjnlqhsKQQ3XJLRP2kaAiSuTzS\n/RruAgv0g6oYPZSHljzE+F//ioItc1n+9XIUYloyQVdT1w5dK91qNeAVL1p8CrrlNLxHLubqc3sZ\nt+LsG0LlC8I1/a7h6RFPU1hcyNTVUyMUmIhJuIlIDInTUk0kqRxs76aqX4nIkcA8EVmvqp8DtwPb\ngXRgKnArcE/0wao6NbCdnJycRr+zEUOoR/VCPucc+PRT2LjRKJPg/CVr1lT2gL3ppkjLxeer7EFb\nXaerjIy6DadQkx81Lw+8Pj+uI+Bx0R6v0ek4D2lr/3975x5eRXXu/8+ayd4Bj1UwakEJBJEq2lQC\nFomUkIpFsag5pb961NOgUmnwSmvLkd4OWg+0tFbqpTZ4vECrrZ5S8Qbe0ACSILeAUdACEgIKFoOo\nFUiy96zfH2vWzJrZs5NAQETm+zzzJHv2zJp12+ud9X7fS8JbdBLvlMCf5pNqsbBfSeF89xycHkFz\n24BuX1g07mqkqHuRmuzujsJrprkw6MUshTIb7vx+dCNCKqYTb/geDUc9mnHZcUccx/Zd21vvEA2X\nRBf15yB6L8CRArFoEhQsQOQvITcnl8vPP4nqLed7PEKbIWlQKpaRfUdSeHwh/1v7v1nVKmEkrARP\nr3s66/UXn3IxE4dM9HYrz6x7hpSTYuW2lTgb/yurQBYItn2yjQmDJ/Db6t/6ajm3/QU7r6S+y0OR\nYXBaExzYLYr/MKzJ0j1aVw06rn9SqmABHxV9RPkZ5Wx7szfv1n2JLqeuYmXOPby/6/2s95sC0hxr\nzUfJzttBOAjhkJMQOL0X4YTVc
obzKwivv9bn/4ppi6dxcteT2xwrB4eGnQ2tLubdjuzG4B6DGZl7\nKzdMPY2mZnAWtnBU8bOMG6UylV439zrSMk2unUv5GSruXXF+MZd++dLAy8a5vc9l/sb5WXdmAkFR\n96I2691RHEhB8g6Qb3zu4Z5rF6SU77h/3xZCVAFFwAYp5Vb3kiYhxIMofuUzDXOnICU89ZQfowvU\nrsS0+NKmiWbCrKIilXExiv8wna5++EPlt9IW2nJcKi6GH0xucJ0nLeSzd1B01QaqxpR7DnbsupkZ\nKRvpCFLNAjaWYOcvofCLhdS9V4dEmcyaTmV5R+Qxe83szAoZC4PIaUGWn6N089oT/9nfKzPisNVY\n/deRxoK2aXUBDA1xM8Li6NyjfUFiCCyR/2okl3PRud3oduQOjtr+I26vuIB0i9KPW1ecx/WXnM3s\nNbNJn/gKnLio7c524UiHa565hqsHXM0Pi38YXLwjIBBcfOrF7Ni9g4WbFma9rtuR3aj7Zx33PfG6\na3a9DfKXKMHTaz7YP/EW+Msv6sEnx5fx5FtP4uAw5805nlopgPwlbO65DCHd5Ta8Q/zXFz2LO5GT\nYuiFm1jsmn2LgoUM/VqSRZsWKVPrKJiqwRTwzB8BCXYz94nzuP/Jm2l5cJ4rKEZgXfEiVo8d/g6i\nlR3rlUVXes6jQgjSmwap+eNYYKcp/t6jLD6+JthmXZ727xISkZNSuyUX6z9Yn3UMTITnk0AwtOdQ\n9qT2sGJpkvc2ljC3z2K6dTmK5mYBjuJwpj38Ks8338LgEwdz9wV3U7u0k1IdbulFDcpJ8rE3HguU\n25oQ0Zjw7AQKjy88oOqtAylIlgF9hRC9UQLkP4DL2nOjEKIrsEtK2SSEOBYYAkxzv+supdwq1F6+\nDHj9gNR+P6K01N8pCBEUItnQqVNmoqylS9X9nTr5/EdVlRIi2unq9tvV7iWVaj2xUnscl7rIPlhC\n4ji+02Nxmc+pzEjV4YjdIBKeeseRDm/88w2klFiWxV0j74LNxcye10j/wTuZ8Oxl0Z73xsIgU9JX\noZgOjPWlWPlLvZD2tmXz5d49WGWYEMuClwkTII50/EUg9CY79Oe3Ui1u9972LWHxo7N/5C9ErxyH\nk7pQ+bqkFX9we/VvMnPGtBNpmaZyRSWdcjpx6Zcv5S+v/0UZxFlWBoEukcxbN4+Tup6UtTyB4OPm\nj7nmj38KEuT6DT0UnHOl/SFf+PgLAZWOI51IYZKWaf+8qe6y0rDuAi+awY9vfZdf3/wrZqyYwXVz\nryPlpFjcYHFM52No3N0YXXGdrkCnO5CqNaQE6VWXkT56c0BQOBuHYvWo8SyjRMFCsNPItPCjYG8e\njKj/Oku2n0bhx4/Qqc8Savgdsr4EnaNHOi0senMt1vHh+gTbV1BaxZCLNvDwB23vNE2EyXKdqE5H\nzk4/9AsvDtySKx4C+0rVRleFuGrbKlZtW0XinRKsP71EqsXmwTtVVIvmExYEniWR0ULEyJAqdx8b\nSLdwoHDABImUMiWEuA54DiXmH5BSviGEuBVYLqV8UgjxVeBxoCtwoRDiFinl6UA/oNIl4S0UR6JZ\nuoeFEMehlPergIoD1Yb9hdZI9CjYNpx2GrzySqY5sVaFaf6jtDSY/tdx/Pwoe/YoT9uMqLGba2j4\nsMELRZHNcam0VDk7KlWZyFCVNeY9DedvhLXfgn6zlSWP6+jm4CCkoHZpJ2beVEhzM8x/MIVzxu+Q\nZ8zEyl/KKbuvQNSX8tYXZpAuWBDNSxjnrN6LuHfUvRQeX6hs/htHccOU05QXvuWoHYyhtoEIfXHo\nTXbPhsHIPr4F1bgB4+iS28UTslavlxA5P0W2OF692kPwtgaJpCnVpN4uXSFyzwX3ADD+mfGBXUpT\nuoldLbtaLevhuodh483ZOSXDAXNtFg1hNuLeka5zpiGQxIcFyJXfA2wEaT76IIepi6bS8GGDJwz
T\nMp1diGj0nwlbi+CdM/ESsWEpjmLk9aH5sMAzzX3sjcdoQbqhItQ4896X4dnfI1NJFs7XmUFHwJgF\nGZyPLHgpw1Q3LHDr85dwRM5prZL+UbjolIuYt36eZw5d3KOYhQ0LlRHKonMCY7Rq42YoPyeS52l5\n+2xEM0gH0lLChrMhJEgiEZEhNZxu4UDggHIkUsq5wNzQuV8Y/y9DqbzC91UDmSFP1Xfn7Odqfiow\neY3CwmDQR73wa/+SCy9UqixtnRUWJratBJLO4X322UrogB+KpaVF/X3wwWAWvUDgOUtZg4STR5l1\nbi3yaF7jKHi2j2cJY3Vby48uGeK9zSftJNuqR7Bnj7u7Stuw/GpYVY71zR/x9nN30dJi4YjRMOZc\n7CvOc8n3l8ktWM31Z13PU8dez+71Z9F/8E5Gnnwvjc8XQilMGlrM1KmQ0py6bFHWRS70rsWD8ZZm\nJ9I4KUEyaTH23/tQ94bvKKb10Z5TWq9lyDEjcN4+u91cSFuLj+cIqB0gpaB2ay09j+7JpV++NMNM\ndNOHm9p8JtkEcTuRtb4uIa7VgHav5QxouZalq5pAJpCWItLlS9WK+FX+flnbnEFoWymwU5AWeNyE\nY6vcPd7CvsA1rYZH33iUtJOG1d9V5shY6vrasS6/4aqmTIE69FcZVmfNmyKI+lDEgzXb1wTrHtE3\nZhm5di7djuzm+fc40uHdj9/1r4/i/bJF4C6oUmGDZEL5b2ljDveZomAhX+jzOh81h/TYnorOz5Bq\npls4UPgsk+2fW4SJeL1Tqa1VpsNPPAFz56r8Jo2N8MYb8Je/KIFiWfCd7yhyPixkLAsuuED9r3PI\nt7So8kH9beiyznvbxoGeR/dUDkxZgse1Fnm0cW0hllS5QYQjGHfMw/z63F6UnVLm7Riuv7WboaJT\nYVqEk8uAD/6HFS02ThqlGqsfBiW/4eqLv0zPo79JacFvYEsxXY6G0pvU3WHTZq0ybGqWOJbasVhu\nGBMtRLRfhPzTCzgtbkTiCybw/VNvprysF8XFhRQOnO95NWvjA+0d3PBhA5XpSjjR5yi0lZQZLgVQ\nQSM3ncuQkhYWOdMiHcaqXmni+fnN0Hm7l3RM5i/1ogXr8mxhc8qxp7B2+9p2vRH3/+puXhMjPAsg\nu2etZxK9L7CwkFtUeBCtLjv1pus5pf8OnnjrThjzaiaRLl1foYiFt0/XPpz0yeU8P3NiiNAGceJK\nhn65DzUvd6WlRako7d6LyOlVS0v+UiVbGs6C+mHIgoVqGtVe6ZYhFVeztcj/DJkhffKX+HxYK0R9\nNvQ9pi/rdqzzPostZ3t9YyXSXPQ/v2fiJUOp+2edUgciSdpJzmIC6xdt8QVWO3g/Xd8Mk2uj3tJu\n5qOoepvCChthOeQmrQOexyQWJAcZ5kI9frxv8tvcrBwVhw2Dxx4Lhlp59FGfuDfhOEqA2Lb/nePA\nzp3+IpyTuBy7/AE48RVPpTVjhnKUTKfVLqg9qVxBCT9bx/vKtSkv66Xa5PqpjP+vTbSktHbSzyaZ\nTNqMvbwrdctcIUAa8WEvxJazYQCByK1acIwZk2na7JtCC/L6baAx75s0fFjIfSvvCyziJ+2aQWUq\n4fMcn3SBob+iuPher75AICTF5YWX8+dv/ZkZK2ZkLIoSSY7Ioah7kQrwKB3YPBhr1stIJ5eaRWms\n7y5G5leTa+d6oUdmzKnj+V/0CagdsJtxxgzP8GmQSEp6lrCucV3AlFmgfC4uPOVCvpT3Jao2VlG7\nrZa69+rI6WVz1cWFlJ/xa+r+WefxFZawMoSSdsCMChdjCTVebCwNqGL+seIE3uz8YMDiTl/v7f4E\n9Mvrl+Evsv6D9WxY1BwktJHq/61fZdkOwQ9u2cDt8x8i3Ws+Ob1WcuPgG7mj5g5aNp3pmRpLu5mT\nz6lmvZHDhu618O6Z/udTn0CcuJyTB77DuiP8fv3KF7/C6vd
WZ6g3rU3DuejcbsqooWEhYUgk63as\nU17lx53KjWfdSO1j51HpdEJKC5HOYVDLROpW1HHNPQ2kO41B7D6eoYVF/PXWi6BFxT0795Zf8fzu\npgDvJ+q/jui51IvzptVtCStBuufS4LxoxcDAE3SBDKmN9DtyKDde0p/i4kgFz35DLEg+w9iyBR4O\nuRVI6YdHiYLKER88Z1qNgc3VXWbS8+uPeAv2tdf6Ze7Zo4SK47SeiKemRu2KtNPl9Onq/NSpfmC/\nB3ZOQlpzQSbIzbW48b/rvYio48oKXRWf4P4HkqRWXk1qVTkz5Ahmrh7OmI/W0tzcyxMcEG3a7Avi\nQqCQms01gai6k0snw8m9uG96M2mD54D+gfboDIsaD9c9zIlHnUiX3C6Rb9gtTgsfN39MwkqohW7B\nfyNTSaRUwQHFxqHYPZcw/fzpnqC6//ENkO6HqXbI5iNjC5ui7kWIVcL7fNPZN9Elt0vAXHvqoqms\n2LpCBWBMS1ZuXemVYRoElPQsYcOODTSnm7GExcDuAyntXcrvl/iBLTUx7EhHCYaCl8D+qacucwpe\nCvSDQPDjIT+mT9c+XPPMNR5pb765m5CaYHcJ7eP7vMv29b2RjgpauGrjZhg6FWSalrTF39f8XS2s\n9cMCC+iGHevJSZaQakmB1QxF98N7X/HVekN+g8xfwjrw/Dr6d+vPyJNHct3c62gJcSY/uuxMyoZ8\nk6r6KgbnD6ZqYxXNTrMXZ81z+MNhXaPbtoIFJJOXk2pR8zEvD8Zf8iWc5smAjRRpnn/Z8YSGk5Ls\nfLM/FEwLPPvHlw+iy8m3eRylNuHudmQ31ry/JmitV1ClLBpTmerLQJ8bQn4N93HD67kUDtx37/b2\nIBYknyGUlytOw8yuuC8I71RWrPAdIZNJXJWOCs419c9B9Zi2KtOkfTanRi2cHEfdU1trJu+CMbev\nU+axrj/GyPOP5q6myV5E1MKB8ykuVrGYnLSFdACZwNk4lOb8Je4Ptdwrr7wcis7zU9CG37B81VxE\n0Lp8+MOjb3HNPY+R7jWf3IJays/4XZv9+Pc1f2fWv8+iU04n9mwsQtYPU7pq90f65vtvugmOXiTd\nkqOIX+Fbj0kpadzlE84nFP4jaKkUEVEZlND45pe+Se1WFf9Jo0tuFyYNnUTN5hovqGE4O+HSd5ey\n9N2lJKxEwJjiqE5HKdXZ5rNI15eyrPdCVm77nbeTsLA4t/e59E+N53d/WYnT88VAyH0KqsjpuYK0\nYwUiG3+0R+noc6wc5TwoZUaMNQ8hdc37wka+/bwad6uF405/A+tDV5BhWNpp/sflFmS3FTj9/wwb\nS/zx0BGmQzyWg0P9znoaPmzgufXPcfcFdzN7zWxeEN9AbizB6r2Ij44rZPismRmm8DovyX0r7/N2\nCiknxXVzr1NWg+UPcOEnf6HbF7ozb9E2nJZjCbwkyJQyAqEF7BZOKPwHnZtWsWfMNxCbSvnRZV/l\n11eWYZIXeiep47WZRgGJXiso/tktLF6UUAEfo8LURODTCCcfC5LPEIqLVRiUadNUkqz9BceBK6+M\n/q60NBhy5Yc/VNyMXsA1qR9Wc5kmzcmkOmeqnqgfRvKoJM09l5HsvZpup46heWWmuXEUz5G0k5SP\n6kt5/2Do8gkvDQ8KIiM5UJA/KWbS0OCPZlxZIYUD/0VV/RGUFtye8aMqP6OcGStmBBbBb532LRUJ\n93Q3KnOLjbT2eDp1iST99lBI5ag3T1Jw0otQegsi/1WSdqeANdzES4by5FsjVM6Zzu8jdh/PN4Yn\neDm1grT042HNWz+Pp956KpDkyVNDuia22lltfvl8LzvhC2+/4C0qKSfF9wd+n55H9yTviDyunXut\nil1m6NhTY4Zj91yG7aYmGP2F3zLhskKc5oux7Z9B+XDS+a94RPKNg3/AI689wpaPtyCRpDadyR8X\ndMXu/SdkfotHMOuQJ5a
wcBwn0KdWz6VIl6twIECoP/bh8oDg9JBfgzXyhzhPu1F8592Fc8U5avfi\nXZOFtHahI/E27mp0E3ANpzn/VTcSb2GkKbw+iroXeX2uw+870kE6KZ7527GkUxJJV2U44O761UuC\nnwgucVI1Ey/5FRPx+bjGXWup2fzFQIw706s95aQYN2AcoCIbPPPSDha9bWOftICKi86gqPsV1G6t\n9RJw6cCrQCBE0KcRTj4WJJ8xFBfD44+rzIuzZ0P//srB8L772ud/EoVkEo46Cu64Q5XxwAPBHN+m\nZRb4Do1FRcFdRpg7Cd9nJlAqL+tFeY9gwiBT5ZTXOMoTUGGeo7TgN95OQguvqYui/V5qNtcw+aEm\nmpqH4aRF5C7K3K1MGloceKM3w2+/ctUr3Pzizbz9wdtc9pXLKDulTJm11lyGk0ogHbBEJ07Y8V3e\n67VCvZX2WYxYLGlpkThWM9bXbyOn10qu6v/9SGu4nF7LaPa8/wWLnE7cfcHdXuTWqvoqP/KvA1cP\nuJqeR/f0+tBcaJrSTVTVVzFp6CQml06malOVp57TPIcu03GcaB17z2We5d6su49iT5ODdCxsklx9\nzJ9hoIryW9S9iOvnXe+r/wziN203Y19xHlaPaiU0pHq+NsG2hc2QnkO8yL0Tnp2gcq9sPssPbdNz\nKWlJ1t2Ms7U/OpsoaQtr9RjsXsvbDIVjBjTUC2o41Hp4boYX3XEDx/km50fk+VlI64eRarGQjgBh\nqYyiRzcoayzXkELvGr556sXU/bOOxl2N5B2Rxw3zbvCed+fIO2nc1UjDhw1qnAwUdS9i3MBxjL93\nFi0PfhvSSVILmqHobxSe0ZfGXY3e/eHAq9MWT+Pdj99l7ICxh3SsrRgdgI7ZpVFU5EcO1h7v2mRY\nCF89ZVnqvGXBkCHKH6WoiAAP0tyMm+M7GNgx/Gavr02n1Y4lijsxF+ywqXBNTTG8Ugw5ruBxf7x5\njaPcFLV+WSoMfiFZrL6zpkQdPms4Tc4AHOt5LDpn+LtkRFR+pI4JbwynqX4A1qbd3HPNkYwrU88s\nzi9mwZXKVj9gJr3zOXIS85FYONYe3s17hBwhuLroasqvLIcrbGXO/a+P6DZgHOWjfhP5w62qrwq8\ncUuk95ZsJnIy22kKo6mLpgYWGlvY5B2R5wnFu0be5UUNTss0M1bOYObqmUw/fzq5ObnsKViItJvB\nwTUpDfrEPLBzjMdp5SQsVwV6r/dsHV0ZyBBKF+bezq6Tfu7lbHek45VtY3N+n/O9NhYeX8i0Rxcx\nZ+Z1nuXUudfNZf7rq5QnfmhnYQlL+ScZ576W921O+9f5bDvuUbqdupFtn2zjiTefiLSUU2//jZ5V\nnl7QzYW3rcyOVVXFlJYWUzxQnbtu7nWkTBNdHRYmYlckkcx5cw5z3pwDm4tVNIaCIshfQlO6iWue\nuUb1k2WTsBNeEisppeeVHuaJtr1xKsN3lAbUcWxR+YfUDh6e2/Aczelm6p6tO6Q922PsR4wb5ye5\nysuL3ink5SlyXjsyLlsGv/qVuifKsTH89m6S8mGCuz3cSbb4Yn4d1Y+xau3eB5eMStijs9k5PRZj\njRnBudZtTL6itNVAlrPnNSrB89DzOOkk1y2UFL6c+XzT858TX+Hq3z3M26t68qLzM5wei0k7tm86\nvUXvxrqRnF1OkQ1VEVZvYT7DfEturZ3m/TnvDqVlw9lYvRfxg0vO9tLzJu0kY84YE2iDqc7xhPio\nDdz/+AaWJn4dWPSq6qsCnNaVo0+huLg88OyEbcRZO6kaXoFUS5pk0mLi5YOgh8rZrtunkWPleIJf\nt2tQy0SedE3HaAHyYAAAIABJREFUSdnMv+vbOPLfwfqJSq6Wv9QTBrVba9l2xE6eWS1JtQgsW7L4\npaN55cVjyE1OZP584Iwanlv/XMDIwuy7GXPquPaX/0e653xkfnVGrpBsmR3DcfKuugq29
dmp+KYe\n1Ygx33B9n17yrdhcK8WM3dXmwTDzRRXSx/6ppyL1CH0HLvzShTz5jye9c3rXWV42ift/n6KlJUUi\nIeh2+ps0bzdSNz+9jpk3FQc4yk8z5W4sSA4hhJ0ag2//wXzYoCywJkyAE07w0wGbRLxlBQM7hnmP\n8nJ1RAmvbHbpWo3U0JB9NzN9usuLNAHCYemOZ6nZ3LXNiR7+sQd2KQUrmVyeS3F+8J5wm0aPzOOl\nP5yD477dpVMyUpCFd0Dlo/rCKFg0ayXNaTsgAExhFbVz09eUliq+RYeL6XLy2sg34KhFraYGpt1T\nQOqxFyBtkbMYPhrwcGCxALWbUYYBJYjeC0kW1FJaUErdiiOpmlfM6JEw/bYvUjpzJS1pZQIccMJ0\nOa3yUfMz6lQ1psqLs1Z+VTl1F6wNGT8Ue3yN3pkIBFf2VwSdGdtt+umvkmMX0uxuM9Jp1xRYJhD1\n55Dbe7UnRDwO4Iq5XJyYxlNLV5FefhVIwZ49klmzBPfem10A19TAdf9xqkpra98Mrrl1WwtsWG2a\nTkNlpUTa18GYxyF/CcmCFYz8xvHMeQGVY6egCpn/Kt8f+H1AcRueqnJ1ue806aoW7Z7LvCRoAYdO\n7XjY+xV3ntUgxkxCbBiC6LOYokGXkXzWyLhYPyyao/y0Uu5KKT/3x8CBA+XnHVOmSCmEVnhFHwUF\nwWtsW8rKSnVvdbUqp7JSykGDpCwr889pVFcHrw2julrKzp1VucmklLm56v+cHCkty3/mlCnqOXZO\nWiJSkpxPZHLcMFndkKVg8xkN1XLKwineteHP2eoVaOPjr8lEbrO0bEd27txKeyLKrm6olhV/mCkr\nJtZ795ntDre1oiK6T1p7brjuFRVSJpJpCSmp4sFIadmOrJhYLzvf1lnat9iy822dZXVDtdc2YaVl\nIrdZVj7+mqx8/DVJ4hOJaJEkPpGVj7+WtW2t9aXZj9UN1RnPNssJfzdl4RRp32JLJiPtW2w5ZeEU\nWVEhvfaovymZyG2RFX+YKSuXV8rOt3WWYrKQTMa7b8SsEdL63hCJvdu9x5G5udF9qZ9bMbFeWrb7\nHNEsGX6ztG6xMuodvrfzbZ3VsxKfSCEc/7ckmiVn/kEyfJIs+82vVf/muP2b84lMXF0SKLdyeaW0\nv/e1QJ2xd0vGFsvcX+bKiS9MlIlbE9K6xZLJXyZl4uoSr7yc3CZZWSnliHEvq7oY/WeOlzkH9dxq\nz2+jLaDCWbW5xsY7ks8JSkv9XQcEIwdr1Nf7HAqot5drlHqWZFLFALv9dp/UnzcP7rzTV5tFOSma\nHvFBfxW4+mro2TN6NzNrFqTTQlk7pRO0bBjS5vY7W8TiNncyIZVb49pC7r4TajdsUqalPfoCbW/7\na2pg1qxiHnywWAXFvMvnisxYamZbwe8T06m0NZWeGe3Aj8umQ4hIII2d42QaNGwpZva9kG5RMZqc\nlEXjWkUSk+qn9Ospyex5jYwrCxKzugzNY4RTDISjTI+a8AHNX4hWnWRTz4U5rjovurk7IU99krE3\nfMy948s9taXpQJm0k4w+bTSLGiawu+ghFW4Hm1QqwsAixHElEvNploprufSs8zn960eR1ziKqj8X\nUhcxt7VqU6tNz9x+J6ufHUBLSjnQ6hAvTy1y4Ds7sGRnL8LD2K6zKM7v5dVl3MBx1B5zHpUyiUQo\nT/yihyC/hpRjs2rrKp9XctKc8tH3WOPumFNNKa651kHKYYoHHDOCZMHKjB1GdDijtn8b+wuxIPmc\nQEcCnqU0DxQVKSERtvQKcyX6+z174Le/DX7f1KTKkNL3F0kk/JArs2YpvxedQ0WrrEzVGKjrr78e\nVq2C0aPVuQcewF0/lHNaos9iSgumtpoq2Azvkk0tkS3Ui/7O9/BPI8vHkN7+CjNnZYbRDwut6ae/\nyoTLCv24YQSFQTa1I/jWbLat+lD3V5R60KyjOT7u6
IHdjD1gFnf/pNhTJ4UXeh1KRz8j7708nn+w\nGVoAIenf29f/RQlnIONcVVVxIMr0U9PPx77qa4EICSaisnCGhUtVozJ2ko5KnpaTX0v5KJWx0VQt\n2pbNVf2vChgeXFP/J9KryiGdwLItGhpsamoyBYHmuIZ861Ve+evZSMfm73cMo6TXMCZMwBvPcFTt\nsNp0+i+aYIuyLly6ZidPPHIcUtqkW1p46q2nSCSuIoVNMulHeDBRXtaLmXfpuSeRA/5KWptdnzaa\nRQ2LvP7+0plbWTPbdVoU0nvhsuiseMDy3MgxKi4u3udEdh1FLEg+R4iKi1VRkbkziYJWeIVhLmT6\nTXraNHjuOTIW1cZG/61o504YOxbeessv27Jg0SIV7kSVKxBC8tVRa5j+s6kZYVE0v9BaeBcT0QS/\nL1TMHZMjgQ1DkCcsiBRK5kLUVD+A3zzxbzQ1+e0VIrswyGbNltevjtqttVA/zLWIyrzXrKOb9NCF\noKDvLo7v9xalxcNpXNuHmi9mGkpoIXLuuTB5slsXCtlw6wZ++7MCpExw1619KBvm9onRTt0PDR82\nKPNc16qsqr6K0tLiQJRp6VhcZURIaM+bbwbHVQqdcpVXu50jufua/0dxvm9Bl43zqF3aCWdjifLR\n2DYQZ/X3uO++oBViQBC98zUWPzbYq3tTk4ppt3u3X7dwVG3z+X4MNpg0qZiamm48838ttDSrSAlO\nt+WcccJABnQfEAiQGp4T/o7Bhh5Tg21zUy2MHplH4cB/MXfdBSq1b+ftKiZXOkEiYTF6YClVf4aG\nLrMC4zbr6XVU7SxuM6zRAUN79F+H+nE4cCTZUF0t5Wmntc6d7O0xaJDPA+jD1FNXVma/N8wbmFzB\nlCnqnMmlhM9VTKzPqvc1r7UsKROJkM7Y0CPndkrJ5LhhkTp+KSN05JbSkQuh7o/ikHR/m3yM5jjK\n/nNrq88z7zc5lURCPTORUH2s+92ygn0XpSPP1jdCqDqZ7dT1qlxeKZO/THq8RO4vc726VlaqepjP\nrqyUcsQI9be9CHAtbfBuUfcmkinFF9m7pf3VSonwx0aXNWWK4sI8jsQKzsFEInNuJhIRvGAWLqjy\n8ddkzjd+LsWF4ySJT9rk29rqgyh+Y8SsEdK6xZKMHSzF8J/Ish89Ezl/k+OGydxOKWlZiqPbm7Fo\nC7STIznoi/yncRzOgkRKNTGTSf8HY1mZgiB8WJa/gJWUBL+7/HI1YfVnc2GSUi0s2crNzVUTvaxM\nCaTKSuOHXxnxg2pjgQy3MxvpPWWKf41JGLdKLjdUK5LT9hcq3XfhRTyq/pWVZr9rgnWwR5Zmg7k4\n67IrKnxBYC6Iui0VFapPKyqyCzhzDuhxMBfcMCkuJgtZ8VRFRjm6rWVlwfq0ZwFrz3i2Ni6KoPf7\ntG+/jzLmZrY5pBfasrLMvgQ1z70x1UT9UxUZRgLZ5oc5z1prf0VF0OjCHNvAXA0JsYqJ9ZEvVWFB\nGSUQ9xWxIIkFSQB6Ausj6ocUJUiSSXV92NqrpET9jVpUwwuMKXDKyoILmn7T1m/gUYvh3ry16msn\nTvTf5s23tPZYnoV3FHphsm2/HyzLX+yzCa8RI8KWdCkphv+k3TuSqB1HeEdSWRkU6npnGCVczHHU\nOzbLdjzrLimj38Cz9Ul4fE8+uW0LuPBiGF54s+0A9P1l/7k18Mzjj8+sg7krHTEic+ej6x+2chTC\n7dPHX/PqkPxlUub+Mjf7zrUNwRhlWWU+V/8mspXRlmWWroM5ByyrbYHWXsSCJBYkWRFeHKMWfL1g\nCaGERrYdTEmJ/2ZrLt6WpcyNS0r8c1FCST8j48dcuXcqD90u/bacmxssUwspsy5R5s3hHYUur6Ii\nUx2i33DNxd1UpwV3JMqEt+IPM1s1x4xS70W1z9yJhMekoiL4XHMHYgo9ra5DNMucb/w80qQ6avEy\n6xi144xaTJPjh
kkx/Ccy5+LxMrdTKuvCW/GHmVIM/0lg52YKl+S4YTKRTHu75XA9Jk6MFrhRY613\nBuGXpBHjXg7sQiqeqmi/WXRox5ttRxE1Nu2Z71FC3fztZWvvvqK9giQm2w9DhM1VzfAptg033aSI\nau3AuHBh9rIWLlQkd0so5JGU8M47KvTJq6+qc0IoazLTTDmRUOSwfpaU2cOxtAaTaJcyaH0mpTpv\nBsJsbs5MQ9yaY+GYMZkWb/qztsaKIvh1NkyA8nIr4C2u621aeDU0BCM1m2R+lDGFLtvEtm2Z42Ea\nQ+jsnE89k1ZpXO0WnF4vUVXf2SfEtxQrUtdwLNV9Bn4dhYCuXWH7dvW5qcnvV922pTs+oPmBuSpO\nlN3MqAnPMeiYCyJNyR/84eXIJgn2T7GvusCLFWZaYF14w7Pseu0CjjgCnnrKv7+kBH79aygrUybY\ny5Zlj8Sg+7K8PGh9aNtwxK5Tsd/xLdK0tVhNDX4IEoLWgVEhhsLe5RQswLbLMywpUyk1NpMm0SZa\nix5x993tyyN0IBALksMUUal/wQ/k+NFHKh6XlG2XFV60NNJpZR2jF3f9g7nrLnX+hBNg4kR17bRp\n8OSTvjAxE3y15W8R5cPSXphlmF7w4ZAwoBYZs3zL8hOB1daqc4WF0QtWtmeb4Te0abBtKx8cPRat\nmTSXlwcDegoB3boFhTUEhZI2R5YI+MJWKHyE3IKVKitlRL200LDt4IKr6zhrFvzxj9nbhjhfmbK6\nicW6Wad7Y6b7CNTnVIuNSnoouKrLTM8fw7TAmjfrPFItqg6W5bf91VfVc0GZmuu5m5OTPRKDKVCm\nTVOC6clHupFIzufq3z1MUfciz9dE+weFzbj1i05GiKGQd3n5qL7wWmZftVa/1hB+XnuF0QFBe7Yt\nh/oRq7b2HmGdfFte81FH2DomkVDqpbB3d5gINg+TOGzN0sVUq4XVT/qZZlvCqh6tdjPVWWGVTlgt\nN2hQZl10ORMntm3NFLak0mWHSfRs3vCtqTV0hIL+/X2jhvAz3b2bBEdOnLo+sl7ayi5M+IcNGJLJ\noMowbEGnoxjYOWlP/ZSNBwqrFk3DiDDHMmhQZr9ls1Bra76HeQbTutBUYUaNVVT9oww6ws+Jql97\nOcE2+Zl2ltMaiDmSWJB0FGGd/MSJQa5E8yfZBInJtegfnbkQa1KwtfAugwb5dTF/NOai5hHHli+8\nwqaQUfxCeFHV/EyU4NKfTcGo+YDWOANo3VRY6+hNowO9iIaJ2dYWrcCiW52dJ9AmvOE6jhgRrJdp\n5WT2YVZSOKKvogR9mFcyBZXJMUQJ+agXiI5a+mmE52AiEZxjpsVea6Fu2rN4m2bUuZ1SAd5sb+ue\n7Xn70gdRaK8giVVbMbIiSi1TVhZUg4FSYZjOXRpSKtWDVgWE88w7juJoCgszVTEaffuqxFqmrn7P\nHqXjj1JD2bZyhAR1TW0tnsdzlIrJVFdJqcrWKpcodZLJk7S0BDkDx4lWBc6ZA3PnBnPAmH0Eqg06\nHI2pqjO/N9VTrak1pk71Pdx1nTW/MXOmnx7ZbEv//sEEZtOn+2kLrr1WXTNuXFQYDgWTJ5g6VY3r\nmDFqDLp1U6pS7RWv+12IoLrMTCkwdarfPhUsUdV9+nRVLvh9GY4kUFWVyVW1hby84Nj94Adqrs+c\nGexL21YqLp2zJzyerakyNXQk71lzNvHAzjHcZ0RXMCMImA6S2ZDteeH50Z4I2x3BARUkQojzgd8D\nNvC/Uspfhb4vAaYDXwH+Q0r5N+O7NFDnfmyQUl7knu8N/BXIA1YA35VSRixBMQ4EoiauSdzPm6e4\nDjM/ytixmTlRQC0kjY3B8C7btsEbb8A6NwX1ww+rMnJyfH24lJlxwMz4VkVFZowqtQhdfLHiY8Jh\nVwYPDhoTSKm88sOhw3UU5DCJf//9fviYqPhmGlE5YGbNUsJIStWu2loVmwyCfI1
ZB13/cFRjU8e+\nc2dm+mQhVN9q73fbVgvlrl1KiJhZMefPV/2q+zqVUkJF8z9tcT5mxAM9ByzL/1/zSo2N6gVBczxa\n2Om5lEz6ZckIIwz9IhMmu81sn2EeJhvf1NjoC1ch1DX/+Aecdx68+y4sX+4LwjvuyKxDNqj4bOr/\noqJQpIXUI6RffoX0sivZvfbbTNv5ASP7Bl8A8vJaLz8bWpsfBwTt2bbsy4ESHhuAk4AksBo4LXRN\nAUqIzAK+HfruX1nKfQwldAD+CIxvqy6xauvThcklmOqYysqgCihsKtoaVwJSHndcUP1QUuL7xWgT\nXe3oGHW/6WNhqn7CnMqIEdEqr4kTW1fjhVV+/fpFczZaFWG2VZtim2qoKHVcNlNTsw/DarawSias\nsoriFNryTYh69pQpbTu6hj2vTTVPWGWkx7S1KNJmfTL9doL+Nm3xTVG+JVrN1VYdso1FeD4Hxreh\nWuZcPF56EYFx5BmDdkpEOqPP2zPu+reQTTW7L+BgcySocKrPGZ8nAZOyXPtQewQJKgTq+0BO1DOy\nHbEg+XTRli49POE12hMKv7WjLSdL8PXx2fxnshH3lhUkdls7NJEfJRS1kAkLuyjuSMrsC21OTpBE\n130bFVXAdELs1y9aYIW93qurfU4sfG22BTksfLKNUZjnMT3OTV4nvIhG8SHhcqKeqTm0bHyTRmVl\ndBlauLZm2BDlw5FtPpvPHlSyQ4ZD6WPvlohmmdspFWnM0ZaxivnC9HkQJN9GqbP05+8Cd2e5NkqQ\npIDlwBKgzD13LLDeuCYfeD1LmePc+5f37Nlz33syxj5hXyZxWzuS/XHYtu+0pn/k2vu9rMz32A8b\nCoQXe/Po0SNo0aMFZUfqefnlmREITGsh89ChVKLeqG07aLEWbk9ZmRqnsrLgvf36Ze5eop4Rtjoy\nd51RoXi0FV3YSEHHLzOvbW9OnPCOSkdl0M8zdxH6mmzxyKL611yYtaOrbft9km3n1taORPdX8Hkp\nyZl/kGL4T2TFH2ZGts8sq6IicyemxyQm2xV6SSnfEUKcBLwkhKgDPmzvzVLKGcAMgDPPPFMeoDrG\nyIL2kI5R92iuZM0aWLx47/1CTNg2XHih0nWvWaPOpdNKx/2d78Ajj6hz+if4zDNBnxgZmjXmZ53X\nJZmEn/88yNFo3woTxx3nO+21Bw8/nMm5aK6oqSl47ezZSvcejk6seSFQPNXatZntmTNHcVo5OerQ\n7Tev1VxW2ABAl3H//er/oiLF85hcQ1QagzlzlL/GTTcF9fjdugWNAJ54QkWZNrNMRnEGYT7ATD0d\n5tBsW/Fie/ZAXZ1fbmlpJuEO6vo771Rzc/x4v+81p9XYmMmb5eX5XMxdd/k+RkcdFUylMH684q38\nNksQDqL7Kjqd9ScvS2VeXrBPdR0dR/V9KhWsdzKpytX80qFOtr+D2jFo9HDPtQtSynfcv28LIaqA\nImA20EUIkSOlTO1tmTE++wh77k6eDM8/73+vPe9XrVIk8ZIl2T3vHQcGDVKLaUmJT/SnUkqImD8+\nbR3UHmgB1a1b0HJIGwvMmqUW1dxcN/x9DuzYsdddEahfTg7cc496zs03B9u8Z48i2E0IodquSeh/\n/CP7cxxH9UleXrSwkxKWLoWRI/0FW98npRI+2snOXOha608t0E1vbFDGCCbBblqbmblWcnN9o4Xo\npE5BaMuunTuV4yGoNiUSvvFB//6ZwltKJagLC6PbUVqqxkb3iZTKIMCygpZo4BtwVFX5/RaEAJlD\nzvN3c/3ZE6n6cx/PEVJfGxZ0ZhlCwNChcMwxSlDra/fV4XGv0J5ty74cKCH1NtAbn2w/Pcu1D2Go\ntoCuQK77/7HAOlyiHvg/gmT7NW3VJeZIDl1k82kwoaPlTpwYrSvW15h+DPuqcjLjaYX9G8LBKMvK\n1NGv374/Tz8zHAm4PVyNSaa3RYK359DcjzZ
qMOOZ7csRCGdv8CAmB5NMRqctEMIPyGjOlbaMElqL\nTG2WHf5sxk4zHS+rq6ONO8LGJu0dM31oHmtvxy3KKbesbN9/fxxsjkTVgQuAf6Cst37qnrsVuMj9\n/6vAFuAToBF4wz1/Nsr0d7X7d6xR5knAUmC9K1Ry26pHLEgObewN39IamW8uVlFcjCbUs5HqQqjv\noiy69nahMJ/ZnuvCud+jnArD5WqCWJPUemEK69P3duHXTpS2LWWXLpnX6MV/bwwTopwK9bOy9ZHJ\ndWhBrvku0yjBNC4IW95FLbyDBmU63+rz2lCiNV4qLBDCLzhRY9W//96NQ7ZnRp1vj2d/NrRXkBxQ\njkRKOReYGzr3C+P/ZSj1VPi+aiByMymlfBsYtH9rGuOzjL3hW1q7Niq+2LZtyifFTBcMShVgOkhq\ndcrYsUq3Hla/QHanymzIyYFRoxQXoN6RopFIqL9mLLGBA5VqxoRWy2gu5cEHlepD+1R06RLOA9/6\nc8OQUqnJTJ7AVKmZKr+PP1a+F2b5OnPjEUf4bU6llOrIdDadNQvuvdeNvZXyOYgePVQgUF2m7vuq\nKtUXuk2pVNBfSaOpSfVBZaV6Zv/+yrFwwYIgJzRggLou3Ddmf69eDVdemekzE0Y6rVJYh2FZkJ8P\nmzer+19/PXsZYQwdqgJSak7MfH5YNZdItO3rsj/wWSbbY8Q4YAhzMWH9+lVX+UErw6lrtRAyPbLL\ny32SXQunlhZfp6/JeSnVIq8dDEERyk1NahEwSdW+feGUU3zCXAdbTCZ9gWY632lBoR39ZswIOtEt\nWKDqX1vb/oCcYaxenf27IUMyDRZAtVdKVe/Jk/0267aMHq36XztAPvig4pgaGoJBGd97T11v9msy\nqdpsRgHOBiH8/hk92ifgw/1QVKTG2LajBRKofl+ypH19qMdTQwv6LVt8IZntOWEkEooDOeUUxROG\nERYqY8d+OpGAhdyX2XSI4cwzz5TLly8/2NWIcQghHKI7KpR9a1F59Xd64YoKPR5VTrZrop7X1vNN\nAwPLgttuU+FHTA/wcGh8DS3U2muAoEnrqOsHDVJe9GY9w3UfP94XbtoTH4Jv3ELA97+vvP/Nfq2q\ngp/9zG+Lea+JkhL1Jq/D6pghW8x2/8//qH6aMcMPE7O/lkkdNRl8Qd8eWBZcdFG0oM6G3Fx4+eWO\nCRIhxAop5ZltXtge/dehfsQcSYx9wf5w6DqYyOYoJ2V2T3CtszfJ/XBgTjMNc1mZykc/6KIVfqKs\n0LE3KXjb4owmTmz93pyczORWOtpzVKDPqLZHEfjaCCCckdG8T/d1WxyH5u9M3qqte3QQyb3htEpK\nOj6H+CyQ7Z+VIxYkMQ5XtCYMw2RxOC2xeZ0m2MPZ/HQWQOt7QySJTyTCCSyuUQt/a3XNJtz0YUYp\nbq2d2cLLhCMHh50g26pvpgNhMKyM+cxsKaejLNXaQ9pnS5GtoyWcdlrmPR19CWqvIIlVWzFiHMaI\nUsG1lUTM/H7qoqn8/OWfk5ZprC1DONe6jdEDS/c5U5+pdoNM1U9lpYqeu6+IaseMGYp8Hz26fWXP\nmKEcAWtrW8/iGQ5iKYRSAUY5B+p67dyp+Kx0WvEo4QRadXVwzTV+BOeLLvKDkdbUKCJeqxdNdea+\nor2qrViQxIgRY59Rs7mG4bOGe1kA55fPV6l6O1KmsdjX1SlLOiHgxhs7JkT2N1rjqMLXtCWos5UL\nmc9o7bma10mng06b+4pYkBiIBUmMGAcONZtrqKqvorSgtMNCJEbH0R4B117EgsRALEhixIgRY+/R\nXkFifRqViREjRowYn1/EgiRGjBgxYnQIsSCJESNGjBgdQixIYsSIESNGhxALkhgxYsSI0SHEgiRG\njBgxYnQIh4X5rxBiO7DpYNfjIOFY4P2DXYmDiLj9cfvj9u87ekkpj2vrosNCkBzOEEIsb48d+OcV\ncfvj9sf
tP/Dtj1VbMWLEiBGjQ4gFSYwYMWLE6BBiQfL5x4yDXYGDjLj9hzfi9n8KiDmSGDFixIjR\nIcQ7khgxYsSI0SHEgiRGjBgxYnQIsSA5hCGEyBdCvCyEWCOEeEMIcaN7/hghxAtCiHXu367ueSGE\nuFMIsV4I8ZoQYsDBbcH+gRDCFkLUCiGedj/3FkK86rbzUSFE0j2f635e735fcDDrvT8ghOgihPib\nEOJNIcRaIUTxYTj+P3Dn/+tCiL8IITp9nueAEOIBIcQ/hRCvG+f2esyFEGPc69cJIcZ0pE6xIDm0\nkQJuklKeBgwGrhVCnAbcDMyXUvYF5rufAUYCfd1jHHDvp1/lA4IbgbXG518Dd0gpTwY+AMa658cC\nH7jn73CvO9Txe+BZKeWpwBmofjhsxl8IcSJwA3CmlPLLgA38B5/vOfAQcH7o3F6NuRDiGOC/gbOA\nQcB/a+GzT2hPYvf4ODQO4AngG8BbQHf3XHfgLff/SuBS43rvukP1AHq4P5xzgKcBgfLkzXG/Lwae\nc/9/Dih2/89xrxMHuw0daPvRwMZwGw6z8T8R2Awc447p08B5n/c5ABQAr+/rmAOXApXG+cB1e3vE\nO5LPCdwtehHwKvBFKeVW96ttwBfd//WPTmOLe+5QxnRgIuC4n/OAnVLKlPvZbKPXfvf7D93rD1X0\nBrYDD7qqvf8VQvwbh9H4SynfAX4LNABbUWO6gsNnDmjs7Zjv17kQC5LPAYQQRwKzgQlSyo/M76R6\n3fhc2ngLIUYB/5RSrjjYdTlIyAEGAPdKKYuAT/BVGsDne/wBXHXMxSihegLwb2SqfQ4rHIwxjwXJ\nIQ4hRAIlRB6WUv7dPf2eEKK7+3134J/u+XeAfOP2Hu65QxVDgIuEEPXAX1Hqrd8DXYQQOe41Zhu9\n9rvfHw00fpoV3s/YAmyRUr7qfv4bSrAcLuMPcC6wUUq5XUrZAvwdNS8Olzmgsbdjvl/nQixIDmEI\nIQRwP7BWSvk746snAW2FMQbFnejz5a4lx2DgQ2M7fMhBSjlJStlDSlmAIlhfklJeDrwMfNu9LNx+\n3S/fdq+6BakOAAADBklEQVQ/ZN/WpZTbgM1CiFPcU8OBNRwm4++iARgshDjC/T3oPjgs5oCBvR3z\n54ARQoiu7q5uhHtu33CwSaP46BDh9jXUFvY1YJV7XIDS+c4H1gEvAse41wvgHmADUIeydDno7dhP\nfVEKPO3+fxKwFFgP/B+Q657v5H5e735/0sGu935od39guTsH5gBdD7fxB24B3gReB/4E5H6e5wDw\nFxQf1ILalY7dlzEHrnL7YT1wZUfqFIdIiREjRowYHUKs2ooRI0aMGB1CLEhixIgRI0aHEAuSGDFi\nxIjRIcSCJEaMGDFidAixIIkRI0aMGB1CLEhixNhHCCHSQohVxnFz23e1u+wCM7prjBifZeS0fUmM\nGDGyYLeUsv/BrkSMGAcb8Y4kRoz9DCFEvRBimhCiTgixVAhxsnu+QAjxkpsXYr4Qoqd7/otCiMeF\nEKvd42y3KFsIcZ+ba+N5IURn9/obhMpB85oQ4q8HqZkxYniIBUmMGPuOziHV1iXGdx9KKQuBu1ER\nigHuAmZKKb8CPAzc6Z6/E1ggpTwDFSvrDfd8X+AeKeXpwE5gtHv+ZqDILafiQDUuRoz2IvZsjxFj\nHyGE+JeU8siI8/XAOVLKt92gmtuklHlCiPdROSNa3PNbpZTHCiG2Az2klE1GGQXAC1IlKkII8V9A\nQkp5mxDiWeBfqJAoc6SU/zrATY0Ro1XEO5IYMQ4MZJb/9wZNxv9pfE7zm6j4SQOAZUaU2xgxDgpi\nQRIjxoHBJcbfGvf/alSUYoDLgUXu//OB8eDlnz86W6FCCAvIl1K+DPwXKgx6xq4oRoxPE/GbTIwY\n+47OQohVxudnpZTaBLirEOI11K7iUvfc9ahshj9GZTa80j1/IzBDCDEWt
fMYj4ruGgUb+LMrbARw\np5Ry535rUYwY+4CYI4kRYz/D5UjOlFK+f7DrEiPGp4FYtRUjRowYMTqEeEcSI0aMGDE6hHhHEiNG\njBgxOoRYkMSIESNGjA4hFiQxYsSIEaNDiAVJjBgxYsToEGJBEiNGjBgxOoT/D+Vislm1Q+UtAAAA\nAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "W4EQD-Bb8hLM", + "colab_type": "text" + }, + "source": [ + "## Further metrics\n", + "From the plot, we can see that loss continues to reduce until around 600 epochs, at which point it is mostly stable. This means that there's no need to train our network beyond 600 epochs.\n", + "\n", + "However, we can also see that the lowest loss value is still around 0.155. This means that our network's predictions are off by an average of ~15%. In addition, the validation loss values jump around a lot, and is sometimes even higher.\n", + "\n", + "To gain more insight into our model's performance we can plot some more data. This time, we'll plot the _mean absolute error_, which is another way of measuring how far the network's predictions are from the actual numbers:\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Md9E_azmpkZU", + "colab_type": "code", + "outputId": "39b97561-b01d-49f2-c35c-fbd8db663806", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 295 + } + }, + "source": [ + "plt.clf()\n", + "\n", + "# Draw a graph of mean absolute error, which is another way of\n", + "# measuring the amount of error in the prediction.\n", + "mae = history_1.history['mae']\n", + "val_mae = history_1.history['val_mae']\n", + "\n", + "plt.plot(epochs[SKIP:], mae[SKIP:], 'g.', label='Training MAE')\n", + "plt.plot(epochs[SKIP:], val_mae[SKIP:], 'b.', label='Validation MAE')\n", + "plt.title('Training and validation mean absolute error')\n", + "plt.xlabel('Epochs')\n", + "plt.ylabel('MAE')\n", + "plt.legend()\n", + "plt.show()" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYsAAAEWCAYAAACXGLsWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi40LCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcv7US4rQAAIABJREFUeJzsnXmYFNW5/z9v98wALoiOUSIMYIiJ\noqOAhNjXJU0gRo3EBe+9GnPHuBFZvEGNXk00GRMTlBglUWPAhTC/GDAJEVfckFHEVgQBUVwQHQEV\no6OIiszSfX5/nD5d1dVVvcx0z8b5Pk8/3VV16tSp01Xve95dlFJYWFhYWFhkQ6izB2BhYWFh0fVh\nmYWFhYWFRU5YZmFhYWFhkROWWVhYWFhY5IRlFhYWFhYWOWGZhYWFhYVFTlhm0cUhImER+UxEBhWz\nbWdCRL4qIkX32RaRcSLS4Np+TUSOzqdtG651u4j8rK3n9zSIyGYRiRa5z7+KSG0x+7RoO8o6ewA9\nDSLymWtzF6AJiCe3f6yUuquQ/pRScWC3YrfdGaCU+nox+hGR84AfKqWirr7PK0bfFsWBiPwVeEMp\nVdvZY+mpsMyiyFBKpYh1cuV6nlLq8aD2IlKmlGrtiLFZWFi0H37vbKHvcXd8760aqoMhIteIyN0i\nMk9EPgV+KCIREXlWRLaKyHsi8kcRKU+2LxMRJSJDktt/TR5fJCKfikhMRPYvtG3y+PEi8rqIfCIi\nN4nIMhH5UcC48xnjj0XkDRH5WET+6Do3LCI3ikijiLwJHJdlfn4uIvM9+24RkRuSv88TkVeS97Mh\nueoP6iulGhGRXUTk/yXH9jJwuKftlSLyZrLfl0Xk+8n91cDNwNFJFd+HrrmtdZ1/QfLeG0VkoYh8\nOZ+58RnzNSIyP/l8fCYia0RkaHJ8H4jIRhEZ52rfT0TmJP+TzSLyKxEJJY8dICJLROQjEfkwef97\neObnYhFZm3wG5olIr4BxZe0riW8m/5uPReQO05eI7CMiDyWfnY9E5ClXvweLyJPJY2tF5HsB1z9P\nROpd26lnXUQmA/8N/Cw5Z/ck2wwUkXuS8/aWiEzJMu+9ReQGEdkkIu+LyJ9EpHfy2DgRaRCRn4nI\nFuA2v33Jtrmeg8ki8gbwatBYuiyUUvZTog/QAIzz7LsGaAbGo5l1H+AbwDfRkt5XgNeBqcn2ZYAC\nhiS3/wp8CIwCyoG7gb+2oe0+wKfAScljFwMtwI8C7iWfMd4L7AEMAT4y9w5MBV4GBgKVwFP60fO9\nzleAz4BdXX3/GxiV3B6fbCPAt4EvgEOTx8YBDa6+NgPR5O/rgXpgT2AwsM7T9r+ALyf/kx8kx7Bv\n8th5QL1nnH8FapO/j02OcTjQG/gT8EQ+c+Nz/9ck72lc8ty/AW8Blye3JwHrXe3vT15vF2BfYCVw\nbvLY14CxQEXy/14GXO+Zn2eB/sn/5XW0JOw3rnz6ejH5H++d7NfMz+/QDLc8ef4xyf0VyXu7LHls\nXHLev+ozx2n/Af7Peq3reAhYDfwseZ2vot/HsQH3dxNwT/L56As8BPza9Vy1Ar9N9tUnYF8+z8HD\nyWv06Wz6VDA96+wB9OQPwcziiRzn/RT4R/K330vxZ1fb7wMvtaHtOcBS1zEB3iOAWeQ5xiNcx/8F\n/DT5+ylcRAg4gQBmkTz+LPCD5O/jgdeytH0AmJL8nY1ZbHT/F8Bkd1uffl8Cvpf8nYtZzAV+6zrW\nF22nGphrbnyuew2wyLV9CvAJEEpu75nsbzdgAJqx9HK1/x/gsYC+TwOe98zP6a7tG4Cb8/z//fpy\n/8ffN/8bmqD+Cxjq6WMM8A4grn3/AK70meNCmcWRwJue610F3OZzLyFgBzDYte9okkw5+VztACpc\nx/325fMcHJPP/HbFj7VZdA42uTdE5EDg92jVyC7oB+u5LOdvc
f3eTnajdlDb/dzjUEopEdkc1Eme\nY8zrWsDbWcYLejV9RvL7B8lvM44T0S/9AeiXfBfg+Rz9gZYaAscgWv12EVrqIDn2vfPoF/T9PWM2\nlFLbRORjNDE3c1LIf/a+6/cXwAdKqYRr24xvMNALeF9ETPsQepGCiPQH/ogmnLsnj33guZZ3XHv5\nDSjPvrzzu1/y97XA1cBiEYmjFzC/Sx7fqJKU1XXeAL8xFIjBwCAR2eraF0ZLl170R8/jGtc8iqfN\n+0qp5hz78nkO0t797gRrs+gceN1GZ6FXsl9VSvUFfkHmw1psvIde8QAg+i3J9pK2Z4zvAVWu7Vyu\nvX8HxonIALSa7G/JMfYB/glMR6uI+gGP5jmOLUFjEJGvALeiVTyVyX5fdfWby833XRwmg4jsjpYA\n3sljXO3BJpIEXinVL/npq5Q6NHn8OrQ3XnXyP/sRbX+u8unLO7/vgiaaSqmLlFJDgJOB/xORbyWP\nV4mLQifP85u3z9ELA4P+nuPe/2gTWjLo5/rsrpQa79P3+2jV8NddbfdQSrltMn7PgHdfPs9Bt03z\nbZlF18DuaFXD5yJyEPDjDrjmA8BIERkvImXAT4AvlWiMfwemicgAEakE/i9bY6XUFuBp4C9oVcb6\n5KFeaP3wB0A8KWWMLWAMP0sahAeh7SgGu6Ff4g/QfPN84EDX8feBgZI06PtgHnCuiByaNOpOR6v4\nAiW1YkAptQl4ErheRPqKSEh0DMsxySa7o4nsJyJShVYdthX59DXV9R9fgbaRkXzGhiaZwido1UwC\nvQpvBS4RkXIR+TZaRXm3T99rgENFpDq5aPil5/j7aFuWQQxoFpFLksbrcPLcwz3nobTL+e3ATBH5\nkmgMFJFj85wbg055DjoKlll0DVwCnIU2OM/C/2UpKpRS76M9SG4AGoGhwCr06rHYY7wVWAysRauM\n/pnHOX9D64VTKiil1Fa0qugetJH4NDTTywe/REs4DcAioM7V74toA+fyZJuvk65iewxYj1b3uNU2\n5vyHgV8lx/UeenV8Zp7jai9+COyKNth/jNb5m1X3L4HRaAJ9H7CgHdfJp695wOPABuA1tK0C9Hw+\ngTZeLwP+oJRaqpRqQjssnIR2xPgj2la13tuxUmpdsr/6ZN9PeZrcDhyW9MT6p9JuqSckx9yQ7H8W\n2o7gh0vQKrDlyXt8FK3qzBud/ByUHJKuLrTYWSEiYbQYfZpSamlnj8fCwqJrwUoWOzFE5LikWqYX\n2mjcgl5ZWVhYWKTBMoudG0cBb6J19d8FTkmqBiwsLCzSYNVQFhYWFhY5YSULCwsLC4uc6DFBeXvv\nvbcaMmRIZw/DwsLColth5cqVHyqlsrnNAz2IWQwZMoQVK1Z09jAsLCwsuhVEJFdGBcCqoSwsLCws\n8oBlFhYWFhYWOWGZhYWFhYVFTvQYm4WFhUXHoKWlhc2bN7Njx47OHopFAejduzcDBw6kvDwoxVl2\nWGZhYWFREDZv3szuu+/OkCFDSE8Ya9FVoZSisbGRzZs3s//+++c+wQdWDWVhYVEQduzYQWVlpWUU\n3QgiQmVlZbukQcssfBCLwfTp+tvCwiITllF0P7T3P7NqKA9iMRg7FpqboaICFi+GSKSzR2VhYWHR\nubCShQf19ZpRxOP6u76+s0dkYWHhRmNjI8OHD2f48OH079+fAQMGpLabm72VT/1x9tln89prr2Vt\nc8stt3DXXXcVY8gcddRRGbaCE088kX79+qXtu/7669lll1349NNPU/sef/xx9thjj9Q9Dh8+nCVL\nlhRlXIXAShYeRKNaojCSRTTa2SOysLBwo7KyktWrVwNQW1vLbrvtxk9/ml64TymFUopQyH89PGfO\nnJzXmTJlSvsH68Luu+/Os88+yxFHHMFHH33E+++/n9Fm3rx5HH744SxcuJD/+Z//Se0fM2YMCxcu\nLOp4CoWVLDyIRLTq6de/t
iooC4tiIbYpxvSl04ltKp0h8I033mDYsGGceeaZHHzwwbz33ntMnDiR\nUaNGcfDBB/OrX/0q1faoo45i9erVtLa20q9fPy6//HIOO+wwIpEI//73vwG48sormTlzZqr95Zdf\nzujRo/n617/OM888A8Dnn3/OhAkTGDZsGKeddhqjRo1KMTIvTj/9dObPnw/AP//5T0477bS046+/\n/jqtra3U1tYyb968os9Pe2GZhQ8iEbjiCssoLCyKgdimGGPrxnLVkqsYWze2pAzj1Vdf5aKLLmLd\nunUMGDCAa6+9lhUrVrBmzRoee+wx1q1bl3HOJ598wre+9S3WrFlDJBLhzjvv9O1bKcXy5cv53e9+\nl2I8N910E/3792fdunVcddVVrFq1KnBs3/nOd3jiiSdIJBLcfffd/Pd//3fa8Xnz5nH66acTjUZ5\n6aWX+PDDD1PHlixZkqaGamhoaMPstA+WWXhgPaEsLIqL+oZ6muPNxFWc5ngz9Q31JbvW0KFDGTVq\nVGp73rx5jBw5kpEjR/LKK6/4Mos+ffpw/PHHA3D44YcHEuJTTz01o83TTz/N6aefDsBhhx3GwQcf\nHDi28vJyjjjiCObPn088HmfgwIFpx+fPn8/pp59OOBzm5JNP5p//dErVjxkzhtWrV6c+nZFh29os\nXLCeUBYWxUd0SJSKcAXN8WYqwhVEh0RLdq1dd9019Xv9+vX84Q9/YPny5fTr148f/vCHvnEGFRUV\nqd/hcJjW1lbfvnv16pWzTS6cfvrp/Od//ifXXHNN2v5Vq1bx5ptvMmbMGACampr42te+xgUXXNCm\n65QCVrJwwXpCWVgUH5GqCItrFvPrMb9mcc1iIlUdswLbtm0bu+++O3379uW9997jkUceKfo1jjzy\nSP7+978DsHbtWl/JxY1oNMrll1/uq4K65ppraGhooKGhgXfffZe33nqLzZs3F33MbYWVLFywnlAW\nFqVBpCrSYUzCYOTIkQwbNowDDzyQwYMHc+SRRxb9GhdeeCE1NTUMGzYs9dljjz0C24dCIS699FKA\nlHSilOLuu+9m8eLFqXYiwsknn8zdd9/NYYcdlrJZGPzyl7/klFNOKfr9ZEOPqcE9atQoVYziR7GY\nliiiUauCsrDwwyuvvMJBBx3U2cPoEmhtbaW1tZXevXuzfv16jj32WNavX09ZWddch/v9dyKyUik1\nKuCUFLrmHXUiIhHLJCwsLPLDZ599xtixY2ltbUUpxaxZs7oso2gveuZdWVhYWHQA+vXrx8qVKzt7\nGB0Ca+C2sLCwsMgJyyySsPEVFhYWFsGwaihsfIWFhYVFLljJgrbFV1hJxMLCYmeCZRY48RXhcH7x\nFUYSueoq/e1mGJaJWFiUFmPGjMkIsJs5cyaTJk3Ket5uu+0GwLvvvpuRxM8gGo2SywV/5syZbN++\nPbV9wgknsHXr1nyGnhW1tbWICG+88UbatUQkbUyrV69GRHj44YfTzg+Hw2n5o6699tp2j8kNyywo\nPNNskCSSjYlYWFgUB2eccUYqe6vB/PnzOeOMM/I6f7/99kvLu1QovMzioYceyqhL0VZUV1en3ds/\n/vGPjHxT8+bN46ijjsrITNunT5+0/FGXX355UcZkYJlFEoVkmg2SRGy6EAsLfxRT4j7ttNN48MEH\nU4WOTHqMo48+OhX3MHLkSKqrq7n33nszzm9oaOCQQw4B4IsvvuD000/noIMO4pRTTuGLL75ItZs0\naVIqvfkvf/lLAP74xz/y7rvvMmbMmFQepyFDhqQyxN5www0ccsghHHLIIan05g0NDRx00EGcf/75\nHHzwwRx77LFp13Hj5JNPTo15w4YN7LHHHuy9996p40op/vGPf/CXv/yFxx57rF01tQuFZRYu5PtA\nB0kihaqzLCx2BhRb4t5rr70YPXo0ixYtArRU8V//9V+ICL179+aee+7hhRdeYMmSJVxyySV
ky1Jx\n6623sssuu/DKK69w9dVXp8VM/OY3v2HFihW8+OKLPPnkk7z44ov87//+L/vttx9LlizJqFa3cuVK\n5syZw3PPPcezzz7LbbfdlkpZvn79eqZMmcLLL79Mv379WLBgge94+vbtS1VVFS+99BLz58/PyCH1\nzDPPsP/++zN06FCi0SgPPvhg6tgXX3yRpoa6++67C5vYHLDMIolCH2g/ScQWTrKwyEQpJG63Ksqt\nglJK8bOf/YxDDz2UcePG8c477/hWpDN46qmn+OEPfwjAoYceyqGHHpo69ve//52RI0cyYsQIXn75\n5ZxJAp9++mlOOeUUdt11V3bbbTdOPfVUli5dCsD++++fyu2ULQ06OEWSFi5cmJH/ydS8MO3cqiiv\nGsrLaNoL6zqbhN8D3RZib9OFWFikoxQJOk866SQuuugiXnjhBbZv387hhx8OwF133cUHH3zAypUr\nKS8vZ8iQIW1S1bz11ltcf/31PP/88+y555786Ec/apfKx6Q3B22IDlJDga7NfemllzJq1Cj69u2b\n2h+Px1mwYAH33nsvv/nNb1BK0djYyKeffsruu+/e5rHlCytZJGFVSBYWpUEpJO7ddtuNMWPGcM45\n56QZtj/55BP22WcfysvLWbJkCW+//XbWfo455hj+9re/AfDSSy/x4osvAjq9+a677soee+zB+++/\nn1J5ga6l/emnn2b0dfTRR7Nw4UK2b9/O559/zj333MPRRx9d8L3tsssuXHfddfz85z9P27948WIO\nPfRQNm3aRENDA2+//TYTJkzgnnvuKfgabUFJJQsROQ74AxAGbldKXes5fgEwBYgDnwETlVLrXMcH\nAeuAWqXU9aUcq3mgbcZZC4vioxQS9xlnnMEpp5yS5j105plnMn78eKqrqxk1ahQHHnhg1j4mTZrE\n2WefzUEHHcRBBx2UklAOO+wwRowYwYEHHkhVVVVaevOJEydy3HHHpWwXBiNHjuRHP/oRo0ePBuC8\n885jxIgRbSqBalRNbsybNy9DLTVhwgRuvfVWampqUjYLg+OOO66o7rMlS1EuImHgdeA7wGbgeeAM\nDzPoq5Talvz9fWCyUuo41/F/Agp4LhezKFaKcgsLi+ywKcq7L9qToryUaqjRwBtKqTeVUs3AfOAk\ndwPDKJLYFc0YABCRk4G3gJdLOMYM2KA6CwsLi0yUUg01ANjk2t4MfNPbSESmABcDFcC3k/t2A/4P\nLZX8NOgCIjIRmAgwaNCgdg84FoMxYxxD3JIlVh1lYWFhAV3AwK2UukUpNRTNHK5M7q4FblRKfZbj\n3NlKqVFKqVFf+tKX2j2WujpoagKl9HddXbu7tLDokegpFTZ3JrT3PyulZPEOUOXaHpjcF4T5wK3J\n398EThORGUA/ICEiO5RSN5dkpEWGLc1q0ZPRu3dvGhsbqaysREQ6ezgWecC42fbu3bvNfZSSWTwP\nHCAi+6OZxOnAD9wNROQApdT65Ob3gPUASqmjXW1qgc86glHU1MCdd0JLC5SX6+1CYdOdW/R0DBw4\nkM2bN/PBBx909lAsCkDv3r0ZOHBgm88vGbNQSrWKyFTgEbTr7J1KqZdF5FfACqXUfcBUERkHtAAf\nA2eVajz54pxz9HdNTduIfLGC+ywsuirKy8vZf//9O3sYFh2MksZZKKUeAh7y7PuF6/dP8uijtvgj\ny4RXImiLVAFOcF9TE4hAZWVRh2lhYWHRKeh0A3dXQbHy10QiMHOmjgRPJGDaNOuGa2Fh0f1hmUUS\n0agm8CL6uz3pPhobNaNIJGyqcgsLi54ByyxcMI4d7XXwsHmmLCwsehoss0iivh5aW3WMRUsL1Na2\nXX1kU5VbWFj0NNgU5Um4DdOJBDz+OCxd2nZi702cZmMvLCwsujOsZJGEkQbGjYNQqLj2Blub28LC\norvDMgsXIhGtfurVSzOMYrm+2trcFhYW3R1WDZWEW00
0cyZMnaqJ+7Rp+nhjY3YVUjY1U6GVwqzK\nysLCoqvBMgsyA/LOOstxfW1q0owjkQhO35ErxUchhZVsuhALC4uuCKuGIlNNBI7rayik98fjmnH4\neUnlo2aKROCKK3ITfquysrCw6IqwzILMuIiaGr2iP/98OPFEnVTQGL0ffzzTSF3MuAobo2FhYdEV\nYdVQOCk6FiyACRP0diwGc+bo1X1ZGYwaBStWpHtJGSmhmPW7bS1wCwuLrgjLLNCMYdo0zQSWLoXq\naqcQEuggvU8/1RJGa6v/ir+YBelLUdzewsLCoj2waijysxO8+qqO7j7/fGt0trCw2PlgmQX+doKa\nGv3bQCnNTAYNsozCwsJi54NlFsDChbDXXnDkkY7UEIloCeOCC3SQXiEG51gMpk+3kdoWFhY9Bzu9\nzeL//g9mzNC/33lHMw634dqNfKrnueMkwmFdec8UUrJG654PG1Bp0VOx0zOLf/0rffuuu+C66/Tv\n2bOdSO5evfKrnue2f8TjMGuWrust4hjHrc2jZ8IGVFr0ZOz0aqhTT03ffv99/dLHYjBlivaEMpHc\n+QTIGfuHqYlhUp7bQLueDxtQadGTsdMzi+uug2OOcbaV0m6ztbX6pTfIt3qeiZP48Y8dW0d5uQ20\n2xlgAyotejJEKdXZYygKRo0apVasWNGmc712BhFHojBlVm+5BSZOLLxfo78Gq8veGWBtFhbdDSKy\nUik1Kmc7yyw0zEu+cSPcdpuWKkIhXd+itrawF98SDAsLi+6CfJnFTm/gNjDusrEYzJ3rGCkNo5g9\n20kHkk3CsEZOCwuLngjLLDwwNoe6Or29dq12rV24UG8/+qj+njjRX4LwM3JaZmFhYdHdYZlFAObO\ndepxe7Fggc4f5SdBeAsdVVbqAL32qKSsWsvCwqKzYZmFD4x04McoQKui3BKEqXNhVFYma2xlpZOg\nsK0qKavWsrCw6AqwzMIHRjowkoWIdqkFna68ulr/drd5/HGdsXbmTKcEa5DffSFSglVrWVhYdAVY\nZuEDr3SwYIFmBomEJto1NXDppbpNba1zzFuCdebMTJVUoVJCofW7LSwsLEoByywC4K4pUV2tpYYd\nO7SE8cYbOuhu1izNLJYu1cRcRDMTUyCpsTG9kFFbpARbDMnCwqIrwDKLABijcmWlJvoXXgi33w4f\nfeS0WbBAe0UF2SgMcXcT+GxSQpAh2xZDsrCw6GxYZgHENsWob6gnOiRKpCqSMir72Szc2LHDSUO+\ncaP+uG0WXgKfTUqwhmwLC4uujJ2eWcQ2xRgzdwzN8WYqwhUsOWsJ9fWRNG+ooCD3pUs10TfJAkHn\ng1qyJJjQB0kJ1pBtYWHRlbHTM4u6NXU0xXWx7aZ4EzOWzeCy6D2+3lDeb8Mk3MykqUkH9AUR+qB8\nUdaQnQ4bW2Jh0bWw0zMLLxa+tpBdK37IWb8/Fhq+xYihg1m1CrZsgf79YcQIWLUK5szR9SnCYad2\nhcFtt/kXSvJLWOiucVEKQ3Z3JLpWJWdh0fWw0zOLEV8ekbHvrrV3IfyN3n17M2Kf55g7tzpFuGpq\ntFG7psYxat9xByxf7pwfj6dLF4ZgL1/ueFQlEo5EYmplXHFFph2jPYS+uxJdq5KzsOh6KCmzEJHj\ngD8AYeB2pdS1nuMXAFOAOPAZMFEptU5ERgOzTTOgVil1TynG2Li9EUFQpBsmFIodrTu4454NNDdX\nZxCuSETnjZo6VUsHQfAayw1CIUcaSSQ00/E7rz2EvrsSXauSs7DoeigZsxCRMHAL8B1gM/C8iNyn\nlFrnavY3pdSfk+2/D9wAHAe8BIxSSrWKyJeBNSJyv1IqC1luG6JDopSHy2mON2ccUyhW9bqRsvLx\nQBgRnVCwslLHXkyZks4oQiEtLRgJBIJTh3z96/Dqq3p/KKQ9qNwohNAHSSDdlegWK7akO6rgLCy6\nKkopWYwG3lBKvQk
gIvOBk4AUs1BKbXO13xX08l4ptd21v7fZXwpEqiKccMAJLHx1oe/xxMBlnHvD\nXWx5tIaFC7UqaflyXV3PzSjKy+HmmzPdZg3B/uKL9H6/9jV4661gQp4voc8mgXTngL72xpZ0VxWc\nhUVXRSmZxQBgk2t7M/BNbyMRmQJcDFQA33bt/yZwJzAY+B8/qUJEJgITAQYNGtTmgfbftX/GvrJQ\nGYlEAhFhxOgdLPDwkqVLnd/hsGYUfnUu3CnP77hDM5jycjj+eG0wB39jeL6Evr7eUXEZ20dnBvR1\nldV8d1XBBaGrzKvFzotON3ArpW4BbhGRHwBXAmcl9z8HHCwiBwFzRWSRUmqH59zZJG0bo0aNarP0\n4WfkjifiCEI8EWfaw9O4cMxYHn10qOva+lsExo/XEkUs5v8iG4LtNoq7I72NyirovGyorHRUXH62\nj45EV1rNd1cVnB+60rxa7LwIlbDvd4Aq1/bA5L4gzAdO9u5USr2CNn4fUtTRuWCM3GnXRZEggULx\nResXrB4wmcumb2D0aM0gDEIhuP9+uPJK/UKbiG43YjFd0wK0x1Njo3822jaNvVGPwYzFa/soFsw9\n+N2fQVCW3Y64thdGMvv1r7s/cS3VvFpYFIJSShbPAweIyP5oJnE68AN3AxE5QCm1Prn5PWB9cv/+\nwKakgXswcCDQUKqBRodE6V3Wmx2tO1AoX++ox958jKVl1Xyj30aU2ju13x1f4VUDxWJa/WRiMsyq\n0J0CXSS7NJBL/RCN6qjxUq6g813ZlmI1355VdU/JqdWTpCSL7ouSMYskoZ8KPIJ2nb1TKfWyiPwK\nWKGUug+YKiLjgBbgY5IqKOAo4HIRaQESwGSl1IelGmukKsLimsXUN9RTuUsli9Yv4t7X7k1jGApF\nU2sTb25qCuwnHHYq4xlVk4mrAGdVeMUVOofU1Kma2Uybpr2rsgXxBRHKjjBi56v/z3cshejfe5rt\noS3ozo4KFj0HJbVZKKUeAh7y7PuF6/dPAs77f8D/K+XYvIhURXQSwU0xpj08zbdNggTfOuUN7npl\nQMYxEbjoIscWIZIeeCeSvipsbNTHTTpzPyJYCJEuFQGJxXSCxLLkk5JrZZtrLIVKCnZVrdFTpKS2\noCcb97vTvXW6gburob6hnqbWppQ6auieQ9nw8QYUipCEOPi4Z5g1+FvccQesXOmoocrKYNs2h7gb\nu4aIljjOOy/d6ykfItjZhNKbnuT88/09twpBoZKCd1UN7a9pbtF9UGrjfmcS6+7muFBKA3e3xNam\nrSTQ7kUKxanDTqUiXIEglIfKiQ6JMnEiPPecJp6GKRiPpIoKJzjPfEQyiWw+BtjONtK6CXs8DoMG\ntX8MhgGGw/kzwEhEq+5Av1wkobxhAAAgAElEQVRXXRXsTGDRs1BK434sBmPGwM9/rr87+nnqbo4L\nVrJwIbYpxg2xG1LbIUK8/uHrtCZ0iEdcxalbUwdotVVNDcyd66wMRiQ9cF94AZ5/3lFBtbb6r6C9\nqgW/VU5nqh9KIdm0R/9u7ReFozupOfxQSum6rk47mUDubNGlQGdrDgqFZRYu1DfUk/Dk5XAbulsT\nrcxaOYu5a+ayuGYxkUjEt0peOKzVUqbGRSiU7vFkvKTAkTi6okhaKsNqWxlgd3u5Ohtd8ZkqFD3Z\nuN/d7s0yCxeiQ6L0KutFU2sTItp9VqnMBINN8SbqG+q1UTxJ+KZPd1a9oFVUW7boGIxEQueRAu31\nFI3qtqDdapcsSV81NzVpxjNyZPttBO1FVzKs5qo02F1euo5CT5HESvUM1tTAnXfqRV15eXBwbCnR\nld6vXLDMwgW3C+3yd5cH5osShOiQaNq+aFRLFImE/jbR2vfdp9VRra3aVfbccx2JA5yX2B17kUg4\nOagMM+kuD1Sp4fdyeQ3x55yTm8nuDMzFSmLZEYnoZ6CnPwfFgmUWHkSq9BNT+2RtY
JvxXxufaueG\n2wNq7VrtcuqGkTrKyx3JwrzEZtVcWwuPPZYZm2Ef5GAC7zXEz5qlbUlBapeeoJ7JB91NzdEZ6E4r\n+86GZRY+qG+oJ56I+x4LSYjjDzie2KYY9Q31RIdEiVRFqK/X0oMptWoC7twmEBHo2xdOOAFee02n\nKb/sMs1YamthwgT9bYgf2BWhQTYCb1bQJgBSqexMtqeoZ/KBJYYWxYJlFj6IDolSEa5Ipf9wI6ES\nTH5wMmWhMloTrVSEK1hcs5hoNJIS+UUyGQXofTNmONtvvqlTlZt9jz6qV8X19ekGcLCxBdkIvDuz\n75w5mllnS6Ni1TMWO4MastgQrwG3u2LUqFFqxYoVRevPRHIvf3d51nZhCXP+yPMZtMcgKhtPpPGV\n6pRnlLE/iDhqJTdEYMAA2LzZ2XfssfDII87DvHUr3HijJpK9ejkr6p3tYc9XdTR7tiPVuefLr7+d\naf4sHOwsash8ISIrlVKjcrWzkkUAIlURZh43k+jcqG8VPdCGbhFhzuo5SSnj19qltipCdbXjUrtq\nlSZiXkmjrAzeey9934QJwaVYTaJCKMyg2xMIY77693zSqJj+uuJc9IT/qqtjZ1JDFhOWWWRBpCpC\n/Vn11DfUs7VpK79b9ruM5IJKKVpUCwmVoDnenOFSa7Bliy7JajBsmK62d9ttzr5jjtEFlIwbrpe5\nhMOaiBRi0DVRqmYV1Z09q/Ih8NlUTF2dENsVb8fAqiHbBssscsCdYPD+1+7nlQ9fSTseV3HCEiYs\nYSrCFRkutQaXXQaLFjkP6O236/133ul4ST33nCYYXjdak1/q5psd4pGvQbezo1Q7GkESSEcT4rYw\nJrvi7RhYL7G2wTKLPBDbFGNs3Vh2tO7wPa6U4qjBRzFs72GBfUQiTvCd+wE95xwtGZhYjPp6nQfJ\nHRnurevtNeiaWhlddYXU0St6PwkkKA9PPuMqdPxtZUx2xdtx6KpqyK4MyyzyQH1DPc3x5gzPKIME\nCZ56+yme3vh0KhWIOc+41oLzgJrKb9EoqfxSphDS1q3OMZM8zw+mLxP8F0TIOjtK1W1/CYXgllv8\na5WXGl5CXFmZH0FvC+Fvq4RgV7wWXRmWWeQB40rb1NqUykjrh4RKsKN1BzOWzeCRDY/QHG9OudYa\nhuFHPGfOhMmTtYQwY4ben82Tx41sKySzIr7ppkzppKNQX++o0xIJmDRJ7+9ohuElxPkS9LYQ/vZI\nCPmseLu67cWiZyIrsxCRvkqpbQHHBimlNvod62lwpwHZ2rSV3z/ze+LKP2hPobj3tXsRkQyjN2QS\nz8mTdXCeuzyrnyePm0CYfnJVo+ssY6l7rNGoZn7GWJ9IaNdWv8qApYaXEOdD0NtC+EspIfRkI7hl\ngl0buSSLemAkgIgsVkqNdR1baI7tDDCG7ulLp5NQwdIFkCqc5Gf09hLPeBzWrcvsw62Scme0NTEb\nSgVLH7GYjgQ3TKkjjaV+xOyWW7RE4b7nzjbe5kvQ20r4S6UT76lG8J7MBHsKcjELcf3eK8uxnQbR\nIVHCoXCqxkUQlFJMPHwiNYfVpOWRikQ08Zw6NT2hoEE4rL9NtHco5DAXryutibtwSx/uKOZEQp/b\nkcZSP2JmbC9Tp2pVmzdle2chX4Ju2hijeGcSsZ5qBO+pTLAnIVelPBXw2297p0FYwjnbKBQvvPdC\n2r7YphjTl06n+vgYTz4Jo0ennzNwIIwfnzw/ObuJhCawvuMIO8TCrMxmzXIkilAIxo3r2FVaUCW8\niRO1629ZmR7btGmZlcmM4b+rVcAzc9sVKvQZSact1RO76vxC2yooWnQsckkW+4jIxWgpwvwmuf2l\nko6si6K+oT6nVGGw/N3lHHnnkVx65KUAXP/M9Sil6F3Wm8U1i5k5M8KYMU4cxDvvpKf+cMMvQO+i\ni9JdQJubHSYjotVUtbUdm+4im9omW3R1qdQQ3
vtsy33X1TkxLV1h1dsWFVcx57cUz471BOv6yMUs\nbgN29/kNcHtJRtTFYTyjmuPNKSN2NhuGQjFj2Yy0fU2tunjSFUdHWLIkMy15Phg/Xns5Ga+qiy92\n1BMmBciIEf6qk1Lqh7MRkmwqlFKoIbz3OXOmY/vJ975jMe16bP6bsrLuueot1vy29dnJh8HY2Ieu\njazMQil1ddAxEflG8YfT9eH2jDKG63y8pNIgpM6NRDSzeOKJYHWTF+Xl0L+/s9pNJHSywZtvdlxk\nIfilLpV+OBchybZ6LIUu3nufCxY4KjqvvSdbH8ZTTQTOPrt7ErRizW9bnh1rvO4ZKCjOQkSGAWck\nP1uBnJkKeyKMZ5R7e/rS6Wlt+u/any2fb/E9/6f/8VNfo/cFF6RLF8OH6/oXy5Y5BGvIEMdg7G4b\nj2tGYY65y7x6X2o34QiHdZGmWKz9L3A+hCRo9ehmJJWV+nvt2vbFh3gJ5PDhOg08aIaRj5Hd20dn\nlN4sBoql5mkL07HG656BnMxCRIbgMIgWYDAwSinVUMqBdTe41VMV4QquHnM1kx+cnCZp7NVnL/rv\n1h+A6Uunp0V3T5yoYw8uv1zXufjWt+Bf/3II+vjxOrfUpk1alXLWWempz93Gbsj+UkciWiVzxx06\nI+5tt2WvLJf3HGS5Zj4w13Zn3M03QNFPzeEXiGc8y0IhzYjyGVNH6NI7IsagGGqetsxHd07umA96\nwj3kg1xBeTGgLzAfmKCUWi8ib1lGkQmveqq+oT7DlvHRFx/x0Rcfse4DHVhREa7gpuNvYtV7qwCo\nOayGJ5/UT9v06TB/viNRbN+u1VTxuFY/bdkCvXs7NoubbyZ1nnlog17qWEwzHKPGAifJYK7Av2wv\nRjEIq1mFuoP4cq1Gs6k5vASyV69gZhZ0b0FEtlhEorupaQplOkHPRSH33VUJcnf779qDXJLF+8AA\nYF+099N6dmKX2VzwqqdCEspqw2iONzPpgUmpFCJzVs9hyVlLiFRFMlZjEyboRITxuCbwDz6Yn40i\nWwoLtxorkdCSRiKhpRQRJ0HhYp3qKiNxod+L0d7Vq7lvt2SRS0rJV83hp+oy+wt96d3t86kpkg09\nRU2TayHh3ZfvfXeVbATZ3qXu/t/lg1wG7pNFZA/gVKBWRA4A+onIaKVU9hJyOzkiVRH+9L0/8eMH\nfpy1nTvXlLcehns1BukpQVpaNHGfOVM/nNlsFF4YguyWLAxzMAZzcFxF6+q0msrdvtDMrfnCS9Dz\nsVkUov5yq7rcxKfQl97dPldNkVzwG//s2dogP2FC5yReLBRtIej5/m+dRZDzuaeermJzI6fNQin1\nCTAHmCMi+wL/BdyYzA1VVeoBdjfENsVSqqjqfaodN1uEb+z3jYwyrSFCKYYhIlTu4lhd3aux6dMz\nXWuXL9cFk265pTCjtSHIRlLwShlKaY+rREL3CZkxHBUV+WduLRTFUnMEwY/4FGpv8TLc9sRgeMe/\ndi38OLnGMAb5rs4wCiHobiKaz//WWVHr+TpsdIX6KR0CpVSbPsDgtp5bis/hhx+uOhvPbHxG9bmm\njwpfHVZ9rumjLrj/AhW+OqyoRYWvDqsL7r9Alf2qTFFLat/wW4entqlF9fp1L/XMxmcy+35GqbIy\nQ5bSP+Xl+vgzzyh18slKhcNKhUJK9emj1KxZSv32t85x89vgsssy+wuFlLrggvTz+vTR/VZU6GOm\nr3BYnxMO6+3uAPf99OnjzIff/OTq54ILlOrVq/19uXHssen/x7HHFt5HRyNoTtvazu88v/lszzzn\nc822jFWp7vVuACtUHjQ2l4H7vhy85vvFYlo9AabuRVzFU3W73R5SgGG0gE5pvvr91Wl9NMebqVtT\n51sL47zz4M9/zrxuPK6lBID773fUVTt26HxMQXaISARWr87sr1evTP170AqwFCu+QjPsBp2bLfjL\n737aItFEI
pk1Rdq7qpwwwZEozHZXR77SXVtVSn7/TalX7+1x2OiJObxyqaEiwCZgHvAcO2nywHxh\n3Gd3tO5Aoejbuy+LaxZTt0ZT8hFfHpFWFyOomNLsF2YD0CvcK60WRk2NdnN12y5AM4PZs521qIGI\nbutOQqiUZiKmvKqXMJ18si4Bm8tAaYjyzJnFrZXhJgD5ZNgNOjcX8SiGG2lQX0EEMV8dtlE5dSeb\nBeQ3p8Ukoh1hy2jrc5LLG7E72jJyMYv+wHfQMRY/AB4E5imlXi71wLojIlURLvzmhcxYNgOlnDQf\nc9fMpTneTDgU5oSvnsC7n76bYbsAEARFSs1HU7wprRZGJAJ/+lN6um8Dv9xRRx6p63q3tuptpbRh\nXCltq6ipaRthCiLK2SQCcyyX0dpNANzIJ+K6EO+aUr6sfgSx0FXwxIndh0kUgmK4Vxt09dV7W6Sh\nrsxIcnlDxYGHgYdFpBeaadSLyNVKqZs7YoDdDavfS9fr/Gvdv1KqqXg8zr2v3Us45J+11itpKKVS\nBu+U4fz4KE8/HaGuDl54AZ5/PtPwPWSITkq4bJlmEuefrxlDXZ1T77ulRacZqa0tnDD5EWVIdyV1\nq7xMTqZ8Au38PLUgM+jQD/kQj44wPPoRxEK81Xo6iiXV5cN4uhrxzbag6epG8XwiuHsB30MziiHA\nH4F78ulcRI4D/gCEgduVUtd6jl8ATAHiwGfARKXUOhH5DnAtUAE0A5cqpZ7I8546FROGTeDRNx29\nzqnDTmXmszOJJ5fKCkU8kRl7EZYwCpUWyKdQTHt4Ghs+3sCNsRuJq3hKNXXrrRFiMf0SNDen99XQ\nkL49aJDz0BkX2ERCJy9cujT/hHrmpfMjyu6XwOt6u2BB/oF2Xk+tlhYn6DDXGPMhHh3lhukliN45\nq6xMD6C0aBuyMZ6uSHyzLWg6y0U4X+QycNcBhwAPAVcrpV7Kt2MRCQO3oNVYm4HnReQ+pZS7Ltzf\nlFJ/Trb/PnADcBzwITBeKfWuiBwCPIIODuzymHi4XqIvWLeACcMmMPHwiWzbsY0/r3Qs0yEJEZIQ\nLQld/SgsYS75j0t49I1HMwzeX7R+wfXPXJ9iIiZjrYnFqK/Xhu+ganvuBzIS0av8SZMcW4A3cjvf\noCg/oux23XVLFhMmaKaUb6BdkOE4H+RatXak6sK7qnXHjxSa/daicHRF4pttQdPV1Wq5JIsfAp8D\nPwH+VyRl3xZAKaX6Zjl3NPCGUupNABGZD5wEpMiaSq/vvSvJ6HCl1CrX/peBPiLSSynVlPOOugAm\nHj4xxTRAp/GYu2YuTa1NhEIhbjnhFqr3qU4zfF+46MKUB5UXbmkjQSIjFuP22+HoozP1/CJw4YWa\nGcyYoTPVeiGSOyrb76W74or0dt6XwJxnXojq6sIC7Tqj3kYxEbSqtSqp0sH7zHRV4hu0oPF7NruS\nGi2XzSLUjr4HoD2pDDYD3/Q2EpEpwMVoldO3ffqZALzgxyhEZCIwEWDQoEHtGGpp4c0bFamKENvk\nlCtb9MaiQEYhSKpuhsGCdQsAaNzeqPuLRPjTnzKz1iYS8PvfpzORcFh/jDvt+PGOu63xkoL0B7Sy\nUksDSuUnEbi3g45lQ0e4RJb6xcu2qg0iYl2JMHQEinm/Qc9Mdyuo5H42u5oaraAU5aWAUuoW4BYR\n+QFwJXCWOSYiBwPXAccGnDsbmA0watSoLp2zyp03KrYpRnRuNJBBGIQIURYuY9jew3jx/RdTkd6P\nvvkoj775KCEJpWwYEydG2LBBSxCp80OZXlJum4K7nck5dccd6ZKGMU7H47rdhRem51TyQ3uq08Vi\n2uhuVFbFWnl3NCHOtqoNWkF2JcJQasyerWOA4vH83KJzIYg5d8TCoFToamq0UjKLdwB3OpCByX1B\nmA/cajZEZCDakF6jlNpQkhF2Euob6mmJt2RtEyLEqP1GsWrLqgw7hkFCJdJ
sGNddB0OHaoLf3KwJ\n7uuvZ6qnjPQRj8O992omYGIaWlqc337G6RtvdNKA+Ln9eZMN5ludzn1uS0v+SQTzQSkIcS7mk2tV\nm29sRk9ELAZTpjjFvvItRJUNXVXl1B50tXsqJbN4HjhARPZHM4nT0bEaKYjIAUqp9cnN76Gz2iIi\n/dAxHZcrpZaVcIydguiQKOXh8qySRXm4nJFfHukbj+FGggRbm7amto0LrMktBHDAAfDGG/5lW03i\nQHdtDKUcQm2M0yZIzkgm3mAzvzxTbmaTjQgaYu52lw2FYNy44BrihaDYhDhf5lPIqnZn8paqr0+X\nbvNxi86F7qhyyoWudk8lYxZKqVYRmYr2ZAoDdyqlXhaRX6FzkdwHTBWRceiiSh/jqKCmAl8FfiEi\nv0juO1Yp9e9SjbcjEamKUH9WfcrA3bd33zSPJ0E4e/jZ9O2dzX/Awe+f+T0nf/3klJprwYL04xs2\nwKGHwpo1/ucrpRmBG6NGORlt3cZpt5TgDjbzxkUYTyw3s/FbHbnVTu5Ehb16FYdRQPFXaKWQAorl\nLVVqdVsx+o9G9f/rrsVSrLgLv4VIVyG2bUFXUqOV1GahlHoI7Xbr3vcL1++fBJx3DXBNKcfW2fDW\nvhi651CmPjQ1FUtRc1gNtfW1efUVV3HOu+88fnLET2jc3gjDDoBHTUIhQSl48cXg80Uy7Rhr1ujs\np+ZFM+Vaq6sdIzg4hNNN6MvL0+s7GGbjl/bAWxWvrCyzNoTfC58PESg0u2m+KJT55Euw2ustVWq7\nR7b+CyHKne2RZtFG5JNtsDt8ukLWWTee2fiM+u1Tv/XNIJvvOZc9dllaRlpqUYNvHKxCtaGM/Rmf\nI3+rkFYlEldlZUqJ+GesdWeudbcR0fv8sqm6M3HOmuWfkTYfuDNzglKjR/tnFe3TR2fCLSvT15s1\nS4/NZNb1u157Mobmg3yznQZl7M3WV1vHXupMp0H9l3qu24rulPm1M0Exss5atA2xTTHG1o1NZZt1\nJwPMBiNtxDbFmPTAJOasnpPRZtMnm9IKJgXiOz+DA+9jwEf/w6ihQ1h083dpbgr72i1CITj3XF2q\ndeFCvU8ppxiSO/GgVwXT2Ni+zJzhsGOA91OT1dc7kkciAZMn6/3mnCDjaFcxGLvH4VckqZgun6U2\niAb131Xm2otSz0dnq7g6+vqWWZQA3lTl7mSAuWAYjclcC06CQSA/RmFQ9SzvD17J/SpBuOYoTvp8\nHov++eWUx5Nxra2ocKKl77033dBtvk3iQT9DbNADm4/H0DnnOPmqWlszCU00mu4CnEjklzOqlISi\nEPWGGUdQkaRsLp+gt9euzR7IWCp1mxdBTKzYc12oijGoTSnVXZ2t4uqM61tmUQKYVOVGsogOieZ9\nbn1DPU2tTWlJBQWhIlxBS7wlkFkYhhIixKH9D6UiVMF+fffj/tfu13XABzzN6DF/4bIpV6ReHnDs\nD2vX6up6bq8oN5qbddtbbw02xLrTlUN+D3NNjeNFVVaWSWgiEV0J0Pjkl5XpMebKGVVsQuEmToWs\npM04vC7F5j6DvKDM3OZKvuhHNIx9qRTwM7gWc67zIYKdlYrejc6Wpjrj+pZZlAB+Edv5IjokSigU\nIuGyOCdI8JMjfsLq91bz+FuPp7ymwhImoXRdDLfksXrLairCFZw78lweeeORNKYVqUqqPzbFqHtg\nPXP+ciYtzeEM91kv3NKFnyH2iy+0isi43Z54Yv4Ps1eS8a4aq6u1mgz09aEwg3F74SVOM2cWtpI2\n4/DLdRXkBWWcDnIlX+xsomVQrLnO535Kcc/5ptA36OwYiM64vmUWJYLX28nAXaPb73ikKsItJ9zC\nBQ9ckCZdrH5vNbXRWpZuXJqqjTFs72GBAXvN8WZWvbcqrfjS2n+vpb6hnspdKpn28DR2LLkI1aTA\nQ6yD0NKSmbbCbXMw34mETiFSVua45VZ
W+vWo+zPR46bi39y56YTZLb24mVU25KvPzaddsew0QeP2\nY76hkJ5byB6g2NlEq9jI535KofbyeuXliirv7BiIzri+ZRYdiHwN3xMPn8iGjzekiieBTn1uJJYZ\ny2Zw/+v3BzIKgy2fbaFuTR1zVs+hOd6MQiEI4VBSIhnyBIR/DnEBFU5JFkESRiKRSfS/+lX/jLdK\nwfHHO3mn/vd/tYTgfai9Lz6kE+Z8gvq8yFdNkW87P+JUjJV0rsR3Rq2XbbXb2USr2Mjnfop9z2Yx\nkE8Kfe84OnO+O/r6lll0IAoxfF837jqG7jk0LdW5wQPrH9B2iCwISziVoNAtoZh6GiEJQdWzcNZY\nwm+P45KxP6KfGsrWrfC73wX3+6tfwV13wV57wYMPamnDQMQJ7isrS081YlKh+/nle7PVuiULb1Bf\ntshm0+fGjfkxmELUGWclw0W9tcnbikK8oMx9+d1rsRhXUL8dfT7kdz/FlC4Nk843hX4xUGi+tK6w\nGLDMogNRqOHbm+ocNMNx2zNChKjcpZIPtn+Q2jdw94F8Za+vsPTtpb51vhWKIwcdyVNvPwVVz5Ko\neo7VA5ZTG62l7reRAHWU3vnOO/DOO8Gl2MvK4IQTYNGiTInjqaf0gw/ZjbJeghkUQe4NCnNX6itL\nPtnZXvp81Bleom5sJu1FNi+oIAN2OKy9x0aM8J+HYhD69njYdLaHUFvH4rUbFbOmfHvH1pXm1DKL\nDkR7DN8GlbtUEg6FUQlFOBRO1cYYWzeWptYmEiTY/OlmNn+6OXVOiFCaEVwQ9uq9V+q4QvHom4/y\nRMMTHLz5KeAIdMkSjeHf/ITVa3fA9n3S9nth7A7btztJ4txYtw7GjIGzz86+ovcSTD+dvvc8N/EF\nXUp20KD2u1aWyoCcr97dfX0Tp2FSzLvVJdB+otLee+0oY3tb7ExdSaVUyNhyte1IqcMyiw5GkOE7\nH8Q2xZj28DTiiXiKURjJY3HNYqY9PM038aBCcdi+h/Hi+y+i0Ezmox0fZbRrTbSyZt+LIbwE4uWI\nCJdeGqLfiX9i9cQD4dWTUz16mUYopJlFWZlWHRnDtRfNzfq7LQbKbATWeyxfdZEhErGYv3rL3W84\nrFVcsVj7X8x89e5+cRqmFom7EmIxCHV7DccdYWxvj52pWNdvrwRSyNiyte1oqcMyi24EY/NIkECU\n6DxQSUSqIoFZahUqzRjemmjVKig/VD0LPxoDDVFk/6WcPPV3rP13JeGjbiD++gmQKMePUYTDTsR3\ndbVWRZlocDdEtITx3e/qyn2F2ACyEVivKsGstvPpO9tLZ/o1cRK33ZYegd0e5Kub94vTcMe0mD7a\nSxzbazh2j3XLFv2dK6CwUPgxRbM/yCW5WNdui9eUHwoZW7a2HSXJGVhm0Y0QZPMw7rgjvjyCsIRz\nGr9zoupZqHoWhVC3po65a+aiBjYRPmccR348i6X/GpZm1xg0CDZtSnd/feih9C6Nh1UioW0XoF80\nYwMoNNle0DHIjInIRaxyvXSRiN7X2qrbNDXpjLjZsuIWYwXqvn4kkr0mebGIYzHUMXfe6UiQkD2g\nsNDxelfalZX+jL4U6pm2ek35oZB5Dmrb0W7Tlll0IwSVZzXuuOFQmCOrjmTZpmXtZxhAWaiMLZ9v\nSaUeCVfFOK7mXvhsX556ZC+MhDFoEGzanEBCUFaugHBgwSU32qpvdxMCSCcKbsLf1KQjv4OKNRkU\n4ttvVpWPP669tLJFGLd3BepFNpVZV/GYqa9P95ADf6LaVhWKlyl6GX1dXaYUlk/fpiYLZEq7bsbv\nfgaM919lZed4kZVCesoGyyy6Gbw2D7c7bjweZ+nGpdotNgD9evVLK5YUBEGIDIxw/2v3pwzjZaEy\ntjZtZVnVyRB+DOLlhMuE2LMQb02AQOs3bqTv4FNRamhaf6FQZhp0pfTK/+GH8xenvd5BIulEwU34\ng4o
1ZcxpHi9dJKLH+rvf6fog2fos5go02/2be4a2667bUwLX7/xoVKeo90oWXibcHhWKd6VtCDjo\nKpFGHQr59R2L6bGZMc+ZA0uWOPPhJ6lu3aqrRsbjutSw9zksRIJqj+2hIw3zlll0cxjVlFn9K1RW\nqSIbowgRQkRIqAQhCfH0pqfTCjJ9c8A3dZGmgQlt11hTw64fRdn25tcBHa4df3oa1z8jaYxBBL7/\nfe1Oa15qg3//W3/KyjTxzyVOu4mMuYY7Od8VV6TrzRctyszH5Idchu5YzMnV5K4kmI8UUky//SCd\nfVsIrx8hzFV0ySvV+RG5+npnlT5ihL8arlgqFMPEp0511IQGbgeAoPuvr9dOC25pyD2H3vlubNTP\n2PTpjkeaOdebJNJcIxcj6GjbQ1thmUU3h1FNmUjt1kQrIkJrwsd31QOvlHFo/0PpW9GXZZuWpXJO\nGQiSxjwAWH0W21or0OqopIeUCpGIp0s2ZWVw2WX6Y1QEXqax7766LnO2FW1sU4yN/dZTVn4mEE5J\nFiaLrju63AT2hcPajTYfQ3q2F9stLYRCmSVfvavHUvntBxHZtnhseYmUO1renZY+aH7OOiu/WBE/\ntFeF4p7vxkb/bMTjx1imPDsAACAASURBVOtnLtdq3sTlGKLvnteg+fZ6ybkli0IlqI62PbQVlln0\nABjVVM1hNSl7xtp/r2XBugUM//Jwtu3Yxh2r7qAlka5M3ta8LS39+eotwelDEiRIi+9riEK8Av0I\ntSISAlE6GE5J6sULh9MzwxpD7eWXO4ZugDPP9M+WmtIXH7SWCxddQfMLpxPa/36+f/gRXDa5P2vX\nOhlpp03T5yxYkO5qOmhQfsSovt6RBrx1MrwvdG2t3u/OEOtlMqVYHQYRWbfH1OzZcPvtOluvqcnu\nB7cEJALDhzsuz97EkaD7N/Oarwu0m6ivXav/m+HDoV8/vS9XhtygKolBiR2NI4VS8Mgjmln49WlK\n+Rrp9PzznePuew6ab+9+8Gd8+TCCYjLOUkokonJlj+smGDVqlFqxYkVnD6PLYtIDk5i1claatGDU\nTm0yhm86AuYuhng5hFs45sf/YtiuRzMiso1V761i3eJR7GjZwblnlzPx5OqM02MxzTBeeQUGDIAj\njsjMKAsOUSDUSjweh0QFAOHyOH+6uYw//MEVKS5xwmEhEQ+lrTJnzcokmn4v2OzZ8OMf+5/nNYC6\nx+bOECui+7j11sKntL0v/fTp8POfOyvs8nJ48snsfc2e7TDbXr20S7OpaeKWoCBdr9+rl9brQ/CY\n3UTdrLwNRKB37+zeasaW0NKi78Uw70mTnBoo4TD8+teOsXv5cmf85pibIZXK+cCLbE4Y2dq2hVG0\nN9ZCRFYqpUblamcli50ENYfVMHfN3FSUd0hC9Ar34rtf/S4LX00PiHBLG4FI5pWiIUpo/6Us2+dZ\nnhFBvai0CqtaJy1c81IF1YcvSTPKew2KH3wAq1dr42QopIlKWXmcw45dQ1PzCBJxQRJhUCGMB1a8\nJcykSWZlaNLmQrw1fdyhkCZGbgS9YI2NjiHefZ5fyg/3KjsUcnJiKaXvAwqLIfES7Xw9eNxEprIy\nXRXT2prbxdeocIwRvn9/TcS9Xl9nneXYA0R0FL57le03ttpaZ468UCq3t1pdnfOMGE+ntWt1rIvp\n09RAMefV1jrHQqFMlVwudWIx4GZIoZCW8IIkqPYS+460dwS7zVj0KBjbxjXfvoZZJ87imjHXsLhm\nMZf9x2X0KetDiBBhCXPM4GOyelOloepZOPpaEgO1q25ropW4iqcYjULRFG9Ky54L6UTAjZYW/YJp\nt9cEy99dTiL0BaGworxcCJfpXkFpCSJlPjFBgiH9EX3ArBpN8kGTlyrISByN6vbhsP52rwq97pl3\n3pm+gh8/3mEYLS165Tt2rHNNL4whPRbTnylT9HmJhCawtbXB55rzx
46Fq65yrtPY6IwB9Pgefzxz\nHO5rGzWJcS6oqdEEa9w4h3G6VU7hsGYm2XJkmbE99lhw2vtQSH9MGhP3/2CwZUvm9pQpwUzLqNAM\nEgnNWNz3777fXr2KzyjMOAyzbW3VDDHovwx6FvOF9/8rpb3DShY7EYJSjbhjN+rW1AVHd7cR9752\nL7NXzs5IiujATVGSxnJJAAo57iLGffkMJhweZdWqEFu26NXviBGacGjVhnKd28rooz/l3DP3TKX3\nNvYEdyI+Pz2y1zDtZiJl5XESCsrKwR1HIqJTsffv7/TpVz7VDT9Dsdt7zBB5vziObJl1o1FHKjD9\neN12/Vayfvry2tr0bL81NdmDAt0wBDCo3vsZZ8DBB2faetyELhZLD+wsL9dz7J6nsrJ0puV1m/bm\nzzJ2pGLEJmRTHUWj6a7i8Xjwit/PplGIWqojYy0ss7BIYyKmUJLBkD2G8N5n79ESb0FEqN63OsMQ\nLgiD9hjE25+87du/QjH5wcksWr+I/rv1Z8R3JxO+rZp43OSYUtB7K+zY03VSGaw4HxVK8KWJ/+bC\nCzN119XVMGMGvPaasP6NBPF4gooKYea1e6ZemunToalZkYgL8bhi1izJqis3v9MMqH9bi6q5EDYc\niRq6jBHfvYmKudUpBmTcc8NhOOmk7O66XuPqjh16xdyrV3Yib87NllnXy+z8CLHfSvaKKzKJjOmr\nri59Xz7EyOs67K2P8sEHznhNRmGzPXu2NoLvsotj4xCB731P/y4vz15S16SS92bmdf8Pue6jvXER\nkUh6KWC3lOqFn6E831os7jF2hKutZRYWaag5rCbNc+qdT9/hoshF3Bi7kdZEq6/HlIgEMgqDuIqz\n8DVtGykP3cGX//tKNs+7QjMFBHb0dffo2CcSirv+vB/GNbe5WXH55cKwYaSkjOOPJ03qcGOrbCCR\nGIxWUQlKaQK9alWwEdrrFbVgUSPxAU+j9nuSuIRprHyAxYurUyv8225z1B+jR2sPHD9i4zWugiai\nixbBH/9IhiTkJnKxGJx3ni5fa+CXWdcdL+JXg6NQN03jglxIPiwv01q0SBfBMit9r9QU5GBgoJSu\nnWISKE6c6B9l7bUrtWXFXYy4CKMSvPnm/Nym3XOQLbNyIWMsBSyzsEjB5JiKVEVSqqiWRAv1b9Vn\nxF0YCJIee5EHWhItbP7aL+HwL8GKiUCyfmiaOslsZ6ZEf+oplXS7zTwmou0JRoX0+18MBhV2tVAo\nJcyZExwwVlmZHn09fP8qlsYr/GuZx9KLNQWt9LwShXu13drqjKO+PlPqicXgmGPSvYlCoWADupeY\njBjhHwMSRMTcq/v2RlnHYjrCOR530mMERbQvWODfl/GkMvPl5wrtR8Dbor8PYgTulB8bNwbXS2kv\nIc+HmXekUdsNyywsANJyTHnx/ufvB3pHBe0fvMdgNm/bnN0t97A6WH0WtFbgMAyAOIhyEXkT9OdS\nWwXU1TC2gro6TVQSrWFPWz3e5hbF5MmS8sQx6R0g0yuqnxqqAx8fWA8N34LNg6FKtw2yc2STKEIh\nTWzcgVzuhHjGtmJQX59ZH2TECP3tF23uJiZBHkdBxMW7ujfjNF5H+cIQ1+XLHQO5cWcFf0I4YQI8\n+mj6Pr+58huHm8iGw/q6V19dWH4obz/mf5k0ScecGAcEMya/YM+2EvKgypH52jk6ApZZWADpOaaM\nZ5Qh9G4VU1jCjP/aeN799F2ef/f5QGaxedvm/N1v62vhzbFaJSVx+MrjcNA/4aGbU3EVSAtaPSVo\nxuI1iruheODJd/lBzQ7Ky4fS3OwdRxyVgHhSNeUt+Wq8oozrY2UlsDnC3EsiNDXB7TdonbRb3751\nK1x5pSaIvXunZz91SxTe2AWTluSOO5w2psjR3Llayti4URNAt6dPNBqcXddr6A3Kj+UXC+BNK2+Y\nVEuLVo+de25woJ979W1UaV4j9
5FHwnHH+RPCiRO1ysqMwczVhAlabbhunVYhrl2budpvbNQSTH29\nbmtiLaAwou1n9/G6/5r/yE/C8WM2boaeT5Cht3JktjGW2qjthmUWFgAZ6c9nHjeTBesW8Nibj6UR\n/YRKMHrAaKJDoqnqfO4qfAZKKcTlxzmk3xAG7TEo09Oq6lmI1sLbR0NcQbhFb1c9C/u+BGuS7i6H\nJS2tDVHo8yGsPRPePoZMlZUex+ZX92XGVc2MPvUJXnp0NNu37orxltLf6QzmySf1CtJ413z3u3Df\nfU6iuHPOcYh5IgGTJ0MonEilGlEJx93YRH8DjPl2nKYmzeRCISEU0oxl7Vpgn7XcdsdBxFsc6ccd\ngbxjhx4TOLEcSmkj77Zt2aWHmTOdaOmbbvK3gfglZBR/gY1EQq/WlyfLpfgFOfoFKXoxbFh2QnjZ\nZTry2ox3woRMgr18uU7meNNNmWo9rzFdxD8FSjYjtpG8jP3Ay/BCoewr+iAju1/uLUhfTBipOBcj\n6Cijths2gtsiBWOzcKc/j86NpqmmeoV7seSsJanjdWvqMlKJCEJIQiildJqQJI4ZfAzLNgakT990\nhGYEQ55Eqp7NLpVsOgLmPOkqxKRA4oz+z6d4Y11fPnp5eNJwbhhDyHVyHC2Z+KmzdD8hCZNIpFPN\nY46BZ55xq4OMWiyE19YSCsHTT8OMP21h4V+/lLxeK4MPaOLt9bumzg8ddB+JV07ErYIbPRrWrHFU\nHqk5dQX9mbxHDz7oEHjTNhTShNxtR/Hz/Jo+XcdoGFuCt+/t2zWBfsrD2wG++tV0ScxIT48/7khP\nRhIyfScS6Z5s2eCWGBYscPr1juGtt/yrMZr5Ki/XRbgefNDJH/aDH8Duu+eXwtzLUE84Qe8PKtrl\nDcY78URt2Dfz8I1vwMqVetvkLJs7N53hhcNOITG/sZUitYeN4LYoGN44jEhVhPqz6qlbU8eWz7bQ\nf7f+1BxWk2oTqYpQt6YulbRQEI4edDSxzTFaE60ZBD9r/Eay4JJOQRIKNKgDmqkkkt5SSQLP9yax\nfNjtsPsR8OpizSdw2zwAErDLh7B9X1dnCRxJQ0CFSfhcdulSTWjmz3cTKDfDcU469FDt0nvf/XuT\nYiai+KJlO7Brql1i275akkomXgyFhP32g+gpG/jXv4Q3VuyfVLs5xM+46Br3XCOFpPpM6JTv7hxO\nq1ZplYkbXh2/2yZgku/FYrpmujfp44YNmii606O7o9l79dLSmEnhHaTfD0IkQirnl9uw7capp2rJ\nwlzXSBTGnnDOOU6kvTsr7F13pfeTTUWVzRXZ6zQA6V50iYRWhYVCzrVXrUo3jJvruz3jlHIWCt6x\ntSXKv5iwzMIiK7LVDI9tinHn6jtTRL0iXMGwLw1j2aZlue0VARARlNJ1wkf29y8Ty5B6KGvWDCGU\ngBOmwKjb9bGUHeSXsGEc+hF3rfy37538rUBak0Z0r+4lUxejFPz971rn7nhitabaSkgxqKqMjRsV\nq1fr9CVpEowq48NNlWl9hr+ylMTIOajYT5DGg0gkYOFCBfcNhK89CDIAVAUgiDjutRs3asIRpBRo\naEjfDlpFu11rwT9Z3pIlev/LL8Pf/uYQNLeqzaRtNzCSjCGaQfr9IJiIdq9R3+Dkk+G662DoUIeh\nhMNw8cVOgkJzLXeciBcmhbnXruCGVyWVzWnAG4xn4mXcVSLPPddxdwYtWbhVbEa686ZX986Je/47\nynZhmYVFm1HfUE88oZfZgnD28LNTOahM5b4jBhzBi++/mJYKXRDKw+Wc8NUTeL3xddZ9uC51zKio\nEomEP6OAtLxUDKnX297j0au1TaMV7Vm192vw4YEp9VTfYSvYtmc9LPs/MqUDfxfelpYES5929ksI\nFHFIhFEJ2LgpgVLiOc/pKxEX9toLPvooea/LLiIUVqiWcFoyeBIV8OpJrnM1kVq1ShP2GTP87QF
+\nMOk0ID2dhNe1dtUq//PdxNJtDwiHHULmJpCJhFYdTZjQ9sjk+vp09ZIJQHRLPrGYvo7JkKuUZhRe\ne0hNTXocDDjqnvPOy7Qr5FNNERyJxxsdfsstuHKWaZSVOYzFK13lW1+9vj5TLRlUUrZUsMzCos3w\nGsWNisqdPgQgOjeaOkcQTjrwJC77j8tSdo9j/nJMXvU3QoQIhUK6bVJt5QdBUF6GAsksudqIvst3\nZrDtpa+jbRhu6cMwDj/DeQKVcKQFlTD2kHByG/CopMyITF+GUWimEE4SALdrsEE4OTZS5z75pI4P\n8curBZnGXS/KyjSBcRtUm5q0sd4QU1MlDtKz7Ho9xNzR0xdfrBmYwWOPabWdm+i5U8l7VSi5EiJe\ncomWJsx41q5NN3pnMzhHInosOtIf1q93bAYjRmiGk49x2R3Rfscdznx5XYqN4d99rzNn5mbGuVKp\nRKP6Wua/D4V0nx0Zb2GZhUWb4VcT3Ow3v6cvnZ6SPkDHZSxav4jL/sMpNHDiASemoruDcGb1mexe\nsbsu8EQwYxGEP5/4ZwB+u/S3vF11rXPQxTy27PksDDnCUWelDOFug7UzaiQOg56Gt7+VulIQQ4EE\n9NsIWweRoQZLtff0jwAJKvZ6l+aP93W5CDvtX33VywycjVBIckobxx+vbQlugmOS+Rm4EyWadoaB\neNNSGNVNv36Z6hd3lHyQCsXYRbyrY2+cy7Zt6atv4w7sVt1ceGF2Q/A99zhGfaNGmzw5XVUETkZb\nw9AgvR/3Ct+byNBg4kTHrbqyUs+DidMIqjPiDmL0U4lFItoOY1KzmzF0ZLyFZRYW7YLXpuH1qPKW\nfQVSmWj779Y/Vd0vRCjNcyokIc445Aw++PwDJgybQPU+1dTW12YUcHIjJCF++h8/pXF7I5W7VPLe\nZ++lHZeq59hv2Cbe+fQdvcMtfbzzDXj1lFTb3b/0MZ9+0BcnpkNB9V2wKeLEfoRaCGGkAzdDEPhk\nEOEyIR5vTRJ+45GVRdUlcZo/2Vu31/64yetrI7wmjm7GFIf+a5FwC4cO/Dqrn9sjNf7+/dOztpaX\n629HKlEkEoqhB25nw6u7paX83rIlXXppanJSnV9xRabXz8UXO1KHm2HMmuWkZHEzMrcKyy+Izayi\nTQ4oI025U4+7pSiltDF9aGQtjZUPUNl4ItN+UJ2hnolG0+NV3ExSqfRtE3vj9irz1ng3aiU/GELv\nNv6DnoepUzUzKTSNR01N+ngKSe5YDJSUWYj8//bOPUyK8kz0v7e6h0FUboNyneGyAkpCYJRFRtSg\noEFQ5FlysjHuQhSdmCMJiAkb92x2PXGfwzmuBowSIt4CWY2bhCwoAl6ACUSHmwKiXARh5A46CIjI\nTHfVd/74qqqrarqnZ2CGufD9nmceuuv6VVXzvfXeZSTwBPoX/6xS6v9G1t8H3I/Wt08CxUqpLSKS\nB/wJ+Fvgt0qpSfU5TkPdEMwCbxFrwbLxy3ztY8rSKSEfxKsfvRqKeJKIU1kpxZ+3/pll4/Xr3fB5\nw32B45mjbMcO7T+mzxieXPMklXalburk2KFjxq04f9v1b9m/bX9qoWfKOl4AViU4cYgl+OLqabB4\nViDqSuCrDnDXMD/3o23Ldpwo/XuqRkXFQCktKK56Fk52DAki2n8ERy8jpMVYNvR5FbaPQf+3tF1h\nY6PrZAEhjcQd05GvoZTF5iOpaKl4jk3B4E0cenWg3lcUEyemd+Lv2HIhQQHmKMVrr8WI8uabsHy5\nfisuL09NgI4Djz+uw22PflXOyjfaucJRC7cFC/S4vAKAYjkU/Y/VPDqrJ53mdc5YATgYzptIpCZb\nkVS01WOPBSu7Ku7/9R9xui+HkkGoiq+jHKniUwi+nVeHUjqQIWiiKilJ9XgPTtBeeZRx48KJmp4g\njJ4rUxXaaCfC6DbpkvFKS3XAg2eia5JmKBGJAbOAm4B9wDo
ReUUptSWw2UtKqd+4248BfgmMBE4D\nPwe+7v4ZmgDBLPBKu9KvM1WUX8TMkTMZNncYCTuhczAiiXxVkvpQ/jEAP/nPW9cnrw9bP93qbx+3\n4nS6qFMqC11ZWGL5DvOYxHig6AFOnD5BjpWT0lD8jn8twErCoDk6AdATIotn6Qk3Vplyprvrju0d\nAqvHAC3wcilSmog7mbfZo4+3Y5TfVZBey+Hzv3EnVRu6rkNGPqjHu3Mk2IqcFsIDD5e5IbQ9XIFh\n4wsjAHH0chXHsW1uu+MgnbpW8vyxCayzExB70z9n68FvMPbysTzzjINtBwVH0DRmYSdtHFH+8m7d\nYN8+PYElk9p5e8cd4QnQtpXOupbWrtwJC6ZEAu67D2j9Cc+U/oGVv5/iBhroPiWjRwMXHaLTNW9A\nt96U/GdRKCzYiw7ych06ddI+DC8ayrYhlpMk2fIw6rdvuOVj8GtRHTuWMu2MH69NQeF8mei90Of1\nOjB6eSPBxL50xQ/feEMLRdtRxOJJvnPXYZAuiAWWpUDFfBNX1GRUWhrukWJZ4fMFzWqeEz/aRMwz\nF9aXwKhPzWIwsFMptQtARF4Gbgd8YaGUOhHY3n/FUUp9CfxVRC6rx/EZ6oCg2Snq8PYc3JDK2Sgp\nK+FYxTFmlM4A9CQ/sONA1h9c7xckjImeDIPHsCwLx32NVCi2fJp654hJjKdGPaW3c3M0LMtiatFU\nTpzWP7HWLVszo3SGFiRipboBlg3TgkLFESXQZp92joMOx+34QfVRV54Z64LPtOZxujWUPhgSMJK/\nBuuum7B3X5tytm+ckMpYH/kAKn81DuIfz+n5V55IrqPyG1fBhtSkz8jJcOhKfYxO78HSJyAJCodF\nJx/hnusUyfdWoZQTGttjL3Vk/+B22GoI4CUzegRMY1YSKxYDJ06LFjq3JOi8dhzFiy8FgwAC5jQV\njxwvdY7W3T/m1WP/B/vt2fiVhtGCZOFChYq3xmo5h7lH3+NHsc3A3/i5JdGKvBWVimeeTzD03j8w\n+jvD6XRxZwq/tY37f92RpNcXXpTv23j00VT+x8yZ6bQKLbQHDtvDyS8VO9f1IJjIOWiQTpR85hk9\noXs5HEVFqa6IHomEvjeObfHi0x21KdFyoOgJbiv4Bzpd3Nk3XQV9E9EIMNvWgsgr+RIs0f/kk6kQ\n6kTAKlvfTu76FBZdgb2B7/uAq6Mbicj9wFT069mNtTmBiBQDxQAF0awjQ72TzuyUzuHt4X0fPm84\ntmNjWRZP3vIk/S/tHzrOzJEzKT9VHjrGrFGzmLR4UtpkP4CPP/+YJ9c86WsMSSfJzNUzKZlQAsB1\nL1znaxlKBbSaHiV6UrcVykrQ/vJNHA0euJqoq4zrL38F65MbcbqvcNcJ5JdidXsn5ZdJE/qrUP7x\nbNzJI/+d6sOEAV77NTgxkot+yZK/uQuntZMa2+Gvw+JZOI7Fi295vpN0yYSulnPRYYaPLadXq0IO\nHYp2bnP3UZHv/ufUcS5qV8HJzy8ABMtSPL7sBWx1aTiZ0t1eKYFkDs7u66gAHp/b3Z04FYmkDZdu\n5aHi/qHeJNgWK2d/B5RFvEUlvSuW8fXcW9gcB+UoLNEO/6Cv4HSF4qFffI7ttEtzDyw2Jf6AaqGA\nfwqMD1q2FJLJVBhysG7Xe+9VfRze8VJl9pPYb09m4dtxWuaGw3WjDbmCSX2gv8+cmdIeKiu1Yx5S\nIcWewKhvJ3eDO7iVUrOAWSLyPeBfgAm12HcOMAd0uY/6GaEhE+nMTg9d91DGJL7gPg4OooTyU+UZ\no6qCFF9VTP9L+zNv0zxe2PgClXalP+Hbyuaxdx4jWrqm0q70mzkFS4wI4mtA0RDbo5dkFgwWFn07\n9GX7Z9tDzvgq5K9GCtbRL68vWz/TGkyVEidZhJCnXdnKrn7bQ1em3tRti09WXg+3vqzX7R3i+l0C\nZVFCqMhnC07k88a8fG2
Sc6JVfyGc8R4N9wXEJqeF4vT1P4HXHtMaUdzG7u6GFsWSYOt9ewzYz8Ft\n3UkkHRxJID1XYn0yHDuZOq9jC/f/+o/0v+okw4YVIZbt7i/u+GIkK5Js/e2PAEUsDsX3Cq1ba6e3\nZ8oS0aHNR/d7QQDRUGmFevvBgDKUEoJvvx2OGvN8CvPng6OCAkfcj+699O+PHqvC4vRpeOKJlG/C\nthW/eRpycx2+/f2DLHm1FUf3t/PPr5SOggsSHMegQdClS+YSJHWJlX2TM2Y/fiFnALq5yzLxMjC2\nHsdjqGM8s1NMYlXMTrXdpyi/iIeu08bY6aumU7q3atPiovwiZt86mxUTVvCDq37gT6hAlcKF1TGm\n7xhWTFjBTb1u0o51t5d4dZO3IOTGc/lm92/WKDvdVjbbPtuWdVvPsR908AvCvVfey4PXPFij68lI\n2bDIm3zUdBSYzKKmo6Bj39/Ghq5rIVYBktABAblHU/uJQ7+rD1A47SfYhb/RQvjGf8X5xxuq3ttY\nkoJxs5k8ewHWjQ8jE24iVrCWqXdcSU5OQBBZSZKfd2HKM//F5sObA5Fl3p8bMeb6buykcOiLg8x4\nIkEi6SCWzZ337aPL5Qe8EwPQNv8gd963H3Hb9+rri5HK6E/dG9sOm3tAT9THTh/FsW13DNp5H4vb\n2j8R0rpSIdmeLyQV2QYooeI0vPh0J47u95qAKf886ZzxXj2w9et14cX6FhRQv5rFOqC3iPREC4nv\nAt8LbiAivZVSO9yvo4EdGJoMNdEIarNPpmiqdMcoyi+isHMhkxZPwlY2cStOvw792Hg41ckvbsUZ\nP0AbiD1txBKLA18cYPORzTw87GFW7VkVCusFPVl3vKgjR7484h/n7oF3+8d65r1nQpqChUV+m3wq\n7AoOn0z1/qhW+wByrBweKHqAx995PHS8uBWnsHMh87dk6AYUZMA82HBXyqfhVecFUmVRvIkfqpid\nLMedgBWonMj6wD2xHJRVCSMfgMP9YfWPdUZ8RfvUca0EW772HYi5giGqEZUN09FmxMBWrJw7jL/e\n8O9wbSlKOSgV48TpEwy5bQsrP9yuj7ljFLx7D2s3VrJu4O9QyX5priU1XhEdaWdX3g3KwrGTvPTm\nh3Rq1wbo4m99bN8l/MEegRrdV5vxqgiJIFV9MI4Da1e2C2/VZwHOtY8T+3QA8tpTOLYX7hzV6lTo\nWCmzlbdtkvZdT/D5gfbpBYWluDjvBCc+bY0TifiqT+pNWCilkiIyCXgdLc6fV0p9KCK/ANYrpV4B\nJonICCABfE7ABCUiZUBroIWIjAVujkRSGRoB1dWOqu0+maKpMhE1TW06vMlfJwj3FN7j779iwgoe\nfftRFmxfwNoDa1l7YC1jLx/rl2J/a/dboY5/R786ypg+YwBCBRTTaTwKxZ7je8iJ5XD75bezYFv1\nCYaDuwzmys5XMn7AeErKSqp0Gkw4Cd8/k5X81fD9G9L7NFwTW+5bs6n4ZEB4P0lCrJIr/nEOhz9N\n0r5yIDvfuIFwrxBXoFy+kK5XHKLDFR+w6YiFWvxkKtfEn/QcKHyhev+O5x/yijx+PAJn941I0Qyk\n5Qnkos95bun3SFQCsR4wcK4WLiqufUo4ocKLmqC5BxCF3aLcjRTT0Wlq13AOiqcBpDSmxK6hcN10\nve9rswMCwwkcO2p2C/4bvH4FX3RBdXsHp9tqir81BN4fz2+edlK+i5AmR2B/FV4visu+8SnrDrQN\nnBv/PEolOfHZBf73eFzOSQOkevVZKKUWA4sjy/418HlyNfv2qL+RGRoj1UVTZaIov4iSspKQ41sQ\nWsZb+pqAt92peW0NrgAAGTxJREFUxKnQvgu2LeD1na8zc+TMkIbhhe16WeWWWMzdNNfXiKK+Ee+8\nlXYlKK0ZZJroc6wcZo6cCWjhmNcqLxWZFaC65EMgnMRYnU8jfzUVI34Iz68MRCElodcyB
n9vCWtj\nT0B3OLp3CFh/iZitbIhXwND/YF/+avYBbPy1Kygib/exyrBW49Lt4m7s+2JfapzRIo+OQr09DVDY\nlmtyUZaOFPOO60WNDZin/zaN56LKy6jYNpxk0kFE+zZAUDY6Gs2x3Mtw9HUrL7kRfV2xSqyeK/US\nrwilFyZtJfX+Khg1prL8Cxy8EvYOwclfTeHg0xT/ELbsPsbK1z3tK2oCzICKs3ZJH1Ll9COajVUJ\ndiv/eL0Gb4duR4H6VS0a3MFtMHiciVkLwkImZsVCJqPpq6b7xxrXbxxv7Ar37axIVlB+qly3TU3j\nPAfd8MnTdIb1GEZuPJeKZIX2W0a0gk4XdWJq0VQefTsVb3pn/zvZUb6DLq27+GVOgua2a7tfW335\n9gC+j0MEUVWFTFryV8Po/xnKGbl+/ApOdy6FA4FtRt0fnjALXwjnnFRBAQ5cvhAZ+ngq5DjA4S8P\nh/Na8lfDFfPh428RjUZSfm14OywcolpT/mpOAld8dTd9T/6APt3yePRfO0MyB92O1wLiWrOwbFdG\nWPgTrygYORmn29upgQ56Fjp+SLej/8C+9r+DpTNg/9Wp8eUehz6vwKlLodUR/W+njVyyfyKflnXQ\n2zmW3q/zBpZceIz+l5bSfsQqeHNyRBNz75t//VENxru3wVIxpD7brUL3eOtXKxg+b2pGs21dYYSF\noVFxpmatqJBJ5/8ovqqYJTuXhMxEDg5LP17KnuN7GD9gPIWdC3nuvefYcGgDtmPj4GCJ5Ws6wXPl\ntcrjR0t+5DeHyrFyfNOS9+YvCBe3uJg1967xzzl91fSQua19y/ahNraZiEkMhcJRDo7Sb9Se8IgK\nrSpEckZK5V2cg06126QVEkEfiThaCA16NqPISjgJruhwBbmx3JQ/6asOZC7g6H4fOTkkHNKx9YLn\n2XrB8/Rr3Q/Gt9H90S/4TOee+Dksbl7Ku/cQzO/QY0ghCC17buTnP/w+9y9eT7LwOVdYuONJtILB\ns0NjscTi5rYX8+KDd6X6yO+/GvZfzYINFbz20bdIdl0Fd/03vP1T2H6b9g+JK7AcnV1PzyWw+ya8\nRMUUQZNV1G/iBQAkYMDcGpltzxYjLAzNgqiQyeT/mHbNNBZ9tChkJlr5yUpWfrKS5zY8hyW6qm3M\nilF8VTGFnQur5HwEz+X5TABfm1m7P1XWRKF4YeMLoaZRUU1oyc4lKKWIW3Guyb+GVZ+sQqEQJNTf\nY2rRVJ5c8yQVyQptglL4y2eUzqjWdHVJq0soL1iL4052iUyypSZ5JZl8JBnY+tlWYhJLmdt6/AXJ\nSaDctupaE/D8EO7bdWQyr44tn27RcZf5rj+p4we03HcLp7stSY2v03tVs/EDeJWQQRe2fEU9j7Pz\nFtg2Vo9Nib7mwPU6yuHlY5Nhwovwp9/B8V74k7ndgsSua6DrSr3Pd8elukEeL4D196K1HwW7bw5c\nuw0dtkL55a7pLBpwEPneZxGSvwaRGHmtwv1S6pr6DJ01GBqM6kJ0Z42aRY6VQ7QeVcJJ+ALGqysV\nFRRBvOz18QPGM/vW2fq8c4exYPuCUCRU0klSUlZC6d5Spq+aDsCy8ct45IZHuHvg3SSdJA4OSin6\ndehHy3hLt2Og+AmEjuOw8eBGZo6cyYheI1IlU5SibW5bJhZODIXhXl9wvT9Bt4i1YNwV4zKayXOs\nHHJjuXjtcG/udTM5Vk7mbXtswLruUWIF60LhyxYWPdr2qHJfAT9iLSYxLui5kcunToLhP4fRPwTx\nypiA/zZ9wWcZz399wfXpL8QjfzWni/4tLMgGPQt3fRNu/Ln2m0Q0hE4XdmLepnncMPcGFm5fqDW1\nof8B8dM6TDiWqCJgvOsSBI73CCx1NYfo9l6I9oB5OgrNu1aF1tIkoX1EPVa5OwSd3kkdstx2d/iY\nX3RB7b0a27GZsnRK2gCMusJoFoZmSXX+j2AUVbB/e
I6VE9IsvIq4QT+Id5x0Zq6SshISdvjt3pus\n81rlVdn+oeseonRvqd8syusJ4oUEe057QXBweGv3W6zas8p3yEcDAYLHufMbd7Jm/xpdVBGhdcvW\nxKyYXzIlOL7RvUcDOuRUoVhetjytWcsSy6+vBXDoy0Ms3LYwdSwR9hzb44856E/JsXJ4atRTbDio\nGzt8UfkFWy8I1BVd9BtSkUp2Rs3CUQ79LunHnuN7KDtelv7hZyKD1iQIz214rmp1gGxNtly6H5tA\nWdSMds2j1QYdhPxDsUptLvuqA+nLwUTW/XaFNgNiwYFBMHcZasJwKgvW1aspyggLQ6MkWur8TKjO\n/+GtGz9gfBUzUklZCXuO7/HzKWzb5ul3n/YjorwIrKiZa1iPYeTEckI+jNG9R9Ppok5sOLghY5HF\nqFDzwmm9Cru92vVi17FdvqPdc8hH748XBjyu3zjKT5X7k1/CTvi5HNFJ3BKLJTuXhJ36rnkrVBYF\nPVHPKJ2BoxxiVixU9deryeWNuW+HvpxKnGLP8T0AfsLk3E1zU2Y0Fxn0nD5KNWai4HiDAv5MiN4D\nW9mZ/UVZzHIWFnvazYP4P0ASxLJod+NzHL32n6sfRDb/UHVC6vs36IiyXSP8sGIpu4EWPTfVKILw\nTDHCwtDoiL61p6sVVVekEyieg3zuprlVwmm9ST5dmK9XLNETPoWdC5mydIrvm4hbcXBIW2QxOIbo\nsX869Kf+cYLnCmo5XiRXwk6wvGw5U4um+seAVLkThdKOcqWwLItb+9zKq9tfDYUd58ZzmTlyJkt2\nLOGV7a/4E7tXxddRji6dHiAqWLZ+tjW03nZs5m+Z75d6Ce2Lyjh5xiTmCyFBuKz9ZVWOXVN6tOnB\nyMtGsuijRalw3rNEoVDd3oYJw5GyG7h9ZFsWffW/IEu8AZA15LnadV7bYFf7uHzQYZ4z0VCG843g\nW3tFsoJJiyfhKKfarO66xnvj9ybhpJOs4vuoSZdA7zpw4N4r76WgTUFWoZfu2P0v7V9t1nswC91x\ntAbw1KinKD9VztoDa0MRYH3y+vDN7t/0NaklO5bg2A5xK87Ewon+8ilLp1RpSBWTmB8cEHwbD2kg\naWbKuBVnXL9xrNqzytcsquSXRCbIfh36MXnI5JCgbBFrUeXYNSHHyuGlcS8BsHrf6jMSFp6Q9SLk\nQgIyfzVWwTrIvw17W3otJSYxhhYMpV+HfrRu2ZrH33lcR7W5ZsZaETGR7bzwXeCeWl9TbTDCwtDo\nCL5Zi4j/NnsuwgODBE1V6SbqbGG+mXqU1+bc2c7lCdZovoWtbF8bW7t/rW8mAthevp2yY2W+UPC1\nChF/jNNXTde5JAEc5XBP4T0UtCkgr1UeP17yYyrs8DbpEIRbLruF8lPlvpZ4rOIYJbtLQqXpg8Qk\nxrNjng0JymMVx0L5K9no3qY7e4/v1VqJCJuPbA6FOgNc0eEKtpdvr6IZeWPwBGLQpFjYuZAX33+R\nlXvCuTH9L+3P4h2LM+a+2Mrmnb3vcGf/Oym+qpixfcf6v6vNRzYzc/XMKlpTuoRNn4BwTTpiQmcN\n5x/RXIaoCaYhxnMm/wnPNMkQqvfZzHl3ju+biApWb9LLjeWS1yqPYXOH+ZNjMCfDq8i76/Ndvm/D\ndmx/wslrlVelpFFMYiGB1//S/kxZOoV1B9aFzFgt4y35uyv+jt9/8Hud0R6Ls2TnEl796FXfrPjI\nykd8YRR9S/cKKUavu2R3Sei75QZziugormBeTG4sl1suu4Vn3ntGm81cM1g0AGHn0Z062Eh0VJI3\nhrF9xzJt6DTmbZrHoZOH/PHHrBijTo5i1Z5VRHn/yPshwefd7+Bkn3SSTFo8if6X9g/9roryiyg/\nVc4/Lw/7Oq4ruI51B9ZRkazw/T6ez0gQ33cTt+L1/n/DCAtDoySay3C2zu6G4kwETXUFFee8O4cf\nLNKt2d7Y9QZP3
/p0SCAB/udodJZCkWPl+JON5+OIJh6W7i1lytIpflkTQYhZusFU9Fo2HNrgT4be\n2zfAxS0uZvbo2Ww4uIH3Dr7naxCVdiXzt8wPObljxBjTdwxLdi7xzX2e1hO8F9Gqwj8Z+hPa5rYN\nXXdeqzxfo4JwhNi4fuMo+aTEF56e5uDgYCmLuBX3zZ3Thuqci4I2BRz68pCvvdm27ZeBiZJOQ4pb\ncV+IedgqJZSjzcOivehPJ0+HfHbB6/R8Sp7mVN8YYWFo9Jzpm31TpbqCitFKtPO3zKf4quIqJiuP\nYHRWbiyXX93yK9+PsXDbQj96aUTPETw87GHfBOVNjhYWI3ql1gWZt2leKCqpqFsRi3cuDkWDWWKF\njuVN2svLloc6Hw7uOphpQ6dVeSkI3osYMcb2HcupxCnG9RtH8VXFofGk+42k8/1kCkAYddkov2gk\nUMUXVFviVtwPFw5WKs6xcnyhHA3kiIY3rz+4ns1LN1fx1UXHFtQK6wsjLAyGRkZ1BRWj9a3G9RuX\n8TjR6Kxg5dyH//JwSiOI5YSEQfT86QRFOk4nT4c0mSrhraLDe/tf2p9be9/Kqx+9qlvgikVeq7y0\nLwV5rfL8BETvjT/TWNKZ7rL5foJViz0zmeejCvZ99/CSEIMmv7gVZ0jXIfx1z19T2pLb6rf4qmI/\nEVPfAuGugXeFhLL3UjB/y/wqZqyor650bykPlzxMhZ0am5fLY8xQBsNZUBf5GueabAmFgO+ziL5d\npztWMMR2+qrp7Dm+x89QD05eNTl/kPEDxvP8xudJ2AlyYjlMvHIiGw9vDGkWjnJCUVMbDm7w3+ZF\nRJtdlMOUpVN8O76HZw7zWvDOHDmzWkGRrRdKJmHiVS2O5swE+7571+NFmEVNQlOWTgG0kLit721M\nu2ZaRuHraS7R5V60mKfpCBKKwvOu0TPhWWKFeq3U9+/bCAtDs6WmzZQaI9WZ3oqvKs4qJKIE70U0\n5yNYyr0m5w9uUzKhJKOZZ/yA8Ww+stlvUJUbywXw36ZFpRzA6SLdgi14cbSAjAqU6LaZeqFU91vI\nlDMT7Pvu+WzSmb48DcHBIUaMwV0GhwR0SVlJ2lyhbCHS3nV564LniZoOzwVGWBiaLbVtptQYqC9N\nKHgvapPzkY1sZp50E6DndE739hzEm8S9N2mv3Ek6oZ+tF0p1v4VMmpRXFibb88h07kwCKvqMs4VI\nl5SVpD3PuRQUYISFoRlzJs2UGpL61ITOJufjbIlOgJmit6Lj8Sbxh0se9jsZRif64MRbnemsugnd\n28frAV/d2DNdX7pzpxNQQI2rE6T7PaQ7z7kytRphYWi2nE2eQ0NQn5pQY7oX6d6mq9vW65Vekzf3\ndBO+d5ya9Dw50/uSTqikE1C1qU4Q3PZ08jTzNs1j9q2za2xeq2uMsDA0a5pS2G19a0J1dS/OddBA\nbd7cswmeTJNxfZgpM427ptUJhvUYpgs22rpgY7Qvyrm4hiBGWBgMjYTG9PafiYYKGqjpm3ttOBdm\nynRaVE2rExTlF3H3wLt5+t2nUSi/L0pUoJwrU6tEm883VQYNGqTWr1/f0MMwGJotXoy/5z+ISYxH\nbngko+nnTI5fW0F5tlpOQ4dWZzv/mYYE1wYReVcpNSjrdkZYGAyGbKSL8c+N5daZZtGUw5zrm/oW\naDUVFladn9lgMDQ7gjkPXox/XU7omSKHmgteQmR9tj2tb4zPwmAwZKW+Y/ybWphzbTgbrakxaVxG\nWBgMhqzUtfM9XWJaY3funylnE7HUmBJLjbAwGAw1oi5Db9O9LTelMOfacDZaU2PSuIywMBgM55TG\n9LZcV1TnhD4brakxaVxGWBgMhnNKY3pbrgtq4lc4G62psWhcRlgYDIZzSmN6W64LmqOmlA4jLAwG\nwzmnsbwt1wXNTVPKhBEWBoPBcBY0N00pE0ZYGAwGw1nSnDSlTJgMboPBYDBkxQg
Lg8FgMGSlXoWF\niIwUke0islNEfpZm/X0isllENorIX0WkX2DdQ+5+20XkW/U5ToPBYDiXNMVaUfXmsxCRGDALuAnY\nB6wTkVeUUlsCm72klPqNu/0Y4JfASFdofBf4GtAFeEtE+iil7Poar8FgMJwLGlO9p9pQn5rFYGCn\nUmqXUqoSeBm4PbiBUupE4OuFgFcv/XbgZaVUhVJqN7DTPZ7BYDA0aZpqhd36jIbqCuwNfN8HXB3d\nSETuB6YCLYAbA/uujuzbtX6GaTAYDOeOppqX0eChs0qpWcAsEfke8C/AhJruKyLFQDFAQUFB/QzQ\nYDAY6pCmmpdRn8JiP5Af+N7NXZaJl4HZtdlXKTUHmAO6U97ZDNZgMBjOFU0xL6M+fRbrgN4i0lNE\nWqAd1q8ENxCR3oGvo4Ed7udXgO+KSK6I9AR6A2vrcawGg8FgqIZ60yyUUkkRmQS8DsSA55VSH4rI\nL4D1SqlXgEkiMgJIAJ/jmqDc7f4AbAGSwP0mEspgMBgaDlGqeVhvBg0apNavX9/QwzAYDIYmhYi8\nq5QalG07k8FtMBgMhqwYYWEwGAyGrBhhYTAYDIasNBufhYh8CnzS0ONoIDoAnzX0IBqQ8/36wdwD\nc/1nfv3dlVKXZNuo2QiL8xkRWV8TB1Vz5Xy/fjD3wFx//V+/MUMZDAaDIStGWBgMBoMhK0ZYNA/m\nNPQAGpjz/frB3ANz/fWM8VkYDAaDIStGszAYDAZDVoywMBgMBkNWjLBoAohIvoisEJEtIvKhiEx2\nl7cXkTdFZIf7bzt3uYjIr9we5u+LyJUNewV1g4jERGSDiCxyv/cUkTXudf6XW90Yt1rxf7nL14hI\nj4Ycd10gIm1F5E8isk1EtopI0fn0/EXkAfe3/4GI/F5EWjbn5y8iz4vIERH5ILCs1s9bRCa42+8Q\nkRr3CkqHERZNgyTwoFKqHzAEuN/tU/4zYJlSqjewzP0OcAu6rHtvdHOo2VUP2SSZDGwNfP9/wAyl\n1GXoqsUT3eUTgc/d5TPc7Zo6TwBLlVKXAwPQ9+G8eP4i0hX4MTBIKfV1dBXr79K8n/9vgZGRZbV6\n3iLSHvg3dIfSwcC/eQLmjFBKmb8m9gcsBG4CtgOd3WWdge3u56eBOwLb+9s11T90A6xl6Na7iwBB\nZ6zG3fVFwOvu59eBIvdz3N1OGvoazuLa2wC7o9dwvjx/Ui2a27vPcxHwreb+/IEewAdn+ryBO4Cn\nA8tD29X2z2gWTQxXpS4E1gAdlVIH3VWHgI7u53T9z5t6D/OZwDTAcb/nAceUUkn3e/Aa/et31x93\nt2+q9AQ+BV5wzXDPisiFnCfPXym1H3gM2AMcRD/Pdzl/nr9HbZ93nf4OjLBoQojIRcB8YIpS6kRw\nndKvDs0yDlpEbgWOKKXebeixNBBx4EpgtlKqEPiSlAkCaPbPvx1wO1podgEupKqJ5ryiIZ63ERZN\nBBHJQQuKF5VSf3YXHxaRzu76zsARd3lt+583doYCY0SkDN2r/Ua0Db+tiHjdHoPX6F+/u74NUH4u\nB1zH7AP2KaXWuN//hBYe58vzHwHsVkp9qpRKAH9G/ybOl+fvUdvnXae/AyMsmgAiIsBzwFal1C8D\nq17BbUXr/rswsHy8GyUxBDgeUF+bHEqph5RS3ZRSPdCOzeVKqTuBFcC33c2i1+/dl2+72zfZt26l\n1CFgr4j0dRcNR7ccPi+eP9r8NEREWrn/F7zrPy+ef4DaPu/XgZtFpJ2rnd3sLjszGtqJY/5q5Oi6\nFq1yvg9sdP9Goe2wy4AdwFtAe3d7AWYBHwOb0VEkDX4ddXQvhgGL3M+9gLXATuCPQK67vKX7fae7\nvldDj7sOrnsgsN79DSwA2p1Pzx/438A24APgd0Buc37+wO/R/pkEWrOceCbPG7jbvQ87gbvOZkym\n3IfBYDAYsmLMUAaDwWDIihEWBoPBYMiKERY
Gg8FgyIoRFgaDwWDIihEWBoPBYMiKERYGQxZExBaR\njYG/n2Xfq8bH7hGsLGowNFbi2TcxGM57vlJKDWzoQRgMDYnRLAyGM0REykTkURHZLCJrReQyd3kP\nEVnu9hZYJiIF7vKOIvLfIrLJ/bvGPVRMRJ5x+zW8ISIXuNv/WHQPk/dF5OUGukyDATDCwmCoCRdE\nzFB/H1h3XCnVH3gKXRkX4ElgrlLqG8CLwK/c5b8C/qKUGoCu7fShu7w3MEsp9TXgGDDOXf4zoNA9\nzn31dXEGQ00wGdwGQxZE5KRS6qI0y8uAG5VSu9xCj4eUUnki8hm670DCXX5QKdVBRD4FuimlKgLH\n6AG8qXRDG0Tkn4AcpdS/i8hS4CS6vMcCpdTJer5UgyEjRrMwGM4OleFzbagIfLZJ+RJHo2v+XAms\nC1RYNRjOOUZYGAxnx98H/i11P7+Dro4LcCewyv28DPgh+P3E22Q6qIhYQL5SagXwT+gy21W0G4Ph\nXGHeVAyG7FwgIhsD35cqpbzw2XYi8j5aO7jDXfYjdFe7n6I73N3lLp8MzBGRiWgN4ofoyqLpiAH/\n6QoUAX6llDpWZ1dkMNQS47MwGM4Q12cxSCn1WUOPxWCob4wZymAwGAxZMZqFwWAwGLJiNAuDwWAw\nZMUIC4PBYDBkxQgLg8FgMGTFCAuDwWAwZMUIC4PBYDBk5f8DAkVpn8pWhMcAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ctawd0CXAVEw", + "colab_type": "text" + }, + "source": [ + "This graph of _mean absolute error_ tells another story. We can see that training data shows consistently lower error than validation data, which means that the network may have _overfit_, or learned the training data so rigidly that it can't make effective predictions about new data.\n", + "\n", + "In addition, the mean absolute error values are quite high, ~0.305 at best, which means some of the model's predictions are at least 30% off. A 30% error means we are very far from accurately modelling the sine wave function.\n", + "\n", + "To get more insight into what is happening, we can plot our network's predictions for the training data against the expected values:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "i13eVIT3B9Mj", + "colab_type": "code", + "outputId": "afc103e2-0beb-4a26-fe18-c0cccc6d3d2a", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 281 + } + }, + "source": [ + "# Use the model to make predictions from our training data\n", + "predictions = model_1.predict(x_train)\n", + "\n", + "# Plot the predictions along with the test data\n", + "plt.clf()\n", + "plt.title('Training data predicted vs actual values')\n", + "plt.plot(x_test, y_test, 'b.', label='Actual')\n", + "plt.plot(x_train, predictions, 'r.', label='Predicted')\n", + "plt.legend()\n", + "plt.show()" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX8AAAEICAYAAAC3Y/QeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi40LCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcv7US4rQAAIABJREFUeJztvXmcVNW16P9d1c3kiLQYvaLigANK\nBMXGUkB8GjDRi6hPkwhB41AgmheTFxm8zye5MSDo517yokj3zwnSSJKnVxxeEohDi9oVCEaMEYyi\nYsCIYCMIyNi9fn/sc7qrq6uqq7rmqvX9fM6nhrPrnH1OVa299lprryWqimEYhlFeBPLdAcMwDCP3\nmPA3DMMoQ0z4G4ZhlCEm/A3DMMoQE/6GYRhliAl/wzCMMsSEf4EjIhUiskNEjs1k2wz062IRWZft\n8+QCEakUERWRvt7rh0Xkzhyc9yYRqc/2eQoBEdkgIiMyfMw235uRGib8M4wnfP2tWUR2Rbwem+rx\nVLVJVQ9S1X9ksm0uKTYhp6o3qeqMjtqJyGsicn0OupRzSvnaDEdlvjtQaqjqQf5zTzO+SVVfiNde\nRCpVdX8u+lYuiEiFqjblux+GUciY5p9jROQeEfmNiCwSke3AOBEJisifRGSriHwqIv9HRLp47aNN\nEnXe/t+LyHYRCYvI8am29fZ/U0TeE5FtIvJLEXk9nrYnIgeIyK9E5AsReQc4O2r//xKRD73zvCMi\no733BwAPAMO82c/n3vujRWSViHwpIv8QkbsS3LOLRWSdiPxvEWkUkY9E5DsR++tE5EER+YOI7PTO\n1V1E/kNE1ovIZyIyV0S6R3xmqohsFJFPgOuizlcnItMjXl8Z0de1IjJSRGYBQWCed11zvLb9ReQF\nEdkiIu+KyFURx+ktIs97x/kTcDxxEJE/isjEqPf+5t23gPe9bvK+u7+KSP84x7lJRNZ438sHInJT\n1P6krk1EThIRjfpsy+xARPqJyMvedX/u/VYOjXd9Ecc4X0Q+EZFAxHtXi8hfvOdx/xsxjtVmtiJR\nM84OvpvLIu7TBhH5UUd9L3pU1bYsbcA64OKo9+4B9gL/iht8ewDnAENwM7ETgPeA27z2lYACfb3X\ndcDnwGCgC/AboK4TbY8AtgOXe/t+DOwDro9zLfcD9cBhwHHAamBdxP5rgKO8a7oW2AF8zdt3E1Af\ndbz/BpzutT/T6+dlcc59MbAfuA/o5n32K+CkiOv8AiewAl6bXwJPe/09BPgd8DOv/WXAp0B/4EDg\ntzHu23Tv+XnAVuAi79jHAKd4+16LvF/AQcAnwHjvuzgbaIxo/ySwCDgA+LrXh/o413wD8ErE6zO9\nY3UFLgVWAId6feoPHBnnOP+K+02Jd992AV/vxLWdBGjUsVvaACd7x+nq/bZeB+6PaLsBGBGjf4L7\nn1wY8d7TwE+856n8N6L73PK7S+K72Qyc5z3vBZyVb/mR7c00//zwmqo+p6rNqrpLVf+sqstVdb+q\nfgjUAhck+PyTqrpSVfcBC4GBnWh7GbBKVZ/x9v0nTgDH4xrgHlX9QlU/xmnzLajqb1X1U++ansD9\noQfHO5iqvqSq73jt3wJ+3cE1NwN3q+oeVX0J+ANwdcT+p1U1rKrNuEHsZuB2r79fAjMBf7ZwDfCI\nqq5W1Z3A9ATnvRH4/1T1Ra+v61X173HaXg68p6oLvO/yDWAx8N89bXUMcJeqfqWqfwV+leC8TwHn\niEgf7/W1wFOqute7vkOAUwG869gY6yDe7+xDdbwEvAgM68S1JURV3/OOs1dVN+F+T4m+T/9zivvu\nvwsgIj2BUd57dOK/EY+43423fx/QX0QOVtUtqvqXTpyjqDDhnx/WR74QkVNF5P95ZogvgX8HDk/w\n+cg/+lc4rSbVtv8S2Q/vT7ghwXGOiur3x5E7ReR6EXnLm55vx
QmmuNfgTefrRWSziGzDaWmJrrlR\nVb+KOv+/RLyO7NuROO0/sj/P4zRSiLr26GuJ4hjggwT7IzkOON8/p3feb+Pu3deAimTPq6rbcAPc\nt0VEcAPXQm/fUmAe8BDwmYjME5GDYx3HM2cs90wdW4GRtN7nVK4tISJypIj81jPhfAk8TuLvM5In\ngKu8AfIqYLmqbvCOm+p/Ix6JvhuAK4DRwD+83+WQTpyjqDDhnx+iU6nWAH/DmTEOAf43bjqcTT4F\nfK0ST8AcnaD9Rpyw8GkJJxWRE3CC6BagSlV7Au/Seg2xUsf+GqfdHqOqhwIPk/iaq0SkR9T5/xnx\nOvIcn+FMa6eoak9vO9Q7D7hrj3ktMVgPnBhnX/R1rQdejDhnT3XRV7d5fWpO4bzgTETfBYbi/qvL\nWk6sOkdVzwLOwJl9fhz9Ye9+PYmb9XzN+16W0nqfU7m2nd4xD4h478iI57OAPcAA7zd8PUn+hr1Z\n0Eacxn8tbjDwSeW/sRNnUovVv0TfDd7sYjROQXgeb+ZRypjwLwwOBrYBO0XkNGBCDs75PHCWiPyr\niFQCPwR6J2j/W+BOEekpbh3BbRH7DsIJi824ceRmPJOEx2dAnyhH3cHAFlXdLSLn0mqSiUcAmC4i\nXcXFi38TJ9jaoS7S52FgjudkFRHpIyIjI67lBk+rPBC4O8F5HwFuEpELPUdrHxE5JeK6Toho+yxw\nuohcKyJdvK1aRE7xTGuLgZ+KSA8ROQP4XgfX/BzQDyfwfu3NzvCOWe19bztxA11zjM93w9ngNwNN\nInIZzi7fmWvb6G3jxK0nCeG0aZ+Dvb5sE5FjgJ90cG3RPAH8COe3ifxeU/lvrMLNIHqIyMk4v4lP\n3O/Ga3+tiBzifU/biX0/SwoT/oXB/8RFnGzHaTq/yfYJVfUz3LT3P3COrxOBN3HaWyzuxmnM64Df\nAwsijvVXnIN1hdfmFGB5xGf/CLyPM1H4ZqhbgJniIp7uxAnkRGzACZdPgfm4ENr3E7T/nzizygqc\n8FiKE6So6nPAg8ArOAfiH+MdRFUbcP6D/+Md52Vatfc5wHc9M8J/eKaaUcA4r58bcVp3t4hrPgwn\nWB8BHkt0waq6GzdgXExbbbin9/mtuO/jU9z3GP35rTiB+jSwBWfffr6T16Ze2ztxvqGTaPsd3w1U\ne8d5FjerS4UncA7pP6rqFxHvp/LfuB+nhGwCHsU57v1r7ei7uQ742DMt3ei1K2nEUyaMMkdEKnBm\nlP+uqq/muz+RiMjFwMOq2jfffTGMUsE0/zJGRC7xzDjdgLtwEQ8r8twtwzBygAn/8mYo8CHOJjwK\nuEJV45l9DMMoIczsYxiGUYaY5m8YhlGGFGxit8MPP1z79u2b724YhmEUFW+88cbnqpoobBsoYOHf\nt29fVq5cme9uGIZhFBUikmjFegtm9jEMwyhDTPgbhmGUISb8DcMwypCCtfkbhlGa7Nu3jw0bNrB7\n9+58d6Wo6d69O3369KFLl5i1bTrEhL9hGDllw4YNHHzwwfTt2xeXTNZIFVWlsbGRDRs2cPzxcQvC\nJcTMPoZh5JTdu3dTVVVlgj8NRISqqqq0Zk8m/EuUcBhmznSPhlFomOBPn3TvoZl9SpBwGC66CPbu\nha5d4cUXIRjMd68MwygkTPMvQerrneBvanKP9fX57pFhFB6LFy9GRHj33XcTtnv88cf55z//mbBN\nIurr67nssss6/flsYcK/BBkxwmn8FRXuccQI9360KchMQ0Y5s2jRIoYOHcqiRYsStktX+BcqJvxL\nkGDQmXp+9rNWk49vCrrrLvdYW9v2tQ0ARiGTaUVlx44dvPbaazzyyCP8+tet5XpnzZrFgAEDOPPM\nM5k6dSpPPvkkK1euZOzYsQwcOJBdu3bRt29fPv/8cwBWrlzJCE+7WrFiBcFgkEGDBnHeeefx97//\nPTOdzRJm8y9RgsG2dv5oU
9BTT7V9vWCBa1NVBY2NbrZgfgKjEMiGD+uZZ57hkksu4eSTT6aqqoo3\n3niDTZs28cwzz7B8+XIOOOAAtmzZQq9evXjggQe4//77GTx4cMJjnnrqqbz66qtUVlbywgsvcOed\nd/LUU6lWs8wdJvzLhKoqCASguRlEYOBAePVV94eqrIRHH4X9+93+QAC6dTNHsVEYxPJhpfu7XLRo\nET/84Q8B+M53vsOiRYtQVb7//e9zwAEHANCrV6+Ujrlt2zauu+463n//fUSEffv2pdfJLGPCv0gJ\nh92fIBkNPRyG2293wl3V/Yl++UuYM8dp+StWwDPPuH3gBoBM/ckMI118H5av+fs+rM6yZcsWXnrp\nJd5++21EhKamJkSEq6++OqnPV1ZW0tzcDNAmzv6uu+7iwgsv5Omnn2bdunUt5qBCxWz+RUi0/b4j\nO6ivOfnCXdW99s07v/996z5wmn+iP5k5io1cEsuHlQ5PPvkk3/ve9/j4449Zt24d69ev5/jjj+fQ\nQw/lscce46uvvgLcIAFw8MEHs3379pbP9+3blzfeeAOgjVln27ZtHH300YBzEhc6JvyLkFRDOX3N\nyV8TEinc6+vdjADc/jFj4J574v/JUh14DCMTBIMwbVpmZqKLFi3iiiuuaPPeVVddxaeffsro0aMZ\nPHgwAwcO5P777wfg+uuvZ+LEiS0O37vvvpsf/vCHDB48mIqKipZjTJ48mWnTpjFo0CD2+3+qQkZV\nC3I7++yz1YhNQ4Nqjx6qFRXusaGh48/U1Kh26aIqolpZ6V77x+rWzb3frVvHx5oxw50X3OOMGfH7\nOGNGcn0zyovVq1fnuwslQ6x7CazUJGRsRmz+IvIocBmwSVXPiLFfgF8A3wK+Aq5X1b9k4tzliD8N\nTtbmD87E09zszDuq7rVPpDmoIxLZX30/RFWV8zHYCmPDKFwy5fB9HHgAWBBn/zeBft42BHjIezQ6\nSXQoZ0eMGOEWfTU3u0dfaNfXO/ORqjP/TJ/utnjHjjfwRIbjBQLumOY4NozCJSPCX1WXiUjfBE0u\nBxZ4U5I/iUhPETlKVT/NxPmN5PBt/pH5oHxNfs8eJ6xfeMGFgCbS1mMNPJF+CD9cNHqFsU8qkUqG\nYWSHXDl8jwbWR7ze4L3XBhEJichKEVm5efPmHHWtPPAdu76G7zuJfU3+4otb1wF0Jh+QP7MAd45A\nAG6+uf0gYg5jwygMCiraR1VrVXWwqg7u3bt3vrtTUsTL9wNOOE+f7hZ2xdPWOyIYhBtuaJ1VNDfD\nsce2F/zTp7tZhiWdM4z8kqtFXp8Ax0S87uO9Z2SRaPNKIidxZ5zI0YwfD/Pnx3cGX3RRq3mpo7UE\nhmFkl1xp/s8C48VxLrDN7P3ZJZZ5paNY6XRjqRMtxvF9An56icGDY5uEbPGYkQsqKioYOHAgZ5xx\nBldffXXLwq7OEJmy+dlnn+Xee++N23br1q3MnTs35XNMnz69Zd1BpshUqOciYARwuIhsAO4GugCo\n6jzgd7gwz7W4UM/vZ+K8pUyk1g6pa+TZyIeSDPGikHyfgB9Z9NZbbfdbARojl/To0YNVq1YBMHbs\nWObNm8ePf/zjlv1+LHwgkJp+PHr0aEaPHh13vy/8J02a1LmOZ5CMaP6q+l1VPUpVu6hqH1V9RFXn\neYIfb+3Brap6oqoOUNWVmThvqRKptV94oROcd90FB5/Xn+ZAAI44okP1OJGNPx9E+wQinc6QeNWy\nzQiMbP4Ihg0bxtq1a1m3bh2nnHIK48eP54wzzmD9+vUsXbqUYDDIWWedxdVXX82OHTsA+MMf/sCp\np57KWWedxX/913+1HOvxxx/ntttuA+Czzz7jiiuu4Mwzz+TMM8+koaGBqVOn8sEHHzBw4EDuuOMO\nAO677z7OOeccvv71r3P33Xe3HOvnP/85J598MkOHDs1OeuhkVoLlYyvXFb4NDaojR6oGAm4
5lojb\n3uI0bQaNWKeletxxrUt14xwr36tsI/sQvTK5pib+Pr/PnVnNbBQ2Ka/wzcKP4MADD1RV1X379uno\n0aN17ty5+tFHH6mIaDgcVlXVzZs367Bhw3THjh2qqnrvvffqT3/6U921a5f26dNH33vvPW1ubtar\nr75aL730UlVVfeyxx/TWW29VVdVrrrlG//M//1NVVffv369bt27Vjz76SE8//fSWfixZskRvvvlm\nbW5u1qamJr300kv1lVde0ZUrV+oZZ5yhO3fu1G3btumJJ56o9913X7vryPsKXyMzxHKKdukC1U1h\nTt+/BoA2JZs//hgmTHDPQ6F2x/NNML7SlOu4+limHN+pHGsVcCyHc77MV0YBkYUfwa5duxg4cCDg\nNP8bb7yRf/7znxx33HGce+65APzpT39i9erVnH/++QDs3buXYDDIu+++y/HHH0+/fv0AGDduHLW1\nte3O8dJLL7FggVv3WlFRwaGHHsoXX3zRps3SpUtZunQpgwYNAlyRmffff5/t27dzxRVXtKSXTmRK\n6iwm/AuISKdoIOBi7++/Kkz/20Ykts9Nm+YeYwwA+bSlx/rP+g7lmTPj74sk0+l8jSIkCz+CSJt/\nJAceeGDLc1XlG9/4Rrsyj7E+11lUlWnTpjHBV+I85syZk7FzxKOg4vzLnUg7fbduLiZ+wJsLqNi3\nFyFK649kyxY3Axg0qMUm6mv7Cxbkr5h7Ir9Dsj6JTKfzNYqQPP0Izj33XF5//XXWrl0LwM6dO3nv\nvfc49dRTWbduHR988AFA3BrAF110EQ899BAATU1NbNu2rV166FGjRvHoo4+2+BI++eQTNm3axPDh\nw1m8eDG7du1i+/btPPfccxm/PtP880zCWPwHx8HChW0/EAiw48Svs2/zVg7evYnK3REhaqtWwdCh\nfPCTh7jol6GWKl3+yttca86J8gDV17cWk+nIHJVqHiOjBMnDj6B37948/vjjfPe732XPnj0A3HPP\nPZx88snU1tZy6aWXcsABBzBs2LA2At3nF7/4BaFQiEceeYSKigoeeughgsEg559/PmeccQbf/OY3\nue+++1izZg1B79oOOugg6urqOOuss/j2t7/NmWeeyRFHHME555yT+QtMxjGQj60cHL4J/VjV1a2O\nXX+rrta/1jS0fOaWypq2DmBvawLdyBE6g8laUaE6cWL+Hb8+5sA1LKVz5kjH4WtmnzwSN7xx1ChX\nWzESEZgzh+cbgy2fqdUQbw8c2+64AhzBJqYym/k6jvHjM1cII11SLURjGEZ2MOGfR2LavWtrYenS\n9o2vvRaCwXaf2Tm3DmpqoH//lqaR/oFrmxcSHHUIjBuX1WuJFYYd671CW39gGOWKaDIVPPLA4MGD\ndeXK0l8LFg47p+zGjXDkkXD/H07nwHWr2zaqrobly9t8JuaK39pamDgxflWWfv1c8p0MTwFiRRRB\n/CgjS+lc3qxZs4ZTTz0VkbghDEYSqCrvvvsup512Wpv3ReQNVR3c0edN8y8AHnsMFi+G/fNqaV63\njjaiu1evNoIfEuTgCYXg9ddh4MCWpbT+sRTg/fdh6FA3SGSQWKacROYdv/9gK3fLke7du9PY2Eih\nKp7FgKrS2NhI9+7dO30Mi/bJM76QnMEUpjK7fYOZM1M7YDAIb74J4TA7xt/CgWvfQokIE21udmGh\nH3wAs2al13mP6DDsqirXhURRRonWH9jMoLTp06cPGzZswGp2pEf37t3p06dP5w+QjFc4H1s5RPuo\nuhQHIanRJqQlcqcZVHv1Spi6IRlmzFD9HSPbp4Xwt4MOUp08OSPX4adxqKlpjebp1s1FGsWK6Jkx\nozWFRSDQWgjeooEMIz2waJ/8kkweqtpa+NWkMA/qLQiKQKuWPnNmzBW7qTBiBFzVYwkTpYYmArSb\nZO/YAbNnw5QpaZ0HWk05jY2t5p79+9sXdPGpqnKTEHC
PVVXuuUUDGUZuMOGfBToqVRgOwy23wKRJ\n8KOm2VTQ3GKWEYDhw9MW/NC6yKrvz0OsqXkNGT48dsMHH4QhQzLiC0g2mqex0aWwAPfY2Bj781VV\n5hcwjKyQzPQgH1sxm31mzHBmC3CPvklD1ZlFKivdvnNp0L1UtDX3BALZtXWMHdve/BO5HX102udP\nJptoIvNOLBOSmYAMIzkws09uiTTzxNN+w2G47TZnDjmXMHczHfG0fgUkEICHHsqul7POWxfQty9E\nJLFq4ZNP4Lzz0loXkExFsETpWmKZkMwEZBiZxaJ9MkBHqYt9oVVf3yr4X+ZCurIXQVEEqaxw5pcM\nmHs6JBRyWzgMF1wA+/a1b7NwIWzeDEuWZK0bHaVrsYyehpE9TPPPAPHSjY8Y4XLW+7b/rVudbeVB\nJtGNPQQ8J69UnwPLlhEeEMqtfTsYhFdegaOPjr1/6VLo0SPrq4PjYRk9DSN7mOafAeJpqNGDwqpV\nMJ9xDCIqH/hZZxEmmJ+8+8EgbNjgBPz//b+uA5Hs3u1mAStWZGV1cDLdM6FvGJnHNP8MEE9DHTHC\npVQWcY+160cxDpei2bfzAzB+fP5DHOvqXAmxkSNj73//fecLyEBYaDRWo9cwco9p/p0g1grUeBqq\nv4J98Z5RHLvGJWxrE88/dqxL2EaB2LeXLHECfnaM1cbg3v/Tn+DeezOiksdb6WurfA0ju5jwT5FU\nyiLW1ztNfoiGGUVbwa/A5pFjOaKuDohf+CQvzJoFY8bAddc5jT+aZcvg/PPhjjvSThERb8aTr9KT\nhlEumNknRVIxz/i+gOvEFXGONPUsYSSPjKhr0z6ZEMmcEQzCe+/B5Mmx96u6WcAxx6Rlr4kVFpt3\nE5hhlAEm/FMklXz0wSCsGzaOkM4DWgX/HxjJVT2WFEfo4qxZresCYrFhQ1rrAmL5Syznv2FkH8vn\n3wmStkcPGdKuItenYybyePVDLZ+NdayCtXd3VC9g4ECYOzdjvoCCvAeGUeAkm8/fhH+2GDWqfUUu\nEZdv35NmqRZBKQjCYbjmGqfxx2PsWBc9lIOuLHAWNcaPL7D7ZBh5woq55JMpUxKWYvRJtQhKQRAM\nwvr1TsB37Rq7zcKFMGhQVmM3/cXJ8+a57cILLVTUMFLBhH+mCYfhvvvav19d3U4bjmXbLhp7t78u\nYGz7AvKAW9GWYYkcuR5gwYK2WSkKcqA0jALGQj0zzYIF7W3iI0fGzJETL7yzYEI+k6GuzqWHiLUu\nYM8eZ4+54460cxZFm8hGjWq7PxAo4IHSMAoQE/7ZZvjwhMnRYi0OK7qUBrNmwYknwvTp8Omnbfet\nXevKRv7+9y5stJMXFm0OO/LI1kVxFRUZ8zMbRtlgDt9M4YenVFXBD37gbBJdurRmeSsXfJvM88+3\ndwp36eJ8ATfemPJMIJ5zvGhmSIaRIyzaJ5fU1rpE/U1N0K0bzJnjktGXs1SqrXUafzyqq2H58pQO\naeGfhtExOY32EZFLROTvIrJWRKbG2H+9iGwWkVXedlMmzlsQ+LHv+/a5YrR79kBjI+ER05hZHyzf\nCJRQyC0Oq652Gn80K1bAYYel5BAuqBXQhlHkpC38RaQCeBD4JtAf+K6I9I/R9DeqOtDbHk73vAWB\nr91GzZ7erhqRsIZv2RAKOe3+lVdcrqBotm51q4OvuKKMb5Jh5IdMaP7VwFpV/VBV9wK/Bi7PwHEL\nmrdrwzRPmECk2Fdgc9UpzH0zWNix+rkmGISnn3azgFgsXgxDh2akgLxhGMmRCeF/NLA+4vUG771o\nrhKRv4rIkyJyTAbOmzfCYdgyYaqrwuW95w8C/+vz23n0UZe/v+Bj9XPN8uVw3HGx9zU3u1nUySfb\nLMAwckCuFnk9B/RV1a8DfwTmx2okIiERWSkiKzdv3pyjrqXOq7PDnBlRjcsX/LOZTK2GaGqC73/f\nyg/GZN26+AVjwKW
QHjo0KwOAFY0xjAhUNa0NCAJLIl5PA6YlaF8BbOvouGeffbYWJA0NuqeihzYh\n2gwt22cjx2qPHqoVFao9eqg2NOS7owVOQ4PqmDGqzmPSfquuzuhNbGhQ+36MsgBYqUnI7kxo/n8G\n+onI8SLSFfgO8GxkAxE5KuLlaGBNBs6bH2bPpkvTLgIozQiN9OKTsZM5YkmdFRtPBd8PUFMTe/+K\nFTBsWErO4ESafcHnTDKMXJPMCNHRBnwLeA/4APg3771/B0Z7z2cC7wBvAS8Dp3Z0zELR/BsaVGfM\n8DTFkSNVI7T9PXTRYZUNpkWmS0ODar9+8WcBIqqTJ3d4iESavWn+RrlAkpp/RoR/NrZcC/82Qj7i\nPV9ghKnW5giB1AwaplorKtznjAzQ0KA6caK74bEGgeHD40rtGTNaPxbvO4n1HRtGqZGs8LfcPsSv\ny+ubCp5tGsUQWouy+A7ex+RGi+bJJH5So0GDYheN8WsHX355uzxBfjZU/zv0v5PIVcGGYbRiwp/Y\n9mC/nOBMpvDNiOLr/uOW6pH0HRPixRFm3884ft6fWAOAqlsXsHhxm2ypsTKkRg7qFRXuo/v3l2fK\nJcOIxoQ/8bXGIGGGNLnc/BL5gepqei1fwrQc97OsCIVgwACYOtVp/LFYuhSOOAKeeQaCwXbZUCMH\n9aam1vf37nW550z4G+WMFXMhdhFxAKZOJYC2FfwjR6ackMzoJMGgSw1RU+MS9sdi82Y47zw+mFLb\nLtInsjBORUVOemwYRUPZC38/PBCcsKiv9wTIuHGwbFmLfV+hw9z8RpYIheC112LnB8J9N8fPnkDg\nziltcilFDupz57qEqyLucfz43HXfMAqRsk7pHG0TFnE24ZDU8uB+l45YcMKlGWF1zesMCJmtIK/E\nSRXt/4r3UsnfB36br7/ZvoC8pYQ2yoFkUzqXlc0/+s8faRNubnZtVOFyngJaBT/A/dxBc2OQHSZA\n8ovvC7jmmpZiMUrrd9WV/QxYtRBGbW43Syu6CmmGkUXKxuzja/mRaZYjbcJdusD5gTAvcwHneGGd\nvuD/FWP5aY9ZVFW1P4aRB4JBWL/ehXv27Nki+CViY+lSOPRQZ76LgeX5McqdshH+8cI5fZvwb24P\n83LTeVzAMg5jKwAf0ZdbK2oIT3SpGxobLUVAQTFrFnzxBYwc2dYp7/Pll7BwIfR35SV8gV9b2zqI\nX3gh3HKLDQJG+VE2Zp+44ZyeKWD7gZdTQdsUzT17VfK950NtTAWxjmHkmSVLYMoUeOAB+Oqr9vvX\nrGHvoYfzq69mUKshRJyZr7nZDeQ1NTB/vuVkMsqLsnL4xnX4jRqFLm1dyOXfEZk82WmXyRzDKAzC\nYZg0CVatavO2/52GqWZYYDnp9iXJAAAdxUlEQVQVFc657//8Kyrg5pvh2GPtuzWKGyvgnixR0SMt\ngr8TBcaNAqJ/f1jTNnms/90ulZF8PG8Jb74Jjz7qtP/IaK/IFB+GUWzktIB70RIOO4Ovhy8cvjru\nNBP8xc7q1e2KxvgmvZG6lNBPDuGh7eOor3c+nxtucILf/DlGuVCWwt93/H06e0FrjKfHe/Sj96bV\n5gAsBZYscQb9Qw5peaslGmj7dli4kOBlVUyrqmX8+NbIL/PnGOVA2Ql/P+QzcOcUeix+os0K3iYC\nXM980/xKiVAItm2LXzpyyxaYMIHg1AtYPidsxXiMsqHshH99Pdy9awqTmc2hfNm6I1DB/6h8iD9X\nBE3zK0WWLIGGBjjzzNj7ly1jwK3DmTYiHFPw27oAo9Qom1BPnxEj4AQeByIie3r1Qp5/nu8R5Jh6\ni/YoWYJBFwU0apRbBBbN/v1w4418esoF/PHI8fQbH2yXGtqcwUapUHaa/zEPTuEINgGtDl5uuqkl\nJfC0afbHLnl8X4C0XRqmgK5Zw5GL5zFu3nm8NmxKS2ivLe4zSo3yEv61tfzLwrb5+
bf2PK5dLL9R\nBoRC8PrrLlNoINAa4hux/aRpNgdOGtcmDYiZBI1SoSyEfzgMv7uiFr3lFsTLz+//2beE7sxn14x8\nEgzC00/Da6/xZvVEp/l7u3zlYMCqhQRvOp23f1BrzmCjpCh54R8Ow4dDx3HJ4gnQ3NwmRfO9TObF\nE0P57qKRb4JB9sx5iEWBsQAtg0BLWOjq1Zw4ewLT6keZ4DdKhpIX/jp1Ctc2L2z5IzvBH2Ai87iT\nWTz1VJ47aBQEwSCc8Fod4eGTaepxUOxEcUuXwvHHu1XhhlHklLzwP+cv7o/qC34FJvIQD+M0/quu\nylvXjAIjGITzXplF5VfbnUO4d+/2jdatc+lAjjrKBgGjqClt4V9bS5cdW9u8ta/XkXw+JkR1tft/\nh8zqY8QiFHKF4ePVDt640Q0CceoFGEahU7rCv7a2JW+PP4UXoNvMn/L00y51jwl+IyHBoKsdPHw4\n9OwZu83ChXDBBbb6yyg6SlP4jxvntLLIvD0irvKTSXwjFYJBeOUVVzRm7NjYbZYtg/POczUFDKNI\nKD3hP2WK08Yi0ECA310+j/AYi+c30qCuDqqr4++fPRsOOshMQUZRUHrC/4kn2rxU4NbAQ4x+LmR1\nd430Wb7czQC6dYu9f+dOp3yMGpXbfhlGipSe8D/hhDYvNx45kFoN2dJ8IyEpJW6rq4Pdu+ObgcCF\nhZqmYRQwpSf8773XrcMHqKjg85/OtaX5RkL8xG133UVqs8O6OudH6t0bunRpv/9b33IVxSwk1EiS\nXGaPLb2snsEgvPpqS6HdAcEgLw6wurtGfGIlbkv6dzJrltvCYef0jWTrVrdNmOCcwnV1Ge65UUrk\nOnts6Ql/cHcs4q5FvTSMNviJ2/w/Xadmh8GgWzgyaZIbRaLxgxBsADDikJYS0glKz+xjGCkSDDot\nK1bituhpeG2t8+X6lpw2+0MhN+scMyb2iRYuNDOQEZecZ49V1bQ34BLg78BaYGqM/d2A33j7lwN9\nOzrm2WefrYaRT2pqVCsrVQMB1R49VCdPVoXWbfJk935FhXtsaIj68JFHtv1A5DZ2bN6uyygsGhpU\nZ8xwj5HPOwuwUpOR28k0SngAqAA+AE4AugJvAf2j2kwC5nnPvwP8pqPjmvA3ckn0n66hQbVLl1ZZ\nHQionnRSW/l90klO8IN7nDGj/XE/GzlWm0GbYw0Aw4erTpyY3j/dKGoaGhIoEJ0kWeGfCbNPNbBW\nVT9U1b3Ar4HLo9pcDsz3nj8JXCQiMRMnGkauiRXtU1/f1nQfCMCVV7b93JVXJp6mh8PQ99U6JkoN\nq6V/a+U4n2XLYN48GDbMTEFlSrSdf8GC4or2ORpYH/F6AzAkXhtV3S8i24Aq4PPIRiISApdu89hj\nj81A1wyjY2I52kaMcOu49uxxwv2BB5xJ/8QT4amnXDbYUMiZ9+NFkvnHrdUQj1SE+MuAcXx91cLo\n07sTe3moLP1IeTFiBFRWukw0gQA89pgrJZ2LaJ+Ccviqaq2qDlbVwb1jpdM1jCwQy9HmO4Hvucel\n9vFlcijkSgD7rxPVfY4+7s65dS4iqLra/eMjaW52IaGHHmrpIcoM9aaEzc2wb1/uakVnQvh/AhwT\n8bqP917MNiJSCRwKNGbg3IaRNvGifRIJ9k4fNxRyKSKWLXPThmjr55dfuqigQw4xU1AZ4JsXfUdQ\nRUXuon1EtZ0lMrUDOGH+HnARTsj/GbhWVd+JaHMrMEBVJ4rId4ArVfWaRMcdPHiwrly5Mq2+GUbB\nU1sLt97q5vqxOO00WL06t30yckb0wq45c6CxMb0FqSLyhqoO7qhd2jZ/z4Z/G7AEF/nzqKq+IyL/\njvM6Pws8AvxKRNYCW3ARP4ZhhEIwYIBbHLZqVfv9a9bA4YfDjBnmDyhB/NlhPjIQpK35ZwvT/I2y\nY8gQWLEi/v7Jk10qCaNk8SPNcqH5F5TD1zDKm
uXLnUP4gANi758926qGlTCdTjDYSUz4G0YniE77\nkLFsjKGQqwkwcmTs+sHLlrmykjYAlBThMEyf7kKLcxXtU5qJ3Qwji8Ry0t1+e4azMS5Z4k40bFj7\nRHH797sOXHmlJYorAfzf0549rfH+uYj2Mc3fMFJkwQJXy8XX0J56qv0isYzgpycfPrz9vl27XEho\nt24WElrk+IsBfcF/8cXZX+AFJvwNIyXCYXj00daFOZWVbrVv1rIx+gXk/cVh0UVj9u51i8OOOsoG\ngSIlcjFgt27O/JOLqB8T/oaRApE5f0Tg+993Zvp4KaEzhr847Jo4y2M2bnSDwJQpWTi5kU0SpRTP\nJhbqaRgpkOtqSzE56ign7ONxxBFw/fUWFlqmWKinYWSBfGlpbfj0UxcN5NeqjmbTJhcWevLJFhVk\nxMWEv2GkSLo5fzLCkiUu6qemBo47Lnab9993dYXNFGTEwIS/YRQzoRCsW+cGgXjMnm0DQIGQsfUg\nGcDi/A2jFPDz/kyYEHv/7NnOW718ec66ZLSlIPxFEZjmbxilQigEDQ2x1wWAyxvUr19hqJ1lSH19\n6wrePXvc63zOBEz4G0YWyfmf218XMHly7P1r17pVw1dcYYNAjqmqcgu5wD1u3ZrbXD7RmPA3jCyR\n60RdbZg1y/kBDjmk/b6mJli82DmDR43KYafKm8bG1nRNgYDL4J2VleFJYsLfMLJErNrAOSUUgm3b\nXFho167tq4YBLF0KgwbZLCAH+HWh/ZW8WV0ZngS2yMswskQsBx/kp3AH4NI/TJrUPlEcuIHh2mst\nUVyWic7Xn4n8/dEku8jLhL9hZJHIPze0DgaVlS41xPjxOR4EwmG47jq3BiAW1dUWEdRJsiHIO4MJ\nf8MoMGbOdPb/yNxA3bvnKeRv3Dh47jlXMD6agQNh7tw8r2IrLgopjNPSOxhGgeFnb/RN76p58gWA\nM+9s2wZjx7bft2oVnH8+nH66ZQpNgnwUYskEJvwNI0f4eYEmTMivo68NdXXO1BONKqxe7TprEUFx\n8TX+F17IbSGWTGDC3zByzLHHwi9/CTff7MzveWf58tgzAJ+lS+GYYywiKAapFGIppNQOYDZ/w8gZ\nkXbhigqnXO/f7zTFl18uABN7OOzSQCxeHL9Nv34wf34BdLYwSNbWn0ufgNn8DSPPRGt6kXH/+/a5\nTdXZihcsyGtXHcEgPP104iRxfqbQQlFf80yyKb7zvuYjBpbYzTCyQCxNz3f47t3rhH6BTrrd4rAB\nA1zVsA0bYreZOhUuuST/cY0FQDDY8S2I/O4LxSdgmr9hZIFYmp6vJd58c9s6LJWVLt6/oAgGYf16\nlyPogAPa71+2DO6800UFWURQhxREEaAoTPgbRhaILModqekFg87h6yf4EoGbbioMYRCTWbNg587Y\nEUHgpi9WOzgpCqIIUAQm/A0jCyTS9CIHhu7dC1Drj8Xy5c4XMHKk63Q0s2e7tJUlPAsotGiddLFo\nH8PIA6mkAiiUtAEtjBsHCxfG3z92bMnlCCqkFbwdkWy0jzl8DaNAiCXkC1Lo+IL9qadg9+72+/2B\noYQGgHg+nGLGzD6GkWNi5fmPl/s/Uujs3l0gIaHgBPuuXc4MFIuFC+Hgg+GCC0rCThLPh1PMmPA3\njBwTS4uMFwc+YkRrZJAqPPZYgcnSJUtcRNBBB7Xft2OHiwoqgXUBhRitky4m/A0jx8TSIhNFB91w\nQ2syuP37C2OBUBtmzYLt2xOniLjwwqJ3BhdatE66mPA3jBwTS4tMpFmOH+8CbAre5FBXF7928J49\nLiS0f/+iHwSgNCJ/0or2EZFewG+AvsA64BpV/SJGuybgbe/lP1R1dEfHtmgfw2il4CJ+EhEOw7e+\n5SqUx6OII4LiOeEL5TvKVbTPVOBFVb1XRKZ6r2Ot9tilqgPTPJdhlC3JpBAoGIJB+OILGDIEVqyI\n3WbhQnjnn
aIpGhMp2OP5ZwouKqsD0jX7XA7M957PB8akeTzDKHtqa10K/aK3jixfDg0NMGYM9OzZ\nfv+qVTB0aMFfaHQkVlVVe/9MISZu64h0Nf+vqeqn3vONwNfitOsuIiuB/cC9qhozZ6yIhIAQwLHH\nHptm1wyj+KitdaZxcGn0weVZK1r8TKHhMAwb1r54fHOzu+BlywrSDBRZpau52Qn2xkan2UebeAot\ncVtHdGjzF5EXgCNj7Po3YL6q9oxo+4WqHhbjGEer6icicgLwEnCRqn6Q6Lxm8zfKkVGjWoU+uDD6\nJUvy15+MEg7DpElO449FdTVs2QJXXukiiPKMr/H7gj8QgG7dEufsLyabf4dmH1W9WFXPiLE9A3wm\nIkd5JzwK2BTnGJ94jx8C9cCgFK7FMMqGq65K/LqoCQbhzTddjqD+/dvvX7EC1q51eYKGDMl9/6JI\npUoXFF8oaLo2/2cBvxDddcAz0Q1E5DAR6eY9Pxw4H1id5nkNoyQJhVrzp9XUpG/yKciQxFDIOXsT\nrQtYsSKvpSPDYfjHP1y67YoK6NIFTjgB3n67AO9nZ1HVTm9AFfAi8D7wAtDLe38w8LD3/DxcmOdb\n3uONyRz77LPPVsMwOk9Dg2qPHqoVFe6xoSHfPYrB5MmqJ52k2qePX9+m/TZ2bE67FHnfunZVHTPG\nPQYCrjuBQAHfT1UFVmoSMjYtzV9VG1X1IlXtp848tMV7f6Wq3uQ9b1DVAap6pvf4SDrnNAwjOYoi\nAmXWLFca8re/dbaVWCxcmNMcQZH3rakJvvrKPfo1GHzHb0HezxSwFb6GUaIUVTKyYBBeew369Im9\nf9mynA0A0fftqqvcoz82BQJFcD+TwPL5G0YJUygRKCmRqF5A376udvD48Vm9IP++VVW50M7ox0K+\nn8lG+5jwNwyj8AiHXZH4cBj27Wu/v0sXeOWVrA8AxbZqFzIY6mkYhpFzgkEn3B94IPb+ffvgG9/I\nqimoKHwmaWDC3zCKlIIM48w0fuxrdXV7h/DOna31Ampr074f0Z8vKp9JJzCzj2EUIcVqkkiL2lq3\nQjg6RQTQDLzFQH4QmMufK4PccENqboFCz9SZCmb2MYwSptRNEjEJheDVV2H48Ha7BBjIKl5pPo/p\ne6dQU9O2HGZHxLufxbZqNxVM+BtGEVLqJom4+L6AqNrB4m0BYCqz+blOSWlQLMf7aWYfwyhSitEk\nkVFqa+Huu2HjxjZvK84MtJdu7Kq+gF7Lk8uMVyr300I9DcMoD2IUjfGlmgCcdhqsLp90YmbzNwyj\nPFi+3NUOjigY45uBAFizBo46quCLxuQaE/6GYXRIwYeVzprlSkc2NEC/fu33b9zoisZMiVVltjwx\ns49hlBiZtl0XZVhp//5O448mEHA5hAr+AjqPmX0MowyJrjebCU29KMNKV6+OXy9g6lTo1QsOO6ys\nZwIm/A2jhEhWUKdixinaMMi6Orc6uG9fEHFafyDgVgV/8QVs3eqqhpXpAGBmH8MoIZIx0XTGjJOP\nMMiMntM/2IMPwieftN/fp4+rKVAC5qBkzT6VueiMYRi5IRh0wjyR0Kyvby1KvmePe92RzAsGcysX\n0/UztBs4/Avwtf1oNmxwOYIaGkpiAEgGM/sYRonRUUqCqqq2VamqqnLXt2SJNF/t2QPTpyfvv0jo\n95g1K3Ht4GuugSuuKOCwpsxhwt8wyozGxrZVqRob89ufWPh+hkDADVAvvJC8A7tDv0ddndPwY1UN\n27ABFi+G888v+XUBJvwNo8yoqmr1fXbrln0HbmfWCPjmq4svbh0Ako00SspBHQzC+vUuVbTfMBJV\nty6gb9+SHQTM4WsYJUg8Z6lvEtmzx8m8Bx5wyTKzdW5I33bfmc+n7CyurXXCPh41NZm/UVnCHL6G\nUaYkEpi+SaS52UU/ZtrkE33u665rb4JJVXh35MCORcoO6lAIPvggtjMY4Lb
b3PRl2rSiGQQ6wsw+\nhlFiJLJ5ZztmP/rckPz5fPNQbW1bhy2knlO/U+koZs1yvoAY9QLYtw/WrXOzg1GjUjho4WKav2GU\nGL6A97XvSIGbTChoqkRq6dHnHj/ebR2dL3LGIOJmJpF2/lyYioDWegG1tfCLX8D777cvIL90KYwb\n5xzHRYwJf8MoMToS8JmM2Y8WtHPmOFMPtC2j2NH5ImcMgYCbKYh0bnYSa+YT6/wJ/QKhkNumTIlt\nClq40DmM+/dPrV5kAWHC3zBKkFQEfDoraaPj8W+91QXK+Fp/skTPGObMcf6IzvQp0czHJ+nZwaxZ\nbkXwwoXt9y1b5raaGnj99aIbAEz4G0YZk240TVVVq6ANBNwgEM9ck2iQyaQ5KtlVzkk7ouvq3Kg2\naRKsWtV+vypccAH86EdusCgSTPgbRhmTkhD0iGXqaWx0A8Htt8fWuDsaZDKdO6ijmY+/1sGfpXRo\nWgoG4c03nS9g0iR3wyLZt8+Zhx5+2HmaiyAiyIS/YZQwHQnVZEwk0UQPGI2NLhoHYMCA2OdLNMjk\nul5AOOwGKd+/MGdOCucLhdxFXnedcwZHs2VL63qBAh8ATPgbRomSjFCNNpGAU1wTaeAdRROlOsjE\nC03NVhbRtNc6BIPw3nsu4uepp2D37vZtJkyAn/2soDOFmvA3jBIlWZOOL7DDYbjwwlYB/fLL8dun\nap9P9Bl/YNizxwnjrVuzu6q3M7OdmNTVuW3UKBf+GU2hZwpV1YLczj77bDUMo/M0NKj26KFaUeEe\nGxoSt584UdVZwd02cWLnzjljRsfniqamRrVLF9VAoPURXN9nzEjuvKlca2f7GZeaGtX+/dveQH/r\n3dvtq6nJ0MkSA6zUJGSsaf6GUaJkY0FXItKx3Tc2ti7sAmeLTyXOP1XHdcbrE/jrAoYMgRUr2u7b\nvNltEya4FBIFEhGUVnoHEblaRN4RkWYRiZtISEQuEZG/i8haEZmazjkNw0iejnL7RzJ+vBO2vtBN\nJU4f0qv1G5l2ols3V3DrZz9LfgDpKG1Fp9I9dIbly+NnCgUXEXTBBQVRLyBdzf9vwJVATbwGIlIB\nPAh8A9gA/FlEnlXV1Wme2zCMDBIMOoHdmZlCOAz/+AdUehKlstK9DoeTC+lMdpbSmc/nOpqI5cvd\nY7xMocuWOV/AwIEwd27+/AHJ2IY62oB6YHCcfUFgScTracC0jo5pNn/DKA4i7e1du6qOGeMefft7\nTY2zr9fUxLbLJ2t/T9Wu7zNjhvtMKj6EjDF2bGw/QOQ2cmRGT0kB2fyPBtZHvN4ADInVUERCQAjg\n2GOPzX7PDMNIm0hzD8BXX7nnfsqH225rDauMTtgGyWvlnVmQBhmM7ukMdXUuS+jMmS4raCyWLnUR\nQ0uW5LBjSdj8ReQFEflbjO3yTHdGVWtVdbCqDu7du3emD28YRhaIrAzWtStcdVWr/T0QgP37WweD\nioq2dvlYAj2efb6z6ah9k1AqPoSMEgrBRx+5HEBHHhm7zdKlOa8d3KHmr6oXp3mOT4BjIl738d4z\nDKPIibVa1l8Eu2ABrF7tTNzgbBw/+hH07NnWLh+plVdVxZ8JpBO9lPHons7gRwTFWxeweDE88wzc\ncUdOIoJyUczlz0A/ETleRLoC3wGezcF5DcPIMpGrZVVd+puZM+Htt2H+fHj11da2gYAT/NHRR9dd\nBzff7AR7Y2PrTGD3bjeARJJK9FLBsmSJmwX06tV+n6qLCDr55OzPApJxDMTbgCtwNvw9wGd4jl3g\nX4DfRbT7FvAe8AHwb8kc2xy+hlF4RDtno5293bq555ELtUBVpL2TNpYDt6HBHcf/XLduGVyIVYjU\n1LibE8sRXFHRqYsnSYdvWpq/qj6tqn1UtZuqfk1VR3nv/1NVvxXR7neqerKqnqiqP0/nnIZhZI5U\n4t/9kEm/vKIfxunb02+4oa193y/K0rW
ri3iMtrfHc+DecINzDoM7XirrBYqOUAjmzWu94EiamrJ6\n8bbC1zDKlFTj3+MJ68jcQPPnJ1+QJV4UzvjxbY+T0+icfBAvU2hFRVYv3oS/YZQpqYZOdhQymapD\nNl77XKelKAj8TKFTpsATT8AJJ8C992b14sWZiAqPwYMH68qVK/PdDcMoWTqz8jXTRVeMzCMib6hq\n3HQ7Pqb5G0aZ0tnUzJkW+jag5AcT/oZRxvjC1vcr5lr4JltDwMg8JvwNo4zJRtKzVDT5BQtcCghw\njwsWmPDPFSb8DaOM6Wy+nHjkPIOm0WlyscLXMIwCpbP5cuKRak7/dGsIGJ3HNH/DKGMyHVYZHQ5a\nVZW4IHw6NQSM9LBQT8MwMopv86+qcknfzASUW5IN9TSzj2EYGcVPvhaZpC3Vso5G9jHhbxhG2sTK\nEZRpf4KRWczmbxhGWsSL8CnLNA1FhAl/wzDSIlG4aEEUUTFiYmYfwzDSwsw7xYlp/oZhpIWZd4oT\nE/6GYaSNmXeKDzP7GIZhlCEm/A3DMMoQE/6GYRhliAl/wzCMMsSEv2EYRhliwt8wDKMMKdisniKy\nGfi4kx8/HPg8g93JB8V+DcXefyj+ayj2/kPxX0M++n+cqvbuqFHBCv90EJGVyaQ0LWSK/RqKvf9Q\n/NdQ7P2H4r+GQu6/mX0MwzDKEBP+hmEYZUipCv/afHcgAxT7NRR7/6H4r6HY+w/Ffw0F2/+StPkb\nhmEYiSlVzd8wDMNIgAl/wzCMMqTkhL+IXCIifxeRtSIyNd/9SRUReVRENonI3/Ldl84gIseIyMsi\nslpE3hGRH+a7T6kiIt1FZIWIvOVdw0/z3afOICIVIvKmiDyf7750BhFZJyJvi8gqEVmZ7/6kioj0\nFJEnReRdEVkjIgWV9LqkbP4iUgG8B3wD2AD8Gfiuqq7Oa8dSQESGAzuABap6Rr77kyoichRwlKr+\nRUQOBt4AxhTZdyDAgaq6Q0S6AK8BP1TVP+W5aykhIj8GBgOHqOpl+e5PqojIOmCwqhblIi8RmQ+8\nqqoPi0hX4ABV3ZrvfvmUmuZfDaxV1Q9VdS/wa+DyPPcpJVR1GbAl3/3oLKr6qar+xXu+HVgDHJ3f\nXqWGOnZ4L7t4W1FpSSLSB7gUeDjffSlHRORQYDjwCICq7i0kwQ+lJ/yPBtZHvN5AkQmeUkJE+gKD\ngOX57UnqeCaTVcAm4I+qWmzXMAeYDDTnuyNpoMBSEXlDREL57kyKHA9sBh7zTG8Pi8iB+e5UJKUm\n/I0CQUQOAp4CblfVL/Pdn1RR1SZVHQj0AapFpGhMcCJyGbBJVd/Id1/SZKiqngV8E7jVM4kWC5XA\nWcBDqjoI2AkUlA+y1IT/J8AxEa/7eO8ZOcSzkz8FLFTV/8p3f9LBm6q/DFyS776kwPnAaM9m/mvg\nv4lIXX67lDqq+on3uAl4GmfWLRY2ABsiZoxP4gaDgqHUhP+fgX4icrznYPkO8Gye+1RWeM7SR4A1\nqvof+e5PZxCR3iLS03veAxdA8G5+e5U8qjpNVfuoal/cf+AlVR2X526lhIgc6AUM4JlLRgJFEwGn\nqhuB9SJyivfWRUBBBT1U5rsDmURV94vIbcASoAJ4VFXfyXO3UkJEFgEjgMNFZANwt6o+kt9epcT5\nwPeAtz2bOcCdqvq7PPYpVY4C5nvRYwHgt6palOGSRczXgKedLkEl8ISq/iG/XUqZHwALPUX0Q+D7\nee5PG0oq1NMwDMNIjlIz+xiGYRhJYMLfMAyjDDHhbxiGUYaY8DcMwyhDTPgbhmGUISb8DcMwyhAT\n/oZhGGXI/w++6U8tCYD1ygAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Wokallj1D21L", + "colab_type": "text" + }, + "source": [ + "Oh dear! The graph makes it clear that our network has learned to approximate the sine function in a very limited way. From `0 <= x <= 1.1` the line mostly fits, but for the rest of our `x` values it is a rough approximation at best.\n", + "\n", + "The rigidity of this fit suggests that the model does not have enough capacity to learn the full complexity of the sine wave function, so it's only able to approximate it in an overly simplistic way. By making our model bigger, we should be able to improve its performance.\n", + "\n", + "## Change our model\n", + "To make our model bigger, let's add an additional layer of neurons. The following cell redefines our model in the same way as earlier, but with an additional layer of 16 neurons in the middle:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "oW0xus6AF-4o", + "colab_type": "code", + "colab": {} + }, + "source": [ + "model_2 = tf.keras.Sequential()\n", + "\n", + "# First layer takes a scalar input and feeds it through 16 \"neurons\". The\n", + "# neurons decide whether to activate based on the 'relu' activation function.\n", + "model_2.add(layers.Dense(16, activation='relu', input_shape=(1,)))\n", + "\n", + "# The new second layer may help the network learn more complex representations\n", + "model_2.add(layers.Dense(16, activation='relu'))\n", + "\n", + "# Final layer is a single neuron, since we want to output a single value\n", + "model_2.add(layers.Dense(1))\n", + "\n", + "# Compile the model using a standard optimizer and loss function for regression\n", + "model_2.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Dv2SC409Grap", + "colab_type": "text" + }, + "source": [ + "We'll now train the new model. 
To save time, we'll train for only 600 epochs:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "DPAUrdkmGq1M", + "colab_type": "code", + "outputId": "34ad91e0-229b-479c-bd65-12ad1ed1c660", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "history_2 = model_2.fit(x_train, y_train, epochs=600, batch_size=16,\n", + " validation_data=(x_validate, y_validate))" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Train on 600 samples, validate on 200 samples\n", + "Epoch 1/600\n", + "600/600 [==============================] - 0s 422us/sample - loss: 0.5655 - mae: 0.6259 - val_loss: 0.4104 - val_mae: 0.5509\n", + "Epoch 2/600\n", + "600/600 [==============================] - 0s 111us/sample - loss: 0.3195 - mae: 0.4902 - val_loss: 0.3341 - val_mae: 0.4927\n", + "...\n", + "Epoch 598/600\n", + "600/600 [==============================] - 0s 116us/sample - loss: 0.0124 - mae: 0.0886 - val_loss: 0.0096 - val_mae: 0.0771\n", + "Epoch 599/600\n", + "600/600 [==============================] - 0s 130us/sample - loss: 0.0125 - mae: 0.0900 - val_loss: 0.0107 - val_mae: 0.0824\n", + "Epoch 600/600\n", + "600/600 [==============================] - 0s 109us/sample - loss: 0.0124 - mae: 0.0892 - val_loss: 0.0116 - val_mae: 0.0845\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Mc_CQu2_IvOP", + "colab_type": "text" + }, + "source": [ + "## Evaluate our new model\n", + "Each training epoch, the model prints out its loss and mean absolute error for training and validation. 
You can read this in the output above (note that your exact numbers may differ): \n", + "\n", + "```\n", + "Epoch 600/600\n", + "600/600 [==============================] - 0s 109us/sample - loss: 0.0124 - mae: 0.0892 - val_loss: 0.0116 - val_mae: 0.0845\n", + "```\n", + "\n", + "You can see that we've already got a huge improvement - validation loss has dropped from 0.15 to 0.015, and validation MAE has dropped from 0.31 to 0.1.\n", + "\n", + "The following cell will print the same graphs we used to evaluate our original model, but showing our new training history:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "SYHGswAJJgrC", + "colab_type": "code", + "outputId": "efcc51f6-f1f1-490a-ffba-ed283586f83e", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 851 + } + }, + "source": [ + "# Draw a graph of the loss, which is the distance between\n", + "# the predicted and actual values during training and validation.\n", + "loss = history_2.history['loss']\n", + "val_loss = history_2.history['val_loss']\n", + "\n", + "epochs = range(1, len(loss) + 1)\n", + "\n", + "plt.plot(epochs, loss, 'g.', label='Training loss')\n", + "plt.plot(epochs, val_loss, 'b', label='Validation loss')\n", + "plt.title('Training and validation loss')\n", + "plt.xlabel('Epochs')\n", + "plt.ylabel('Loss')\n", + "plt.legend()\n", + "plt.show()\n", + "\n", + "# Exclude the first few epochs so the graph is easier to read\n", + "SKIP = 100\n", + "\n", + "plt.clf()\n", + "\n", + "plt.plot(epochs[SKIP:], loss[SKIP:], 'g.', label='Training loss')\n", + "plt.plot(epochs[SKIP:], val_loss[SKIP:], 'b.', label='Validation loss')\n", + "plt.title('Training and validation loss')\n", + "plt.xlabel('Epochs')\n", + "plt.ylabel('Loss')\n", + "plt.legend()\n", + "plt.show()\n", + "\n", + "plt.clf()\n", + "\n", + "# Draw a graph of mean absolute error, which is another way of\n", + "# measuring the amount of error in the prediction.\n", + "mae = history_2.history['mae']\n", + "val_mae = 
history_2.history['val_mae']\n", + "\n", + "plt.plot(epochs[SKIP:], mae[SKIP:], 'g.', label='Training MAE')\n", + "plt.plot(epochs[SKIP:], val_mae[SKIP:], 'b.', label='Validation MAE')\n", + "plt.title('Training and validation mean absolute error')\n", + "plt.xlabel('Epochs')\n", + "plt.ylabel('MAE')\n", + "plt.legend()\n", + "plt.show()" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEWCAYAAACJ0YulAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi40LCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcv7US4rQAAIABJREFUeJzt3Xl8VOX1+PHPyQ4JEAhRtmBAEQg7\nRDQiJYgiasUflVpwQayI0rpUy1epK0WtuFQRS61LRVEUF6qioNSyiGhklUU2QQwS1hDWsGQ9vz/u\nzWQIWSaQySTMeb9e88q9zzxz73nuTObM89xNVBVjjDEGICTQARhjjKk5LCkYY4zxsKRgjDHGw5KC\nMcYYD0sKxhhjPCwpGGOM8bCkYKqUiISKSLaItKzKuoEkIueISJUfuy0il4hIutf8BhHp7Uvdk1jX\nayLywMm+vpzlPi4ib1T1ck3ghAU6ABNYIpLtNVsXyAEK3PnbVHVqZZanqgVATFXXDQaq2rYqliMi\nI4AbVDXVa9kjqmLZ5vRnSSHIqarnS9n9JTpCVf9XVn0RCVPV/OqIzRhT/Wz4yJTLHR54T0TeFZFD\nwA0ikiIi34nIfhHZISITRSTcrR8mIioiie782+7zn4vIIRFJE5FWla3rPn+5iPwoIgdE5EUR+UZE\nhpcRty8x3iYim0Rkn4hM9HptqIg8LyJZIrIZGFDO9nlQRKaVKJskIs+50yNEZJ3bnp/cX/FlLStD\nRFLd6boi8pYb2xqgR4m6D4nIZne5a0RkoFveCfgH0NsdmtvjtW3Her3+drftWSLysYg09WXbVERE\nBrnx7BeRuSLS1uu5B0Rku4gcFJH1Xm29QESWu+W7ROQZX9dn/EBV7WEPVBUgHbikRNnjQC5wFc6P\niDrAecD5OD3N1sCPwB1u/TBAgUR3/m1gD5AMhAPvAW+fRN0zgEPA1e5z9wJ5wPAy2uJLjJ8ADYBE\nYG9R24E7gDVACyAOWOD8q5S6ntZANhDttezdQLI7f5VbR4CLgaNAZ/e5S4B0r2VlAKnu9LPAfKAh\ncBawtkTda4Gm7ntynRvDme5zI4D5JeJ8GxjrTvd3Y+wKRAH/BOb6sm1Kaf/jwBvudHs3jovd9+gB\nYIM73QHYAjRx67YCWrvTS4Ch7nQ94PxA/y8E88N6CsYXC1X1U1UtVNWjqrpEVRepar6qbgZeAfqU\n8/oPVXWpquYBU3G+jCpb99fAClX9xH3ueZwEUiofY3xSVQ+oajrOF3DRuq4FnlfVDFXNAsaXs57N\nwA84yQrgUmCfqi51n/9UVTerYy4wByh1Z3IJ1wKPq+o+Vd2C8+vfe73vq+oO9z15ByehJ/uwXIDr\ngddUdYWqHgPGAH1EpIVXnbK2TXmGADNUda77Ho3HSSznA/k4CaiDOwT5s7vtwEnubUQkTlUPqeoi\nH9th/MCSgvHFVu8ZEWknIjNFZKeIHATGAY3Lef1Or+kjlL9zuay6zbzjUFXF+WVdKh9j
9GldOL9w\ny/MOMNSdvs6dL4rj1yKySET2ish+nF/p5W2rIk3Li0FEhovISneYZj/QzsflgtM+z/JU9SCwD2ju\nVacy71lZyy3EeY+aq+oG4M8478NudziyiVv1ZiAJ2CAii0XkCh/bYfzAkoLxRcnDMV/G+XV8jqrW\nBx7BGR7xpx04wzkAiIhw/JdYSacS4w4gwWu+okNm3wcuEZHmOD2Gd9wY6wAfAk/iDO3EAv/1MY6d\nZcUgIq2Bl4BRQJy73PVey63o8NntOENSRcurhzNMtc2HuCqz3BCc92wbgKq+raq9cIaOQnG2C6q6\nQVWH4AwR/h2YLiJRpxiLOUmWFMzJqAccAA6LSHvgtmpY52dAdxG5SkTCgLuBeD/F+D7wJxFpLiJx\nwP3lVVbVncBC4A1gg6pudJ+KBCKATKBARH4N9KtEDA+ISKw453Hc4fVcDM4XfyZOfrwVp6dQZBfQ\nomjHeineBW4Rkc4iEonz5fy1qpbZ86pEzANFJNVd9//h7AdaJCLtRaSvu76j7qMQpwE3ikhjt2dx\nwG1b4SnGYk6SJQVzMv4M3ITzD/8yzg5hv1LVXcDvgOeALOBs4Huc8yqqOsaXcMb+V+PsBP3Qh9e8\ng7Pj2DN0pKr7gXuAj3B21g7GSW6+eBSnx5IOfA5M8VruKuBFYLFbpy3gPQ7/JbAR2CUi3sNARa//\nAmcY5yP39S1x9jOcElVdg7PNX8JJWAOAge7+hUjgaZz9QDtxeiYPui+9AlgnztFtzwK/U9XcU43H\nnBxxhmaNqV1EJBRnuGKwqn4d6HiMOV1YT8HUGiIywB1OiQQexjlqZXGAwzLmtGJJwdQmFwGbcYYm\nLgMGqWpZw0fGmJNgw0fGGGM8rKdgjDHGo9ZdEK9x48aamJgY6DCMMaZWWbZs2R5VLe8wbqAWJoXE\nxESWLl0a6DCMMaZWEZGKzswHbPjIGGOMF0sKxhhjPCwpGGOM8ah1+xSMMdUrLy+PjIwMjh07FuhQ\njA+ioqJo0aIF4eFlXfqqfJYUjDHlysjIoF69eiQmJuJcnNbUVKpKVlYWGRkZtGrVquIXlMKGj4wx\n5Tp27BhxcXGWEGoBESEuLu6UenVBkxTStqbx5NdPkrY1LdChGFPrWEKoPU71vQqK4aO0rWn0m9KP\n3IJcIkIjmDNsDikJKYEOyxhjapyg6CnMT59PbkEuBVpAbkEu89PnBzokY4yPsrKy6Nq1K127dqVJ\nkyY0b97cM5+b69ttF26++WY2bNhQbp1JkyYxderUqgiZiy66iBUrVlTJsqpbUPQUUhNTiQiN8PQU\nUhNTAx2SMcZHcXFxni/YsWPHEhMTw+jRo4+ro6qoKiEhpf/OnTx5coXr+eMf/3jqwZ4GgqKnkJKQ\nwpxhc3is72M2dGRMNaiOfXibNm0iKSmJ66+/ng4dOrBjxw5GjhxJcnIyHTp0YNy4cZ66Rb/c8/Pz\niY2NZcyYMXTp0oWUlBR2794NwEMPPcSECRM89ceMGUPPnj1p27Yt3377LQCHDx/mmmuuISkpicGD\nB5OcnFxhj+Dtt9+mU6dOdOzYkQceeACA/Px8brzxRk/5xIkTAXj++edJSkqic+fO3HDDDVW+zXwR\nFD0FcBKDJQNj/K869+GtX7+eKVOmkJycDMD48eNp1KgR+fn59O3bl8GDB5OUlHTcaw4cOECfPn0Y\nP3489957L6+//jpjxow5YdmqyuLFi5kxYwbjxo3jiy++4MUXX6RJkyZMnz6dlStX0r1793Ljy8jI\n4KGHHmLp0qU0aNCASy65hM8++4z4+Hj27NnD6tWrAdi/fz8ATz/9NFu2bCEiIsJTVt2CoqdgjKk+\n1bkP7+yzz/YkBIB3332X7t270717d9atW8fatWtPeE2dOnW4/PLLAejRowfp6emlLvs3v/nNCXUW\nLlzIkCFDAOjSpQsdOnQoN75FixZx8cUX07hxY8LD
w7nuuutYsGAB55xzDhs2bOCuu+5i9uzZNGjQ\nAIAOHTpwww03MHXq1JM++exUWVIwxlSpon14oRLq93140dHRnumNGzfywgsvMHfuXFatWsWAAQNK\nPV4/IiLCMx0aGkp+fn6py46MjKywzsmKi4tj1apV9O7dm0mTJnHbbbcBMHv2bG6//XaWLFlCz549\nKSgoqNL1+sKSgjGmSgVqH97BgwepV68e9evXZ8eOHcyePbvK19GrVy/ef/99AFavXl1qT8Tb+eef\nz7x588jKyiI/P59p06bRp08fMjMzUVV++9vfMm7cOJYvX05BQQEZGRlcfPHFPP300+zZs4cjR45U\neRsqEjT7FIwx1ScQ+/C6d+9OUlIS7dq146yzzqJXr15Vvo4777yTYcOGkZSU5HkUDf2UpkWLFjz2\n2GOkpqaiqlx11VVceeWVLF++nFtuuQVVRUR46qmnyM/P57rrruPQoUMUFhYyevRo6tWrV+VtqEit\nu0dzcnKy2k12jKk+69ato3379oEOo0bIz88nPz+fqKgoNm7cSP/+/dm4cSNhYTXr93Vp75mILFPV\n5DJe4lGzWmKMMTVYdnY2/fr1Iz8/H1Xl5ZdfrnEJ4VSdXq0xxhg/io2NZdmyZYEOw69sR7MxxhgP\nSwrGGGM8LCkYY4zxsKRgjDHGw5KCMaZG69u37wknok2YMIFRo0aV+7qYmBgAtm/fzuDBg0utk5qa\nSkWHuE+YMOG4k8iuuOKKKrku0dixY3n22WdPeTlVzZKCMaZGGzp0KNOmTTuubNq0aQwdOtSn1zdr\n1owPP/zwpNdfMinMmjWL2NjYk15eTWdJwRhTow0ePJiZM2d6bqiTnp7O9u3b6d27t+e8ge7du9Op\nUyc++eSTE16fnp5Ox44dATh69ChDhgyhffv2DBo0iKNHj3rqjRo1ynPZ7UcffRSAiRMnsn37dvr2\n7Uvfvn0BSExMZM+ePQA899xzdOzYkY4dO3ouu52enk779u259dZb6dChA/379z9uPaVZsWIFF1xw\nAZ07d2bQoEHs27fPs/6iS2kXXYjvq6++8txkqFu3bhw6dOikt21p7DwFY4zP/vQnqOobinXtCu73\naakaNWpEz549+fzzz7n66quZNm0a1157LSJCVFQUH330EfXr12fPnj1ccMEFDBw4sMz7FL/00kvU\nrVuXdevWsWrVquMuff3EE0/QqFEjCgoK6NevH6tWreKuu+7iueeeY968eTRu3Pi4ZS1btozJkyez\naNEiVJXzzz+fPn360LBhQzZu3Mi7777Lq6++yrXXXsv06dPLvT/CsGHDePHFF+nTpw+PPPIIf/3r\nX5kwYQLjx4/n559/JjIy0jNk9eyzzzJp0iR69epFdnY2UVFRldjaFbOegjGmxvMeQvIeOlJVHnjg\nATp37swll1zCtm3b2LVrV5nLWbBggefLuXPnznTu3Nnz3Pvvv0/37t3p1q0ba9asqfBidwsXLmTQ\noEFER0cTExPDb37zG77++msAWrVqRdeuXYHyL88Nzv0d9u/fT58+fQC46aabWLBggSfG66+/nrff\nfttz5nSvXr249957mThxIvv376/yM6r92lMQkQHAC0Ao8Jqqji/x/HDgGWCbW/QPVX3NnzEZY05e\neb/o/enqq6/mnnvuYfny5Rw5coQePXoAMHXqVDIzM1m2bBnh4eEkJiaWernsivz88888++yzLFmy\nhIYNGzJ8+PCTWk6Rostug3Pp7YqGj8oyc+ZMFixYwKeffsoTTzzB6tWrGTNmDFdeeSWzZs2iV69e\nzJ49m3bt2p10rCX5racgIqHAJOByIAkYKiJJpVR9T1W7ug9LCMaYE8TExNC3b19+//vfH7eD+cCB\nA5xxxhmEh4czb948tmzZUu5yfvWrX/HOO+8A8MMPP7Bq1SrAuex2dHQ0DRo0YNeuXXz++eee19Sr\nV6/UcfvevXvz
8ccfc+TIEQ4fPsxHH31E7969K922Bg0a0LBhQ08v46233qJPnz4UFhaydetW+vbt\ny1NPPcWBAwfIzs7mp59+olOnTtx///2cd955rF+/vtLrLI8/ewo9gU2quhlARKYBVwPl98mMMaYU\nQ4cOZdCgQccdiXT99ddz1VVX0alTJ5KTkyv8xTxq1Chuvvlm2rdvT/v27T09ji5dutCtWzfatWtH\nQkLCcZfdHjlyJAMGDKBZs2bMmzfPU969e3eGDx9Oz549ARgxYgTdunUrd6ioLG+++Sa33347R44c\noXXr1kyePJmCggJuuOEGDhw4gKpy1113ERsby8MPP8y8efMICQmhQ4cOnrvIVRW/XTpbRAYDA1R1\nhDt/I3C+qt7hVWc48CSQCfwI3KOqW0tZ1khgJEDLli17VPRrwBhTdezS2bXPqVw6O9A7mj8FElW1\nM/Al8GZplVT1FVVNVtXk+Pj4ag3QGGOCiT+TwjYgwWu+BcU7lAFQ1SxVzXFnXwN6+DEeY4wxFfBn\nUlgCtBGRViISAQwBZnhXEJGmXrMDgXV+jMcYc5Jq2x0ag9mpvld+29GsqvkicgcwG+eQ1NdVdY2I\njAOWquoM4C4RGQjkA3uB4f6KxxhzcqKiosjKyiIuLq7Mk8JMzaCqZGVlndIJbXaPZmNMufLy8sjI\nyDil4/ZN9YmKiqJFixaEh4cfV273aDbGVInw8HBatWoV6DBMNQn00UfGGGNqEEsKxhhjPCwpGGOM\n8bCkYIwxxsOSgjHGGA9LCsYYYzwsKRhjjPGwpGCMMcbDkoIxxhgPSwrGGGM8LCkYY4zxsKRgjDHG\nw5KCMcYYD0sKxhhjPCwpGGOM8bCkYIwxxsOSgjHGGA9LCsYYYzwsKRhjjPGwpGCMMcbDkoIxxhgP\nSwrGGGM8LCkYY4zxsKRgjDHGI2iSwsKF8PDDkJcX6EiMMabmCpqkkJYGjz8OOTmBjsQYY2ouvyYF\nERkgIhtEZJOIjCmn3jUioiKS7K9YwsOdv9ZTMMaYsvktKYhIKDAJuBxIAoaKSFIp9eoBdwOL/BUL\nWFIwxhhf+LOn0BPYpKqbVTUXmAZcXUq9x4CngGN+jIWwMOdvfr4/12KMMbWbP5NCc2Cr13yGW+Yh\nIt2BBFWdWd6CRGSkiCwVkaWZmZknFYz1FIwxpmIB29EsIiHAc8CfK6qrqq+oarKqJsfHx5/U+iwp\nGGNMxfyZFLYBCV7zLdyyIvWAjsB8EUkHLgBm+GtnsyUFY4ypmD+TwhKgjYi0EpEIYAgwo+hJVT2g\nqo1VNVFVE4HvgIGqutQfwVhSMMaYivktKahqPnAHMBtYB7yvqmtEZJyIDPTXestiScEYYyoW5s+F\nq+osYFaJskfKqJvqz1js6CNjjKlY0JzRbD0FY4ypmCUFY4wxHpYUjDHGeFhSMMYY42FJwRhjjEfQ\nJAU7+sgYYyoWNEnBegrGGFMxSwrGGGM8LCkYY4zxsKRgjDHGw5KCMcYYj6BJCnb0kTHGVCxokoL1\nFIwxpmKWFIwxxnhYUjDGGOMRNElBBEJDLSkYY0x5giYpgNNbsKRgjDFls6RgjDHGI6iSQliYHZJq\njDHlCaqkYD0FY4wpnyUFY4wxHkGVFApDjvH9ttWkbU0LdCjGGFMjBU1SSNuaxs4jW1mxbQ39pvSz\nxGCMMaUImqQwP30+GpKLFoSRW5DL/PT5gQ7JGGNqnKBJCqmJqUhoAWg4EaERpCamBjokY4ypccIC\nHUB1SUlIoW18Nhpdl8nD5pCSkBLokIwxpsbxqacgImeLSKQ7nSoid4lIrA+vGyAiG0Rkk4iMKeX5\n20VktYisEJGFIpJU+Sb4LjY6hpb1zrGEYIwxZfB1+Gg6UCAi5wCvAAnAO+W9QERCgUnA5UASMLSU\nL/13VLWTqnYFngaeq0zwlWWHpBpjTPl8TQqFqpoPDAJeVNX/A5pW8JqewCZV3a
yqucA04GrvCqp6\n0Gs2GlAf4zkplhSMMaZ8vu5TyBORocBNwFVuWXgFr2kObPWazwDOL1lJRP4I3AtEABeXtiARGQmM\nBGjZsqWPIZ8oPByys0/65cYYc9rztadwM5ACPKGqP4tIK+CtqghAVSep6tnA/cBDZdR5RVWTVTU5\nPj7+pNdl1z4yxpjy+dRTUNW1wF0AItIQqKeqT1Xwsm04+x6KtHDLyjINeMmXeE6WDR8ZY0z5fD36\naL6I1BeRRsBy4FURqWin8BKgjYi0EpEIYAgwo8Ry23jNXgls9D30yrOkYIwx5fN1n0IDVT0oIiOA\nKar6qIisKu8FqpovIncAs4FQ4HVVXSMi44ClqjoDuENELgHygH04+yz8xpKCMcaUz9ekECYiTYFr\ngQd9XbiqzgJmlSh7xGv6bl+XVRUsKRhjTPl83dE8DucX/0+qukREWuPnoR5/sKRgjDHl83VH8wfA\nB17zm4Fr/BWUv9jRR8YYUz5fdzS3EJGPRGS3+5guIi38HVxVs56CMcaUz9fho8k4Rw41cx+fumW1\niiUFY4wpn69JIV5VJ6tqvvt4Azj5s8gCxJKCMcaUz9ekkCUiN4hIqPu4AcjyZ2D+YEnBGGPK52tS\n+D3O4ag7gR3AYGC4n2Lym/BwUIWCgkBHYowxNZNPSUFVt6jqQFWNV9UzVPX/UQuPPgp3L+FnvQVj\njCndqdyO894qi6KahLkH4NphqcYYU7pTSQpSZVFUk+1HfgZg4eYlAY7EGGNqplNJCn69IU5VS9ua\nxqTv/w7AoKlDSduaFuCIjDGm5ik3KYjIIRE5WMrjEM75CrXG/PT55IceAiA3J4T56fMDG5AxxtRA\n5V7mQlXrVVcg/paamEp4xA/kAuGF9UlNTA10SMYYU+OcyvBRrZKSkMIT/Z0bu/2z/+ukJKQEOCJj\njKl5giYpAHRNaA9AmwadAxyJMcbUTEGVFKKinL/HjgU2DmOMqamCKinUqeP8PXo0sHEYY0xNFVRJ\nwXoKxhhTvqBKCkU9BUsKxhhTuqBKCkU9BRs+MsaY0gVlUrCegjHGlC6okoLtaDbGmPIFVVKIjHT+\nWk/BGGNKF1RJISTESQyWFIwxpnRBlRQAwiPzWbBpqV0l1RhjShFUSSFtaxrZ7OK7zavoN6WfJQZj\njCkhqJLC/PT5EHEIzY0mtyDXLp9tjDEl+DUpiMgAEdkgIptEZEwpz98rImtFZJWIzBGRs/wZT2pi\nKhJxBHLrEREaYZfPNsaYEvyWFEQkFJgEXA4kAUNFJKlEte+BZFXtDHwIPO2veMC5fHbXlmfTKroD\nc4bNsctnG2NMCf7sKfQENqnqZlXNBaYBV3tXUNV5qnrEnf0OaOHHeABoFteARqFnWUIwxphS+DMp\nNAe2es1nuGVluQX4vLQnRGSkiCwVkaWZmZmnFFRMDGRnn9IijDHmtFUjdjSLyA1AMvBMac+r6iuq\nmqyqyfHx8ae0LksKxhhTtnLv0XyKtgEJXvMt3LLjiMglwINAH1XN8WM8gCUFY4wpjz97CkuANiLS\nSkQigCHADO8KItINeBkYqKq7/RiLR1FSUK2OtRljTO3it6SgqvnAHcBsYB3wvqquEZFxIjLQrfYM\nEAN8ICIrRGRGGYurMln5WygogK82fefvVRljTK3jz+EjVHUWMKtE2SNe05f4c/0lpW1N499r3wMm\ncPm/hzL3j+/YUUjGGOOlRuxori7z0+dTELMFgNx9Z9gZzcYYU0JQJYXUxFTCG+4EIOxQazuj2Rhj\nSgiqpJCSkMJHIycCMOrcv9nQkTHGlBBUSQFgQOfzqFMHQg+1CnQoxhhT4wRdUhCBM86AUzwx2hhj\nTktBlxQAoupls3jTJrufgjHGlBB0SSFtaxobjy5iQ9o5pI6ZYInBGGO8BF1SmJ8+n8LIfQDkvvOe\nHZZqjDFegi4ppCamIgVRx80bY4xxBF1SAC
C3XqAjMMaYGinoksL89PnHXQxvysopgQvGGGNqmKBL\nCqmJqYQPut0z/9rSN2xnszHGuIIuKaQkpHBlz3ZwxR8AyD/UkKe/8eutoY0xptYIuqQA0CSmCcT9\n6Mzsac+nP35qvQVjjCFIk8KwLsMIabLWmdnViUIttENTjTGGIE0KKQkpjO5/I0Tvgm09UZT9OfsD\nHZYxxgRcUCYFgNjIWGj3Cay7BjZexjPfPMP4mVO55hq7h7MxJngFbVJITUwlpP0nUBAJU79Ad7fj\ngTGh/Oc/8MkngY7OGGMCI2iTQkpCCgNSGxUXHGyB5tYFIDo6QEEZY0yABW1SAHio/x+KZw4mQG4M\n4Fxe2xhjglFQJ4WUhBRGfHiXM3MgAfKcLsKKXzYGMCpjjAmcoE4KAL/vORRidsCa38HheAA+XvVl\ngKMyxpjACPqkkJKQQpchH8OetrC/NQArf/nJTmYzxgSloE8KAC890hXOLu4daF5du/SFMSYoWVLA\n6S1ceufH0PEdpyA3hhkbZlhvwRgTdCwpuP76m2GEDL4R6mZCbgyFFFpvwRgTdPyaFERkgIhsEJFN\nIjKmlOd/JSLLRSRfRAb7M5aKpCSkMLDdQIjIhpz6oFhvwRgTdPyWFEQkFJgEXA4kAUNFJKlEtV+A\n4cA7/oqjMu678D4IPwKrboRPXrfegjEm6Pizp9AT2KSqm1U1F5gGXO1dQVXTVXUVUOjHOHyWkpDC\n2ef95MysuBmw3oIxJrj4Myk0B7Z6zWe4ZZUmIiNFZKmILM3MzKyS4MryxqR46PC+M5Pj7FsYMWOE\nJQZjTFCoFTuaVfUVVU1W1eT4+Hi/ruuixBQuuMI9o/mn/gCs3bOWPm/0scRgjDnt+TMpbAMSvOZb\nuGU13t9GXOJMvD8dMnoCkFeYZ/sXjDGnPX8mhSVAGxFpJSIRwBBghh/XV2X6nns+vxv9jTOzbpCn\n3PYvGGNOd35LCqqaD9wBzAbWAe+r6hoRGSciAwFE5DwRyQB+C7wsImv8FU9lTXumF22Tt8H6QaBO\nWSGFjPnfCUfWGmPMacOv+xRUdZaqnquqZ6vqE27ZI6o6w51eoqotVDVaVeNUtYM/46ms+0Y1h6y2\nsHqop2zBLwu4/3/3BzAqY4zxn1qxozlQrrsOuvQ8CB+/Cdt6eHoMz3zzjA0jGWNOS5YUyhEVBfNn\n1yeybh68uhT+7exnUJTrpl9nicEYc9qxpFCB2Fj4w63ObTrJuBDmPQoL/4/0A+n0ntzbEoMx5rRi\nScEHDz4I4RHuSddfjYX/PQ0KBVpgh6kaY04rlhR8EBcH27eV2FQHWgLwyYZPeGXZKwGIyhhjqp4l\nBR81bgzbt3sVzH0MCsJQlNs/u90SgzHmtGBJoRKaNoW8PAiNyIFVw+D73wNYYjDGnDYsKVRSWBi8\n9vF6Z+aHIbC9O2CJwRhzerCkcBKGX96F7hdvhvS+8Moy+No5y9kSgzGmtrOkcJJefqp18cycJ+Fg\nM8ASgzGmdrOkcJKSk2Gr990intsGK26EQkFRbvvsNrschjGm1rGkcApatIDCQjin2w6n4OMpMK4Q\nVjnXSnr6m6fp+q+udoKbMabWsKRwikRg8ZymjH51BsT+7BSuHOZ5fuWulfR6vZcNJxljagVLClWg\nYUN4ZsRAXvxsLjRfBLs7wdsz4ccrAGw4yRhTa1hSqEJ39LqFP4+Kh0PNYdMV8M5MJzns7Aw4w0lt\nJrZh1GejbEjJGFMjiaoGOoZKSU5O1qVLlwY6jDIVFsJzz8F/VswhbWq/4if+kARnrPPMhkgIL135\nEiN7jAxAlMaYYCMiy1Q1uaJ0dm+9AAAVTklEQVR61lOoYiEhMHo0fPt2P56b82bxE5O/hr2tnF7D\n4TgKtZDbPruNPm/0sV6DMabGsKTgR/dcfBOvfbqKxFtHw9E4mLgZ/rUS3v3UU2fBlgVc+PqFlhyM\nMTWCJQ
U/u+XXnfn5lWfp+muvL/yMFPji7zB5vufchgXpTnJo+vemDHpv0AkJIisLLrwQNm2q3vir\n2+LFzhFdK1YEOhJjgpMlhWqy7JMU3p33PYmXfOEUfHcvbOlTfG7Dp6/A8pvZ+dWv+Xj9x1z4+oW0\neqGV51DWadMgLQ2eesp5eVYW1LLdQT756CPn78yZgY3DmGBlSaGahITAkNRu/PzlAL5YtYQ6Tbcc\nX2H5rTDjdfj0VZj5IqweQvr+dG777Dbino5jzKwnAPh+79eMnvYSjRvDv/7lvHTmTHjggWpu0Gnu\nhx+cHss33wQ2jn//Gx57zJneuhW+/z6w8VSHjz+GKVMCHUXwsqQQAJd1Oo/sjLP4Zksa5z15HQy6\nAWK8btaw5A6Y/i68vAQW/4G9R/eSnRkLwLKtq/j7jFkA3PXs1zT9e1MG/ymNJ5/Ko+fLKX4/Se7w\nYaeXUpUWL4bQUOdLr6j3I1K166isop7K9OnVt87CQucLsbCwuGzECHjkEWe6dWvo3r3i5ezcCR98\nUPXxbdwI8+eX/pwqTJwIGzac+noGDYKbbjr15VS1os9mTo5vvfQjR8reXr748kvnR0F1C6v+VRpw\neg4Xtkxh8ZgU0ram8ddPH2JZxmr2vDALjsQ7lXYkO4/dHSE91Sk71NRz17f8zb3Z+a/X4efuUBjO\nkg0ZLNl5G6P/O5rQTQMJT1hJaL09J6w7KiyK2KhYcvJziI+Op1FUI5rENKFb025kHckiNTGVC1qk\nIOIcXnvoEDz6qPPa4cPhww8hM9O58VBlZGfDV1/BlVceXz5xovNF+OWXxV+IeXmVW3ZVWrYMvv3W\nmY6IKLve4cMQEwOTJzvbpTIee8y5TMrNNxeXvfoq3H576cvLyYH8fGc6P9+5hHtZBg2C776D3bsh\nNxfeew/uucdJtA88AJ06wdChlYsX4Nxznb+qTuI54wzncwzw3//C3XfDgAHw+eeVX3ZlrF7t9OTK\na8Mvvzh3TIyOLn9Ze/dCo0bOdF4e/OMfzntQp87x9XbudO6n8uKLcOedzmf2zjvLX/aYMU791auh\nY0enLDe3/M+Ut/79nb+33OJb/api5ynUMG9/uZKxz2/hQNfH2DPzTudmPr7qNR7OXOXsyF58J7Rc\nAJEHoctb0GYmRB6GbT1g/lj47e+gMAxUICwHwo85y8hsB5Oc8ynajR3E+rHOIH+TZ5sCsHO0c52n\nelc/TN0L3+Dgx48T3et1wpr8eFwoBYcaE1J3HxJa4Ck7NHs0h7/8M/EPnEdoowxP+f63/8mxFYOo\nP3g0R5dfQ97mFKJbr6Aweid1+48nvGnxz8/8rJYUHm5EREtnT3T+znMpzIkmvNlaoqKE+tKM1X+e\nS/2rH6Huhc4YRFES3Hd0HzkFOaVuuoJ9zQltuI2osCjS//Szpzwk6hAtxiWTG3LwuPpRYVFEZiWz\n4bEPkDr7OPOxJE98BVlnEd5sLVLnABKa76mfUC+RY7ubsjvqW7bckw5A24eu5WDeXvJDD3Dwk8fI\nWTOAMy6bTJ0Bf+XYMdj1F6fe2Y9ezk9/db5t4x9MJrThNk8s2XPv4PDcOznjsbaIwK6H16FHY4+L\nN+zMDTS87Voyx60EIHFCq+O2SVRYFC0btASFn77twqFFgwk561uiUl8AoPBILLsfcT4XzR/qw7bH\nvyK82Voiz1rB2cOeYfPUuzm0cDgRbb6i0W1DSt3GAFooIIpI6T9Ofl7ZgpXjXwQg4W+dyPxwLPUG\njHc+L1sv5JyO+1l46xwAer/Wl18O/ex5T4uWl3XgCFvv30BUu69o96c/kZOfQ2RY5Al/9yy/kIxX\nJtHk7kFEtVpB2LI72fTWvcT+eryn3Y3qNKJbk27Mn9WYbf+eULw9m/1A43svLbWNRXGsnzCBY+v7\nULfPS0Sn/hPS+5D55j84b/wQdkd+d0Lcx/Kc7XB48TWEH0lg0ZRBALQZ
fyHRsUfJyc+hbeO23Hfh\nfaQkpJS5jcvi63kKlhRqsLStadz36kyWfnApOXuaoe3fg68fOrmFhR+G9tOLk0ziPOd+EN7i10Bm\nh+L5ix+Euc6+DK68HVosgtfSoCDKKQvJhcII55pPf+gAB86CuA2QVxeezC5ezpWjnLLvb4ZM9ydT\nvzFOjyg0FzZeDru6Qp0s59Ddknr8Czq+ByF5MHmhU/ZICBxqBs+7yaXr6xCdCd/cX9zeZkuh3nY4\nkAA3DIDDZ0Kjzc7zm/rD3nOgzSz45SL46C0Y1s/plX3xwvHrP28SnD8RGh+f+NhwJbz7mTN9wXNO\nIi4MP77OgLug5yQIKYS0u2H2BBhyNUz75MR2Fmm6zEni6/8f7HbOhueGy+Dt2e50fzjny+L6Y93/\n4bvOdtr3VCYcLaUbF3YU8t2fwI8KZLaHsGNQb4fzo+CT1+DwGfDjVcWvOX8CXH4PLL0VPnOHJkNz\noCCyuM5FT8LCvzjTIXnQfzScsRri10K9XVAQCiEFsKM7vDkHukyBK+526heEAQqhBc70Y15dxMv+\n5GyvDu8563h5BaQ8C2mjnefbfQTbe0CHDyCrDeQ0gPb/gfoZ8L477ndLCix4EK65Hva0gxaLoVDg\nWCx8McH5f+j7MBxtBN/dU7zu+xtCnf1wpCG8O8Mp23pR8fNnfeV8ruM2Qmg+rB4CaffCTX0hJN/5\nofXOp7Dx1ye+D4OvdT4nEYcgcb5TtrcNvPVfaPgTbO95fP2bL4KzvoH9CVA/g/CwML4a/lWlE4Ov\nSQFVrVWPHj16aLB6eenLmvzPFG1+7VPafGyynjk+QSPPe1O57B6l/QfO48Kn1Ong+/NRcGJZVJbz\nN/xQ1a2n7u6yn7v0z0riHN+XFb3D+XvxX5QR551cPNcOUvrfq3SZrDT/Tolb79vrovYqiXOL52O2\n+b7OJsudv96vR5Ub+znxtPm0uKzbq05sviw35ZnS37/SHpfcp4TkVH57RRxQhvdW6m9xlt9w4/HP\n19mjhB5VWi5wtmv9LVXzuQk7cmJZg3Tnb7NFSt1dFS8jJFfpf4/S98Hy68VsV4ZeWTzf/RUlcr9y\n9udK9M4ylu21LaN3KmGHy19Hp7eVK/6ghGcrV4xSxqJ/W/C3Sn9/AEtVK/6OrbBCTXsEc1Ioy7e/\nfKt/W/A3ve/L+7T9P9rrmc800TPGdtCGI6/VMx4/W5s820QbjzlfG/3xKq1z2WPKpaOVbq8pI3oq\nHd5VOr2lnP+8Muh655/3/OeVnhOVRhuUzlOcD2bsZiXpPeeL595mzoc/9GjxF2N4tpLyrCJ5xf+Y\nt3d2llX0hSP5Ff8ztvnM+Rv/gxOP559vW/EXzXFfAIeVGy4t/Usr9qfyk1STZU78JesULavXk0rT\nJb5/GXWceuKXLao0XuPEUtprQo86ybxo/vbOSs8XlF7jlSFXKQ9FKO3+U/x8ZeKJ2e5s+zp7ir9w\ny/qiQp0vssvuVvo86sx3/ffx2/KONkrbj4rLuv7bibFo3rsdZT3a/efkEkx5jzNXnPhF3/fBstfT\ncJNXG153PrPxq5WR3YuTcGmPq29ytmnC18Wfec/7eMx5lHzNpX9W7m+g/P5CZ/kl68RuLn1dceuV\nRj8qPf5VXNZ6tnJPcw0fF67f/vJtpb8nfE0Kfh0+EpEBwAtAKPCaqo4v8XwkMAXoAWQBv1PV9PKW\nGUzDR/6StjWN+enziasbx/c7vmdn9k72Ht1L5pFMIsMi2Xd0HyJS4Th8kdyfUghr9gMhdQ6Rn9ma\nkJhMJOLIcfsTCo80oPBwHIWHGxFSfyd6OI7Qxj+jOTFI+FGnUmgeIVHZRIYW7wM4tKYXEW3nA8qx\n5YOJ7PQZmhuN5kVBfiSE5hEW9wtHsxpzYFN7Z/iq4WZiw5oS1eAQuQW57N2XD8tuhZYLiT7agZCD\nLQlPWEnE2c4JgloYQv72DhQei6H+
uStpWLc+Ow7sYtdR9z4ZBWHEFrRFMjtRmN0YaT2X/fnbYOMV\nEJpL7LlrCTl0lmc/x7HVVxDWbDWhDbehR2PJj3K2L0cbOMM1e9tQN6IOR9jjDHNF7yY2P4mohntL\n3f+RfTCE7FWXOLF0nUL0wa6E7+tESN0DhDbYTsG+BCTyMHnbOhLZbi6aH0FovUwIP3rcUVyFR+sh\n4Tkc+akbh7Y3hYIIqLedmHbfkf1TJzj7vxBagCBo1tnQaBPRP44g5HAzorpNJ7RelrM994Q4Bzw0\nWe0seM01ELeRhi13EpLZmdBGW5zPQlZLctZcRkTiYrQwnLAzNhFSdz+qcOTHCzgUsgUa/QjLboM9\nbeHcmdBuBuSHw4KHoelyGpz1C2G5cRSeuZx9x7Jg23nOMGD4Eee9DsuhUeNCCvY158CPnaDVXMhu\nQr2zfuLQj92cYaGWC53PSnZTZz9awnfOvrW8upD4NRxuTGx0NAf4BS0IgbxoYsOacXR1f3JyC6HT\nO1AYSr1Gx4iOcPZca0EYhOSjRxpybOVVhJ+1HKSQ7G+vJ+fcd+CHa6HOXhpeMYHISHG229G9sKML\nZLWF/Cganj+LyPAwQrOSkMNnkhG6wLmYZmguNPE6e3P9/4OCcEj6kF+16s34fuNr5z4FEQkFfgQu\nBTKAJcBQVV3rVecPQGdVvV1EhgCDVPV35S3XkoIpS1GyS01MPe6fpqzyk13eySwzbWsaU1Y6O76H\ndRlGSkJKpZbxyrJXmL52OtckXVMlF1EsuW7veaDcuCpT15cY4urGkXUk64S/pb2PU1ZOYWf2TgCa\nxDTxbMvy2lTW8kuup7T342S2e3mfw5KfgZKvK2pf0Y+0to3bcvk5l5e6PSqrJiSFFGCsql7mzv8F\nQFWf9Koz262TJiJhwE4gXssJypKCMcZUXk24SmpzwPsuxhluWal1VDUfOACccPiJiIwUkaUisjQz\nM9NP4RpjjKkVZzSr6iuqmqyqyfHx8YEOxxhjTlv+TArbgASv+RZuWal13OGjBjg7nI0xxgSAP5PC\nEqCNiLQSkQhgCDCjRJ0ZwE3u9GBgbnn7E4wxxviX3659pKr5InIHMBvnkNTXVXWNiIzDOV52BvBv\n4C0R2QTsxUkcxhhjAsSvF8RT1VnArBJlj3hNHwN+688YjDHG+K5W7Gg2xhhTPWrdBfFEJBPYUmHF\n0jUGTryWdO10urTldGkHWFtqKmuL4yxVrfDwzVqXFE6FiCz15eSN2uB0acvp0g6wttRU1pbKseEj\nY4wxHpYUjDHGeARbUvDvDYyr1+nSltOlHWBtqamsLZUQVPsUjDHGlC/YegrGGGPKYUnBGGOMR1Ak\nBREZICIbRGSTiIwJdDwVEZHXRWS3iPzgVdZIRL4UkY3u34ZuuYjIRLdtq0Ske+AiP5GIJIjIPBFZ\nKyJrRORut7zWtUdEokRksYisdNvyV7e8lYgscmN+z73WFyIS6c5vcp9PDGT8JYlIqIh8LyKfufO1\ntR3pIrJaRFaIyFK3rNZ9vgBEJFZEPhSR9SKyTkRSqrstp31SEOcOcJOAy4EkYKiIJAU2qgq9AQwo\nUTYGmKOqbYA57jw47WrjPkYCL1VTjL7KB/6sqknABcAf3e1fG9uTA1ysql2ArsAAEbkAeAp4XlXP\nAfYBt7j1bwH2ueXPu/VqkruBdV7ztbUdAH1VtavXMfy18fMFzu2Lv1DVdkAXnPenetviy42ca/MD\nSAFme83/BfhLoOPyIe5E4Aev+Q1AU3e6KbDBnX4Z5zanJ9SriQ/gE5xbtNbq9gB1geXA+ThnmIaV\n/LzhXAwyxZ0Oc+tJoGN342mB8wVzMfAZILWxHW5M6UDjEmW17vOFc+uAn0tu2+puy2nfU8C3O8DV\nBmeqqnsneXYCZ7rTtaZ97rBDN2ARtbQ97pDLCmA38CXwE7BfnTsHwvHx+nRnwQCZANwHFLrzcdTO\n
dgAo8F8RWSYiRTdSro2fr1ZAJjDZHdZ7TUSiqea2BENSOO2o87OgVh1LLCIxwHTgT6p60Pu52tQe\nVS1Q1a44v7R7Au0CHFKlicivgd2quizQsVSRi1S1O85wyh9F5FfeT9aiz1cY0B14SVW7AYcpHioC\nqqctwZAUfLkDXG2wS0SaArh/d7vlNb59IhKOkxCmqup/3OJa2x4AVd0PzMMZZokV586BcHy8NfXO\ngr2AgSKSDkzDGUJ6gdrXDgBUdZv7dzfwEU6yro2frwwgQ1UXufMf4iSJam1LMCQFX+4AVxt436Xu\nJpyx+aLyYe6RCBcAB7y6mgEnIoJzM6V1qvqc11O1rj0iEi8ise50HZx9I+twksNgt1rJttS4Owuq\n6l9UtYWqJuL8P8xV1eupZe0AEJFoEalXNA30B36gFn6+VHUnsFVE2rpF/YC1VHdbAr1zpZp24FwB\n/Igz/vtgoOPxId53gR1AHs6vh1twxnDnABuB/wGN3LqCc3TVT8BqIDnQ8Zdoy0U43d1VwAr3cUVt\nbA/QGfjebcsPwCNueWtgMbAJ+ACIdMuj3PlN7vOtA92GUtqUCnxWW9vhxrzSfawp+v+ujZ8vN76u\nwFL3M/Yx0LC622KXuTDGGOMRDMNHxhhjfGRJwRhjjIclBWOMMR6WFIwxxnhYUjDGGONhScEYl4gU\nuFfaLHpU2RV1RSRRvK56a0xNFVZxFWOCxlF1LmFhTNCynoIxFXCv1/+0e83+xSJyjlueKCJz3WvZ\nzxGRlm75mSLykTj3XVgpIhe6iwoVkVfFuRfDf92zohGRu8S538QqEZkWoGYaA1hSMMZbnRLDR7/z\neu6AqnYC/oFzhVGAF4E3VbUzMBWY6JZPBL5S574L3XHOtAXnuveTVLUDsB+4xi0fA3Rzl3O7vxpn\njC/sjGZjXCKSraoxpZSn49xcZ7N7cb+dqhonIntwrl+f55bvUNXGIpIJtFDVHK9lJAJfqnOjFETk\nfiBcVR8XkS+AbJzLGnysqtl+bqoxZbKegjG+0TKmKyPHa7qA4n16V+Jcw6Y7sMTrSqXGVDtLCsb4\n5ndef9Pc6W9xrjIKcD3wtTs9BxgFnpvyNChroSISAiSo6jzgfpzLUp/QWzGmutgvEmOK1XHvqlbk\nC1UtOiy1oYiswvm1P9QtuxPnLln/h3PHrJvd8ruBV0TkFpwewSicq96WJhR4200cAkxU514NxgSE\n7VMwpgLuPoVkVd0T6FiM8TcbPjLGGONhPQVjjDEe1lMwxhjjYUnBGGOMhyUFY4wxHpYUjDHGeFhS\nMMYY4/H/AZN6yxQ6gTLNAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZIAAAEWCAYAAABMoxE0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi40LCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcv7US4rQAAIABJREFUeJzsnXl8VNXZ+L/PvVlARWij1oVAcGeT\nLaIpImHR4q7V1rVBQBHcSm1fK77S8qoVpVqpSy2oUOJS608/UjfckJFtQHapC4IaSEQUUkEQSTJz\nn98fd+5kZjKTTDKZbJwvn3yYuXPuveeee+95zrOc54iqYjAYDAZDQ7GauwIGg8FgaN0YQWIwGAyG\nlDCCxGAwGAwpYQSJwWAwGFLCCBKDwWAwpIQRJAaDwWBICSNIDM2OiNgiskdEujRm2eZERI4VkUaP\nrReRESJSEvF9g4gMTqZsA871hIjc3tD9aznu3SLyj8Y+rqH5yGjuChhaHyKyJ+LrAUAFEAx9v05V\nn6nP8VQ1CBzU2GX3B1T1hMY4johcA1ylqoURx76mMY5taPsYQWKoN6oa7shDI95rVPWdROVFJENV\nA01RN4PB0PQY05ah0QmZLv4lIv8Ukd3AVSJSICLLRGSniHwlIg+JSGaofIaIqIjkhb4/Hfp9nojs\nFhG/iHSrb9nQ72eJyKcisktEHhaRJSJydYJ6J1PH60Rkk4h8KyIPRexri8iDIlIuIp8DI2tpn/8V\nkeditj0qIn8Jfb5GRD4OXc9nIW0h0bHKRKQw9PkAEXkqVLcPgQExZe8Qkc9Dx/1QRM4Pbe8NPAIM\nDpkNd0S07ZSI/ceHrr1cROaKyBHJtE1diMhFofrsFJF3ReSEiN9uF5GtIvKdiHwSca2nisjq0Pav\nReTPyZ7PkAZU1fyZvwb/ASXAiJhtdwOVwHm4g5X2wMnAKbha8NHAp8CNofIZgAJ5oe9PAzuAfCAT\n+BfwdAPKHgbsBi4I/XYLUAVcneBakqnjv4GOQB7wX+/agRuBD4HOQA6w0H294p7naGAPcGDEsb8B\n8kPfzwuVEWAY8ANwUui3EUBJxLHKgMLQ5/sBH/AjoCvwUUzZXwJHhO7JFaE6/CT02zWAL6aeTwNT\nQp/PDNWxL9AO+BvwbjJtE+f67wb+EfrcPVSPYaF7dDuwIfS5J7AZODxUthtwdOjzCuDy0OcOwCnN\n/S7sz39GIzGki8Wq+oqqOqr6g6quUNXlqhpQ1c+BmcCQWvZ/QVVXqmoV8AxuB1bfsucCa1X136Hf\nHsQVOnFJso5TVXWXqpbgdtreuX4JPKiqZapaDtxby3k+B/6DK+AAzgC+VdWVod9fUdXP1eVdYD4Q\n16Eewy+Bu1X1W1XdjKtlRJ73eVX9KnRPnsUdBOQncVyAK4EnVHWtqu4DbgOGiEjniDKJ2qY2LgNe\nVtV3Q/foXlxhdAoQwBVaPUPm0S9CbQfugOA4EclR1d2qujzJ6zCkASNIDOmiNPKLiJwoIq+JyDYR\n+Q64Eziklv23RXzeS+0O9kRlj4ysh6oq7gg+LknWMalz4Y6ka+NZ4PLQ5ytC3716nCsiy0XkvyKy\nE1cbqK2tPI6orQ4icrWIrAuZkHYCJyZ5XHCvL3w8Vf0O+BY4KqJMfe5ZouM6uPfoKFXdAPwW9z58\nEzKVHh4qOhroAWwQkfdF5Owkr8OQBowgMaSL2NDXGbij8GNV9WDgD7imm3TyFa6pCQAREaI7vlhS\nqeNXQG7E97rCk58HRojIUbiaybOhOrYHXgCm4pqdOgFvJVmPbYnqICJHA48BE4Cc0HE/iThuXaHK\nW3HNZd7xOuCa0L
5Mol71Oa6Fe8++BFDVp1V1EK5Zy8ZtF1R1g6pehmu+fAB4UUTapVgXQwMxgsTQ\nVHQAdgHfi0h34LomOOerQH8ROU9EMoBfA4emqY7PAxNF5CgRyQF+X1thVd0GLAb+AWxQ1Y2hn7KB\nLGA7EBSRc4Hh9ajD7SLSSdx5NjdG/HYQrrDYjitTr8XVSDy+Bjp7wQVx+CcwVkROEpFs3A59kaom\n1PDqUefzRaQwdO7/wfVrLReR7iIyNHS+H0J/Du4F/EpEDglpMLtC1+akWBdDAzGCxNBU/BYYhdtJ\nzMB1iqcVVf0auBT4C1AOHAOswZ330th1fAzXl7Ee1xH8QhL7PIvrPA+btVR1J/Ab4CVch/UluAIx\nGf6IqxmVAPOA4ojjfgA8DLwfKnMCEOlXeBvYCHwtIpEmKm//N3BNTC+F9u+C6zdJCVX9ELfNH8MV\nciOB80P+kmxgGq5faxuuBvS/oV3PBj4WNyrwfuBSVa1MtT6GhiGu2dhgaPuIiI1rSrlEVRc1d30M\nhraC0UgMbRoRGRky9WQDk3Gjfd5v5moZDG0KI0gMbZ3TgM9xzSY/Ay5S1USmLYPB0ACMactgMBgM\nKWE0EoPBYDCkxH6RtPGQQw7RvLy85q6GwWAwtCpWrVq1Q1VrC5kH9hNBkpeXx8qVK5u7GgaDwdCq\nEJG6MjQAxrRlMBgMhhQxgsRgMBgMKWEEicFgMBhSYr/wkRgMhqalqqqKsrIy9u3b19xVMSRBu3bt\n6Ny5M5mZiVKt1Y4RJAaDodEpKyujQ4cO5OXl4SZdNrRUVJXy8nLKysro1q1b3TvEwZi2DAZDo7Nv\n3z5ycnKMEGkFiAg5OTkpaY9GkBjqxO+HqVPd/w2GZDFCpPWQ6r0ypi1Drfj9MHw4VFZCVhbMnw8F\nBc1dK4PB0JIwGomhVnw+V4gEg+7/Pl9z18hgqJvy8nL69u1L3759OfzwwznqqKPC3ysrk1u2ZPTo\n0WzYsKHWMo8++ijPPPNMY1SZ0047jbVr1zbKsZoao5EYaqWw0NVEPI2ksLC5a2Qw1E1OTk64U54y\nZQoHHXQQv/vd76LKqCqqimXFH0/Pnj27zvPccMMNqVe2DWA0EkOtFBS45qy77jJmLUN68Zf6mbpo\nKv7S9DnjNm3aRI8ePbjyyivp2bMnX331FePGjSM/P5+ePXty5513hst6GkIgEKBTp07cdttt9OnT\nh4KCAr755hsA7rjjDqZPnx4uf9tttzFw4EBOOOEEli5dCsD333/PxRdfTI8ePbjkkkvIz8+vU/N4\n+umn6d27N7169eL2228HIBAI8Ktf/Sq8/aGHHgLgwQcfpEePHpx00klcddVVjd5myWA0EkOdFBQY\nAWJIL/5SP8OLh1MZrCTLzmJ+0XwKctPz0H3yyScUFxeTn58PwL333suPf/xjAoEAQ4cO5ZJLLqFH\njx5R++zatYshQ4Zw7733cssttzBr1ixuu+22GsdWVd5//31efvll7rzzTt544w0efvhhDj/8cF58\n8UXWrVtH//79a61fWVkZd9xxBytXrqRjx46MGDGCV199lUMPPZQdO3awfv16AHbu3AnAtGnT2Lx5\nM1lZWeFtTY3RSAwGQ7PjK/FRGawkqEEqg5X4SnxpO9cxxxwTFiIA//znP+nfvz/9+/fn448/5qOP\nPqqxT/v27TnrrLMAGDBgACUlJXGP/fOf/7xGmcWLF3PZZZcB0KdPH3r27Flr/ZYvX86wYcM45JBD\nyMzM5IorrmDhwoUce+yxbNiwgZtvvpk333yTjh07AtCzZ0+uuuoqnnnmmQZPKEwVI0gMBkOzU5hX\nSJadhS02WXYWhXmFaTvXgQceGP68ceNG/vrXv/Luu+/ywQcfMHLkyLjzKbKyssKfbdsmEAjEPXZ2\ndnadZRpKTk4OH3zwAYMHD+bRRx/luuuuA+DNN99k/PjxrFixgoEDBxIMBhv1vMlg
BInBYGh2CnIL\nmF80n7uG3pVWs1Ys3333HR06dODggw/mq6++4s0332z0cwwaNIjnn38egPXr18fVeCI55ZRTWLBg\nAeXl5QQCAZ577jmGDBnC9u3bUVV+8YtfcOedd7J69WqCwSBlZWUMGzaMadOmsWPHDvbu3dvo11AX\nxkdiMBhaBAW5BU0mQDz69+9Pjx49OPHEE+natSuDBg1q9HPcdNNNFBUV0aNHj/CfZ5aKR+fOnbnr\nrrsoLCxEVTnvvPM455xzWL16NWPHjkVVERHuu+8+AoEAV1xxBbt378ZxHH73u9/RoUOHRr+Gukjr\nmu0iMhL4K2ADT6jqvTG/ZwPFwACgHLhUVUtEZCAw0ysGTFHVl5I5Zjzy8/PVLGxlMDQdH3/8Md27\nd2/uarQIAoEAgUCAdu3asXHjRs4880w2btxIRkbLGsfHu2ciskpV8xPsEiZtVyIiNvAocAZQBqwQ\nkZdVNVKvGwt8q6rHishlwH3ApcB/gHxVDYjIEcA6EXkF0CSOaTAYDC2GPXv2MHz4cAKBAKrKjBkz\nWpwQSZV0Xs1AYJOqfg4gIs8BFwCRnf4FwJTQ5xeAR0REVDXSyNcOV4Ake0yDwWBoMXTq1IlVq1Y1\ndzXSSjqd7UcBpRHfy0Lb4pZR1QCwC8gBEJFTRORDYD0wPvR7Msc0GAwGQxPSYqO2VHW5qvYETgYm\niUi7+uwvIuNEZKWIrNy+fXt6KmkwGAyGtAqSL4HciO+dQ9vilhGRDKAjrtM9jKp+DOwBeiV5TG+/\nmaqar6r5hx56aAqXYTAYDIbaSKcgWQEcJyLdRCQLuAx4OabMy8Co0OdLgHdVVUP7ZACISFfgRKAk\nyWMaDAaDoQlJmyAJ+TRuBN4EPgaeV9UPReROETk/VOxJIEdENgG3AF7ymtNwI7XWAi8B16vqjkTH\nTNc1GAyG1snQoUNrTC6cPn06EyZMqHW/gw46CICtW7dyySWXxC1TWFhIXdMJpk+fHjUx8Oyzz26U\nPFhTpkzh/vvvT/k4jU1aY9BU9XXg9Zhtf4j4vA/4RZz9ngKeSvaYBoPBEMnll1/Oc889x89+9rPw\ntueee45p06Yltf+RRx7JCy+80ODzT58+nauuuooDDjgAgNdfb9tdVot1thsMhv2LxlzS+ZJLLuG1\n114LL2JVUlLC1q1bGTx4cHheR//+/enduzf//ve/a+xfUlJCr169APjhhx+47LLL6N69OxdddBE/\n/PBDuNyECRPCKej/+Mc/AvDQQw+xdetWhg4dytChQwHIy8tjx44dAPzlL3+hV69e9OrVK5yCvqSk\nhO7du3PttdfSs2dPzjzzzKjzxGPt2rWceuqpnHTSSVx00UV8++234fN7aeW9ZJHvvfdeeGGvfv36\nsXv37ga3bVy8xV3a8t+AAQPUYDA0HR999FG9yi9dqtq+vaptu/8vXZp6Hc455xydO3euqqpOnTpV\nf/vb36qqalVVle7atUtVVbdv367HHHOMOo6jqqoHHnigqqp+8cUX2rNnT1VVfeCBB3T06NGqqrpu\n3Tq1bVtXrFihqqrl5eWqqhoIBHTIkCG6bt06VVXt2rWrbt++PVwX7/vKlSu1V69eumfPHt29e7f2\n6NFDV69erV988YXatq1r1qxRVdVf/OIX+tRTT9W4pj/+8Y/65z//WVVVe/furT6fT1VVJ0+erL/+\n9a9VVfWII47Qffv2qarqt99+q6qq5557ri5evFhVVXfv3q1VVVU1jh3vngErNYk+1mgkBoOh2UnH\nks6eeQtcs9bll18OuIPn22+/nZNOOokRI0bw5Zdf8vXXXyc8zsKFC8MLRp100kmcdNJJ4d+ef/55\n+vfvT79+/fjwww/rTMi4ePFiLrroIg488EAOOuggfv7zn7No0SIAunXrRt++fYHaU9WDuz7Kzp07\nGTJkCACjRo1i4cKF4TpeeeWVPP300+EZ9IMG
DeKWW27hoYceYufOnY0+s94IEoPB0Ox4SzrbduMt\n6XzBBRcwf/58Vq9ezd69exkwYAAAzzzzDNu3b2fVqlWsXbuWn/zkJ3FTx9fFF198wf3338/8+fP5\n4IMPOOeccxp0HA8vBT2klob+tdde44YbbmD16tWcfPLJBAIBbrvtNp544gl++OEHBg0axCeffNLg\nesbDCBKDwdDspGNJ54MOOoihQ4cyZsyYsDYC7mj+sMMOIzMzkwULFrB58+Zaj3P66afz7LPPAvCf\n//yHDz74AHBT0B944IF07NiRr7/+mnnz5oX36dChQ1w/xODBg5k7dy579+7l+++/56WXXmLw4MH1\nvraOHTvyox/9KKzNPPXUUwwZMgTHcSgtLWXo0KHcd9997Nq1iz179vDZZ5/Ru3dvfv/733PyySc3\nuiBpW5nDDAZDqyUdSzpffvnlXHTRRWETF8CVV17JeeedR+/evcnPz+fEE0+s9RgTJkxg9OjRdO/e\nne7du4c1mz59+tCvXz9OPPFEcnNzo1LQjxs3jpEjR3LkkUeyYMGC8Pb+/ftz9dVXM3DgQACuueYa\n+vXrV6sZKxFz5sxh/Pjx7N27l6OPPprZs2cTDAa56qqr2LVrF6rKzTffTKdOnZg8eTILFizAsix6\n9uwZXu2xsUhrGvmWgkkjbzA0LSaNfOsjlTTyxrRlMBgMhpQwgsRgMBgMKWEEicFgSAv7g9m8rZDq\nvTKCxGAwNDrt2rWjvLzcCJNWgKpSXl5Ou3b1WqkjChO1ZTAYGp3OnTtTVlaGWQuoddCuXTs6d+7c\n4P2NIDEYDI1OZmYm3bp1a+5qGJoIY9oyGAwGQ0oYQWIwGAyGlDCCxGAwGAwpYQSJwWAwGFLCCBKD\nwWAwpIQRJAaDwWBICSNIDAaDwZASRpAYDAaDISWMIDEYDAZDShhBYjAYDIaUMILEYDAYDClhBInB\nYDAYUsIIEoPBYDCkhBEkdeD3w9Sp7v8Gg8FgqIlJI18Lfj8MHw6VlZCVBfPnQ0FBc9fKYDAYWhZG\nI6kFn88VIsGg+7/P19w1MhgMhpaHESS1UFjoaiK27f5fWNjcNTIkwpggDYbmw5i2aqGgwDVn+Xyu\nEDFmrZaJMUEaDM2LESR1UFBgOqWWTjwTpLlnBkPTYUxbhlaPMUEaDM2L0UgMrR5jgjQYmpe0aiQi\nMlJENojIJhG5Lc7v2SLyr9Dvy0UkL7T9DBFZJSLrQ/8Pi9jHFzrm2tDfYem8BkProKAAJk0yQsRg\naA7SppGIiA08CpwBlAErRORlVf0oothY4FtVPVZELgPuAy4FdgDnqepWEekFvAkcFbHflaq6Ml11\nNxjSgd9vtCZD2ySdpq2BwCZV/RxARJ4DLgAiBckFwJTQ5xeAR0REVHVNRJkPgfYikq2qFWmsb1KY\nzsDQEExkmaEtk05BchRQGvG9DDglURlVDYjILiAHVyPxuBhYHSNEZotIEHgRuFtVNfbkIjIOGAfQ\npUuXFC/FxXQGhoZiIssMbZkWHbUlIj1xzV3XRWy+UlV7A4NDf7+Kt6+qzlTVfFXNP/TQQxulPmam\nu6GhmMiy5DATS1sn6dRIvgRyI753Dm2LV6ZMRDKAjkA5gIh0Bl4CilT1M28HVf0y9P9uEXkW14RW\nnK6LiMTrDDyNxHQGhmQxkWV1YzT+1ks6BckK4DgR6YYrMC4Drogp8zIwCvADlwDvqqqKSCfgNeA2\nVV3iFQ4Jm06qukNEMoFzgXfSeA1RmM7AkApmcmvtGPNf6yVtgiTk87gRN+LKBmap6ociciewUlVf\nBp4EnhKRTcB/cYUNwI3AscAfROQPoW1nAt8Db4aEiI0rRB5P1zXEw3QGBkN6MBp/60Xi+KnbHPn5\n+bpypYkWNhhaOiYqsmUhIqtUNb+ucmZmu8FgaDEYjb910qKjtgwGg8HQ8jGCxGBoBkyYq6EtYUxb\nBkMTY8Jc
DW0No5EYDE2MmdhqaGsYQWIwNDFmlruhrWFMWwZDE2MmthraGkaQGAzNgAlzNbQljGnL\nYDAYDClhBInBYDAYUsIIEoPBYDCkhBEkBoPBYEgJI0gMcTEzrw0GQ7KYqC1DDczMa4PBUB+MRmKo\ngZl5bTAY6oMRJIYamJnXBoOhPhjTlqEGZua1wWCoD0aQGOJiZl4bDIZkMaYtg8FgMKSEESQGg8HQ\nBmjOkH1j2jIYDIZWTnOH7BuNxGAw1BszYbVl0dwh+0YjMRgM9aK5R7+Gmngh+949aeqQfaORGAyt\ngJakATT36NdQEy9k/667mkewG43EYGjhNLUG4PfXPoeouUe/hvg0Z8i+ESQGQyNTV0dcX+JpAOnq\nMJIRWmbCqiEWI0gMhkYkHdpDU2oAyQotM2G1cWjsQUdzYQSJwdCIpEN7aEoNwJitmo62FLRgBInB\n0IikqyNuKg3AmK2ajqY0WaYbI0gMhkakLXTExmzVNLQl7c8IklZGW7GptmVMR2xIhrYw6PBISpCI\nyDFAmapWiEghcBJQrKo701k5QzRtyaZqMBjazqAj2QmJLwJBETkWmAnkAs+mrVaGuJiJYAaDoSWS\nrCBxVDUAXAQ8rKr/AxyRvmoZ4mFWLjQYWi4tKftAU5OsIKkSkcuBUcCroW2Zde0kIiNFZIOIbBKR\n2+L8ni0i/wr9vlxE8kLbzxCRVSKyPvT/sIh9BoS2bxKRh0REkryGVk9zp0EwGAw18fthwgQYOhQm\nT3bNz/ubMEnW2T4aGA/8SVW/EJFuwFO17SAiNvAocAZQBqwQkZdV9aOIYmOBb1X1WBG5DLgPuBTY\nAZynqltFpBfwJnBUaJ/HgGuB5cDrwEhgXpLX0eppKzbV5sQELBgaC89vuW8fqLrbWnsob0NISpCE\nOv+bAUTkR0AHVb2vjt0GAptU9fPQfs8BFwCRguQCYEro8wvAIyIiqromosyHQHsRyQZ+DBysqstC\nxywGLmQ/EiSG1DABC4bGxPNbekJEZP80Oydl2hIRn4gcLCI/BlYDj4vIX+rY7SigNOJ7GdVaRY0y\nIR/MLiAnpszFwGpVrQiVL6vjmF6dx4nIShFZuX379jqqmhh/qZ8JjxUz4feb9zt1tS1iAhYMjUms\n3/K66/bPwUmypq2OqvqdiFyDG/b7RxH5IJ0VAxCRnrjmrjPru6+qzsSNMCM/P18bcn5/qZ/CuydR\nOet1CGYx+6EgC96197uHpC3RliaBGZqftjQXJBWSFSQZInIE8Evgf5Pc50vcMGGPzqFt8cqUiUgG\n0BEoBxCRzsBLQJGqfhZRvnMdx2w0fCU+qj4bBMEs0AwqKoJMmQJTpjTNA2Ns+Y2PefENjY3xWyYv\nSO7EdXgvUdUVInI0sLGOfVYAx4Uc818ClwFXxJR5GTcSzA9cAryrqioinYDXgNtUdYlXWFW/EpHv\nRORUXGd7EfBwktdQb3IOyMHq9jpBuxICgNq88w4sWtQ0a0IYW356aIsvvhl0GJqTpHwkqvr/VPUk\nVZ0Q+v65ql5cxz4B4EZcAfQx8Lyqfigid4rI+aFiTwI5IrIJuAXwQoRvBI4F/iAia0N/h4V+ux54\nAtgEfEaaHO3+Uj8T35iIdl6KffXP6HHqV1iW4DhNY1s3tvyWTUuaM+ANOvbX0FND85NsipTOuCP/\nQaFNi4Bfq2pZ4r1AVV/HDdGN3PaHiM/7gF/E2e9u4O4Ex1wJ9Eqm3qngK/FRGazEwcHO9XN6/wV8\nsa6oyWzrxpbfcmlp2mJbyiJraJ0ka9qajZsSxev0rwptOyMdlWoJFOYVkmVnURmsxLZsyPUz/dl+\nrPEfDHnvQefjgPS9rcaW33JpaR23GXQYmhtRrTugSUTWqmrfura1VPLz83XlypX13s9f6mfakmm8\n8ukrKEqGlYEgBJwAWXYW84vmU5Brevj9jZamkXh1aqpBh/HH7D+IyCpVza
+rXLIaSbmIXAX8M/T9\nckLRVW2dVze+SlCDAFQFqwBQlMpgJb4SnxEk+yEtQVuM7cybKoCgJQpRQ/OTrCAZg+sjeRBQYClw\ndZrq1GLwlfgIOsHwd0XJtDJx1CHLzqIwr7D5KmdoVpoz8qs5O/OWYNYz2lfLI9kUKZuB8yO3ichE\nYHo6KtVSKMwrxLZsAk4AAEEY228sXTp2oTCv0GgjhmahOTvz5vbHNKUQNdpX8iSb/TcetzRaLVoo\nBbkFPHr2o2RamUjo38ItC8k5IKdZhUhLCj1tbvbHtmjO5QSaOwN1U4bFmxD85Ellqd39In37uAHj\nAJjw6gQcHD7a/hHXvXpd1G9NrWo3xiipLajs++uIsbl9NM1p1mtKjai5ta/WRCqCpEH5q1oj5XvL\ncXCitr340YuMGzCuyTuzxjBrtJUOuCXY65uLtjg7PxmaUog2t8BuTdQqSERkN/EFhgDt01KjFkhh\nXiEZVkbYVwJwcQ93Yn9Td2aNMUpqKx2wGTHunzSlEN1fBXZ9qVWQqGqHpqpIS6Ygt4CFVy9k2pJp\nbN29lbH9x4bNWk3dmTXGKKmtdMBmxGgwtAySmpDY2mnohMRE+Ev9+Ep84cit1uhvaI11Nhjqi3nO\nU6OxJyQaQvhL/QwvHs6+wD4ABncdzL3D72XSpNb1lBqV3dDWaSu+wNZAKuG/+yXF64r5IfADGvq3\ncPNChvxjCP7S/Sj+1LDf0JrDq034btNhNJJ64C/1M2vtrBrbq5wqky7F0OZo7SP6+voCjRms4RhB\nUg9iU6Z4WGKRc0DsUvMGQ+umtUf31ScYoyUJzdYo0IwgqQeRqeVFhKM6HEXpd6WoKje8fgNQPUnR\nYEgHTdnJtIXovmR9gS1FaLYkgVYfjCCpBwW5Bcwvmh+O2PKV+Ljj3TtQlIAT4PrXrmfNV2so6lPU\nYDNXaxyN1Jf94RpTJV4bNXUnsz+FV7cUodlSBFp9MYKknhTkFoSFxPpv1rtTM0MR1EENMmPVDOas\nm9OgtUpmzoQbb3Qfouzs1jMaqQ+tdcTVlCRqo+boZPaX6L6WIjRbikCrLyZqq4GE13SPmYejKBXB\nCnwlvvodzw833ABVVeA4UFHRNqNMTCRN3SRqo+ZM1rg/UFAAkyY1r+Bs7qSYDcVoJA3EW9NdUQRB\nPbWk9FTYPJyck86t3/F8rgDxsO222VG01hFXU5KojVrKqNmQXlqjFmgESQOJXdNdECpLBqBz3kad\ndkxcYtG7HiOKwkLXnFVRAZYFjzzS+h6mZDCdYd3U1katsZMxtH1MipQUiEyVAjDl7greeXIITlCw\nbbj2t5vpcu6z5JSfS/nHvZMKQTQdrCFZzPNiSDfJpkgxgqQR8ATKzk3deXDCOQQDGWRmOmjRcAJO\nAGfOW1hOe7KzpFXZPeNhOq+WQbqCFhJFizXHPTfPWnKks51Mrq0mwsu9VRGowMFBfvVT7M3DOWVw\nJYucheii30MgC0clLZE2rXFJ2+7WAAAgAElEQVRRrbZCc3Z0jbUuTWT9491faJ57bp615Ggp7WQE\nSR3U1ln4/TDlHxVUOP1xOi8BQDsvxem8jCUacsDn+cCuxFKbrCxpVOdya1xUq63Q3C9wqkEL8eqf\nKFqsOe65edaSo6W0kxEktVBbZ+H9VlE5BMd6Cyk6A81diiUWllg46oZgSe5yLpj6CIdvvxTy3oPO\nxwGNc6db46JabYXmfoFTCVrw+2HKFDeww3Gq65/o/jbHPTfPWnK0lHYygqQWaussvN+coGDRnhH2\nn7j43E9Z89Uatu3ZxrxN8wg4AWzLhs5+Zu/7A4HtAeYUZzVosmI8WuOiWm2FlvACNySCKzwACgkR\ny6quf6L72xz33DxrydFS2sk422shGY0kyp7c2c/QOUOpDFaSYWVwznHnMG/TvPB8EwBbbO4aeheT\nBk9qlGszDsnmozW2/dSpMHmyOziyLB
gxwtVOWkv9DU2LcbY3EqNGuf8XFdWM548dCUx4tZiKYAXg\nppZfuXUlVU5VWIgIQpadFQ4X9ohdcbE+mHkFzUdrbPtYTaqlCJHWKJQN1RhBkoBIjcO2q7fXZ3JY\n2e6yqO8iwvSR06OEhRf1VRmsJMtuPLPX/ojpjOqmpZhCImnuwAVD6phcWwmI9Y/MmOE+7LWtFFfU\np4gsOyvh7446zNs4j6mLpoZXVPRSrQQ1SGWwst45uloSzbmantcZTZ5c932q7RitdTXA+tASckpF\nYvKvtX6MRpIAzwSwbx+oun+RDvd4o9+C3AIePuthrn/teoLqLoAVlYcLeOXTV3h5w8tYlsWjZz8a\nlWolntmrJVFXKHRzjipTjaJq7vrvz7SEwAVDahhBkgDPBFBcDLNnQyBQ/ZDX1umU7y2POk6kf0RE\nwgLGcRxufP1G3rv6vag1TlqqWauujra5w2FT7Yyau/77M8mY24zZsmWTVkEiIiOBvwI28ISq3hvz\nezZQDAwAyoFLVbVERHKAF4CTgX+o6o0R+/iAI4AfQpvOVNVv0lF/zwdSVBT9EE+dmrjT8TSMfYF9\nYSFiYZF/ZD5rtq0Jzy8Bd/0SX4mPSYMntVgB4lFXR5uoI2+qDiBV239jjIpNZ9dwavM37g/aYmt/\ndtImSETEBh4FzgDKgBUi8rKqfhRRbCzwraoeKyKXAfcBlwL7gMlAr9BfLFeqavqSZ8UQ+5DX1ul4\nqygWrytm9trZBJwAWXYW/Y/oz6qvVoXLCUKGlcGWXVvwl/pbrCDxHvCcnNo72ngdeVMv1JVKFFWq\nE/xiNde22Nk1F21dW2wLgjKdGslAYJOqfg4gIs8BFwCRguQCYEro8wvAIyIiqvo9sFhEjk1j/RpM\nXZ2Ot4piUZ+iqOzAc9bNoSLghgd36dSFsu/KmLFqBrPXzmbBqAUU5BakFArc2MQ+4NOnQ3l54o42\nsiP3FuoKBNzv3kJdqb4g6Ry51SWIalv+1vOlQfo7u9Y+eq0vbd2HUlxc/fy0VkGZTkFyFFAa8b0M\nOCVRGVUNiMguIAfYUcexZ4tIEHgRuFtb6KzKyGV5AaaPnO464recTMmiQjcPV+4yKoIVFK8rBuD0\nu35P4PPTyDj69yycfF/KwiSVTid2JOgJES+qprbj+XyNv1BXU43c6rNeus/nCsnIJ1DE1eDSQaJ6\ntGXh0hJDlhsLvx9mzap+fjIyWqegbI3O9itV9UsR6YArSH6F62eJQkTGAeMAunTp0qgVSKZDi/di\nl+8tJ7hlIMx5B4JZYFfCqOGQuwyAaf9aRGD2GxDMIvBeJdOOf4SXftfwtybVjjd2JJiTk/zx0rFQ\nV1OYOOq7XnpOTrTAtCz3+8SJ0Lt349cvUahsazeN1EVrnPyZDD6fey/BHYCMHt06rzOd80i+BHIj\nvncObYtbRkQygI64TveEqOqXof93A8/imtDilZupqvmqmn/ooYc26AISUVfce6I5DYV5hVgfjIJA\nNmgGBDOhpBBbbIr6FLF1/fGugAn9tnZZp6g5J7H4S/21/l48dzP7KpwGx+d7I0Fv/ejy8uTj/b19\n774bFi6EceNSn6eRzjXLvboVF9dvvfTycld4gNsRqEYnQmxs4tXDzMNovUTez3bt3MCe1kg6NZIV\nwHEi0g1XYFwGXBFT5mVgFOAHLgHerc1MFRI2nVR1h4hkAucC76Sj8rVRl8027ovd2U/xqxuRtWNx\n5beCFcTqtohBXQYxbck02h1zAthnQlDBrqKk02zuWPA+2XZ2eMa750PJOSCHiW9MTDgj3l/qZ9bO\nSaj1OmgmGZkWhYU29SV2JFgfW3WszyTVUXO6TByxWQwyQm9F5DUmOreneXn7ikSHijc2ierRkn0I\nbdnslioNfaZbWpumTZCEfB43Am/ihv/OUtUPReROYKWqvgw8CTwlIpuA/+IKGwBEpAQ4GMgSkQuB\nM4
HNwJshIWLjCpHH03UNiajr5tcwCXVfz/Di4exb8Bs0ACCIKCeeuZxNXd5n4eaq6p1HLYSSQshb\nALnLcBT2BfaFfSheOhURwVEHR53wjPhIQeIr8RE8arFrOls3ij5HDgT6p/W6a6OxzFLpMHFE1g3g\n2muhS5fk1kuPbRPveOl8wWPr0ZI7o7YQkZRu6vtMt8Q2TauPRFVfB16P2faHiM/7gF8k2DcvwWEH\nNFb9UqG2mx/7YvsCr7oZgPPeBWsyOBaZmRZDztvMJ98EonfO9WN1WY5q9Xx4RXl89eNs+35bOJ2K\npRZS9lPki9Oxj1lSY0a8N5+lQmycdUWsXNOe4W+n/tA1tCNvyZE3sXXzzAvJBBXEzXBQUG0qa6oR\nY0vtjNp66G5z0BLbtDU621sMtY3ool7s0ohOHW+WOxy8axDW4tsJdp0fdrgDqGqN1CpBDfLKhlfI\nsDLQoLpC5Kn5UJWBLFHWn/URvpKp4bBhbz7LlLsreMdpjxNMz1K/ydKSI2/iaRXJdLK1RVC1tBGj\nV9/w4MbXNJ1RSx5ANCWNqf21xDY1gqQeRD4MkHxnEdWpk42jQlUVPPiHY1DnLix7Ms6ZN8EPOaGQ\n4OU4ODWO46hD90O688E3HxD8YjBUWqAWVVXKDX/7f+hp95BlZzF95HTK95ZTmFfIlKsLWfRU3Q9d\nU8xfiR01R052rG1+SlMQWbfaMhdEkqgzTuT8bk4hGm9OUFN0Ri15AFEbjdnxN/bAoiW2qREkSRL7\nMIwaVb8RXUFuAVOuJtypi7j7Oo5gaTbWG4/hBNVd3/3qn7lrv8cIE0VZ+/Va90veArArEUewMxyC\nXd/F0SA/BH7g+teuBwg74efPL4g7L6J47ma2OR/CD4fwetUkgkctbrJU9vFW6ktl9nsqL37svsmO\n+BKVSyVsOl3ECrfy8upccukm3aG7je3raWh4fyLSof21tHBoI0iSJPZhgPqP6CJHEjk57lwDT6g4\njg0K4ojrbO+8FFtsBnUZxJadWyjZVRJ9sNxlriO9ZCi/PP8ont/1fng+g5cYsjJYSfG6Yrp09LHz\n8O5M/DscueRTzjr2LG6+vAcV+44CuoAEwX4dRg2nssuKGo77dOC1p1fnyJDZhgiChnbUifZNZsSX\nqFzs9pYwczmR0Jszx902Z056BFy6Hfr1uffJ1qWujj82ym/MmJoL30XSEk1RjY0RJEmSk+OOmlWr\nF7qqK2VIPCJHEr171xQqVoZDMG8BDg6WWvhL/VQ5VfEPlrsMzV3GM9+6fpdYghrkiTVPENw8EJ3z\n6/AkyH/3fQqt6I57+zU0b0WRkqFkdVuXVCr7hnYQsbm74q0dnnDfBOa3VEZ8ifZNdsSXqJy3vaXM\nXI4n9JI14TWUpvAVJXvv61OX+oT3B4PuWkW1CeJkByYtLaS3PhhBkgR+v9vRB4PVk84efzz+Ou71\neRC8Mj5ftVDK6f4JEz9cTWXQjko7XxeRjvlIAk4ASk6PmOioKI47qz4AbhS1A1aQk3+6l+l1mLX8\npe58mNm3XEmgyiYjM8jovzxD0bnH1anFJMrdlYyPpLaVJBO9+Mn4fdI1WvTOveXVKwgGu4a39+nT\nOMdvCLFCryHX3twmnVi8a6ioqJmapqHBBcmG9ydaqyjRMeuK/mtu82cqGEGSBJFmGM+3AdGO1Mjs\nr8mouxDtJ7AsePRRGHdhb3oPmF9j0qFt2Zx97NkcftDh9DuiH/M2zmPuhrnJXUCezxUcoYmO9Cl2\n/9YVweox4LiPQWG3wvAKjfESSHqd+b4Fv0ErFBSCjsOMFz/h8e1jGNRlED0O6UFRn6IakyO9TrWy\nsmuUnX7SpCTvQYmvxkqS3jkKCmD6s+t58qXPOLL3p9B5MP5SogRPZACCt5+/1I8v4GP6s+dS/nHv\nxrOxRwg9e+ebZGTOR9XGcWDlSveez58PdE4+wCHZYIh6BU109jPq
gY1QMoSiC7vWee317ezSbdLx\nBMVNN8GDD7rv3vXXw7x5cNZZ7uDPe7duuaXhk2nj/RZvraKcnIaHfDeF0E0nRpAkQeSoJzKvUkZG\ntSM1Mvurp+7OmlW7QPH5qo/pOG7KdTc/U3Wyx96H9Y7bMYwbMI7fv/N7pi2ZFt4mCIO7DGbxlsVR\njvrDTvycb0YND0109FWHGpcUgtqADao88OwqGDwV27I59ahTWVK6BEXDM+u9zlzz3gX7f8OCSfMW\nENQgCzcvZOHmheFsxkBUOn2vUwU7qZc5chb/ll1byLAywAHbssPp971zPLn+SaqOroLvYd6cbEb3\nHR0WPBWBCm58/UaCTjC8MmXvw3ozvHg4FSX9sTb/wKPXQ0FB77oehaSIFHoctZhr//IMn88t4p13\nqn1BxXM3M+fgmhpWPEFQmzYW217JlKtR9uAsijrPByKEf4zm4ffDlCnVz2tkZ5eozo0ppGsMaiKE\nmje4U3X/nzsXXnnFraeXsubBB918b40VHegJGm+tokjzdJ2+mjjtlVCrbiXmLiNIksAbgUyZQrgz\n8BKsefmnYhO7eOpubfbTwsLqJH/gvgSxI5HYDMKR3DfiPgDuX3o/KGTYGfQ4tAdXnnQl8zbOY0P5\nBjb9dxPbv98Oud9EzVUBOLzXBnYsdghUVoFVRbDdNlj4PwTzfCwMLgyX2xfYxxTfFC7ucTG2ZRMM\nO/oLowVTCM/JP2fdnKgFvrxOtcvOIvdF6exn6qL4o2evo6sIVODghNdvOe/485i3aR6Pr36cWWtn\nIYgr3CJMexXBCl759BUssVBVEMJ+Jm9lyrH9xlJR0h/nH2/hBLO4/j2HeX+axuEnfhHWqBoaEh27\nfHLRucdBX1i0qLqjIO89KrdHa1jrv1nvCjwNRqXFqU0biyTZcrFlvcwJYSEQxwTpje5j/VmR9ylW\nSLvXfxfTR07Ht7ccShO3Y21tHaXhWTZj+o6BRbeFtVvLqjY5e3jbIwd39dGAIwcxsZpsJJ5ASdbf\nlEjYxzOntSZzlxEkSVJQ4AqSyM7AmwHtjSRsG84+21WtPeGi6morxcU1H4KCAtecFbn4U33V//tG\n3MeFJ1wYHvk/vvrx8APqK/Fxx7t3RPtPSk8NC4BtuS8hvxoKXwyBfR3g9UdBrRpZiRXl7c/fZv4X\n8xEJOfVzlyG5y0O/R5NhZbD6q9VUBCuilhrOsrPoN3Af5XunMvf7nTww+wEcdci0M/GN8kWNZqf4\nplARrAhrVopS5VTxafmnYcERDCb2H325280PGhYmEXh+J2vzMJyQ7yhYVcXcN76F7//O7LWzeeis\nh6LMimP6joky2c1cNZMXP3qRvkf0pVN2p6iOJry42auu2YiyrtXmkLmbIe89+g3cR9Yb1cJmZ8VO\n7lhwR3gFzYpgRVgQxAqmRMEQyZbzytqWTTAYRFFmr50dvj432Wcu6lhUVsKLL1abdi0LRoxw34WC\nApjw2Eb2LZiI5i3AyV3G9a9dz4AjBrj3Tp244ehRWktJ3XnjIoVeMBhkxqoZZO78OEq7vekmeOAB\nCAbde52ZpfxmosWDDyb/bsXWxxvEWGKR8eVgxnSak9AEGKlRZGQG2dLpGfylNf2GdZloI4/dmsxd\n0kKX8mhU8vPzdeXKxllQMdFaFbEjieJiePJJqAoFXGVnw4IFiSNKEqmvyaq2UxdNZfKCyQQ1iC02\ndw29i8K8Qk7/x+muwx1cITJnfs0U9qWnwuz3wMkEBCQAwybD4HujBE+s5hGJhcVpXU/jx+1+zLxN\n86gKVkWZ17zfl5ctr/EbwPgB43ns3MeYuWomN75+IwEn4Aqh2s4f8ZvdZQXnHX8eS0uX8s3e+Csv\nW1gghEf761cdxI2XnUigykKtinB7CMIZR5/B/C/mRwU72GJz3gnncXzO8TVMiu0y2kV3kp7/q1Kx\nMwI88twn9B6wJ2pk7fm8Dm53
MPcvvT9qGeZMK5P3rn6vRqdbHx8JEHefmXPX8+K8cvZ1foNFzjQU\njXpmCu+eROWs1yGYSXa2xUN/teOabfx+GDosSEWF1hh8xMMWm2v7X0uXjl2ihEdk3rjYMuV7y8Nl\nI7VbW2yuPXRWWLstKHCv6/p7lhHcfQh2h+387fYCev+kd+J3K6at4uWxA8LvjTjtaJdt1ZrpoHju\nZmbtHJVwTlZd5seGTnpOFyKySlXz6ypnNJJ6Es8JF7st8vuMGa5WEggkHlEkcuzVK2Qxzmi0ILeA\nR89+NGwusbacQdDJRtV2/Rslhe6LX1IIjgUIockskOfjoG1nsGfO3Lhrp8SiKCOPGcn7W9+nIlhR\n43cHh4WbF8bZ02X1V6uZuWomN7x+Q92CL85vwVEjOP6nx/PqxlejjmuLHSUMzj/hfG796a0hkyH0\nXuC+/E98+ysCR7nHzrAyuLjHxfg2+6K0nqAGmfvJ3Bqh1opSEahgim8KUwqnuOYonytEnKDgODDh\n0ec4/5qPokbW/97wbzLtTIJOMEqIWGLxm4LfRAU+JDJxxg40vHLxTE7jBoxj5tz1XPfLYyDQHeyB\nZI7243ReEn5mopJ9lhRyeJ8S1hzZienPXh/l6/CX+pn496+prDwvHA1IydDw/YlN8SMItmWH/WXg\nZmpQ3KANW2z3L1TGG2xYYpFtZzN95HTWfLUm/JuI0G/gPsYN8JZT8LElYwvaZz384y2CwSwmXBrg\nsX+tZ9Kkmr6v2PY59/hzo/LY2ZY7r8vBQUqGosEsVK2wf8sXeLaGgC4oAF/gWYILFsfVODzB5V1L\nvHsZ+77XMHeFoiaTDZBoKowgSZF42oj3vaioesJXQ6JW6hWyGDKnxI5Axw0YF3bY55x0LhMX21RU\nKmIrg05X/JJJVZ4PMkLhwJYDZ98IucvYs+i2qLDhsOCJpfRUKBnG3ODXvG8nGUkWw4qtK1ixdUW0\nGa6kMPH5a/w2hD8vuS/qmBeecCGHH3Q4f1/1d8DtFF779DVu/emt1e1WAAUFXTn4nQL+vGQxiuKo\nw2fffkbfn/Tl/a3v16hrvFBrB4e3P38b32YfY/qOoV/367EzTsQJCojitPsmnCvNCbodqKJUBaui\njmeJxe9++jseXv5wXLOav9QfzgTdL3A9E6/oHbqfVZz3p+mcNbQT5XvL2bJrS9g04zgOE16bAMCL\n8453hUio3Y777hquGjoy6pkJ+8Fyl7EZ+PsqyLajl4MunFNIZWZ/sM4EzQS7CqvbQmwr09UsLBtB\n3CALz68BPL768bgh7ZZYjO03lm3fb+Pfn/w73CZeduvyveU8du5j9DuiX3hgNPGNiQBRJkgpuTX8\nXDih1EG9B+yJa2KKbB/v3njBHJERkmsOa8fsJUKgyjVbzdo5iuCCmhqHv9QfDgrRoCIi5ByQE/7N\nE1wigiUWjjrMWTcnfAzXpNgZddx31OcTJk2C9VkzmfLRi/T9vi/T/9/ykLaYxeyHgix4146eLNlM\nS3UbQZICiZyStY0o6kN9wycTjVojt/eeDz6fUFiYRUHBvUx4dRcznBlohPPc6vI+qoLGhg3n+QB3\n9AjuS05pATrnbTSYxfvvVcKo5bWaNwA6ZXfiu8rvcNQJj1zjdc7SbSHqzXcRhfbVKzBLXui3iLpF\nHiPbzubWQa7AeGLNE+FRcFCDFK8rjjJpFK8r5vHVj4f3D2owynQVD1tsBhwxgONyjuPZ9c+Gr6Ey\nWMnfV/2dLHsWv/ztWzxzb4Hrd3rjrwR/8iEDTglyZIcjeW3ja1Q5VTWu+3c//R2dsjuFOznPJzBn\n3RxuOuUmHlj6QLWPZ3EOTkUPcGxwhLlvfMvc73+PIGTama4/K3R4R90gg9+c+jxvzaput40HP0Fh\n3r0ATHjVFTZnH3c2cz+JHhBELgc9xTeFqmBVdXaFddWrMY3tNzb8ud8R/cIj76I+RczdMDdK84
rE\n2/76xtdraDIAcz+ZGzZ1eWanfYF9/HXZX8MmLw0qJwzYyob3AjgB9/qCXeYzxbecKYVTAKKiAGPb\nZ3Tf0Wzbs41XPn2Flz99mWw7m6I+RYybUEC/I9zw8m8OeZ7NBy9EVdkX2Bd+TrzAlqAT9CqOow4T\n35gYHsiFTXManX1i2pJpbN29lZX/zUStt0AzcaSKnO6fMXOVn+tevQ6Atz5/Cz6rHtxVVgajBpf+\nUj+Fd0+i6rNBZB4zCd8dU5tMmBgfSQpMnequghgMuo52b36A9/2uu6qjRFKdCZ6u8L/w3JAI+7OF\nq9o76uCUnoJ+cXqUj8IWOzxyZ9EkePdOd4QrVTDsD65vpbFYeU3cIABB0NJTXWd2rP+ktIAeeyZw\nfP5XHH7iFxzc7mAe9D/omi1CI0FVJcPKwBKrRtRXPAThqA5H8eXuL1Hc7MwXnHABW3dvjau1AHRe\n9yhf/vs61LHDbWOdPo0MK4OgE4w7Mr9n2D3srNgZV5DFmouqzXuuRhBp+hOELh27sGXXlur7Khbj\n+o/jvcWVfLzyJ5C3IOxbennDy2G/lS02llg1MirYYpNhZUS3V4yJse+tv2N99kwUt31V3SCJGnWP\nwMIiOyObUX1GRWks8fa5ddCtTF82ncpgZfxjiYVVNgj9Ykgoq7YbIp5hZWCLHa674GoFkffg1kG3\n8hf/X8KDDq+9AJ5c82TiDBO1YGGRf2Q+lcHK6jx5tRHh9xt4isPW3Vsp210W/XvEPb9w6iPceulg\nCnILmPBYMX+/+ZLwvbhw6iMMPCWYknaSrI/ECJIUiNVIvIlRXpRIY6YVT6cg8swlnv06cgJfPEen\nJ2iCWwbC2l/hrBnljopDnZnV5X3yj8hn5Vcrw2HJZx97Nlt3b61pvqqLRbfBu3e5gsoKYA/7Pxg8\nFREJrdnidlhnH3u26+TfnI/zj7ei/Cp2lxX89qe/5bt93zFz9cyQJpVcEIF3vdkZrp3+pnk3JezE\nauC99E4WYlehRcOqhWCcNsi2s3norIei/UTJnCOJ65DQP4Rwu1likWFlEHACNTQFzyz4xqY3auZ5\niyTFgYQgnHzkyRzZ4UiAuIEakZx59Jkc/aOjmbFqRsLnyBab844/L/GE3ThtJgjH/OgYPvv2syiH\nfg3B2UhEPQP1eBZjy0vucjLtTMb0HcNHL53PwtlnRNyLPyKD760RFVmvehpne/qJl4TRi1+fPj06\nBUoqYXyJUovUJVSSFWCe6auoT1Hc2Pneh/WuIWhuOvJZHvzTOQQDGWTaDv3OWUfhhVvodOz5FOb9\nJeHM+MI5heGOOFLz8SZBLlxSFa1lRJjXsrMsHrrhl6zJ2OE6XZ0qLMvi4bMeZtyAcW7Y8N0VvB0T\nUBDMXcaD/gcZ229stRCJ48S/8MQLOTDzQJ5Z/0xU+4w4ekTYiT5v07waZp+ERMy30YhOwouSCpuo\nQhFtPQ7pwbxN85IXIt45akSzFcTV1BycsClHEPKPyGdP5R4+2vFRaL/qDurTQz7l1kG3sm3PttoF\nSSgLdaz5Mxk8QbZm25qwVmdh0aVTF0p2xj9n3yP6cuEJF9acoxRDwtF/gnuvKJu+3RQuJgiH/vd8\nvv7PiWhotdLGQhC6durqXmNMfTqMu4g9h71du+CKuOcKYXOqWB+APSTiXiwIm1sj5wmlAyNIUiR2\nQpI3WXHNmup0CammiogURBUV7rwTx6kWDl6Z2JxV9RVg4Vm2ceyskYKmMK8Q39MFOAFwgqCOTf8j\n+3Pf6P7AhdUHLCuAxQXuU5brHt83yhe2tRf1cW3rxeuK2bZnG5QVkPH0RAKVrhnLvvpn/Payn/Ld\ngBciolR6M3VRF3cUjYOoUL63PFz/KVfDe8VBKiqqojo2r9POsrOojOPEl9zlHH7g4Xz+7edRbWKJ\n5UZwlfiYu2EuL7+zHb64LdxJe53hmL5jOLjdwbyy4RU+2f
FJdUcQp6MXhGv7Xxv+3u+Ifkx8YyKL\nNy9OOBJPmtICrOJ3cQKuBke/2dDnKTRk4om8rjXb1lSba2I6tI8Yzmk7TqueN5SIOianxqPHIT34\n9am/DgcFzFg1I/ybg8PmnZsT7vvw8oc55kfH8LNjfsYrn74S1zwY1GBCQVRrAEcI12x6CtvmPA2B\nrFAAyg2Q/0S4zOldTmdJ6ZLEufDq0DLKviuLW5/dG/rDYW8lvP5ILKwo/6J2Xlrve9FYGEHSSEQK\nC9uOzsHjOd0buvZD5LGr1zEJpdoodiPD4q3r0RABVvzqxnBUSOV7lRT3fYGCCdUT7cKjmkL3Or3U\nFLNnR6eCiacNebmlIif2+Uv9zFo7y9VSFp2Iu1hXBuII1/74ae4b0RVGxLRHLRPvCgpgwbs2xXPL\n+OjAx1jCChQ3hLTfEf0Ywxjm7tjAtvdqOupnr53Nr0/9tevUDHF5r8urJ6eVDozqbE+ffCcjh3aM\nskF3yu4Uns+TiAzLfe28dpi6aCqVwcooIeL5YA4/6PCwfd7T4MJzbGIQBGvzMIKBjFDHZMPKcbB2\nVI3Q7UFdBrFo86LqneN0sE7uspqzTT28jrL9DvjhUKxuC9HOy8PFvYCMeOl2njj/iaiJnfGuJVJj\nizQDeRMcw6HDsfWpqwONE0Di+c08lJCACWQBGeCo66f7yX8gdxmZViY79u6oXYjE0Xq8wAFFcRx3\nkbq9fUrYsrAKDdSh0fxLaigAAB5fSURBVMWYs9zUq3EGHXEGLplWZnjQli6MIGkkIs1cW7a42YGD\nwepZ7ZGhwMmu/RDp34i3jklWllsu0boekyY1IGqsZEiNsNpE1ztmTOJ5MrHaUKLcUr4SnxsBBOGX\nXByhXbZN0YVd4587QahzZN0KCroC9+IvvaBmAsxDbOyrf0bwi8FIt/cg1AEGnACdsjsx49wZvPjR\ni1zc42LK95bz3H+ec1/amM62x/cTmDQ4uo6RQk5E6H94fwq7FfLdvu/Ytmcb//3hvywpXcLM1TPD\noZ/ePpEzqb2os1hNEKrzl0Xa7jOtTDd89sCDmeurhIAAFmC7jtmYkXePQ3qwrGxZ2MyYefRSWAxV\nlVW1dmiWWIzInMz8p24nWGWDWogodpaDjBoRnogXmyQzdvLf1EVTyTkghxc/erGGz8i2bG4puIVO\n2Z3IOSCHNV+tiXJ21+jAa5lvdHrX06OXYojRoCR3OadxK0sWZxJstw354VCk20KcPJ+riTiK61iy\nkJJhDD4tC3+pv9ocGAcpGYZGPCd5O0cz8sK+Yc3Tu88byjeQ/eMS/mfGmzzw7MoaS24nur5DJlzO\n9pyXE54/koFHDmT6yOlpj94ygqQRiV2DInK0DvUzM8Ub0XsRYN46Jp6GEamRiESn0052XQ2Pogu7\nMvuhIJWVQbKyrISdOUQLR9t2Bajf754vVhuKl1vKS/+RaWe6HVruMjJHn8XYHxXXOdmqthxk8cp5\no/6gBsGBay/oRZeOHcg5YBQT31hTYyLnuAFutI6/1F/dyUeMZjOzJG7bxBNyfj/4VkC/7uu5YWP/\ncEe4r6QfU+6uYMrVheF94uV2ir3W2vxZM4+YyVwvJHfNaAhm1AidzrQyAXj4rIerw3PHFMHoDIrn\nbo7S5DKsDPr+pC8rv1qJs2UgWjKMnQeeHxKo7vFUBSdgcW2nOXQZWnOiXuQ1hKMEv+iHlpwOebuB\nU5CSoVjdFkGuH0cdHl7+cI1Z34kc7FIyFHWy3QSkDmGhaWHRzm4XpW0IgoZG7YKQufV0lhf/iWCF\ngAoqQTQUfbbunJvQ1x4GtbAzHf5242WU5xwUrcnhdtZj+48Nt2W/Ppdx8xIJv0PP/nZc+FnufVhv\nJj7+L1YsPQAnbwGVXVbQ6diPWTTrHG6bHWDRwmEhn0y1KfLQb37J9gjBdOg3l0QLkhhtLDM0l8cT\n6E0RAmwESSMRGx0VO1
qH+pmZaozoi+P7QaBa69i5szpq7KabXD9NXansY/FMQ8loMQUFruP/ySfd\ncz3+eLS2FalFrflsCPbO0yA0Yi3MKwy1WQEP91rJmoy/AW6HVpCbWHg1lBqJFCPMa4kyLEO0YMg5\nIIc1Uf6a+BPACnILoKwA39OwPmrRshMJXjUQcpdC6anonLd5R9uz6CmYP7+ASYOTv1GJBGn53nKs\n3Pdds9Tha7Dm/Q11Msh4+zEKug5hxw5l48FP8Lg+XjNFR25NTc7TIArvnkTlnNfRYBZrMoWMjOrM\nul4SR7dNJuH3w9Sn4z8/vhIfFSX90TmhyDrLfTlUM2FREC0ahtN5SY1Z4UV9iqpNoMQEahyzFFlC\naMKgoMf4CYpNlp3FxT0uZtGWRdUTFmMnSe69jcer7JBQ1HBnvX3ZWSDL4OwbkB8O49qLT2DchUX4\nS/dUD3wgPOs+Ns3J6KvdzzXev7IC1v15oLsMg12JPeZst43LClhxbwFUKCJBrHNuhgEzybKzuHvM\nCG5+u1ow/fqyvtyw3o22q6mNjUDyVnFtv2trLOeQVlS1zf8NGDBA08nSpart26vatvv/0qWJt91z\nj/t/fY6ZlaWana1qWW4aSMuqPmYk99zjlvfSRYrEL9fY1y1SfU7bdusR7zqy2wV0/N/m6NItS+O2\nT23nqavdlm5ZqvcsvEeXbklcKJky9Tn30i1Ltf3d7dX+P1vb390+fNzIa8vIiLhvtqMZZ0xWa4ql\n1ojbVaxgjTarzzOS6Bq9OmWcMVkt2wk/M5mZ6p4z43tl7Klq/5+t9yysvlm1nXv8rSVR9R0/3i07\nY0b0PnXd16VblmrGGZMVqQo9M4HQX3X7xLZnuA6vjFeZIsoU1P4/W8e/Mj58PyPrHnufI7/X+G2p\n+1wiAXXtWFWKVKqdEXDrmPG9Zo0bElWXpVuW6vhXxuv4V8bXqGNd1x/5jooV0PG3loS3e88JqNoZ\nwfC7Eu/eeHXocWmxYrnth1Qqw28L39dUnyVVVWClJtHHNnsn3xR/6RYkkQ9Ho3YKof3Hj48WEPE6\nbK98oo69MR6qWJIRXJEviGVV1zlRm8Vrg7oETqIOPVVmzHA734SCe+E9av+fHe7YvE458tq8Dtyr\n/4yXPtB7Ft6jM176IKnBh9cG9bl3XmcZeY5IgQYBJf+xhMIv9lqXLnWfwezsugV/Mvd1xksfaGZ2\npVp2UO3MKs3MCtZon6iOO3T9M176oNHv84yXPlD75Jkq+Y+pddp9atlVijjVz3REZx+P2HtT2/XX\n1o5Ll7r3yDtv5LtSG959s2xHyfxerWsGafu728d9vhqCESRNKEjqM7pO5fh1aSRe2QsvdOvilZsx\no3HqV2NUFKM1jR9f89gzZkQLwBkzau6baseUqENPhWRe7GQ0Eq/94wmCGTNUzzyzuk3iXWuqz1a4\nE57h3qOQQUrtzCqd8dIH4XK1DYbqusex50umvlEaRC2CskZbxhE0DWXpUtXM7Mqw5iH5j4W1rmQ0\n+mQtEbFla3tXahu4JGqnSEHrtU2yA7W6SFaQGB9JI1DXGs91Udfs81h/Q12TEd98030VvImR3uJb\nqaxrkGhyY13XXf7/2zv3WDuK84D/vvuwoaQCYiJABdegoEZUTgxxKW5pZdJgQagiSyARGhWKrKAL\nlFKpqgOKVKVVFbf80RTHNDWkvJSoiQIlINLyMtwKyVcG8zA4cdJA6xIQLuAGIqricn2nf8yOz5y5\ns7Ozu+dx77nfTzo65+zZszvf7O58M99j5mBn8a6xMfs9lCklS04Ic+46HDlLADimp7tXwxwfn3/u\nsuixHNlmZjq+k6eesgEUMVnbJrP6ASBr1sAzz1jnOHMTHNy3+kjaT1k9++cHWLmy2m8Wk90trXDg\nAJx0kvUd+ItMlS2f8Oqr3fIf3Lc6OptvE6anKUKlBQ4bxmSciWWG2Q/ylsuOXZuySMmc
erz66u5A\nmq7JGBPJxZ2AmtXFC1jf32WO55GjbRb7q98jkia4Ye7GjXkmg1z60astO24OvTh3ro9k6u/uNlOb\n92f1bFO9R3//sTE7MnGjhl6Raw7tVf3ljGhj9dyr83dGQ/a1fHleT9/5B/sx2vfNQpPL/89sv//F\nxn7MHD9fm3ps8vypj2TEFEl4Qbdvn+/zaNJAxx76lA22rb+m6YPQD/9M3fLFHsTQl7Fhw/z6jDmU\nB1HecN8256+SM6esbc/v++2c2ajsXg+v1caN3SbAXuGelypzXdUx6iiepvVYdb/06xlTRbJAFEnY\ns928udv2nmOPzeml5thge6FM+q0Q6uLK5AckpAIRYnWW6qn30/81qOvRTxlyyrIQRySDqBP/XL14\nblI+kn7JoopkgSiSMKxvbGx+72xycn7D7/eGw5ukqned25BWMUjFUXWuKrNLToNTdowNGzrXKKy7\nrnBNsddpWPjlr3s9Q8d+nfNUmhUzyuJ6/xs35o0AcjsITemVM9ova5nc/VZY/YoaNUYVyYJRJDt3\ndo9ARDqRGW7YHl5oP3qjKw9hrNMY1LH3G1PvwUmZyPpB1WiqTLZQJpfbUHcklqq7sDed6kn3gtxe\nZ24DW/daNhkN9LJRTpXHv9fbNpC9auB7+dzFjt10xNkr+XIViUZt9Zl16+DWW+2MvW6dktQ08DMz\ncN11nWx4Y2CiuEpzc/D44zbKJzzGzAxceaXdLxZpkhP95CJr7rzT7meM3d400isXP6Ll8GE7I4Cf\nIT893ZkC5tChTllCmWJy50yln4qyqppTLEXdNWRSZQ0jhMDuc+hQ95Q4seO9/37+tfTP46LWjEn/\nL3Vv5dZB2X7htYH2a/v4x60zkWqsjFVRdU1n/q6zdEQY1RmLduvn8wvoiGRQ5PYuQlPY5GTHLBEz\nv9TpceYMwWNO0UGNSMoy5MtyUapkMqZ9tEvT0VmTHmHKRBErQ2XCpHe83Gvp+43Gx7uTKav+1zTi\nq05dtYkebBORVrZv1Wg2FayRundTSa2xc4SjkF75llDT1sJSJLn4D7IfdpoavqamKMkl1ug4M1Ps\nQei1/yTVYPvKtcpPEZYr1QCUJQm6xtmfmqbMDFl23qYKrE4QRR2/WE4yYVkdNI1qyq2DumbXJr6+\nHNNo6rypfZsoqTq/j493nvGwg5Eyd5aZeuuQq0j6atoSkQuBW4Bx4BvGmL8Kfl8O3AN8EjgIXGaM\n2S8iK4B7gV8D7jLG/KH3n08CdwFHA/8M3FAIPBK4iRDvuw8uucQmKbntofnlmmu6zRYizZOPwvVU\nXDIWRNYViWxrOmx25jSw57viivnmg/XrrXnPmdvCtU/8Y+UkTZbtF5oVP/igU7cA3/8+bN4clyE8\nXpVJI2YmqTJRQHciW9U5mibKHjxozVpzc/a8VUmIvizQKf+rr3bMsq58ZfvmmF3d/2LXsyyJL2Xm\nqWN2Su0bm2G7yuRV9btverv99s59ODERTxb1zZ0pU2/fyNE2TV5Y5fEKcDqwDNgDnBnscy3w98Xn\nzwHfKT4fA5wHTAHbgv88DZwLCPAvwEVVZVkMIxJ/GOz3MlLDYt8JHIv8aloG/xhNIsRyzzU1Zcud\n48iemor3ynxy8yXKyh+aFV1vsGr+o7Lz5jrOU7/HTBShOaNOhFUOTU0+sclF/RFNSq5Urk5qVJnK\nm8ox85SNJqpMUbGRbx1zaG4dT0117j+wo+LUMXptKWDYpi1gHfCI9/0m4KZgn0eAdcXnCeBtQLzf\n/8BXJMDJwI+875cD26vKstAViX9DhFFaZbbRqrDUOg9ITtl8U1sT80LsmHWS1HIeGr+sVRncKXu3\nL2uVH6LOeR1VijgVjdbEVt+E3HslvA/Da+qX3ze9+PvWMSu5e73KrFtVh23CdcN9/M7fxIR9LzML\n1lX8oSIpe877kTRrzMJQJJdizVnu++9HRhd7gVO8
768AJ3jfQ0WyFnjc+/5bwEMl578a2A3sXrly\nZW9rt8eUOdZ8pRKzy6acfL1s/GONadXDmMoYDv0xuaG1/nE3b47b8XfuNOacc+Y3UmEeRU6vM7Ut\nVrZUPkq4b1P7ednIrJ8huClyRiSuZx76nMJOUuqahCHY4YzYYSBBU4Wb4wsJfREbNsTv57CD1+QZ\n3LnTyitS/nz0uhPhs+QVif9aTCMSf5hfNWtv2YMXi/zyb/6602TkNlI7d9qht3/u2M0fNj51ktT8\nnn8suqx7llt7/s2bu/ft9VQbMblyo5xSpstQGcca1F6MSKoUf87/w162kytsdDduLO8ApMrvK1AR\ne5yqQILY81FlIi0b+fqmqphZLjbCDq9RLyPOfPrZiVgIikRNWzWo00POOVY4BfrUVD3zS3i8nOF+\nOA2Gb64K5aiTae0oG8n4pg2/d+h6hBs2dO+3YUOezE1MBXX+FyrUUGmEo8oyc2Yb80ZKObUlbIBj\nkUWpEVWooMJy1g2rzZU1PG/MhBZGRPmylpnbUte7bT33y1eSq0j6GbX1DHCGiJwGvI51pv9esM+D\nwJXADHYE80RR+CjGmDdE5Ocici6wC7gC+Fo/Cj9oytZWr7vmuvtPmATpIqK+/GWb1Dg314kWgerp\n1auif1wESciyZTYqJ0yuCqdQz5Fx/XobUeYimMBOTT8+buVZtsxGuj31VCf6DOz06Y8+2vnPJZek\nz5OK6vIjzMqmPk9FSPn7pBIx/STMuTl7Lbdtmx+Vk5NwmWJ62kanOXKS16qipMLEwfFx+MIXOlGA\nd9/dHf00MzM/wiu8Z3bsmJ8Y6hL03D2cishzsrqIPBG46KLOf93vYeSWuw5+qzQx0ZHFP/e6dXa7\nS+qdne3IsmVLJ+LM/R4uTV2H8F7sR9JmLXK0TdMX8Bng37Amqy8V2/4C+Gzx+Sjgu8DL2Gis073/\n7gf+G3gPeI0i4gtr3tpbHHMb3gim7LUYRiS9JNVDdb0ylyfiTEHue46Zoeyc4WjhzDM7ZSmzKVeZ\nylKmCd/xWtY7dDJs3pw/AirrHfu92bGx7ryS3FFbzEnr92B9mWILa4Wy1s1vCH+LmQKrTHI5vofU\nFC6xHn/YSy+7zmVObleWVH2Eia3ORxPz4/jnCKc4ipWlbFRUt4xVuLpJRTv20tTFsE1bC+m1lBRJ\nzg3uO+82bux+uNyU3TlO4xA3Pb6bT6ws7LNqDRb/YfEVXI58jrJInxxfQOwcX/lK2gae8/D6viun\nGLZvn+/zmZy0x8xJDMxt2MPORNgg5/pIUqHTYZRU3etUprT9evaVUNl/Q5NgrP79qLGyz36ghh9s\nkpppIiTmk2nSUfOvW1W0Y9Pjx1BFskQVSd3Q0nPO6b4pXehiXT+KIzYaKHNYphrGsMEOo19yoqj8\nXtvkZH7OSuwcZT4g53PJWc44NtVLTEH5DUNZfabKakz6PmjbI46NIJoqpzKlHY5ucx3jrp5jIdth\n2WOjkLKck3CEkbvsdSo4InUPV13TmJL1/9t2nRVHriLRSRtHjKps3fD3TZtgzx77fWzM3ppuWdxP\nf9r6VOr4Atwki7Oz85cg9bdDd8a0n4V86FBaxly/kb9U7uxst527yhcQnsPJdfPN8OCD3ccum0gz\nJLbssMva9/0UExO2HmZmyuszLFvoq4hllTtyM7pT2fcxO7+fle98YM6Xkzp2zP/mJqR0dVUnc9/P\nzPfrKzYBZNXncDlbfzaJ1DPijuHudRG46qrY8rjxujn//E79PflkJxN/YsLKNTFht73/vn2G/efI\n94/E6r4fqCIZEaoeTEc4DcfBg7B1q30PG4AyJVLlyCtrqMq2+8cU6W6koRMsUIfp6W7FIdLdYIdO\n0BzFtG4d3H9/x9H53HOwe3enwTp4sHsd8pD1660sofwi9n1sDM47D3bt6m6g60zlcdttnSCLiQnr\n5D7rrPlO4XAq
Fvebo2qN8Jhyc+V6/vn09B+xY4frt4f3aNk1ijXIYX2tWGEVAHTWig+V5MxM+piu\n3Hfc0bmvJifLlUisHLn38D33dDpThw7Z7+4c7tzG2PtkdhZeeqkTsFI19Uq/UEUyAlQ9mCHuxoo1\nFKtX50Vn+Teq2+7+Uza/1fR0vNfuH9M1qmA/X3DB/Ic1JzrKn58LbOTQtm22kQPbuPpK080hVnVc\nV3+xCKGqOc5i9bJlS3ev9aij4qO5nDmzZmbg2ms7x3MRSqGcfkPpR1a5+dXKGiS/fmKNtX+sstGQ\nO0aVoqkT/VbVWbr++s59cOedtuPk14kfRZiKckqNMGI0ness5MCB7vMb04n0M6b5/GE9Jcf+tdhf\no+4jaWLzbpMcVRU5U/WflMO4avrrOo7EVPJZU+dwTLY28fqp+qybaxBOp+HkyvGVON+M79Oqus5l\njm+XMJiKGKvKn6kKxMidOyv0QYnMjxqsE0XY1omd69/zI8X8QIOc56RJjlYZqLN96SiSJjd4m4ei\nKnImJGcf30GYctTWUYCpiKbcRLlBEHPsN1kDJTbBX070ViqBLvc6pxzLMTnLZKwKZ64qb0zZxRIZ\n63aGyq5VFaHMuZ2Esk5QeLxYeH+vIraMUUWypBSJMc16xm170+4YbUck4T5Vs7XWeVBijXRO1FHb\nB7BOmWI0GTG6RjMWMp0aHeQorZz6SY0Aq6KzykJtU1FL4fxaZXUWi2KK3Re9yARP1VnV/GCp/+aW\nq5c5JMaoIllyimSY5A7XU/uEppGq+ZCaPvRNE/h6RW4D0bQhcTLUnS6lF9cwVeYcpeGH1oY5IOHx\ny2bWHURnIPc+KTOfxhJQ25wn/I+OSFSRLFlyRyS9PE+/Rx4x6prmmii2YcpYVuawTDGlkWsCrZt/\n0Uvq1G1M5qmpzsSVuTlNTcrYqzrIVSQataUsCFIx/r0MX+xVJE1T6kTVNJlnDYYXAgrpOeP8endl\nnJuzEVAukq+qbqrqpGmd5VKnbsPoMT8y7OKL4YEHrCqZne3tNep3HcRQRaIsGGJJgIM4zyAZhCIb\nWghoBWG9h2UctpLPoW7dOpm3bOlWQCedZEO9F9o1aorY0ctos3btWrN79+5hF0NRBkZOLsawWQxl\njNGk3LFcL1j48ovIs8aYtZX7qSJRFEXpP4tRceYqEjVtKYqiDIBhmlT7zdiwC6AoiqIsblSRKIqi\nKK1QRaIoiqK0QhWJoiiK0gpVJIqiKEorVJEoiqIorVgSeSQi8hbwnw3+egLwdo+Ls9BRmZcGKvPS\noK3Mv2yM+UjVTktCkTRFRHbnJOOMEirz0kBlXhoMSmY1bSmKoiitUEWiKIqitEIVSZrbhl2AIaAy\nLw1U5qXBQGRWH4miKIrSCh2RKIqiKK1QRaIoiqK0YkkrEhG5Q0TeFJG93rYPi8hjIvKT4v34YruI\nyFYReVlEXhSRs4dX8uaIyKki8qSI/FBEfiAiNxTbR1ZuETlKRJ4WkT2FzH9ebD9NRHYVsn1HRJYV\n25cX318ufl81zPI3RUTGReR5EXmo+D7S8gKIyH4ReUlEXhCR3cW2kb23AUTkOBG5V0R+JCL7RGTd\noGVe0ooEuAu4MNh2I7DDGHMGsKP4DnARcEbxuhr4+oDK2GtmgT8xxpwJnAtcJyJnMtpyHwI+ZYz5\nBLAGuFBEzgX+GviqMeajwM+ATcX+m4CfFdu/Wuy3GLkB2Od9H3V5HecbY9Z4+ROjfG8D3AI8bIz5\nGPAJ7DUfrMzGmCX9AlYBe73vPwZOLj6fDPy4+LwduDy232J+AQ8AFywVuYFfAJ4Dfh2b8TtRbF8H\nPFJ8fgRYV3yeKPaTYZe9ppynFA3Ip4CHABlleT259wMnBNtG9t4GjgX+I7xeg5Z5qY9IYpxojHmj\n+HwAOLH4/EvAT739Xiu2LVoKE8ZZwC5GXO7CzPMC8CbwGPAK8I4xZrbYxZfriM
zF7+8CKwZb4tb8\nLbAZmCu+r2C05XUY4FEReVZEri62jfK9fRrwFnBnYcb8hogcw4BlVkWSwFiVPZLx0SLyIeA+4I+N\nMT/3fxtFuY0xh40xa7A99XOAjw25SH1DRH4XeNMY8+ywyzIEzjPGnI014VwnIr/t/ziC9/YEcDbw\ndWPMWcD/0DFjAYORWRXJfP5LRE4GKN7fLLa/Dpzq7XdKsW3RISKTWCXyLWPMPxWbR15uAGPMO8CT\nWNPOcSIyUfzky3VE5uL3Y4GDAy5qG34T+KyI7Ae+jTVv3cLoynsEY8zrxfubwP3YTsMo39uvAa8Z\nY3YV3+/FKpaByqyKZD4PAlcWn6/E+hDc9iuKqIdzgXe9oeOiQUQE+AdgnzHmb7yfRlZuEfmIiBxX\nfD4a6xPah1Uolxa7hTK7urgUeKLo1S0KjDE3GWNOMcasAj6HLf/nGVF5HSJyjIj8ovsMbAD2MsL3\ntjHmAPBTEfmVYtPvAD9k0DIP21k0ZEfVPwJvAB9gNfsmrG14B/AT4HHgw8W+AtyKta2/BKwddvkb\nynwedpj7IvBC8frMKMsNfBx4vpB5L/BnxfbTgaeBl4HvAsuL7UcV318ufj992DK0kH098NBSkLeQ\nb0/x+gHwpWL7yN7bhRxrgN3F/f094PhBy6xTpCiKoiitUNOWoiiK0gpVJIqiKEorVJEoiqIorVBF\noiiKorRCFYmiKIrSClUkitIQETlczDLrXjdW/yv72KvEm5VaURYyE9W7KIpSwv8aO+2KoixpdESi\nKD2mWBPj5mJdjKdF5KPF9lUi8kSxDsQOEVlZbD9RRO4Xu17KHhH5jeJQ4yJyu9g1VB4tsvIRkT8S\nu57MiyLy7SGJqShHUEWiKM05OjBtXeb99q4xZjWwDTsTL8DXgLuNMR8HvgVsLbZvBf7V2PVSzsZm\nZYNdM+JWY8yvAu8AlxTbbwTOKo4z1S/hFCUXzWxXlIaIyHvGmA9Ftu/HLqT178UEmQeMMStE5G3s\n2g8fFNvfMMacICJvAacYYw55x1gFPGbswkSIyBeBSWPMX4rIw8B72OkwvmeMea/PoipKEh2RKEp/\nMCWf63DI+3yYjk/zYux8SWcDz3gz+irKUFBFoij94TLvfab4vBM7Gy/A54Gnis87gGvgyAJcx5Yd\nVETGgFONMU8CX8RO+T5vVKQog0R7MorSnKOLVRcdDxtjXAjw8SLyInZUcXmx7XrsSnZ/il3V7qpi\n+w3AbSKyCTvyuAY7K3WMceCbhbIRYKuxa6woytBQH4mi9JjCR7LWGPP2sMuiKINATVuKoihKK3RE\noiiKorRCRySKoihKK1SRKIqiKK1QRaIoiqK0QhWJoiiK0gpVJIqiKEor/h+mPrdO7d3H3QAAAABJ\nRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEWCAYAAACXGLsWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi40LCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcv7US4rQAAIABJREFUeJzsnXl8VNXZ+L/P3CQssmnUgiQQ6goY\nEYhoqmAQtWBdsNiK2gbceKvCW2zVV21VXFqs2hb3AgolVUGrPyMWECwSQAhCICCCCwiBhE2M4gZk\nMnPP74+75M5kkpksk0zC+X4++WTu/txzzz3PeZZzriil0Gg0Go2mNnzNLYBGo9FoEh+tLDQajUYT\nFa0sNBqNRhMVrSw0Go1GExWtLDQajUYTFa0sNBqNRhMVrSwSHBExROR7EenRmPs2JyJykog0es62\niFwoIiWe5U9FZHAs+9bjWi+IyL31Pb61ISJlIpLTyOd8SUQmNeY5NfUnqbkFaG2IyPeexfZABRC0\nl/9HKfVyXc6nlAoCHRp73yMBpdSpjXEeEbkJ+JVSKsdz7psa49yaxkFEXgK2KqUmNbcsrRWtLBoZ\npZTbWNs915uUUv+taX8RSVJKBZpCNo1G03AivbN1fY9b4nuv3VBNjIg8IiKvishsEfkO+JWIZIvI\nKhE5ICJ7ROQpEUm2908SESUiGfbyS/b2BSLynYgUikivuu5rbx8hIp+JyDci8rSIrBCRsTXIHYuM\n/yMiW0XkaxF5ynOsISJ/F5FyEdkGDK+lfP4gInPC1j0rIn+zf98kIh/b9/O53euv6Vyua0RE2ovI\nv2zZNgEDw/b9o4hss8+7SUQut9dnAs8Ag20X35eesp3kOf439r2Xi0i+iHSLpWwiyPyIiMyx68f3\nIrJBRE605dsvIjtF5ELP/l1EZKb9TMpE5CER8dnbThaRJSLylYh8ad9/57Dy+Z2IbLTrwGwRaVOD\nXLWey+Zs+9l8LSIvOucSkeNFZL5dd74SkWWe8/YVkaX2to0i8rMarn+TiBR4lt26LiK3AlcD99pl\n9qa9T5qIvGmX23YRua2Wcm8rIn8TkVIR2Sciz4lIW3vbhSJSIiL3isheYHqkdfa+0erBrSKyFfik\nJlkSFqWU/ovTH1ACXBi27hHAD1yGpazbAWcBZ2NZej8GPgPG2/snAQrIsJdfAr4EsoBk4FXgpXrs\nezzwHXCFve13QCUwtoZ7iUXGt4DOQAbwlXPvwHhgE5AGpALLrKoX8To/Br4HjvKc+wsgy16+zN5H\ngAuAQ8AZ9rYLgRLPucqAHPv3E0ABcDTQE9gctu8vgW72M7nWluFH9rabgIIwOV8CJtm/L7ZlPBNo\nCzwHvBdL2US4/0fse7rQPvYVYDtwt718C7DFs//b9vXaAz8C1gI32ttOAYYBKfbzXgE8EVY+q4Cu\n9nP5DMsSjiRXLOf60H7Gx9rndcrncSyFm2wfP8Ren2Lf2132tgvtcj8pQhmHPAMi1/VJnu0+YD1w\nr32dk7Dex2E13N/TwJt2/egEzAce9tSrAPBn+1ztalgXSz14x75Gu+Zun+rcnjW3AK35j5qVxXtR\njrsD+Lf9O9JL8Q/PvpcDH9Vj3xuA5Z5tAuyhBmURo4zneLb/P+AO+/cyPI0QcAk1KAt7+yrgWvv3\nCODTWvb9D3Cb/bs2ZbHT+yyAW737RjjvR8DP7N/RlMUs4M+ebZ2w4lRp0comwnUfARZ4lq8EvgF8\n9vLR9vk6AN2xFEsbz/6/Bt6t4dxXAWvCyme0Z/lvwDMxPv9I5/I+48ud54bVoP4/4MSwcwwFdgHi\nWfdv4I8Ryriuyu
JcYFvY9e4Dpke4Fx9wGOjpWTcYWynb9eowkOLZHmldLPVgSCzlm4h/OmbRPJR6\nF0TkNOCvWK6R9lgV64Najt/r+X2Q2oPaNe17glcOpZQSkbKaThKjjDFdC9hRi7xg9aavsf9fa/93\n5LgU66U/Geslbw+siXI+sKyGGmUQy/12O5bVgS37sTGcF6z7W+ksKKW+FZGvsRpzp0zq8sz2eX4f\nAvYrpUzPsiNfT6ANsE9EnP19WJ0URKQr8BRWw9nR3rY/7Frhch0TSaAYzxVevifYvx8FHgQWi0gQ\nqwPzuL19p7JbVs9x3SPJUEd6Aj1E5IBnnYFlXYbTFascN3jKUcL22aeU8kdZF0s9CHn3WxI6ZtE8\nhKeNTsXqyZ6klOoE3E/1ytrY7MHq8QAg1ltS20vaEBn3AOme5Wipva8BF4pIdyw32Su2jO2A14HJ\nWC6iLsCiGOXYW5MMIvJj4HksF0+qfd5PPOeNlua7myolg4h0xLIAdsUgV0MoxW7glVJd7L9OSqkz\n7O1/wcrGy7Sf2VjqX69iOVd4+e4Gq9FUSt2ulMoARgL/JyLn29vTxdNC28dFKrcfsDoGDl3Dtoc/\no1Isy6CL56+jUuqyCOfeh+UaPtWzb2ellDcmE6kOhK+LpR602Gm+tbJIDDpiuRp+EJHewP80wTX/\nAwwQkctEJAn4LXBcnGR8DZgoIt1FJBX4v9p2VkrtBd4H/onlythib2qD5R/eDwRtK2NYHWS41w4I\n98CKozh0wHqJ92PpzZuB0zzb9wFpYgf0IzAbuFFEzrCDupOxXHw1WmqNgVKqFFgKPCEinUTEJ9YY\nliH2Lh2xGtlvRCQdy3VYX2I513jPM74HK0aGXcdOtJXCN1iuGROrFx4Afi8iySJyAZaL8tUI594A\nnCEimXan4YGw7fuwYlkOhYBfRH5vB68N+9iBYcehrJTzF4ApInKcWKSJyMUxlo1Ds9SDpkIri8Tg\n98AYrIDzVCK/LI2KUmofVgbJ34By4ESgGKv32NgyPg8sBjZiuYxej+GYV7D8wq4LSil1AMtV9CZW\nkPgqLKUXCw9gWTglwAIgz3PeD7ECnKvtfU4l1MX2LrAFy93jdds4x78DPGTLtQerd3xdjHI1lF8B\nR2EF7L/G8vk7ve4HgEFYDfRc4I0GXCeWc80G/gt8DnyKFasAqzzfwwperwCeVEotV0pVYCUsXIGV\niPEUVqxqS/iJlVKb7fMV2OdeFrbLC0A/OxPrdWWlpV5iy1xin38qVhwhEr/HcoGttu9xEZarM2aa\nuR7EHQl1F2qOVETEwDKjr1JKLW9ueTQaTWKhLYsjGBEZbrtl2mAFjSuxelYajUYTglYWRzbnAduw\nfPU/Ba60XQMajUYTgnZDaTQajSYq2rLQaDQaTVRazaC8Y489VmVkZDS3GBqNRtOiWLt27ZdKqdrS\n5oFWpCwyMjIoKipqbjE0Go2mRSEi0WZUALQbSqPRaDQxoJWFRqPRaKKilYVGo9FootJqYhYajaZp\nqKyspKysjMOHDze3KJo60LZtW9LS0khOrmmKs9rRykKj0dSJsrIyOnbsSEZGBqETxmoSFaUU5eXl\nlJWV0atXr+gHRCCubih7OolP7c8M3h1h+xARWSciARG5KmxbUETW239z4ymnRqOJncOHD5OamqoV\nRQtCREhNTW2QNRg3y8KemO5Z4CKsr2itEZG59uyRDjux5sWPNN3xIaXUmfGST1OdwkIoKICcHMjO\nbm5pNImMVhQtj4Y+s3i6oQYBW5VS2wBEZA7WVMSuslBKldjbzEgn0DQdhYUwbBj4/ZCSAosXa4Wh\n0WiqiKcbqjuhnxAso26fS2wrIkUiskpERkbaQUTG2fsU7d8f/oVHTV0oKLAURTBo/S8oaG6JNJrI\nlJeXc+aZZ3LmmWfStWtXunfv7i77/eFfPo3M9ddfz6efflrrPs8++ywvv/xyY4jM
eeedVy1WcOml\nl9KlS5eQdU888QTt27fnu+++c9f997//pXPnzu49nnnmmSxZsqRR5KoLiRzg7qmU2mV/8vI9Edmo\nlPrcu4NSahowDSArK0vPiNgAcnIsi8KxLHJymlsijSYyqamprF+/HoBJkybRoUMH7rgj1JOtlEIp\nhc8XuT88c+bMqNe57bbbGi6sh44dO7Jq1SrOOeccvvrqK/bt21dtn9mzZzNw4EDy8/P59a9/7a4f\nOnQo+fn5jSpPXYmnZbGL0G/yplGHbxIrpXbZ/7dhfR2rf2MKpwklO9tyPT38sHZBaRqfwtJCJi+f\nTGFpYdyusXXrVvr06cN1111H37592bNnD+PGjSMrK4u+ffvy0EMPufued955rF+/nkAgQJcuXbj7\n7rvp168f2dnZfPHFFwD88Y9/ZMqUKe7+d999N4MGDeLUU09l5cqVAPzwww+MGjWKPn36cNVVV5GV\nleUqsnBGjx7NnDlzAHj99de56qqQnB4+++wzAoEAkyZNYvbs2Y1ePg0lnspiDXCyiPQSkRRgNNbn\nGKMiIkfbH+RBRI4FzsUT69DEh+xsuOcerSg0jUthaSHD8oZx35L7GJY3LK4K45NPPuH2229n8+bN\ndO/enUcffZSioiI2bNjAu+++y+bN1ZuRb775hvPPP58NGzaQnZ3NjBkzIp5bKcXq1at5/PHHXcXz\n9NNP07VrVzZv3sx9991HcXFxjbJddNFFvPfee5imyauvvsrVV18dsn327NmMHj2anJwcPvroI778\n8kt325IlS0LcUCUlJfUonYYRN2VhfwN3PLAQ+Bh4TSm1SUQeEpHLAUTkLBEpA34BTBWRTfbhvYEi\nEdkALAEeDcui0mg0LYSCkgL8QT9BFcQf9FNQUhC3a5144olkZWW5y7Nnz2bAgAEMGDCAjz/+OKKy\naNeuHSNGjABg4MCBNTbEP//5z6vt8/777zN69GgA+vXrR9++fWuULTk5mXPOOYc5c+YQDAZJS0sL\n2T5nzhxGjx6NYRiMHDmS11+v+lT90KFDWb9+vfvXHDNsxzVmoZSaD8wPW3e/5/caLPdU+HErgcx4\nyqbRaJqGnIwcUowU/EE/KUYKORk5cbvWUUcd5f7esmULTz75JKtXr6ZLly786le/ijjOICUlxf1t\nGAaBQCDiudu0aRN1n2iMHj2aX/ziFzzyyCMh64uLi9m2bRtDhw4FoKKiglNOOYXf/OY39bpOPNBz\nQ2k0mriSnZ7N4tzFPDz0YRbnLiY7vWn8nN9++y0dO3akU6dO7Nmzh4ULFzb6Nc4991xee+01ADZu\n3BjRcvGSk5PD3XffHdEF9cgjj1BSUkJJSQm7d+9m+/btlJWVNbrM9SWRs6E0Gk0rITs9u8mUhMOA\nAQPo06cPp512Gj179uTcc89t9GtMmDCB3Nxc+vTp4/517ty5xv19Ph933nkngGudKKV49dVXWbx4\nsbufiDBy5EheffVV+vXr58YsHB544AGuvPLKRr+f2mg13+DOyspS+uNHGk38+fjjj+ndu3dzi5EQ\nBAIBAoEAbdu2ZcuWLVx88cVs2bKFpKTE7IdHenYislYplVXDIS6JeUcajUbTAvj+++8ZNmwYgUAA\npRRTp05NWEXRUFrnXWk0Gk0T0KVLF9auXdvcYjQJOsCt0Wg0mqhoZaHRaDSaqGhlodFoNJqoaGWh\n0Wg0mqhoZaHRaFoUQ4cOrTbAbsqUKdxyyy21HtehQwcAdu/eXW0SP4ecnByipeBPmTKFgwcPusuX\nXHIJBw4ciEX0Wpk0aRIiwtatW0OuJSIhMq1fvx4R4Z133gk53jCMkPmjHn300QbL5EUrC41G06K4\n5ppr3NlbHebMmcM111wT0/EnnHBCyLxLdSVcWcyfP7/adynqS2ZmZsi9/fvf/64239Ts2bM577zz\nqs1M265du5D5o+6+u9qXrBuEVhYajSbuFBbC
5MnW/4Zy1VVXMW/ePPdDR870GIMHD3bHPQwYMIDM\nzEzeeuutaseXlJRw+umnA3Do0CFGjx5N7969ufLKKzl06JC73y233OJOb/7AAw8A8NRTT7F7926G\nDh3qzuOUkZHhzhD7t7/9jdNPP53TTz/dnd68pKSE3r17c/PNN9O3b18uvvjikOt4GTlypCvz559/\nTufOnTn22GPd7Uop/v3vf/PPf/6Td999t0Hf1K4rWlloNJq44nyy9777rP8NVRjHHHMMgwYNYsGC\nBYBlVfzyl79ERGjbti1vvvkm69atY8mSJfz+97+ntlkqnn/+edq3b8/HH3/Mgw8+GDJm4k9/+hNF\nRUV8+OGHLF26lA8//JD//d//5YQTTmDJkiXVvla3du1aZs6cyQcffMCqVauYPn26O2X5li1buO22\n29i0aRNdunThjTfeiChPp06dSE9P56OPPmLOnDnV5pBauXIlvXr14sQTTyQnJ4d58+a52w4dOhTi\nhnr11VfrVrBR0MpCo9HElXh8stfrivK6oJRS3HvvvZxxxhlceOGF7Nq1K+IX6RyWLVvGr371KwDO\nOOMMzjjjDHfba6+9xoABA+jfvz+bNm2KOkng+++/z5VXXslRRx1Fhw4d+PnPf87y5csB6NWrlzu3\nU23ToEPVR5Ly8/Orzf/kfPPC2c/rigp3Q4UrmoaiR3BrNJq4Eo9P9l5xxRXcfvvtrFu3joMHDzJw\n4EAAXn75Zfbv38/atWtJTk4mIyOjXq6a7du388QTT7BmzRqOPvpoxo4d2yCXjzO9OViB6JrcUGB9\nm/vOO+8kKyuLTp06ueuDwSBvvPEGb731Fn/6059QSlFeXs53331Hx44d6y1brGjLQqPRxJV4fLK3\nQ4cODB06lBtuuCEksP3NN99w/PHHk5yczJIlS9ixY0et5xkyZAivvPIKAB999BEffvghYE1vftRR\nR9G5c2f27dvnurzA+pb2d999V+1cgwcPJj8/n4MHD/LDDz/w5ptvMnjw4DrfW/v27fnLX/7CH/7w\nh5D1ixcv5owzzqC0tJSSkhJ27NjBqFGjePPNN+t8jfoQV2UhIsNF5FMR2Soi1ULzIjJERNaJSEBE\nquWyiUgnESkTkWfiKadGo4kv8fhk7zXXXMOGDRtClMV1111HUVERmZmZ5OXlcdppp9V6jltuuYXv\nv/+e3r17c//997sWSr9+/ejfvz+nnXYa1157bcj05uPGjWP48OFugNthwIABjB07lkGDBnH22Wdz\n00030b9//3rd2+jRoxkwYEDIutmzZ1dzS40aNcp1RYXHLBo7GypuU5SLiAF8BlwElGF9k/sa7+dR\nRSQD6ATcAcxVSr0edo4ngeOAr5RS42u7np6iXKNpGvQU5S2XhkxRHk/LYhCwVSm1TSnlB+YAV3h3\nUEqVKKU+BMzwg0VkIPAjYFEcZdRoNBpNDMRTWXQHSj3LZfa6qIiID/grlsVR237jRKRIRIr2799f\nb0E1Go1GUzuJGuC+FZivlKr1A7RKqWlKqSylVNZxxx3XRKJpNJrW8oXNI4mGPrN4ps7uAtI9y2n2\nuljIBgaLyK1AByBFRL5XSjVuxEaj0dSZtm3bUl5eTmpqKiLS3OJoYsBJs23btm29zxFPZbEGOFlE\nemEpidHAtbEcqJS6zvktImOBLK0oNJrEIC0tjbKyMrTrt2XRtm1b0tLS6n183JSFUiogIuOBhYAB\nzFBKbRKRh4AipdRcETkLeBM4GrhMRB5USvWt5bQajaaZSU5OplevXs0thqaJiVvqbFOjU2c1Go2m\n7iRC6qxGo9FoWglaWWhaFI051bVGo4kdPZGgpsXgTHXtTEjXWPMMaTSa6GjLQtNiiMdU1xqNJja0\nstC0GJyprg2j8aa61mg0saHdUJoWgzPVdUGBpSi0C0qjaTq0stC0KLKztZLQaJoD7YbSaDQaTVS0\nstBoNBpN
VLSy0Gg0Gk1UtLLQaDQaTVS0sqgBPVJYo9FoqtDZUBHQI4U1Go0mFG1ZRECPFNZoNJpQ\ntLKIgB4prNFoNKHEVVmIyHAR+VREtopItS/dicgQEVknIgERucqzvqe9fr2IbBKR38RTznCckcIP\nP6xdUBqNRgNxjFmIiAE8C1wElAFrRGSuUmqzZ7edwFjgjrDD9wDZSqkKEekAfGQfuzte8oajRwpr\nNBpNFfEMcA8CtiqltgGIyBzgCsBVFkqpEnub6T1QKeX3LLZBu8s0LYTCQj13laZ1Ek9l0R0o9SyX\nAWfHerCIpAPzgJOAO5vSqtBo6oPOoosNrVBbJgmbOquUKgXOEJETgHwReV0ptc+7j4iMA8YB9OjR\noxmk1GiqiJRFpxvDULRCbbnE072zC0j3LKfZ6+qEbVF8BAyOsG2aUipLKZV13HHH1VtQjaYx0Fl0\n0dFp6S2XeCqLNcDJItJLRFKA0cDcWA4UkTQRaWf/Pho4D/g0bpJqNI2AzqKLjlaoLZe4uaGUUgER\nGQ8sBAxghlJqk4g8BBQppeaKyFnAm8DRwGUi8qBSqi/QG/iriChAgCeUUhvjJatG01joLLra0R+w\narmIUqq5ZWgUsrKyVFFRUXOLodFoNC0KEVmrlMqKtp9OSdVoNBpNVLSy0Gg0Gk1UtLLQaDQaTVS0\nskB/u0Kj0WiikbCD8poKPUhIo9FoonPEWxZ6kJAmnmirVdNaOOItC2eQkGNZ6EFCmsZCW62a1sQR\nryz0ICFNvNBzRWlaE0e8sgA96lYTH7TVqmlNaGWh0cQJbbVqWhNaWWg0caSlW6362xMaB60sNJoj\nkFiUgA7QJybNpcC1stBojjBiVQI6QJ94NKcCP+LHWRzJ6DEARyaxji3S355IPJpzXJi2LI5QtIvh\nyCXWLC0doE88mjPDTiuLIxTtYjhyqYsSaOkB+tZGcyrwuCoLERkOPIn1pbwXlFKPhm0fAkwBzgBG\nK6Vet9efCTwPdAKCwJ+UUq/GU9YjDT0G4MhGK4GWS3M9u7gpCxExgGeBi4AyYI2IzFVKbfbsthMY\nC9wRdvhBIFcptUVETgDWishCpdSBeMl7pKFdDA1Hp5VqjiTiaVkMArYqpbYBiMgc4ArAVRZKqRJ7\nm+k9UCn1mef3bhH5AjgO0MqiEdG9y/qjYz6aI414ZkN1B0o9y2X2ujohIoOAFODzCNvGiUiRiBTt\n37+/3oJqNHVFz1asOdJI6NRZEekG/Au4Xillhm9XSk1TSmUppbKOO+64phdQc8Si00o1RxrxdEPt\nAtI9y2n2upgQkU7APOAPSqlVjSybRtMgdMznyOVIjVXFU1msAU4WkV5YSmI0cG0sB4pICvAmkOdk\nSGk0iYaO+Rx5HMmxqri5oZRSAWA8sBD4GHhNKbVJRB4SkcsBROQsESkDfgFMFZFN9uG/BIYAY0Vk\nvf13Zrxk1bRO9Ah1TWNT31hVa6iLcR1noZSaD8wPW3e/5/caLPdU+HEvAS/FU7ZE50g1dRuLI7kH\nqIkPhYWwcyck2a1mrLGq1lIX9QjuBKS1VK7mRI9Q1zQm3nfSMODmmyE3N7Y61VrqYkJnQx2p6LTM\nhqOzlTSNifedDAahR4/YG/zWUhe1ZeEhUVw/eiqOhtNSs5USpQ5qQmnIO9lS62I4opRqbhkahays\nLFVUVFTv4xPN9aMbjSOPRKuDmlBa6zspImuVUlnR9tOWhU2i+RV1WuaRR6LVQU0oR/o7qWMWNq3F\nr6hpueg6qElktGVh01r8ipqWi66DmkRGxyw0mhZEa/Wba5oPHbPQaFoZOgAeP7QSjo5WFhpNC0EH\nwOODVsKxoQPcGk0LQQfA44MeBBsbtVoWItJJKfVtDdt6KKV2xkcsjUYTjg6Axwc9CDY2ormhCoAB\nACKyWCk1zLMt39mm0WiahiM91z8eaCUcG9GUhXh+H1PLNk0D0ME1jaZ50U
o4OtGUharhd6RlTT2o\nKbimFUhioJ+DRmMRTVkcLyK/w7IinN/Yy/qj141ATcE1nZ3R/BwpWTKJqhATVa4jlWjZUNOBjkAH\nz29n+YVoJxeR4SLyqYhsFZG7I2wfIiLrRCQgIleFbXtHRA6IyH9ivZmWSKQMF52dkRgkwnOI9xfW\nHIV4333W/0T5kluiynUkU6tloZR6sKZtInJWbceKiAE8C1wElAFrRGSuUmqzZ7edwFjgjgineBxo\nD/xPbddp6dQUXNPZGc1Pc2fJNIVlk6hjNxJVriOZOg3KE5E+wDX23wGgtiHig4CtSqlt9rFzgCsA\nV1kopUrsbWb4wUqpxSKSUxf5GkJhaSEFJQXkZOSQnd60tTI8uKazMxKD5n4OTdFg1qYQm9MNFE9F\nrd1b9SOqshCRDKoURCXQE8hyGvpa6A6UepbLgLPrI2Qtso0DxgH06NGj3ucpLC1kWN4w/EE/KUYK\ni3MXN7nCCEdnZ4TSXC94cz6HprBsalKIzR2viZeibu77aslEG5RXCHQC5gCjlFJbRGR7DIqiSVBK\nTQOmgTWRYH3PU1BSgD/oJ6iCHA4cJm9Dnru+OSwNTShH6gveVJZNJIWYCG6geCjqRLivlko0y2If\nloXwI6zspy3EnjK7C0j3LKfZ6xKO1Pap7m+FYvpbHzH9yfmojCW0yXg4ISyNI5nW+oLHYi01l2XT\n3PGaeNFa76spiBbgHikinYGfA5NE5GSgi4gMUkqtjnLuNcDJItILS0mMBq5tDKEbk8LSQia+MxFT\n2WGT0nMIzloIwRQw7qFi7MUUlBRoZdGMtMYXPNGtpeaO18SL1npfTUHUmIVS6htgJjBTRH4E/BL4\nuz03VHotxwVEZDywEDCAGUqpTSLyEFCklJprZ1S9CRwNXCYiDyql+gKIyHLgNKCDiJQBNyqlFjbs\ndqvjuKCUYzCVDLUUhUqCoMK34wJyMnIa+7KaOtAaX/CWYC211rhZotxXSwu01ykbSim1D3gaeFpE\nesaw/3xgfti6+z2/12C5pyIdO7gustWXnIwcUowU/EE/hs/gnCEmy5b6IajAqOR31wxo0VZFS6uQ\nNZEoL3hj0RqtJU3sJLplGYloAe65UY6/vBFlaRay07OZMnwKb2x+g1F9RlF+sJz3Sy/G3D4YX6/l\ndDnpZ8DI5hazXjR3hWwtiioetEZrSRM7LcGyDCeaZZGNlf46G/iAVjh5oBOz8Af9LN+5nCnDp9Am\nYx3+9FWkGCnkZDze3CLWm+askM2tqFoCsVpLWulGp6WVUUu0LKMpi65YI7CvwQpOzwNmK6U2xVuw\npsKbNusP+ineU8yYfmMAyO2X26JdUM1ZIVtizykR0Uo3Oi2xjFqiZRktGyoIvAO8IyJtsJRGgR2I\nfqYpBIw3TsyiIlABwIvFL2IqkxQjhdx+uc0sXcNozgrZEntOiYhWutFpqWXU0uJwsYzgbgP8DEtR\nZABPYWUwtQqcmMX4+eMJmAFOJlLKAAAgAElEQVSCKghARbCiVaTMNleFbIk9p0REK93o6DJqGqIF\nuPOA07Eymh5USn3UJFI1MeUHyzGVWZU+C5jKDBmsp6k7La3nlIhopRsdXUZNgyhV84Bse4K/H+xF\n744CKKVUpzjKVieysrJUUVFRvY515oY6HDjsKgwfPsYNHEePzj3iMuVHSwvIaTSa1omIrFVK1TYp\nrLVfbcqiJdEQZQGWwsjbkMfM9TMJmAEMn4EgVAYrEREuO/Uy7vrJXUDD54xqiQE5TdOiOxOapiJW\nZVGnQXmtmez0bLLTs8ntl0tBSQE7v9nJtLXTMDFBQf4n+cz7bB4+8REwAw2anbalBuQ0TUNzdSa0\ngkpcEuHZaGURhqM0pq2dZjvbqrZVmpUIgkLhD/rrHQBPTQWfD5TSATlNdZqiMxHe+GhrN3FJlGej\nlUUEnIF64S46o+w8KMlBZSwhJWNdve
aMKiyEiROthsDngylT9MCspqQllGNjZvdEut9IjY+2dhOX\nRHk2WllEIHxywd7H9ubUQ2NZ8NLv8fsFSfoDE/7xTjWrIpaGyHnwpgkiUF4eXZ5E6Vm0dFpKOTZW\ndk9N9xup8dHpp4lLojwbrSwikJORg+EzCAatMRdbv9qKbOpGhR8wfSh/Ek/8+ShOPHoj40ZmArE3\nRPV58M3Rs2gJPfC6kig9tFhojLTjmu43Uh3U6aeJS6I8G60sbEIbx2xuOPMGpq6dikJRaVayucNz\n4BsFZgpgYH4+lFuvNsksANIKmfTPCir852MGpdaGqD4Pvql7Fi2lB15XEqWH1lTUdL811UE9LiZx\nSYRno5UFkRvH3H65zNowq2rsRfoqGDMMCh6AbReCSiJYWcljL69mYbdhVJgDMH2L8NEWX1KQ1N6f\nAJkRr1fXB9/UPYuW1AOvC/Utx5ZqZdV2v4nQ+Di01PI90tDKAsjLg8OHrewkp3G8555sFucuJm9D\nHi8Wv0ilWWkpjJwHYccQ93sXu495BX/Qj5m2Asm9CHYMJZhRwMRN68gc2HifY23Kl7s1Z2vVtRxb\nupWVSEohEi29fI8kfPE8uYgMF5FPRWSriNwdYfsQEVknIgERuSps2xgR2WL/jYmXjIWFMGOG1TAC\nJCV5zPX0bJ6/9HmeueQZfE5RORbGBQ+QdP1wbryiDylGCoYY+Hp8gDrvz5hpK9zU2pZGfbO1mpLC\nQpg82fofbyJZWZrGQ5dvyyFuykJEDOBZYATQB7hGRPqE7bYTGAu8EnbsMcADwNnAIOABETk6HnIW\nFFgV1bouXH999cax/GA5IlWf8vD1WI0MfhSjx2oyj89kce5iLjvlMpRSbgZVki+pRX6O1ZutpVT1\nbK2mbKgj4fRE77vP+h9vORy/v2FYfzt3Nt+9twbC64+3fFubFdvaiKdlMQjYqpTappTyA3OAK7w7\nKKVKlFIfAmbYsT8F3lVKfaWU+hp4FxgeDyGdyurzWRW2f/8I+9jTmBtikOSzPHcKRcAMuNbD25+9\nbY32BgTh+jOvb5Ez1tb28jZ1Qx2Jpu6JOn7/m2+2OhPTpzfdvTe3Ym5sItUfp3wfftj6D63rnlsT\n8VQW3bG+sudQZq9rtGNFZJyIFIlI0f79++slZHa25WoxDKs3PXFiVUV1XlbKrPjFw0Mf5tlLnqWN\n0QZDDPtLejnkbchzpzYH8Ikv4rcwCksLmbx8MoWlifsmhL+8XisrEVwGzdETzc6GHj0gEGi6e28M\nxZxoyqam+pOdDffcY/1u7s6IpmZadIBbKTUNmAbWRIL1PU95uaUoTDO0EocG3rK5Z7DVcmYen0lB\nSQEHtvZm0iMVHE7rHHK+y065rPqAPXtmW3/Q36B5peJFeEZKY40RaWyaK+c8J6eqQ2EY8b/3mhrW\nWO87EQPH0epPa83C89KSM7/iqSx2Aeme5TR7XazH5oQdW9AoUkUgUiWureJmp2ezcW0H7v2fEyGQ\nAsYgjLErMNNWkGKkMOLkEUxePpnU8ksp/zjTOl8g9POtifRhpVgbltoa6qZ8CRojw6c+8jphK6Ws\nDDpHlngQXidTU+vW+Dd1wxtLeUZT9InQGYkn9VHgCaVclFJx+cNSRNuAXkAKsAHoW8O+/wSu8iwf\nA2wHjrb/tgPH1Ha9gQMHqoawcqVSf/6z9d9ZbtdOKcOw/jvrHS4et0QhlQqUQvzquMv+pn7z9m/U\n1KKpqt0j7ZTvpnMVyT8on2Gqdu2Umvrmh6rdI+2U8aCh2j3STq3cubK6EM3En/9s3SdY///857od\nH62sEo36yOstI1BKJP736q2TdX1GTflMGvNa4e9hayJRnyFQpGJo0+NmWSilAiIyHlgIGMAMpdQm\nEXnIFm6uiJyF9YnWo4HL7G9791VKfSUiDwNr7NM9pJT6Kl6yQvXearRe0JnnHGDRDL873mL/8a8x\nc3
0xgDXuYvtgCKRgKmtEd/nHVtZUQ7+FEQ8i9ejq0qNpae6D+sjrlJEzHsc7Jide9xpeJ+vS625K\nd11jPv9EHxfSEOpqOSXaexXXmIVSaj7WJ1m96+73/F6D5WKKdOwMYEY85YuGU3GdQKE7nXNpIVN2\nXQ1jBkBJDmQUQPoq/EHLT5FipFDRazlmkh+faZCSItax9vTnhYUw+aXGe4kLSwsbpITCGxaom7kc\nPogvNTW0vBKN+ro7xoyBvXthwQIr2N2UrpK6NP5eRe8EjuNJa3cfNRZ1VeCJVq4tOsDdFESczjlQ\nQGXQHtGdvsrdN8VIIbdfLv279eeNzW9w5k8W8e0n/SFjKaSdDGS756vwK4ykAM/M+cSdjDDkujEq\ngMYKnHt7dJMnx96jCR/EN2GCtZxIgdVw6vrShteBp56ykiIiHRtPH3Msve7mCGwnykR3LYG6WE6J\nVq5aWUQhkimY86scko1k/EE/YH2v+/LTLnc/uzrxnYn4g34WsxjVXmF+YfLCP5N49pJnKS8YR4Vf\nYQYF04Rbn32V4qTnyO2X6zbyXgVg7DqPS5Ifo2vfT8i99ORqisCZTr0xA+d16dF4B/GJwPr1iWU6\n10RdXtrwOlBebvXYq1mcCZCB1Fyui9bsPmpOEqlctbKIQsTpnNOzKRhTQN4GKyXG29Df8p9bqiYf\n9BAwA4yfP57bk4cBGSBWrCPYczFT137ArA2zXKvAVQA7zyI4az75wRQwTmfG+kso+OPkEGWQk5GD\nses8zM/PxThxRaOMGq9Ljya8fEaNguXLG246N1UWSE0fB/Kuqymmk4gfEGpo/OlIoznrWUtDK4so\n1Didsx1/8FJYWsiM9TNQpWeHxDIcKndk8dd/paOUD6QShk+E9FUooCJQwaSCSUzKmeSOGD9cMhQV\nTAGVBEFF5efnVrccyrKRvMXgF9RyRZ7PgNyGV8hYezSRyiczs2EvRlP10CNdByJfO/weI7nqEsHH\n3ND4U31JpMYwVlniUc9i/TJhc5dRfdDKIgZibTgLSgoI7DgLZi2CYAoYfmvSQUdhlJxPsNIHShCf\nAYeOc+0PE5N3t73L8p3LWZy7mCnDp3Bryb8IGlUZV8knriAnY3LoNQsgUGmgTKj0w9SpMGtW01bI\nSJlkDbl2U/XQI10HIl87/J4izcybKD7m+saf6ksiNYZ1kaWx61ldvkyolUUrJdaeSk5GDr4dhzA9\n1gAlOVXKIqPAUiBBhfJVQsaSkOMVisOBwzy24jEOVh5Epa+EMcOQkqGc9ZODTLl5cjVrpq4pnXUJ\nnDdWmq9TfqmpNQeGI91TvHvoNV0n2rVrm5k3kXzM0DRlmUiNYV1kaeyyqenaiWBxNgZaWYQRrhjq\n0lPJTs/m2Vs7MH6ZIhgwEUPh+/FKgvgwSwdZimP4b+HQsdVcVA4KRf6n+QiCQuHrsZo2vTYwJdfy\nkdzyn1sA6N+tP+UHy8nJyGHx4mzy8mDmzNpTOmPNnGrMqUnc7K8KKwju80GbNlHKMY499PDnG+k6\n0a4dHtSP5TvqzUVTWDtN2RhG67jVRZbGLpuarh3JNZjIqeU1oZWFh8YIWo4bmUnmEmc6iBT6//QZ\nFmxdQP6s8RFdU45SCEeh8OEjq1sWA7oNYOMXG5mwYIKbgeUc2zapLYtzF/P889nk5to9+N4bKQj8\nB0pz2Li2A28sKGfUiFTKU2PLnGrMDCtvwwqh82/VVo6ReugR/cF1sIBqUvzh14lmHbS0nmK8rZ14\nKySvZRopLTuWDkBtsjeWvLVd27lOIrns6opWFh4iKYbaGobaejmzZtnHzMok86JKS1GEuaZ84nOm\nNwGsFFzTM1u7iFC8t5g1u9fgEx+mCp3JXaGoCFa4jXl2NpBWZRVI2U8IzHwHAr1ZNF1xUjYYpy+E\n7u+7M+ZGwgmwO5ZFLBlWNTXaTvl5LYv6NLARg9FpsVtKBSUF7PzP
tfj9PRvsLomr5dMMbsKQ89Yz\nUB0vheR97iKxTPjZvK7AaNdOJJddXdHKwkNOjvWlPNOs+mKe0zA4E8c51NZDCK8QJ3TsBkl+CFiB\n6iFDFMecMpJ5W+ZRqSoBaGO0YcTJI8j/JN+9RlAF3anPvVOgezHECGnMvWm3FNxrTXRIEijF1pWn\nk7Tmv9z85CsRx2w4ZKdnV5uapLbGqTa3lbdhDY9Z1KXBixiMPi+6BRQyZuXAQpKSFwNGiMKqT8Mb\nq+VTF+LlJoxZAdWx1xsPxRa+r/e5+3zWjL9gKY7U1NB6cfiw9Z7G023ZUFqaVepFK4swnI5+MBiq\nIBxLwck0qq2HEF4h7rqtGyOu2ui6gzIHXsGkgkmuAhCEESeNAFWzWyoSgnB79u1uY563IY+93++F\n0myYtdBWFAagAGsqkkClj235uXAmoXMCU/1FjThI0Gdww5k3hIwtiea2itiw1tLgRWpcUntvBONU\nRBkkJUNOjgFp0S2ggpICKkoGYG4fjOq1nHF/e5keB3JDFFZjxGfq0tDW1HjG6v6ri5vQub+KQAU+\nn49nL3mWcQPHRdy3Lr3eeCi2SPWsf+9bSUnJdMt1wgT4+98tGSdOrPoWTTBovbszZ0JuHVLHa+0E\nxcFlVJNVmkipxzWhlYWHgoKqShcIVKWhjhlTN/dUxApRmEl5BsBG9+U1MfGJjyRfEm8v/pLg9vMg\nY2/EwHckFIonVjzBqrJVFJYWUmlaVopsv8dye5EEBLAUhc8+Slj0bpCly2DJe0aI79/7Uk8ZPsUN\noHsbp2AwyNS1U0MGEaa2T7VcaqiY3Vbec1YEKpj4zkRO6HgCAAu2LiBgBqoajG79mfDRBIK/tubi\nMn+8EtIeDbGAUtunUlBSwMYvNrpyZ6dnk1p+Keas31qTOvoCbP76cxiW506/UlvDW5cecV7+Dg5X\npKNMH4crFLm/3c6d9/5QbSqX2hrPWN1/seznut6+2cnh7f1RJUMwMwoYP388mcdnRryfuvR646HY\nItWztkmzmPLKBxQvtMrx229DXVHl5XDDDda76ry3XiVXX4sYGu4yquna4Z2nlhLH0MrCQ01pqFD1\nEjnfYYba/dbegNYtt1RlKvmSTiP46wGYaSvw4ePCXhfSft8w8v8ZOQDuw8epx57Kx19+HFFmE5Nl\nO5ZVrSg9B/VNOvgCYFpur2N+/hBfbesJe/rD7ixQSVRUVJKXX0Z2dk8KSwuZVDCJimAFpjKpCFQw\nfv54TGW6iiPFSHFHpjspvs4I9onvTCRoBvH5fEwZPiXiSxn+0jgNnqM0V3/gg5LT7CyxCgC3wfCJ\nz7LC7Lm4KsEdwOicz6uABcHwGdb0Kh+Pw2cqTCUQNFj2xmkse6uXOxo+JyMHw2cQDAZRSrF692r3\nS4ax9oin5W9k+pJVKPk1kIQyDbYW9eB/fumH1zaSOfB79/4LSgqqytkTb4LI7r9IhO8HMHn55BCX\noTdupWa969atwNiLQhViHYLDjvUKVjaeV2Gltk+1vuHSPjVEWdcl/uUORg2rZwu2LmDhrEz3/fMZ\nJqZSVRYmnhhhmHuxVmXgUU5OfY4Ub/O+99OmWQoqtfdGylP/U+NzqotF1VLiGFpZePDGJ5zG3fGR\nTpkCxcXW+unTq9xRtc3q6fQYHOUDoEjCt+MCJH0VKUYKk3ImkffMCSEBcN+OYSRnFBMwA6QYKZzf\n83w+Lf/UDXCHB8JdSs+BWYutc/kCMPAF6JfHV+mroI93u6VEyFhKYenJ5MzKcbOsBAGxpidRKPxB\nPwu2LCCjSwaHAoco/aaUoAqiUMxcPxPAmpIdE1FC+cFy997z8new97hXWXD4fvdenJfGafAmFUxi\n0dJvq+QOU5YKFTFes2jbIt7d9i6Dew6mz7F9XBmcY5zpVZ7JzKZNSqbnGRgQTHZHw+dk5LhJBiYm\n+Z/ks2DLAq4/8/paGxKHafkb
ueXqkzEre1tl3r3IVcgEFA/nLWf/R79z7//nvX/uPkdTmaS2T42o\nTCNZSd7GOrdfLvcMvidio+RtBGXbYDDbgDIgqDB2DHMb7Gn5Gxk/+jQClQZJyUF3Usuaxud460my\nL5mfnfwzunboSv9u/Zn4zsQQa7mN0cZ91rUpNi/Ovnkb8nix+EUqzUoUircXfodZYaJMHwoTGfAi\nSgUJiI+N+7IZNzIzopKryapxyvtAxQH32grFtLc2sved1dx13SC3s+dtD6ZNs9OlfQplnIhvzDyS\nek6q5pat7dpueXqUdCSLLl4JDA1BK4swnEqSm1tVSaZPtx7imDHVv8McS+aDoyhEoE2KMOXWX1Ce\n2q6qIoyEmU8F8fuD+JIUz912NZkDfxbygs3aMCvERbRgywLyP80PuV7bshEcdpSOqaDzzlCXVvoq\nqyHekItPDPp3yyZvw3PV0nGVsnp1UvYTVEkO+Tvfg/SP3e1OXKUyWMm6PetI8iWhggoRsRq/Qhh6\nQZCKiu5gjIcxb9pTuFd/YUf1GcW7s3ZUTWsSENiQW6Mr7vj2x/PFwS8A6wVftmMZK3auIMmXhBk0\nQ+I9ATPAG9/dwZRXnqB4YSYvzlBUVgZCRsPnbchz3XcOFcEK1u1Z51o0CsX0ddPp361/iL+/sLSQ\n255bgFl5f1WZdyuGfWe4CrnsmJcgaFlKhwKHeGXjKyFlXbyn2J140nm2kRreKcOnhKROT183ned+\n9hzlB8urNUre3rxx4gpkBVRWKowkeObWX5CdnunKHvDfD8pHpT8YMqklZdnk5e+AjKXkXnoyBSX2\nTMs2lWYlb336Fm2T2rL3h70h86GZyuRw4DAT35nIgG4DalVskZInnMZx6tqpKBRmz/dQvntAJWNK\nJXRdAwumEAymcOvVJsVT8si99GTuuSf0ZYxk1XhjOCEdrtJzMGctIj+YwoIXg66L1hmBHQg46d8K\nZYoly/bB+NNWVHPL1nRtt95EcDuFjMOIIcuvOZSJVhY14K0kjnKAumUyhJuxN9zgBN8ygSpfdna2\nFT+wKothbyekEoS7J8YNHMe0tdO45T+3uJXen74IjDs8lkNBZMHWj0GZbbj16iDp47+BTlWbnHNJ\n6U+Qfy3GrEwC4w+WkgFUSQ5Gr/dRaSsxMVmze41rjQTNILfNv41L952J35/l9mYpyUHSP6j2wjov\nw7WXP87LBQEIGoAPiq+HfnkRFcbXh7+ulgQQVEEuO+ky3v7s7ZCkAYXiv9v/y3LjbBbfu5jc3Gwe\ne7mI3ce8Qs6Qs90ebCSK9hRZ9+W5xvj54wEo3lNcVV49N4Jxtx0aUtB1HTLmJY7edyVfd30TlVYY\ncl6v3Ek+6/XzWjAvrnsxxEoylYk/6OeNzW+ENNaOPM9c8ozrznOUNcCYfmMAyL0+F8Y6dSvZrVsF\nJQWYPd+zZA9WTWr5j7WreGHuJtSsdwlWdgfjKl4oHs6lF6SS5EsKUayOm+jtT9+ulpShsFx6q3ev\nZub6mSwZsyRibxuIGIDP7ZfrdpCk52qCYy9CbR+CZCxDdgx1Z0kI+oP8Y8ZBZpTnuD18wLXAvLG3\n7PRsJi+fHFK+LiU5rnXv9wd57OXVHNx6H6P6jCInZxxJyUGCQQUYIIGQ98uxwJ37cWJoY/qNYe/3\ne+naoSsbv9hY5YosyK7mdrrnHiDNjjNt2OlmNB4qGcpjbZbz5h0eq6QRB83WhbgqCxEZDjyJlZLz\nglLq0bDtbYA8YCBQDlytlCoRkRRgKpAFmMBvlVIF8ZQ1EuHmYW4uVQPfUqvyvBtjJHK0/OxIExeO\nGziO4j3FVT2wtBVWo14y1JpKxBP3cHzAzkuhlEGw0qRkQ08YHOGCJTmYgeSqBn9DLqwfA8EUfCnQ\n/647WG08WXVeu60ImAHyD09EjPesY+2Xqvdxvbn0lEvdoKu3gfyAKZw07FS2LhoKGGAa1j3YY1
EE\nwVSW1RA0q7ukkn3JfHX4qxB3VfeO3dn9/W63sbVeVFjYbRiHA4dZvaJ6xlnvY3sjCB9/+TGmMvHh\nCxnfEjAD3DrvVvc6PnwYPQzM4RNh/jOgfPDOkyTfcAmTJ3Vi4jvF+IOWHzPclWaIwTOXPEPm8ZnM\nWD/DipmgKN5b7FpqTvwF4HDwcLXGOqiCFO8p5qcn/tRSlGaQW+fdiogQNIOkGCnWSP9AATm/CnVl\n7f1+L8k9i6gYcyGUnB8yo0Bg27lQabhu0UDxNeRv24lkfEGXkzdz4HCV+0YkevZeRbCCvA155PbL\ndS1An/hYvWs1+Z/ku1aJaZrcMs+aoWDcwHEhyQsTFkygMq2QZCOZiefeyV+XQrBSYXUuxuLvN4up\nwanWRJ5KueXUxmjDkjFLaoyV+cR+xr2WY9pT8fiSTPIP/xa2rWLRtkXcde7n9LuzgtUr20O7/dVm\nYBAEEWHT/k08UPBANUvV6bgIQoqRwm+7zwHjkpDMvmlrpzF+/niCKmh1IpyMxmAK+Uv9/F9qPn+5\nfiTQuINm60LclIWIGMCzwEVAGbBGROYqpTZ7drsR+FopdZKIjAb+AlwN3AyglMoUkeOBBSJyllIq\ngqM+ftTW2NeUvRAeMIz3ACGnB+a6AdJX4UtfjeEzMJXhujaK9xQzc/1M/BnLUJ7JCckosGIZYQpG\nZbwHvj+ASrb2A7fnFagMUrEtG05+MrJQ6YWo3KEhM+9u3g+b92923VhOp12h2Pr1Vki/j6Q2SwlW\nQlKy8LPhXSBtJJRlQ0kO8yvvInDCcus4T9vU59g+/Pac33LrvFtDRPjx0T9m3w/7ADB8Bqt3real\nD1/iUOBQNXGdkfATz5nIhAUT3MbP8Blkp2Xzfun7KGW52byNvokJJsih46yZhFUSYgo3dJnFuIE9\nyTw+k7wNeawqW8X6fetDrnfzgJvJPD6Tx1Y8Ruc2ndl/cL91TmVyY/8b6dG5BwcqDvDXlX8lqIIs\ne9+Pb8ddnDnoKza2mYZCkeRLsp5p0O/KHFRBt3wqghVuuaQYKUw4e4J7PrCU7KCzg6xJ/0tog59R\n4M5hhi9oWXpmEsrwc8COJzlJBL/L/h1TVk0JcWVG4sXiF9n7/V5X6VealdXcqM79ezO2HAXnKE1B\nOPGMLxg44kNWv90flNidixxU+ioqg5Uh9+IP+snbkMdjKx5j93e7uXHAjW5cxOn1u9bIgNfZXHQc\ny+ThEKv28RWPowwVuVNFVYzs5Y0v17jd+V8RrOCxnVfCr89xM/s2plzHbfNvI2AGXJmPKvspP3ji\nmE+8UsTIC39ULWnA8Bns/GYnhaWFcVcY8bQsBgFblVLbAERkDnAF4FUWVwCT7N+vA8+IiGCFY98D\nUEp9ISIHsKyM1XGUNyLerCZnPpeashea5StldlDwsVeX8/bC71AZS2iTsa6a+Q2WYplUMIlFeHqT\nEDm47MQ3nAYfsS0LaxLEDW2nhMgRYr1Ata8IOoRbIi7pqxhw112MbDeF1N4bmbhpEhWLB2D+czxi\ntsFIXoT8ehhm2gprKhTbl//C5S+QtyEvpBH34eODXR8QNINuLztSw+RlwtkTKD9YHmK5mMrk/Z3v\nVwXOVfUetIlpKVnjDxC0MnT6Z3/rZgbNWD+jWkMqInRq24nBMwdXk9vwWZaIkzllKtNNTDCDKXy4\nLMA1j/dl/9FzaZ/SPqILKEQ+u3GuCFTwxMonQmYBCJgBTuh4Akm+JDehwXkW7rP/pgesvdmOJwEF\nD0DOg5x1tskJHU/gsy8/CykXR4lc3fdqlpYspey7MoAalYP3OK/C8/aWC0oKXPkqg5WMnz+ewHFn\ngfEuYrZBkkxUxjJAqjLnnDIVH9PXTXfXrd69mrvOvSskBugGpy+FIV8OAbvRdoh13FOd8GT2Pbnq\nS0wztB/8Q/d5YPzO7dSpjCUUlBwVkhzy2IrHePuzt5m2bl
q1mEk8iKey6A6UepbLgLNr2kcpFRCR\nb4BUYANwuYjMxho6NtD+H6IsRGQcMA6gR48ecbgFi3AlMGVK5NiFV4nEYzRpjZRls/D+bJT9qdYp\ncz5h3MDqn2rNTs9mUs4klu8cxuH0VdZLsPzuiFORACEN/pCeQ1hGdZcFWC/685c+HzHoDpbLxWm0\nauPGK/qQebwnjXf7YNdlFvCbyPbBqLT33ZTjSTmT2PjFRqavmx5yrctOvYy5n8x1M7SiXVeh+Hvh\n313/v9O4xyKzVU6FyNiLMHYM4/ZrBjBx07WWr91WVNWup6zxMeF+824du/HlwS+Zvm46szbMYsLZ\nE6xG1ONPNysVL88twzfkv/jKzoXt9+DLWIKvh6UcXXlLz0FKhuLrtRwzbQUiUqUoSs+xOwHLmOeb\nR9AMug38nI/mhKQpG2XnEVw/xlIUGLDtQtgxhLXyU1anVX/WIsLVfa/mtU2vuT3lWAaa+sTnlrlP\nfG7spbC0kJ3f7LRcM6Z1/oAZcGdkViU5qIylkG7FhryKQhAGdhvI6t2hfcx/rPmHa4kfDhx207AL\nSgoiPq+Q+7MV0nFHHWcNgK2FJF8SF2RcwKJti2rdb/OXm6379xaRrbClZCgqo4CUnuvIyXgi5Dhv\njK62jL3GIlED3DOA3kARsANYCVR7ikqpacA0gKysrDiof4twS6K42MqMgtDRojk5DRtNGk6sozod\n+cygICRT/nEmjIy8rww1L44AACAASURBVDeV8UDFAf5a9r77zQxfsolhz5Lr8/m4uu/V7P9hP6P6\njCLz+ExyynLwpxdWO+fgHoMpP1jOiJNHMH/r/JA03GQjmXO6n8OK0hU1TlniEx93/OQOMo/PDM1U\n8bpDjEqk1zJ8YrgpxwC3zb8tJKh984Cb6d+tvzttikJhiFHt2pGC5OUHy0NSN2uSNxIn9N7BWRdu\n5DM2ug2RT/lCerqOKyXEAvPgfKpXoTgUOGS5P1DVyoGMAsydgzBnvQNmG3xJf+Dqx1/ktW9ut/zl\nToq02Ybg0goYcyG+nkX4xEfljoGuJakMP5WOW0kJfY/ry80DbnZjYIYYXDbsWN7mpwSX/BG2DXNd\nbcHtgyHt/Wr3oJRi9kezXcUkCGedcBbFe4upNCtxPkF8SuopVfdnl3/Pzj0p+7YMU5lMfGcin3/9\nOX8v/Lvrx7/slMsATyNZg/XqkGKkkNMrh6LdRSGK+Vv/t1Xyoli09Dve+9cCLhjqi3SaapjK5MuD\nX5LsS64Wn3AQhEtPuZTd3+6O+ZxASF01eqxBpa+2pUwO2T/cmnZS2cNTeBuTeCqLXYROKJFmr4u0\nT5mIJAGdgXJl2bW3OzuJyErgszjKGhHvbJferCbvVOC5uVX7Z2fXPpq0rteuzaUVLU+7NhxTdvLy\nyVaPbMwwpOQCxo06ldxLH62WkldYCAUvwdOnF1Gc9Bx7v9/LvC3zCJgBknxJfLDrA1aUriDFSGHi\nORP5e+HfCZgBd6LE5TuX19qzFIQubbq4gTsTK7jcrU8puzyusMuHdWVQ94dDMlu85nuSL4ncfrkU\nlBS4gWmf+Lh5wM2s27PO7WF6s5yc5TZGG1Lbp5K3IY91e9aFNPBXnHoFXTt0DXFnhLPru13s+iS0\neicbyTw14ik3e8oZjxDps7uGGJQcKAlZV90tNNS16mT5PXa6sYFZafLK3N0w2HaflAwFsw3KNKyY\nU8n5BNM/QERCsn68mWqO77t/t/60TWrrumi6dugK6W9DziTYMRgxBV9SEHotr9Z7c1xQ3t55ki+J\nGwfc6GaSmZjM+2weu7tWb0R3fLPD/R3uNvMH/cz9dK4b9I/FWjn6y0t4/NEkVMbZiGNJh1N6Dsz6\nL4FgCouW+GHMwloVkHOOgBlg5KkjGdR9EKt3rw6Z083Z761P3nLdirFiKpOenXsiIuz8Zqer5AJm\nwM22yvvPFpYt/Ql0WB
/6JU6zMq7B7ngqizXAySLSC0spjAauDdtnLjAGKASuAt5TSikRaQ+IUuoH\nEbkICIQFxuNOJNdTebk1inP69OpfV3Ma7tzcyKNJ60pNcZFIsoXnaceinELM+x5rSOm1gdxLF1fL\nugq9ViaLFz9P9qWh00k4jag/6Gf9nvWu+8ZUZmRXjusGKQhJqQVCctPvP/9+JhycQGX6ByQbydx1\nbuiLkJORQ5ukNm7a5TOXPONub2O0cQOAADcOuJGNX2x01wlSfUqRsCngnbjIXefeRXZ6Nrn9cl1l\nsmb3mqgKcMRJIyg/WB7S23OC3jPXz6QyWInP5+PSUy7lrU/eqv2BeXrR12VeR6mYLFvq9/i038Px\nYxi9llsWhZOckLEUxO69RrBSjm1/LAcOH2D6uukYPoNLTrokJPA7a8Ms/D3WINcPR20/H5WxhKSe\nazi3+xBWlK5wg+1OOTrjRJxnUn6w3HVJgdWohbuGIhRgtVmWvYMuaypzrxtu76yX7FjcH1BjLsTo\nsdpS9m79W2q5VSO4YY9pewwHKg7UPhC2LBu238WI3htZmLTQ7QQ4cigUSilGnjqS3d/tpnhvMaYy\n3frnTUxwUKgQpem9t9W7VvPAv96hcuYC+75GhQxgDZ9UtLGJm7KwYxDjgYVYqbMzlFKbROQhoEgp\nNRd4EfiXiGwFvsJSKADHAwtFxMRSNL+Ol5w1Ed5Yl5dbudCFhaHKIDW14Q13JGqzFvLyqkaFe/O0\n6zJ5mnfCtpsH3Fyj+RpeDnl5zr1lc89gK1PFGywc1WcUy3cuD2mUQ14K7yhzw88Vk5/hrqsHu9cO\nH0+SeXxmzYOPyrIZ8+3H7sAx73Ynx33elnlMXTuVZCOZp0c87Qb9gZDzTl4+OWQcA8CPu/yYO8+9\nM2Q6Dic7xzuaORIKxbwt83j7s7erjVx3FI930OX8LfMjns+Hj2OPOpYvfvjCXbf/h/207bXfHWDp\nRRAGDvJTxMVWzCdjKb4eH1QNtEz/AOVNXEhfxZcHPcHlkrPIL+hNyokrye0HG9d2IPOzVzgh8zO6\nXr6d6esmY6ogQdNg+EnDefTC6pao88ycqT9S26e6LrZweh/bm61fbXXdOYYYKKWqKYracKw/xwUa\nHuexZkW4gN+PHszb//2ST//1HCqQTFJykF/+fiWvva8IVAbtr1cWAHDTwJt4+oOn3diTqcyQmELy\nriEs+NfvebvS6kRNeeUDylP/Q2r7VDfzsDJYiYgw4uQRjBs4LmQgHRAyUt3F05EKT33P/zQftoXG\nGKXkArAtQ29nKR7ENWahlJoPzA9bd7/n92HgFxGOKwFOjads0aipsQ5Pp41kAdSl4Q7H614KVzqF\nhVZj/eKLVaPCfT7L2iksjP2a4TOx9ji/R42VLHxg4f9v79qj5CrK/O/r7plJlF2EwQcKIaCsGg9I\nIDs6iybR4CwqSHbDCugxEQLjCHHJHg8jkaMnKE509Wh4yU4WwjKrKz4wLnJ4GUiA3c4BA4GExypJ\nDCFKNjBr8ICazOPbP+re7uqaqlt1X909PfU7p0/fvl236qu6VfXV96ivZBWcUI1NjGckT/AAKivp\n0fFR4LkFGAvCTxS4iK6RfnRLykpVstHtLwnbSTDpY9DevhiLgyi6MiMEIEK175qPgzM3YssLW3DD\nGTfU5F2p58z5Eya0nft3Yvndy3HCG6ob2Sqhs5dsrImTtOWFLRMkjnAS0EbiVeqlyw8QHmzb9m3D\nZ+74TCXtolmLAAD3PnBzZe8LHl+Cwqd70DHzsUCKWo4DR20CCBVGUUABpx13GhadsSigdxybXyjU\nGr4DRn7wgYO4vO2neHDN3wOjIpTJ3IU7UOx8puY8FN37qexpuGoFRnacira33oNre6/FXRv2o/xg\nG/a94UeViXDeMfNw08duqtR97yt7a5wkTOqmtkIbClSohFF50yFvqqi/CASS9k2gOILz
zjwS1z58\nGf68+Z/AI0WACxgfLeBdh8zDAxvEONv/pnvxeOkvsWjWIHpP6cXCty+sML1wl30oieKPl2PNwSLG\nx8VZLcPPnIAVK6pOJbOPnF3ZN1HpQ3u6gf/qBkrBpt/Q2y3AMX84F8/dclNlIVX89N/izAVH4PZf\n315Np0iGZ51+KLre87W67ORuVgN3w+Fy6lWIrOLTr1kDLFsmGE949GgYe0oXZwoQIQjkWFUuDKMm\nEmvpIDo/usOYVm4HnQquuxs1gwBH6yf8cCXdeeIZWP7fxaC9KHM1nbxhSWxsWl8ZfHvfcR1wxsS8\nBIPurthkHnvhMfzykRLGfzMXB459CENPDNVIT7KUIOeBh57D4/uXYPTND9WoLAp7TsXuOz6BTaWI\nDZwGphj+BwC3PX0bFs1aVAk5cte0k/AzifGeVrgKKxd3VNIvu3NZxeU0VKnJwRdVCfNtr1yMp6VV\n687ybBHmnksiCONPZ6Gj4z5c9O3vY/EZxwN7urHqe3oJeuiOZ3Fw7Z0VxrN6ZA12/uDzGBkpAPQZ\nYMlp6Jj5WEWiDWkKjw0OEdqLgIlMFEDNoqQmJE7fYtz19uvwu21/haV/91YMd/4Bt244KNR1xStA\n44T29iI6O6tHESyevxDf6K56hsh0qRLumt/Vnv7Y2Vlb/+E/DlfUsAfHDmLojmdxy+e7azQQakiQ\nD5e+jjU8HeNMoHHCRYd/DzPe8h+1HoaKS/uHP3C+MeR85mDmlviccsop3CiUy8wDA+I7TR6lUhjr\nlpmIua+v+v/AAHOxWP0fYC4URDpA/Dcw4FbWwABzoTgu8iiOW58rlwUtCxcyd3SIsqZPF/fLZXFd\nLDK3t4t0tnbIqr3CckNamJnLu8s8/arpXLyyyMXTrmDQiGgvOsh9/buc8hlct5XR9qp4tu1VXvjN\nb3DxyiJjJbh4ZZEHHhww5tExbZS7vnQpF64sMFaCsbSbi+0HJtCZBUxtwMw88OBAhebCygL3DPVw\neffEwsu7yzzw4ACXd5e5XBb0U2GUO6aNcv+q7aIdMMoi8FW1n0WVzczc17+rpu3x1rsrvwvFce7p\n3WCkp+OrHUwriTu+2qFNY2wPqS4qBtdt5dKHvsSFC0/l0kXv567F67h/1XZub+egbuPc1j7m/H4G\nBsT4C8ehOobKu8vc3juPacEXub13Hvf176qMX3lsq+2vtml5d5nbv9ou+pLmo/bFJIAwC1jn2IZP\n8ll9Gs0s+vrcJkoT5M4Xfjo6pElQ6kilkvgOGUWhEG8isg10Na0YUOLT1lZbT5WJESWbFF0ZiDq4\ndM+EaQbXba2Z/HR56wa9ykz7+ndVGND0q6ZPmIzkNigWuSZ96UNfquQlM/QsGKYpn3JZ0NDeO89I\ns2t+g+u2ctdZj3Jb+1hNf1HrPGGyDBgPCiOM0quMMy5klF5lKoza+5xh0k/aZmF/LxTHudh+gNsu\nmlt5N6AxacyNahcUUXmaxlCV8Y5xx7RRHhysHUfy2LbVsby7zH0/7+O5a+dyYWWhwijiMlMTPLOo\nE9TJ1NQJop4fGGAeHBSdLmQAukEYpu3rqw7UQoG5pye7yVm9PzBQSxPRRJpc6TbR6Mq8ZKnBdQK0\nlT04WMugBwf19EStWqPSD67bOvE/k1SUscTVMW2U+757S2JGEfWfyztTmVa4wk5SvzgLHBUyY6PC\nKNOCLwqJ68JTmUp/rkgWKP6J+757SyyaTO2lY6Z9fck0ATVlBoyj7+d9mTAKZs8sckPcydSWlzwA\nBgdFh1JVPbbnslJtaCc9B2YYSlY6ul1ota1SK+kk1YpOHZQEJnVC3Ik7Kv3goGDog4PVMtX6pn2n\nuoWEqS1N0kjc8k356FbGJkbrCtc+YqJz+nTxfoulMS6d9dnKgqN/7Tou/vUapjk3cHvvvMwmYNNY\niquyrQc8s8gBWv32YK0aRp1M5cGjYzQ16ou+qpRh
ekaXb1YwDUhXNZuOJpdBnrVkEadtspqk40hN\nuntZTIayitKkmjTVN035urxdJsOosRGVd5L3NDgoVKiFwkSJy8bM0qi/tCpChwVhHDrSzgWeWeQA\n3eReWbEUhQFYt1oL/29ri15pRBmPszaO6pBHec6MwLHDuwxsU3lZD7Y0UpOrWseFNp3zQ6lUlWRc\n6Un77uPYr2x9X4c0k6JpYWbtbzmMiSSMOap/pKXPlVl419kYUPdeAEFMpnGxB6GrS9yTo9MeOFB1\nsRsbqz4T7seIcksN07qezOcaS8qEOOdvuJbtmqdrKPcoF1MgWUTgpGHko3bZh4jar2NrJ9coxmEZ\nNcf3sthIakpr2z+UpD3mzwdKpWo/Zza3i9x24fiISh/SmDR0zu7dgjbAtF9I/6zLO1bLsrWhLTyP\nLg+jq3hM+lLBhaNMhk8jbBY6m4P6W3aHjVptuaorouiqpxRS77KdJY8c1SyuZSWlXUUcmlX1RpQa\nKA8VZpivbN+K8tLTSRaFglkiSkOTqhpzsevons9yDEZJuXEkiHpKFg2f5LP6NNIbKrQz9PRUjaVh\nJ5R1pboBrNPbutgsVMgTS1IPqaTIYyKWEXdA6NpMfg9RE1jcSTSviTfMO4nROYlOPIt6xO2Dcpku\n7yctTUmdCXTj0rWsLOiV6dDZDr3NYpIwC+Za24RuRRVnFeHSkXX52WjIE1lKFrq6pR2EctuYVq71\nlMziDO4kE0Hc9sqq7mnyyWvBkcYW5JKPjCwYXlQ5efVRV2bhbRYZINQbjo+LWE2nnQasXGnXiev0\njYBZ575xowgrsHz5RD12qHNeuRJYv17QkrsOE9X6ZRE80aSjV3W8nZ1Vu5BLWfL7IdLr8uul+417\nmmISPb1ryPqwT+3e7V73KJ18mn6gozmtDS6Kpqh2jWMzkJ9Zvlz8XyiIKNVZ2w1N80XaNnKGC0eZ\nDJ9mkCzicnzdSsQmbZRKE1VdaWnJU5USp6yoTUuyKiBJ/fJcFcZpvzxVdjqVZpQdJYk3Up7Sl0p/\nI2xwSW0DeatidTQkGQs6wKuh6oukYq1ONaLmpeqCVRfcpLQk1W8nhU3EdtkJn3RQRrWJ/C50LtBJ\n65Q0fRLmHZfZJXElzWovRlwbXBbvOYtyXfpQHJdt3f9x0mbFoDyzaHLEedHqRGAztLkg7NxRYTqy\nRpQBVP5PDaKoozsvv/e48a2S+szrbE5pJCfZ604X2E73TB4SWlbPZ0lfPco1GcBt+blKeKY+U0/J\nIlebBRGdDuBqiMOPbmTmryv/dwAYAnAKgGEA5zDzLiJqA3AjgJMhAl8PMfOqPGmtN0Kf9PFx8R2l\nV1Z1ob0ZRCQO9Z8c+OUTpQ+xbkOolw73nqxfDzz0UBCueX6tznrxYn0eoU43DCstI66OWz02N9yn\nwBZ/f12d4oSoV/Xlsh2DSLRNHJvTxo3VvQqA2EdgPVo3oh1tz6htHPeseBfbSBL7RxZ7EeKWa7NB\n2cp22W9iKiMrW6EzXDhKkg8Eg9gB4DgA7QCeADBLSXMxgH8Jrs8F8MPg+hMAbg2uXwNgF4CZUeVN\nFslCXoW0t4tVbHu7eVWQdOepS/mq/3neKqgoF2M5TRJVTlp1kGtcrqi6JW2/tGrGKJWmje60Lsl5\nr9pd6mCzc+Rp/7BJlq6ShSnKg0sZaYFGq6EAdAO4R/q9AsAKJc09ALqD6xKAlwAQgPMA/Dy41wng\n1wAOjypvMjALuePYDNW6Z1wNkS7l69RZaSe9qHJlxmhTtdjo0Kmz4my0UvNIwrCyhO29yHSZ2i4J\n3XE3/enKdWnHODp5XbmmZ02LhjQ2i7hwYUS2skM1M5HeZhannknQDMzibAjVU/j7UwCuU9I8CeAo\n6fcOAEcAaANwK4AXAbwKoNdWXjMyC/WFRq0go+wQYT5xJ0QVNuNdXquvvj6u2cUeSjGue09UyKux\nsC3jMlKTJ1q9
GUWIqLKTLDJcy3R951FMIUrKi/teTJKCLp96eSDZFlRZSJbyWTZtbdHMLuux6sos\nmnWfRReAMQBvBnAYgIeIaD0z75QTEVEvgF4AmDFjRt2JjIJOz6jqt1evFj7/pr0TIUL95KZN4vhU\n1Re9cizk4to9GaoeM0q/HhVTyaQTTeMHH7X3JLRpHDig1y93d4u2++Y3gR07RNqxMeCii4AZM+z0\n6OxAgNv+hyx8/3WI8vuX302hIGwSWdiY4ui8TXtdOjuBJUtEmtmzJ+7bcI37BEwcM0uWROeTxF4U\nByo9q1dXx2mxCFxwgajz8PDE9ovTT+bPF+9VjiGntpPcP1atqmM8KBkuHCXJB+nUUNcD+JSUbi2A\nj0eV12ySRRw1R1x1gLzCUN1Nk6p44toBXFc35bKQJEIx23Y4lHoYUX//xBAHOskijgpA194u70Bd\n6SYJ2Z40jU1NVQ+oqjCTZCfHejLp4XVQvdLmzrVLKHlKg2qf6Omppc8UAyvJyt/V9TkcT1m6u6MJ\n1FAlADsBHIuqgftdSppLUGvg/lFw/QUANwfXrwXwNIATo8prNmYRp8MkFSsHBiYevCR36LiiedgR\nZTWRKS9ZdDa5asadXNV8Qx2uuvfC5IKr0m9q17iMUabN1cU2quwk6oRGqshUqO0QtoW8oXLhwuiY\naDqUyxMDb/b3J7N32NImZdKqu7nO7TzrPSI6erJ0Smk4sxA04CMQxukdAK4I7n0FwMeC62kAfgxg\nO4BHABwX3D8kuP9UwCgus5XVbMyCOdvObXomjmQRpzPKg0OXl+44UhVJ9yDI+nndKYSmyV5tiygb\nTxLdc1iuy94UXd1Vum02qGZiEDJ0kp268k9qX+vqqu1XPT3J6IvyNHRh5HJa1Wah0qguGrK2KYTI\ny0bjyixytVkw850A7lTufVm6/jOAf9A894ru/mRDnLg+SWIAhT7kqs3ihBP0vvA2nbxqtxgeNvvV\n33ab0JszC31rnLMTbHUKy+zsBD73OfE8IPaj7N4trlW6Vq0CRkaq+YTPmMqX7UBynClbnKZwb4J8\nHoKuXrq6q+1roi+0Q6lnLgDp7CU6+1YSqO8o1NnL9AET7WsuWLoUeOSR6u9Fi+LTNzRUbd+DB8Vv\nlxhLCxYIOxkRcOaZQH+//tyR1atFfUZGhO3iwgtr2zOv/Q9522iscOEok+HTjJJFI6GuiOLq5G3q\nlXB1bYuwm3Z1HKqWFi6M1tPqJAub6iLJClDW28e1R5gkIl0aVXoJT2VM4zbtEk7FRHsSmPKw5a2e\nWR4XOu87tXy1LVWPJFsbubZPFv0/ajxl8Z7QDGqoen48s6giSk1j2wRo63zqoCKqDmqdGisrNYor\ns3M5K9wlT5NKIumEHcVkZLpl9Y2s4sjCbVqn0jPRGqeervr/qH0iWaJcFhM9kbCb6PqDbtLVHVRW\nL9fkJM9npe5yZRbN6jrrkQI6MXv+fCFeA+J72zbzMag2F8owH0AMqy1bJpZ74ACwbJlwBzSpvWxq\nEdn90EUEl1VLoWohjjuo7Ip8881CzVAoANdfL0KsJA1jrnPBDOkL6QjVJm1ttcd/XnBBNfSJSa3j\nepRnW1ut+sukxohTTxf1pi2cCZBMPRbVf84/H9i7F7jrLnFc8S23TDxKN6xr+Pv664GLL64eCxs3\nVLqaLml/CTE0VA1BYwsVUhcXWheOMhk+XrKoQjZAhuEfTBsCk3hVLFxYuwLr6qpKL66bx2xqEReV\njZw26YYl3bOqt0u4SSpO3nK+UZsx5bDscnu6rIZN7RRVPxfJKyrPJOpN22ZUF/WYThIwqR2j+qBN\nwlHbyPWdm/pr0pX/4KDeE9D1PcUBvBqq9REl/usi1eoGkc0F1FRuR0f1ed0uaJu6waYWcfX8UAdM\nGnWNziVUpc1F5WJq+0JBfGQX076+2klPZwdypVmnSkurNrO9Uxdmpe4LkFVxqn
1Bp/ox2Rh0/cfG\nmFwXMy7t65LO1l9MCwBZJWaKwhxX7WqCK7PwaqhJCpv4PzxcK+7Lnk3hjvEkUVYBkWbDBv2pfCtW\niDQbN1Z3qOvEd1UtUigIuuT/XTw/TB5GoVdLZ6e7GkEuE6iqI5iFh86mTXY13aZNwCWXCC8mQNAx\nPCy8ur71reou3UKhGl138WKhcnjsMWDz5ngRZ5PsyneBrNIzqZCGhsSOedN7lp8tFsUOe1ldtGkT\nsHZtbbm6d21Sq5rUarooCao3mutOeNd+aEpnO5FPN4Y3bqyNIlwqTYzCrD5ritKcKVw4ymT4TDXJ\nwrbiSbLiy8IAF2c1OzjI/M53VlfccfeFhHmEu8NDlZu8sk8am6ivL5n0pToAtLUJelTjaaFQu+HM\n1J4uiFLPpVVTRKkvbe1q66OqJBeqM3X1c1UZRXmqqfm4OmC49MM46UKYTobU9WkVWe65gFdDtTZc\nJgKXzhu3g9uej6s+SnP4kpqHznNI3VkcZzdtEvrC51R7keqWGXrquGwMM5Xjan9I0wdME6yLus9l\nwZLEBmT632WjXZz+nheTCJ+JsrnI/cekqsrKq8wzixZEmo6fRXmuz7gYXE0uonEkC52NoVCo3ZOQ\nJqx7UulL957UvSlposdGTTRZvy9dfVyfMz0b539X6BYpaSbUOPVLUoZJqlLrEbW/Jqu288yixZC2\n4+cxicQtT85TDTynrpBdJzGd95K6GstaekqTR7gyT7PfQGcUtm3aM9XBNMnmtaKOizi06Izg6sTr\nSq9JRaQiqTpIliBkpwa1X6TdX+MCzyxaDGk7ZZqVT9zyTAMyzuCNo87q6aldtbuoivKe5FzgqkpS\nn1G9p1ziYKmqDfU/ncdQGvWma12SqJZMUHd+mxYnLiov193uaRdVulMj1YWOlyw8s4iFRkz6cY2B\nLiqoODrqRqfNG0loMakvovLSGd1Nk05ax4mousY15sdxRzXlF6aNs0KXyzW5rUbVLbyXdk+LLX9v\ns/DMwohGqZNcVSYujEk3aUQZV7M2RmbpRZIWSWhJstIsl2u9saKkL1t/iTuRmvJ07Suu3nZZMrkk\nY0aVBvKOw+W9oTyzyAVpxdW4Hk6ug6wRq/zJLlmEz8V9n66H69jyd5kI1eeTGJ/lhYqLt13SFXqS\nNtClVe0MrnG4ksJLFp5ZNCXirspcXTXzNN5lJbHkRUO9acmyrCjjr6s0EKWmsUlPeevw40Jni4sj\nWSRFVvX1zMIjU2TVMeXBnsat1bWMRkkPzUBDXoiqm4udIY6qy6RSahYvrDBtHGbYbHBlFrmG+yCi\n0wFcDaAI4EZm/rryfweAIQCnABgGcA4z7yKiTwK4TEp6IoCTmfnxPOn1MCPJ4Uw6yCEXABECYsaM\nbA+JqXs0zgbS4BrKJEtEHe5jiuSrHlQV1Ta2EBtZ9UUTXCLpqvTI7QGI6zQHTDUlXDhKkg8Eg9gB\n4DhUz+CepaS5GLVncP9Qk88JAHbYyvOSxeRAPVbczbCqnyr11MEmRWRtX8gaaYzHpvo2g6u2CWgC\nyaILwHZm3gkARHQrgLMgztQOcRaAlcH1TwBcR0QUVCDEeQBuzZFOjzoiryMn611GM9DQDBKUDvLK\nXydFrFhhb5u8pYcopDm+VH0nQ0O155DYpJRmRp7M4i0Anpd+7wHwHlMaZh4lopcBdAJ4SUpzDgRT\nmQAi6gXQCwAzZszIhmqP3FGPiaCRk029aGj4mcwOSBKNtdFIw+jV+gLNydCToKlDlBPRewD8kZmf\n1P3PzGsArAGAOXPmsC6Nh0erohkkKBsmA406JGVmOvuF6YTDyYY8mcVvARwt/T4quKdLs4eISgAO\nhTB0hzgXwA9ypNHDY1KjmVfoISYDjVlCre9kZJY65MksfgngeCI6FoIpnAvgE0qa2wEsAbAJwNkA\n7g/tFURUAPBxyA05dgAABsVJREFUAO/PkU
YPDw+PXNEqzDI3ZhHYIJYBuAfCM2otMz9FRF+BsL7f\nDuAmAP9ORNsB/B8EQwkxF8DzoYHcw8PDw6NxoFrHo8mLOXPm8ObNmxtNhoeHh8ekAhE9ysxzbOkK\n9SDGw8PDw2NywzMLDw8PDw8rPLPw8PDw8LDCMwsPDw8PDytaxsBNRC8CeC7h40egdtf4VICv89SA\nr/PUQJo6H8PMr7clahlmkQZEtNnFG6CV4Os8NeDrPDVQjzp7NZSHh4eHhxWeWXh4eHh4WOGZhcCa\nRhPQAPg6Tw34Ok8N5F5nb7Pw8PDw8LDCSxYeHh4eHlZ4ZuHh4eHhYcWUYBZEtJaI9hHRk9K9w4no\nF0T0bPB9WHCfiOgaItpORFuJ6OTGUZ4MRHQ0EW0goqeJ6CkiujS438p1nkZEjxDRE0GdrwzuH0tE\nDwd1+yERtQf3O4Lf24P/ZzaS/jQgoiIRbSGiO4LfLV1nItpFRNuI6HEi2hzca9m+DQBE9Doi+gkR\n/Q8RPUNE3fWu85RgFgD+DcDpyr3LAdzHzMcDuC/4DQAfBnB88OkFcEOdaMwSowA+z8yzALwXwCVE\nNAutXecDAD7IzO8GcBKA04novQC+AeA7zPw2AL8HsDRIvxTA74P73wnSTVZcCuAZ6fdUqPMHmPkk\naW9BK/dtALgawN3M/A4A74Z43/WtMzNPiQ+AmQCelH7/CsCRwfWRAH4VXA8COE+XbrJ+APwngA9N\nlToDeA2AxyDOfH8JQCm43w3gnuD6HgDdwXUpSEeNpj1BXY8KJooPArgDAE2BOu8CcIRyr2X7NsQJ\nor9R31W96zxVJAsd3sjMLwTXewG8Mbh+C4DnpXR7gnuTEoGqYTaAh9HidQ7UMY8D2AfgFwB2ANjP\nzKNBErlelToH/78MoLO+FGeC1QD6AYwHvzvR+nVmAPcS0aNE1Bvca+W+fSyAFwHcHKgbbySi16LO\ndZ7KzKICFuy35XyIiegQALcBWM7Mf5D/a8U6M/MYM58EsdruAvCOBpOUK4joDAD7mPnRRtNSZ7yP\nmU+GULdcQkRz5T9bsG+XAJwM4AZmng3gVVRVTgDqU+epzCz+l4iOBIDge19w/7cAjpbSHRXcm1Qg\nojYIRvF9Zv5pcLul6xyCmfcD2AChgnkdEYXHB8v1qtQ5+P9QAMN1JjUtTgXwMSLaBeBWCFXU1Wjt\nOoOZfxt87wOwDmJh0Mp9ew+APcz8cPD7JxDMo651nsrM4nYAS4LrJRB6/fD+4sCj4L0AXpZEvUkB\nIiKI882fYeZvS3+1cp1fT0SvC66nQ9honoFgGmcHydQ6h21xNoD7g9XZpAEzr2Dmo5h5JsT59fcz\n8yfRwnUmotcS0V+E1wB6ADyJFu7bzLwXwPNE9Pbg1gIAT6PedW608aZOBqIfAHgBwAgEl14Koau9\nD8CzANYDODxISwCuh9B3bwMwp9H0J6jv+yBE0q0AHg8+H2nxOp8IYEtQ5ycBfDm4fxyARwBsB/Bj\nAB3B/WnB7+3B/8c1ug4p6z8fwB2tXuegbk8En6cAXBHcb9m+HdTjJACbg/79MwCH1bvOPtyHh4eH\nh4cVU1kN5eHh4eHhCM8sPDw8PDys8MzCw8PDw8MKzyw8PDw8PKzwzMLDw8PDwwrPLDw8LCCisSDC\nafi53P6Uc94zSYqG7OHRrCjZk3h4THn8iUUYEQ+PKQsvWXh4JERwrsI/B2crPEJEbwvuzySi+4Oz\nBO4johnB/TcS0ToSZ248QUR/E2RVJKJ/JXEOx73BDnQQ0T+SOJNkKxHd2qBqengA8MzCw8MF0xU1\n1DnSfy8z8wkAroOIAAsA1wK4hZlPBPB9ANcE968B8ACLMzdOhtiBDIhzB65n5ncB2A9gUXD/cgCz\ng3z68qqch4cL/A5uDw8LiOgVZj5Ec38XxIFLO4PAjXuZuZOIXoI4P2AkuP8CMx9BRC8COIqZD0h5\nzATwCx
YH2ICIvgCgjZmvIqK7AbwCEd7hZ8z8Ss5V9fAwwksWHh7pwIbrODggXY+hakv8KESMn5MB\n/FKKJOvhUXd4ZuHhkQ7nSN+bgusyRBRYAPgkgIeC6/sAfBaoHNR0qClTIioAOJqZNwD4AkQ48QnS\njYdHveBXKh4edkwPTuALcTczh+6zhxHRVgjp4Lzg3ucgTjW7DOKEs/OD+5cCWENESyEkiM9CREPW\noQjgewFDIQDXsDinw8OjIfA2Cw+PhAhsFnOY+aVG0+LhkTe8GsrDw8PDwwovWXh4eHh4WOElCw8P\nDw8PKzyz8PDw8PCwwjMLDw8PDw8rPLPw8PDw8LDCMwsPDw8PDyv+H54gjB3Fee3GAAAAAElFTkSu\nQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f86dWOyZKmN9", + "colab_type": "text" + }, + "source": [ + "Great results! From these graphs, we can see several exciting things:\n", + "\n", + "* Our network has reached its peak accuracy much more quickly (within 200 epochs instead of 400)\n", + "* The overall loss and MAE are much better than our previous network\n", + "* Metrics are better for validation than training, which means the network is not overfitting\n", + "\n", + "The reason the metrics for validation are better than those for training is that validation metrics are calculated at the end of each epoch, while training metrics are calculated throughout the epoch, so validation happens on a model that has been trained slightly longer.\n", + "\n", + "This all means our network seems to be performing well! To confirm, let's check its predictions against the test dataset we set aside earlier:\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "lZfztKKyhLxX", + "colab_type": "code", + "outputId": "b792a12e-713d-4b07-9f8e-de0d059d5cdb", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 298 + } + }, + "source": [ + "# Calculate and print the loss on our test dataset\n", + "loss = model_2.evaluate(x_test, y_test)\n", + "\n", + "# Make predictions based on our test dataset\n", + "predictions = model_2.predict(x_test)\n", + "\n", + "# Graph the predictions against the actual values\n", + "plt.clf()\n", + "plt.title('Comparison of predictions and actual values')\n", + "plt.plot(x_test, y_test, 'b.', label='Actual')\n", + "plt.plot(x_test, predictions, 'r.', label='Predicted')\n", + "plt.legend()\n", + "plt.show()" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "200/200 [==============================] - 0s 146us/sample - loss: 0.0124 - mae: 0.0907\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + 
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEICAYAAAC3Y/QeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi40LCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcv7US4rQAAIABJREFUeJztnXmYVMW5/z9v9yzgEpVR44KIMRhj\nnJ+Ak+iJim3QuMS4EaOJZhSJjQtRkmvQyY0JuS4ImlyMIDIKyFwTjHEUl2gkoq2irTgoCRE1oBcR\nl6ijeF1glu76/VHnTPf0dPf0TPdMb+/nefrpPnvV6XO+VfXWW2+JMQZFURSlvPDlOwGKoijK4KPi\nryiKUoao+CuKopQhKv6KoihliIq/oihKGaLiryiKUoao+JcwInKWiCzLdzo8RGSoiDwgIh+LyJ/z\ncP2AiGyKW35JRAL9OM8RIvJqThM3iIjIuSKyIt/pSEfif5XD8xZ83gcLFf8MEJEfikiLiHwqIu+I\nyMMicni+09Ubxpg/GGO+ne90xPE94ItAjTHm9HwnxhjzNWNMqLf9RMSIyJfjjnvKGPOVAU1ckSEi\nI937VJHvtCiZoeLfCyLyM2A2cC1WuEYANwMn5zNdvVGgL+HewL+MMZ3ZnqhA86coxYMxRj8pPsAO\nwKfA6Wn2qcYWDm+7n9lAtbstAGwCpgHvAe8ApwAnAP8CPgR+EXeu6cDdwJ+AT4AXgIPitl8BvOZu\nWwucGrftXOBp4L+BVuBqd90Kd7u4294D/g9YAxwYl88m4H3gDeCXgC/uvCuAG4CPgP8Fjk9zP74K\nhIDNwEvASe763wDtQId7TyclOba3/G8ALgf+AbQBFcAeQLOb9v8FLonbfyhwu5vutcDPgU0J5zva\n/e0HfhF3f1cBewFPAgb4zE33Gd7/2lue3W23A3OBv7jnfQ7Yt7f/JMm9mQi87J7jdWBy3LYA9jn7\nD2LP2cS47TXA/e41VgJXec9Fimv9GXgX+NjN/9cS7ulv3efkY/fZGApsdO/Tp+7Hcf/PO+KOHenu\nU5FpnlKkbx5wQ8K6+4CfZfierEiWHnddCPhx3PJ5bho/Ah4B9u7rf1eon7wnoJA/wHFAZ/zDkWSf\n/wKeBXYFdgGeAa5ytwXc438FVALnY0Xqj8D2wNeALcA+7v7TseL4PXf/y7CCVuluPx0rdj6sCH0G\n7O5uO9e91k+wojg04UE/FitoO7oP7lfjjm1yX57t3RfiX7ji7J6jw027H7gQW8hJkntRCazHimgV\n8C33BfxKXP7uSHMve8v/BmA1VpSHuvdhlXt/q4AvYUXkWHf/64CngGHuMf8ktfj/3H2Bv+Len4Ow\n5imwAvHluOMC3nkyyPPt2ML4G+7/8gfgzt7+kyT35jvAvu5+RwKfA2MTnrP/ctNzgrt9J3f7ncBd\nwLbAgcBbpBf/89xnwavYrI7bNhcrkHu6z8M33f1G0lNIu/3fiftkkKdU4j8OeBP3GQR2wr5He2T4\nnmQk/tjW/Xr3f6nAVoqe6et/V6ifvCegkD/AWcC7vezzGnBC3PKxwAb3d8B9KP3u8vbuw3ZI3P6r\ngFPc39OBZ+O2+bC1uCNSXHs1cLL7+1xgY8L2+Af9W1hRPxS3Vu+u92Nr5AfErZsMhOLOsT5u2zZu\nHnZLkp4jsDXG+PMvAabH5a838U+Zf6xYnxe3/ZAkeW4AFrm/XweOi9sWJLX4v+rdyyTpSif+veX5\nduC2uG0nAK+k+08yfDaXApcmPGfxIvaee14/tkDdP27btaQR/4Tr7Ojmfwf3/9hCXGssbr+R9FH8\nM8hTKvEXbEtjnLt8PvBYmjwkvieZiv/DxLVQ3fx/jjVf9vu/K5SP2vzT0wrs3It9eQ9sE9jjDXdd\n1zmMMRH39xb3+99x27cA28Utv+n9MMZEsc35
PQBEpF5EVovIZhHZjK3F7Zzs2ESMMY8Bc7A1t/dE\npFFEvuAeX5kkD3vGLb8bd57P3Z/xafbYA3jTTXeqc/VGyvwnbse+hHt498O9J7/A9s10pSchLanY\nC1uQ95VM8vxu3O/Pce9dmv+kByJyvIg8KyIfuvk8ge7/favp3pfiXWcXbK01o/sgIn4RuU5EXhOR\n/8MWkLjX2hkYQv/uU7Jr9ZanpBirxHcCP3BX/RDbovLO29t7kil7AzfGnedDbMGzZ1/+u0JFxT89\nYaxt+ZQ0+7yNfUg8Rrjr+ste3g8R8QHDgbdFZG/gVmAK1hyxI9aMIXHHmnQnNsb83hhzMHAAsB/W\n1PEBtmaYmIe3+pH2t4G93HT391xJ8x+3PT6PbwL/a4zZMe6zvTHmBHf7O/Hnc9OSijexJoi+klWe\nU/wn3RCRamy/xg3AF93//iG6//epeB9rEsr0PvwQa+44GlvbH+klA/usbCX5fUr27H2GbSl67Ob9\nyDJPYFtX33Pfi0Pcc5HhexKfPlKlEftMTE54voYaY56BzP67QkbFPw3GmI+x9uS5InKKiGwjIpVu\njWWWu9sS4JcisouI7Ozuf0cWlz1YRE5zWxtTsYXPs1h7rcG+zIjIRGyNJiNE5OsicoiIVGIf+q1A\n1G2V3AVcIyLbuy/Pz/qZh+ewNc5p7n0KAN/F1tIyJVX+k7ES+ERELnfHEPhF5EAR+bq7/S6gQUR2\nEpHh2P6QVNwGXCUio8Ty/0Skxt32b2x/QjL6nedU/0mSXauwdvX3gU4ROR7IyIXX/X/vAaa7z+8B\nwDlpDtkee89bsaJ4bdy5osBC4Hcisod7vx1XyN930x5/n1YD40RkhIjsgDXJZZ0nNy0vYguj24BH\njDGb3U0ZvyfGmPexhfTZbl7Oo3vBdgv2+fmae64dROR093em/13BouLfC8aY32LF8JfYB+pNbK1i\nqbvL1UAL1gNlDdZD5eosLnkftpPqI+BHwGnGmA5jzFqsl0UYK0a1WO+eTPkCtkb0EbbZ3wpc7277\nCfYBfh3rvfFH7EveJ4wx7VjhOx77Yt4M1BtjXunDaZLmP8X1IsCJwGhsx7AnBju4u/wGm9f/BZYB\n/5Pmur/DFhbLsN4bC7CdymBt14vd5v/3E9KQTZ7T/Sfx1/gEuMRN30fY2vn9GZzfYwrWBPQutg9i\nUZp9m9y0vIX1lEkseC/DPufPY80gM7E278+Ba4Cn3ft0qDHmb1jPrX9g+7YezGGewD6nR7vf3nn7\n+p6cj62xt2IdMJ6JO9e9bv7udE1g/8T+z5Dhf1fIeL3lSgEgItOxHYtn5zst+aDc868og4nW/BVF\nUcoQFX9FUZQyRM0+iqIoZYjW/BVFUcqQgg2OtfPOO5uRI0fmOxmKoihFxapVqz4wxuzS234FK/4j\nR46kpaUl38lQFEUpKkQk3Uj2LtTsoyiKUoao+CuKopQhKv6KoihlSMHa/BVFKU06OjrYtGkTW7du\nzXdSipohQ4YwfPhwKisr+3W8ir+iKIPKpk2b2H777Rk5ciQimQbxVOIxxtDa2sqmTZvYZ599+nUO\nNfsoijKobN26lZqaGhX+LBARampqsmo9qfiXEOEwzJhhvxWlkFHhz55s76GafUqEcBjGj4f2dqiq\nguXLwXHynSpFUQoVrfmXCKGQFf5IxH6HQvlOkaIUNkuXLkVEeOWV9FMv3H777bz9dv8n5wuFQpx4\n4on9Pn6gUPEvEQIBW+P3++13IGDXJ5qC1DSkKJYlS5Zw+OGHs2TJkrT7ZSv+hYqKf4ngONbUc9VV\nMZOPZwq68kr73djYfVkLAKVYyHWl5dNPP2XFihUsWLCAO++Mzbg5c+ZMamtrOeigg7jiiiu4++67\naWlp4ayzzmL06NFs2bKFkSNH8sEHHwDQ0tJCwK1prVy5EsdxGDNmDN/85jd59dVXc5PYAUJt/iWE\n43S38yea
gpqbuy83Ndl9amqgtdW2FrSfQCk0BqI/67777uO4445jv/32o6amhlWrVvHee+9x3333\n8dxzz7HNNtvw4YcfMmzYMObMmcMNN9xAXV1d2nPuv//+PPXUU1RUVPDoo4/yi1/8gubm5uwSOoCo\n+JcwNTXg80E0CiIwejQ89ZR9iSoqYOFC6Oy0230+qK7WjmKl8EjWn5XtM7pkyRIuvfRSAM4880yW\nLFmCMYaJEyeyzTbbADBs2LA+nfPjjz/mnHPOYd26dYgIHR1Jp54uGFT8i4hw2D74mdTQw2GYOtWK\nuzH2xbnpJpg929byV66E++6z28AWALl6sRQll3j9WV7N3+vP6i8ffvghjz32GGvWrEFEiEQiiAin\nn356RsdXVFQQjUYBuvnZX3nllRx11FHce++9bNiwocscVKiozb9ISLTf92b79GpLnrgbY5c9887D\nD8e2ga35p3uxtKNYyRfJ+rOy4e677+ZHP/oRb7zxBhs2bODNN99kn332YYcddmDRokV8/vnngC0k\nALbffns++eSTruNHjhzJqlWrALqZdT7++GP23HNPwHYSFzoq/kVCX105vdqSNw4kXtxDIdsiALv9\nlFPg6qtTv1h9LXgUJdc4DjQ05KZVumTJEk499dRu6yZMmMA777zDSSedRF1dHaNHj+aGG24A4Nxz\nz+WCCy7o6vD99a9/zaWXXkpdXR1+v7/rHNOmTaOhoYExY8bQ6b1ghYwxpiA/Bx98sFFiPPOMMUOH\nGuP32+9nnun9mPnzjamsNEbEmIoKu+ydq7rarq+u7v1c115rrwv2+9pr06fz2mszS59Snqxduzbf\nSSgZkt1LoMVkoLE5sfmLyELgROA9Y8yBSbYLcCNwAvA5cK4x5oVcXLtc8Jq+mdr8wZp4olFr3jHG\nLnvEm4N6I53NNb4fAnSUsaIUC7nq8L0dmAM0pdh+PDDK/RwCzHO/lT6Q6MrZG4GAHfQVjdpvT6BD\nIWs+Msaaf6ZPt59U505V8CS64J1zTu69MhRFGRhyIv7GmCdFZGSaXU4GmtwmybMisqOI7G6MeScX\n11dS49n842NAeTX5tjZbMDz6qHUBTVdTT1bwxPdDtLXBCy/YQgbStxC0QFCU/DNYHb57Am/GLW9y\n13VDRIIi0iIiLe+///4gJa108Tp2vRq+10ns1eSPPjo2DqA/8YC8QsQ7R0uLLWTOP797QaIdxopS\neBSUn78xphFoBKirq8vAGq2kIxCAGVzOGfyBDdF92anmOggDoRDO5s3cvSnEi2YIrQzjfXbDqakH\nMq+We4XI9Om29RCN2kJmxIjuwj99eqyVoeYgRSkMBkv83wL2ilse7q5TckljI5/cuIAPPxvCdnsP\nw3nnJQ6NrANgL/MWctER1i7T0QHGsD1whHdsBOSSRVD7eJ+U2XGsuHsjhxODyo0fHxP+3sYSKIoy\neAyW2ed+oF4shwIfq70/xzQ2YiZPZru1KxnxxpMMe3IpZt06BLo+RCJdwu/RbXs/Y0GnGoTj9Ql4\n4SXq6pL3K+gAMmWw8fv9jB49mgMPPJDTTz+9a2BXf4gP2Xz//fdz3XXXpdx38+bN3HzzzX2+xvTp\n07vGHeSKXLl6LgECwM4isgn4NVAJYIy5BXgI6+a5HuvqOTEX1y0lEl0m+9w56o40TDu3j99vP15A\nn0R6G+KbJlHJOoQ9byPPs+jvf09+WnUPVQaboUOHsnr1agDOOussbrnlFn72s591bfd84X2+vtWP\nTzrpJE466aSU2z3xv+iii/qX8BySK2+fH/Sy3QAX5+JapUi8AFZUxGLx9EkMJ0yAZctI7CjpKgwO\nOgjmzbO/QyHYvNl+DxkCw4bBbrtBfX36Ib59VGjHgfPOg/nzu3c6p4s8Gr9dPYSULgbwYTjiiCP4\nxz/+wYYNGzj22GM55JBDWLVqFQ899BCvvvoqv/71r2lra2Pfffdl0aJFbL
fddvz1r39l6tSpbLPN\nNhx++OFd57r99ttpaWlhzpw5/Pvf/+aCCy7g9ddfB2DevHn8/ve/57XXXmP06NEcc8wxXH/99Vx/\n/fXcddddtLW1ceqpp/Kb3/wGgGuuuYbFixez6667stdee3HwwQfnNN8F1eFbrjQ1wdatViC9CrkX\niyfjztFgEIFuNv+aYSQX9b6+PH0Mqxj/ntbXw+LF3fsD4renGkCmLQKliwF8GDo7O3n44Yc57rjj\nAFi3bh2LFy/m0EMP5YMPPuDqq6/m0UcfZdttt2XmzJn87ne/Y9q0aZx//vk89thjfPnLX+aMM85I\neu5LLrmEI488knvvvZdIJMKnn37Kddddxz//+c+uVseyZctYt24dK1euxBjDSSedxJNPPsm2227L\nnXfeyerVq+ns7GTs2LEq/qVGOGxDK3tm+IoKax/3av7drDC91X6CQbYPBtk+g137RB/CKiZ7T+MH\niEH67Yn9BTpgTBmIh2HLli2MHj0asDX/SZMm8fbbb7P33ntz6KGHAvDss8+ydu1aDjvsMADa29tx\nHIdXXnmFffbZh1GjRgFw9tln09jY2OMajz32GE1Ndtyr3+9nhx124KOPPuq2z7Jly1i2bBljxowB\n7CQz69at45NPPuHUU0/tCi+dzpTUX1T884w32has6E+aZGvLPYS7sREuvtg2DXoJvJ/zilK6Ib7u\nw+21LpK9p/EBuWbMSL/dI9dhfJUiZgAehnibfzzbbrtt129jDMccc0yPaR6THddfjDE0NDQwefLk\nbutnz56ds2ukQqN65pn4uXeHDIlZaLoJYjgMU6bEOmrb2pJ65XheM01NAzCZe2KiwmE46ii45Rb7\nce05qeYSTpbfdO9xrsP4KkVMnh6GQw89lKeffpr169cD8Nlnn/Gvf/2L/fffnw0bNvDaa68BpJwD\nePz48cxz+9kikQgff/xxj/DQxx57LAsXLuTTTz8F4K233uK9995j3LhxLF26lC1btvDJJ5/wwAMP\n5Dx/WvMfZBLNMRkFbAuFMJ0RBDCA+Hw9VDOx0zhVmIWc4VXxPTo6YOpUnLFjeW52PQ+2Oj3y4+Xd\nm1CmN5NUX2MZKSVMHh6GXXbZhdtvv50f/OAHtLW1AXD11Vez33770djYyHe+8x222WYbjjjiiG6C\n7nHjjTcSDAZZsGABfr+fefPm4TgOhx12GAceeCDHH388119/PS+//DKOm7ftttuOO+64g7Fjx3LG\nGWdw0EEHseuuu/L1r3899xnMJPRnPj6lGNI547DMCXGR/zH/GfMZQ00HPtNGpVk/bX6PQxLDLl9w\nwQCHVvbiQseChsY+fn8sfnTc7n0NSa2UJhrSOXfkPaSzkhkZ9VslMdg/2OrwF99yjoiGeMoX4Ds7\nOjQkHJZoFk3ltZkzHAcef9zamF54AZ5/PtZrHYnABRfY38EgoB24ilJoqPgPImn7rTybyMaNPVQy\nEHC4qtrh2XaHqiq4PtDz3P2J958tYRxCIxxOHBOmdnUA2tutWQrbopSLLoLaWnAc7cBVlAJDTCaz\neeSBuro609LSku9k5BxP42tq4MUX7br//ORyhi+5wdacq6qSjvIqtAFPiQ2U52aH+dKMSWyz4eWu\ngWVGBJk82UZ6CwRsYREqnDwo+eHll19m//33RyTteHSlF4wxvPLKK3z1q1/ttl5EVhlj6no7Xmv+\ng4wnekcdZZ12fkwjezKrq8ZMR4c1lbiC6R3Ql/6uwSgoEs04D7Y67HncAs685Ugq6QAgIn4qFi2y\nefL5cObOhUCwW2hppfwYMmQIra2t1NTUaAHQT4wxtLa2MmTIkH6fQ8U/D3jC+WMauda13nd7BbIw\n2A/WyNhkZpw1axzG+5/grEgTfj9897uw2/2N1j01GsVccCHvycP8hWlcVe30SFuhtW6UgWH48OFs\n2rQJnbMjO4YMGcLw4cP7fbyK/yATDo
N/ZZjHzRWM48mu9V01/8suy0r5BqtjNbGPAWDqVGgzDs9V\nOsyZA7vVhuHB2+JiVkQ5ySzlWB5i/NYQoZCTckpI9e0vXSorK9lnn33ynYyyRwd55ZDeQhMvvTxM\n+zeP5LKlh3UJv1fjl2HDbAS0mTOzSkOmg6hyQfy4r/jwzdGoO1m848DcuTaQP7HQ0VW0M9/8mK9u\njt2oZIWWoigDh9b8c0RvNdell4c5ftY4qugEupt5BGyp4bpFZkM+vH4gjSeTl6eLLsK4cSwE+Bpr\nOeD6w2HfeRAM9ji+psbeEjUBKcrAoOKfI1KZW7zwN8PmhziJzpgnjPstIvDzn+dE+D3yMTI2baET\nDEJtLe2nfJ+q9zbFCj4TtWEramtxHKfr+Joaa0JSE5CiDBxq9ukniSaeZOYWrzUQuaWRH5g7iGJF\nv0v4x42Dp5/O2tRTKPSISZSwsfqqK4HYPeiaXaypCWbMwCFMQ4M1GakJSFEGFq3594NUJp7ly2NB\nLsGK1o+2NnILsYh9EeD/djuAYb+5NKe1/aLAnXOA2bPh1VftuspKG9M6ErEl53nnceKYeq6qcnRA\nmKIMIFrz7wfpOicXL4Zbb7WFQ00NTDILgFhnpx8YdsnZEAyW59y1wSCsXQsrVsDVV8PEifZGejfz\nlluovXgca37SqBE9FWUA0Zp/P0jVuZlYKAx5McxY34sQjZl6Ir5KKgIBdW30OibCYVtielOZAXR2\nsu9/T6HhidoyuymKMnhozb8fpAov7hUKPp/97P9uCD/Rrg7OtXIAL897IuWkJ+VEV6sH92ZOnhyL\nQw32xkyfXmbNIkUZRDIJ/ZmPTyGFdE6IsJyW+fONqagwxucz5siqZ0xn9VAT8fnNFt9Qc++02AnK\nOcRxyrzPn28i/koTwWeiYIyIMZWVPcJDK4qSGjIM6aw1/17wzDNXXmm/e6uItrbCIdEw06Iz6OyE\n3x6/nOm+qzia5fzwJqfr+HKeqSpVqydcG+Rb/if4G0fb9pIxNi7QxRdrC0BRcoyKfy/01Tzz/c2N\nPBYdx1X8kmXR8QBcaxp4Our0OD6ta2QJk2oUcigEKyIO05lOhIqufhKi0fKziynKAKPi3wt9CpcQ\nDrPvf0+hkk4qiDJU2jhjt9CghVsoFnrrM3ne7zC1Yg7GX2E7T6qr9cYpSo7ReP4ZkHG0yVNPhaVL\nY8sVFfDkkz3i2CeeT6NZxuh2L4ib/CCTSX8VRck4nr+Kf65obLQeKx4+H8yb12MgV6KL5+zZGsog\nLUl8YsM4XYPpBny6SkUpMnQyl8Gmubn7cl1d0hG8iX0Izc06t21aEm7YG00hjlzg0GHni2HRIjuV\nsN4zRekbavPPFRMmdF+eNCnpbol9CBMmDF4I5qIkECBSUUVE/EQqqvjTu4Eu4YfyHCOhKLlAa/65\nwqvlNzdbRU8RtydZ9MvaWrX5pyKMQ4NZzmGEeNoE2InuN8jn0wJTUfqDin82JPbUBoMZBWtLDLmc\njxDMxYLn/vmEcfBH4PzdYqE1/H64+Wa9d4rSH1T8+0tjox18FI1aV0TtqR0QEuMo1dfDZV9oRO5p\nxpw2gX3LLTKqouQIFf/+EA7bSUg67axctLVpT+0A0cNMtqYRZrleVbOWwb6UX2hsRckBOenwFZHj\nRORVEVkvIlck2X6uiLwvIqvdz49zcd28EQpZ7xMP1/BcliGaB4FuI6ETvaoWLMhLmhSl2Mm65i8i\nfmAucAywCXheRO43xqxN2PVPxpgp2V6vIAgErKmnrc0anufMIYxT3iGaB4sJE2DZstjyiy/a0lZv\ntqL0iVzU/L8BrDfGvG6MaQfuBE7OwXkLim61etcW8UbwapomPUG4Nlj2IZoHjWAQTjkltqxxfxSl\nX+RC/PcE3oxb3uSuS2SCiPxDRO4Wkb1ycN1BIxyGhkCYnX5xIf847EKWXh4mjMNXFzdw3q1O16xd\n6q
8/SEybBkOH2ptdUQEbN6qtTVH6yGB1+D4ALDHGtInIZGAx8K3EnUQkCAQBRowYMUhJ6511TWGW\ntR9JJR1goG3WQn7/rxDt7U5XTb+1taf/vjJAxE+YvHChnTdz8eKUtjaNnaQoPcmF+L8FxNfkh7vr\nujDGtMYt3gbMSnYiY0wj0Ag2tk8O0pYTvrN2FpV0dM3IVUkH+70doiphknH11x9EHCfW8R6J2Gkg\nm5p6/AFlP12moqQgF2af54FRIrKPiFQBZwL3x+8gIrvHLZ4EvJyD6w4O4TA1K2LZMUAUH/tOCpTt\nZCwFQyAAFTbuvzGGyIKFPcw/2hejKMnJuuZvjOkUkSnAI4AfWGiMeUlE/gs7ndj9wCUichLQCXwI\nnJvtdQeKHiaCpiZM1M7Da4VfeOasmzkyaNVeRT+POA7vHD+RXZfOx48h2hFhU1OIveP+lMRBYtoX\noyiWnNj8jTEPAQ8lrPtV3O8GoCEX18oVyezASU0ECcfdz8m88rUgRw5yepXk/G23er7HYippp4Mq\nniBAfdz2ZLGUFEUp06ieqeblTWoiqK8nWllFBKGNKm6smqa1xwJiVL3DCVXLmS5XcULVckbVW3XX\nAXeKkp6yDO+QTOQdJ4WJwHHwPxHijaYQTxBgRr2jtccCwnFgRsghFHKYEYjNjOa14Px+Ow98ZydU\nVmoUDkXxKEvxT2UHTmkicBz2dpxu5gSlcEj0soov3OOjcLS3J3UIUpSypCzFP50d2LEz7gIBelr8\nlUIjWd9NfOEO3QsARVEsZSf+8WIRCMRc/xzHboweeRTS0Y6prML3hM4PWMik8uF3HHhudpjW5hCb\nRwc480anW0hoRVHKTPwTbcEi1hbsTaT+lRubGNfRZgdzdbTx7qwmdrtXxb9QSdV3QzhM7VT3j36q\niud/v5wHWx319lGUOEpa/BNNAvFiEY3afYyxwTn/56Iw10de6Hb822/DbknOoxQGKX34E0qF2tYQ\ntQ36xylKPCUr/slMAvFiEe8F4hDmkch4qmgDIAJ0UEXlpHoND1DApOy7STOySwtyRbGUrPgnMwk0\nNMTEoqYGfvITu++RhKiinQqidOLj1eFHE71yOrVBhxkzUpgWlIIgaTylJKVCOGw9fRYtsgV+RQVM\nnGj7APT/VMqRkh3k5VX+EkMse7NCtbZaQTcGniBA1F9FRPxIdTVfu8sKf7rzKAVO3PRfm86+nOHf\n3IszbjmSMW1hIhFr6ps/v/sgP0UpJ8SYggme2Y26ujrT0tKS1TnSNfHDYVhxxOWcHLmH+/yncdzN\np1DbmnxnNRUUMZdfjpkVCyLbgZ8jeYpnXTdevx/OPx9GjND/VykNRGSVMaau1/1KWfzTkiAKMm0a\nzJw5cNdT8sOoUZj167vCcUe7+QmrAAAdUklEQVSBPxxwLT9e30Ak0tPrS/t0lGInU/EvWbNPr/zx\njwBdosA99+QtKcoActppXRFZDYDPz49uCxAK2XDc551nhV9DPivlRsl2+MbTw2wTDhN9+50uUQDY\ndMhpDM9bCpUBw23NyR//CF/6EnLddeA4OMTiAC1erCGflfKj5M0+ia6az80OU9s8nejfHsVnokSB\npxjHM9c+QUNBBZ1WBoQkHTjap6OUEpmafUq+5h/v8jm2Lcz+U8ZDpA0x1q2znWp+XXUdMwL5Tqky\n4KQYtJFu+k0tGJRSpeTFPxCwnXrRKAQkREWkHaJRxOfjk7qjeWDsdA3TXC6kjAdhSRR6HeCnlDIl\nL/5gvTkAnvIFiPqq8Hfat3mn2dOp17e5fIgf+VtRARs3QjhMGIemJli40JYLntD3UlYoSlFT8t4+\noRDUdYS53MwgEoE/TNRZ18sWb+Tv+efb0X233krkqPE0BMLMn99T6HWAn1LKlGTNP775/s2XGpkW\nvRAfUTqjFbwy5kkIas9u2eJF+PNmeom2cxghnjC2IiASE3qd/1cp
ZUpO/OPttIf5wizvuAAfBgEq\n6KTm+isg+ES+k6nkkzjzj4iPkyNL+UBqWFwV7BHvJ11nsKIUMyVn9om3057Z0dQl/B4VG1/LV9KU\nQsGr0n/3u/g6O/i6WcktZjIbjzybefNU7JXyoOTE36vUHeYLM5FF3Ud3Am8Fzspf4pTCwXHg888B\nO8pbgF2X/QEaG/OaLEUZLEpO/L1K3dVHh6j2dXbV+j/1fYHV357GmEc0fo/iMmFCz3XNzYOfDkXJ\nAyUn/gDOmkYCm5cifh/4/cjQoWy/4q8q/Ep3gkE4K6EluM02GuNZKQtKT/wbG2HyZFi5Ejo64Lvf\nVbdOJTV33GED+3/jG9b3/4EHNMi/UhaUnvg3N3fZ9w1Yu64Kv5KOYBBOOcX6/mt4T6VMKDnxf220\nteOahGVFSYuO6FLKjJLz879rxyAbBE41zdwrExi5YxAd0qWkIjYg0MGJn+DZq/lrq1EpUUpO/AMB\nGD8kyIL2oI3REsh3ipRCpWfgNgcngEZzU/LKYEWSLTnx1yH5SqYkDdxG3MqtW6GpSR8iZdAYzEiy\nJWfzB3uzGhr0nVXSk9TMHwhYrx+wHcALF6rnjzJoJKuQDBQlKf6KkgleK7FbkFfHgYkTMW4ccNMZ\ngVCIxkY49tjYAOBwGGbM0HJByS2D6XeQE7OPiBwH3Aj4gduMMdclbK8GmoCDgVbgDGPMhlxcW1Gy\nIVngtqVfqOfbZjGVtNMRrWLRSwGm/MFuW7YMXnsNbrpJuwWU3JBo4x8ss3XW4i8ifmAucAywCXhe\nRO43xqyN220S8JEx5ssiciYwEzgj22srSrYkm73r+//tcDDLCRCilRr2ezjEocCz2Dfxnnt6n+RF\np39UMiGVjX8wnplc1Py/Aaw3xrwOICJ3AicD8eJ/MjDd/X03MEdExBTq7PFKWZDsxfNC/XtCv5zx\nVH/UzkVUMZ7lPIvDaad1r/knNs11+kclU5LZ+LdbE6a1OUTNhAC1wYF7cHIh/nsCb8YtbwIOSbWP\nMaZTRD4GaoAP4ncSkSAQBBgxYkQOkqYoqUn24gUCUF0NbW0wnhBDTDs+E2GIr53zvxRi4s+drgHB\nqWr2Ov2jkimejb+tDXw+GBVq5KvLLsJHlPZlVazh8QErAAqqw9cY02iMqTPG1O2yyy75To5S4iTr\nXOuKCns1nDEvgG+I3cHnE84btpQgtsc3nUeZDhZWMsVxYPZsK/xf7wxz8rKL8BPBh6GaNjoWNA3Y\ntXNR838L2Ctuebi7Ltk+m0SkAtgB2/GrKHkjVedazObqQO1ymDULli61wQJXrrQ9vjNTR4jVsSZK\nX2httV7F40wIH5Fuk0/tvsfAXTcX4v88MEpE9sGK/JnADxP2uR84BwgD3wMeU3u/Ugj02rkWN+lL\nFzfcYO0+aQ7U6R+VTAkE4HB/mL2jG+k0lfjoACDqr2D3afUDdt2sxd+14U8BHsG6ei40xrwkIv8F\ntBhj7gcWAP8jIuuBD7EFhKIUBxMmWB9PD2PUkK/kDGdNI491XoSYCKaiEjnxFNhtN/zxk0kPADnx\n8zfGPAQ8lLDuV3G/twKn5+JaijLoBIPW1HPDDVb4Kyth40br1qMFgJIN4TBceCG+aNQud3bwwtu7\n0TZt3oA/WgXV4asoBcvMmbBihZ0oSARuvVUnfVGyp6kJPOF3WblycB4tFX9FyRTHgREjMB2dEIlg\n2tp5oymkYR6UnGCACD4WUz8o8wmp+CtKH1hTE2BLtIpOfHREfVx3aw1XXqmNAKWf1NcTrawmitCJ\nnwuZx0qfMyguwir+ipIh4TBc1uwwldlE8eEjwm8jU/l6JKwzPyr9w3G4Y9LjXCnXMI6nWOgLcvTR\ngzMqvOTi+SvKQOCFbGhrg2m04sNQQRRDO9+SEH+vcnQwl5IZCYGfRtU7XLDYob0dqqtg+vTiie2j\nKCWPF7IhGoUnJUCnVOGnHb+/
ggljNnLmpDC16vmj9EY4bEW/o8N6jYVCOI6Tl0GBavZRlAyID9nw\n4hCHdfOWI8Hz8Ylh7KpbqZ2qRn8lA2bNsrUIY+x3kw3fkI8JqFT8FSUDEid+qQ1azx8ikcGZdkkp\nfsJheOCBfKeiCzX7KEqG9AjZ4DUHUsV2VpR4QiFb4/fw+6F+4MI39IaKv6L0F43gpvQFN164aWsj\nip8N/zGHffP4zEihxlerq6szLS0t+U6GoihKzljTGObPF4d4LBrghWpnQFw6RWSVMaaut/205q8o\nijJIPNjqcE3UIRoFX1usmygfjUcVf0UZIHQeXyWRmppYKJ9oFDZvzt+Unyr+ipJrwmHeaArRsDDA\nioij8/gqXbS22lm7olH7vXp1/qb8VFdPRckl7lDgveZfyUPt4zX0g9INb45ov99+T5iQvyk/teav\nKLnEHQrsMxGq2cJspnK5fzY1NQ4zZqgJqNxJ5iBWW5sf86B6+yhKLgmH4aijoK0N782KVFRztO9x\nVkQcKipg4kTr3q2FQGlRKH08mXr7qNlHUXKJ41h1B8T9+DrbOawjRCRiA8PNn68hoEsNL/DfX34Z\n5uFxM1jTWPh/roq/ouSa+nprwPWorOLpygAidtEL66L9AKVDUxOcvaWRx6JH8qvOX7L/lMIv3VX8\nFSXXOI5V9gsugAsuwPfE48wIOUyenL/OPWXgCIdh7YIwc7iYSjqoIEpFpK3gS3ft8FWUgSAxEFDY\nxoG76Sbr7ldTE9MGtf0XN6EQ/KCjCT8RBDsdo/j9SUv3QukXABV/RRlwwmH4n3GNnNzZzH0VExg9\nN8gll8QG9jz+eP6FQOk/J9aEGcUifBgMYHx+ZM6cHn+q1y+QjwFdyVCzj6LkmHCYbpO6fzSrkbmd\nk/k2y5jbOZkPrm2krc3a/tvaukK6K0VKbWuIal8nAiCCL3g+BIM99vMmBCqUCOBa81eUHJK0dvd2\nM0CXSWDcB81AT3FQipRAAKm2ob2lqiplmOZCiwCu4q8oOSRZ7c6ZNAGzclmX33/TZxMAELEz+eUx\npLuSDfEG/AxCexdaBHAVf0XJIUlrd04QAV6/vplX18NlXM+XeI3Hj5k5aJN1KzkmWROvoaHXw3pM\nCJRHVPwVJYekrN0Fg1Q9+RrHrZ8FwOXM4uxdYLgzM19JVfpIN0+dpE28AlH1DFHxV5Qck6p2N/y5\ne6wbINb2P/yem6Bx36Sdgx6F5BpYziRW9J+bHaC2kAz4/UDFX1EGi9NOg1mzumz/bNkCkycDEK4N\n9hD5QnMNLGdCIRjbFuasaBOyBV58sZ7aQjLg9wMVf0UZJMKnzGTFb+HCyE1sy5au9R8taGb8mmAP\nkQ+FrCtoNGq/i9CyUDKcWBPmp9GjqKYNgOiChVAfysjOX6ion7+iDBKhEDQwk58yG6CrBRDeY0JS\n/+/EWZ9qagY7xYpH7cOzqKatK1ifv7Mj/476WaI1f0UZJDxPoEXtQSoFrhnbzE6TJrBTbZCqR3qa\njxNnfWptzWfqy5jGRli6FIlfV1lZlHb+eFT8FWWQ6O4JFGQnx3b0OiT3EPJmfSriPsXSYPbs7st7\n7gl//jNhHEJFPEFPVuIvIsOAPwEjgQ3A940xHyXZLwKscRc3GmNOyua6ilKspPIESra+0AYFlSWN\njfDyy93X/epXhHGSdsYXk3dWtjX/K4DlxpjrROQKd/nyJPttMcaMzvJailJ2FNKgoLKkubnrpwE+\nH3kA2waDhGYkj9NTTN5Z2Xb4ngwsdn8vBk7J8nyKUp6Ew7xx4QxmnRrmwgsLfh6Q0seLzjd6tI3U\n6a7++aZLCYdj/TfxczMUWuC23si25v9FY8w77u93gS+m2G+IiLQAncB1xpilyXYSkSBuxKsRI0Zk\nmTRFKRLCYSJHjWfPtnamUMV4lrNwoaOunfkiYYDFI6OnIatXczcTWGSC7BWyHp7JTHLFNO6rV/
EX\nkUeB3ZJs+s/4BWOMEZFUs8HvbYx5S0S+BDwmImuMMa8l7mSMaQQawU7g3mvqFaUUCIWQ9nZ3MpCt\n1NPEcx0q/nkjrgpv2tp5es2OXM0jAFTFzdGSaJIrtj6aXsXfGHN0qm0i8m8R2d0Y846I7A68l+Ic\nb7nfr4tICBgD9BB/RSlLAgFMhR/TEcGH4cfcyj98YwgENOxzXoiLztfpq+LxSACwUVjPOy+9qBdT\nH022Nv/7gXPc3+cA9yXuICI7iUi1+3tn4DBgbZbXVZTSwXHwTzoPEASoIMJcLsYhO8N/4qQySga4\n7jqv/WQ2ofFX8ZefLueFagefDyoq4AtfKKF7aozp9weoAZYD64BHgWHu+jrgNvf3N7Funn93vydl\ncu6DDz7YKErZ8MwzxlRWGmMn+DLG5zPm2muzOt3Qocb4/fb7mWdymNZSxb1pUZ/ffMZQc5jvGTN0\nqDHTphlTUWGMSOyvKeR7CrSYDDQ2q5q/MabVGDPeGDPKGHO0MeZDd32LMebH7u9njDG1xpiD3O8F\n2VxTUUoSx4E5c2z10uezo7uy6DEsNs+TgsC9aRKNUEk7R0RDtLfD6tVeiWx3i0ZL457qCF9FKRSC\nQaitzUmPYaFNGVgUuDfNtLXTEa3iKV+AqiqYMAGeeioWZM/nK417KsYUplNNXV2daWlpyXcyFCW/\nZDFktJhGmxYM7k1bUxPgwVanS+Cbmuz3mDE2xlIh31MRWWWMqet1PxV/RSlQvNFEHR02kJj6fg46\nxTinQqbiryGdFaVQaWqyqmOM/faqn0puyMAdqpT7TtTmryhFiJp0etKne5KiSp94jlLuO1HxV5RC\npb4eFi2KKU99PVCcpoiBJv6e+P12MFZ9fZr70tQEW7fGWlWhUMpIncU0arcvqNlHUQoVx4HHH4dr\nrrHfrvKUsimivyTek/nzbWGQ1KITDsPChTHfzYoKCARS3lfHsbF8Skn4QWv+ilLYJIkXUMqmiP7i\n3ROvMh9Xoe8p2qGQVXiwMRsmTgTHIUB53VcVf0UpMkrZFNFfvHvS1GQtZZ2daQQ8sfR0zWnldl/V\n1VNRihHt8U1JRremhO+f+vkrSqmiPb5KGtTPX1FKlUHu8S2J6KAlkYncojZ/RSk20vT45tqaURKN\njJLIRO5R8VeUYiNFz+RAaFyyRkbR6WZJZCL3qPgrSjGSxAW0LxqXaQuhJNxKSyITuUfFX1FKhEw1\nri8thHy6P+bMhFVuPpwZouKvKCWC48Bzs8O0NoeomRCgNoXIJYlsUHDz0mZrwlrTGHcfgk5xTa47\nSKj4K0qpEA5TO3W8nXXkMR8w104Q030XFi2KRTbw+wvTChJvwmprg+nT7ScT/V7TGGbU5AAH0EHH\nskrWELIFgNINdfVUlFIhFIpNN9XZCVOm9HBtDIXsJrCRDc47rzArxJ4Jy+ez2Xn00TSxehKomj2L\natrxY6imnY4FGgo7GSr+ilIqBAJWLT0ikR5jAGpqrOj7fDBkSFdkgwGjv+71npn+6KNjBUBGQxrC\nYUa9+kC3VXvs0bdrlwtq9lGUUsFxYO5cohdNgWgEIz78S5daxQ8GCYdh6lQrpH4/zJ49MLV+r6O2\npsZer792e8expp6nnsrQUaexEa6/Hl/UBm0zAD4/u00b4BKuSFHxV5QSIlwbpMFfy6WRWZwSWYpZ\nuRJZuRKAUGuQ9nYr/iJ2LtqcXz+uo1bEXiu+1p6p+Md7+mTkqNPYCJMnx5ZFEL8f5s4tTLtWAaDi\nryglRCgEKyIOV/A5AOJtaG4mMD044O7u8R21Pp9tYYhkdr10LYaGhl4u3NzcbXHLnvvy5xObGFXr\noNKfHBV/RSkhvI7Se7dO4FizDINbAEyYMCDu7r1Nezh7tm1h9Ha9rFsMEybAsmWANff8x7s/p/FW\nh6rFGs0hFSr+ilJCxAQ+yOubYd/VzVYYXZfPXLq7J/PFBz
jnHPuddhrFBLJpMQAxl9bmZh7eZgKN\nDwR7HelcwlGdM0LFX1FKjJjAB91ParIRwMRwEk1NsHhxjzlSMqK/LYZuBIMQDLJTGKoeSW/e0lhv\nKv6KUrZkI4DhMGzcaKe/BXs8pI8tlK6gyaVJKpNzaaw3FX9FKS/iFDgUcvolgPGFht8P558fq+XH\n1/zja9y9FTQZt0Ay3LE381ZNjTUvGVO+sd5U/BWlROmhk54Ct7WB38/3fzqHq6qCffb+ia81A4wY\nERPaVDXudDXtlAVDvPtPa2v2Awfi7svUqbH+hYEa71DoqPgrSgmSVFDjwz9Eo+x7w4WsuQzu2jHY\nJdaZVKzTRQ9NVeNOd4xXMEyMNPK9Lc1se9EubHl3BUPe3QgY663k9QJHIv0bOBCHd72BHO9QDKj4\nK0oJkrSmHQhYAY1G7U5uAdAwD3DsCOCjjooJ9OOPJ9fW/tjnkx4TDkNTExc+u5ZzIuvYnXfszqtj\nxwnWdVO8NPt8Kd2AymqOghyg4q8oJUhSgXMcmDMHLrywWwHAlClQW0tTk0Nbm13d1ma9d9LF+U/c\n1pv4Og44uDutqYFLLoG2NnYEdnD36RJ7d9l4B/t8UF2d0g2oWOYoKCRU/BWlBEkpcJ4/fHwB0NEB\nU6eyzx6z+TFrmEAzzUygNzfReFKKb2Mj3Hij7VkdOxb+9Cd7XZ8v1mlAT7E3ced+/9tnsWvga2mV\nuq/eOxreP0vxF5HTgenAV4FvGGNaUux3HHAj4AduM8Zcl811FUXpnZQC5xUAF10UE+CVK7nMdwSC\nXT6WZbz+BYBg9yo9dO+E3bwZHnyQL79vmL9lLKNYR/WWNoaf3A47dsK6dbHrvvxy7Lcx3QoAT+yj\nwKPybb7+xTeJRIT/m3gp+87svRDqzZRT7gO6kpFtzf+fwGnA/FQ7iIgfmAscA2wCnheR+40xa7O8\ntqIo/SUYhBdfhFtu6Vrli0a6TC4Gd3RwuDZWpa+osKLd2RlrNbj77gycTZy4vw/mfftT6I4BjM+P\n7+a5Ng1r1yIffMAHO+/HQwdMY1S9wzBXoHdJODaViKcz5eiAruRkJf7GmJcBRBL/3m58A1hvjHnd\n3fdO4GRAxV9R8kl9PSxcaFURwO9H3Jq4Fw+omz3FE3wTM8ok2ueTKUG8CSeKEKGCqTKHH9UGcRIa\nFvUZxv9JJuKpWjo6oCs5g2Hz3xN4M255E3BIsh1FpGs8+ogRIwY+ZYpSzjiOVcImd6ar+npYs8ZG\nyPTiAYXDMXtKmpp/st8eUeAN9ubPcgYfsyOPmwDPG4e9QnZ7prXy/oq4evckp1fxF5FHgd2SbPpP\nY8x9uUyMMaYRaASoq6tL9hwpipJLEqvLjtN93t9Eewp0s/mvDm2matmDgGG1byzf/co6fB1tbFzf\nzvvswityAItNPWEcBFt+RKMxEU4m6N4lEs03/RVx9e5JTq/ib4w5OstrvAXsFbc83F2nKEoxkKyA\nwDXDXAVtvpn4fHbelC+45cbHYbizCdauhfCTdp0x8NOfwo47dhfheEGvqUndEshGxNW7pyeDYfZ5\nHhglIvtgRf9M4IeDcF1FUQaQxJGyL75o5+v1auSLF8PWrbH9fT4r/N7ELJ6tP951P34QcltbT9OO\ninjuyNbV81TgJmyn/F9EZLUx5lgR2QPr0nmCMaZTRKYAj2BdPRcaY17KOuWKogw68Z2z8WYYvx8W\nLbLdAVVVNqZ/e3usb1jEjtHyCoZUnbdr1nQff1ZTM/h5LBey9fa5F7g3yfq3gRPilh8CHsrmWoqi\n5Ja++r4nE2zPDLNxI9x6a8x2D90LhvPO6z65S6rO29ZW20LwxoGVa9ydwUBH+CpKGdIf3/dkgt3Q\nEAsIlziRS3196sIlVedtIGBbCOqZM/Co+CtKGdIft8neonkm64zta3wd9cwZPMSYwvSorKurMy0t\nSaNFKIqSJf0d9aphEg
ofEVlljKnrbT+t+StKGdLfGvZAeNtogZIfVPwVpUzxhDwcjrloDrb4Njba\niNKRiLX1a9ydwUPFX1HKmIEIepZpTT4chosvtu6hkNyvXxk4VPwVpYzJddCzvhQmoVC3EEH4/erd\nM5j48p0ARVHyh+fB4/fnxrUyVayeVNeurrb+/BUVdpIxrfUPHlrzV5QyJteulYnuoDU1qfsT1K0z\nv6irp6IoOcWz+dfUwNSpOonKYJOpq6eafRRFySmOY0f+trZmbgJSBh8Vf0VRssJzFQ2Hu6/PdX+C\nklvU5q8oSr9J592jNv3CRsVfUZR+05urqMbfL1zU7KMoSr9R007xojV/RVH6jZp2ihcVf0VRskJN\nO8WJmn0URVHKEBV/RVGUMkTFX1EUpQxR8VcURSlDVPwVRVHKEBV/RVGUMqRgo3qKyPvAGxnuvjPw\nwQAmZ7AohXxoHgqDUsgDlEY+BjsPextjdultp4IV/74gIi2ZhDAtdEohH5qHwqAU8gClkY9CzYOa\nfRRFUcoQFX9FUZQypFTEvzHfCcgRpZAPzUNhUAp5gNLIR0HmoSRs/oqiKErfKJWav6IoitIHVPwV\nRVHKkKIXfxE5TkReFZH1InJFvtPTV0RkoYi8JyL/zHda+ouI7CUij4vIWhF5SUQuzXea+oOIDBGR\nlSLydzcfv8l3mvqLiPhF5EUReTDfaekPIrJBRNaIyGoRacl3evqLiOwoIneLyCsi8rKIFEzw66K2\n+YuIH/gXcAywCXge+IExZm1eE9YHRGQc8CnQZIw5MN/p6Q8isjuwuzHmBRHZHlgFnFJM/wOAiAiw\nrTHmUxGpBFYAlxpjns1z0vqMiPwMqAO+YIw5Md/p6SsisgGoM8YU9QAvEVkMPGWMuU1EqoBtjDGb\n850uKP6a/zeA9caY140x7cCdwMl5TlOfMMY8CXyY73RkgzHmHWPMC+7vT4CXgT3zm6q+YyyfuouV\n7qfoakciMhz4DnBbvtNSzojIDsA4YAGAMaa9UIQfil/89wTejFveRBGKTikhIiOBMcBz+U1J/3DN\nJauB94C/GWOKMR+zgWlANN8JyQIDLBORVSISzHdi+sk+wPvAItcEd5uIbJvvRHkUu/grBYSIbAc0\nA1ONMf+X7/T0B2NMxBgzGhgOfENEisoUJyInAu8ZY1blOy1ZcrgxZixwPHCxax4tNiqAscA8Y8wY\n4DOgYPoli1383wL2ilse7q5TBhnXRt4M/MEYc0++05MtbvP8ceC4fKeljxwGnOTazO8EviUid+Q3\nSX3HGPOW+/0ecC/WxFtsbAI2xbUe78YWBgVBsYv/88AoEdnH7Uw5E7g/z2kqO9yO0gXAy8aY3+U7\nPf1FRHYRkR3d30OxjgSv5DdVfcMY02CMGW6MGYl9Hx4zxpyd52T1CRHZ1nUcwDWTfBsoOm84Y8y7\nwJsi8hV31XigYJwgKvKdgGwwxnSKyBTgEcAPLDTGvJTnZPUJEVkCBICdRWQT8GtjzIL8pqrPHAb8\nCFjj2ssBfmGMeSiPaeoPuwOLXS8yH3CXMaYoXSWLnC8C99o6BRXAH40xf81vkvrNT4A/uJXT14GJ\neU5PF0Xt6qkoiqL0j2I3+yiKoij9QMVfURSlDFHxVxRFKUNU/BVFUcoQFX9FUZQyRMVfURSlDFHx\nVxRFKUP+P5OxXtvr2werAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3h7IcvuOOS4J", + "colab_type": "text" + }, + "source": [ + "Much better! The evaluation metrics we printed show that the model has a low loss and MAE on the test data, and the predictions line up visually with our data fairly well.\n", + "\n", + "The model isn't perfect; its predictions don't form a smooth sine curve. For instance, the line is almost straight when `x` is between 4.2 and 5.2. If we wanted to go further, we could try further increasing the capacity of the model, perhaps using some techniques to defend from overfitting.\n", + "\n", + "However, an important part of machine learning is knowing when to quit, and this model is good enough for our use case - which is to make some LEDs blink in a pleasing pattern.\n", + "\n", + "## Convert to TensorFlow Lite\n", + "We now have an acceptably accurate model in-memory. However, to use this with TensorFlow Lite for Microcontrollers, we'll need to convert it into the correct format and download it as a file. To do this, we'll use the [TensorFlow Lite Converter](https://www.tensorflow.org/lite/convert). The converter outputs a file in a special, space-efficient format for use on memory-constrained devices.\n", + "\n", + "Since this model is going to be deployed on a microcontroller, we want it to be as tiny as possible! One technique for reducing the size of models is called [quantization](https://www.tensorflow.org/lite/performance/post_training_quantization). It reduces the precision of the model's weights, which saves memory, often without much impact on accuracy. Quantized models also run faster, since the calculations required are simpler.\n", + "\n", + "The TensorFlow Lite Converter can apply quantization while it converts the model. 
In the following cell, we'll convert the model twice: once without quantization, and once with it:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "1muAoUm8lSXL", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Convert the model to the TensorFlow Lite format without quantization\n", + "converter = tf.lite.TFLiteConverter.from_keras_model(model_2)\n", + "tflite_model = converter.convert()\n", + "\n", + "# Save the model to disk\n", + "open(\"sine_model.tflite\", \"wb\").write(tflite_model)\n", + "\n", + "# Convert the model to the TensorFlow Lite format with quantization\n", + "converter = tf.lite.TFLiteConverter.from_keras_model(model_2)\n", + "converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]\n", + "tflite_model = converter.convert()\n", + "\n", + "# Save the model to disk\n", + "open(\"sine_model_quantized.tflite\", \"wb\").write(tflite_model)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L_vE-ZDkHVxe", + "colab_type": "text" + }, + "source": [ + "## Test the converted models\n", + "To prove these models are still accurate after conversion and quantization, we'll use both of them to make predictions and compare these against our test results:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-J7IKlXiYVPz", + "colab_type": "code", + "outputId": "0c10f56c-dbd7-4cc3-e332-30ad673769e5", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 281 + } + }, + "source": [ + "# Instantiate an interpreter for each model\n", + "sine_model = tf.lite.Interpreter('sine_model.tflite')\n", + "sine_model_quantized = tf.lite.Interpreter('sine_model_quantized.tflite')\n", + "\n", + "# Allocate memory for each model\n", + "sine_model.allocate_tensors()\n", + "sine_model_quantized.allocate_tensors()\n", + "\n", + "# Get the input and output tensors so we can feed in values and get the results\n", + "sine_model_input = 
sine_model.tensor(sine_model.get_input_details()[0][\"index\"])\n", + "sine_model_output = sine_model.tensor(sine_model.get_output_details()[0][\"index\"])\n", + "sine_model_quantized_input = sine_model_quantized.tensor(sine_model_quantized.get_input_details()[0][\"index\"])\n", + "sine_model_quantized_output = sine_model_quantized.tensor(sine_model_quantized.get_output_details()[0][\"index\"])\n", + "\n", + "# Create arrays to store the results\n", + "sine_model_predictions = np.empty(x_test.size)\n", + "sine_model_quantized_predictions = np.empty(x_test.size)\n", + "\n", + "# Run each model's interpreter for each value and store the results in arrays\n", + "for i in range(x_test.size):\n", + " sine_model_input().fill(x_test[i])\n", + " sine_model.invoke()\n", + " sine_model_predictions[i] = sine_model_output()[0]\n", + "\n", + " sine_model_quantized_input().fill(x_test[i])\n", + " sine_model_quantized.invoke()\n", + " sine_model_quantized_predictions[i] = sine_model_quantized_output()[0]\n", + "\n", + "# See how they line up with the data\n", + "plt.clf()\n", + "plt.title('Comparison of various models against actual values')\n", + "plt.plot(x_test, y_test, 'bo', label='Actual')\n", + "plt.plot(x_test, predictions, 'ro', label='Original predictions')\n", + "plt.plot(x_test, sine_model_predictions, 'bx', label='Lite predictions')\n", + "plt.plot(x_test, sine_model_quantized_predictions, 'gx', label='Lite quantized predictions')\n", + "plt.legend()\n", + "plt.show()\n" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX8AAAEICAYAAAC3Y/QeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi40LCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcv7US4rQAAIABJREFUeJzsnXl4FFXWuN/bnbCELbIMCiHpqKzZ\nISBkYXGZDFECIhFkEWRcUFHHJCAOIo7K/DAkcRkc/XRGXAi7DIQx8+GHbAmRkTWYIMiSTtgUBAIB\nAln6/v6o7k4n6ex7ct/n6ae7q27dulV169Stc849R0gpUSgUCkXLQtfQDVAoFApF/aOEv0KhULRA\nlPBXKBSKFogS/gqFQtECUcJfoVAoWiBK+CsUCkULRAn/BkYIMUUI8W1Dt8OCEKKtEGKTEOKKEGJt\nPewvXQgxsq73Ux8IIQxCCCmEcKhE2RlCiOT6aFdlEEK4CiGuCSH0Dd2W+kAIMVIIcboO6m1U17U8\nmo3wF0JMFkLsNXfgc0KI/wghghq6XRUhpYyXUv6+odthwwSgO9BFShle1zuTUnpIKbfX9X4U5SOl\nzJJStpdSFtakHiHEdiHEk7XVLpt6K/1gVVSOZiH8hRARwHvAX9EElyvwd2BsQ7arIhppR3YDfpZS\nFtTlThrpsSsULQcpZZP+AJ2Aa0B4OWVaoz0czpo/7wGtzetGAqeBucB54BwwDggFfgYuAX+2qesN\nYB2wGsgB9gM+NuvnASfM6w4DD9usmwHsAt4FLgJvm5clm9cL87rzwFXgR8DT5ji/BC4AmcBrgM6m\n3mQgBrgMZACjyzkf/YHtQDaQDoSZl/8FyAPyzef0jyW26wHkAp1tlvkBvwGOwF3AVvOx/QbEA842\nZY3AK8Ah4BbgYF52fyWuk/U82dQngbvNv0PN5zsHOANElXHsttcgGzgJBJiXnzKf++kl+ldZ511v\nPue/met53twmB5tt/4nWp86Yr7e+5PGUd93ttP8J4CfzcZ4Enimxfq55f2eBJ0ucoweBA+Z9nALe\nsNnOUKLt24G3zOcqB/gW6Gpe1wZYbr7O2cAetEHXIqAQuInWf5aWcQxrgV+AK8BOwMNmXVsg1nyu\nr6D167ZAlrl918yfYWj34vJyjqHMc4X5vi+jfR8BMSWWbQQiKnmPJ9trj815fdLm/0xzGy8DmwG3\nqvaJasvO2qysIT7AH4AC2xNsp8ybwG7gd0A3IAV4y6YTFACvowmwp9Bu9BVAB8ADTeC5m8u/gSYc\nJ5jLR6EJW0fz+nA0IakDJgLXgTtsOkYB8AKa4GtborOEAPsAZ/PF72+z7ZfmDtjB3Kl+xiyczXXk\nm9uuB55Fu/mFnXPhCBwH/gy0Au41d+K+Nse3vJxzuRV4yub/EuBj8++7gQfQhHg3tBv7PZuyRuAg\n0Atoa7Ps/kpcJ+t5sqnPVrCdA4LNv28DBpbRfss1eMJ8rt5GEywfmtv9e/P5aF+J8z4LOGI+ns7A\nNooLn38B/wO0Mx/TD5gFUGWvu532P4j2kBXACOCG5VjR7oVf0PqsE5qAtj1HIwEvtL7pDfwKjLMn\nqNCE1AmgD1o/3Q4sNq97Bthk3oceGAR0tNnuSXtttzmGmebzaXnYH7RZ96G5jp7mugPM5Yq1z15f\ntXMM5Z2rkZQt/IejPRyFTX/KBXpU8h6vlPBH00wcN19vB7SBRUpV+0S1ZWddCub6+ABTgF8qKHMC\nCLX5HwIYbTpBLkUjsg7mC3aPTfl9NjfJG8Bum3U6bASPnX0fBMbadIysEuttO8u9aMJlKObRpXm5\nHm1EPsBm2TPAdps6jtusczIfw+122hOMJiBs61+JeRRIxcL/SWCr+bcw3yTDyyg7Djhg898IzCxR\nxkiR8C/vOlnPk816W8GWZT4nHSvoCzOAYzb/vcz1dLdZdhHwr
cR53wrMsln3e3NdDmgj4VuYH3Lm\n9Y8B2yp73SvZ/zcAL5l/fwb8P5t1d9ueIzvbvge8a/5toLTwf82m7HPA/5p/z0R7MHvbqXM7FQj/\nEuWdzfvthHYv5WLzJm1Trlj77PVVe2XKOVcjKVv4C3N/Gm7+/xTmPl9G+ZL3eGWF/3+webs2H/8N\nNNVrtftEZT/NQed/EehagQ65B9prpIVM8zJrHbLI0JVr/v7VZn0u0N7m/ynLDymlCU1t1ANACPG4\nEOKgECJbCJENeAJd7W1bEinlVmAp2ujnvBDiEyFER/P2jnaOoafN/19s6rlh/mnbZgs9gFPmdpdV\nV3l8DQwTQtyBNkIyAUkAQojuQohVQogzQoiraCPPriW2L/P4qfg6lccjaKqfTCHEDiHEsHLKlry2\nSCntXe+KznsPih+PbTk387bnbPrC/6C9ARSjnOteCiHEaCHEbiHEJXOdoRSd45LtOVVi23uEENuE\nEBeEEFfQ3lxKXh9bfrH5fYOi/vQVmopilRDirBAiWgjhWE49tm3QCyEWCyFOmPuI0byqq/nTBm0Q\nUGMqOFdlIjVJvArtYQ0wGU2Faam3onu8srgB79vUcwntwdOzKn2iujQH4f892ghrXDllzqKdaAuu\n5mXVpZflhxBCB7gAZ4UQbsCnwGw0bxlnIA3tglqQ5VUspfxASjkIGID2yj0HTaecb+cYzlSj7WeB\nXuZ2V7kuKeVlNP3vRLSbYpX5ZgHN4C4BLyllR2AqxY8dyj/+8q7TdbQ3GgCEELeXaNceKeVYNOG6\nAVhTmeOpgIrO+zls+oJ5nYVTaP2yq5TS2fzpKKX0sLejMq57MYQQrdEevjFobyrOQCJF5/gcWl+0\n0Kt4DawAEoBeUspOwMeUvj4VIqXMl1L+RUo5AE0t8xDwuGV1BZtPRlN33I822jeYlwu0830TTVVT\nard2lhXrE4C1T1TiXFXESmCC+Z6+x1wXlbzHbdtHWW1E6yPP2PQPZyllWyllClSuT9SEJi/8pZRX\n0PT1HwohxgkhnIQQjuanfrS52ErgNSFENyFEV3P55TXY7SAhxHjz28af0G7y3Wi6XYlmM0AI8QTa\nqKBSCCEGm0dnjmgd5yZgMr+VrAEWCSE6mDtgRDWP4b9oo7i55vM0EhiDNtKpLCvQbvYJ5t8WOqAZ\n464IIXpS9c5a3nVKBTyEEL5CiDZor/wACCFamedLdJJS5qMZyEzUkEqc9zXAi0IIFyHEbWiGQMu2\n59AekrFCiI5CCJ0Q4i4hxIiS+ynruttpUis0/fcFoEAIMRpN1WRhDfCEEKK/EMIJWFBi+w7AJSnl\nTSHEEDRBXGWEEKOEEF7mOQFX0R6Qlvb+CtxZzuYd0O6Xi2hC8a+WFea30c+AOCFED/NbwjCzIL9g\n3odt3QeB4eY5Cp2AV23WVXSuykVKeQDtYfQPYLOUMtu8qtL3uJTyAtpAYar5WGZS/MH2MfCqEMLD\nXFcnIUS4+Xdl+0S1afLCH0BKGYt2U76GdlFOoT2ZN5iLvA3sRfMy+RHNQ+ftGuxyI9rI9zIwDRhv\nHg0dRvNU+B7tJvBC85aoLB3RRhWX0VQIF9EMqqAZia+jeS0kowndz6racCllHpqwH43Wuf8OPC6l\nPFKFahKA3mi2llSb5X8BBqJ5aXwDrK9i88q8TlLKn9EMwluAY2jnwJZpgNGsSpiFZguqDco775+i\nqT9SzW0tebyPowmhw2jXdB1wh519lHfdrUgpc4AX0YT8ZTThnWCz/j/AB2iG5+NoAxLQhC1oevs3\nhRA5aA/W6r4d3W4+lqtonio70FRBAO+jjZgvCyE+sLPtl+ZjPIN2XnaXWB+Fdu33oKlB3kHTed9A\n8ybaZVaTDJVS/h+a190hNLvcvy2VVHSuKskKtDcU6wCnGvf4U2iDoItohvgUm7r+ZT6+VeZ+m4Z2\nX0Il+0RNsFizFZVECPEGm
gFtakO3RaEoDyFEfzSB0lrW8bwNRdOjWYz8FQqFhhDiYSFEa7Ma6h1g\nkxL8Cnso4a9QNC+eQZsYdAJtwtWzDdscRWNFqX0UCoWiBaJG/gqFQtECabTBtbp27SoNBkNDN0Oh\nUCiaFPv27ftNStmtonKNVvgbDAb27t3b0M1QKBSKJoUQIrPiUkrto1AoFC0SJfwVCoWiBaKEv0Kh\nULRAGq3OX6FoTOTn53P69Glu3rzZ0E1RKABo06YNLi4uODpWKqBqKZTwVygqwenTp+nQoQMGgwEh\nqhwIU6GoVaSUXLx4kdOnT+Pu7l6tOpTap5kQHw8GA+h02nd8fEVbKKrCzZs36dKlixL8ikaBEIIu\nXbrU6E1UjfybAfHx8PTTcMOcwiUzU/sPMKW2YlsqlOBXNCpq2h/VyL8ZMH9+keC3cOOGtlyhUCjs\noYR/MyAry/7yzMziqqDnnlOqoabOhg0bEEJw5Ej56Rc+//xzzp6tfrK67du389BDD1V7e0XjRwn/\nZoCrq/3lQmgPACm1748+Kv7/6afVA6CuqCsbzMqVKwkKCmLlypXllqup8Fc0f5TwbwYsWgROTsWX\nCaEJ+fK4cQOmTgUHB628ehuoHSw2mNp+0F67do3k5GT++c9/smpVUdbNd955By8vL3x8fJg3bx7r\n1q1j7969TJkyBV9fX3JzczEYDPz2228A7N27l5EjRwLwww8/MGzYMPz8/AgICODo0aM1a6SiyaAM\nvs0Ai1F3/nxNBeTqqgmcylJYqH0rQ3HtUJ4NpibndePGjfzhD3+gT58+dOnShX379nH+/Hk2btzI\nf//7X5ycnLh06RKdO3dm6dKlxMTE4O/vX26d/fr1IykpCQcHB7Zs2cKf//xnvv766+o3UtFkUCP/\nJkJFaoQpU8BoBJNJexOoriOAMhTXnLJsMGUtrywrV65k0qRJAEyaNImVK1eyZcsWnnjiCZzMr36d\nO3euUp1XrlwhPDwcT09PXn75ZdLT02vWSEWTQQn/JkBV1Qjz51es8imPkkJKzSGoGmXZYMpaXhku\nXbrE1q1befLJJzEYDCxZsoQ1ayqff93BwQGTyQRQzDd8wYIFjBo1irS0NDZt2qRmMLcglPBvAlTV\nlbOmI0xbIVVX+uvmjD0bjJOTtry6rFu3jmnTppGZmYnRaOTUqVO4u7vTqVMnli1bxg1zB7l06RIA\nHTp0ICcnx7q9wWBg3759AMXUOleuXKFnz56AZiRWtByU8G8CVFWNUJMRZkkhVZUHj3pD0JgyBT75\nBNzcNPWbm5v2vyb6/pUrV/Lwww8XW/bII49w7tw5wsLC8Pf3x9fXl5iYGABmzJjBrFmzrAbfhQsX\n8tJLL+Hv749er7fWMXfuXF599VX8/PwoKFB53lsUUspG+Rk0aJBUaLi5SamNu4t/3Nzsl1++XEoh\n7G9T3sfNTdvWlrLqEUIr6+am/e7SRUpHx+JlnJxK19dUOXz4cEM3QaEohb1+CeyVlZCxtTLyF0J8\nJoQ4L4RIK2O9EEJ8IIQ4LoQ4JIQYWBv7bSlUVY0wZUrVdf5CaPWVHJ2W9RbRuXNxddDFi5CfX7yM\nMh4rFI2X2lL7fA78oZz1o4He5s/TwEe1tN8WQXXUCF26VG0fUtrX5Zf14IHS6iB7ZGUpdZBC0Rip\nFeEvpdwJXCqnyFjgS/NbyW7AWQhxR23su6Vg68ppNFZff6wr54rbG6mX9eC5VN7VtqHkG4IyGCsU\njYP6muTVEzhl8/+0edk520JCiKfR3gxwrYnVUsHFSf1ok9uat7cKIoypPBg4nH0uNzh/1484XunB\nXZk9udjhJl1y2tD7N8FWz4v0+OV3/EJ3QhcNJHH+XGtdU6aUftjMn1/xRDJHR7h8WXtg2VIbE54U\nCkXNaFQzfKWUnwCfAPj7+9fAU71l0e1PoYica3Q5D6CdNkeXAm72OkrU43qij/nzW7fjmG4
7C1JP\nftcMjjifBod8LhS24og+D/Jbc+x3RjA5cH9u+bNCQVMH2YaRtocQpQW/hZq6oyoUippRX66eZ4Be\nNv9dzMsUtUDAr9240CuZI94/cMR7P0d8/kt+9xNgEiAKOd9nryb4TQ5wqx2dTwwEh3xtvT5Pq0QH\nmByIXeVOxPKlFe7Tog6y8Roshl4PeXllb295sVP2AIWiYagv4Z8APG72+hkKXJFSnqtoo5aCrQDs\n2lX7VEUYblwbT9jmIHDMA/1NTbAD6MwvT0L7BCcHEvyDH5fu3o/uSg/QS+s6HG4R/F8/IoypxYbl\noYuiiXstppiEjnsthtBF0UyZAl98Yd8gbIkXZA9HR+3NQU0gqxqnT59m7Nix9O7dm7vuuouXXnqJ\nvDKesGfPnmXChAkV1hkaGkp2dna12vPGG29Y5xXUJbb7ef3119myZUuZZQ8ePEhiYqL1f0JCAosX\nL67zNjZFasvVcyXwPdBXCHFaCPFHIcQsIcQsc5FE4CRwHPgUeK429tscKCkAL17UPlUShoWFbNyd\nRMcsL9CbigS65SMFSEgK/J6kIQfofHwgpk5noVBbjgQKWpN0zwHiDD7F/Dvvz9URlbeYOOEMUhIn\nnInKW8z9uVrXKcsg7OZWdnM7dtS2K28CWZN/I6jlA5BSMn78eMaNG8exY8f4+eefuXbtGvPt+NIW\nFBTQo0cP1q1bV2G9iYmJODs716ht1aG6E8refPNN7r///jLXlxT+YWFhzJs3r1r7avZUZjJAQ3xa\nyiSvLl0qN/mqXPR6GTY0WLJQSBboJAsp/vmzk2z/hIf2+3W99j3fUft+rZW5TGvJn50k8zrK2PlL\niup2c5OxBh8p5nSRwaNGSDGni4w1+JTZKMvEr4qOqaJJaE5OjWvCWJUmeS1fXusHsGXLFhkcHFxs\n2ZUrV2Tnzp3l9evX5bJly+SYMWPkqFGj5PDhw2VGRob08PCQUkp5/fp1GR4eLvv37y/HjRsnhwwZ\nIvfs2SOllNLNzU1euHBBZmRkyH79+sknn3xSDhgwQD7wwAPyxo0bUkopP/nkE+nv7y+9vb3l+PHj\n5fXr16WUUi5cuFAuWbJElmT69OnymWeekYMGDZK9e/eWmzZtklLKUm2UUsro6Gjp7+8vvby85Ouv\nv26t4+2335a9e/eWgYGBctKkSdb9TJ8+Xa5du1ZKKeUPP/wghw0bJr29veXgwYNldna27NWrl+za\ntav08fGRq1atksuWLZPPP/+8lFLKjIwMOWrUKOnl5SXvvfdemZmZaa3zhRdekMOGDZPu7u7W+s+e\nPSuDg4Olj4+P9PDwkDt37qz29asrGnySl6J6xMdro/yKyHQpW/UCMDZ8CgkhyZDfCgrbQIGjtqEE\nCtqAhGs9jtM+ywMKHHD8zZ1+qffQ7Wd/+h0cQtjmYNqdv4t+aUPod2k0W9raWGmzsogwphK015Ok\nETsI2utZSjVkezyWt5iKKG8Sml7fxNNS1kFezfT0dAYNGlRsWceOHXF1deX48eMA7N+/n3Xr1rFj\nx45i5f7+979z2223cfjwYd566y1rjJ+SHDt2jOeff5709HScnZ2tMYDGjx/Pnj17SE1NpX///vzz\nn/+ssL1Go5EffviBb775hlmzZlkDxtm28dtvv+XYsWP88MMPHDx4kH379rFz50727dvHqlWrrKP4\nPXv2lKo/Ly+PiRMn8v7775OamsqWLVto164db775JhMnTuTgwYNMnDix2DYvvPAC06dP59ChQ0yZ\nMoUXX3zRuu7cuXMkJyfz73//2/qmsGLFCkJCQjh48CCpqan4+vpWeNxNiUbl7dPSqJQsCIzGl3Si\n8r4B4UKEzGTsHa4kyIXE5v4FgJTuF+h2KqiYt4/OBJk9LtHrl84U4sAt5+549CvuwlkpXF2JE84k\n+6cRvGMEyf5p9G8XBDoHnvIMJCI9BYA4jwDevrsXN/w
Gwq4q7sMGJ6eyPYiajIdQXcV0roAHHnjA\nbkjn5ORkXnrpJQA8PT3x9va2u727u7tVwA0aNAij0QhAWloar732GtnZ2Vy7do2QkJAK2/Loo4+i\n0+no3bs3d955pzXtpG0bv/32W7799lv8/PwALVnNsWPHyMnJ4eGHH7aGqQ4LCytV/9GjR7njjjsY\nPHgwoD0IK+L7779n/fr1AEybNo25c4v66bhx49DpdAwYMIBff/0VgMGDBzNz5kzy8/MZN26cEv6K\n2qMyssD/nI59j3zDmKQBRIUfZunxADK8kwnbHETEuaXwdhQX3kskPt6+770l06ubGyR+VfU2xk2d\nTVTeYmLWuhBh3EFchg+REw+BvoDI/g5w3QeAyDFpINPwW+3PATv1VJRZTAjN1LBoUdlzCJrM1I+y\nsunU4AAGDBhQSod/9epVsrKyuPvuu9m/fz/t2rWrdv0ArVu3tv7W6/Xk5uYCWpC4DRs24OPjw+ef\nf8727dsrrEuUSChh+W/bRiklr776Ks8880yxsu+99151D6Ha2B67NHfU4cOHs3PnTr755htmzJhB\nREQEjz/+eL23ra5Qap96pKQNsKy8G0IUGU+3XVlKzFoXNgUfpsNvPcnwScH90DA27k6yPj0qo26p\n7qBzS1sTMa3mESGzQQgiZDax//ak36GBICByWjqR09JAFBC72p19xjn8xRBIh4cmQaCmlnJzg6++\nKtst1M2t+MzlugiJXK/UwQHcd9993Lhxgy+//BKAwsJCIiMjmTFjhnWEXBaBgYHW2P+HDx/mxx9/\nrNK+c3JyuOOOO8jPzye+kobrtWvXYjKZOHHiBCdPnqRv376lyoSEhPDZZ59x7do1AM6cOcP58+cZ\nPnw4GzZsIDc3l5ycHDZt2lRq2759+3Lu3DmrSignJ4eCgoJSoaxtCQgIsKa/jI+PJzg4uNxjyMzM\npHv37jz11FM8+eST7N+/v1LH3lRQI/96wiKgLSqNzExo1Upze7QGRAuMxv+cjm1XltL+UhbgSmwH\nZ77r2QnDsXZk+KbQMdMb491HiTP4aAIZ+yrmklR30GlVE70dZV0WodMRkS4Zfn0ESSMs+uVb7Li9\nI+DDwolpINLw3N6aw8HRhHrO5emn7bt/2pOJ9tJS2gs612ipgwMQQvCvf/2L5557jrfeeguTyURo\naCh//etfK9z2ueeeY/r06QwYMIB+/frh4eFBp06dKr3vt956i3vuuYdu3bpxzz33lClcbXF1dWXI\nkCFcvXqVjz/+mDZt2pQq8/vf/56ffvqJYcOGAdC+fXuWL1/OwIEDmThxIj4+Pvzud7+zqnZsadWq\nFatXr+aFF14gNzeXtm3bsmXLFkaNGsXixYvx9fXl1VdfLbbN3/72N5544gmWLFlCt27dWLZsWbnH\nsH37dpYsWYKjoyPt27e3PnibDZWxCjfEp7l5+5TlAdOlS1FY5CCPJUXeNGD1svEIDZIsFNJ9XIAU\nc7rIsKHBWjmzV05lPGdq1VPG7AHEvI6SP7eVvNa6yLNogaNkXkfN++iVjrLz48GyzX3v2G2XXt90\nQj435ZDOBQUFMjc3V0op5fHjx6XBYJC3bt2qs/3ZeuQo6hbl7dMEKEvtcrFfNC9OjcHkaiApfQ4x\na12InJhB58d9iQo/zZikAaT77idscxAnN6RYVUAeSQ9avXLKG9XXRiKRksRNnU3kpAyQELuiD7HL\n+4HJUZtToM+ndXZ3Ekakgr6AS3ek0v+E/W5mMjWh0XwT5saNGwQFBeHj48PDDz/M3//+d1q1atXQ\nzVI0MErtU0/YtQFODsU120RU3l7Nk4dMPhvQAVpf4/KdBwneMYJ8vSRmRW8eMWZhQjDemM3ytfP4\n6S4TP5pVMvbi7Dg51b7Qt/DlZRMds0YTmXaKl40pvGvw0dxLRSEIE7duP6aFjshvS+xqd142ziGT\npYw3zOZAT5PVG6j
JGHCbOB06dGDv3r31tj+VDrJpoEb+9YQ9G6DnyW5kDf6WAen9iQo/TZepA0kf\nnAyFjla3yvvPXCE8Mxt3jOgx4Y6Ro+ej+McTxaNu2s6qtfjJW2bK1jbZ38zl6r9XsdC4i0GGJURO\nzADpQNi3gVBgHlHqJDjcZMftHRHAeoMzB8MX41mYDoHRCAGhobXfNoVCUTmU8K8nigVCmxyK59Dp\nHNr9JWGbg0gfvAupK+DS3fvB5EhsfD92bttBzFoXosJPs+Sx2RUmcrH1krEYVusqVo6tCutATxMd\n0kczZruPpuqRDubJZjrQmUgIScJxthuRkzI0FVbwN4wu/F/aPziJ/0mLtratyYdzUCiaGErtU48s\nSYnGafR+emZfJy3kfxlHMCN+uUpCfmtoewXy20BhkS42QmZDq3lsGWDCWAlhWN7E0tpU/xRTYe2a\nSw6w/aFJoN8HhQ7Eru4NQOTkY6DPpaBrJhTqSBiZSth2HxJG7gMJfqv9rRPdSnpCPf209lvZBBSK\nukHI8mbeNCD+/v6yPvWUNcUyycqeZ1/oomi+y1rNnafbccQ7FSR0N/bh1342x3fDGZyu4LEnkMMe\nP2m+9TbulZVBp7M/kaq8uPrVoaTbKgCB0XS4bT9RaadYYNRm/Y4bGkzCfT8gbnZEdrgAJrSHW0Eb\nYle7s/32TnzjkUMr4yRufld6VrCbm+b73xj46aef6N+/f0M3Q6Eohr1+KYTYJ6WsMCmHUvvUAhWF\nJu64Q0delyMcGZyMxyFv0BcUCX4B3HBGLsm2qoAGZPgXj69TScoyoNa2YbVkJE9AewMw2wF0SAYZ\nlrAp+DBh3w0BnQlx9Xdab3PM47Yzd7Hj9o5sCtmJqftR7r6ebp0QZktmplIB2dK+fftSyz7++GOr\n//nnn3/O2bNn67tZxVDhl5sOSvjXAmWpW156SYvNv/j/lhK7sjfktdUMug65ReGWczuB0xXGDg1m\n4+4kwo5O49eeuqrH4KF+Z8ba5hS2F775QE8TPikPsin4MGOSBiBb3bSGkL581wESQpIg34mwrYNI\nD/4GvzP2u2JTjPEfHQ3bthVftm2btry2mTVrljXkQF0JfxV+uXmihH81KGmcLCuswsWQUG79fjhf\nG5yJMKYSvHuwJvB1Uou/VugISDz2BJIQkszYSY+zceUXXHgv0X6FFVBWbP261pvbfegcmIv08WDM\nqQdJGKmpumK/8kZ/uZf1wSdutSNhZCoxa13YZ5xDBgb8DDGl3gKaVERPYPBgePTRogfAtm3afzsT\nVWuMZaS9bt069u7dy5QpU/D19SU3N5d9+/YxYsQIBg0aREhICOfOlc6fNGPGDGbNmoW/vz99+vTh\n3//+N6A9SMLCwrj33nu5776VBShvAAAgAElEQVT7AFiyZAmDBw/G29ubhQsXWutYtGgRffr0ISgo\niKNHjxar2xKPaM+ePQQEBODj48OQIUO4cuUKr7/+OqtXr8bX15fVq1fz+eefM3v2bECLCnrvvffi\n7e3NfffdR5bZy2DGjBm8+OKLBAQEcOedd1rrP3fuHMOHD8fX1xdPT0+SkpJq/2Q3M5TwryL2VDwl\nYlhZ8TzZjWt9komafIyxQ4NJCkouSp4CeOy7BwSkex3CI/33pHS/UOP22Y7ILbFy6pqyHjoHP5xL\nvqcH/S6NJma1Oztu70jhbafApAeJZgfQ3+RTzw68a/CxuoP+5czGUg+BJhPRExg1Ctas0QT+669r\n32vWaMvrigkTJuDv7098fDwHDx7EwcGBF154gXXr1rFv3z5mzpxpN/ELqPDLLRXl7VNF7Kl47NrM\nA6OZfuYQSZuDSAhJ0tQcABI89gaR7rOf9MG76LcniF8de+DQcyAX3qt+KOSGZsoU+w8ai/pq7GPT\nSej7pVXVk3DvPnC8AQ55HPHeQ6RvIRS20tRjXNfmBCQ9SFpgNOya2+QmhI0aBc8+C
2+9BQsW1K3g\nt8fRo0dJS0vjgQceALRAcHfccYfdsir8cstEjfyrSGVHoH5ndMwNP8WIX66iz3axqjrcfwwgLTGZ\nmJW9af9zEEec23N54yqOfT63Sem1q4qWcyCYsK2D2BR8mNgVvQnbHIzjBXct9aRDAbS6wVK/dkVh\nLcy2gCYV0dPMtm3w0Uea4P/oo9I2gLpGSomHhwcHDx7k4MGD/Pjjj3z77bd2y1Yl/LKlvuPHj/PH\nP/6x7g6gHMoLv9yzZ09mzJjR/IKw1QFK+FeRyo5A1xu1UMyRk49R6Hzaqu7J6H+QOIMPjxizubZy\nJ6zQ9PtNTa9dVS68l8j5f+4k/6GHeP7MPB49lc2G3UksTuwIeW257aQvFLQiwyeFVjmd2RR8mJi1\nLvxgnMfMcTHEG+vAWlpHWHT8a9bAm28WqYDq+gFgG864b9++XLhwge+//x6A/Px80tPT7W6nwi+3\nTJTwr4CSxt3QUPseNV26FF/mSpYW4tjxhjbiTw0gbHMwOOYSOfkY4w2zS+2rKem1q8sUw1w+2xBF\nr0IjgwxLiAo/Texqd17bKbUUlIU6bt1+jDbZ3YgwpvKBwZMPey7GMS3dmraysbNnT3Edv8UGYEcd\nXiVu3LiBi4uL9RMXF1dsvcV46+vrS2FhIevWreOVV17Bx8cHX19fUlJS7NZrCb88evTocsMvT548\nmWHDhuHl5cWECRPIyckpFn559OjRFYZf9vHx4YEHHuDmzZuMGjWKw4cPWw2+tvztb39j2bJleHt7\n89VXX/H++++Xe262b9+Oj48Pfn5+rF692pq5TFE2apJXOdibzOTkBNOnQ2Ji8QldAFM/isbvjI71\nxqW4kUmHPw7geo+fafvrXdzs9Bsxa13YfnsnEj2vUfjTxFLpDhvTpKa6oph3VKB2vsbxL94IP8qY\npAEkjExFd8sJU8dfaHu2HzedLzAmaYD2JlCNiW+1RXOd5DVjxgweeughJkyY0NBNUVSDmkzyUgbf\ncijLfz8xsbSQDl0UjWdhOgfDv2H9WhfAmevdj4FJz9vftgG0OD2+a6fifCyK3Fywrbop6rWrQ7G3\nm11zOQAcCDThmZTOpuBviF3lToQxFaen+pHb8witf+ltVQFFyKXFksooFIrqo9Q+5VCVPNz35+pI\nD7bk2j3N/N/fBMdcwr4bwsvGVMYbs/FdO48DPU1cutQw/viNAbs2k11zSdN74Lt2Hi8a04gz+HDT\n+QKtf+nNrduPYTjelwhjasvQi9Uzn3/+uRr1t1CU8C+HyoZL6PanUHZsTyyWa/dmj6Pocn7Hxt1J\nSATuGDlgjIJdc9HpYNo0bdtZs7TvadNaRiiDsmYhdzkylwPGKIYYFlu9fW51PsNtx/3I8P6esUOD\nQacj7rWYJqP7VygaM0r4l0NlwyUE/NqNhPu3s+P2jhiO9eWq2yGQYGqdQ5zBhyyKPy0KC4smiH30\nUdkxgZojZU0Ie/997dwe6GnCI0kLCxG2dRDZd2RpM6Dv3cfYwQFE5S3m0pc6a5L7rl2b9/lSKOqM\nyuR6bIhPY8nhu3x5UY5dN7cycs7q9VrOWkse24VIXnWy5toN8lgihdBy1paXa9fycXOr32NsLFjO\nNYHvyCCPJbJQp7fmMXYfFyD5s5OMNfjIDNyKna9Wreo+F3BTzuGraL6oHL51SKXCJRQWMuKXqyB1\nIKBjljexKzVD5ZhTD9LhMRMmU+XDKrdU1bblXMvkuSSlRaGTJiKMqQTt9STDN4Xg7wcTYUzFleIn\nKC+vec+RUCjqAiX8awO9nsXDHEFIOmZ6c9X1R3bc3pGYda7ke3pYQxxUdoJYUwtlUFdc6+xKnMGH\nZP80a1pLe2o0aBkPzKYQ0tkedRW6eeTIkfWSm9h2P6GhoWRnZ5dZdsOGDRw+fNj6v6Kw1g1KZV4P\nGuLTWNQ+9hj99jsydv4Sqz4oLHCUZKGQ3R7zl
xLMKiAhwyY9Xmy75culdHIqX+Xj5FT3KoymQpDH\nEinmdJGxBh8pwaoC8jMsqXdVWVXUPu8kvyO3ntxabNnWk1vlO8nv1KgN7dq1K3f9iBEj5J49e2q0\nj7pg2bJl8vnnn6/1emtyvPn5+XWyn+nTp8u1a9dWq03VQal96pHQRdE4pqUTlbeYOOEMUvIfXyMU\nOjBvt5Y8d+OeFMKOTisVpdOesfPZZ1umy2dlSHY24bt2HuON2ZgQLApoTa90f7r3/AYTgnwc8Bw6\nHSaHNqo5EoN7DObRdY+yLUOL57AtYxuPrnuUwT1qP6ZzTUM6Z2RkWGftvvbaa9a3i+3bt/PQQw9Z\ny82ePZvPP/8c0OL0Dx48GE9PT55++mlrfJ2RI0fyyiuvMGTIEPr06UNSUhJ5eXnlhm729fW1ftq2\nbcuOHTu4fv06M2fOZMiQIfj5+bFx40YAcnNzmTRpEv379+fhhx8mNzfX7jkxGAzMnTsXLy8vhgwZ\nwvHjx4GiGdD33HMPc+fOrdZ+DAYDv/32GwBffvkl3t7e+Pj4MG3aNFJSUkhISGDOnDn4+vpy4sSJ\nYmGtv/vuO/z8/PDy8mLmzJncunXLWufChQsZOHAgXl5e1sB6O3bssJ4bPz+/MkNhVJvKPCEa4tNY\nR/6x87XRqMWY6/5wgDbKHxrcci21dYSbW/HRvefQx4vOtc0bVsADj1dYV02pqsF368mtsmt0V7lg\n6wLZNbprqTeB6mBv5L9w4UK5ZMkSKWXxEWpeXp4cNmyYPH/+vJRSylWrVsknnnii1PZjxoyRX3zx\nhZRSyqVLl1r3sW3bNvnggw9ayz3//PNy2bJlUkopL168aF0+depUmZCQYN1/RESElFLKb775Rt53\n331SytIjf3tvAgkJCTIoKEjm5eXJV199VX711VdSSikvX74se/fuLa9duyZjY2Otx5Camir1er3d\nEbmbm5t8++23pZRSfvHFF9bjmD59unzwwQdlQUGBlFJWaz9ubm7ywoULMi0tTfbu3VteuHCh2Dkp\nOfK3/M/NzZUuLi7y6NGjUkopp02bJt99911rnR988IGUUsoPP/xQ/vGPf5RSSvnQQw/J5ORkKaWU\nOTk5dt9W1Mi/HolYvrSYP3+GTwruh4axcXcSpswsawwg5X5Yc0q62h7YHU+3nweREJJEpye8SQhJ\nJmxzEI8cO9TofP9HuY/iWf9neWvnWzzr/yyj3Os3prNtSGdfX1/efvttTp8+Xarcrl27eOyxxwAt\ndHJl2LZtG/fccw9eXl5s3bq1WMC48ePHAzBo0CCMlYxVcuzYMebMmcOaNWtwdHTk22+/ZfHixfj6\n+jJy5Ehu3rxJVlYWO3fuZOrUqQB4e3vj7e1dZp2WY3rssceswe0AwsPD0ev1ADXaz9atWwkPD6dr\n164A1tDXZXH06FHc3d3p06cPANOnT2fnzp3W9fbOW2BgIBEREXzwwQdkZ2fj4FC7ARlqpTYhxB+A\n9wE98A8p5eIS62cAS4Az5kVLpZT/qI191ztZWUTITJYeCyDDN4WOmd4Y7z5KnMGH8cZspI2/PigV\nTk2wnLv58zWDrl4WMu/7fCLvduCq2yE6Znoz4perRIWfJia3cY1jtmVs46O9H7Fg+AI+2vsRowyj\n6vUBIKUW0tlW8JVFyZDOAA4ODphs3NMsCV5u3rzJc889x969e+nVqxdvvPGGdR0UhVvW6/WVSv94\n7do1Hn30UT799FNrvgEpJV9//bXd6KKVxfaYbH+XDFNd0/3UFvbO27x583jwwQdJTEwkMDCQzZs3\n069fv1rbZ43vGCGEHvgQGA0MAB4TQgywU3S1lNLX/GlSgt82sudpnStjhwaT4fM97gcDyOl6xhrS\nwTZSZ3MP0Vxf2LraCr1ei5SqK4BCB666HiJy8jEt7s/ypQ3dVCsWHf+aCWt4c9SbrJmwppgNoK6o\nTkjnwMDAY
qGTLbi5uXH48GFu3bpFdnY23333HVD0EOjatSvXrl2z6rMr266SzJw5kyeeeKJYyOaQ\nkBD+9re/WW0JBw4cALSY/StWrAAgLS2NQ4cOlblPS5TQ1atXM2zYMLtlarKfe++9l7Vr13Lx4kUA\nLl26VO6x9u3bF6PRaLU/fPXVV4wYMaLM9gOcOHECLy8vXnnlFQYPHmy1BdQWtTFcGgIcl1KelFLm\nAauAsbVQb6PA9/lonls1iZkikEIpWN3LmYT79tDu1AD6XXCwqoA8kh7kQM/ijvwtwf2wPhkbPsWs\n6gnmtkwvLUGO4w3tgZCZ2WhCP+w5u4c1E9ZYR/qj3EexZsIa9pytWUznugjp/P777/Phhx/i5eXF\nmTNnrMt79erFo48+iqenJ48++qg1g5ezszNPPfUUnp6ehISE2A3hXJKyQjdnZmaybt06PvvsM6th\nc+/evSxYsID8/Hy8vb3x8PBgwYIFADz77LNcu3aN/v378/rrrzNo0KAy93n58mW8vb15//33effd\nd+2Wqcl+PDw8mD9/PiNGjMDHx4eIiAgAJk2axJIlS/Dz8+PEiRPW8m3atGHZsmWEh4fj5eWFTqdj\nliW2Sxm89957eHp64u3tjaOjI6NHjy63fJWpjGGgvA8wAU3VY/k/DU2tY1tmBnAOOASsA3pVVG9j\nMfjeFjZR8mcnySsdZazBR/Z7KEgyv41kfmsZY55t6mdYIgl8R83UrWO6vjRahgWOkrEGH8m8jpI/\nt5XMbyPb/XGA1Q00dv6SYttUaoZ2JWgpM3wrcidtCliMsi2BpmDw3QQYpJTewP8BX9grJIR4Wgix\nVwix98KFmiczrymhi6IZev48FDqAvoDIyT9zxO+/4HCTsO+G8Igxu1jANltaSojm+uTCe4mMGBmq\nJYBZ5U6/Q4NA6rjePYPIiRnErHWBDf+yjv4t+RhaUuwkhaKy1IbwPwP0svnvQpFhFwAp5UUp5S3z\n338Adt/XpJSfSCn9pZT+3bp1q4Wm1YyMrP38Z+Q+wnb4ABJa5YI+H4eLrmzcnVQqzIBer/z165ot\nbU1aUhdjKk+l5WC5LreduxOAqNCj3G82/paVj0HZYsrGkqaxKWM0Gq1eOIqyqQ3hvwfoLYRwF0K0\nAiYBCbYFhBB32PwNA36qhf3WOiVTNj6x/RRISLh3HzjkaYUkFHT4rVSYAScn+OKLCmIAKWpM4vy5\nWjYvNzdtgckR8tpy2eWodfT/9Mea8bcq+Rgqg5SNM+udomVS0/5YY+EvpSwAZgOb0YT6GilluhDi\nTSFEmLnYi0KIdCFEKvAimg2gwSgp5OPj7asI5vycoo36HW+ArhBMeshzAgGREzOY1Ge2Guk3EHFT\nZ1vVP8HfD7G+lQE4XdSke2XzMVSGNm3acPHiRfUAUDQKpJRcvHjRbr7lylIrfv5SykQgscSy121+\nvwq8Whv7qikl8/Ja9MBt25ZWEQDsdreZQl7QirBtg0gYkUr7LB86P27CpFQIDcKWtibeiO8LXLcG\nfksacoBPPTsw3ujKSINNrmAbHB2rZ4txcXHh9OnTNAZblEIB2oDExcWl2tu3uBy+ZemB7Qn+twwB\nnL9zP+Q7Efz9YJKGHCBhZCqjtw9iW7s/kLhibumNFPVC4vy5BK/UsSt0sebnb9xBXIYPUeE/MT5t\nHplG+9tVNqx2SRwdHXF3d692exWKxkbjmhZZD1RF3xvj2QsKWhG7ojc7t+0gdrU7SEjp+jv+8YQS\n/A1NzggTg74uCvxmmye5LAoL4aWX6rGRCkUjpcUJ/7L0vY4jo/G/M4YMDBSiIwMDAnA7O5qnc4qE\nS9C/F2BwHaj0+42AOQFzybgShTtG9Jjw72Lf7bYk5kmZCkWLpkWpfeLjwZ4nmxDQpft+9g79D+tX\nuRNhzGS9wZmrnv+hx6XRtP/NCIABSKrPBivKpKTtBiA3V3uIexp1rDcuxZU
ssnBlvGG29jZQwUNB\noWhJtJiRv0VY2Bv1SQnP/ldz64yclMHwUSOInJQB0uzuqWh0lGW7GXhWx8Hwxaw3OKNDst7gzMHw\nxfidKerqXbrUc2MVikZIsxX+Jd05X3rJvlGXwGj8DDEsMKZoOn1dPkkjdoDDDWJXuzPn55RSbqGK\nhqcs282qn7WQ21Hhpxk+aoQW8XOtC+uNmu9/q1bw/vv12FCFopHSLIW/PZ/9svS8fme0keK7Bh9t\ngdCycSGLTo0KD9D4KNOHnyxr0vekETsI2utpTfrepQt06ADTpqkHuUIhGuukFX9/f1nd5MwGg30f\nb3tkYGC9wVlT8zjcAn0e7oeGkdEnDQT8ZZUnC427im3j5qbN4lU0HPZ0/k5O8GtbA590cCYq/DRB\nez1J9k8jZq0L4VnZuJqMperp0kV7E1AGfEVzQQixT0rpX1G5Zjnyr4o7p2Wk2PbSHeBwC/fUYZz8\nV4rVrTPGs1epbVSo5obHXj7kTz6BT2bNtqp6dg/IxOFaJyInZbDG1RkTgn6hw+FPLhCoBX+7eFG9\nzSlaJs1S+JelEujSxRwS5vl+uIb+ARMCgSTO4ENu9xNwswPG3lpWrheNadzzrwXkXB5Y6foV9Ytt\nohdLPCVr4DeZTZ8MF/J/dxIccvmHZwe8QoM4MjgJ2v1WzAB84wZMnapUQYqWRbNU+5SlErDE33F/\n6A8Y/TfjsSeImYdziJxyFBxuWv9HhZ8mptU8uvePKrceRSNHCDxDg0gfnAxSgJBQ0JrY+H6MN4fj\nLom6voqmTotW+5SlEpgyRYvRPzv9Fzz2aEIhcmIGONyk3SkPfkxMJkJmE9NqHlvamsqtR9E0SEtM\nRnelB+gkCAhOGWo1ANtDhXxWtBSa5cjflvj4ogTgrq4wJjCGD3tq8WDmjLuAyfksmASxX3rzsjEV\n0UjPh6IaVGPkb96s2jGAFIqGprIj/2Y9w7eY+icwms5ndLy7Yh7uBk8ipxzRvHskICSfDejAy8YG\nbrCiVvGcEEK6x2YoaE2/1MFcbneTX/vtJXLKEW6sGISfjLE781fZdBQtgWap9rFgOwvU4s//gcGT\nzwZ00AS/APfUAKsKyBAa0rANVtQqP7saaXfWm9j4fjyVlsN51wy6H/FHd70z8R5af/A/V/wWsE2/\naS/vg0LRXGjWah+dTpucBUX+/FHhp5H6W9D6Gu6pARh7HyVmrQufDujIkTvPI/92pBZar2h0GAzE\nidL+/0/nZOPZ3khmppaGs7BQs+2EhmqZ2ZSxX9HUaNEGXwudOxf9tvjzG473hTbXcD8UwMkNKdZQ\nAO2PhLF8qBL8zZYs7fo7/+pabOZvu4uZ3N46BgKjKTRP7s7MhI8/Vvl/Fc2bZi38r3ppcXu08Mya\nP3/GgH20PdsX491F/vyBifPI/4NJjeiaM66uxBl8uNzjBOS1JemeA8QZfHjX4MMPY4sHfoOiN8aS\nqAl+iuZCszP4Wrx7Ml2iYdgSDgReY318X8CZyMnHQF/ALaer1hE/rRaT9HZUQzdbUcfETZ1NVN5i\nYldp2bgiJ2YQOflnKHQkdrU7441LcafifqCMwYrmQrMS/rbePX5Cx4HWOeBwi8gpR2l7wU1LxA70\n+/kuIuQpMPvzRzRwuxV1z5a2JmKYR4RxDgCv3HCloEsWt53sS4TxIBLwHDqdtDsvwAotHbUQxd8A\nbI3BCkVTp1kZfK0B3QKj+cuZjbTnepFLpwAkeOwJ4sfEZOXP31IxG34jJx/TBgOFjsQuH8CO2zuS\nEJJM5yNDuaR3xmlDItOnQ2Ji0RyRRYuUsVfR+GmRBl+LPtbvjI43wo8C4H54kCb4ARDMPJzTIG1T\nNA7ipmqB32JX9Kb7EX/Q5xM57RAJIUl0PzKIS/12453RjU8+gb//vXTsIIWiudCshL9FH7veqCX0\niJx8jAzvFG0ilwlAEjlFM/QqWibWwG/
GVH5ZvRf9ZRfQazN/f+23j7DNQaT+EK8EvaLZ06yE/6XR\noXgPnY4b5mD+DrnaqP9mB2K/9IGC1uBwkwX3K5VPSyVx/lwi3o4CvZ6xQ4MpvO0M5LcGx1voL/dk\n4+4kKCwk7rUYQhdFN3RzFYo6o1kJ/1HZ3TgU8hXjhgbzqWcHbaEE9JoDd0x8P9qe9qag462Ga6Si\nUTA2fAoJIcl0PzIIHPLApKPwttPcPtGfOIMPUXmLuT+3Wd0eCkUxmpXBFwcHxg4OICEkCUwOoCsg\nbHMwI365ag3THKHcOhVAtz+Fov8tm1/v3k3Y5iBG/HKVyKmHQZ8PeU7EruxNhMxWKdsUTY4WafCV\nhYVs3J2EPtsF9AV0zPJm4+4kXjamWsM0KxQAF95LpLCrM2FHp7FxdxIRxlSCdwWAAIdrXYkwpqoZ\nXYpmTbMS/oWY9bjOZ+iY6c1V1x+1/+iJeDuKxPlzK65E0WJ4b3Aiqd9/gRE34gw+JA3bg/vBAArb\nXNecAnQ6pftXNFualfD3G6rpccM2B3Fl2SHCNgeREJKM31DluqEojmVCYGYmjDdo7p9hWwdh7H2U\nMUkDiAo/zdjBAUr3r6hX6jOSbLPq1Yd7X8Bz8zS+3p2CBL7enYLn5mkc7n2hoZumaGTYhvs+0NOE\n79p5fL07hb4/9SdhpBYAMOHefcSsdYEN/1Kjf0WdYzsgkVL7fvrpunsANCvh/2VIIicPfYEjBeiQ\nOFLAyUNf8GVIYkM3TdHIKKbO3zWXA8YodJh4Ki0HdPlk+KQQ/P1gAKJCj6rRv6LOsR2QWKjLSLLN\nqkernLuKymIvQFsW5oUmRy3y57AfiJyYQcxaFx5ZtBQhwMEB7r9fJXlR1D5l+RfUld9BsxL+oAl6\nNSVfURGLFmmB2mwJv9Mc+mGVO8HfD4FWuZrrJ9DLnPC9sBC++67+Xs0VzRtbHb+uDGlcV5Fka0X4\nCyH+IIQ4KoQ4LoSYZ2d9ayHEavP6/wohDLWxX4WiukyZAtOna9m7QPvO8DTxxtq+AFbPHwod+dSz\nAyZ0+Bm0pC8lKflqrtI/KipDSR2/JZmQLXUZSbbGwl8IoQc+BEYDA4DHhBADShT7I3BZSnk38C7w\nTk33q1DUhPh4LU2j5YYrLIRLm+aygYeLef6E7fDhaP+feGRoAAfDSyd9sWB5Na9vo52i6VJKxx9Y\nlHyqEB1GDIwJjCHeWDfOBrUx8h8CHJdSnpRS5gGrgLElyowFvjD/XgfcJ4QQKBQNhD3jmpTFPX9i\n1rqwKfhwMc+f9calduuzvJrXt9FO0XQppssPjKa96785MOkt1huc0SF5cagrq4csIP3I/jrZf20I\n/57AKZv/p83L7JaRUhYAV4AuJSsSQjwthNgrhNh74YJyz1TUHWUa0Ww8fyKMqehvtrN6/kQYU3Ej\nE8+h02FyqHUT21fz+jbaKZoutrp8vzM6rrmlgiggclIGd44LICEkGUw6ntp1quxKakCjMvhKKT+R\nUvpLKf27devW0M1RNGPKMqJZ3kez0HL+FrT/DSQkBewmzuDDuKHBpIV8heGKCQKjS3mUlVWvSv+o\nKMmiRUX9bb1xqZZiVDqA4w0yfFOgwJHYlb2Zn5FSJ/uvDeF/Buhl89/FvMxuGSGEA9AJuFgL+1Yo\nqoU9bx8nJ5g1S3MRtsz6jV3ZG489QVo60MfTSAhJxmNPIJkD9hI7UlfKo6yselX6R0VJpkwBGaDp\n+d3IJMKYivtRT9AXaAWkvk73XxvCfw/QWwjhLoRoBUwCEkqUSQCmm39PALbKxhpOVNEiKGtOiCV7\n1+1PFiV9SUtMpvWvvUFXCDc7cNjjJ2LWuhCxvLT+X801UVSW0EXRdDDr+d81+DB2aHBR8qlCPUhB\n5KQMlvQJqJP910pIZyFEKPAeoAc+k1IuEkK8CeyVUiYIIdoAXwF+wCVgkpTyZHl1Viuks0JR21jD\nhCf
DzQ7Q9iq3Hffj0vIDAMTNX8KWtiYVNFBRZfo/M4kjXTdpwl4ADjdBmMDkQNj/DSNhpGYD6Hdx\nDD/9z6pK11uvIZ2llIlSyj5SyruklIvMy16XUiaYf9+UUoZLKe+WUg6pSPArFI0FS9IXjz2B2oJC\nRy7fdQDP0CCV9EVRI57adUrLOyLQsg7qtJDzYf83jA27k3hzlSdup8fg7jqwTvbfvJK5KBS1TLc/\nhdL9jInD7nu1IG9A5JSjoM+DvPbErnJnvDGbkW5GFi1S6h1FFRCCOIMPkVOPgIM5u2B+K2Lj+/Oy\nMRUdEje3qucTapHJXBSK2ubCe4m4+t5LTKt5vGxMZUvPTrinDwSdidvO3kWEMZWvDc5kukSryVyK\nKrPj9o6gNwv+QgcwORA5KYO3DJqevy5dhJXwVygqIHH+XLr3jyILNxwLJRk+3+N+MIDs7lmMHRrM\nnPDT+J3RqclciioR5xFAwv27AbRQIvlOmgpIFBDjqTlQ1qWLsBL+CkUFWEI2PGyYzabgw4RtDsLY\n+6g28zckmTFJA6wzf9VkLkVZhC6KJu61GGvgp0/vagVAt5/9ObkhRfPzNznQ3jiInMsD69xFWAl/\nhaICLCEbDvQ0sWStC0IxldEAACAASURBVBt3J+H8qysZPim4HxpGvl7gRiZ+hhha36uSvijsk5G1\nn8iCt4gTzlosEVMBFDrinNMeE4Lxxmz8Vi3gWtZD6HfPrXMXYSX8FYoKsI7md83lEWM2cQYfLvc4\nAXltyeibxv1nrvCuwYeD4Yt5sru6pRSlCV0UTZ/08yAgcmIGw0eN4IjXftAVMi0tDz0m3DFywBiF\n04G5fPFF3TsPqJ6qUFSArd7VOvN3lTuxK/qAhMjJP1uTvvxtl/3Ab4qWzf25OjYFHCJsuw/o80ka\nsQNa3SDsuyG8ZkxpkEmBDnW/C4WiabNokabzt6h+/NbO42XjHASw4YcRJI3YwW0n+xJhPFgUrEWh\nsCFi+VIQLkROSrUmCKKgFSN+uYqg6u6ctYEa+SsUFVAsZEPKXC7JKK53cSPO4EOyfxrBO0aQ3f0U\ncQYfFcFNYR+L7lB/E/QFdMz0hsI2RE7MIM6jbsI3VIQS/gpFJSiZHvSTWZr6J2atCzu37SBmrQtR\n4aeJmzq7oZuqaIy4urJ4mCM45ON+MICcrmc0FZCA9/16Vbx9HaCEv0JRDba0NQd+k9kgBBEym5hW\n89jS1tTQTVM0QuKmzuaCu+YmfHJDUaIgz+3jyMoY2CApP1V4B4WiDoiP11xEs7I0TZAK/dCyCV0U\nzS//0LHeuBRXssjClfGG2RzoaYJdRUEBnZxqbvCtbHgHJfwViloidFE09+fqePrjpThdLH6DOx2o\ne79tRePGwcF+kvaSVCeejy0qto9CUc/cn6sjKm8xn3TQcrCuNzhbk76r0A+Kygh+qL9Z4kr4KxS1\nRMTypfT9qT+RU45w58MBVoPwZFbQ4aFJZLpE17teV9F4cHOrXLn6chhTwl+hqC2ysngqLQeQZPik\nYDjeF4A5E0+Q4/kf/M7oyMyEqVOha1f1EGhOxMdbQ/aU+YC3l+KzJPWZ8lMJf4WitrAM2QrbQH4r\nMrxTiJzyEwiIXeVuDf4GcPEiKgR0MyE+Hh7/n2g6ixhOSgMnM3UETzPwwpQYQhcVxXqyl+Lz2Wcb\nLuWnEv4KRS0RN7Uo9ENwyjAtPK9jHu5HPIkwpuJKcWWusgM0D574RzQD8tM5GL6Y9QbN3vPCPa4s\nNSwsleWt5HwRS85oy//6dAhQwl+hqCW2tDURk6ipepKGHIC8ttobgMd+4gw+ZFFamatCQDdt4uPB\n06gjPfgbxiQNICr8NHeO03I+h20dpIV1aKQo4a9Q1BKJ8+fCuIeJnJihqXpW9CE2vj8UtCJyUgbj\nDaVn/+p0SvXTlHlyWTTj+Jd10laH33qS4ZtC23N92Lg7qdjTvTJ2g
fpECX+FohbZ0tZEv8ujid2k\nqXr+X4AD/X70pW+aN7f33EQ+DngOnQ6TQwHN/W/mzIYXBIrq0f+EjjfCjwJgON6Xq26HoNCB3NvO\nFYv1ZEkIlJmphfLPzGx4m48S/gpFLTLFMJfczauIOrwLg5ukR4YHRwYn0fc3QeKunTwyNIC0kK/w\nPNnNuk1eHrz0UgM2WlFtEk4tJWatC5GTj5HhnQIFrSHPibAdPsViPVkSAtnS0DYfFdJZoaglLKM7\ny02emQn5mfE8IoNICEmmU39vrromE7Y5iK93x+PIF9ZtL15soEYrakTPwizAGfR5ICB411DGZWQT\nFX6YMaceZIuniQjKtu00pM1HjfwVilrC3uhOTyEbdyfRMcuLq26H6JjlxcbdSeip5HRPRaPDNhev\nQPKpZwcobMVtJ/1I9k8DICaxL/meHpodiLInbjVkBHAl/BWKWsLeKK4QPWOHBnPV9f+3d+/RUdVZ\nose/uyoBEgQjEBEIlQqIIIk8BDWGlJGW7jRRiD29aBkjcKdv6+2eca49gWG4g3fZvZS1EEPWONfp\n26O2LjSo04zdEjTd3KZbMYFBQXmYBFAkIYDKQ4iAiUKqfvePUxXyqEpSlZB67c9atULFU6d+p5B9\nTu3fPvu3Dzl3LeccH1GY7QLAUfB9+LtJAAwf3p8jVb3ha+NRKimUOqdyIMvK8z/6rqe1tTf3/qA1\n8IP/G7z684YufzTto1QfcTisVE9b07OLqM5/masaMrngqOGqhkzK86sYemMmFxybcezM5/NEePrp\n8IxZ9UzbLq0NtmdgrBXkU06MBQNr/yOD4vq91p1a3tbexW1e76vfj6hOr8aYiHzMmDHDKBVNysqM\nSU42xqrn8D6K5prZ9yw2HjCZBbmGxzD88yDDY9ZzD5jZ9yw2Ix6Z63d/6enGiFg/y8r6/ZCU6fz3\n6kaMAeOanWf4hfXTgPUXFQGAXaYHMVbTPkr1EX+375fNreAvm6yJ3eqKKvhmKAz4Br4ZSnVFFfdm\nu3h7xsuk7UptV/8diaWB8eonL67B/p2F/NI5Cw+CYCjMdlGZ815rnj8al/DUfv5K9QMjwk0FudTc\n4j0BDDqHnL8WM+SUt/pnO4m0AFYuOCnJfwVQb3u9q+DdnFHC7oWPt6Z3tl43lPL8KriYxNpXJwBY\nHVwHrKD4iWVhHq3281cqooy7O5+aW6rI3JmLedIb+IeeRM6ndqr+aWoKXPrZcU5BXVmpPy/guusq\nWPtaBggsvf9jyr+3DTzC2lcnUFy/N2qX8NTgr1Q/qHfW49iZz0cVVRRmuzBDTiHnrsUMOUlhtgs3\n9h7tx96zzVQfyTmRyh/y32HrdUNxvTcdBjSDzcOwumnWBK8I1NdT/MSydtU90UCDv1L9IP3NAzRU\n/JEp2Yutpl+bc/GUnmT+Zhfl+VVMz+5Z2UdPV4NSvZf68wKor2P+5lzK8yupdFWBAYxwZswhSp1T\nqTeOiOjTEwoN/kr1A1+dd/W4U2RtXsTrO7ZjgN+9t50pmxdRPe5U67bJyYHr/nu6GpTqvZwTqZTn\nV/HpMAPGDjbrzJu5c5aVArrPatbnm4z/27+NrMZt3elV8BeRYSLyJxH5xPvzmgDbuUVkj/dR3pv3\nVCoatVYCbaug5r11XJ/ewitlBrunheUPryN9W0W7BT2efjrybgqKNxs3rGf+Zu8kvbitq35g/Bmh\n5LUMhtTMZfcYK8/f1AS//nV0VWf19iavFcCfjTGrRWSF9/k/+dmu2RgzrZfvpVRUKyrqfFNPwao1\nzGm2UX3hGZJNAw1HHPzVow9T7fTwkyXLqaiIoJuC4oDv76O47Jn2OTaBoUemcOf+qynPr+Lw5kWc\nf3Ndu9d2LJz0NW6L1L+z3qZ9CqG1O9U64N5e7k+puOJrFfDsEGsFqN85U9izYDVZ9Taef94K+OFY\n5SletW3dAPBm9iEABn4xgXOOj
wCYt9nVLk3XlUherKdXdf4i0miMSfH+WYCzvucdtmsB9gAtwGpj\nzBsB9vcQ8BCAw+GYcUTr2lSsczq58aaxHMjch+t964ahkg1pXGAwj40pJP3Ycq3r709OJ6WSwrIF\nx7jmcwdnxu8mc2cu1d4qrfL8KrI2L6J6R/urfpHOV/4Qnvsy+qzOX0S2iEi1n0dh2+28txUHOpOk\newdzP/AvIjLe30bGmGeNMTONMTNTU1P9baJUbGlo4MHq8zCgmcq8reTuygLgFwsOkuWu4Ujamm52\noPpUQwPF9XvJ3ZXFmet3M+zT6VRXVGGA13dstwL/uFMMH97+Tu6f/jT65mi6Df7GmDnGmCw/j43A\nCREZBeD9eTLAPo57fx4G3gGm99kRKBXNfC0B3IlgoHLWDpYurGNe5WRqXG+R2xhaZjbSlgyMdK1t\nmm02Sp1TqZpZTcaeHM44DlLqnMoR0kmkxbrif6UCaJ+S+9WvOrf2ePbZyE7V9TbnXw4s8f55CbCx\n4wYico2IDPT+eQQwC6jt5fsqFRNKH3iYZQuOsfaVCWTszYGEbyGxifK8vZRsSOMPXwS/ALj2BQqe\nL9dfeEsOyxYcY17lZOonHGT+X2awbMGxTusvf/ll58+0qMg6EUTLHE1vg/9q4Lsi8gkwx/scEZkp\nIs97t7kR2CUie4G3sXL+GvyVwlrzt6RiIgD1Ew4y9MgUsLcw6Owoiuv3ctWZ4GcMI3HJwEhXXGYt\nx1j+nQ9wHprIJlctJRvSeH3HdqZtWNFa0tlWtH+m2thNqTArfbSEZRdXM69yMptctTgPTaRuyn8x\nf3MuGz9vCHrG0GbzP/koYl2VKj+8H9ods/OozNuKa2se7769FQ+CncAfWiR+ptrYTakosSXJw7yj\nd7debR7+/XZSP55B+V3vU3rVmNbEfemjJRSs6n4COBKXDIx4Dkdrrt+1Na+1TXMDXX9o0fyZavBX\nKswqVi7nUlam1RLYNIIIw5uuAo+d5zKsK9JSSWFpy+PUNXzY7f4iccnASOebeynZkMa7b29tXY6x\nY66/rWj/TDX4KxUBKlYut3rBe2cMHzx0EUwCBzL3ccfsPJbeVwcCD2472u2+/C0qE+mVJ/2p7QLs\nvm9Vz53axcSv5lBsGjEIPzraGDDXD7HxmWrwVyoCFddst3rI2y9RmbcV7JdY+1oGxTXbe1TGGW2V\nJ70VTGlru7t4vd+qDl69hTkXZuKkHrt4yE2rZ3f9MtjWuU2zt4tz1H+mGvyViiIGLePsyF9p6wMP\nwIgRnT+XglVr4I3ft6Z17pidx9KFdUzcfyP/+Ooz7fYh4v/9ojnP35YGf6UiUGlmjpXq8STi2poH\nnkSW3lfHqowcLePswF9pK/ivxZ/TbGNZwUEAcndlWd+qbJd4sPo8aaZ9Wa0xnU8A0Z7nb0uDv1IR\n6LlZY0Fg7WsZvPv21tZlBNdkjvW7fSQ3ELvSujr2jidGXz3/0vvqqLz9fbiYBJ5Eaz9+KnuMid25\nk962dFZKXQEZjpt5sHkmxeYZELGqgOz/m5XN/icgYyUVEQqHo+u1jdudHBoaID0F7JdgQDOurXnc\nW9fIsgXHKNuwAurbvzYcjdn6i175KxWBOlb/+NaJnTgRZo4roQ4nbmzU4WTmuBJS7o7fBnD+Slvb\nandidDh4LmsIuBNb6/kBVm+cyN6x7U+ssZTi8UeDv1JRZPE1Nj744Wp+57zc//+DH65m8TXx+0/Z\nV9rqb+nLjgG89IGHOXjjftb+R0a7ev6EBT/gpf+xPGZTPP7E7/8xSkUhX87aV6niuzGpuCz4BnA9\nES3dQYuK4PRpKCtrn6Of8N/WcGL/5Zr+LTv+zLyjd7Nl/KjWdFrJgBVsSfLEXXksxpiIfMyYMcMo\npToQMQaMa3ae4RfWTwPGiJiyMmPS061N0tONKSvr3VuVlRmTnGzt3vdITu79fvvT2pVPGfnH4Wa
t\nc6oxYNY6p1rPVz4V7qFdMcAu04MYq43dlIombVaayt2V1bry10PnGxnZXN+u5DE5uXepC6fT/0Rq\nVE2CBvi8ik1jFB1EcLSxm1IxKFAPmrnXPdzj+v+epnIClVBGVVlpm5W5fCulFdfvjbKDuDI0+CsV\nRbYkedo1gPPlrKtS/JeAdoxxwSz0Es7uoH021xCgW2dc18b69CQ3FI6H5vyV6rlBdz1ppjufMnWk\nGzdi6kg3051PmUF3Pdluu+HD2+fwfY/09M77DFfOvzfvO/eJJ83D9z9ljtqtz+HxjBzDPw01k+7J\n1Zx/h4de+SsVA34y0sbu+x/jkWxHawnongWruT2ppnUNgPXrrZYH/vjLgoSrO2iglciWLOn+G0Dt\nwQ95Zuzj/HasVQq7PtMG9haMJHSq7ol3eoevUjHg/2x7hobDMyjPr2LcqBzqrz/oXRnsLUqabwK6\n7v8TKAtSVNT/JY+B0vFut5WiAv9jKli1hsmfnOTIGFh6Xx1vvJ/HgZt2gngo+uhi65Jbxd5HvNMr\nf6ViQUMDG3dUkrHvduqmbmfI6TGtK4P57gHoqgXClbiTNdS8fVfp+K6a2M1ptvFH1z7mvzP1civs\nAU3M//MtPFq/Pdjhxzy98lcqFjgclEoK9dcfhKYUzqXvI2NPDsXeoPedeUvg/lPwSkWnlw4f3ndX\n9+vXW8HZ1xLZV0num1iG7t9r1SprW3+dOqHzN4PUnxeQcyKVja+9BM6pLF24FxK+tf5jywDyvjgX\n+gHFML3yVyoG+EpA51VOBvtFMFA3dTuF2S4Ks128PeNlsg6ndnqdCDz9dN+MoW0lEXReRD6Y1tNJ\nSYH/W8dvBjknUimf+DKF2S7rF4lfg82D/UwauAex9L46nrohp2dvHEc0+CsVA7YkeZi63VoEfu2r\nE5i/2QqE5d/bRnl+FfM357J7R+e8izF9d9UfqK9+W4Hy+b4UkQgsWhR4Ytpfs7WNG9Yzf3Mu5flV\nLP3REbC5STo+Cc/AZisFJPDinf5bYcczDf5KxYCKlcvZQybTNqzgH+r3snFHJUMbplhXwI2j2bij\nEjvuTq9LTw/9PTvm9LuaU/Dxl8/v7huDT8BqI7ebjTsqGXjiekhuxH42jabnDlCyIY1NrlqmvHMv\nAxNuDuLI4oMGf6ViRPqx5eyuX4YbO4XZLs45PmLokSm4Uz6jMNuFG3u77XvTstjfzWLdCfR+PfnG\n0OW6uXbreL8d+QkDv5iA+5rjFGa7+If6vUzbsIJ99kwa32q/Fm+0NKy7onpyM0A4HnqTl1LB8d0c\nlZW92PCYmPnZLmPAzM92GR4TM/uexe0av/3sZ6E3gktP93+zWKBHV/v39qrr9vWBzF/o/3izshe3\nvl6k8+cUzQ3rukIPb/IKe5AP9NDgr1TwysqMsS2aa7KyF5tL2I0HjLHbzfyFi82IR+a22y7UAFhW\n1vOgP3z45dcEOtF0dyLpblwjHplr5i9cbIzdOt5L2K3Af/9cvyePQO/X1QkmmmjwV0q1mvvEk1ZL\nA28EPmq32j8w68mgAqC/k0ZXD5HuTzQ/+1nnq3/fc9+JouP4TXq6WbvyKTP3iSe7HV/Hk0dXY40F\nPQ3+mvNXKgZ1zGmPr7Wx1DxG4SgHGMNvx1rtH7LcNTDr8hKQ3TW77El+vi2HI3C7hpUrrXGuW9dh\nknfWGn40p4S5d83lf8o0ipYkkPjmm63jL8hxWW2aL65mTnP7ENZdS4r1663fBxprPNF+/krFGN9k\nbNuAe0Sc/P1tDsrzKxn26c2cHXXE2/6hlszKu6m2Z8K25djtVhcEh8OanO04wWqzBa7GSUyES5cu\nP/etJ7Bokf/XiHgXX59VQNbhVL4YV8tX13zJhLox1E77kMTGkVxKOQEXk2DQOTI/vI2aW7aRse92\n6q8/GFJf/kBVSSLw8suxsXqX9vNXKk75u9JOM1b7h2Gf3sy
Z6z/E9m0ym1y1zKucTI3rrdZvAG73\n5eqdH/+4cxVMoKvj9HR48UX/V9ydXjNrDUPuWUjSf8+iKecWJjeepzr/Zc6M+IxLI+qovaUKEpu4\nNOw4JDbB4C9JOjGe2sz9re0rOvbl7+0aBaYP73eIFhr8lYox/gJcA1Zf+7OjjmA/Mxb3sKPYvk1q\ndwKYfrx9OLh4ER55pP1+Vq2yrujbSk6GggLrpNPQ0Plbw6pVYHOtYbqzhDqclBx/hfNZf6Dpuo85\ndcMuaqftBHcCnms+a79j+0UQsJ9No3nMAZyfTKT++oOd+vL3xRoFvbnfIVpp2kepGOMvtTHdWcKe\nBatbUz1GWiD5K+TctWB3k3xmJOmfD8Ntg3FnbZRv28b07CKqx53CrG/fD8jXv8cX6AsKrLx9u28b\nRQUkpHxCwQejuWSHBLdh0+wPsDVdzbBToxnZmETNLVXgsYOtzc1nAlxMhgHenV0YAYO/ZNin0zkz\nfjfzN+eycUclpc6p1opmA1bwr2XLerzcpL+UWG+Xu4w0PU37aGM3pWKMv8Zoe8d6uHP/3WxyvcW8\nysmU37kXPGCGniTp+CScnw+zgnHLQB5aP4kfZudQnf8Sjp354HRSkOZkTuMlnhs/ADwtfD3rG+QH\nn9F0YjQvXWri0t80Y0v4Fk9yI7bmq0n+ahgXRhyiPP8QfD0ckhrB5sYzoImvm66mZsKHVkC/fnf7\nwTdfDYO+At816eDTjDwwkxPja8ncOYtNrlpKv5hq5fq9ffm7W26y48lqyRKoqPD/LSWe9OrKX0QW\nAL8AbgRuNcb4vVQXke8DTwN24HljzOru9q1X/kqFrmPAW7UK1tevIbG6hk1j25wAEr+2rrzdCWDs\nkHCRoQ03cc7xEZk7Z1GbuZ+SDWmA1SMfewu4ExhZfwMnJnn/fboTwd5mptcjYDOXf7ZhP5uGe9gx\nrjk0nbNpn0JCk7VPPxJPjuPStYcBmLwzlzFNV9F86i6qUjykH1veGrS7Wmje34kw1q70O+rplX9v\ng/+NgAf4d2CZv+AvInbgY+C7wDFgJ/DXxpjarvatwV+pvlew6vIJoGRDGm9kpFh974GMvTl8mXKB\nc+n7GHpkCl+9uK81vZK7K4vK23aDgYyDWdRN/S+SPptI8+gDHd5BuHzZ3uZXgFwYgbnqNAO/mMC3\nIw9ZJxz7JeskYRJAWqyThTuRyR/exsmUZm4/PJg/zDhOkvsG3C9V+A3iEDjA+9pLd+QvJRQr+iXt\nY4zZ732zrja7FThkjDns3fY1oBDoMvgrpfpexcrlFKxaQ8mrh4Cvqbx1t1VKKW7qst4Hm5uhR6Zw\nzvERhdkuNu6o5I1deVTmbcW1NQ+Ayryt1jbp+7A1jsaT4p2o/WYIDDrf+U295wIz+DRJxyfRnNrA\n4KOT+XrMAQYfzeSXf7HC0PNZQzg95FsaB7tJdM7j1L9d7sfjdMKRAPcK+IK4vwnnRYv8fw7d3c8Q\nD/oj5z8GONrm+THgNn8bishDwEMAjni740KpflKxcjmlzTaWtjwOAmtfuYEXJg9pzfk/9raw9Tqr\nRXLWsFxqM6txbc27fOW/J8e68j8+ybry913oDzwPxs+VvzsRxAM2N82jD5C5M5fazP1k/amIansm\ny+q9Qb7e+mG3wz4PON+6HMS7y+sHWm7S4fB/5a/hpQelniKyRUSq/TwK+3owxphnjTEzjTEzU1M7\nLzyhlOobW5I8TDo7l7WbrHr5EynNZO5yMWnvLWwZczUbd24ns+Z71Nz8HiUb0ri3rtGK6fYW6iZW\nM/LAjMspH3dimz17c/1tiQH3QCbvzCXxdAafp1xk2oYVrTeWddT2XgNfyWagYN1dEA9Umnollq2M\nNt1e+Rtj5vTyPY4DbVdSSPP+TikVJhUr2wfdU362caxaw4+b51BsnqEgzcnaN7Naq31OD/0G21ej\nGX5iNE2Dm7g4sBl3m2q
fSR+P58ioMyQ3JfP14GbsX2RR+9XN8G/LOQOcgdYr/a74UjuBJm67C+K+\nbwOB7kGIZ31S5y8i7xB4wjcBa8L3LqygvxO43xhT09U+dcJXqcjStoKot2Gj7fq+PWGM/womDeKd\n9Ut7BxH5gYgcA24H3hKRzd7fjxaRCgBjTAvwMLAZ2A/8trvAr5S6soJdzKTjXbS9YbcHtw+7dw2a\noiJrctfj6WJhF9VjeoevUnEmlLtce7pMY3eSk4PrCuoToWEqImljN6WUX121WA6kL0ojhw+/3G7Z\nH7vd/+/jse9Of9Dgr1Sc6a5s0p+uGqKVlXWuqBkwwAr2vg6fZWVw+rT1zSJQBc5DD2llTn/S4K9U\nnAmlbLKrkkl/C6i88IIV7P3l59tuD9YVf1OT1W9nyZLAC7GovqXBX6k44y+QA1y4EHjit7sVsoKd\njC0qsrqBilh1/WDNKaxbZ41PJ3WvPA3+SsUZXyAfPrz977/8MnAffN/rugvwPa0iWr8efv3rzhO5\n3c09qL6j1T5KxamuumGG0vQsmCqirqqHRKwTjAqNVvsopboUysRvV4KpIgplcln1LQ3+SsWpUPvl\nBBIooB850jkFFOg9RLS6p79o8FcqTvV107OuThod19X1994i8NOf6iRvf9Hgr1Sc6q6CJ1iBqoh8\n2qaA/L33yy/Dr34V2nur4OmEr1IqZP4Wc6+o0MnccNIJX6XUFdWx2VvbOv1ALRl0MjdyaPBXSoWk\nq+oeXUQl8mnwV0qFpKtS0b6eT1B9rz/W8FVKxaDu1scNtK6uigx65a+UCommdqKbBn+lVEg0tRPd\nNO2jlAqZpnail175K6VUHNLgr5RScUiDv1JKxSEN/kopFYc0+CulVByK2MZuInIKCNAeqpMRwOkr\nOJz+EgvHoccQGWLhGCA2jqO/jyHdGJPa3UYRG/yDISK7etLFLtLFwnHoMUSGWDgGiI3jiNRj0LSP\nUkrFIQ3+SikVh2Il+D8b7gH0kVg4Dj2GyBALxwCxcRwReQwxkfNXSikVnFi58ldKKRUEDf5KKRWH\noj74i8j3ReSgiBwSkRXhHk+wROQFETkpItXhHkuoRGSsiLwtIrUiUiMij4R7TKEQkUEi8r6I7PUe\nxy/DPaZQiYhdRHaLyJvhHksoRKReRD4SkT0isivc4wmViKSIyH+KyAER2S8it4d7TD5RnfMXETvw\nMfBd4BiwE/hrY0xtWAcWBBG5A7gAvGSMyQr3eEIhIqOAUcaYD0VkCPABcG80/T0AiIgAg40xF0Qk\nEagCHjHG7Ajz0IImIsXATGCoMeaecI8nWCJSD8w0xkT1DV4isg6oNMY8LyIDgGRjTGO4xwXRf+V/\nK3DIGHPYGHMReA0oDPOYgmKMeRc4E+5x9IYx5nNjzIfeP58H9gNjwjuq4BnLBe/TRO8j6q6ORCQN\nuBt4PtxjiWcicjVwB/AbAGPMxUgJ/BD9wX8McLTN82NEYdCJJSLiBKYD74V3JKHxpkv2ACeBPxlj\novE4/gVYDnjCPZBeMMD/E5EPROShcA8mRBnAKeBFbwrueREZHO5B+UR78FcRRESuAl4Hfm6MORfu\n8YTCGOM2xkwD0oBbRSSqUnEicg9w0hjzQbjH0ku5xpibgbnA33nTo9EmAbgZ+L/GmOnA10DEzEtG\ne/A/Doxt8zzN+zvVz7w58teB9caY34V7PL3l/Xr+NvD9cI8lSLOA+d6c+WvAd0SkLLxDCp4x5rj3\n50ng91gp3mhzDDjW5tvjf2KdDCJCtAf/ncAEEcnwTqYsBMrDPKa4450o/Q2w3xhTGu7xhEpEUkUk\nxfvnJKxCggPhaVF4kAAAANlJREFUHVVwjDH/yxiTZoxxYv17+Isx5oEwDysoIjLYWziAN03yPSDq\nquGMMV8AR0VkovdXdwERUwQR1Qu4G2NaRORhYDNgB14wxtSEeVhBEZFXgTuBESJyDHjMG
POb8I4q\naLOARcBH3nw5wD8bYyrCOKZQjALWeavIbMBvjTFRWSoZ5UYCv7euKUgAXjHG/DG8QwrZ3wPrvRen\nh4G/CfN4WkV1qadSSqnQRHvaRymlVAg0+CulVBzS4K+UUnFIg79SSsUhDf5KKRWHNPgrpVQc0uCv\nlFJx6P8D4Obclx42P3sAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jWxvLGexKv0D", + "colab_type": "text" + }, + "source": [ + "We can see from the graph that the predictions for the original model, the converted model, and the quantized model are all close enough to be indistinguishable. This means that our quantized model is ready to use!\n", + "\n", + "We can print the difference in file size:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "6r42iBnULP4X", + "colab_type": "code", + "outputId": "afe526c9-498d-498e-d768-1edfbf21e870", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + } + }, + "source": [ + "import os\n", + "basic_model_size = os.path.getsize(\"sine_model.tflite\")\n", + "print(\"Basic model is %d bytes\" % basic_model_size)\n", + "quantized_model_size = os.path.getsize(\"sine_model_quantized.tflite\")\n", + "print(\"Quantized model is %d bytes\" % quantized_model_size)\n", + "difference = basic_model_size - quantized_model_size\n", + "print(\"Difference is %d bytes\" % difference)" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Basic model is 2656 bytes\n", + "Quantized model is 2640 bytes\n", + "Difference is 16 bytes\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C2vpZE9ZshVH", + "colab_type": "text" + }, + "source": [ + "Our quantized model is only 16 bytes smaller than the original version, which is only a tiny reduction in size! 
At around 2.6 kilobytes, this model is already so small that the weights make up only a small fraction of the overall size, meaning quantization has little effect.\n", + "\n", + "More complex models have many more weights, meaning the space saving from quantization will be much higher, approaching 4x for most sophisticated models.\n", + "\n", + "Regardless, our quantized model will take less time to execute than the original version, which is important on a tiny microcontroller!\n", + "\n", + "## Write to a C file\n", + "The final step in preparing our model for use with TensorFlow Lite for Microcontrollers is to convert it into a C source file. You can see an example of this format in [`hello_world/sine_model_data.cc`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/experimental/micro/examples/hello_world/sine_model_data.cc).\n", + "\n", + "To do so, we can use a command line utility named [`xxd`](https://linux.die.net/man/1/xxd). The following cell runs `xxd` on our quantized model and prints the output:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "l4-WhtGpvb-E", + "colab_type": "code", + "outputId": "f975721f-bdd1-440a-93af-55f13c4c8690", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 3808 + } + }, + "source": [ + "# Install xxd if it is not available\n", + "!apt-get -qq install xxd\n", + "# Save the file as a C source file\n", + "!xxd -i sine_model_quantized.tflite > sine_model_quantized.cc\n", + "# Print the source file\n", + "!cat sine_model_quantized.cc" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "unsigned char sine_model_quantized_tflite[] = {\n", + " 0x18, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x00, 0x00, 0x0e, 0x00,\n", + " 0x18, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00,\n", + " 0x0e, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x0a, 0x00, 0x00,\n", + " 0xb8, 0x05, 0x00, 0x00, 0xa0, 0x05, 0x00, 0x00, 0x04, 0x00, 0x00, 
0x00,\n", + " 0x0b, 0x00, 0x00, 0x00, 0x90, 0x05, 0x00, 0x00, 0x7c, 0x05, 0x00, 0x00,\n", + " 0x24, 0x05, 0x00, 0x00, 0xd4, 0x04, 0x00, 0x00, 0xc4, 0x00, 0x00, 0x00,\n", + " 0x74, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,\n", + " 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n", + " 0x54, 0xf6, 0xff, 0xff, 0x58, 0xf6, 0xff, 0xff, 0x5c, 0xf6, 0xff, 0xff,\n", + " 0x60, 0xf6, 0xff, 0xff, 0xc2, 0xfa, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,\n", + " 0x40, 0x00, 0x00, 0x00, 0x7c, 0x19, 0xa7, 0x3e, 0x99, 0x81, 0xb9, 0x3e,\n", + " 0x56, 0x8b, 0x9f, 0x3e, 0x88, 0xd8, 0x12, 0xbf, 0x74, 0x10, 0x56, 0x3e,\n", + " 0xfe, 0xc6, 0xdf, 0xbe, 0xf2, 0x10, 0x5a, 0xbe, 0xf0, 0xe2, 0x0a, 0xbe,\n", + " 0x10, 0x5a, 0x98, 0xbe, 0xb9, 0x36, 0xce, 0x3d, 0x8f, 0x7f, 0x87, 0x3e,\n", + " 0x2c, 0xb1, 0xfd, 0xbd, 0xe6, 0xa6, 0x8a, 0xbe, 0xa5, 0x3e, 0xda, 0x3e,\n", + " 0x50, 0x34, 0xed, 0xbd, 0x90, 0x91, 0x69, 0xbe, 0x0e, 0xfb, 0xff, 0xff,\n", + " 0x04, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x67, 0x41, 0x48, 0xbf,\n", + " 0x24, 0xcd, 0xa0, 0xbe, 0xb7, 0x92, 0x0c, 0xbf, 0x00, 0x00, 0x00, 0x00,\n", + " 0x98, 0xfe, 0x3c, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n", + " 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4a, 0x17, 0x9a, 0xbe,\n", + " 0x41, 0xcb, 0xb6, 0xbe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n", + " 0x13, 0xd6, 0x1e, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n", + " 0x5a, 0xfb, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,\n", + " 0x4b, 0x98, 0xdd, 0xbd, 0x40, 0x6b, 0xcb, 0xbe, 0x36, 0x0c, 0xd4, 0x3c,\n", + " 0xbd, 0x44, 0xb5, 0x3e, 0x95, 0x70, 0xe3, 0x3e, 0xe7, 0xac, 0x86, 0x3e,\n", + " 0x00, 0xc4, 0x4e, 0x3d, 0x7e, 0xa6, 0x1d, 0x3e, 0xbd, 0x87, 0xbb, 0x3e,\n", + " 0xb4, 0xb8, 0x09, 0xbf, 0xa1, 0x1f, 0xf8, 0xbe, 0x8d, 0x90, 0xdd, 0x3e,\n", + " 0xde, 0xfa, 0x6f, 0xbe, 0xb2, 0x75, 0xe4, 0x3d, 0x6e, 0xfe, 0x36, 0x3e,\n", + " 0x20, 0x18, 0xc2, 0xbe, 0x39, 0xc7, 0xfb, 0xbe, 0xfe, 0xa4, 0x30, 
0xbe,\n", + " 0xf7, 0x91, 0xde, 0xbe, 0xde, 0xab, 0x24, 0x3e, 0xfb, 0xbb, 0xce, 0x3e,\n", + " 0xeb, 0x23, 0x80, 0xbe, 0x7b, 0x58, 0x73, 0xbe, 0x9a, 0x2e, 0x03, 0x3e,\n", + " 0x10, 0x42, 0xa9, 0xbc, 0x10, 0x12, 0x64, 0xbd, 0xe3, 0x8d, 0x0c, 0x3d,\n", + " 0x9e, 0x48, 0x97, 0xbe, 0x34, 0x51, 0xd4, 0xbe, 0x02, 0x3b, 0x0d, 0x3e,\n", + " 0x62, 0x67, 0x89, 0xbe, 0x74, 0xdf, 0xa2, 0x3d, 0xf3, 0x25, 0xb3, 0xbe,\n", + " 0xef, 0x34, 0x7b, 0x3d, 0x61, 0x70, 0xe3, 0x3d, 0xba, 0x76, 0xc0, 0xbe,\n", + " 0x7d, 0xe9, 0xa7, 0x3e, 0xc3, 0xab, 0xd0, 0xbe, 0xcf, 0x7c, 0xdb, 0xbe,\n", + " 0x70, 0x27, 0x9a, 0xbe, 0x98, 0xf5, 0x3c, 0xbd, 0xff, 0x4b, 0x4b, 0x3e,\n", + " 0x7e, 0xa0, 0xf8, 0xbd, 0xd4, 0x6e, 0x86, 0x3d, 0x00, 0x4a, 0x07, 0x3a,\n", + " 0x4c, 0x24, 0x61, 0xbe, 0x54, 0x68, 0xf7, 0xbd, 0x02, 0x3f, 0x77, 0xbe,\n", + " 0x23, 0x79, 0xb3, 0x3e, 0x1c, 0x83, 0xad, 0xbd, 0xc8, 0x92, 0x8d, 0x3e,\n", + " 0xa8, 0xf3, 0x15, 0xbd, 0xe6, 0x4d, 0x6c, 0x3d, 0xac, 0xe7, 0x98, 0xbe,\n", + " 0x81, 0xec, 0xbd, 0x3e, 0xe2, 0x55, 0x73, 0x3e, 0xc1, 0x77, 0xc7, 0x3e,\n", + " 0x6e, 0x1b, 0x5e, 0x3d, 0x27, 0x78, 0x02, 0x3f, 0xd4, 0x21, 0x90, 0x3d,\n", + " 0x52, 0xdc, 0x1f, 0x3e, 0xbf, 0xda, 0x88, 0x3e, 0x80, 0x79, 0xe3, 0xbd,\n", + " 0x40, 0x6f, 0x10, 0xbe, 0x20, 0x43, 0x2e, 0xbd, 0xf0, 0x76, 0xc5, 0xbd,\n", + " 0xcc, 0xa0, 0x04, 0xbe, 0xf0, 0x69, 0xd7, 0xbe, 0xb1, 0xfe, 0x64, 0xbe,\n", + " 0x20, 0x41, 0x84, 0xbe, 0xb2, 0xc3, 0x26, 0xbe, 0xd8, 0xf4, 0x09, 0xbe,\n", + " 0x64, 0x44, 0xd1, 0x3d, 0xd5, 0xe1, 0xc8, 0xbe, 0x35, 0xbc, 0x3f, 0xbe,\n", + " 0xc0, 0x94, 0x82, 0x3d, 0xdc, 0x2b, 0xb1, 0xbd, 0x02, 0xdb, 0xbf, 0xbe,\n", + " 0xa5, 0x7f, 0x8a, 0x3e, 0x21, 0xb4, 0xa2, 0x3e, 0xcd, 0x86, 0x56, 0xbf,\n", + " 0x9c, 0x3b, 0x76, 0xbc, 0x85, 0x6d, 0x60, 0xbf, 0x86, 0x00, 0x3c, 0xbe,\n", + " 0xc1, 0x23, 0x7e, 0x3e, 0x96, 0xcd, 0x3f, 0x3e, 0x86, 0x91, 0x2d, 0x3e,\n", + " 0x55, 0xef, 0x87, 0x3e, 0x7e, 0x97, 0x03, 0xbe, 0x2a, 0xcd, 0x01, 0x3e,\n", + " 0x32, 0xc9, 0x8e, 0xbe, 0x72, 0x77, 0x3b, 0xbe, 0xe0, 0xa1, 0xbc, 
0xbe,\n", + " 0x8d, 0xb7, 0xa7, 0x3e, 0x1c, 0x05, 0x95, 0xbe, 0xf7, 0x1f, 0xbb, 0x3e,\n", + " 0xc9, 0x3e, 0xd6, 0x3e, 0x80, 0x42, 0xe9, 0xbd, 0x27, 0x0c, 0xd2, 0xbe,\n", + " 0x5c, 0x32, 0x34, 0xbe, 0x14, 0xcb, 0xca, 0xbd, 0xdd, 0x3a, 0x67, 0xbe,\n", + " 0x1c, 0xbb, 0x8d, 0xbe, 0x91, 0xac, 0x5c, 0xbe, 0x52, 0x40, 0x6f, 0xbe,\n", + " 0xd7, 0x71, 0x94, 0x3e, 0x18, 0x71, 0x09, 0xbe, 0x9b, 0x29, 0xd9, 0xbe,\n", + " 0x7d, 0x66, 0xd2, 0xbe, 0x98, 0xd6, 0xb2, 0xbe, 0x00, 0xc9, 0x84, 0x3a,\n", + " 0xbc, 0xda, 0xc2, 0xbd, 0x1d, 0xc2, 0x1b, 0xbf, 0xd4, 0xdd, 0x92, 0x3e,\n", + " 0x07, 0x87, 0x6c, 0xbe, 0x40, 0xc2, 0x3b, 0xbe, 0xbd, 0xe2, 0x9c, 0x3e,\n", + " 0x0a, 0xb5, 0xa0, 0xbe, 0xe2, 0xd5, 0x9c, 0xbe, 0x3e, 0xbb, 0x7c, 0x3e,\n", + " 0x17, 0xb4, 0xcf, 0x3e, 0xd5, 0x8e, 0xc8, 0xbe, 0x7c, 0xf9, 0x5c, 0x3e,\n", + " 0x80, 0xfc, 0x0d, 0x3d, 0xc5, 0xd5, 0x8b, 0x3e, 0xf5, 0x17, 0xa2, 0x3e,\n", + " 0xc7, 0x60, 0x89, 0xbe, 0xec, 0x95, 0x87, 0x3d, 0x7a, 0xc2, 0x5d, 0xbf,\n", + " 0x77, 0x94, 0x98, 0x3e, 0x77, 0x39, 0x07, 0xbc, 0x42, 0x29, 0x00, 0x3e,\n", + " 0xaf, 0xd0, 0xa9, 0x3e, 0x31, 0x23, 0xc4, 0xbe, 0x95, 0x36, 0x5b, 0xbe,\n", + " 0xc7, 0xdc, 0x83, 0xbe, 0x1e, 0x6b, 0x47, 0x3e, 0x5b, 0x24, 0x99, 0x3e,\n", + " 0x99, 0x27, 0x54, 0x3e, 0xc8, 0x20, 0xdd, 0xbd, 0x5a, 0x86, 0x2f, 0x3e,\n", + " 0x80, 0xf0, 0x69, 0xbe, 0x44, 0xfc, 0x84, 0xbd, 0x82, 0xa0, 0x2a, 0xbe,\n", + " 0x87, 0xe6, 0x2a, 0x3e, 0xd8, 0x34, 0xae, 0x3d, 0x50, 0xbd, 0xb5, 0x3e,\n", + " 0xc4, 0x8c, 0x88, 0xbe, 0xe3, 0xbc, 0xa5, 0x3e, 0xa9, 0xda, 0x9e, 0x3e,\n", + " 0x3e, 0xb8, 0x23, 0xbe, 0x80, 0x90, 0x15, 0x3d, 0x97, 0x3f, 0xc3, 0x3e,\n", + " 0xca, 0x5c, 0x9d, 0x3e, 0x21, 0xe8, 0xe1, 0x3e, 0xc0, 0x49, 0x01, 0xbc,\n", + " 0x00, 0x0b, 0x88, 0xbd, 0x3f, 0xf7, 0xca, 0x3c, 0xfb, 0x5a, 0xb1, 0x3e,\n", + " 0x60, 0xd2, 0x0d, 0x3c, 0xce, 0x23, 0x78, 0xbf, 0x8f, 0x4f, 0xb9, 0xbe,\n", + " 0x69, 0x6a, 0x34, 0xbf, 0x4b, 0x5e, 0xa9, 0x3e, 0x64, 0x8c, 0xd9, 0x3e,\n", + " 0x52, 0x77, 0x36, 0x3e, 0xeb, 0xaf, 0xbe, 0x3e, 0x40, 0xbe, 0x36, 
0x3c,\n", + " 0x08, 0x65, 0x3b, 0xbd, 0x55, 0xe0, 0x66, 0xbd, 0xd2, 0xe8, 0x9b, 0xbe,\n", + " 0x86, 0xe3, 0x09, 0xbe, 0x93, 0x3d, 0xdd, 0x3e, 0x0f, 0x66, 0x18, 0x3f,\n", + " 0x18, 0x05, 0x33, 0xbd, 0xde, 0x15, 0xd7, 0xbe, 0xaa, 0xcf, 0x49, 0xbe,\n", + " 0xa2, 0xa5, 0x64, 0x3e, 0xe6, 0x9c, 0x42, 0xbe, 0x54, 0x42, 0xcc, 0x3d,\n", + " 0xa0, 0xbd, 0x9d, 0xbe, 0xc2, 0x69, 0x48, 0x3e, 0x5b, 0x8b, 0xa2, 0xbe,\n", + " 0xc0, 0x13, 0x87, 0x3d, 0x36, 0xfd, 0x69, 0x3e, 0x05, 0x86, 0x40, 0xbe,\n", + " 0x1e, 0x7a, 0xce, 0xbe, 0x46, 0x13, 0xa7, 0xbe, 0x68, 0x52, 0x86, 0xbe,\n", + " 0x04, 0x9e, 0x86, 0xbd, 0x8c, 0x54, 0xc1, 0x3d, 0xe0, 0x3b, 0xad, 0x3c,\n", + " 0x42, 0x67, 0x85, 0xbd, 0xea, 0x97, 0x42, 0x3e, 0x6e, 0x13, 0x3b, 0xbf,\n", + " 0x56, 0x5b, 0x16, 0x3e, 0xaa, 0xab, 0xdf, 0x3e, 0xc8, 0x41, 0x36, 0x3d,\n", + " 0x24, 0x2d, 0x47, 0xbe, 0x77, 0xa5, 0xae, 0x3e, 0xc0, 0xc2, 0x5b, 0x3c,\n", + " 0xac, 0xac, 0x4e, 0x3e, 0x99, 0xec, 0x13, 0xbe, 0xf2, 0xab, 0x73, 0x3e,\n", + " 0xaa, 0xa1, 0x48, 0xbe, 0xe8, 0xd3, 0x01, 0xbe, 0x60, 0xb7, 0xc7, 0xbd,\n", + " 0x64, 0x72, 0xd3, 0x3d, 0x83, 0xd3, 0x99, 0x3e, 0x0c, 0x76, 0x34, 0xbe,\n", + " 0x42, 0xda, 0x0d, 0x3e, 0xfb, 0x47, 0x9a, 0x3e, 0x8b, 0xdc, 0x92, 0xbe,\n", + " 0x56, 0x7f, 0x6b, 0x3e, 0x04, 0xd4, 0x88, 0xbd, 0x11, 0x9e, 0x80, 0x3e,\n", + " 0x3c, 0x89, 0xff, 0x3d, 0xb3, 0x3e, 0x88, 0x3e, 0xf7, 0xf0, 0x88, 0x3e,\n", + " 0x28, 0xfb, 0xc9, 0xbe, 0x53, 0x3e, 0xcf, 0x3e, 0xac, 0x75, 0xdc, 0xbe,\n", + " 0xdd, 0xca, 0xd7, 0x3e, 0x01, 0x58, 0xa7, 0x3e, 0x29, 0xb8, 0x13, 0xbf,\n", + " 0x76, 0x81, 0x12, 0xbc, 0x28, 0x8b, 0x16, 0xbf, 0x0e, 0xec, 0x0e, 0x3e,\n", + " 0x40, 0x0a, 0xdb, 0xbd, 0x98, 0xec, 0xbf, 0xbd, 0x32, 0x55, 0x0c, 0xbe,\n", + " 0xfb, 0xf9, 0xc9, 0x3e, 0x83, 0x4a, 0x6d, 0xbe, 0x76, 0x59, 0xe2, 0xbe,\n", + " 0x54, 0x7d, 0x9f, 0xbb, 0x9d, 0xe8, 0x95, 0x3e, 0x5c, 0xd3, 0xd0, 0x3d,\n", + " 0x19, 0x8a, 0xb0, 0x3e, 0xde, 0x6f, 0x2e, 0xbe, 0xd0, 0x16, 0x83, 0x3d,\n", + " 0x9c, 0x7d, 0x11, 0xbf, 0x2b, 0xcc, 0x25, 0x3c, 0x2a, 0xa5, 0x27, 
0xbe,\n", + " 0x22, 0x14, 0xc7, 0xbe, 0x5e, 0x7a, 0xac, 0x3e, 0x4e, 0x41, 0x94, 0xbe,\n", + " 0x5a, 0x68, 0x7b, 0x3e, 0x86, 0xfd, 0x4e, 0x3e, 0xa2, 0x56, 0x6a, 0xbe,\n", + " 0xca, 0xfe, 0x81, 0xbe, 0x43, 0xc3, 0xb1, 0xbd, 0xc5, 0xb8, 0xa7, 0x3e,\n", + " 0x55, 0x23, 0xcd, 0x3e, 0xaf, 0x2e, 0x76, 0x3e, 0x69, 0xa8, 0x90, 0xbe,\n", + " 0x0d, 0xba, 0xb9, 0x3e, 0x66, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,\n", + " 0x40, 0x00, 0x00, 0x00, 0x53, 0xd6, 0xe2, 0x3d, 0x66, 0xb6, 0xcc, 0x3e,\n", + " 0x03, 0xe7, 0xf6, 0x3e, 0xe0, 0x28, 0x10, 0xbf, 0x00, 0x00, 0x00, 0x00,\n", + " 0x3e, 0x3d, 0xb0, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x62, 0xf0, 0x77, 0x3e,\n", + " 0xa6, 0x9d, 0xa4, 0x3e, 0x3a, 0x4b, 0xf3, 0xbe, 0x71, 0x9e, 0xa7, 0x3e,\n", + " 0x00, 0x00, 0x00, 0x00, 0x34, 0x39, 0xa2, 0x3e, 0x00, 0x00, 0x00, 0x00,\n", + " 0xcc, 0x9c, 0x4a, 0x3e, 0xab, 0x40, 0xa3, 0x3e, 0xb2, 0xff, 0xff, 0xff,\n", + " 0x04, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0xb3, 0x71, 0x67, 0x3f,\n", + " 0x9a, 0x7a, 0x95, 0xbf, 0xe1, 0x48, 0xe8, 0xbe, 0x8a, 0x72, 0x96, 0x3e,\n", + " 0x00, 0xd2, 0xd3, 0xbb, 0x1a, 0xc5, 0xd7, 0x3f, 0xac, 0x7e, 0xc8, 0xbe,\n", + " 0x90, 0xa7, 0x95, 0xbe, 0x3b, 0xd7, 0xdc, 0xbe, 0x41, 0xa8, 0x16, 0x3f,\n", + " 0x50, 0x5b, 0xcb, 0x3f, 0x52, 0xb9, 0xed, 0xbe, 0x2e, 0xa7, 0xc6, 0xbe,\n", + " 0xaf, 0x0f, 0x14, 0xbf, 0xb3, 0xda, 0x59, 0x3f, 0x02, 0xec, 0xd7, 0xbe,\n", + " 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,\n", + " 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x66, 0x11, 0x1f, 0xbf,\n", + " 0xb8, 0xfb, 0xff, 0xff, 0x0f, 0x00, 0x00, 0x00, 0x54, 0x4f, 0x43, 0x4f,\n", + " 0x20, 0x43, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74, 0x65, 0x64, 0x2e, 0x00,\n", + " 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,\n", + " 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00,\n", + " 0xf0, 0x00, 0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0xd8, 0x00, 0x00, 0x00,\n", + " 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 
0x00,\n", + " 0x48, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xce, 0xff, 0xff, 0xff,\n", + " 0x00, 0x00, 0x00, 0x08, 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,\n", + " 0x04, 0x00, 0x00, 0x00, 0x1c, 0xfc, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,\n", + " 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,\n", + " 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,\n", + " 0x14, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x10, 0x00,\n", + " 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x1c, 0x00, 0x00, 0x00,\n", + " 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xba, 0xff, 0xff, 0xff,\n", + " 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,\n", + " 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,\n", + " 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00,\n", + " 0x08, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x10, 0x00, 0x0e, 0x00, 0x00, 0x00,\n", + " 0x00, 0x00, 0x00, 0x08, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,\n", + " 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x07, 0x00,\n", + " 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00,\n", + " 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n", + " 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n", + " 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n", + " 0x0a, 0x00, 0x00, 0x00, 0x10, 0x03, 0x00, 0x00, 0xa4, 0x02, 0x00, 0x00,\n", + " 0x40, 0x02, 0x00, 0x00, 0xf4, 0x01, 0x00, 0x00, 0xac, 0x01, 0x00, 0x00,\n", + " 0x48, 0x01, 0x00, 0x00, 0xfc, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00,\n", + " 0x50, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x26, 0xfd, 0xff, 0xff,\n", + " 0x3c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,\n", + " 0x04, 0x00, 0x00, 0x00, 0x18, 0xfd, 0xff, 0xff, 0x20, 0x00, 0x00, 0x00,\n", + " 0x73, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f, 
0x31,\n", + " 0x2f, 0x64, 0x65, 0x6e, 0x73, 0x65, 0x5f, 0x34, 0x2f, 0x4d, 0x61, 0x74,\n", + " 0x4d, 0x75, 0x6c, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00, 0x00, 0x00, 0x00,\n", + " 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x6e, 0xfd, 0xff, 0xff,\n", + " 0x50, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,\n", + " 0x04, 0x00, 0x00, 0x00, 0x60, 0xfd, 0xff, 0xff, 0x34, 0x00, 0x00, 0x00,\n", + " 0x73, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f, 0x31,\n", + " 0x2f, 0x64, 0x65, 0x6e, 0x73, 0x65, 0x5f, 0x34, 0x2f, 0x4d, 0x61, 0x74,\n", + " 0x4d, 0x75, 0x6c, 0x2f, 0x52, 0x65, 0x61, 0x64, 0x56, 0x61, 0x72, 0x69,\n", + " 0x61, 0x62, 0x6c, 0x65, 0x4f, 0x70, 0x2f, 0x74, 0x72, 0x61, 0x6e, 0x73,\n", + " 0x70, 0x6f, 0x73, 0x65, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,\n", + " 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0xce, 0xfd, 0xff, 0xff,\n", + " 0x34, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,\n", + " 0x04, 0x00, 0x00, 0x00, 0xc0, 0xfd, 0xff, 0xff, 0x19, 0x00, 0x00, 0x00,\n", + " 0x73, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f, 0x31,\n", + " 0x2f, 0x64, 0x65, 0x6e, 0x73, 0x65, 0x5f, 0x33, 0x2f, 0x52, 0x65, 0x6c,\n", + " 0x75, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n", + " 0x10, 0x00, 0x00, 0x00, 0x12, 0xfe, 0xff, 0xff, 0x3c, 0x00, 0x00, 0x00,\n", + " 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n", + " 0x04, 0xfe, 0xff, 0xff, 0x20, 0x00, 0x00, 0x00, 0x73, 0x65, 0x71, 0x75,\n", + " 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f, 0x31, 0x2f, 0x64, 0x65, 0x6e,\n", + " 0x73, 0x65, 0x5f, 0x33, 0x2f, 0x4d, 0x61, 0x74, 0x4d, 0x75, 0x6c, 0x5f,\n", + " 0x62, 0x69, 0x61, 0x73, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n", + " 0x10, 0x00, 0x00, 0x00, 0x5a, 0xfe, 0xff, 0xff, 0x50, 0x00, 0x00, 0x00,\n", + " 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n", + " 0x4c, 0xfe, 0xff, 0xff, 0x34, 0x00, 0x00, 0x00, 0x73, 0x65, 0x71, 
0x75,\n", + " 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f, 0x31, 0x2f, 0x64, 0x65, 0x6e,\n", + " 0x73, 0x65, 0x5f, 0x33, 0x2f, 0x4d, 0x61, 0x74, 0x4d, 0x75, 0x6c, 0x2f,\n", + " 0x52, 0x65, 0x61, 0x64, 0x56, 0x61, 0x72, 0x69, 0x61, 0x62, 0x6c, 0x65,\n", + " 0x4f, 0x70, 0x2f, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x70, 0x6f, 0x73, 0x65,\n", + " 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,\n", + " 0x10, 0x00, 0x00, 0x00, 0xba, 0xfe, 0xff, 0xff, 0x34, 0x00, 0x00, 0x00,\n", + " 0x0a, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,\n", + " 0xac, 0xfe, 0xff, 0xff, 0x19, 0x00, 0x00, 0x00, 0x73, 0x65, 0x71, 0x75,\n", + " 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f, 0x31, 0x2f, 0x64, 0x65, 0x6e,\n", + " 0x73, 0x65, 0x5f, 0x32, 0x2f, 0x52, 0x65, 0x6c, 0x75, 0x00, 0x00, 0x00,\n", + " 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,\n", + " 0xfe, 0xfe, 0xff, 0xff, 0x3c, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,\n", + " 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf0, 0xfe, 0xff, 0xff,\n", + " 0x20, 0x00, 0x00, 0x00, 0x73, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x74, 0x69,\n", + " 0x61, 0x6c, 0x5f, 0x31, 0x2f, 0x64, 0x65, 0x6e, 0x73, 0x65, 0x5f, 0x32,\n", + " 0x2f, 0x4d, 0x61, 0x74, 0x4d, 0x75, 0x6c, 0x5f, 0x62, 0x69, 0x61, 0x73,\n", + " 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,\n", + " 0x46, 0xff, 0xff, 0xff, 0x50, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,\n", + " 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x38, 0xff, 0xff, 0xff,\n", + " 0x34, 0x00, 0x00, 0x00, 0x73, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x74, 0x69,\n", + " 0x61, 0x6c, 0x5f, 0x31, 0x2f, 0x64, 0x65, 0x6e, 0x73, 0x65, 0x5f, 0x32,\n", + " 0x2f, 0x4d, 0x61, 0x74, 0x4d, 0x75, 0x6c, 0x2f, 0x52, 0x65, 0x61, 0x64,\n", + " 0x56, 0x61, 0x72, 0x69, 0x61, 0x62, 0x6c, 0x65, 0x4f, 0x70, 0x2f, 0x74,\n", + " 0x72, 0x61, 0x6e, 0x73, 0x70, 0x6f, 0x73, 0x65, 0x00, 0x00, 0x00, 0x00,\n", + " 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 
0x00,\n", + " 0xa6, 0xff, 0xff, 0xff, 0x48, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,\n", + " 0x2c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,\n", + " 0x04, 0x00, 0x08, 0x00, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,\n", + " 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x43,\n", + " 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,\n", + " 0x64, 0x65, 0x6e, 0x73, 0x65, 0x5f, 0x32, 0x5f, 0x69, 0x6e, 0x70, 0x75,\n", + " 0x74, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n", + " 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x04, 0x00,\n", + " 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0e, 0x00, 0x00, 0x00,\n", + " 0x28, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,\n", + " 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,\n", + " 0x08, 0x00, 0x00, 0x00, 0x49, 0x64, 0x65, 0x6e, 0x74, 0x69, 0x74, 0x79,\n", + " 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,\n", + " 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,\n", + " 0x00, 0x00, 0x0a, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x00, 0x00, 0x08, 0x00,\n", + " 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x03, 0x00, 0x00, 0x00\n", + "};\n", + "unsigned int sine_model_quantized_tflite_len = 2640;\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1sqrhBLXwILt", + "colab_type": "text" + }, + "source": [ + "We can either copy and paste this output into our project's source code, or download the file using the collapsible menu on the left hand side of this Colab.\n", + "\n" + ] + } + ] +} \ No newline at end of file diff --git a/tensorflow/lite/experimental/micro/examples/hello_world/disco_f746ng/Makefile.inc b/tensorflow/lite/experimental/micro/examples/hello_world/disco_f746ng/Makefile.inc new file mode 100644 index 00000000000..879042f65d6 --- /dev/null +++ 
b/tensorflow/lite/experimental/micro/examples/hello_world/disco_f746ng/Makefile.inc @@ -0,0 +1,6 @@ +# Settings for the Discovery STM32F746NG board. +ifneq ($(filter disco_f746ng,$(ALL_TAGS)),) + MBED_PROJECT_FILES += \ + BSP_DISCO_F746NG.lib \ + LCD_DISCO_F746NG.lib +endif diff --git a/tensorflow/lite/experimental/micro/examples/hello_world/disco_f746ng/constants.cc b/tensorflow/lite/experimental/micro/examples/hello_world/disco_f746ng/constants.cc new file mode 100644 index 00000000000..09d464bbfdd --- /dev/null +++ b/tensorflow/lite/experimental/micro/examples/hello_world/disco_f746ng/constants.cc @@ -0,0 +1,19 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/experimental/micro/examples/hello_world/constants.h" + +// A larger number than the default to make the animation smoother +const int kInferencesPerCycle = 70; diff --git a/tensorflow/lite/experimental/micro/examples/hello_world/disco_f746ng/output_handler.cc b/tensorflow/lite/experimental/micro/examples/hello_world/disco_f746ng/output_handler.cc new file mode 100644 index 00000000000..cbfe75a7ab6 --- /dev/null +++ b/tensorflow/lite/experimental/micro/examples/hello_world/disco_f746ng/output_handler.cc @@ -0,0 +1,80 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/experimental/micro/examples/hello_world/output_handler.h" + +#include "LCD_DISCO_F746NG.h" +#include "tensorflow/lite/experimental/micro/examples/hello_world/constants.h" + +// The LCD driver +LCD_DISCO_F746NG lcd; + +// The colors we'll draw +const uint32_t background_color = 0xFFF4B400; // Yellow +const uint32_t foreground_color = 0xFFDB4437; // Red +// The size of the dot we'll draw +const int dot_radius = 10; +// Track whether the function has run at least once +bool initialized = false; +// Size of the drawable area +int width; +int height; +// Midpoint of the y axis +int midpoint; +// Pixels per unit of x_value +int x_increment; + +// Animates a dot across the screen to represent the current x and y values +void HandleOutput(tflite::ErrorReporter* error_reporter, float x_value, + float y_value) { + // Do this only once + if (!initialized) { + // Set the background and foreground colors + lcd.Clear(background_color); + lcd.SetTextColor(foreground_color); + // Calculate the drawable area to avoid drawing off the edges + width = lcd.GetXSize() - (dot_radius * 2); + height = lcd.GetYSize() - (dot_radius * 2); + // Calculate the y axis midpoint + midpoint = height / 2; + // Calculate fractional pixels per unit of x_value + x_increment = static_cast<float>(width) / kXrange; + initialized = true; + } + + // Log the current X and Y values + 
error_reporter->Report("x_value: %f, y_value: %f\n", x_value, y_value); + + // Clear the previous drawing + lcd.Clear(background_color); + + // Calculate x position, ensuring the dot is not partially offscreen, + // which causes artifacts and crashes + int x_pos = dot_radius + static_cast<int>(x_value * x_increment); + + // Calculate y position, ensuring the dot is not partially offscreen + int y_pos; + if (y_value >= 0) { + // Since the display's y runs from the top down, invert y_value + y_pos = dot_radius + static_cast<int>(midpoint * (1.f - y_value)); + } else { + // For any negative y_value, start drawing from the midpoint + y_pos = + dot_radius + midpoint + static_cast<int>(midpoint * (0.f - y_value)); + } + + // Draw the dot + lcd.FillCircle(x_pos, y_pos, dot_radius); +} diff --git a/tensorflow/lite/experimental/micro/examples/hello_world/hello_world_test.cc b/tensorflow/lite/experimental/micro/examples/hello_world/hello_world_test.cc new file mode 100644 index 00000000000..6ca3e88b6ca --- /dev/null +++ b/tensorflow/lite/experimental/micro/examples/hello_world/hello_world_test.cc @@ -0,0 +1,112 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/lite/experimental/micro/examples/hello_world/sine_model_data.h" +#include "tensorflow/lite/experimental/micro/kernels/all_ops_resolver.h" +#include "tensorflow/lite/experimental/micro/micro_error_reporter.h" +#include "tensorflow/lite/experimental/micro/micro_interpreter.h" +#include "tensorflow/lite/experimental/micro/testing/micro_test.h" +#include "tensorflow/lite/schema/schema_generated.h" +#include "tensorflow/lite/version.h" + +TF_LITE_MICRO_TESTS_BEGIN + +TF_LITE_MICRO_TEST(LoadModelAndPerformInference) { + // Set up logging + tflite::MicroErrorReporter micro_error_reporter; + tflite::ErrorReporter* error_reporter = &micro_error_reporter; + + // Map the model into a usable data structure. This doesn't involve any + // copying or parsing, it's a very lightweight operation. + const tflite::Model* model = ::tflite::GetModel(g_sine_model_data); + if (model->version() != TFLITE_SCHEMA_VERSION) { + error_reporter->Report( + "Model provided is schema version %d not equal " + "to supported version %d.\n", + model->version(), TFLITE_SCHEMA_VERSION); + } + + // This pulls in all the operation implementations we need + tflite::ops::micro::AllOpsResolver resolver; + + // Create an area of memory to use for input, output, and intermediate arrays. + // Finding the minimum value for your model may require some trial and error. 
+ const int tensor_arena_size = 2 * 1024; + uint8_t tensor_arena[tensor_arena_size]; + tflite::SimpleTensorAllocator tensor_allocator(tensor_arena, + tensor_arena_size); + + // Build an interpreter to run the model with + tflite::MicroInterpreter interpreter(model, resolver, &tensor_allocator, + error_reporter); + + // Obtain a pointer to the model's input tensor + TfLiteTensor* input = interpreter.input(0); + + // Make sure the input has the properties we expect + TF_LITE_MICRO_EXPECT_NE(nullptr, input); + // The property "dims" tells us the tensor's shape. It has one element for + // each dimension. Our input is a 2D tensor containing 1 element, so "dims" + // should have size 2. + TF_LITE_MICRO_EXPECT_EQ(2, input->dims->size); + // The value of each element gives the length of the corresponding tensor. + // We should expect two single element tensors (one is contained within the + // other). + TF_LITE_MICRO_EXPECT_EQ(1, input->dims->data[0]); + TF_LITE_MICRO_EXPECT_EQ(1, input->dims->data[1]); + // The input is a 32 bit floating point value + TF_LITE_MICRO_EXPECT_EQ(kTfLiteFloat32, input->type); + + // Provide an input value + input->data.f[0] = 0.; + + // Run the model on this input and check that it succeeds + TfLiteStatus invoke_status = interpreter.Invoke(); + if (invoke_status != kTfLiteOk) { + error_reporter->Report("Invoke failed\n"); + } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status); + + // Obtain a pointer to the output tensor and make sure it has the + // properties we expect. It should be the same as the input tensor. 
+ TfLiteTensor* output = interpreter.output(0); + TF_LITE_MICRO_EXPECT_EQ(2, output->dims->size); + TF_LITE_MICRO_EXPECT_EQ(1, input->dims->data[0]); + TF_LITE_MICRO_EXPECT_EQ(1, input->dims->data[1]); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteFloat32, output->type); + + // Obtain the output value from the tensor + float value = output->data.f[0]; + // Check that the output value is within 0.000001 of the expected value + TF_LITE_MICRO_EXPECT_NEAR(0.0486171, value, 0.000001); + + // Run inference on several more values and confirm the expected outputs + input->data.f[0] = 1.; + interpreter.Invoke(); + value = output->data.f[0]; + TF_LITE_MICRO_EXPECT_NEAR(0.8071436, value, 0.000001); + + input->data.f[0] = 3.; + interpreter.Invoke(); + value = output->data.f[0]; + TF_LITE_MICRO_EXPECT_NEAR(0.0964818, value, 0.000001); + + input->data.f[0] = 5.; + interpreter.Invoke(); + value = output->data.f[0]; + TF_LITE_MICRO_EXPECT_NEAR(-0.9352637, value, 0.000001); +} + +TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/experimental/micro/examples/hello_world/images/STM32F746.gif b/tensorflow/lite/experimental/micro/examples/hello_world/images/STM32F746.gif new file mode 100644 index 00000000000..e427bc867a7 Binary files /dev/null and b/tensorflow/lite/experimental/micro/examples/hello_world/images/STM32F746.gif differ diff --git a/tensorflow/lite/experimental/micro/examples/hello_world/images/arduino_mkrzero.gif b/tensorflow/lite/experimental/micro/examples/hello_world/images/arduino_mkrzero.gif new file mode 100644 index 00000000000..d896534795a Binary files /dev/null and b/tensorflow/lite/experimental/micro/examples/hello_world/images/arduino_mkrzero.gif differ diff --git a/tensorflow/lite/experimental/micro/examples/hello_world/images/sparkfun_edge.gif b/tensorflow/lite/experimental/micro/examples/hello_world/images/sparkfun_edge.gif new file mode 100644 index 00000000000..057a52d4d18 Binary files /dev/null and 
b/tensorflow/lite/experimental/micro/examples/hello_world/images/sparkfun_edge.gif differ diff --git a/tensorflow/lite/experimental/micro/examples/hello_world/main.cc b/tensorflow/lite/experimental/micro/examples/hello_world/main.cc new file mode 100644 index 00000000000..495977e8f6b --- /dev/null +++ b/tensorflow/lite/experimental/micro/examples/hello_world/main.cc @@ -0,0 +1,93 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/experimental/micro/examples/hello_world/constants.h" +#include "tensorflow/lite/experimental/micro/examples/hello_world/output_handler.h" +#include "tensorflow/lite/experimental/micro/examples/hello_world/sine_model_data.h" +#include "tensorflow/lite/experimental/micro/kernels/all_ops_resolver.h" +#include "tensorflow/lite/experimental/micro/micro_error_reporter.h" +#include "tensorflow/lite/experimental/micro/micro_interpreter.h" +#include "tensorflow/lite/schema/schema_generated.h" +#include "tensorflow/lite/version.h" + +int main(int argc, char* argv[]) { + // Set up logging + tflite::MicroErrorReporter micro_error_reporter; + tflite::ErrorReporter* error_reporter = &micro_error_reporter; + + // Map the model into a usable data structure. This doesn't involve any + // copying or parsing, it's a very lightweight operation. 
+ const tflite::Model* model = ::tflite::GetModel(g_sine_model_data); + if (model->version() != TFLITE_SCHEMA_VERSION) { + error_reporter->Report( + "Model provided is schema version %d not equal " + "to supported version %d.\n", + model->version(), TFLITE_SCHEMA_VERSION); + } + + // This pulls in all the operation implementations we need + tflite::ops::micro::AllOpsResolver resolver; + + // Create an area of memory to use for input, output, and intermediate arrays. + // Finding the minimum value for your model may require some trial and error. + const int tensor_arena_size = 2 * 1024; + uint8_t tensor_arena[tensor_arena_size]; + tflite::SimpleTensorAllocator tensor_allocator(tensor_arena, + tensor_arena_size); + + // Build an interpreter to run the model with + tflite::MicroInterpreter interpreter(model, resolver, &tensor_allocator, + error_reporter); + + // Obtain pointers to the model's input and output tensors + TfLiteTensor* input = interpreter.input(0); + TfLiteTensor* output = interpreter.output(0); + + // Keep track of how many inferences we have performed + int inference_count = 0; + + // Loop indefinitely + while (true) { + // Calculate an x value to feed into the model. We compare the current + // inference_count to the number of inferences per cycle to determine + // our position within the range of possible x values the model was + // trained on, and use this to calculate a value. + float position = static_cast<float>(inference_count) / + static_cast<float>(kInferencesPerCycle); + float x_val = position * kXrange; + + // Place our calculated x value in the model's input tensor + input->data.f[0] = x_val; + + // Run inference, and report any error + TfLiteStatus invoke_status = interpreter.Invoke(); + if (invoke_status != kTfLiteOk) { + error_reporter->Report("Invoke failed on x_val: %f\n", x_val); + continue; + } + + // Read the predicted y value from the model's output tensor + float y_val = output->data.f[0]; + + // Output the results. 
A custom HandleOutput function can be implemented + // for each supported hardware target. + HandleOutput(error_reporter, x_val, y_val); + + // Increment the inference_counter, and reset it if we have reached + // the total number per cycle + inference_count += 1; + if (inference_count >= kInferencesPerCycle) inference_count = 0; + } +} diff --git a/tensorflow/lite/experimental/micro/examples/hello_world/output_handler.cc b/tensorflow/lite/experimental/micro/examples/hello_world/output_handler.cc new file mode 100644 index 00000000000..63aee55c1af --- /dev/null +++ b/tensorflow/lite/experimental/micro/examples/hello_world/output_handler.cc @@ -0,0 +1,22 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/lite/experimental/micro/examples/hello_world/output_handler.h" + +void HandleOutput(tflite::ErrorReporter* error_reporter, float x_value, + float y_value) { + // Log the current X and Y values + error_reporter->Report("x_value: %f, y_value: %f\n", x_value, y_value); +} diff --git a/tensorflow/lite/experimental/micro/examples/hello_world/output_handler.h b/tensorflow/lite/experimental/micro/examples/hello_world/output_handler.h new file mode 100644 index 00000000000..20741993813 --- /dev/null +++ b/tensorflow/lite/experimental/micro/examples/hello_world/output_handler.h @@ -0,0 +1,26 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_HELLO_WORLD_OUTPUT_HANDLER_H_ +#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_HELLO_WORLD_OUTPUT_HANDLER_H_ + +#include "tensorflow/lite/c/c_api_internal.h" +#include "tensorflow/lite/experimental/micro/micro_error_reporter.h" + +// Called by the main loop to produce some output based on the x and y values +void HandleOutput(tflite::ErrorReporter* error_reporter, float x_value, + float y_value); + +#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_HELLO_WORLD_OUTPUT_HANDLER_H_ diff --git a/tensorflow/lite/experimental/micro/examples/hello_world/output_handler_test.cc b/tensorflow/lite/experimental/micro/examples/hello_world/output_handler_test.cc new file mode 100644 index 00000000000..0259370eda7 --- /dev/null +++ b/tensorflow/lite/experimental/micro/examples/hello_world/output_handler_test.cc @@ -0,0 +1,33 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/lite/experimental/micro/examples/hello_world/output_handler.h" + +#include "tensorflow/lite/experimental/micro/testing/micro_test.h" +#include "tensorflow/lite/experimental/micro/testing/test_utils.h" + +TF_LITE_MICRO_TESTS_BEGIN + +TF_LITE_MICRO_TEST(TestCallability) { + tflite::MicroErrorReporter micro_error_reporter; + tflite::ErrorReporter* error_reporter = &micro_error_reporter; + + // This will have external side-effects (like printing to the debug console + // or lighting an LED) that are hard to observe, so the most we can do is + // make sure the call doesn't crash. + HandleOutput(error_reporter, 0, 0); +} + +TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/experimental/micro/examples/hello_world/sine_model_data.cc b/tensorflow/lite/experimental/micro/examples/hello_world/sine_model_data.cc new file mode 100644 index 00000000000..c69c9492c4d --- /dev/null +++ b/tensorflow/lite/experimental/micro/examples/hello_world/sine_model_data.cc @@ -0,0 +1,255 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Automatically created from a TensorFlow Lite flatbuffer using the command: +// xxd -i sine_model.tflite > sine_model_data.cc +// See the README for a full description of the creation process. 
+ +#include "tensorflow/lite/experimental/micro/examples/hello_world/sine_model_data.h" + +// We need to keep the data array aligned on some architectures. +#ifdef __has_attribute +#define HAVE_ATTRIBUTE(x) __has_attribute(x) +#else +#define HAVE_ATTRIBUTE(x) 0 +#endif +#if HAVE_ATTRIBUTE(aligned) || (defined(__GNUC__) && !defined(__clang__)) +#define DATA_ALIGN_ATTRIBUTE __attribute__((aligned(4))) +#else +#define DATA_ALIGN_ATTRIBUTE +#endif + +const unsigned char g_sine_model_data[] DATA_ALIGN_ATTRIBUTE = { + 0x18, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x00, 0x00, 0x0e, 0x00, + 0x18, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00, + 0x0e, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x0a, 0x00, 0x00, + 0xb8, 0x05, 0x00, 0x00, 0xa0, 0x05, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x0b, 0x00, 0x00, 0x00, 0x90, 0x05, 0x00, 0x00, 0x7c, 0x05, 0x00, 0x00, + 0x24, 0x05, 0x00, 0x00, 0xd4, 0x04, 0x00, 0x00, 0xc4, 0x00, 0x00, 0x00, + 0x74, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x54, 0xf6, 0xff, 0xff, 0x58, 0xf6, 0xff, 0xff, 0x5c, 0xf6, 0xff, 0xff, + 0x60, 0xf6, 0xff, 0xff, 0xc2, 0xfa, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x7c, 0x19, 0xa7, 0x3e, 0x99, 0x81, 0xb9, 0x3e, + 0x56, 0x8b, 0x9f, 0x3e, 0x88, 0xd8, 0x12, 0xbf, 0x74, 0x10, 0x56, 0x3e, + 0xfe, 0xc6, 0xdf, 0xbe, 0xf2, 0x10, 0x5a, 0xbe, 0xf0, 0xe2, 0x0a, 0xbe, + 0x10, 0x5a, 0x98, 0xbe, 0xb9, 0x36, 0xce, 0x3d, 0x8f, 0x7f, 0x87, 0x3e, + 0x2c, 0xb1, 0xfd, 0xbd, 0xe6, 0xa6, 0x8a, 0xbe, 0xa5, 0x3e, 0xda, 0x3e, + 0x50, 0x34, 0xed, 0xbd, 0x90, 0x91, 0x69, 0xbe, 0x0e, 0xfb, 0xff, 0xff, + 0x04, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x67, 0x41, 0x48, 0xbf, + 0x24, 0xcd, 0xa0, 0xbe, 0xb7, 0x92, 0x0c, 0xbf, 0x00, 0x00, 0x00, 0x00, + 0x98, 0xfe, 0x3c, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4a, 0x17, 0x9a, 0xbe, + 
0x41, 0xcb, 0xb6, 0xbe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x13, 0xd6, 0x1e, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x5a, 0xfb, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, + 0x4b, 0x98, 0xdd, 0xbd, 0x40, 0x6b, 0xcb, 0xbe, 0x36, 0x0c, 0xd4, 0x3c, + 0xbd, 0x44, 0xb5, 0x3e, 0x95, 0x70, 0xe3, 0x3e, 0xe7, 0xac, 0x86, 0x3e, + 0x00, 0xc4, 0x4e, 0x3d, 0x7e, 0xa6, 0x1d, 0x3e, 0xbd, 0x87, 0xbb, 0x3e, + 0xb4, 0xb8, 0x09, 0xbf, 0xa1, 0x1f, 0xf8, 0xbe, 0x8d, 0x90, 0xdd, 0x3e, + 0xde, 0xfa, 0x6f, 0xbe, 0xb2, 0x75, 0xe4, 0x3d, 0x6e, 0xfe, 0x36, 0x3e, + 0x20, 0x18, 0xc2, 0xbe, 0x39, 0xc7, 0xfb, 0xbe, 0xfe, 0xa4, 0x30, 0xbe, + 0xf7, 0x91, 0xde, 0xbe, 0xde, 0xab, 0x24, 0x3e, 0xfb, 0xbb, 0xce, 0x3e, + 0xeb, 0x23, 0x80, 0xbe, 0x7b, 0x58, 0x73, 0xbe, 0x9a, 0x2e, 0x03, 0x3e, + 0x10, 0x42, 0xa9, 0xbc, 0x10, 0x12, 0x64, 0xbd, 0xe3, 0x8d, 0x0c, 0x3d, + 0x9e, 0x48, 0x97, 0xbe, 0x34, 0x51, 0xd4, 0xbe, 0x02, 0x3b, 0x0d, 0x3e, + 0x62, 0x67, 0x89, 0xbe, 0x74, 0xdf, 0xa2, 0x3d, 0xf3, 0x25, 0xb3, 0xbe, + 0xef, 0x34, 0x7b, 0x3d, 0x61, 0x70, 0xe3, 0x3d, 0xba, 0x76, 0xc0, 0xbe, + 0x7d, 0xe9, 0xa7, 0x3e, 0xc3, 0xab, 0xd0, 0xbe, 0xcf, 0x7c, 0xdb, 0xbe, + 0x70, 0x27, 0x9a, 0xbe, 0x98, 0xf5, 0x3c, 0xbd, 0xff, 0x4b, 0x4b, 0x3e, + 0x7e, 0xa0, 0xf8, 0xbd, 0xd4, 0x6e, 0x86, 0x3d, 0x00, 0x4a, 0x07, 0x3a, + 0x4c, 0x24, 0x61, 0xbe, 0x54, 0x68, 0xf7, 0xbd, 0x02, 0x3f, 0x77, 0xbe, + 0x23, 0x79, 0xb3, 0x3e, 0x1c, 0x83, 0xad, 0xbd, 0xc8, 0x92, 0x8d, 0x3e, + 0xa8, 0xf3, 0x15, 0xbd, 0xe6, 0x4d, 0x6c, 0x3d, 0xac, 0xe7, 0x98, 0xbe, + 0x81, 0xec, 0xbd, 0x3e, 0xe2, 0x55, 0x73, 0x3e, 0xc1, 0x77, 0xc7, 0x3e, + 0x6e, 0x1b, 0x5e, 0x3d, 0x27, 0x78, 0x02, 0x3f, 0xd4, 0x21, 0x90, 0x3d, + 0x52, 0xdc, 0x1f, 0x3e, 0xbf, 0xda, 0x88, 0x3e, 0x80, 0x79, 0xe3, 0xbd, + 0x40, 0x6f, 0x10, 0xbe, 0x20, 0x43, 0x2e, 0xbd, 0xf0, 0x76, 0xc5, 0xbd, + 0xcc, 0xa0, 0x04, 0xbe, 0xf0, 0x69, 0xd7, 0xbe, 0xb1, 0xfe, 0x64, 0xbe, + 0x20, 0x41, 0x84, 0xbe, 0xb2, 0xc3, 0x26, 0xbe, 0xd8, 0xf4, 0x09, 0xbe, + 
0x64, 0x44, 0xd1, 0x3d, 0xd5, 0xe1, 0xc8, 0xbe, 0x35, 0xbc, 0x3f, 0xbe, + 0xc0, 0x94, 0x82, 0x3d, 0xdc, 0x2b, 0xb1, 0xbd, 0x02, 0xdb, 0xbf, 0xbe, + 0xa5, 0x7f, 0x8a, 0x3e, 0x21, 0xb4, 0xa2, 0x3e, 0xcd, 0x86, 0x56, 0xbf, + 0x9c, 0x3b, 0x76, 0xbc, 0x85, 0x6d, 0x60, 0xbf, 0x86, 0x00, 0x3c, 0xbe, + 0xc1, 0x23, 0x7e, 0x3e, 0x96, 0xcd, 0x3f, 0x3e, 0x86, 0x91, 0x2d, 0x3e, + 0x55, 0xef, 0x87, 0x3e, 0x7e, 0x97, 0x03, 0xbe, 0x2a, 0xcd, 0x01, 0x3e, + 0x32, 0xc9, 0x8e, 0xbe, 0x72, 0x77, 0x3b, 0xbe, 0xe0, 0xa1, 0xbc, 0xbe, + 0x8d, 0xb7, 0xa7, 0x3e, 0x1c, 0x05, 0x95, 0xbe, 0xf7, 0x1f, 0xbb, 0x3e, + 0xc9, 0x3e, 0xd6, 0x3e, 0x80, 0x42, 0xe9, 0xbd, 0x27, 0x0c, 0xd2, 0xbe, + 0x5c, 0x32, 0x34, 0xbe, 0x14, 0xcb, 0xca, 0xbd, 0xdd, 0x3a, 0x67, 0xbe, + 0x1c, 0xbb, 0x8d, 0xbe, 0x91, 0xac, 0x5c, 0xbe, 0x52, 0x40, 0x6f, 0xbe, + 0xd7, 0x71, 0x94, 0x3e, 0x18, 0x71, 0x09, 0xbe, 0x9b, 0x29, 0xd9, 0xbe, + 0x7d, 0x66, 0xd2, 0xbe, 0x98, 0xd6, 0xb2, 0xbe, 0x00, 0xc9, 0x84, 0x3a, + 0xbc, 0xda, 0xc2, 0xbd, 0x1d, 0xc2, 0x1b, 0xbf, 0xd4, 0xdd, 0x92, 0x3e, + 0x07, 0x87, 0x6c, 0xbe, 0x40, 0xc2, 0x3b, 0xbe, 0xbd, 0xe2, 0x9c, 0x3e, + 0x0a, 0xb5, 0xa0, 0xbe, 0xe2, 0xd5, 0x9c, 0xbe, 0x3e, 0xbb, 0x7c, 0x3e, + 0x17, 0xb4, 0xcf, 0x3e, 0xd5, 0x8e, 0xc8, 0xbe, 0x7c, 0xf9, 0x5c, 0x3e, + 0x80, 0xfc, 0x0d, 0x3d, 0xc5, 0xd5, 0x8b, 0x3e, 0xf5, 0x17, 0xa2, 0x3e, + 0xc7, 0x60, 0x89, 0xbe, 0xec, 0x95, 0x87, 0x3d, 0x7a, 0xc2, 0x5d, 0xbf, + 0x77, 0x94, 0x98, 0x3e, 0x77, 0x39, 0x07, 0xbc, 0x42, 0x29, 0x00, 0x3e, + 0xaf, 0xd0, 0xa9, 0x3e, 0x31, 0x23, 0xc4, 0xbe, 0x95, 0x36, 0x5b, 0xbe, + 0xc7, 0xdc, 0x83, 0xbe, 0x1e, 0x6b, 0x47, 0x3e, 0x5b, 0x24, 0x99, 0x3e, + 0x99, 0x27, 0x54, 0x3e, 0xc8, 0x20, 0xdd, 0xbd, 0x5a, 0x86, 0x2f, 0x3e, + 0x80, 0xf0, 0x69, 0xbe, 0x44, 0xfc, 0x84, 0xbd, 0x82, 0xa0, 0x2a, 0xbe, + 0x87, 0xe6, 0x2a, 0x3e, 0xd8, 0x34, 0xae, 0x3d, 0x50, 0xbd, 0xb5, 0x3e, + 0xc4, 0x8c, 0x88, 0xbe, 0xe3, 0xbc, 0xa5, 0x3e, 0xa9, 0xda, 0x9e, 0x3e, + 0x3e, 0xb8, 0x23, 0xbe, 0x80, 0x90, 0x15, 0x3d, 0x97, 0x3f, 0xc3, 0x3e, + 
0xca, 0x5c, 0x9d, 0x3e, 0x21, 0xe8, 0xe1, 0x3e, 0xc0, 0x49, 0x01, 0xbc, + 0x00, 0x0b, 0x88, 0xbd, 0x3f, 0xf7, 0xca, 0x3c, 0xfb, 0x5a, 0xb1, 0x3e, + 0x60, 0xd2, 0x0d, 0x3c, 0xce, 0x23, 0x78, 0xbf, 0x8f, 0x4f, 0xb9, 0xbe, + 0x69, 0x6a, 0x34, 0xbf, 0x4b, 0x5e, 0xa9, 0x3e, 0x64, 0x8c, 0xd9, 0x3e, + 0x52, 0x77, 0x36, 0x3e, 0xeb, 0xaf, 0xbe, 0x3e, 0x40, 0xbe, 0x36, 0x3c, + 0x08, 0x65, 0x3b, 0xbd, 0x55, 0xe0, 0x66, 0xbd, 0xd2, 0xe8, 0x9b, 0xbe, + 0x86, 0xe3, 0x09, 0xbe, 0x93, 0x3d, 0xdd, 0x3e, 0x0f, 0x66, 0x18, 0x3f, + 0x18, 0x05, 0x33, 0xbd, 0xde, 0x15, 0xd7, 0xbe, 0xaa, 0xcf, 0x49, 0xbe, + 0xa2, 0xa5, 0x64, 0x3e, 0xe6, 0x9c, 0x42, 0xbe, 0x54, 0x42, 0xcc, 0x3d, + 0xa0, 0xbd, 0x9d, 0xbe, 0xc2, 0x69, 0x48, 0x3e, 0x5b, 0x8b, 0xa2, 0xbe, + 0xc0, 0x13, 0x87, 0x3d, 0x36, 0xfd, 0x69, 0x3e, 0x05, 0x86, 0x40, 0xbe, + 0x1e, 0x7a, 0xce, 0xbe, 0x46, 0x13, 0xa7, 0xbe, 0x68, 0x52, 0x86, 0xbe, + 0x04, 0x9e, 0x86, 0xbd, 0x8c, 0x54, 0xc1, 0x3d, 0xe0, 0x3b, 0xad, 0x3c, + 0x42, 0x67, 0x85, 0xbd, 0xea, 0x97, 0x42, 0x3e, 0x6e, 0x13, 0x3b, 0xbf, + 0x56, 0x5b, 0x16, 0x3e, 0xaa, 0xab, 0xdf, 0x3e, 0xc8, 0x41, 0x36, 0x3d, + 0x24, 0x2d, 0x47, 0xbe, 0x77, 0xa5, 0xae, 0x3e, 0xc0, 0xc2, 0x5b, 0x3c, + 0xac, 0xac, 0x4e, 0x3e, 0x99, 0xec, 0x13, 0xbe, 0xf2, 0xab, 0x73, 0x3e, + 0xaa, 0xa1, 0x48, 0xbe, 0xe8, 0xd3, 0x01, 0xbe, 0x60, 0xb7, 0xc7, 0xbd, + 0x64, 0x72, 0xd3, 0x3d, 0x83, 0xd3, 0x99, 0x3e, 0x0c, 0x76, 0x34, 0xbe, + 0x42, 0xda, 0x0d, 0x3e, 0xfb, 0x47, 0x9a, 0x3e, 0x8b, 0xdc, 0x92, 0xbe, + 0x56, 0x7f, 0x6b, 0x3e, 0x04, 0xd4, 0x88, 0xbd, 0x11, 0x9e, 0x80, 0x3e, + 0x3c, 0x89, 0xff, 0x3d, 0xb3, 0x3e, 0x88, 0x3e, 0xf7, 0xf0, 0x88, 0x3e, + 0x28, 0xfb, 0xc9, 0xbe, 0x53, 0x3e, 0xcf, 0x3e, 0xac, 0x75, 0xdc, 0xbe, + 0xdd, 0xca, 0xd7, 0x3e, 0x01, 0x58, 0xa7, 0x3e, 0x29, 0xb8, 0x13, 0xbf, + 0x76, 0x81, 0x12, 0xbc, 0x28, 0x8b, 0x16, 0xbf, 0x0e, 0xec, 0x0e, 0x3e, + 0x40, 0x0a, 0xdb, 0xbd, 0x98, 0xec, 0xbf, 0xbd, 0x32, 0x55, 0x0c, 0xbe, + 0xfb, 0xf9, 0xc9, 0x3e, 0x83, 0x4a, 0x6d, 0xbe, 0x76, 0x59, 0xe2, 0xbe, + 
0x54, 0x7d, 0x9f, 0xbb, 0x9d, 0xe8, 0x95, 0x3e, 0x5c, 0xd3, 0xd0, 0x3d, + 0x19, 0x8a, 0xb0, 0x3e, 0xde, 0x6f, 0x2e, 0xbe, 0xd0, 0x16, 0x83, 0x3d, + 0x9c, 0x7d, 0x11, 0xbf, 0x2b, 0xcc, 0x25, 0x3c, 0x2a, 0xa5, 0x27, 0xbe, + 0x22, 0x14, 0xc7, 0xbe, 0x5e, 0x7a, 0xac, 0x3e, 0x4e, 0x41, 0x94, 0xbe, + 0x5a, 0x68, 0x7b, 0x3e, 0x86, 0xfd, 0x4e, 0x3e, 0xa2, 0x56, 0x6a, 0xbe, + 0xca, 0xfe, 0x81, 0xbe, 0x43, 0xc3, 0xb1, 0xbd, 0xc5, 0xb8, 0xa7, 0x3e, + 0x55, 0x23, 0xcd, 0x3e, 0xaf, 0x2e, 0x76, 0x3e, 0x69, 0xa8, 0x90, 0xbe, + 0x0d, 0xba, 0xb9, 0x3e, 0x66, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x53, 0xd6, 0xe2, 0x3d, 0x66, 0xb6, 0xcc, 0x3e, + 0x03, 0xe7, 0xf6, 0x3e, 0xe0, 0x28, 0x10, 0xbf, 0x00, 0x00, 0x00, 0x00, + 0x3e, 0x3d, 0xb0, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x62, 0xf0, 0x77, 0x3e, + 0xa6, 0x9d, 0xa4, 0x3e, 0x3a, 0x4b, 0xf3, 0xbe, 0x71, 0x9e, 0xa7, 0x3e, + 0x00, 0x00, 0x00, 0x00, 0x34, 0x39, 0xa2, 0x3e, 0x00, 0x00, 0x00, 0x00, + 0xcc, 0x9c, 0x4a, 0x3e, 0xab, 0x40, 0xa3, 0x3e, 0xb2, 0xff, 0xff, 0xff, + 0x04, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0xb3, 0x71, 0x67, 0x3f, + 0x9a, 0x7a, 0x95, 0xbf, 0xe1, 0x48, 0xe8, 0xbe, 0x8a, 0x72, 0x96, 0x3e, + 0x00, 0xd2, 0xd3, 0xbb, 0x1a, 0xc5, 0xd7, 0x3f, 0xac, 0x7e, 0xc8, 0xbe, + 0x90, 0xa7, 0x95, 0xbe, 0x3b, 0xd7, 0xdc, 0xbe, 0x41, 0xa8, 0x16, 0x3f, + 0x50, 0x5b, 0xcb, 0x3f, 0x52, 0xb9, 0xed, 0xbe, 0x2e, 0xa7, 0xc6, 0xbe, + 0xaf, 0x0f, 0x14, 0xbf, 0xb3, 0xda, 0x59, 0x3f, 0x02, 0xec, 0xd7, 0xbe, + 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x66, 0x11, 0x1f, 0xbf, + 0xb8, 0xfb, 0xff, 0xff, 0x0f, 0x00, 0x00, 0x00, 0x54, 0x4f, 0x43, 0x4f, + 0x20, 0x43, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74, 0x65, 0x64, 0x2e, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, + 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0xf0, 0x00, 0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0xd8, 0x00, 0x00, 0x00, + 
0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xce, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x08, 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x1c, 0xfc, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x10, 0x00, + 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x1c, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xba, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x10, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x08, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x07, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0a, 0x00, 0x00, 0x00, 0x10, 0x03, 0x00, 0x00, 0xa4, 0x02, 0x00, 0x00, + 0x40, 0x02, 0x00, 0x00, 0xf4, 0x01, 0x00, 0x00, 0xac, 0x01, 0x00, 0x00, + 0x48, 0x01, 0x00, 0x00, 0xfc, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x26, 0xfd, 0xff, 0xff, + 0x3c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x18, 0xfd, 0xff, 0xff, 0x20, 0x00, 0x00, 0x00, + 0x73, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f, 0x31, + 0x2f, 0x64, 0x65, 0x6e, 0x73, 0x65, 0x5f, 0x34, 0x2f, 0x4d, 0x61, 0x74, + 
0x4d, 0x75, 0x6c, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x6e, 0xfd, 0xff, 0xff, + 0x50, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x60, 0xfd, 0xff, 0xff, 0x34, 0x00, 0x00, 0x00, + 0x73, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f, 0x31, + 0x2f, 0x64, 0x65, 0x6e, 0x73, 0x65, 0x5f, 0x34, 0x2f, 0x4d, 0x61, 0x74, + 0x4d, 0x75, 0x6c, 0x2f, 0x52, 0x65, 0x61, 0x64, 0x56, 0x61, 0x72, 0x69, + 0x61, 0x62, 0x6c, 0x65, 0x4f, 0x70, 0x2f, 0x74, 0x72, 0x61, 0x6e, 0x73, + 0x70, 0x6f, 0x73, 0x65, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0xce, 0xfd, 0xff, 0xff, + 0x34, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0xc0, 0xfd, 0xff, 0xff, 0x19, 0x00, 0x00, 0x00, + 0x73, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f, 0x31, + 0x2f, 0x64, 0x65, 0x6e, 0x73, 0x65, 0x5f, 0x33, 0x2f, 0x52, 0x65, 0x6c, + 0x75, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x12, 0xfe, 0xff, 0xff, 0x3c, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x04, 0xfe, 0xff, 0xff, 0x20, 0x00, 0x00, 0x00, 0x73, 0x65, 0x71, 0x75, + 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f, 0x31, 0x2f, 0x64, 0x65, 0x6e, + 0x73, 0x65, 0x5f, 0x33, 0x2f, 0x4d, 0x61, 0x74, 0x4d, 0x75, 0x6c, 0x5f, + 0x62, 0x69, 0x61, 0x73, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x5a, 0xfe, 0xff, 0xff, 0x50, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x4c, 0xfe, 0xff, 0xff, 0x34, 0x00, 0x00, 0x00, 0x73, 0x65, 0x71, 0x75, + 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f, 0x31, 0x2f, 0x64, 0x65, 0x6e, + 0x73, 0x65, 0x5f, 0x33, 0x2f, 0x4d, 0x61, 0x74, 0x4d, 0x75, 0x6c, 0x2f, + 0x52, 0x65, 0x61, 0x64, 0x56, 0x61, 0x72, 0x69, 0x61, 0x62, 0x6c, 0x65, + 
0x4f, 0x70, 0x2f, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x70, 0x6f, 0x73, 0x65, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0xba, 0xfe, 0xff, 0xff, 0x34, 0x00, 0x00, 0x00, + 0x0a, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0xac, 0xfe, 0xff, 0xff, 0x19, 0x00, 0x00, 0x00, 0x73, 0x65, 0x71, 0x75, + 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f, 0x31, 0x2f, 0x64, 0x65, 0x6e, + 0x73, 0x65, 0x5f, 0x32, 0x2f, 0x52, 0x65, 0x6c, 0x75, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0xfe, 0xfe, 0xff, 0xff, 0x3c, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf0, 0xfe, 0xff, 0xff, + 0x20, 0x00, 0x00, 0x00, 0x73, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x74, 0x69, + 0x61, 0x6c, 0x5f, 0x31, 0x2f, 0x64, 0x65, 0x6e, 0x73, 0x65, 0x5f, 0x32, + 0x2f, 0x4d, 0x61, 0x74, 0x4d, 0x75, 0x6c, 0x5f, 0x62, 0x69, 0x61, 0x73, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x46, 0xff, 0xff, 0xff, 0x50, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x38, 0xff, 0xff, 0xff, + 0x34, 0x00, 0x00, 0x00, 0x73, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x74, 0x69, + 0x61, 0x6c, 0x5f, 0x31, 0x2f, 0x64, 0x65, 0x6e, 0x73, 0x65, 0x5f, 0x32, + 0x2f, 0x4d, 0x61, 0x74, 0x4d, 0x75, 0x6c, 0x2f, 0x52, 0x65, 0x61, 0x64, + 0x56, 0x61, 0x72, 0x69, 0x61, 0x62, 0x6c, 0x65, 0x4f, 0x70, 0x2f, 0x74, + 0x72, 0x61, 0x6e, 0x73, 0x70, 0x6f, 0x73, 0x65, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0xa6, 0xff, 0xff, 0xff, 0x48, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x2c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, + 0x04, 0x00, 0x08, 0x00, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x43, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, + 
0x64, 0x65, 0x6e, 0x73, 0x65, 0x5f, 0x32, 0x5f, 0x69, 0x6e, 0x70, 0x75, + 0x74, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x04, 0x00, + 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x49, 0x64, 0x65, 0x6e, 0x74, 0x69, 0x74, 0x79, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x0a, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x00, 0x00, 0x08, 0x00, + 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x03, 0x00, 0x00, 0x00}; +const int g_sine_model_data_len = 2640; diff --git a/tensorflow/lite/experimental/micro/examples/hello_world/sine_model_data.h b/tensorflow/lite/experimental/micro/examples/hello_world/sine_model_data.h new file mode 100644 index 00000000000..7a7ce6f47ee --- /dev/null +++ b/tensorflow/lite/experimental/micro/examples/hello_world/sine_model_data.h @@ -0,0 +1,27 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +// This is a standard TensorFlow Lite model file that has been converted into a +// C data array, so it can be easily compiled into a binary for devices that +// don't have a file system. It was created using the command: +// xxd -i sine_model.tflite > sine_model_data.cc + +#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_HELLO_WORLD_SINE_MODEL_DATA_H_ +#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_HELLO_WORLD_SINE_MODEL_DATA_H_ + +extern const unsigned char g_sine_model_data[]; +extern const int g_sine_model_data_len; + +#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_HELLO_WORLD_SINE_MODEL_DATA_H_ diff --git a/tensorflow/lite/experimental/micro/examples/hello_world/sparkfun_edge/constants.cc b/tensorflow/lite/experimental/micro/examples/hello_world/sparkfun_edge/constants.cc new file mode 100644 index 00000000000..169401dd532 --- /dev/null +++ b/tensorflow/lite/experimental/micro/examples/hello_world/sparkfun_edge/constants.cc @@ -0,0 +1,19 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/experimental/micro/examples/hello_world/constants.h" + +// This is tuned so that a full cycle takes ~4 seconds on a SparkFun Edge. 
+const int kInferencesPerCycle = 1000; diff --git a/tensorflow/lite/experimental/micro/examples/hello_world/sparkfun_edge/output_handler.cc b/tensorflow/lite/experimental/micro/examples/hello_world/sparkfun_edge/output_handler.cc new file mode 100644 index 00000000000..24479eb77a6 --- /dev/null +++ b/tensorflow/lite/experimental/micro/examples/hello_world/sparkfun_edge/output_handler.cc @@ -0,0 +1,84 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/experimental/micro/examples/hello_world/output_handler.h" + +#include "am_bsp.h" // NOLINT + +/* +This function uses the device's LEDs to visually indicate the current y value. +The y value is in the range -1 <= y <= 1. 
The LEDs (red, green, blue, +and yellow) are physically lined up in the following order: + + [ R G B Y ] + +The following table represents how we will light the LEDs for different values: + +| Range | LEDs lit | +| 0.75 <= y <= 1 | [ 0 0 1 1 ] | +| 0 < y < 0.75 | [ 0 0 1 0 ] | +| y = 0 | [ 0 0 0 0 ] | +| -0.75 < y < 0 | [ 0 1 0 0 ] | +| -1 <= y <= -0.75 | [ 1 1 0 0 ] | + +*/ +void HandleOutput(tflite::ErrorReporter* error_reporter, float x_value, + float y_value) { + // The first time this method runs, set up our LEDs correctly + static bool is_initialized = false; + if (!is_initialized) { + // Set up LEDs as outputs + am_hal_gpio_pinconfig(AM_BSP_GPIO_LED_RED, g_AM_HAL_GPIO_OUTPUT_12); + am_hal_gpio_pinconfig(AM_BSP_GPIO_LED_BLUE, g_AM_HAL_GPIO_OUTPUT_12); + am_hal_gpio_pinconfig(AM_BSP_GPIO_LED_GREEN, g_AM_HAL_GPIO_OUTPUT_12); + am_hal_gpio_pinconfig(AM_BSP_GPIO_LED_YELLOW, g_AM_HAL_GPIO_OUTPUT_12); + // Ensure all pins are cleared + am_hal_gpio_output_clear(AM_BSP_GPIO_LED_RED); + am_hal_gpio_output_clear(AM_BSP_GPIO_LED_BLUE); + am_hal_gpio_output_clear(AM_BSP_GPIO_LED_GREEN); + am_hal_gpio_output_clear(AM_BSP_GPIO_LED_YELLOW); + is_initialized = true; + } + + // Set the LEDs to represent negative values + if (y_value < 0) { + // Clear unnecessary LEDs + am_hal_gpio_output_clear(AM_BSP_GPIO_LED_GREEN); + am_hal_gpio_output_clear(AM_BSP_GPIO_LED_YELLOW); + // The blue LED is lit for all negative values + am_hal_gpio_output_set(AM_BSP_GPIO_LED_BLUE); + // The red LED is lit in only some cases + if (y_value <= -0.75) { + am_hal_gpio_output_set(AM_BSP_GPIO_LED_RED); + } else { + am_hal_gpio_output_clear(AM_BSP_GPIO_LED_RED); + } + // Set the LEDs to represent positive values + } else if (y_value > 0) { + // Clear unnecessary LEDs + am_hal_gpio_output_clear(AM_BSP_GPIO_LED_RED); + am_hal_gpio_output_clear(AM_BSP_GPIO_LED_BLUE); + // The green LED is lit for all positive values + am_hal_gpio_output_set(AM_BSP_GPIO_LED_GREEN); + // The yellow LED is lit in only some 
cases + if (y_value >= 0.75) { + am_hal_gpio_output_set(AM_BSP_GPIO_LED_YELLOW); + } else { + am_hal_gpio_output_clear(AM_BSP_GPIO_LED_YELLOW); + } + } + // Log the current X and Y values + error_reporter->Report("x_value: %f, y_value: %f\n", x_value, y_value); +} diff --git a/tensorflow/lite/experimental/micro/examples/micro_vision/BUILD b/tensorflow/lite/experimental/micro/examples/micro_vision/BUILD index 479a178ea44..6555e03fdb4 100644 --- a/tensorflow/lite/experimental/micro/examples/micro_vision/BUILD +++ b/tensorflow/lite/experimental/micro/examples/micro_vision/BUILD @@ -74,12 +74,38 @@ tflite_micro_cc_test( ], ) +cc_library( + name = "detection_responder", + srcs = [ + "detection_responder.cc", + ], + hdrs = [ + "detection_responder.h", + ], + deps = [ + "//tensorflow/lite/c:c_api_internal", + "//tensorflow/lite/experimental/micro:micro_framework", + ], +) + +tflite_micro_cc_test( + name = "detection_responder_test", + srcs = [ + "detection_responder_test.cc", + ], + deps = [ + ":detection_responder", + "//tensorflow/lite/experimental/micro/testing:micro_test", + ], +) + cc_binary( name = "micro_vision", srcs = [ "main.cc", ], deps = [ + ":detection_responder", ":image_provider", ":model_settings", ":person_detect_model_data", diff --git a/tensorflow/lite/experimental/micro/examples/micro_vision/Makefile.inc b/tensorflow/lite/experimental/micro/examples/micro_vision/Makefile.inc index 69d3ea479f4..af792629e08 100644 --- a/tensorflow/lite/experimental/micro/examples/micro_vision/Makefile.inc +++ b/tensorflow/lite/experimental/micro/examples/micro_vision/Makefile.inc @@ -28,12 +28,21 @@ IMAGE_PROVIDER_TEST_HDRS := \ tensorflow/lite/experimental/micro/examples/micro_vision/image_provider.h \ tensorflow/lite/experimental/micro/examples/micro_vision/model_settings.h +DETECTION_RESPONDER_TEST_SRCS := \ +tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder.cc \ 
+tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder_test.cc + +DETECTION_RESPONDER_TEST_HDRS := \ +tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder.h + MICRO_VISION_SRCS := \ +tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder.cc \ tensorflow/lite/experimental/micro/examples/micro_vision/image_provider.cc \ tensorflow/lite/experimental/micro/examples/micro_vision/main.cc \ $(MICRO_VISION_MODEL_SRCS) MICRO_VISION_HDRS := \ +tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder.h \ tensorflow/lite/experimental/micro/examples/micro_vision/image_provider.h \ $(MICRO_VISION_MODEL_HDRS) @@ -48,6 +57,10 @@ $(MICRO_VISION_TEST_SRCS),$(MICRO_VISION_TEST_HDRS))) $(eval $(call microlite_test,image_provider_test,\ $(IMAGE_PROVIDER_TEST_SRCS),$(IMAGE_PROVIDER_TEST_HDRS))) +# Tests the detection responder module. +$(eval $(call microlite_test,detection_responder_test,\ +$(DETECTION_RESPONDER_TEST_SRCS),$(DETECTION_RESPONDER_TEST_HDRS))) + # Builds a standalone object recognition binary. $(eval $(call microlite_test,micro_vision,\ $(MICRO_VISION_SRCS),$(MICRO_VISION_HDRS))) diff --git a/tensorflow/lite/experimental/micro/examples/micro_vision/apollo3evb/image_provider.cc b/tensorflow/lite/experimental/micro/examples/micro_vision/apollo3evb/image_provider.cc index 7189efca9b2..2015aecf2b5 100644 --- a/tensorflow/lite/experimental/micro/examples/micro_vision/apollo3evb/image_provider.cc +++ b/tensorflow/lite/experimental/micro/examples/micro_vision/apollo3evb/image_provider.cc @@ -75,11 +75,6 @@ static hm01b0_cfg_t s_HM01B0Cfg = { pfnGpioIsr : NULL, }; -static constexpr int kDebugRowLenElements = 16; -// Each byte takes two characters plus a space, and the offset takes an -// additional 8 characters plus a space. 
-static constexpr int kDebugLineLenBytes = kDebugRowLenElements * 3 + 9; - bool g_is_camera_initialized = false; void boost_mode_enable(tflite::ErrorReporter* error_reporter, bool bEnable) { diff --git a/tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder.cc b/tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder.cc new file mode 100644 index 00000000000..e2ac98fecab --- /dev/null +++ b/tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder.cc @@ -0,0 +1,25 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder.h" + +// This dummy implementation writes person and no person scores to the error +// console. Real applications will want to take some custom action instead, and +// should implement their own versions of this function. 
+void RespondToDetection(tflite::ErrorReporter* error_reporter, + uint8_t person_score, uint8_t no_person_score) { + error_reporter->Report("person score:%d no person score %d", person_score, + no_person_score); +} diff --git a/tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder.h b/tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder.h new file mode 100644 index 00000000000..a1aca63cf3c --- /dev/null +++ b/tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder.h @@ -0,0 +1,34 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Provides an interface to take an action based on the output from the person +// detection model. + +#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_VISION_DETECTION_RESPONDER_H_ +#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_VISION_DETECTION_RESPONDER_H_ + +#include "tensorflow/lite/c/c_api_internal.h" +#include "tensorflow/lite/experimental/micro/micro_error_reporter.h" + +// Called every time the results of a person detection run are available. The +// `person_score` has the numerical confidence that the captured image contains +// a person, and `no_person_score` has the numerical confidence that the image +// does not contain a person. 
Typically if person_score > no person score, the +// image is considered to contain a person. This threshold may be adjusted for +// particular applications. +void RespondToDetection(tflite::ErrorReporter* error_reporter, + uint8_t person_score, uint8_t no_person_score); + +#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_VISION_DETECTION_RESPONDER_H_ diff --git a/tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder_test.cc b/tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder_test.cc new file mode 100644 index 00000000000..ec25533e82c --- /dev/null +++ b/tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder_test.cc @@ -0,0 +1,34 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder.h" + +#include "tensorflow/lite/experimental/micro/testing/micro_test.h" +#include "tensorflow/lite/experimental/micro/testing/test_utils.h" + +TF_LITE_MICRO_TESTS_BEGIN + +TF_LITE_MICRO_TEST(TestCallability) { + tflite::MicroErrorReporter micro_error_reporter; + tflite::ErrorReporter* error_reporter = &micro_error_reporter; + + // This will have external side-effects (like printing to the debug console + // or lighting an LED) that are hard to observe, so the most we can do is + // make sure the call doesn't crash. + RespondToDetection(error_reporter, 100, 200); + RespondToDetection(error_reporter, 200, 100); +} + +TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/experimental/micro/examples/micro_vision/himax_driver/platform_Sparkfun_Edge.h b/tensorflow/lite/experimental/micro/examples/micro_vision/himax_driver/platform_Sparkfun_Edge.h index 4f13c8befbe..3a580914987 100644 --- a/tensorflow/lite/experimental/micro/examples/micro_vision/himax_driver/platform_Sparkfun_Edge.h +++ b/tensorflow/lite/experimental/micro/examples/micro_vision/himax_driver/platform_Sparkfun_Edge.h @@ -35,6 +35,7 @@ extern "C" { #define HM01B0_PIN_INT 4 #define HM01B0_PIN_SCL 8 #define HM01B0_PIN_SDA 9 +#define HM01B0_PIN_DVDD_EN 10 // Define AP3B's CTIMER and output pin for HM01B0 MCLK generation #define HM01B0_MCLK_GENERATOR_MOD 0 diff --git a/tensorflow/lite/experimental/micro/examples/micro_vision/main.cc b/tensorflow/lite/experimental/micro/examples/micro_vision/main.cc index ab736d4bc14..f1b3d897e39 100644 --- a/tensorflow/lite/experimental/micro/examples/micro_vision/main.cc +++ b/tensorflow/lite/experimental/micro/examples/micro_vision/main.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include "tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder.h" #include "tensorflow/lite/experimental/micro/examples/micro_vision/image_provider.h" #include "tensorflow/lite/experimental/micro/examples/micro_vision/model_settings.h" #include "tensorflow/lite/experimental/micro/examples/micro_vision/person_detect_model_data.h" @@ -69,12 +70,10 @@ int main(int argc, char* argv[]) { TfLiteTensor* output = interpreter.output(0); - // Log the person score and no person score. + // Process the inference results. uint8_t person_score = output->data.uint8[kPersonIndex]; uint8_t no_person_score = output->data.uint8[kNotAPersonIndex]; - error_reporter->Report( - "person data. person score: %d, no person score: %d\n", person_score, - no_person_score); + RespondToDetection(error_reporter, person_score, no_person_score); } return 0; diff --git a/tensorflow/lite/experimental/micro/examples/micro_vision/sparkfun_edge/detection_responder.cc b/tensorflow/lite/experimental/micro/examples/micro_vision/sparkfun_edge/detection_responder.cc new file mode 100644 index 00000000000..43425b76e68 --- /dev/null +++ b/tensorflow/lite/experimental/micro/examples/micro_vision/sparkfun_edge/detection_responder.cc @@ -0,0 +1,54 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder.h" + +#include "am_bsp.h" // NOLINT + +// This implementation will light up LEDs on the board in response to the +// inference results. +void RespondToDetection(tflite::ErrorReporter* error_reporter, + uint8_t person_score, uint8_t no_person_score) { + static bool is_initialized = false; + if (!is_initialized) { + // Setup LED's as outputs. Leave red LED alone since that's an error + // indicator for sparkfun_edge in image_provider. + am_hal_gpio_pinconfig(AM_BSP_GPIO_LED_BLUE, g_AM_HAL_GPIO_OUTPUT_12); + am_hal_gpio_pinconfig(AM_BSP_GPIO_LED_GREEN, g_AM_HAL_GPIO_OUTPUT_12); + am_hal_gpio_pinconfig(AM_BSP_GPIO_LED_YELLOW, g_AM_HAL_GPIO_OUTPUT_12); + is_initialized = true; + } + + // Toggle the blue LED every time an inference is performed. + static int count = 0; + if (++count & 1) { + am_hal_gpio_output_set(AM_BSP_GPIO_LED_BLUE); + } else { + am_hal_gpio_output_clear(AM_BSP_GPIO_LED_BLUE); + } + + // Turn on the green LED if a person was detected. Turn on the yellow LED + // otherwise. + am_hal_gpio_output_clear(AM_BSP_GPIO_LED_YELLOW); + am_hal_gpio_output_clear(AM_BSP_GPIO_LED_GREEN); + if (person_score > no_person_score) { + am_hal_gpio_output_set(AM_BSP_GPIO_LED_GREEN); + } else { + am_hal_gpio_output_set(AM_BSP_GPIO_LED_YELLOW); + } + + error_reporter->Report("person score:%d no person score %d", person_score, + no_person_score); +} diff --git a/tensorflow/lite/experimental/micro/examples/micro_vision/sparkfun_edge/image_provider.cc b/tensorflow/lite/experimental/micro/examples/micro_vision/sparkfun_edge/image_provider.cc new file mode 100644 index 00000000000..f9e5da29e29 --- /dev/null +++ b/tensorflow/lite/experimental/micro/examples/micro_vision/sparkfun_edge/image_provider.cc @@ -0,0 +1,199 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/experimental/micro/examples/micro_vision/image_provider.h" + +#include "tensorflow/lite/experimental/micro/examples/micro_vision/himax_driver/HM01B0.h" +#include "tensorflow/lite/experimental/micro/examples/micro_vision/himax_driver/HM01B0_RAW8_QVGA_8bits_lsb_5fps.h" +#include "tensorflow/lite/experimental/micro/examples/micro_vision/himax_driver/HM01B0_debug.h" +#include "tensorflow/lite/experimental/micro/examples/micro_vision/himax_driver/HM01B0_optimized.h" +#include "tensorflow/lite/experimental/micro/examples/micro_vision/himax_driver/platform_Sparkfun_Edge.h" + +// These are headers from Ambiq's Apollo3 SDK. +#include "am_bsp.h" // NOLINT +#include "am_mcu_apollo.h" // NOLINT +#include "am_util.h" // NOLINT + +// #define DEMO_HM01B0_FRAMEBUFFER_DUMP_ENABLE + +// Enabling logging increases power consumption by preventing low power mode +// from being enabled. 
+#define ENABLE_LOGGING + +namespace { + +//***************************************************************************** +// +// HM01B0 Configuration +// +//***************************************************************************** +static hm01b0_cfg_t s_HM01B0Cfg = { + // i2c settings + ui16SlvAddr : HM01B0_DEFAULT_ADDRESS, + eIOMMode : HM01B0_IOM_MODE, + ui32IOMModule : HM01B0_IOM_MODULE, + sIOMCfg : { + eInterfaceMode : HM01B0_IOM_MODE, + ui32ClockFreq : HM01B0_I2C_CLOCK_FREQ, + }, + pIOMHandle : NULL, + + // MCLK settings + ui32CTimerModule : HM01B0_MCLK_GENERATOR_MOD, + ui32CTimerSegment : HM01B0_MCLK_GENERATOR_SEG, + ui32CTimerOutputPin : HM01B0_PIN_MCLK, + + // data interface + ui8PinSCL : HM01B0_PIN_SCL, + ui8PinSDA : HM01B0_PIN_SDA, + ui8PinD0 : HM01B0_PIN_D0, + ui8PinD1 : HM01B0_PIN_D1, + ui8PinD2 : HM01B0_PIN_D2, + ui8PinD3 : HM01B0_PIN_D3, + ui8PinD4 : HM01B0_PIN_D4, + ui8PinD5 : HM01B0_PIN_D5, + ui8PinD6 : HM01B0_PIN_D6, + ui8PinD7 : HM01B0_PIN_D7, + ui8PinVSYNC : HM01B0_PIN_VSYNC, + ui8PinHSYNC : HM01B0_PIN_HSYNC, + ui8PinPCLK : HM01B0_PIN_PCLK, + + ui8PinTrig : HM01B0_PIN_TRIG, + ui8PinInt : HM01B0_PIN_INT, + pfnGpioIsr : NULL, +}; + +bool g_is_camera_initialized = false; + +void boost_mode_enable(tflite::ErrorReporter* error_reporter, bool bEnable) { + am_hal_burst_avail_e eBurstModeAvailable; + am_hal_burst_mode_e eBurstMode; + + // Check that the Burst Feature is available. + if (AM_HAL_STATUS_SUCCESS == + am_hal_burst_mode_initialize(&eBurstModeAvailable)) { + if (AM_HAL_BURST_AVAIL == eBurstModeAvailable) { + error_reporter->Report("Apollo3 Burst Mode is Available\n"); + } else { + error_reporter->Report("Apollo3 Burst Mode is Not Available\n"); + return; + } + } else { + error_reporter->Report("Failed to Initialize for Burst Mode operation\n"); + } + + // Make sure we are in "Normal" mode. 
+ if (AM_HAL_STATUS_SUCCESS == am_hal_burst_mode_disable(&eBurstMode)) { + if (AM_HAL_NORMAL_MODE == eBurstMode) { + error_reporter->Report("Apollo3 operating in Normal Mode (48MHz)\n"); + } + } else { + error_reporter->Report("Failed to Disable Burst Mode operation\n"); + } + + // Put the MCU into "Burst" mode. + if (bEnable) { + if (AM_HAL_STATUS_SUCCESS == am_hal_burst_mode_enable(&eBurstMode)) { + if (AM_HAL_BURST_MODE == eBurstMode) { + error_reporter->Report("Apollo3 operating in Burst Mode (96MHz)\n"); + } + } else { + error_reporter->Report("Failed to Enable Burst Mode operation\n"); + } + } +} + +} // namespace + +TfLiteStatus InitCamera(tflite::ErrorReporter* error_reporter) { + // Enable the ITM print interface. + am_bsp_itm_printf_enable(); + + error_reporter->Report("Initializing HM01B0...\n"); + + am_hal_clkgen_control(AM_HAL_CLKGEN_CONTROL_SYSCLK_MAX, 0); + + // Set the default cache configuration + am_hal_cachectrl_config(&am_hal_cachectrl_defaults); + am_hal_cachectrl_enable(); + + // Configure the board for low power operation. This breaks logging by + // turning off the itm and uart interfaces. +#ifndef ENABLE_LOGGING + am_bsp_low_power_init(); +#endif + + // Enable interrupts so we can receive messages from the boot host. + am_hal_interrupt_master_enable(); + + boost_mode_enable(error_reporter, true); + + // Turn on the 1.8V regulator for DVDD on the camera. + am_hal_gpio_pinconfig(HM01B0_PIN_DVDD_EN, g_AM_HAL_GPIO_OUTPUT_12); + am_hal_gpio_output_set(HM01B0_PIN_DVDD_EN); + + hm01b0_power_up(&s_HM01B0Cfg); + + // TODO(njeff): check the delay time to just fit the spec. + am_util_delay_ms(1); + + hm01b0_mclk_enable(&s_HM01B0Cfg); + + // TODO(njeff): check the delay time to just fit the spec. 
+ am_util_delay_ms(1); + + hm01b0_init_if(&s_HM01B0Cfg); + + hm01b0_init_system(&s_HM01B0Cfg, (hm_script_t*)sHM01B0InitScript, + sizeof(sHM01B0InitScript) / sizeof(hm_script_t)); + + // Put camera into streaming mode - this makes it so that the camera + // constantly captures images. It is still OK to read and image since the + // camera uses a double-buffered input. This means there is always one valid + // image to read while the other buffer fills. Streaming mode allows the + // camera to perform auto exposure constantly. + am_hal_gpio_output_clear(AM_BSP_GPIO_LED_RED); + uint32_t error_code = + hm01b0_set_mode(&s_HM01B0Cfg, HM01B0_REG_MODE_SELECT_STREAMING, 0); + if (error_code != HM01B0_ERR_OK) { + am_hal_gpio_output_set(AM_BSP_GPIO_LED_RED); + + return kTfLiteError; + } + + return kTfLiteOk; +} + +// Capture single frame. Frame pointer passed in to reduce memory usage. This +// allows the input tensor to be used instead of requiring an extra copy. +TfLiteStatus GetImage(tflite::ErrorReporter* error_reporter, int frame_width, + int frame_height, int channels, uint8_t* frame) { + if (!g_is_camera_initialized) { + TfLiteStatus init_status = InitCamera(error_reporter); + if (init_status != kTfLiteOk) { + return init_status; + } + g_is_camera_initialized = true; + } + + hm01b0_blocking_read_oneframe_scaled(frame, frame_width, frame_height, + channels); + +#ifdef DEMO_HM01B0_FRAMEBUFFER_DUMP_ENABLE + hm01b0_framebuffer_dump(frame, frame_width * frame_height * channels); +#endif + + return kTfLiteOk; +} diff --git a/tensorflow/lite/experimental/micro/tools/ci_build/ci_build_micro_projects.sh b/tensorflow/lite/experimental/micro/tools/ci_build/ci_build_micro_projects.sh index 36819f0c39c..32291a423a9 100755 --- a/tensorflow/lite/experimental/micro/tools/ci_build/ci_build_micro_projects.sh +++ b/tensorflow/lite/experimental/micro/tools/ci_build/ci_build_micro_projects.sh @@ -33,3 +33,6 @@ make -f tensorflow/lite/experimental/micro/tools/make/Makefile \ TARGET=${1} \ 
TAGS="${2}" \ generate_projects + +# Needed to solve CI build bug triggered by files added to source tree. +make -f tensorflow/lite/experimental/micro/tools/make/Makefile clean_downloads diff --git a/tensorflow/lite/experimental/micro/tools/ci_build/test_sparkfun.sh b/tensorflow/lite/experimental/micro/tools/ci_build/test_sparkfun.sh index a49eb4239c2..451e21501bc 100755 --- a/tensorflow/lite/experimental/micro/tools/ci_build/test_sparkfun.sh +++ b/tensorflow/lite/experimental/micro/tools/ci_build/test_sparkfun.sh @@ -24,3 +24,6 @@ cd ${ROOT_DIR} pwd make -f tensorflow/lite/experimental/micro/tools/make/Makefile TARGET=sparkfun_edge micro_speech_bin + +# Needed to solve CI build bug triggered by files added to source tree. +make -f tensorflow/lite/experimental/micro/tools/make/Makefile clean_downloads diff --git a/tensorflow/lite/experimental/micro/tools/ci_build/test_x86.sh b/tensorflow/lite/experimental/micro/tools/ci_build/test_x86.sh index c0de76580a3..16b3e3204d5 100755 --- a/tensorflow/lite/experimental/micro/tools/ci_build/test_x86.sh +++ b/tensorflow/lite/experimental/micro/tools/ci_build/test_x86.sh @@ -24,3 +24,6 @@ cd ${ROOT_DIR} pwd make -f tensorflow/lite/experimental/micro/tools/make/Makefile test + +# Needed to solve CI build bug triggered by files added to source tree. 
+make -f tensorflow/lite/experimental/micro/tools/make/Makefile clean_downloads diff --git a/tensorflow/lite/experimental/micro/tools/make/templates/LCD_DISCO_F746NG.lib.tpl b/tensorflow/lite/experimental/micro/tools/make/templates/LCD_DISCO_F746NG.lib.tpl new file mode 100644 index 00000000000..899a504ff76 --- /dev/null +++ b/tensorflow/lite/experimental/micro/tools/make/templates/LCD_DISCO_F746NG.lib.tpl @@ -0,0 +1 @@ +http://os.mbed.com/teams/ST/code/LCD_DISCO_F746NG/#d44525b1de98 diff --git a/tensorflow/lite/kernels/BUILD b/tensorflow/lite/kernels/BUILD index f74b2484de0..f70ccf3a3d9 100644 --- a/tensorflow/lite/kernels/BUILD +++ b/tensorflow/lite/kernels/BUILD @@ -669,6 +669,7 @@ cc_test( name = "cast_test", size = "small", srcs = ["cast_test.cc"], + tags = ["tflite_nnapi"], deps = [ ":builtin_ops", ":test_main", @@ -1149,6 +1150,7 @@ cc_test( name = "log_softmax_test", size = "small", srcs = ["log_softmax_test.cc"], + tags = ["tflite_nnapi"], deps = [ ":builtin_ops", ":test_main", diff --git a/tensorflow/lite/kernels/cast_test.cc b/tensorflow/lite/kernels/cast_test.cc index 6bad3d6e7b3..8f1cb44f1c9 100644 --- a/tensorflow/lite/kernels/cast_test.cc +++ b/tensorflow/lite/kernels/cast_test.cc @@ -43,7 +43,23 @@ class CastOpModel : public SingleOpModel { int output_; }; -TEST(CastOpModel, CastIntToFloat) { +TEST(CastOpModel, CastInt32ToFloat) { + CastOpModel m({TensorType_INT32, {2, 3}}, {TensorType_FLOAT32, {2, 3}}); + m.PopulateTensor(m.input(), {100, 200, 300, 400, 500, 600}); + m.Invoke(); + EXPECT_THAT(m.ExtractVector(m.output()), + ElementsAreArray({100.f, 200.f, 300.f, 400.f, 500.f, 600.f})); +} + +TEST(CastOpModel, CastFloatToInt32) { + CastOpModel m({TensorType_FLOAT32, {3, 2}}, {TensorType_INT32, {3, 2}}); + m.PopulateTensor(m.input(), {100.f, 20.f, 3.f, 0.4f, 0.999f, 1.1f}); + m.Invoke(); + EXPECT_THAT(m.ExtractVector(m.output()), + ElementsAreArray({100, 20, 3, 0, 0, 1})); +} + +TEST(CastOpModel, CastInt64ToFloat) { CastOpModel m({TensorType_INT64, 
{2, 3}}, {TensorType_FLOAT32, {2, 3}}); m.PopulateTensor(m.input(), {100, 200, 300, 400, 500, 600}); m.Invoke(); @@ -51,11 +67,11 @@ TEST(CastOpModel, CastIntToFloat) { ElementsAreArray({100.f, 200.f, 300.f, 400.f, 500.f, 600.f})); } -TEST(CastOpModel, CastFloatToInt) { - CastOpModel m({TensorType_FLOAT32, {3, 2}}, {TensorType_INT32, {3, 2}}); +TEST(CastOpModel, CastFloatToInt64) { + CastOpModel m({TensorType_FLOAT32, {3, 2}}, {TensorType_INT64, {3, 2}}); m.PopulateTensor(m.input(), {100.f, 20.f, 3.f, 0.4f, 0.999f, 1.1f}); m.Invoke(); - EXPECT_THAT(m.ExtractVector(m.output()), + EXPECT_THAT(m.ExtractVector(m.output()), ElementsAreArray({100, 20, 3, 0, 0, 1})); } @@ -75,6 +91,38 @@ TEST(CastOpModel, CastBoolToFloat) { ElementsAreArray({1.f, 1.0f, 0.f, 1.0f, 0.0f, 1.0f})); } +TEST(CastOpModel, CastFloatToUInt8) { + CastOpModel m({TensorType_FLOAT32, {3, 2}}, {TensorType_UINT8, {3, 2}}); + m.PopulateTensor(m.input(), {100.f, 1.0f, 0.f, 0.4f, 1.999f, 1.1f}); + m.Invoke(); + EXPECT_THAT(m.ExtractVector(m.output()), + ElementsAreArray({100, 1, 0, 0, 1, 1})); +} + +TEST(CastOpModel, CastUInt8ToFloat) { + CastOpModel m({TensorType_UINT8, {3, 2}}, {TensorType_FLOAT32, {3, 2}}); + m.PopulateTensor(m.input(), {123, 0, 1, 2, 3, 4}); + m.Invoke(); + EXPECT_THAT(m.ExtractVector(m.output()), + ElementsAreArray({123.f, 0.f, 1.f, 2.f, 3.f, 4.f})); +} + +TEST(CastOpModel, CastInt32ToUInt8) { + CastOpModel m({TensorType_INT32, {3, 2}}, {TensorType_UINT8, {3, 2}}); + m.PopulateTensor(m.input(), {100, 1, 200, 2, 255, 3}); + m.Invoke(); + EXPECT_THAT(m.ExtractVector(m.output()), + ElementsAreArray({100, 1, 200, 2, 255, 3})); +} + +TEST(CastOpModel, CastUInt8ToInt32) { + CastOpModel m({TensorType_UINT8, {3, 2}}, {TensorType_INT32, {3, 2}}); + m.PopulateTensor(m.input(), {100, 1, 200, 2, 255, 3}); + m.Invoke(); + EXPECT_THAT(m.ExtractVector(m.output()), + ElementsAreArray({100, 1, 200, 2, 255, 3})); +} + TEST(CastOpModel, CastComplex64ToFloat) { CastOpModel m({TensorType_COMPLEX64, {2, 
3}}, {TensorType_FLOAT32, {2, 3}}); m.PopulateTensor>( diff --git a/tensorflow/lite/kernels/internal/BUILD b/tensorflow/lite/kernels/internal/BUILD index 9b4e65c6eb7..f9b62c724ee 100644 --- a/tensorflow/lite/kernels/internal/BUILD +++ b/tensorflow/lite/kernels/internal/BUILD @@ -367,6 +367,7 @@ cc_library( "reference/pooling.h", "reference/reference_ops.h", "reference/softmax.h", + "reference/strided_slice.h", ], deps = [ ":common", @@ -406,6 +407,7 @@ cc_library( "reference/pooling.h", "reference/reference_ops.h", "reference/softmax.h", + "reference/strided_slice.h", ], deps = [ ":common", diff --git a/tensorflow/lite/kernels/internal/reference/reference_ops.h b/tensorflow/lite/kernels/internal/reference/reference_ops.h index 8889b00a7f2..ce34f525c37 100644 --- a/tensorflow/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/lite/kernels/internal/reference/reference_ops.h @@ -36,6 +36,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/reference/fully_connected.h" #include "tensorflow/lite/kernels/internal/reference/pooling.h" #include "tensorflow/lite/kernels/internal/reference/softmax.h" +#include "tensorflow/lite/kernels/internal/reference/strided_slice.h" #include "tensorflow/lite/kernels/internal/round.h" #include "tensorflow/lite/kernels/internal/strided_slice_logic.h" #include "tensorflow/lite/kernels/internal/tensor.h" @@ -3071,59 +3072,6 @@ inline void PadImageStyle(const tflite::PadParams& op_params, output_data); } -template -inline void StridedSlice(const tflite::StridedSliceParams& op_params, - const RuntimeShape& unextended_input_shape, - const T* input_data, - const RuntimeShape& unextended_output_shape, - T* output_data) { - // Note that the output_shape is not used herein. 
- tflite::StridedSliceParams params_copy = op_params; - - TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); - const RuntimeShape input_shape = - RuntimeShape::ExtendedShape(4, unextended_input_shape); - const RuntimeShape output_shape = - RuntimeShape::ExtendedShape(4, unextended_output_shape); - - // Reverse and pad to 4 dimensions because that is what the runtime code - // requires (ie. all shapes must be 4D and are given backwards). - strided_slice::StridedSlicePadIndices(¶ms_copy, 4); - - const int start_b = strided_slice::StartForAxis(params_copy, input_shape, 0); - const int stop_b = - strided_slice::StopForAxis(params_copy, input_shape, 0, start_b); - const int start_h = strided_slice::StartForAxis(params_copy, input_shape, 1); - const int stop_h = - strided_slice::StopForAxis(params_copy, input_shape, 1, start_h); - const int start_w = strided_slice::StartForAxis(params_copy, input_shape, 2); - const int stop_w = - strided_slice::StopForAxis(params_copy, input_shape, 2, start_w); - const int start_d = strided_slice::StartForAxis(params_copy, input_shape, 3); - const int stop_d = - strided_slice::StopForAxis(params_copy, input_shape, 3, start_d); - - T* out_ptr = output_data; - for (int in_b = start_b; - !strided_slice::LoopCondition(in_b, stop_b, params_copy.strides[0]); - in_b += params_copy.strides[0]) { - for (int in_h = start_h; - !strided_slice::LoopCondition(in_h, stop_h, params_copy.strides[1]); - in_h += params_copy.strides[1]) { - for (int in_w = start_w; - !strided_slice::LoopCondition(in_w, stop_w, params_copy.strides[2]); - in_w += params_copy.strides[2]) { - for (int in_d = start_d; !strided_slice::LoopCondition( - in_d, stop_d, params_copy.strides[3]); - in_d += params_copy.strides[3]) { - *out_ptr++ = input_data[Offset(input_shape, in_b, in_h, in_w, in_d)]; - } - } - } - } -} - template inline void Slice(const tflite::SliceParams& op_params, const RuntimeShape& 
input_shape, diff --git a/tensorflow/lite/kernels/internal/reference/strided_slice.h b/tensorflow/lite/kernels/internal/reference/strided_slice.h new file mode 100644 index 00000000000..921c49ea77b --- /dev/null +++ b/tensorflow/lite/kernels/internal/reference/strided_slice.h @@ -0,0 +1,80 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_STRIDED_SLICE_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_STRIDED_SLICE_H_ + +#include "tensorflow/lite/kernels/internal/common.h" +#include "tensorflow/lite/kernels/internal/strided_slice_logic.h" +#include "tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +namespace reference_ops { +template <typename T> +inline void StridedSlice(const tflite::StridedSliceParams& op_params, + const RuntimeShape& unextended_input_shape, + const T* input_data, + const RuntimeShape& unextended_output_shape, + T* output_data) { + // Note that the output_shape is not used herein. 
+ tflite::StridedSliceParams params_copy = op_params; + + TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + const RuntimeShape input_shape = + RuntimeShape::ExtendedShape(4, unextended_input_shape); + const RuntimeShape output_shape = + RuntimeShape::ExtendedShape(4, unextended_output_shape); + + // Reverse and pad to 4 dimensions because that is what the runtime code + // requires (ie. all shapes must be 4D and are given backwards). + strided_slice::StridedSlicePadIndices(&params_copy, 4); + + const int start_b = strided_slice::StartForAxis(params_copy, input_shape, 0); + const int stop_b = + strided_slice::StopForAxis(params_copy, input_shape, 0, start_b); + const int start_h = strided_slice::StartForAxis(params_copy, input_shape, 1); + const int stop_h = + strided_slice::StopForAxis(params_copy, input_shape, 1, start_h); + const int start_w = strided_slice::StartForAxis(params_copy, input_shape, 2); + const int stop_w = + strided_slice::StopForAxis(params_copy, input_shape, 2, start_w); + const int start_d = strided_slice::StartForAxis(params_copy, input_shape, 3); + const int stop_d = + strided_slice::StopForAxis(params_copy, input_shape, 3, start_d); + + T* out_ptr = output_data; + for (int in_b = start_b; + !strided_slice::LoopCondition(in_b, stop_b, params_copy.strides[0]); + in_b += params_copy.strides[0]) { + for (int in_h = start_h; + !strided_slice::LoopCondition(in_h, stop_h, params_copy.strides[1]); + in_h += params_copy.strides[1]) { + for (int in_w = start_w; + !strided_slice::LoopCondition(in_w, stop_w, params_copy.strides[2]); + in_w += params_copy.strides[2]) { + for (int in_d = start_d; !strided_slice::LoopCondition( + in_d, stop_d, params_copy.strides[3]); + in_d += params_copy.strides[3]) { + *out_ptr++ = input_data[Offset(input_shape, in_b, in_h, in_w, in_d)]; + } + } + } + } +} +} // namespace reference_ops +} // namespace tflite + +#endif // 
TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_STRIDED_SLICE_H_ diff --git a/tensorflow/lite/kernels/sparse_to_dense.cc b/tensorflow/lite/kernels/sparse_to_dense.cc index 74eef2d698e..3a9a7cd6c5e 100644 --- a/tensorflow/lite/kernels/sparse_to_dense.cc +++ b/tensorflow/lite/kernels/sparse_to_dense.cc @@ -173,6 +173,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { output_shape->type == kTfLiteInt64); TF_LITE_ENSURE(context, values->type == kTfLiteInt32 || values->type == kTfLiteInt64 || + values->type == kTfLiteInt8 || + values->type == kTfLiteUInt8 || values->type == kTfLiteFloat32); TF_LITE_ENSURE_EQ(context, values->type, default_value->type); @@ -232,7 +234,8 @@ TfLiteStatus EvalForIndexType(TfLiteContext* context, TfLiteNode* node, } default: context->ReportError( - context, "Type %d is currently not supported by sparse to dense.", + context, + "Indice type %d is currently not supported by sparse to dense.", indices->type); return kTfLiteError; } @@ -242,7 +245,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* indices = GetInput(context, node, kIndicesTensor); const TfLiteTensor* values = GetInput(context, node, kValueInputTensor); - // Currently only supports float32, int32 and int64. 
switch (values->type) { case kTfLiteFloat32: return EvalForIndexType(context, node, indices); @@ -250,9 +252,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return EvalForIndexType(context, node, indices); case kTfLiteInt64: return EvalForIndexType(context, node, indices); + case kTfLiteInt8: + return EvalForIndexType(context, node, indices); + case kTfLiteUInt8: + return EvalForIndexType(context, node, indices); default: context->ReportError( - context, "Type %d is currently not supported by sparse to dense.", + context, + "Value type %d is currently not supported by sparse to dense.", values->type); return kTfLiteError; } diff --git a/tensorflow/lite/kernels/sparse_to_dense_test.cc b/tensorflow/lite/kernels/sparse_to_dense_test.cc index 4a5ce6a36b5..ad040b2ce04 100644 --- a/tensorflow/lite/kernels/sparse_to_dense_test.cc +++ b/tensorflow/lite/kernels/sparse_to_dense_test.cc @@ -100,6 +100,21 @@ TEST(SparseToDenseOpModelTest, TwoDimensionsTest) { EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 3, 3})); } +TEST(SparseToDenseOpModelTest, Int64IndexTest) { + SparseToDenseOpModel m({3, 3}, {3}, {3}, -1, TensorType_INT64, + TensorType_FLOAT32); + m.PopulateTensor(m.indices(), {0, 0, 0, 1, 2, 1, 2, 0, 1}); + m.PopulateTensor(m.output_shape(), {3, 3, 3}); + m.PopulateTensor(m.values(), {2, 4, 6}); + m.Invoke(); + + EXPECT_THAT( + m.GetOutput(), + ElementsAreArray({2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 4, -1, -1, 6, -1, -1, -1, -1, -1, -1, -1})); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 3, 3})); +} + TEST(SparseToDenseOpModelTest, DefaultValueTest) { SparseToDenseOpModel m({3, 3}, {3}, {3}, -1, TensorType_INT32, TensorType_FLOAT32); @@ -145,12 +160,12 @@ TEST(SparseToDenseOpModelTest, Int64ValueTest) { EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 3, 3})); } -TEST(SparseToDenseOpModelTest, Int64IndexTest) { - SparseToDenseOpModel m({3, 3}, {3}, {3}, -1, TensorType_INT64, - TensorType_FLOAT32); - 
m.PopulateTensor(m.indices(), {0, 0, 0, 1, 2, 1, 2, 0, 1}); +TEST(SparseToDenseOpModelTest, Int8ValueTest) { + SparseToDenseOpModel m({3, 3}, {3}, {3}, -1, TensorType_INT32, + TensorType_INT8); + m.PopulateTensor(m.indices(), {0, 0, 0, 1, 2, 1, 2, 0, 1}); m.PopulateTensor(m.output_shape(), {3, 3, 3}); - m.PopulateTensor(m.values(), {2, 4, 6}); + m.PopulateTensor(m.values(), {2, 4, 6}); m.Invoke(); EXPECT_THAT( @@ -160,5 +175,19 @@ TEST(SparseToDenseOpModelTest, Int64IndexTest) { EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 3, 3})); } +TEST(SparseToDenseOpModelTest, UInt8ValueTest) { + SparseToDenseOpModel m({3, 3}, {3}, {3}, 1, TensorType_INT32, + TensorType_UINT8); + m.PopulateTensor(m.indices(), {0, 0, 0, 1, 2, 1, 2, 0, 1}); + m.PopulateTensor(m.output_shape(), {3, 3, 3}); + m.PopulateTensor(m.values(), {2, 4, 6}); + m.Invoke(); + + EXPECT_THAT(m.GetOutput(), + ElementsAreArray({2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 4, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1})); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 3, 3})); +} + } // namespace } // namespace tflite diff --git a/tensorflow/lite/kernels/while.cc b/tensorflow/lite/kernels/while.cc index b3f00d3fe13..a6438558458 100644 --- a/tensorflow/lite/kernels/while.cc +++ b/tensorflow/lite/kernels/while.cc @@ -28,21 +28,36 @@ namespace { // Propagate tensor shapes and types from `src_tensor_indices` in `src_subgraph` // to `dst_tensor_indices` in `dst_subgraph`. +// +// When `resize_subgraph_inputs` is true, the function calls subgraphs's +// `ResizeInputTensor` function, and it may trigger the memory planner to +// reallocate memory. +// When `resize_subgraph_inputs` is false, it implies `context` belongs to +// `dst_subgraph`. The function calls `context->ResizeTensor`. This happens +// when resizing `While` op's outputs. 
template TfLiteStatus CopyTensorsShapeAndType(TfLiteContext* context, Subgraph* src_subgraph, const SrcVector& src_tensor_indices, Subgraph* dst_subgraph, - const DstVector& dst_tensor_indices) { + const DstVector& dst_tensor_indices, + bool resize_subgraph_inputs) { TF_LITE_ENSURE_EQ(context, src_tensor_indices.size(), dst_tensor_indices.size()); for (int i = 0; i < src_tensor_indices.size(); ++i) { const TfLiteTensor* src_tensor = src_subgraph->tensor(src_tensor_indices[i]); - std::vector dims(src_tensor->dims->data, - src_tensor->dims->data + src_tensor->dims->size); - dst_subgraph->ResizeInputTensor(dst_tensor_indices[i], dims); + TfLiteTensor* dst_tensor = dst_subgraph->tensor(dst_tensor_indices[i]); + if (resize_subgraph_inputs) { + std::vector dims(src_tensor->dims->data, + src_tensor->dims->data + src_tensor->dims->size); + dst_subgraph->ResizeInputTensor(dst_tensor_indices[i], dims); + } else { + TF_LITE_ENSURE_OK( + context, context->ResizeTensor(context, dst_tensor, + TfLiteIntArrayCopy(src_tensor->dims))); + } dst_tensor->type = src_tensor->type; } return kTfLiteOk; @@ -130,9 +145,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // Prepare and check the condition subgraph. TF_LITE_ENSURE_OK( - context, CopyTensorsShapeAndType(context, this_subgraph, - TfLiteIntArrayView(node->inputs), - cond_subgraph, cond_subgraph->inputs())); + context, CopyTensorsShapeAndType( + context, this_subgraph, TfLiteIntArrayView(node->inputs), + cond_subgraph, cond_subgraph->inputs(), true)); TF_LITE_ENSURE_OK(context, cond_subgraph->AllocateTensors()); TfLiteTensor* cond_output = cond_subgraph->tensor(cond_subgraph->outputs()[0]); @@ -148,9 +163,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // Prepare and check the body subgraph. 
TF_LITE_ENSURE_OK( - context, CopyTensorsShapeAndType(context, this_subgraph, - TfLiteIntArrayView(node->inputs), - body_subgraph, body_subgraph->inputs())); + context, CopyTensorsShapeAndType( + context, this_subgraph, TfLiteIntArrayView(node->inputs), + body_subgraph, body_subgraph->inputs(), true)); TF_LITE_ENSURE_OK(context, body_subgraph->AllocateTensors()); if (body_subgraph->HasDynamicTensors()) { op_data->body_has_dynamic_output_tensors = true; @@ -232,6 +247,16 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { // boundary. Currently we copy the input / output between the subgraphs. This // isn't optimized yet and a lot of redundant copies are made. // TODO(b/120234921): Optimize and avoid copying tensors between subgraphs. + + if (op_data->body_has_dynamic_output_tensors) { + // If body subgraph has dynamic outputs, the input of condition subgraph may + // be changed in the last invocation and may need resizing. + TF_LITE_ENSURE_OK( + context, CopyTensorsShapeAndType( + context, this_subgraph, TfLiteIntArrayView(node->inputs), + cond_subgraph, cond_subgraph->inputs(), true)); + TF_LITE_ENSURE_OK(context, cond_subgraph->AllocateTensors()); + } TF_LITE_ENSURE_OK( context, CopyTensorsData(context, this_subgraph, TfLiteIntArrayView(node->inputs), @@ -254,7 +279,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_OK(context, CopyTensorsShapeAndType( context, cond_subgraph, cond_subgraph->inputs(), - body_subgraph, body_subgraph->inputs())); + body_subgraph, body_subgraph->inputs(), true)); TF_LITE_ENSURE_OK(context, body_subgraph->AllocateTensors()); } @@ -273,7 +298,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_OK(context, CopyTensorsShapeAndType( context, body_subgraph, body_subgraph->outputs(), - cond_subgraph, cond_subgraph->inputs())); + cond_subgraph, cond_subgraph->inputs(), true)); TF_LITE_ENSURE_OK(context, cond_subgraph->AllocateTensors()); } @@ -287,9 +312,9 @@ 
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { // TODO(b/120234921): Optimize and avoid copying tensors between subgraphs. if (op_data->body_has_dynamic_output_tensors) { TF_LITE_ENSURE_OK( - context, CopyTensorsShapeAndType(context, cond_subgraph, - cond_subgraph->inputs(), this_subgraph, - TfLiteIntArrayView(node->outputs))); + context, CopyTensorsShapeAndType( + context, cond_subgraph, cond_subgraph->inputs(), + this_subgraph, TfLiteIntArrayView(node->outputs), false)); } TF_LITE_ENSURE_OK( diff --git a/tensorflow/lite/kernels/while_test.cc b/tensorflow/lite/kernels/while_test.cc index a3a80ea6f50..1745f585ed0 100644 --- a/tensorflow/lite/kernels/while_test.cc +++ b/tensorflow/lite/kernels/while_test.cc @@ -59,8 +59,6 @@ TEST_F(WhileTest, TestTriangularNumberSequence) { } } -// This requires dynamic sized subgraphs and it's not supported right now. -// TODO(ycling): Support dynamic sized subgraphs. TEST_F(WhileTest, TestPadLoop) { interpreter_.reset(new Interpreter); interpreter_->AddSubgraphs(2); @@ -70,8 +68,6 @@ TEST_F(WhileTest, TestPadLoop) { interpreter_->ResizeInputTensor(interpreter_->inputs()[0], {1}); interpreter_->ResizeInputTensor(interpreter_->inputs()[1], {2}); - // This is not supported yet. The test ensures thatit doesn't crash and raises - // an error properly. ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk); FillIntTensor(interpreter_->tensor(interpreter_->inputs()[0]), {1}); @@ -82,6 +78,11 @@ TEST_F(WhileTest, TestPadLoop) { CheckIntTensor(output1, {1}, {4}); TfLiteTensor* output2 = interpreter_->tensor(interpreter_->outputs()[1]); CheckIntTensor(output2, {11}, {0, 0, 0, 5, 7, 0, 0, 0, 0, 0, 0}); + + // The extra invocation serves as a regression test: There was a bug that + // invoking a while loop with dynamic shaped body makes the interpreter + // state uninvokable. 
+ ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk); } } // namespace diff --git a/tensorflow/lite/models/smartreply/ops/extract_feature.cc b/tensorflow/lite/models/smartreply/ops/extract_feature.cc index f9d29229457..8ec05885976 100644 --- a/tensorflow/lite/models/smartreply/ops/extract_feature.cc +++ b/tensorflow/lite/models/smartreply/ops/extract_feature.cc @@ -59,8 +59,6 @@ bool IsValidNgram(const tflite::StringRef& strref) { } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - TfLiteIntArray* outputSize1 = TfLiteIntArrayCreate(1); - TfLiteIntArray* outputSize2 = TfLiteIntArrayCreate(1); const TfLiteTensor* input = GetInput(context, node, 0); int dim = input->dims->data[0]; if (dim == 0) { @@ -68,6 +66,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { dim = 1; } TF_LITE_ENSURE_EQ(context, input->type, kTfLiteString); + TfLiteIntArray* outputSize1 = TfLiteIntArrayCreate(1); + TfLiteIntArray* outputSize2 = TfLiteIntArrayCreate(1); outputSize1->data[0] = dim; outputSize2->data[0] = dim; context->ResizeTensor(context, GetOutput(context, node, 0), outputSize1); diff --git a/tensorflow/lite/nnapi/NeuralNetworksTypes.h b/tensorflow/lite/nnapi/NeuralNetworksTypes.h index 70f6c002df1..40c3ecf3c91 100644 --- a/tensorflow/lite/nnapi/NeuralNetworksTypes.h +++ b/tensorflow/lite/nnapi/NeuralNetworksTypes.h @@ -93,6 +93,7 @@ enum { ANEURALNETWORKS_ARGMAX = 39, ANEURALNETWORKS_ARGMIN = 40, ANEURALNETWORKS_BIDIRECTIONAL_SEQUENCE_LSTM = 42, + ANEURALNETWORKS_CAST = 45, ANEURALNETWORKS_EQUAL = 48, ANEURALNETWORKS_EXP = 49, ANEURALNETWORKS_EXPAND_DIMS = 50, @@ -105,6 +106,7 @@ enum { ANEURALNETWORKS_LOGICAL_AND = 61, ANEURALNETWORKS_LOGICAL_NOT = 62, ANEURALNETWORKS_LOGICAL_OR = 63, + ANEURALNETWORKS_LOG_SOFTMAX = 64, ANEURALNETWORKS_MAXIMUM = 65, ANEURALNETWORKS_MINIMUM = 66, ANEURALNETWORKS_NEG = 67, diff --git a/tensorflow/lite/python/BUILD b/tensorflow/lite/python/BUILD index a181001d351..02ccf5cc751 100644 --- a/tensorflow/lite/python/BUILD +++ 
b/tensorflow/lite/python/BUILD @@ -149,6 +149,7 @@ py_library( ":lite_constants", ":op_hint", "//tensorflow/python:tf_optimizer", + "//tensorflow/python/eager:wrap_function", ], ) diff --git a/tensorflow/lite/python/convert.py b/tensorflow/lite/python/convert.py index 3dfd112cef8..d849e9d0adc 100644 --- a/tensorflow/lite/python/convert.py +++ b/tensorflow/lite/python/convert.py @@ -93,7 +93,11 @@ class ConverterError(Exception): pass -def toco_convert_protos(model_flags_str, toco_flags_str, input_data_str): +def toco_convert_protos(model_flags_str, + toco_flags_str, + input_data_str, + debug_info_str="", + enable_mlir_converter=False): """Convert `input_data_str` according to model and toco parameters. Unless you know what you are doing consider using @@ -105,6 +109,10 @@ def toco_convert_protos(model_flags_str, toco_flags_str, input_data_str): toco_flags_str: Serialized proto describing conversion properties, see `toco/toco_flags.proto`. input_data_str: Input data in serialized form (e.g. a graphdef is common) + debug_info_str: Serialized `GraphDebugInfo` proto describing logging + information. (default "") + enable_mlir_converter: Enables the MLIR converter instead of the TOCO + converter. (default False) Returns: Converted model in serialized form (e.g. a TFLITE model is common). Raises: @@ -118,10 +126,12 @@ def toco_convert_protos(model_flags_str, toco_flags_str, input_data_str): if not _toco_from_proto_bin: try: model_str = wrap_toco.wrapped_toco_convert(model_flags_str, - toco_flags_str, input_data_str) + toco_flags_str, input_data_str, + debug_info_str, + enable_mlir_converter) return model_str except Exception as e: - raise ConverterError("TOCO failed: %s" % e) + raise ConverterError(str(e)) # Windows and TemporaryFile are not that useful together, # since you cannot have two readers/writers. 
So we have to @@ -132,16 +142,17 @@ def toco_convert_protos(model_flags_str, toco_flags_str, input_data_str): # Build all input files with _tempfile.NamedTemporaryFile(delete=False) as fp_toco, \ _tempfile.NamedTemporaryFile(delete=False) as fp_model, \ - _tempfile.NamedTemporaryFile(delete=False) as fp_input: + _tempfile.NamedTemporaryFile(delete=False) as fp_input, \ + _tempfile.NamedTemporaryFile(delete=False) as fp_debug: toco_filename = fp_toco.name input_filename = fp_input.name model_filename = fp_model.name + debug_filename = fp_debug.name + fp_model.write(model_flags_str) fp_toco.write(toco_flags_str) fp_input.write(input_data_str) - fp_model.flush() - fp_toco.flush() - fp_input.flush() + fp_debug.write(debug_info_str) # Reserve an output file with _tempfile.NamedTemporaryFile(delete=False) as fp: @@ -149,9 +160,15 @@ def toco_convert_protos(model_flags_str, toco_flags_str, input_data_str): # Run cmd = [ - _toco_from_proto_bin, model_filename, toco_filename, input_filename, - output_filename + _toco_from_proto_bin, + model_filename, + toco_filename, + input_filename, + output_filename, + "--debug_proto_file={}".format(debug_filename), ] + if enable_mlir_converter: + cmd.append("--enable_mlir_converter") cmdline = " ".join(cmd) is_windows = _platform.system() == "Windows" proc = _subprocess.Popen( @@ -168,8 +185,7 @@ def toco_convert_protos(model_flags_str, toco_flags_str, input_data_str): else: stdout = _try_convert_to_unicode(stdout) stderr = _try_convert_to_unicode(stderr) - raise ConverterError( - "TOCO failed. See console for info.\n%s\n%s\n" % (stdout, stderr)) + raise ConverterError("See console for info.\n%s\n%s\n" % (stdout, stderr)) finally: # Must manually cleanup files. for filename in [ @@ -211,9 +227,9 @@ def build_toco_convert_protos(input_tensors, output_tensors: List of output tensors (only .name is used from this). inference_type: Target data type of real-number arrays in the output file. Must be `{tf.float32, tf.uint8}`. 
(default tf.float32) + Must be `{tf.float32, tf.uint8}`. (default `inference_type`) inference_input_type: Target data type of real-number input arrays. Allows for a different type for input arrays in the case of quantization. - Must be `{tf.float32, tf.uint8}`. (default `inference_type`) input_format: Type of data to read Currently must be `{TENSORFLOW_GRAPHDEF}`. (default TENSORFLOW_GRAPHDEF) input_shapes: Input array shape. It needs to be a list of the same length @@ -266,7 +282,7 @@ def build_toco_convert_protos(input_tensors, Returns: model_flags, toco_flags, debug_info: three protocol buffers describing the - conversion process and debug information. + conversion process and debug information. Raises: ValueError: @@ -330,7 +346,7 @@ def build_toco_convert_protos(input_tensors, def toco_convert_graph_def(input_data, input_arrays_with_shape, output_arrays, - *args, **kwargs): + enable_mlir_converter, *args, **kwargs): """"Convert a model using TOCO. This function is used to convert GraphDefs that cannot be loaded into @@ -347,6 +363,8 @@ def toco_convert_graph_def(input_data, input_arrays_with_shape, output_arrays, output_arrays: List of output tensors to freeze graph with. Use only when graph cannot be loaded into TensorFlow and when `output_tensors` is None. (default None) + enable_mlir_converter: Enables the MLIR converter instead of the TOCO + converter. *args: See `build_toco_convert_protos`, **kwargs: See `build_toco_convert_protos`. 
@@ -375,14 +393,16 @@ def toco_convert_graph_def(input_data, input_arrays_with_shape, output_arrays, for name in output_arrays: model_flags.output_arrays.append(name) - data = toco_convert_protos(model_flags.SerializeToString(), - toco_flags.SerializeToString(), - input_data.SerializeToString()) + data = toco_convert_protos( + model_flags.SerializeToString(), + toco_flags.SerializeToString(), + input_data.SerializeToString(), + enable_mlir_converter=enable_mlir_converter) return data -def toco_convert_impl(input_data, input_tensors, output_tensors, *args, - **kwargs): +def toco_convert_impl(input_data, input_tensors, output_tensors, + enable_mlir_converter, *args, **kwargs): """"Convert a model using TOCO. Typically this function is used to convert from TensorFlow GraphDef to TFLite. @@ -394,6 +414,8 @@ def toco_convert_impl(input_data, input_tensors, output_tensors, *args, input_tensors: List of input tensors. Type and shape are computed using `foo.shape` and `foo.dtype`. output_tensors: List of output tensors (only .name is used from this). + enable_mlir_converter: Enables the MLIR converter instead of the TOCO + converter. *args: See `build_toco_convert_protos`, **kwargs: See `build_toco_convert_protos`. @@ -404,11 +426,15 @@ def toco_convert_impl(input_data, input_tensors, output_tensors, *args, Raises: Defined in `build_toco_convert_protos`. 
""" - model_flags, toco_flags, _ = build_toco_convert_protos( + model_flags, toco_flags, debug_info = build_toco_convert_protos( input_tensors, output_tensors, *args, **kwargs) - data = toco_convert_protos(model_flags.SerializeToString(), - toco_flags.SerializeToString(), - input_data.SerializeToString()) + debug_info_str = debug_info.SerializeToString() if debug_info else "" + data = toco_convert_protos( + model_flags.SerializeToString(), + toco_flags.SerializeToString(), + input_data.SerializeToString(), + debug_info_str=debug_info_str, + enable_mlir_converter=enable_mlir_converter) return data @@ -437,5 +463,6 @@ def toco_convert(input_data, input_tensors, output_tensors, *args, **kwargs): Raises: Defined in `build_toco_convert_protos`. """ - return toco_convert_impl(input_data, input_tensors, output_tensors, *args, - **kwargs) + enable_mlir_converter = kwargs.get("enable_mlir_converter", False) + return toco_convert_impl(input_data, input_tensors, output_tensors, + enable_mlir_converter, *args, **kwargs) diff --git a/tensorflow/lite/python/convert_test.py b/tensorflow/lite/python/convert_test.py index 693f41cf082..382c351f7a7 100644 --- a/tensorflow/lite/python/convert_test.py +++ b/tensorflow/lite/python/convert_test.py @@ -90,6 +90,7 @@ class ConvertTest(test_util.TensorFlowTestCase): tflite_model = convert.toco_convert_graph_def( sess.graph_def, [("input", [1, 16, 16, 3])], ["add"], + enable_mlir_converter=False, inference_type=lite_constants.FLOAT) self.assertTrue(tflite_model) @@ -126,6 +127,7 @@ class ConvertTest(test_util.TensorFlowTestCase): sess.graph_def, input_arrays_map, output_arrays, + enable_mlir_converter=False, inference_type=lite_constants.QUANTIZED_UINT8, quantized_input_stats=[(0., 1.), (0., 1.)]) self.assertTrue(tflite_model) @@ -171,6 +173,7 @@ class ConvertTest(test_util.TensorFlowTestCase): sess.graph_def, input_arrays_map, output_arrays, + enable_mlir_converter=False, inference_type=lite_constants.QUANTIZED_UINT8) self.assertEqual( 
"std_dev and mean must be defined when inference_input_type is " diff --git a/tensorflow/lite/python/interpreter.py b/tensorflow/lite/python/interpreter.py index 63e54b5fef7..23a654e3035 100644 --- a/tensorflow/lite/python/interpreter.py +++ b/tensorflow/lite/python/interpreter.py @@ -40,7 +40,7 @@ try: except ImportError: # When full Tensorflow Python PIP is not available do not use lazy load # and instead of the tflite_runtime path. - from tflite_runtime.lite.python import interpreter_wrapper as _interpreter_wrapper + from tflite_runtime import interpreter_wrapper as _interpreter_wrapper def tf_export_dummy(*x, **kwargs): del x, kwargs diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py index fe3922a8654..2eb3d67fdd5 100644 --- a/tensorflow/lite/python/lite.py +++ b/tensorflow/lite/python/lite.py @@ -233,6 +233,25 @@ class TFLiteConverterBase(object): self.representative_dataset.input_gen, inference_input_type, inference_output_type, allow_float) + def _get_base_converter_args(self): + """Returns the base converter args. + + Returns: + {key str: val} + """ + float16_quantize = self._is_float16_quantize() + args = { + "input_format": constants.TENSORFLOW_GRAPHDEF, + "allow_custom_ops": self.allow_custom_ops, + "post_training_quantize": (self._is_int8_weight_only_quantize() or + float16_quantize), + "quantize_to_float16": float16_quantize, + "debug_info": self._debug_info, + "target_ops": self._target_ops, + "enable_mlir_converter": self.experimental_enable_mlir_converter, + } + return args + @_tf_export("lite.TFLiteConverter", v1=[]) class TFLiteConverterV2(TFLiteConverterBase): @@ -251,6 +270,8 @@ class TFLiteConverterV2(TFLiteConverterBase): representative_dataset: A representative dataset that can be used to generate input and output samples for the model. The converter can use the dataset to evaluate different optimizations. + experimental_enable_mlir_converter: Experimental flag, subject to change. 
+ Enables the MLIR converter instead of the TOCO converter. Example usage: @@ -287,6 +308,7 @@ class TFLiteConverterV2(TFLiteConverterBase): self.allow_custom_ops = False self.target_spec = TargetSpec() self._debug_info = None + self.experimental_enable_mlir_converter = False @classmethod def from_concrete_functions(cls, funcs): @@ -380,7 +402,7 @@ class TFLiteConverterV2(TFLiteConverterBase): "under development.") frozen_func = _convert_to_constants.convert_variables_to_constants_v2( - self._funcs[0]) + self._funcs[0], lower_control_flow=False) input_tensors = [ tensor for tensor in frozen_func.inputs if tensor.dtype != _dtypes.resource @@ -414,23 +436,7 @@ class TFLiteConverterV2(TFLiteConverterBase): self._validate_representative_dataset() self._debug_info = _get_debug_info( _build_debug_info_func(self._funcs[0].graph), graph_def) - - float16_quantize = self._is_float16_quantize() - - converter_kwargs = { - "input_format": - constants.TENSORFLOW_GRAPHDEF, - "allow_custom_ops": - self.allow_custom_ops, - "post_training_quantize": - self._is_int8_weight_only_quantize() or float16_quantize, - "quantize_to_float16": - float16_quantize, - "target_ops": - self.target_spec.supported_ops, - "debug_info": - self._debug_info - } + converter_kwargs = self._get_base_converter_args() # Converts model. result = _toco_convert_impl( @@ -522,6 +528,8 @@ class TFLiteConverter(TFLiteConverterBase): representative_dataset: A representative dataset that can be used to generate input and output samples for the model. The converter can use the dataset to evaluate different optimizations. + experimental_enable_mlir_converter: Experimental flag, subject to change. + Enables the MLIR converter instead of the TOCO converter. 
Example usage: @@ -597,6 +605,7 @@ class TFLiteConverter(TFLiteConverterBase): self.target_spec = TargetSpec() self._debug_info_func = experimental_debug_info_func self._debug_info = None + self.experimental_enable_mlir_converter = False # Attributes are used by models that cannot be loaded into TensorFlow. if not self._has_valid_tensors(): @@ -800,7 +809,7 @@ class TFLiteConverter(TFLiteConverterBase): concrete_func = function.get_concrete_function() frozen_func = _convert_to_constants.convert_variables_to_constants_v2( - concrete_func) + concrete_func, lower_control_flow=False) _set_tensor_shapes(frozen_func.inputs, input_shapes) return cls( frozen_func.graph.as_graph_def(), @@ -939,31 +948,11 @@ class TFLiteConverter(TFLiteConverterBase): "Provide an inference_input_type and inference_output_type of type " "tf.float32.") - float16_quantize = self._is_float16_quantize() - if not post_training_optimize and self.inference_output_type is not None: raise ValueError( "inference_output_type is currently not supported if optimizations " "are not enabled.") - converter_kwargs = { - "inference_type": self.inference_type, - "inference_input_type": toco_inference_input_type, - "input_format": constants.TENSORFLOW_GRAPHDEF, - "output_format": self.output_format, - "quantized_input_stats": quantized_stats, - "default_ranges_stats": self.default_ranges_stats, - "drop_control_dependency": self.drop_control_dependency, - "reorder_across_fake_quant": self.reorder_across_fake_quant, - "change_concat_input_ranges": self.change_concat_input_ranges, - "allow_custom_ops": self.allow_custom_ops, - "post_training_quantize": weight_only_quantize or float16_quantize, - "quantize_to_float16": float16_quantize, - "target_ops": self._target_ops, - "dump_graphviz_dir": self.dump_graphviz_dir, - "dump_graphviz_video": self.dump_graphviz_video - } - optimized_graph = self._graph_def if self.inference_type != constants.QUANTIZED_UINT8: try: @@ -977,13 +966,26 @@ class 
TFLiteConverter(TFLiteConverterBase): self._debug_info = _get_debug_info(self._debug_info_func, optimized_graph) + converter_kwargs = self._get_base_converter_args() + converter_kwargs.update({ + "inference_type": self.inference_type, + "inference_input_type": toco_inference_input_type, + "output_format": self.output_format, + "quantized_input_stats": quantized_stats, + "default_ranges_stats": self.default_ranges_stats, + "drop_control_dependency": self.drop_control_dependency, + "reorder_across_fake_quant": self.reorder_across_fake_quant, + "change_concat_input_ranges": self.change_concat_input_ranges, + "dump_graphviz_dir": self.dump_graphviz_dir, + "dump_graphviz_video": self.dump_graphviz_video + }) + # Converts model. if self._has_valid_tensors(): result = _toco_convert_impl( input_data=optimized_graph, input_tensors=self._input_tensors, output_tensors=self._output_tensors, - debug_info=self._debug_info, **converter_kwargs) else: result = _toco_convert_graph_def( diff --git a/tensorflow/lite/python/util.py b/tensorflow/lite/python/util.py index 19ca0896f7c..775881ad9f7 100644 --- a/tensorflow/lite/python/util.py +++ b/tensorflow/lite/python/util.py @@ -18,16 +18,15 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import copy import sys -from tensorflow.core.framework import graph_pb2 as _graph_pb2 from tensorflow.core.protobuf import config_pb2 as _config_pb2 from tensorflow.core.protobuf import meta_graph_pb2 as _meta_graph_pb2 from tensorflow.lite.python.op_hint import convert_op_hints_to_stubs from tensorflow.lite.python.op_hint import find_all_hinted_output_nodes from tensorflow.lite.toco import types_pb2 as _types_pb2 from tensorflow.python.eager import function +from tensorflow.python.framework import convert_to_constants as _convert_to_constants from tensorflow.python.framework import dtypes from tensorflow.python.framework import error_interpolation as _error_interpolation from 
tensorflow.python.framework import graph_util as tf_graph_util @@ -43,11 +42,10 @@ _MAP_TF_TO_TFLITE_TYPES = { dtypes.string: _types_pb2.STRING, dtypes.uint8: _types_pb2.QUANTIZED_UINT8, dtypes.int8: _types_pb2.INT8, - dtypes.complex64: _types_pb2.COMPLEX64 + dtypes.complex64: _types_pb2.COMPLEX64, + dtypes.bool: _types_pb2.BOOL, } -_LOWER_USING_SWITCH_MERGE = "_lower_using_switch_merge" - def convert_dtype_to_tflite_type(tf_dtype): """Converts tf.dtype to TFLite proto type. @@ -201,26 +199,6 @@ def run_graph_optimizations(graph_def, return tf_optimizer.OptimizeGraph(config, meta_graph) -def _remove_lower_using_switch_merge(graph_def): - """Remove '_lower_using_switch_merge' attributes from the given graph. - - Args: - graph_def: GraphDef to be optimized. - - Returns: - A new GraphDef that with no '_lower_using_switch_merge' attribute. - """ - out = _graph_pb2.GraphDef() - out.library.CopyFrom(graph_def.library) - out.versions.CopyFrom(graph_def.versions) - for node in graph_def.node: - new_node = copy.deepcopy(node) - if new_node.op == "While": - new_node.attr[_LOWER_USING_SWITCH_MERGE].b = False - out.node.extend([new_node]) - return out - - def _convert_op_hints_if_present(sess, graph_def, output_tensors, hinted_outputs_nodes): if is_frozen_graph(sess): @@ -253,7 +231,8 @@ def freeze_graph(sess, input_tensors, output_tensors): # Asides from inlining any simple function, Grappler will also try to lower # while loop into switch merge representation which is undesired for Ophints, # so we simply remove those attributes to prevent Grappler from doing so. 
- graph_def = _remove_lower_using_switch_merge(sess.graph_def) + graph_def = _convert_to_constants.disable_lower_using_switch_merge( + sess.graph_def) config = get_grappler_config(["function"]) graph_def = run_graph_optimizations( graph_def, input_tensors, output_tensors, config, graph=sess.graph) diff --git a/tensorflow/lite/python/util_test.py b/tensorflow/lite/python/util_test.py index fe861f09a67..f13fad5e821 100644 --- a/tensorflow/lite/python/util_test.py +++ b/tensorflow/lite/python/util_test.py @@ -22,6 +22,7 @@ from tensorflow.lite.python import lite_constants from tensorflow.lite.python import util from tensorflow.lite.toco import types_pb2 as _types_pb2 from tensorflow.python.client import session +from tensorflow.python.framework import convert_to_constants from tensorflow.python.framework import dtypes from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops @@ -54,8 +55,8 @@ class UtilTest(test_util.TensorFlowTestCase): _types_pb2.COMPLEX64) self.assertEqual( util.convert_dtype_to_tflite_type(dtypes.half), _types_pb2.FLOAT16) - with self.assertRaises(ValueError): - util.convert_dtype_to_tflite_type(dtypes.bool) + self.assertEqual( + util.convert_dtype_to_tflite_type(dtypes.bool), _types_pb2.BOOL) def testTensorName(self): in_tensor = array_ops.placeholder(shape=[4], dtype=dtypes.float32) @@ -75,7 +76,8 @@ class UtilTest(test_util.TensorFlowTestCase): b = lambda i: math_ops.add(i, 1) control_flow_ops.while_loop(c, b, [i]) sess = session.Session() - new_graph_def = util._remove_lower_using_switch_merge(sess.graph_def) + new_graph_def = convert_to_constants.disable_lower_using_switch_merge( + sess.graph_def) lower_using_switch_merge_is_removed = False for node in new_graph_def.node: if node.op == "While": diff --git a/tensorflow/lite/python/wrap_toco.py b/tensorflow/lite/python/wrap_toco.py index 7b514804b37..aa17e2ff192 100644 --- a/tensorflow/lite/python/wrap_toco.py +++ b/tensorflow/lite/python/wrap_toco.py @@ 
-29,10 +29,16 @@ _toco_python = LazyLoader( del LazyLoader -def wrapped_toco_convert(model_flags_str, toco_flags_str, input_data_str): +def wrapped_toco_convert(model_flags_str, toco_flags_str, input_data_str, + debug_info_str, enable_mlir_converter): """Wraps TocoConvert with lazy loader.""" - return _toco_python.TocoConvert(model_flags_str, toco_flags_str, - input_data_str) + return _toco_python.TocoConvert( + model_flags_str, + toco_flags_str, + input_data_str, + False, # extended_return + debug_info_str, + enable_mlir_converter) def wrapped_get_potentially_supported_ops(): diff --git a/tensorflow/lite/testing/generate_examples_lib.py b/tensorflow/lite/testing/generate_examples_lib.py index 431a9aa541b..31c8f94a075 100644 --- a/tensorflow/lite/testing/generate_examples_lib.py +++ b/tensorflow/lite/testing/generate_examples_lib.py @@ -3856,6 +3856,33 @@ def make_zeros_like_tests(options): make_zip_of_tests(options, test_parameters, build_graph, build_inputs) +@register_make_test_function() +def make_cast_tests(options): + """Generate examples for cast.""" + test_parameters = [{ + "input_dtype": [tf.int32], + "output_dtype": [tf.float32], + "input_shape": [[], [1], [1, 2], [5, 6, 7, 8], [3, 4, 5, 6]], + }] + + def build_graph(parameters): + """Build the cast testing graph.""" + input_value = tf.placeholder( + dtype=parameters["input_dtype"], + name="input", + shape=parameters["input_shape"]) + out = tf.cast(input_value, parameters["output_dtype"]) + return [input_value], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_value = create_tensor_data(parameters["input_dtype"], + parameters["input_shape"]) + return [input_value], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_value]))) + + make_zip_of_tests(options, test_parameters, build_graph, build_inputs) + + def _make_elementwise_tests(op): """Make a set of tests to do element-wise operations.""" diff --git a/tensorflow/lite/toco/graph_transformations/quantize.cc 
b/tensorflow/lite/toco/graph_transformations/quantize.cc index 680b2711488..8e951e017b8 100644 --- a/tensorflow/lite/toco/graph_transformations/quantize.cc +++ b/tensorflow/lite/toco/graph_transformations/quantize.cc @@ -71,7 +71,9 @@ bool SupportsQuantization(const Operator& op) { type == OperatorType::kReduceMin || type == OperatorType::kTransposeConv || type == OperatorType::kMatrixSetDiag || - type == OperatorType::kMatrixDiag || type == OperatorType::kHardSwish; + type == OperatorType::kMatrixDiag || + type == OperatorType::kSparseToDense || + type == OperatorType::kHardSwish; } // The quantized op allows output arrays of type float using diff --git a/tensorflow/lite/toco/python/BUILD b/tensorflow/lite/toco/python/BUILD index c2b7d7d9668..e11e9cf1578 100644 --- a/tensorflow/lite/toco/python/BUILD +++ b/tensorflow/lite/toco/python/BUILD @@ -22,9 +22,11 @@ cc_library( name = "toco_python_api", srcs = ["toco_python_api.cc"], hdrs = ["toco_python_api.h"], + features = ["no_layering_check"], deps = [ "//third_party/python_runtime:headers", "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", "//tensorflow/lite/python/interpreter_wrapper:python_utils", "//tensorflow/lite/toco:model_flags_proto_cc", "//tensorflow/lite/toco:toco_flags_proto_cc", diff --git a/tensorflow/lite/toco/python/toco.i b/tensorflow/lite/toco/python/toco.i index de10fca99e8..3aa9ce6553f 100644 --- a/tensorflow/lite/toco/python/toco.i +++ b/tensorflow/lite/toco/python/toco.i @@ -26,13 +26,18 @@ namespace toco { // parameters (see relevant .protos for more information). Returns a string // representing the contents of the converted model. When extended_return // flag is set to true returns a dictionary that contains string representation -// of the converted model and some statitics like arithmetic ops count. +// of the converted model and some statistics like arithmetic ops count. +// `debug_info_str` contains the `GraphDebugInfo` proto. 
When +// `enable_mlir_converter` is True, the MLIR converter is used instead of the +// TOCO converter. PyObject* TocoConvert(PyObject* model_flags_proto_txt_raw, PyObject* toco_flags_proto_txt_raw, PyObject* input_contents_txt_raw, - bool extended_return = false); + bool extended_return = false, + PyObject* debug_info_txt_raw = nullptr, + bool enable_mlir_converter = false); // Returns a list of names of all ops potentially supported by tflite. PyObject* TocoGetPotentiallySupportedOps(); -} // namespace toco \ No newline at end of file +} // namespace toco diff --git a/tensorflow/lite/toco/python/toco_from_protos.py b/tensorflow/lite/toco/python/toco_from_protos.py index 152dd241eab..0566cb8ba60 100644 --- a/tensorflow/lite/toco/python/toco_from_protos.py +++ b/tensorflow/lite/toco/python/toco_from_protos.py @@ -26,11 +26,30 @@ FLAGS = None def execute(unused_args): - model_str = open(FLAGS.model_proto_file, "rb").read() - toco_str = open(FLAGS.toco_proto_file, "rb").read() - input_str = open(FLAGS.model_input_file, "rb").read() + """Runs the converter.""" + with open(FLAGS.model_proto_file, "rb") as model_file: + model_str = model_file.read() - output_str = tensorflow_wrap_toco.TocoConvert(model_str, toco_str, input_str) + with open(FLAGS.toco_proto_file, "rb") as toco_file: + toco_str = toco_file.read() + + with open(FLAGS.model_input_file, "rb") as input_file: + input_str = input_file.read() + + debug_info_str = "" + if FLAGS.debug_proto_file: + with open(FLAGS.debug_proto_file, "rb") as debug_info_file: + debug_info_str = debug_info_file.read() + + enable_mlir_converter = FLAGS.enable_mlir_converter + + output_str = tensorflow_wrap_toco.TocoConvert( + model_str, + toco_str, + input_str, + False, # extended_return + debug_info_str, + enable_mlir_converter) open(FLAGS.model_output_file, "wb").write(output_str) sys.exit(0) @@ -53,6 +72,17 @@ def main(): "model_output_file", type=str, help="Result of applying TOCO conversion is written here.") + 
parser.add_argument( + "--debug_proto_file", + type=str, + default="", + help=("File containing serialized `GraphDebugInfo` proto that describes " + "logging information.")) + parser.add_argument( + "--enable_mlir_converter", + action="store_true", + help=("Boolean indiciating whether to enable the MLIR converter instead " + "of TOCO converter. (default False)")) FLAGS, unparsed = parser.parse_known_args() diff --git a/tensorflow/lite/toco/python/toco_python_api.cc b/tensorflow/lite/toco/python/toco_python_api.cc index 22557a34cc5..de714b28e3a 100644 --- a/tensorflow/lite/toco/python/toco_python_api.cc +++ b/tensorflow/lite/toco/python/toco_python_api.cc @@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "tensorflow/lite/toco/python/toco_python_api.h" + #include #include #include @@ -20,20 +22,27 @@ limitations under the License. #include "tensorflow/lite/python/interpreter_wrapper/python_utils.h" #include "tensorflow/lite/toco/import_tensorflow.h" #include "tensorflow/lite/toco/model_flags.pb.h" -#include "tensorflow/lite/toco/python/toco_python_api.h" #include "tensorflow/lite/toco/toco_flags.pb.h" #include "tensorflow/lite/toco/toco_graphviz_dump_options.h" #include "tensorflow/lite/toco/toco_port.h" #include "tensorflow/lite/toco/toco_tooling.h" #include "tensorflow/lite/toco/toco_types.h" +#if defined(PLATFORM_GOOGLE) +#include "tensorflow/compiler/mlir/lite/python/graphdef_to_tfl_flatbuffer.h" +#else +#include "tensorflow/core/protobuf/graph_debug_info.pb.h" +#endif + namespace toco { // NOTE(aselle): We are using raw PyObject's here because we want to make // sure we input and output bytes rather than unicode strings for Python3. 
PyObject* TocoConvert(PyObject* model_flags_proto_txt_raw, PyObject* toco_flags_proto_txt_raw, - PyObject* input_contents_txt_raw, bool extended_return) { + PyObject* input_contents_txt_raw, bool extended_return, + PyObject* debug_info_txt_raw, + bool enable_mlir_converter) { // Use Python C API to validate and convert arguments. In py3 (bytes), // in py2 (str). auto ConvertArg = [&](PyObject* obj, bool* error) { @@ -70,12 +79,35 @@ PyObject* TocoConvert(PyObject* model_flags_proto_txt_raw, // Use TOCO to produce new outputs. toco::ModelFlags model_flags; if (!model_flags.ParseFromString(model_flags_proto_txt)) { - PyErr_SetString(PyExc_ValueError, "Model proto failed to parse."); + PyErr_SetString(PyExc_ValueError, + "Failed to convert Model to Python String."); return nullptr; } toco::TocoFlags toco_flags; if (!toco_flags.ParseFromString(toco_flags_proto_txt)) { - PyErr_SetString(PyExc_ValueError, "Toco proto failed to parse."); + PyErr_SetString(PyExc_ValueError, + "Failed to convert Toco to Python String."); + return nullptr; + } + + tensorflow::GraphDebugInfo debug_info; + if (debug_info_txt_raw) { + std::string debug_info_txt = ConvertArg(debug_info_txt_raw, &error); + if (error) { + PyErr_SetString(PyExc_ValueError, "Input DebugInfo is invalid."); + return nullptr; + } + if (!debug_info.ParseFromString(debug_info_txt)) { + PyErr_SetString(PyExc_ValueError, + "Failed to convert DebugInfo to Python String."); + return nullptr; + } + } + + tensorflow::GraphDef graph_def; + if (!graph_def.ParseFromString(input_contents_txt)) { + PyErr_SetString(PyExc_ValueError, + "Failed to convert GraphDef to Python String."); return nullptr; } @@ -87,18 +119,36 @@ PyObject* TocoConvert(PyObject* model_flags_proto_txt_raw, dump_options.dump_graphviz_video = toco_flags.dump_graphviz_include_video(); } - // Convert model. 
- std::unique_ptr model = - toco::Import(toco_flags, model_flags, input_contents_txt); - toco::Transform(toco_flags, model.get()); string output_file_contents_txt; - auto status = Export(toco_flags, *model, toco_flags.allow_custom_ops(), - &output_file_contents_txt); + tensorflow::Status status; + std::unique_ptr model; + + // Convert model. + if (enable_mlir_converter) { +#if defined(PLATFORM_GOOGLE) + status = tensorflow::ConvertGraphDefToTFLiteFlatBuffer( + model_flags, toco_flags, debug_info, graph_def, + &output_file_contents_txt); +#else + // TODO(b/124314620): Remove this condition. + PyErr_SetString(PyExc_Exception, + "This flag is not supported by this version of the " + "TFLite converter. This functionality is being " + "actively worked on."); + return nullptr; +#endif + } else { + model = toco::Import(toco_flags, model_flags, input_contents_txt); + toco::Transform(toco_flags, model.get()); + status = Export(toco_flags, *model, toco_flags.allow_custom_ops(), + &output_file_contents_txt); + } + if (!status.ok()) { PyErr_SetString(PyExc_Exception, status.error_message().c_str()); return nullptr; } - if (extended_return) { + if (extended_return && !enable_mlir_converter) { PyObject* dict = PyDict_New(); PyDict_SetItemString( dict, "flatbuffer", diff --git a/tensorflow/lite/toco/python/toco_python_api.h b/tensorflow/lite/toco/python/toco_python_api.h index 20390c32a5e..add7bf9f4ed 100644 --- a/tensorflow/lite/toco/python/toco_python_api.h +++ b/tensorflow/lite/toco/python/toco_python_api.h @@ -25,11 +25,16 @@ namespace toco { // parameters (see relevant .protos for more information). Returns a string // representing the contents of the converted model. When extended_return // flag is set to true returns a dictionary that contains string representation -// of the converted model and some statitics like arithmetic ops count. +// of the converted model and some statistics like arithmetic ops count. +// `debug_info_str` contains the `GraphDebugInfo` proto. 
When +// `enable_mlir_converter` is True, the MLIR converter is used instead of the +// TOCO converter. PyObject* TocoConvert(PyObject* model_flags_proto_txt_raw, PyObject* toco_flags_proto_txt_raw, PyObject* input_contents_txt_raw, - bool extended_return = false); + bool extended_return = false, + PyObject* debug_info_txt_raw = nullptr, + bool enable_mlir_converter = false); // Returns a list of names of all ops potentially supported by tflite. PyObject* TocoGetPotentiallySupportedOps(); diff --git a/tensorflow/lite/tools/accuracy/ilsvrc/README.md b/tensorflow/lite/tools/accuracy/ilsvrc/README.md index 6e27c8570f3..1f5a0121069 100644 --- a/tensorflow/lite/tools/accuracy/ilsvrc/README.md +++ b/tensorflow/lite/tools/accuracy/ilsvrc/README.md @@ -56,6 +56,10 @@ and the following optional parameters: Optionally, the computed accuracies can be output to a file as a string-serialized instance of tflite::evaluation::TopkAccuracyEvalMetrics. +* `num_ranks`: `int` (default=10) \ + The number of top-K accuracies to return. For example, if num_ranks=5, top-1 + to top-5 accuracy fractions are returned. 
+ The following optional parameters can be used to modify the inference runtime: * `num_interpreter_threads`: `int` (default=1) \ diff --git a/tensorflow/lite/tools/accuracy/ilsvrc/imagenet_model_evaluator.cc b/tensorflow/lite/tools/accuracy/ilsvrc/imagenet_model_evaluator.cc index d7230976961..f296b89b583 100644 --- a/tensorflow/lite/tools/accuracy/ilsvrc/imagenet_model_evaluator.cc +++ b/tensorflow/lite/tools/accuracy/ilsvrc/imagenet_model_evaluator.cc @@ -49,6 +49,7 @@ constexpr char kInterpreterThreadsFlag[] = "num_interpreter_threads"; constexpr char kDelegateFlag[] = "delegate"; constexpr char kNnapiDelegate[] = "nnapi"; constexpr char kGpuDelegate[] = "gpu"; +constexpr char kNumRanksFlag[] = "num_ranks"; template std::vector GetFirstN(const std::vector& v, int n) { @@ -144,6 +145,9 @@ class CompositeObserver : public ImagenetModelEvaluator::Observer { tflite::Flag::CreateFlag(kDelegateFlag, ¶ms.delegate, "Delegate to use for inference, if available. " "Must be one of {'nnapi', 'gpu'}"), + tflite::Flag::CreateFlag(kNumRanksFlag, ¶ms.num_ranks, + "Generates the top-1 to top-k accuracy values" + "where k = num_ranks. Default: 10"), }; tflite::Flags::Parse(&argc, const_cast(argv), flag_list); diff --git a/tensorflow/lite/tools/evaluation/proto/evaluation_stages.proto b/tensorflow/lite/tools/evaluation/proto/evaluation_stages.proto index 45d4a6b4714..74ab8c2a712 100644 --- a/tensorflow/lite/tools/evaluation/proto/evaluation_stages.proto +++ b/tensorflow/lite/tools/evaluation/proto/evaluation_stages.proto @@ -23,7 +23,7 @@ option java_package = "tflite.evaluation"; // Defines the functionality executed by an EvaluationStage. 
// -// Next ID: 6 +// Next ID: 7 message ProcessSpecification { oneof params { ImagePreprocessingParams image_preprocessing_params = 1; @@ -32,6 +32,7 @@ message ProcessSpecification { ImageClassificationParams image_classification_params = 4; ObjectDetectionAveragePrecisionParams object_detection_average_precision_params = 5; + ObjectDetectionParams object_detection_params = 6; } } @@ -71,7 +72,7 @@ message AccuracyMetrics { // Contains process-specific metrics, which may differ based on what an // EvaluationStage does. // -// Next ID: 7 +// Next ID: 8 message ProcessMetrics { optional LatencyMetrics total_latency = 1; @@ -82,6 +83,7 @@ message ProcessMetrics { InferenceProfilerMetrics inference_profiler_metrics = 5; ObjectDetectionAveragePrecisionMetrics object_detection_average_precision_metrics = 6; + ObjectDetectionMetrics object_detection_metrics = 7; } } @@ -283,3 +285,33 @@ message ObjectDetectionAveragePrecisionMetrics { // Average of Average Precision across all IoU thresholds. optional float overall_mean_average_precision = 2; } + +// Parameters that define how the Object Detection task is evaluated +// end-to-end. +// +// Next ID: 4 +message ObjectDetectionParams { + // Required. + // Model's outputs should be same as a TFLite-compatible SSD model. + // Refer: + // https://www.tensorflow.org/lite/models/object_detection/overview#output + // TODO(b/133772912): Generalize support for other types of object detection + // models. + optional TfliteInferenceParams inference_params = 1; + // Optional. Used to match ground-truth categories with model output. + // SSD Mobilenet V1 Model trained on COCO assumes class 0 is background class + // in the label file and class labels start from 1 to number_of_classes+1. + // Therefore, default value is set as 1. + optional int32 class_offset = 2 [default = 1]; + optional ObjectDetectionAveragePrecisionParams ap_params = 3; +} + +// Metrics from evaluation of the object detection task. 
+// +// Next ID: 5 +message ObjectDetectionMetrics { + optional LatencyMetrics pre_processing_latency = 1; + optional LatencyMetrics inference_latency = 2; + optional TfliteInferenceMetrics inference_metrics = 3; + optional ObjectDetectionAveragePrecisionMetrics average_precision_metrics = 4; +} diff --git a/tensorflow/lite/tools/evaluation/stages/BUILD b/tensorflow/lite/tools/evaluation/stages/BUILD index 2af95378741..6ee00c853fb 100644 --- a/tensorflow/lite/tools/evaluation/stages/BUILD +++ b/tensorflow/lite/tools/evaluation/stages/BUILD @@ -209,3 +209,23 @@ cc_test( "@com_google_googletest//:gtest_main", ], ) + +cc_library( + name = "object_detection_stage", + srcs = ["object_detection_stage.cc"], + hdrs = ["object_detection_stage.h"], + copts = tflite_copts(), + deps = [ + ":image_preprocessing_stage", + ":object_detection_average_precision_stage", + ":tflite_inference_stage", + "//tensorflow/core:tflite_portable_logging", + "//tensorflow/lite/c:c_api_internal", + "//tensorflow/lite/tools/evaluation:evaluation_stage", + "//tensorflow/lite/tools/evaluation:utils", + "//tensorflow/lite/tools/evaluation/proto:evaluation_config_cc_proto", + "//tensorflow/lite/tools/evaluation/proto:evaluation_stages_cc_proto", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_protobuf//:protobuf_headers", + ], +) diff --git a/tensorflow/lite/tools/evaluation/stages/object_detection_stage.cc b/tensorflow/lite/tools/evaluation/stages/object_detection_stage.cc new file mode 100644 index 00000000000..298d7eece76 --- /dev/null +++ b/tensorflow/lite/tools/evaluation/stages/object_detection_stage.cc @@ -0,0 +1,188 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/lite/tools/evaluation/stages/object_detection_stage.h" + +#include + +#include "google/protobuf/text_format.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/lite/c/c_api_internal.h" +#include "tensorflow/lite/tools/evaluation/proto/evaluation_config.pb.h" +#include "tensorflow/lite/tools/evaluation/proto/evaluation_stages.pb.h" +#include "tensorflow/lite/tools/evaluation/utils.h" + +namespace tflite { +namespace evaluation { + +TfLiteStatus ObjectDetectionStage::Init() { + // Ensure inference params are provided. + if (!config_.specification().has_object_detection_params()) { + LOG(ERROR) << "ObjectDetectionParams not provided"; + return kTfLiteError; + } + auto& params = config_.specification().object_detection_params(); + if (!params.has_inference_params()) { + LOG(ERROR) << "inference_params not provided"; + return kTfLiteError; + } + if (all_labels_ == nullptr) { + LOG(ERROR) << "Detection output labels not provided"; + return kTfLiteError; + } + + // TfliteInferenceStage. + EvaluationStageConfig tflite_inference_config; + tflite_inference_config.set_name("tflite_inference"); + *tflite_inference_config.mutable_specification() + ->mutable_tflite_inference_params() = params.inference_params(); + inference_stage_.reset(new TfliteInferenceStage(tflite_inference_config)); + TF_LITE_ENSURE_STATUS(inference_stage_->Init()); + + // Validate model inputs. 
+ const TfLiteModelInfo* model_info = inference_stage_->GetModelInfo(); + if (model_info->inputs.size() != 1 || model_info->outputs.size() != 4) { + LOG(ERROR) << "Object detection model must have 1 input & 4 outputs"; + return kTfLiteError; + } + TfLiteType input_type = model_info->inputs[0]->type; + auto* input_shape = model_info->inputs[0]->dims; + // Input should be of the shape {1, height, width, 3} + if (input_shape->size != 4 || input_shape->data[0] != 1 || + input_shape->data[3] != 3) { + LOG(ERROR) << "Invalid input shape for model"; + return kTfLiteError; + } + + // ImagePreprocessingStage + EvaluationStageConfig preprocessing_config; + preprocessing_config.set_name("image_preprocessing"); + auto* preprocess_params = preprocessing_config.mutable_specification() + ->mutable_image_preprocessing_params(); + preprocess_params->set_image_height(input_shape->data[1]); + preprocess_params->set_image_width(input_shape->data[2]); + preprocess_params->set_cropping_fraction(1.0); + preprocess_params->set_output_type(static_cast(input_type)); + preprocessing_stage_.reset(new ImagePreprocessingStage(preprocessing_config)); + TF_LITE_ENSURE_STATUS(preprocessing_stage_->Init()); + + // ObjectDetectionAveragePrecisionStage + EvaluationStageConfig eval_config; + eval_config.set_name("average_precision"); + *eval_config.mutable_specification() + ->mutable_object_detection_average_precision_params() = + params.ap_params(); + eval_config.mutable_specification() + ->mutable_object_detection_average_precision_params() + ->set_num_classes(all_labels_->size()); + eval_stage_.reset(new ObjectDetectionAveragePrecisionStage(eval_config)); + TF_LITE_ENSURE_STATUS(eval_stage_->Init()); + + return kTfLiteOk; +} + +TfLiteStatus ObjectDetectionStage::Run() { + if (image_path_.empty()) { + LOG(ERROR) << "Input image not set"; + return kTfLiteError; + } + + // Preprocessing. 
+ preprocessing_stage_->SetImagePath(&image_path_); + TF_LITE_ENSURE_STATUS(preprocessing_stage_->Run()); + + // Inference. + std::vector data_ptrs = {}; + data_ptrs.push_back(preprocessing_stage_->GetPreprocessedImageData()); + inference_stage_->SetInputs(data_ptrs); + TF_LITE_ENSURE_STATUS(inference_stage_->Run()); + + // Convert model output to ObjectsSet. + ObjectDetectionResult predicted_objects; + const int class_offset = + config_.specification().object_detection_params().class_offset(); + const std::vector* outputs = inference_stage_->GetOutputs(); + int num_detections = static_cast(*static_cast(outputs->at(3))); + float* detected_label_boxes = static_cast(outputs->at(0)); + float* detected_label_indices = static_cast(outputs->at(1)); + float* detected_label_probabilities = static_cast(outputs->at(2)); + for (int i = 0; i < num_detections; ++i) { + const int bounding_box_offset = i * 4; + auto* object = predicted_objects.add_objects(); + // Bounding box + auto* bbox = object->mutable_bounding_box(); + bbox->set_normalized_top(detected_label_boxes[bounding_box_offset + 0]); + bbox->set_normalized_left(detected_label_boxes[bounding_box_offset + 1]); + bbox->set_normalized_bottom(detected_label_boxes[bounding_box_offset + 2]); + bbox->set_normalized_right(detected_label_boxes[bounding_box_offset + 3]); + // Class. + object->set_class_id(static_cast(detected_label_indices[i]) + + class_offset); + // Score + object->set_score(detected_label_probabilities[i]); + } + + // AP Evaluation. 
+ eval_stage_->SetEvalInputs(predicted_objects, *ground_truth_objects_); + TF_LITE_ENSURE_STATUS(eval_stage_->Run()); + + return kTfLiteOk; +} + +EvaluationStageMetrics ObjectDetectionStage::LatestMetrics() { + EvaluationStageMetrics metrics; + auto* detection_metrics = + metrics.mutable_process_metrics()->mutable_object_detection_metrics(); + + *detection_metrics->mutable_pre_processing_latency() = + preprocessing_stage_->LatestMetrics().process_metrics().total_latency(); + EvaluationStageMetrics inference_metrics = inference_stage_->LatestMetrics(); + *detection_metrics->mutable_inference_latency() = + inference_metrics.process_metrics().total_latency(); + *detection_metrics->mutable_inference_metrics() = + inference_metrics.process_metrics().tflite_inference_metrics(); + *detection_metrics->mutable_average_precision_metrics() = + eval_stage_->LatestMetrics() + .process_metrics() + .object_detection_average_precision_metrics(); + metrics.set_num_runs(inference_metrics.num_runs()); + return metrics; +} + +TfLiteStatus PopulateGroundTruth( + const std::string& grouth_truth_pbtxt_file, + absl::flat_hash_map* + ground_truth_mapping) { + if (ground_truth_mapping == nullptr) { + return kTfLiteError; + } + ground_truth_mapping->clear(); + + // Read the ground truth dump. 
+ std::ifstream t(grouth_truth_pbtxt_file); + // Fail loudly rather than silently returning an empty mapping. + if (!t.is_open()) { + LOG(ERROR) << "Unable to open ground truth file: " + << grouth_truth_pbtxt_file; + return kTfLiteError; + } + std::string proto_str((std::istreambuf_iterator(t)), + std::istreambuf_iterator()); + ObjectDetectionGroundTruth ground_truth_proto; + // ParseFromString returns false on malformed text protos. + if (!proto2::TextFormat::ParseFromString(proto_str, &ground_truth_proto)) { + LOG(ERROR) << "Unable to parse ObjectDetectionGroundTruth from: " + << grouth_truth_pbtxt_file; + return kTfLiteError; + } + + // Iterate by const reference to avoid copying each ObjectDetectionResult. + for (const auto& image_ground_truth : + ground_truth_proto.detection_results()) { + (*ground_truth_mapping)[image_ground_truth.image_name()] = + image_ground_truth; + } + + return kTfLiteOk; +} + +} // namespace evaluation +} // namespace tflite diff --git a/tensorflow/lite/tools/evaluation/stages/object_detection_stage.h b/tensorflow/lite/tools/evaluation/stages/object_detection_stage.h new file mode 100644 index 00000000000..ec9772754eb --- /dev/null +++ b/tensorflow/lite/tools/evaluation/stages/object_detection_stage.h @@ -0,0 +1,96 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_TOOLS_EVALUATION_STAGES_OBJECT_DETECTION_STAGE_H_ +#define TENSORFLOW_LITE_TOOLS_EVALUATION_STAGES_OBJECT_DETECTION_STAGE_H_ + +#include +#include +#include + +#include "absl/container/flat_hash_map.h" +#include "tensorflow/lite/tools/evaluation/evaluation_stage.h" +#include "tensorflow/lite/tools/evaluation/proto/evaluation_config.pb.h" +#include "tensorflow/lite/tools/evaluation/proto/evaluation_stages.pb.h" +#include "tensorflow/lite/tools/evaluation/stages/image_preprocessing_stage.h" +#include "tensorflow/lite/tools/evaluation/stages/object_detection_average_precision_stage.h" +#include "tensorflow/lite/tools/evaluation/stages/tflite_inference_stage.h" + +namespace tflite { +namespace evaluation { + +// An EvaluationStage to encapsulate the complete Object Detection task. +// Assumes that the object detection model's signature (number of +// inputs/outputs, ordering of outputs & what they denote) is same as the +// MobileNet SSD model: +// https://www.tensorflow.org/lite/models/object_detection/overview#output. +// Input size/type & number of detections could be different. +// TODO(b/133772912): Generalize support for other types of object detection +// models. +class ObjectDetectionStage : public EvaluationStage { + public: + explicit ObjectDetectionStage(const EvaluationStageConfig& config) + : EvaluationStage(config) {} + + TfLiteStatus Init() override; + + TfLiteStatus Run() override; + + EvaluationStageMetrics LatestMetrics() override; + + // Call before Init(). all_labels should contain all possible object labels + // that can be detected by the model, in the correct order. all_labels should + // outlive the call to Init(). + void SetAllLabels(const std::vector& all_labels) { + all_labels_ = &all_labels; + } + + // Call before Run(). + // ground_truth_objects instance should outlive the call to Run(). 
+ void SetInputs(const std::string& image_path, + const ObjectDetectionResult& ground_truth_objects) { + image_path_ = image_path; + ground_truth_objects_ = &ground_truth_objects; + } + + // Provides a pointer to the underlying TfliteInferenceStage. + // Returns non-null value only if this stage has been initialized. + TfliteInferenceStage* const GetInferenceStage() { + return inference_stage_.get(); + } + + private: + const std::vector* all_labels_ = nullptr; + std::unique_ptr preprocessing_stage_; + std::unique_ptr inference_stage_; + std::unique_ptr eval_stage_; + std::string image_path_; + const ObjectDetectionResult* ground_truth_objects_ = nullptr; +}; + +// Reads a tflite::evaluation::ObjectDetectionGroundTruth instance from a +// textproto file and populates a mapping of image name to +// ObjectDetectionResult. +// File with ObjectDetectionGroundTruth can be generated using the +// preprocess_coco_minival.py script in evaluation/tasks/coco_object_detection. +// Useful for wrappers/scripts that use ObjectDetectionStage. +TfLiteStatus PopulateGroundTruth( + const std::string& grouth_truth_pbtxt_file, + absl::flat_hash_map* + ground_truth_mapping); + +} // namespace evaluation +} // namespace tflite + +#endif // TENSORFLOW_LITE_TOOLS_EVALUATION_STAGES_OBJECT_DETECTION_STAGE_H_ diff --git a/tensorflow/lite/tools/make/Makefile b/tensorflow/lite/tools/make/Makefile index 38fb48f9703..393a66255b1 100644 --- a/tensorflow/lite/tools/make/Makefile +++ b/tensorflow/lite/tools/make/Makefile @@ -79,7 +79,7 @@ BENCHMARK_BINARY_NAME := benchmark_model # A small example program that shows how to link against the library. MINIMAL_SRCS := \ -tensorflow/lite/examples/minimal/minimal.cc + tensorflow/lite/examples/minimal/minimal.cc # What sources we want to compile, must be kept in sync with the main Bazel # build files. 
@@ -91,7 +91,7 @@ PROFILE_SUMMARIZER_SRCS := \ tensorflow/core/util/stats_calculator.cc CMD_LINE_TOOLS_SRCS := \ - tensorflow/lite/tools/command_line_flags.cc + tensorflow/lite/tools/command_line_flags.cc CORE_CC_ALL_SRCS := \ $(wildcard tensorflow/lite/*.cc) \ @@ -99,19 +99,19 @@ $(wildcard tensorflow/lite/*.c) \ $(wildcard tensorflow/lite/c/*.c) \ $(wildcard tensorflow/lite/core/*.cc) \ $(wildcard tensorflow/lite/core/api/*.cc) \ -$(wildcard tensorflow/lite/experimental/ruy/allocator.cc) \ -$(wildcard tensorflow/lite/experimental/ruy/block_map.cc) \ -$(wildcard tensorflow/lite/experimental/ruy/blocking_counter.cc) \ -$(wildcard tensorflow/lite/experimental/ruy/context.cc) \ -$(wildcard tensorflow/lite/experimental/ruy/detect_dotprod.cc) \ -$(wildcard tensorflow/lite/experimental/ruy/kernel.cc) \ -$(wildcard tensorflow/lite/experimental/ruy/pack.cc) \ -$(wildcard tensorflow/lite/experimental/ruy/pmu.cc) \ -$(wildcard tensorflow/lite/experimental/ruy/thread_pool.cc) \ -$(wildcard tensorflow/lite/experimental/ruy/trace.cc) \ -$(wildcard tensorflow/lite/experimental/ruy/trmul.cc) \ -$(wildcard tensorflow/lite/experimental/ruy/tune.cc) \ -$(wildcard tensorflow/lite/experimental/ruy/wait.cc) +tensorflow/lite/experimental/ruy/allocator.cc \ +tensorflow/lite/experimental/ruy/block_map.cc \ +tensorflow/lite/experimental/ruy/blocking_counter.cc \ +tensorflow/lite/experimental/ruy/context.cc \ +tensorflow/lite/experimental/ruy/detect_dotprod.cc \ +tensorflow/lite/experimental/ruy/kernel.cc \ +tensorflow/lite/experimental/ruy/pack.cc \ +tensorflow/lite/experimental/ruy/pmu.cc \ +tensorflow/lite/experimental/ruy/thread_pool.cc \ +tensorflow/lite/experimental/ruy/trace.cc \ +tensorflow/lite/experimental/ruy/trmul.cc \ +tensorflow/lite/experimental/ruy/tune.cc \ +tensorflow/lite/experimental/ruy/wait.cc ifneq ($(BUILD_TYPE),micro) CORE_CC_ALL_SRCS += \ $(wildcard tensorflow/lite/kernels/*.cc) \ @@ -119,13 +119,9 @@ $(wildcard tensorflow/lite/kernels/internal/*.cc) \ $(wildcard 
tensorflow/lite/kernels/internal/optimized/*.cc) \ $(wildcard tensorflow/lite/kernels/internal/reference/*.cc) \ $(PROFILER_SRCS) \ -$(wildcard tensorflow/lite/kernels/*.c) \ -$(wildcard tensorflow/lite/kernels/internal/*.c) \ -$(wildcard tensorflow/lite/kernels/internal/optimized/*.c) \ -$(wildcard tensorflow/lite/kernels/internal/reference/*.c) \ -$(wildcard tensorflow/lite/tools/make/downloads/farmhash/src/farmhash.cc) \ -$(wildcard tensorflow/lite/tools/make/downloads/fft2d/fftsg.c) \ -$(wildcard tensorflow/lite/tools/make/downloads/flatbuffers/src/util.cpp) +tensorflow/lite/tools/make/downloads/farmhash/src/farmhash.cc \ +tensorflow/lite/tools/make/downloads/fft2d/fftsg.c \ +tensorflow/lite/tools/make/downloads/flatbuffers/src/util.cpp endif # Remove any duplicates. CORE_CC_ALL_SRCS := $(sort $(CORE_CC_ALL_SRCS)) @@ -138,7 +134,7 @@ $(wildcard tensorflow/lite/kernels/*test_main.cc) \ $(wildcard tensorflow/lite/kernels/*test_util.cc) \ $(MINIMAL_SRCS) -BUILD_WITH_MMAP=true +BUILD_WITH_MMAP ?= true ifeq ($(BUILD_TYPE),micro) BUILD_WITH_MMAP=false endif @@ -151,7 +147,7 @@ else CORE_CC_EXCLUDE_SRCS += tensorflow/lite/mmap_allocation_disabled.cc endif -BUILD_WITH_NNAPI=true +BUILD_WITH_NNAPI ?= true ifeq ($(BUILD_TYPE),micro) BUILD_WITH_NNAPI=false endif @@ -191,7 +187,7 @@ EVALUATION_UTILS_SRCS := \ BENCHMARK_ALL_SRCS := $(TF_LITE_CC_SRCS) \ $(wildcard $(BENCHMARK_SRCS_DIR)/*.cc) \ $(PROFILE_SUMMARIZER_SRCS) \ - $(CMD_LINE_TOOLS_SRCS) \ + $(CMD_LINE_TOOLS_SRCS) \ $(EVALUATION_UTILS_SRCS) BENCHMARK_SRCS := $(filter-out \ diff --git a/tensorflow/lite/tools/make/build_aarch64_lib.sh b/tensorflow/lite/tools/make/build_aarch64_lib.sh index 054b3daedf8..0ce4089c11c 100755 --- a/tensorflow/lite/tools/make/build_aarch64_lib.sh +++ b/tensorflow/lite/tools/make/build_aarch64_lib.sh @@ -1,4 +1,4 @@ -#!/bin/bash -x +#!/bin/bash # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,9 +14,11 @@ # limitations under the License. # ============================================================================== +set -x set -e SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -cd "$SCRIPT_DIR/../../../.." +TENSORFLOW_DIR="${SCRIPT_DIR}/../../../.." + +make -j 4 TARGET=aarch64 -C "${TENSORFLOW_DIR}" -f tensorflow/lite/tools/make/Makefile -CC_PREFIX=aarch64-linux-gnu- make -j 3 -f tensorflow/lite/tools/make/Makefile TARGET=aarch64 TARGET_ARCH=armv8-a diff --git a/tensorflow/lite/tools/make/build_lib.sh b/tensorflow/lite/tools/make/build_lib.sh new file mode 100755 index 00000000000..7fdd262ee9c --- /dev/null +++ b/tensorflow/lite/tools/make/build_lib.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -x +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +TENSORFLOW_DIR="${SCRIPT_DIR}/../../../.." 
+ +make -j 4 BUILD_WITH_NNAPI=false -C "${TENSORFLOW_DIR}" -f tensorflow/lite/tools/make/Makefile + diff --git a/tensorflow/lite/tools/make/build_rpi_lib.sh b/tensorflow/lite/tools/make/build_rpi_lib.sh index 1521bb39332..c7edc6755e9 100755 --- a/tensorflow/lite/tools/make/build_rpi_lib.sh +++ b/tensorflow/lite/tools/make/build_rpi_lib.sh @@ -1,4 +1,4 @@ -#!/bin/bash -x +#!/bin/bash # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,9 +14,11 @@ # limitations under the License. # ============================================================================== +set -x set -e SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -cd "$SCRIPT_DIR/../../../.." +TENSORFLOW_DIR="${SCRIPT_DIR}/../../../.." + +make -j 4 TARGET=rpi -C "${TENSORFLOW_DIR}" -f tensorflow/lite/tools/make/Makefile -CC_PREFIX=arm-linux-gnueabihf- make -j 3 -f tensorflow/lite/tools/make/Makefile TARGET=rpi TARGET_ARCH=armv7l diff --git a/tensorflow/lite/tools/pip_package/README.md b/tensorflow/lite/tools/pip_package/README.md index 8190782c39f..adab810126a 100644 --- a/tensorflow/lite/tools/pip_package/README.md +++ b/tensorflow/lite/tools/pip_package/README.md @@ -18,8 +18,8 @@ pip install --upgrade Note, unlike tensorflow this will be installed to a tflite_runtime namespace. You can then use the Tensorflow Lite interpreter as. ``` -import tflite_runtime as tflr -interpreter = tflr.lite.Interpreter(model_path="foo.tflite") +from tflite_runtime import interpreter as tflr +interpreter = tflr.Interpreter(model_path="foo.tflite") ``` This currently works to build on Linux machines including Raspberry Pi. 
In diff --git a/tensorflow/lite/tools/pip_package/build_pip_package.sh b/tensorflow/lite/tools/pip_package/build_pip_package.sh index 2887ce84712..1cb3866af73 100644 --- a/tensorflow/lite/tools/pip_package/build_pip_package.sh +++ b/tensorflow/lite/tools/pip_package/build_pip_package.sh @@ -16,39 +16,36 @@ set -e +PYTHON="${PYTHON:-python}" + # Find where this script lives and then the Tensorflow root. -MY_DIRECTORY=`dirname $0` -export TENSORFLOW_SRC_ROOT=`realpath $MY_DIRECTORY/../../../..` +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -export TENSORFLOW_VERSION=`grep "_VERSION = " $TENSORFLOW_SRC_ROOT/tensorflow/tools/pip_package/setup.py | cut -d'=' -f 2 | sed "s/[ '-]//g"`; +export TENSORFLOW_SRC_ROOT="${SCRIPT_DIR}/../../../.." +export TENSORFLOW_VERSION=`grep "_VERSION = " "${TENSORFLOW_SRC_ROOT}/tensorflow/tools/pip_package/setup.py" | cut -d'=' -f 2 | sed "s/[ '-]//g"`; +TFLITE_ROOT="${TENSORFLOW_SRC_ROOT}/tensorflow/lite" # Build a pip build tree. -BUILD_ROOT=/tmp/tflite_pip -rm -rf $BUILD_ROOT -mkdir -p $BUILD_ROOT/tflite_runtime/lite -mkdir -p $BUILD_ROOT/tflite_runtime/lite/python +BUILD_ROOT="/tmp/tflite_pip/${PYTHON}" +rm -rf "${BUILD_ROOT}" +mkdir -p "${BUILD_ROOT}/tflite_runtime/" -# Build an importable module tree -cat > $BUILD_ROOT/tflite_runtime/__init__.py < $BUILD_ROOT/tflite_runtime/lite/__init__.py < $BUILD_ROOT/tflite_runtime/lite/python/__init__.py <= 1 # Has at least the iterate. 
+ if len(results) > 1: + results = results[1:] + else: + results = () + + return results + + +def _tf_range_for_stmt(iter_, extra_test, body, get_state, set_state, + init_vars): + """Overload of for_stmt that iterates over a TF range (and elides it).""" + _disallow_undefs_into_loop(*init_vars) + + start, limit, delta = iter_.op.inputs + + def while_body(iterate, *loop_vars): + new_vars = body(iterate, *loop_vars) + + loop_vars = (iterate + delta,) + if new_vars: + loop_vars += new_vars + + return loop_vars + + def while_cond(iterate, *loop_vars): + main_test = math_ops.logical_or( + math_ops.logical_and(delta >= 0, iterate < limit), + math_ops.logical_and(delta < 0, iterate > limit)) + if extra_test is not None: + return control_flow_ops.cond( + main_test, lambda: extra_test(*loop_vars), lambda: False) + return main_test + + # This specific dtype is required by while_loop. + maximum_iterations = math_ops.cast( + misc.get_range_len(start, limit, delta), dtypes.int32) + + results = _tf_while_stmt( + while_cond, + while_body, + get_state, + set_state, + init_vars=(start,) + init_vars, + opts=dict(maximum_iterations=maximum_iterations)) + + # Note: the iteration index is not returned by the while loop, however + # if a symbol with the same name exists outside the loop, it will be captured + # by the loop variables and ultimately updated correctly. if isinstance(results, (tuple, list)): assert len(results) >= 1 # Has at least the iterate. 
if len(results) > 1: diff --git a/tensorflow/python/autograph/operators/control_flow_test.py b/tensorflow/python/autograph/operators/control_flow_test.py index 0e62bbddfda..cf25075dfcd 100644 --- a/tensorflow/python/autograph/operators/control_flow_test.py +++ b/tensorflow/python/autograph/operators/control_flow_test.py @@ -33,6 +33,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gen_math_ops +from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -50,6 +51,39 @@ class ForLoopTest(test.TestCase): init_vars=(0,)) self.assertEqual(self.evaluate(s), (1234,)) + def test_range_tensor(self): + with ops.Graph().as_default(): + s = control_flow.for_stmt( + math_ops.range(5), + extra_test=lambda s: True, + body=lambda i, s: (s * 10 + i,), + get_state=lambda: (), + set_state=lambda _: None, + init_vars=(0,)) + self.assertEqual(self.evaluate(s), (1234,)) + + def test_range_tensor_explicit_limit_delta(self): + with ops.Graph().as_default(): + s = control_flow.for_stmt( + math_ops.range(-17, -3, 5), + extra_test=lambda s: True, + body=lambda i, s: (s * 100 + i,), + get_state=lambda: (), + set_state=lambda _: None, + init_vars=(0,)) + self.assertEqual(self.evaluate(s), (-171207,)) + + def test_range_tensor_negative_delta(self): + with ops.Graph().as_default(): + s = control_flow.for_stmt( + math_ops.range(17, 3, -5), + extra_test=lambda s: True, + body=lambda i, s: (s * 100 + i,), + get_state=lambda: (), + set_state=lambda _: None, + init_vars=(0,)) + self.assertEqual(self.evaluate(s), (171207,)) + def test_tensor_with_extra_test_only_python_state(self): class MutableObject(object): field_1 = constant_op.constant(0, dtype=dtypes.int32) diff --git a/tensorflow/python/autograph/utils/misc.py b/tensorflow/python/autograph/utils/misc.py index 046e6cf97dc..01c198e6278 
100644 --- a/tensorflow/python/autograph/utils/misc.py +++ b/tensorflow/python/autograph/utils/misc.py @@ -20,6 +20,8 @@ from __future__ import print_function from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_math_ops +from tensorflow.python.ops import math_ops def alias_tensors(*args): @@ -55,3 +57,13 @@ def capitalize_initial(s): if s: return s[0].upper() + s[1:] return s + + +def get_range_len(start, limit, delta): + dist = ops.convert_to_tensor(limit - start) + unadjusted_len = dist // delta + adjustment = math_ops.cast( + gen_math_ops.not_equal(dist % delta, + array_ops.zeros_like(unadjusted_len)), dist.dtype) + final_len = unadjusted_len + adjustment + return gen_math_ops.maximum(final_len, array_ops.zeros_like(final_len)) diff --git a/tensorflow/python/autograph/utils/misc_test.py b/tensorflow/python/autograph/utils/misc_test.py index 24b5753a91a..67c1b827228 100644 --- a/tensorflow/python/autograph/utils/misc_test.py +++ b/tensorflow/python/autograph/utils/misc_test.py @@ -19,6 +19,8 @@ from __future__ import division from __future__ import print_function from tensorflow.python.autograph.utils import misc +from tensorflow.python.eager import def_function +from tensorflow.python.framework import constant_op from tensorflow.python.framework import test_util from tensorflow.python.framework.constant_op import constant from tensorflow.python.ops.variables import Variable @@ -61,6 +63,21 @@ class MiscTest(test.TestCase): with self.cached_session() as sess: self.assertEqual(1, self.evaluate(new_a)) + def test_get_range_len(self): + get_range_as_graph = def_function.function(misc.get_range_len) + test_range = [(i, constant_op.constant(i)) for i in range(-3, 3)] + results = [] + for i, ti in test_range: + for j, tj in test_range: + for k, tk in test_range: + if k == 0: + continue + results.append(((i, j, k), get_range_as_graph(ti, tj, tk))) + + for (i, j, k), result_tensor in results: + 
self.assertEqual( + len(list(range(i, j, k))), self.evaluate(result_tensor)) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/autograph/utils/tensors.py b/tensorflow/python/autograph/utils/tensors.py index 352d67d2bd9..6ae2b947332 100644 --- a/tensorflow/python/autograph/utils/tensors.py +++ b/tensorflow/python/autograph/utils/tensors.py @@ -46,3 +46,8 @@ def is_tensor_list(t): # construct. return (tensor_util.is_tensor(t) and t.dtype == dtypes.variant and not t.shape.ndims) + + +def is_range_tensor(t): + """Returns True if a tensor is the result of a tf.range op. Best effort.""" + return tensor_util.is_tensor(t) and hasattr(t, 'op') and t.op.type == 'Range' diff --git a/tensorflow/python/autograph/utils/tensors_test.py b/tensorflow/python/autograph/utils/tensors_test.py index 1e7cfec9e1b..e561d7bbeff 100644 --- a/tensorflow/python/autograph/utils/tensors_test.py +++ b/tensorflow/python/autograph/utils/tensors_test.py @@ -22,6 +22,7 @@ from tensorflow.python.autograph.utils import tensors from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.ops import list_ops +from tensorflow.python.ops import math_ops from tensorflow.python.ops import tensor_array_ops from tensorflow.python.platform import test @@ -52,6 +53,13 @@ class TensorsTest(test.TestCase): self.assertFalse(tensors.is_tensor_list(self._simple_list_of_tensors())) self.assertFalse(tensors.is_tensor_list(None)) + def test_is_range_tensor(self): + self.assertTrue(tensors.is_range_tensor(math_ops.range(1))) + self.assertTrue(tensors.is_range_tensor(math_ops.range(1, 2))) + self.assertTrue(tensors.is_range_tensor(math_ops.range(1, 2, 3))) + self.assertFalse(tensors.is_range_tensor(None)) + self.assertFalse(tensors.is_range_tensor(constant_op.constant(range(1)))) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/client/timeline_test.py b/tensorflow/python/client/timeline_test.py index 
e7d60de6905..90ed4d18771 100644 --- a/tensorflow/python/client/timeline_test.py +++ b/tensorflow/python/client/timeline_test.py @@ -104,7 +104,10 @@ class TimelineTest(test.TestCase): step_stats = run_metadata.step_stats devices = [d.device for d in step_stats.dev_stats] self.assertTrue('/job:localhost/replica:0/task:0/device:GPU:0' in devices) - self.assertTrue('/device:GPU:0/stream:all' in devices) + if not test.is_built_with_rocm(): + # skip this check for the ROCm platform + # stream level tracing is not yet supported on the ROCm platform + self.assertTrue('/device:GPU:0/stream:all' in devices) tl = timeline.Timeline(step_stats) ctf = tl.generate_chrome_trace_format() self._validateTrace(ctf) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 6a161c4bee9..c976e0acb20 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -27,7 +27,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 7, 8) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 7, 12) @tf_export("compat.forward_compatible") diff --git a/tensorflow/python/compiler/tensorrt/test/const_broadcast_test.py b/tensorflow/python/compiler/tensorrt/test/const_broadcast_test.py index 295c7aaca57..ccbaf9e52fa 100644 --- a/tensorflow/python/compiler/tensorrt/test/const_broadcast_test.py +++ b/tensorflow/python/compiler/tensorrt/test/const_broadcast_test.py @@ -29,6 +29,7 @@ class ConstBroadcastTest(trt_test.TfTrtIntegrationTestBase): """Test for Constant broadcasting in TF-TRT.""" def GraphFn(self, x): + """Return the expected graph to convert.""" dtype = x.dtype filt1 = constant_op.constant( 0.3, shape=(3, 3, 2, 1), dtype=dtype, name='filt1') diff --git a/tensorflow/python/compiler/tensorrt/test/quantization_mnist_test.py b/tensorflow/python/compiler/tensorrt/test/quantization_mnist_test.py index 
46a49cc3c5f..56994617b90 100644 --- a/tensorflow/python/compiler/tensorrt/test/quantization_mnist_test.py +++ b/tensorflow/python/compiler/tensorrt/test/quantization_mnist_test.py @@ -55,6 +55,7 @@ OUTPUT_NODE_NAME = 'output' class QuantizationAwareTrainingMNISTTest(test_util.TensorFlowTestCase): + """Testing usage of quantization ranges inserted in graph.""" def _BuildGraph(self, x): @@ -239,7 +240,7 @@ class QuantizationAwareTrainingMNISTTest(test_util.TensorFlowTestCase): if mode == ModeKeys.EVAL: return EstimatorSpec( mode, loss=loss, eval_metric_ops={'accuracy': accuracy}) - elif mode == ModeKeys.TRAIN: + if mode == ModeKeys.TRAIN: optimizer = AdamOptimizer(learning_rate=1e-2) train_op = optimizer.minimize(loss, global_step=get_global_step()) return EstimatorSpec(mode, loss=loss, train_op=train_op) diff --git a/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py b/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py index 268eee5e8b5..6b72cbec9bd 100644 --- a/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py +++ b/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py @@ -19,8 +19,11 @@ from __future__ import division from __future__ import print_function from collections import namedtuple +import errno +import gc import itertools import os +import shutil import tempfile import warnings import numpy as np @@ -31,6 +34,7 @@ from tensorflow.core.framework import graph_pb2 from tensorflow.core.protobuf import config_pb2 from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python.compiler.tensorrt import trt_convert +from tensorflow.python.eager import def_function from tensorflow.python.framework import graph_io from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_spec @@ -39,12 +43,16 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.platform import tf_logging as 
logging from tensorflow.python.saved_model import builder +from tensorflow.python.saved_model import load from tensorflow.python.saved_model import loader +from tensorflow.python.saved_model import save from tensorflow.python.saved_model import signature_constants from tensorflow.python.saved_model import signature_def_utils from tensorflow.python.saved_model import tag_constants from tensorflow.python.saved_model import utils from tensorflow.python.tools import saved_model_utils +from tensorflow.python.training.tracking import tracking +from tensorflow.python.util import nest TfTrtIntegrationTestParams = namedtuple( "TfTrtIntegrationTestParams", @@ -73,6 +81,8 @@ RunParams = namedtuple( "dynamic_engine", "use_calibration", "test_name", + # Is this test for TF 2.0? + "is_v2", ]) FP32 = "FP32" @@ -286,18 +296,9 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): def _GetFeedDict(self, inputs_data): return {name: data for name, data in zip(self._GetFeedNames(), inputs_data)} - def _RunGraph(self, - run_params, - saved_model_dir, - inputs_data, - config, - graph_state, - num_runs=2): - """Run given graphdef multiple times.""" + def _RunGraphV1(self, saved_model_dir, inputs_data, config, num_runs=2): + """Run given graphdef multiple times using TF 1.x runtime.""" params = self._GetParamsCached() - for data in inputs_data: - assert len(params.input_specs) == len(data) - fetches = self._GetFetchNames() g = ops.Graph() with g.as_default(): @@ -321,10 +322,64 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): vals.append(val) return vals - def _CreateConverter(self, saved_model_dir, session_config, + def _RunGraphV2(self, saved_model_dir, inputs_data, graph_state, num_runs=2): + """Run given graphdef multiple times using TF 2.0 runtime.""" + params = self._GetParamsCached() + root = load.load(saved_model_dir) + func = root.signatures[ + signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] + results = [] + for expected_shapes, current_input_data 
in zip(params.expected_output_dims, + inputs_data): + val = None + for _ in range(num_runs): + feed_dict = { + params.input_specs[i].name: current_input_data[i] + for i in range(len(params.input_specs)) + } + new_val = func(**feed_dict) + assert isinstance(new_val, dict) + # The key of the output map is always like output_i. + new_val = [new_val[key] for key in sorted(new_val)] + # Each element is an eager Tensor, and accessing individual elements is + # very expensive, so we convert them to a numpy array first. + new_val = [v.numpy() for v in new_val] + self.assertEqual(len(expected_shapes), len(new_val)) + for expected_shape, actual_val in zip(expected_shapes, new_val): + self.assertEqual(list(expected_shape), list(actual_val.shape)) + if val is not None: + self.assertAllClose(val, new_val, atol=1.e-06, rtol=1.e-06) + val = new_val + results.append(val) + + return results + + def _RunGraph(self, + run_params, + saved_model_dir, + inputs_data, + config, + graph_state, + num_runs=2): + params = self._GetParamsCached() + for data in inputs_data: + assert len(params.input_specs) == len(data) + + if run_params.is_v2: + results = self._RunGraphV2(saved_model_dir, inputs_data, graph_state, + num_runs) + gc.collect() # Force GC to destroy the TRT engine cache. 
+ return results + return self._RunGraphV1(saved_model_dir, inputs_data, config, num_runs) + + def _CreateConverter(self, run_params, saved_model_dir, session_config, conversion_params): """Return a TrtGraphConverter.""" - converter = trt_convert.TrtGraphConverter( + if run_params.is_v2: + return trt_convert.TrtGraphConverterV2( + input_saved_model_dir=saved_model_dir, + conversion_params=conversion_params) + return trt_convert.TrtGraphConverter( input_saved_model_dir=saved_model_dir, session_config=session_config, max_batch_size=conversion_params.max_batch_size, @@ -335,7 +390,6 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): maximum_cached_engines=conversion_params.maximum_cached_engines, use_calibration=conversion_params.use_calibration, use_function_backup=conversion_params.use_function_backup) - return converter def _GetCalibratedInferGraph(self, run_params, saved_model_dir, inputs_data): """Return trt converted graphdef in INT8 mode.""" @@ -353,8 +407,8 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): session_config = self._GetConfigProto(run_params, GraphState.CALIBRATE) logging.info("Running calibration graph, config:\n%s", str(session_config)) - converter = self._CreateConverter(saved_model_dir, session_config, - conversion_params) + converter = self._CreateConverter(run_params, saved_model_dir, + session_config, conversion_params) int8_gdef = converter.convert() self._VerifyGraphDef(run_params, saved_model_dir, int8_gdef, GraphState.CALIBRATE) @@ -363,7 +417,8 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): fetch_names=self._GetFetchNames(), num_runs=5, feed_dict_fn=lambda: self._GetFeedDict(inputs_data[0])) - trt_saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) + trt_saved_model_dir = self._GetSavedModelDir(run_params, + GraphState.CALIBRATE) converter.save(trt_saved_model_dir) return trt_saved_model_dir @@ -375,27 +430,34 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): 
session_config = self._GetConfigProto(run_params, GraphState.INFERENCE) logging.info("Creating TRT graph for inference, config\n%s", str(session_config)) - converter = self._CreateConverter(saved_model_dir, session_config, - conversion_params) + converter = self._CreateConverter(run_params, saved_model_dir, + session_config, conversion_params) converter.convert() - trt_saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) + trt_saved_model_dir = self._GetSavedModelDir(run_params, + GraphState.INFERENCE) converter.save(trt_saved_model_dir) return trt_saved_model_dir - def _WriteGraph(self, run_params, gdef, graph_state): + def _GetGraphStateLabel(self, graph_state): if graph_state == GraphState.ORIGINAL: - label = "Original" + return "Original" elif graph_state == GraphState.CALIBRATE: - label = "CalibEngine" + return "CalibEngine" elif graph_state == GraphState.INFERENCE: - label = "InferEngine" + return "InferEngine" + else: + return "UnknownState" + + def _WriteGraph(self, run_params, gdef, graph_state): + temp_dir = os.getenv("TRT_TEST_TMPDIR") + if not temp_dir: + return + graph_name = ( - self.__class__.__name__ + "_" + run_params.test_name + "_" + label + - ".pbtxt") - temp_dir = os.getenv("TRT_TEST_TMPDIR", self.get_temp_dir()) - if temp_dir: - logging.info("Writing graph to %s/%s", temp_dir, graph_name) - graph_io.write_graph(gdef, temp_dir, graph_name) + self.__class__.__name__ + "_" + run_params.test_name + "_" + + self._GetGraphStateLabel(graph_state) + ".pbtxt") + logging.info("Writing graph to %s/%s", temp_dir, graph_name) + graph_io.write_graph(gdef, temp_dir, graph_name) def _VerifyConnections(self, expected_engines, original_gdef, converted_gdef): old_to_new_node_map = { @@ -467,19 +529,28 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): msg="\nexpected:\n%s\nvs actual:\n%s" % (sorted(expected_input_map.items()), sorted(actual_input_map.items()))) - def _GetGraphDef(self, gdef_or_saved_model_dir): + def _GetGraphDef(self, 
run_params, gdef_or_saved_model_dir): if isinstance(gdef_or_saved_model_dir, str): + if run_params.is_v2: + root = load.load(gdef_or_saved_model_dir) + func = root.signatures[ + signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] + gdef = func.graph.as_graph_def() + # Manually unref the loaded saved model and force GC to destroy the TRT + # engine cache after load(). There is currently a reference cycle in 2.0 + # which prevents auto deletion of the resource. + # TODO(laigd): fix this. + del func + del root + gc.collect() + return gdef return saved_model_utils.get_meta_graph_def( gdef_or_saved_model_dir, tag_constants.SERVING).graph_def assert isinstance(gdef_or_saved_model_dir, graph_pb2.GraphDef) return gdef_or_saved_model_dir - def _VerifyGraphDef(self, run_params, original_gdef_or_saved_model_dir, - gdef_or_saved_model_dir_to_verify, graph_state): - original_gdef = self._GetGraphDef(original_gdef_or_saved_model_dir) - gdef_to_verify = self._GetGraphDef(gdef_or_saved_model_dir_to_verify) - self._WriteGraph(run_params, gdef_to_verify, graph_state) - + def _VerifyGraphDefV1(self, run_params, original_gdef, gdef_to_verify, + graph_state): expected_engines = self.ExpectedEnginesToBuild(run_params) num_engines = 0 functions = [f.signature.name for f in gdef_to_verify.library.function] @@ -521,7 +592,69 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): self._VerifyConnections(expected_engines, original_gdef, gdef_to_verify) # TODO(aaroey): consider verifying the corresponding TF function. 
- def _MakeSavedModel(self, run_params): + def _VerifyGraphDefV2(self, run_params, original_gdef, gdef_to_verify, + graph_state): + if graph_state == GraphState.ORIGINAL: + return + expected_engines = self.ExpectedEnginesToBuild(run_params) + all_op_names = [node.name for node in gdef_to_verify.node] + trt_op_names = [ + node.name for node in gdef_to_verify.node if node.op == "TRTEngineOp" + ] + for func in gdef_to_verify.library.function: + for node in func.node_def: + all_op_names.append(node.name) + if node.op == "TRTEngineOp": + trt_op_names.append(node.name) + # Remove the function name prefix. + def _Canonicalize(names): + return set([self._ToString(name.split("/")[-1]) for name in names]) + + all_op_names = _Canonicalize(all_op_names) + trt_op_names = _Canonicalize(trt_op_names) + + if isinstance(expected_engines, dict): + # For simplicity we don't verify the connections inside the engine in + # 2.0, but we still make sure that the converted ops are gone from the + # graph. + unexpected_names = set(nest.flatten(expected_engines.values())) + self.assertEmpty( + [name for name in unexpected_names if name in all_op_names]) + expected_engines = set(expected_engines.keys()) + + self.assertEqual(set(expected_engines), trt_op_names) + + def _VerifyGraphDef(self, run_params, original_gdef_or_saved_model_dir, + gdef_or_saved_model_dir_to_verify, graph_state): + original_gdef = self._GetGraphDef(run_params, + original_gdef_or_saved_model_dir) + gdef_to_verify = self._GetGraphDef(run_params, + gdef_or_saved_model_dir_to_verify) + self._WriteGraph(run_params, gdef_to_verify, graph_state) + if run_params.is_v2: + self._VerifyGraphDefV2(run_params, original_gdef, gdef_to_verify, + graph_state) + else: + self._VerifyGraphDefV1(run_params, original_gdef, gdef_to_verify, + graph_state) + + def _GetSavedModelDir(self, run_params, graph_state): + test_tmpdir = os.getenv("TRT_TEST_TMPDIR") + if test_tmpdir: + saved_model_dir = os.path.join( + test_tmpdir, 
self.__class__.__name__ + "_" + run_params.test_name + + "_" + self._GetGraphStateLabel(graph_state)) + try: + # For TF 1.x we need to make sure the output directory doesn't exist + # before exporting the saved model. + shutil.rmtree(saved_model_dir) + except OSError as e: + if e.errno != errno.ENOENT: + raise + return saved_model_dir + return tempfile.mkdtemp(dir=self.get_temp_dir()) + + def _MakeSavedModelV1(self, run_params): """Write the saved model as an input for testing.""" params = self._GetParamsCached() g = ops.Graph() @@ -534,15 +667,13 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): outputs = params.graph_fn(*inputs) if not isinstance(outputs, list) and not isinstance(outputs, tuple): outputs = [outputs] - for spec, output in zip(params.output_specs, outputs): - assert spec.name == output.name.split(":")[0] signature_def = signature_def_utils.build_signature_def( inputs={inp.op.name: utils.build_tensor_info(inp) for inp in inputs}, outputs={out.op.name: utils.build_tensor_info(out) for out in outputs}, method_name=signature_constants.PREDICT_METHOD_NAME) - saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) + saved_model_dir = self._GetSavedModelDir(run_params, GraphState.ORIGINAL) saved_model_builder = builder.SavedModelBuilder(saved_model_dir) with self.session( graph=g, config=self._GetConfigProto(run_params, @@ -556,6 +687,22 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): saved_model_builder.save() return saved_model_dir + def _MakeSavedModelV2(self, run_params): + params = self._GetParamsCached() + root = tracking.AutoTrackable() + root.run = def_function.function( + params.graph_fn, input_signature=params.input_specs) + saved_model_dir = self._GetSavedModelDir(run_params, GraphState.ORIGINAL) + logging.info("Saving input SavedModel to %s", saved_model_dir) + save.save(root, saved_model_dir, + {signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: root.run}) + return saved_model_dir + + def 
_MakeSavedModel(self, run_params): + if run_params.is_v2: + return self._MakeSavedModelV2(run_params) + return self._MakeSavedModelV1(run_params) + def RunTest(self, run_params): if not self.ShouldRunTest(run_params): return @@ -577,9 +724,12 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): # continuous natural numbers: # seq = np.arange(np.prod(np_shape)) # seq.resize(np_shape) - # inputs_data.append(scale * seq.astype(np_dtype)) - current_input_data.append( - (scale * np.random.random_sample(np_shape)).astype(np_dtype)) + # current_inputs_data.append(scale * seq.astype(np_dtype)) + data = (scale * np.random.random_sample(np_shape)).astype(np_dtype) + if run_params.is_v2: + with ops.device("/GPU:0"): + data = ops.convert_to_tensor(data) + current_input_data.append(data) inputs_data.append(current_input_data) # Verify original graph. @@ -626,55 +776,94 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): pass -def _AddTests(test_class): - """Adds test methods to TfTrtIntegrationTestBase.""" +def _GetTestConfigsV1(): + """Returns the config combinations to run the test.""" + convert_online, convert_offline = True, False + dynamic_engine, static_engine = True, False + use_calibration, no_calibration = True, False - def _GetTestConfigs(): - """Returns the config combinations to run the test.""" - convert_online, convert_offline = True, False - dynamic_engine, static_engine = True, False - use_calibration, no_calibration = True, False + # Add all possible test cases and let the derived test class to decide + # whether to run specific ones with ShouldRunTest(). + # + # Note: INT8 without calibration behaves like FP32/FP16. + opts = list( + itertools.product([FP32, FP16, INT8], [convert_online, convert_offline], + [dynamic_engine, static_engine], [no_calibration])) + # We always run calibration with offline tool. + # TODO(aaroey): static calibration engine is not supported yet. 
+ opts.append((INT8, convert_offline, dynamic_engine, use_calibration)) + return opts - # Add all possible test cases and let the derived test class to decide - # whether to run specific ones with ShouldRunTest(). - # - # Note: INT8 without calibration behaves like FP32/FP16. - opts = list( - itertools.product([FP32, FP16, INT8], [convert_online, convert_offline], - [dynamic_engine, static_engine], [no_calibration])) - # We always run calibration with offline tool. - # TODO(aaroey): static calibration engine is not supported yet. - opts.append((INT8, convert_offline, dynamic_engine, use_calibration)) - return opts - def _GetTest(run_params): - """Gets a single test method based on the parameters.""" +def _GetTestConfigsV2(): + """Returns the config combinations to run the test.""" + convert_offline = False + # TODO(laigd): add support for static_engine. + dynamic_engine = True + # TODO(laigd): add support for calibration. + no_calibration = False - @test_util.deprecated_graph_mode_only - def _Test(self): - logging.info( - "Running TFv1 test %s with parameters: convert_online=%s, " - "precision_mode=%s, dynamic_engine=%s", - "testTfTrt_" + run_params.test_name, run_params.convert_online, - run_params.precision_mode, run_params.dynamic_engine) - self.RunTest(run_params) + # Add all possible test cases and let the derived test class to decide + # whether to run specific ones with ShouldRunTest(). + # + # Note: + # - In TF2.0 the conversion always produce dynamic engine, and we don't test + # the offline mode here. + # - For simplicity we don't test online conversion which requires setting the + # Grappler config in default eager context. + # - INT8 without calibration behaves like FP32/FP16. + opts = list( + itertools.product([FP32, FP16, INT8], [convert_offline], [dynamic_engine], + [no_calibration])) + # We always run calibration with offline tool. + # TODO(aaroey): INT8+calibration is not supported yet in V2. 
+ # opts.append((INT8, convert_offline, dynamic_engine, use_calibration)) + return opts - return _Test - opts = _GetTestConfigs() +def _GetTest(run_params): + """Gets a single test method based on the parameters.""" + + def _Test(self): + logging.info( + "Running test %s with parameters: convert_online=%s, " + "precision_mode=%s, dynamic_engine=%s", run_params.test_name, + run_params.convert_online, run_params.precision_mode, + run_params.dynamic_engine) + self.RunTest(run_params) + + return _Test + + +def _AddTestsFor(test_class, is_v2): + """Adds test methods to TfTrtIntegrationTestBase for specific TF version.""" + opts = _GetTestConfigsV2() if is_v2 else _GetTestConfigsV1() for (precision_mode, convert_online, dynamic_engine, use_calibration) in opts: conversion = "OnlineConversion" if convert_online else "OfflineConversion" engine_type = "DynamicEngine" if dynamic_engine else "StaticEngine" calibration_type = "UseCalibration" if use_calibration else "NoCalibration" - test_name = "%s_%s_%s_%s" % (conversion, engine_type, precision_mode, - calibration_type) + test_name = "%s_%s_%s_%s_%s" % ("testTfTrtV2" if is_v2 else "testTfTrt", + conversion, engine_type, precision_mode, + calibration_type) run_params = RunParams( convert_online=convert_online, precision_mode=precision_mode, dynamic_engine=dynamic_engine, test_name=test_name, - use_calibration=use_calibration) - setattr(test_class, "testTfTrt_" + test_name, _GetTest(run_params)) + use_calibration=use_calibration, + is_v2=is_v2) + if is_v2: + setattr(test_class, test_name, + test_util.run_v2_only(_GetTest(run_params))) + else: + setattr(test_class, test_name, + test_util.run_v1_only("", _GetTest(run_params))) + + +def _AddTests(test_class): + """Adds test methods to TfTrtIntegrationTestBase.""" + _AddTestsFor(test_class, is_v2=False) + _AddTestsFor(test_class, is_v2=True) if is_tensorrt_enabled(): diff --git a/tensorflow/python/compiler/tensorrt/trt_convert.py 
b/tensorflow/python/compiler/tensorrt/trt_convert.py index d25b1477886..b3befd69849 100644 --- a/tensorflow/python/compiler/tensorrt/trt_convert.py +++ b/tensorflow/python/compiler/tensorrt/trt_convert.py @@ -87,366 +87,6 @@ def _to_string(s): return s -class GraphConverter(object): - """Base class for offline converters to optimize SavedModels/GraphDefs. - - A `GraphConverter` object encapsulates the environment to convert (optimize) a - TensorFlow SavedModel or GraphDef. - - To create a custom GraphConverter: - - ```python - class MyGraphConverter(GraphConverter): - ... - - def get_rewriter_config(self): - my_rewriter_config = ... - return my_rewriter_config - ``` - - Then to run the conversion without quantization calibration: - - ```python - my_converter = MyGraphConverter(input_saved_model_dir="my_dir") - converted_graph_def = my_converter.convert() - my_converter.save(output_saved_model_dir) # Optional - ``` - - To run the conversion with quantization calibration: - - ```python - my_converter = MyGraphConverter(input_saved_model_dir="my_dir") - my_converter.convert() - - # Run calibration 10 times. - converted_graph_def = my_converter.calibrate( - fetch_names=['output:0'], - num_runs=10, - feed_dict_fn=lambda: {'input:0': my_next_data()}) - - my_converter.save(output_saved_model_dir) # Optional - ``` - """ - - # TODO(laigd): clean up the parameters. - def __init__(self, - input_saved_model_dir=None, - input_saved_model_tags=None, - input_saved_model_signature_key=None, - input_graph_def=None, - nodes_blacklist=None, - session_config=None): - """Initialize the converter. - - Args: - input_saved_model_dir: the directory to load the SavedModel which contains - the input graph to transforms. Used only when input_graph_def is None. - input_saved_model_tags: list of tags to load the SavedModel. - input_saved_model_signature_key: the key of the signature to optimize the - graph for. - input_graph_def: a GraphDef object containing a model to be transformed. 
- If set to None, the graph will be read from the SavedModel loaded from - input_saved_model_dir. - nodes_blacklist: list of node names to prevent the converter from - touching. - session_config: the ConfigProto used to create a Session. It's also used - as a template to create a RewriterConfig for conversion. If not - specified, a default ConfigProto will be used. - - Raises: - ValueError: if the combination of the parameters is invalid. - """ - if input_graph_def and input_saved_model_dir: - raise ValueError( - "Can only specify one of input_graph_def and input_saved_model_dir") - if not input_graph_def and not input_saved_model_dir: - raise ValueError("Must specify one of input_graph_def and " - "input_saved_model_dir") - - self._input_graph_def = input_graph_def - self._nodes_blacklist = nodes_blacklist - - self._input_saved_model_dir = input_saved_model_dir - self._converted = False - self._grappler_meta_graph_def = None - - self._input_saved_model_tags = ( - input_saved_model_tags or [tag_constants.SERVING]) - self._input_saved_model_signature_key = ( - input_saved_model_signature_key or - signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY) - self._session_config = session_config or config_pb2.ConfigProto() - - # For calibration usage. - self._calibration_graph = None - self._calibration_sess = None - self._calibration_data_collected = False - - def get_rewriter_config(self): - """Returns a RewriterConfig proto for TRT transformation. - - Returns: - A RewriterConfig proto which will be used to run the conversion using - Grappler. - """ - raise NotImplementedError("get_rewriter_config") - - def _run_conversion(self): - """Run Grappler's OptimizeGraph() tool to convert the graph.""" - # Create custom ConfigProto for Grappler. 
- grappler_session_config = config_pb2.ConfigProto() - grappler_session_config.CopyFrom(self._session_config) - custom_rewriter_config = self.get_rewriter_config() - grappler_session_config.graph_options.rewrite_options.CopyFrom( - custom_rewriter_config) - - # Run Grappler. - self._converted_graph_def = tf_optimizer.OptimizeGraph( - grappler_session_config, - self._grappler_meta_graph_def, - graph_id=b"tf_graph") - self._converted = True - - def _add_nodes_blacklist(self): - if self._nodes_blacklist: - collection_def = self._grappler_meta_graph_def.collection_def["train_op"] - blacklist = collection_def.node_list.value - for i in self._nodes_blacklist: - if isinstance(i, ops.Tensor): - blacklist.append(_to_bytes(i.name)) - else: - blacklist.append(_to_bytes(i)) - - def _convert_graph_def(self): - """Convert the input GraphDef.""" - graph = ops.Graph() - with graph.as_default(): - importer.import_graph_def(self._input_graph_def, name="") - self._grappler_meta_graph_def = saver.export_meta_graph( - graph_def=graph.as_graph_def(add_shapes=True), graph=graph) - self._add_nodes_blacklist() - - self._run_conversion() - - def _collections_to_keep(self, collection_keys): - # TODO(laigd): currently we use the collection key to filter out - # collections that depend on variable ops, but this may miss some - # other user-defined collections. A better way would be to use - # CollectionDef::NodeList for the filtering. 
- collections_to_remove = ( - ops.GraphKeys._VARIABLE_COLLECTIONS + [ - ops.GraphKeys.TRAIN_OP, ops.GraphKeys.WHILE_CONTEXT, - ops.GraphKeys.COND_CONTEXT - ]) - return [key for key in collection_keys if key not in collections_to_remove] - - def _convert_saved_model(self): - """Convert the input SavedModel.""" - graph = ops.Graph() - with session.Session(graph=graph, config=self._session_config) as sess: - input_meta_graph_def = loader.load(sess, self._input_saved_model_tags, - self._input_saved_model_dir) - input_signature_def = input_meta_graph_def.signature_def[ - self._input_saved_model_signature_key] - - def _gather_names(tensor_info): - """Get the node names from a TensorInfo.""" - return set([tensor_info[key].name.split(":")[0] for key in tensor_info]) - - # Get input and outputs from all SignatureDef. - output_node_names = _gather_names(input_signature_def.inputs).union( - _gather_names(input_signature_def.outputs)) - - # Preserve nodes in collection - for collection_key in self._collections_to_keep( - input_meta_graph_def.collection_def): - for op in sess.graph.get_collection(collection_key): - if isinstance(op, ops.Operation): - output_node_names.add(op.name.split(":")[0]) - - # Freeze the variables in the SavedModel graph and copy the frozen - # graph over. - frozen_graph_def = graph_util.convert_variables_to_constants( - sess, sess.graph.as_graph_def(add_shapes=True), - list(output_node_names)) - self._grappler_meta_graph_def = meta_graph_pb2.MetaGraphDef() - self._grappler_meta_graph_def.graph_def.CopyFrom(frozen_graph_def) - - # Copy the collections that are not variables. - for collection_key in self._collections_to_keep( - input_meta_graph_def.collection_def): - self._grappler_meta_graph_def.collection_def[collection_key].CopyFrom( - input_meta_graph_def.collection_def[collection_key]) - - self._add_nodes_blacklist() - - # Copy other information. 
- self._grappler_meta_graph_def.meta_info_def.CopyFrom( - input_meta_graph_def.meta_info_def) - self._grappler_meta_graph_def.signature_def[ - self._input_saved_model_signature_key].CopyFrom(input_signature_def) - # TODO(laigd): maybe add back AssetFileDef. - - self._run_conversion() - - def convert(self): - """Run the conversion. - - Returns: - The converted GraphDef for TF 1.x, or the converted ConcreteFunction in TF - 2.0+. - """ - assert not self._converted - if self._input_graph_def: - self._convert_graph_def() - else: - self._convert_saved_model() - return self._converted_graph_def - - def calibrate(self, - fetch_names, - num_runs, - feed_dict_fn=None, - input_map_fn=None): - """Run the calibration and return the calibrated GraphDef. - - Args: - fetch_names: a list of output tensor name to fetch during calibration. - num_runs: number of runs of the graph during calibration. - feed_dict_fn: a function that returns a dictionary mapping input names (as - strings) in the GraphDef to be calibrated to values (e.g. Python list, - numpy arrays, etc). One and only one of `feed_dict_fn` and - `input_map_fn` should be specified. - input_map_fn: a function that returns a dictionary mapping input names (as - strings) in the GraphDef to be calibrated to Tensor objects. The values - of the named input tensors in the GraphDef to be calibrated will be - re-mapped to the respective `Tensor` values during calibration. One and - only one of `feed_dict_fn` and `input_map_fn` should be specified. - - Raises: - ValueError: if the input combination is invalid. - RuntimeError: if this method is called in eager mode. - - Returns: - The GraphDef after the calibration. 
- """ - assert self._converted - assert not self._calibration_sess - - if context.executing_eagerly(): - raise RuntimeError("Calibration for TF 2.0 is not supported yet.") - - if (feed_dict_fn and input_map_fn) or (not feed_dict_fn and - not input_map_fn): - raise ValueError( - "Should specify one and only one of feed_dict_fn and input_map_fn.") - - self._calibration_graph = ops.Graph() - with self._calibration_graph.as_default(): - fetches = importer.import_graph_def( - self._converted_graph_def, - input_map=input_map_fn() if input_map_fn else None, - return_elements=fetch_names, - name="") - self._calibration_sess = session.Session( - graph=self._calibration_graph, config=self._session_config) - - for _ in range(num_runs): - self._calibration_sess.run( - fetches, feed_dict=feed_dict_fn() if feed_dict_fn else None) - - self.finalize_calibration() - return self._converted_graph_def - - def finalize_calibration(self): - """Clean up calibration resources and finalize the calibration. - - Implementations need to close self._calibration_sess before returning. - """ - raise NotImplementedError("finalize_calibration") - - def save(self, output_saved_model_dir): - """Save the converted graph as a SavedModel. - - Args: - output_saved_model_dir: construct a SavedModel using the converted - GraphDef and save it to the specified directory. This option only works - when the input graph is loaded from a SavedModel, i.e. when - input_saved_model_dir is specified and input_graph_def is None in - __init__(). - - Raises: - ValueError: if the input to the converter is a GraphDef instead of a - SavedModel. 
- """ - assert self._converted - if self._input_graph_def: - raise ValueError( - "Not able to save to a SavedModel since input is a GraphDef") - - def _restore_collections(dest_graph, src_meta_graph_def, collection_keys): - """Restores collections that we need to keep.""" - scope = "" - for key in collection_keys: - collection_def = src_meta_graph_def.collection_def[key] - kind = collection_def.WhichOneof("kind") - if kind is None: - tf_logging.error( - "Cannot identify data type for collection %s. Skipping.", key) - continue - from_proto = ops.get_from_proto_function(key) - if from_proto and kind == "bytes_list": - proto_type = ops.get_collection_proto_type(key) - # It is assumed that there are no Variables Keys in collections - for value in collection_def.bytes_list.value: - proto = proto_type() - proto.ParseFromString(value) - try: - new_value = from_proto(proto, import_scope=scope) - except: - continue - dest_graph.add_to_collection(key, new_value) - else: - field = getattr(collection_def, kind) - if kind == "node_list": - for value in field.value: - name = ops.prepend_name_scope(value, scope) - # Since the graph has been optimized, the node may no longer - # exists - try: - col_op = dest_graph.as_graph_element(name) - except (TypeError, ValueError, KeyError) as e: - continue - dest_graph.add_to_collection(key, col_op) - elif kind == "int64_list": - # NOTE(opensource): This force conversion is to work around the - # fact that Python2 distinguishes between int and long, while - # Python3 has only int. - for value in field.value: - dest_graph.add_to_collection(key, int(value)) - else: - for value in field.value: - dest_graph.add_to_collection(key, - ops.prepend_name_scope(value, scope)) - - # Write the transformed graphdef as SavedModel. 
- saved_model_builder = builder.SavedModelBuilder(output_saved_model_dir) - with ops.Graph().as_default(): - importer.import_graph_def(self._converted_graph_def, name="") - _restore_collections( - ops.get_default_graph(), self._grappler_meta_graph_def, - self._collections_to_keep( - self._grappler_meta_graph_def.collection_def)) - # We don't use any specific converter here. - with session.Session(config=self._session_config) as sess: - saved_model_builder.add_meta_graph_and_variables( - sess, - self._input_saved_model_tags, - signature_def_map=self._grappler_meta_graph_def.signature_def) - # Ignore other meta graphs from the input SavedModel. - saved_model_builder.save() - - class TrtPrecisionMode(object): FP32 = "FP32" FP16 = "FP16" @@ -643,10 +283,38 @@ def get_tensorrt_rewriter_config( return rewriter_config_with_trt -class TrtGraphConverter(GraphConverter): - """A GraphConverter for TRT transformation.""" +class TrtGraphConverter(object): + """A converter for TF-TRT transformation for TF 1.x GraphDef/SavedModels. + + To run the conversion without quantization calibration (e.g. for FP32/FP16 + precision modes): + + ```python + converter = TrtGraphConverter( + input_saved_model_dir="my_dir", + precision_mode=TrtPrecisionMode.FP16) + converted_graph_def = converter.convert() + converter.save(output_saved_model_dir) + ``` + + To run the conversion with quantization calibration: + + ```python + converter = TrtGraphConverter( + input_saved_model_dir="my_dir", + precision_mode=TrtPrecisionMode.INT8) + converter.convert() + + # Run calibration 10 times. + converted_graph_def = converter.calibrate( + fetch_names=['output:0'], + num_runs=10, + feed_dict_fn=lambda: {'input:0': my_next_data()}) + + converter.save(output_saved_model_dir) + ``` + """ - # TODO(laigd): use TrtConversionParams here. 
def __init__(self, input_saved_model_dir=None, input_saved_model_tags=None, @@ -707,15 +375,32 @@ class TrtGraphConverter(GraphConverter): Raises: ValueError: if the combination of the parameters is invalid. """ - super(TrtGraphConverter, self).__init__( - input_saved_model_dir=input_saved_model_dir, - input_saved_model_tags=input_saved_model_tags, - input_saved_model_signature_key=input_saved_model_signature_key, - input_graph_def=input_graph_def, - nodes_blacklist=nodes_blacklist, - session_config=session_config) + if input_graph_def and input_saved_model_dir: + raise ValueError( + "Can only specify one of input_graph_def and input_saved_model_dir") + if not input_graph_def and not input_saved_model_dir: + raise ValueError("Must specify one of input_graph_def and " + "input_saved_model_dir") _check_trt_version_compatibility() + self._input_graph_def = input_graph_def + self._nodes_blacklist = nodes_blacklist + + self._input_saved_model_dir = input_saved_model_dir + self._converted = False + self._grappler_meta_graph_def = None + + self._input_saved_model_tags = ( + input_saved_model_tags or [tag_constants.SERVING]) + self._input_saved_model_signature_key = ( + input_saved_model_signature_key or + signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY) + self._session_config = session_config or config_pb2.ConfigProto() + + # For calibration usage. + self._calibration_graph = None + self._calibration_sess = None + self._calibration_data_collected = False self._need_calibration = ( precision_mode == TrtPrecisionMode.INT8 and use_calibration) if self._need_calibration and not is_dynamic_op: @@ -750,11 +435,176 @@ class TrtGraphConverter(GraphConverter): max_batch_size=max_batch_size) _check_conversion_params(self._conversion_params) - def get_rewriter_config(self): - return get_tensorrt_rewriter_config( + def _run_conversion(self): + """Run Grappler's OptimizeGraph() tool to convert the graph.""" + # Create custom ConfigProto for Grappler. 
+ grappler_session_config = config_pb2.ConfigProto() + grappler_session_config.CopyFrom(self._session_config) + custom_rewriter_config = get_tensorrt_rewriter_config( conversion_params=self._conversion_params) + grappler_session_config.graph_options.rewrite_options.CopyFrom( + custom_rewriter_config) + + # Run Grappler. + self._converted_graph_def = tf_optimizer.OptimizeGraph( + grappler_session_config, + self._grappler_meta_graph_def, + graph_id=b"tf_graph") + self._converted = True + + def _add_nodes_blacklist(self): + if self._nodes_blacklist: + collection_def = self._grappler_meta_graph_def.collection_def["train_op"] + blacklist = collection_def.node_list.value + for i in self._nodes_blacklist: + if isinstance(i, ops.Tensor): + blacklist.append(_to_bytes(i.name)) + else: + blacklist.append(_to_bytes(i)) + + def _convert_graph_def(self): + """Convert the input GraphDef.""" + graph = ops.Graph() + with graph.as_default(): + importer.import_graph_def(self._input_graph_def, name="") + self._grappler_meta_graph_def = saver.export_meta_graph( + graph_def=graph.as_graph_def(add_shapes=True), graph=graph) + self._add_nodes_blacklist() + + self._run_conversion() + + def _collections_to_keep(self, collection_keys): + # TODO(laigd): currently we use the collection key to filter out + # collections that depend on variable ops, but this may miss some + # other user-defined collections. A better way would be to use + # CollectionDef::NodeList for the filtering. 
+ collections_to_remove = ( + ops.GraphKeys._VARIABLE_COLLECTIONS + [ + ops.GraphKeys.TRAIN_OP, ops.GraphKeys.WHILE_CONTEXT, + ops.GraphKeys.COND_CONTEXT + ]) + return [key for key in collection_keys if key not in collections_to_remove] + + def _convert_saved_model(self): + """Convert the input SavedModel.""" + graph = ops.Graph() + with session.Session(graph=graph, config=self._session_config) as sess: + input_meta_graph_def = loader.load(sess, self._input_saved_model_tags, + self._input_saved_model_dir) + input_signature_def = input_meta_graph_def.signature_def[ + self._input_saved_model_signature_key] + + def _gather_names(tensor_info): + """Get the node names from a TensorInfo.""" + return set([tensor_info[key].name.split(":")[0] for key in tensor_info]) + + # Get input and outputs from all SignatureDef. + output_node_names = _gather_names(input_signature_def.inputs).union( + _gather_names(input_signature_def.outputs)) + + # Preserve nodes in collection + for collection_key in self._collections_to_keep( + input_meta_graph_def.collection_def): + for op in sess.graph.get_collection(collection_key): + if isinstance(op, ops.Operation): + output_node_names.add(op.name.split(":")[0]) + + # Freeze the variables in the SavedModel graph and copy the frozen + # graph over. + frozen_graph_def = graph_util.convert_variables_to_constants( + sess, sess.graph.as_graph_def(add_shapes=True), + list(output_node_names)) + self._grappler_meta_graph_def = meta_graph_pb2.MetaGraphDef() + self._grappler_meta_graph_def.graph_def.CopyFrom(frozen_graph_def) + + # Copy the collections that are not variables. + for collection_key in self._collections_to_keep( + input_meta_graph_def.collection_def): + self._grappler_meta_graph_def.collection_def[collection_key].CopyFrom( + input_meta_graph_def.collection_def[collection_key]) + + self._add_nodes_blacklist() + + # Copy other information. 
+ self._grappler_meta_graph_def.meta_info_def.CopyFrom( + input_meta_graph_def.meta_info_def) + self._grappler_meta_graph_def.signature_def[ + self._input_saved_model_signature_key].CopyFrom(input_signature_def) + # TODO(laigd): maybe add back AssetFileDef. + + self._run_conversion() + + def convert(self): + """Run the conversion. + + Returns: + The converted GraphDef for TF 1.x, or the converted ConcreteFunction in TF + 2.0+. + """ + assert not self._converted + if self._input_graph_def: + self._convert_graph_def() + else: + self._convert_saved_model() + return self._converted_graph_def + + def calibrate(self, + fetch_names, + num_runs, + feed_dict_fn=None, + input_map_fn=None): + """Run the calibration and return the calibrated GraphDef. + + Args: + fetch_names: a list of output tensor name to fetch during calibration. + num_runs: number of runs of the graph during calibration. + feed_dict_fn: a function that returns a dictionary mapping input names (as + strings) in the GraphDef to be calibrated to values (e.g. Python list, + numpy arrays, etc). One and only one of `feed_dict_fn` and + `input_map_fn` should be specified. + input_map_fn: a function that returns a dictionary mapping input names (as + strings) in the GraphDef to be calibrated to Tensor objects. The values + of the named input tensors in the GraphDef to be calibrated will be + re-mapped to the respective `Tensor` values during calibration. One and + only one of `feed_dict_fn` and `input_map_fn` should be specified. + + Raises: + ValueError: if the input combination is invalid. + RuntimeError: if this method is called in eager mode. + + Returns: + The GraphDef after the calibration. 
+ """ + assert self._converted + assert not self._calibration_sess + + if context.executing_eagerly(): + raise RuntimeError("Calibration for TF 2.0 is not supported yet.") + + if (feed_dict_fn and input_map_fn) or (not feed_dict_fn and + not input_map_fn): + raise ValueError( + "Should specify one and only one of feed_dict_fn and input_map_fn.") + + self._calibration_graph = ops.Graph() + with self._calibration_graph.as_default(): + fetches = importer.import_graph_def( + self._converted_graph_def, + input_map=input_map_fn() if input_map_fn else None, + return_elements=fetch_names, + name="") + self._calibration_sess = session.Session( + graph=self._calibration_graph, config=self._session_config) + + for _ in range(num_runs): + self._calibration_sess.run( + fetches, feed_dict=feed_dict_fn() if feed_dict_fn else None) + + self.finalize_calibration() + return self._converted_graph_def def finalize_calibration(self): + """Clean up calibration resources and finalize the calibration.""" assert self._need_calibration assert self._converted assert not self._calibration_data_collected @@ -792,11 +642,87 @@ class TrtGraphConverter(GraphConverter): self._calibration_sess.close() def save(self, output_saved_model_dir): - """Save the converted graph as a SavedModel.""" + """Save the converted graph as a SavedModel. + + Args: + output_saved_model_dir: construct a SavedModel using the converted + GraphDef and save it to the specified directory. This option only works + when the input graph is loaded from a SavedModel, i.e. when + input_saved_model_dir is specified and input_graph_def is None in + __init__(). + + Raises: + ValueError: if the input to the converter is a GraphDef instead of a + SavedModel. 
+ """ + assert self._converted if self._need_calibration: assert self._calibration_data_collected + if self._input_graph_def: + raise ValueError( + "Not able to save to a SavedModel since input is a GraphDef") - super(TrtGraphConverter, self).save(output_saved_model_dir) + def _restore_collections(dest_graph, src_meta_graph_def, collection_keys): + """Restores collections that we need to keep.""" + scope = "" + for key in collection_keys: + collection_def = src_meta_graph_def.collection_def[key] + kind = collection_def.WhichOneof("kind") + if kind is None: + tf_logging.error( + "Cannot identify data type for collection %s. Skipping.", key) + continue + from_proto = ops.get_from_proto_function(key) + if from_proto and kind == "bytes_list": + proto_type = ops.get_collection_proto_type(key) + # It is assumed that there are no Variables Keys in collections + for value in collection_def.bytes_list.value: + proto = proto_type() + proto.ParseFromString(value) + try: + new_value = from_proto(proto, import_scope=scope) + except: + continue + dest_graph.add_to_collection(key, new_value) + else: + field = getattr(collection_def, kind) + if kind == "node_list": + for value in field.value: + name = ops.prepend_name_scope(value, scope) + # Since the graph has been optimized, the node may no longer + # exists + try: + col_op = dest_graph.as_graph_element(name) + except (TypeError, ValueError, KeyError) as e: + continue + dest_graph.add_to_collection(key, col_op) + elif kind == "int64_list": + # NOTE(opensource): This force conversion is to work around the + # fact that Python2 distinguishes between int and long, while + # Python3 has only int. + for value in field.value: + dest_graph.add_to_collection(key, int(value)) + else: + for value in field.value: + dest_graph.add_to_collection(key, + ops.prepend_name_scope(value, scope)) + + # Write the transformed graphdef as SavedModel. 
+ saved_model_builder = builder.SavedModelBuilder(output_saved_model_dir) + with ops.Graph().as_default(): + importer.import_graph_def(self._converted_graph_def, name="") + _restore_collections( + ops.get_default_graph(), self._grappler_meta_graph_def, + self._collections_to_keep( + self._grappler_meta_graph_def.collection_def)) + # We don't use any specific converter here. + with session.Session(config=self._session_config) as sess: + saved_model_builder.add_meta_graph_and_variables( + sess, + self._input_saved_model_tags, + signature_def_map=self._grappler_meta_graph_def.signature_def) + # Ignore other meta graphs from the input SavedModel. + saved_model_builder.save() def _get_resource_handle(name, device): diff --git a/tensorflow/python/data/experimental/benchmarks/autotune_benchmark.py b/tensorflow/python/data/experimental/benchmarks/autotune_benchmark.py index 6a42c1cb6e5..af7c4736083 100644 --- a/tensorflow/python/data/experimental/benchmarks/autotune_benchmark.py +++ b/tensorflow/python/data/experimental/benchmarks/autotune_benchmark.py @@ -33,9 +33,14 @@ class AutotuneBenchmark(test.Benchmark): def benchmark_map(self): a = self._benchmark_map(autotune=False) b = self._benchmark_map(autotune=True) - print("speedup: %f" % (a / b)) + c = self._benchmark_map( + autotune=True, algorithm=dataset_ops.AutotuneAlgorithm.GRADIENT_DESCENT) + print("HillClimb vs Default speedup: %f" % (a / b)) + print("GradientDescent vs Default speedup: %f" % (a / c)) - def _benchmark_map(self, autotune): + def _benchmark_map(self, + autotune, + algorithm=dataset_ops.AutotuneAlgorithm.HILL_CLIMB): k = 1024 * 1024 dataset = dataset_ops.Dataset.from_tensors((np.random.rand(1, 4 * k), np.random.rand(4 * k, @@ -45,6 +50,8 @@ class AutotuneBenchmark(test.Benchmark): options = dataset_ops.Options() options.experimental_optimization.apply_default_optimizations = False options.experimental_optimization.autotune = autotune + if autotune: + options.experimental_optimization.autotune_algorithm 
= algorithm.value dataset = dataset.with_options(options) iterator = dataset_ops.make_one_shot_iterator(dataset) get_next = iterator.get_next() @@ -62,15 +69,19 @@ class AutotuneBenchmark(test.Benchmark): self.report_benchmark( iters=10000, wall_time=np.median(deltas), - name="map" + ("_autotune" if autotune else "")) + name="map" + (("_autotune_%s" % algorithm.name) if autotune else "")) return np.median(deltas) def benchmark_map_and_batch(self): a = self._benchmark_map_and_batch(autotune=False) b = self._benchmark_map_and_batch(autotune=True) - print("speedup: %f" % (a / b)) + c = self._benchmark_map_and_batch( + autotune=True, algorithm=dataset_ops.AutotuneAlgorithm.GRADIENT_DESCENT) + print("HillClimb vs Default speedup: %f" % (a / b)) + print("GradientDescent vs Default speedup: %f" % (a / c)) - def _benchmark_map_and_batch(self, autotune): + def _benchmark_map_and_batch( + self, autotune, algorithm=dataset_ops.AutotuneAlgorithm.HILL_CLIMB): batch_size = 16 k = 1024 * 1024 dataset = dataset_ops.Dataset.from_tensors((np.random.rand(1, 4 * k), @@ -83,6 +94,8 @@ class AutotuneBenchmark(test.Benchmark): options.experimental_optimization.apply_default_optimizations = False options.experimental_optimization.map_and_batch_fusion = True options.experimental_optimization.autotune = autotune + if autotune: + options.experimental_optimization.autotune_algorithm = algorithm.value dataset = dataset.with_options(options) iterator = dataset_ops.make_one_shot_iterator(dataset) get_next = iterator.get_next() @@ -100,15 +113,21 @@ class AutotuneBenchmark(test.Benchmark): self.report_benchmark( iters=1000, wall_time=np.median(deltas), - name="map_and_batch" + ("_autotune" if autotune else "")) + name="map_and_batch" + + (("_autotune_%s" % algorithm.name) if autotune else "")) return np.median(deltas) def benchmark_interleave(self): a = self._benchmark_interleave(autotune=False) b = self._benchmark_interleave(autotune=True) - print("speedup: %f" % (a / b)) + c = 
self._benchmark_interleave( + autotune=True, algorithm=dataset_ops.AutotuneAlgorithm.GRADIENT_DESCENT) + print("HillClimb vs Default speedup: %f" % (a / b)) + print("GradientDescent vs Default speedup: %f" % (a / c)) - def _benchmark_interleave(self, autotune): + def _benchmark_interleave(self, + autotune, + algorithm=dataset_ops.AutotuneAlgorithm.HILL_CLIMB): k = 1024 * 1024 dataset = dataset_ops.Dataset.from_tensors((np.random.rand(1, 4 * k), np.random.rand(4 * k, @@ -121,6 +140,8 @@ class AutotuneBenchmark(test.Benchmark): options = dataset_ops.Options() options.experimental_optimization.apply_default_optimizations = False options.experimental_optimization.autotune = autotune + if autotune: + options.experimental_optimization.autotune_algorithm = algorithm.value dataset = dataset.with_options(options) iterator = dataset_ops.make_one_shot_iterator(dataset) get_next = iterator.get_next() @@ -138,15 +159,20 @@ class AutotuneBenchmark(test.Benchmark): self.report_benchmark( iters=10000, wall_time=np.median(deltas), - name="interleave" + ("_autotune" if autotune else "")) + name="interleave" + + (("_autotune_%s" % algorithm.name) if autotune else "")) return np.median(deltas) def benchmark_map_and_interleave(self): a = self._benchmark_map_and_interleave(autotune=False) b = self._benchmark_map_and_interleave(autotune=True) - print("speedup: %f" % (a / b)) + c = self._benchmark_map_and_interleave( + autotune=True, algorithm=dataset_ops.AutotuneAlgorithm.GRADIENT_DESCENT) + print("HillClimb vs Default speedup: %f" % (a / b)) + print("GradientDescent vs Default speedup: %f" % (a / c)) - def _benchmark_map_and_interleave(self, autotune): + def _benchmark_map_and_interleave( + self, autotune, algorithm=dataset_ops.AutotuneAlgorithm.HILL_CLIMB): k = 1024 * 1024 a = (np.random.rand(1, 8 * k), np.random.rand(8 * k, 1)) b = (np.random.rand(1, 4 * k), np.random.rand(4 * k, 1)) @@ -181,6 +207,8 @@ class AutotuneBenchmark(test.Benchmark): options = dataset_ops.Options() 
options.experimental_optimization.apply_default_optimizations = False options.experimental_optimization.autotune = autotune + if autotune: + options.experimental_optimization.autotune_algorithm = algorithm.value dataset = dataset.with_options(options) iterator = dataset_ops.make_one_shot_iterator(dataset) get_next = iterator.get_next() @@ -189,16 +217,17 @@ class AutotuneBenchmark(test.Benchmark): with session.Session() as sess: for _ in range(5): sess.run(get_next) - for _ in range(1000): + for _ in range(10000): start = time.time() sess.run(get_next) end = time.time() deltas.append(end - start) self.report_benchmark( - iters=1000, + iters=10000, wall_time=np.median(deltas), - name="map_and_interleave" + ("_autotune" if autotune else "")) + name="map_and_interleave" + + (("_autotune_%s" % algorithm.name) if autotune else "")) return np.median(deltas) diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD index d45e7e07122..26c213fb8a0 100644 --- a/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD +++ b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD @@ -27,6 +27,26 @@ py_test( ], ) +py_test( + name = "inject_prefetch_test", + size = "small", + srcs = ["inject_prefetch_test.py"], + python_version = "PY2", + srcs_version = "PY2AND3", + tags = [ + "no_oss", + "no_pip", + "no_windows", + ], + deps = [ + "//tensorflow/python:client_testlib", + "//tensorflow/python:errors", + "//tensorflow/python/data/experimental/ops:optimization", + "//tensorflow/python/data/kernel_tests:test_base", + "//tensorflow/python/data/ops:dataset_ops", + ], +) + py_test( name = "filter_fusion_test", size = "medium", diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/inject_prefetch_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/inject_prefetch_test.py new file mode 100644 index 00000000000..89f61f141b0 --- 
/dev/null +++ b/tensorflow/python/data/experimental/kernel_tests/optimization/inject_prefetch_test.py @@ -0,0 +1,96 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for the `AutotuneBuffers` rewrite.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.data.experimental.ops import optimization +from tensorflow.python.data.kernel_tests import test_base +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import test_util +from tensorflow.python.platform import test + + +@test_util.run_all_in_graph_and_eager_modes +class InjectPrefetchTest(test_base.DatasetTestBase): + + def _enable_autotune_buffers(self, dataset): + options = dataset_ops.Options() + options.experimental_optimization.autotune_buffers = True + return dataset.with_options(options) + + def testParallelMap(self): + dataset = dataset_ops.Dataset.range(100) + dataset = dataset.apply( + optimization.assert_next(["ParallelMap", "Prefetch", "FiniteTake"])) + dataset = dataset.map( + lambda x: x + 1, num_parallel_calls=dataset_ops.AUTOTUNE) + dataset = dataset.take(50) + dataset = self._enable_autotune_buffers(dataset) + self.assertDatasetProduces(dataset, range(1, 51)) + + def testMapAndBatch(self): + dataset = 
dataset_ops.Dataset.range(100) + dataset = dataset.apply( + optimization.assert_next(["MapAndBatch", "Prefetch", "FiniteTake"])) + dataset = dataset.map( + lambda x: x + 1, num_parallel_calls=dataset_ops.AUTOTUNE) + dataset = dataset.batch(10) + dataset = dataset.take(5) + dataset = self._enable_autotune_buffers(dataset) + self.assertDatasetProduces( + dataset, [list(range(i + 1, i + 11)) for i in range(0, 50, 10)]) + + def testParallelInterleaveV2(self): + dataset = dataset_ops.Dataset.range(100) + dataset = dataset.apply( + optimization.assert_next( + ["ParallelInterleaveV2", "Prefetch", "FiniteTake"])) + dataset = dataset.interleave( + lambda x: dataset_ops.Dataset.from_tensors(x + 1), + num_parallel_calls=dataset_ops.AUTOTUNE) + dataset = dataset.take(50) + dataset = self._enable_autotune_buffers(dataset) + self.assertDatasetProduces(dataset, range(1, 51)) + + def testChainedParallelDatasets(self): + dataset = dataset_ops.Dataset.range(100) + dataset = dataset.apply( + optimization.assert_next([ + "ParallelMap", "Prefetch", "ParallelInterleaveV2", "Prefetch", + "MapAndBatch", "Prefetch", "FiniteTake" + ])) + dataset = dataset.map( + lambda x: x + 1, num_parallel_calls=dataset_ops.AUTOTUNE) + dataset = dataset.interleave( + lambda x: dataset_ops.Dataset.from_tensors(x + 1), + num_parallel_calls=dataset_ops.AUTOTUNE) + dataset = dataset.map( + lambda x: x + 1, num_parallel_calls=dataset_ops.AUTOTUNE) + dataset = dataset.batch(1) + dataset = dataset.take(50) + dataset = self._enable_autotune_buffers(dataset) + self.assertDatasetProduces(dataset, [[i] for i in range(3, 53)]) + + def testNoRegularMap(self): + dataset = dataset_ops.Dataset.range(100) + dataset = dataset.apply(optimization.assert_next(["Map", "FiniteTake"])) + dataset = dataset.map(lambda x: x + 1).take(50) + dataset = self._enable_autotune_buffers(dataset) + self.assertDatasetProduces(dataset, range(1, 51)) + +if __name__ == "__main__": + test.main() diff --git 
a/tensorflow/python/data/experimental/ops/optimization_options.py b/tensorflow/python/data/experimental/ops/optimization_options.py index 5168effaf9f..094b11c76fb 100644 --- a/tensorflow/python/data/experimental/ops/optimization_options.py +++ b/tensorflow/python/data/experimental/ops/optimization_options.py @@ -90,6 +90,14 @@ class OptimizationOptions(options.OptionsBase): "When autotuning is enabled (through `autotune`), identifies the " "algorithm to use for the autotuning optimization.") + autotune_buffers = options.create_option( + name="autotune_buffers", + ty=bool, + docstring= + "When autotuning is enabled (through `autotune`), determines whether to " + "also autotune buffer sizes for datasets with parallelism. If None," + " defaults to False.") + autotune_cpu_budget = options.create_option( name="autotune_cpu_budget", ty=int, @@ -205,6 +213,9 @@ class OptimizationOptions(options.OptionsBase): if self.map_vectorization is not None: result.update(self.map_vectorization._static_optimizations()) # pylint: disable=protected-access + + if self.autotune is not False and self.autotune_buffers: # pylint: disable=g-bool-id-comparison + result.add("inject_prefetch") return sorted(list(result)) def _static_optimization_configs(self): diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index cb164d7fbee..e61f8df7d5c 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -97,6 +97,7 @@ tf_export("data.experimental.AUTOTUNE").export_constant(__name__, "AUTOTUNE") class AutotuneAlgorithm(enum.Enum): HILL_CLIMB = 0 + GRADIENT_DESCENT = 1 @tf_export("data.Dataset", v1=[]) diff --git a/tensorflow/python/debug/lib/grpc_debug_server.py b/tensorflow/python/debug/lib/grpc_debug_server.py index 1b559f1f275..d7eece43310 100644 --- a/tensorflow/python/debug/lib/grpc_debug_server.py +++ b/tensorflow/python/debug/lib/grpc_debug_server.py @@ -346,7 +346,10 @@ class 
EventListenerBaseServicer(debug_service_pb2_grpc.EventListenerServicer): if self._server_started: raise ValueError("Server has already started running") - self.server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) + no_max_message_sizes = [("grpc.max_receive_message_length", -1), + ("grpc.max_send_message_length", -1)] + self.server = grpc.server(futures.ThreadPoolExecutor(max_workers=10), + options=no_max_message_sizes) debug_service_pb2_grpc.add_EventListenerServicer_to_server(self, self.server) self.server.add_insecure_port("[::]:%d" % self._server_port) diff --git a/tensorflow/python/debug/lib/source_remote.py b/tensorflow/python/debug/lib/source_remote.py index 07de9f9cbd2..e0a3695df43 100644 --- a/tensorflow/python/debug/lib/source_remote.py +++ b/tensorflow/python/debug/lib/source_remote.py @@ -28,7 +28,6 @@ from tensorflow.python.debug.lib import common from tensorflow.python.debug.lib import debug_service_pb2_grpc from tensorflow.python.debug.lib import source_utils from tensorflow.python.platform import gfile -from tensorflow.python.platform import tf_logging from tensorflow.python.profiler import tfprof_logger @@ -96,11 +95,6 @@ def _source_file_paths_outside_tensorflow_py_library(code_defs, id_to_string): return non_tf_files -def grpc_message_length_bytes(): - """Maximum gRPC message length in bytes.""" - return 4 * 1024 * 1024 - - def _send_call_tracebacks(destinations, origin_stack, is_eager_execution=False, @@ -169,20 +163,14 @@ def _send_call_tracebacks(destinations, debugged_source_files.append(source_files) for destination in destinations: - channel = grpc.insecure_channel(destination) + no_max_message_sizes = [("grpc.max_receive_message_length", -1), + ("grpc.max_send_message_length", -1)] + channel = grpc.insecure_channel(destination, options=no_max_message_sizes) stub = debug_service_pb2_grpc.EventListenerStub(channel) stub.SendTracebacks(call_traceback) if send_source: - for path, source_files in zip( - source_file_paths, 
debugged_source_files): - if source_files.ByteSize() < grpc_message_length_bytes(): - stub.SendSourceFiles(source_files) - else: - tf_logging.warn( - "The content of the source file at %s is not sent to " - "gRPC debug server %s, because the message size exceeds " - "gRPC message length limit (%d bytes)." % ( - path, destination, grpc_message_length_bytes())) + for source_files in debugged_source_files: + stub.SendSourceFiles(source_files) def send_graph_tracebacks(destinations, diff --git a/tensorflow/python/debug/lib/source_remote_test.py b/tensorflow/python/debug/lib/source_remote_test.py index dce400c9ab0..14c8e744934 100644 --- a/tensorflow/python/debug/lib/source_remote_test.py +++ b/tensorflow/python/debug/lib/source_remote_test.py @@ -21,6 +21,8 @@ from __future__ import print_function import os import traceback +import grpc + from tensorflow.core.debug import debug_service_pb2 from tensorflow.python.client import session from tensorflow.python.debug.lib import grpc_debug_test_server @@ -129,9 +131,17 @@ class SendTracebacksTest(test_util.TensorFlowTestCase): send_traceback = traceback.extract_stack() send_lineno = line_number_above() - source_remote.send_graph_tracebacks( - [self._server_address, self._server_address_2], - "dummy_run_key", send_traceback, sess.graph) + + with test.mock.patch.object( + grpc, "insecure_channel", + wraps=grpc.insecure_channel) as mock_grpc_channel: + source_remote.send_graph_tracebacks( + [self._server_address, self._server_address_2], + "dummy_run_key", send_traceback, sess.graph) + mock_grpc_channel.assert_called_with( + test.mock.ANY, + options=[("grpc.max_receive_message_length", -1), + ("grpc.max_send_message_length", -1)]) servers = [self._server, self._server_2] for server in servers: @@ -157,51 +167,6 @@ class SendTracebacksTest(test_util.TensorFlowTestCase): self.assertEqual(["dummy_run_key"], server.query_call_keys()) self.assertEqual([sess.graph.version], server.query_graph_versions()) - def 
testSourceFileSizeExceedsGrpcMessageLengthLimit(self): - """In case source file size exceeds the grpc message length limit. - - it ought not to have been sent to the server. - """ - this_func_name = "testSourceFileSizeExceedsGrpcMessageLengthLimit" - - # Patch the method to simulate a very small message length limit. - with test.mock.patch.object( - source_remote, "grpc_message_length_bytes", return_value=2): - with session.Session() as sess: - a = variables.Variable(21.0, name="two/a") - a_lineno = line_number_above() - b = variables.Variable(2.0, name="two/b") - b_lineno = line_number_above() - x = math_ops.add(a, b, name="two/x") - x_lineno = line_number_above() - - send_traceback = traceback.extract_stack() - send_lineno = line_number_above() - source_remote.send_graph_tracebacks( - [self._server_address, self._server_address_2], - "dummy_run_key", send_traceback, sess.graph) - - servers = [self._server, self._server_2] - for server in servers: - # Even though the source file content is not sent, the traceback - # should have been sent. - tb = server.query_op_traceback("two/a") - self.assertIn((self._curr_file_path, a_lineno, this_func_name), tb) - tb = server.query_op_traceback("two/b") - self.assertIn((self._curr_file_path, b_lineno, this_func_name), tb) - tb = server.query_op_traceback("two/x") - self.assertIn((self._curr_file_path, x_lineno, this_func_name), tb) - - self.assertIn( - (self._curr_file_path, send_lineno, this_func_name), - server.query_origin_stack()[-1]) - - tf_trace_file_path = ( - self._findFirstTraceInsideTensorFlowPyLibrary(x.op)) - # Verify that the source content is not sent to the server. 
- with self.assertRaises(ValueError): - self._server.query_source_file_line(tf_trace_file_path, 0) - def testSendEagerTracebacksToSingleDebugServer(self): this_func_name = "testSendEagerTracebacksToSingleDebugServer" send_traceback = traceback.extract_stack() @@ -213,6 +178,20 @@ class SendTracebacksTest(test_util.TensorFlowTestCase): self.assertIn((self._curr_file_path, send_lineno, this_func_name), self._server.query_origin_stack()[-1]) + def testGRPCServerMessageSizeLimit(self): + """Assert gRPC debug server is started with unlimited message size.""" + with test.mock.patch.object( + grpc, "server", wraps=grpc.server) as mock_grpc_server: + (_, _, _, server_thread, + server) = grpc_debug_test_server.start_server_on_separate_thread( + poll_server=True) + mock_grpc_server.assert_called_with( + test.mock.ANY, + options=[("grpc.max_receive_message_length", -1), + ("grpc.max_send_message_length", -1)]) + server.stop_server().wait() + server_thread.join() + if __name__ == "__main__": googletest.main() diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index 28f3493ecc4..4f750277e44 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -963,6 +963,7 @@ distribute_py_test( deps = [ ":single_loss_example", "//tensorflow/contrib/tpu:tpu_lib", + "//tensorflow/python:framework_test_lib", "//tensorflow/python:variables", "//tensorflow/python/distribute:combinations", "//tensorflow/python/distribute:strategy_combinations", diff --git a/tensorflow/python/distribute/collective_all_reduce_strategy.py b/tensorflow/python/distribute/collective_all_reduce_strategy.py index e858b6a57fc..320dea1e840 100644 --- a/tensorflow/python/distribute/collective_all_reduce_strategy.py +++ b/tensorflow/python/distribute/collective_all_reduce_strategy.py @@ -157,12 +157,12 @@ class CollectiveAllReduceExtended(mirrored_strategy.MirroredExtended): self._host_input_device = numpy_dataset.SingleDevice(self._worker_device) 
self._collective_keys = cross_device_utils.CollectiveKeys() - super(CollectiveAllReduceExtended, self)._initialize_local(local_devices) # TODO(yuefengz): remove num_gpus_per_worker from CollectiveAllReduce. self._cross_device_ops = cross_device_ops_lib.CollectiveAllReduce( num_workers=self._num_workers, num_gpus_per_worker=num_gpus, collective_keys=self._collective_keys) + super(CollectiveAllReduceExtended, self)._initialize_local(local_devices) self._cluster_spec = None self._task_type = None @@ -257,13 +257,13 @@ class CollectiveAllReduceExtended(mirrored_strategy.MirroredExtended): local_devices = (self._worker_device,) self._collective_keys = cross_device_utils.CollectiveKeys() - super(CollectiveAllReduceExtended, self)._initialize_local(local_devices) - self._input_workers = input_lib.InputWorkers( - self._device_map, [(self._worker_device, self.worker_devices)]) self._cross_device_ops = cross_device_ops_lib.CollectiveAllReduce( num_workers=self._num_workers, num_gpus_per_worker=num_gpus, collective_keys=self._collective_keys) + super(CollectiveAllReduceExtended, self)._initialize_local(local_devices) + self._input_workers = input_lib.InputWorkers( + self._device_map, [(self._worker_device, self.worker_devices)]) # Add a default device so that ops without specified devices will not end up # on other workers. 
diff --git a/tensorflow/python/distribute/distribute_lib.py b/tensorflow/python/distribute/distribute_lib.py index cbb394af530..629994a7353 100644 --- a/tensorflow/python/distribute/distribute_lib.py +++ b/tensorflow/python/distribute/distribute_lib.py @@ -618,7 +618,7 @@ class Strategy(object): # Create a dataset dataset = dataset_ops.Dataset.TFRecordDataset([ - "/a/1.tfr", "/a/2.tfr", "/a/3.tfr", /a/4.tfr"]) + "/a/1.tfr", "/a/2.tfr", "/a/3.tfr", "/a/4.tfr"]) # Distribute that dataset dist_dataset = strategy.experimental_distribute_dataset(dataset) diff --git a/tensorflow/python/distribute/mirrored_strategy.py b/tensorflow/python/distribute/mirrored_strategy.py index b3552171b30..811bd2541e8 100644 --- a/tensorflow/python/distribute/mirrored_strategy.py +++ b/tensorflow/python/distribute/mirrored_strategy.py @@ -381,7 +381,9 @@ class MirroredExtended(distribute_lib.StrategyExtendedV1): self._cross_device_ops = cross_device_ops self._initialize_strategy(devices) - self.experimental_enable_get_next_as_optional = True + # TODO(b/128995245): Enable last partial batch support in graph mode. 
+ if ops.executing_eagerly_outside_functions(): + self.experimental_enable_get_next_as_optional = True def _initialize_strategy(self, devices): # The _initialize_strategy method is intended to be used by distribute @@ -400,8 +402,8 @@ class MirroredExtended(distribute_lib.StrategyExtendedV1): self._local_mode = True self._device_map = values.ReplicaDeviceMap(devices) self._input_workers = input_lib.InputWorkers(self._device_map) - self._inferred_cross_device_ops = cross_device_ops_lib.choose_the_best( - devices) + self._inferred_cross_device_ops = None if self._cross_device_ops else ( + cross_device_ops_lib.choose_the_best(devices)) self._host_input_device = numpy_dataset.SingleDevice("/cpu:0") def _initialize_multi_worker(self, devices): diff --git a/tensorflow/python/distribute/step_fn_test.py b/tensorflow/python/distribute/step_fn_test.py index 3a1a7e6eca4..28e6ad28c77 100644 --- a/tensorflow/python/distribute/step_fn_test.py +++ b/tensorflow/python/distribute/step_fn_test.py @@ -25,9 +25,11 @@ from tensorflow.python.distribute import strategy_combinations from tensorflow.python.distribute.single_loss_example import single_loss_example from tensorflow.python.eager import context from tensorflow.python.eager import test +from tensorflow.python.framework import test_util from tensorflow.python.ops import variables +@test_util.with_control_flow_v2 class SingleLossStepTest(test.TestCase, parameterized.TestCase): @combinations.generate( diff --git a/tensorflow/python/distribute/tpu_strategy.py b/tensorflow/python/distribute/tpu_strategy.py index 9c19e80a5d4..7aa99b9a8c4 100644 --- a/tensorflow/python/distribute/tpu_strategy.py +++ b/tensorflow/python/distribute/tpu_strategy.py @@ -105,10 +105,9 @@ class TPUStrategy(distribute_lib.Strategy): # This implementation runs a single step. It does not use infeed or outfeed. 
def experimental_run_v2(self, fn, args=(), kwargs=None): """See base class.""" - # tf.distribute supports Eager functions, so AutoGraph should not be applied - # when when the caller is also in Eager mode. - fn = autograph.tf_convert(fn, ag_ctx.control_status_ctx(), - convert_by_default=False) + # Note: the target function is converted to graph even when in Eager mode, + # so autograph is on by default here. + fn = autograph.tf_convert(fn, ag_ctx.control_status_ctx()) return self.extended.tpu_run(fn, args, kwargs) diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index 16b1663f71b..8cefd5a01c8 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -640,7 +640,7 @@ tf_py_test( ], ) -tf_py_test( +cuda_py_test( name = "def_function_test", srcs = ["def_function_test.py"], additional_deps = [ diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index 3bebc8341ca..d6b88bcf4f8 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -1334,6 +1334,9 @@ class BackpropTest(test.TestCase): @test_util.run_in_graph_and_eager_modes def testMaxPooling3DGradient(self): + if test.is_built_with_rocm(): + self.skipTest('Pooling with 3D tensors is not supported in ROCm') + def forward(a): r = max_pooling3d(a, pool_size=pool_size, strides=strides, padding='SAME') return r diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index 1799969e720..e773e0dfcd2 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -935,6 +935,26 @@ class MicroBenchmarks(test.Benchmark): self._run(fn, 10000) + def benchmark_convert_3x_list_to_tensor(self): + xs = [1, 2, 3] + self._run(lambda: ops.convert_to_tensor(xs), 1000) + + def benchmark_convert_3x_array_to_tensor(self): + xs = np.array([1, 2, 3], dtype=np.int32) + self._run(lambda: ops.convert_to_tensor(xs), 1000) 
+ + def benchmark_constant_40x2_list_to_tensor(self): + xs = [[0] * 2] * 40 + self._run(lambda: constant_op.constant(xs), 1000) + + def benchmark_constant_40x2_array_to_tensor(self): + xs = np.array([[0] * 2] * 40, dtype=np.int32) + self._run(lambda: constant_op.constant(xs), 1000) + + def benchmark_constant_40x_list_of_2x_arrays_to_tensor(self): + xs = [np.array([0] * 2, dtype=np.int32)] * 40 + self._run(lambda: constant_op.constant(xs), 1000) + def _benchmarkFunctionWithResourceInputs(self, num_resources, num_iters): @def_function.function def add_all(*args): diff --git a/tensorflow/python/eager/def_function_test.py b/tensorflow/python/eager/def_function_test.py index ccad29baf4d..4a7d6fe4e9e 100644 --- a/tensorflow/python/eager/def_function_test.py +++ b/tensorflow/python/eager/def_function_test.py @@ -578,6 +578,7 @@ class DefFunctionTest(test.TestCase): v_holder[1].assign(11.) self.assertAllClose([14., 15.], wrapper(constant_op.constant(2.))) + # TODO(b/137148281): reenable @test_util.run_gpu_only def testDeviceAnnotationRespected(self): a = [] @@ -589,13 +590,13 @@ class DefFunctionTest(test.TestCase): (2, 2), maxval=1000000, dtype=dtypes.int64) if not a: - with ops.device("CPU:0"): + with ops.device('CPU:0'): a.append(resource_variable_ops.ResourceVariable(initial_value)) return a[0].read_value() created_variable_read = create_variable() - self.assertRegexpMatches(created_variable_read.device, "CPU") + self.assertRegexpMatches(a[0].device, 'CPU') def testDecorate(self): func = def_function.function(lambda: 1) diff --git a/tensorflow/python/eager/forwardprop.py b/tensorflow/python/eager/forwardprop.py index e2ebb594aea..ae29d4c63ce 100644 --- a/tensorflow/python/eager/forwardprop.py +++ b/tensorflow/python/eager/forwardprop.py @@ -40,6 +40,24 @@ class ForwardGradientAccumulator(object): y = tf.reduce_sum(tf.sin(x) * tf.tan(x), axis=1) jvp = acc.jvp(y) ``` + + Note that `ForwardGradientAccumulator`s are always applied in creation order, + so inner accumulators 
may not see JVP computation from outer + accumulators. Take higher-order gradients from outer accumulators: + + ``` + primal = tf.constant(1.1) + with ForwardGradientAccumulator() as outer_acc: + outer_acc.watch(primal, tf.constant(1.)) + with ForwardGradientAccumulator() as acc: + acc.watch(primal, tf.constant(1.)) + primal_out = primal ** tf.constant(3.5) + inner_jvp = acc.jvp(primal_out) + outer_jvp = outer_acc.jvp(inner_jvp) + ``` + + Reversing the collection in the last two lines to instead retrieve + `acc.jvp(outer_acc.jvp(primal_out))` will not work. """ def __init__(self): @@ -70,6 +88,9 @@ class ForwardGradientAccumulator(object): pywrap_tensorflow.TFE_Py_ForwardAccumulatorSetRemove(self._accumulator) self._recording = False + # TODO(allenl): Does this need to be public, or should the constructor instead + # take all watched Tensors? Write a realistic usage example (e.g. Hessian-free + # optimization) and decide. def watch(self, tensor, tangents): """Ensures that `tensor` is being traced by this tape. diff --git a/tensorflow/python/eager/forwardprop_test.py b/tensorflow/python/eager/forwardprop_test.py index d3896a6200f..fdba6f8122c 100644 --- a/tensorflow/python/eager/forwardprop_test.py +++ b/tensorflow/python/eager/forwardprop_test.py @@ -191,6 +191,50 @@ class ForwardpropTest(test.TestCase): _test_gradients(self, f, [constant_op.constant([1.])], order=3) + @test_util.assert_no_new_pyobjects_executing_eagerly + def testHigherOrderPureForward(self): + + def _forwardgrad(f): + def _compute_forwardgrad(primal): + tangent = constant_op.constant(1.) 
+ with forwardprop.ForwardGradientAccumulator() as acc: + acc.watch(primal, tangent) + primal_out = f(primal) + return acc.jvp(primal_out) + return _compute_forwardgrad + + def _forward(x): + return x ** 3.5 + + f = _forward + primal = constant_op.constant(1.1) + for expected in [1.1 ** 3.5, + 3.5 * 1.1 ** 2.5, + 3.5 * 2.5 * 1.1 ** 1.5, + 3.5 * 2.5 * 1.5 * 1.1 ** 0.5, + 3.5 * 2.5 * 1.5 * 0.5 * 1.1 ** -0.5]: + self.assertAllClose(expected, f(primal)) + f = _forwardgrad(f) + + def testFunctionGradPureForward(self): + + @def_function.function + def f(x): + return x ** 3.5 + + primal = constant_op.constant(1.1) + with forwardprop.ForwardGradientAccumulator() as outer_acc: + outer_acc.watch(primal, constant_op.constant(1.)) + with forwardprop.ForwardGradientAccumulator() as acc: + acc.watch(primal, constant_op.constant(1.)) + primal_out = f(primal) + inner_jvp = acc.jvp(primal_out) + outer_jvp = outer_acc.jvp(inner_jvp) + self.assertAllClose(1.1 ** 3.5, primal_out) + self.assertAllClose(3.5 * 1.1 ** 2.5, inner_jvp) + self.assertAllClose(3.5 * 2.5 * 1.1 ** 1.5, outer_jvp) + self.assertIsNone(acc.jvp(outer_acc.jvp(primal_out))) + def testFunctionGrad(self): @def_function.function @@ -201,8 +245,7 @@ class ForwardpropTest(test.TestCase): self, f, [constant_op.constant([1., 2.])], - # TODO(allenl): figure out why functions aren't N times differentiable - order=1) + order=3) @test_util.assert_no_new_pyobjects_executing_eagerly def testHVPMemory(self): diff --git a/tensorflow/python/eager/pywrap_tensor.cc b/tensorflow/python/eager/pywrap_tensor.cc index 3c129fe80bd..ab512e9e630 100644 --- a/tensorflow/python/eager/pywrap_tensor.cc +++ b/tensorflow/python/eager/pywrap_tensor.cc @@ -657,10 +657,11 @@ static PyObject* EagerTensor_copy_to_device(EagerTensor* self, PyObject* args, // other. // Note that if `self` is not on CPU, we raise an Exception. 
static PyObject* EagerTensor_numpy(EagerTensor* self) { - auto status = tensorflow::make_safe(TF_NewStatus()); - auto* py_array = TFE_TensorHandleToNumpy(self->handle, status.get()); - if (MaybeRaiseExceptionFromTFStatus(status.get(), PyExc_ValueError)) { + auto* py_array = TFE_TensorHandleToNumpy(self->handle, self->status); + if (MaybeRaiseExceptionFromTFStatus(self->status, PyExc_ValueError)) { Py_XDECREF(py_array); + // Cleanup self->status before returning. + TF_SetStatus(self->status, TF_OK, ""); return nullptr; } else { return PyArray_Return(reinterpret_cast(py_array)); @@ -745,13 +746,14 @@ static int EagerTensor_getbuffer(EagerTensor* self, Py_buffer* view, return -1; } - auto status = tensorflow::make_safe(TF_NewStatus()); // TensorHandleToNumpy is zero-copy for everything but DT_RESOURCE and // DT_STRING so the following is only slightly slower than a NumPy-free // implementation. auto py_array = tensorflow::make_safe( - TFE_TensorHandleToNumpy(self->handle, status.get())); - if (MaybeRaiseExceptionFromTFStatus(status.get(), PyExc_BufferError)) { + TFE_TensorHandleToNumpy(self->handle, self->status)); + if (MaybeRaiseExceptionFromTFStatus(self->status, PyExc_BufferError)) { + // Cleanup self->status before returning. + TF_SetStatus(self->status, TF_OK, ""); return -1; } if (PyObject_GetBuffer(py_array.get(), view, flags) < 0) { diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 4b959d9f17e..643891ecf8b 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -1367,14 +1367,57 @@ tensorflow::gtl::CompactPointerSet* GetTapeSet() { return tape_set.get(); } -tensorflow::gtl::CompactPointerSet* -GetAccumulatorSet() { - thread_local std::unique_ptr< - tensorflow::gtl::CompactPointerSet> - accumulator_set{nullptr}; +// A linked hash set, where iteration is in insertion order. 
+// +// Nested accumulators rely on op recording happening in insertion order, so an +// unordered data structure like CompactPointerSet is not suitable. Outer +// accumulators need to observe operations first so they know to watch the inner +// accumulator's jvp computation. +// +// Not thread safe. +class AccumulatorSet { + public: + void insert(TFE_Py_ForwardAccumulator* element) { + if (map_.find(element) != map_.end()) { + return; + } + ListType::iterator it = ordered_.insert(ordered_.end(), element); + map_.insert(std::make_pair(element, it)); + } + + void erase(TFE_Py_ForwardAccumulator* element) { + MapType::iterator existing = map_.find(element); + if (existing == map_.end()) { + return; + } + ListType::iterator list_position = existing->second; + map_.erase(existing); + ordered_.erase(list_position); + } + + bool empty() const { return ordered_.empty(); } + + private: + typedef std::list ListType; + typedef tensorflow::gtl::FlatMap + MapType; + + public: + typedef ListType::const_iterator const_iterator; + const_iterator begin() const { return ordered_.begin(); } + + const_iterator end() const { return ordered_.end(); } + + private: + MapType map_; + ListType ordered_; +}; + +AccumulatorSet* GetAccumulatorSet() { + thread_local std::unique_ptr accumulator_set{nullptr}; if (accumulator_set == nullptr) { - accumulator_set.reset( - new tensorflow::gtl::CompactPointerSet); + accumulator_set.reset(new AccumulatorSet); } return accumulator_set.get(); } @@ -1385,12 +1428,10 @@ inline bool HasTape() { return !GetTapeSet()->empty() || HasAccumulator(); } // A safe copy of a set, used for tapes and accumulators. The copy is not // affected by other python threads changing the set of active tapes. 
-template +template class SafeSetCopy { public: - explicit SafeSetCopy( - const tensorflow::gtl::CompactPointerSet& to_copy) - : set_copy_(to_copy) { + explicit SafeSetCopy(const ContainerType& to_copy) : set_copy_(to_copy) { for (auto* member : set_copy_) { Py_INCREF(member); } @@ -1402,31 +1443,29 @@ class SafeSetCopy { } } - typename tensorflow::gtl::CompactPointerSet::const_iterator - begin() const { + typename ContainerType::const_iterator begin() const { return set_copy_.begin(); } - typename tensorflow::gtl::CompactPointerSet::const_iterator end() - const { - return set_copy_.end(); - } + typename ContainerType::const_iterator end() const { return set_copy_.end(); } bool empty() const { return set_copy_.empty(); } private: - tensorflow::gtl::CompactPointerSet set_copy_; + ContainerType set_copy_; }; -class SafeTapeSet : public SafeSetCopy { +class SafeTapeSet + : public SafeSetCopy> { public: - SafeTapeSet() : SafeSetCopy(*GetTapeSet()) {} + SafeTapeSet() + : SafeSetCopy>( + *GetTapeSet()) {} }; -class SafeAccumulatorSet : public SafeSetCopy { +class SafeAccumulatorSet : public SafeSetCopy { public: - SafeAccumulatorSet() - : SafeSetCopy(*GetAccumulatorSet()) {} + SafeAccumulatorSet() : SafeSetCopy(*GetAccumulatorSet()) {} }; bool* ThreadTapeIsStopped() { diff --git a/tensorflow/python/feature_column/feature_column_v2_test.py b/tensorflow/python/feature_column/feature_column_v2_test.py index 4031589cf2b..f56c01bd198 100644 --- a/tensorflow/python/feature_column/feature_column_v2_test.py +++ b/tensorflow/python/feature_column/feature_column_v2_test.py @@ -265,11 +265,13 @@ class NumericColumnTest(test.TestCase): self.assertEqual(((3., 2.),), a.default_value) def test_shape_and_default_value_compatibility(self): - fc.numeric_column('aaa', shape=[2], default_value=[1, 2.]) + a = fc.numeric_column('aaa', shape=[2], default_value=[1, 2.]) + self.assertEqual((1, 2.), a.default_value) with self.assertRaisesRegexp(ValueError, 'The shape of default_value'): 
fc.numeric_column('aaa', shape=[2], default_value=[1, 2, 3.]) - fc.numeric_column( - 'aaa', shape=[3, 2], default_value=[[2, 3], [1, 2], [2, 3.]]) + a = fc.numeric_column( + 'aaa', shape=[3, 2], default_value=[[2, 3], [1, 2], [2, 3.]]) + self.assertEqual(((2, 3), (1, 2), (2, 3.)), a.default_value) with self.assertRaisesRegexp(ValueError, 'The shape of default_value'): fc.numeric_column( 'aaa', shape=[3, 1], default_value=[[2, 3], [1, 2], [2, 3.]]) @@ -858,8 +860,11 @@ class HashedCategoricalColumnTest(test.TestCase): fc.categorical_column_with_hash_bucket('aaa', 0) def test_dtype_should_be_string_or_integer(self): - fc.categorical_column_with_hash_bucket('aaa', 10, dtype=dtypes.string) - fc.categorical_column_with_hash_bucket('aaa', 10, dtype=dtypes.int32) + a = fc.categorical_column_with_hash_bucket('aaa', 10, dtype=dtypes.string) + b = fc.categorical_column_with_hash_bucket('aaa', 10, dtype=dtypes.int32) + self.assertEqual(dtypes.string, a.dtype) + self.assertEqual(dtypes.int32, b.dtype) + with self.assertRaisesRegexp(ValueError, 'dtype must be string or integer'): fc.categorical_column_with_hash_bucket('aaa', 10, dtype=dtypes.float32) diff --git a/tensorflow/python/framework/convert_to_constants.py b/tensorflow/python/framework/convert_to_constants.py index 8443946e5ac..5eb4dec34d0 100644 --- a/tensorflow/python/framework/convert_to_constants.py +++ b/tensorflow/python/framework/convert_to_constants.py @@ -24,14 +24,47 @@ from tensorflow.core.framework import variable_pb2 from tensorflow.core.protobuf import config_pb2 from tensorflow.core.protobuf import meta_graph_pb2 from tensorflow.python.eager import wrap_function +from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_util from tensorflow.python.grappler import tf_optimizer from tensorflow.python.ops import array_ops -from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training.saver import export_meta_graph -def 
_run_inline_graph_optimization(func): +# TODO(nupurgarg): Handle StatelessIf op. +_CONTROL_FLOW_OPS = set(["If", "While"]) + + +def disable_lower_using_switch_merge(graph_def): + """Set '_lower_using_switch_merge' attributes to False in If and While ops. + + Sets the attribute to False in the NodeDefs in the main graph and the NodeDefs + in each function's graph. + + Args: + graph_def: GraphDef proto. + + Returns: + GraphDef + """ + output_graph_def = graph_pb2.GraphDef() + output_graph_def.CopyFrom(graph_def) + + def disable_control_flow_lowering(node): + if node.op in _CONTROL_FLOW_OPS: + node.attr["_lower_using_switch_merge"].b = False + + for node in output_graph_def.node: + disable_control_flow_lowering(node) + + if output_graph_def.library: + for func in output_graph_def.library.function: + for node in func.node_def: + disable_control_flow_lowering(node) + return output_graph_def + + +def _run_inline_graph_optimization(func, lower_control_flow): """Apply function inline optimization to the graph. Returns the GraphDef after Grappler's function inlining optimization is @@ -39,12 +72,16 @@ def _run_inline_graph_optimization(func): Args: func: ConcreteFunction. + lower_control_flow: Boolean indicating whether or not to lower control flow + ops such as If and While. (default True) Returns: GraphDef """ - meta_graph = export_meta_graph( - graph_def=func.graph.as_graph_def(), graph=func.graph) + graph_def = func.graph.as_graph_def() + if not lower_control_flow: + graph_def = disable_lower_using_switch_merge(graph_def) + meta_graph = export_meta_graph(graph_def=graph_def, graph=func.graph) # Clear the initializer_name for the variables collections, since they are not # needed after saved to saved_model. @@ -73,25 +110,6 @@ def _run_inline_graph_optimization(func): return tf_optimizer.OptimizeGraph(config, meta_graph) -def _get_tensors_from_graph(graph, tensors): - """Gets the Tensors in `graph` with the name of the tensors in `tensors`. 
- - Args: - graph: TensorFlow Graph. - tensors: List of Tensors. - - Returns: - List of Tensors. - """ - new_tensors = [] - for orig_tensor in tensors: - new_tensor = graph.get_tensor_by_name(orig_tensor.name) - if new_tensor.shape.rank is None: - new_tensor.set_shape(orig_tensor.shape) - new_tensors.append(new_tensor) - return new_tensors - - def _get_tensor_name(name): """Returns the name of the input tensor. @@ -104,6 +122,44 @@ def _get_tensor_name(name): return name.split(":")[0] +def _get_new_function_name(name): + """Returns the function name with '_frozen' appended. + + Args: + name: str + + Returns: + str + """ + return name + "_frozen" + + +def _get_node_defs_list(graph_def): + """Returns a list of NodeDefs in the GraphDef. + + This list consists of all NodeDefs in the main graph as well as all control + flow NodeDefs in the functions. + + The remaining NodeDefs in the functions are not included because the op names + are not unique and the variables are handled differently than the main graph. + The control flow ops need to be extracted because they are need their + attributes to be updated similar to the control flow ops in the main graph. + + Args: + graph_def: GraphDef proto. + + Returns: + [NodeDef] + """ + node_defs = list(graph_def.node) + + if graph_def.library: + for func in graph_def.library.function: + node_defs.extend( + [node for node in func.node_def if node.op in _CONTROL_FLOW_OPS]) + return node_defs + + def _get_tensor_data(func): """Gets the tensor data for all Placeholders in the model. @@ -140,6 +196,39 @@ def _get_tensor_data(func): return tensor_data +def _get_control_flow_function_types(node_defs, tensor_data): + """Gets the types for the parameters to the function. + + Creates a map from function name to a list of types that correspond with the + function arguments. The type is primarily determined from the corresponding + "If" op. 
If the argument is a resource variable, then the type is determined + from the type of the data contained within the Tensor. + + Args: + node_defs: List of NodeDefs. + tensor_data: {str name : Tensor}. + + Returns: + {str function name : [int representing DataType]} + """ + # TODO(b/133793620): Support the "While" op. + func_types = {} + for node in node_defs: + if node.op == "If": + arg_types = [dtype for dtype in node.attr["Tin"].list.type] + + for idx in range(len(arg_types)): + if arg_types[idx] == dtypes.resource: + # Skip first index which represents the condition. + input_name = node.input[idx + 1] + numpy_type = tensor_data[input_name]["data"].dtype + arg_types[idx] = dtypes.as_dtype(numpy_type).as_datatype_enum + + func_types[node.attr["then_branch"].func.name] = arg_types + func_types[node.attr["else_branch"].func.name] = arg_types + return func_types + + def _populate_const_op(output_node, node_name, dtype, data, data_shape): """Creates a Const op. @@ -158,6 +247,39 @@ def _populate_const_op(output_node, node_name, dtype, data, data_shape): output_node.attr["value"].tensor.CopyFrom(tensor) +def _populate_identity_op(output_node, input_node): + """Creates an Identity op from a ReadVariable op. + + Args: + output_node: TensorFlow NodeDef. + input_node: TensorFlow NodeDef. + """ + output_node.op = "Identity" + output_node.name = input_node.name + output_node.input.append(input_node.input[0]) + output_node.attr["T"].CopyFrom(input_node.attr["dtype"]) + if "_class" in input_node.attr: + output_node.attr["_class"].CopyFrom(input_node.attr["_class"]) + + +def _populate_if_op(output_node, input_node, function_types): + """Updates the type attributes and the function names of the If op. + + Args: + output_node: TensorFlow NodeDef. + input_node: TensorFlow NodeDef. + function_types: Map of function names to the list of DataTypes that + correspond with the function arguments. 
+ """ + output_node.CopyFrom(input_node) + then_func = input_node.attr["then_branch"].func.name + output_node.attr["then_branch"].func.name = _get_new_function_name(then_func) + output_node.attr["else_branch"].func.name = _get_new_function_name( + input_node.attr["else_branch"].func.name) + output_node.attr["Tin"].list.CopyFrom( + attr_value_pb2.AttrValue.ListValue(type=function_types[then_func])) + + def _construct_concrete_function(func, output_graph_def, converted_input_indices): """Constructs a concrete function from the `output_graph_def`. @@ -193,7 +315,7 @@ def _construct_concrete_function(func, output_graph_def, return new_func -def convert_variables_to_constants_v2(func): +def convert_variables_to_constants_v2(func, lower_control_flow=True): """Replaces all the variables in a graph with constants of the same values. TensorFlow 2.0 function for converting all Variable ops into Const ops holding @@ -207,21 +329,29 @@ def convert_variables_to_constants_v2(func): Args: func: ConcreteFunction. + lower_control_flow: Boolean indicating whether or not to lower control flow + ops such as If and While. (default True) Returns: ConcreteFunction containing a simplified version of the original. """ # TODO(nupurgarg): Replace ResourceGather with Gather. - # TODO(nupurgarg): Change attr for Variables in control flow and functions. - graph_def = _run_inline_graph_optimization(func) + # Inline the graph in order to remove functions when possible. + graph_def = _run_inline_graph_optimization(func, lower_control_flow) + + # Gets list of all node defs include those in the library. + node_defs = _get_node_defs_list(graph_def) # Get mapping from node name to node. - name_to_node = {_get_tensor_name(node.name): node for node in graph_def.node} + name_to_node = {_get_tensor_name(node.name): node for node in node_defs} # Get mapping from node name to variable value. tensor_data = _get_tensor_data(func) - # Get variable data. + # Get mapping from function name to argument types. 
+ function_types = _get_control_flow_function_types(node_defs, tensor_data) + + # Get variable data for all nodes in `node_defs`. reference_variables = {} resource_identities = {} placeholders = {} @@ -234,7 +364,16 @@ def convert_variables_to_constants_v2(func): } converted_input_indices.add(tensor_data[node_name]["index"]) - for node in graph_def.node: + for node in node_defs: + if node.op == "If": + # Get dtype and data for resource Placeholders. + then_func = node.attr["then_branch"].func.name + arg_types = function_types[then_func] + for idx, input_tensor in enumerate(node.input[1:]): + input_name = _get_tensor_name(input_tensor) + if input_name in tensor_data: + dtype = attr_value_pb2.AttrValue(type=arg_types[idx]) + _save_placeholder(_get_tensor_name(input_tensor), dtype) if node.op == "VariableV2": # Get data for VariableV2 ops (reference variables) that cannot be lifted. with func.graph.as_default(): @@ -261,7 +400,6 @@ def convert_variables_to_constants_v2(func): # Reconstruct the graph with constants in place of variables. output_graph_def = graph_pb2.GraphDef() - how_many_converted = 0 for input_node in graph_def.node: output_node = output_graph_def.node.add() @@ -271,28 +409,75 @@ def convert_variables_to_constants_v2(func): dtype = attr_value_pb2.AttrValue(type=data.dtype.as_datatype_enum) _populate_const_op(output_node, input_node.name, dtype, data.numpy(), data.shape) - how_many_converted += 1 # Convert Placeholder ops to Const ops. elif input_node.name in placeholders: data = placeholders[input_node.name]["data"] dtype = placeholders[input_node.name]["dtype"] _populate_const_op(output_node, input_node.name, dtype, data, data.shape) - how_many_converted += 1 - # Change the dtype for Identity ops that are inputs to ReadVariableOps. + # Update the dtype for Identity ops that are inputs to ReadVariableOps. 
elif input_node.name in resource_identities: output_node.CopyFrom(input_node) output_node.attr["T"].CopyFrom(resource_identities[input_node.name]) # Convert ReadVariableOps to Identity ops. elif input_node.op == "ReadVariableOp": - output_node.op = "Identity" - output_node.name = input_node.name - output_node.input.extend([input_node.input[0]]) - output_node.attr["T"].CopyFrom(input_node.attr["dtype"]) - if "_class" in input_node.attr: - output_node.attr["_class"].CopyFrom(input_node.attr["_class"]) + _populate_identity_op(output_node, input_node) + # Update the function names and function's arguments types for the If ops. + elif input_node.op == "If": + _populate_if_op(output_node, input_node, function_types) else: output_node.CopyFrom(input_node) - logging.info("Converted %d variables to const ops.", how_many_converted) + # Add functions to reconstructed graph. + if graph_def.library: + library = output_graph_def.library + + for input_library_func in graph_def.library.function: + orig_func_name = input_library_func.signature.name + new_func_name = _get_new_function_name(orig_func_name) + + # Do not copy any functions that aren't being used in the graph. Any + # functions that are not used by control flow should have been inlined. + if orig_func_name not in function_types: + continue + + output_library_func = library.function.add() + for key, value in input_library_func.ret.items(): + output_library_func.ret[key] = value + for key, value in input_library_func.control_ret.items(): + output_library_func.control_ret[key] = value + + # Update the input types in the function signature. + output_library_func.signature.CopyFrom(input_library_func.signature) + output_library_func.signature.name = new_func_name + for dtype, arg in zip(function_types[orig_func_name], + output_library_func.signature.input_arg): + arg.type = dtype + + # Update the NodeDefs. 
+ func_variables = { + node.name: node.input[0] + for node in input_library_func.node_def + if node.op == "ReadVariableOp" + } + + for input_node in input_library_func.node_def: + output_node = output_library_func.node_def.add() + # Convert ReadVariableOps to Identity ops. + if input_node.op == "ReadVariableOp": + _populate_identity_op(output_node, input_node) + elif input_node.op == "If": + _populate_if_op(output_node, input_node, function_types) + else: + output_node.CopyFrom(input_node) + # Convert :value to :output for ops that use the ReadVariableOp. + for idx, full_name in enumerate(input_node.input): + input_name = _get_tensor_name(full_name) + if input_name in func_variables: + full_name_parts = full_name.split(":") + full_name_parts[1] = "output" + input_name = ":".join(full_name_parts) + output_node.input[idx] = input_name + + output_graph_def.versions.CopyFrom(graph_def.versions) return _construct_concrete_function(func, output_graph_def, converted_input_indices) diff --git a/tensorflow/python/framework/convert_to_constants_test.py b/tensorflow/python/framework/convert_to_constants_test.py index 2ec340063fd..e06508804b8 100644 --- a/tensorflow/python/framework/convert_to_constants_test.py +++ b/tensorflow/python/framework/convert_to_constants_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import os +import numpy as np from tensorflow.python import keras from tensorflow.python.client import session as session_lib @@ -27,8 +28,13 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import convert_to_constants from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_spec from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import 
rnn +from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import variables from tensorflow.python.platform import test from tensorflow.python.saved_model import simple_save @@ -38,7 +44,6 @@ from tensorflow.python.training.tracking import tracking from tensorflow.python.util import nest -# TODO(nupurgarg): Simplify the test cases to use the ConcreteFunction. class VariablesToConstantsTest(test.TestCase): def _hasStatefulPartitionedCallOp(self, graph_def): @@ -56,29 +61,36 @@ class VariablesToConstantsTest(test.TestCase): input_data): # Check that the converted ConcreteFunction produces the same result as the # original Function. - expected_value = nest.flatten(func(input_data)) - actual_value = nest.flatten(converted_concrete_func(input_data)) - self.assertEqual(expected_value[0].numpy(), actual_value) + expected_value = nest.flatten(func(**input_data)) + actual_value = nest.flatten(converted_concrete_func(**input_data)) + + for expected, actual in zip(expected_value, actual_value): + np.testing.assert_almost_equal(expected.numpy(), actual.numpy()) # Ensure the shape is retained. - self.assertEqual(converted_concrete_func.inputs[0].shape, input_data.shape) + for tensor in converted_concrete_func.inputs: + actual_shape = input_data[tensor.name.split(":")[0]].shape + self.assertEqual(tensor.shape, actual_shape) # Save the converted ConcreteFunction as a signature. save_dir = os.path.join(self.get_temp_dir(), "frozen_saved_model") - save(obj, save_dir, {"mykey": converted_concrete_func}) + root = tracking.AutoTrackable() + root.f = converted_concrete_func + save(root, save_dir, {"mykey": converted_concrete_func}) # Load it back and make sure it works. 
loaded_obj = load(save_dir) - actual_value = nest.flatten(loaded_obj.signatures["mykey"](input_data)) - self.assertEqual(expected_value[0].numpy(), actual_value) + actual_value = nest.flatten(loaded_obj.signatures["mykey"](**input_data)) + for expected, actual in zip(expected_value, actual_value): + np.testing.assert_almost_equal(expected.numpy(), actual.numpy()) @test_util.run_v2_only def testConstSavedModel(self): """Test a basic model with functions to make sure functions are inlined.""" - input_data = constant_op.constant(1., shape=[1]) + input_data = {"x": constant_op.constant(1., shape=[1])} root = tracking.AutoTrackable() root.f = def_function.function(lambda x: 2. * x) - to_save = root.f.get_concrete_function(input_data) + to_save = root.f.get_concrete_function(input_data["x"]) save_dir = os.path.join(self.get_temp_dir(), "saved_model") save(root, save_dir, to_save) @@ -100,12 +112,12 @@ class VariablesToConstantsTest(test.TestCase): @test_util.run_v2_only def testVariableModel(self): """Test a basic model with Variables.""" - input_data = constant_op.constant(1., shape=[1]) + input_data = {"x": constant_op.constant(1., shape=[1])} root = tracking.AutoTrackable() root.v1 = variables.Variable(3.) root.v2 = variables.Variable(2.) root.f = def_function.function(lambda x: root.v1 * root.v2 * x) - input_func = root.f.get_concrete_function(input_data) + input_func = root.f.get_concrete_function(input_data["x"]) variable_graph_def = input_func.graph.as_graph_def() self.assertEqual(2, self._getNumVariables(variable_graph_def)) @@ -121,12 +133,12 @@ class VariablesToConstantsTest(test.TestCase): @test_util.run_v2_only def testScalarModel(self): """Test a basic model with Variables.""" - input_data = constant_op.constant(1., shape=[]) + input_data = {"x": constant_op.constant(1., shape=[])} root = tracking.AutoTrackable() root.v1 = variables.Variable(3.) root.v2 = variables.Variable(2.) 
root.f = def_function.function(lambda x: root.v1 * root.v2 * x) - input_func = root.f.get_concrete_function(input_data) + input_func = root.f.get_concrete_function(input_data["x"]) variable_graph_def = input_func.graph.as_graph_def() self.assertEqual(2, self._getNumVariables(variable_graph_def)) @@ -142,12 +154,12 @@ class VariablesToConstantsTest(test.TestCase): @test_util.run_v2_only def testVariableSavedModel(self): """Test a basic model with Variables with saving/loading the SavedModel.""" - input_data = constant_op.constant(1., shape=[1]) + input_data = {"x": constant_op.constant(1., shape=[1])} root = tracking.AutoTrackable() root.v1 = variables.Variable(3.) root.v2 = variables.Variable(2.) root.f = def_function.function(lambda x: root.v1 * root.v2 * x) - to_save = root.f.get_concrete_function(input_data) + to_save = root.f.get_concrete_function(input_data["x"]) save_dir = os.path.join(self.get_temp_dir(), "saved_model") save(root, save_dir, to_save) @@ -187,9 +199,9 @@ class VariablesToConstantsTest(test.TestCase): self.z = variables.Variable(3.) 
return x - self.z - input_data = constant_op.constant(1., shape=[1]) + input_data = {"x": constant_op.constant(1., shape=[1])} root = BasicModel() - input_func = root.add.get_concrete_function(input_data) + input_func = root.add.get_concrete_function(input_data["x"]) variable_graph_def = input_func.graph.as_graph_def() self.assertEqual(1, self._getNumVariables(variable_graph_def)) @@ -236,7 +248,7 @@ class VariablesToConstantsTest(test.TestCase): actual_value = nest.flatten(output_func(input_data)) self.assertEqual(expected_value.numpy(), actual_value) - def _v1_single_metagraph_saved_model(self): + def _singleMetaGraphSavedModel(self): export_graph = ops.Graph() with export_graph.as_default(): start = array_ops.placeholder( @@ -262,8 +274,8 @@ class VariablesToConstantsTest(test.TestCase): return path @test_util.run_v2_only - def test_ref_variable_import(self): - saved = self._v1_single_metagraph_saved_model() + def testRefVariableImport(self): + saved = self._singleMetaGraphSavedModel() imported = load(saved) fn = imported.signatures["serving_default"] output_func = convert_to_constants.convert_variables_to_constants_v2(fn) @@ -271,9 +283,76 @@ class VariablesToConstantsTest(test.TestCase): self.assertEqual(0, self._getNumVariables(constant_graph_def)) self.assertFalse(self._hasStatefulPartitionedCallOp(constant_graph_def)) - input_data = constant_op.constant(1., shape=[1, 1]) + input_data = {"start": constant_op.constant(1., shape=[1, 1])} root = tracking.AutoTrackable() self._testConvertedFunction(root, fn, output_func, input_data) + @test_util.run_v2_only + def testControlFlow(self): + input_data = { + "x": constant_op.constant([1., 2.], shape=[1, 2]), + "b": constant_op.constant(True) + } + + weights = variables.Variable([[0.1, 0.2], [0.3, 0.4]], dtype=dtypes.float32) + + def true_fn(x): + return math_ops.matmul(x, weights) + + def false_fn(x): + return math_ops.add(x, weights) + + @def_function.function(input_signature=[ + tensor_spec.TensorSpec(shape=[1, 
2], dtype=dtypes.float32), + tensor_spec.TensorSpec(shape=(), dtype=dtypes.bool) + ]) + def model(x, b): + return control_flow_ops.cond( + b, true_fn=lambda: true_fn(x), false_fn=lambda: false_fn(x)) + + root = tracking.AutoTrackable() + root.f = model + input_func = root.f.get_concrete_function() + input_func(**input_data) + + output_func = convert_to_constants.convert_variables_to_constants_v2( + input_func, lower_control_flow=False) + constant_graph_def = output_func.graph.as_graph_def() + self.assertEqual(0, self._getNumVariables(constant_graph_def)) + self.assertFalse(self._hasStatefulPartitionedCallOp(constant_graph_def)) + + self._testConvertedFunction(root, root.f, output_func, input_data) + + @test_util.run_v2_only + def testStaticRnn(self): + input_data = { + "x": + constant_op.constant( + np.array(np.random.random_sample((3, 10)), dtype=np.float32)) + } + + cell = rnn_cell_impl.LSTMCell(10) + + @def_function.function(input_signature=[ + tensor_spec.TensorSpec(shape=[3, 10], dtype=dtypes.float32) + ]) + def model(x): + seq = array_ops.split(x, 3, 0) + return rnn.static_rnn( + cell, seq, dtype=dtypes.float32, sequence_length=[1]) + + root = tracking.AutoTrackable() + root.f = model + input_func = root.f.get_concrete_function() + + output_func = convert_to_constants.convert_variables_to_constants_v2( + input_func, lower_control_flow=False) + constant_graph_def = output_func.graph.as_graph_def() + self.assertEqual(0, self._getNumVariables(constant_graph_def)) + self.assertFalse(self._hasStatefulPartitionedCallOp(constant_graph_def)) + + self._testConvertedFunction(root, root.f, output_func, input_data) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/framework/dtypes.py b/tensorflow/python/framework/dtypes.py index aad008ff05b..16403b266ca 100644 --- a/tensorflow/python/framework/dtypes.py +++ b/tensorflow/python/framework/dtypes.py @@ -513,16 +513,16 @@ _STRING_TO_TF["double_ref"] = float64_ref # quantized types. 
# TODO(mrry,keveman): Investigate Numpy type registration to replace this # hard-coding of names. -_np_qint8 = np.dtype([("qint8", np.int8, 1)]) -_np_quint8 = np.dtype([("quint8", np.uint8, 1)]) -_np_qint16 = np.dtype([("qint16", np.int16, 1)]) -_np_quint16 = np.dtype([("quint16", np.uint16, 1)]) -_np_qint32 = np.dtype([("qint32", np.int32, 1)]) +_np_qint8 = np.dtype([("qint8", np.int8)]) +_np_quint8 = np.dtype([("quint8", np.uint8)]) +_np_qint16 = np.dtype([("qint16", np.int16)]) +_np_quint16 = np.dtype([("quint16", np.uint16)]) +_np_qint32 = np.dtype([("qint32", np.int32)]) # _np_bfloat16 is defined by a module import. # Custom struct dtype for directly-fed ResourceHandles of supported type(s). -np_resource = np.dtype([("resource", np.ubyte, 1)]) +np_resource = np.dtype([("resource", np.ubyte)]) # Standard mappings between types_pb2.DataType values and numpy.dtypes. _NP_TO_TF = { diff --git a/tensorflow/python/framework/func_graph.py b/tensorflow/python/framework/func_graph.py index ceaf198affb..c11863ccf0a 100644 --- a/tensorflow/python/framework/func_graph.py +++ b/tensorflow/python/framework/func_graph.py @@ -373,6 +373,10 @@ class FuncGraph(ops.Graph): # optimizers. old_graph_key = self._graph_key self._graph_key = graph._graph_key + # Inherit the auto_cast_variable_read_dtype, since this should not change + # inside a function. 
+ old_auto_cast_var_read_dtype = self._auto_cast_variable_read_dtype + self._auto_cast_variable_read_dtype = graph._auto_cast_variable_read_dtype # pylint: enable=protected-access with outer_cm as g: @@ -383,6 +387,7 @@ class FuncGraph(ops.Graph): self._device_function_stack = old_device_stack self._variable_creator_stack = old_creator_stack self._graph_key = old_graph_key + self._auto_cast_variable_read_dtype = old_auto_cast_var_read_dtype return inner_cm() @property diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index d0884de2a6e..070ce32a95a 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -1157,6 +1157,9 @@ def internal_convert_to_tensor(value, (dtype.name, value.dtype.name, value)) return value + if preferred_dtype is not None: + preferred_dtype = dtypes.as_dtype(preferred_dtype) + for base_type, conversion_func in tensor_conversion_registry.get(type(value)): # If dtype is None but preferred_dtype is not None, we try to # cast to preferred_dtype first. @@ -1165,18 +1168,15 @@ def internal_convert_to_tensor(value, try: ret = conversion_func( value, dtype=preferred_dtype, name=name, as_ref=as_ref) - except (TypeError, ValueError, errors.UnimplementedError, - errors.InvalidArgumentError): + except (TypeError, ValueError): # Could not coerce the conversion to use the preferred dtype. 
- ret = None - - if ret is not None and ret is not NotImplemented: - if (ret.dtype.base_dtype != - dtypes.as_dtype(preferred_dtype).base_dtype): + pass + else: + if (ret is not NotImplemented and + ret.dtype.base_dtype != preferred_dtype.base_dtype): raise TypeError("convert_to_tensor did not convert to " "the preferred dtype: %s vs %s " % - (ret.dtype.base_dtype, - dtypes.as_dtype(preferred_dtype).base_dtype)) + (ret.dtype.base_dtype, preferred_dtype.base_dtype)) if ret is None: ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref) @@ -4898,6 +4898,48 @@ class Graph(object): def _global_distribute_strategy_scope(self, distribute_strategy_scope): self._thread_local.distribute_strategy_scope = (distribute_strategy_scope) + @property + def _auto_cast_variable_read_dtype(self): + """The dtype that instances of `AutoCastVariable` will be casted to. + + This is None if `AutoCastVariables` should not be casted. + + See `AutoCastVariable` for more information. + + Returns: + The dtype that instances of `AutoCastVariable` will be casted to. + """ + if not hasattr(self._thread_local, "_auto_cast_variable_read_dtype"): + self._thread_local._auto_cast_variable_read_dtype = None # pylint: disable=protected-access + return self._thread_local._auto_cast_variable_read_dtype # pylint: disable=protected-access + + @_auto_cast_variable_read_dtype.setter + def _auto_cast_variable_read_dtype(self, _auto_cast_variable_read_dtype): + self._thread_local._auto_cast_variable_read_dtype = ( # pylint: disable=protected-access + _auto_cast_variable_read_dtype) + + @tf_contextlib.contextmanager + def _enable_auto_casting_variables(self, dtype): + """Context manager to automatically cast AutoCastVariables. + + If an AutoCastVariable `var` is used under this context manager, it will be + casted to `dtype` before being used. + + See `AutoCastVariable` for more information. + + Args: + dtype: The dtype that AutoCastVariables should be casted to. + + Yields: + Nothing. 
+ """ + prev_read_dtype = self._auto_cast_variable_read_dtype + try: + self._auto_cast_variable_read_dtype = dtype + yield + finally: + self._auto_cast_variable_read_dtype = prev_read_dtype + def _mutation_lock(self): """Returns a lock to guard code that creates & mutates ops. diff --git a/tensorflow/python/framework/tensor_shape.py b/tensorflow/python/framework/tensor_shape.py index 54819b0357b..14fbddabd00 100644 --- a/tensorflow/python/framework/tensor_shape.py +++ b/tensorflow/python/framework/tensor_shape.py @@ -576,10 +576,7 @@ class Dimension(object): Returns: A Dimension whose value is `self` modulo `other`. """ - try: - other = as_dimension(other) - except (TypeError, ValueError): - return NotImplemented + other = as_dimension(other) if self._value is None or other.value is None: return Dimension(None) else: @@ -594,10 +591,7 @@ class Dimension(object): Returns: A Dimension whose value is `other` modulo `self`. """ - try: - other = as_dimension(other) - except (TypeError, ValueError): - return NotImplemented + other = as_dimension(other) return other % self def __lt__(self, other): diff --git a/tensorflow/python/framework/tensor_spec.py b/tensorflow/python/framework/tensor_spec.py index 3ef8d443f1b..1e224e628c2 100644 --- a/tensorflow/python/framework/tensor_spec.py +++ b/tensorflow/python/framework/tensor_spec.py @@ -294,3 +294,7 @@ pywrap_tensorflow.RegisterType("TensorSpec", TensorSpec) type_spec.register_type_spec_from_value_converter( ops.Tensor, lambda tensor: TensorSpec(tensor.shape, tensor.dtype)) + +type_spec.register_type_spec_from_value_converter( + np.ndarray, + lambda array: TensorSpec(array.shape, array.dtype)) diff --git a/tensorflow/python/framework/tensor_spec_test.py b/tensorflow/python/framework/tensor_spec_test.py index 175aaebe67a..466f976ade6 100644 --- a/tensorflow/python/framework/tensor_spec_test.py +++ b/tensorflow/python/framework/tensor_spec_test.py @@ -24,9 +24,11 @@ import numpy as np from tensorflow.python.framework import 
constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_spec from tensorflow.python.framework import test_util +from tensorflow.python.framework import type_spec from tensorflow.python.ops import array_ops from tensorflow.python.platform import googletest @@ -146,6 +148,22 @@ class TensorSpecTest(test_util.TensorFlowTestCase): desc = tensor_spec.TensorSpec([1, 5], dtypes.float32, "test") self.assertEqual(pickle.loads(pickle.dumps(desc)), desc) + @test_util.deprecated_graph_mode_only + def testTypeSpecFromValue(self): + g = ops.Graph() + with g.as_default(): + v1 = np.array([1, 2, 3], np.int32) + t1 = constant_op.constant(v1) + + ops_before = g.get_operations() + + expected = tensor_spec.TensorSpec([3], dtypes.int32) + self.assertEqual(expected, type_spec.type_spec_from_value(v1)) + self.assertEqual(expected, type_spec.type_spec_from_value(t1)) + + # Check that creating TypeSpecs did not require building new Tensors. 
+ self.assertLen(g.get_operations(), len(ops_before)) + class BoundedTensorSpecTest(test_util.TensorFlowTestCase): diff --git a/tensorflow/python/grappler/hierarchical_controller.py b/tensorflow/python/grappler/hierarchical_controller.py index c8f2f4245bb..e39988d96b5 100644 --- a/tensorflow/python/grappler/hierarchical_controller.py +++ b/tensorflow/python/grappler/hierarchical_controller.py @@ -883,7 +883,7 @@ class HierarchicalController(Controller): actions.read(i - 1)) ) if self.hparams.keep_prob is not None: - signal = nn_ops.dropout(signal, self.hparams.keep_prob) + signal = nn_ops.dropout(signal, rate=(1 - self.hparams.keep_prob)) next_c, next_h = lstm(signal, prev_c, prev_h, w_lstm, forget_bias) query = math_ops.matmul(next_h, attn_w_2) query = array_ops.reshape( diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index 3da6e3f4efa..841a871bf06 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -157,16 +157,6 @@ py_library( ], ) -py_library( - name = "casting_utils", - srcs = ["engine/casting_utils.py"], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/python:util", - "//tensorflow/python/keras/mixed_precision/experimental:autocast_variable", - ], -) - py_library( name = "engine", srcs = [ @@ -183,6 +173,7 @@ py_library( "engine/training_eager.py", "engine/training_generator.py", "engine/training_utils.py", + "engine/training_v2.py", "metrics.py", # Need base_layer "models.py", "utils/metrics_utils.py", @@ -258,10 +249,11 @@ py_library( deps = [ ":backend", ":base_layer_utils", - ":casting_utils", ":constraints", ":engine_utils", ":regularizers", + "//tensorflow/core:protos_all_py", + "//tensorflow/python:constant_op", "//tensorflow/python/data", "//tensorflow/python/distribute:distribute_coordinator", "//tensorflow/python/distribute:distribute_lib", @@ -615,6 +607,9 @@ tf_py_test( "//third_party/py/numpy", "//tensorflow/python:client_testlib", ], + tags = [ + "nomac", # TODO(mihaimaruseac): 
b/127695564 + ], ) tf_py_test( @@ -824,6 +819,9 @@ tf_py_test( "@absl_py//absl/testing:parameterized", "//tensorflow/python:client_testlib", ], + tags = [ + "nomac", # TODO(mihaimaruseac): b/127695564 + ], ) tf_py_test( @@ -1302,6 +1300,9 @@ cuda_py_test( "//third_party/py/numpy", "//tensorflow/python:client_testlib", ], + tags = [ + "nomac", # TODO(mihaimaruseac): b/127695564 + ], xla_enable_strict_auto_jit = True, ) @@ -1403,7 +1404,10 @@ tf_py_test( "//tensorflow/python:client_testlib", ], shard_count = 2, - tags = ["notsan"], + tags = [ + "nomac", # TODO(mihaimaruseac): b/127695564 + "notsan", + ], ) tf_py_test( @@ -1415,6 +1419,9 @@ tf_py_test( "@absl_py//absl/testing:parameterized", "//tensorflow/python:client_testlib", ], + tags = [ + "nomac", # TODO(mihaimaruseac): b/127695564 + ], ) tf_py_test( @@ -1430,6 +1437,7 @@ tf_py_test( shard_count = 20, tags = [ "no_rocm", + "nomac", # TODO(mihaimaruseac): b/127695564 "notsan", ], ) @@ -1445,6 +1453,9 @@ tf_py_test( "//tensorflow/python:client_testlib", ], shard_count = 4, + tags = [ + "nomac", # TODO(mihaimaruseac): b/127695564 + ], ) tf_py_test( @@ -1460,7 +1471,10 @@ tf_py_test( "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python:client_testlib", ], - tags = ["no_rocm"], + tags = [ + "no_rocm", + "nomac", # TODO(mihaimaruseac): b/127695564 + ], ) tf_py_test( @@ -1493,6 +1507,7 @@ tf_py_test( "//tensorflow/python/feature_column:feature_column_py", ], tags = [ + "nomac", # TODO(mihaimaruseac): b/127695564 "notsan", ], ) @@ -1509,6 +1524,7 @@ tf_py_test( ], tags = [ "no_rocm", + "nomac", # TODO(mihaimaruseac): b/127695564 "notsan", ], ) @@ -1570,6 +1586,7 @@ tf_py_test( shard_count = 8, tags = [ "no-internal-py3", + "nomac", # TODO(mihaimaruseac): b/127695564 ], ) @@ -1584,7 +1601,10 @@ tf_py_test( "//tensorflow/python:client_testlib", ], shard_count = 8, - tags = ["no_rocm"], + tags = [ + "no_rocm", + "nomac", # TODO(mihaimaruseac): b/127695564 + ], ) tf_py_test( @@ -1598,6 +1618,9 @@ tf_py_test( 
"//tensorflow/python:client_testlib", ], shard_count = 8, + tags = [ + "nomac", # TODO(mihaimaruseac): b/127695564 + ], ) tf_py_test( @@ -1623,6 +1646,9 @@ tf_py_test( "//third_party/py/numpy", "//tensorflow/python:client_testlib", ], + tags = [ + "nomac", # TODO(mihaimaruseac): b/127695564 + ], ) tf_py_test( diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py index ce557b834d5..c7ebb4b2524 100644 --- a/tensorflow/python/keras/backend.py +++ b/tensorflow/python/keras/backend.py @@ -5429,9 +5429,9 @@ def ctc_label_dense_to_sparse(labels, label_lengths): num_batches_tns = array_ops.stack([label_shape[0]]) max_num_labels_tns = array_ops.stack([label_shape[1]]) - def range_less_than(_, current_input): + def range_less_than(old_input, current_input): return array_ops.expand_dims( - math_ops.range(label_shape[1]), 0) < array_ops.fill( + math_ops.range(array_ops.shape(old_input)[1]), 0) < array_ops.fill( max_num_labels_tns, current_input) init = math_ops.cast( diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py index e3bc5467261..90b6aa671cd 100644 --- a/tensorflow/python/keras/backend_test.py +++ b/tensorflow/python/keras/backend_test.py @@ -789,6 +789,8 @@ class BackendNNOpsTest(test.TestCase, parameterized.TestCase): y = keras.backend.pool2d(x, (2, 2), strides=(2, 2), pool_mode='other') def test_pool3d(self): + if test.is_built_with_rocm(): + self.skipTest('Pooling with 3D tensors is not supported in ROCm') val = np.random.random((10, 3, 10, 10, 10)) x = keras.backend.variable(val) y = keras.backend.pool3d(x, (2, 2, 2), strides=(1, 1, 1), @@ -1661,6 +1663,7 @@ class BackendCrossEntropyLossesTest(test.TestCase): @test_util.run_all_in_graph_and_eager_modes +@test_util.with_control_flow_v2 class TestCTC(test.TestCase): def test_ctc_decode(self): diff --git a/tensorflow/python/keras/datasets/cifar10.py b/tensorflow/python/keras/datasets/cifar10.py index 36e1b83c10a..c23f1a263bb 100644 --- 
a/tensorflow/python/keras/datasets/cifar10.py +++ b/tensorflow/python/keras/datasets/cifar10.py @@ -59,4 +59,7 @@ def load_data(): x_train = x_train.transpose(0, 2, 3, 1) x_test = x_test.transpose(0, 2, 3, 1) + x_test = x_test.astype(x_train.dtype) + y_test = y_test.astype(y_train.dtype) + return (x_train, y_train), (x_test, y_test) diff --git a/tensorflow/python/keras/distribute/distribute_strategy_test.py b/tensorflow/python/keras/distribute/distribute_strategy_test.py index f3d8be62a1a..85d13afd0e7 100644 --- a/tensorflow/python/keras/distribute/distribute_strategy_test.py +++ b/tensorflow/python/keras/distribute/distribute_strategy_test.py @@ -1008,7 +1008,7 @@ class TestDistributionStrategyWithDatasets(test.TestCase, predict_with_numpy, predict_with_ds, atol=1e-4, rtol=1e-4) with self.assertRaisesRegexp(ValueError, - 'Number of steps could not be infered'): + 'Number of steps could not be inferred'): model.fit(dataset, epochs=1) @combinations.generate(all_strategy_combinations_plus_cloning()) diff --git a/tensorflow/python/keras/distribute/distributed_training_utils.py b/tensorflow/python/keras/distribute/distributed_training_utils.py index 0d269ba59e8..b2d1e0f6784 100644 --- a/tensorflow/python/keras/distribute/distributed_training_utils.py +++ b/tensorflow/python/keras/distribute/distributed_training_utils.py @@ -547,13 +547,6 @@ def get_batch_dimension(iterator): return dims[0] if dims else None -def list_to_tuple(maybe_list): - """Datasets treat lists specially, so switch them to tuples.""" - if isinstance(maybe_list, list): - return tuple(maybe_list) - return maybe_list - - def get_iterator(dataset, distribution_strategy): with distribution_strategy.scope(): iterator = distribution_strategy.make_dataset_iterator(dataset) diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py index c5bc2d2c219..75dd8937896 100644 --- a/tensorflow/python/keras/engine/base_layer.py +++ 
b/tensorflow/python/keras/engine/base_layer.py @@ -37,6 +37,7 @@ from tensorflow.python.eager import context from tensorflow.python.eager import execute from tensorflow.python.eager import function from tensorflow.python.framework import auto_control_deps +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import func_graph from tensorflow.python.framework import ops @@ -47,7 +48,6 @@ from tensorflow.python.keras import constraints from tensorflow.python.keras import initializers from tensorflow.python.keras import regularizers from tensorflow.python.keras.engine import base_layer_utils -from tensorflow.python.keras.engine import casting_utils from tensorflow.python.keras.engine import input_spec from tensorflow.python.keras.engine import node as node_module from tensorflow.python.keras.mixed_precision.experimental import autocast_variable @@ -67,11 +67,11 @@ from tensorflow.python.ops import variables as tf_variables from tensorflow.python.training.tracking import base as trackable from tensorflow.python.training.tracking import data_structures from tensorflow.python.training.tracking import layer_utils as trackable_layer_utils -from tensorflow.python.training.tracking import object_identity from tensorflow.python.training.tracking import tracking from tensorflow.python.util import compat from tensorflow.python.util import deprecation from tensorflow.python.util import nest +from tensorflow.python.util import object_identity from tensorflow.python.util import serialization from tensorflow.python.util import tf_decorator from tensorflow.python.util import tf_inspect @@ -697,10 +697,7 @@ class Layer(module.Module): if not self.dynamic: try: - # We do not directly pass self.weights, because we do not want - # to include weights of any layers in self.layers. 
- with casting_utils.autocast_context_manager( - self._trainable_weights + self._non_trainable_weights, + with base_layer_utils.autocast_context_manager( input_list, self._mixed_precision_policy.should_cast_variables): # Add auto_control_deps in V2 when they are not already added by @@ -756,11 +753,8 @@ class Layer(module.Module): # Eager execution on data tensors. with backend.name_scope(self._name_scope()): self._maybe_build(inputs) - # We do not directly pass self.weights, because we do not want - # to include weights of any layers in self.layers. - with casting_utils.autocast_context_manager( - self._trainable_weights + self._non_trainable_weights, input_list, - self._mixed_precision_policy.should_cast_variables): + with base_layer_utils.autocast_context_manager( + input_list, self._mixed_precision_policy.should_cast_variables): outputs = self.call(inputs, *args, **kwargs) self._handle_activity_regularization(inputs, outputs) self._set_mask_metadata(inputs, outputs, input_masks) @@ -1503,7 +1497,7 @@ class Layer(module.Module): if not self.built: if self.__class__.__name__ == 'Sequential': with tf_utils.maybe_init_scope(self): - self.build() # pylint: disable=no-value-for-parameter + self._maybe_build() # pylint: disable=no-value-for-parameter else: raise ValueError('You tried to call `count_params` on ' + self.name + ', but the layer isn\'t built. ' @@ -2401,21 +2395,30 @@ class TensorFlowOpLayer(Layer): return self._defun_call(inputs) return self._make_op(inputs) + def _make_node_def(self, graph): + node_def = node_def_pb2.NodeDef() + node_def.CopyFrom(self.node_def) + node_def.name = graph.unique_name(node_def.name) + return node_def + def _make_op(self, inputs): inputs = nest.flatten(inputs) graph = inputs[0].graph + node_def = self._make_node_def(graph) with graph.as_default(): for index, constant in self.constants.items(): - constant = ops.convert_to_tensor(constant) + # Recreate constant in graph to add distribution context. 
+ value = tensor_util.constant_value(constant) + if value is not None: + constant = constant_op.constant(value, name=node_def.input[index]) inputs.insert(index, constant) - - self.node_def.name = graph.unique_name(self.node_def.name) # Check for case where first input should be a list of Tensors. - if 'N' in self.node_def.attr: - num_tensors = self.node_def.attr['N'].i + if 'N' in node_def.attr: + num_tensors = node_def.attr['N'].i inputs = [inputs[:num_tensors]] + inputs[num_tensors:] - c_op = ops._create_c_op(graph, self.node_def, inputs, control_inputs=[]) + c_op = ops._create_c_op(graph, node_def, inputs, control_inputs=[]) op = graph._create_op_from_tf_operation(c_op) + op._control_flow_post_processing() # Record the gradient because custom-made ops don't go through the # code-gen'd eager call path @@ -2426,8 +2429,7 @@ class TensorFlowOpLayer(Layer): attrs.append(attr_name) attrs.append(op.get_attr(attr_name)) attrs = tuple(attrs) - execute.record_gradient(op_type, op.inputs, attrs, op.outputs, - op.name) + execute.record_gradient(op_type, op.inputs, attrs, op.outputs, op.name) if len(op.outputs) == 1: return op.outputs[0] diff --git a/tensorflow/python/keras/engine/base_layer_utils.py b/tensorflow/python/keras/engine/base_layer_utils.py index bfa9d6ed988..14e2cabf39b 100644 --- a/tensorflow/python/keras/engine/base_layer_utils.py +++ b/tensorflow/python/keras/engine/base_layer_utils.py @@ -438,6 +438,34 @@ def training_arg_passed_to_call(argspec, args, kwargs): return 'training' in full_args and full_args['training'] is not None +def _get_var_read_dtype(input_list, should_cast): + """Gets the dtype that AutoCastVariables should be read in.""" + if should_cast and input_list and input_list[0].dtype.is_floating: + return input_list[0].dtype.base_dtype + else: + return None + + +def autocast_context_manager(input_list, should_cast): + """Returns a context manager to autocast AutoCastVariables. 
+ + Under this context manager, if `should_cast` is True, AutoCastVariables will + be casted. If `should_cast` is False, AutoCastVariables will not be casted, + which can be used to disable autocasting if nested under another + call to `autocast_context_manager`. + + Args: + input_list: The inputs to the layer with the AutoCastVariables. + should_cast: Whether AutoCastVariables should be casted. + + Returns: + A context manager to automatically cast AutoCastVariables. + """ + var_read_dtype = _get_var_read_dtype(input_list, should_cast) + return ops.get_default_graph()._enable_auto_casting_variables( # pylint: disable=protected-access + var_read_dtype) + + def is_subclassed(layer): """Returns True if the object is a subclassed layer or subclassed model.""" return (layer.__module__.find('keras.engine') == -1 and diff --git a/tensorflow/python/keras/engine/casting_utils.py b/tensorflow/python/keras/engine/casting_utils.py deleted file mode 100644 index 2b3f85d96f4..00000000000 --- a/tensorflow/python/keras/engine/casting_utils.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Contains private utilities related to casting.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.keras.mixed_precision.experimental import autocast_variable -from tensorflow.python.util import tf_contextlib - - -def _get_var_read_dtype(input_list): - """Gets the dtype that AutoCastVariables should be read in.""" - try: - # TODO(reedwm): Is choosing the first input the right choice? - is_floating = input_list and input_list[0].dtype.is_floating - except AttributeError: - is_floating = False - if is_floating: - return input_list[0].dtype.base_dtype - else: - return None - - -@tf_contextlib.contextmanager -def autocast_context_manager(layer_weights, input_list, should_cast): - """A context manager to autocast a layer's AutoCastVariables. - - Under this context manager, if `should_cast` is True, the AutoCastVariables in - `layer_weights` will be casted to the dtype of the first input in - `input_list`, if the first input is a floating-point dtype. If `should_cast` - is False, this context manager is a no-op. - - Args: - layer_weights: A list of weights of a layer. AutoCastVariables in this list - will be casted if `should_cast` is True. Non-AutoCastVariables are - ignored. - input_list: The inputs to the layer with the AutoCastVariables. - should_cast: Whether AutoCastVariables should be casted. - - Yields: - Nothing. 
- """ - if not should_cast: - yield - return - - var_read_dtype = _get_var_read_dtype(input_list) - if var_read_dtype is None: - yield - return - - autocast_vars = [var for var in layer_weights - if isinstance(var, autocast_variable.AutoCastVariable)] - old_read_dtypes = [var._read_dtype for var in autocast_vars] # pylint: disable=protected-access - for var in autocast_vars: - var._read_dtype = var_read_dtype # pylint: disable=protected-access - try: - yield - finally: - for var, old_read_dtype in zip(autocast_vars, old_read_dtypes): - var._read_dtype = old_read_dtype # pylint: disable=protected-access diff --git a/tensorflow/python/keras/engine/data_adapter.py b/tensorflow/python/keras/engine/data_adapter.py index 95ca3b41dbb..4603166b6c2 100644 --- a/tensorflow/python/keras/engine/data_adapter.py +++ b/tensorflow/python/keras/engine/data_adapter.py @@ -20,12 +20,14 @@ from __future__ import print_function import abc import itertools +import math import numpy as np import six from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import ops +from tensorflow.python.keras.engine import training_utils from tensorflow.python.keras.utils import data_utils from tensorflow.python.util import nest from tensorflow.python.util import tf_inspect @@ -53,7 +55,7 @@ class DataAdapter(object): if len(applicable_adapters) != 1: raise ValueError("Expect only one adapter class to handle the input") - dataset = applicable_adapters[0]().get_dataset(x) + dataset = applicable_adapters[0](x).get_dataset() for data in dataset: # training ``` @@ -77,8 +79,8 @@ class DataAdapter(object): raise NotImplementedError @abc.abstractmethod - def get_dataset(self, x, y=None, **kwargs): - """Convert the input x and y into dataset. + def __init__(self, x, y=None, **kwargs): + """Create a DataAdapter based on data inputs. The caller must make sure to call `can_handle()` first before invoking this method. Provide unsupported data type will result into unexpected behavior. 
@@ -88,14 +90,26 @@ class DataAdapter(object): y: target labels. Note that y could be None in the case of prediction. **kwargs: Other keyword arguments for DataAdapter during the construction of the tf.dataset.Dataset. For example: - - Numpy data might need to have `batch_size` parameter when constructing - the dataset and iterator. - - Numpy data might have "evaluation_split" which will split the input - data into training and validation set. - Numpy data might have `sample_weights` which will be used for weighting the loss function during training. + - Numpy data might need to have `batch_size` parameter when constructing + the dataset and iterator. + - Certain input might need to be distribution strategy aware. When + `distribution_strategy` is passed, the created dataset need to respect + the strategy. DataAdapter might choose to ignore any keyword argument if it doesn't use it, or raise exception if any required argument is not provide. + """ + if not self.can_handle(x, y): + raise ValueError("{} Cannot handle input {}".format(self.__class__, x)) + + @abc.abstractmethod + def get_dataset(self): + """Get a dataset instance for the current DataAdapter. + + Note that the dataset returned does not repeat for epoch, so caller might + need to create new iterator for the same dataset at the beginning of the + epoch. This behavior might change in future. Returns: An tf.dataset.Dataset. Caller might use the dataset in different @@ -104,20 +118,58 @@ class DataAdapter(object): """ raise NotImplementedError + @abc.abstractmethod + def get_size(self): + """Return the size (number of batches) for the dataset created. + + For certain type of the data input, the number of batches is known, eg for + Numpy data, the size is same as (number_of_element / batch_size). Whereas + for dataset or python generator, the size is unknown since it may or may not + have a end state. + + Returns: + int, the number of batches for the dataset, or None if it is unknown. 
The + caller could use this to control the loop of training, show progress bar, + or handle unexpected StopIteration error. + """ + raise NotImplementedError + + @abc.abstractmethod + def batch_size(self): + """Return the batch size of the dataset created. + + For certain type of the data input, the batch size is known, and even + required, like numpy array. Where as for dataset, the batch is unknown + unless we take a peek. + + Returns: + int, the batch size of the dataset, or None if it is unknown. + """ + raise NotImplementedError + + @abc.abstractmethod + def has_partial_batch(self): + """Whether the dataset has partial batch at the end.""" + raise NotImplementedError + class NumpyArrayDataAdapter(DataAdapter): """Adapter that handles the Numpy array.""" @staticmethod def can_handle(x, y=None): - if y is not None and type(x) is not type(y): - raise ValueError("input feature and target should have same type, got " - "x: {}, y: {}".format(type(x), type(y))) - return isinstance(x, np.ndarray) + flat_inputs = nest.flatten(x) + if y is not None: + flat_inputs += nest.flatten(y) - def get_dataset(self, x, y=None, sample_weights=None, batch_size=None, - shuffle=False, **kwargs): - # TODO(scottzhu): Handle validation_split + return all(isinstance(v, np.ndarray) for v in flat_inputs) + + def __init__(self, x, y=None, sample_weights=None, batch_size=None, + shuffle=False, distribution_strategy=None, **kwargs): + super(NumpyArrayDataAdapter, self).__init__(x, y, **kwargs) + x = _process_numpy_inputs(x) + y = _process_numpy_inputs(y) + sample_weights = _process_numpy_inputs(sample_weights) if y is not None and sample_weights is not None: inputs = (x, y, sample_weights) elif y is not None: @@ -125,37 +177,98 @@ class NumpyArrayDataAdapter(DataAdapter): # sample_weight is ignored. 
inputs = (x, y) else: - inputs = x + inputs = (x,) if not batch_size: raise ValueError("batch size is required for Numpy input data.") - # TODO(scottzhu): might need to check large data input (> 2G). - dataset = dataset_ops.DatasetV2.from_tensor_slices(inputs) + if distribution_strategy is not None: + dataset = distribution_strategy.experimental_make_numpy_dataset(inputs) + else: + dataset = dataset_ops.DatasetV2.from_tensor_slices(inputs) + + num_samples = int(nest.flatten(x)[0].shape[0]) if shuffle: - num_samples = int(nest.flatten(x)[0].shape[0]) + # Note that we use the full input data length as buffer window, which + # might have memory consumption consequence. This is on the radar of + # tf.data team and they will address it. dataset = dataset.shuffle(num_samples) - return dataset.batch(batch_size) + self._dataset = dataset.batch(batch_size) + self._size = int(math.ceil(num_samples / batch_size)) + self._batch_size = batch_size + self._has_partial_batch = (self._size != (num_samples // batch_size)) + + def get_dataset(self): + return self._dataset + + def get_size(self): + return self._size + + def batch_size(self): + return self._batch_size + + def has_partial_batch(self): + return self._has_partial_batch +# TODO(scottzhu): Eventually the numpy array and eager tensor should be treated +# in the same way. Merge this class with NumpyArrayDataAdapter. class TensorDataAdapter(DataAdapter): - """Adapter that handles Tensorflow tensors.""" + """Adapter that handles Tensorflow eager tensors.""" @staticmethod def can_handle(x, y=None): - return isinstance(x, ops.Tensor) + flat_inputs = nest.flatten(x) + if y is not None: + flat_inputs += nest.flatten(y) - def get_dataset(self, x, y=None, batch_size=None, shuffle=False, **kwargs): - inputs = x if y is None else (x, y) + return all(isinstance(v, ops.Tensor) for v in flat_inputs) - # Do we need batch_size for data tensor? 
- if not batch_size: - raise ValueError("batch size is required for tensor input data.") + def __init__(self, x, y=None, sample_weights=None, batch_size=None, + shuffle=False, **kwargs): + super(TensorDataAdapter, self).__init__(x, y, **kwargs) + x = _process_numpy_inputs(x) + y = _process_numpy_inputs(y) + sample_weights = _process_numpy_inputs(sample_weights) + if y is not None and sample_weights is not None: + inputs = (x, y, sample_weights) + elif y is not None: + # Sample weight is only needed for training, so if y is None, then + # sample_weight is ignored. + inputs = (x, y) + else: + inputs = (x,) + + # TODO(scottzhu): We should treat data tensor same as numpy array, make + # the batch_size a required param. + # if not batch_size: + # raise ValueError("batch size is required for tensor input data.") dataset = dataset_ops.DatasetV2.from_tensor_slices(inputs) + num_samples = int(nest.flatten(x)[0].shape[0]) if shuffle: - num_samples = int(nest.flatten(x)[0].shape[0]) dataset = dataset.shuffle(num_samples) - return dataset.batch(batch_size) + if batch_size: + dataset = dataset.batch(batch_size) + self._size = int(math.ceil(num_samples / batch_size)) + self._batch_size = batch_size + self._has_partial_batch = (self._size != (num_samples // batch_size)) + else: + self._size = 1 + self._batch_size = num_samples + self._has_partial_batch = False + self._dataset = dataset + + def get_dataset(self): + return self._dataset + + def get_size(self): + return self._size + + def batch_size(self): + return self._batch_size + + def has_partial_batch(self): + return self._has_partial_batch class DatasetAdapter(DataAdapter): @@ -165,12 +278,30 @@ class DatasetAdapter(DataAdapter): def can_handle(x, y=None): return isinstance(x, (dataset_ops.DatasetV1, dataset_ops.DatasetV2)) - def get_dataset(self, x, y=None, **kwargs): - # TODO(scottzhu): throw error when sample_weights, etc is provided. 
- if y is not None: - raise ValueError("target input is expected to be None when using " + def __init__(self, x, y=None, sample_weights=None, **kwargs): + super(DatasetAdapter, self).__init__(x, y, **kwargs) + if not is_none_or_empty(y): + raise ValueError("`y` argument is not supported when using " "dataset as input.") - return x + if not is_none_or_empty(sample_weights): + raise ValueError("`sample_weight` argument is not supported when using " + "dataset as input.") + # Note that the dataset instance is immutable, its fine to reusing the user + # provided dataset. + self._dataset = x + + def get_dataset(self): + return self._dataset + + def get_size(self): + # The size of dataset is unknown, unless its fully consumed. + return None + + def batch_size(self): + return None + + def has_partial_batch(self): + return False class GeneratorDataAdapter(DataAdapter): @@ -180,10 +311,13 @@ class GeneratorDataAdapter(DataAdapter): def can_handle(x, y=None): return tf_inspect.isgenerator(x) - def get_dataset(self, x, y=None, **kwargs): - # TODO(scottzhu): throw error when sample_weights, etc is provided. 
- if y is not None: - raise ValueError("target input is expected to be None when using " + def __init__(self, x, y=None, sample_weights=None, **kwargs): + super(GeneratorDataAdapter, self).__init__(x, y, **kwargs) + if not is_none_or_empty(y): + raise ValueError("`y` argument is not supported when using " + "python generator as input.") + if not is_none_or_empty(sample_weights): + raise ValueError("`sample_weight` argument is not supported when using " "python generator as input.") # Since we have to know the dtype of the python generator when we build the @@ -198,8 +332,21 @@ class GeneratorDataAdapter(DataAdapter): def reassemble(): return itertools.chain([peek], x) - return dataset_ops.DatasetV2.from_generator(reassemble, nested_dtypes, - output_shapes=nested_shape) + self._batch_size = int(nest.flatten(peek)[0].shape[0]) + self._dataset = dataset_ops.DatasetV2.from_generator( + reassemble, nested_dtypes, output_shapes=nested_shape) + + def get_dataset(self): + return self._dataset + + def get_size(self): + return None + + def batch_size(self): + return self._batch_size + + def has_partial_batch(self): + return False class KerasSequenceAdapter(DataAdapter): @@ -209,12 +356,14 @@ class KerasSequenceAdapter(DataAdapter): def can_handle(x, y=None): return isinstance(x, data_utils.Sequence) - def get_dataset(self, x, y=None, shuffle=False, **kwargs): - # TODO(scottzhu): throw error when sample_weights, etc is provided. 
- if y is not None: - raise ValueError("target input is expected to be None when using " + def __init__(self, x, y=None, sample_weights=None, shuffle=False, **kwargs): + super(KerasSequenceAdapter, self).__init__(x, y, **kwargs) + if not is_none_or_empty(y): + raise ValueError("`y` argument is not supported when using " + "`keras.utils.Sequence` as input.") + if not is_none_or_empty(sample_weights): + raise ValueError("`sample_weight` argument is not supported when using " "`keras.utils.Sequence` as input.") - peek = x[0] nested_dtypes = nest.map_structure(lambda t: t.dtype, peek) nested_shape = nest.map_structure(lambda t: t.shape, peek) @@ -226,4 +375,67 @@ class KerasSequenceAdapter(DataAdapter): output_shapes=nested_shape) if shuffle: dataset = dataset.shuffle(len(x)) - return dataset + self._dataset = dataset + self._size = len(x) + self._batch_size = int(nest.flatten(peek)[0].shape[0]) + + def get_dataset(self): + return self._dataset + + def get_size(self): + return self._size + + def batch_size(self): + return self._batch_size + + def has_partial_batch(self): + return False + + +ALL_ADAPTER_CLS = [NumpyArrayDataAdapter, TensorDataAdapter, DatasetAdapter, + GeneratorDataAdapter, KerasSequenceAdapter] + + +def select_data_adapter(x, y): + adapter_cls = [cls for cls in ALL_ADAPTER_CLS if cls.can_handle(x, y)] + if not adapter_cls: + raise ValueError("Failed to find data adapter that can handle " + "input: {}, {}".format(type(x), type(y))) + elif len(adapter_cls) > 1: + raise RuntimeError("Data adapter should be mutually exclusive for " + "handling inputs. Found multiple adapter {} to handle " + "input: {}, {}".format(adapter_cls, type(x), type(y))) + return adapter_cls[0] + + +def _process_numpy_inputs(inputs): + """Process numpy array inputs. + + For numpy inputs, it is possible to be single numpy array, or list/dict of + them. They could also be preprocessed by other lib to match with the order + of position for the model. 
The result here should be something that can be + used to build dataset. + + Args: + inputs: single or list/tuple/dict of numpy array. + Returns: + numpy arrays can be used to build dataset. + """ + if is_none_or_empty(inputs): + return None + flat_inputs = nest.flatten(inputs) + if len(flat_inputs) == 1: + return flat_inputs[0] + # For more complicated structure, we only convert the out most list to tuple + # since dataset will stack the list, but treat elements in the tuple as + # individual element. + return training_utils.list_to_tuple(inputs) + + +def is_none_or_empty(inputs): + # util method to check if the input is a None or a empty list. + # the python "not" check will raise an error like below if the input is a + # numpy array + # "The truth value of an array with more than one element is ambiguous. + # Use a.any() or a.all()" + return inputs is None or not nest.flatten(inputs) diff --git a/tensorflow/python/keras/engine/data_adapter_test.py b/tensorflow/python/keras/engine/data_adapter_test.py index 8e339fb7388..fd439edb724 100644 --- a/tensorflow/python/keras/engine/data_adapter_test.py +++ b/tensorflow/python/keras/engine/data_adapter_test.py @@ -41,7 +41,8 @@ class DataAdapterTestBase(test.TestCase): self.tensor_input = constant_op.constant(2.0, shape=(50, 10)) self.tensor_target = array_ops.ones((50,)) self.dataset_input = dataset_ops.DatasetV2.from_tensor_slices( - (self.numpy_input, self.numpy_target)).batch(self.batch_size).shuffle(1) + (self.numpy_input, self.numpy_target)).shuffle(50).batch( + self.batch_size) def generator(): yield (np.zeros((self.batch_size, 10)), np.ones(self.batch_size)) @@ -70,26 +71,43 @@ class NumpyDataAdapterTest(DataAdapterTestBase): def setUp(self): super(NumpyDataAdapterTest, self).setUp() - self.adapter = data_adapter.NumpyArrayDataAdapter() + self.adapter_cls = data_adapter.NumpyArrayDataAdapter def test_can_handle(self): - self.assertTrue(self.adapter.can_handle(self.numpy_input)) + 
self.assertTrue(self.adapter_cls.can_handle(self.numpy_input)) self.assertTrue( - self.adapter.can_handle(self.numpy_input, self.numpy_target)) + self.adapter_cls.can_handle(self.numpy_input, self.numpy_target)) - self.assertFalse(self.adapter.can_handle(self.tensor_input)) - self.assertFalse(self.adapter.can_handle(self.dataset_input)) - self.assertFalse(self.adapter.can_handle(self.generator_input)) - self.assertFalse(self.adapter.can_handle(self.sequence_input)) + self.assertFalse(self.adapter_cls.can_handle(self.tensor_input)) + self.assertFalse(self.adapter_cls.can_handle(self.dataset_input)) + self.assertFalse(self.adapter_cls.can_handle(self.generator_input)) + self.assertFalse(self.adapter_cls.can_handle(self.sequence_input)) def test_iterator_expect_batch_size(self): with self.assertRaisesRegexp(ValueError, 'batch size is required'): - self.adapter.get_dataset(self.numpy_input, self.numpy_target) + self.adapter_cls(self.numpy_input, self.numpy_target) + + def test_size(self): + adapter = self.adapter_cls( + self.numpy_input, self.numpy_target, batch_size=5) + self.assertEqual(adapter.get_size(), 10) + self.assertFalse(adapter.has_partial_batch()) + + def test_batch_size(self): + adapter = self.adapter_cls( + self.numpy_input, self.numpy_target, batch_size=5) + self.assertEqual(adapter.batch_size(), 5) + + def test_partial_batch(self): + adapter = self.adapter_cls( + self.numpy_input, self.numpy_target, batch_size=4) + self.assertEqual(adapter.get_size(), 13) # 50/4 + self.assertTrue(adapter.has_partial_batch()) def test_training(self): - dataset = self.adapter.get_dataset( - self.numpy_input, self.numpy_target, batch_size=5) - self.model.compile(loss='mse', optimizer='sgd') + dataset = self.adapter_cls( + self.numpy_input, self.numpy_target, batch_size=5).get_dataset() + self.model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd') self.model.fit(dataset) @@ -97,84 +115,134 @@ class TensorDataAdapterTest(DataAdapterTestBase): def setUp(self): 
super(TensorDataAdapterTest, self).setUp() - self.adapter = data_adapter.TensorDataAdapter() + self.adapter_cls = data_adapter.TensorDataAdapter def test_can_handle(self): - self.assertTrue(self.adapter.can_handle(self.tensor_input)) + self.assertTrue(self.adapter_cls.can_handle(self.tensor_input)) self.assertTrue( - self.adapter.can_handle(self.tensor_input, self.tensor_target)) + self.adapter_cls.can_handle(self.tensor_input, self.tensor_target)) - self.assertFalse(self.adapter.can_handle(self.numpy_input)) - self.assertFalse(self.adapter.can_handle(self.dataset_input)) - self.assertFalse(self.adapter.can_handle(self.generator_input)) - self.assertFalse(self.adapter.can_handle(self.sequence_input)) - - def test_iterator_expect_batch_size(self): - with self.assertRaisesRegexp(ValueError, 'batch size is required'): - self.adapter.get_dataset(self.tensor_input, self.tensor_target) + self.assertFalse(self.adapter_cls.can_handle(self.numpy_input)) + self.assertFalse(self.adapter_cls.can_handle(self.dataset_input)) + self.assertFalse(self.adapter_cls.can_handle(self.generator_input)) + self.assertFalse(self.adapter_cls.can_handle(self.sequence_input)) def test_training(self): - dataset = self.adapter.get_dataset( - self.tensor_input, self.tensor_target, batch_size=5) - self.model.compile(loss='mse', optimizer='sgd') + dataset = self.adapter_cls( + self.tensor_input, self.tensor_target, batch_size=5).get_dataset() + self.model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd') self.model.fit(dataset) + def test_size(self): + adapter = self.adapter_cls( + self.tensor_input, self.tensor_target, batch_size=5) + self.assertEqual(adapter.get_size(), 10) + self.assertFalse(adapter.has_partial_batch()) + + def test_batch_size(self): + adapter = self.adapter_cls( + self.tensor_input, self.tensor_target, batch_size=5) + self.assertEqual(adapter.batch_size(), 5) + + def test_partial_batch(self): + adapter = self.adapter_cls( + self.tensor_input, self.tensor_target, 
batch_size=4) + self.assertEqual(adapter.get_size(), 13) # 50/4 + self.assertTrue(adapter.has_partial_batch()) + class DatasetAdapterTest(DataAdapterTestBase): def setUp(self): super(DatasetAdapterTest, self).setUp() - self.adapter = data_adapter.DatasetAdapter() + self.adapter_cls = data_adapter.DatasetAdapter def test_can_handle(self): - self.assertFalse(self.adapter.can_handle(self.numpy_input)) - self.assertFalse(self.adapter.can_handle(self.tensor_input)) - self.assertTrue(self.adapter.can_handle(self.dataset_input)) - self.assertFalse(self.adapter.can_handle(self.generator_input)) - self.assertFalse(self.adapter.can_handle(self.sequence_input)) + self.assertFalse(self.adapter_cls.can_handle(self.numpy_input)) + self.assertFalse(self.adapter_cls.can_handle(self.tensor_input)) + self.assertTrue(self.adapter_cls.can_handle(self.dataset_input)) + self.assertFalse(self.adapter_cls.can_handle(self.generator_input)) + self.assertFalse(self.adapter_cls.can_handle(self.sequence_input)) def test_training(self): - dataset = self.adapter.get_dataset(self.dataset_input) - self.model.compile(loss='mse', optimizer='sgd') + dataset = self.adapter_cls(self.dataset_input).get_dataset() + self.model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd') self.model.fit(dataset) + def test_size(self): + adapter = self.adapter_cls(self.dataset_input) + self.assertIsNone(adapter.get_size()) + + def test_batch_size(self): + adapter = self.adapter_cls(self.dataset_input) + self.assertIsNone(adapter.batch_size()) + + def test_partial_batch(self): + adapter = self.adapter_cls(self.dataset_input) + self.assertFalse(adapter.has_partial_batch()) + class GeneratorDataAdapterTest(DataAdapterTestBase): def setUp(self): super(GeneratorDataAdapterTest, self).setUp() - self.adapter = data_adapter.GeneratorDataAdapter() + self.adapter_cls = data_adapter.GeneratorDataAdapter def test_can_handle(self): - self.assertFalse(self.adapter.can_handle(self.numpy_input)) - 
self.assertFalse(self.adapter.can_handle(self.tensor_input)) - self.assertFalse(self.adapter.can_handle(self.dataset_input)) - self.assertTrue(self.adapter.can_handle(self.generator_input)) - self.assertFalse(self.adapter.can_handle(self.sequence_input)) + self.assertFalse(self.adapter_cls.can_handle(self.numpy_input)) + self.assertFalse(self.adapter_cls.can_handle(self.tensor_input)) + self.assertFalse(self.adapter_cls.can_handle(self.dataset_input)) + self.assertTrue(self.adapter_cls.can_handle(self.generator_input)) + self.assertFalse(self.adapter_cls.can_handle(self.sequence_input)) def test_training(self): - dataset = self.adapter.get_dataset(self.generator_input) - self.model.compile(loss='mse', optimizer='sgd') + dataset = self.adapter_cls(self.generator_input).get_dataset() + self.model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd') self.model.fit(dataset) + def test_size(self): + adapter = self.adapter_cls(self.generator_input) + self.assertIsNone(adapter.get_size()) + + def test_batch_size(self): + adapter = self.adapter_cls(self.generator_input) + self.assertEqual(adapter.batch_size(), 5) + + def test_partial_batch(self): + adapter = self.adapter_cls(self.generator_input) + self.assertFalse(adapter.has_partial_batch()) + class KerasSequenceAdapterTest(DataAdapterTestBase): def setUp(self): super(KerasSequenceAdapterTest, self).setUp() - self.adapter = data_adapter.KerasSequenceAdapter() + self.adapter_cls = data_adapter.KerasSequenceAdapter def test_can_handle(self): - self.assertFalse(self.adapter.can_handle(self.numpy_input)) - self.assertFalse(self.adapter.can_handle(self.tensor_input)) - self.assertFalse(self.adapter.can_handle(self.dataset_input)) - self.assertFalse(self.adapter.can_handle(self.generator_input)) - self.assertTrue(self.adapter.can_handle(self.sequence_input)) + self.assertFalse(self.adapter_cls.can_handle(self.numpy_input)) + self.assertFalse(self.adapter_cls.can_handle(self.tensor_input)) + 
self.assertFalse(self.adapter_cls.can_handle(self.dataset_input)) + self.assertFalse(self.adapter_cls.can_handle(self.generator_input)) + self.assertTrue(self.adapter_cls.can_handle(self.sequence_input)) def test_training(self): - dataset = self.adapter.get_dataset(self.sequence_input) - self.model.compile(loss='mse', optimizer='sgd') + dataset = self.adapter_cls(self.sequence_input).get_dataset() + self.model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd') self.model.fit(dataset) + def test_size(self): + adapter = self.adapter_cls(self.sequence_input) + self.assertEqual(adapter.get_size(), 10) + + def test_batch_size(self): + adapter = self.adapter_cls(self.sequence_input) + self.assertEqual(adapter.batch_size(), 5) + + def test_partial_batch(self): + adapter = self.adapter_cls(self.sequence_input) + self.assertFalse(adapter.has_partial_batch()) + + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index bc64d5af38f..dbcc82e3be5 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -61,6 +61,7 @@ from tensorflow.python.training.tracking import base as trackable from tensorflow.python.training.tracking import layer_utils as trackable_layer_utils from tensorflow.python.util import nest from tensorflow.python.util import serialization +from tensorflow.python.util import tf_inspect from tensorflow.python.util.tf_export import keras_export try: @@ -1414,14 +1415,19 @@ class Model(network.Network): def _update_sample_weight_modes(self, sample_weights=None): """Updates sample weight modes based on training/eval inputs. + Sample weight placeholders will be created for all or no outputs + based on whether sample_weight is provided for any output. + If model contains `_sample_weight_modes` we check if the input `sample_weights` corresponds to the sample weight modes. - 1. 
If sample weight mode for output i is 'temporal', we do not - change it as the `temporal` mode has been set by the user. - 2. Set sample weight mode to be 'samplewise' for output i if sample - weight mode was not set before and sample weight inputs are given. + 1. Set sample weight mode to be 'temporal' for output i, if `compile` + sample_weight_mode was set to `temporal` and sample weight inputs + are given for one or more outputs. + 2. Set sample weight mode to be 'samplewise' for output i, if `compile` + sample_weight_mode was not set and sample weight inputs are given for + one or more outputs. 3. Reset sample weight mode to None for output i if sample weight mode - was set to 'samplewise' but there is no sample weight input. + was set but there is no sample weight input. Args: sample_weights: List of sample weights of the same length as model outputs @@ -1429,21 +1435,11 @@ class Model(network.Network): """ if not self._is_compiled: return - if not sample_weights: - sample_weights = [None] * len(self._training_endpoints) - for endpoint, sample_weight in zip(self._training_endpoints, - sample_weights): - if endpoint.sample_weight_mode == 'temporal': - # If sample weight mode for endpoint is 'temporal', do nothing. - continue - if endpoint.sample_weight_mode is None and sample_weight is not None: - # Set sample weight mode to be 'samplewise' for output i if sample - # weight mode was not set before and sample weight inputs are given. - endpoint.sample_weight_mode = 'samplewise' - elif (endpoint.sample_weight_mode == 'samplewise' and - sample_weight is None): - # Reset sample weight mode to None for output i if sample weight mode - # was set to 'samplewise' but there is no sample weight input. 
+ if sample_weights and any([s is not None for s in sample_weights]): + for endpoint in self._training_endpoints: + endpoint.sample_weight_mode = self.sample_weight_mode or 'samplewise' + else: + for endpoint in self._training_endpoints: endpoint.sample_weight_mode = None def _recompile_weights_loss_and_weighted_metrics(self): @@ -1703,7 +1699,13 @@ class Model(network.Network): if steps is None: batch_size = static_batch_size - if batch_size is None and steps is None: + if (batch_size is None + and steps is None + and not isinstance(x, (dataset_ops.DatasetV2, + iterator_ops.Iterator, + iterator_ops.IteratorV2, + data_utils.Sequence)) + and not tf_inspect.isgenerator(x)): # Backwards compatibility batch_size = 32 return batch_size @@ -2104,12 +2106,11 @@ class Model(network.Network): first_x_value = nest.flatten(x)[0] if isinstance(first_x_value, np.ndarray): - x = distributed_training_utils.list_to_tuple(x) + x = training_utils.list_to_tuple(x) if y is not None: - y = distributed_training_utils.list_to_tuple(y) + y = training_utils.list_to_tuple(y) if sample_weight is not None: - sample_weight = distributed_training_utils.list_to_tuple( - sample_weight) + sample_weight = training_utils.list_to_tuple(sample_weight) in_tuple = (x, y, sample_weight) else: in_tuple = (x, y) diff --git a/tensorflow/python/keras/engine/training_distributed.py b/tensorflow/python/keras/engine/training_distributed.py index 5d01a4ab5e7..18851d5ee0c 100644 --- a/tensorflow/python/keras/engine/training_distributed.py +++ b/tensorflow/python/keras/engine/training_distributed.py @@ -374,7 +374,7 @@ def experimental_tpu_test_loop(model, if steps is not None: target_steps = steps else: - raise ValueError('Number of steps could not be infered from the data, ' + raise ValueError('Number of steps could not be inferred from the data, ' 'please pass the steps argument.') current_step = 0 @@ -519,7 +519,7 @@ def experimental_tpu_predict_loop(model, if steps is not None: target_steps = steps else: - 
raise ValueError('Number of steps could not be infered from the data, ' + raise ValueError('Number of steps could not be inferred from the data, ' 'please pass the steps argument.') current_step = 0 @@ -647,7 +647,7 @@ class DistributionSingleWorkerTrainingLoop(training_utils.TrainingLoop): steps_per_epoch = training_utils.infer_steps_for_dataset( dataset, steps_per_epoch, epochs, steps_name='steps_per_epoch') if steps_per_epoch is None: - raise ValueError('Number of steps could not be infered from the data, ' + raise ValueError('Number of steps could not be inferred from the data, ' 'please pass the steps_per_epoch argument.') if not context.executing_eagerly(): @@ -704,7 +704,7 @@ class DistributionSingleWorkerTrainingLoop(training_utils.TrainingLoop): steps = training_utils.infer_steps_for_dataset( dataset, steps, steps_name='steps') if steps is None: - raise ValueError('Number of steps could not be infered from the data, ' + raise ValueError('Number of steps could not be inferred from the data, ' 'please pass the steps argument.') if not context.executing_eagerly(): @@ -741,7 +741,7 @@ class DistributionSingleWorkerTrainingLoop(training_utils.TrainingLoop): steps = training_utils.infer_steps_for_dataset( dataset, steps, steps_name='steps') if steps is None: - raise ValueError('Number of steps could not be infered from the data, ' + raise ValueError('Number of steps could not be inferred from the data, ' 'please pass the steps argument.') if not context.executing_eagerly(): return experimental_tpu_predict_loop( diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py index 8f5c830d6e9..400530e1567 100644 --- a/tensorflow/python/keras/engine/training_utils.py +++ b/tensorflow/python/keras/engine/training_utils.py @@ -1723,6 +1723,13 @@ def convert_eager_tensors_to_numpy(structure): return nest.map_structure(_convert, structure) +def list_to_tuple(maybe_list): + """Datasets will stack the list of tensor, so 
switch them to tuples.""" + if isinstance(maybe_list, list): + return tuple(maybe_list) + return maybe_list + + def should_run_validation(validation_freq, epoch): """Checks if validation should be run this epoch. diff --git a/tensorflow/python/keras/engine/training_v2.py b/tensorflow/python/keras/engine/training_v2.py new file mode 100644 index 00000000000..9c494bfd92d --- /dev/null +++ b/tensorflow/python/keras/engine/training_v2.py @@ -0,0 +1,606 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Training related logic for Keras model in TF 2.0 context. + +Note that all the code under this module is under active development, please DO +NOT use it unless you are really sure what you are doing. 
+""" + +# pylint: disable=protected-access +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +import numpy as np + + +from tensorflow.python.distribute import distribution_strategy_context +from tensorflow.python.keras import backend +from tensorflow.python.keras import callbacks as cbks +from tensorflow.python.keras.distribute import distributed_training_utils as dist_utils +from tensorflow.python.keras.engine import data_adapter +from tensorflow.python.keras.engine import training_utils +from tensorflow.python.keras.utils.mode_keys import ModeKeys +from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.util import nest +from tensorflow.python.util import tf_contextlib + + +# The list of DataAdapter that support validation_split, only numpy and data +# tensor support validation_split for now. +_ADAPTER_FOR_VALIDATION_SPLIT = [data_adapter.NumpyArrayDataAdapter, + data_adapter.TensorDataAdapter] + +# The list of DataAdapter that support model._standardize_user_data. Currently +# keras.sequence/python generator will cause error when calling +# model._standardize_user_data, this should be updated in future cl, eg, the +# dataset/generate/sequence input will be peeked and processed by +# model._standardize_user_data() +_ADAPTER_FOR_STANDARDIZE_USER_DATA = [data_adapter.NumpyArrayDataAdapter, + data_adapter.TensorDataAdapter, + data_adapter.DatasetAdapter] + + +def run_one_epoch(model, + iterator, + execution_function, + dataset_size=None, + strategy=None, + steps_per_epoch=None, + mode=ModeKeys.TRAIN, + training_context=None, + current_epoch=1): + """Run the execution function with the data from iterator. + + Given the dataset iterator and execution function, get the data from iterator + and call it with the execution function to get the result (metric/loss). + It will run for steps_per_epoch or until to the iterator is fully consumed. 
+ + Args: + model: The keras model to run. + iterator: the dataset iterator to fetch the data. + execution_function: a tf.function that can be called with data. + dataset_size: the size of iterator, None when unknown. + strategy: the distribution strategy instance from the model. + steps_per_epoch: the number of steps to run for the epoch. + mode: the mode for the current epoch. + training_context: the context that contains callbacks and progress bar. + current_epoch: the epoch number. Used when throw error when the + the iterator is unexpected reach its end. + Returns: + The loss and metric value from the model. + """ + if mode == ModeKeys.PREDICT: + aggregator = training_utils.OutputsAggregator( + use_steps=True, num_samples_or_steps=steps_per_epoch) + else: + aggregator = training_utils.MetricsAggregator( + use_steps=True, num_samples_or_steps=steps_per_epoch) + callbacks = training_context.callbacks + progbar = training_context.progbar + + if callbacks.model.stop_training: + return + + target_steps = steps_per_epoch or np.inf + step = 0 + + while step < target_steps: + with training_context.on_batch(step, mode=mode) as batch_logs: + try: + batch_ins = create_batch_inputs(iterator, mode, model, strategy) + batch_outs = execution_function(batch_ins) + except StopIteration: + # The only acceptable case here is that the input has a unknown + # length, and configured to fully consume it. + if (dataset_size is None + and steps_per_epoch is None + and step > 0): + # The input passed by the user ran out of batches. + # Now we know the cardinality of the input(dataset or generator). + steps_per_epoch = step + aggregator.num_samples_or_steps = steps_per_epoch + progbar.params['steps'] = steps_per_epoch + progbar.progbar.target = steps_per_epoch + else: + callbacks.model.stop_training = True + logging.warning( + 'Your input ran out of data; interrupting training. ' + 'Make sure that your dataset or generator can generate at ' + 'least {} batches. 
You may need to use the repeat() function ' + 'when building your dataset.'.format( + current_epoch * steps_per_epoch)) + # In either case, break out the loop for training batch. + break + + if not isinstance(batch_outs, list): + batch_outs = [batch_outs] + if strategy: + batch_outs = dist_utils._per_replica_aggregate_batch( + batch_outs, model, mode) + + if step == 0: + aggregator.create(batch_outs) + aggregator.aggregate(batch_outs) + cbks.make_logs(model, batch_logs, batch_outs, mode) + step += 1 + + if callbacks.model.stop_training: + break + + # End of an epoch. + aggregator.finalize() + results = aggregator.results + return results + + +def create_batch_inputs(iterator, mode, model, strategy): + """Create the input data from the iterator based on the model and strategy.""" + if strategy: + # Note that the batch_ins is a function to avoid the tf.function + # retrace. + def distribute_batch_ins(): + return dist_utils._prepare_feed_values(model, iterator, None, None, mode) + batch_ins = distribute_batch_ins + else: + batch_ins = next(iterator) + if (mode == ModeKeys.TRAIN + and not model.run_eagerly + and not isinstance(backend.symbolic_learning_phase(), int)): + # Add learning phase value. + if not isinstance(batch_ins, collections.Sequence): + batch_ins = (batch_ins, True) + else: + batch_ins += (True,) + return batch_ins + + +class Loop(training_utils.TrainingLoop): + """The training loop for the TF 2.0. + + This class has some existing assumption for runtime, eg eager by default, + have distribution strategy, etc. 
+ """ + + def fit( + self, model, x=None, y=None, batch_size=None, epochs=1, verbose=1, + callbacks=None, validation_split=0., validation_data=None, shuffle=True, + class_weight=None, sample_weight=None, initial_epoch=0, + steps_per_epoch=None, validation_steps=None, validation_freq=1, **kwargs): + batch_size = model._validate_or_infer_batch_size( + batch_size, steps_per_epoch, x) + + strategy = _get_distribution_strategy(model) + if strategy: + batch_size, steps_per_epoch = dist_utils.process_batch_and_step_size( + strategy, x, batch_size, steps_per_epoch, ModeKeys.TRAIN) + dist_utils.validate_callbacks(input_callbacks=callbacks, + optimizer=model.optimizer) + # Enter tf.distribute.Strategy scope. + scope = dist_utils.distributed_scope( + strategy=strategy, learning_phase=1) + scope.__enter__() + + training_data_adapter, validation_adapter = _process_training_inputs( + model, + x, + y, + batch_size=batch_size, + sample_weights=sample_weight, + class_weights=class_weight, + validation_split=validation_split, + steps_per_epoch=steps_per_epoch, + shuffle=shuffle, + validation_data=validation_data, + validation_steps=validation_steps, + distribution_strategy=strategy) + + do_validation = (validation_adapter is not None) + + if not steps_per_epoch: + steps_per_epoch = training_data_adapter.get_size() + + # tf.print('{} on {} steps.'.format(ModeKeys.TRAIN, steps_per_epoch)) + training_context = TrainingContext() + + initial_epoch = model._maybe_load_initial_epoch_from_ckpt( + initial_epoch, ModeKeys.TRAIN) + + _update_sample_weight_mode(model, ModeKeys.TRAIN, training_data_adapter) + training_function = _make_execution_function(model, ModeKeys.TRAIN) + + training_data_iter = None + # Only recreate iterator when the data has a fixed length, which will be + # fully consumed every epoch, or has a unknown length (dataset, generator) + # and will be fully consumed (steps_per_epoch is None) + recreate_training_iterator = (training_data_adapter.get_size() is not None + or 
steps_per_epoch is None) + + if do_validation: + if not validation_steps: + validation_steps = validation_adapter.get_size() + eval_function = _make_execution_function(model, ModeKeys.TEST) + eval_data_iter = None + recreate_eval_iterator = (validation_adapter.get_size() is not None + or validation_steps is None) + + callbacks = cbks.configure_callbacks( + callbacks, + model, + do_validation=do_validation, + batch_size=batch_size, + epochs=epochs, + steps_per_epoch=steps_per_epoch, + samples=None, + verbose=0, # Handle ProgBarLogger separately in this loop. + mode=ModeKeys.TRAIN) + + with training_context.on_start(model, callbacks, verbose, ModeKeys.TRAIN): + # TODO(scottzhu): Handle TPUStrategy training loop + for epoch in range(initial_epoch, epochs): + if training_context.callbacks.model.stop_training: + break + + # Training + with training_context.on_epoch(epoch, ModeKeys.TRAIN) as epoch_logs: + model.reset_metrics() + if training_data_iter is None or recreate_training_iterator: + training_data_iter = _create_dataset_iterator( + strategy, training_data_adapter.get_dataset()) + + training_result = run_one_epoch( + model, + training_data_iter, + training_function, + dataset_size=training_data_adapter.get_size(), + strategy=strategy, + steps_per_epoch=steps_per_epoch, + mode=ModeKeys.TRAIN, + training_context=training_context, + current_epoch=epoch) + cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN) + + # Evaluation + if (do_validation and + training_utils.should_run_validation(validation_freq, epoch) and + not callbacks.model.stop_training): + if eval_data_iter is None or recreate_eval_iterator: + eval_data_iter = _create_dataset_iterator( + strategy, validation_adapter.get_dataset()) + eval_context = TrainingContext() + with eval_context.on_start( + model, callbacks, verbose=0, mode=ModeKeys.TEST): + with eval_context.on_epoch(epoch, ModeKeys.TEST): + model.reset_metrics() + eval_result = run_one_epoch( + model, + eval_data_iter, + 
eval_function, + dataset_size=validation_adapter.get_size(), + strategy=strategy, + steps_per_epoch=validation_steps, + mode=ModeKeys.TEST, + training_context=eval_context, + current_epoch=epochs) + cbks.make_logs(model, epoch_logs, eval_result, ModeKeys.TRAIN, + prefix='val_') + + if strategy: + scope.__exit__(None, None, None) + + return model.history + + def _model_iteration( + self, model, mode, x=None, y=None, batch_size=None, verbose=1, + sample_weight=None, steps=None, callbacks=None, **kwargs): + + batch_size = model._validate_or_infer_batch_size( + batch_size, steps, x) + strategy = _get_distribution_strategy(model) + if strategy: + batch_size, steps = dist_utils.process_batch_and_step_size( + strategy, x, batch_size, steps, mode) + dist_utils.validate_callbacks(input_callbacks=callbacks, + optimizer=model.optimizer) + # Enter tf.distribute.Strategy scope. + scope = dist_utils.distributed_scope( + strategy=strategy, learning_phase=0) + scope.__enter__() + + adapter = _process_inputs( + model, + x, + y, + batch_size=batch_size, + sample_weights=sample_weight, + steps=steps, + distribution_strategy=strategy) + + if not steps: + steps = adapter.get_size() + + # tf.print('{} on {} steps.'.format(ModeKeys.TRAIN, steps_per_epoch)) + training_context = TrainingContext() + + _update_sample_weight_mode(model, mode, adapter) + execution_function = _make_execution_function(model, mode) + data_iterator = _create_dataset_iterator( + strategy, adapter.get_dataset()) + + callbacks = cbks.configure_callbacks( + callbacks, + model, + do_validation=False, + batch_size=batch_size, + epochs=1, + steps_per_epoch=steps, + samples=None, + verbose=0, # Handle ProgBarLogger separately in this loop. 
+ mode=mode) + + with training_context.on_start(model, callbacks, verbose, mode): + # TODO(scottzhu): Handle TPUStrategy training loop + with training_context.on_epoch(0, mode) as epoch_logs: + model.reset_metrics() + result = run_one_epoch( + model, + data_iterator, + execution_function, + dataset_size=adapter.get_size(), + strategy=strategy, + steps_per_epoch=steps, + mode=mode, + training_context=training_context, + current_epoch=1) + cbks.make_logs(model, epoch_logs, result, mode) + + if strategy: + scope.__exit__(None, None, None) + + if len(result) == 1: + result = result[0] + return result + + def evaluate( + self, model, x=None, y=None, batch_size=None, verbose=1, + sample_weight=None, steps=None, callbacks=None, **kwargs): + return self._model_iteration( + model, ModeKeys.TEST, x=x, y=y, batch_size=batch_size, verbose=verbose, + sample_weight=sample_weight, steps=steps, callbacks=callbacks, **kwargs) + + def predict(self, model, x, batch_size=None, verbose=0, steps=None, + callbacks=None, **kwargs): + return self._model_iteration( + model, ModeKeys.PREDICT, x=x, batch_size=batch_size, verbose=verbose, + steps=steps, callbacks=callbacks, **kwargs) + + +def _get_distribution_strategy(model): + if model._distribution_strategy: + return model._distribution_strategy + # TODO(scottzhu): might want to just get the default strategy in future. 
+ elif distribution_strategy_context.has_strategy(): + return distribution_strategy_context.get_strategy() + else: + return None + + +def _create_dataset_iterator(strategy, training_dataset): + if strategy: + training_data_iter = strategy.make_dataset_iterator(training_dataset) + else: + training_data_iter = iter(training_dataset) + return training_data_iter + + +def _process_training_inputs(model, x, y, batch_size=None, + sample_weights=None, class_weights=None, + steps_per_epoch=None, validation_split=0., + validation_data=None, validation_steps=None, + shuffle=True, distribution_strategy=None): + """Process the data input for fit() with respect to validation_split.""" + if validation_split and 0. < validation_split < 1. and validation_data: + raise ValueError('validation_data and validation_split cannot be used ' + 'at same time.') + + adapter_cls = data_adapter.select_data_adapter(x, y) + + # Handle validation_split, we want to split the data and get the training + # section before we give it to data adapter. + if validation_split and 0. < validation_split < 1.: + if adapter_cls not in _ADAPTER_FOR_VALIDATION_SPLIT: + raise ValueError( + '`validation_split` argument is not supported when ' + 'data adapter is {}. Received: x={}, validation_split={}'.format( + adapter_cls, x, validation_split)) + # Retrieve the training section from x and y, and then construct dataset + # from it. 
+ x, y, sample_weights = model._standardize_user_data( + x, y, sample_weight=sample_weights, + class_weight=class_weights, + batch_size=batch_size, + check_steps=True, + steps=steps_per_epoch) + (x, y, sample_weights, + val_x, val_y, + val_sample_weights) = training_utils.split_training_and_validation_data( + x, y, sample_weights, validation_split) + train_adapter = adapter_cls(x, y, batch_size=batch_size, + sample_weights=sample_weights, shuffle=shuffle, + distribution_strategy=distribution_strategy) + val_adapter = adapter_cls(val_x, val_y, + sample_weights=val_sample_weights, + batch_size=batch_size, + distribution_strategy=distribution_strategy) + else: + train_adapter = _process_inputs(model, x, y, sample_weights=sample_weights, + batch_size=batch_size, + class_weights=class_weights, + shuffle=shuffle, steps=steps_per_epoch, + distribution_strategy=distribution_strategy) + val_adapter = None + if validation_data: + (val_x, val_y, + val_sample_weights) = training_utils.unpack_validation_data( + validation_data) + # For eval data, we use the training data batch_size it was unknown. + # This is useful for generator/sequence training data input with numpy + # validation data input. 
+ if not batch_size: + batch_size = train_adapter.batch_size() + val_adapter = _process_inputs(model, val_x, val_y, + sample_weights=val_sample_weights, + batch_size=batch_size, + class_weights=class_weights, + steps=validation_steps, + distribution_strategy=distribution_strategy) + elif validation_steps: + raise ValueError('`validation_steps` should not be specified if ' + '`validation_data` is None.') + return train_adapter, val_adapter + + +def _process_inputs(model, x, y, batch_size=None, sample_weights=None, + class_weights=None, shuffle=False, steps=None, + distribution_strategy=None): + """Process the inputs for fit/eval/predict().""" + adapter_cls = data_adapter.select_data_adapter(x, y) + if adapter_cls in _ADAPTER_FOR_STANDARDIZE_USER_DATA: + x, y, sample_weights = model._standardize_user_data( + x, + y, + sample_weight=sample_weights, + class_weight=class_weights, + batch_size=batch_size, + check_steps=True, + steps=steps) + # TODO(scottzhu): The generator and keras.sequence does not work with + # model._standardize_user_data() so far. However that method is very + # important which contains on-fly model build/tensor align for dict input, + # etc. We should still call the _standardize_user_data with the peeked data + # from generator or sequence, and let model compile. + return adapter_cls(x, y, batch_size=batch_size, + sample_weights=sample_weights, shuffle=shuffle, + distribution_strategy=distribution_strategy) + + +def _make_execution_function(model, mode): + """Makes function to run one step of model execution.""" + if model._distribution_strategy: + return dist_utils._make_execution_function(model, mode) + else: + return model._make_execution_function(mode) + + +def _update_sample_weight_mode(model, mode, adapter): + """Updates the sample_weight_mode of a given model.""" + # Add a quick return to prevent us from calling model._feed_targets that + # accesses certain model properties that may not be set in the `PREDICT` mode. 
+ if mode == ModeKeys.PREDICT: + return + + sample_weights = None + + # Get some sample inputs from the data_adapter + iterator = _create_dataset_iterator(model._distribution_strategy, + adapter.get_dataset()) + inputs = create_batch_inputs(iterator, mode, model, + model._distribution_strategy) + # `inputs` is the model's inputs + targets + sample_weights + + # learning phase placeholder if specified. To update the sample_weight_mode + # we need to determine if the user has passed sample weights as part of the + # input. + if not callable(inputs): + # if not isinstance(inputs, collections.Sequence): + # inputs = (inputs,) + # Note that the batch inputs should be a tuple of 2, 3 or 4 items. + # (input, target, {sample_weights}, {learning_phase}) + sample_weights_index = 0 + if model._feed_inputs: + sample_weights_index += 1 + if model._feed_targets: + sample_weights_index += 1 + + sample_weights = inputs[sample_weights_index:] + has_learning_phase_pl = (mode == ModeKeys.TRAIN and + not isinstance(backend.symbolic_learning_phase(), + int)) + if has_learning_phase_pl: + sample_weights = sample_weights[:-1] + model._update_sample_weight_modes(nest.flatten(sample_weights)) + + # Call the DistributionStrategy specific function to update the + # sample_weight_mode on the model. + if model._distribution_strategy: + dist_utils._update_sample_weight_modes(model, mode, sample_weights) + + # Force delete the iterator. + del iterator + + +class TrainingContext(object): + """Utility object that wrap around callbacks and progress bars.""" + + @tf_contextlib.contextmanager + def on_start(self, model, callbacks=None, verbose=0, mode=ModeKeys.TRAIN): + """Provide a scope for the whole training process.""" + # TODO(omalleyt): Handle ProgBar as part of Callbacks once hooks are ready. 
+ progbar = training_utils.get_progbar(model, 'steps') + progbar.params = callbacks.params + progbar.params['verbose'] = verbose + callbacks.model.stop_training = False + callbacks._call_begin_hook(mode) + progbar.on_train_begin() + + # Cache those two instance so that it can be used in other functions. + self.callbacks = callbacks + self.progbar = progbar + + try: + yield + finally: + # End of all epochs + self.callbacks._call_end_hook(mode) + + @tf_contextlib.contextmanager + def on_epoch(self, epoch=0, mode=ModeKeys.TRAIN): + """Provide a scope for running one epoch.""" + epoch_logs = {} + if mode == ModeKeys.TRAIN: + self.callbacks.on_epoch_begin(epoch, epoch_logs) + self.progbar.on_epoch_begin(epoch, epoch_logs) + try: + yield epoch_logs + finally: + if mode == ModeKeys.TRAIN: + # Epochs only apply to `fit`. + self.callbacks.on_epoch_end(epoch, epoch_logs) + self.progbar.on_epoch_end(epoch, epoch_logs) + + @tf_contextlib.contextmanager + def on_batch(self, step=0, mode=ModeKeys.TRAIN): + """Provide a scope for running one batch.""" + batch_logs = {'batch': step, 'size': 1} + self.callbacks._call_batch_hook( + mode, 'begin', step, batch_logs) + self.progbar.on_batch_begin(step, batch_logs) + try: + yield batch_logs + finally: + self.callbacks._call_batch_hook( + mode, 'end', step, batch_logs) + self.progbar.on_batch_end(step, batch_logs) diff --git a/tensorflow/python/keras/layers/convolutional.py b/tensorflow/python/keras/layers/convolutional.py index 2175a96d865..dcece7fc754 100644 --- a/tensorflow/python/keras/layers/convolutional.py +++ b/tensorflow/python/keras/layers/convolutional.py @@ -176,6 +176,7 @@ class Conv(Layer): self.bias = None self.input_spec = InputSpec(ndim=self.rank + 2, axes={channel_axis: input_dim}) + self._convolution_op = None self.built = True def call(self, inputs): @@ -185,15 +186,16 @@ class Conv(Layer): op_padding = self.padding if not isinstance(op_padding, (list, tuple)): op_padding = op_padding.upper() - conv_op = 
nn_ops.Convolution( - inputs.shape, - filter_shape=self.kernel.shape, - dilation_rate=self.dilation_rate, - strides=self.strides, - padding=op_padding, - data_format=conv_utils.convert_data_format(self.data_format, - self.rank + 2)) - outputs = conv_op(inputs, self.kernel) + if self._convolution_op is None: + self._convolution_op = nn_ops.Convolution( + inputs.shape, + filter_shape=self.kernel.shape, + dilation_rate=self.dilation_rate, + strides=self.strides, + padding=op_padding, + data_format=conv_utils.convert_data_format(self.data_format, + self.rank + 2)) + outputs = self._convolution_op(inputs, self.kernel) if self.use_bias: if self.data_format == 'channels_first': diff --git a/tensorflow/python/keras/layers/core.py b/tensorflow/python/keras/layers/core.py index e28a8e52f15..1c2d8cafbef 100644 --- a/tensorflow/python/keras/layers/core.py +++ b/tensorflow/python/keras/layers/core.py @@ -404,7 +404,7 @@ class Reshape(Layer): # also supports shape inference using `-1` as dimension model.add(Reshape((-1, 2, 2))) - # now: model.output_shape == (None, 3, 2, 2) + # now: model.output_shape == (None, None, 2, 2) ``` """ diff --git a/tensorflow/python/keras/layers/pooling_test.py b/tensorflow/python/keras/layers/pooling_test.py index 67df4d7a256..b3bf8757371 100644 --- a/tensorflow/python/keras/layers/pooling_test.py +++ b/tensorflow/python/keras/layers/pooling_test.py @@ -144,6 +144,8 @@ class Pooling3DTest(test.TestCase): @tf_test_util.run_in_graph_and_eager_modes def test_maxpooling_3d(self): + if test.is_built_with_rocm(): + self.skipTest('Pooling with 3D tensors is not supported in ROCm') pool_size = (3, 3, 3) testing_utils.layer_test( keras.layers.MaxPooling3D, @@ -163,6 +165,8 @@ class Pooling3DTest(test.TestCase): @tf_test_util.run_in_graph_and_eager_modes def test_averagepooling_3d(self): + if test.is_built_with_rocm(): + self.skipTest('Pooling with 3D tensors is not supported in ROCm') pool_size = (3, 3, 3) testing_utils.layer_test( 
keras.layers.AveragePooling3D, diff --git a/tensorflow/python/keras/layers/recurrent_test.py b/tensorflow/python/keras/layers/recurrent_test.py index 55233120a09..f3ebf8a83c8 100644 --- a/tensorflow/python/keras/layers/recurrent_test.py +++ b/tensorflow/python/keras/layers/recurrent_test.py @@ -46,9 +46,9 @@ from tensorflow.python.ops import special_math_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variables as variables_lib from tensorflow.python.platform import test -from tensorflow.python.training.tracking import object_identity from tensorflow.python.training.tracking import util as trackable_util from tensorflow.python.util import nest +from tensorflow.python.util import object_identity # Used for nested input/output/state RNN test. NestedInput = collections.namedtuple('NestedInput', ['t1', 't2']) diff --git a/tensorflow/python/keras/layers/wrappers.py b/tensorflow/python/keras/layers/wrappers.py index 1c249aa87a0..553c56609d9 100644 --- a/tensorflow/python/keras/layers/wrappers.py +++ b/tensorflow/python/keras/layers/wrappers.py @@ -23,7 +23,6 @@ import copy from tensorflow.python.framework import tensor_shape from tensorflow.python.keras import backend as K -from tensorflow.python.keras.engine import base_layer_utils from tensorflow.python.keras.engine.base_layer import Layer from tensorflow.python.keras.engine.input_spec import InputSpec from tensorflow.python.keras.layers.recurrent import _standardize_args @@ -225,7 +224,7 @@ class TimeDistributed(Wrapper): if input_shape[0] and not self._always_use_reshape: # batch size matters, use rnn-based implementation def step(x, _): - output = self.layer.call(x, **kwargs) + output = self.layer(x, **kwargs) return output, [] _, outputs, _ = K.rnn( @@ -252,20 +251,13 @@ class TimeDistributed(Wrapper): if generic_utils.has_arg(self.layer.call, 'mask') and mask is not None: inner_mask_shape = self._get_shape_tuple((-1,), mask, 2) kwargs['mask'] = K.reshape(mask, inner_mask_shape) - 
y = self.layer.call(inputs, **kwargs) + y = self.layer(inputs, **kwargs) # Shape: (num_samples, timesteps, ...) output_shape = self.compute_output_shape(input_shape).as_list() output_shape = self._get_shape_tuple( (-1, input_length), y, 1, output_shape[2:]) y = array_ops.reshape(y, output_shape) - # Apply activity regularizer if any: - if (hasattr(self.layer, 'activity_regularizer') and - self.layer.activity_regularizer is not None): - regularization_loss = self.layer.activity_regularizer(y) - base_layer_utils.check_graph_consistency( - regularization_loss, method='activity_regularizer') - self.add_loss(regularization_loss, inputs) return y def compute_mask(self, inputs, mask=None): @@ -646,13 +638,13 @@ class Bidirectional(Wrapper): forward_inputs, backward_inputs = inputs, inputs forward_state, backward_state = None, None - y = self.forward_layer.call(forward_inputs, - initial_state=forward_state, **kwargs) - y_rev = self.backward_layer.call(backward_inputs, - initial_state=backward_state, **kwargs) + y = self.forward_layer(forward_inputs, + initial_state=forward_state, **kwargs) + y_rev = self.backward_layer(backward_inputs, + initial_state=backward_state, **kwargs) else: - y = self.forward_layer.call(inputs, **kwargs) - y_rev = self.backward_layer.call(inputs, **kwargs) + y = self.forward_layer(inputs, **kwargs) + y_rev = self.backward_layer(inputs, **kwargs) if self.return_state: states = y[1:] + y_rev[1:] diff --git a/tensorflow/python/keras/layers/wrappers_test.py b/tensorflow/python/keras/layers/wrappers_test.py index c11211807bd..8fe13f4546f 100644 --- a/tensorflow/python/keras/layers/wrappers_test.py +++ b/tensorflow/python/keras/layers/wrappers_test.py @@ -32,8 +32,8 @@ from tensorflow.python.keras.engine import base_layer_utils from tensorflow.python.keras.layers.rnn_cell_wrapper_v2 import ResidualWrapper from tensorflow.python.ops.array_ops import concat from tensorflow.python.platform import test -from tensorflow.python.training.tracking import 
object_identity from tensorflow.python.training.tracking import util as trackable_util +from tensorflow.python.util import object_identity class _RNNCellWithConstants(keras.layers.Layer): @@ -154,11 +154,12 @@ class TimeDistributedTest(test.TestCase): model = keras.models.Sequential() model.add( keras.layers.TimeDistributed( - keras.layers.Dense(2, kernel_regularizer='l1'), + keras.layers.Dense(2, kernel_regularizer='l1', + activity_regularizer='l1'), input_shape=(3, 4))) model.add(keras.layers.Activation('relu')) model.compile(optimizer='rmsprop', loss='mse') - self.assertEqual(len(model.losses), 1) + self.assertEqual(len(model.losses), 2) def test_TimeDistributed_batchnorm(self): with self.cached_session(): @@ -626,11 +627,12 @@ class BidirectionalTest(test.TestCase, parameterized.TestCase): x_reachable_loss = x * x layer = keras.layers.Bidirectional( keras.layers.SimpleRNN( - 3, kernel_regularizer='l1', bias_regularizer='l1')) + 3, kernel_regularizer='l1', bias_regularizer='l1', + activity_regularizer='l1')) _ = layer(x) - assert len(layer.losses) == 4 + assert len(layer.losses) == 6 assert len(layer.get_losses_for(None)) == 4 - assert not layer.get_losses_for(x) + assert len(layer.get_losses_for(x)) == 2 # Create a random tensor that is not conditional on the inputs. 
with keras.backend.get_graph().as_default(): @@ -640,9 +642,9 @@ class BidirectionalTest(test.TestCase, parameterized.TestCase): layer.forward_layer.add_loss(const_tensor, inputs=None) layer.backward_layer.add_loss(x_reachable_loss, inputs=x) layer.backward_layer.add_loss(const_tensor, inputs=None) - assert len(layer.losses) == 8 + assert len(layer.losses) == 10 assert len(layer.get_losses_for(None)) == 6 - assert len(layer.get_losses_for(x)) == 2 + assert len(layer.get_losses_for(x)) == 4 def test_Bidirectional_with_constants(self): with self.cached_session(): diff --git a/tensorflow/python/keras/metrics_correctness_test.py b/tensorflow/python/keras/metrics_correctness_test.py index 4f761bfec11..29a89203dae 100644 --- a/tensorflow/python/keras/metrics_correctness_test.py +++ b/tensorflow/python/keras/metrics_correctness_test.py @@ -45,13 +45,19 @@ def get_multi_io_model(): return testing_utils.get_multi_io_model(branch_a, branch_b) -def custom_generator_multi_io(): +def custom_generator_multi_io(sample_weights=None): batch_size = 2 num_samples = 4 inputs = np.asarray([[1.], [2.], [3.], [4.]]) - targets = np.asarray([[2.], [4.], [6.], [8.]]) - w1 = np.asarray([2., 3., 4., 5.]) - w2 = np.asarray([3.5, 2.5, 1.5, 0.5]) + targets_1 = np.asarray([[2.], [4.], [6.], [8.]]) + targets_2 = np.asarray([[1.], [2.], [3.], [4.]]) + if sample_weights: + assert len(sample_weights) == 2 + w1 = sample_weights[0] + w2 = sample_weights[1] + else: + w1 = None + w2 = None i = 0 while True: batch_index = i * batch_size % num_samples @@ -59,8 +65,14 @@ def custom_generator_multi_io(): start = batch_index end = start + batch_size x = [inputs[start:end], inputs[start:end]] - y = [targets[start:end], targets[start:end]] - w = [w1[start:end], w2[start:end]] + y = [targets_1[start:end], targets_2[start:end]] + if sample_weights: + w = [ + None if w1 is None else w1[start:end], + None if w2 is None else w2[start:end] + ] + else: + w = None yield x, y, w @@ -80,37 +92,18 @@ class 
TestMetricsCorrectnessMultiIO(keras_parameterized.TestCase): run_eagerly=testing_utils.should_run_eagerly()) return model - def _custom_generator(self): - batch_size = 2 - num_samples = 4 - inputs = np.asarray([[1.], [2.], [3.], [4.]]) - targets = np.asarray([[2.], [4.], [6.], [8.]]) - w1 = np.asarray([2., 3., 4., 5.]) - w2 = np.asarray([3.5, 2.5, 1.5, 0.5]) - i = 0 - while True: - batch_index = i * batch_size % num_samples - i += 1 - start = batch_index - end = start + batch_size - x = [inputs[start:end], inputs[start:end]] - y = [targets[start:end], targets[start:end]] - w = [w1[start:end], w2[start:end]] - yield x, y, w - def setUp(self): super(TestMetricsCorrectnessMultiIO, self).setUp() self.x = np.asarray([[1.], [2.], [3.], [4.]]) - self.y = np.asarray([[2.], [4.], [6.], [8.]]) - self.weights_1 = np.asarray([2., 3., 4., 5.]) - self.weights_2 = np.asarray([3.5, 2.5, 1.5, 0.5]) + self.y1 = np.asarray([[2.], [4.], [6.], [8.]]) + self.y2 = np.asarray([[1.], [2.], [3.], [4.]]) + self.sample_weight_1 = np.asarray([2., 3., 4., 5.]) + self.sample_weight_2 = np.asarray([3.5, 2.5, 1.5, 0.5]) + self.class_weight_1 = {2: 2, 4: 3, 6: 4, 8: 5} + self.class_weight_2 = {1: 3.5, 2: 2.5, 3: 1.5, 4: 0.5} - # y_true = [[2.], [4.], [6.], [8.]], y_pred = [[3.], [6.], [9.], [12.]] - - # Metric `output_1`, `output_2`: - # Total = ((3 - 2)^2 + (6 - 4)^2) + ((9 - 6)^2 + (12 - 8)^2) = 30, - # Count = 2 + 2 - # Result = 7.5 + # y_true_1 = [[2.], [4.], [6.], [8.]], y_pred = [[3.], [6.], [9.], [12.]] + # y_true_2 = [[1.], [2.], [3.], [4.]], y_pred = [[3.], [6.], [9.], [12.]] # Weighted metric `output_1`: # Total = ((3 - 2)^2 * 2 + (6 - 4)^2 * 3) + @@ -120,68 +113,164 @@ class TestMetricsCorrectnessMultiIO(keras_parameterized.TestCase): # Result = 9.2857141 # Weighted metric `output_2`: - # Total = ((3 - 2)^2 * 3.5 + (6 - 4)^2 * 2.5) + - # ((9 - 6)^2 * 1.5 + (12 - 8)^2 * 0.5) - # = 35 + # Total = ((3 - 1)^2 * 3.5 + (6 - 2)^2 * 2.5) + + # ((9 - 3)^2 * 1.5 + (12 - 4)^2 * 0.5) + # = 140 # 
Count = (3.5 + 2.5) + (1.5 + 0.5) - # Result = 4.375 + # Result = 17.5 - # Loss `output_1`: + # Loss `output_1` with weights: # Total = ((3 - 2)^2 * 2 + (6 - 4)^2 * 3) + # ((9 - 6)^2 * 4 + (12 - 8)^2 * 5) # = 130 # Count = 2 + 2 # Result = 32.5 - # Loss `output_2`: - # Total = ((3 - 2)^2 * 3.5 + (6 - 4)^2 * 2.5) + - # ((9 - 6)^2 * 1.5 + (12 - 8)^2 * 0.5) - # = 35 + # Loss `output_1` without weights/Metric `output_1`: + # Total = ((3 - 2)^2 + (6 - 4)^2) + ((9 - 6)^2 + (12 - 8)^2) = 30 # Count = 2 + 2 - # Result = 8.75 + # Result = 7.5 - # Total loss = 32.5 + 8.75 = 41.25 + # Loss `output_2` with weights: + # Total = ((3 - 1)^2 * 3.5 + (6 - 2)^2 * 2.5) + + # ((9 - 3)^2 * 1.5 + (12 - 4)^2 * 0.5) + # = 140 + # Count = 2 + 2 + # Result = 35 - wmse = 'mean_squared_error_2' + # Loss `output_2` without weights/Metric `output_2`: + # Total = ((3 - 1)^2 + (6 - 2)^2) + ((9 - 3)^2 + (12 - 4)^2) = 120 + # Count = 2 + 2 + # Result = 30 + + # Total loss with weights = 32.5 + 35 = 67.5 + # Total loss without weights = 7.5 + 30 = 37.5 + + self.wmse = 'mean_squared_error_2' if not tf2.enabled(): - wmse = 'weighted_' + wmse + self.wmse = 'weighted_' + self.wmse + self.expected_fit_result_with_weights = { + 'output_1_mean_squared_error': [7.5, 7.5], + 'output_2_mean_squared_error': [30, 30], + 'output_1_' + self.wmse: [9.286, 9.286], + 'output_2_' + self.wmse: [17.5, 17.5], + 'loss': [67.5, 67.5], + 'output_1_loss': [32.5, 32.5], + 'output_2_loss': [35, 35], + } + + self.expected_fit_result_with_weights_output_2 = { + 'output_1_mean_squared_error': [7.5, 7.5], + 'output_2_mean_squared_error': [30, 30], + 'output_1_' + self.wmse: [7.5, 7.5], + 'output_2_' + self.wmse: [17.5, 17.5], + 'loss': [42.5, 42.5], + 'output_1_loss': [7.5, 7.5], + 'output_2_loss': [35, 35], + } + self.expected_fit_result = { 'output_1_mean_squared_error': [7.5, 7.5], - 'output_2_mean_squared_error': [7.5, 7.5], - 'output_1_' + wmse: [9.286, 9.286], - 'output_2_' + wmse: [4.375, 4.375], - 'loss': [41.25, 41.25], 
- 'output_1_loss': [32.5, 32.5], - 'output_2_loss': [8.75, 8.75], + 'output_2_mean_squared_error': [30, 30], + 'output_1_' + self.wmse: [7.5, 7.5], + 'output_2_' + self.wmse: [30, 30], + 'loss': [37.5, 37.5], + 'output_1_loss': [7.5, 7.5], + 'output_2_loss': [30, 30], } # In the order: 'loss', 'output_1_loss', 'output_2_loss', # 'output_1_mean_squared_error', 'output_1_mean_squared_error_2', # 'output_2_mean_squared_error', 'output_2_mean_squared_error_2' - self.expected_batch_result = [41.25, 32.5, 8.75, 7.5, 9.286, 7.5, 4.375] + self.expected_batch_result_with_weights = [ + 67.5, 32.5, 35, 7.5, 9.286, 30, 17.5 + ] + self.expected_batch_result_with_weights_output_2 = [ + 42.5, 7.5, 35, 7.5, 7.5, 30, 17.5 + ] + self.expected_batch_result = [37.5, 7.5, 30, 7.5, 7.5, 30, 30] def test_fit(self): model = self._get_compiled_multi_io_model() - history = model.fit([self.x, self.x], [self.y, self.y], - sample_weight={ - 'output_1': self.weights_1, - 'output_2': self.weights_2, - }, + history = model.fit([self.x, self.x], [self.y1, self.y2], batch_size=2, epochs=2, shuffle=False) for key, value in self.expected_fit_result.items(): self.assertAllClose(history.history[key], value, 1e-3) + def test_fit_with_sample_weight(self): + model = self._get_compiled_multi_io_model() + history = model.fit([self.x, self.x], [self.y1, self.y2], + sample_weight={ + 'output_1': self.sample_weight_1, + 'output_2': self.sample_weight_2, + }, + batch_size=2, + epochs=2, + shuffle=False) + for key, value in self.expected_fit_result_with_weights.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + # Set weights for one output (use batch size). 
+ history = model.fit([self.x, self.x], [self.y1, self.y2], + sample_weight={'output_2': self.sample_weight_2}, + batch_size=2, + epochs=2, + shuffle=False) + + for key, value in self.expected_fit_result_with_weights_output_2.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + def test_fit_with_class_weight(self): + model = self._get_compiled_multi_io_model() + history = model.fit([self.x, self.x], [self.y1, self.y2], + class_weight={ + 'output_1': self.class_weight_1, + 'output_2': self.class_weight_2, + }, + batch_size=2, + epochs=2, + shuffle=False) + for key, value in self.expected_fit_result_with_weights.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + # Set weights for one output. + history = model.fit([self.x, self.x], [self.y1, self.y2], + class_weight={'output_2': self.class_weight_2}, + batch_size=2, + epochs=2, + shuffle=False) + + for key, value in self.expected_fit_result_with_weights_output_2.items(): + self.assertAllClose(history.history[key], value, 1e-3) + def test_eval(self): model = self._get_compiled_multi_io_model() - eval_result = model.evaluate([self.x, self.x], [self.y, self.y], + eval_result = model.evaluate([self.x, self.x], [self.y1, self.y2], + batch_size=2) + self.assertAllClose(eval_result, self.expected_batch_result, 1e-3) + + def test_eval_with_sample_weight(self): + model = self._get_compiled_multi_io_model() + eval_result = model.evaluate([self.x, self.x], [self.y1, self.y2], batch_size=2, sample_weight={ - 'output_1': self.weights_1, - 'output_2': self.weights_2, + 'output_1': self.sample_weight_1, + 'output_2': self.sample_weight_2, }) - self.assertAllClose(eval_result, self.expected_batch_result, 1e-3) + self.assertAllClose(eval_result, self.expected_batch_result_with_weights, + 1e-3) + + # Set weights for one output. 
+ model = self._get_compiled_multi_io_model() + eval_result = model.evaluate([self.x, self.x], [self.y1, self.y2], + batch_size=2, + sample_weight={ + 'output_2': self.sample_weight_2, + }) + self.assertAllClose(eval_result, + self.expected_batch_result_with_weights_output_2, 1e-3) # Verify that metric value is same with arbitrary weights and batch size. x = np.random.random((50, 1)) @@ -194,22 +283,65 @@ class TestMetricsCorrectnessMultiIO(keras_parameterized.TestCase): def test_train_on_batch(self): model = self._get_compiled_multi_io_model() - result = model.train_on_batch([self.x, self.x], [self.y, self.y], - sample_weight={ - 'output_1': self.weights_1, - 'output_2': self.weights_2, - }) + result = model.train_on_batch([self.x, self.x], [self.y1, self.y2]) self.assertAllClose(result, self.expected_batch_result, 1e-3) + def test_train_on_batch_with_sample_weight(self): + model = self._get_compiled_multi_io_model() + result = model.train_on_batch([self.x, self.x], [self.y1, self.y2], + sample_weight={ + 'output_1': self.sample_weight_1, + 'output_2': self.sample_weight_2, + }) + self.assertAllClose(result, self.expected_batch_result_with_weights, 1e-3) + + # Set weights for one output. + result = model.train_on_batch([self.x, self.x], [self.y1, self.y2], + sample_weight={ + 'output_2': self.sample_weight_2, + }) + self.assertAllClose(result, + self.expected_batch_result_with_weights_output_2, 1e-3) + + def test_train_on_batch_with_class_weight(self): + model = self._get_compiled_multi_io_model() + result = model.train_on_batch([self.x, self.x], [self.y1, self.y2], + class_weight={ + 'output_1': self.class_weight_1, + 'output_2': self.class_weight_2, + }) + self.assertAllClose(result, self.expected_batch_result_with_weights, 1e-3) + + # Set weights for one output. 
+ result = model.train_on_batch([self.x, self.x], [self.y1, self.y2], + class_weight={ + 'output_2': self.class_weight_2, + }) + self.assertAllClose(result, + self.expected_batch_result_with_weights_output_2, 1e-3) + def test_test_on_batch(self): model = self._get_compiled_multi_io_model() - result = model.test_on_batch([self.x, self.x], [self.y, self.y], - sample_weight={ - 'output_1': self.weights_1, - 'output_2': self.weights_2, - }) + result = model.test_on_batch([self.x, self.x], [self.y1, self.y2]) self.assertAllClose(result, self.expected_batch_result, 1e-3) + def test_test_on_batch_with_sample_weight(self): + model = self._get_compiled_multi_io_model() + result = model.test_on_batch([self.x, self.x], [self.y1, self.y2], + sample_weight={ + 'output_1': self.sample_weight_1, + 'output_2': self.sample_weight_2, + }) + self.assertAllClose(result, self.expected_batch_result_with_weights, 1e-3) + + # Set weights for one output. + result = model.test_on_batch([self.x, self.x], [self.y1, self.y2], + sample_weight={ + 'output_2': self.sample_weight_2, + }) + self.assertAllClose(result, + self.expected_batch_result_with_weights_output_2, 1e-3) + def test_fit_generator(self): model = self._get_compiled_multi_io_model() history = model.fit_generator( @@ -217,11 +349,67 @@ class TestMetricsCorrectnessMultiIO(keras_parameterized.TestCase): for key, value in self.expected_fit_result.items(): self.assertAllClose(history.history[key], value, 1e-3) + def test_fit_generator_with_sample_weight(self): + model = self._get_compiled_multi_io_model() + history = model.fit_generator( + custom_generator_multi_io( + sample_weights=[self.sample_weight_1, self.sample_weight_2]), + steps_per_epoch=2, + epochs=2) + for key, value in self.expected_fit_result_with_weights.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + # Set weights for one output. 
+ history = model.fit_generator( + custom_generator_multi_io(sample_weights=[None, self.sample_weight_2]), + steps_per_epoch=2, + epochs=2) + for key, value in self.expected_fit_result_with_weights_output_2.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + def test_fit_generator_with_class_weight(self): + model = self._get_compiled_multi_io_model() + history = model.fit_generator( + custom_generator_multi_io(), + class_weight={ + 'output_1': self.class_weight_1, + 'output_2': self.class_weight_2, + }, + steps_per_epoch=2, + epochs=2) + for key, value in self.expected_fit_result_with_weights.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + # Set weights for one output. + history = model.fit_generator( + custom_generator_multi_io(), + class_weight={'output_2': self.class_weight_2}, + steps_per_epoch=2, + epochs=2) + for key, value in self.expected_fit_result_with_weights_output_2.items(): + self.assertAllClose(history.history[key], value, 1e-3) + def test_eval_generator(self): model = self._get_compiled_multi_io_model() eval_result = model.evaluate_generator(custom_generator_multi_io(), steps=2) self.assertAllClose(eval_result, self.expected_batch_result, 1e-3) + def test_eval_generator_with_sample_weight(self): + model = self._get_compiled_multi_io_model() + eval_result = model.evaluate_generator( + custom_generator_multi_io( + sample_weights=[self.sample_weight_1, self.sample_weight_2]), + steps=2) + self.assertAllClose(eval_result, self.expected_batch_result_with_weights, + 1e-3) + + # Set weights for one output. 
+ eval_result = model.evaluate_generator( + custom_generator_multi_io(sample_weights=[None, self.sample_weight_2]), + steps=2) + self.assertAllClose(eval_result, + self.expected_batch_result_with_weights_output_2, 1e-3) + @keras_parameterized.run_with_all_model_types @keras_parameterized.run_all_keras_modes @@ -242,25 +430,27 @@ class TestMetricsCorrectnessSingleIO(keras_parameterized.TestCase): run_eagerly=testing_utils.should_run_eagerly()) return model - def _custom_generator(self): + def _custom_generator(self, sample_weight=None): batch_size = 2 num_samples = 4 x = np.asarray([[1.], [2.], [3.], [4.]]) y = np.asarray([[2.], [4.], [6.], [8.]]) - w = np.asarray([2., 3., 4., 5.]) + w = sample_weight i = 0 + while True: batch_index = i * batch_size % num_samples i += 1 start = batch_index end = start + batch_size - yield x[start:end], y[start:end], w[start:end] + yield x[start:end], y[start:end], None if w is None else w[start:end] def setUp(self): super(TestMetricsCorrectnessSingleIO, self).setUp() self.x = np.asarray([[1.], [2.], [3.], [4.]]) self.y = np.asarray([[2.], [4.], [6.], [8.]]) - self.weights = np.asarray([2., 3., 4., 5.]) + self.sample_weight = np.asarray([2., 3., 4., 5.]) + self.class_weight = {2: 2, 4: 3, 6: 4, 8: 5} # y_true = [[2.], [4.], [6.], [8.]], y_pred = [[3.], [6.], [9.], [12.]] @@ -276,43 +466,88 @@ class TestMetricsCorrectnessSingleIO(keras_parameterized.TestCase): # Count = (2 + 3) + (4 + 5) # Result = 9.2857141 - # Total loss: + # Total loss with weights: # Total = ((3 - 2)^2 * 2 + (6 - 4)^2 * 3) + # ((9 - 6)^2 * 4 + (12 - 8)^2 * 5) # = 130, # Count = 2 + 2 # Result = 32.5 + # Total loss without weights: + # Total = ((3 - 2)^2 + (6 - 4)^2) + + # ((9 - 6)^2 + (12 - 8)^2) + # = 30, + # Count = 2 + 2 + # Result = 7.5 + wmse = 'mean_squared_error_2' if not tf2.enabled(): wmse = 'weighted_' + wmse - self.expected_fit_result = { + + self.expected_fit_result_with_weights = { 'mean_squared_error': [7.5, 7.5], wmse: [9.286, 9.286], 'loss': [32.5, 
32.5] } + self.expected_fit_result = { + 'mean_squared_error': [7.5, 7.5], + wmse: [7.5, 7.5], + 'loss': [7.5, 7.5] + } + # In the order: 'loss', 'mean_squared_error', 'mean_squared_error_2' - self.expected_batch_result = [32.5, 7.5, 9.286] + self.expected_batch_result_with_weights = [32.5, 7.5, 9.286] + self.expected_batch_result = [7.5, 7.5, 7.5] def test_fit(self): model = self._get_model() + history = model.fit( self.x, self.y, - sample_weight=self.weights, batch_size=2, epochs=2, shuffle=False) for key, value in self.expected_fit_result.items(): self.assertAllClose(history.history[key], value, 1e-3) + def test_fit_with_sample_weight(self): + model = self._get_model() + history = model.fit( + self.x, + self.y, + sample_weight=self.sample_weight, + batch_size=2, + epochs=2, + shuffle=False) + for key, value in self.expected_fit_result_with_weights.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + def test_fit_with_class_weight(self): + model = self._get_model() + history = model.fit( + self.x, + self.y, + class_weight=self.class_weight, + batch_size=2, + epochs=2, + shuffle=False) + for key, value in self.expected_fit_result_with_weights.items(): + self.assertAllClose(history.history[key], value, 1e-3) + def test_eval(self): model = self._get_model() - eval_result = model.evaluate( - self.x, self.y, batch_size=2, sample_weight=self.weights) + eval_result = model.evaluate(self.x, self.y, batch_size=2) self.assertAllClose(eval_result, self.expected_batch_result, 1e-3) + def test_eval_with_sample_weight(self): + model = self._get_model() + eval_result = model.evaluate( + self.x, self.y, batch_size=2, sample_weight=self.sample_weight) + self.assertAllClose(eval_result, self.expected_batch_result_with_weights, + 1e-3) + # Verify that metric value is same with arbitrary weights and batch size. 
x = np.random.random((50, 1)) y = np.random.random((50, 1)) @@ -323,14 +558,32 @@ class TestMetricsCorrectnessSingleIO(keras_parameterized.TestCase): def test_train_on_batch(self): model = self._get_model() - result = model.train_on_batch(self.x, self.y, sample_weight=self.weights) + result = model.train_on_batch(self.x, self.y) self.assertAllClose(result, self.expected_batch_result, 1e-3) + def test_train_on_batch_with_sample_weight(self): + model = self._get_model() + result = model.train_on_batch( + self.x, self.y, sample_weight=self.sample_weight) + self.assertAllClose(result, self.expected_batch_result_with_weights, 1e-3) + + def test_train_on_batch_with_class_weight(self): + model = self._get_model() + result = model.train_on_batch( + self.x, self.y, class_weight=self.class_weight) + self.assertAllClose(result, self.expected_batch_result_with_weights, 1e-3) + def test_test_on_batch(self): model = self._get_model() - result = model.test_on_batch(self.x, self.y, sample_weight=self.weights) + result = model.test_on_batch(self.x, self.y) self.assertAllClose(result, self.expected_batch_result, 1e-3) + def test_test_on_batch_with_sample_weight(self): + model = self._get_model() + result = model.test_on_batch( + self.x, self.y, sample_weight=self.sample_weight) + self.assertAllClose(result, self.expected_batch_result_with_weights, 1e-3) + def test_fit_generator(self): model = self._get_model() history = model.fit_generator( @@ -338,11 +591,37 @@ class TestMetricsCorrectnessSingleIO(keras_parameterized.TestCase): for key, value in self.expected_fit_result.items(): self.assertAllClose(history.history[key], value, 1e-3) + def test_fit_generator_with_sample_weight(self): + model = self._get_model() + history = model.fit_generator( + self._custom_generator(sample_weight=self.sample_weight), + steps_per_epoch=2, + epochs=2) + for key, value in self.expected_fit_result_with_weights.items(): + self.assertAllClose(history.history[key], value, 1e-3) + + def 
test_fit_generator_with_class_weight(self): + model = self._get_model() + history = model.fit_generator( + self._custom_generator(), + steps_per_epoch=2, + epochs=2, + class_weight=self.class_weight) + for key, value in self.expected_fit_result_with_weights.items(): + self.assertAllClose(history.history[key], value, 1e-3) + def test_eval_generator(self): model = self._get_model() eval_result = model.evaluate_generator(self._custom_generator(), steps=2) self.assertAllClose(eval_result, self.expected_batch_result, 1e-3) + def test_eval_generator_with_sample_weight(self): + model = self._get_model() + eval_result = model.evaluate_generator( + self._custom_generator(sample_weight=self.sample_weight), steps=2) + self.assertAllClose(eval_result, self.expected_batch_result_with_weights, + 1e-3) + @keras_parameterized.run_with_all_model_types(exclude_models=['sequential']) @keras_parameterized.run_all_keras_modes @@ -364,9 +643,10 @@ class TestOutputLossMetrics(keras_parameterized.TestCase): def setUp(self): super(TestOutputLossMetrics, self).setUp() self.x = np.asarray([[1.], [2.], [3.], [4.]]) - self.y = np.asarray([[2.], [4.], [6.], [8.]]) - self.weights_1 = np.asarray([2., 3., 4., 5.]) - self.weights_2 = np.asarray([3.5, 2.5, 1.5, 0.5]) + self.y1 = np.asarray([[2.], [4.], [6.], [8.]]) + self.y2 = np.asarray([[1.], [2.], [3.], [4.]]) + self.sample_weight_1 = np.asarray([2., 3., 4., 5.]) + self.sample_weight_2 = np.asarray([3.5, 2.5, 1.5, 0.5]) # y_true = [[2.], [4.], [6.], [8.]], y_pred = [[3.], [6.], [9.], [12.]] @@ -380,11 +660,11 @@ class TestOutputLossMetrics(keras_parameterized.TestCase): # Loss `output_2`: # Per-sample weighted losses - # Batch 1 = [(3 - 2)^2 * 3.5, (6 - 4)^2 * 2.5)] = [3.5, 10] - # Batch 2 = [(9 - 6)^2 * 1.5, (12 - 8)^2 * 0.5)] = [13.5, 8] + # Batch 1 = [(3 - 1)^2 * 3.5, (6 - 2)^2 * 2.5)] = [14, 40] + # Batch 2 = [(9 - 3)^2 * 1.5, (12 - 4)^2 * 0.5)] = [54, 32] - # Result (reduction=SUM) = ((3.5 + 10) + (13.5 + 8))/2 = 17.5 - # Result 
(reduction=SUM_OVER_BATCH_SIZE/AUTO/NONE) = 35 / 4 = 8.75 + # Result (reduction=SUM) = ((14 + 40) + (54 + 32))/2 = 70 + # Result (reduction=SUM_OVER_BATCH_SIZE/AUTO/NONE) = 140 / 4 = 35 # When reduction is 'NONE' loss value that is passed to the optimizer will # be vector loss but what is reported is a scalar, which is an average of @@ -393,18 +673,18 @@ class TestOutputLossMetrics(keras_parameterized.TestCase): # Total loss = Output_loss_1 + Output_loss_2 sum_over_batch_size_fit_result = { - 'loss': [41.25, 41.25], + 'loss': [67.5, 67.5], 'output_1_loss': [32.5, 32.5], - 'output_2_loss': [8.75, 8.75], + 'output_2_loss': [35, 35], } self.expected_fit_result = { loss_reduction.ReductionV2.NONE: sum_over_batch_size_fit_result, loss_reduction.ReductionV2.SUM: { - 'loss': [82.5, 82.5], + 'loss': [135, 135], 'output_1_loss': [65, 65], - 'output_2_loss': [17.5, 17.5], + 'output_2_loss': [70, 70], }, loss_reduction.ReductionV2.AUTO: sum_over_batch_size_fit_result, @@ -414,19 +694,19 @@ class TestOutputLossMetrics(keras_parameterized.TestCase): # In the order: 'loss', 'output_1_loss', 'output_2_loss', self.expected_batch_result = { - loss_reduction.ReductionV2.NONE: [41.25, 32.5, 8.75], - loss_reduction.ReductionV2.SUM: [82.5, 65, 17.5], - loss_reduction.ReductionV2.AUTO: [41.25, 32.5, 8.75], - loss_reduction.ReductionV2.SUM_OVER_BATCH_SIZE: [41.25, 32.5, 8.75], + loss_reduction.ReductionV2.NONE: [67.5, 32.5, 35], + loss_reduction.ReductionV2.SUM: [135, 65, 70], + loss_reduction.ReductionV2.AUTO: [67.5, 32.5, 35], + loss_reduction.ReductionV2.SUM_OVER_BATCH_SIZE: [67.5, 32.5, 35], } def test_fit(self, reduction): model = self._get_compiled_multi_io_model( loss=losses.MeanSquaredError(reduction=reduction)) - history = model.fit([self.x, self.x], [self.y, self.y], + history = model.fit([self.x, self.x], [self.y1, self.y2], sample_weight={ - 'output_1': self.weights_1, - 'output_2': self.weights_2, + 'output_1': self.sample_weight_1, + 'output_2': self.sample_weight_2, }, 
batch_size=2, epochs=2, @@ -437,21 +717,21 @@ class TestOutputLossMetrics(keras_parameterized.TestCase): def test_eval(self, reduction): model = self._get_compiled_multi_io_model( loss=losses.MeanSquaredError(reduction=reduction)) - eval_result = model.evaluate([self.x, self.x], [self.y, self.y], + eval_result = model.evaluate([self.x, self.x], [self.y1, self.y2], batch_size=2, sample_weight={ - 'output_1': self.weights_1, - 'output_2': self.weights_2, + 'output_1': self.sample_weight_1, + 'output_2': self.sample_weight_2, }) self.assertAllClose(eval_result, self.expected_batch_result[reduction]) def test_train_on_batch(self, reduction): model = self._get_compiled_multi_io_model( loss=losses.MeanSquaredError(reduction=reduction)) - result = model.train_on_batch([self.x, self.x], [self.y, self.y], + result = model.train_on_batch([self.x, self.x], [self.y1, self.y2], sample_weight={ - 'output_1': self.weights_1, - 'output_2': self.weights_2, + 'output_1': self.sample_weight_1, + 'output_2': self.sample_weight_2, }) expected_values = self.expected_batch_result[reduction] @@ -463,10 +743,10 @@ class TestOutputLossMetrics(keras_parameterized.TestCase): def test_test_on_batch(self, reduction): model = self._get_compiled_multi_io_model( loss=losses.MeanSquaredError(reduction=reduction)) - result = model.test_on_batch([self.x, self.x], [self.y, self.y], + result = model.test_on_batch([self.x, self.x], [self.y1, self.y2], sample_weight={ - 'output_1': self.weights_1, - 'output_2': self.weights_2, + 'output_1': self.sample_weight_1, + 'output_2': self.sample_weight_2, }) expected_values = self.expected_batch_result[reduction] if reduction == loss_reduction.ReductionV2.SUM: @@ -478,14 +758,20 @@ class TestOutputLossMetrics(keras_parameterized.TestCase): model = self._get_compiled_multi_io_model( loss=losses.MeanSquaredError(reduction=reduction)) history = model.fit_generator( - custom_generator_multi_io(), steps_per_epoch=2, epochs=2) + custom_generator_multi_io( + 
sample_weights=[self.sample_weight_1, self.sample_weight_2]), + steps_per_epoch=2, + epochs=2) for key, value in self.expected_fit_result[reduction].items(): self.assertAllClose(history.history[key], value) def test_eval_generator(self, reduction): model = self._get_compiled_multi_io_model( loss=losses.MeanSquaredError(reduction=reduction)) - eval_result = model.evaluate_generator(custom_generator_multi_io(), steps=2) + eval_result = model.evaluate_generator( + custom_generator_multi_io( + sample_weights=[self.sample_weight_1, self.sample_weight_2]), + steps=2) self.assertAllClose(eval_result, self.expected_batch_result[reduction]) diff --git a/tensorflow/python/keras/mixed_precision/experimental/BUILD b/tensorflow/python/keras/mixed_precision/experimental/BUILD index cf08fd257d8..982ebf0c0ea 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/BUILD +++ b/tensorflow/python/keras/mixed_precision/experimental/BUILD @@ -76,6 +76,7 @@ py_library( "//tensorflow/python:math_ops", "//tensorflow/python:resource_variable_ops", "//tensorflow/python/distribute:values", + "//tensorflow/python/eager:context", ], ) diff --git a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py index e968594ef08..59a0e08cba1 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py +++ b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py @@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function from tensorflow.python.distribute import values as distribute_values +from tensorflow.python.eager import context from tensorflow.python.framework import ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops @@ -30,9 +31,8 @@ class AutoCastVariable(trackable.Trackable): This class wraps a floating-point tf.Variable. 
It emulates the variable interface and delegates to the wrapped variable, but it additionally will cast - the wrapped variable to `auto_cast_variable._read_dtype`. `_read_dtype` - defaults to the wrapped Variable's dtype, meaning the casts are a no-op, but - `_read_dtype` can be set to a different value, + the wrapped variable under a `Graph._enable_auto_casting_variables(dtype)` context + manager. For example: @@ -40,15 +40,14 @@ class AutoCastVariable(trackable.Trackable): v = tf.Variable(1.0, dtype=tf.float32) v = AutoCastVariable(v) print(tf.identity(v).dtype) # tf.float32 - v._read_dtype = tf.float16 - print(tf.identity(v).dtype) # tf.float16, as v will cast itself to float16 - print(v.dtype) # tf.float16, as v.dtype also changes + with ops.get_default_graph()._enable_auto_casting_variables(tf.float16): + print(tf.identity(v).dtype) # tf.float16, as v will cast itself to float16 + print(v.dtype) # tf.float16, as v.dtype also changes under the ctx manager. ``` The purpose of this class is to allow Keras layers to create variables in float32, and automatically cast them to float16 or bfloat16 when the layer is - called. Keras layers will set `_read_dtype` to the appropriate dtype when - called, then set it back to None when the call returns. + called. """ def __init__(self, variable): @@ -68,11 +67,6 @@ class AutoCastVariable(trackable.Trackable): 'type: %s' % variable.dtype.name) self._variable = variable - # The dtype this variable will be read in. This is public to other internal - # classes, but not externally. It can be accessed externally via the `dtype` - # property. - self._read_dtype = self._variable.dtype - # Delegate to the underlying variable for checkpointing. 
self._gather_saveables_for_checkpoint = ( self._variable._gather_saveables_for_checkpoint) # pylint: disable=protected-access @@ -81,10 +75,21 @@ class AutoCastVariable(trackable.Trackable): def name(self): return self._variable.name + def _should_cast(self): + """Returns True if this variable should be casted when accessed.""" + g = ops.get_default_graph() + # pylint:disable=protected-access + return (g._auto_cast_variable_read_dtype is not None and + self.true_dtype != g._auto_cast_variable_read_dtype) + # pylint:enable=protected-access + @property def dtype(self): """The dtype this variable will be casted to when read.""" - return self._read_dtype + if self._should_cast(): + return ops.get_default_graph()._auto_cast_variable_read_dtype # pylint:disable=protected-access + else: + return self._variable.dtype @property def true_dtype(self): @@ -93,6 +98,8 @@ class AutoCastVariable(trackable.Trackable): def value(self): val = self._variable.value() + if not self._should_cast(): + return val # We colocate_with(None) to ignore the existing device constraints, so that # the cast is always done on the variable's device with ops.colocate_with(None, ignore_existing=True): @@ -101,11 +108,15 @@ class AutoCastVariable(trackable.Trackable): def read_value(self): val = self._variable.read_value() + if not self._should_cast(): + return val return math_ops.cast(val, self.dtype) def sparse_read(self, indices, name=None): """Reads the value of this variable sparsely, using `gather`.""" val = self._variable.sparse_read(indices, name=name) + if not self._should_cast(): + return val return math_ops.cast(val, self.dtype) def assign(self, value, use_locking=None, name=None, read_value=True): @@ -128,7 +139,7 @@ class AutoCastVariable(trackable.Trackable): def _dense_var_to_tensor(self, dtype=None, name=None, as_ref=False): """Converts this variable to a tensor.""" - if self.dtype == self.true_dtype: + if not self._should_cast(): return ops.internal_convert_to_tensor(self._variable, 
dtype, name, as_ref) # TODO(reedwm): Support as_ref? @@ -148,6 +159,18 @@ class AutoCastVariable(trackable.Trackable): """Pass resource_variable_ops.is_resource_variable check.""" pass + def __repr__(self): + if context.executing_eagerly() and not self._in_graph_mode: + repr_str = ("<AutoCastVariable '{v.name}' shape={v.shape} " + 'dtype={v.dtype.name} true_dtype={v.true_dtype.name}, ' + 'numpy={np_repr}>') + return repr_str.format( + v=self, np_repr=ops.numpy_text(self.read_value(), is_repr=True)) + else: + repr_str = ("<AutoCastVariable '{v.name}' shape={v.shape} " + 'dtype={v.dtype.name} true_dtype={v.true_dtype.name}>') + return repr_str.format(v=self) + # Operator overloads: # Note we only overload operators that support floating-point types, as # non-float variables cannot be wrapped with an AutoCastVariable. @@ -226,3 +249,6 @@ class AutoCastDistributedVariable(AutoCastVariable, raise ValueError('variable must be of type DistributedValues, ' 'but got: %s' % variable) super(AutoCastDistributedVariable, self).__init__(variable) + + def __repr__(self): + return distribute_values.DistributedVariable.__repr__(self) diff --git a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py index 850691c86b4..60cb1ca0ee9 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py +++ b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py @@ -23,11 +23,12 @@ from absl.testing import parameterized import numpy as np from tensorflow.python.distribute import mirrored_strategy +from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.keras.mixed_precision.experimental import autocast_variable - from tensorflow.python.ops import array_ops from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -65,8 +66,8 @@ def get_autocast_var(var, distribute): return autocast_variable.AutoCastVariable(var) -def 
get_var(val, dtype): - return variables.VariableV1(val, use_resource=True, dtype=dtype) +def get_var(val, dtype, name=None): + return variables.VariableV1(val, use_resource=True, dtype=dtype, name=name) @test_util.run_all_in_graph_and_eager_modes @@ -79,20 +80,47 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): x = get_autocast_var(x, distribute) self.evaluate(x.initializer) - # _read_dtype is same dtype as variable + # outside of auto cast scope. self.assertEqual(x.dtype, dtypes.float32) - self.assertEqual(x.true_dtype, dtypes.float32) self.assertEqual(x.value().dtype, dtypes.float32) self.assertEqual(x.read_value().dtype, dtypes.float32) self.assertEqual(array_ops.identity(x).dtype, dtypes.float32) - # Setting _read_dtype to a different dtype - x._read_dtype = dtypes.float16 - self.assertEqual(x.dtype, dtypes.float16) - self.assertEqual(x.true_dtype, dtypes.float32) - self.assertEqual(x.value().dtype, dtypes.float16) - self.assertEqual(x.read_value().dtype, dtypes.float16) - self.assertEqual(array_ops.identity(x).dtype, dtypes.float16) + # within auto cast scope of different dtype + with ops.get_default_graph()._enable_auto_casting_variables( + dtypes.float16): + self.assertEqual(x.dtype, dtypes.float16) + self.assertEqual(x.value().dtype, dtypes.float16) + self.assertEqual(x.read_value().dtype, dtypes.float16) + self.assertEqual(array_ops.identity(x).dtype, dtypes.float16) + + # within auto cast scope of same dtype + with ops.get_default_graph()._enable_auto_casting_variables( + dtypes.float32): + self.assertEqual(x.dtype, dtypes.float32) + self.assertEqual(x.value().dtype, dtypes.float32) + self.assertEqual(x.read_value().dtype, dtypes.float32) + self.assertEqual(array_ops.identity(x).dtype, dtypes.float32) + + @parameterized.named_parameters(*TESTCASES) + def test_read_nested_scopes(self, distribute): + with get_distribute_scope(distribute): + x = get_var(1., dtypes.float32) + x = get_autocast_var(x, distribute) + 
self.evaluate(x.initializer) + + with ops.get_default_graph()._enable_auto_casting_variables( + dtypes.float16): + self.assertEqual(x.dtype, dtypes.float16) + self.assertEqual(x.read_value().dtype, dtypes.float16) + + with ops.get_default_graph()._enable_auto_casting_variables( + dtypes.float32): + self.assertEqual(x.dtype, dtypes.float32) + self.assertEqual(x.read_value().dtype, dtypes.float32) + + self.assertEqual(x.dtype, dtypes.float16) + self.assertEqual(x.read_value().dtype, dtypes.float16) @parameterized.named_parameters(*TESTCASES) def test_operator_overloads(self, distribute): @@ -100,43 +128,43 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): for read_dtype in (dtypes.float32, dtypes.float16): x = get_var(7., dtypes.float32) x = get_autocast_var(x, distribute) - x._read_dtype = read_dtype - self.evaluate(x.initializer) - self.assertAlmostEqual(8, self.evaluate(x + 1)) - self.assertAlmostEqual(10, self.evaluate(3 + x)) - self.assertAlmostEqual(14, self.evaluate(x + x)) - self.assertAlmostEqual(5, self.evaluate(x - 2)) - self.assertAlmostEqual(6, self.evaluate(13 - x)) - self.assertAlmostEqual(0, self.evaluate(x - x)) - self.assertAlmostEqual(14, self.evaluate(x * 2)) - self.assertAlmostEqual(21, self.evaluate(3 * x)) - self.assertAlmostEqual(49, self.evaluate(x * x)) - self.assertAlmostEqual(3.5, self.evaluate(x / 2)) - self.assertAlmostEqual(1.5, self.evaluate(10.5 / x)) - self.assertAlmostEqual(3, self.evaluate(x // 2)) - self.assertAlmostEqual(2, self.evaluate(15 // x)) - if read_dtype == dtypes.float32: - # The "mod" operator does not support float16 - self.assertAlmostEqual(1, self.evaluate(x % 2)) - self.assertAlmostEqual(2, self.evaluate(16 % x)) - self.assertTrue(self.evaluate(x < 12)) - self.assertTrue(self.evaluate(x <= 12)) - self.assertFalse(self.evaluate(x > 12)) - self.assertFalse(self.evaluate(x >= 12)) - self.assertFalse(self.evaluate(12 < x)) - self.assertFalse(self.evaluate(12 <= x)) - self.assertTrue(self.evaluate(12 
> x)) - self.assertTrue(self.evaluate(12 >= x)) - self.assertAlmostEqual(343, self.evaluate(pow(x, 3)), places=4) - self.assertAlmostEqual(128, self.evaluate(pow(2, x)), places=4) - self.assertAlmostEqual(-7, self.evaluate(-x)) - self.assertAlmostEqual(7, self.evaluate(abs(x))) + with ops.get_default_graph()._enable_auto_casting_variables( + read_dtype): + self.evaluate(x.initializer) + self.assertAlmostEqual(8, self.evaluate(x + 1)) + self.assertAlmostEqual(10, self.evaluate(3 + x)) + self.assertAlmostEqual(14, self.evaluate(x + x)) + self.assertAlmostEqual(5, self.evaluate(x - 2)) + self.assertAlmostEqual(6, self.evaluate(13 - x)) + self.assertAlmostEqual(0, self.evaluate(x - x)) + self.assertAlmostEqual(14, self.evaluate(x * 2)) + self.assertAlmostEqual(21, self.evaluate(3 * x)) + self.assertAlmostEqual(49, self.evaluate(x * x)) + self.assertAlmostEqual(3.5, self.evaluate(x / 2)) + self.assertAlmostEqual(1.5, self.evaluate(10.5 / x)) + self.assertAlmostEqual(3, self.evaluate(x // 2)) + self.assertAlmostEqual(2, self.evaluate(15 // x)) + if read_dtype == dtypes.float32: + # The "mod" operator does not support float16 + self.assertAlmostEqual(1, self.evaluate(x % 2)) + self.assertAlmostEqual(2, self.evaluate(16 % x)) + self.assertTrue(self.evaluate(x < 12)) + self.assertTrue(self.evaluate(x <= 12)) + self.assertFalse(self.evaluate(x > 12)) + self.assertFalse(self.evaluate(x >= 12)) + self.assertFalse(self.evaluate(12 < x)) + self.assertFalse(self.evaluate(12 <= x)) + self.assertTrue(self.evaluate(12 > x)) + self.assertTrue(self.evaluate(12 >= x)) + self.assertAlmostEqual(343, self.evaluate(pow(x, 3)), places=4) + self.assertAlmostEqual(128, self.evaluate(pow(2, x)), places=4) + self.assertAlmostEqual(-7, self.evaluate(-x)) + self.assertAlmostEqual(7, self.evaluate(abs(x))) - x = get_var([7, 8, 9], dtypes.float32) - x = get_autocast_var(x, distribute) - x._read_dtype = read_dtype - self.evaluate(x.initializer) - self.assertEqual(self.evaluate(x[1]), 8) + x = 
get_var([7, 8, 9], dtypes.float32) + x = get_autocast_var(x, distribute) + self.evaluate(x.initializer) + self.assertEqual(self.evaluate(x[1]), 8) @parameterized.named_parameters(*TESTCASES) def test_assign(self, distribute): @@ -145,6 +173,7 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): x = get_autocast_var(x, distribute) self.evaluate(x.initializer) + # outside of auto cast scope. v1 = constant_op.constant(3.14, dtype=dtypes.float32) v2 = constant_op.constant(3.14, dtype=dtypes.float16) @@ -176,37 +205,37 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): run_and_check() # reset x self.evaluate(x.assign(0.)) - x._read_dtype = dtypes.float16 - # assign still expect float32 value even _read_dtype is float16 - run_and_check() + # within auto cast scope. + with ops.get_default_graph()._enable_auto_casting_variables( + dtypes.float16): + # assign still expect float32 value even if in float16 scope + run_and_check() @parameterized.named_parameters(*TESTCASES) def test_assign_stays_in_true_dtype(self, distribute): with get_distribute_scope(distribute): x = get_var(1., dtypes.float32) x = get_autocast_var(x, distribute) - x._read_dtype = dtypes.float16 self.evaluate(x.initializer) # small_val is a value such that 1.0 + small_val == 1.0 in fp16, but not # in fp32 small_val = np.finfo('float16').eps / 2 small_tensor = constant_op.constant(small_val, dtype=dtypes.float32) - # Variable should be increased, despite it appearing to be the same - # float16 value. - self.assertEqual(1. + small_val, - self.evaluate(x.assign(1. + small_tensor))) - self.assertEqual(1., self.evaluate(x.value())) - - x._read_dtype = dtypes.float32 + with ops.get_default_graph()._enable_auto_casting_variables( + dtypes.float16): + # Variable should be increased, despite it appearing to be the same + # float16 value. + self.assertEqual(1. + small_val, + self.evaluate(x.assign(1. 
+ small_tensor))) + self.assertEqual(1., self.evaluate(x.value())) self.assertEqual(1. + small_val, self.evaluate(x.value())) - x._read_dtype = dtypes.float16 self.evaluate(x.assign(1.)) - self.assertEqual(1. + small_val, - self.evaluate(x.assign_add(small_tensor))) - self.assertEqual(1., self.evaluate(x.value())) - - x._read_dtype = dtypes.float32 + with ops.get_default_graph()._enable_auto_casting_variables( + dtypes.float16): + self.assertEqual(1. + small_val, + self.evaluate(x.assign_add(small_tensor))) + self.assertEqual(1., self.evaluate(x.value())) self.assertEqual(1. + small_val, self.evaluate(x.value())) @parameterized.named_parameters(*TESTCASES) @@ -244,6 +273,36 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): x = get_var(1., dtypes.float32) get_autocast_var(x, distribute) + def test_repr(self): + # We do not test with DistributionStrategy because we do not want to rely on + # the exact __repr__ output of a DistributedVariable. + x = get_var(1., dtypes.float32, name='x') + x = get_autocast_var(x, distribute=False) + if context.executing_eagerly(): + self.assertStartsWith( + repr(x), + "" + ) + with ops.get_default_graph()._enable_auto_casting_variables( + dtypes.float16): + self.assertEqual( + repr(x), + "" + ) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/mixed_precision/experimental/keras_test.py b/tensorflow/python/keras/mixed_precision/experimental/keras_test.py index 247282c9aea..820daf405f7 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/keras_test.py +++ b/tensorflow/python/keras/mixed_precision/experimental/keras_test.py @@ -39,6 +39,7 @@ from tensorflow.python.keras import regularizers from tensorflow.python.keras import testing_utils from tensorflow.python.keras.engine import base_layer from tensorflow.python.keras.layers import core +from tensorflow.python.keras.layers import recurrent from tensorflow.python.keras.mixed_precision.experimental import loss_scale_optimizer 
from tensorflow.python.keras.mixed_precision.experimental import policy from tensorflow.python.keras.mixed_precision.experimental import test_util as mp_test_util @@ -203,17 +204,6 @@ class KerasLayerTest(keras_parameterized.TestCase): self.evaluate(variables.global_variables_initializer()) self.assertEqual(self.evaluate(y), 2.) - @parameterized.named_parameters(*TESTCASES) - @test_util.run_in_graph_and_eager_modes - def test_variable_not_casted_for_int_inputs(self, strategy_fn): - x = constant_op.constant([[1]], dtype=dtypes.int32) - with strategy_fn().scope(): - with policy.policy_scope('infer_float32_vars'): - layer = layers.Embedding(input_dim=10, output_dim=32) - y = layer(x) - self.assertEqual(layer.embeddings.dtype, dtypes.float32) - self.assertEqual(y.dtype, dtypes.float32) - @parameterized.named_parameters(*TESTCASES) @test_util.run_in_graph_and_eager_modes def test_layer_regularizer_runs_in_float32(self, strategy_fn): @@ -347,22 +337,17 @@ class KerasModelTest(keras_parameterized.TestCase): 'testcase_name': 'nocloning', 'strategy_fn': create_mirrored_strategy, 'cloning': False - }, { - 'testcase_name': 'function', - 'strategy_fn': create_mirrored_strategy, - 'layer_with_tf_function': True }) def test_model(self, strategy_fn, use_operator=False, use_regularizer=False, - cloning=True, layer_with_tf_function=False): + cloning=True): if not self._is_strategy_supported(strategy_fn): return regularizer = IdentityRegularizer() if use_regularizer else None - layer_class = AddLayerWithFunction if layer_with_tf_function else AddLayer with strategy_fn().scope(): with policy.policy_scope('infer_float32_vars'): x = layers.Input(shape=(1,), batch_size=2, dtype=dtypes.float16) - layer = layer_class(assert_type=dtypes.float16, - use_operator=use_operator, regularizer=regularizer) + layer = AddLayer(assert_type=dtypes.float16, use_operator=use_operator, + regularizer=regularizer) y = layer(x) y = math_ops.cast(y, dtypes.float32) model = models.Model(inputs=x, outputs=y) 
@@ -732,5 +717,52 @@ class KerasModelTest(keras_parameterized.TestCase): self.assertEqual(backend.get_value(loss_scale._num_good_steps), 1) +class RnnTest(keras_parameterized.TestCase): + """Test mixed precision with RNNs.""" + + # TODO(b/136512020): Support and test recurrent_v2.GRU. + @parameterized.named_parameters({ + 'testcase_name': 'base_simple', + 'strategy_fn': default_strategy_fn, + 'rnn_class': recurrent.SimpleRNN, + }, { + 'testcase_name': 'distribute_simple', + 'strategy_fn': create_mirrored_strategy, + 'rnn_class': recurrent.SimpleRNN, + }, { + 'testcase_name': 'base_gru', + 'strategy_fn': default_strategy_fn, + 'rnn_class': recurrent.GRU, + }, { + 'testcase_name': 'distribute_gru', + 'strategy_fn': create_mirrored_strategy, + 'rnn_class': recurrent.GRU, + }) + @test_util.run_in_graph_and_eager_modes + # RNNs do not work properly with GradientTape in graph mode when V1 control + # flow is used. + @test_util.enable_control_flow_v2 + def test_rnn(self, strategy_fn, rnn_class): + x = array_ops.ones((2, 3, 4), dtype=dtypes.float16) + strategy = strategy_fn() + with strategy.scope(), policy.policy_scope('infer_float32_vars'): + layer = rnn_class(units=4) + def run_fn(): + with backprop.GradientTape() as tape: + y = layer(x) + self.assertEqual(y.dtype, dtypes.float16) + opt = gradient_descent.SGD(1.) 
+ grads = tape.gradient(y, layer.trainable_weights) + return opt.apply_gradients(zip(grads, layer.trainable_weights)) + + op = strategy.experimental_run(run_fn) + if not context.executing_eagerly(): + self.evaluate(variables.global_variables_initializer()) + self.evaluate(op) + + for v in layer.weights: + self.assertEqual(v.dtype, dtypes.float32) + + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py index 9cc358bb0cb..46ebdb6bf70 100644 --- a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py +++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py @@ -108,7 +108,7 @@ class OptimizerV2(trackable.Trackable): opt = tf.keras.optimizers.SGD(learning_rate=0.1) model = tf.keras.Sequential() model.add(tf.keras.layers.Dense(num_hidden, activation='relu')) - model.add(tf.keras.layers.Dense(num_classes, activation='sigmoid') + model.add(tf.keras.layers.Dense(num_classes, activation='sigmoid')) loss_fn = lambda: tf.keras.losses.mse(model(input), output) var_list_fn = lambda: model.trainable_weights for input, output in data: diff --git a/tensorflow/python/keras/saving/saved_model/constants.py b/tensorflow/python/keras/saving/saved_model/constants.py index 093de2884f5..3f1eca9c500 100644 --- a/tensorflow/python/keras/saving/saved_model/constants.py +++ b/tensorflow/python/keras/saving/saved_model/constants.py @@ -22,3 +22,7 @@ from __future__ import print_function # e.g. the list of layers can be accessed using `loaded.keras_api.layers`, in an # object loaded from `tf.saved_model.load()`. KERAS_ATTR = 'keras_api' + +# Keys for the serialization cache. 
+# Maps to the keras serialization dict {Layer --> SerializedAttributes object} +KERAS_CACHE_KEY = 'keras_serialized_attributes' diff --git a/tensorflow/python/keras/saving/saved_model/save.py b/tensorflow/python/keras/saving/saved_model/save.py index 89c16433764..045386c97d7 100644 --- a/tensorflow/python/keras/saving/saved_model/save.py +++ b/tensorflow/python/keras/saving/saved_model/save.py @@ -28,6 +28,7 @@ from tensorflow.python.keras import backend as K from tensorflow.python.keras.engine import base_layer_utils from tensorflow.python.keras.engine import input_spec from tensorflow.python.keras.saving import saving_utils +from tensorflow.python.keras.saving.saved_model import constants from tensorflow.python.keras.saving.saved_model import load as keras_load from tensorflow.python.keras.saving.saved_model import serialized_attributes from tensorflow.python.keras.saving.saved_model import utils @@ -38,6 +39,7 @@ from tensorflow.python.training.tracking import base as trackable from tensorflow.python.training.tracking import data_structures from tensorflow.python.training.tracking import layer_utils as trackable_layer_utils from tensorflow.python.util import nest +from tensorflow.python.util import tf_decorator from tensorflow.python.util.lazy_loader import LazyLoader # To avoid circular dependencies between keras/engine and keras/saving, @@ -86,23 +88,18 @@ def save(model, filepath, overwrite, include_optimizer): model.optimizer = orig_optimizer -# Keys for the serialization cache. 
-# Maps to the keras serialization dict {Layer --> SerializedAttributes object} -_KERAS_CACHE_KEY = 'keras_serialized_attributes' - - def serialize_all_attributes(layer, serialization_cache): """Serialize all attributes in the layer.""" save_model_default_signature = False - if _KERAS_CACHE_KEY not in serialization_cache: - keras_cache = serialization_cache[_KERAS_CACHE_KEY] = {} + if constants.KERAS_CACHE_KEY not in serialization_cache: + keras_cache = serialization_cache[constants.KERAS_CACHE_KEY] = {} if isinstance(layer, training_lib.Model): # Only trace default signature if the root object is a Model. Since the # keras cache key is only created in this method, we know that the object # is root if the key does not yet exist in the cache. save_model_default_signature = True else: - keras_cache = serialization_cache[_KERAS_CACHE_KEY] + keras_cache = serialization_cache[constants.KERAS_CACHE_KEY] if layer in keras_cache: return keras_cache[layer] @@ -255,7 +252,8 @@ def _wrap_layer_functions(layer, serialization_cache): fns['activity_regularizer_fn'] = _wrap_activity_regularizer(layer) fns['call_and_return_all_conditional_losses'] = ( call_collection.add_function( - _append_activity_regularizer_loss(call_fn_with_losses, + _append_activity_regularizer_loss(layer, + call_fn_with_losses, fns['activity_regularizer_fn']), '{}_layer_call_and_return_all_conditional_losses'.format(layer.name) )) @@ -320,11 +318,12 @@ def _replace_child_layer_functions(layer, serialization_cache): # pylint: disable=protected-access original_fns = {} for child_layer in _list_all_layers(layer): - if child_layer not in serialization_cache[_KERAS_CACHE_KEY]: + if child_layer not in serialization_cache[constants.KERAS_CACHE_KEY]: layer_fns = (serialize_all_attributes(child_layer, serialization_cache) .functions) else: - layer_fns = serialization_cache[_KERAS_CACHE_KEY][child_layer].functions + layer_fns = ( + serialization_cache[constants.KERAS_CACHE_KEY][child_layer].functions) if not 
layer_fns: # This indicates either: # - circular dependency, which means the current layer's functions @@ -393,8 +392,10 @@ class LayerCallCollection(object): """ def __init__(self, layer): - self._layer = layer + self.layer = layer self._expects_training_arg = layer._expects_training_arg # pylint: disable=protected-access + self._training_arg_index = utils.get_training_arg_index(layer) + self._input_signature = self._generate_input_signature(layer) self._functions = weakref.WeakValueDictionary() # Bool indicating whether this object is currently tracing the layer call @@ -442,18 +443,21 @@ class LayerCallCollection(object): *args: Positional args passed to the original function. **kwargs: Keyword args passed to the original function. """ + args = list(args) kwargs = kwargs.copy() self.tracing = True for fn in self._functions.values(): # TODO(kathywu): Replace arguments with broader shapes defined in the # input signature. if self._expects_training_arg: - kwargs['training'] = False - fn.original_get_concrete_function(*args, **kwargs) - kwargs['training'] = True - fn.original_get_concrete_function(*args, **kwargs) + args, kwargs = utils.set_training_arg(False, self._training_arg_index, + args, kwargs) + fn.get_concrete_function(*args, **kwargs) + args, kwargs = utils.set_training_arg(True, self._training_arg_index, + args, kwargs) + fn.get_concrete_function(*args, **kwargs) else: - fn.original_get_concrete_function(*args, **kwargs) + fn.get_concrete_function(*args, **kwargs) self.tracing = False @property @@ -483,6 +487,18 @@ class LayerCallCollection(object): return fn +def maintain_losses(method): + """Ensures layer losses are kept the same, and runs method in call context.""" + def wrapper(self, *args, **kwargs): + layer = self.call_collection.layer + original_losses = _reset_layer_losses(layer) + with base_layer_utils.call_context().enter(layer, None, True, None): + ret = method(self, *args, **kwargs) + _restore_layer_losses(original_losses) + return ret + 
return tf_decorator.make_decorator(target=method, decorator_func=wrapper) + + class LayerCall(def_function.Function): """Function that triggers traces of other functions in the same collection.""" @@ -490,19 +506,18 @@ class LayerCall(def_function.Function): super(LayerCall, self).__init__(*args, **kwargs) self.call_collection = call_collection + @maintain_losses def __call__(self, *args, **kwargs): if not self.call_collection.tracing: self.call_collection.add_trace(*args, **kwargs) return super(LayerCall, self).__call__(*args, **kwargs) + @maintain_losses def get_concrete_function(self, *args, **kwargs): if not self.call_collection.tracing: self.call_collection.add_trace(*args, **kwargs) return super(LayerCall, self).get_concrete_function(*args, **kwargs) - def original_get_concrete_function(self, *args, **kwargs): - return super(LayerCall, self).get_concrete_function(*args, **kwargs) - def _wrap_call_and_conditional_losses(layer): """Wraps call function that returns a tuple of (outputs, losses). 
@@ -519,37 +534,32 @@ def _wrap_call_and_conditional_losses(layer): """ # Create function that generates both outputs and losses layer_call = layer.call - if layer._expects_training_arg: # pylint: disable=protected-access - def call_and_return_conditional_losses(inputs, training=False): - return layer_call(inputs, training=training), layer.get_losses_for(inputs) - else: - def call_and_return_conditional_losses(inputs): - K.set_learning_phase(0) - return layer_call(inputs), layer.get_losses_for(inputs) - return call_and_return_conditional_losses + + def call_and_return_conditional_losses(inputs, *args, **kwargs): + return layer_call(inputs, *args, **kwargs), layer.get_losses_for(inputs) + return tf_decorator.make_decorator( + layer_call, call_and_return_conditional_losses) def _extract_outputs_from_fn(layer, call_and_return_conditional_losses): """Returns a function that returns only call function outputs.""" if isinstance(layer, keras_load.RevivedLayer): return layer.keras_api.__call__ # pylint: disable=protected-access - if layer._expects_training_arg: # pylint: disable=protected-access - def call(inputs, training=False): - return call_and_return_conditional_losses(inputs, training=training)[0] - else: - def call(inputs): - return call_and_return_conditional_losses(inputs)[0] - return call + def call(inputs, *args, **kwargs): + return call_and_return_conditional_losses(inputs, *args, **kwargs)[0] + layer_call = layer.call + return tf_decorator.make_decorator(layer_call, call) def _append_activity_regularizer_loss( - call_fn_with_losses, activity_regularizer_fn): + layer, call_fn_with_losses, activity_regularizer_fn): """Appends activity regularizer loss to losses returned by the wrapped fn.""" - def fn(*args, **kwargs): - outputs, losses = call_fn_with_losses(*args, **kwargs) + def fn(inputs, *args, **kwargs): + outputs, losses = call_fn_with_losses(inputs, *args, **kwargs) losses.append(activity_regularizer_fn(outputs)) return outputs, losses - return fn + 
layer_call = layer.call + return tf_decorator.make_decorator(target=layer_call, decorator_func=fn) def _wrap_unconditional_loss(loss_fn, index): diff --git a/tensorflow/python/keras/saving/saved_model/saved_model_test.py b/tensorflow/python/keras/saving/saved_model/saved_model_test.py index 7358f431df1..b90b576446d 100644 --- a/tensorflow/python/keras/saving/saved_model/saved_model_test.py +++ b/tensorflow/python/keras/saving/saved_model/saved_model_test.py @@ -30,7 +30,8 @@ from tensorflow.python.framework import test_util from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras import regularizers from tensorflow.python.keras import testing_utils -from tensorflow.python.keras.saving.saved_model import load as saved_model_load +from tensorflow.python.keras.saving.saved_model import load as keras_load +from tensorflow.python.keras.saving.saved_model import save as keras_save from tensorflow.python.keras.utils import tf_utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops @@ -60,6 +61,13 @@ class LayerWithLearningPhase(keras.engine.base_layer.Layer): return input_shape +class LayerWithLoss(keras.layers.Layer): + + def call(self, inputs): + self.add_loss(math_ops.reduce_sum(inputs), inputs) + return inputs + + @test_util.run_all_in_graph_and_eager_modes class TestModelSavingAndLoadingV2(keras_parameterized.TestCase): @@ -87,7 +95,7 @@ class TestModelSavingAndLoadingV2(keras_parameterized.TestCase): saved_model_dir = self._save_model_dir() tf_save.save(model, saved_model_dir) - loaded = saved_model_load.load(saved_model_dir) + loaded = keras_load.load(saved_model_dir) self.evaluate(variables.variables_initializer(loaded.variables)) self.assertAllClose(self.evaluate(model.weights), self.evaluate(loaded.weights)) @@ -123,7 +131,7 @@ class TestModelSavingAndLoadingV2(keras_parameterized.TestCase): saved_model_dir = self._save_model_dir() self.evaluate(variables.variables_initializer(layer.variables)) 
tf_save.save(layer, saved_model_dir) - loaded = saved_model_load.load(saved_model_dir) + loaded = keras_load.load(saved_model_dir) self.evaluate(variables.variables_initializer(loaded.variables)) equal_attrs = ['name', '_expects_training_arg', 'trainable'] @@ -137,13 +145,6 @@ class TestModelSavingAndLoadingV2(keras_parameterized.TestCase): def test_maintains_losses(self): """Tests that the layer losses do not change before and after export.""" - - class LayerWithLoss(keras.layers.Layer): - - def call(self, inputs): - self.add_loss(math_ops.reduce_sum(inputs), inputs) - return inputs - model = keras.models.Sequential([LayerWithLoss()]) model.compile( loss='mse', @@ -172,7 +173,7 @@ class TestModelSavingAndLoadingV2(keras_parameterized.TestCase): layer.build([None, None]) saved_model_dir = self._save_model_dir() tf_save.save(layer, saved_model_dir) - loaded = saved_model_load.load(saved_model_dir) + loaded = keras_load.load(saved_model_dir) input_arr = array_ops.ones((4, 3)) # Run the layer, and use the keras backend learing phase @@ -214,7 +215,7 @@ class TestModelSavingAndLoadingV2(keras_parameterized.TestCase): self.assertEqual(expected_layers, len(loaded.keras_api.layers)) input_arr = array_ops.ones((4, 3)) self.assertAllClose(self.evaluate(model(input_arr)), - self.evaluate(loaded(input_arr))) + self.evaluate(loaded(input_arr, training=False))) @keras_parameterized.run_with_all_model_types def test_compiled_model(self): @@ -232,7 +233,7 @@ class TestModelSavingAndLoadingV2(keras_parameterized.TestCase): # TODO(b/134519980): Issue with model.fit if the model call function uses # a tf.function (Graph mode only). 
with context.eager_mode(): - loaded = saved_model_load.load(saved_model_dir) + loaded = keras_load.load(saved_model_dir) actual_predict = loaded.predict(input_arr) self.assertAllClose(expected_predict, actual_predict) @@ -261,7 +262,7 @@ class TestModelSavingAndLoadingV2(keras_parameterized.TestCase): layer = LayerWithNestedSpec() saved_model_dir = self._save_model_dir() tf_save.save(layer, saved_model_dir) - loaded = saved_model_load.load(saved_model_dir) + loaded = keras_load.load(saved_model_dir) self.assertEqual(3, loaded.input_spec['a'].max_ndim) self.assertEqual({-1: 2}, loaded.input_spec['a'].axes) self.assertAllEqual([None, 2, 3], loaded.input_spec['b'].shape) @@ -274,7 +275,7 @@ class TestModelSavingAndLoadingV2(keras_parameterized.TestCase): saved_model_dir = self._save_model_dir() model.save(saved_model_dir, save_format='tf') - loaded = saved_model_load.load(saved_model_dir) + loaded = keras_load.load(saved_model_dir) input_arr_1 = np.random.random((1, 3)).astype('float32') input_arr_2 = np.random.random((1, 5)).astype('float32') @@ -292,7 +293,7 @@ class TestModelSavingAndLoadingV2(keras_parameterized.TestCase): saved_model_dir = self._save_model_dir() model.save(saved_model_dir, save_format='tf') - loaded = saved_model_load.load(saved_model_dir) + loaded = keras_load.load(saved_model_dir) self.assertLen(loaded.layers, 2) self.assertLen(loaded.losses, 2) @@ -307,5 +308,81 @@ class TestModelSavingAndLoadingV2(keras_parameterized.TestCase): self.assertLen(loaded.layers, 2) self.assertLen(loaded.losses, 2) + +class TestLayerCallTracing(test.TestCase): + + def test_functions_have_same_trace(self): + + class Layer(keras.engine.base_layer.Layer): + + def call(self, inputs): + return inputs + + def call2(self, inputs): + return inputs * 2 + + layer = Layer() + call_collection = keras_save.LayerCallCollection(layer) + fn = call_collection.add_function(layer.call, 'call') + fn2 = call_collection.add_function(layer.call2, 'call2') + + fn(np.ones((2, 3))) + 
fn(np.ones((4, 5))) + + self.assertLen(fn._list_all_concrete_functions_for_serialization(), 2) + self.assertLen(fn2._list_all_concrete_functions_for_serialization(), 2) + + # Check that the shapes are correct + self.assertEqual( + {(2, 3), (4, 5)}, + set(tuple(c.structured_input_signature[0][0].shape.as_list()) + for c in fn2._list_all_concrete_functions_for_serialization())) + + def test_training_arg_replacement(self): + + def assert_num_traces(layer_cls, training_keyword): + layer = layer_cls() + call_collection = keras_save.LayerCallCollection(layer) + fn = call_collection.add_function(layer.call, 'call') + + fn(np.ones((2, 3)), training=True) + self.assertLen(fn._list_all_concrete_functions_for_serialization(), 2) + + fn(np.ones((2, 4)), training=False) + self.assertLen(fn._list_all_concrete_functions_for_serialization(), 4) + + if training_keyword: + fn(np.ones((2, 5)), True) + self.assertLen(fn._list_all_concrete_functions_for_serialization(), 6) + fn(np.ones((2, 6))) + self.assertLen(fn._list_all_concrete_functions_for_serialization(), 8) + + class LayerWithTrainingKeyword(keras.engine.base_layer.Layer): + + def call(self, inputs, training=False): + return inputs * training + + assert_num_traces(LayerWithTrainingKeyword, training_keyword=True) + + class LayerWithKwargs(keras.engine.base_layer.Layer): + + def call(self, inputs, **kwargs): + return inputs * kwargs['training'] + + assert_num_traces(LayerWithKwargs, training_keyword=False) + + @test_util.run_in_graph_and_eager_modes + def test_maintains_losses(self): + layer = LayerWithLoss() + layer(np.ones((2, 3))) + previous_losses = layer.losses[:] + + call_collection = keras_save.LayerCallCollection(layer) + fn = call_collection.add_function(layer.call, 'call') + fn(np.ones((2, 3))) + + self.assertAllEqual(previous_losses, layer.losses) + + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/saving/saved_model/utils.py b/tensorflow/python/keras/saving/saved_model/utils.py index 
960b3709273..1f44174deec 100644 --- a/tensorflow/python/keras/saving/saved_model/utils.py +++ b/tensorflow/python/keras/saving/saved_model/utils.py @@ -19,6 +19,7 @@ from __future__ import print_function from tensorflow.python.keras import backend as K from tensorflow.python.keras.utils import tf_utils +from tensorflow.python.util import tf_inspect def use_wrapped_call(layer, call_fn): @@ -33,19 +34,72 @@ def use_wrapped_call(layer, call_fn): function that calls call_fn and returns the outputs. Losses returned by call_fn are added to the layer losses. """ - # TODO(kathywu): Support mask argument and multi-input call functions. - def wrapped_call(inputs, **kwargs): + training_arg_index = get_training_arg_index(layer) + + def wrapped_call(inputs, *args, **kwargs): """Returns the outputs from the call_fn, and adds the losses.""" if layer._expects_training_arg: # pylint: disable=protected-access - training = kwargs.pop('training', None) + training = get_training_arg(training_arg_index, args, kwargs) if training is None: training = K.learning_phase() + + args = list(args) + kwargs = kwargs.copy() + + def replace_training_and_call(training): + new_args, new_kwargs = set_training_arg(training, training_arg_index, + args, kwargs) + return call_fn(inputs, *new_args, **new_kwargs) + outputs, losses = tf_utils.smart_cond( training, - lambda: call_fn(inputs, training=True), - lambda: call_fn(inputs, training=False)) + lambda: replace_training_and_call(True), + lambda: replace_training_and_call(False)) else: outputs, losses = call_fn(inputs) layer.add_loss(losses, inputs) return outputs return wrapped_call + + +def get_training_arg_index(layer): + """Returns the index of 'training' in the layer call function arguments. + + Args: + layer: Keras layer + + Returns: + - n: index of 'training' in the call function arguments. + - -1: if 'training' is not found in the arguments, but layer.call accepts + variable keyword arguments + - None: if layer doesn't expect a training argument. 
+ """ + if not layer._expects_training_arg: # pylint: disable=protected-access + return None + + arg_list = tf_inspect.getfullargspec(layer.call).args + if tf_inspect.ismethod(layer.call): + arg_list = arg_list[1:] + if 'training' in arg_list: + return arg_list.index('training') + else: + return -1 + + +def set_training_arg(training, index, args, kwargs): + if index is None: + pass + elif index >= 0 and len(args) > index: + args[index] = training + else: + kwargs['training'] = training + return args, kwargs + + +def get_training_arg(index, args, kwargs): + if index is None: + return None + elif index >= 0 and len(args) > index: + return args[index] + else: + return kwargs.get('training', None) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index f26fbb56ddd..a7281fa5c1a 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -235,6 +235,7 @@ cuda_py_test( shard_count = 5, tags = [ "no_gpu", # TODO(b/131773093): Re-enable. + "no_rocm", # TODO(rocm): feature not supported on ROCm platform "nomsan", # TODO(b/131773093): Re-enable. 
], xla_enable_strict_auto_jit = True, @@ -3505,6 +3506,9 @@ cuda_py_test( "//tensorflow/python:math_ops", ], shard_count = 20, + tags = [ + "no_rocm", # TODO(rocm): feature not supported on ROCm platform + ], xla_enable_strict_auto_jit = True, ) diff --git a/tensorflow/python/kernel_tests/batch_matmul_op_test.py b/tensorflow/python/kernel_tests/batch_matmul_op_test.py index 4cc427970b0..b4db7003bd0 100644 --- a/tensorflow/python/kernel_tests/batch_matmul_op_test.py +++ b/tensorflow/python/kernel_tests/batch_matmul_op_test.py @@ -265,9 +265,11 @@ class BatchMatMulBenchmark(test.Benchmark): if __name__ == "__main__": - for dtype_ in [ - np.float16, np.float32, np.float64, np.complex64, np.complex128, np.int32 - ]: + dtypes_to_test = [np.float16, np.float32, np.float64, np.int32] + if not test.is_built_with_rocm(): + # ROCm does not support BLAS operations for complex types + dtypes_to_test += [np.complex64, np.complex128] + for dtype_ in dtypes_to_test: for adjoint_a_ in False, True: for adjoint_b_ in False, True: name = "%s_%s_%s" % (dtype_.__name__, adjoint_a_, adjoint_b_) diff --git a/tensorflow/python/kernel_tests/conv_ops_3d_test.py b/tensorflow/python/kernel_tests/conv_ops_3d_test.py index e136d091393..deb465a0474 100644 --- a/tensorflow/python/kernel_tests/conv_ops_3d_test.py +++ b/tensorflow/python/kernel_tests/conv_ops_3d_test.py @@ -32,6 +32,7 @@ from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import nn_ops import tensorflow.python.ops.nn_grad # pylint: disable=unused-import from tensorflow.python.platform import test +from tensorflow.python.framework import test_util def GetTestConfigs(): @@ -220,7 +221,7 @@ class Conv3DTest(test.TestCase): expected=expected_output) def testConv3D1x1x1Filter2x1x1Dilation(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): self._VerifyDilatedConvValues( tensor_in_sizes=[1, 3, 6, 1, 1], filter_in_sizes=[1, 1, 1, 1, 1], @@ 
-245,7 +246,7 @@ class Conv3DTest(test.TestCase): expected=expected_output) def testConv3D2x2x2Filter1x2x1Dilation(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): self._VerifyDilatedConvValues( tensor_in_sizes=[1, 4, 6, 3, 1], filter_in_sizes=[2, 2, 2, 1, 1], diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_adjoint_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_adjoint_test.py index d305277b5f4..1687054398b 100644 --- a/tensorflow/python/kernel_tests/linalg/linear_operator_adjoint_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_adjoint_test.py @@ -138,6 +138,8 @@ class LinearOperatorAdjointTest( full_matrix2, adjoint=True, adjoint_arg=True).to_dense())) def test_matmul_adjoint_complex_operator(self): + if test.is_built_with_rocm(): + self.skipTest("ROCm does not support BLAS operations for complex types") matrix1 = np.random.randn(4, 4) + 1j * np.random.randn(4, 4) matrix2 = np.random.randn(4, 4) + 1j * np.random.randn(4, 4) full_matrix1 = linalg.LinearOperatorFullMatrix(matrix1) @@ -188,6 +190,8 @@ class LinearOperatorAdjointTest( full_matrix2, adjoint=True, adjoint_arg=True).to_dense())) def test_solve_adjoint_complex_operator(self): + if test.is_built_with_rocm(): + self.skipTest("ROCm does not support BLAS operations for complex types") matrix1 = self.evaluate(linear_operator_test_util.random_tril_matrix( [4, 4], dtype=dtypes.complex128, force_well_conditioned=True) + 1j * linear_operator_test_util.random_tril_matrix( diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_low_rank_update_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_low_rank_update_test.py index 348574116f0..5c89607c1da 100644 --- a/tensorflow/python/kernel_tests/linalg/linear_operator_low_rank_update_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_low_rank_update_test.py @@ -239,6 +239,7 @@ class 
LinearOperatorLowRankUpdatetestNoDiagCannotUseCholesky( self._rtol[dtypes.float32] = 1e-4 self._atol[dtypes.float64] = 1e-9 self._rtol[dtypes.float64] = 1e-9 + self._atol[dtypes.complex64] = 1e-5 self._rtol[dtypes.complex64] = 2e-4 diff --git a/tensorflow/python/kernel_tests/linalg_grad_test.py b/tensorflow/python/kernel_tests/linalg_grad_test.py index 1494329f806..64ecd491b3c 100644 --- a/tensorflow/python/kernel_tests/linalg_grad_test.py +++ b/tensorflow/python/kernel_tests/linalg_grad_test.py @@ -161,6 +161,13 @@ if __name__ == '__main__': for lower in True, False: name = '%s_low_%s' % (name, lower) + if (name == 'float32_10_10_adj_False_low_True') and \ + test_lib.is_built_with_rocm(): + # Skip this one particular subtest on the ROCm platform + # It will fail because of 1 element in 10,000 mismatch, + # and the mismatch is minor (tolerance is 0.20, mismatch is 0.22) + # TODO(rocm) : investigate cause of mismatch and fix + continue _AddTest(MatrixBinaryFunctorGradientTest, 'MatrixTriangularSolveGradient', name, _GetMatrixBinaryFunctorGradientTest( diff --git a/tensorflow/python/kernel_tests/lookup_ops_test.py b/tensorflow/python/kernel_tests/lookup_ops_test.py index 87eabf9e015..0db7d27e7e7 100644 --- a/tensorflow/python/kernel_tests/lookup_ops_test.py +++ b/tensorflow/python/kernel_tests/lookup_ops_test.py @@ -1998,17 +1998,6 @@ class IndexTableFromTensor(test.TestCase): ids = table.lookup(constant_op.constant(("salad", "surgery", "tarkus"))) self.assertAllEqual((1, 2, 3), self.evaluate(ids)) - def test_index_table_from_tensor_with_tensor_init_in_function(self): - @function.defun() - def lookup_fn(): - vocabulary_list = constant_op.constant(["brain", "salad", "surgery"]) - table = lookup_ops.index_table_from_tensor( - vocabulary_list=vocabulary_list, num_oov_buckets=1) - return table.lookup(constant_op.constant(("salad", "surgery", "tarkus"))) - - ids = lookup_fn() - self.assertAllEqual((1, 2, 3), self.evaluate(ids)) - def
test_int32_index_table_from_tensor_with_tensor_init(self): with self.cached_session(): table = lookup_ops.index_table_from_tensor( diff --git a/tensorflow/python/kernel_tests/lu_op_test.py b/tensorflow/python/kernel_tests/lu_op_test.py index 1c0280c3ce6..875a3768602 100644 --- a/tensorflow/python/kernel_tests/lu_op_test.py +++ b/tensorflow/python/kernel_tests/lu_op_test.py @@ -130,12 +130,14 @@ class LuOpTest(test.TestCase): for output_idx_type in (dtypes.int32, dtypes.int64): self._verifyLu(data.astype(dtype), output_idx_type=output_idx_type) - for dtype in (np.complex64, np.complex128): - for output_idx_type in (dtypes.int32, dtypes.int64): - complex_data = np.tril(1j * data, -1).astype(dtype) - complex_data += np.triu(-1j * data, 1).astype(dtype) - complex_data += data - self._verifyLu(complex_data, output_idx_type=output_idx_type) + if not test.is_built_with_rocm(): + # ROCm does not support BLAS operations for complex types + for dtype in (np.complex64, np.complex128): + for output_idx_type in (dtypes.int32, dtypes.int64): + complex_data = np.tril(1j * data, -1).astype(dtype) + complex_data += np.triu(-1j * data, 1).astype(dtype) + complex_data += data + self._verifyLu(complex_data, output_idx_type=output_idx_type) def testPivoting(self): # This matrix triggers partial pivoting because the first diagonal entry @@ -150,15 +152,17 @@ class LuOpTest(test.TestCase): # Make sure p_val is not the identity permutation. self.assertNotAllClose(np.arange(3), p_val) - for dtype in (np.complex64, np.complex128): - complex_data = np.tril(1j * data, -1).astype(dtype) - complex_data += np.triu(-1j * data, 1).astype(dtype) - complex_data += data - self._verifyLu(complex_data) - _, p = linalg_ops.lu(data) - p_val = self.evaluate([p]) - # Make sure p_val is not the identity permutation. 
- self.assertNotAllClose(np.arange(3), p_val) + if not test.is_built_with_rocm(): + # ROCm does not support BLAS operations for complex types + for dtype in (np.complex64, np.complex128): + complex_data = np.tril(1j * data, -1).astype(dtype) + complex_data += np.triu(-1j * data, 1).astype(dtype) + complex_data += data + self._verifyLu(complex_data) + _, p = linalg_ops.lu(data) + p_val = self.evaluate([p]) + # Make sure p_val is not the identity permutation. + self.assertNotAllClose(np.arange(3), p_val) def testInvalidMatrix(self): # LU factorization gives an error when the input is singular. @@ -191,11 +195,13 @@ class LuOpTest(test.TestCase): matrices = np.random.rand(batch_size, 5, 5) self._verifyLu(matrices) - # Generate random complex valued matrices. - np.random.seed(52) - matrices = np.random.rand(batch_size, 5, - 5) + 1j * np.random.rand(batch_size, 5, 5) - self._verifyLu(matrices) + if not test.is_built_with_rocm(): + # ROCm does not support BLAS operations for complex types + # Generate random complex valued matrices. + np.random.seed(52) + matrices = np.random.rand(batch_size, 5, + 5) + 1j * np.random.rand(batch_size, 5, 5) + self._verifyLu(matrices) def testLargeMatrix(self): # Generate random matrices. @@ -204,10 +210,12 @@ class LuOpTest(test.TestCase): data = np.random.rand(n, n) self._verifyLu(data) - # Generate random complex valued matrices. - np.random.seed(129) - data = np.random.rand(n, n) + 1j * np.random.rand(n, n) - self._verifyLu(data) + if not test.is_built_with_rocm(): + # ROCm does not support BLAS operations for complex types + # Generate random complex valued matrices. 
+ np.random.seed(129) + data = np.random.rand(n, n) + 1j * np.random.rand(n, n) + self._verifyLu(data) @test_util.run_v1_only("b/120545219") def testEmpty(self): diff --git a/tensorflow/python/kernel_tests/matmul_op_test.py b/tensorflow/python/kernel_tests/matmul_op_test.py index a3dd7dbf2af..f123492ff15 100644 --- a/tensorflow/python/kernel_tests/matmul_op_test.py +++ b/tensorflow/python/kernel_tests/matmul_op_test.py @@ -225,10 +225,13 @@ class MatMulInfixOperatorTest(test_lib.TestCase): if __name__ == "__main__": sizes = [1, 3, 5] trans_options = [[False, False], [True, False], [False, True]] + dtypes_to_test = [np.int32, np.int64, np.float16, np.float32, np.float64] + if not test_lib.is_built_with_rocm(): + # ROCm does not support BLAS operations for complex types + dtypes_to_test += [np.complex64, np.complex128] # TF2 does not support placeholders under eager so we skip it for use_static_shape in set([True, tf2.enabled()]): - for dtype in (np.int32, np.int64, np.float16, np.float32, np.float64, - np.complex64, np.complex128): + for dtype in dtypes_to_test: if not use_static_shape and (dtype == np.int32 or dtype == np.int64): # TODO(rmlarsen): Re-enable this test when we have fixed the underlying # bug in Windows (b/35935459). 
diff --git a/tensorflow/python/kernel_tests/matrix_exponential_op_test.py b/tensorflow/python/kernel_tests/matrix_exponential_op_test.py index 705f25b4fcd..520e4d3eb8d 100644 --- a/tensorflow/python/kernel_tests/matrix_exponential_op_test.py +++ b/tensorflow/python/kernel_tests/matrix_exponential_op_test.py @@ -91,6 +91,8 @@ class ExponentialOpTest(test.TestCase): @test_util.run_deprecated_v1 def testNonsymmetricComplex(self): + if test.is_built_with_rocm(): + self.skipTest("ROCm does not support BLAS operations for complex types") matrix1 = np.array([[1., 2.], [3., 4.]]) matrix2 = np.array([[1., 3.], [3., 5.]]) matrix1 = matrix1.astype(np.complex64) @@ -112,6 +114,8 @@ class ExponentialOpTest(test.TestCase): self._verifyExponentialReal(self._makeBatch(matrix1, matrix2)) def testSymmetricPositiveDefiniteComplex(self): + if test.is_built_with_rocm(): + self.skipTest("ROCm does not support BLAS operations for complex types") matrix1 = np.array([[2., 1.], [1., 2.]]) matrix2 = np.array([[3., -1.], [-1., 3.]]) matrix1 = matrix1.astype(np.complex64) diff --git a/tensorflow/python/kernel_tests/matrix_inverse_op_test.py b/tensorflow/python/kernel_tests/matrix_inverse_op_test.py index 60603f62112..56a242c0234 100644 --- a/tensorflow/python/kernel_tests/matrix_inverse_op_test.py +++ b/tensorflow/python/kernel_tests/matrix_inverse_op_test.py @@ -74,15 +74,17 @@ class InverseOpTest(test.TestCase): self._verifyInverseReal(matrix2) # A multidimensional batch of 2x2 matrices self._verifyInverseReal(self._makeBatch(matrix1, matrix2)) - # Complex - matrix1 = matrix1.astype(np.complex64) - matrix1 += 1j * matrix1 - matrix2 = matrix2.astype(np.complex64) - matrix2 += 1j * matrix2 - self._verifyInverseComplex(matrix1) - self._verifyInverseComplex(matrix2) - # Complex batch - self._verifyInverseComplex(self._makeBatch(matrix1, matrix2)) + if not test.is_built_with_rocm(): + # ROCm does not support BLAS operations for complex types + # Complex + matrix1 = matrix1.astype(np.complex64) + 
matrix1 += 1j * matrix1 + matrix2 = matrix2.astype(np.complex64) + matrix2 += 1j * matrix2 + self._verifyInverseComplex(matrix1) + self._verifyInverseComplex(matrix2) + # Complex batch + self._verifyInverseComplex(self._makeBatch(matrix1, matrix2)) def testSymmetricPositiveDefinite(self): # 2x2 matrices @@ -92,15 +94,17 @@ class InverseOpTest(test.TestCase): self._verifyInverseReal(matrix2) # A multidimensional batch of 2x2 matrices self._verifyInverseReal(self._makeBatch(matrix1, matrix2)) - # Complex - matrix1 = matrix1.astype(np.complex64) - matrix1 += 1j * matrix1 - matrix2 = matrix2.astype(np.complex64) - matrix2 += 1j * matrix2 - self._verifyInverseComplex(matrix1) - self._verifyInverseComplex(matrix2) - # Complex batch - self._verifyInverseComplex(self._makeBatch(matrix1, matrix2)) + if not test.is_built_with_rocm(): + # ROCm does not support BLAS operations for complex types + # Complex + matrix1 = matrix1.astype(np.complex64) + matrix1 += 1j * matrix1 + matrix2 = matrix2.astype(np.complex64) + matrix2 += 1j * matrix2 + self._verifyInverseComplex(matrix1) + self._verifyInverseComplex(matrix2) + # Complex batch + self._verifyInverseComplex(self._makeBatch(matrix1, matrix2)) @test_util.deprecated_graph_mode_only def testNonSquareMatrix(self): diff --git a/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py b/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py index 82f249a6444..ee6e3bb464f 100644 --- a/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py +++ b/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py @@ -60,6 +60,8 @@ class LogarithmOpTest(test.TestCase): @test_util.run_v1_only("b/120545219") def testNonsymmetric(self): + if test.is_built_with_rocm(): + self.skipTest("ROCm does not support BLAS operations for complex types") # 2x2 matrices matrix1 = np.array([[1., 2.], [3., 4.]]) matrix2 = np.array([[1., 3.], [3., 5.]]) @@ -74,6 +76,8 @@ class LogarithmOpTest(test.TestCase): @test_util.run_v1_only("b/120545219") def 
testSymmetricPositiveDefinite(self): + if test.is_built_with_rocm(): + self.skipTest("ROCm does not support BLAS operations for complex types") # 2x2 matrices matrix1 = np.array([[2., 1.], [1., 2.]]) matrix2 = np.array([[3., -1.], [-1., 3.]]) @@ -108,6 +112,8 @@ class LogarithmOpTest(test.TestCase): @test_util.run_v1_only("b/120545219") def testRandomSmallAndLargeComplex64(self): + if test.is_built_with_rocm(): + self.skipTest("ROCm does not support BLAS operations for complex types") np.random.seed(42) for batch_dims in [(), (1,), (3,), (2, 2)]: for size in 8, 31, 32: @@ -119,6 +125,8 @@ class LogarithmOpTest(test.TestCase): @test_util.run_v1_only("b/120545219") def testRandomSmallAndLargeComplex128(self): + if test.is_built_with_rocm(): + self.skipTest("ROCm does not support BLAS operations for complex types") np.random.seed(42) for batch_dims in [(), (1,), (3,), (2, 2)]: for size in 8, 31, 32: diff --git a/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py b/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py index 463477a6a2c..b99c8f6d256 100644 --- a/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py +++ b/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py @@ -353,7 +353,11 @@ class MatrixSolveLsBenchmark(test_lib.Benchmark): if __name__ == "__main__": - for dtype_ in [np.float32, np.float64, np.complex64, np.complex128]: + dtypes_to_test = [np.float32, np.float64] + if not test_lib.is_built_with_rocm(): + # ROCm does not support BLAS operations for complex types + dtypes_to_test += [np.complex64, np.complex128] + for dtype_ in dtypes_to_test: # TF2 does not support placeholders under eager so we skip it for use_placeholder_ in set([False, not tf2.enabled()]): for fast_ in [True, False]: @@ -368,7 +372,7 @@ if __name__ == "__main__": l2_regularizer_) _AddTest(MatrixSolveLsOpTest, "MatrixSolveLsOpTest", name, test_case) - for dtype_ in [np.float32, np.float64, np.complex64, np.complex128]: + for dtype_ in dtypes_to_test: for test_case 
in _GetLargeMatrixSolveLsOpTests(dtype_, False, True, 0.0): name = "%s_%s" % (test_case.__name__, dtype_.__name__) _AddTest(MatrixSolveLsOpTest, "MatrixSolveLsOpTest", name, test_case) diff --git a/tensorflow/python/kernel_tests/matrix_square_root_op_test.py b/tensorflow/python/kernel_tests/matrix_square_root_op_test.py index 51a90e8f337..2a761140b0a 100644 --- a/tensorflow/python/kernel_tests/matrix_square_root_op_test.py +++ b/tensorflow/python/kernel_tests/matrix_square_root_op_test.py @@ -59,14 +59,16 @@ class SquareRootOpTest(test.TestCase): self._verifySquareRootReal(matrix1) self._verifySquareRootReal(matrix2) self._verifySquareRootReal(self._makeBatch(matrix1, matrix2)) - # Complex - matrix1 = matrix1.astype(np.complex64) - matrix2 = matrix2.astype(np.complex64) - matrix1 += 1j * matrix1 - matrix2 += 1j * matrix2 - self._verifySquareRootComplex(matrix1) - self._verifySquareRootComplex(matrix2) - self._verifySquareRootComplex(self._makeBatch(matrix1, matrix2)) + if not test.is_built_with_rocm(): + # ROCm does not support BLAS operations for complex types + # Complex + matrix1 = matrix1.astype(np.complex64) + matrix2 = matrix2.astype(np.complex64) + matrix1 += 1j * matrix1 + matrix2 += 1j * matrix2 + self._verifySquareRootComplex(matrix1) + self._verifySquareRootComplex(matrix2) + self._verifySquareRootComplex(self._makeBatch(matrix1, matrix2)) def testSymmetricPositiveDefinite(self): matrix1 = np.array([[2., 1.], [1., 2.]]) diff --git a/tensorflow/python/kernel_tests/matrix_triangular_solve_op_test.py b/tensorflow/python/kernel_tests/matrix_triangular_solve_op_test.py index 2d0427cad94..a8eda0f4fe8 100644 --- a/tensorflow/python/kernel_tests/matrix_triangular_solve_op_test.py +++ b/tensorflow/python/kernel_tests/matrix_triangular_solve_op_test.py @@ -110,6 +110,8 @@ class MatrixTriangularSolveOpTest(test.TestCase): @test_util.run_deprecated_v1 def testSolveComplex(self): + if test.is_built_with_rocm(): + self.skipTest("ROCm does not support BLAS operations 
for complex types") # 1x1 matrix, single rhs. matrix = np.array([[0.1 + 1j * 0.1]]) rhs0 = np.array([[1. + 1j]]) @@ -136,6 +138,8 @@ class MatrixTriangularSolveOpTest(test.TestCase): @test_util.run_deprecated_v1 def testSolveBatchComplex(self): + if test.is_built_with_rocm(): + self.skipTest("ROCm does not support BLAS operations for complex types") matrix = np.array([[1., 2.], [3., 4.]]).astype(np.complex64) matrix += 1j * matrix rhs = np.array([[1., 0., 1.], [0., 1., 1.]]).astype(np.complex64) diff --git a/tensorflow/python/kernel_tests/pool_test.py b/tensorflow/python/kernel_tests/pool_test.py index 78e786f01ca..0f0eaa25402 100644 --- a/tensorflow/python/kernel_tests/pool_test.py +++ b/tensorflow/python/kernel_tests/pool_test.py @@ -219,6 +219,8 @@ class PoolingTest(test.TestCase): strides=strides) def testPool3D(self): + if test.is_built_with_rocm(): + self.skipTest("Pooling with 3D tensors is not supported in ROCm") with self.session(use_gpu=test.is_gpu_available()): for padding in ["SAME", "VALID"]: for pooling_type in ["MAX", "AVG"]: @@ -274,6 +276,9 @@ class PoolingTest(test.TestCase): strides=[1, 2], dilation_rate=[1, 1], data_format="NCHW") + if test.is_built_with_rocm(): + # Pooling with 3D tensors is not supported in ROCm + continue self._test( input_shape=[2, 2, 7, 5, 3], window_shape=[2, 2, 2], @@ -358,6 +363,8 @@ class PoolingTest(test.TestCase): @test_util.run_deprecated_v1 def testGradient3D(self): + if test.is_built_with_rocm(): + self.skipTest("Pooling with 3D tensors is not supported in ROCm") with self.session(use_gpu=test.is_gpu_available()): for padding in ["SAME", "VALID"]: for pooling_type in ["AVG", "MAX"]: diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py index 68b23a20bcf..b51e6571042 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_test.py +++ b/tensorflow/python/kernel_tests/pooling_ops_test.py @@ -1014,7 +1014,7 @@ class PoolingTest(test.TestCase): output_sizes, 
x_init_value=x_init_value, delta=1e-2) - tf_logging.info("%s gradient error = " % func_name, err) + tf_logging.info("%s gradient error = %.4f" % (func_name, err)) self.assertLess(err, err_tolerance) def _ConstructAndTestSecondGradient(self, @@ -1091,7 +1091,7 @@ class PoolingTest(test.TestCase): input_sizes, x_init_value=x_init_value, delta=1e-2) - tf_logging.info("%s second-order gradient error = " % func_name, err) + tf_logging.info("%s second-order gradient error = %.4f" % (func_name, err)) self.assertLess(err, err_tolerance) def _testMaxPoolGradValidPadding1_1(self, data_format, use_gpu): @@ -1439,6 +1439,13 @@ class PoolingTest(test.TestCase): if not test.is_gpu_available(): return + # The functionality associated with TF_ENABLE_MAXPOOL_NANPROP is currently + # not supported on the ROCm platform, so skip this part of the test + # NANs in input lead to non-deterministic results, and hence skipping + # the remaining tests altogether on the ROCm platform + if test.is_built_with_rocm(): + return + # Test the GPU implementation that uses cudnn for now. saved_nanprop = os.environ.get("TF_ENABLE_MAXPOOL_NANPROP") # Do not propagate the diff in cases of NaNs @@ -1519,6 +1526,13 @@ class PoolingTest(test.TestCase): if not test.is_gpu_available(): return + # The functionality associated with TF_ENABLE_MAXPOOL_NANPROP is currently + # not supported on the ROCm platform, so skip this part of the test + # NANs in input lead to non-deterministic results, and hence skipping + # the remaining tests altogether on the ROCm platform + if test.is_built_with_rocm(): + return + # Test the GPU implementation that uses cudnn for now.
saved_nanprop = os.environ.get("TF_ENABLE_MAXPOOL_NANPROP") # Do not propagate the diff in cases of NaNs diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py index 737024d871f..eed8bd7d258 100644 --- a/tensorflow/python/kernel_tests/relu_op_test.py +++ b/tensorflow/python/kernel_tests/relu_op_test.py @@ -80,6 +80,8 @@ class ReluTest(test.TestCase): def testReluInt8x4GoodShape(self): if not test.is_gpu_available(cuda_only=True): self.skipTest("No GPU available") + if test.is_built_with_rocm(): + self.skipTest("ROCm does not support int8x4 type") inputs = np.array([[-50, 7, 23, 0], [-1, -5, 6, 11]]) np_relu = self._npRelu(inputs) tf_relu = nn_ops.relu(constant_op.constant(inputs, dtypes.qint8)) @@ -90,6 +92,8 @@ class ReluTest(test.TestCase): def testReluInt8x4BadShape(self): if not test.is_gpu_available(cuda_only=True): self.skipTest("No GPU available") + if test.is_built_with_rocm(): + self.skipTest("ROCm does not support int8x4 type") inputs = constant_op.constant( np.array([[-50, 7, 23], [0, 1, -5], [6, -2, 11]]), dtypes.qint8) with self.assertRaisesRegexp( diff --git a/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py b/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py index 47b22ec2967..a42d7922bfb 100644 --- a/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py +++ b/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py @@ -240,9 +240,12 @@ def _GetSelfAdjointEigGradTest(dtype_, shape_, compute_v_): if __name__ == "__main__": + dtypes_to_test = [dtypes_lib.float32, dtypes_lib.float64] + if not test.is_built_with_rocm(): + # ROCm does not support BLAS operations for complex types + dtypes_to_test += [dtypes_lib.complex64, dtypes_lib.complex128] for compute_v in True, False: - for dtype in (dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.complex64, - dtypes_lib.complex128): + for dtype in dtypes_to_test: for size in 1, 2, 5, 10: for batch_dims in [(), (3,)] + [(3, 2)] * 
(max(size, size) < 10): shape = batch_dims + (size, size) diff --git a/tensorflow/python/kernel_tests/svd_op_test.py b/tensorflow/python/kernel_tests/svd_op_test.py index 36f3e95dd7e..781ddb7e6c1 100644 --- a/tensorflow/python/kernel_tests/svd_op_test.py +++ b/tensorflow/python/kernel_tests/svd_op_test.py @@ -365,9 +365,13 @@ class SVDBenchmark(test.Benchmark): if __name__ == "__main__": + dtypes_to_test = [np.float32, np.float64] + if not test.is_built_with_rocm(): + # ROCm does not support BLAS operations for complex types + dtypes_to_test += [np.complex64, np.complex128] for compute_uv in False, True: for full_matrices in False, True: - for dtype in np.float32, np.float64, np.complex64, np.complex128: + for dtype in dtypes_to_test: for rows in 1, 2, 5, 10, 32, 100: for cols in 1, 2, 5, 10, 32, 100: for batch_dims in [(), (3,)] + [(3, 2)] * (max(rows, cols) < 10): @@ -382,8 +386,8 @@ if __name__ == "__main__": compute_uv, full_matrices)) for compute_uv in False, True: for full_matrices in False, True: - dtypes = ([np.float32, np.float64] - + [np.complex64, np.complex128] * (not compute_uv)) + dtypes = ([np.float32, np.float64] + [np.complex64, np.complex128] * + (not compute_uv) * (not test.is_built_with_rocm())) for dtype in dtypes: mat_shapes = [(10, 11), (11, 10), (11, 11), (2, 2, 2, 3)] if not full_matrices or not compute_uv: diff --git a/tensorflow/python/kernel_tests/tensordot_op_test.py b/tensorflow/python/kernel_tests/tensordot_op_test.py index febfe23b16d..635a76323f6 100644 --- a/tensorflow/python/kernel_tests/tensordot_op_test.py +++ b/tensorflow/python/kernel_tests/tensordot_op_test.py @@ -221,7 +221,11 @@ def _get_tensordot_tests(dtype_, rank_a_, rank_b_, num_dims_, dynamic_shape_): if __name__ == "__main__": - for dtype in np.float16, np.float32, np.float64, np.complex64, np.complex128: + dtypes_to_test = [np.float16, np.float32, np.float64] + if not test_lib.is_built_with_rocm(): + # ROCm does not support BLAS operations for complex types + 
dtypes_to_test += [np.complex64, np.complex128] + for dtype in dtypes_to_test: for rank_a in 1, 2, 4, 5: for rank_b in 1, 2, 4, 5: for num_dims in range(0, min(rank_a, rank_b) + 1): diff --git a/tensorflow/python/kernel_tests/topk_op_test.py b/tensorflow/python/kernel_tests/topk_op_test.py index 32ac9a41569..7872e62050a 100644 --- a/tensorflow/python/kernel_tests/topk_op_test.py +++ b/tensorflow/python/kernel_tests/topk_op_test.py @@ -108,6 +108,10 @@ class TopKTest(test.TestCase): values = -np.sort(-inputs)[:k] self._validateTopK(inputs, k, values, indices) + def testTop1AllNan(self): + inputs = [[np.NaN, np.NaN], [np.NaN, np.NaN]] + self._validateTopK(inputs, 1, [[np.NaN], [np.NaN]], [[0], [0]]) + def _testLargeSort(self, dtype): b = 10 n = 5000 diff --git a/tensorflow/python/kernel_tests/zero_division_test.py b/tensorflow/python/kernel_tests/zero_division_test.py index 1220be45733..0f791b9012c 100644 --- a/tensorflow/python/kernel_tests/zero_division_test.py +++ b/tensorflow/python/kernel_tests/zero_division_test.py @@ -54,7 +54,11 @@ class ZeroDivisionTest(test.TestCase): # # XLA constant folds integer division by zero to 1. self.assertTrue(test.is_gpu_available()) - self.assertIn(result, (-1, 1, 0xff, 0xffffffff)) + if not test.is_built_with_rocm(): + # division by zero yields a different pattern on AMD GPUs + # TODO(rocm) : investigate whether the resulting bit pattern on + # AMD GPUs is deterministic + self.assertIn(result, (-1, 1, 0xff, 0xffffffff)) if __name__ == '__main__': diff --git a/tensorflow/python/lib/io/tf_record_multiprocessing_test.py b/tensorflow/python/lib/io/tf_record_multiprocessing_test.py new file mode 100644 index 00000000000..eaecffe9ff4 --- /dev/null +++ b/tensorflow/python/lib/io/tf_record_multiprocessing_test.py @@ -0,0 +1,64 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Multiprocessing tests for TFRecordWriter and tf_record_iterator.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import multiprocessing +import os + +from tensorflow.python.lib.io import tf_record +from tensorflow.python.platform import test +from tensorflow.python.util import compat + +TFRecordCompressionType = tf_record.TFRecordCompressionType + + +def ChildProcess(writer, rs): + for r in rs: + writer.write(r) + writer.flush() + + +class TFRecordWriterCloseAndFlushTests(test.TestCase): + """TFRecordWriter close and flush tests.""" + + # pylint: disable=arguments-differ + def setUp(self, compression_type=TFRecordCompressionType.NONE): + super(TFRecordWriterCloseAndFlushTests, self).setUp() + self._fn = os.path.join(self.get_temp_dir(), "tf_record_writer_test.txt") + self._options = tf_record.TFRecordOptions(compression_type) + self._writer = tf_record.TFRecordWriter(self._fn, self._options) + self._num_records = 20 + + def _Record(self, r): + return compat.as_bytes("Record %d" % r) + + def testFlush(self): + """test Flush.""" + records = [self._Record(i) for i in range(self._num_records)] + + write_process = multiprocessing.Process( + target=ChildProcess, args=(self._writer, records)) + write_process.start() + write_process.join() + actual = list(tf_record.tf_record_iterator(self._fn, self._options)) + self.assertListEqual(actual, records) + + +if __name__ == "__main__": + test.main() diff --git 
a/tensorflow/python/lib/io/tf_record_test.py b/tensorflow/python/lib/io/tf_record_test.py index f620c0ec39e..280c2b10918 100644 --- a/tensorflow/python/lib/io/tf_record_test.py +++ b/tensorflow/python/lib/io/tf_record_test.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function import gzip -import multiprocessing import os import random import string @@ -34,9 +33,7 @@ from tensorflow.python.util import compat prefix_path = "third_party/tensorflow/core/lib" -# pylint: disable=invalid-name TFRecordCompressionType = tf_record.TFRecordCompressionType -# pylint: enable=invalid-name # Edgar Allan Poe's 'Eldorado' _TEXT = b"""Gaily bedight, @@ -174,7 +171,6 @@ class TFRecordWriterTest(TFCompressionTestCase): # Negative number => better compression. return os.path.getsize(fn_a) - os.path.getsize(fn_b) -# pylint: disable=invalid-name def testWriteReadZLibFiles(self): """test Write Read ZLib Files""" @@ -198,7 +194,6 @@ class TFRecordWriterTest(TFCompressionTestCase): for i, fn in enumerate(compressed_files) ] self._AssertFilesEqual(uncompressed_files, files, True) -# pylint: disable=invalid-name def testWriteReadGzipFiles(self): """test Write Read Gzip Files""" @@ -224,7 +219,6 @@ class TFRecordWriterTest(TFCompressionTestCase): for i, fn in enumerate(compressed_files) ] self._AssertFilesEqual(uncompressed_files, files, True) -# pylint: disable=invalid-name def testNoCompressionType(self): """test No Compression Type""" @@ -243,7 +237,6 @@ class TFRecordWriterTest(TFCompressionTestCase): with self.assertRaises(ValueError): tf_record.TFRecordOptions("BZ2") -# pylint: disable=invalid-name def testZlibCompressionType(self): """test Zlib Compression Type""" @@ -263,7 +256,6 @@ class TFRecordWriterTest(TFCompressionTestCase): "ZLIB", tf_record.TFRecordOptions.get_compression_type_string( tf_record.TFRecordOptions(tf_record.TFRecordOptions(zlib_t)))) -# pylint: disable=invalid-name def testCompressionOptions(self): """Create record with mix of 
random and repeated data to test compression on.""" @@ -304,7 +296,6 @@ class TFRecordWriterTest(TFCompressionTestCase): class TFRecordWriterZlibTest(TFCompressionTestCase): """TFRecordWriter Zlib test""" - # pylint: disable=invalid-name def testZLibFlushRecord(self): """test ZLib Flush Record""" original = [b"small record"] @@ -333,7 +324,6 @@ class TFRecordWriterZlibTest(TFCompressionTestCase): options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB) actual = list(tf_record.tf_record_iterator(fn, options=options)) self.assertEqual(actual, original) -# pylint: disable=invalid-name def testZlibReadWrite(self): """Verify that files produced are zlib compatible.""" @@ -345,7 +335,6 @@ class TFRecordWriterZlibTest(TFCompressionTestCase): options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB) actual = list(tf_record.tf_record_iterator(zfn, options=options)) self.assertEqual(actual, original) -# pylint: disable=invalid-name def testZlibReadWriteLarge(self): """Verify that writing large contents also works.""" @@ -358,7 +347,6 @@ class TFRecordWriterZlibTest(TFCompressionTestCase): options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB) actual = list(tf_record.tf_record_iterator(zfn, options=options)) self.assertEqual(actual, original) -# pylint: disable=invalid-name def testGzipReadWrite(self): """Verify that files produced are gzip compatible.""" @@ -377,7 +365,6 @@ class TFRecordIteratorTest(TFCompressionTestCase): def setUp(self): super(TFRecordIteratorTest, self).setUp() self._num_records = 7 -# pylint: disable=invalid-name def testIterator(self): """test Iterator""" @@ -391,7 +378,6 @@ class TFRecordIteratorTest(TFCompressionTestCase): self.assertAllEqual(expected, record) with self.assertRaises(StopIteration): record = next(reader) -# pylint: disable=invalid-name def testWriteZlibRead(self): """Verify compression with TFRecordWriter is zlib library compatible.""" @@ -403,7 +389,6 @@ class TFRecordIteratorTest(TFCompressionTestCase): 
zfn = self._ZlibDecompressFile(fn, "write_zlib_read.tfrecord") actual = list(tf_record.tf_record_iterator(zfn)) self.assertEqual(actual, original) -# pylint: disable=invalid-name def testWriteZlibReadLarge(self): """Verify compression for large records is zlib library compatible.""" @@ -415,7 +400,6 @@ class TFRecordIteratorTest(TFCompressionTestCase): zfn = self._ZlibDecompressFile(fn, "write_zlib_read_large.tfrecord") actual = list(tf_record.tf_record_iterator(zfn)) self.assertEqual(actual, original) -# pylint: disable=invalid-name def testWriteGzipRead(self): original = [b"foo", b"bar"] @@ -426,7 +410,6 @@ class TFRecordIteratorTest(TFCompressionTestCase): gzfn = self._GzipDecompressFile(fn, "write_gzip_read.tfrecord") actual = list(tf_record.tf_record_iterator(gzfn)) self.assertEqual(actual, original) -# pylint: disable=invalid-name def testBadFile(self): """Verify that tf_record_iterator throws an exception on bad TFRecords.""" @@ -457,7 +440,6 @@ class TFRecordWriterCloseAndFlushTests(test.TestCase): def _Record(self, r): return compat.as_bytes("Record %d" % r) -# pylint: disable=invalid-name def testWriteAndLeaveOpen(self): records = list(map(self._Record, range(self._num_records))) @@ -465,7 +447,6 @@ class TFRecordWriterCloseAndFlushTests(test.TestCase): self._writer.write(record) # Verify no segfault if writer isn't explicitly closed. 
-# pylint: disable=invalid-name def testWriteAndRead(self): records = list(map(self._Record, range(self._num_records))) @@ -475,30 +456,11 @@ class TFRecordWriterCloseAndFlushTests(test.TestCase): actual = list(tf_record.tf_record_iterator(self._fn, self._options)) self.assertListEqual(actual, records) -# pylint: disable=invalid-name - - def testFlush(self): - """test Flush""" - records = list(map(self._Record, range(self._num_records))) - - def childProcess(writer, rs): - for r in rs: - writer.write(r) - writer.flush() - - write_process = multiprocessing.Process( - target=childProcess, args=(self._writer, records)) - write_process.start() - write_process.join() - actual = list(tf_record.tf_record_iterator(self._fn, self._options)) - self.assertListEqual(actual, records) -# pylint: disable=invalid-name def testDoubleClose(self): self._writer.write(self._Record(0)) self._writer.close() self._writer.close() -# pylint: disable=invalid-name def testFlushAfterCloseIsError(self): self._writer.write(self._Record(0)) @@ -506,7 +468,6 @@ class TFRecordWriterCloseAndFlushTests(test.TestCase): with self.assertRaises(errors_impl.FailedPreconditionError): self._writer.flush() -# pylint: disable=invalid-name def testWriteAfterCloseIsError(self): self._writer.write(self._Record(0)) diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 629ca6e9abf..0a65d19c014 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -3588,7 +3588,7 @@ def where(condition, x=None, y=None, name=None): If both non-None, `x` and `y` must have the same shape. The `condition` tensor must be a scalar if `x` and `y` are scalar. - If `x` and `y` are vectors of higher rank, then `condition` must be either a + If `x` and `y` are tensors of higher rank, then `condition` must be either a vector with size matching the first dimension of `x`, or must have the same shape as `x`. @@ -3807,7 +3807,7 @@ def gather(params, A `Tensor`. 
Has the same type as `params`. """ del validate_indices - if compat.forward_compatible(2019, 7, 10): + if compat.forward_compatible(2019, 8, 10): if axis is None: axis = batch_dims if axis != 0: diff --git a/tensorflow/python/ops/control_flow_ops_test.py b/tensorflow/python/ops/control_flow_ops_test.py index c08548712fc..4d07d60d8ee 100644 --- a/tensorflow/python/ops/control_flow_ops_test.py +++ b/tensorflow/python/ops/control_flow_ops_test.py @@ -51,6 +51,7 @@ from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables import tensorflow.python.ops.tensor_array_grad # pylint: disable=unused-import from tensorflow.python.platform import googletest +from tensorflow.python.platform import test from tensorflow.python.training import momentum from tensorflow.python.util import nest @@ -1081,6 +1082,10 @@ class IndexedCaseTest(test_util.TensorFlowTestCase, parameterized.TestCase): @test_util.disable_xla("Wants RunMetadata") def testParallelExecution(self): """Verify disjoint branches across while iterations are run in parallel.""" + if test.is_built_with_rocm(): + self.skipTest( + "Disable subtest on ROCm due to missing Cholesky op support") + with ops.Graph().as_default() as g: nbranches = 7 matrices = array_ops.unstack( # Ensure all are ready before while. 
diff --git a/tensorflow/python/ops/custom_gradient.py b/tensorflow/python/ops/custom_gradient.py index e2bbdc7f788..12b4feb68e5 100644 --- a/tensorflow/python/ops/custom_gradient.py +++ b/tensorflow/python/ops/custom_gradient.py @@ -74,7 +74,7 @@ def copy_handle_data(source_t, target_t): shapes, types = zip(*[(pair.shape, pair.dtype) for pair in handle_data.shape_and_type]) ranks = [len(s.dim) if not s.unknown_rank else -1 for s in shapes] - shapes = [[d.size for d in s.dim] + shapes = [[d.size for d in s.dim] # pylint: disable=g-complex-comprehension if not s.unknown_rank else None for s in shapes] pywrap_tensorflow.TF_GraphSetOutputHandleShapesAndTypes_wrapper( target_t._op._graph._c_graph, # pylint: disable=protected-access @@ -394,3 +394,56 @@ def recompute_grad(f): return result, grad return inner + + +@tf_export("grad_pass_through") +def grad_pass_through(f): + """Creates a grad-pass-through op with the forward behavior provided in f. + + Use this function to wrap any op, maintaining its behavior in the forward + pass, but replacing the original op in the backward graph with an identity. + For example: + + ```python + x = tf.Variable(1.0, name="x") + z = tf.Variable(3.0, name="z") + + with tf.GradientTape() as tape: + # y will evaluate to 9.0 + y = tf.grad_pass_through(x.assign)(z**2) + # grads will evaluate to 6.0 + grads = tape.gradient(y, z) + ``` + + Another example is a 'differentiable' moving average approximation, where + gradients are allowed to flow into the last value fed to the moving average, + but the moving average is still used for the forward pass: + + ```python + x = ... 
# Some scalar value + # A moving average object, we don't need to know how this is implemented + moving_average = MovingAverage() + with backprop.GradientTape() as tape: + # mavg_x will evaluate to the current running average value + mavg_x = tf.grad_pass_through(moving_average)(x) + grads = tape.gradient(mavg_x, x) # grads will evaluate to 1.0 + ``` + + Args: + f: function `f(*x)` that returns a `Tensor` or nested structure of `Tensor` + outputs. + + Returns: + A function `h(x)` which returns the same values as `f(x)` and whose + gradients are the same as those of an identity function. + """ + @custom_gradient + def _grad_pass_through_op(*args, **kwargs): + def grad(*args, **kwargs): + variables = kwargs.get("variables") + if variables is not None: + # Variables involved in the wrapped op will not receive gradients. + return args, [None] * len(variables) + return args + return f(*args, **kwargs), grad + return tf_decorator.make_decorator(f, _grad_pass_through_op) diff --git a/tensorflow/python/ops/data_flow_ops.py b/tensorflow/python/ops/data_flow_ops.py index bf7314d918d..9c49fc85270 100644 --- a/tensorflow/python/ops/data_flow_ops.py +++ b/tensorflow/python/ops/data_flow_ops.py @@ -1217,7 +1217,7 @@ class ConditionalAccumulatorBase(object): if name is None: name = "%s_NumAccumulated" % self._name - if compat.forward_compatible(2019, 7, 8): + if compat.forward_compatible(2019, 8, 8): return gen_data_flow_ops.resource_accumulator_num_accumulated( self._accumulator_ref, name=name) @@ -1237,7 +1237,7 @@ class ConditionalAccumulatorBase(object): Returns: Operation that sets the accumulator's time step. """ - if compat.forward_compatible(2019, 7, 8): + if compat.forward_compatible(2019, 8, 8): return gen_data_flow_ops.resource_accumulator_set_global_step( self._accumulator_ref, math_ops.cast(ops.convert_to_tensor(new_global_step), _dtypes.int64), @@ -1276,7 +1276,7 @@ class ConditionalAccumulator(ConditionalAccumulatorBase): name: Optional name for the accumulator. 
reduction_type: Reduction type to use when taking the gradient. """ - if compat.forward_compatible(2019, 7, 8): + if compat.forward_compatible(2019, 8, 8): accumulator_ref = gen_data_flow_ops.resource_conditional_accumulator( dtype=dtype, shape=shape, @@ -1316,7 +1316,7 @@ class ConditionalAccumulator(ConditionalAccumulatorBase): grad.get_shape().assert_is_compatible_with(self._shape) local_step = math_ops.cast(ops.convert_to_tensor(local_step), _dtypes.int64) - if compat.forward_compatible(2019, 7, 8): + if compat.forward_compatible(2019, 8, 8): return gen_data_flow_ops.resource_accumulator_apply_gradient( self._accumulator_ref, local_step=local_step, @@ -1347,7 +1347,7 @@ class ConditionalAccumulator(ConditionalAccumulatorBase): Raises: InvalidArgumentError: If num_required < 1 """ - if compat.forward_compatible(2019, 7, 8): + if compat.forward_compatible(2019, 8, 8): out = gen_data_flow_ops.resource_accumulator_take_gradient( self._accumulator_ref, num_required, dtype=self._dtype, name=name) else: diff --git a/tensorflow/python/ops/distributions/beta.py b/tensorflow/python/ops/distributions/beta.py index 1d1a666317f..c1ec6ed6c69 100644 --- a/tensorflow/python/ops/distributions/beta.py +++ b/tensorflow/python/ops/distributions/beta.py @@ -312,7 +312,7 @@ class Beta(distribution.Distribution): name="nan") is_defined = math_ops.logical_and(self.concentration1 > 1., self.concentration0 > 1.) 
- return array_ops.where(is_defined, mode, nan) + return array_ops.where_v2(is_defined, mode, nan) return control_flow_ops.with_dependencies([ check_ops.assert_less( array_ops.ones([], dtype=self.dtype), diff --git a/tensorflow/python/ops/distributions/dirichlet.py b/tensorflow/python/ops/distributions/dirichlet.py index 971ce46efbc..74271775c12 100644 --- a/tensorflow/python/ops/distributions/dirichlet.py +++ b/tensorflow/python/ops/distributions/dirichlet.py @@ -293,9 +293,8 @@ class Dirichlet(distribution.Distribution): array_ops.shape(mode), np.array(np.nan, dtype=self.dtype.as_numpy_dtype()), name="nan") - return array_ops.where( - math_ops.reduce_all(self.concentration > 1., axis=-1), - mode, nan) + return array_ops.where_v2( + math_ops.reduce_all(self.concentration > 1., axis=-1), mode, nan) return control_flow_ops.with_dependencies([ check_ops.assert_less( array_ops.ones([], self.dtype), diff --git a/tensorflow/python/ops/distributions/gamma.py b/tensorflow/python/ops/distributions/gamma.py index 8b956993ed3..6fb105c2cbe 100644 --- a/tensorflow/python/ops/distributions/gamma.py +++ b/tensorflow/python/ops/distributions/gamma.py @@ -267,7 +267,7 @@ class Gamma(distribution.Distribution): self.batch_shape_tensor(), np.array(np.nan, dtype=self.dtype.as_numpy_dtype()), name="nan") - return array_ops.where(self.concentration > 1., mode, nan) + return array_ops.where_v2(self.concentration > 1., mode, nan) else: return control_flow_ops.with_dependencies([ check_ops.assert_less( diff --git a/tensorflow/python/ops/distributions/special_math.py b/tensorflow/python/ops/distributions/special_math.py index ccc667cae3e..c529fb45d43 100644 --- a/tensorflow/python/ops/distributions/special_math.py +++ b/tensorflow/python/ops/distributions/special_math.py @@ -149,11 +149,10 @@ def _ndtr(x): 0.5 * np.sqrt(2.), dtype=x.dtype, name="half_sqrt_2") w = x * half_sqrt_2 z = math_ops.abs(w) - y = array_ops.where(math_ops.less(z, half_sqrt_2), - 1. 
+ math_ops.erf(w), - array_ops.where(math_ops.greater(w, 0.), - 2. - math_ops.erfc(z), - math_ops.erfc(z))) + y = array_ops.where_v2( + math_ops.less(z, half_sqrt_2), 1. + math_ops.erf(w), + array_ops.where_v2( + math_ops.greater(w, 0.), 2. - math_ops.erfc(z), math_ops.erfc(z))) return 0.5 * y @@ -250,11 +249,11 @@ def _ndtri(p): return array_ops.zeros_like(var) return coeffs[0] + _create_polynomial(var, coeffs[1:]) * var - maybe_complement_p = array_ops.where(p > -np.expm1(-2.), 1. - p, p) + maybe_complement_p = array_ops.where_v2(p > -np.expm1(-2.), 1. - p, p) # Write in an arbitrary value in place of 0 for p since 0 will cause NaNs # later on. The result from the computation when p == 0 is not used so any # number that doesn't result in NaNs is fine. - sanitized_mcp = array_ops.where( + sanitized_mcp = array_ops.where_v2( maybe_complement_p <= 0., array_ops.fill(array_ops.shape(p), np.array(0.5, p.dtype.as_numpy_dtype)), maybe_complement_p) @@ -280,15 +279,15 @@ def _ndtri(p): x_for_small_p = first_term - second_term_small_p x_otherwise = first_term - second_term_otherwise - x = array_ops.where(sanitized_mcp > np.exp(-2.), - x_for_big_p, - array_ops.where(z >= 8.0, x_for_small_p, x_otherwise)) + x = array_ops.where_v2( + sanitized_mcp > np.exp(-2.), x_for_big_p, + array_ops.where_v2(z >= 8.0, x_for_small_p, x_otherwise)) - x = array_ops.where(p > 1. - np.exp(-2.), x, -x) + x = array_ops.where_v2(p > 1. - np.exp(-2.), x, -x) infinity_scalar = constant_op.constant(np.inf, dtype=p.dtype) infinity = array_ops.fill(array_ops.shape(p), infinity_scalar) - x_nan_replaced = array_ops.where( - p <= 0.0, -infinity, array_ops.where(p >= 1.0, infinity, x)) + x_nan_replaced = array_ops.where_v2(p <= 0.0, -infinity, + array_ops.where_v2(p >= 1.0, infinity, x)) return x_nan_replaced @@ -375,13 +374,13 @@ def log_ndtr(x, series_order=3, name="log_ndtr"): # the gradient of a select involves the calculation 1*dy+0*(-inf)=nan # regardless of whether dy is finite. 
Note that the minimum is a NOP if # the branch is chosen. - return array_ops.where( + return array_ops.where_v2( math_ops.greater(x, upper_segment), -_ndtr(-x), # log(1-x) ~= -x, x << 1 - array_ops.where(math_ops.greater(x, lower_segment), - math_ops.log(_ndtr(math_ops.maximum(x, lower_segment))), - _log_ndtr_lower(math_ops.minimum(x, lower_segment), - series_order))) + array_ops.where_v2( + math_ops.greater(x, lower_segment), + math_ops.log(_ndtr(math_ops.maximum(x, lower_segment))), + _log_ndtr_lower(math_ops.minimum(x, lower_segment), series_order))) def _log_ndtr_lower(x, series_order): @@ -484,4 +483,4 @@ def log_cdf_laplace(x, name="log_cdf_laplace"): # internally by log1p, rather than being done explicitly here. upper_solution = math_ops.log1p(-0.5 * safe_exp_neg_x) - return array_ops.where(x < 0., lower_solution, upper_solution) + return array_ops.where_v2(x < 0., lower_solution, upper_solution) diff --git a/tensorflow/python/ops/distributions/student_t.py b/tensorflow/python/ops/distributions/student_t.py index 351f5605e24..efc3290592d 100644 --- a/tensorflow/python/ops/distributions/student_t.py +++ b/tensorflow/python/ops/distributions/student_t.py @@ -281,7 +281,7 @@ class StudentT(distribution.Distribution): y = (x - self.loc) / math_ops.abs(self.scale) x_t = self.df / (y**2. + self.df) neg_cdf = 0.5 * math_ops.betainc(0.5 * self.df, 0.5, x_t) - return array_ops.where(math_ops.less(y, 0.), neg_cdf, 1. - neg_cdf) + return array_ops.where_v2(math_ops.less(y, 0.), neg_cdf, 1. 
- neg_cdf) def _entropy(self): v = array_ops.ones(self.batch_shape_tensor(), @@ -304,12 +304,11 @@ class StudentT(distribution.Distribution): dtype=self.dtype) if self.allow_nan_stats: nan = np.array(np.nan, dtype=self.dtype.as_numpy_dtype()) - return array_ops.where( + return array_ops.where_v2( math_ops.greater( self.df, array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype)), - mean, - array_ops.fill(self.batch_shape_tensor(), nan, name="nan")) + mean, array_ops.fill(self.batch_shape_tensor(), nan, name="nan")) else: return control_flow_ops.with_dependencies( [ @@ -332,22 +331,21 @@ class StudentT(distribution.Distribution): def _variance(self): # We need to put the tf.where inside the outer tf.where to ensure we never # hit a NaN in the gradient. - denom = array_ops.where(math_ops.greater(self.df, 2.), - self.df - 2., - array_ops.ones_like(self.df)) + denom = array_ops.where_v2( + math_ops.greater(self.df, 2.), self.df - 2., + array_ops.ones_like(self.df)) # Abs(scale) superfluous. var = (array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype) * math_ops.square(self.scale) * self.df / denom) # When 1 < df <= 2, variance is infinite. 
inf = np.array(np.inf, dtype=self.dtype.as_numpy_dtype()) - result_where_defined = array_ops.where( - self.df > array_ops.fill(self.batch_shape_tensor(), 2.), - var, + result_where_defined = array_ops.where_v2( + self.df > array_ops.fill(self.batch_shape_tensor(), 2.), var, array_ops.fill(self.batch_shape_tensor(), inf, name="inf")) if self.allow_nan_stats: nan = np.array(np.nan, dtype=self.dtype.as_numpy_dtype()) - return array_ops.where( + return array_ops.where_v2( math_ops.greater( self.df, array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype)), diff --git a/tensorflow/python/ops/distributions/transformed_distribution.py b/tensorflow/python/ops/distributions/transformed_distribution.py index eada3cc6b9a..61397f1e4f2 100644 --- a/tensorflow/python/ops/distributions/transformed_distribution.py +++ b/tensorflow/python/ops/distributions/transformed_distribution.py @@ -91,7 +91,7 @@ def _pick_scalar_condition(pred, cond_true, cond_false): # tf.select even though we use tf.select to implement it. 
pred_ = _static_value(pred) if pred_ is None: - return array_ops.where(pred, cond_true, cond_false) + return array_ops.where_v2(pred, cond_true, cond_false) return cond_true if pred_ else cond_false diff --git a/tensorflow/python/ops/distributions/uniform.py b/tensorflow/python/ops/distributions/uniform.py index 8fac0167778..0221ccff78c 100644 --- a/tensorflow/python/ops/distributions/uniform.py +++ b/tensorflow/python/ops/distributions/uniform.py @@ -177,10 +177,9 @@ class Uniform(distribution.Distribution): def _prob(self, x): broadcasted_x = x * array_ops.ones( self.batch_shape_tensor(), dtype=x.dtype) - return array_ops.where( - math_ops.is_nan(broadcasted_x), - broadcasted_x, - array_ops.where( + return array_ops.where_v2( + math_ops.is_nan(broadcasted_x), broadcasted_x, + array_ops.where_v2( math_ops.logical_or(broadcasted_x < self.low, broadcasted_x >= self.high), array_ops.zeros_like(broadcasted_x), @@ -192,9 +191,9 @@ class Uniform(distribution.Distribution): zeros = array_ops.zeros(broadcast_shape, dtype=self.dtype) ones = array_ops.ones(broadcast_shape, dtype=self.dtype) broadcasted_x = x * ones - result_if_not_big = array_ops.where( + result_if_not_big = array_ops.where_v2( x < self.low, zeros, (broadcasted_x - self.low) / self.range()) - return array_ops.where(x >= self.high, ones, result_if_not_big) + return array_ops.where_v2(x >= self.high, ones, result_if_not_big) def _entropy(self): return math_ops.log(self.range()) diff --git a/tensorflow/python/ops/distributions/util.py b/tensorflow/python/ops/distributions/util.py index 71d84770ba7..ec66b5c2527 100644 --- a/tensorflow/python/ops/distributions/util.py +++ b/tensorflow/python/ops/distributions/util.py @@ -647,7 +647,7 @@ def rotate_transpose(x, shift, name="rotate_transpose"): # Finally, we transform shift by modulo length so it can be specified # independently from the array upon which it operates (like python). 
ndims = array_ops.rank(x) - shift = array_ops.where( + shift = array_ops.where_v2( math_ops.less(shift, 0), math_ops.mod(-shift, ndims), ndims - math_ops.mod(shift, ndims)) first = math_ops.range(0, shift) @@ -699,7 +699,7 @@ def pick_vector(cond, true_vector, false_vector, name="pick_vector"): n = array_ops.shape(true_vector)[0] return array_ops.slice( array_ops.concat([true_vector, false_vector], 0), - [array_ops.where(cond, 0, n)], [array_ops.where(cond, n, -1)]) + [array_ops.where_v2(cond, 0, n)], [array_ops.where(cond, n, -1)]) def prefer_static_broadcast_shape(shape1, @@ -1125,7 +1125,7 @@ def reduce_weighted_logsumexp(logx, # off the max. We do this because otherwise we'd get `inf - inf = NaN`. That # this is ok follows from the fact that we're actually free to subtract any # value we like, so long as we add it back after taking the `log(sum(...))`. - max_log_absw_x = array_ops.where( + max_log_absw_x = array_ops.where_v2( math_ops.is_inf(max_log_absw_x), array_ops.zeros_like(max_log_absw_x), max_log_absw_x) wx_over_max_absw_x = ( @@ -1191,12 +1191,13 @@ def softplus_inverse(x, name=None): too_large_value = x # This `where` will ultimately be a NOP because we won't select this # codepath whenever we used the surrogate `ones_like`. - x = array_ops.where( + x = array_ops.where_v2( math_ops.logical_or(is_too_small, is_too_large), array_ops.ones_like(x), x) y = x + math_ops.log(-math_ops.expm1(-x)) # == log(expm1(x)) - return array_ops.where(is_too_small, too_small_value, - array_ops.where(is_too_large, too_large_value, y)) + return array_ops.where_v2( + is_too_small, too_small_value, + array_ops.where_v2(is_too_large, too_large_value, y)) # TODO(b/35290280): Add unit-tests. 
@@ -1332,7 +1333,7 @@ def pad(x, axis, front=False, back=False, value=0, count=1, name=None): else: final_shape = None else: - axis = array_ops.where(axis < 0, ndims + axis, axis) + axis = array_ops.where_v2(axis < 0, ndims + axis, axis) final_shape = None x = array_ops.pad( x, diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py index 1b6dcd35bf3..be98f2a6279 100644 --- a/tensorflow/python/ops/gradients_test.py +++ b/tensorflow/python/ops/gradients_test.py @@ -44,12 +44,14 @@ from tensorflow.python.ops import data_flow_ops # pylint: disable=unused-import from tensorflow.python.ops import functional_ops # pylint: disable=unused-import from tensorflow.python.ops import gradients from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import init_ops from tensorflow.python.ops import list_ops from tensorflow.python.ops import math_grad # pylint: disable=unused-import from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_grad # pylint: disable=unused-import from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import state_grad # pylint: disable=unused-import +from tensorflow.python.ops import state_ops from tensorflow.python.ops import tensor_array_grad # pylint: disable=unused-import from tensorflow.python.ops import tensor_array_ops from tensorflow.python.ops import unconnected_gradients @@ -1389,5 +1391,64 @@ class VariablesGradientTest(test_util.TensorFlowTestCase): self.assertAllClose(g, g_re) +class GradPassThroughTest(test_util.TensorFlowTestCase): + + @test_util.run_v1_only("b/120545219") + def test_gradients_v1(self): + x = variable_scope.get_variable( + name="x", shape=(), initializer=init_ops.constant_initializer(1.0), + use_resource=True) + z = variable_scope.get_variable( + name="z", shape=(), initializer=init_ops.constant_initializer(3.0), + use_resource=True) + + # Verify that assign op is not differentiable + y = state_ops.assign(x, 
z**2) + grads = gradients.gradients(y, z) + self.assertIsNone(grads[0]) + + # Verify that when the (non differentiable) assign op is wrapped with + # grad_pass_through, gradients are correctly forwarded to the inputs. + # Form an input as quadratic function of variable z and check that the + # gradient of output wrt to z is correct. + y = custom_gradient.grad_pass_through( + lambda v: state_ops.assign(x, v))(z**2) + grads = gradients.gradients(y, z) + with self.cached_session() as sess: + sess.run(variables.global_variables_initializer()) + self.assertAllClose(grads[0].eval(), 6.0) + + # Verify that variables involved in the wrapped op do not receive gradients. + y = custom_gradient.grad_pass_through(lambda v: x * v)(z) + grads = gradients.gradients(y, x) + self.assertIsNone(grads[0]) + + @test_util.run_v2_only + def test_gradients_v2(self): + x = variables.Variable(1.0, name="x") + z = variables.Variable(3.0, name="z") + + # Verify that assign op is not differentiable + with backprop.GradientTape() as tape: + y = x.assign(z**2) + grads = tape.gradient(y, z) + self.assertIsNone(grads) + + # Verify that when the (non differentiable) assign op is wrapped with + # grad_pass_through, gradients are correctly forwarded to the inputs. + # Form an input as quadratic function of variable z and check that the + # gradient of output wrt to z is correct. + with backprop.GradientTape() as tape: + y = custom_gradient.grad_pass_through(x.assign)(z**2) + grads = tape.gradient(y, z) + self.assertAllClose(grads, 6.0) + + # Verify that variables involved in the wrapped op do not receive gradients. 
+ with backprop.GradientTape() as tape: + y = custom_gradient.grad_pass_through(lambda v: x * v)(z) + grads = tape.gradient(y, x) + self.assertIsNone(grads) + + if __name__ == "__main__": googletest.main() diff --git a/tensorflow/python/ops/image_grad.py b/tensorflow/python/ops/image_grad.py index 7d240dc6b63..3b3bd015f13 100644 --- a/tensorflow/python/ops/image_grad.py +++ b/tensorflow/python/ops/image_grad.py @@ -22,6 +22,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_image_ops +from tensorflow.python.ops import math_ops @ops.RegisterGradient("ResizeNearestNeighbor") @@ -153,3 +154,228 @@ def _CropAndResizeGrad(op, grad): grad, op.inputs[0], op.inputs[1], op.inputs[2]) return [grad0, grad1, None, None] + + +def _CustomReciprocal(x): + """Wrapper function around `math_ops.div_no_nan()` to perform a "safe" reciprocal in case the input is zero. Avoids divide by zero and NaNs. + + Input: + x -> input tensor to be reciprocated. + Returns: + x_reciprocal -> reciprocal of x without NaNs. + """ + return math_ops.div_no_nan(1.0, x) + + +@ops.RegisterGradient("RGBToHSV") +def _RGBToHSVGrad(op, grad): + """The gradients for `rgb_to_hsv` operation. + + This function is a piecewise continuous function as defined here: + https://en.wikipedia.org/wiki/HSL_and_HSV#From_RGB + We perform the multivariate derivative and compute all partial derivatives + separately before adding them in the end. Formulas are given before each + partial derivative calculation. + + Args: + op: The `rgb_to_hsv` `Operation` that we are differentiating. + grad: Gradient with respect to the output of the `rgb_to_hsv` op. + + Returns: + Gradients with respect to the input of `rgb_to_hsv`.
+ """ + # Input Channels + reds = op.inputs[0][..., 0] + greens = op.inputs[0][..., 1] + blues = op.inputs[0][..., 2] + # Output Channels + saturation = op.outputs[0][..., 1] + value = op.outputs[0][..., 2] + + # Mask/Indicator for max and min values of each pixel. + # Arbitrary assignment in case of tie breakers with R>G>B. + # Max values + red_biggest = math_ops.cast((reds >= blues) & \ + (reds >= greens), dtypes.float32) + green_biggest = math_ops.cast((greens > reds) & \ + (greens >= blues), dtypes.float32) + blue_biggest = math_ops.cast((blues > reds) & \ + (blues > greens), dtypes.float32) + # Min values + red_smallest = math_ops.cast((reds < blues) & \ + (reds < greens), dtypes.float32) + green_smallest = math_ops.cast((greens <= reds) & \ + (greens < blues), dtypes.float32) + blue_smallest = math_ops.cast((blues <= reds) & \ + (blues <= greens), dtypes.float32) + + # Derivatives of R, G, B wrt Value slice + dv_dr = red_biggest + dv_dg = green_biggest + dv_db = blue_biggest + + # Derivatives of R, G, B wrt Saturation slice + + # The first term in the addition is the case when the corresponding color + # from (r,g,b) was "MAX" + # -> derivative = MIN/square(MAX), MIN could be one of the other two colors + # The second term is the case when the corresponding color from + # (r,g,b) was "MIN" + # -> derivative = -1/MAX, MAX could be one of the other two colours. 
+ ds_dr = math_ops.cast(reds > 0, dtypes.float32) * \ + math_ops.add(red_biggest * \ + math_ops.add(green_smallest * greens, blue_smallest * blues) * \ + _CustomReciprocal(math_ops.square(reds)),\ + red_smallest * -1 * _CustomReciprocal((green_biggest * \ + greens) + (blue_biggest * blues))) + ds_dg = math_ops.cast(greens > 0, dtypes.float32) * \ + math_ops.add(green_biggest * \ + math_ops.add(red_smallest * reds, blue_smallest * blues) * \ + _CustomReciprocal(math_ops.square(greens)),\ + green_smallest * -1 * _CustomReciprocal((red_biggest * \ + reds) + (blue_biggest * blues))) + ds_db = math_ops.cast(blues > 0, dtypes.float32) * \ + math_ops.add(blue_biggest * \ + math_ops.add(green_smallest * greens, red_smallest * reds) * \ + _CustomReciprocal(math_ops.square(blues)),\ + blue_smallest * -1 * _CustomReciprocal((green_biggest * \ + greens) + (red_biggest * reds))) + + # Derivatives of R, G, B wrt Hue slice + + # Need to go case by case for each color. + # for red, dh_dr -> dh_dr_1 + dh_dr_2 + dh_dr_3 + dh_dr_4 + dh_dr_5 + # dh_dr_1 -> + # if red was MAX, then derivative = 60 * -1 * (G-B)/square(MAX-MIN) == 60 *\ + # -1 * (greens-blues) * reciprocal(square(saturation)) * \ + # reciprocal(square(value)) + # elif green was MAX, there are two subcases + # ie when red was MIN and when red was NOT MIN + # dh_dr_2 -> + # if red was MIN (use UV rule) -> 60 * ((1 * -1/(MAX-MIN)) +\ + # (B-R)*(-1/square(MAX-MIN) * -1)) == 60 * (blues - greens) *\ + # reciprocal(square(reds - greens)) + # dh_dr_3 -> + # if red was NOT MIN -> 60 * -1/MAX-MIN == -60 * reciprocal(greens-blues) + # elif blue was MAX, there are two subcases + # dh_dr_4 -> + # if red was MIN (similarly use the UV rule) -> 60 * (blues - greens) *\ + # reciprocal(square(blues - reds)) + # dh_dr_5 -> + # if red was NOT MIN -> 60 * 1/MAX-MIN == 60 * reciprocal(blues-greens) + dh_dr_1 = 60 * (math_ops.cast(reds > 0, dtypes.float32) * red_biggest * \ + -1 * \ + (greens - blues) * \ +
_CustomReciprocal(math_ops.square(saturation)) *\ + _CustomReciprocal(math_ops.square(value))) + dh_dr_2 = 60 * (math_ops.cast(greens > 0, dtypes.float32) * green_biggest * \ + red_smallest * (blues - greens) * \ + _CustomReciprocal(math_ops.square(reds - greens))) + dh_dr_3 = 60 * (math_ops.cast(greens > 0, dtypes.float32) * green_biggest * \ + blue_smallest * -1 * _CustomReciprocal(greens - blues)) + dh_dr_4 = 60 * (math_ops.cast(blues > 0, dtypes.float32) * blue_biggest * \ + red_smallest * (blues - greens) * \ + _CustomReciprocal(math_ops.square(blues - reds))) + dh_dr_5 = 60 * (math_ops.cast(blues > 0, dtypes.float32) * blue_biggest * \ + green_smallest * _CustomReciprocal(blues - greens)) + + dh_dr = dh_dr_1 + dh_dr_2 + dh_dr_3 + dh_dr_4 + dh_dr_5 + # Converting from degrees to [0,1] scale as specified in + # https://www.tensorflow.org/api_docs/python/tf/image/rgb_to_hsv + dh_dr = dh_dr / 360 + + # for green, dh_dg -> dh_dg_1 + dh_dg_2 + dh_dg_3 + dh_dg_4 + dh_dg_5 + # dh_dg_1 -> + # if green was MAX, then derivative = 60 * -1 * (B-R)/square(MAX-MIN) == 60 *\ + # -1 * (blues - reds) * reciprocal(square(saturation)) * \ + # reciprocal(square(value)) + # elif red was MAX, there are two subcases ie + # when green was MIN and when green was NOT MIN + # dh_dg_2 -> + # if green was MIN (use UV rule) -> 60 * ((1 * 1/(MAX-MIN)) + \ + # (greens-blues) * (-1/square(MAX-MIN) * -1)) == 60 * \ + # ((reciprocal(reds-greens) + (greens-blues) * \ + # reciprocal(square(reds-greens)))) + # dh_dg_3 -> + # if green was NOT MIN -> 60 * 1/MAX-MIN == 60 * reciprocal(reds - blues) + # elif blue was MAX, there are two subcases + # dh_dg_4 -> + # if green was MIN (similarly use the UV rule) -> 60 * -1 * \ + # (reciprocal(blues - greens) + (reds-greens)* -1 * \ + # reciprocal(square(blues-greens))) + # dh_dg_5 -> + # if green was NOT MIN -> 60 * -1/MAX-MIN == -60 * reciprocal(blues - reds) + dh_dg_1 = 60 * (math_ops.cast(greens > 0, dtypes.float32) * green_biggest * \ + -1 * (blues -
reds) * \ + _CustomReciprocal(math_ops.square(saturation))\ + * _CustomReciprocal(math_ops.square(value))) + dh_dg_2 = 60 * (math_ops.cast(reds > 0, dtypes.float32) * red_biggest * \ + green_smallest * (reds - blues) * \ + _CustomReciprocal(math_ops.square(reds - greens))) + dh_dg_3 = 60 * (math_ops.cast(reds > 0, dtypes.float32) * red_biggest * \ + blue_smallest * _CustomReciprocal(reds - blues)) + dh_dg_4 = 60 * (math_ops.cast(blues > 0, dtypes.float32) * blue_biggest * \ + green_smallest * (reds - blues) * \ + _CustomReciprocal(math_ops.square(blues - greens))) + dh_dg_5 = 60 * (math_ops.cast(blues > 0, dtypes.float32) * blue_biggest * \ + red_smallest * -1 * _CustomReciprocal(blues - reds)) + + dh_dg = dh_dg_1 + dh_dg_2 + dh_dg_3 + dh_dg_4 + dh_dg_5 + # Converting from degrees to [0,1] scale as specified in + # https://www.tensorflow.org/api_docs/python/tf/image/rgb_to_hsv + dh_dg = dh_dg / 360 + + # for blue, dh_db -> dh_db_1 + dh_db_2 + dh_db_3 + dh_db_4 + dh_db_5 + # dh_db_1 -> + # if blue was MAX, then derivative = 60 * -1 * (R-G)/square(MAX-MIN) == 60 *\ + # -1 * (reds - greens) * reciprocal(square(saturation)) * \ + # reciprocal(square(value)) + # elif red was MAX, there are two subcases + # ie when blue was MIN and when blue was NOT MIN + # dh_db_2 -> + # if blue was MIN (use UV rule) -> 60 * ((1 * -1/(MAX-MIN)) + \ + # (greens-blues) * (-1/square(MAX-MIN) * -1)) == 60 * (greens - reds) *\ + # reciprocal(square(reds - blues)) + # dh_db_3 -> + # if blue was NOT MIN -> 60 * -1/MAX-MIN == 60 * -1 * \ + # reciprocal(reds - greens) + # elif green was MAX, there are two subcases + # dh_db_4 -> + # if blue was MIN (similarly use the UV rule) -> 60 * -1 * \ + # (reciprocal(greens - blues) + (blues - reds) * -1 * \ + # reciprocal(square(greens - blues))) + # dh_db_5 -> + # if blue was NOT MIN -> 60 * 1/MAX-MIN == 60 * reciprocal(greens - reds) + dh_db_1 = 60 * (math_ops.cast(blues > 0, dtypes.float32) * blue_biggest * \ + -1 * \ + (reds - greens) * \ +
_CustomReciprocal(math_ops.square(saturation)) * \ + _CustomReciprocal(math_ops.square(value))) + dh_db_2 = 60 * (math_ops.cast(reds > 0, dtypes.float32) * red_biggest *\ + blue_smallest * (greens - reds) * \ + _CustomReciprocal(math_ops.square(reds - blues))) + dh_db_3 = 60 * (math_ops.cast(reds > 0, dtypes.float32) * red_biggest * \ + green_smallest * -1 * _CustomReciprocal(reds - greens)) + dh_db_4 = 60 * (math_ops.cast(greens > 0, dtypes.float32) * green_biggest * \ + blue_smallest * (greens - reds) * \ + _CustomReciprocal(math_ops.square(greens - blues))) + dh_db_5 = 60 * (math_ops.cast(greens > 0, dtypes.float32) * green_biggest * \ + red_smallest * _CustomReciprocal(greens - reds)) + + dh_db = dh_db_1 + dh_db_2 + dh_db_3 + dh_db_4 + dh_db_5 + # Converting from degrees to [0,1] scale as specified in + # https://www.tensorflow.org/api_docs/python/tf/image/rgb_to_hsv + dh_db = dh_db / 360 + + # Gradients wrt to inputs + dv_drgb = array_ops.stack( + [grad[..., 2] * dv_dr, grad[..., 2] * dv_dg, grad[..., 2] * dv_db], + axis=-1) + ds_drgb = array_ops.stack( + [grad[..., 1] * ds_dr, grad[..., 1] * ds_dg, grad[..., 1] * ds_db], + axis=-1) + dh_drgb = array_ops.stack( + [grad[..., 0] * dh_dr, grad[..., 0] * dh_dg, grad[..., 0] * dh_db], + axis=-1) + + gradient_input = math_ops.add(math_ops.add(dv_drgb, ds_drgb), dh_drgb) + return gradient_input diff --git a/tensorflow/python/ops/image_grad_test.py b/tensorflow/python/ops/image_grad_test.py index 43d9699980e..7f4d9be113d 100644 --- a/tensorflow/python/ops/image_grad_test.py +++ b/tensorflow/python/ops/image_grad_test.py @@ -24,9 +24,13 @@ from tensorflow.python.eager import backprop from tensorflow.python.framework import constant_op from tensorflow.python.framework import test_util from tensorflow.python.ops import gradient_checker +from tensorflow.python.ops import gradient_checker_v2 from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import image_ops +from tensorflow.python.ops import 
gen_image_ops from tensorflow.python.platform import test +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import array_ops @test_util.for_all_test_methods(test_util.disable_xla, @@ -457,5 +461,93 @@ class CropAndResizeOpTest(test.TestCase): self.assertLess(err, 2e-3) +@test_util.run_all_in_graph_and_eager_modes +class RGBToHSVOpTest(test.TestCase): + + TYPES = [np.float32, np.float64] + + def testShapeIsCorrectAfterOp(self): + in_shape = [2, 20, 30, 3] + out_shape = [2, 20, 30, 3] + + for nptype in self.TYPES: + x = np.random.randint(0, high=255, size=[2, 20, 30, 3]).astype(nptype) + with self.cached_session(use_gpu=True): + rgb_input_tensor = constant_op.constant(x, shape=in_shape) + hsv_out = gen_image_ops.rgb_to_hsv(rgb_input_tensor) + self.assertEqual(out_shape, list(hsv_out.get_shape())) + + hsv_out = self.evaluate(hsv_out) + self.assertEqual(out_shape, list(hsv_out.shape)) + + def testRGBToHSVGradSimpleCase(self): + + def f(x): + return gen_image_ops.rgb_to_hsv(x) + + # Building a simple input tensor to avoid any discontinuity + x = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, + 0.9]]).astype(np.float32) + rgb_input_tensor = constant_op.constant(x, shape=x.shape) + # Computing Analytical and Numerical gradients of f(x) + analytical, numerical = gradient_checker_v2.compute_gradient( + f, [rgb_input_tensor]) + self.assertAllClose(numerical, analytical, atol=1e-4) + + def testRGBToHSVGradRandomCase(self): + + def f(x): + return gen_image_ops.rgb_to_hsv(x) + + np.random.seed(0) + # Building a simple input tensor to avoid any discontinuity + x = np.random.rand(1, 5, 5, 3).astype(np.float32) + rgb_input_tensor = constant_op.constant(x, shape=x.shape) + # Computing Analytical and Numerical gradients of f(x) + self.assertLess( + gradient_checker_v2.max_error( + *gradient_checker_v2.compute_gradient(f, [rgb_input_tensor])), 1e-4) + + def testRGBToHSVGradSpecialCaseRGreatest(self): + # This test tests a specific subset of the input 
space + # with a dummy function implemented with native TF operations. + in_shape = [2, 10, 20, 3] + + def f(x): + return gen_image_ops.rgb_to_hsv(x) + + def f_dummy(x): + # This dummy function is a implementation of RGB to HSV using + # primitive TF functions for one particular case when R>G>B. + r = x[..., 0] + g = x[..., 1] + b = x[..., 2] + # Since MAX = r and MIN = b, we get the following h,s,v values. + v = r + s = 1 - math_ops.div_no_nan(b, r) + h = 60 * math_ops.div_no_nan(g - b, r - b) + h = h / 360 + return array_ops.stack([h, s, v], axis=-1) + + # Building a custom input tensor where R>G>B + x_reds = np.ones((in_shape[0], in_shape[1], in_shape[2])).astype(np.float32) + x_greens = 0.5 * np.ones( + (in_shape[0], in_shape[1], in_shape[2])).astype(np.float32) + x_blues = 0.2 * np.ones( + (in_shape[0], in_shape[1], in_shape[2])).astype(np.float32) + x = np.stack([x_reds, x_greens, x_blues], axis=-1) + rgb_input_tensor = constant_op.constant(x, shape=in_shape) + + # Computing Analytical and Numerical gradients of f(x) + analytical, numerical = gradient_checker_v2.compute_gradient( + f, [rgb_input_tensor]) + # Computing Analytical and Numerical gradients of f_dummy(x) + analytical_dummy, numerical_dummy = gradient_checker_v2.compute_gradient( + f_dummy, [rgb_input_tensor]) + self.assertAllClose(numerical, analytical, atol=1e-4) + self.assertAllClose(analytical_dummy, analytical, atol=1e-4) + self.assertAllClose(numerical_dummy, numerical, atol=1e-4) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 211231714c6..94406a28270 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -43,9 +43,6 @@ from tensorflow.python.util.tf_export import tf_export ops.NotDifferentiable('RandomCrop') # TODO(b/31222613): This op may be differentiable, and there may be # latent bugs here. 
-ops.NotDifferentiable('RGBToHSV') -# TODO(b/31222613): This op may be differentiable, and there may be -# latent bugs here. ops.NotDifferentiable('HSVToRGB') ops.NotDifferentiable('DrawBoundingBoxes') ops.NotDifferentiable('SampleDistortedBoundingBox') @@ -639,13 +636,8 @@ def central_crop(image, central_fraction): image: Either a 3-D float Tensor of shape [height, width, depth], or a 4-D Tensor of shape [batch_size, height, width, depth]. central_fraction: float (0, 1], fraction of size to crop - - Usage Example: - ```python - >> import tensorflow as tf - >> x = tf.random.normal(shape=(256, 256, 3)) - >> tf.image.central_crop(x, 0.5) - ``` + Usage Example: ```python >> import tensorflow as tf >> x = + tf.random.normal(shape=(256, 256, 3)) >> tf.image.central_crop(x, 0.5) ``` Raises: ValueError: if central_crop_fraction is not within (0, 1]. @@ -894,9 +886,9 @@ def crop_to_bounding_box(image, offset_height, offset_width, target_height, return cropped -@tf_export('image.resize_with_crop_or_pad', - v1=['image.resize_with_crop_or_pad', - 'image.resize_image_with_crop_or_pad']) +@tf_export( + 'image.resize_with_crop_or_pad', + v1=['image.resize_with_crop_or_pad', 'image.resize_image_with_crop_or_pad']) def resize_image_with_crop_or_pad(image, target_height, target_width): """Crops and/or pads an image to a target width and height. @@ -1668,6 +1660,7 @@ def adjust_contrast(images, contrast_factor): @tf_export('image.adjust_gamma') def adjust_gamma(image, gamma=1, gain=1): """Performs Gamma Correction on the input image. + Also known as Power Law Transform. This function converts the input images at first to float representation, then transforms them pixelwise according to the equation `Out = gain * In**gamma`, @@ -1677,6 +1670,7 @@ def adjust_gamma(image, gamma=1, gain=1): image : RGB image or images to adjust. gamma : A scalar or tensor. Non negative real number. gain : A scalar or tensor. The constant multiplier. + Returns: A Tensor. 
A Gamma-adjusted tensor of the same shape and type as `image`. Usage Example: @@ -1745,14 +1739,14 @@ def convert_image_dtype(image, dtype, saturate=False, name=None): Returns: `image`, converted to `dtype`. - + Usage Example: ```python >> import tensorflow as tf >> x = tf.random.normal(shape=(256, 256, 3), dtype=tf.float32) >> tf.image.convert_image_dtype(x, dtype=tf.float16, saturate=False) ``` - + Raises: AttributeError: Raises an attribute error when dtype is neither float nor integer @@ -1924,7 +1918,7 @@ def adjust_hue(image, delta, name=None): Returns: Adjusted image(s), same shape and DType as `image`. - + Usage Example: ```python >> import tensorflow as tf @@ -2202,11 +2196,11 @@ def decode_image(contents, the decoded image. dtype: The desired DType of the returned `Tensor`. name: A name for the operation (optional) - expand_animations: Controls the shape of the returned op's output. - If `True`, the returned op will produce a 3-D tensor for PNG, JPEG, and - BMP files; and a 4-D tensor for all GIFs, whether animated or not. - If, `False`, the returned op will produce a 3-D tensor for all file - types and will truncate animated GIFs to the first frame. + expand_animations: Controls the shape of the returned op's output. If + `True`, the returned op will produce a 3-D tensor for PNG, JPEG, and BMP + files; and a 4-D tensor for all GIFs, whether animated or not. If, + `False`, the returned op will produce a 3-D tensor for all file types and + will truncate animated GIFs to the first frame. 
Returns: `Tensor` with type `dtype` and a 3- or 4-dimensional shape, depending on @@ -2621,10 +2615,8 @@ def non_max_suppression(boxes, iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold') score_threshold = ops.convert_to_tensor( score_threshold, name='score_threshold') - return gen_image_ops.non_max_suppression_v3(boxes, scores, - max_output_size, - iou_threshold, - score_threshold) + return gen_image_ops.non_max_suppression_v3(boxes, scores, max_output_size, + iou_threshold, score_threshold) @tf_export('image.non_max_suppression_with_scores') @@ -2683,8 +2675,8 @@ def non_max_suppression_with_scores(boxes, remove boxes based on score. soft_nms_sigma: A scalar float representing the Soft NMS sigma parameter; See Bodla et al, https://arxiv.org/abs/1704.04503). When - `soft_nms_sigma=0.0` (which is default), we fall back to standard (hard) - NMS. + `soft_nms_sigma=0.0` (which is default), we fall back to standard (hard) + NMS. name: A name for the operation (optional). Returns: @@ -2701,10 +2693,15 @@ def non_max_suppression_with_scores(boxes, score_threshold, name='score_threshold') soft_nms_sigma = ops.convert_to_tensor( soft_nms_sigma, name='soft_nms_sigma') - (selected_indices, selected_scores, _ - ) = gen_image_ops.non_max_suppression_v5( - boxes, scores, max_output_size, iou_threshold, score_threshold, - soft_nms_sigma, pad_to_max_output_size=False) + (selected_indices, selected_scores, + _) = gen_image_ops.non_max_suppression_v5( + boxes, + scores, + max_output_size, + iou_threshold, + score_threshold, + soft_nms_sigma, + pad_to_max_output_size=False) return selected_indices, selected_scores @@ -3408,8 +3405,9 @@ def image_gradients(image): ValueError: If `image` is not a 4D tensor. 
""" if image.get_shape().ndims != 4: - raise ValueError('image_gradients expects a 4D tensor ' - '[batch_size, h, w, d], not %s.', image.get_shape()) + raise ValueError( + 'image_gradients expects a 4D tensor ' + '[batch_size, h, w, d], not %s.', image.get_shape()) image_shape = array_ops.shape(image) batch_size, height, width, depth = array_ops.unstack(image_shape) dy = image[:, 1:, :, :] - image[:, :-1, :, :] diff --git a/tensorflow/python/ops/init_ops_test.py b/tensorflow/python/ops/init_ops_test.py index 2955a0bf69f..ae8bfbdbdd0 100644 --- a/tensorflow/python/ops/init_ops_test.py +++ b/tensorflow/python/ops/init_ops_test.py @@ -178,6 +178,10 @@ class InitializersTest(test.TestCase): @test_util.run_gpu_only def testVariablePlacementWithOrthogonalInitializer(self): + + if test.is_built_with_rocm(): + self.skipTest('Disable subtest on ROCm due to missing QR op support') + with ops.Graph().as_default() as g: with ops.device('gpu:0'): variable_scope.get_variable( diff --git a/tensorflow/python/ops/init_ops_v2_test.py b/tensorflow/python/ops/init_ops_v2_test.py index fceba1d04a2..9a8865fbc35 100644 --- a/tensorflow/python/ops/init_ops_v2_test.py +++ b/tensorflow/python/ops/init_ops_v2_test.py @@ -366,6 +366,10 @@ class OrthogonalInitializerTest(InitializersTest): @test_util.run_in_graph_and_eager_modes def testShapesValues(self): + + if test.is_built_with_rocm(): + self.skipTest("Disable subtest on ROCm due to missing QR op support") + for shape in [(10, 10), (10, 9, 8), (100, 5, 5), (50, 40), (40, 50)]: init = init_ops_v2.Orthogonal() tol = 1e-5 diff --git a/tensorflow/python/ops/linalg/linear_operator_util.py b/tensorflow/python/ops/linalg/linear_operator_util.py index 1057c28a4bd..573d373ea93 100644 --- a/tensorflow/python/ops/linalg/linear_operator_util.py +++ b/tensorflow/python/ops/linalg/linear_operator_util.py @@ -22,6 +22,7 @@ import numpy as np from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from 
tensorflow.python.module import module from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops @@ -29,7 +30,6 @@ from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables as variables_module from tensorflow.python.ops.linalg import linalg_impl as linalg -from tensorflow.python.util import tf_inspect ################################################################################ @@ -37,43 +37,14 @@ from tensorflow.python.util import tf_inspect ################################################################################ -def convert_immutable_to_tensor(value, dtype=None, dtype_hint=None, name=None): - """Converts the given `value` to a `Tensor` only if input is immutable. +def convert_nonref_to_tensor(value, dtype=None, dtype_hint=None, name=None): + """Converts the given `value` to a `Tensor` if input is nonreference type. This function converts Python objects of various types to `Tensor` objects - except if the input is mutable. A mutable object is characterized by - `tensor_util.is_mutable` and is, roughly speaking, any object which is a - `tf.Variable` or known to depend on a `tf.Variable`. It accepts `Tensor` - objects, numpy arrays, Python lists, and Python scalars. This function does - not descend through structured input--it only verifies if the input is mutable - per `tensor_util.is_mutable`. For example: - - ```python - from tensorflow_probability.python.internal import tensor_util - - x = tf.Variable(0.) - y = tensor_util.convert_immutable_to_tensor(x) - x is y - # ==> True - - x = tf.constant(0.) - y = tensor_util.convert_immutable_to_tensor(x) - x is y - # ==> True - - x = np.array(0.) 
- y = tensor_util.convert_immutable_to_tensor(x) - x is y - # ==> False - tf.is_tensor(y) - # ==> True - ``` - - This function can be useful when composing a new operation in Python - (such as `my_func` in the example above). All standard Python op - constructors apply this function to each of their Tensor-valued - inputs, which allows those ops to accept numpy arrays, Python lists, - and scalars in addition to `Tensor` objects. + except if the input has nonreference semantics. Reference semantics are + characterized by `is_ref` and is any object which is a + `tf.Variable` or instance of `tf.Module`. This function accepts any input + which `tf.convert_to_tensor` would also. Note: This function diverges from default Numpy behavior for `float` and `string` types when `None` is present in a Python list or scalar. Rather @@ -81,13 +52,13 @@ def convert_immutable_to_tensor(value, dtype=None, dtype_hint=None, name=None): Args: value: An object whose type has a registered `Tensor` conversion function. - dtype: Optional element type for the returned tensor. If missing, the type - is inferred from the type of `value`. - dtype_hint: Optional element type for the returned tensor, used when dtype - is None. In some cases, a caller may not have a dtype in mind when - converting to a tensor, so dtype_hint can be used as a soft preference. - If the conversion to `dtype_hint` is not possible, this argument has no - effect. + dtype: Optional element type for the returned tensor. If missing, the + type is inferred from the type of `value`. + dtype_hint: Optional element type for the returned tensor, + used when dtype is None. In some cases, a caller may not have a + dtype in mind when converting to a tensor, so dtype_hint + can be used as a soft preference. If the conversion to + `dtype_hint` is not possible, this argument has no effect. name: Optional name to use if a new `Tensor` is created. 
Returns: @@ -97,42 +68,93 @@ def convert_immutable_to_tensor(value, dtype=None, dtype_hint=None, name=None): TypeError: If no conversion function is registered for `value` to `dtype`. RuntimeError: If a registered conversion function returns an invalid value. ValueError: If the `value` is a tensor not of given `dtype` in graph mode. + + + #### Examples: + + ```python + + x = tf.Variable(0.) + y = convert_nonref_to_tensor(x) + x is y + # ==> True + + x = tf.constant(0.) + y = convert_nonref_to_tensor(x) + x is y + # ==> True + + x = np.array(0.) + y = convert_nonref_to_tensor(x) + x is y + # ==> False + tf.is_tensor(y) + # ==> True + + x = tfp.util.DeferredTensor(lambda x: x, 13.37) + y = convert_nonref_to_tensor(x) + x is y + # ==> True + tf.is_tensor + # ==> False + tf.equal(y, 13.37) + # ==> True + ``` + """ # We explicitly do not use a tf.name_scope to avoid graph clutter. if value is None: return None - if is_mutable(value): - if not hasattr(value, "dtype"): - raise ValueError("Mutable type ({}) must implement `dtype` property " - "({}).".format(type(value).__name__, value)) - if not hasattr(value, "shape"): - raise ValueError("Mutable type ({}) must implement `shape` property " - "({}).".format(type(value).__name__, value)) - if dtype is not None and dtype.base_dtype != value.dtype.base_dtype: - raise TypeError("Mutable type must be of dtype '{}' but is '{}'.".format( - dtype.base_dtype.name, value.dtype.base_dtype.name)) + if is_ref(value): + if dtype is None: + return value + dtype_base = base_dtype(dtype) + value_dtype_base = base_dtype(value.dtype) + if dtype_base != value_dtype_base: + raise TypeError('Mutable type must be of dtype "{}" but is "{}".'.format( + dtype_name(dtype_base), dtype_name(value_dtype_base))) return value return ops.convert_to_tensor( value, dtype=dtype, dtype_hint=dtype_hint, name=name) -def is_mutable(x): - """Evaluates if the object is known to have `tf.Variable` ancestors. 
+def base_dtype(dtype): + """Returns a non-reference `dtype` based on this `dtype`.""" + dtype = dtypes.as_dtype(dtype) + if hasattr(dtype, "base_dtype"): + return dtype.base_dtype + return dtype - An object is deemed mutable if it is a `tf.Variable` instance or has a - properties `variables` or `trainable_variables` one of which is non-empty (as - might be the case for a subclasses of `tf.Module` or a Keras layer). + +def dtype_name(dtype): + """Returns the string name for this `dtype`.""" + dtype = dtypes.as_dtype(dtype) + if hasattr(dtype, "name"): + return dtype.name + if hasattr(dtype, "__name__"): + return dtype.__name__ + return str(dtype) + + +def is_ref(x): + """Evaluates if the object has reference semantics. + + An object is deemed "reference" if it is a `tf.Variable` instance or is + derived from a `tf.Module` with `dtype` and `shape` properties. Args: - x: Python object which may or may not have a `tf.Variable` ancestor. + x: Any object. Returns: - is_mutable: Python `bool` indicating input is mutable or is known to depend - on mutable objects. + is_ref: Python `bool` indicating input is has nonreference semantics, i.e., + is a `tf.Variable` or a `tf.Module` with `dtype` and `shape` properties. """ - return ((tf_inspect.isclass(variables_module.Variable) and - isinstance(x, variables_module.Variable)) or - getattr(x, "variables", ()) or getattr(x, "trainable_variables", ())) + return ( + # Note: we check that tf.Variable is a class because we might be using a + # different backend other than TF. 
+ isinstance(x, variables_module.Variable) or + (isinstance(x, module.Module) and hasattr(x, "dtype") and + hasattr(x, "shape"))) ################################################################################ diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index b822519ed57..914e5748534 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -605,7 +605,7 @@ def norm(tensor, perm_after = map_fn.map_fn( lambda i: math_ops.cast( array_ops.squeeze( - array_ops.where(math_ops.equal(perm_before, i))), + array_ops.where_v2(math_ops.equal(perm_before, i))), dtype=dtypes.int32), axes) permed = array_ops.transpose(tensor, perm=perm_before) matrix_2_norm = array_ops.expand_dims( diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py index af7d4b0e4a1..3b726a611fa 100644 --- a/tensorflow/python/ops/lookup_ops.py +++ b/tensorflow/python/ops/lookup_ops.py @@ -171,7 +171,7 @@ class InitializableLookupTableBase(LookupInterface): self._initializer = self._track_trackable(initializer, "_initializer") with ops.init_scope(): self._resource_handle = self._create_resource() - self._init_op = self._initialize() + self._init_op = self._initialize() def _initialize(self): return self._initializer.initialize(self) @@ -420,9 +420,10 @@ class KeyValueTensorInitializer(TableInitializerBase): value_dtype: The `values` data type. Used when `values` is a python array. name: A name for the operation (optional). 
""" - self._keys = ops.convert_to_tensor(keys, dtype=key_dtype, name="keys") - self._values = ops.convert_to_tensor( - values, dtype=value_dtype, name="values") + with ops.init_scope(): + self._keys = ops.convert_to_tensor(keys, dtype=key_dtype, name="keys") + self._values = ops.convert_to_tensor( + values, dtype=value_dtype, name="values") self._name = name if name is not None else "key_value_init" if context.executing_eagerly(): # Ensure a unique name when eager execution is enabled to avoid spurious diff --git a/tensorflow/python/ops/nn_test.py b/tensorflow/python/ops/nn_test.py index df07721e5d3..c59551f1349 100644 --- a/tensorflow/python/ops/nn_test.py +++ b/tensorflow/python/ops/nn_test.py @@ -313,7 +313,7 @@ class DropoutTest(test_lib.TestCase): num_iter = 10 for keep_prob in [0.1, 0.5, 0.8]: t = constant_op.constant(1.0, shape=[x_dim, y_dim], dtype=dtypes.float32) - dropout = nn_ops.dropout(t, keep_prob) + dropout = nn_ops.dropout(t, rate=(1 - keep_prob)) final_count = 0 self.assertEqual([x_dim, y_dim], dropout.get_shape()) for _ in xrange(0, num_iter): @@ -340,7 +340,7 @@ class DropoutTest(test_lib.TestCase): num_iter = 10 for keep_prob in [0.1, 0.5, 0.8]: t = constant_op.constant(1.0, shape=[x_dim, y_dim], dtype=dtypes.float32) - dropout = nn_ops.dropout(t, keep_prob, noise_shape=[x_dim, 1]) + dropout = nn_ops.dropout(t, rate=(1 - keep_prob), noise_shape=[x_dim, 1]) self.assertEqual([x_dim, y_dim], dropout.get_shape()) final_count = 0 for _ in xrange(0, num_iter): @@ -364,7 +364,7 @@ class DropoutTest(test_lib.TestCase): num_iter = 10 for keep_prob in [0.1, 0.5, 0.8]: t = constant_op.constant(1.0, shape=[x_dim, y_dim], dtype=dtypes.float32) - dropout = nn_ops.dropout(t, keep_prob, noise_shape=[x_dim, 1]) + dropout = nn_ops.dropout(t, rate=(1 - keep_prob), noise_shape=[x_dim, 1]) self.assertEqual([x_dim, y_dim], dropout.get_shape()) for _ in xrange(0, num_iter): value = self.evaluate(dropout) @@ -409,7 +409,9 @@ class DropoutTest(test_lib.TestCase): 
keep_prob = 0.5 x = constant_op.constant(1.0, shape=[x_dim, y_dim], dtype=dtypes.float32) dropout_x = nn_ops.dropout( - x, keep_prob, noise_shape=array_ops.placeholder(dtypes.int32)) + x, + rate=(1 - keep_prob), + noise_shape=array_ops.placeholder(dtypes.int32)) self.assertEqual(x.get_shape(), dropout_x.get_shape()) def testPartialShapedDropout(self): @@ -419,7 +421,7 @@ class DropoutTest(test_lib.TestCase): for keep_prob in [0.1, 0.5, 0.8]: t = constant_op.constant(1.0, shape=[x_dim, y_dim], dtype=dtypes.float32) # Set noise_shape=[None, 1] which means [x_dim, 1]. - dropout = nn_ops.dropout(t, keep_prob, noise_shape=[None, 1]) + dropout = nn_ops.dropout(t, rate=(1 - keep_prob), noise_shape=[None, 1]) self.assertEqual([x_dim, y_dim], dropout.get_shape()) final_count = 0 for _ in xrange(0, num_iter): @@ -478,22 +480,23 @@ class DropoutTest(test_lib.TestCase): keep_prob = 0.5 t = constant_op.constant(1.0, shape=[x_dim, y_dim], dtype=dtypes.float32) with self.assertRaises(ValueError): - _ = nn_ops.dropout(t, keep_prob, noise_shape=[x_dim, y_dim + 10]) + _ = nn_ops.dropout( + t, rate=(1 - keep_prob), noise_shape=[x_dim, y_dim + 10]) with self.assertRaises(ValueError): - _ = nn_ops.dropout(t, keep_prob, noise_shape=[x_dim, y_dim, 5]) + _ = nn_ops.dropout(t, rate=(1 - keep_prob), noise_shape=[x_dim, y_dim, 5]) with self.assertRaises(ValueError): - _ = nn_ops.dropout(t, keep_prob, noise_shape=[x_dim + 3]) + _ = nn_ops.dropout(t, rate=(1 - keep_prob), noise_shape=[x_dim + 3]) with self.assertRaises(ValueError): - _ = nn_ops.dropout(t, keep_prob, noise_shape=[x_dim]) + _ = nn_ops.dropout(t, rate=(1 - keep_prob), noise_shape=[x_dim]) # test that broadcasting proceeds - _ = nn_ops.dropout(t, keep_prob, noise_shape=[y_dim]) - _ = nn_ops.dropout(t, keep_prob, noise_shape=[1, y_dim]) - _ = nn_ops.dropout(t, keep_prob, noise_shape=[x_dim, 1]) - _ = nn_ops.dropout(t, keep_prob, noise_shape=[1, 1]) + _ = nn_ops.dropout(t, rate=(1 - keep_prob), noise_shape=[y_dim]) + _ = 
nn_ops.dropout(t, rate=(1 - keep_prob), noise_shape=[1, y_dim]) + _ = nn_ops.dropout(t, rate=(1 - keep_prob), noise_shape=[x_dim, 1]) + _ = nn_ops.dropout(t, rate=(1 - keep_prob), noise_shape=[1, 1]) def testNoDropoutFast(self): x = array_ops.zeros((5,)) - y = nn_ops.dropout(x, keep_prob=1) + y = nn_ops.dropout(x, rate=0) self.assertTrue(x is y) y = nn_ops.dropout_v2(x, rate=0) @@ -1291,6 +1294,8 @@ class AvgPoolTest(test_lib.TestCase): self.assertAllEqual(self.evaluate(y1), self.evaluate(y2)) def test3DTensor(self): + if test_lib.is_built_with_rocm(): + self.skipTest("Pooling with 3D tensors is not supported in ROCm") x = array_ops.ones([3, 7, 6, 6, 5]) ksize = 2 strides = 2 @@ -1301,6 +1306,8 @@ class AvgPoolTest(test_lib.TestCase): self.assertAllEqual(self.evaluate(y1), self.evaluate(y2)) def test3DNumpy(self): + if test_lib.is_built_with_rocm(): + self.skipTest("Pooling with 3D tensors is not supported in ROCm") x = np.ones([3, 7, 6, 6, 5], dtype=np.float32) ksize = 2 strides = 2 @@ -1355,6 +1362,8 @@ class MaxPoolTest(test_lib.TestCase): self.assertAllEqual(self.evaluate(y1), self.evaluate(y2)) def test3DTensor(self): + if test_lib.is_built_with_rocm(): + self.skipTest("Pooling with 3D tensors is not supported in ROCm") x = array_ops.ones([3, 7, 6, 6, 5]) ksize = 2 strides = 2 @@ -1365,6 +1374,8 @@ class MaxPoolTest(test_lib.TestCase): self.assertAllEqual(self.evaluate(y1), self.evaluate(y2)) def test3DNumpy(self): + if test_lib.is_built_with_rocm(): + self.skipTest("Pooling with 3D tensors is not supported in ROCm") x = np.ones([3, 7, 6, 6, 5], dtype=np.float32) ksize = 2 strides = 2 diff --git a/tensorflow/python/ops/parallel_for/control_flow_ops.py b/tensorflow/python/ops/parallel_for/control_flow_ops.py index 787d03fc9c0..7c569560d43 100644 --- a/tensorflow/python/ops/parallel_for/control_flow_ops.py +++ b/tensorflow/python/ops/parallel_for/control_flow_ops.py @@ -113,6 +113,21 @@ def _flatten_first_two_dims(x): PFOR_CONFIG_ARG = "pfor_config" +def 
_is_under_xla_context(): + """Check if we are currently inside an XLA compile context.""" + g = ops.get_default_graph() + while g is not None: + control_flow_context = g._get_control_flow_context() # pylint: disable=protected-access + while control_flow_context is not None: + if control_flow_context.IsXLAContext(): + return True + else: + control_flow_context = control_flow_context.outer_context + # If g is a FuncGraph, get its outer_graph. + g = getattr(g, "outer_graph", None) + return False + + def pfor(loop_fn, iters, parallel_iterations=None): """Equivalent to running `loop_fn` `iters` times and stacking the outputs. @@ -162,13 +177,10 @@ def pfor(loop_fn, iters, parallel_iterations=None): """ def f(): return _pfor_impl(loop_fn, iters, parallel_iterations=parallel_iterations) - control_flow_context = ops.get_default_graph()._get_control_flow_context() # pylint: disable=protected-access # Note that we wrap into a tf.function if in eager execution mode or under # XLA compilation. The latter is so that we don't compile operations like # tf.placeholder that are created by the loop body. 
- if (context.executing_eagerly() or - (control_flow_context is not None and - control_flow_context.IsXLAContext())): + if context.executing_eagerly() or _is_under_xla_context(): f = function.defun(f) return f() diff --git a/tensorflow/python/ops/parallel_for/control_flow_ops_test.py b/tensorflow/python/ops/parallel_for/control_flow_ops_test.py index f795b7c7a2f..ac45a89473f 100644 --- a/tensorflow/python/ops/parallel_for/control_flow_ops_test.py +++ b/tensorflow/python/ops/parallel_for/control_flow_ops_test.py @@ -354,6 +354,8 @@ class NNTest(PForTestCase): self._test_loop_fn(loop_fn, 3, loop_fn_dtypes=[dtypes.float32] * 3) def test_max_pool3d(self): + if test.is_built_with_rocm(): + self.skipTest("Pooling with 3D tensors is not supported in ROCm") with backprop.GradientTape(persistent=True) as g: x = random_ops.random_uniform([3, 3, 2, 12, 12, 3]) g.watch(x) diff --git a/tensorflow/python/ops/parallel_for/xla_control_flow_ops_test.py b/tensorflow/python/ops/parallel_for/xla_control_flow_ops_test.py index 6d2bad23b94..0b1678823f9 100644 --- a/tensorflow/python/ops/parallel_for/xla_control_flow_ops_test.py +++ b/tensorflow/python/ops/parallel_for/xla_control_flow_ops_test.py @@ -22,6 +22,7 @@ from __future__ import print_function from tensorflow.python.compiler.xla import xla from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.parallel_for import control_flow_ops as pfor_control_flow_ops from tensorflow.python.ops.parallel_for.test_util import PForTestCase @@ -39,10 +40,31 @@ class PForTest(PForTestCase): def vectorized_compute(x): return pfor_control_flow_ops.vectorized_map(compute, x) - result = xla.compile(vectorized_compute, - inputs=[array_ops.ones((10, 5, 3))]) + result = xla.compile( + vectorized_compute, inputs=[array_ops.ones((10, 5, 3))]) self.run_and_assert_equal(result, array_ops.ones((10, 1, 
3))) + def test_xla_while_loop(self): -if __name__ == "__main__": + def compute(x): + return math_ops.reduce_mean(x, axis=0, keepdims=True) + + def vectorized_compute(x, i): + inp = array_ops.gather(x, i) + output = pfor_control_flow_ops.vectorized_map(compute, inp) + output.set_shape([5, 1]) + return output + + def while_compute(x): + return control_flow_ops.while_loop_v2( + lambda i, _: i < 10, + lambda i, y: (i + 1, y + vectorized_compute(x, i)), + (0, array_ops.zeros([5, 1])))[1] + + result = xla.compile(while_compute, inputs=[array_ops.ones((10, 5, 3))]) + expected = array_ops.ones([5, 1]) * 10 + self.run_and_assert_equal(expected, result) + + +if __name__ == '__main__': test.main() diff --git a/tensorflow/python/ops/ragged/ragged_string_ops.py b/tensorflow/python/ops/ragged/ragged_string_ops.py index e37b345ff4d..4b225da2edd 100644 --- a/tensorflow/python/ops/ragged/ragged_string_ops.py +++ b/tensorflow/python/ops/ragged/ragged_string_ops.py @@ -36,9 +36,9 @@ def string_bytes_split(input, name=None): # pylint: disable=redefined-builtin Examples: ```python - >>> tf.strings.to_bytes('hello') + >>> tf.strings.bytes_split('hello') ['h', 'e', 'l', 'l', 'o'] - >>> tf.strings.to_bytes(['hello', '123']) + >>> tf.strings.bytes_split(['hello', '123']) ``` @@ -53,7 +53,7 @@ def string_bytes_split(input, name=None): # pylint: disable=redefined-builtin name: A name for the operation (optional). Returns: - A `RaggedTensor` of rank `N+1`: the bytes that make up the soruce strings. + A `RaggedTensor` of rank `N+1`: the bytes that make up the source strings. 
""" with ops.name_scope(name, "StringsByteSplit", [input]): input = ragged_tensor.convert_to_tensor_or_ragged_tensor(input, diff --git a/tensorflow/python/ops/ragged/ragged_tensor.py b/tensorflow/python/ops/ragged/ragged_tensor.py index 99205128693..d06819cbf90 100644 --- a/tensorflow/python/ops/ragged/ragged_tensor.py +++ b/tensorflow/python/ops/ragged/ragged_tensor.py @@ -980,6 +980,44 @@ class RaggedTensor(composite_tensor.CompositeTensor): with ops.name_scope(name, "RaggedValueRowIds", [self]): return segment_id_ops.row_splits_to_segment_ids(self.row_splits) + def nested_value_rowids(self, name=None): + """Returns a tuple containing the value_rowids for all ragged dimensions. + + `rt.nested_value_rowids` is a tuple containing the `value_rowids` tensors + for + all ragged dimensions in `rt`, ordered from outermost to innermost. In + particular, `rt.nested_value_rowids = (rt.value_rowids(),) + value_ids` + where: + + * `value_ids = ()` if `rt.values` is a `Tensor`. + * `value_ids = rt.values.nested_value_rowids` otherwise. + + Args: + name: A name prefix for the returned tensors (optional). + + Returns: + A `tuple` of 1-D integer `Tensor`s. + + #### Example: + + ```python + >>> rt = ragged.constant([[[[3, 1, 4, 1], [], [5, 9, 2]], [], [[6], []]]]) + >>> for i, ids in enumerate(rt.nested_value_rowids()): + ... print('row ids for dimension %d: %s' % (i+1, ids)) + row ids for dimension 1: [0] + row ids for dimension 2: [0, 0, 0, 2, 2] + row ids for dimension 3: [0, 0, 0, 0, 2, 2, 2, 3] + ``` + + """ + with ops.name_scope(name, "RaggedNestedValueRowIds", [self]): + rt_nested_ids = [self.value_rowids()] + rt_values = self.values + while isinstance(rt_values, RaggedTensor): + rt_nested_ids.append(rt_values.value_rowids()) + rt_values = rt_values.values + return tuple(rt_nested_ids) + def nrows(self, out_type=None, name=None): """Returns the number of rows in this ragged tensor. 
@@ -1106,8 +1144,8 @@ class RaggedTensor(composite_tensor.CompositeTensor): def nested_row_lengths(self, name=None): """Returns a tuple containing the row_lengths for all ragged dimensions. - `rtnested_row_lengths()` is a tuple containing the `row_lengths` tensors for - all ragged dimensions in `rt`, ordered from outermost to innermost. + `rt.nested_row_lengths()` is a tuple containing the `row_lengths` tensors + for all ragged dimensions in `rt`, ordered from outermost to innermost. Args: name: A name prefix for the returned tensors (optional). diff --git a/tensorflow/python/ops/ragged/ragged_tensor_test.py b/tensorflow/python/ops/ragged/ragged_tensor_test.py index 60f04c4dc9b..eb8767b56e0 100644 --- a/tensorflow/python/ops/ragged/ragged_tensor_test.py +++ b/tensorflow/python/ops/ragged/ragged_tensor_test.py @@ -651,6 +651,9 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11], [12, 13]]) self.assertLen(rt.nested_row_splits, 1) self.assertAllEqual(rt.nested_row_splits[0], [0, 2, 2, 5, 6, 7]) + self.assertLen(rt.nested_value_rowids(), 1) + + self.assertAllEqual(rt.nested_value_rowids()[0], [0, 0, 2, 2, 2, 3, 4]) def testRaggedTensorAccessors_3d_with_ragged_rank_2(self): values = constant_op.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g']) @@ -685,6 +688,9 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, self.assertLen(rt.nested_row_splits, 2) self.assertAllEqual(rt.nested_row_splits[0], [0, 2, 3, 3, 5]) self.assertAllEqual(rt.nested_row_splits[1], [0, 2, 2, 5, 6, 7]) + self.assertLen(rt.nested_value_rowids(), 2) + self.assertAllEqual(rt.nested_value_rowids()[0], [0, 0, 1, 3, 3]) + self.assertAllEqual(rt.nested_value_rowids()[1], [0, 0, 2, 2, 2, 3, 4]) #============================================================================= # RaggedTensor.shape diff --git a/tensorflow/python/platform/test.py b/tensorflow/python/platform/test.py index fdfdae093b9..a1669e6ad3a 100644 --- 
a/tensorflow/python/platform/test.py +++ b/tensorflow/python/platform/test.py @@ -15,6 +15,8 @@ """Testing. +See the [Testing](https://tensorflow.org/api_docs/python/tf/test) guide. + Note: `tf.compat.v1.test.mock` is an alias to the python `mock` or `unittest.mock` depending on the python version. """ diff --git a/tensorflow/python/profiler/internal/run_metadata_test.py b/tensorflow/python/profiler/internal/run_metadata_test.py index daaff0ab601..16279ce36ae 100644 --- a/tensorflow/python/profiler/internal/run_metadata_test.py +++ b/tensorflow/python/profiler/internal/run_metadata_test.py @@ -129,7 +129,10 @@ class RunMetadataTest(test.TestCase): ret = _extract_node(run_meta, 'MatMul') self.assertEqual(len(ret['gpu:0']), 1) - self.assertEqual(len(ret['gpu:0/stream:all']), 1, '%s' % run_meta) + if not test.is_built_with_rocm(): + # skip this check for the ROCm platform + # stream level tracing is not yet supported on the ROCm platform + self.assertEqual(len(ret['gpu:0/stream:all']), 1, '%s' % run_meta) @test_util.run_deprecated_v1 def testAllocationHistory(self): @@ -234,7 +237,11 @@ class RunMetadataTest(test.TestCase): for node in ret['gpu:0']: total_cpu_execs += node.op_end_rel_micros - self.assertGreaterEqual(len(ret['gpu:0/stream:all']), 4, '%s' % run_meta) + if not test.is_built_with_rocm(): + # skip this check for the ROCm platform + # stream level tracing is not yet supported on the ROCm platform + self.assertGreaterEqual( + len(ret['gpu:0/stream:all']), 4, '%s' % run_meta) if __name__ == '__main__': diff --git a/tensorflow/python/profiler/profile_context_test.py b/tensorflow/python/profiler/profile_context_test.py index 885f08ca4b9..f4051ed7b7d 100644 --- a/tensorflow/python/profiler/profile_context_test.py +++ b/tensorflow/python/profiler/profile_context_test.py @@ -69,6 +69,13 @@ class ProfilerContextTest(test.TestCase): os.path.join(test.get_temp_dir(), "profile_100")) as profiler: profiler.profile_operations(options=opts) with gfile.Open(outfile, "r") 
as f: + + if test.is_built_with_rocm(): + # The profiler output for ROCm mode, includes an extra warning + # related to the lack of stream tracing in ROCm mode. + # Need to skip this warning when doing the diff + profile_str = "\n".join(profile_str.split("\n")[7:]) + self.assertEqual(profile_str, f.read()) @test_util.run_deprecated_v1 diff --git a/tensorflow/python/saved_model/BUILD b/tensorflow/python/saved_model/BUILD index 7201bb48d1e..29ce69ce9a3 100644 --- a/tensorflow/python/saved_model/BUILD +++ b/tensorflow/python/saved_model/BUILD @@ -311,7 +311,6 @@ py_library( "//tensorflow/python/training/tracking", "//tensorflow/python/training/tracking:base", "//tensorflow/python/training/tracking:graph_view", - "//tensorflow/python/training/tracking:object_identity", "//tensorflow/python/training/tracking:util", ], ) diff --git a/tensorflow/python/saved_model/nested_structure_coder.py b/tensorflow/python/saved_model/nested_structure_coder.py index 59a2687edaf..66b02b119d1 100644 --- a/tensorflow/python/saved_model/nested_structure_coder.py +++ b/tensorflow/python/saved_model/nested_structure_coder.py @@ -423,6 +423,7 @@ class _TensorSpecCodec(object): return value.HasField("tensor_spec_value") def do_decode(self, value, decode_fn): + name = value.tensor_spec_value.name return tensor_spec.TensorSpec( shape=decode_fn( struct_pb2.StructuredValue( @@ -430,7 +431,7 @@ class _TensorSpecCodec(object): dtype=decode_fn( struct_pb2.StructuredValue( tensor_dtype_value=value.tensor_spec_value.dtype)), - name=value.tensor_spec_value.name) + name=(name if name else None)) StructureCoder.register_codec(_TensorSpecCodec()) diff --git a/tensorflow/python/saved_model/nested_structure_coder_test.py b/tensorflow/python/saved_model/nested_structure_coder_test.py index 1538fbf1271..16c56b1ddbf 100644 --- a/tensorflow/python/saved_model/nested_structure_coder_test.py +++ b/tensorflow/python/saved_model/nested_structure_coder_test.py @@ -171,6 +171,22 @@ class 
NestedStructureTest(test.TestCase): decoded = self._coder.decode_proto(encoded) self.assertEqual(structure, decoded) + def testEncodeDecodeTensorSpecWithNoName(self): + structure = [tensor_spec.TensorSpec([1, 2, 3], dtypes.int64)] + self.assertTrue(self._coder.can_encode(structure)) + encoded = self._coder.encode_structure(structure) + expected = struct_pb2.StructuredValue() + expected_list = expected.list_value + expected_tensor_spec = expected_list.values.add().tensor_spec_value + expected_tensor_spec.shape.dim.add().size = 1 + expected_tensor_spec.shape.dim.add().size = 2 + expected_tensor_spec.shape.dim.add().size = 3 + expected_tensor_spec.name = "" + expected_tensor_spec.dtype = dtypes.int64.as_datatype_enum + self.assertEqual(expected, encoded) + decoded = self._coder.decode_proto(encoded) + self.assertEqual(structure, decoded) + def testNotEncodable(self): class NotEncodable(object): diff --git a/tensorflow/python/saved_model/save.py b/tensorflow/python/saved_model/save.py index e82334b4923..f357ed0728d 100644 --- a/tensorflow/python/saved_model/save.py +++ b/tensorflow/python/saved_model/save.py @@ -51,10 +51,10 @@ from tensorflow.python.saved_model import utils_impl from tensorflow.python.training.saving import functional_saver from tensorflow.python.training.tracking import base from tensorflow.python.training.tracking import graph_view -from tensorflow.python.training.tracking import object_identity from tensorflow.python.training.tracking import tracking from tensorflow.python.training.tracking import util from tensorflow.python.util import compat +from tensorflow.python.util import object_identity from tensorflow.python.util.tf_export import tf_export _UNCOPIABLE_DTYPES = frozenset((dtypes.resource, dtypes.variant)) diff --git a/tensorflow/python/tools/api/generator/create_python_api.py b/tensorflow/python/tools/api/generator/create_python_api.py index 7dd3f97b79d..aeeec69cec8 100644 --- a/tensorflow/python/tools/api/generator/create_python_api.py +++ 
b/tensorflow/python/tools/api/generator/create_python_api.py @@ -48,15 +48,48 @@ _GENERATED_FILE_HEADER = """# This file is MACHINE GENERATED! Do not edit. from __future__ import print_function as _print_function +import sys as _sys + """ _GENERATED_FILE_FOOTER = '\n\ndel _print_function\n' _DEPRECATION_FOOTER = """ -import sys as _sys -from tensorflow.python.util import deprecation_wrapper as _deprecation_wrapper +from tensorflow.python.util import module_wrapper as _module_wrapper -if not isinstance(_sys.modules[__name__], _deprecation_wrapper.DeprecationWrapper): - _sys.modules[__name__] = _deprecation_wrapper.DeprecationWrapper( - _sys.modules[__name__], "%s") +if not isinstance(_sys.modules[__name__], _module_wrapper.TFModuleWrapper): + _sys.modules[__name__] = _module_wrapper.TFModuleWrapper( + _sys.modules[__name__], "%s", public_apis=_PUBLIC_APIS, deprecation=%s, + has_lite=%s) +""" +_MODULE_TEXT_TEMPLATE = """ +# Inform pytype that this module is dynamically populated (b/111239204). +_LAZY_LOADING = False + +_PUBLIC_APIS = { +%s +} + +if _LAZY_LOADING: + _HAS_DYNAMIC_ATTRIBUTES = True +else: + import importlib as _importlib + for symbol, symbol_loc_info in _PUBLIC_APIS.items(): + if symbol_loc_info[0]: + attr = getattr(_importlib.import_module(symbol_loc_info[0]), symbol_loc_info[1]) + else: + attr = _importlib.import_module(symbol_loc_info[1]) + setattr(_sys.modules[__name__], symbol, attr) + try: + del symbol + except NameError: + pass + try: + del symbol_loc_info + except NameError: + pass + try: + del attr + except NameError: + pass """ @@ -76,17 +109,7 @@ def format_import(source_module_name, source_name, dest_name): Returns: An import statement string. 
""" - if source_module_name: - if source_name == dest_name: - return 'from %s import %s' % (source_module_name, source_name) - else: - return 'from %s import %s as %s' % ( - source_module_name, source_name, dest_name) - else: - if source_name == dest_name: - return 'import %s' % source_name - else: - return 'import %s as %s' % (source_name, dest_name) + return " '%s': ('%s', '%s')," % (dest_name, source_module_name, source_name) def get_canonical_import(import_set): @@ -129,7 +152,6 @@ class _ModuleInitCodeBuilder(object): lambda: collections.defaultdict(set)) self._dest_import_to_id = collections.defaultdict(int) # Names that start with underscore in the root module. - self._underscore_names_in_root = [] self._api_version = api_version def _check_already_imported(self, symbol_id, api_name): @@ -166,9 +188,6 @@ class _ModuleInitCodeBuilder(object): symbol_id = -1 if not symbol else id(symbol) self._check_already_imported(symbol_id, full_api_name) - if not dest_module_name and dest_name.startswith('_'): - self._underscore_names_in_root.append(dest_name) - # The same symbol can be available in multiple modules. # We store all possible ways of importing this symbol and later pick just # one. @@ -197,11 +216,13 @@ class _ModuleInitCodeBuilder(object): submodule = module_split[submodule_index-1] parent_module += '.' + submodule if parent_module else submodule import_from = self._output_package - if submodule_index > 0: - import_from += '.' + '.'.join(module_split[:submodule_index]) + import_from += '.' + '.'.join(module_split[:submodule_index + 1]) self.add_import( - None, import_from, module_split[submodule_index], - parent_module, module_split[submodule_index]) + symbol=None, + source_module_name='', + source_name=import_from, + dest_module_name=parent_module, + dest_name=module_split[submodule_index]) def build(self): """Get a map from destination module to __init__.py code for that module. 
@@ -221,26 +242,20 @@ class _ModuleInitCodeBuilder(object): get_canonical_import(imports) for _, imports in dest_name_to_imports.items() ] - module_text_map[dest_module] = '\n'.join(sorted(imports_list)) + module_text_map[dest_module] = _MODULE_TEXT_TEMPLATE % '\n'.join( + sorted(imports_list)) - # Expose exported symbols with underscores in root module - # since we import from it using * import. - underscore_names_str = ', '.join( - '\'%s\'' % name for name in self._underscore_names_in_root) - # We will always generate a root __init__.py file to let us handle * - # imports consistently. Be sure to have a root __init__.py file listed in - # the script outputs. - module_text_map[''] = module_text_map.get('', '') + ''' -_names_with_underscore = [%s] -__all__ = [_s for _s in dir() if not _s.startswith('_')] -__all__.extend([_s for _s in _names_with_underscore]) -''' % underscore_names_str - - if self._api_version == 1: # Add 1.* deprecations. - for dest_module, _ in self._module_imports.items(): + for dest_module, _ in self._module_imports.items(): + deprecation = 'False' + has_lite = 'False' + if self._api_version == 1: # Add 1.* deprecations. 
if not dest_module.startswith(_COMPAT_MODULE_PREFIX): - footer_text_map[dest_module] = _DEPRECATION_FOOTER % ( - dest_module) + deprecation = 'True' + # Workaround to make sure not load lite from lite/__init__.py + if not dest_module and 'lite' in self._module_imports: + has_lite = 'True' + footer_text_map[dest_module] = _DEPRECATION_FOOTER % ( + dest_module, deprecation, has_lite) return module_text_map, footer_text_map @@ -519,7 +534,11 @@ def create_api_files(output_files, packages, root_init_template, output_dir, _GENERATED_FILE_HEADER % get_module_docstring( module, packages[0], api_name) + text + _GENERATED_FILE_FOOTER) if module in deprecation_footer_map: - contents += deprecation_footer_map[module] + if '# WRAPPER_PLACEHOLDER' in contents: + contents = contents.replace('# WRAPPER_PLACEHOLDER', + deprecation_footer_map[module]) + else: + contents += deprecation_footer_map[module] with open(module_name_to_file_path[module], 'w') as fp: fp.write(contents) diff --git a/tensorflow/python/tools/api/generator/create_python_api_test.py b/tensorflow/python/tools/api/generator/create_python_api_test.py index 6e0970ec80a..98afd9a241f 100644 --- a/tensorflow/python/tools/api/generator/create_python_api_test.py +++ b/tensorflow/python/tools/api/generator/create_python_api_test.py @@ -67,15 +67,16 @@ class CreatePythonApiTest(test.TestCase): output_package='tensorflow', api_name='tensorflow', api_version=1) - expected_import = ( - 'from tensorflow.python.test_module ' - 'import test_op as test_op1') + expected_import = ('\'test_op1\': ' + '(\'tensorflow.python.test_module\',' + ' \'test_op\')') self.assertTrue( expected_import in str(imports), msg='%s not in %s' % (expected_import, str(imports))) - expected_import = ('from tensorflow.python.test_module ' - 'import test_op') + expected_import = ('\'test_op\': ' + '(\'tensorflow.python.test_module\',' + ' \'test_op\')') self.assertTrue( expected_import in str(imports), msg='%s not in %s' % (expected_import, str(imports))) 
@@ -89,8 +90,10 @@ class CreatePythonApiTest(test.TestCase): output_package='tensorflow', api_name='tensorflow', api_version=2) - expected_import = ('from tensorflow.python.test_module ' - 'import TestClass') + expected_import = ( + '\'NewTestClass\':' + ' (\'tensorflow.python.test_module\',' + ' \'TestClass\')') self.assertTrue( 'TestClass' in str(imports), msg='%s not in %s' % (expected_import, str(imports))) @@ -101,8 +104,9 @@ class CreatePythonApiTest(test.TestCase): output_package='tensorflow', api_name='tensorflow', api_version=1) - expected = ('from tensorflow.python.test_module ' - 'import _TEST_CONSTANT') + expected = ('\'_TEST_CONSTANT\':' + ' (\'tensorflow.python.test_module\',' + ' \'_TEST_CONSTANT\')') self.assertTrue(expected in str(imports), msg='%s not in %s' % (expected, str(imports))) diff --git a/tensorflow/python/tpu/tpu_feed.py b/tensorflow/python/tpu/tpu_feed.py index 54a77a14655..2b01eeb3934 100644 --- a/tensorflow/python/tpu/tpu_feed.py +++ b/tensorflow/python/tpu/tpu_feed.py @@ -908,7 +908,7 @@ class _PartitionedInfeedQueue(InfeedQueue): if dims.prod() != self._device_assignment.num_cores_per_replica: raise ValueError( - "The product of each input parition dim should equal to " + "The product of each input partition dim should equal to " "num_cores_per_replica. 
(dim = {}, num_cores_per_replica " "= {})".format(dims, self._device_assignment.num_cores_per_replica)) if dims.shape[0] != tensor.shape.ndims: diff --git a/tensorflow/python/tpu/tpu_strategy_util.py b/tensorflow/python/tpu/tpu_strategy_util.py index 0f2f93deac5..6b62f55b5bf 100644 --- a/tensorflow/python/tpu/tpu_strategy_util.py +++ b/tensorflow/python/tpu/tpu_strategy_util.py @@ -23,6 +23,7 @@ from tensorflow.python.client import session as session_lib from tensorflow.python.distribute.cluster_resolver import TPUClusterResolver from tensorflow.python.eager import context from tensorflow.python.eager import function +from tensorflow.python.framework import device from tensorflow.python.framework import ops from tensorflow.python.platform import tf_logging as logging from tensorflow.python.tpu import topology @@ -48,7 +49,15 @@ def initialize_tpu_system(cluster_resolver=None): Raises: RuntimeError: If no TPU devices found for eager execution. """ + job = None if cluster_resolver is None: + # If no cluster resolver is specified, and running eagerly, execute the init + # ops in the current device scope. + if context.executing_eagerly(): + curr_device = device.DeviceSpec.from_string(context.context().device_name) + if curr_device.job is not None: + job = "{}/replica:0/task:0".format(curr_device.job) + cluster_resolver = TPUClusterResolver("") assert isinstance(cluster_resolver, TPUClusterResolver) @@ -66,7 +75,6 @@ def initialize_tpu_system(cluster_resolver=None): # DistributedTPURewritePass. This pass actually adds real ops that # initialize the TPU system. Thus, we can't simply run tpu.initialize_system # eagerly. We need to wrap it in defun and trigger the rewrite passes on it. - job = None if tpu_name not in _LOCAL_MASTERS: # Explicitly place the tpu.initialize_system in the first worker to # avoid the output node match multiple devices error. 
diff --git a/tensorflow/python/training/checkpoint_management.py b/tensorflow/python/training/checkpoint_management.py index 32b9c023aae..e2dbde57a18 100644 --- a/tensorflow/python/training/checkpoint_management.py +++ b/tensorflow/python/training/checkpoint_management.py @@ -347,6 +347,30 @@ def latest_checkpoint(checkpoint_dir, latest_filename=None): return None +def checkpoint_exists_internal(checkpoint_prefix): + """Checks whether a V1 or V2 checkpoint exists with the specified prefix. + + This is an internal function to check if a checkpoint exists, + since it takes into account the naming difference between V1 and V2 formats. + + Args: + checkpoint_prefix: the prefix of a V1 or V2 checkpoint, with V2 taking + priority. Typically the result of `Saver.save()` or that of + `tf.train.latest_checkpoint()`, regardless of sharded/non-sharded or + V1/V2. + Returns: + A bool, true if a checkpoint referred to by `checkpoint_prefix` exists. + """ + pathname = _prefix_to_checkpoint_path(checkpoint_prefix, + saver_pb2.SaverDef.V2) + if file_io.get_matching_files(pathname): + return True + elif file_io.get_matching_files(checkpoint_prefix): + return True + else: + return False + + @deprecation.deprecated( date=None, instructions="Use standard file APIs to check for files with this prefix.") @@ -362,17 +386,11 @@ def checkpoint_exists(checkpoint_prefix): priority. Typically the result of `Saver.save()` or that of `tf.train.latest_checkpoint()`, regardless of sharded/non-sharded or V1/V2. + Returns: - A bool, true iff a checkpoint referred to by `checkpoint_prefix` exists. + A bool, true if a checkpoint referred to by `checkpoint_prefix` exists. 
""" - pathname = _prefix_to_checkpoint_path(checkpoint_prefix, - saver_pb2.SaverDef.V2) - if file_io.get_matching_files(pathname): - return True - elif file_io.get_matching_files(checkpoint_prefix): - return True - else: - return False + return checkpoint_exists_internal(checkpoint_prefix) @deprecation.deprecated( diff --git a/tensorflow/python/training/experimental/loss_scale.py b/tensorflow/python/training/experimental/loss_scale.py index 711ec91369e..bbbd0cd7ec4 100644 --- a/tensorflow/python/training/experimental/loss_scale.py +++ b/tensorflow/python/training/experimental/loss_scale.py @@ -31,6 +31,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables from tensorflow.python.training.tracking import base as trackable from tensorflow.python.ops import variable_scope +from tensorflow.python.util import nest from tensorflow.python.util.tf_export import tf_export @@ -93,10 +94,10 @@ class LossScale(trackable.Trackable): cross-replica context. Args: - grads: A list of unscaled gradients, each which is the gradient of the - loss with respect to a weight. The gradients should have already been - divided by the loss scale being before passed to this function. 'None' - gradients are accepted, and are ignored. + grads: A nested structure of unscaled gradients, each which is the + gradient of the loss with respect to a weight. The gradients should have + already been divided by the loss scale being before passed to this + function. 'None' gradients are accepted, and are ignored. Returns: update_op: In eager mode, None. 
In graph mode, an op to update the loss @@ -324,10 +325,11 @@ class DynamicLossScale(LossScale): return self._multiplier def __call__(self): - return self._current_loss_scale + return ops.convert_to_tensor(self._current_loss_scale) def update(self, grads): """Updates loss scale based on if gradients are finite in current step.""" + grads = nest.flatten(grads) if distribution_strategy_context.has_strategy(): distribution = distribution_strategy_context.get_cross_replica_context() diff --git a/tensorflow/python/training/experimental/loss_scale_test.py b/tensorflow/python/training/experimental/loss_scale_test.py index 3d2d5ba8aa3..c3e18a18422 100644 --- a/tensorflow/python/training/experimental/loss_scale_test.py +++ b/tensorflow/python/training/experimental/loss_scale_test.py @@ -87,6 +87,11 @@ class FixedLossScaleTest(test.TestCase): loss_scale = loss_scale_module.FixedLossScale.from_config(config) self.assertEqual(self.evaluate(loss_scale()), 123.) + @test_util.run_in_graph_and_eager_modes + def test_call_type(self): + scalar = loss_scale_module.FixedLossScale(123) + self.assertIsInstance(scalar(), ops.Tensor) + def _get_example_iter(inputs): dataset = dataset_ops.Dataset.from_tensor_slices(inputs) @@ -259,6 +264,15 @@ class DynamicLossScaleTest(test.TestCase, parameterized.TestCase): expected_outputs = [2, 2, 4, 4, 2, 2, 1, 1, 2, 2, 1] self._test_helper(inputs, expected_outputs, init_loss_scale) + @parameterized.named_parameters(*TESTCASES) + @test_util.run_in_graph_and_eager_modes + def test_single_tensor_gradient(self, strategy_fn): + with strategy_fn().scope(): + loss_scale = loss_scale_module.DynamicLossScale() + grad = constant_op.constant(4.0) + _, should_apply = loss_scale.update(grad) + self.assertTrue(self.evaluate(should_apply)) + @test_util.run_in_graph_and_eager_modes def test_serialization(self): loss_scale = loss_scale_module.DynamicLossScale( @@ -283,6 +297,10 @@ class DynamicLossScaleTest(test.TestCase, parameterized.TestCase): 
self.assertEqual(scalar.increment_period, scalar2.increment_period) self.assertEqual(scalar.multiplier, scalar2.multiplier) + @test_util.run_in_graph_and_eager_modes + def test_call_type(self): + scalar = loss_scale_module.DynamicLossScale() + self.assertIsInstance(scalar(), ops.Tensor) if __name__ == '__main__': test.main() diff --git a/tensorflow/python/training/input.py b/tensorflow/python/training/input.py index 756b9b5a99c..21408f3988e 100644 --- a/tensorflow/python/training/input.py +++ b/tensorflow/python/training/input.py @@ -63,7 +63,7 @@ _restore_sparse = sparse_ops._take_many_sparse_from_tensors_map def match_filenames_once(pattern, name=None): """Save the list of files matching pattern, so it is only computed once. - NOTE: The order of the files returned can be non-deterministic. + NOTE: The order of the files returned is deterministic. Args: pattern: A file pattern (glob), or 1D tensor of file patterns. diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py index 7b502bffa38..d65297fb30d 100644 --- a/tensorflow/python/training/saver.py +++ b/tensorflow/python/training/saver.py @@ -1276,11 +1276,12 @@ class Saver(object): if save_path is None: raise ValueError("Can't load save_path when it is None.") - if not checkpoint_management.checkpoint_exists(compat.as_text(save_path)): + checkpoint_prefix = compat.as_text(save_path) + if not checkpoint_management.checkpoint_exists_internal(checkpoint_prefix): raise ValueError("The passed save_path is not a valid checkpoint: " + - compat.as_text(save_path)) + checkpoint_prefix) - logging.info("Restoring parameters from %s", compat.as_text(save_path)) + logging.info("Restoring parameters from %s", checkpoint_prefix) try: if context.executing_eagerly(): self._build_eager(save_path, build_save=False, build_restore=True) diff --git a/tensorflow/python/training/tracking/BUILD b/tensorflow/python/training/tracking/BUILD index 6fd1e7826ec..ce8e9af3328 100644 --- 
a/tensorflow/python/training/tracking/BUILD +++ b/tensorflow/python/training/tracking/BUILD @@ -103,19 +103,12 @@ tf_py_test( ], ) -py_library( - name = "object_identity", - srcs = ["object_identity.py"], - srcs_version = "PY2AND3", -) - py_library( name = "graph_view", srcs = ["graph_view.py"], srcs_version = "PY2AND3", deps = [ ":base", - ":object_identity", ":tracking", "//tensorflow/core:protos_all_py", "//tensorflow/python:constant_op", @@ -134,7 +127,6 @@ py_library( ":base", ":data_structures", ":graph_view", - ":object_identity", ":tracking", "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", diff --git a/tensorflow/python/training/tracking/graph_view.py b/tensorflow/python/training/tracking/graph_view.py index ba238787018..54b22fa07f9 100644 --- a/tensorflow/python/training/tracking/graph_view.py +++ b/tensorflow/python/training/tracking/graph_view.py @@ -28,8 +28,8 @@ from tensorflow.python.training import optimizer as optimizer_v1 from tensorflow.python.training.saving import saveable_object as saveable_object_lib from tensorflow.python.training.saving import saveable_object_util from tensorflow.python.training.tracking import base -from tensorflow.python.training.tracking import object_identity from tensorflow.python.training.tracking import tracking +from tensorflow.python.util import object_identity _ESCAPE_CHAR = "." # For avoiding conflicts with user-specified names. 
diff --git a/tensorflow/python/training/tracking/layer_utils.py b/tensorflow/python/training/tracking/layer_utils.py index 75c766e7fc3..b83b0f84f91 100644 --- a/tensorflow/python/training/tracking/layer_utils.py +++ b/tensorflow/python/training/tracking/layer_utils.py @@ -21,7 +21,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.training.tracking import object_identity +from tensorflow.python.util import object_identity def is_layer(obj): diff --git a/tensorflow/python/training/tracking/util.py b/tensorflow/python/training/tracking/util.py index a22bef263da..c4c2ccdbf29 100644 --- a/tensorflow/python/training/tracking/util.py +++ b/tensorflow/python/training/tracking/util.py @@ -49,11 +49,11 @@ from tensorflow.python.training.saving import saveable_object_util from tensorflow.python.training.tracking import base from tensorflow.python.training.tracking import data_structures from tensorflow.python.training.tracking import graph_view as graph_view_lib -from tensorflow.python.training.tracking import object_identity from tensorflow.python.training.tracking import tracking from tensorflow.python.util import compat from tensorflow.python.util import deprecation from tensorflow.python.util import lazy_loader +from tensorflow.python.util import object_identity from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export diff --git a/tensorflow/python/util/deprecation_wrapper.py b/tensorflow/python/util/deprecation_wrapper.py index 0bdaf1631da..2e0eee5ea32 100644 --- a/tensorflow/python/util/deprecation_wrapper.py +++ b/tensorflow/python/util/deprecation_wrapper.py @@ -12,138 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Provides wrapper for TensorFlow modules to support deprecation messages. 
+"""Compatibility wrapper for TensorFlow modules to support deprecation messages. -TODO(annarev): potentially merge with LazyLoader. +Please use module_wrapper instead. +TODO(yifeif): remove once no longer referred by estimator """ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import sys -import types +from tensorflow.python.util import module_wrapper -from tensorflow.python.platform import tf_logging as logging -from tensorflow.python.util import tf_decorator -from tensorflow.python.util import tf_inspect -from tensorflow.python.util import tf_stack -from tensorflow.tools.compatibility import all_renames_v2 - - -_PER_MODULE_WARNING_LIMIT = 1 - - -def get_rename_v2(name): - if name not in all_renames_v2.symbol_renames: - return None - return all_renames_v2.symbol_renames[name] - - -def _call_location(): - # We want to get stack frame 2 frames up from current frame, - # i.e. above _getattr__ and _call_location calls. - stack = tf_stack.extract_stack_file_and_line(max_length=3) - if not stack: # should never happen as we're in a function - return 'UNKNOWN' - frame = stack[0] - return '{}:{}'.format(frame.file, frame.line) - - -def contains_deprecation_decorator(decorators): - return any( - d.decorator_name == 'deprecated' for d in decorators) - - -def has_deprecation_decorator(symbol): - """Checks if given object has a deprecation decorator. - - We check if deprecation decorator is in decorators as well as - whether symbol is a class whose __init__ method has a deprecation - decorator. - Args: - symbol: Python object. - - Returns: - True if symbol has deprecation decorator. 
- """ - decorators, symbol = tf_decorator.unwrap(symbol) - if contains_deprecation_decorator(decorators): - return True - if tf_inspect.isfunction(symbol): - return False - if not tf_inspect.isclass(symbol): - return False - if not hasattr(symbol, '__init__'): - return False - init_decorators, _ = tf_decorator.unwrap(symbol.__init__) - return contains_deprecation_decorator(init_decorators) - - -class DeprecationWrapper(types.ModuleType): - """Wrapper for TensorFlow modules to support deprecation messages.""" - - def __init__(self, wrapped, module_name): # pylint: disable=super-on-old-class - super(DeprecationWrapper, self).__init__(wrapped.__name__) - self.__dict__.update(wrapped.__dict__) - # Prefix all local attributes with _dw_ so that we can - # handle them differently in attribute access methods. - self._dw_wrapped_module = wrapped - self._dw_module_name = module_name - # names we already checked for deprecation - self._dw_deprecated_checked = set() - self._dw_warning_count = 0 - - def __getattribute__(self, name): # pylint: disable=super-on-old-class - attr = super(DeprecationWrapper, self).__getattribute__(name) - if name.startswith('__') or name.startswith('_dw_'): - return attr - - if (self._dw_warning_count < _PER_MODULE_WARNING_LIMIT and - name not in self._dw_deprecated_checked): - - self._dw_deprecated_checked.add(name) - - if self._dw_module_name: - full_name = 'tf.%s.%s' % (self._dw_module_name, name) - else: - full_name = 'tf.%s' % name - rename = get_rename_v2(full_name) - if rename and not has_deprecation_decorator(attr): - call_location = _call_location() - # skip locations in Python source - if not call_location.startswith('<'): - logging.warning( - 'From %s: The name %s is deprecated. 
Please use %s instead.\n', - _call_location(), full_name, rename) - self._dw_warning_count += 1 - return attr - - def __setattr__(self, arg, val): # pylint: disable=super-on-old-class - if arg.startswith('_dw_'): - super(DeprecationWrapper, self).__setattr__(arg, val) - else: - setattr(self._dw_wrapped_module, arg, val) - self.__dict__[arg] = val - - def __dir__(self): - return dir(self._dw_wrapped_module) - - def __delattr__(self, name): # pylint: disable=super-on-old-class - if name.startswith('_dw_'): - super(DeprecationWrapper, self).__delattr__(name) - else: - delattr(self._dw_wrapped_module, name) - - def __repr__(self): - return self._dw_wrapped_module.__repr__() - - def __getstate__(self): - return self.__name__ - - def __setstate__(self, d): - # pylint: disable=protected-access - self.__init__( - sys.modules[d]._dw_wrapped_module, - sys.modules[d]._dw_module_name) - # pylint: enable=protected-access +# For backward compatibility for other pip packages that use this class. +DeprecationWrapper = module_wrapper.TFModuleWrapper diff --git a/tensorflow/python/util/deprecation_wrapper_test.py b/tensorflow/python/util/deprecation_wrapper_test.py deleted file mode 100644 index 84ff22c5937..00000000000 --- a/tensorflow/python/util/deprecation_wrapper_test.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for tensorflow.python.util.deprecation_wrapper.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import logging -import types - -from tensorflow.python.platform import test -from tensorflow.python.platform import tf_logging as logging -from tensorflow.python.util import deprecation_wrapper -from tensorflow.python.util import tf_inspect -from tensorflow.tools.compatibility import all_renames_v2 - -deprecation_wrapper._PER_MODULE_WARNING_LIMIT = 5 - - -class MockModule(types.ModuleType): - pass - - -class DeprecationWrapperTest(test.TestCase): - - def testWrapperIsAModule(self): - module = MockModule('test') - wrapped_module = deprecation_wrapper.DeprecationWrapper( - module, 'test') - self.assertTrue(tf_inspect.ismodule(wrapped_module)) - - @test.mock.patch.object(logging, 'warning', autospec=True) - def testDeprecationWarnings(self, mock_warning): - module = MockModule('test') - module.foo = 1 - module.bar = 2 - module.baz = 3 - all_renames_v2.symbol_renames['tf.test.bar'] = 'tf.bar2' - all_renames_v2.symbol_renames['tf.test.baz'] = 'tf.compat.v1.baz' - - wrapped_module = deprecation_wrapper.DeprecationWrapper( - module, 'test') - self.assertTrue(tf_inspect.ismodule(wrapped_module)) - - self.assertEqual(0, mock_warning.call_count) - bar = wrapped_module.bar - self.assertEqual(1, mock_warning.call_count) - foo = wrapped_module.foo - self.assertEqual(1, mock_warning.call_count) - baz = wrapped_module.baz - self.assertEqual(2, mock_warning.call_count) - baz = wrapped_module.baz - self.assertEqual(2, mock_warning.call_count) - - # Check that values stayed the same - self.assertEqual(module.foo, foo) - self.assertEqual(module.bar, bar) - self.assertEqual(module.baz, baz) - - -if __name__ == '__main__': - test.main() diff --git a/tensorflow/python/util/module_wrapper.py 
b/tensorflow/python/util/module_wrapper.py new file mode 100644 index 00000000000..aa232d58495 --- /dev/null +++ b/tensorflow/python/util/module_wrapper.py @@ -0,0 +1,205 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Provides wrapper for TensorFlow modules.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import importlib +import sys +import types + +from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.util import tf_decorator +from tensorflow.python.util import tf_inspect +from tensorflow.python.util import tf_stack +from tensorflow.tools.compatibility import all_renames_v2 + + +_PER_MODULE_WARNING_LIMIT = 1 + + +def get_rename_v2(name): + if name not in all_renames_v2.symbol_renames: + return None + return all_renames_v2.symbol_renames[name] + + +def _call_location(): + # We want to get stack frame 2 frames up from current frame, + # i.e. above _getattr__ and _call_location calls. 
+ stack = tf_stack.extract_stack_file_and_line(max_length=3) + if not stack: # should never happen as we're in a function + return 'UNKNOWN' + frame = stack[0] + return '{}:{}'.format(frame.file, frame.line) + + +def contains_deprecation_decorator(decorators): + return any( + d.decorator_name == 'deprecated' for d in decorators) + + +def has_deprecation_decorator(symbol): + """Checks if given object has a deprecation decorator. + + We check if deprecation decorator is in decorators as well as + whether symbol is a class whose __init__ method has a deprecation + decorator. + Args: + symbol: Python object. + + Returns: + True if symbol has deprecation decorator. + """ + decorators, symbol = tf_decorator.unwrap(symbol) + if contains_deprecation_decorator(decorators): + return True + if tf_inspect.isfunction(symbol): + return False + if not tf_inspect.isclass(symbol): + return False + if not hasattr(symbol, '__init__'): + return False + init_decorators, _ = tf_decorator.unwrap(symbol.__init__) + return contains_deprecation_decorator(init_decorators) + + +class TFModuleWrapper(types.ModuleType): + """Wrapper for TF modules to support deprecation messages and lazyloading.""" + + def __init__( # pylint: disable=super-on-old-class + self, + wrapped, + module_name, + public_apis=None, + deprecation=True, + has_lite=False): # pylint: enable=super-on-old-class + super(TFModuleWrapper, self).__init__(wrapped.__name__) + self.__dict__.update(wrapped.__dict__) + # Prefix all local attributes with _tfmw_ so that we can + # handle them differently in attribute access methods. 
+ self._tfmw_wrapped_module = wrapped + self._tfmw_module_name = module_name + self._tfmw_public_apis = public_apis + self._tfmw_print_deprecation_warnings = deprecation + self._tfmw_has_lite = has_lite + # Set __all__ so that import * work for lazy loaded modules + if self._tfmw_public_apis: + self._tfmw_wrapped_module.__all__ = list(self._tfmw_public_apis.keys()) + self.__all__ = list(self._tfmw_public_apis.keys()) + # names we already checked for deprecation + self._tfmw_deprecated_checked = set() + self._tfmw_warning_count = 0 + + def _tfmw_add_deprecation_warning(self, name, attr): + """Print deprecation warning for attr with given name if necessary.""" + if (self._tfmw_warning_count < _PER_MODULE_WARNING_LIMIT and + name not in self._tfmw_deprecated_checked): + + self._tfmw_deprecated_checked.add(name) + + if self._tfmw_module_name: + full_name = 'tf.%s.%s' % (self._tfmw_module_name, name) + else: + full_name = 'tf.%s' % name + rename = get_rename_v2(full_name) + if rename and not has_deprecation_decorator(attr): + call_location = _call_location() + # skip locations in Python source + if not call_location.startswith('<'): + logging.warning( + 'From %s: The name %s is deprecated. 
Please use %s instead.\n', + _call_location(), full_name, rename) + self._tfmw_warning_count += 1 + + def _tfmw_import_module(self, name): + symbol_loc_info = self._tfmw_public_apis[name] + if symbol_loc_info[0]: + module = importlib.import_module(symbol_loc_info[0]) + attr = getattr(module, symbol_loc_info[1]) + else: + attr = importlib.import_module(symbol_loc_info[1]) + setattr(self._tfmw_wrapped_module, name, attr) + self.__dict__[name] = attr + return attr + + def __getattribute__(self, name): # pylint: disable=super-on-old-class + # Workaround to make sure we do not import from tensorflow/lite/__init__.py + if name == 'lite': + if self._tfmw_has_lite: + attr = self._tfmw_import_module(name) + setattr(self._tfmw_wrapped_module, 'lite', attr) + return attr + + attr = super(TFModuleWrapper, self).__getattribute__(name) + if name.startswith('__') or name.startswith('_tfmw_'): + return attr + + if self._tfmw_print_deprecation_warnings: + self._tfmw_add_deprecation_warning(name, attr) + return attr + + def __getattr__(self, name): + try: + attr = getattr(self._tfmw_wrapped_module, name) + except AttributeError as e: + if not self._tfmw_public_apis: + raise e + if name not in self._tfmw_public_apis: + raise e + attr = self._tfmw_import_module(name) + + if self._tfmw_print_deprecation_warnings: + self._tfmw_add_deprecation_warning(name, attr) + return attr + + def __setattr__(self, arg, val): # pylint: disable=super-on-old-class + if not arg.startswith('_tfmw_'): + setattr(self._tfmw_wrapped_module, arg, val) + self.__dict__[arg] = val + if arg not in self.__all__ and arg != '__all__': + self.__all__.append(arg) + super(TFModuleWrapper, self).__setattr__(arg, val) + + def __dir__(self): + if self._tfmw_public_apis: + return list( + set(self._tfmw_public_apis.keys()).union( + set([ + attr for attr in dir(self._tfmw_wrapped_module) + if not attr.startswith('_') + ]))) + else: + return dir(self._tfmw_wrapped_module) + + def __delattr__(self, name): # pylint: 
disable=super-on-old-class + if name.startswith('_tfmw_'): + super(TFModuleWrapper, self).__delattr__(name) + else: + delattr(self._tfmw_wrapped_module, name) + + def __repr__(self): + return self._tfmw_wrapped_module.__repr__() + + def __getstate__(self): + return self.__name__ + + def __setstate__(self, d): + # pylint: disable=protected-access + self.__init__(sys.modules[d]._tfmw_wrapped_module, + sys.modules[d]._tfmw_module_name) + # pylint: enable=protected-access diff --git a/tensorflow/python/util/module_wrapper_test.py b/tensorflow/python/util/module_wrapper_test.py new file mode 100644 index 00000000000..582e98abdfa --- /dev/null +++ b/tensorflow/python/util/module_wrapper_test.py @@ -0,0 +1,136 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for tensorflow.python.util.module_wrapper.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import logging +import types + +from tensorflow.python.platform import test +from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.util import module_wrapper +from tensorflow.python.util import tf_inspect +from tensorflow.tools.compatibility import all_renames_v2 + +module_wrapper._PER_MODULE_WARNING_LIMIT = 5 + + +class MockModule(types.ModuleType): + pass + + +class DeprecationWrapperTest(test.TestCase): + + def testWrapperIsAModule(self): + module = MockModule('test') + wrapped_module = module_wrapper.TFModuleWrapper(module, 'test') + self.assertTrue(tf_inspect.ismodule(wrapped_module)) + + @test.mock.patch.object(logging, 'warning', autospec=True) + def testDeprecationWarnings(self, mock_warning): + module = MockModule('test') + module.foo = 1 + module.bar = 2 + module.baz = 3 + all_renames_v2.symbol_renames['tf.test.bar'] = 'tf.bar2' + all_renames_v2.symbol_renames['tf.test.baz'] = 'tf.compat.v1.baz' + + wrapped_module = module_wrapper.TFModuleWrapper(module, 'test') + self.assertTrue(tf_inspect.ismodule(wrapped_module)) + + self.assertEqual(0, mock_warning.call_count) + bar = wrapped_module.bar + self.assertEqual(1, mock_warning.call_count) + foo = wrapped_module.foo + self.assertEqual(1, mock_warning.call_count) + baz = wrapped_module.baz # pylint: disable=unused-variable + self.assertEqual(2, mock_warning.call_count) + baz = wrapped_module.baz + self.assertEqual(2, mock_warning.call_count) + + # Check that values stayed the same + self.assertEqual(module.foo, foo) + self.assertEqual(module.bar, bar) + + +class LazyLoadingWrapperTest(test.TestCase): + + def testLazyLoad(self): + module = MockModule('test') + apis = {'cmd': ('', 'cmd'), 'ABCMeta': ('abc', 'ABCMeta')} + 
wrapped_module = module_wrapper.TFModuleWrapper( + module, 'test', public_apis=apis, deprecation=False) + import cmd as _cmd # pylint: disable=g-import-not-at-top + from abc import ABCMeta as _ABCMeta # pylint: disable=g-import-not-at-top, g-importing-member + self.assertEqual(wrapped_module.cmd, _cmd) + self.assertEqual(wrapped_module.ABCMeta, _ABCMeta) + + def testLazyLoadLocalOverride(self): + # Test that we can override and add fields to the wrapped module. + module = MockModule('test') + apis = {'cmd': ('', 'cmd')} + wrapped_module = module_wrapper.TFModuleWrapper( + module, 'test', public_apis=apis, deprecation=False) + import cmd as _cmd # pylint: disable=g-import-not-at-top + self.assertEqual(wrapped_module.cmd, _cmd) + setattr(wrapped_module, 'cmd', 1) + setattr(wrapped_module, 'cgi', 2) + self.assertEqual(wrapped_module.cmd, 1) # override + self.assertEqual(wrapped_module.cgi, 2) # add + + def testLazyLoadDict(self): + # Test that we can override and add fields to the wrapped module. + module = MockModule('test') + apis = {'cmd': ('', 'cmd')} + wrapped_module = module_wrapper.TFModuleWrapper( + module, 'test', public_apis=apis, deprecation=False) + import cmd as _cmd # pylint: disable=g-import-not-at-top + # At first cmd key does not exist in __dict__ + self.assertNotIn('cmd', wrapped_module.__dict__) + # After it is referred (lazyloaded), it gets added to __dict__ + wrapped_module.cmd # pylint: disable=pointless-statement + self.assertEqual(wrapped_module.__dict__['cmd'], _cmd) + # When we call setattr, it also gets added to __dict__ + setattr(wrapped_module, 'cmd2', _cmd) + self.assertEqual(wrapped_module.__dict__['cmd2'], _cmd) + + def testLazyLoadWildcardImport(self): + # Test that public APIs are in __all__. 
+ module = MockModule('test') + module._should_not_be_public = 5 + apis = {'cmd': ('', 'cmd')} + wrapped_module = module_wrapper.TFModuleWrapper( + module, 'test', public_apis=apis, deprecation=False) + setattr(wrapped_module, 'hello', 1) + self.assertIn('hello', wrapped_module.__all__) + self.assertIn('cmd', wrapped_module.__all__) + self.assertNotIn('_should_not_be_public', wrapped_module.__all__) + + def testLazyLoadCorrectLiteModule(self): + # If set, always load lite module from public API list. + module = MockModule('test') + apis = {'lite': ('', 'cmd')} + module.lite = 5 + import cmd as _cmd # pylint: disable=g-import-not-at-top + wrapped_module = module_wrapper.TFModuleWrapper( + module, 'test', public_apis=apis, deprecation=False, has_lite=True) + self.assertEqual(wrapped_module.lite, _cmd) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/python/util/nest.py b/tensorflow/python/util/nest.py index 3e02efffa9a..d43720f0ed8 100644 --- a/tensorflow/python/util/nest.py +++ b/tensorflow/python/util/nest.py @@ -142,8 +142,6 @@ def _sequence_like(instance, args): elif _is_type_spec(instance): # Pack a CompositeTensor's components according to a TypeSpec. 
assert len(args) == 1 - if args[0] and _is_type_spec(args[0][0]): - raise ValueError("Can not pack TypeSpec into a TypeSpec.") return instance._from_components(args[0]) # pylint: disable=protected-access elif isinstance(instance, _six.moves.range): return _sequence_like(list(instance), args) diff --git a/tensorflow/python/training/tracking/object_identity.py b/tensorflow/python/util/object_identity.py similarity index 100% rename from tensorflow/python/training/tracking/object_identity.py rename to tensorflow/python/util/object_identity.py diff --git a/tensorflow/python/util/tf_should_use.py b/tensorflow/python/util/tf_should_use.py index 2d0a447a0cd..b71c96ccf6b 100644 --- a/tensorflow/python/util/tf_should_use.py +++ b/tensorflow/python/util/tf_should_use.py @@ -65,7 +65,8 @@ class _TFShouldUseHelper(object): else: logger = tf_logging.error creation_stack = ''.join( - [line.rstrip() for line in traceback.format_stack(self._stack_frame)]) + [line.rstrip() + for line in traceback.format_stack(self._stack_frame, limit=5)]) logger( '==================================\n' 'Object was never used (type %s):\n%s\nIf you want to mark it as ' diff --git a/tensorflow/stream_executor/host/BUILD b/tensorflow/stream_executor/host/BUILD index 80beaa4e4f7..c8f833f8777 100644 --- a/tensorflow/stream_executor/host/BUILD +++ b/tensorflow/stream_executor/host/BUILD @@ -111,6 +111,7 @@ cc_library( "//tensorflow/stream_executor:stream_executor_pimpl", "//tensorflow/stream_executor:timer", "//tensorflow/stream_executor/lib", + "@com_google_absl//absl/synchronization", ], alwayslink = True, ) diff --git a/tensorflow/stream_executor/host/host_gpu_executor.cc b/tensorflow/stream_executor/host/host_gpu_executor.cc index ad25dd453da..f17e188cbc4 100644 --- a/tensorflow/stream_executor/host/host_gpu_executor.cc +++ b/tensorflow/stream_executor/host/host_gpu_executor.cc @@ -19,12 +19,14 @@ limitations under the License. 
#include +#include "absl/synchronization/notification.h" #include "tensorflow/core/platform/profile_utils/cpu_utils.h" #include "tensorflow/stream_executor/host/host_platform_id.h" #include "tensorflow/stream_executor/host/host_stream.h" #include "tensorflow/stream_executor/host/host_timer.h" #include "tensorflow/stream_executor/lib/statusor.h" #include "tensorflow/stream_executor/plugin_registry.h" +#include "tensorflow/stream_executor/stream_executor_internal.h" namespace stream_executor { namespace host { @@ -167,6 +169,61 @@ bool HostExecutor::CreateStreamDependency(Stream *dependent, Stream *other) { return true; } +class HostEvent : public internal::EventInterface { + public: + HostEvent() : notification_(std::make_shared()) {} + + std::shared_ptr ¬ification() { return notification_; } + + private: + // We use a std::shared_ptr here because the client may delete the HostEvent + // object while there are still RecordEvent and WaitForEvent callbacks pending + // on a stream. + std::shared_ptr notification_; +}; + +std::unique_ptr +HostExecutor::CreateEventImplementation() { + return std::unique_ptr(new HostEvent()); +} + +static HostEvent *AsHostEvent(Event *event) { + DCHECK(event != nullptr); + return static_cast(event->implementation()); +} + +port::Status HostExecutor::AllocateEvent(Event * /*event*/) { + return port::Status::OK(); +} + +port::Status HostExecutor::DeallocateEvent(Event * /*event*/) { + return port::Status::OK(); +} + +port::Status HostExecutor::RecordEvent(Stream *stream, Event *event) { + std::shared_ptr notification = + AsHostEvent(event)->notification(); + AsHostStream(stream)->EnqueueTask([notification]() { + CHECK(!notification->HasBeenNotified()); + notification->Notify(); + }); + return port::Status::OK(); +} + +port::Status HostExecutor::WaitForEvent(Stream *stream, Event *event) { + std::shared_ptr notification = + AsHostEvent(event)->notification(); + AsHostStream(stream)->EnqueueTask( + [notification]() { 
notification->WaitForNotification(); }); + return port::Status::OK(); +} + +Event::Status HostExecutor::PollForEventStatus(Event *event) { + absl::Notification ¬ification = *AsHostEvent(event)->notification(); + return notification.HasBeenNotified() ? Event::Status::kComplete + : Event::Status::kPending; +} + bool HostExecutor::StartTimer(Stream *stream, Timer *timer) { dynamic_cast(timer->implementation())->Start(stream); return true; diff --git a/tensorflow/stream_executor/host/host_gpu_executor.h b/tensorflow/stream_executor/host/host_gpu_executor.h index a1dbb9f40e9..dfe43e1f43a 100644 --- a/tensorflow/stream_executor/host/host_gpu_executor.h +++ b/tensorflow/stream_executor/host/host_gpu_executor.h @@ -106,25 +106,11 @@ class HostExecutor : public internal::StreamExecutorInterface { bool HostCallback(Stream *stream, std::function callback) override; - port::Status AllocateEvent(Event *event) override { - return port::Status(port::error::UNIMPLEMENTED, ""); - } - - port::Status DeallocateEvent(Event *event) override { - return port::Status(port::error::UNIMPLEMENTED, ""); - } - - port::Status RecordEvent(Stream *stream, Event *event) override { - return port::Status(port::error::UNIMPLEMENTED, ""); - } - - port::Status WaitForEvent(Stream *stream, Event *event) override { - return port::Status(port::error::UNIMPLEMENTED, ""); - } - - Event::Status PollForEventStatus(Event *event) override { - return Event::Status::kError; - } + port::Status AllocateEvent(Event *event) override; + port::Status DeallocateEvent(Event *event) override; + port::Status RecordEvent(Stream *stream, Event *event) override; + port::Status WaitForEvent(Stream *stream, Event *event) override; + Event::Status PollForEventStatus(Event *event) override; bool AllocateStream(Stream *stream) override; void DeallocateStream(Stream *stream) override; @@ -190,10 +176,7 @@ class HostExecutor : public internal::StreamExecutorInterface { rng::RngSupport *CreateRng() override; std::unique_ptr 
CreateEventImplementation() - override { - LOG(WARNING) << "Events not currently supported by HostExecutor."; - return nullptr; - } + override; std::unique_ptr CreateKernelImplementation() override { diff --git a/tensorflow/stream_executor/rocm/rocm_dnn.cc b/tensorflow/stream_executor/rocm/rocm_dnn.cc index 4a0df0af171..efe49ddcf3f 100644 --- a/tensorflow/stream_executor/rocm/rocm_dnn.cc +++ b/tensorflow/stream_executor/rocm/rocm_dnn.cc @@ -115,7 +115,6 @@ class MIOpenHandle { namespace wrap { #ifdef PLATFORM_GOOGLE - #define STREAM_EXECUTOR_MIOPEN_WRAP(__name) \ struct WrapperShim__##__name { \ template \ @@ -162,6 +161,7 @@ namespace wrap { __macro(miopenBatchNormalizationForwardInference) \ __macro(miopenBatchNormalizationForwardTraining) \ __macro(miopenGetConvolutionForwardOutputDim) \ + __macro(miopenGetConvolutionNdForwardOutputDim) \ __macro(miopenFindConvolutionForwardAlgorithm) \ __macro(miopenCreateTensorDescriptor) \ __macro(miopenDestroyTensorDescriptor) \ @@ -183,7 +183,9 @@ namespace wrap { __macro(miopenConvolutionBackwardBias) \ __macro(miopenConvolutionForwardGetWorkSpaceSize) \ __macro(miopenInitConvolutionDescriptor) \ + __macro(miopenInitConvolutionNdDescriptor) \ __macro(miopenGetConvolutionDescriptor) \ + __macro(miopenGetConvolutionNdDescriptor) \ __macro(miopenSetConvolutionGroupCount) \ __macro(miopenSet4dTensorDescriptor) \ __macro(miopenGetTensorDescriptor) \ @@ -282,28 +284,29 @@ uint64 GetHashValue(miopenTensorDescriptor_t tensor_desc) { uint64 GetHashValue(miopenConvolutionDescriptor_t conv_desc) { miopenConvolutionMode_t c_mode = miopenConvolution; - int pad_h = 0, pad_w = 0, u = 0, v = 0, dilation_h = 0, dilation_w = 0; - wrap::miopenGetConvolutionDescriptor(conv_desc, &c_mode, &pad_h, &pad_w, &u, - &v, &dilation_h, &dilation_w); + int nd = 0; + wrap::miopenGetConvolutionNdDescriptor(conv_desc, 0, &nd, nullptr, nullptr, + nullptr, &c_mode); + + std::vector stride(nd); + std::vector pad(nd); + std::vector dilation(nd); + + 
wrap::miopenGetConvolutionNdDescriptor( + conv_desc, nd, &nd, pad.data(), stride.data(), dilation.data(), &c_mode); uint64 hash_value = tensorflow::hash()(c_mode); - hash_value = - tensorflow::Hash64Combine(hash_value, tensorflow::hash()(pad_h)); - hash_value = - tensorflow::Hash64Combine(hash_value, tensorflow::hash()(pad_w)); - hash_value = - tensorflow::Hash64Combine(hash_value, tensorflow::hash()(u)); - hash_value = - tensorflow::Hash64Combine(hash_value, tensorflow::hash()(v)); - hash_value = tensorflow::Hash64Combine(hash_value, - tensorflow::hash()(dilation_h)); - hash_value = tensorflow::Hash64Combine(hash_value, - tensorflow::hash()(dilation_w)); + auto hash64Combine = [&hash_value](int element) { + tensorflow::Hash64Combine(hash_value, tensorflow::hash()(element)); + }; + std::for_each(pad.begin(), pad.end(), hash64Combine); + std::for_each(stride.begin(), stride.end(), hash64Combine); + std::for_each(dilation.begin(), dilation.end(), hash64Combine); return hash_value; } -// Class to implement a cache of compiled fusion plans. +// Class to implement a cache of compiled fusion plans class CachedFusionPlans { public: // Check if we already have a fusion_plan corresponding to the given hash @@ -340,7 +343,7 @@ class CachedFusionPlans { return found_cached_plan; } - // Need to figure out the right place to call this routine. + // Need to figure out the right place to call this routine static void Clear() { absl::MutexLock lock{&cached_plans_mutex}; @@ -357,24 +360,24 @@ class CachedFusionPlans { unsupported_plans.clear(); } - // Is the Fusion plan corresponding to this hash unsupported. + // Is the Fusion plan corresponding to this hash unsupported static bool IsUnsupportedFusionPlan(uint64 hash) { absl::MutexLock lock{&cached_plans_mutex}; return unsupported_plans.count(hash) > 0; } - // Mark the given hash value as corresponding to an unsupported fusion plan. 
+ // Mark the given hash value as corresponding to an unsupported fusion plan static void MarkFusionPlanUnsupported(uint64 hash) { absl::MutexLock lock{&cached_plans_mutex}; unsupported_plans.insert(hash); } private: - // Mutex to guard access to all data within this class. + // Mutex to guard access to all data within this class static absl::Mutex cached_plans_mutex; - // Map of hash-value to MIOpen Fusion plan descriptors. - // Need to be able share this across more than one stream and hence static. + // Map of hash-value to MIOpen Fusion plan descriptors + // Need to be able share this across more than one stream and hence static static std::map cached_plans; // Set of hash-values that correspond to MIOpen Fusion plans that will fail @@ -386,6 +389,10 @@ absl::Mutex CachedFusionPlans::cached_plans_mutex; std::map CachedFusionPlans::cached_plans; std::set CachedFusionPlans::unsupported_plans; +} // namespace + +namespace { + miopenHandle_t ToHandle(void* opaque_handle) { return static_cast(opaque_handle); } @@ -538,10 +545,6 @@ class ScopedTensorDescriptor { case dnn::DataLayout::kBatchYXDepth: case dnn::DataLayout::kBatchDepthYX: { const int nd = batch_descriptor.ndims() + 2; - if (nd != 4) { - LOG(FATAL) << "miopen only supports 4D tensors, dim=" << nd - << " not allowed"; - } // MIOpen requires the strides and dims to be ordered as BDYX. 
std::vector strides64 = @@ -556,8 +559,8 @@ class ScopedTensorDescriptor { &CheckedNarrowing); std::transform(dims64.cbegin(), dims64.cend(), dims.begin(), &CheckedNarrowing); - status = wrap::miopenSet4dTensorDescriptor(handle_, elem_type, dims[0], - dims[1], dims[2], dims[3]); + status = wrap::miopenSetTensorDescriptor(handle_, elem_type, nd, + dims.data(), strides.data()); if (status != miopenStatusSuccess) { LOG(FATAL) << "could not convert BatchDescriptor " @@ -604,19 +607,14 @@ class ScopedFilterDescriptor { const int nd = batch_descriptor.ndims() + 2; - if (nd != 4) { - LOG(FATAL) << "miopen only supports 4D filters, dim=" << nd - << "not allowed" << ToString(status); - } - std::vector dims(2 + filter_descriptor.ndims()); dims[0] = filter_descriptor.output_feature_map_count(); dims[1] = filter_descriptor.input_feature_map_count(); const auto& spatial_dims = filter_descriptor.input_filter_dims(); std::copy(spatial_dims.begin(), spatial_dims.end(), dims.begin() + 2); - status = wrap::miopenSet4dTensorDescriptor(handle_, elem_type, dims[0], - dims[1], dims[2], dims[3]); + status = wrap::miopenSetTensorDescriptor(handle_, elem_type, nd, + dims.data(), nullptr); if (status != miopenStatusSuccess) { LOG(FATAL) << "could not set miopen filter descriptor: " << ToString(status); @@ -667,11 +665,15 @@ class ScopedConvolutionDescriptor { &CheckedNarrowing); std::transform(padding64.cbegin(), padding64.cend(), padding.begin(), &CheckedNarrowing); - std::vector upscale(convolution_descriptor.ndims(), 1); - status = wrap::miopenInitConvolutionDescriptor( - handle_, miopenConvolution, padding[0], padding[1], strides[0], - strides[1], upscale[0], upscale[1]); + std::vector upscale(convolution_descriptor.ndims()); + const auto& dilations64 = convolution_descriptor.dilations(); + std::transform(dilations64.cbegin(), dilations64.cend(), upscale.begin(), + &CheckedNarrowing); + + status = wrap::miopenInitConvolutionNdDescriptor( + handle_, convolution_descriptor.ndims(), 
padding.data(), strides.data(), + upscale.data(), miopenConvolution); if (status != miopenStatusSuccess) { LOG(FATAL) << "could not set miopen convolution descriptor: " << ToString(status); @@ -4003,9 +4005,8 @@ bool MIOpenSupport::DeriveOutputBatchDescriptor( int dn = batch_descriptor.ndims() + 2; std::vector dims(dn); // in BDYX - auto status = wrap::miopenGetConvolutionForwardOutputDim( - conv.handle(), input_nd.handle(), filter.handle(), &dims[0], &dims[1], - &dims[2], &dims[3]); + auto status = wrap::miopenGetConvolutionNdForwardOutputDim( + conv.handle(), input_nd.handle(), filter.handle(), &dn, dims.data()); if (status != miopenStatusSuccess) { LOG(ERROR) << "could not get output tensor for convolution: " << ToString(status); diff --git a/tensorflow/stream_executor/rocm/rocm_driver_wrapper.h b/tensorflow/stream_executor/rocm/rocm_driver_wrapper.h index ba803edaafb..bc5b6a87888 100644 --- a/tensorflow/stream_executor/rocm/rocm_driver_wrapper.h +++ b/tensorflow/stream_executor/rocm/rocm_driver_wrapper.h @@ -27,10 +27,6 @@ limitations under the License. 
#include "tensorflow/stream_executor/platform/dso_loader.h" #include "tensorflow/stream_executor/platform/port.h" -#if defined(TENSORFLOW_USE_ROCM) - -#endif - namespace tensorflow { namespace wrap { #ifdef PLATFORM_GOOGLE @@ -83,8 +79,8 @@ namespace wrap { __macro(hipDeviceTotalMem) \ __macro(hipDriverGetVersion) \ __macro(hipEventCreateWithFlags) \ - __macro(hipEventElapsedTime) \ __macro(hipEventDestroy) \ + __macro(hipEventElapsedTime) \ __macro(hipEventQuery) \ __macro(hipEventRecord) \ __macro(hipEventSynchronize) \ diff --git a/tensorflow/stream_executor/rocm/rocm_fft.cc b/tensorflow/stream_executor/rocm/rocm_fft.cc index 2af973309c0..d2c542fef18 100644 --- a/tensorflow/stream_executor/rocm/rocm_fft.cc +++ b/tensorflow/stream_executor/rocm/rocm_fft.cc @@ -86,21 +86,33 @@ namespace wrap { #endif -#define ROCFFT_ROUTINE_EACH(__macro) \ - __macro(hipfftDestroy) __macro(hipfftSetStream) __macro(hipfftPlan1d) \ - __macro(hipfftPlan2d) __macro(hipfftPlan3d) __macro(hipfftPlanMany) \ - __macro(hipfftCreate) __macro(hipfftSetAutoAllocation) \ - __macro(hipfftSetWorkArea) __macro(hipfftGetSize1d) \ - __macro(hipfftMakePlan1d) __macro(hipfftGetSize2d) \ - __macro(hipfftMakePlan2d) __macro(hipfftGetSize3d) \ - __macro(hipfftMakePlan3d) __macro(hipfftGetSizeMany) \ - __macro(hipfftMakePlanMany) \ - __macro(hipfftExecD2Z) \ - __macro(hipfftExecZ2D) \ - __macro(hipfftExecC2C) \ - __macro(hipfftExecC2R) \ - __macro(hipfftExecZ2Z) \ - __macro(hipfftExecR2C) +// clang-format off +#define ROCFFT_ROUTINE_EACH(__macro) \ + __macro(hipfftDestroy) \ + __macro(hipfftSetStream) \ + __macro(hipfftPlan1d) \ + __macro(hipfftPlan2d) \ + __macro(hipfftPlan3d) \ + __macro(hipfftPlanMany) \ + __macro(hipfftCreate) \ + __macro(hipfftSetAutoAllocation) \ + __macro(hipfftSetWorkArea) \ + __macro(hipfftGetSize1d) \ + __macro(hipfftMakePlan1d) \ + __macro(hipfftGetSize2d) \ + __macro(hipfftMakePlan2d) \ + __macro(hipfftGetSize3d) \ + __macro(hipfftMakePlan3d) \ + __macro(hipfftGetSizeMany) \ + 
__macro(hipfftMakePlanMany) \ + __macro(hipfftExecD2Z) \ + __macro(hipfftExecZ2D) \ + __macro(hipfftExecC2C) \ + __macro(hipfftExecC2R) \ + __macro(hipfftExecZ2Z) \ + __macro(hipfftExecR2C) + +// clang-format on ROCFFT_ROUTINE_EACH(STREAM_EXECUTOR_ROCFFT_WRAP) diff --git a/tensorflow/stream_executor/rocm/rocm_gpu_executor.cc b/tensorflow/stream_executor/rocm/rocm_gpu_executor.cc index 98bc68929b9..e37d6d24232 100644 --- a/tensorflow/stream_executor/rocm/rocm_gpu_executor.cc +++ b/tensorflow/stream_executor/rocm/rocm_gpu_executor.cc @@ -110,6 +110,7 @@ GpuExecutor::~GpuExecutor() { if (context_ != nullptr) { GpuDriver::DestroyContext(context_); } + CHECK(kernel_to_gpu_binary_.empty()) << "GpuExecutor has live kernels."; CHECK(gpu_binary_to_module_.empty()) << "GpuExecutor has loaded modules."; } bool GpuExecutor::UnloadModule(ModuleHandle module_handle) { @@ -136,7 +137,19 @@ bool GpuExecutor::UnloadGpuBinary(const void* gpu_binary) { } void GpuExecutor::UnloadKernel(const KernelBase* kernel) { - LOG(FATAL) << "Feature not supported on ROCM platform (UnloadKernel)"; + VLOG(3) << "Unloading kernel " << kernel << " : " << kernel->name(); + + absl::MutexLock lock{&in_memory_modules_mu_}; + auto gpu_binary_it = kernel_to_gpu_binary_.find(kernel); + if (kernel_to_gpu_binary_.end() == gpu_binary_it) { + VLOG(3) << "Kernel " << kernel << " : " << kernel->name() + << " has never been loaded."; + return; // We've never seen this kernel. 
+ } + VLOG(3) << "Kernel " << kernel << " : " << kernel->name() + << " has loaded GPU code " << gpu_binary_it->second; + UnloadGpuBinary(gpu_binary_it->second); + kernel_to_gpu_binary_.erase(gpu_binary_it); } port::Status GpuExecutor::Init(int device_ordinal, @@ -244,8 +257,8 @@ bool GpuExecutor::GetKernel(const MultiKernelLoaderSpec& spec, LOG(ERROR) << "failed to load HSACO\n"; return false; } - in_memory_modules_[hsaco] = module; } + kernel_to_gpu_binary_[kernel] = hsaco; } else { LOG(WARNING) << "no method of loading ROCM kernel provided"; return false; @@ -401,6 +414,7 @@ bool GpuExecutor::LoadModuleFromHsaco(const char* hsaco, hipModule_t* module) { return false; } module_refcount = 1; + in_memory_modules_[hsaco] = *module; VLOG(3) << "Loaded HSACO " << static_cast(hsaco) << " as module " << *module; } else { @@ -765,29 +779,6 @@ bool GpuExecutor::DeviceMemoryUsage(int64* free, int64* total) const { bool GpuExecutor::GetSymbol(const string& symbol_name, ModuleHandle module_handle, void** mem, size_t* bytes) { - { // give limited scope to lock - absl::MutexLock lock{&disk_modules_mu_}; - for (auto& it : disk_modules_) { - if (GpuDriver::GetModuleSymbol(context_, it.second, symbol_name.c_str(), - reinterpret_cast(mem), - bytes)) { - return true; - } - } - } - - { // give limited scope to lock - absl::MutexLock lock{&in_memory_modules_mu_}; - for (auto& it : in_memory_modules_) { - if (GpuDriver::GetModuleSymbol(context_, it.second, symbol_name.c_str(), - reinterpret_cast(mem), - bytes)) { - return true; - } - } - } - - { // give limited scope to lock absl::MutexLock lock{&in_memory_modules_mu_}; if (static_cast(module_handle)) { auto it = gpu_binary_to_module_.find(module_handle.id()); @@ -806,7 +797,6 @@ bool GpuExecutor::GetSymbol(const string& symbol_name, return true; } } - } LOG(INFO) << "Falied to find symbol in any modules: " << symbol_name; return false; diff --git a/tensorflow/stream_executor/rocm/rocm_rng.cc 
b/tensorflow/stream_executor/rocm/rocm_rng.cc index 38f4f8bb0c6..2492cc0e5d9 100644 --- a/tensorflow/stream_executor/rocm/rocm_rng.cc +++ b/tensorflow/stream_executor/rocm/rocm_rng.cc @@ -14,12 +14,11 @@ limitations under the License. ==============================================================================*/ #include "rocm/include/hiprand/hiprand.h" -#include "tensorflow/stream_executor/gpu/gpu_rng.h" - #include "tensorflow/stream_executor/device_memory.h" #include "tensorflow/stream_executor/gpu/gpu_activation.h" #include "tensorflow/stream_executor/gpu/gpu_executor.h" #include "tensorflow/stream_executor/gpu/gpu_helpers.h" +#include "tensorflow/stream_executor/gpu/gpu_rng.h" #include "tensorflow/stream_executor/gpu/gpu_stream.h" #include "tensorflow/stream_executor/lib/env.h" #include "tensorflow/stream_executor/lib/initialize.h" diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 64cbc7c370e..5d9aba8637a 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -44,6 +44,7 @@ load( load( "//third_party/mkl_dnn:build_defs.bzl", "if_mkl_open_source_only", + "if_mkl_v1_open_source_only", ) load( "//third_party/ngraph:build_defs.bzl", @@ -291,6 +292,7 @@ def tf_copts(android_optimization_level_override = "-O2", is_external = False): if_tensorrt(["-DGOOGLE_TENSORRT=1"]) + if_mkl(["-DINTEL_MKL=1", "-DEIGEN_USE_VML"]) + if_mkl_open_source_only(["-DINTEL_MKL_DNN_ONLY"]) + + if_mkl_v1_open_source_only(["-DENABLE_MKLDNN_V1"]) + if_enable_mkl(["-DENABLE_MKL"]) + if_ngraph(["-DINTEL_NGRAPH=1"]) + if_mkl_lnx_x64(["-fopenmp"]) + @@ -2490,3 +2492,6 @@ def if_mlir(if_true, if_false = []): "//conditions:default": if_false, "//tensorflow:with_mlir_support": if_true, }) + +def tfcompile_extra_flags(): + return "" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-ragged-tensor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-ragged-tensor.pbtxt index 972e7d12daa..2f7918843dd 100644 --- 
a/tensorflow/tools/api/golden/v1/tensorflow.-ragged-tensor.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.-ragged-tensor.pbtxt @@ -87,6 +87,10 @@ tf_class { name: "nested_row_lengths" argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "nested_value_rowids" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "nrows" argspec: "args=[\'self\', \'out_type\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optimization-options.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optimization-options.pbtxt index 3d009186c7e..f7301ff180c 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optimization-options.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optimization-options.pbtxt @@ -15,6 +15,10 @@ tf_class { name: "autotune_algorithm" mtype: "" } + member { + name: "autotune_buffers" + mtype: "" + } member { name: "autotune_cpu_budget" mtype: "" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt index 50b3b399a9f..178daad4a2a 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt @@ -1388,6 +1388,10 @@ tf_module { name: "global_variables_initializer" argspec: "args=[], varargs=None, keywords=None, defaults=None" } + member_method { + name: "grad_pass_through" + argspec: "args=[\'f\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "gradients" argspec: "args=[\'ys\', \'xs\', \'grad_ys\', \'name\', \'colocate_gradients_with_ops\', \'gate_gradients\', \'aggregation_method\', \'stop_gradients\', \'unconnected_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'gradients\', \'False\', \'False\', \'None\', \'None\', 
\'UnconnectedGradients.NONE\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-ragged-tensor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-ragged-tensor.pbtxt index 972e7d12daa..2f7918843dd 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.-ragged-tensor.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.-ragged-tensor.pbtxt @@ -87,6 +87,10 @@ tf_class { name: "nested_row_lengths" argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "nested_value_rowids" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "nrows" argspec: "args=[\'self\', \'out_type\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optimization-options.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optimization-options.pbtxt index 3d009186c7e..f7301ff180c 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optimization-options.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optimization-options.pbtxt @@ -15,6 +15,10 @@ tf_class { name: "autotune_algorithm" mtype: "" } + member { + name: "autotune_buffers" + mtype: "" + } member { name: "autotune_cpu_budget" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt index 6d15fa0c841..33c4610d97b 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt @@ -664,6 +664,10 @@ tf_module { name: "get_static_value" argspec: "args=[\'tensor\', \'partial\'], varargs=None, keywords=None, defaults=[\'False\'], " } + member_method { + name: "grad_pass_through" + argspec: "args=[\'f\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "gradients" argspec: "args=[\'ys\', \'xs\', \'grad_ys\', \'name\', 
\'gate_gradients\', \'aggregation_method\', \'stop_gradients\', \'unconnected_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'gradients\', \'False\', \'None\', \'None\', \'UnconnectedGradients.NONE\'], " diff --git a/tensorflow/tools/api/tests/deprecation_test.py b/tensorflow/tools/api/tests/deprecation_test.py index 8f6748f5787..3a5cf0d043e 100644 --- a/tensorflow/tools/api/tests/deprecation_test.py +++ b/tensorflow/tools/api/tests/deprecation_test.py @@ -23,9 +23,9 @@ import tensorflow as tf from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging as logging -from tensorflow.python.util import deprecation_wrapper +from tensorflow.python.util import module_wrapper -deprecation_wrapper._PER_MODULE_WARNING_LIMIT = 5 +module_wrapper._PER_MODULE_WARNING_LIMIT = 5 class DeprecationTest(test.TestCase): @@ -38,9 +38,8 @@ class DeprecationTest(test.TestCase): tf.tables_initializer() self.assertEqual(1, mock_warning.call_count) - self.assertRegexpMatches( - mock_warning.call_args[0][1], - "deprecation_test.py:") + self.assertRegexpMatches(mock_warning.call_args[0][1], + "module_wrapper.py:") self.assertRegexpMatches( mock_warning.call_args[0][2], r"tables_initializer") self.assertRegexpMatches( @@ -60,9 +59,8 @@ class DeprecationTest(test.TestCase): tf.ragged.RaggedTensorValue(value, row_splits) self.assertEqual(1, mock_warning.call_count) - self.assertRegexpMatches( - mock_warning.call_args[0][1], - "deprecation_test.py:") + self.assertRegexpMatches(mock_warning.call_args[0][1], + "module_wrapper.py:") self.assertRegexpMatches( mock_warning.call_args[0][2], r"ragged.RaggedTensorValue") self.assertRegexpMatches( @@ -84,9 +82,8 @@ class DeprecationTest(test.TestCase): tf.sparse_mask(array, mask_indices) self.assertEqual(1, mock_warning.call_count) - self.assertRegexpMatches( - mock_warning.call_args[0][1], - "deprecation_test.py:") + self.assertRegexpMatches(mock_warning.call_args[0][1], + "module_wrapper.py:") 
self.assertRegexpMatches( mock_warning.call_args[0][2], r"sparse_mask") self.assertRegexpMatches( @@ -103,9 +100,8 @@ class DeprecationTest(test.TestCase): tf.VarLenFeature(tf.dtypes.int32) self.assertEqual(1, mock_warning.call_count) - self.assertRegexpMatches( - mock_warning.call_args[0][1], - "deprecation_test.py:") + self.assertRegexpMatches(mock_warning.call_args[0][1], + "module_wrapper.py:") self.assertRegexpMatches( mock_warning.call_args[0][2], r"VarLenFeature") self.assertRegexpMatches( @@ -122,9 +118,8 @@ class DeprecationTest(test.TestCase): tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY # pylint: disable=pointless-statement self.assertEqual(1, mock_warning.call_count) - self.assertRegexpMatches( - mock_warning.call_args[0][1], - "deprecation_test.py:") + self.assertRegexpMatches(mock_warning.call_args[0][1], + "module_wrapper.py:") self.assertRegexpMatches( mock_warning.call_args[0][2], r"saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY") diff --git a/tensorflow/tools/api/tests/module_test.py b/tensorflow/tools/api/tests/module_test.py index 787df35ac30..257d558cda7 100644 --- a/tensorflow/tools/api/tests/module_test.py +++ b/tensorflow/tools/api/tests/module_test.py @@ -38,6 +38,11 @@ class ModuleTest(test.TestCase): def testDict(self): # Check that a few modules are in __dict__. + # pylint: disable=pointless-statement + tf.nn + tf.keras + tf.image + # pylint: enable=pointless-statement self.assertIn('nn', tf.__dict__) self.assertIn('keras', tf.__dict__) self.assertIn('image', tf.__dict__) diff --git a/tensorflow/tools/ci_build/Dockerfile.debian.jessie.cpu b/tensorflow/tools/ci_build/Dockerfile.debian.jessie.cpu index 438ffa4821d..cf04ebb4620 100644 --- a/tensorflow/tools/ci_build/Dockerfile.debian.jessie.cpu +++ b/tensorflow/tools/ci_build/Dockerfile.debian.jessie.cpu @@ -5,7 +5,7 @@ LABEL maintainer="Jan Prach " # Copy and run the install scripts. 
COPY install/*.sh /install/ RUN /install/install_bootstrap_deb_packages.sh -RUN echo "deb http://archive.debian.org/debian jessie-backports main" | \ +RUN echo "deb [check-valid-until=no] http://archive.debian.org/debian jessie-backports main" | \ tee -a /etc/apt/sources.list # Workaround bug in Jessie backport repository deb packages # http://serverfault.com/questions/830636/cannot-install-openjdk-8-jre-headless-on-debian-jessie diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh index 62c1e014d5e..49bb51465a4 100755 --- a/tensorflow/tools/ci_build/ci_parameterized_build.sh +++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh @@ -98,6 +98,8 @@ # # This script can be used by Jenkins parameterized / matrix builds. +set -ex + # Helper function: Convert to lower case to_lower () { echo "$1" | tr '[:upper:]' '[:lower:]' diff --git a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile index 9f10a7f03a3..02d8f89919e 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile @@ -72,7 +72,7 @@ RUN [[ "${ARCH}" = "ppc64le" ]] || { apt-get update && \ # Configure the build for our CUDA configuration. 
ENV CI_BUILD_PYTHON python -ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH ENV TF_NEED_CUDA 1 ENV TF_NEED_TENSORRT 1 ENV TF_CUDA_COMPUTE_CAPABILITIES=3.5,5.2,6.0,6.1,7.0 diff --git a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile index 1d258e0404a..6d00ef3c115 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile @@ -72,7 +72,7 @@ RUN [[ "${ARCH}" = "ppc64le" ]] || { apt-get update && \ # Configure the build for our CUDA configuration. ENV CI_BUILD_PYTHON python -ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH ENV TF_NEED_CUDA 1 ENV TF_NEED_TENSORRT 1 ENV TF_CUDA_COMPUTE_CAPABILITIES=3.5,5.2,6.0,6.1,7.0 diff --git a/tensorflow/tools/dockerfiles/dockerfiles/gpu-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/gpu-jupyter.Dockerfile index 981d42c5759..fde7c9e8c39 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/gpu-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/gpu-jupyter.Dockerfile @@ -58,7 +58,7 @@ RUN [ ${ARCH} = ppc64le ] || (apt-get update && \ && rm -rf /var/lib/apt/lists/*) # For CUDA profiling, TensorFlow requires CUPTI. 
-ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH ARG USE_PYTHON_3_NOT_2 ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3} diff --git a/tensorflow/tools/dockerfiles/dockerfiles/gpu.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/gpu.Dockerfile index 270524ae298..a6ff1a5ccea 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/gpu.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/gpu.Dockerfile @@ -58,7 +58,7 @@ RUN [ ${ARCH} = ppc64le ] || (apt-get update && \ && rm -rf /var/lib/apt/lists/*) # For CUDA profiling, TensorFlow requires CUPTI. -ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH ARG USE_PYTHON_3_NOT_2 ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3} diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le-jupyter.Dockerfile index 7dcaa72ab90..a05c718f6fb 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le-jupyter.Dockerfile @@ -72,7 +72,7 @@ RUN [[ "${ARCH}" = "ppc64le" ]] || { apt-get update && \ # Configure the build for our CUDA configuration. 
ENV CI_BUILD_PYTHON python -ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH ENV TF_NEED_CUDA 1 ENV TF_NEED_TENSORRT 1 ENV TF_CUDA_COMPUTE_CAPABILITIES=3.5,5.2,6.0,6.1,7.0 diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le.Dockerfile index 9a5a5c5cb9a..44d91ad067f 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le.Dockerfile @@ -72,7 +72,7 @@ RUN [[ "${ARCH}" = "ppc64le" ]] || { apt-get update && \ # Configure the build for our CUDA configuration. ENV CI_BUILD_PYTHON python -ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH ENV TF_NEED_CUDA 1 ENV TF_NEED_TENSORRT 1 ENV TF_CUDA_COMPUTE_CAPABILITIES=3.5,5.2,6.0,6.1,7.0 diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le-jupyter.Dockerfile index f9a0aa8ab6c..b2f1ce152c2 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le-jupyter.Dockerfile @@ -58,7 +58,7 @@ RUN [ ${ARCH} = ppc64le ] || (apt-get update && \ && rm -rf /var/lib/apt/lists/*) # For CUDA profiling, TensorFlow requires CUPTI. 
-ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH ARG USE_PYTHON_3_NOT_2 ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3} diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le.Dockerfile index 62f17046468..3422eadb60c 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le.Dockerfile @@ -58,7 +58,7 @@ RUN [ ${ARCH} = ppc64le ] || (apt-get update && \ && rm -rf /var/lib/apt/lists/*) # For CUDA profiling, TensorFlow requires CUPTI. -ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH ARG USE_PYTHON_3_NOT_2 ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3} diff --git a/tensorflow/tools/dockerfiles/partials/ubuntu/devel-nvidia.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/devel-nvidia.partial.Dockerfile index ad68e0c8a5f..fc0976b023f 100644 --- a/tensorflow/tools/dockerfiles/partials/ubuntu/devel-nvidia.partial.Dockerfile +++ b/tensorflow/tools/dockerfiles/partials/ubuntu/devel-nvidia.partial.Dockerfile @@ -49,7 +49,7 @@ RUN [[ "${ARCH}" = "ppc64le" ]] || { apt-get update && \ # Configure the build for our CUDA configuration. 
ENV CI_BUILD_PYTHON python -ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH ENV TF_NEED_CUDA 1 ENV TF_NEED_TENSORRT 1 ENV TF_CUDA_COMPUTE_CAPABILITIES=3.5,5.2,6.0,6.1,7.0 diff --git a/tensorflow/tools/dockerfiles/partials/ubuntu/nvidia.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/nvidia.partial.Dockerfile index aeee9f7d689..b09c6456e9c 100644 --- a/tensorflow/tools/dockerfiles/partials/ubuntu/nvidia.partial.Dockerfile +++ b/tensorflow/tools/dockerfiles/partials/ubuntu/nvidia.partial.Dockerfile @@ -35,4 +35,4 @@ RUN [ ${ARCH} = ppc64le ] || (apt-get update && \ && rm -rf /var/lib/apt/lists/*) # For CUDA profiling, TensorFlow requires CUPTI. -ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH diff --git a/tensorflow/tools/docs/BUILD b/tensorflow/tools/docs/BUILD index bb3757099c3..767c918a145 100644 --- a/tensorflow/tools/docs/BUILD +++ b/tensorflow/tools/docs/BUILD @@ -140,6 +140,7 @@ py_test( "nomsan", "notsan", "optonly", + "notap", ], deps = [ ":generate2_lib", diff --git a/tensorflow/tools/docs/generate2.py b/tensorflow/tools/docs/generate2.py index a6d01ae9306..0e8cba27e14 100644 --- a/tensorflow/tools/docs/generate2.py +++ b/tensorflow/tools/docs/generate2.py @@ -150,6 +150,16 @@ _raw_ops_doc = textwrap.dedent("""\n if LooseVersion(tf.__version__) < LooseVersion('2'): tf.raw_ops.__doc__ = _raw_ops_doc + tf.contrib.__doc__ = """ + Contrib module containing volatile or experimental code. + + Warning: The `tf.contrib` module will not be included in TensorFlow 2.0. Many + of its submodules have been integrated into TensorFlow core, or spun-off into + other projects like [`tensorflow_io`](https://github.com/tensorflow/io), or + [`tensorflow_addons`](https://github.com/tensorflow/addons). 
For instructions + on how to upgrade see the + [Migration guide](https://www.tensorflow.org/beta/guide/migration_guide). + """ else: tf.raw_ops.__doc__ += _raw_ops_doc diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 9d32e54eada..85fc4faaa74 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -171,6 +171,7 @@ filegroup( "@farmhash_archive//:COPYING", "@fft2d//:fft2d/readme2d.txt", "@flatbuffers//:LICENSE.txt", + "@functools32_archive//:LICENSE", "@gast_archive//:PKG-INFO", "@gemmlowp//:LICENSE", "@gif_archive//:COPYING", diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh index 8c19ca010e9..5420769e25d 100755 --- a/tensorflow/tools/pip_package/build_pip_package.sh +++ b/tensorflow/tools/pip_package/build_pip_package.sh @@ -178,15 +178,11 @@ function prepare_src() { # # import tensorflow as tf # - # which is not ok. We are removing the deprecation stuff by using sed and - # deleting the pattern that the wrapper uses (all lines between a line ending - # with _deprecation_wrapper -- the import line -- and a line containing - # _sys.modules[__name__] as the argument of a function -- the last line in - # the deprecation autogenerated pattern) + # which is not ok. 
We disable deprecation by using sed to toggle the flag # TODO(mihaimaruseac): When we move the API to root, remove this hack # Note: Can't do in place sed that works on all OS, so use a temp file instead sed \ - "/_deprecation_wrapper$/,/_sys.modules[__name__],/ d" \ + "s/deprecation=True/deprecation=False/g" \ "${TMPDIR}/tensorflow_core/__init__.py" > "${TMPDIR}/tensorflow_core/__init__.out" mv "${TMPDIR}/tensorflow_core/__init__.out" "${TMPDIR}/tensorflow_core/__init__.py" } diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 5868b763d77..6661f328416 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -52,6 +52,8 @@ _VERSION = '1.14.0' REQUIRED_PACKAGES = [ 'absl-py >= 0.7.0', 'astor >= 0.6.0', + 'backports.weakref >= 1.0rc1;python_version<"3.4"', + 'enum34 >= 1.1.6;python_version<"3.4"', 'gast >= 0.2.0', 'google_pasta >= 0.1.6', 'keras_applications >= 1.0.8', @@ -86,22 +88,19 @@ else: REQUIRED_PACKAGES.append('wheel') # mock comes with unittest.mock for python3, need to install for python2 REQUIRED_PACKAGES.append('mock >= 2.0.0') + # functools comes with python3, need to install the backport for python2 + REQUIRED_PACKAGES.append('functools32 >= 3.2.3') # tf-nightly should depend on tb-nightly if 'tf_nightly' in project_name: for i, pkg in enumerate(REQUIRED_PACKAGES): if 'tensorboard' in pkg: - REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.14.0a0, < 1.15.0a0' + REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.15.0a0, < 1.16.0a0' elif 'tensorflow_estimator' in pkg and '2.0' in project_name: REQUIRED_PACKAGES[i] = 'tensorflow-estimator-2.0-preview' elif 'tensorflow_estimator' in pkg: REQUIRED_PACKAGES[i] = 'tf-estimator-nightly' -# weakref.finalize and enum were introduced in Python 3.4 -if sys.version_info < (3, 4): - REQUIRED_PACKAGES.append('backports.weakref >= 1.0rc1') - REQUIRED_PACKAGES.append('enum34 >= 1.1.6') - # pylint: disable=line-too-long CONSOLE_SCRIPTS = [ 
'toco_from_protos = tensorflow.lite.toco.python.toco_from_protos:main', diff --git a/tensorflow/virtual_root_template_v1.__init__.py b/tensorflow/virtual_root_template_v1.__init__.py index bb076759e60..785043a1a3f 100644 --- a/tensorflow/virtual_root_template_v1.__init__.py +++ b/tensorflow/virtual_root_template_v1.__init__.py @@ -98,28 +98,6 @@ for _m in _top_level_modules: # We still need all the names that are toplevel on tensorflow_core from tensorflow_core import * -# We also need to bring in keras if available in tensorflow_core -# Above import * doesn't import it as __all__ is updated before keras is hooked -try: - from tensorflow_core import keras -except ImportError as e: - pass - -# Similarly for estimator, but only if this file is not read via a -# import tensorflow_estimator (same reasoning as above when forwarding estimator -# separatedly from the rest of the top level modules) -if not _root_estimator: - try: - from tensorflow_core import estimator - except ImportError as e: - pass - -# And again for tensorboard (comes as summary) -try: - from tensorflow_core import summary -except ImportError as e: - pass - # In V1 API we need to print deprecation messages from tensorflow.python.util import deprecation_wrapper as _deprecation if not isinstance(_sys.modules[__name__], _deprecation.DeprecationWrapper): diff --git a/tensorflow/virtual_root_template_v2.__init__.py b/tensorflow/virtual_root_template_v2.__init__.py index bd212adf3d2..7d40733be7b 100644 --- a/tensorflow/virtual_root_template_v2.__init__.py +++ b/tensorflow/virtual_root_template_v2.__init__.py @@ -97,32 +97,4 @@ for _m in _top_level_modules: # We still need all the names that are toplevel on tensorflow_core from tensorflow_core import * -# We also need to bring in keras if available in tensorflow_core -# Above import * doesn't import it as __all__ is updated before keras is hooked -try: - from tensorflow_core import keras -except ImportError as e: - pass - -# Similarly for estimator, but only 
if this file is not read via a -# import tensorflow_estimator (same reasoning as above when forwarding estimator -# separatedly from the rest of the top level modules) -if not _root_estimator: - try: - from tensorflow_core import estimator - except ImportError as e: - pass - -# And again for tensorboard (comes as summary) -try: - from tensorflow_core import summary -except ImportError as e: - pass - -# Also import module aliases -try: - from tensorflow_core import losses, metrics, initializers, optimizers -except ImportError: - pass - # LINT.ThenChange(//tensorflow/virtual_root_template_v1.__init__.py.oss) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index f8df6d7a0b8..4fa98fc0ca9 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -141,6 +141,17 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ], ) + tf_http_archive( + name = "mkl_dnn_v1", + build_file = clean_dep("//third_party/mkl_dnn:mkldnn.BUILD"), + sha256 = "fcc2d951f7170eade0cfdd0d8d1d58e3e7785bd326bca6555f3722f8cba71811", + strip_prefix = "mkl-dnn-1.0-pc2", + urls = [ + "http://mirror.tensorflow.org/github.com/intel/mkl-dnn/archive/v1.0-pc2.tar.gz", + "https://github.com/intel/mkl-dnn/archive/v1.0-pc2.tar.gz", + ], + ) + tf_http_archive( name = "com_google_absl", build_file = clean_dep("//third_party:com_google_absl.BUILD"), @@ -305,6 +316,17 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ], ) + tf_http_archive( + name = "functools32_archive", + build_file = clean_dep("//third_party:functools32.BUILD"), + sha256 = "f6253dfbe0538ad2e387bd8fdfd9293c925d63553f5813c4e587745416501e6d", + strip_prefix = "functools32-3.2.3-2", + urls = [ + "http://mirror.tensorflow.org/pypi.python.org/packages/c5/60/6ac26ad05857c601308d8fb9e87fa36d0ebf889423f47c3502ef034365db/functools32-3.2.3-2.tar.gz", + "https://pypi.python.org/packages/c5/60/6ac26ad05857c601308d8fb9e87fa36d0ebf889423f47c3502ef034365db/functools32-3.2.3-2.tar.gz", + ], + ) + tf_http_archive( 
name = "gast_archive", build_file = clean_dep("//third_party:gast.BUILD"), @@ -521,11 +543,11 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "llvm", build_file = clean_dep("//third_party/llvm:llvm.autogenerated.BUILD"), - sha256 = "a6c9e2379fb92da8b94315b96bb2c7f569ff9fc2a05de4af1afb23956908f393", - strip_prefix = "llvm-27f427783fe8d884a98276ae7d9b5413c0e03533", + sha256 = "9257e111ae3d5b9d80925ef1329666440460abf4d052e701fa587f5236be6fcc", + strip_prefix = "llvm-df22a5e50a3d36a7b68eea106970dfa5df6d2453", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/27f427783fe8d884a98276ae7d9b5413c0e03533.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/27f427783fe8d884a98276ae7d9b5413c0e03533.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/df22a5e50a3d36a7b68eea106970dfa5df6d2453.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/df22a5e50a3d36a7b68eea106970dfa5df6d2453.tar.gz", ], ) diff --git a/third_party/functools32.BUILD b/third_party/functools32.BUILD new file mode 100644 index 00000000000..32dccf3b72d --- /dev/null +++ b/third_party/functools32.BUILD @@ -0,0 +1,18 @@ +# Description: +# functools32 provides a backport of the functools module for Python 2. 
+ +licenses(["notice"]) # Python 2.0 + +exports_files(["LICENSE"]) + +py_library( + name = "functools32", + srcs = [ + "functools32/__init__.py", + "functools32/_dummy_thread32.py", + "functools32/functools32.py", + "functools32/reprlib32.py", + ], + srcs_version = "PY2AND3", + visibility = ["//visibility:public"], +) diff --git a/third_party/mkl/build_defs.bzl b/third_party/mkl/build_defs.bzl index 10c2d90c848..9f16fdd124e 100644 --- a/third_party/mkl/build_defs.bzl +++ b/third_party/mkl/build_defs.bzl @@ -107,6 +107,7 @@ def mkl_deps(): """ return select({ str(Label("//third_party/mkl_dnn:build_with_mkl_dnn_only")): ["@mkl_dnn"], + str(Label("//third_party/mkl_dnn:build_with_mkl_dnn_v1_only")): ["@mkl_dnn_v1//:mkl_dnn"], str(Label("//third_party/mkl:build_with_mkl_ml_only")): ["//third_party/mkl:intel_binary_blob"], str(Label("//third_party/mkl:build_with_mkl")): [ "//third_party/mkl:intel_binary_blob", diff --git a/third_party/mkl_dnn/BUILD b/third_party/mkl_dnn/BUILD index 047d062cdda..5229dd5aa74 100644 --- a/third_party/mkl_dnn/BUILD +++ b/third_party/mkl_dnn/BUILD @@ -10,3 +10,12 @@ config_setting( }, visibility = ["//visibility:public"], ) + +config_setting( + name = "build_with_mkl_dnn_v1_only", + define_values = { + "build_with_mkl": "true", + "build_with_mkl_dnn_v1_only": "true", + }, + visibility = ["//visibility:public"], +) diff --git a/third_party/mkl_dnn/build_defs.bzl b/third_party/mkl_dnn/build_defs.bzl index 6388f31971c..384b528c273 100644 --- a/third_party/mkl_dnn/build_defs.bzl +++ b/third_party/mkl_dnn/build_defs.bzl @@ -1,13 +1,31 @@ def if_mkl_open_source_only(if_true, if_false = []): - """Shorthand for select()'ing on whether we're building with - MKL-DNN open source lib only, without depending on MKL binary form. + """Returns `if_true` if MKL-DNN v0.x is used. + + Shorthand for select()'ing on whether we're building with + MKL-DNN v0.x open source library only, without depending on MKL binary form. 
Returns a select statement which evaluates to if_true if we're building - with MKL-DNN open source lib only. Otherwise, - the select statement evaluates to if_false. + with MKL-DNN v0.x open source library only. Otherwise, the select statement + evaluates to if_false. """ return select({ str(Label("//third_party/mkl_dnn:build_with_mkl_dnn_only")): if_true, "//conditions:default": if_false, }) + +def if_mkl_v1_open_source_only(if_true, if_false = []): + """Returns `if_true` if MKL-DNN v1.x is used. + + Shorthand for select()'ing on whether we're building with + MKL-DNN v1.x open source library only, without depending on MKL binary form. + + Returns a select statement which evaluates to if_true if we're building + with MKL-DNN v1.x open source library only. Otherwise, the + select statement evaluates to if_false. + + """ + return select({ + str(Label("//third_party/mkl_dnn:build_with_mkl_dnn_v1_only")): if_true, + "//conditions:default": if_false, + }) diff --git a/third_party/mkl_dnn/mkldnn.BUILD b/third_party/mkl_dnn/mkldnn.BUILD index 487e24adc11..120da20f560 100644 --- a/third_party/mkl_dnn/mkldnn.BUILD +++ b/third_party/mkl_dnn/mkldnn.BUILD @@ -3,6 +3,7 @@ exports_files(["LICENSE"]) load( "@org_tensorflow//third_party/mkl_dnn:build_defs.bzl", "if_mkl_open_source_only", + "if_mkl_v1_open_source_only", ) load( "@org_tensorflow//third_party:common.bzl", @@ -17,6 +18,16 @@ config_setting( }, ) +template_rule( + name = "mkldnn_config_h", + src = "include/mkldnn_config.h.in", + out = "include/mkldnn_config.h", + substitutions = { + "#cmakedefine MKLDNN_CPU_BACKEND MKLDNN_BACKEND_${MKLDNN_CPU_BACKEND}": "#define MKLDNN_CPU_BACKEND MKLDNN_BACKEND_NATIVE", + "#cmakedefine MKLDNN_GPU_BACKEND MKLDNN_BACKEND_${MKLDNN_GPU_BACKEND}": "#define MKLDNN_GPU_BACKEND MKLDNN_BACKEND_NONE", + }, +) + # Create the file mkldnn_version.h with MKL-DNN version numbers. # Currently, the version numbers are hard coded here. 
If MKL-DNN is upgraded then # the version numbers have to be updated manually. The version numbers can be @@ -24,6 +35,8 @@ config_setting( # set to "version_major.version_minor.version_patch". The git hash version can # be set to NA. # TODO(agramesh1) Automatically get the version numbers from CMakeLists.txt. +# TODO(bhavanis): MKL-DNN minor version needs to be updated for MKL-DNN v1.x. +# The current version numbers will work only if MKL-DNN v0.18 is used. template_rule( name = "mkldnn_version_h", @@ -53,6 +66,10 @@ cc_library( "src/cpu/rnn/*.cpp", "src/cpu/rnn/*.hpp", "src/cpu/xbyak/*.h", + ]) + if_mkl_v1_open_source_only([ + ":mkldnn_config_h", + "src/cpu/jit_utils/jit_utils.cpp", + "src/cpu/jit_utils/jit_utils.hpp", ]) + [":mkldnn_version_h"], hdrs = glob(["include/*"]), copts = [ @@ -62,6 +79,9 @@ cc_library( ] + if_mkl_open_source_only([ "-UUSE_MKL", "-UUSE_CBLAS", + ]) + if_mkl_v1_open_source_only([ + "-UUSE_MKL", + "-UUSE_CBLAS", ]) + select({ "@org_tensorflow//tensorflow:linux_x86_64": [ "-fopenmp", # only works with gcc diff --git a/third_party/mlir/BUILD b/third_party/mlir/BUILD index 0645b51eb7e..ca8a310d123 100644 --- a/third_party/mlir/BUILD +++ b/third_party/mlir/BUILD @@ -808,6 +808,7 @@ cc_library( ":AffineOps", ":Analysis", ":IR", + ":StandardDialectRegistration", ":StandardOps", ":Support", "@llvm//:support", @@ -823,6 +824,7 @@ cc_library( "lib/Transforms/DmaGeneration.cpp", "lib/Transforms/LoopFusion.cpp", "lib/Transforms/LoopInvariantCodeMotion.cpp", + "lib/Transforms/LoopParametricTiling.cpp", "lib/Transforms/LoopTiling.cpp", "lib/Transforms/LoopUnroll.cpp", "lib/Transforms/LoopUnrollAndJam.cpp", @@ -857,18 +859,19 @@ cc_library( ) cc_library( - name = "AffineToGPU", + name = "LoopsToGPU", srcs = [ - "lib/Conversion/AffineToGPU/AffineToGPU.cpp", + "lib/Conversion/LoopsToGPU/LoopsToGPU.cpp", ], hdrs = [ - "include/mlir/Conversion/AffineToGPU/AffineToGPU.h", + "include/mlir/Conversion/LoopsToGPU/LoopsToGPU.h", ], includes = ["include"], deps = 
[ ":AffineOps", ":GPUDialect", ":IR", + ":Linalg", ":StandardOps", ":Support", ":TransformUtils", @@ -878,17 +881,18 @@ cc_library( ) cc_library( - name = "AffineToGPUPass", + name = "LoopsToGPUPass", srcs = [ - "lib/Conversion/AffineToGPU/AffineToGPUPass.cpp", + "lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp", ], hdrs = [ - "include/mlir/Conversion/AffineToGPU/AffineToGPUPass.h", + "include/mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h", ], includes = ["include"], deps = [ ":AffineOps", - ":AffineToGPU", + ":Linalg", + ":LoopsToGPU", ":Pass", "@llvm//:support", ], @@ -1186,13 +1190,13 @@ cc_binary( name = "mlir-opt", deps = [ ":AffineDialectRegistration", - ":AffineToGPUPass", ":Analysis", ":FxpMathOps", ":FxpMathOpsDialectRegistration", ":GPUDialectRegistration", ":IR", ":LinalgDialectRegistration", + ":LoopsToGPUPass", ":MlirOptLib", ":MlirOptMain", ":QuantOps", diff --git a/third_party/mlir/mlir_configure.bzl b/third_party/mlir/mlir_configure.bzl index 614cfd59158..2a6cd531962 100644 --- a/third_party/mlir/mlir_configure.bzl +++ b/third_party/mlir/mlir_configure.bzl @@ -1,7 +1,7 @@ """Repository rule to setup the external MLIR repository.""" -_MLIR_REV = "35500c0d6c8fee4802d9cdedcac6cafc8900fe01" -_MLIR_SHA256 = "a8102a4ac1d40f6c24fd68bbefd317fccbc371416d2ce39139338496ad5c478d" +_MLIR_REV = "5f2159dab14169f8878d76d42a9367866c1b8d8d" +_MLIR_SHA256 = "c1d429d53dda2e38fd24ac895b6395965c53d9b6e3a29e20fa86e73005a3a86e" def _mlir_autoconf_impl(repository_ctx): """Implementation of the mlir_configure repository rule."""