From cc08b5dff7346b2e2cd9fb80409c8f09bfebc089 Mon Sep 17 00:00:00 2001
From: Jens Elofsson
Date: Tue, 7 Apr 2020 11:20:02 +0200
Subject: [PATCH 01/66] Update network_tester.

New features:
- Multiple inputs
- Multiple outputs
- Output in JSON format
- invoke() can be called more than once
- Updated README
---
 .../examples/network_tester/Makefile.inc      |  4 +
 .../micro/examples/network_tester/README.md   | 34 ++++++-
 .../network_tester/expected_output_data.h     |  2 +-
 .../examples/network_tester/input_data.h      |  4 +-
 .../examples/network_tester/network_model.h   |  5 +-
 .../network_tester/network_tester_test.cc     | 88 +++++++++++++------
 6 files changed, 104 insertions(+), 33 deletions(-)

diff --git a/tensorflow/lite/micro/examples/network_tester/Makefile.inc b/tensorflow/lite/micro/examples/network_tester/Makefile.inc
index 27f54a66763..a5c911238c8 100644
--- a/tensorflow/lite/micro/examples/network_tester/Makefile.inc
+++ b/tensorflow/lite/micro/examples/network_tester/Makefile.inc
@@ -33,6 +33,10 @@ ifeq ($(COMPARE_OUTPUT_DATA),no)
   CXXFLAGS += -DNO_COMPARE_OUTPUT_DATA
 endif
 
+ifdef NUM_INFERENCES
+  CXXFLAGS += -DNUM_INFERENCES=$(NUM_INFERENCES)
+endif
+
 # Builds a standalone object recognition binary.
 $(eval $(call microlite_test,network_tester_test,\
 $(NETWORK_TESTER_TEST_SRCS),$(NETWORK_TESTER_TEST_HDRS)))
diff --git a/tensorflow/lite/micro/examples/network_tester/README.md b/tensorflow/lite/micro/examples/network_tester/README.md
index 7c4c48e4eb1..0cb709dce0a 100644
--- a/tensorflow/lite/micro/examples/network_tester/README.md
+++ b/tensorflow/lite/micro/examples/network_tester/README.md
@@ -34,8 +34,40 @@ make -f tensorflow/lite/micro/tools/make/Makefile network_tester_test \
 `ARENA_SIZE`: The size of the memory to be allocated (in bytes) by the
 interpreter. \
 `NUM_BYTES_TO_PRINT`: The number of bytes of the output data to print. \
-Defaults to 0 if not specified. \
+If set to 0, all bytes of the output are printed. \
 `COMPARE_OUTPUT_DATA`: If set to "no" the output data is not compared to the
 expected output data. This could be useful e.g. if the execution time needs to
 be minimized, or there is no expected output data. If omitted, the output data
 is compared to the expected output. \
+`NUM_INFERENCES`: Defines how many inferences are run. Defaults to 1. \
+
+The output is printed in JSON format using printf:
+```
+num_of_outputs: 1
+output_begin
+[
+{
+"dims": [4,1,2,2,1],
+"data_address": "0x000000",
+"data":"0x06,0x08,0x0e,0x10"
+}]
+output_end
+```
+
+If there are multiple output tensors, the output will look like this:
+```
+num_of_outputs: 2
+output_begin
+[
+{
+"dims": [4,1,2,2,1],
+"data_address": "0x000000",
+"data":"0x06,0x08,0x0e,0x10"
+},
+{
+"dims": [4,1,2,2,1],
+"data_address": "0x111111",
+"data":"0x06,0x08,0x0e,0x10"
+}]
+output_end
+```
diff --git a/tensorflow/lite/micro/examples/network_tester/expected_output_data.h b/tensorflow/lite/micro/examples/network_tester/expected_output_data.h
index 03e21954b7f..934722bad94 100644
--- a/tensorflow/lite/micro/examples/network_tester/expected_output_data.h
+++ b/tensorflow/lite/micro/examples/network_tester/expected_output_data.h
@@ -17,6 +17,6 @@ limitations under the License.
#define TENSORFLOW_LITE_MICRO_EXAMPLES_NETWORK_TESTER_EXPECTED_OUTPUT_DATA_H_ static unsigned int expected_output_data_len = 4; -static unsigned char expected_output_data[] = {6, 8, 14, 16}; +static unsigned char expected_output_data[1][4] = {6, 8, 14, 16}; #endif // TENSORFLOW_LITE_MICRO_EXAMPLES_NETWORK_TESTER_EXPECTED_OUTPUT_DATA_H_ diff --git a/tensorflow/lite/micro/examples/network_tester/input_data.h b/tensorflow/lite/micro/examples/network_tester/input_data.h index b47277cca93..b3710313dd2 100644 --- a/tensorflow/lite/micro/examples/network_tester/input_data.h +++ b/tensorflow/lite/micro/examples/network_tester/input_data.h @@ -17,7 +17,7 @@ limitations under the License. #define TENSORFLOW_LITE_MICRO_EXAMPLES_NETWORK_TESTER_INPUT_DATA_H_ static const int input_data_len = 16; -static const unsigned char input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 16}; +static const unsigned char input_data[1][16] = {{1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16}}; #endif // TENSORFLOW_LITE_MICRO_EXAMPLES_NETWORK_TESTER_INPUT_DATA_H_ diff --git a/tensorflow/lite/micro/examples/network_tester/network_model.h b/tensorflow/lite/micro/examples/network_tester/network_model.h index 4c275dbfbba..0431d7deee7 100644 --- a/tensorflow/lite/micro/examples/network_tester/network_model.h +++ b/tensorflow/lite/micro/examples/network_tester/network_model.h @@ -1,8 +1,11 @@ /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -64,4 +67,4 @@ const unsigned char network_model[] = { 0x08, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11}; const unsigned int network_model_len = 576; -#endif // TENSORFLOW_LITE_MICRO_EXAMPLES_NETWORK_TESTER_NETWORK_MODEL_H_ +#endif diff --git a/tensorflow/lite/micro/examples/network_tester/network_tester_test.cc b/tensorflow/lite/micro/examples/network_tester/network_tester_test.cc index 0650222b970..5a307fb5c2a 100644 --- a/tensorflow/lite/micro/examples/network_tester/network_tester_test.cc +++ b/tensorflow/lite/micro/examples/network_tester/network_tester_test.cc @@ -1,8 +1,11 @@ /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -10,44 +13,54 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/lite/micro/examples/network_tester/expected_output_data.h" -#include "tensorflow/lite/micro/examples/network_tester/input_data.h" -#include "tensorflow/lite/micro/examples/network_tester/network_model.h" #include "tensorflow/lite/micro/kernels/all_ops_resolver.h" #include "tensorflow/lite/micro/micro_error_reporter.h" #include "tensorflow/lite/micro/micro_interpreter.h" -#include "tensorflow/lite/micro/testing/micro_test.h" -#include "tensorflow/lite/micro/testing/test_utils.h" #include "tensorflow/lite/schema/schema_generated.h" #include "tensorflow/lite/version.h" +#include "tensorflow/lite/micro/examples/network_tester/expected_output_data.h" +#include "tensorflow/lite/micro/examples/network_tester/input_data.h" +#include "tensorflow/lite/micro/examples/network_tester/network_model.h" + +#include "tensorflow/lite/micro/testing/micro_test.h" +#include "tensorflow/lite/micro/testing/test_utils.h" + #ifndef TENSOR_ARENA_SIZE #define TENSOR_ARENA_SIZE (1024) #endif +#ifndef NUM_INFERENCES +#define NUM_INFERENCES 1 +#endif + uint8_t tensor_arena[TENSOR_ARENA_SIZE]; #ifdef NUM_BYTES_TO_PRINT inline void print_output_data(TfLiteTensor* output) { int num_bytes_to_print = - (output->bytes < NUM_BYTES_TO_PRINT) ? output->bytes : NUM_BYTES_TO_PRINT; + ((output->bytes < NUM_BYTES_TO_PRINT) || NUM_BYTES_TO_PRINT == 0) + ? output->bytes + : NUM_BYTES_TO_PRINT; int dims_size = output->dims->size; - printf("dims: {%d,", dims_size); + printf("{\n"); + printf("\"dims\": [%d,", dims_size); for (int i = 0; i < output->dims->size - 1; ++i) { printf("%d,", output->dims->data[i]); } - printf("%d}\n", output->dims->data[dims_size - 1]); + printf("%d],\n", output->dims->data[dims_size - 1]); - printf("data_address: %p\n", output->data.raw); - printf("data:\n{"); + printf("\"data_address\": \"%p\",\n", output->data.raw); + printf("\"data\":\""); for (int i = 0; i < num_bytes_to_print - 1; ++i) { - if (i % 16 == 0) { + if (i % 16 == 0 && i != 0) { printf("\n"); } printf("0x%02x,", output->data.uint8[i]); } - printf("0x%02x\n}\n", output->data.uint8[num_bytes_to_print - 1]); + printf("0x%02x\"\n", output->data.uint8[num_bytes_to_print - 1]); + printf("}"); } #endif @@ -63,7 +76,7 @@ TF_LITE_MICRO_TEST(TestInvoke) { "Model provided is schema version %d not equal " "to supported version %d.\n", model->version(), TFLITE_SCHEMA_VERSION); - return 1; + return kTfLiteError; } tflite::ops::micro::AllOpsResolver resolver; @@ -74,29 +87,48 @@ TF_LITE_MICRO_TEST(TestInvoke) { TfLiteStatus allocate_status = interpreter.AllocateTensors(); if (allocate_status != kTfLiteOk) { TF_LITE_REPORT_ERROR(error_reporter, "Tensor allocation failed\n"); + return kTfLiteError; } - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, allocate_status); - TfLiteTensor* input = interpreter.input(0); - memcpy(input->data.uint8, input_data, input->bytes); - - TfLiteStatus invoke_status = interpreter.Invoke(); - if (invoke_status != kTfLiteOk) { - TF_LITE_REPORT_ERROR(error_reporter, "Invoke failed\n"); - } - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status); - - TfLiteTensor* output = interpreter.output(0); + for (int n = 0; n < NUM_INFERENCES; n++) { + for (int i = 0; i < interpreter.inputs_size(); ++i) { + TfLiteTensor* input = interpreter.input(i); + memcpy(input->data.uint8, input_data[i], input->bytes); + } + TfLiteStatus invoke_status = interpreter.Invoke(); + if (invoke_status != kTfLiteOk) { + TF_LITE_REPORT_ERROR(error_reporter, "Invoke failed\n"); + 
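+      // A failed Invoke ends the test run here; any remaining
+      // NUM_INFERENCES iterations are skipped.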
return kTfLiteError; + } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status); #ifdef NUM_BYTES_TO_PRINT - print_output_data(output); + // Print all of the output data, or the first NUM_BYTES_TO_PRINT bytes, + // whichever comes first as well as the output shape. + printf("num_of_outputs: %d\n", interpreter.outputs_size()); + printf("output_begin\n"); + printf("[\n"); + for (int i = 0; i < interpreter.outputs_size(); i++) { + TfLiteTensor* output = interpreter.output(i); + print_output_data(output); + if (i != interpreter.outputs_size() - 1) { + printf(",\n"); + } + } + printf("]\n"); + printf("output_end\n"); #endif #ifndef NO_COMPARE_OUTPUT_DATA - for (int i = 0; i < output->bytes; ++i) { - TF_LITE_MICRO_EXPECT_EQ(output->data.uint8[i], expected_output_data[i]); - } + for (int i = 0; i < interpreter.outputs_size(); i++) { + TfLiteTensor* output = interpreter.output(i); + for (int j = 0; j < output->bytes; ++j) { + TF_LITE_MICRO_EXPECT_EQ(output->data.uint8[j], + expected_output_data[i][j]); + } + } #endif + } TF_LITE_REPORT_ERROR(error_reporter, "Ran successfully\n"); } From 02c6b9edaf14fb7ca35c2b4f0bc80b74cc8a551a Mon Sep 17 00:00:00 2001 From: jerryyin Date: Wed, 10 Jun 2020 21:11:38 +0000 Subject: [PATCH 02/66] [ROCm][mlir] Disable mlir saved model test --- .../mlir/tensorflow/tests/tf_saved_model/build_defs.bzl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/build_defs.bzl b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/build_defs.bzl index 594afa10453..95ad05aa1e6 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/build_defs.bzl +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/build_defs.bzl @@ -4,8 +4,6 @@ load("//tensorflow/compiler/mlir:glob_lit_test.bzl", "lit_test") def tf_saved_model_test(name, data, tags = None): """Create a SavedModel test.""" - if tags == None: - tags = ["no_rocm"] native.py_binary( name = name, testonly = 1, @@ -26,5 +24,5 @@ def tf_saved_model_test(name, data, tags = None): name = name + ".py", data = [name] + data, driver = "@llvm-project//mlir:run_lit.sh", - tags = tags, + tags = tags + ["no_rocm"], ) From 6bb481c58b27b3fc99c0d8fc71b9d58f13e8b0ba Mon Sep 17 00:00:00 2001 From: Amedeo Cavallo Date: Mon, 15 Jun 2020 11:56:50 +0200 Subject: [PATCH 03/66] C linkage for stm32l4HAL target C linkage for output retargeting on stm32l4HAL target --- tensorflow/lite/micro/stm32f4HAL/debug_log.cc | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tensorflow/lite/micro/stm32f4HAL/debug_log.cc b/tensorflow/lite/micro/stm32f4HAL/debug_log.cc index 4be3b40e782..90dd7cfd787 100644 --- a/tensorflow/lite/micro/stm32f4HAL/debug_log.cc +++ b/tensorflow/lite/micro/stm32f4HAL/debug_log.cc @@ -22,6 +22,10 @@ limitations under the License. 
extern UART_HandleTypeDef DEBUG_UART_HANDLE; +#ifdef __cplusplus + extern "C" { +#endif + #ifdef __GNUC__ int __io_putchar(int ch) { HAL_UART_Transmit(&DEBUG_UART_HANDLE, (uint8_t *)&ch, 1, HAL_MAX_DELAY); @@ -36,4 +40,8 @@ int fputc(int ch, FILE *f) { } #endif /* __GNUC__ */ -extern "C" void DebugLog(const char *s) { fprintf(stderr, "%s", s); } +void DebugLog(const char *s) { fprintf(stderr, "%s", s); } + +#ifdef __cplusplus +} +#endif From 5b5ab0034ae0f12731943177b275e35cb9f92bb1 Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Wed, 17 Jun 2020 23:26:29 +0200 Subject: [PATCH 04/66] Fix distributed autocast variable assign --- .../experimental/autocast_variable.py | 104 +++++++++--------- .../experimental/autocast_variable_test.py | 30 +++-- 2 files changed, 77 insertions(+), 57 deletions(-) diff --git a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py index 7d0abe30581..ca6420f0c0b 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py +++ b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py @@ -188,61 +188,88 @@ class AutoCastVariable(variables.Variable, core.Tensor): def constraint(self): return self._variable.constraint + def _apply_assign_update( + self, update_fn, value, use_locking=None, name=None, read_value=True): + if not read_value: + return update_fn(value, use_locking, name, read_value) + + if context.executing_eagerly() or ops.inside_function(): + assign_op = update_fn(value, use_locking, name, False) + with ops.control_dependencies([assign_op]): + return self + + # Fallback to wrapping the returned variable in graph mode if possible + assign_var = update_fn(value, use_locking, name, read_value) + if resource_variable_ops.is_resource_variable(assign_var): + return create_autocast_variable(assign_var) + return assign_var + + def _apply_update(self, update_fn, *args, **kwargs): + update_var = update_fn(*args, **kwargs) + if context.executing_eagerly() or ops.inside_function(): + with ops.control_dependencies([update_var]): + return self + + # Fallback to wrapping the returned variable in graph mode if possible + if resource_variable_ops.is_resource_variable(update_var): + return create_autocast_variable(update_var) + return update_var + def assign(self, value, use_locking=None, name=None, read_value=True): - assign_op = self._variable.assign(value, use_locking, name, read_value) - return _maybe_wrap(assign_op, wrap=read_value) + return self._apply_assign_update( + self._variable.assign, value, use_locking, name, read_value) def assign_add(self, delta, use_locking=None, name=None, read_value=True): - assign_op = self._variable.assign_add(delta, use_locking, name, read_value) - return _maybe_wrap(assign_op, wrap=read_value) + return self._apply_assign_update( + self._variable.assign_add, delta, use_locking, name, read_value) def assign_sub(self, delta, use_locking=None, name=None, read_value=True): - assign_op = self._variable.assign_sub(delta, use_locking, name, read_value) - return _maybe_wrap(assign_op, wrap=read_value) + return self._apply_assign_update( + self._variable.assign_sub, delta, use_locking, name, read_value) def scatter_sub(self, sparse_delta, use_locking=False, name=None): - var = self._variable.scatter_sub(sparse_delta, use_locking, name) - return _maybe_wrap(var) + return self._apply_update( + self._variable.scatter_sub, sparse_delta, use_locking, name) def scatter_add(self, sparse_delta, 
use_locking=False, name=None): - var = self._variable.scatter_add(sparse_delta, use_locking, name) - return _maybe_wrap(var) + return self._apply_update( + self._variable.scatter_add, sparse_delta, use_locking, name) def scatter_max(self, sparse_delta, use_locking=False, name=None): - var = self._variable.scatter_max(sparse_delta, use_locking, name) - return _maybe_wrap(var) + return self._apply_update( + self._variable.scatter_max, sparse_delta, use_locking, name) def scatter_min(self, sparse_delta, use_locking=False, name=None): - var = self._variable.scatter_min(sparse_delta, use_locking, name) - return _maybe_wrap(var) + return self._apply_update( + self._variable.scatter_min, sparse_delta, use_locking, name) def scatter_mul(self, sparse_delta, use_locking=False, name=None): - var = self._variable.scatter_mul(sparse_delta, use_locking, name) - return _maybe_wrap(var) + return self._apply_update( + self._variable.scatter_mul, sparse_delta, use_locking, name) def scatter_div(self, sparse_delta, use_locking=False, name=None): - var = self._variable.scatter_div(sparse_delta, use_locking, name) - return _maybe_wrap(var) + return self._apply_update( + self._variable.scatter_div, sparse_delta, use_locking, name) def scatter_update(self, sparse_delta, use_locking=False, name=None): - var = self._variable.scatter_update(sparse_delta, use_locking, name) - return _maybe_wrap(var) + return self._apply_update( + self._variable.scatter_update, sparse_delta, use_locking, name) def batch_scatter_update(self, sparse_delta, use_locking=False, name=None): - var = self._variable.batch_scatter_update(sparse_delta, use_locking, name) - return _maybe_wrap(var) + return self._apply_update( + self._variable.batch_scatter_update, sparse_delta, use_locking, name) def scatter_nd_sub(self, indices, updates, name=None): - var = self._variable.scatter_nd_sub(indices, updates, name) - return _maybe_wrap(var) + return self._apply_update( + self._variable.scatter_nd_sub, indices, updates, name) def scatter_nd_add(self, indices, updates, name=None): - var = self._variable.scatter_nd_add(indices, updates, name) - return _maybe_wrap(var) + return self._apply_update( + self._variable.scatter_nd_add, indices, updates, name) def scatter_nd_update(self, indices, updates, name=None): - var = self._variable.scatter_nd_update(indices, updates, name) - return _maybe_wrap(var) + return self._apply_update( + self._variable.scatter_nd_update, indices, updates, name) def load(self, value, session=None): return self._variable.load(value, session) @@ -462,24 +489,3 @@ def create_autocast_variable(variable): # pylint: enable=missing-format-attribute return AutoCastDistributedVariable(variable) - - -def _maybe_wrap(variable, wrap=True): - """Creates an AutoCastVariable that wraps another variable if applicable. - - This function is used to wrap the return value of AutoCastVariable.assign. - Unfortunately MirroredVariable.assign will (incorrectly) return a Mirrored - value instead of a MirroredVariable. So we cannot properly wrap it in an - AutoCastVariable. We return the original variable in that case. - - Args: - variable: A tf.Variable or op. - wrap: A boolean to define whether to wrap the variable in an - AutoCastVariable or not. - - Returns: - An AutoCastVariable if wrap is True and variable is a resource variable. 
- """ - if wrap and resource_variable_ops.is_resource_variable(variable): - return create_autocast_variable(variable) - return variable diff --git a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py index c45015b644e..940bd07c813 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py +++ b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py @@ -304,8 +304,8 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): self.assertAllClose(3., self.evaluate(x.assign_sub(3.))) # Assign multiple times - # This currently only works if no strategy is used - if not ds_context.has_strategy(): + # This currently doesn't work in graph mode + if context.executing_eagerly() or ops.inside_function(): assign = x.assign(1.) self.assertAllClose(1., self.evaluate(assign)) self.assertAllClose(0., self.evaluate(assign.assign(0.))) @@ -343,6 +343,20 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): # assign still expect float32 value even if in float16 scope run_and_check() + @combinations.generate(maybe_distribute) + def test_assign_tf_function(self, distribution): + with distribution.scope(): + x = get_var(0., dtypes.float32) + x = autocast_variable.create_autocast_variable(x) + + @def_function.function + def run_assign(): + return x.assign(1.).assign_add(3.).assign_add(3.).assign_sub(2.) + + with ops.get_default_graph()._enable_auto_casting_variables( + dtypes.float16): + self.assertAllClose(5., self.evaluate(run_assign())) + @combinations.generate(maybe_distribute) def test_assign_stays_in_true_dtype(self, distribution): with distribution.scope(): @@ -357,18 +371,18 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): dtypes.float16): # Variable should be increased, despite it appearing to be the same # float16 value. - self.assertEqual(1. + small_val, - self.evaluate(x.assign(1. + small_tensor))) + self.evaluate(x.assign(1. + small_tensor)) + self.assertEqual(1. + small_val, self.evaluate(x._variable)) self.assertEqual(1., self.evaluate(x.value())) - self.assertEqual(1. + small_val, self.evaluate(x.value())) + self.assertEqual(1. + small_val, self.evaluate(x)) self.evaluate(x.assign(1.)) with ops.get_default_graph()._enable_auto_casting_variables( dtypes.float16): - self.assertEqual(1. + small_val, - self.evaluate(x.assign_add(small_tensor))) + self.evaluate(x.assign_add(small_tensor)) + self.assertEqual(1. + small_val, self.evaluate(x._variable)) self.assertEqual(1., self.evaluate(x.value())) - self.assertEqual(1. + small_val, self.evaluate(x.value())) + self.assertEqual(1. 
+ small_val, self.evaluate(x)) @combinations.generate(maybe_distribute) def test_checkpoint(self, distribution): From c31f2ca4a29d4469f29c57735d74cbd2748c0e03 Mon Sep 17 00:00:00 2001 From: Sam Holt Date: Tue, 16 Jun 2020 16:50:16 +0100 Subject: [PATCH 05/66] fix: convolutional padding argument valid and same explaination --- .../python/keras/layers/convolutional.py | 55 +++++++++++++++---- .../keras/layers/convolutional_recurrent.py | 6 ++ tensorflow/python/keras/layers/local.py | 2 + tensorflow/python/keras/layers/pooling.py | 22 ++++++-- .../keras/legacy_tf_layers/convolutional.py | 42 ++++++++++++++ tensorflow/python/keras/utils/conv_utils.py | 12 ++++ tensorflow/python/ops/nn_ops.py | 3 + 7 files changed, 128 insertions(+), 14 deletions(-) diff --git a/tensorflow/python/keras/layers/convolutional.py b/tensorflow/python/keras/layers/convolutional.py index 51f4e3b320a..471d94570a5 100644 --- a/tensorflow/python/keras/layers/convolutional.py +++ b/tensorflow/python/keras/layers/convolutional.py @@ -72,6 +72,10 @@ class Conv(Layer): Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: One of `"valid"`, `"same"`, or `"causal"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. `"causal"` results in causal + (dilated) convolutions, e.g. `output[t]` does not depend on `input[t+1:]`. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. `channels_last` corresponds to inputs with shape @@ -418,6 +422,9 @@ class Conv1D(Conv): Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: One of `"valid"`, `"causal"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. `"causal"` results in causal (dilated) convolutions, e.g. `output[t]` does not depend on `input[t+1:]`. Useful when modeling temporal data where the model should not violate the temporal order. @@ -571,6 +578,9 @@ class Conv2D(Conv): specify the same value for all spatial dimensions. Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: one of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. `channels_last` corresponds to inputs with shape `(batch_size, height, width, channels)` while @@ -712,6 +722,9 @@ class Conv3D(Conv): specify the same value for all spatial dimensions. Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: one of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. 
`channels_last` corresponds to inputs with shape `batch_shape + (spatial_dim1, spatial_dim2, @@ -833,6 +846,9 @@ class Conv1DTranspose(Conv1D): time dimension. Specifying a stride value != 1 is incompatible with specifying a `dilation_rate` value != 1. Defaults to 1. padding: one of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. output_padding: An integer specifying the amount of padding along the time dimension of the output tensor. The amount of output padding must be lower than the stride. @@ -1083,6 +1099,9 @@ class Conv2DTranspose(Conv2D): Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: one of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. output_padding: An integer or tuple/list of 2 integers, specifying the amount of padding along the height and width of the output tensor. @@ -1371,19 +1390,22 @@ class Conv3DTranspose(Conv3D): Arguments: filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the convolution). + (i.e. the number of output filters in the convolution). kernel_size: An integer or tuple/list of 3 integers, specifying the - depth, height and width of the 3D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. + depth, height and width of the 3D convolution window. + Can be a single integer to specify the same value for + all spatial dimensions. strides: An integer or tuple/list of 3 integers, - specifying the strides of the convolution along the depth, height - and width. - Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. + specifying the strides of the convolution along the depth, height + and width. + Can be a single integer to specify the same value for + all spatial dimensions. + Specifying any stride value != 1 is incompatible with specifying + any `dilation_rate` value != 1. padding: one of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. output_padding: An integer or tuple/list of 3 integers, specifying the amount of padding along the depth, height, and width. @@ -1681,6 +1703,9 @@ class SeparableConv(Conv): Specifying any `stride` value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. `channels_last` corresponds to inputs with shape @@ -1885,6 +1910,10 @@ class SeparableConv1D(SeparableConv): Specifying any `stride` value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: One of `"valid"`, `"same"`, or `"causal"` (case-insensitive). + `"valid"` means no padding. 
`"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. `"causal"` results in causal + (dilated) convolutions, e.g. `output[t]` does not depend on `input[t+1:]`. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. `channels_last` corresponds to inputs with shape @@ -2070,6 +2099,9 @@ class SeparableConv2D(SeparableConv): Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: one of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. @@ -2230,6 +2262,9 @@ class DepthwiseConv2D(Conv2D): Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: one of `'valid'` or `'same'` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. depth_multiplier: The number of depthwise convolution output channels for each input channel. The total number of depthwise convolution output diff --git a/tensorflow/python/keras/layers/convolutional_recurrent.py b/tensorflow/python/keras/layers/convolutional_recurrent.py index 19831429b73..54196f8725c 100644 --- a/tensorflow/python/keras/layers/convolutional_recurrent.py +++ b/tensorflow/python/keras/layers/convolutional_recurrent.py @@ -434,6 +434,9 @@ class ConvLSTM2DCell(DropoutRNNCellMixin, Layer): Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. It defaults to the `image_data_format` value found in your @@ -710,6 +713,9 @@ class ConvLSTM2D(ConvRNN2D): Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. diff --git a/tensorflow/python/keras/layers/local.py b/tensorflow/python/keras/layers/local.py index 3e9c0f9c0a3..c33c88f3a3d 100644 --- a/tensorflow/python/keras/layers/local.py +++ b/tensorflow/python/keras/layers/local.py @@ -67,6 +67,7 @@ class LocallyConnected1D(Layer): any `dilation_rate` value != 1. padding: Currently only supports `"valid"` (case-insensitive). `"same"` may be supported in the future. + `"valid"` means no padding. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. @@ -375,6 +376,7 @@ class LocallyConnected2D(Layer): all spatial dimensions. padding: Currently only support `"valid"` (case-insensitive). `"same"` will be supported in future. 
+ `"valid"` means no padding. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. diff --git a/tensorflow/python/keras/layers/pooling.py b/tensorflow/python/keras/layers/pooling.py index ff7d157acad..51dc5131a8a 100644 --- a/tensorflow/python/keras/layers/pooling.py +++ b/tensorflow/python/keras/layers/pooling.py @@ -164,8 +164,9 @@ class MaxPooling1D(Pooling1D): for each pooling step. If None, it will default to `pool_size`. padding: One of `"valid"` or `"same"` (case-insensitive). - "valid" adds no padding. "same" adds padding such that if the stride - is 1, the output shape is the same as the input shape. + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. @@ -209,6 +210,9 @@ class AveragePooling1D(Pooling1D): E.g. 2 will halve the input. If None, it will default to `pool_size`. padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. @@ -419,8 +423,9 @@ class MaxPooling2D(Pooling2D): Strides values. Specifies how far the pooling window moves for each pooling step. If None, it will default to `pool_size`. padding: One of `"valid"` or `"same"` (case-insensitive). - "valid" adds no zero padding. "same" adds padding such that if the stride - is 1, the output shape is the same as input shape. + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. @@ -475,6 +480,9 @@ class AveragePooling2D(Pooling2D): Strides values. If None, it will default to `pool_size`. padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. @@ -617,6 +625,9 @@ class MaxPooling3D(Pooling3D): `(2, 2, 2)` will halve the size of the 3D input in each dimension. strides: tuple of 3 integers, or None. Strides values. padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. @@ -667,6 +678,9 @@ class AveragePooling3D(Pooling3D): `(2, 2, 2)` will halve the size of the 3D input in each dimension. strides: tuple of 3 integers, or None. Strides values. padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. 
data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. diff --git a/tensorflow/python/keras/legacy_tf_layers/convolutional.py b/tensorflow/python/keras/legacy_tf_layers/convolutional.py index 4c91251a0e7..4fd53531fd1 100644 --- a/tensorflow/python/keras/legacy_tf_layers/convolutional.py +++ b/tensorflow/python/keras/legacy_tf_layers/convolutional.py @@ -46,6 +46,9 @@ class Conv1D(keras_layers.Conv1D, base.Layer): Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. `channels_last` corresponds to inputs with shape @@ -157,6 +160,9 @@ def conv1d(inputs, Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. `channels_last` corresponds to inputs with shape @@ -242,6 +248,9 @@ class Conv2D(keras_layers.Conv2D, base.Layer): Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. `channels_last` corresponds to inputs with shape @@ -360,6 +369,9 @@ def conv2d(inputs, Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. `channels_last` corresponds to inputs with shape @@ -449,6 +461,9 @@ class Conv3D(keras_layers.Conv3D, base.Layer): Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. `channels_last` corresponds to inputs with shape @@ -568,6 +583,9 @@ def conv3d(inputs, Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. 
`"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. `channels_last` corresponds to inputs with shape @@ -652,6 +670,9 @@ class SeparableConv1D(keras_layers.SeparableConv1D, base.Layer): Specifying any `stride` value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. `channels_last` corresponds to inputs with shape @@ -761,6 +782,9 @@ class SeparableConv2D(keras_layers.SeparableConv2D, base.Layer): Specifying any `stride` value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. `channels_last` corresponds to inputs with shape @@ -897,6 +921,9 @@ def separable_conv1d(inputs, Specifying any `stride` value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. `channels_last` corresponds to inputs with shape @@ -1019,6 +1046,9 @@ def separable_conv2d(inputs, Specifying any `stride` value != 1 is incompatible with specifying any `dilation_rate` value != 1. padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. `channels_last` corresponds to inputs with shape @@ -1117,6 +1147,9 @@ class Conv2DTranspose(keras_layers.Conv2DTranspose, base.Layer): of the convolution. Can be a single integer to specify the same value for all spatial dimensions. padding: one of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. `channels_last` corresponds to inputs with shape @@ -1223,6 +1256,9 @@ def conv2d_transpose(inputs, of the convolution. Can be a single integer to specify the same value for all spatial dimensions. padding: one of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. 
`"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. `channels_last` corresponds to inputs with shape @@ -1295,6 +1331,9 @@ class Conv3DTranspose(keras_layers.Conv3DTranspose, base.Layer): Can be a single integer to specify the same value for all spatial dimensions. padding: One of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. `channels_last` corresponds to inputs with shape @@ -1396,6 +1435,9 @@ def conv3d_transpose(inputs, of the convolution. Can be a single integer to specify the same value for all spatial dimensions. padding: one of `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. data_format: A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. `channels_last` corresponds to inputs with shape diff --git a/tensorflow/python/keras/utils/conv_utils.py b/tensorflow/python/keras/utils/conv_utils.py index f38fdc18252..e8ee866d958 100644 --- a/tensorflow/python/keras/utils/conv_utils.py +++ b/tensorflow/python/keras/utils/conv_utils.py @@ -264,6 +264,9 @@ def conv_kernel_mask(input_shape, kernel_shape, strides, padding): receptive field. strides: tuple of size N, strides along each spatial dimension. padding: type of padding, string `"same"` or `"valid"`. + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. Returns: A boolean 2N-D `np.ndarray` of shape @@ -338,6 +341,9 @@ def conv_kernel_idxs(input_shape, kernel_shape, strides, padding, filters_in, receptive field. strides: tuple of size N, strides along each spatial dimension. padding: type of padding, string `"same"` or `"valid"`. + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. filters_in: `int`, number if filters in the input to the layer. filters_out: `int', number if filters in the output of the layer. data_format: string, "channels_first" or "channels_last". @@ -430,6 +436,9 @@ def conv_connected_inputs(input_shape, kernel_shape, output_position, strides, in the output of the convolution. strides: tuple of size N, strides along each spatial dimension. padding: type of padding, string `"same"` or `"valid"`. + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. Returns: N ranges `[[p_in_left1, ..., p_in_right1], ..., @@ -468,6 +477,9 @@ def conv_output_shape(input_shape, kernel_shape, strides, padding): receptive field. strides: tuple of size N, strides along each spatial dimension. padding: type of padding, string `"same"` or `"valid"`. + `"valid"` means no padding. 
`"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. Returns: tuple of size N: `(d_out1, ..., d_outN)`, spatial shape of the output. diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 1318f575737..5a9a63637f6 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -940,6 +940,9 @@ def convolution( filter: An (N+2)-D `Tensor` with the same type as `input` and shape `spatial_filter_shape + [in_channels, out_channels]`. padding: A string, either `"VALID"` or `"SAME"`. The padding algorithm. + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. strides: Optional. Sequence of N ints >= 1. Specifies the output stride. Defaults to [1]*N. If any value of strides is > 1, then all values of dilation_rate must be 1. From c21328bd992fe359b585452df92fc82976d27557 Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Thu, 18 Jun 2020 12:06:40 +0200 Subject: [PATCH 06/66] Don't skip distributed test case in eager mode --- .../mixed_precision/experimental/autocast_variable_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py index 940bd07c813..2fa7c103258 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py +++ b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py @@ -304,8 +304,8 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): self.assertAllClose(3., self.evaluate(x.assign_sub(3.))) # Assign multiple times - # This currently doesn't work in graph mode - if context.executing_eagerly() or ops.inside_function(): + # This currently doesn't work in graph mode if a strategy is used + if not ds_context.has_strategy() or context.executing_eagerly(): assign = x.assign(1.) self.assertAllClose(1., self.evaluate(assign)) self.assertAllClose(0., self.evaluate(assign.assign(0.))) From 8ad5bc80e71921c3c2530d93d3856ba59e524c60 Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Fri, 19 Jun 2020 11:50:24 +0200 Subject: [PATCH 07/66] Remove unnecessary assert --- .../mixed_precision/experimental/autocast_variable_test.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py index 2fa7c103258..9036109af96 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py +++ b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py @@ -372,7 +372,6 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): # Variable should be increased, despite it appearing to be the same # float16 value. self.evaluate(x.assign(1. + small_tensor)) - self.assertEqual(1. + small_val, self.evaluate(x._variable)) self.assertEqual(1., self.evaluate(x.value())) self.assertEqual(1. + small_val, self.evaluate(x)) @@ -380,7 +379,6 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): with ops.get_default_graph()._enable_auto_casting_variables( dtypes.float16): self.evaluate(x.assign_add(small_tensor)) - self.assertEqual(1. 
+ small_val, self.evaluate(x._variable)) self.assertEqual(1., self.evaluate(x.value())) self.assertEqual(1. + small_val, self.evaluate(x)) From dc8d42922b9ff89e717f130515c968186ec4504c Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Fri, 19 Jun 2020 12:33:01 +0200 Subject: [PATCH 08/66] Remove unnecessary control_dependencies --- .../mixed_precision/experimental/autocast_variable.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py index ca6420f0c0b..b60100c7b48 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py +++ b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py @@ -190,25 +190,20 @@ class AutoCastVariable(variables.Variable, core.Tensor): def _apply_assign_update( self, update_fn, value, use_locking=None, name=None, read_value=True): - if not read_value: - return update_fn(value, use_locking, name, read_value) - if context.executing_eagerly() or ops.inside_function(): assign_op = update_fn(value, use_locking, name, False) - with ops.control_dependencies([assign_op]): - return self + return self if read_value else assign_op # Fallback to wrapping the returned variable in graph mode if possible assign_var = update_fn(value, use_locking, name, read_value) - if resource_variable_ops.is_resource_variable(assign_var): + if read_value and resource_variable_ops.is_resource_variable(assign_var): return create_autocast_variable(assign_var) return assign_var def _apply_update(self, update_fn, *args, **kwargs): update_var = update_fn(*args, **kwargs) if context.executing_eagerly() or ops.inside_function(): - with ops.control_dependencies([update_var]): - return self + return self # Fallback to wrapping the returned variable in graph mode if possible if resource_variable_ops.is_resource_variable(update_var): From 8e305b9cec35ccf9821c2fd2a82194e328d98704 Mon Sep 17 00:00:00 2001 From: Denisa Roberts Date: Wed, 6 May 2020 11:29:37 -0400 Subject: [PATCH 09/66] Add Qr Grad for wide matrices --- tensorflow/python/kernel_tests/qr_op_test.py | 5 +-- tensorflow/python/ops/linalg_grad.py | 47 +++++++++++++++----- 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/tensorflow/python/kernel_tests/qr_op_test.py b/tensorflow/python/kernel_tests/qr_op_test.py index d5337c183a6..0c291dbd940 100644 --- a/tensorflow/python/kernel_tests/qr_op_test.py +++ b/tensorflow/python/kernel_tests/qr_op_test.py @@ -278,14 +278,13 @@ if __name__ == "__main__": use_static_shape)) # TODO(pfau): Get working with complex types. 
-  # TODO(pfau): Get working with full_matrices when rows != cols
-  # TODO(pfau): Get working when rows < cols
+  # TODO(pfau): Get working with full_matrices when rows > cols
   # TODO(pfau): Get working with shapeholders (dynamic shapes)
   for full_matrices in False, True:
     for dtype in np.float32, np.float64:
       for rows in 1, 2, 5, 10:
         for cols in 1, 2, 5, 10:
-          if rows == cols or (not full_matrices and rows > cols):
+          if rows <= cols or (not full_matrices and rows > cols):
             for batch_dims in [(), (3,)] + [(3, 2)] * (max(rows, cols) < 10):
               shape = batch_dims + (rows, cols)
               name = "%s_%s_full_%s" % (dtype.__name__,
diff --git a/tensorflow/python/ops/linalg_grad.py b/tensorflow/python/ops/linalg_grad.py
index 437e28e7e6b..5ec372430ba 100644
--- a/tensorflow/python/ops/linalg_grad.py
+++ b/tensorflow/python/ops/linalg_grad.py
@@ -493,15 +493,10 @@ def _QrGrad(op, dq, dr):
   if (r.shape.ndims is None or r.shape.as_list()[-2] is None or
       r.shape.as_list()[-1] is None):
     raise NotImplementedError("QrGrad not implemented with dynamic shapes.")
-  if r.shape.dims[-2].value != r.shape.dims[-1].value:
+  if (r.shape.dims[-2].value > r.shape.dims[-1].value and
+      q.shape.dims[-2].value == q.shape.dims[-1].value):
     raise NotImplementedError("QrGrad not implemented when ncols > nrows "
-                              "or full_matrices is true and ncols != nrows.")
-
-  qdq = math_ops.matmul(q, dq, adjoint_a=True)
-  qdq_ = qdq - _linalg.adjoint(qdq)
-  rdr = math_ops.matmul(r, dr, adjoint_b=True)
-  rdr_ = rdr - _linalg.adjoint(rdr)
-  tril = array_ops.matrix_band_part(qdq_ + rdr_, -1, 0)
+                              "and full_matrices is true.")
 
   def _TriangularSolve(x, r):
     """Equiv to matmul(x, adjoint(matrix_inverse(r))) if r is upper-tri."""
     return _linalg.adjoint(
         linalg_ops.matrix_triangular_solve(
             r, _linalg.adjoint(x), lower=False, adjoint=False))
 
+  def _QrGradSquareAndDeepMatrices(q, r, dq, dr):
+    """Gradient for (possibly batched) matrices with num_rows >= num_cols
+    when full_matrices is False.
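+
+    The wide case (num_rows < num_cols) is reduced to this one by
+    partitioning a = [x, y] and r = [u, v] so that x and u are square:
+    dy = q @ dv, and dx is this gradient evaluated at
+    (q, u, dq + y @ adjoint(dv), du), as implemented below.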
+ """ + qdq = math_ops.matmul(q, dq, adjoint_a=True) + qdq_ = qdq - _linalg.adjoint(qdq) + rdr = math_ops.matmul(r, dr, adjoint_b=True) + rdr_ = rdr - _linalg.adjoint(rdr) + tril = array_ops.matrix_band_part(qdq_ + rdr_, -1, 0) + + grad_a = math_ops.matmul(q, dr + _TriangularSolve(tril, r)) + grad_b = _TriangularSolve(dq - math_ops.matmul(q, qdq), r) + return grad_a + grad_b + + num_rows, num_cols = q.shape.dims[-2].value, r.shape.dims[-1] + + if num_rows >= num_cols: + return _QrGradSquareAndDeepMatrices(q, r, dq, dr) + + # Partition a = [x, y], r = [u, v] and reduce to the square case + a = op.inputs[0] + y = a[..., :, num_rows:] + u = r[..., :, :num_rows] + dv = dr[..., :, num_rows:] + du = dr[..., :, :num_rows] + dy = math_ops.matmul(q, dv) + dx = _QrGradSquareAndDeepMatrices(q, + u, + dq + math_ops.matmul(y, + dv, + adjoint_b=True), + du) + return array_ops.concat([dx, dy], axis=-1) @ops.RegisterGradient("MatrixSolve") From 7cd6c3115badaf79fff2b8809cb4a0b49a5f9c7c Mon Sep 17 00:00:00 2001 From: Denisa Roberts Date: Fri, 19 Jun 2020 14:26:51 -0400 Subject: [PATCH 10/66] Allow gradient access to QR input --- tensorflow/python/eager/pywrap_gradient_exclusions.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/python/eager/pywrap_gradient_exclusions.cc b/tensorflow/python/eager/pywrap_gradient_exclusions.cc index 7da45e36118..7e9f0b16334 100644 --- a/tensorflow/python/eager/pywrap_gradient_exclusions.cc +++ b/tensorflow/python/eager/pywrap_gradient_exclusions.cc @@ -50,7 +50,7 @@ auto OpGradientInfoInit(const T &a) { absl::optional> OpGradientUnusedInputIndices( const tensorflow::string &op_name) { - static std::array a = {{ + static std::array a = {{ {"Acosh"}, {"AllToAll", 1, {0}}, {"ApproximateEqual"}, @@ -222,7 +222,6 @@ absl::optional> OpGradientUnusedInputIndices( {"PlaceholderWithDefault"}, {"PopulationCount"}, {"PreventGradient"}, - {"Qr"}, {"QuantizeAndDequantize"}, {"QuantizeAndDequantizeV2"}, {"QuantizeAndDequantizeV3"}, From f5a0fdaa0aeff548623811b887c6da34303ab25f Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Sat, 20 Jun 2020 01:41:44 +0200 Subject: [PATCH 11/66] Use executing_eagerly_outside_functions --- .../keras/mixed_precision/experimental/autocast_variable.py | 4 ++-- .../mixed_precision/experimental/autocast_variable_test.py | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py index b60100c7b48..a717fbb41e2 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py +++ b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py @@ -190,7 +190,7 @@ class AutoCastVariable(variables.Variable, core.Tensor): def _apply_assign_update( self, update_fn, value, use_locking=None, name=None, read_value=True): - if context.executing_eagerly() or ops.inside_function(): + if ops.executing_eagerly_outside_functions(): assign_op = update_fn(value, use_locking, name, False) return self if read_value else assign_op @@ -202,7 +202,7 @@ class AutoCastVariable(variables.Variable, core.Tensor): def _apply_update(self, update_fn, *args, **kwargs): update_var = update_fn(*args, **kwargs) - if context.executing_eagerly() or ops.inside_function(): + if ops.executing_eagerly_outside_functions(): return self # Fallback to wrapping the returned variable in graph mode if possible diff --git 
a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py index 9036109af96..cb5a5d7cb3f 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py +++ b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py @@ -345,6 +345,9 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): @combinations.generate(maybe_distribute) def test_assign_tf_function(self, distribution): + if not context.executing_eagerly(): + self.skipTest("Test is not compatible with graph mode") + with distribution.scope(): x = get_var(0., dtypes.float32) x = autocast_variable.create_autocast_variable(x) From 5b46965a7dbcb7d775d9bca1b6bc4ee4f4652101 Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Sat, 20 Jun 2020 14:34:22 +0200 Subject: [PATCH 12/66] Use ObjectIdentitySet for FuncGraph.control_captures --- tensorflow/python/framework/func_graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/framework/func_graph.py b/tensorflow/python/framework/func_graph.py index b0f8821b17f..94d5913cbd1 100644 --- a/tensorflow/python/framework/func_graph.py +++ b/tensorflow/python/framework/func_graph.py @@ -187,7 +187,7 @@ class FuncGraph(ops.Graph): self.inputs = [] self.outputs = [] self.control_outputs = [] - self.control_captures = set() + self.control_captures = object_identity.ObjectIdentitySet() self.structured_input_signature = None self.structured_outputs = None self._weak_variables = [] From a1f9c4d80b68c47deddfbfb4ee5ccc093d347d48 Mon Sep 17 00:00:00 2001 From: Samuel Holt <6444377+samholt@users.noreply.github.com> Date: Mon, 22 Jun 2020 01:28:32 +0100 Subject: [PATCH 13/66] Update tensorflow/python/keras/layers/convolutional.py Co-authored-by: kyscg --- tensorflow/python/keras/layers/convolutional.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/layers/convolutional.py b/tensorflow/python/keras/layers/convolutional.py index 471d94570a5..19d5ea71527 100644 --- a/tensorflow/python/keras/layers/convolutional.py +++ b/tensorflow/python/keras/layers/convolutional.py @@ -421,7 +421,7 @@ class Conv1D(Conv): specifying the stride length of the convolution. Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. - padding: One of `"valid"`, `"causal"` or `"same"` (case-insensitive). + padding: One of `"valid"`, `"same"` or `"causal"` (case-insensitive). `"valid"` means no padding. `"same"` results in padding evenly to the left/right or up/down of the input such that output has the same height/width dimension as the input. From 8717d5c92c17bad370856dfb1debab48b6158f8b Mon Sep 17 00:00:00 2001 From: Jens Elofsson Date: Mon, 22 Jun 2020 09:38:10 +0200 Subject: [PATCH 14/66] Fix reviewer comments. 
--- tensorflow/lite/micro/examples/network_tester/network_model.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/micro/examples/network_tester/network_model.h b/tensorflow/lite/micro/examples/network_tester/network_model.h index 0431d7deee7..5b4b4cf3070 100644 --- a/tensorflow/lite/micro/examples/network_tester/network_model.h +++ b/tensorflow/lite/micro/examples/network_tester/network_model.h @@ -67,4 +67,4 @@ const unsigned char network_model[] = { 0x08, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11}; const unsigned int network_model_len = 576; -#endif +#endif // TENSORFLOW_LITE_MICRO_EXAMPLES_NETWORK_TESTER_NETWORK_MODEL_H_ From a44416d0fc2ef89a6119e75ad35ef3824edb8cbd Mon Sep 17 00:00:00 2001 From: zilinzhu Date: Mon, 22 Jun 2020 21:49:59 +0800 Subject: [PATCH 15/66] fix broken figures --- tensorflow/compiler/xla/g3doc/tiled_layout.md | 14 ++++++++------ tensorflow/compiler/xla/xla_data.proto | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/g3doc/tiled_layout.md b/tensorflow/compiler/xla/g3doc/tiled_layout.md index 21e88ceab62..b40f0a95a3d 100644 --- a/tensorflow/compiler/xla/g3doc/tiled_layout.md +++ b/tensorflow/compiler/xla/g3doc/tiled_layout.md @@ -3,9 +3,10 @@ Caution: Tiled layout is *pre-release* and this describes how it's intended to work. Errors may be silently ignored. -
![](images/xla_array_layout_figure1.png)
-
-Figure 1
+<center>
+<img src="images/xla_array_layout_figure1.png">
+Figure 1
+</center>

Figure 1 shows how an array F32[3,5] is laid out in memory with 2x2 tiling. A shape with this layout is written as F32[3,5]{1,0:(2,2)}, where 1,0 relates to @@ -120,9 +121,10 @@ element follows the formula above as expected. XLA's tiling becomes even more flexible by applying it repeatedly. -
![](images/xla_array_layout_figure2.png)
-
-Figure 2
+<center>
+<img src="images/xla_array_layout_figure2.png">
+Figure 2
+</center>

Figure 2 shows how an array of size 4x8 is tiled by two levels of tiling (first 2x4 then 2x1). We represent this repeated tiling as (2,4)(2,1). Each color diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index 5c21121b98e..e8b6105d3fe 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -120,7 +120,7 @@ enum Format { } // Describes a tile used in tiling-based layout. Refer to -// g3doc/third_party/tensorflow/compiler/xla/g3doc/layout_with_tiling.md for +// g3doc/third_party/tensorflow/compiler/xla/g3doc/tiled_layout.md for // details about tiling-based layout. message TileProto { // Number of elements in each dimension of the tile. It's ordered from the From dce31a431df80aa55f2cfd87d60952c413d6f690 Mon Sep 17 00:00:00 2001 From: chuanqiw Date: Tue, 23 Jun 2020 14:08:27 +0800 Subject: [PATCH 16/66] Update sqlite version to 3.32.1 to fix the CVE-2020-13630 and CVE-2020-11656 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 0f591ba8b90..29c6ef99397 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -409,12 +409,12 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "org_sqlite", build_file = clean_dep("//third_party:sqlite.BUILD"), - sha256 = "f3c79bc9f4162d0b06fa9fe09ee6ccd23bb99ce310b792c5145f87fbcc30efca", - strip_prefix = "sqlite-amalgamation-3310100", + sha256 = "8d46ef69b96628bedb781bd8309210f2a1f4a353792097302f6b754044e6540f", + strip_prefix = "sqlite-amalgamation-3320100", system_build_file = clean_dep("//third_party/systemlibs:sqlite.BUILD"), urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/www.sqlite.org/2020/sqlite-amalgamation-3310100.zip", - "https://www.sqlite.org/2020/sqlite-amalgamation-3310100.zip", + "https://storage.googleapis.com/mirror.tensorflow.org/www.sqlite.org/2020/sqlite-amalgamation-3320100.zip", + "https://www.sqlite.org/2020/sqlite-amalgamation-3320100.zip", ], ) From 7543ea1f37cea5ff67ecf4e77c93704d29fd8e1d Mon Sep 17 00:00:00 2001 From: Ruoxin Sang Date: Tue, 23 Jun 2020 11:10:49 -0700 Subject: [PATCH 17/66] Support dynamic outputs for XLA on demand ops. 
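As context for this change, here is a minimal sketch of the kind of op it enables, mirroring the `tpu_strategy_test.py` test added below (it assumes a reachable TPU at "/device:TPU:0"; the variable names are illustrative):

```
import tensorflow as tf

with tf.device("/device:TPU:0"):
  # The number of True entries, and hence the output's leading dimension,
  # is only known after the computation runs, so the runtime has to read
  # the dynamic dimension sizes back from the result buffer's metadata.
  out = tf.where([True, False, True])

print(out)  # [[0], [2]] -- the shape [2, 1] is resolved at runtime
```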
PiperOrigin-RevId: 317902879
Change-Id: I6b6dfa54855d5996ac15d4b5c48a5db5dc230025
---
 tensorflow/compiler/jit/xla_launch_util.cc    | 28 ++++++-
 tensorflow/compiler/xla/service/BUILD         |  5 +-
 .../compiler/xla/service/transfer_manager.cc  | 63 +++++++++++++++
 .../compiler/xla/service/transfer_manager.h   |  9 +++
 .../compiler/xrt/kernels/xrt_execute_op.cc    | 78 +++----------------
 .../python/distribute/tpu_strategy_test.py    |  9 +++
 6 files changed, 121 insertions(+), 71 deletions(-)

diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc
index fc0ff8d9445..eb31b23c991 100644
--- a/tensorflow/compiler/jit/xla_launch_util.cc
+++ b/tensorflow/compiler/jit/xla_launch_util.cc
@@ -476,10 +476,36 @@ Status XlaComputationLaunchContext::PopulateOutputs(
     stream->ThenRecordEvent(definition_event.get());
   }

+  std::vector<TensorShape> output_tensor_shapes;
+  output_tensor_shapes.reserve(ctx->num_outputs());
+  if (output.on_host_shape().is_dynamic()) {
+    TF_ASSIGN_OR_RETURN(
+        auto transfer_manager,
+        xla::TransferManager::GetForPlatform(stream->parent()->platform()));
+
+    xla::Shape output_host_shape = output.on_host_shape();
+    xla::Shape output_device_shape = output.on_device_shape();
+    TF_RETURN_IF_ERROR(transfer_manager->ReadDynamicShapes(
+        stream, &output, &output_host_shape, &output_device_shape));
+
+    output.set_shapes(output_host_shape, output_device_shape);
+    for (int i = 0; i < ctx->num_outputs(); ++i) {
+      const xla::Shape& subshape =
+          xla::ShapeUtil::GetSubshape(output_host_shape, {i});
+      TensorShape shape;
+      TF_RETURN_IF_ERROR(XLAShapeToTensorShape(subshape, &shape));
+      output_tensor_shapes.push_back(shape);
+    }
+  } else {
+    for (int i = 0; i < ctx->num_outputs(); ++i) {
+      output_tensor_shapes.push_back(compilation_result->outputs[i].shape);
+    }
+  }
+
   // Copy XLA results to the OpOutputList.
   int output_num = 0;
   for (int i = 0; i < ctx->num_outputs(); ++i) {
-    const TensorShape& shape = compilation_result->outputs[i].shape;
+    const TensorShape& shape = output_tensor_shapes[i];
     const DataType& type = compilation_result->outputs[i].type;
     VLOG(2) << "Retval " << i << " shape " << shape.DebugString() << " type "
             << DataTypeString(type);
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 2fd457e8e47..10e2d7e65d1 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -1202,6 +1202,9 @@ cc_library(
     srcs = ["transfer_manager.cc"],
     hdrs = ["transfer_manager.h"],
     deps = [
+        ":compiler",
+        ":executable",
+        ":maybe_owning_device_memory",
         ":shaped_buffer",
         "//tensorflow/compiler/xla:literal",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto_cc",
-        "//tensorflow/compiler/xla/service:executable",
-        "//tensorflow/compiler/xla/service:maybe_owning_device_memory",
         "//tensorflow/core:lib",
         "//tensorflow/core:stream_executor_no_cuda",
         "//tensorflow/stream_executor:device_memory",
diff --git a/tensorflow/compiler/xla/service/transfer_manager.cc b/tensorflow/compiler/xla/service/transfer_manager.cc
index ebb0226476f..0fd64209152 100644
--- a/tensorflow/compiler/xla/service/transfer_manager.cc
+++ b/tensorflow/compiler/xla/service/transfer_manager.cc
@@ -20,6 +20,7 @@ limitations under the License.
#include "absl/memory/memory.h" #include "absl/strings/str_cat.h" +#include "tensorflow/compiler/xla/service/compiler.h" #include "tensorflow/compiler/xla/service/maybe_owning_device_memory.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" @@ -33,6 +34,7 @@ limitations under the License. using absl::StrCat; namespace xla { + /* static */ tensorflow::mutex TransferManager::platform_transfer_manager_mutex_( tensorflow::LINKER_INITIALIZED); @@ -200,6 +202,67 @@ void TransferManager::TransferArrayFromDevice( std::move(done), transfer_metadata); } +Status TransferManager::ReadDynamicShapes(se::Stream* stream, + ShapedBuffer* device_buffer, + Shape* host_shape, + Shape* device_shape) { + DCHECK(device_shape->is_dynamic()); + Shape original_device_shape = *device_shape; + Shape original_host_shape = *host_shape; + TF_RETURN_IF_ERROR(stream->BlockHostUntilDone()); + + TF_ASSIGN_OR_RETURN(auto compiler, + Compiler::GetForPlatform(stream->parent()->platform())); + TF_RETURN_IF_ERROR(device_buffer->buffers().ForEachMutableElementWithStatus( + [&](const ShapeIndex& index, se::DeviceMemoryBase* buffer) { + const Shape& buffer_shape = + ShapeUtil::GetSubshape(*device_shape, index); + if (buffer_shape.IsTuple()) { + return Status::OK(); + } + Shape& host_sub_shape = + *ShapeUtil::GetMutableSubshape(host_shape, index); + Shape& device_sub_shape = + *ShapeUtil::GetMutableSubshape(device_shape, index); + if (device_sub_shape.is_static()) { + return Status::OK(); + } + + // Read the dynamic shape metadata from the device stream. + auto shape_size_fn = compiler->ShapeSizeBytesFunction(); + Shape buffer_shape_static = ShapeUtil::MakeStaticShape(buffer_shape); + const int64 offset = shape_size_fn(buffer_shape_static); + int64 metadata_size = shape_size_fn(buffer_shape) - offset; + if (metadata_size == 0) { + return InvalidArgument("Dynamic shape metadata size should not be 0"); + } + auto buffer_8 = se::DeviceMemory(*buffer); + auto metadata_buffer = + stream->parent()->GetSubBuffer(&buffer_8, offset, metadata_size); + TF_ASSIGN_OR_RETURN( + auto metadata, + TransferArrayFromDevice( + stream, + ShapeUtil::MakeShape(S32, {buffer_shape.dimensions_size()}), + metadata_buffer)); + + // Update shape size from metadata. + for (int64 i = 0; i < metadata.element_count(); ++i) { + host_sub_shape.mutable_dimensions()[i] = metadata.Get({i}); + device_sub_shape.mutable_dimensions()[i] = metadata.Get({i}); + } + return Status::OK(); + })); + host_shape->clear_dynamic_dimensions(); + device_shape->clear_dynamic_dimensions(); + + TF_RET_CHECK(ShapeUtil::DynamicShapeIsCompatible(*device_shape, + original_device_shape)); + TF_RET_CHECK( + ShapeUtil::DynamicShapeIsCompatible(*host_shape, original_host_shape)); + return Status::OK(); +} + /* static */ void TransferManager::RegisterTransferManager( se::Platform::Id platform_id, TransferManagerCreationFunction creation_function) { diff --git a/tensorflow/compiler/xla/service/transfer_manager.h b/tensorflow/compiler/xla/service/transfer_manager.h index e3f8ceacc42..c0670d26eee 100644 --- a/tensorflow/compiler/xla/service/transfer_manager.h +++ b/tensorflow/compiler/xla/service/transfer_manager.h @@ -184,6 +184,15 @@ class TransferManager { const se::DeviceMemoryBase& source, const TransferMetadata* transfer_metadata = nullptr); + // Read from a device buffer and update the dynamic dimension sizes of + // `host_shape` and `device_shape`. 
+  // shapes, and returns static shapes with dynamic shapes updated.
+  // The shape of the buffer also has to be compatible with the host shape and
+  // device shape.
+  virtual Status ReadDynamicShapes(se::Stream* stream,
+                                   ShapedBuffer* device_buffer,
+                                   Shape* host_shape, Shape* device_shape);
+
   // Transfers the given literal into the Infeed interface of the device,
   // using the given executor.
   virtual Status TransferLiteralToInfeed(se::StreamExecutor* executor,
diff --git a/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc b/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc
index 3bd8af577c8..bfd48bd1442 100644
--- a/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc
+++ b/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc
@@ -264,86 +264,28 @@ Status UpdateDynamicInputs(
   return Status::OK();
 }

-xla::StatusOr<xla::Literal> ReadMetadataLiteral(
-    se::Stream* stream, se::DeviceMemoryBase buffer,
-    const xla::Shape& buffer_shape, xla::TransferManager* transfer_manager) {
-  TF_ASSIGN_OR_RETURN(auto compiler, xla::Compiler::GetForPlatform(
-                                         stream->parent()->platform()));
-  auto shape_size_fn = compiler->ShapeSizeBytesFunction();
-  xla::Shape buffer_shape_static =
-      xla::ShapeUtil::MakeStaticShape(buffer_shape);
-  const int64 offset = shape_size_fn(buffer_shape_static);
-  int64 metadata_size = shape_size_fn(buffer_shape) - offset;
-  TF_RET_CHECK(metadata_size != 0);
-  auto buffer_8 = se::DeviceMemory<uint8>(buffer);
-  auto metadata_buffer =
-      stream->parent()->GetSubBuffer(&buffer_8, offset, metadata_size);
-  return transfer_manager->TransferArrayFromDevice(
-      stream,
-      xla::ShapeUtil::MakeShape(xla::S32, {buffer_shape.dimensions_size()}),
-      metadata_buffer);
-}
-
-// For each subshape in the result buffer that's dynamic, read the dynamic
-// dimension sizes from the metadata, and update output shapes. The result
-// shape is a static and concrete shape.
-xla::Status UpdateDynamicOutputs(se::Stream* stream,
-                                 const xla::ShapedBuffer& shaped_buffer,
-                                 xla::Shape* output_host_shape,
-                                 xla::Shape* output_device_shape) {
-  DCHECK(output_device_shape->is_dynamic());
-  TF_ASSIGN_OR_RETURN(
-      auto transfer_manager,
-      xla::TransferManager::GetForPlatform(stream->parent()->platform()));
-  TF_RETURN_IF_ERROR(stream->BlockHostUntilDone());
-  TF_RETURN_IF_ERROR(shaped_buffer.buffers().ForEachElementWithStatus(
-      [&](const xla::ShapeIndex& index, const se::DeviceMemoryBase& buffer) {
-        const xla::Shape& buffer_shape =
-            xla::ShapeUtil::GetSubshape(*output_device_shape, index);
-        if (buffer_shape.IsTuple()) {
-          return Status::OK();
-        }
-        xla::Shape& host_shape =
-            *xla::ShapeUtil::GetMutableSubshape(output_host_shape, index);
-        xla::Shape& device_shape =
-            *xla::ShapeUtil::GetMutableSubshape(output_device_shape, index);
-        if (device_shape.is_static()) {
-          return Status::OK();
-        }
-        TF_ASSIGN_OR_RETURN(auto metadata,
-                            ReadMetadataLiteral(stream, buffer, buffer_shape,
-                                                transfer_manager));
-        // Update shape size from metadata.
-        for (int64 i = 0; i < metadata.element_count(); ++i) {
-          host_shape.mutable_dimensions()[i] = metadata.Get<int32>({i});
-          device_shape.mutable_dimensions()[i] = metadata.Get<int32>({i});
-        }
-        return Status::OK();
-      }));
-  output_host_shape->clear_dynamic_dimensions();
-  output_device_shape->clear_dynamic_dimensions();
-  return Status::OK();
-}
-
 xla::StatusOr<RefPtr<XRTTupleAllocation>> CreateOutputTuple(
     se::Stream* stream, xla::ExecutionOutput run_result, xla::Backend* backend,
     int device_ordinal) {
   XRTTupleAllocation* output_tuple;
-  const xla::ScopedShapedBuffer& shaped_buffer = run_result.Result();
-  if (shaped_buffer.on_device_shape().is_dynamic()) {
+  xla::ScopedShapedBuffer* shaped_buffer = run_result.MutableResult();
+  if (shaped_buffer->on_device_shape().is_dynamic()) {
     // Update dynamic shapes from output buffer, and create a XRT tensor with
     // dimension sizes read from metadata.
-    xla::Shape output_host_shape = shaped_buffer.on_host_shape();
-    xla::Shape output_device_shape = shaped_buffer.on_device_shape();
-    TF_RETURN_IF_ERROR(UpdateDynamicOutputs(
+    xla::Shape output_host_shape = shaped_buffer->on_host_shape();
+    xla::Shape output_device_shape = shaped_buffer->on_device_shape();
+    TF_ASSIGN_OR_RETURN(
+        auto transfer_manager,
+        xla::TransferManager::GetForPlatform(stream->parent()->platform()));
+    TF_RETURN_IF_ERROR(transfer_manager->ReadDynamicShapes(
         stream, shaped_buffer, &output_host_shape, &output_device_shape));
     TF_RETURN_IF_ERROR(XRTTupleAllocation::CreateFromBuffer(
-        shaped_buffer, output_host_shape, output_device_shape, backend,
+        *shaped_buffer, output_host_shape, output_device_shape, backend,
         device_ordinal, &output_tuple));
   } else {
     // Fast-path: Don't copy shapes of output buffer.
     TF_RETURN_IF_ERROR(XRTTupleAllocation::CreateFromBuffer(
-        shaped_buffer, backend, device_ordinal, &output_tuple));
+        *shaped_buffer, backend, device_ordinal, &output_tuple));
   }
   // After the output tuple is created, we can release the output result
   // buffers, to make sure they won't be cleared by its destructor.
diff --git a/tensorflow/python/distribute/tpu_strategy_test.py b/tensorflow/python/distribute/tpu_strategy_test.py
index 142743a6ec2..850981e073e 100644
--- a/tensorflow/python/distribute/tpu_strategy_test.py
+++ b/tensorflow/python/distribute/tpu_strategy_test.py
@@ -123,6 +123,15 @@ class TPUTest(test.TestCase):
     result = bar() + 1
     self.assertAllEqual(result, 2)

+  def test_on_demand_op_with_dynamic_output(self):
+    with ops.device("/device:TPU:0"):
+      where_output = array_ops.where([True, False, True])
+    self.assertAllEqual(where_output, [[0], [2]])
+
+    with ops.device("/device:TPU:0"):
+      repeat_output = array_ops.repeat(math_ops.range(2), [1, 4])
+    self.assertAllEqual(repeat_output, [0, 1, 1, 1, 1])
+

 @parameterized.named_parameters([("PackedVar", True), ("", False)])
 class TPUStrategyTest(test.TestCase, parameterized.TestCase):

From c41685d118a9da1b8b7c4fb5620a9aa920bf0740 Mon Sep 17 00:00:00 2001
From: Advait Jain
Date: Tue, 23 Jun 2020 11:35:26 -0700
Subject: [PATCH 18/66] Separate out parse functionality into helper
 functions.
Ops in this change:
 * Abs
 * Add
 * ArgMax
 * ArgMin

PiperOrigin-RevId: 317908035
Change-Id: I6c33bd83c987c92b71992c6c113d8678bc9d35d8
---
 .../lite/core/api/flatbuffer_conversions.cc | 134 +++++++++++++-----
 .../lite/core/api/flatbuffer_conversions.h  |  16 +++
 .../lite/micro/micro_mutable_op_resolver.h  |  16 +--
 3 files changed, 121 insertions(+), 45 deletions(-)

diff --git a/tensorflow/lite/core/api/flatbuffer_conversions.cc b/tensorflow/lite/core/api/flatbuffer_conversions.cc
index 73d785bf369..c496c456542 100644
--- a/tensorflow/lite/core/api/flatbuffer_conversions.cc
+++ b/tensorflow/lite/core/api/flatbuffer_conversions.cc
@@ -177,6 +177,91 @@ TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type,
   }
 }

+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseAbs(const Operator*, BuiltinOperator, ErrorReporter*,
+                      BuiltinDataAllocator*, void**) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseAdd(const Operator* op, BuiltinOperator,
+                      ErrorReporter* error_reporter,
+                      BuiltinDataAllocator* allocator, void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteAddParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteAddParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const AddOptions* schema_params = op->builtin_options_as_AddOptions();
+
+  if (schema_params != nullptr) {
+    params->activation =
+        ConvertActivation(schema_params->fused_activation_function());
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseArgMax(const Operator* op, BuiltinOperator,
+                         ErrorReporter* error_reporter,
+                         BuiltinDataAllocator* allocator, void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteArgMaxParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteArgMaxParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const ArgMaxOptions* schema_params = op->builtin_options_as_ArgMaxOptions();
+
+  if (schema_params != nullptr) {
+    TF_LITE_ENSURE_STATUS(ConvertTensorType(
+        schema_params->output_type(), &params->output_type, error_reporter));
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseArgMin(const Operator* op, BuiltinOperator,
+                         ErrorReporter* error_reporter,
+                         BuiltinDataAllocator* allocator, void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteArgMinParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteArgMinParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const ArgMinOptions* schema_params = op->builtin_options_as_ArgMinOptions();
+
+  if (schema_params != nullptr) {
+    TF_LITE_ENSURE_STATUS(ConvertTensorType(
+        schema_params->output_type(), &params->output_type, error_reporter));
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
 TfLiteStatus ParseConv2D(const Operator* op, BuiltinOperator,
                          ErrorReporter* error_reporter,
                          BuiltinDataAllocator* allocator, void** builtin_data) {
@@ -430,6 +515,22 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
   SafeBuiltinDataAllocator safe_allocator(allocator);
   *builtin_data = nullptr;
   switch (op_type) {
+    case BuiltinOperator_ABS: {
+      return ParseAbs(op, op_type, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_ADD: {
+      return ParseAdd(op, op_type, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_ARG_MAX: {
+      return ParseArgMax(op, op_type, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_ARG_MIN: {
+      return ParseArgMin(op, op_type, error_reporter, allocator, builtin_data);
+    }
+
     case BuiltinOperator_CONV_2D: {
       return ParseConv2D(op, op_type, error_reporter, allocator, builtin_data);
     }
@@ -586,16 +687,6 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       *builtin_data = params.release();
       return kTfLiteOk;
     }
-    case BuiltinOperator_ADD: {
-      auto params = safe_allocator.Allocate<TfLiteAddParams>();
-      TF_LITE_ENSURE(error_reporter, params != nullptr);
-      if (const auto* schema_params = op->builtin_options_as_AddOptions()) {
-        params->activation =
-            ConvertActivation(schema_params->fused_activation_function());
-      }
-      *builtin_data = params.release();
-      return kTfLiteOk;
-    }
     case BuiltinOperator_DIV: {
       auto params = safe_allocator.Allocate<TfLiteDivParams>();
       TF_LITE_ENSURE(error_reporter, params != nullptr);
@@ -838,28 +929,6 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       *builtin_data = params.release();
       return kTfLiteOk;
     }
-    case BuiltinOperator_ARG_MAX: {
-      auto params = safe_allocator.Allocate<TfLiteArgMaxParams>();
-      TF_LITE_ENSURE(error_reporter, params != nullptr);
-      if (const auto* schema_params = op->builtin_options_as_ArgMaxOptions()) {
-        TF_LITE_ENSURE_STATUS(ConvertTensorType(schema_params->output_type(),
-                                                &params->output_type,
-                                                error_reporter));
-      }
-      *builtin_data = params.release();
-      return kTfLiteOk;
-    }
-    case BuiltinOperator_ARG_MIN: {
-      auto params = safe_allocator.Allocate<TfLiteArgMinParams>();
-      TF_LITE_ENSURE(error_reporter, params != nullptr);
-      if (const auto* schema_params = op->builtin_options_as_ArgMinOptions()) {
-        TF_LITE_ENSURE_STATUS(ConvertTensorType(schema_params->output_type(),
-                                                &params->output_type,
-                                                error_reporter));
-      }
-      *builtin_data = params.release();
-      return kTfLiteOk;
-    }
     case BuiltinOperator_TRANSPOSE_CONV: {
       auto params = safe_allocator.Allocate<TfLiteTransposeConvParams>();
       TF_LITE_ENSURE(error_reporter, params != nullptr);
@@ -1019,7
+1088,6 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, return kTfLiteOk; } // Below are the ops with no builtin_data structure. - case BuiltinOperator_ABS: case BuiltinOperator_BATCH_TO_SPACE_ND: // TODO(aselle): Implement call in BuiltinOptions, but nullptrs are // ok for now, since there is no call implementation either. diff --git a/tensorflow/lite/core/api/flatbuffer_conversions.h b/tensorflow/lite/core/api/flatbuffer_conversions.h index 78d2aca6222..a6431aa5ee1 100644 --- a/tensorflow/lite/core/api/flatbuffer_conversions.h +++ b/tensorflow/lite/core/api/flatbuffer_conversions.h @@ -75,6 +75,22 @@ TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type, // removed once we are no longer using ParseOpData for the OpResolver // implementation in micro. +TfLiteStatus ParseAbs(const Operator* op, BuiltinOperator op_type, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseAdd(const Operator* op, BuiltinOperator op_type, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseArgMax(const Operator* op, BuiltinOperator op_type, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseArgMin(const Operator* op, BuiltinOperator op_type, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + TfLiteStatus ParseConv2D(const Operator* op, BuiltinOperator op_type, ErrorReporter* error_reporter, BuiltinDataAllocator* allocator, void** builtin_data); diff --git a/tensorflow/lite/micro/micro_mutable_op_resolver.h b/tensorflow/lite/micro/micro_mutable_op_resolver.h index 1b76f440a61..8c99f77729d 100644 --- a/tensorflow/lite/micro/micro_mutable_op_resolver.h +++ b/tensorflow/lite/micro/micro_mutable_op_resolver.h @@ -108,31 +108,23 @@ class MicroMutableOpResolver : public MicroOpResolver { // MicroMutableOpResolver object. TfLiteStatus AddAbs() { - // TODO(b/149408647): Replace ParseOpData with the operator specific parse - // function. return AddBuiltin(BuiltinOperator_ABS, *tflite::ops::micro::Register_ABS(), - ParseOpData); + ParseAbs); } TfLiteStatus AddAdd() { - // TODO(b/149408647): Replace ParseOpData with the operator specific parse - // function. return AddBuiltin(BuiltinOperator_ADD, *tflite::ops::micro::Register_ADD(), - ParseOpData); + ParseAdd); } TfLiteStatus AddArgMax() { - // TODO(b/149408647): Replace ParseOpData with the operator specific parse - // function. return AddBuiltin(BuiltinOperator_ARG_MAX, - *tflite::ops::micro::Register_ARG_MAX(), ParseOpData); + *tflite::ops::micro::Register_ARG_MAX(), ParseArgMax); } TfLiteStatus AddArgMin() { - // TODO(b/149408647): Replace ParseOpData with the operator specific parse - // function. return AddBuiltin(BuiltinOperator_ARG_MIN, - *tflite::ops::micro::Register_ARG_MIN(), ParseOpData); + *tflite::ops::micro::Register_ARG_MIN(), ParseArgMin); } TfLiteStatus AddAveragePool2D() { From 6bbb6f6940bcfe973568898126c9695626e85327 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 23 Jun 2020 11:45:40 -0700 Subject: [PATCH 19/66] Remove recompile workaround in integration test since the fix has been in for a while. 
PiperOrigin-RevId: 317910115 Change-Id: I5174cbf5eea8b4983c0ba9aff10ec8a53e34bf13 --- tensorflow/python/keras/tests/integration_test.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tensorflow/python/keras/tests/integration_test.py b/tensorflow/python/keras/tests/integration_test.py index 8e4d38c1a6a..64a7b694355 100644 --- a/tensorflow/python/keras/tests/integration_test.py +++ b/tensorflow/python/keras/tests/integration_test.py @@ -160,10 +160,6 @@ class SequentialIntegrationTest(KerasIntegrationTest): model.pop() model.add(keras.layers.Dense(y_train.shape[-1], activation='softmax')) - # TODO(b/134523282): There is an bug with Sequential models, so the model - # must be marked as compiled=False to ensure the next compile goes through. - model._is_compiled = False - model.compile( loss='categorical_crossentropy', optimizer=keras.optimizer_v2.adam.Adam(0.005), From 7d94f9589de4388d249794d9856b10ca55d146c8 Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Tue, 23 Jun 2020 11:47:29 -0700 Subject: [PATCH 20/66] Add MultiProcessPoolRunner Tensorflow initialization can take a long time when GPUs are present. We cannot afford starting a new group of workers for every single test. MultiProcessPoolRunner uses a pool of workers so that we can avoid the initialization cost. Compared to MultiProcessRunner, it doesn't support terminating workers. Note that implementation wise we could build MultiProcessPoolRunner on top of MultiProcessRunner or vice-versa if there's no need to support termination. Since it's easier for MultiProcessPoolRunner not to support termination, we choose MultiProcessPoolRunner on top of MultiProcessRunner. PiperOrigin-RevId: 317910434 Change-Id: I72edd5231cfc9b0dc57df7e5bc135da097cca362 --- .../python/distribute/multi_process_runner.py | 210 ++++++++++++++++-- .../distribute/multi_process_runner_test.py | 69 +++++- 2 files changed, 254 insertions(+), 25 deletions(-) diff --git a/tensorflow/python/distribute/multi_process_runner.py b/tensorflow/python/distribute/multi_process_runner.py index db31b9c4dd4..84b61be1ea2 100644 --- a/tensorflow/python/distribute/multi_process_runner.py +++ b/tensorflow/python/distribute/multi_process_runner.py @@ -67,8 +67,7 @@ except ImportError: # exception stack trace info is stored in exc_info to pass on to parent process # to be re-raised. _ProcessStatusInfo = collections.namedtuple( - '_ProcessStatusInfo', - ['task_type', 'is_successful', 'exc_info', 'return_value']) + '_ProcessStatusInfo', ['is_successful', 'exc_info', 'return_value']) # Information returned from a successful MultiProcessRunner run. MultiProcessRunnerResult = collections.namedtuple('MultiProcessRunnerResult', @@ -124,6 +123,7 @@ class MultiProcessRunner(object): stream_stdout=True, list_stdout=False, use_dill_for_args=True, + daemon=False, args=None, kwargs=None): """Creates a multi-process runner. @@ -157,6 +157,7 @@ class MultiProcessRunner(object): use_dill_for_args: Whether to use dill to pickle `args` and `kwargs`. dill can pickle more objects, but doesn't work with types in `multiprocessing` library like `Mutex`. + daemon: Whether to start processes as daemons. args: Positional arguments to be sent to functions run on processes. kwargs: Keyword arguments to be sent to functions run on processes. 
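A usage sketch distilled from the tests added at the end of this patch (`_probe_fn` is an illustrative name, not part of the change; like the tests, it has to run under `multi_process_runner.test_main()`):

```
import os

from tensorflow.python.distribute import multi_process_runner
from tensorflow.python.distribute import multi_worker_test_base


def _probe_fn():
  return os.getpid()

cluster_spec = multi_worker_test_base.create_cluster_spec(num_workers=2)
runner = multi_process_runner.MultiProcessPoolRunner(cluster_spec)

pids = runner.run(_probe_fn)          # first call starts the worker pool
assert runner.run(_probe_fn) == pids  # later calls reuse the same processes
```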
@@ -188,6 +189,7 @@ class MultiProcessRunner(object):
     self._list_stdout = list_stdout
     self._dependence_on_chief = True
     self._use_dill_for_args = use_dill_for_args
+    self._daemon = daemon
     self._args = args or ()
     self._kwargs = kwargs or {}

@@ -268,7 +270,8 @@ class MultiProcessRunner(object):
           test_env=test_env,
           target=_ProcFunc(),
           args=(resources, test_env, proc_func, args, kwargs,
-                self._use_dill_for_args))
+                self._use_dill_for_args),
+          daemon=self._daemon)
       p.start()
       self._processes[(task_type, task_id)] = p
       self._outstanding_subprocess_count += 1
@@ -580,7 +583,6 @@ class _ProcFunc(object):
       time.sleep(0.1)
     self._resources.process_status_queue.put(
         _ProcessStatusInfo(
-            task_type=task_type,
             is_successful=True,
             exc_info=None,
             return_value=None))
@@ -640,17 +642,9 @@ class _ProcFunc(object):
     if test_env.v2_enabled:
       v2_compat.enable_v2_behavior()

-    try:
-      with self._runtime_mode(test_env.executing_eagerly):
-        return_value = proc_func(*args, **kwargs)
-        is_successful = True
-        exc_info = None
-
-    except Exception:  # pylint: disable=broad-except
-      # Capture all exceptions to be reported to parent process.
-      return_value = None
-      is_successful = False
-      exc_info = sys.exc_info()
+    with self._runtime_mode(test_env.executing_eagerly):
+      info = _run_contained(proc_func, args, kwargs)
+      self._resources.process_status_queue.put(info)

       # Re-raise the exception in addition to reporting it to the parent
       # process, so that even if `--test_timeout` flag is set and the
@@ -659,18 +653,188 @@ class _ProcFunc(object):
       # instead of silently suppressing the error due to early bazel
       # timeout. Raising an error in the subprocess produces stack trace in
       # the log, but the program continues running.
-      raise
-
-    finally:
-      info = _ProcessStatusInfo(
-          task_type=test_env.task_type,
-          is_successful=is_successful,
-          exc_info=exc_info,
-          return_value=return_value)
-      self._resources.process_status_queue.put(info)
+      if not info.is_successful:
+        six.reraise(*info.exc_info)

     self._close_streaming()


+class MultiProcessPoolRunner(object):
+  """A utility class to start a process pool to simulate a cluster.
+
+  It's similar to MultiProcessRunner, but uses a pool of processes to avoid the
+  expensive initialization cost of Tensorflow.
+  """
+
+  def __init__(self, cluster_spec, initializer=None):
+    """Creates a multi-process pool runner.
+
+    Args:
+      cluster_spec: Dict for cluster spec. The following is an example of a
+        cluster with three workers.
+        {"worker": ["worker0.example.com:2222",
+                    "worker1.example.com:2222",
+                    "worker2.example.com:2222"]}
+      initializer: a callable to be called at the startup of worker processes.
+
+    Raises:
+      RuntimeError: if `multi_process_runner.test_main()` is not called.
+      ValueError: if there is more than one chief in the `cluster_spec`.
+    """
+    self._cluster_spec = cluster_spec
+    self._initializer = initializer
+    self._conn = {}
+    self._runner = None
+
+  def __del__(self):
+    self._reset()
+
+  def _reset(self):
+    for conn in self._conn.values():
+      conn.close()
+    self._conn = {}
+    if self._runner is not None:
+      self._runner.join()
+      self._runner = None
+
+  def _start(self):
+    """Starts the worker pool."""
+    # We need different arguments for different processes, so we're passing a
+    # no-op proc_func here and using start_single_process instead.
+    #
+    # We also need to start the processes as daemons so that they don't block
+    # the program from exiting. Note that __del__ may not get called when
+    # there's an exception.
The user may also store a pool runner in a global
+    # object to share across test cases.
+
+    if dill is None:
+      raise unittest.SkipTest(
+          'TODO(b/150264776): Resolve dependency issue in CI')
+
+    self._runner = MultiProcessRunner(
+        proc_func=lambda: None,
+        cluster_spec=self._cluster_spec,
+        use_dill_for_args=False,
+        daemon=True)
+    if self._initializer:
+      initializer = dill.dumps(self._initializer, dill.HIGHEST_PROTOCOL)
+    else:
+      initializer = None
+    for task_type, addresses in self._cluster_spec.items():
+      for task_id, _ in enumerate(addresses):
+        conn1, conn2 = multiprocessing.Pipe(duplex=True)
+        self._conn[(task_type, task_id)] = conn1
+        self._runner.start_single_process(
+            task_type,
+            task_id,
+            proc_func=_pool_runner_worker,
+            args=(initializer, conn2))
+
+  def run(self, proc_func, args=None, kwargs=None):
+    """Runs `proc_func` with `args` and `kwargs` on all jobs.
+
+    Args:
+      proc_func: The function to be run.
+      args: Optional positional arguments to be supplied in `proc_func`.
+      kwargs: Optional keyword arguments to be supplied in `proc_func`.
+
+    Returns:
+      A list of return values.
+    """
+    if self._runner is None:
+      self._start()
+
+    # Since we start the processes as daemons, they're going to be killed by
+    # SIGTERM when the program exits. We only turn on streaming during run() to
+    # avoid printing the stacktrace caused by the SIGTERM.
+    self._runner._stream_stdout = True  # pylint: disable=protected-access
+
+    try:
+      proc_func = dill.dumps(proc_func, dill.HIGHEST_PROTOCOL)
+      for conn in self._conn.values():
+        conn.send((proc_func, args or [], kwargs or {}))
+
+      process_statuses = []
+      for (task_type, task_id), conn in self._conn.items():
+        logging.info('Waiting for the result from %s-%d', task_type, task_id)
+        try:
+          process_statuses.append(conn.recv())
+        except EOFError:
+          # This shouldn't happen due to exceptions in proc_func. This usually
+          # means bugs in the runner.
+          self._reset()
+          raise RuntimeError('Unexpected EOF. Worker process may have died. '
+                             'Please report a bug')
+
+      return_values = []
+      for process_status in process_statuses:
+        assert isinstance(process_status, _ProcessStatusInfo)
+        if not process_status.is_successful:
+          six.reraise(*process_status.exc_info)
+        if process_status.return_value is not None:
+          return_values.append(process_status.return_value)
+
+      return return_values
+    finally:
+      self._runner._stream_stdout = False  # pylint: disable=protected-access
+
+
+def _pool_runner_worker(initializer, conn):
+  """Function that runs on the workers in a pool.
+
+  It listens for callables to run and sends back the results until `conn` is
+  closed. It captures any exception raised while executing the callable and
+  returns it through `conn`.
+
+  Args:
+    initializer: A callable to execute during startup.
+    conn: A multiprocessing.Connection object to listen for tasks and send
+      results.
+  """
+  if initializer:
+    initializer = dill.loads(initializer)
+    initializer()
+  while True:
+    try:
+      proc_func, args, kwargs = conn.recv()
+    except EOFError:
+      break
+    proc_func = dill.loads(proc_func)
+    info = _run_contained(proc_func, args, kwargs)
+    sys.stdout.flush()
+    sys.stderr.flush()
+    conn.send(info)
+
+
+def _run_contained(proc_func, args, kwargs):
+  """Runs `proc_func` with `args` and `kwargs`.
+
+  The function returns _ProcessStatusInfo which captures the return value and
+  the exception.
+
+  Args:
+    proc_func: The function to be run.
+    args: Optional positional arguments to be supplied in `proc_func`.
+    kwargs: Optional keyword arguments to be supplied in `proc_func`.
+ + Returns: + a _ProcessStatusInfo. + """ + try: + return_value = proc_func(*args, **kwargs) + is_successful = True + exc_info = None + except Exception: # pylint: disable=broad-except + return_value = None + is_successful = False + exc_info = sys.exc_info() + finally: + return _ProcessStatusInfo( # pylint: disable=lost-exception + is_successful=is_successful, + exc_info=exc_info, + return_value=return_value) + + class SubprocessTimeoutError(RuntimeError): """An error that indicates there is at least one subprocess timing out. diff --git a/tensorflow/python/distribute/multi_process_runner_test.py b/tensorflow/python/distribute/multi_process_runner_test.py index d6e04010e34..32d3ae6c84e 100644 --- a/tensorflow/python/distribute/multi_process_runner_test.py +++ b/tensorflow/python/distribute/multi_process_runner_test.py @@ -22,6 +22,8 @@ import json import os import threading import time +import unittest + from absl import logging from tensorflow.python.distribute import multi_process_runner @@ -45,7 +47,7 @@ def proc_func_that_adds_simple_return_data(): return 'dummy_data' -def proc_func_that_return_args_and_kwargs(*args, **kwargs): +def proc_func_that_returns_args_and_kwargs(*args, **kwargs): return list(args) + list(kwargs.items()) @@ -53,6 +55,20 @@ def proc_func_with_barrier(): return multi_process_runner.barrier() +def proc_func_that_returns_pid(): + return os.getpid() + + +V = None + + +def proc_func_that_sets_global(val): + global V + old_val = V + V = val + return old_val + + class MultiProcessRunnerTest(test.TestCase): def _worker_idx(self): @@ -95,7 +111,7 @@ class MultiProcessRunnerTest(test.TestCase): def test_multi_process_runner_args_passed_correctly(self): return_value = multi_process_runner.run( - proc_func_that_return_args_and_kwargs, + proc_func_that_returns_args_and_kwargs, multi_worker_test_base.create_cluster_spec(num_workers=1), args=('a', 'b'), kwargs={ @@ -299,5 +315,54 @@ class MultiProcessRunnerTest(test.TestCase): any('something printed' in line for line in list_to_assert)) +class MultiProcessPoolRunnerTest(test.TestCase): + + def test_same_process_across_runs(self): + cluster_spec = multi_worker_test_base.create_cluster_spec(num_workers=2) + runner = multi_process_runner.MultiProcessPoolRunner(cluster_spec) + pid = runner.run(proc_func_that_returns_pid) + for _ in range(3): + self.assertAllEqual(runner.run(proc_func_that_returns_pid), pid) + + def test_exceptions_in_sub_process(self): + cluster_spec = multi_worker_test_base.create_cluster_spec(num_workers=2) + runner = multi_process_runner.MultiProcessPoolRunner(cluster_spec) + pid = runner.run(proc_func_that_returns_pid) + with self.assertRaisesRegexp(ValueError, 'This is an error.'): + runner.run(proc_func_that_errors) + self.assertAllEqual(runner.run(proc_func_that_returns_pid), pid) + + def test_tf_config(self): + cluster_spec = multi_worker_test_base.create_cluster_spec( + has_chief=True, num_workers=2) + runner = multi_process_runner.MultiProcessPoolRunner(cluster_spec) + result = runner.run(proc_func_that_adds_task_type_in_return_data) + + job_count_dict = {'worker': 2, 'chief': 1} + for data in result: + job_count_dict[data] -= 1 + + self.assertEqual(job_count_dict['worker'], 0) + self.assertEqual(job_count_dict['chief'], 0) + + @unittest.expectedFailure + def test_exception_in_main_process(self): + # When there's an exception in the main process, __del__() is not called. + # This test is to verify MultiProcessPoolRunner can cope with __del__() not + # being called. 
+ cluster_spec = multi_worker_test_base.create_cluster_spec( + has_chief=True, num_workers=2) + runner = multi_process_runner.MultiProcessPoolRunner(cluster_spec) + runner.run(proc_func_that_returns_pid) + raise ValueError('failure') + + def test_initializer(self): + cluster_spec = multi_worker_test_base.create_cluster_spec(num_workers=2) + runner = multi_process_runner.MultiProcessPoolRunner( + cluster_spec, initializer=lambda: proc_func_that_sets_global(1)) + result = runner.run(proc_func_that_sets_global, args=(2,)) + self.assertAllEqual(result, [1, 1]) + + if __name__ == '__main__': multi_process_runner.test_main() From 846a410161f7c7d1210f26f71742ccdaf08a0975 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 23 Jun 2020 11:53:12 -0700 Subject: [PATCH 21/66] Update ops-related pbtxt files. PiperOrigin-RevId: 317911581 Change-Id: Id1b6ee7c3db66e6a2c26d514af6b967c9be4ae3b --- .../compat/ops_history_v2/DecodeImage.pbtxt | 39 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 39 +++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 tensorflow/core/ops/compat/ops_history_v2/DecodeImage.pbtxt diff --git a/tensorflow/core/ops/compat/ops_history_v2/DecodeImage.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DecodeImage.pbtxt new file mode 100644 index 00000000000..066ffd1091d --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/DecodeImage.pbtxt @@ -0,0 +1,39 @@ +op { + name: "DecodeImage" + input_arg { + name: "contents" + type: DT_STRING + } + output_arg { + name: "image" + type_attr: "dtype" + } + attr { + name: "channels" + type: "int" + default_value { + i: 0 + } + } + attr { + name: "dtype" + type: "type" + default_value { + type: DT_UINT8 + } + allowed_values { + list { + type: DT_UINT8 + type: DT_UINT16 + type: DT_FLOAT + } + } + } + attr { + name: "expand_animations" + type: "bool" + default_value { + b: true + } + } +} diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index dbd91c91b65..dec894cc173 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -11476,6 +11476,45 @@ op { type: DT_UINT8 } } +op { + name: "DecodeImage" + input_arg { + name: "contents" + type: DT_STRING + } + output_arg { + name: "image" + type_attr: "dtype" + } + attr { + name: "channels" + type: "int" + default_value { + i: 0 + } + } + attr { + name: "dtype" + type: "type" + default_value { + type: DT_UINT8 + } + allowed_values { + list { + type: DT_UINT8 + type: DT_UINT16 + type: DT_FLOAT + } + } + } + attr { + name: "expand_animations" + type: "bool" + default_value { + b: true + } + } +} op { name: "DecodeJSONExample" input_arg { From dd7bf5b85e3062af84f2c8ebc5eac900c34b9d7f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 23 Jun 2020 11:53:53 -0700 Subject: [PATCH 22/66] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 317911728 Change-Id: Ibab9243d032babf1f1299829c85e1ee73d7ed02c --- tensorflow/go/op/wrappers.go | 74 ++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 3675c26751c..106e7445be9 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -15370,6 +15370,80 @@ func MergeSummary(scope *Scope, inputs []tf.Output) (summary tf.Output) { return op.Output(0) } +// DecodeImageAttr is an optional argument to DecodeImage. +type DecodeImageAttr func(optionalAttr) + +// DecodeImageChannels sets the optional channels attribute to value. 
+//
+// value: Number of color channels for the decoded image.
+// If not specified, defaults to 0
+func DecodeImageChannels(value int64) DecodeImageAttr {
+	return func(m optionalAttr) {
+		m["channels"] = value
+	}
+}
+
+// DecodeImageDtype sets the optional dtype attribute to value.
+//
+// value: The desired DType of the returned Tensor.
+// If not specified, defaults to DT_UINT8
+func DecodeImageDtype(value tf.DataType) DecodeImageAttr {
+	return func(m optionalAttr) {
+		m["dtype"] = value
+	}
+}
+
+// DecodeImageExpandAnimations sets the optional expand_animations attribute to value.
+//
+// value: Controls the output shape of the returned op. If True, the returned op will
+// produce a 3-D tensor for PNG, JPEG, and BMP files; and a 4-D tensor for all
+// GIFs, whether animated or not. If False, the returned op will produce a 3-D
+// tensor for all file types and will truncate animated GIFs to the first frame.
+// If not specified, defaults to true
+func DecodeImageExpandAnimations(value bool) DecodeImageAttr {
+	return func(m optionalAttr) {
+		m["expand_animations"] = value
+	}
+}
+
+// Function for decode_bmp, decode_gif, decode_jpeg, and decode_png.
+//
+// Detects whether an image is a BMP, GIF, JPEG, or PNG, and performs the
+// appropriate operation to convert the input bytes string into a Tensor of type
+// dtype.
+//
+// *NOTE*: decode_gif returns a 4-D array [num_frames, height, width, 3], as
+// opposed to decode_bmp, decode_jpeg and decode_png, which return 3-D arrays
+// [height, width, num_channels]. Make sure to take this into account when
+// constructing your graph if you are intermixing GIF files with BMP, JPEG, and/or
+// PNG files. Alternately, set the expand_animations argument of this function to
+// False, in which case the op will return 3-dimensional tensors and will truncate
+// animated GIF files to the first frame.
+//
+// Arguments:
+//	contents: 0-D. The encoded image bytes.
+//
+// Returns 3-D with shape `[height, width, channels]` or 4-D with shape
+// `[frame, height, width, channels]`.
+func DecodeImage(scope *Scope, contents tf.Output, optional ...DecodeImageAttr) (image tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "DecodeImage",
+		Input: []tf.Input{
+			contents,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // AvgPoolAttr is an optional argument to AvgPool.
 type AvgPoolAttr func(optionalAttr)

From 66d4dbfc9d71812c7be3f8a195293e0cc0539fff Mon Sep 17 00:00:00 2001
From: Robert David
Date: Tue, 23 Jun 2020 12:00:02 -0700
Subject: [PATCH 23/66] Optimize NeonCwiseClipping to do a single loop with a
 single postamble.

Also rename the float ClipVector function, as it's doing the same thing.

Make the parameters among the float/intN_t versions the same (removing the
output array / n_batch as applicable), fix the call sites, and change the
portable implementations to a template.
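As a semantic reference (an editor's sketch in NumPy terms, not code from the patch): every clipping variant below clamps each element into [-clipping_value, clipping_value]; the NEON kernels simply do this 8 or 16 lanes at a time, with a scalar postamble for the leftover tail.

```
import numpy as np

def cwise_clipping(vector, clipping_value):
  # Equivalent of Neon/PortableCwiseClipping: elementwise clamp.
  return np.minimum(np.maximum(vector, -clipping_value), clipping_value)

v = np.array([-7, -3, 0, 3, 7], dtype=np.int16)
print(cwise_clipping(v, np.int16(5)))  # [-5 -3  0  3  5]
```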
PiperOrigin-RevId: 317912994 Change-Id: I94fa22b00d0c76e2f69794d18c493eeb2cb27a1c --- .../internal/optimized/neon_tensor_utils.cc | 137 ++++++++---------- .../internal/optimized/neon_tensor_utils.h | 21 ++- .../optimized/neon_tensor_utils_impl.h | 15 +- .../internal/optimized/sse_tensor_utils.h | 21 +-- .../reference/portable_tensor_utils.cc | 37 ----- .../reference/portable_tensor_utils.h | 22 +-- .../reference/portable_tensor_utils_impl.h | 21 ++- .../lite/kernels/internal/tensor_utils.h | 29 ++-- .../kernels/internal/tensor_utils_test.cc | 31 ++-- tensorflow/lite/kernels/lstm_eval.cc | 32 ++-- .../calibration/builtin_logging_ops/lstm.cc | 8 +- 11 files changed, 152 insertions(+), 222 deletions(-) diff --git a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc index c96f298370a..800d7008b4b 100644 --- a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc +++ b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc @@ -1892,61 +1892,70 @@ void NeonCwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch, } } -void NeonCwiseClipping(int16_t* input, const int16_t clipping_value, - int32_t n_batch, int32_t n_input) { - const int16x8_t max_dup = vdupq_n_s16(clipping_value); - const int16x8_t min_dup = vdupq_n_s16(-clipping_value); - for (int batch = 0; batch < n_batch; ++batch) { - int i = 0; - for (; i <= n_input - 16; i += 16) { - const int index = batch * n_input + i; - int16x8_t val_0 = vld1q_s16(input + index); - int16x8_t val_1 = vld1q_s16(input + index + 8); - val_0 = vminq_s16(val_0, max_dup); - val_1 = vminq_s16(val_1, max_dup); - val_0 = vmaxq_s16(val_0, min_dup); - val_1 = vmaxq_s16(val_1, min_dup); - vst1q_s16(input + index, val_0); - vst1q_s16(input + index + 8, val_1); - } - for (; i < n_input; ++i) { - const int index = batch * n_input + i; - if (input[index] > clipping_value) { - input[index] = clipping_value; - } - if (input[index] < -clipping_value) { - input[index] = -clipping_value; - } - } +void NeonCwiseClipping(float* vector, const int v_size, + const float clipping_value) { + const float32x4_t clipping_value_f32x4 = vmovq_n_f32(clipping_value); + const float32x4_t neg_clipping_value_f32x4 = vmovq_n_f32(-clipping_value); + + int i = 0; + for (; i <= v_size - kFloatValuesPerNeonVector; + i += kFloatValuesPerNeonVector) { + // Load from memory to vector. + float32x4_t v_f32x4 = vld1q_f32(vector + i); + // Clip between clipping_value and -clipping_value. + v_f32x4 = vminq_f32(clipping_value_f32x4, v_f32x4); + v_f32x4 = vmaxq_f32(neg_clipping_value_f32x4, v_f32x4); + // Save to output. 
+    vst1q_f32(vector + i, v_f32x4);
+  }
+  for (; i < v_size; i++) {
+    vector[i] = std::max(std::min(clipping_value, vector[i]), -clipping_value);
   }
 }

-void NeonCwiseClipping(int8_t* input, const int8_t clipping_value,
-                       int32_t n_batch, int32_t n_input) {
+void NeonCwiseClipping(int16_t* vector, const int v_size,
+                       const int16_t clipping_value) {
+  const int16x8_t max_dup = vdupq_n_s16(clipping_value);
+  const int16x8_t min_dup = vdupq_n_s16(-clipping_value);
+
+  int i = 0;
+  for (; i <= v_size - kInt16ValuesPerNeonVector * 2;
+       i += kInt16ValuesPerNeonVector * 2) {
+    int16x8_t val_0 = vld1q_s16(vector + i);
+    int16x8_t val_1 = vld1q_s16(vector + i + kInt16ValuesPerNeonVector);
+    val_0 = vminq_s16(val_0, max_dup);
+    val_1 = vminq_s16(val_1, max_dup);
+    val_0 = vmaxq_s16(val_0, min_dup);
+    val_1 = vmaxq_s16(val_1, min_dup);
+    vst1q_s16(vector + i, val_0);
+    vst1q_s16(vector + i + kInt16ValuesPerNeonVector, val_1);
+  }
+  for (; i < v_size; i++) {
+    vector[i] = std::max(std::min(clipping_value, vector[i]),
+                         static_cast<int16_t>(-clipping_value));
+  }
+}
+
+void NeonCwiseClipping(int8_t* vector, const int v_size,
+                       const int8_t clipping_value) {
   const int8x16_t max_dup = vdupq_n_s8(clipping_value);
   const int8x16_t min_dup = vdupq_n_s8(-clipping_value);
-  for (int batch = 0; batch < n_batch; ++batch) {
-    int i = 0;
-    for (; i <= n_input - 32; i += 32) {
-      const int index = batch * n_input + i;
-      int8x16_t val_0 = vld1q_s8(input + index);
-      int8x16_t val_1 = vld1q_s8(input + index + 16);
-      val_0 = vminq_s8(val_0, max_dup);
-      val_1 = vminq_s8(val_1, max_dup);
-      val_0 = vmaxq_s8(val_0, min_dup);
-      val_1 = vmaxq_s8(val_1, min_dup);
-      vst1q_s8(input + index, val_0);
-      vst1q_s8(input + index + 16, val_1);
-    }
-    for (; i < n_input; ++i) {
-      const int index = batch * n_input + i;
-      if (input[index] > clipping_value) {
-        input[index] = clipping_value;
-      }
-      if (input[index] < -clipping_value) {
-        input[index] = -clipping_value;
-      }
-    }
+
+  int i = 0;
+  for (; i < v_size - kInt8ValuesPerNeonVector * 2;
+       i += kInt8ValuesPerNeonVector * 2) {
+    int8x16_t val_0 = vld1q_s8(vector + i);
+    int8x16_t val_1 = vld1q_s8(vector + i + kInt8ValuesPerNeonVector);
+    val_0 = vminq_s8(val_0, max_dup);
+    val_1 = vminq_s8(val_1, max_dup);
+    val_0 = vmaxq_s8(val_0, min_dup);
+    val_1 = vmaxq_s8(val_1, min_dup);
+    vst1q_s8(vector + i, val_0);
+    vst1q_s8(vector + i + kInt8ValuesPerNeonVector, val_1);
+  }
+  for (; i < v_size; i++) {
+    vector[i] = std::max(std::min(clipping_value, vector[i]),
+                         static_cast<int8_t>(-clipping_value));
   }
 }

@@ -2208,34 +2217,6 @@ bool NeonIsZeroVector(const int8_t* vector, int v_size) {
   return true;
 }

-void NeonClipVector(const float* vector, int v_size, float abs_limit,
-                    float* result) {
-  // If v_size is not divisible by the vector size, then we need to process the
-  // final few elements sequentially. postamble_start shows the start index
-  // where this should happen.
-  const int postamble_start =
-      RoundDownVectors<kFloatValuesPerNeonVector>(v_size);
-
-  // Replicate abs_limit and -abs_limit in two vectors.
-  const float32x4_t abs_limit_f32x4 = vmovq_n_f32(abs_limit);
-  const float32x4_t neg_abs_limit_f32x4 = vmovq_n_f32(-abs_limit);
-
-  int v = 0;
-  for (; v < postamble_start; v += kFloatValuesPerNeonVector) {
-    // Load from memory to vector.
-    float32x4_t v_f32x4 = vld1q_f32(vector + v);
-    // Clip between abs_limit and -abs_limit.
-    float32x4_t result_f32x4 = vminq_f32(abs_limit_f32x4, v_f32x4);
-    result_f32x4 = vmaxq_f32(neg_abs_limit_f32x4, result_f32x4);
-    // Save to output.
- vst1q_f32(result + v, result_f32x4); - } - // Postamble loop. - for (; v < v_size; v++) { - result[v] = std::max(std::min(abs_limit, vector[v]), -abs_limit); - } -} - void NeonVectorScalarMultiply(const int8_t* vector, const int v_size, const float scale, float* result) { // Here the assumption is that each buffer is 4-byte aligned. diff --git a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.h b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.h index 86951fcd559..7417e836b5c 100644 --- a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.h +++ b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.h @@ -198,14 +198,17 @@ void CwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch, NEON_OR_PORTABLE(CwiseAdd, input_1, input_2, n_batch, n_input, output); } -void CwiseClipping(int16_t* input, const int16_t clipping_value, - int32_t n_batch, int32_t n_input) { - NEON_OR_PORTABLE(CwiseClipping, input, clipping_value, n_batch, n_input); +void CwiseClipping(float* vector, const int v_size, + const float clipping_value) { + NEON_OR_PORTABLE(CwiseClipping, vector, v_size, clipping_value); } - -void CwiseClipping(int8_t* input, const int8_t clipping_value, int32_t n_batch, - int32_t n_input) { - NEON_OR_PORTABLE(CwiseClipping, input, clipping_value, n_batch, n_input); +void CwiseClipping(int16_t* vector, const int v_size, + const int16_t clipping_value) { + NEON_OR_PORTABLE(CwiseClipping, vector, v_size, clipping_value); +} +void CwiseClipping(int8_t* vector, const int v_size, + const int8_t clipping_value) { + NEON_OR_PORTABLE(CwiseClipping, vector, v_size, clipping_value); } void BatchVectorBatchVectorDotProduct(const int16_t* vector1, @@ -255,10 +258,6 @@ void VectorScalarMultiply(const int8_t* vector, int v_size, float scale, float* result) { NEON_OR_PORTABLE(VectorScalarMultiply, vector, v_size, scale, result); } -void ClipVector(const float* vector, int v_size, float abs_limit, - float* result) { - NEON_OR_PORTABLE(ClipVector, vector, v_size, abs_limit, result); -} void SymmetricQuantizeFloats(const float* values, const int size, int8_t* quantized_values, float* min_value, diff --git a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils_impl.h b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils_impl.h index 1554d07a61c..44bc83a0669 100644 --- a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils_impl.h +++ b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils_impl.h @@ -83,11 +83,12 @@ void NeonCwiseMul(const int16_t* input_1, const int16_t* input_2, void NeonCwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch, int n_input, int16_t* output); -void NeonCwiseClipping(int16_t* input, const int16_t clipping_value, - int32_t n_batch, int32_t n_input); - -void NeonCwiseClipping(int8_t* input, const int8_t clipping_value, - int32_t n_batch, int32_t n_input); +void NeonCwiseClipping(float* vector, const int v_size, + const float clipping_value); +void NeonCwiseClipping(int16_t* vector, const int v_size, + const int16_t clipping_value); +void NeonCwiseClipping(int8_t* vector, const int v_size, + const int8_t clipping_value); void NeonMatrixBatchVectorMultiplyAccumulate( const int8_t* input, const int32_t* bias, @@ -133,10 +134,6 @@ void NeonSub1Vector(const float* vector, int v_size, float* result); void NeonSub1Vector(const int16_t* vector, int v_size, int16_t* result); -// Clip elements of a vector using a abs_limit value. 
-void NeonClipVector(const float* vector, int v_size, float abs_limit, - float* result); - // Multiply all elements of vector with a scalar. void NeonVectorScalarMultiply(const int8_t* vector, int v_size, float scale, float* result); diff --git a/tensorflow/lite/kernels/internal/optimized/sse_tensor_utils.h b/tensorflow/lite/kernels/internal/optimized/sse_tensor_utils.h index 224d811e862..af29dda7229 100644 --- a/tensorflow/lite/kernels/internal/optimized/sse_tensor_utils.h +++ b/tensorflow/lite/kernels/internal/optimized/sse_tensor_utils.h @@ -206,14 +206,19 @@ void CwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch, PortableCwiseAdd(input_1, input_2, n_batch, n_input, output); } -void CwiseClipping(int16_t* input, const int16_t clipping_value, - int32_t n_batch, int32_t n_input) { - PortableCwiseClipping(input, clipping_value, n_batch, n_input); +void CwiseClipping(float* vector, const int v_size, + const float clipping_value) { + PortableCwiseClipping(vector, v_size, clipping_value); } -void CwiseClipping(int8_t* input, const int8_t clipping_value, int32_t n_batch, - int32_t n_input) { - PortableCwiseClipping(input, clipping_value, n_batch, n_input); +void CwiseClipping(int16_t* vector, const int v_size, + const int16_t clipping_value) { + PortableCwiseClipping(vector, v_size, clipping_value); +} + +void CwiseClipping(int8_t* vector, const int v_size, + const int8_t clipping_value) { + PortableCwiseClipping(vector, v_size, clipping_value); } void BatchVectorBatchVectorDotProduct(const int16_t* vector1, @@ -263,10 +268,6 @@ void VectorScalarMultiply(const int8_t* vector, int v_size, float scale, float* result) { NEON_OR_PORTABLE(VectorScalarMultiply, vector, v_size, scale, result); } -void ClipVector(const float* vector, int v_size, float abs_limit, - float* result) { - NEON_OR_PORTABLE(ClipVector, vector, v_size, abs_limit, result); -} void SymmetricQuantizeFloats(const float* values, const int size, int8_t* quantized_values, float* min_value, diff --git a/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc b/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc index 4f6db290d4f..856331a62e7 100644 --- a/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc +++ b/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc @@ -651,36 +651,6 @@ void PortableCwiseAdd(const int16_t* input_1, const int16_t* input_2, } } -void PortableCwiseClipping(int16_t* input, const int16_t clipping_value, - int32_t n_batch, int32_t n_input) { - for (int batch = 0; batch < n_batch; ++batch) { - for (int i = 0; i < n_input; ++i) { - const int index = batch * n_input + i; - if (input[index] > clipping_value) { - input[index] = clipping_value; - } - if (input[index] < -clipping_value) { - input[index] = -clipping_value; - } - } - } -} - -void PortableCwiseClipping(int8_t* input, const int8_t clipping_value, - int32_t n_batch, int32_t n_input) { - for (int batch = 0; batch < n_batch; ++batch) { - for (int i = 0; i < n_input; ++i) { - const int index = batch * n_input + i; - if (input[index] > clipping_value) { - input[index] = clipping_value; - } - if (input[index] < -clipping_value) { - input[index] = -clipping_value; - } - } - } -} - float PortableVectorVectorDotProduct(const float* vector1, const float* vector2, int v_size) { float result = 0.0; @@ -757,13 +727,6 @@ void PortableVectorScalarMultiply(const int8_t* vector, const int v_size, } } -void PortableClipVector(const float* vector, int v_size, float abs_limit, - float* result) { - 
for (int v = 0; v < v_size; v++) {
-    result[v] = std::max(std::min(abs_limit, vector[v]), -abs_limit);
-  }
-}
-
 void PortableReductionSumVector(const float* input_vector, float* output_vector,
                                 int output_size, int reduction_size) {
   const float* input_vector_ptr = input_vector;
diff --git a/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h b/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h
index 0fd7a407595..ecb7fe8ea2b 100644
--- a/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h
+++ b/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h
@@ -230,14 +230,19 @@ void CwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch,
   PortableCwiseAdd(input_1, input_2, n_batch, n_input, output);
 }
 
-void CwiseClipping(int16_t* input, const int16_t clipping_value,
-                   int32_t n_batch, int32_t n_input) {
-  PortableCwiseClipping(input, clipping_value, n_batch, n_input);
+void CwiseClipping(float* vector, const int v_size,
+                   const float clipping_value) {
+  PortableCwiseClipping(vector, v_size, clipping_value);
 }
 
-void CwiseClipping(int8_t* input, const int8_t clipping_value, int32_t n_batch,
-                   int32_t n_input) {
-  PortableCwiseClipping(input, clipping_value, n_batch, n_input);
+void CwiseClipping(int16_t* vector, const int v_size,
+                   const int16_t clipping_value) {
+  PortableCwiseClipping(vector, v_size, clipping_value);
+}
+
+void CwiseClipping(int8_t* vector, const int v_size,
+                   const int8_t clipping_value) {
+  PortableCwiseClipping(vector, v_size, clipping_value);
 }
 
 void VectorBatchVectorCwiseProductAccumulate(const int16_t* vector, int v_size,
@@ -279,11 +284,6 @@ void VectorScalarMultiply(const int8_t* vector, int v_size, float scale,
   PortableVectorScalarMultiply(vector, v_size, scale, result);
 }
 
-void ClipVector(const float* vector, int v_size, float abs_limit,
-                float* result) {
-  PortableClipVector(vector, v_size, abs_limit, result);
-}
-
 void ReductionSumVector(const float* input_vector, float* output_vector,
                         int output_size, int reduction_size) {
   PortableReductionSumVector(input_vector, output_vector, output_size,
diff --git a/tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h b/tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h
index 34767ccd942..556e4640cbb 100644
--- a/tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h
+++ b/tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h
@@ -15,6 +15,7 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_IMPL_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_IMPL_H_
 
+#include <algorithm>
 #include <cstdint>
 
 // TODO(ghodrat): Remove this header file and the dependency to internal data
@@ -33,9 +34,6 @@ class CpuBackendContext;
 
 namespace tensor_utils {
 
-// Limit a float input f between +abs_limit and -abs_limit.
-float PortableClip(float f, float abs_limit);
-
 template <typename T>
 bool PortableIsZeroVector(const T* vector, int v_size) {
   for (int i = 0; i < v_size; ++i) {
@@ -178,11 +176,14 @@ void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
 void PortableCwiseAdd(const int16_t* input_1, const int16_t* input_2,
                       int n_batch, int n_input, int16_t* output);
 
-void PortableCwiseClipping(int16_t* input, const int16_t clipping_value,
-                           int32_t n_batch, int32_t n_input);
-
-void PortableCwiseClipping(int8_t* input, const int8_t clipping_value,
-                           int32_t n_batch, int32_t n_input);
+template <typename T>
+void PortableCwiseClipping(T* vector, const int v_size,
+                           const T clipping_value) {
+  for (int i = 0; i < v_size; i++) {
+    vector[i] = std::max(std::min(clipping_value, vector[i]),
+                         static_cast<T>(-clipping_value));
+  }
+}
 
 // Batch vector initialization with another vector.
 void PortableVectorBatchVectorAssign(const float* vector, int v_size,
@@ -201,10 +202,6 @@ void PortableSub1Vector(const int16_t* vector, int v_size, int16_t* result);
 void PortableVectorScalarMultiply(const int8_t* vector, int v_size,
                                   float scale, float* result);
 
-// Clip elements of a vector using a abs_limit value.
-void PortableClipVector(const float* vector, int v_size, float abs_limit,
-                        float* result);
-
 // Reduce-sum on a float input vector:
 // input_vector: float pointer to input vector.
 // output_vector: float pointer to vector.
diff --git a/tensorflow/lite/kernels/internal/tensor_utils.h b/tensorflow/lite/kernels/internal/tensor_utils.h
index 8c956c49f5f..716fbaa740e 100644
--- a/tensorflow/lite/kernels/internal/tensor_utils.h
+++ b/tensorflow/lite/kernels/internal/tensor_utils.h
@@ -406,23 +406,16 @@ void CwiseMul(const int16_t* input_1, const int16_t* input_2,
 void CwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch,
               int n_input, int16_t* output);
 
-// Element-wise in-place clipping of a quantized vector.
-// Parameters:
-//   - input: batch vector of size n_batch * n_input; 16 bit.
+// Element-wise in-place clipping of a vector. Overloaded for float, int16_t,
+// int8_t. Parameters:
+//   - vector: vector of size v_size.
+//   - v_size: the size of the vector.
 //   - clipping_value: the value used for clipping.
-//   - n_batch: the number of batches.
-//   - n_input: the size for input and output.
-void CwiseClipping(int16_t* input, const int16_t clipping_value,
-                   int32_t n_batch, int32_t n_input);
-
-// Element-wise in-place clipping of a quantized vector.
-// Parameters:
-//   - input: batch vector of size n_batch * n_input; 8 bit.
-//   - clipping_value: the value used for clipping.
-//   - n_batch: the number of batches.
-//   - n_input: the size for input and output.
-void CwiseClipping(int8_t* input, const int8_t clipping_value, int32_t n_batch,
-                   int32_t n_input);
+void CwiseClipping(float* vector, const int v_size,
+                   const float clipping_value);
+void CwiseClipping(int16_t* vector, const int v_size,
+                   const int16_t clipping_value);
+void CwiseClipping(int8_t* vector, const int v_size,
+                   const int8_t clipping_value);
 
 // Cwise product of two vectors.
 template <typename T>
@@ -611,10 +604,6 @@ void Sub1Vector(const int16_t* vector, int v_size, int16_t* result);
 void VectorScalarMultiply(const int8_t* vector, int v_size, float scale,
                           float* result);
 
-// Clip elements of a vector using a abs_limit value.
-void ClipVector(const float* vector, int v_size, float abs_limit,
-                float* result);
-
 // Reduce-sum on a float input vector:
 // input_vector: float pointer to input vector.
 // output_vector: float pointer to vector.
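For reference, a minimal usage sketch of the consolidated in-place API declared above, assuming a caller-owned float buffer (`state`, `n_batch`, `n_cell`, and `clip` are hypothetical names, not part of this patch):

```
#include "tensorflow/lite/kernels/internal/tensor_utils.h"

// Clip an LSTM-style state buffer in place. The batch dimension is folded
// into the flat vector length, so no separate batch count is passed.
void ClipState(float* state, int n_batch, int n_cell, float clip) {
  tflite::tensor_utils::CwiseClipping(state, n_batch * n_cell, clip);
}
```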
diff --git a/tensorflow/lite/kernels/internal/tensor_utils_test.cc b/tensorflow/lite/kernels/internal/tensor_utils_test.cc
index 878cf0d2618..825070cf510 100644
--- a/tensorflow/lite/kernels/internal/tensor_utils_test.cc
+++ b/tensorflow/lite/kernels/internal/tensor_utils_test.cc
@@ -37,18 +37,6 @@ TEST(uKernels, FloorLog2Test) {
   }
 }
 
-TEST(uKernels, ClipTest) {
-  constexpr int kVectorSize = 10;
-  constexpr float kAbsLimit = 2.0;
-  static float input[kVectorSize] = {0.0,  -0.5, 1.0,  -1.5, 2.0,
-                                     -2.5, 3.0,  -3.5, 4.0,  -4.5};
-  std::vector<float> output(kVectorSize);
-  ClipVector(input, kVectorSize, kAbsLimit, output.data());
-  EXPECT_THAT(output,
-              ElementsAreArray(ArrayFloatNear(
-                  {0.0, -0.5, 1.0, -1.5, 2.0, -2.0, 2.0, -2.0, 2.0, -2.0})));
-}
-
 TEST(uKernels, VectorScalarMultiply) {
   constexpr int kVectorSize = 29;
   static int8_t input[kVectorSize];
@@ -976,15 +964,28 @@ TEST(uKernels, QuantAddTest) {
   EXPECT_THAT(output, testing::ElementsAreArray(expected_output));
 }
 
+TEST(uKernels, ClipTest) {
+  constexpr int kVectorSize = 10;
+  constexpr float kAbsLimit = 2.0;
+  std::vector<float> input = {0.0,  -0.5, 1.0,  -1.5, 2.0,
+                              -2.5, 3.0,  -3.5, 4.0,  -4.5};
+  CwiseClipping(input.data(), kVectorSize, kAbsLimit);
+  const std::vector<float> expected_output = {0.0,  -0.5, 1.0,  -1.5, 2.0,
+                                              -2.0, 2.0,  -2.0, 2.0,  -2.0};
+  EXPECT_THAT(input, testing::ElementsAreArray(expected_output));
+}
+
 // Quantized clipping for 16 bit.
 TEST(uKernels, QuantClip16Test) {
+  constexpr int kVectorSize = 30;
+  constexpr int16_t kAbsLimit = 300;
   std::vector<int16_t> input = {
       -10500, 1,      -2,     -7404,  200,    -5401,  -1757,  -7668,
       -19248, -9692,  -24249, -17923, -15840, -10026, 5249,   -89,
       1787,   -200,   -6691,  -19524, -13439, -24048, -1123,  32767,
       -17267, -3378,  823,    11482,  -11139, 7508,
   };
-  CwiseClipping(input.data(), 300, 2, 15);
+  CwiseClipping(input.data(), kVectorSize, kAbsLimit);
   const std::vector<int16_t> expected_output = {
       -300, 1,    -2,   -300, 200,  -300, -300, -300, -300, -300,
       -300, -300, -300, -300, 300,  -89,  300,  -200, -300, -300,
@@ -995,11 +996,13 @@
 // Quantized clipping for 8 bit.
 TEST(uKernels, QuantClip8Test) {
+  constexpr int kVectorSize = 30;
+  constexpr int8_t kAbsLimit = 32;
   std::vector<int8_t> input = {
       4,  -11, -5,  -34, -10, -17, -27, -22, 15,  127, -128, 1,  3, 56, 3,
       -21, 1,  9,   -13, 10,  0,   -1,  -55, -40, 127, -128, 11, 4, 6,  32,
   };
-  CwiseClipping(input.data(), 32, 2, 15);
+  CwiseClipping(input.data(), kVectorSize, kAbsLimit);
   const std::vector<int8_t> expected_output = {
       4,  -11, -5,  -32, -10, -17, -27, -22, 15, 32, -32, 1,  3, 32, 3,
       -21, 1,  9,   -13, 10,  0,   -1,  -32, -32, 32, -32, 11, 4, 6,  32,
diff --git a/tensorflow/lite/kernels/lstm_eval.cc b/tensorflow/lite/kernels/lstm_eval.cc
index ca8344d863b..3f74f3e7fff 100644
--- a/tensorflow/lite/kernels/lstm_eval.cc
+++ b/tensorflow/lite/kernels/lstm_eval.cc
@@ -374,8 +374,8 @@ inline void LstmStepFloat(
                           cell_state_ptr);
   }
   if (params->cell_clip > 0.0) {
-    tensor_utils::ClipVector(cell_state_ptr, n_batch * n_cell,
-                             params->cell_clip, cell_state_ptr);
+    tensor_utils::CwiseClipping(cell_state_ptr, n_batch * n_cell,
+                                params->cell_clip);
   }
 
   // For each batch and cell: update the output gate.
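The same migration pattern repeats at every clip site in this file and below: the old out-of-place call passed one pointer as both source and destination, and the new call clips in place. A hedged sketch of the before/after shape (`state`, `size`, and `clip` are illustrative names only):

```
void ApplyClip(float* state, int size, float clip) {
  if (clip > 0.0f) {
    // Previously: tensor_utils::ClipVector(state, size, clip, state);
    tensor_utils::CwiseClipping(state, size, clip);
  }
}
```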
@@ -415,8 +415,8 @@ inline void LstmStepFloat( projection_weights_ptr, n_output, n_cell, output_gate_scratch, n_batch, output_state_ptr); if (params->proj_clip > 0.0) { - tensor_utils::ClipVector(output_state_ptr, n_batch * n_output, - params->proj_clip, output_state_ptr); + tensor_utils::CwiseClipping(output_state_ptr, n_batch * n_output, + params->proj_clip); } } else { std::copy_n(output_gate_scratch, n_batch * n_output, output_state_ptr); @@ -837,8 +837,8 @@ inline void LstmStepHybrid( cell_state_ptr); } if (params->cell_clip > 0.0) { - tensor_utils::ClipVector(cell_state_ptr, n_batch * n_cell, - params->cell_clip, cell_state_ptr); + tensor_utils::CwiseClipping(cell_state_ptr, n_batch * n_cell, + params->cell_clip); } // For each batch and cell: update the output gate. @@ -893,8 +893,8 @@ inline void LstmStepHybrid( scaling_factors_scratch, context); } if (params->proj_clip > 0.0) { - tensor_utils::ClipVector(output_state_ptr, n_batch * n_output, - params->proj_clip, output_state_ptr); + tensor_utils::CwiseClipping(output_state_ptr, n_batch * n_output, + params->proj_clip); } } else { std::copy_n(output_gate_scratch, n_batch * n_output, output_state_ptr); @@ -1187,8 +1187,8 @@ inline void LstmStepInteger8x8_16( n_cell, cell_state_ptr); if (quantized_cell_clip > 0) { - tensor_utils::CwiseClipping(cell_state_ptr, quantized_cell_clip, n_batch, - n_cell); + tensor_utils::CwiseClipping(cell_state_ptr, n_batch * n_cell, + quantized_cell_clip); } // Ouptut gate. @@ -1234,8 +1234,8 @@ inline void LstmStepInteger8x8_16( effective_proj_scale_a, effective_proj_scale_b, n_batch, n_cell, n_output, output_state_zp, scratch5, output_ptr, context); if (quantized_proj_clip > 0) { - tensor_utils::CwiseClipping(output_ptr, quantized_proj_clip, n_batch, - n_output); + tensor_utils::CwiseClipping(output_ptr, n_batch * n_output, + quantized_proj_clip); } } else { std::copy_n(scratch4, n_batch * n_output, output_ptr); @@ -1498,8 +1498,8 @@ inline void LstmStepInteger8x8_8( tensor_utils::CwiseAdd(scratch6, scratch7, n_batch, n_cell, cell_state_ptr); if (quantized_cell_clip > 0) { - tensor_utils::CwiseClipping(cell_state_ptr, quantized_cell_clip, n_batch, - n_cell); + tensor_utils::CwiseClipping(cell_state_ptr, n_batch * n_cell, + quantized_cell_clip); } // Cell to hidden. @@ -1517,8 +1517,8 @@ inline void LstmStepInteger8x8_8( // Projection clipping. if (quantized_proj_clip > 0) { - tensor_utils::CwiseClipping(output_ptr, quantized_proj_clip, n_batch, - n_output); + tensor_utils::CwiseClipping(output_ptr, n_batch * n_output, + quantized_proj_clip); } // Copy output to output state. diff --git a/tensorflow/lite/tools/optimize/calibration/builtin_logging_ops/lstm.cc b/tensorflow/lite/tools/optimize/calibration/builtin_logging_ops/lstm.cc index 09ce81c1d97..ed1ef07d8d3 100644 --- a/tensorflow/lite/tools/optimize/calibration/builtin_logging_ops/lstm.cc +++ b/tensorflow/lite/tools/optimize/calibration/builtin_logging_ops/lstm.cc @@ -222,8 +222,8 @@ inline void LstmStepWithAuxInput( cell_state_ptr); } if (params->cell_clip > 0.0) { - tensor_utils::ClipVector(cell_state_ptr, n_batch * n_cell, - params->cell_clip, cell_state_ptr); + tensor_utils::CwiseClipping(cell_state_ptr, n_batch * n_cell, + params->cell_clip); } // For each batch and cell: update the output gate. 
@@ -268,8 +268,8 @@ inline void LstmStepWithAuxInput( projection_weights_ptr, n_output, n_cell, output_gate_scratch, n_batch, output_state_ptr); if (params->proj_clip > 0.0) { - tensor_utils::ClipVector(output_state_ptr, n_batch * n_output, - params->proj_clip, output_state_ptr); + tensor_utils::CwiseClipping(output_state_ptr, n_batch * n_output, + params->proj_clip); } } else { std::copy_n(output_gate_scratch, n_batch * n_output, output_state_ptr); From 465ca119b20697cd51a14297e3e81b8e6b2ecf91 Mon Sep 17 00:00:00 2001 From: Yujing Zhang Date: Tue, 23 Jun 2020 12:05:35 -0700 Subject: [PATCH 24/66] Introduce a SaveContext to detect whether we are building a graph for a SavedModel. And don't use packed variables under a SaveContext. PiperOrigin-RevId: 317914296 Change-Id: I92cc6043484d642a1919cb5ab238d5e5cacc4c2a --- tensorflow/python/distribute/BUILD | 2 + .../distribute/packed_distributed_variable.py | 4 ++ .../distribute/saved_model_save_load_test.py | 14 +++++ .../distribute/saved_model_test_base.py | 17 ++++++ tensorflow/python/distribute/values.py | 23 ++++---- tensorflow/python/distribute/values_test.py | 11 ++++ tensorflow/python/saved_model/BUILD | 10 ++++ tensorflow/python/saved_model/save.py | 19 ++++++- tensorflow/python/saved_model/save_context.py | 56 +++++++++++++++++++ 9 files changed, 144 insertions(+), 12 deletions(-) create mode 100644 tensorflow/python/saved_model/save_context.py diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index 85ee8de5635..9900040a6e6 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -751,6 +751,7 @@ py_library( "//tensorflow/python:variable_scope", "//tensorflow/python:variables", "//tensorflow/python/eager:context", + "//tensorflow/python/saved_model:save_context", "//tensorflow/python/training/saving:saveable_object", "//tensorflow/python/training/saving:saveable_object_util", "//tensorflow/python/training/tracking:base", @@ -1171,6 +1172,7 @@ distribute_py_test( "//tensorflow/python/eager:context", "//tensorflow/python/eager:def_function", "//tensorflow/python/eager:test", + "//tensorflow/python/saved_model:save_context", "//tensorflow/python/saved_model/model_utils:mode_keys", "//tensorflow/python/tpu:tpu_lib", "//tensorflow/python/types", diff --git a/tensorflow/python/distribute/packed_distributed_variable.py b/tensorflow/python/distribute/packed_distributed_variable.py index c249b8efc1c..4c9433dc164 100644 --- a/tensorflow/python/distribute/packed_distributed_variable.py +++ b/tensorflow/python/distribute/packed_distributed_variable.py @@ -108,6 +108,10 @@ class PackedDistributedVariable(resource_variable_ops.BaseResourceVariable): else: return self._handle + @property + def packed_handle(self): + return self._handle + def _read_variable_op(self): if context.executing_eagerly(): return self.get_var_on_current_device().value() diff --git a/tensorflow/python/distribute/saved_model_save_load_test.py b/tensorflow/python/distribute/saved_model_save_load_test.py index 23050a612f5..2b753c1e1c8 100644 --- a/tensorflow/python/distribute/saved_model_save_load_test.py +++ b/tensorflow/python/distribute/saved_model_save_load_test.py @@ -70,6 +70,20 @@ class SavedModelKerasModelTest(test_base.TestSavedModelBase): distribution_for_restoring, save_in_scope) + @combinations.generate( + combinations.times(test_base.simple_models_with_strategies(), + combinations.combine(save_in_scope=[True, False]))) + def test_no_variable_device_placement(self, model_and_input, distribution, + 
save_in_scope): + saved_dir = self.run_test_save_strategy(model_and_input, distribution, + save_in_scope) + func = saved_model.load(saved_dir) + concrete_function = func.signatures[test_base._DEFAULT_FUNCTION_KEY] + for f in concrete_function.graph.as_graph_def().library.function: + for n in f.node_def: + if n.op == 'ReadVariableOp': + self.assertEmpty(n.device) + class SavedModelTFModuleTest(test_base.TestSavedModelBase): diff --git a/tensorflow/python/distribute/saved_model_test_base.py b/tensorflow/python/distribute/saved_model_test_base.py index 70ea582baff..1fab0f2b0bd 100644 --- a/tensorflow/python/distribute/saved_model_test_base.py +++ b/tensorflow/python/distribute/saved_model_test_base.py @@ -274,3 +274,20 @@ class TestSavedModelBase(test.TestCase, parameterized.TestCase): tolerance = get_tolerance(distribution_for_saving, distribution_for_restoring) self.assertAllClose(result_before_save, load_result, atol=tolerance) + + def run_test_save_strategy(self, model_and_input, + distribution, save_in_scope): + """Save a model with DS.""" + saved_dir = os.path.join(self.get_temp_dir(), '3') + with distribution.scope(): + model = model_and_input.get_model() + x_train, y_train, _ = model_and_input.get_data() + batch_size = model_and_input.get_batch_size() + self._train_model(model, x_train, y_train, batch_size) + + if save_in_scope: + with distribution.scope(): + self._save_model(model, saved_dir) + else: + self._save_model(model, saved_dir) + return saved_dir diff --git a/tensorflow/python/distribute/values.py b/tensorflow/python/distribute/values.py index 37643e03b18..35f040edc83 100644 --- a/tensorflow/python/distribute/values.py +++ b/tensorflow/python/distribute/values.py @@ -35,6 +35,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variable_scope as vs from tensorflow.python.ops import variables as variables_lib +from tensorflow.python.saved_model import save_context from tensorflow.python.training.saving import saveable_object from tensorflow.python.training.saving import saveable_object_util from tensorflow.python.training.tracking import base as trackable @@ -472,11 +473,10 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, # variable. self._var_policy = var_policy - @property - def _devices(self): - if self._packed_var is not None: - return tuple(d for d in self._packed_var.devices) - return tuple(v.device for v in self._values) + def _use_packed_variable(self): + # Don't use packed variable when under a SaveContext to avoid explicit + # device placement on variable consuming ops. + return self._packed_var is not None and not save_context.in_save_context() def is_initialized(self, name=None): """Identifies if all the component variables are initialized. @@ -488,7 +488,7 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, The op that evaluates to True or False depending on if all the component variables are initialized. 
""" - if self._packed_var is not None: + if self._use_packed_variable(): return self._packed_var.is_initialized() result = self._primary.is_initialized() # We iterate through the list of values except the last one to allow us to @@ -562,7 +562,9 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, @property def _packed_variable(self): - return self._packed_var + if self._use_packed_variable(): + return self._packed_var + return None @property def handle(self): @@ -571,7 +573,7 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, raise ValueError("`handle` is not available outside the replica context" " or a `tf.distribute.Strategy.update()` call.") else: - if self._packed_var is not None: + if self._use_packed_variable(): return self._packed_var.handle return self._values[replica_id].handle @@ -623,7 +625,7 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, def _get_replica(self, replica_id): """Returns the value on a device with the given replica_id.""" - if self._packed_var is not None: + if self._use_packed_variable(): return self._packed_var.on_device(self._devices[replica_id]) return self._values[replica_id] @@ -844,8 +846,9 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, obj_map[v] = new_obj resource_map[v.handle] = new_obj.handle obj_map[self] = new_obj - resource_map[self.handle] = new_obj.handle resource_map[self] = new_obj.handle + if self._packed_var is not None: + resource_map[self._packed_var.packed_handle] = new_obj.handle return obj_map, resource_map diff --git a/tensorflow/python/distribute/values_test.py b/tensorflow/python/distribute/values_test.py index d0e3eec22a8..69884a06814 100644 --- a/tensorflow/python/distribute/values_test.py +++ b/tensorflow/python/distribute/values_test.py @@ -55,6 +55,7 @@ from tensorflow.python.ops import random_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables as variables_lib +from tensorflow.python.saved_model import save_context from tensorflow.python.saved_model.model_utils import mode_keys from tensorflow.python.tpu import tpu_strategy_util from tensorflow.python.training import saver as saver_lib @@ -753,6 +754,16 @@ class PackedDistributedVariableTest(test.TestCase, parameterized.TestCase): self.assertEqual(val.device, devices[i]) self.assertEqual(self.evaluate(val.read_value()), i) + def testIgnorePackedVariableInSaveContext(self, distribution): + distribution._enable_packed_variable_in_eager_mode = True + with distribution.scope(): + v = variables_lib.Variable(0) + self.assertIsInstance( + v._packed_variable, packed.PackedDistributedVariable) + + with save_context.save_context(): + self.assertIsNone(v._packed_variable) + class MirroredVariableTest(test.TestCase, parameterized.TestCase): diff --git a/tensorflow/python/saved_model/BUILD b/tensorflow/python/saved_model/BUILD index 240b60f43f6..1fc6253f763 100644 --- a/tensorflow/python/saved_model/BUILD +++ b/tensorflow/python/saved_model/BUILD @@ -281,6 +281,15 @@ py_library( ], ) +py_library( + name = "save_context", + srcs = [ + "save_context.py", + ], + srcs_version = "PY2AND3", + deps = [], +) + py_library( name = "save", srcs = [ @@ -293,6 +302,7 @@ py_library( ":function_serialization", ":nested_structure_coder", ":revived_types", + ":save_context", ":save_options", ":signature_constants", ":signature_def_utils", diff --git a/tensorflow/python/saved_model/save.py 
b/tensorflow/python/saved_model/save.py index 802ce1d61b7..84764431b9d 100644 --- a/tensorflow/python/saved_model/save.py +++ b/tensorflow/python/saved_model/save.py @@ -45,6 +45,7 @@ from tensorflow.python.saved_model import constants from tensorflow.python.saved_model import function_serialization from tensorflow.python.saved_model import nested_structure_coder from tensorflow.python.saved_model import revived_types +from tensorflow.python.saved_model import save_context from tensorflow.python.saved_model import save_options from tensorflow.python.saved_model import signature_constants from tensorflow.python.saved_model import signature_def_utils @@ -985,8 +986,11 @@ def export_meta_graph(obj, filename, signatures=None, options=None): ops.dismantle_graph(exported_graph) -def _build_meta_graph(obj, export_dir, signatures, options, - meta_graph_def=None): +def _build_meta_graph_impl(obj, + export_dir, + signatures, + options, + meta_graph_def=None): """Creates a MetaGraph containing the resources and functions of an object.""" if ops.inside_function(): raise AssertionError( @@ -1044,3 +1048,14 @@ def _build_meta_graph(obj, export_dir, signatures, options, graph_debug_info.SerializeToString(deterministic=True)) return meta_graph_def, exported_graph, object_saver, asset_info + + +def _build_meta_graph(obj, + export_dir, + signatures, + options, + meta_graph_def=None): + """Creates a MetaGraph under a SaveContext.""" + with save_context.save_context(): + return _build_meta_graph_impl(obj, export_dir, signatures, options, + meta_graph_def) diff --git a/tensorflow/python/saved_model/save_context.py b/tensorflow/python/saved_model/save_context.py new file mode 100644 index 00000000000..53d92587247 --- /dev/null +++ b/tensorflow/python/saved_model/save_context.py @@ -0,0 +1,56 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ==============================================================================
+"""Context for building SavedModel."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import contextlib
+import threading
+
+
+class SaveContext(threading.local):
+  """A context for building a graph of SavedModel."""
+
+  def __init__(self):
+    super(SaveContext, self).__init__()
+    self._in_save_context = False
+
+  def enter_save_context(self):
+    self._in_save_context = True
+
+  def exit_save_context(self):
+    self._in_save_context = False
+
+  def in_save_context(self):
+    return self._in_save_context
+
+_save_context = SaveContext()
+
+
+@contextlib.contextmanager
+def save_context():
+  _save_context.enter_save_context()
+  try:
+    yield
+  finally:
+    _save_context.exit_save_context()
+
+
+def in_save_context():
+  """Returns whether under a save context."""
+  return _save_context.in_save_context()
+
From 68256cc9a847fcbc590ea04293c08bdd8f6498ea Mon Sep 17 00:00:00 2001
From: Nick Kreeger
Date: Tue, 23 Jun 2020 12:22:53 -0700
Subject: [PATCH 25/66] Add internal test for multi-tenant with the
 RecordingMicroInterpreter.

PiperOrigin-RevId: 317918028
Change-Id: I93c006dcb3b35750bcf269d1defb0ae6e59aebe5
---
 tensorflow/lite/micro/memory_arena_threshold_test.cc | 2 +-
 tensorflow/lite/micro/recording_micro_interpreter.h  | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/tensorflow/lite/micro/memory_arena_threshold_test.cc b/tensorflow/lite/micro/memory_arena_threshold_test.cc
index b45de85a21b..c698f2c7115 100644
--- a/tensorflow/lite/micro/memory_arena_threshold_test.cc
+++ b/tensorflow/lite/micro/memory_arena_threshold_test.cc
@@ -31,7 +31,7 @@ namespace {
 // Ensure memory doesn't expand more that 3%:
 constexpr float kAllocationThreshold = 0.03;
 constexpr float kAllocationTailMiscCeiling = 1024;
-const bool kIs64BitSystem = sizeof(void*) == 8;
+const bool kIs64BitSystem = (sizeof(void*) == 8);
 
 constexpr int kKeywordModelTensorArenaSize = 22 * 1024;
 uint8_t keyword_model_tensor_arena[kKeywordModelTensorArenaSize];
diff --git a/tensorflow/lite/micro/recording_micro_interpreter.h b/tensorflow/lite/micro/recording_micro_interpreter.h
index eb443fc6fd1..0a579b0be8e 100644
--- a/tensorflow/lite/micro/recording_micro_interpreter.h
+++ b/tensorflow/lite/micro/recording_micro_interpreter.h
@@ -45,6 +45,13 @@ class RecordingMicroInterpreter : public MicroInterpreter {
         recording_micro_allocator_(
             static_cast<const RecordingMicroAllocator&>(allocator())) {}
 
+  RecordingMicroInterpreter(const Model* model,
+                            const MicroOpResolver& op_resolver,
+                            RecordingMicroAllocator* allocator,
+                            ErrorReporter* error_reporter)
+      : MicroInterpreter(model, op_resolver, allocator, error_reporter),
+        recording_micro_allocator_(*allocator) {}
+
   const RecordingMicroAllocator& GetMicroAllocator() const {
     return recording_micro_allocator_;
   }
From 893635aad3dddbfaf6067918f26b3c635a292439 Mon Sep 17 00:00:00 2001
From: Anjali Sridhar
Date: Tue, 23 Jun 2020 12:29:17 -0700
Subject: [PATCH 26/66] Remove error message which does not apply since we
 create the datasets and element spec when we call
 `experimental_distributed_datasets_from_function`.
PiperOrigin-RevId: 317919230
Change-Id: Ia2f9449ecee4aa7198f53fbae99a3f1ac048802a
---
 tensorflow/python/distribute/input_lib.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/tensorflow/python/distribute/input_lib.py b/tensorflow/python/distribute/input_lib.py
index e4a362a92c6..64089e54bfa 100644
--- a/tensorflow/python/distribute/input_lib.py
+++ b/tensorflow/python/distribute/input_lib.py
@@ -1216,12 +1216,6 @@ class DistributedDatasetsFromFunction(_IterableInput):
   @property
   def element_spec(self):
     """The type specification of an element of this dataset."""
-    if self._element_spec is None:
-      raise ValueError("You must create an iterator before calling "
-                       "`element_spec` on the distributed dataset or iterator. "
-                       "This is because the dataset function is not called "
-                       "before an iterator is created.")
-
     return self._element_spec
 
 
From 7d025c63c53f3066354a2d1f41abf184b359bceb Mon Sep 17 00:00:00 2001
From: Gaurav Jain
Date: Tue, 23 Jun 2020 12:31:29 -0700
Subject: [PATCH 27/66] Ignore other graph inputs in custom gradient

Though such inputs are not valid, we ignore them to make it easier for v1
code to migrate to custom gradients.

PiperOrigin-RevId: 317919685
Change-Id: Idc68fa39277cfb006e7e6c665b035a70db40b600
---
 tensorflow/python/ops/custom_gradient.py | 32 +++++++++++++++++++++---
 tensorflow/python/ops/gradients_test.py  | 21 ++++++++++++++++
 2 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/ops/custom_gradient.py b/tensorflow/python/ops/custom_gradient.py
index 5f4ee055621..ed666840436 100644
--- a/tensorflow/python/ops/custom_gradient.py
+++ b/tensorflow/python/ops/custom_gradient.py
@@ -336,15 +336,38 @@ def _graph_mode_decorator(f, args, kwargs):
         "All variables used by a function wrapped with @custom_gradient must "
         "be `ResourceVariable`s. Ensure that no `variable_scope` is created "
         "with `use_resource=False`.")
+
+  # It is possible for the caller to pass in an input that is from a different
+  # graph. Even though this is not valid, we filter these out if they are not
+  # from the output graph to make it easier for some code to migrate to custom
+  # gradients.
+  inputs = nest.flatten(args)
+  outputs = nest.flatten(result)
+  graphs = {getattr(o, "graph", None) for o in outputs}
+  # Not all results may be tensors. However, we want to ensure that all outputs
+  # are from the same graph and use that to filter the inputs.
+  graphs.discard(None)  # Discard non-graph outputs
+  if graphs:
+    if len(graphs) > 1:
+      raise ValueError("All graph outputs should be from the same graph")
+    output_graph = graphs.pop()
+    filtered_inputs = []
+    for i in inputs:
+      if i.graph != output_graph:
+        logging.warn("%s does not belong to output graph %s", i, output_graph)
+      else:
+        filtered_inputs.append(i)
+
+    inputs = filtered_inputs
+
   # The variables that grad_fn needs to return gradients for are the set of
   # variables used that are *not* part of the inputs.
- inputs = args variables_in_tape = frozenset([ v.ref() for v in variable_watcher.watched_variables() ]) - frozenset(v.ref() for v in inputs) variables_in_subgraph = frozenset([ v.ref() - for v in get_dependent_variables(input_ops=inputs, output_ops=result) + for v in get_dependent_variables(input_ops=inputs, output_ops=outputs) ]) variables = list( [v.deref() for v in variables_in_subgraph.union(variables_in_tape)]) @@ -363,7 +386,7 @@ def _graph_mode_decorator(f, args, kwargs): flat_result = nest.flatten(result) flat_result_len = len(flat_result) - all_tensors = flat_result + args + variables + all_tensors = flat_result + inputs + variables def tape_grad_fn(*result_grads): """Custom grad fn wrapper.""" @@ -515,7 +538,8 @@ def recompute_grad(f): def transpose(*t_args, **t_kwargs): """Gradient function calculation for forward mode autodiff.""" - # Just throw an error since gradients / activations are not stored on tape for recompute. + # Just throw an error since gradients / activations are not stored on + # tape for recompute. raise NotImplementedError( "recompute_grad tried to transpose grad of {}. " "Consider not using recompute_grad in forward mode" diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py index fc5f38aedba..158253d1aab 100644 --- a/tensorflow/python/ops/gradients_test.py +++ b/tensorflow/python/ops/gradients_test.py @@ -1197,6 +1197,27 @@ class CustomGradientTest(test_util.TensorFlowTestCase, parameterized.TestCase): dw = sess.run(math_ops.reduce_sum(grads[1])) self.assertEqual(12., dw) + def testCustomGradientOtherGraphVariables(self): + with ops.Graph().as_default(): + v = variables.Variable(1.0) + + @custom_gradient.custom_gradient + def MyMultiply(x1, x2, unused_y): + result = x1 * x2 + + def Grad(dy): + # Switched the ordering here. + return [dy * x1, dy * x2] + + return result, Grad + + with ops.Graph().as_default(): + x1 = constant(3.) + x2 = constant(5.) + y = MyMultiply(x1, x2, v) + dy = gradients.gradients(y, [x1, x2]) + self.assertAllEqual([3., 5.], self.evaluate(dy)) + def testCustomGradientWithVariablesNoFalsePositives(self): @custom_gradient.custom_gradient From 95428e83f53de7cfafd5ea10dc1ca353398e8c66 Mon Sep 17 00:00:00 2001 From: Brian Zhao Date: Tue, 23 Jun 2020 12:33:36 -0700 Subject: [PATCH 28/66] Adding RevivedConstant class for Constant reloading in the SavedModelAPI C API. 
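A hedged sketch of the intended revival flow (error handling elided; `ctx` and `tensor_proto` are assumed to be a valid `ImmediateExecutionContext*` and a parsed `TensorProto`):

```
// Revive a serialized TensorProto as a Constant, then use its handle.
std::unique_ptr<Constant> constant;
Status status = internal::TensorProtoToConstant(ctx, tensor_proto, &constant);
if (status.ok()) {
  ImmediateExecutionTensorHandle* handle = constant->handle();
  // From here, `handle` behaves like any other eager tensor handle.
}
```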
PiperOrigin-RevId: 317920112 Change-Id: I2dc84de102c1edc5513df319e66ee20351bdb725 --- .../c/experimental/saved_model/core/BUILD | 43 ++++ .../saved_model/core/revived_types/BUILD | 39 ++++ .../core/revived_types/constant.cc | 46 ++++ .../saved_model/core/revived_types/constant.h | 55 +++++ .../revived_types/tensorhandle_convertible.h | 49 +++++ .../saved_model/core/saved_model_utils.cc | 38 ++++ .../saved_model/core/saved_model_utils.h | 39 ++++ .../core/saved_model_utils_test.cc | 199 ++++++++++++++++++ 8 files changed, 508 insertions(+) create mode 100644 tensorflow/c/experimental/saved_model/core/revived_types/BUILD create mode 100644 tensorflow/c/experimental/saved_model/core/revived_types/constant.cc create mode 100644 tensorflow/c/experimental/saved_model/core/revived_types/constant.h create mode 100644 tensorflow/c/experimental/saved_model/core/revived_types/tensorhandle_convertible.h create mode 100644 tensorflow/c/experimental/saved_model/core/saved_model_utils.cc create mode 100644 tensorflow/c/experimental/saved_model/core/saved_model_utils.h create mode 100644 tensorflow/c/experimental/saved_model/core/saved_model_utils_test.cc diff --git a/tensorflow/c/experimental/saved_model/core/BUILD b/tensorflow/c/experimental/saved_model/core/BUILD index dbe1b6d656c..bc9a5fd9442 100644 --- a/tensorflow/c/experimental/saved_model/core/BUILD +++ b/tensorflow/c/experimental/saved_model/core/BUILD @@ -3,6 +3,10 @@ # Targets in this directory are pure C++ "Classes" underlying the C API types # under tf/c/experimental/saved_model/public/. They are subject to change and # have visibility limited to Tensorflow's implementation only. +load( + "//tensorflow:tensorflow.bzl", + "tf_cc_test", +) package( default_visibility = [ @@ -47,6 +51,22 @@ cc_library( ], ) +cc_library( + name = "saved_model_utils", + srcs = [ + "saved_model_utils.cc", + ], + hdrs = [ + "saved_model_utils.h", + ], + deps = [ + "//tensorflow/c:tf_tensor_internal", + "//tensorflow/c/eager:immediate_execution_context", + "//tensorflow/c/experimental/saved_model/core/revived_types:constant", + "//tensorflow/core:protos_all_cc", + ], +) + cc_library( name = "tf_saved_model_impl", srcs = [ @@ -84,3 +104,26 @@ filegroup( ], visibility = ["//tensorflow/core:__pkg__"], ) + +tf_cc_test( + name = "saved_model_utils_test", + srcs = [ + "saved_model_utils_test.cc", + ], + deps = [ + ":saved_model_utils", + "//tensorflow/c:tensor_interface", + "//tensorflow/c/eager:abstract_tensor_handle", + "//tensorflow/c/eager:immediate_execution_context", + "//tensorflow/c/eager:immediate_execution_tensor_handle", + "//tensorflow/c/experimental/saved_model/core/revived_types:constant", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core/common_runtime:core_cpu_lib", + "//tensorflow/core/common_runtime/eager:context", + "//tensorflow/core/common_runtime/eager:core", + ], +) diff --git a/tensorflow/c/experimental/saved_model/core/revived_types/BUILD b/tensorflow/c/experimental/saved_model/core/revived_types/BUILD new file mode 100644 index 00000000000..ad3844e00a0 --- /dev/null +++ b/tensorflow/c/experimental/saved_model/core/revived_types/BUILD @@ -0,0 +1,39 @@ +# This package contains classes corresponding to Revived SavedObjectGraph types +# used by SavedModel. 
See https://cs.opensource.google/tensorflow/tensorflow/+/c575e2ba93c442121d98d3f125d83fed1339924d:tensorflow/core/protobuf/saved_object_graph.proto;l=56-62 +package( + default_visibility = [ + # Restricting visibility for now + "//tensorflow/c/experimental/saved_model/core:__pkg__", + ], + licenses = ["notice"], # Apache 2.0 +) + +cc_library( + name = "constant", + srcs = [ + "constant.cc", + ], + hdrs = [ + "constant.h", + ], + deps = [ + ":tensorhandle_convertible", + "//tensorflow/c:tensor_interface", + "//tensorflow/c/eager:immediate_execution_context", + "//tensorflow/c/eager:immediate_execution_tensor_handle", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/common_runtime/eager:tensor_handle", + ], +) + +cc_library( + name = "tensorhandle_convertible", + hdrs = [ + "tensorhandle_convertible.h", + ], + deps = [ + "//tensorflow/c/eager:immediate_execution_tensor_handle", + ], +) diff --git a/tensorflow/c/experimental/saved_model/core/revived_types/constant.cc b/tensorflow/c/experimental/saved_model/core/revived_types/constant.cc new file mode 100644 index 00000000000..0cabf83a123 --- /dev/null +++ b/tensorflow/c/experimental/saved_model/core/revived_types/constant.cc @@ -0,0 +1,46 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/c/experimental/saved_model/core/revived_types/constant.h" + +#include + +#include "tensorflow/c/eager/immediate_execution_context.h" +#include "tensorflow/c/eager/immediate_execution_tensor_handle.h" +#include "tensorflow/c/experimental/saved_model/core/revived_types/tensorhandle_convertible.h" +#include "tensorflow/core/common_runtime/eager/tensor_handle.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor.pb.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { + +Constant::Constant(ImmediateTensorHandlePtr handle) + : TensorHandleConvertible(std::move(handle)) {} + +Status Constant::Create(ImmediateExecutionContext* ctx, + AbstractTensorInterface* tensor, + std::unique_ptr* output) { + ImmediateExecutionTensorHandle* handle = ctx->CreateLocalHandle(tensor); + if (handle == nullptr) { + return errors::Internal("Failed to convert tensor to tensorhandle"); + } + output->reset(new Constant(ImmediateTensorHandlePtr(handle))); + return Status(); +} + +} // namespace tensorflow diff --git a/tensorflow/c/experimental/saved_model/core/revived_types/constant.h b/tensorflow/c/experimental/saved_model/core/revived_types/constant.h new file mode 100644 index 00000000000..845a6f391c0 --- /dev/null +++ b/tensorflow/c/experimental/saved_model/core/revived_types/constant.h @@ -0,0 +1,55 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_CORE_REVIVED_CONSTANT_H_
+#define TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_CORE_REVIVED_CONSTANT_H_
+
+#include <memory>
+
+#include "tensorflow/c/eager/immediate_execution_context.h"
+#include "tensorflow/c/eager/immediate_execution_tensor_handle.h"
+#include "tensorflow/c/experimental/saved_model/core/revived_types/tensorhandle_convertible.h"
+#include "tensorflow/c/tensor_interface.h"
+#include "tensorflow/core/framework/tensor.pb.h"
+
+namespace tensorflow {
+
+// This class corresponds to python's tf.constant, which is effectively a
+// TensorHandle explicitly initialized to some value.
+// For now this doesn't do much beyond wrap Context's CreateLocalHandle method,
+// and offer a subclass of TensorHandleConvertible. Note that similar to
+// the python's eager mode logic, we bypass calling the "Const" op:
+// https://github.com/tensorflow/tensorflow/blob/1c064ab76064c58e54261b805027474885a1534d/tensorflow/python/framework/constant_op.py#L301
+class Constant : public TensorHandleConvertible {
+ public:
+  static Status Create(ImmediateExecutionContext* ctx,
+                       AbstractTensorInterface* tensor,
+                       std::unique_ptr<Constant>* output);
+
+  // RevivedConstant is movable, but not copyable.
+  Constant(Constant&& other) = default;
+  Constant& operator=(Constant&& other) = default;
+
+  ~Constant() override = default;
+
+ private:
+  explicit Constant(ImmediateTensorHandlePtr handle);
+  Constant(const Constant&) = delete;
+  Constant& operator=(const Constant&) = delete;
+};
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_CORE_REVIVED_CONSTANT_H_
diff --git a/tensorflow/c/experimental/saved_model/core/revived_types/tensorhandle_convertible.h b/tensorflow/c/experimental/saved_model/core/revived_types/tensorhandle_convertible.h
new file mode 100644
index 00000000000..98179586e83
--- /dev/null
+++ b/tensorflow/c/experimental/saved_model/core/revived_types/tensorhandle_convertible.h
@@ -0,0 +1,49 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+==============================================================================*/ + +#ifndef TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_CORE_TENSORHANDLE_CONVERTIBLE_H_ +#define TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_CORE_TENSORHANDLE_CONVERTIBLE_H_ + +#include "tensorflow/c/eager/immediate_execution_tensor_handle.h" + +namespace tensorflow { + +// A common interface for objects that can be converted to a TensorHandle. +// Examples of objects that implement this include Variables, Constants, Assets, +// etc. This is used to convert captured objects into a ConcreteFunction's +// captured TensorHandles: +// https://github.com/tensorflow/tensorflow/blob/676a68963ea4b64fe479b9cede06aa8f5b290ab8/tensorflow/python/saved_model/load.py#L229-L240 +class TensorHandleConvertible { + public: + explicit TensorHandleConvertible(ImmediateTensorHandlePtr handle) + : handle_(std::move(handle)) {} + + ImmediateExecutionTensorHandle* handle() { return handle_.get(); } + + // TensorHandleConvertible is movable, but not copyable. + TensorHandleConvertible(TensorHandleConvertible&& other) = default; + TensorHandleConvertible& operator=(TensorHandleConvertible&& other) = default; + + virtual ~TensorHandleConvertible() = default; + + protected: + TensorHandleConvertible(const TensorHandleConvertible&) = delete; + TensorHandleConvertible& operator=(const TensorHandleConvertible&) = delete; + ImmediateTensorHandlePtr handle_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_CORE_TENSORHANDLE_CONVERTIBLE_H_ diff --git a/tensorflow/c/experimental/saved_model/core/saved_model_utils.cc b/tensorflow/c/experimental/saved_model/core/saved_model_utils.cc new file mode 100644 index 00000000000..9fe9caa27d7 --- /dev/null +++ b/tensorflow/c/experimental/saved_model/core/saved_model_utils.cc @@ -0,0 +1,38 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+
+#include "tensorflow/c/experimental/saved_model/core/saved_model_utils.h"
+
+#include "tensorflow/c/experimental/saved_model/core/revived_types/constant.h"
+#include "tensorflow/c/tf_tensor_internal.h"
+
+namespace tensorflow {
+namespace internal {
+
+Status TensorProtoToConstant(ImmediateExecutionContext* ctx,
+                             const TensorProto& proto,
+                             std::unique_ptr<Constant>* output) {
+  tensorflow::Tensor tensor;
+  bool parse_result = tensor.FromProto(proto);
+  if (!parse_result) {
+    return errors::Internal("Failed to parse tensor from tensorproto");
+  }
+
+  TensorInterface tensor_interface(std::move(tensor));
+  return Constant::Create(ctx, &tensor_interface, output);
+}
+
+}  // namespace internal
+}  // namespace tensorflow
diff --git a/tensorflow/c/experimental/saved_model/core/saved_model_utils.h b/tensorflow/c/experimental/saved_model/core/saved_model_utils.h
new file mode 100644
index 00000000000..5223f1c5f7d
--- /dev/null
+++ b/tensorflow/c/experimental/saved_model/core/saved_model_utils.h
@@ -0,0 +1,39 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_CORE_SAVED_MODEL_UTILS_H_
+#define TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_CORE_SAVED_MODEL_UTILS_H_
+
+// Some internal utility functions for the SavedModelAPI, factored out into a
+// separately unit-testable header.
+
+#include "tensorflow/c/eager/immediate_execution_context.h"
+#include "tensorflow/c/experimental/saved_model/core/revived_types/constant.h"
+#include "tensorflow/core/framework/tensor.pb.h"
+
+namespace tensorflow {
+namespace internal {
+
+// Load a TensorProto into a tensorflow::Constant. This is similar to the
+// constant loading logic in python:
+// https://github.com/tensorflow/tensorflow/blob/516608035f85cec8b126712b0ff8407220206b22/tensorflow/python/saved_model/load.py#L437
+Status TensorProtoToConstant(ImmediateExecutionContext* ctx,
+                             const TensorProto& proto,
+                             std::unique_ptr<Constant>* output);
+
+}  // namespace internal
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_CORE_SAVED_MODEL_UTILS_H_
diff --git a/tensorflow/c/experimental/saved_model/core/saved_model_utils_test.cc b/tensorflow/c/experimental/saved_model/core/saved_model_utils_test.cc
new file mode 100644
index 00000000000..483162574f7
--- /dev/null
+++ b/tensorflow/c/experimental/saved_model/core/saved_model_utils_test.cc
@@ -0,0 +1,199 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/c/experimental/saved_model/core/saved_model_utils.h"
+
+#include <string.h>
+
+#include <memory>
+#include <vector>
+
+#include "tensorflow/c/eager/immediate_execution_tensor_handle.h"
+#include "tensorflow/c/experimental/saved_model/core/revived_types/constant.h"
+#include "tensorflow/c/tensor_interface.h"
+#include "tensorflow/core/common_runtime/device_mgr.h"
+#include "tensorflow/core/common_runtime/eager/context.h"
+#include "tensorflow/core/framework/numeric_types.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor.pb.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/lib/bfloat16/bfloat16.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+namespace {
+
+// Converts a tensorflow::DataTypeSet to std::vector<DataType>.
+// This is needed for GTest's ::testing::ValuesIn, since
+// DataTypeSet doesn't fulfill all the constraints of an STL-like iterable.
+std::vector<DataType> DataTypeSetToVector(DataTypeSet set) {
+  std::vector<DataType> result;
+  result.reserve(set.size());
+  for (DataType dt : set) {
+    result.push_back(dt);
+  }
+  return result;
+}
+
+// Returns a vector of shapes intended to be "interesting" test cases.
+std::vector<std::vector<int64>> InterestingShapes() {
+  std::vector<std::vector<int64>> interesting_shapes;
+  interesting_shapes.push_back({});             // Scalar
+  interesting_shapes.push_back({10});           // 1D Vector
+  interesting_shapes.push_back({3, 3});         // 2D Matrix
+  interesting_shapes.push_back({1, 4, 6, 10});  // Higher Dimension Tensor
+  return interesting_shapes;
+}
+
+// Fills a numeric tensor with `value`.
+void FillNumericTensor(Tensor* tensor, int8 value) {
+  switch (tensor->dtype()) {
+#define CASE(type)                                       \
+  case DataTypeToEnum<type>::value: {                    \
+    const auto& flattened = tensor->flat<type>();        \
+    for (int i = 0; i < tensor->NumElements(); ++i) {    \
+      flattened(i) = value;                              \
+    }                                                    \
+    break;                                               \
+  }
+    TF_CALL_INTEGRAL_TYPES(CASE);
+    TF_CALL_double(CASE);
+    TF_CALL_float(CASE);
+#undef CASE
+    default:
+      CHECK(false) << "Unsupported data type: "
+                   << DataTypeString(tensor->dtype());
+      break;
+  }
+}
+
+// Checks the underlying data is equal for the buffers for two numeric tensors.
+// Note: The caller must ensure to check that the dtypes and sizes of the
+// underlying buffers are the same before calling this.
+void CheckBufferDataIsEqual(DataType dtype, int64 num_elements, void* a,
+                            void* b) {
+  switch (dtype) {
+#define CASE(type)                                 \
+  case DataTypeToEnum<type>::value: {              \
+    type* typed_a = static_cast<type*>(a);         \
+    type* typed_b = static_cast<type*>(b);         \
+    for (int64 i = 0; i < num_elements; ++i) {     \
+      if (DataTypeIsFloating(dtype)) {             \
+        EXPECT_FLOAT_EQ(typed_a[i], typed_b[i]);   \
+      } else {                                     \
+        EXPECT_EQ(typed_a[i], typed_b[i]);         \
+      }                                            \
+    }                                              \
+    break;                                         \
+  }
+    TF_CALL_INTEGRAL_TYPES(CASE);
+    TF_CALL_double(CASE);
+    TF_CALL_float(CASE);
+#undef CASE
+    default:
+      CHECK(false) << "Unsupported data type: " << DataTypeString(dtype);
+  }
+}
+
+class ConstantTest : public ::testing::TestWithParam<
+                         std::tuple<DataType, std::vector<int64>, bool>> {
+ public:
+  ConstantTest()
+      : device_mgr_(std::make_unique<StaticDeviceMgr>(DeviceFactory::NewDevice(
+            "CPU", {}, "/job:localhost/replica:0/task:0"))),
+        ctx_(new EagerContext(
+            SessionOptions(),
+            tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT,
+            tensorflow::ContextMirroringPolicy::MIRRORING_NONE,
+            /* async= */ false,
+            /* lazy_copy_function_remote_inputs= */ false, device_mgr_.get(),
+            /* device_mgr_owned= */ false, /* rendezvous= */ nullptr,
+            /* custom_kernel_creator= */ nullptr,
+            /* cluster_flr= */ nullptr)) {}
+
+  EagerContext* context() { return ctx_.get(); }
+
+ private:
+  std::unique_ptr<StaticDeviceMgr> device_mgr_;
+  EagerContextPtr ctx_;
+};
+
+// Basic sanity check that roundtripping a Tensor->Tensorproto->Constant
+// preserves values.
+TEST_P(ConstantTest, CreateConstantSuccessful) {
+  // Get test parameters
+  auto& test_params = GetParam();
+  DataType dtype = std::get<0>(test_params);
+  TensorShape shape(std::get<1>(test_params));
+  bool tensorproto_use_tensor_content = std::get<2>(test_params);
+
+  // Construct a Tensor with the given dtype + shape
+  Tensor expected(dtype, shape);
+  FillNumericTensor(&expected, 42);
+
+  // Serialize it to a Tensorproto
+  TensorProto proto;
+  if (tensorproto_use_tensor_content) {
+    expected.AsProtoTensorContent(&proto);
+  } else {
+    expected.AsProtoField(&proto);
+  }
+
+  // Revival should succeed w/o errors
+  std::unique_ptr<Constant> revived;
+  TF_EXPECT_OK(internal::TensorProtoToConstant(context(), proto, &revived));
+
+  // The revived tensorhandle should have the exact same dtype, shape, +
+  // approx equivalent data to the original.
+  ImmediateExecutionTensorHandle* handle = revived->handle();
+  Status status;
+  AbstractTensorPtr revived_tensor(handle->Resolve(&status));
+  TF_EXPECT_OK(status) << "Failed to convert tensorhandle to tensor";
+  EXPECT_EQ(revived_tensor->Type(), expected.dtype());
+  EXPECT_EQ(revived_tensor->NumElements(), expected.NumElements());
+  EXPECT_EQ(revived_tensor->NumDims(), expected.dims());
+  for (int i = 0; i < expected.dims(); ++i) {
+    EXPECT_EQ(revived_tensor->Dim(i), expected.dim_size(i));
+  }
+
+  CheckBufferDataIsEqual(expected.dtype(), expected.NumElements(),
+                         revived_tensor->Data(), expected.data());
+}
+
+// Test against combinations of tensors that are
+// 1. Varying dtypes
+// 2. Varying shapes
+// 3.
TensorProto serialized using tensor_content vs repeated type +INSTANTIATE_TEST_SUITE_P( + ConstantIntegerDtypesTest, ConstantTest, + ::testing::Combine( + ::testing::ValuesIn(DataTypeSetToVector(kDataTypeIsInteger)), + ::testing::ValuesIn(InterestingShapes()), + ::testing::Values(false, true))); + +INSTANTIATE_TEST_SUITE_P( + ConstantFloatingDtypesTest, ConstantTest, + ::testing::Combine(::testing::Values(DT_FLOAT, DT_DOUBLE), + ::testing::ValuesIn(InterestingShapes()), + ::testing::Values(false, true))); + +} // namespace +} // namespace tensorflow From e8c972652ad77076faf464df4f59240a2dd1548a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 23 Jun 2020 12:36:13 -0700 Subject: [PATCH 29/66] Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`. PiperOrigin-RevId: 317920618 Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5 --- .../process_function_library_runtime_test.cc | 4 +-- tensorflow/core/framework/resource_mgr.h | 16 +++++------ .../core/framework/resource_op_kernel.h | 2 +- tensorflow/core/framework/variant.h | 10 +++---- .../core/framework/variant_encode_decode.h | 2 +- .../core/framework/variant_op_registry.h | 6 ++-- .../framework/variant_op_registry_test.cc | 28 ++++++++++--------- tensorflow/core/framework/variant_test.cc | 2 +- .../kernels/conditional_accumulator_op.cc | 2 +- tensorflow/core/kernels/data/dataset_utils.h | 2 +- .../experimental/threadpool_dataset_op.cc | 2 +- tensorflow/core/kernels/data/iterator_ops.cc | 2 +- .../kernels/data/multi_device_iterator_ops.cc | 2 +- tensorflow/core/kernels/ops_testutil.h | 2 +- tensorflow/core/kernels/tile_ops.cc | 2 +- tensorflow/core/platform/abi_test.cc | 4 +-- 16 files changed, 45 insertions(+), 43 deletions(-) diff --git a/tensorflow/core/common_runtime/process_function_library_runtime_test.cc b/tensorflow/core/common_runtime/process_function_library_runtime_test.cc index 6e17cdf4316..9d662956504 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime_test.cc +++ b/tensorflow/core/common_runtime/process_function_library_runtime_test.cc @@ -764,8 +764,8 @@ Tensor GetResourceHandle(const string& var_name, const string& container, handle.set_device(device_name); handle.set_container(container); handle.set_name(var_name); - handle.set_hash_code(MakeTypeIndex().hash_code()); - handle.set_maybe_type_name(MakeTypeIndex().name()); + handle.set_hash_code(TypeIndex::Make().hash_code()); + handle.set_maybe_type_name(TypeIndex::Make().name()); Tensor tensor(DT_RESOURCE, TensorShape({})); tensor.scalar()() = handle; return tensor; diff --git a/tensorflow/core/framework/resource_mgr.h b/tensorflow/core/framework/resource_mgr.h index b0e4eace16e..3af8d81b0dc 100644 --- a/tensorflow/core/framework/resource_mgr.h +++ b/tensorflow/core/framework/resource_mgr.h @@ -301,7 +301,7 @@ ResourceHandle MakeResourceHandle( return MakeResourceHandle( container.empty() ? ctx->resource_manager()->default_container() : container, - name, *ctx->device(), MakeTypeIndex(), dtypes_and_shapes); + name, *ctx->device(), TypeIndex::Make(), dtypes_and_shapes); } template @@ -311,7 +311,7 @@ ResourceHandle MakeResourceHandle( return MakeResourceHandle( container.empty() ? 
ctx->resource_manager()->default_container() : container, - name, *ctx->device(), MakeTypeIndex(), dtypes_and_shapes); + name, *ctx->device(), TypeIndex::Make(), dtypes_and_shapes); } Status MakeResourceHandleToOutput(OpKernelContext* context, int output_index, @@ -589,7 +589,7 @@ Status ResourceMgr::Create(const string& container, const string& name, CheckDeriveFromResourceBase(); CHECK(resource != nullptr); mutex_lock l(mu_); - return DoCreate(container, MakeTypeIndex(), name, resource); + return DoCreate(container, TypeIndex::Make(), name, resource); } template @@ -635,7 +635,7 @@ template Status ResourceMgr::LookupInternal(const string& container, const string& name, T** resource) const { ResourceBase* found = nullptr; - Status s = DoLookup(container, MakeTypeIndex(), name, &found); + Status s = DoLookup(container, TypeIndex::Make(), name, &found); if (s.ok()) { // It's safe to down cast 'found' to T* since // typeid(T).hash_code() is part of the map key. @@ -660,7 +660,7 @@ Status ResourceMgr::LookupOrCreate(const string& container, const string& name, s = LookupInternal(container, name, resource); if (s.ok()) return s; TF_RETURN_IF_ERROR(creator(resource)); - s = DoCreate(container, MakeTypeIndex(), name, *resource); + s = DoCreate(container, TypeIndex::Make(), name, *resource); if (!s.ok()) { return errors::Internal("LookupOrCreate failed unexpectedly"); } @@ -671,7 +671,7 @@ Status ResourceMgr::LookupOrCreate(const string& container, const string& name, template Status ResourceMgr::Delete(const string& container, const string& name) { CheckDeriveFromResourceBase(); - return DoDelete(container, MakeTypeIndex(), name); + return DoDelete(container, TypeIndex::Make(), name); } template @@ -710,7 +710,7 @@ Status ValidateDevice(OpKernelContext* ctx, const ResourceHandle& p); template Status ValidateDeviceAndType(OpKernelContext* ctx, const ResourceHandle& p) { TF_RETURN_IF_ERROR(internal::ValidateDevice(ctx, p)); - auto type_index = MakeTypeIndex(); + auto type_index = TypeIndex::Make(); if (type_index.hash_code() != p.hash_code()) { return errors::InvalidArgument( "Trying to access resource using the wrong type. Expected ", @@ -883,7 +883,7 @@ ResourceHandle ScopedStepContainer::MakeResourceHandle( mutex_lock ml(mu_); dirty_ = true; return tensorflow::MakeResourceHandle(container_, name, device, - MakeTypeIndex(), {}); + TypeIndex::Make(), {}); } template diff --git a/tensorflow/core/framework/resource_op_kernel.h b/tensorflow/core/framework/resource_op_kernel.h index d8ee52a0e5d..4cb732ae973 100644 --- a/tensorflow/core/framework/resource_op_kernel.h +++ b/tensorflow/core/framework/resource_op_kernel.h @@ -105,7 +105,7 @@ class ResourceOpKernel : public OpKernel { if (has_resource_type_) { OP_REQUIRES_OK(context, MakeResourceHandleToOutput( context, 0, cinfo_.container(), cinfo_.name(), - MakeTypeIndex())); + TypeIndex::Make())); } else { context->set_output_ref(0, &mu_, handle_.AccessTensor(context)); } diff --git a/tensorflow/core/framework/variant.h b/tensorflow/core/framework/variant.h index 3200d7c81fa..e8a0c332968 100644 --- a/tensorflow/core/framework/variant.h +++ b/tensorflow/core/framework/variant.h @@ -144,7 +144,7 @@ void EncodeVariant(const T& value, string* buf); // Variant y_type_unknown = serialized_proto_f; // Store serialized Variant. // // EXPECT_EQ(x.TypeName(), y_type_unknown.TypeName()); // Looks like Foo. 
-// EXPECT_EQ(MakeTypeIndex(), +// EXPECT_EQ(TypeIndex::Make(), // y_type_unknown.TypeId()); // class Variant { @@ -227,7 +227,7 @@ class Variant { // of the original type when a TensorValueDataProto is stored as the // value. In this case, it returns the TypeIndex of TensorValueDataProto. TypeIndex TypeId() const { - const TypeIndex VoidTypeIndex = MakeTypeIndex(); + const TypeIndex VoidTypeIndex = TypeIndex::Make(); if (is_empty()) { return VoidTypeIndex; } @@ -244,7 +244,7 @@ class Variant { // otherwise. template T* get() { - const TypeIndex TTypeIndex = MakeTypeIndex(); + const TypeIndex TTypeIndex = TypeIndex::Make(); if (is_empty() || (TTypeIndex != TypeId())) return nullptr; return std::addressof(static_cast*>(GetValue())->value); } @@ -253,7 +253,7 @@ class Variant { // otherwise. template const T* get() const { - const TypeIndex TTypeIndex = MakeTypeIndex(); + const TypeIndex TTypeIndex = TypeIndex::Make(); if (is_empty() || (TTypeIndex != TypeId())) return nullptr; return std::addressof( static_cast*>(GetValue())->value); @@ -333,7 +333,7 @@ class Variant { TypeIndex TypeId() const final { const TypeIndex value_type_index = - MakeTypeIndex::type>(); + TypeIndex::Make::type>(); return value_type_index; } diff --git a/tensorflow/core/framework/variant_encode_decode.h b/tensorflow/core/framework/variant_encode_decode.h index 5e08e5a7a60..502bbd57422 100644 --- a/tensorflow/core/framework/variant_encode_decode.h +++ b/tensorflow/core/framework/variant_encode_decode.h @@ -160,7 +160,7 @@ string TypeNameVariantImpl( const T& value, TypeNameResolver) { - return port::MaybeAbiDemangle(MakeTypeIndex().name()); + return port::MaybeAbiDemangle(TypeIndex::Make().name()); } template diff --git a/tensorflow/core/framework/variant_op_registry.h b/tensorflow/core/framework/variant_op_registry.h index 4d94dcd35dd..5879597e5eb 100644 --- a/tensorflow/core/framework/variant_op_registry.h +++ b/tensorflow/core/framework/variant_op_registry.h @@ -521,7 +521,7 @@ class UnaryVariantBinaryOpRegistration { #define INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION(T, direction, \ device_copy_fn) \ INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION_UNIQ_HELPER( \ - __COUNTER__, T, direction, MakeTypeIndex(), device_copy_fn) + __COUNTER__, T, direction, TypeIndex::Make(), device_copy_fn) #define INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION_UNIQ_HELPER( \ ctr, T, direction, type_index, device_copy_fn) \ @@ -542,7 +542,7 @@ class UnaryVariantBinaryOpRegistration { #define REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION(op, device, T, \ unary_op_function) \ REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION_UNIQ_HELPER( \ - __COUNTER__, op, device, T, MakeTypeIndex(), unary_op_function) + __COUNTER__, op, device, T, TypeIndex::Make(), unary_op_function) #define REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION_UNIQ_HELPER( \ ctr, op, device, T, type_index, unary_op_function) \ @@ -563,7 +563,7 @@ class UnaryVariantBinaryOpRegistration { #define REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION(op, device, T, \ binary_op_function) \ REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION_UNIQ_HELPER( \ - __COUNTER__, op, device, T, MakeTypeIndex(), binary_op_function) + __COUNTER__, op, device, T, TypeIndex::Make(), binary_op_function) #define REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION_UNIQ_HELPER( \ ctr, op, device, T, type_index, binary_op_function) \ diff --git a/tensorflow/core/framework/variant_op_registry_test.cc b/tensorflow/core/framework/variant_op_registry_test.cc index 0aef6154a1f..1c45a39770c 100644 --- 
a/tensorflow/core/framework/variant_op_registry_test.cc +++ b/tensorflow/core/framework/variant_op_registry_test.cc @@ -155,12 +155,12 @@ TEST(VariantOpCopyToGPURegistryTest, TestBasic) { // No registered copy fn for GPU<->GPU. EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetDeviceCopyFn( VariantDeviceCopyDirection::DEVICE_TO_DEVICE, - MakeTypeIndex()), + TypeIndex::Make()), nullptr); auto* copy_to_gpu_fn = UnaryVariantOpRegistry::Global()->GetDeviceCopyFn( VariantDeviceCopyDirection::HOST_TO_DEVICE, - MakeTypeIndex()); + TypeIndex::Make()); EXPECT_NE(copy_to_gpu_fn, nullptr); VariantValue vv{true /* early_exit */}; @@ -183,7 +183,7 @@ TEST(VariantOpCopyToGPURegistryTest, TestDuplicate) { UnaryVariantOpRegistry registry; UnaryVariantOpRegistry::AsyncVariantDeviceCopyFn f; class FjFjFj {}; - const auto kTypeIndex = MakeTypeIndex(); + const auto kTypeIndex = TypeIndex::Make(); registry.RegisterDeviceCopyFn(VariantDeviceCopyDirection::HOST_TO_DEVICE, kTypeIndex, f); EXPECT_DEATH(registry.RegisterDeviceCopyFn( @@ -193,9 +193,10 @@ TEST(VariantOpCopyToGPURegistryTest, TestDuplicate) { TEST(VariantOpZerosLikeRegistryTest, TestBasicCPU) { class Blah {}; - EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetUnaryOpFn( - ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_CPU, MakeTypeIndex()), - nullptr); + EXPECT_EQ( + UnaryVariantOpRegistry::Global()->GetUnaryOpFn( + ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_CPU, TypeIndex::Make()), + nullptr); VariantValue vv_early_exit{true /* early_exit */, 0 /* value */}; Variant v = vv_early_exit; @@ -218,9 +219,10 @@ TEST(VariantOpZerosLikeRegistryTest, TestBasicCPU) { #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TEST(VariantOpUnaryOpRegistryTest, TestBasicGPU) { class Blah {}; - EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetUnaryOpFn( - ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_GPU, MakeTypeIndex()), - nullptr); + EXPECT_EQ( + UnaryVariantOpRegistry::Global()->GetUnaryOpFn( + ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_GPU, TypeIndex::Make()), + nullptr); VariantValue vv_early_exit{true /* early_exit */, 0 /* value */}; Variant v = vv_early_exit; @@ -245,7 +247,7 @@ TEST(VariantOpUnaryOpRegistryTest, TestDuplicate) { UnaryVariantOpRegistry registry; UnaryVariantOpRegistry::VariantUnaryOpFn f; class FjFjFj {}; - const auto kTypeIndex = MakeTypeIndex(); + const auto kTypeIndex = TypeIndex::Make(); registry.RegisterUnaryOpFn(ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_CPU, kTypeIndex, f); @@ -263,7 +265,7 @@ TEST(VariantOpUnaryOpRegistryTest, TestDuplicate) { TEST(VariantOpAddRegistryTest, TestBasicCPU) { class Blah {}; EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetBinaryOpFn( - ADD_VARIANT_BINARY_OP, DEVICE_CPU, MakeTypeIndex()), + ADD_VARIANT_BINARY_OP, DEVICE_CPU, TypeIndex::Make()), nullptr); VariantValue vv_early_exit{true /* early_exit */, 3 /* value */}; @@ -290,7 +292,7 @@ TEST(VariantOpAddRegistryTest, TestBasicCPU) { TEST(VariantOpAddRegistryTest, TestBasicGPU) { class Blah {}; EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetBinaryOpFn( - ADD_VARIANT_BINARY_OP, DEVICE_GPU, MakeTypeIndex()), + ADD_VARIANT_BINARY_OP, DEVICE_GPU, TypeIndex::Make()), nullptr); VariantValue vv_early_exit{true /* early_exit */, 3 /* value */}; @@ -318,7 +320,7 @@ TEST(VariantOpAddRegistryTest, TestDuplicate) { UnaryVariantOpRegistry registry; UnaryVariantOpRegistry::VariantBinaryOpFn f; class FjFjFj {}; - const auto kTypeIndex = MakeTypeIndex(); + const auto kTypeIndex = TypeIndex::Make(); registry.RegisterBinaryOpFn(ADD_VARIANT_BINARY_OP, DEVICE_CPU, kTypeIndex, f); 
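  // (A second registration for the same op/device/type-index combination is a
  // programming error, so the registry CHECK-fails rather than returning a
  // Status; EXPECT_DEATH below asserts that the process aborts.)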
EXPECT_DEATH(registry.RegisterBinaryOpFn(ADD_VARIANT_BINARY_OP, DEVICE_CPU, diff --git a/tensorflow/core/framework/variant_test.cc b/tensorflow/core/framework/variant_test.cc index 3aa9743353e..5edb6efdc5e 100644 --- a/tensorflow/core/framework/variant_test.cc +++ b/tensorflow/core/framework/variant_test.cc @@ -589,7 +589,7 @@ TEST(VariantTest, TensorListTest) { serialized.ToProto(&data); const Variant y_unknown = data; EXPECT_EQ(y_unknown.TypeName(), "TensorList"); - EXPECT_EQ(y_unknown.TypeId(), MakeTypeIndex()); + EXPECT_EQ(y_unknown.TypeId(), TypeIndex::Make()); EXPECT_EQ(y_unknown.DebugString(), strings::StrCat( "Variant")); diff --git a/tensorflow/core/kernels/conditional_accumulator_op.cc b/tensorflow/core/kernels/conditional_accumulator_op.cc index 6b6feb81cfa..debe2368d28 100644 --- a/tensorflow/core/kernels/conditional_accumulator_op.cc +++ b/tensorflow/core/kernels/conditional_accumulator_op.cc @@ -90,7 +90,7 @@ class ResourceConditionalAccumulatorOp : public ConditionalAccumulatorBaseOp { h(1) = cinfo_.name(); OP_REQUIRES_OK(ctx, MakeResourceHandleToOutput( ctx, 0, cinfo_.container(), cinfo_.name(), - MakeTypeIndex())); + TypeIndex::Make())); } TF_DISALLOW_COPY_AND_ASSIGN(ResourceConditionalAccumulatorOp); diff --git a/tensorflow/core/kernels/data/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h index ac087360fd0..0127fe68641 100644 --- a/tensorflow/core/kernels/data/dataset_utils.h +++ b/tensorflow/core/kernels/data/dataset_utils.h @@ -35,7 +35,7 @@ Status CreateHandle(OpKernelContext* ctx, T* resource, TF_RETURN_IF_ERROR(mgr->Create(container_name, unique_name, resource)); *handle = MakeResourceHandle(container_name, unique_name, *ctx->device(), - MakeTypeIndex()); + TypeIndex::Make()); return Status::OK(); } diff --git a/tensorflow/core/kernels/data/experimental/threadpool_dataset_op.cc b/tensorflow/core/kernels/data/experimental/threadpool_dataset_op.cc index 65252e3dbcf..a9c682a426b 100644 --- a/tensorflow/core/kernels/data/experimental/threadpool_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/threadpool_dataset_op.cc @@ -111,7 +111,7 @@ class ThreadPoolHandleOp : public OpKernel { } OP_REQUIRES_OK(ctx, MakeResourceHandleToOutput( ctx, 0, cinfo_.container(), cinfo_.name(), - MakeTypeIndex())); + TypeIndex::Make())); } private: diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc index 8dd7f4c364b..1996e7f230e 100644 --- a/tensorflow/core/kernels/data/iterator_ops.cc +++ b/tensorflow/core/kernels/data/iterator_ops.cc @@ -443,7 +443,7 @@ void IteratorHandleOp::Compute(OpKernelContext* context) } OP_REQUIRES_OK(context, MakeResourceHandleToOutput( context, 0, cinfo_.container(), cinfo_.name(), - MakeTypeIndex())); + TypeIndex::Make())); } Status IteratorHandleOp::VerifyResource(IteratorResource* resource) { diff --git a/tensorflow/core/kernels/data/multi_device_iterator_ops.cc b/tensorflow/core/kernels/data/multi_device_iterator_ops.cc index 7be03632d94..f3f67bcad07 100644 --- a/tensorflow/core/kernels/data/multi_device_iterator_ops.cc +++ b/tensorflow/core/kernels/data/multi_device_iterator_ops.cc @@ -475,7 +475,7 @@ class MultiDeviceIteratorHandleOp : public OpKernel { } OP_REQUIRES_OK(context, MakeResourceHandleToOutput( context, 0, container_name, unique_name, - MakeTypeIndex())); + TypeIndex::Make())); } private: diff --git a/tensorflow/core/kernels/ops_testutil.h b/tensorflow/core/kernels/ops_testutil.h index ea79a4b416b..93eee6ff350 100644 --- a/tensorflow/core/kernels/ops_testutil.h +++ 
b/tensorflow/core/kernels/ops_testutil.h @@ -126,7 +126,7 @@ class OpsTestBase : public ::testing::Test { std::string container_name = container.empty() ? rm->default_container() : container; EXPECT_TRUE(rm->Create(container_name, name, resource).ok()); - AddResourceInputInternal(container_name, name, MakeTypeIndex()); + AddResourceInputInternal(container_name, name, TypeIndex::Make()); } // Runs an operation producing 'num_outputs' outputs. diff --git a/tensorflow/core/kernels/tile_ops.cc b/tensorflow/core/kernels/tile_ops.cc index e626d430864..f733d9b9aea 100644 --- a/tensorflow/core/kernels/tile_ops.cc +++ b/tensorflow/core/kernels/tile_ops.cc @@ -554,7 +554,7 @@ inline void TileGradientOp::HandleCase( OpKernelContext* context, const std::vector& input_dims, const gtl::ArraySlice& multiples_array, Tensor* result) { LOG(FATAL) << "TileGradientOp: Invalid combination of Device, DT and NDIM: " - << MakeTypeIndex().name() << ", " << DataTypeString(DT) + << TypeIndex::Make().name() << ", " << DataTypeString(DT) << ", " << NDIM; } diff --git a/tensorflow/core/platform/abi_test.cc b/tensorflow/core/platform/abi_test.cc index 3a01953aec2..b6f8dd5c4ec 100644 --- a/tensorflow/core/platform/abi_test.cc +++ b/tensorflow/core/platform/abi_test.cc @@ -23,14 +23,14 @@ namespace tensorflow { struct MyRandomPODType {}; TEST(AbiTest, AbiDemangleTest) { - EXPECT_EQ(port::MaybeAbiDemangle(MakeTypeIndex().name()), "int"); + EXPECT_EQ(port::MaybeAbiDemangle(TypeIndex::Make().name()), "int"); #ifdef PLATFORM_WINDOWS const char pod_type_name[] = "struct tensorflow::MyRandomPODType"; #else const char pod_type_name[] = "tensorflow::MyRandomPODType"; #endif - EXPECT_EQ(port::MaybeAbiDemangle(MakeTypeIndex().name()), + EXPECT_EQ(port::MaybeAbiDemangle(TypeIndex::Make().name()), pod_type_name); EXPECT_EQ( From f430bdcc862fc0ab534d5800bdb6f88b4957bb1e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 23 Jun 2020 12:58:44 -0700 Subject: [PATCH 30/66] call cuptiFinalize for CUDA11 PiperOrigin-RevId: 317925127 Change-Id: I06670239f21e784b1d1f4cd02f450a479e35b534 --- tensorflow/core/profiler/internal/gpu/cupti_tracer.cc | 1 + tensorflow/core/profiler/internal/gpu/device_tracer.cc | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/profiler/internal/gpu/cupti_tracer.cc b/tensorflow/core/profiler/internal/gpu/cupti_tracer.cc index 931801427e7..b620b51cc99 100644 --- a/tensorflow/core/profiler/internal/gpu/cupti_tracer.cc +++ b/tensorflow/core/profiler/internal/gpu/cupti_tracer.cc @@ -1518,6 +1518,7 @@ Status CuptiTracer::DisableActivityTracing() { Status CuptiTracer::Finalize() { if (option_->cupti_finalize) { + VLOG(1) << "CuptiFinalize"; RETURN_IF_CUPTI_ERROR(cupti_interface_->Finalize()); } return Status::OK(); diff --git a/tensorflow/core/profiler/internal/gpu/device_tracer.cc b/tensorflow/core/profiler/internal/gpu/device_tracer.cc index 3c0ac04caf2..48391324f79 100644 --- a/tensorflow/core/profiler/internal/gpu/device_tracer.cc +++ b/tensorflow/core/profiler/internal/gpu/device_tracer.cc @@ -612,8 +612,11 @@ Status GpuTracer::DoStart() { options_.activities_selected.push_back(CUPTI_ACTIVITY_KIND_MEMCPY2); options_.activities_selected.push_back(CUPTI_ACTIVITY_KIND_OVERHEAD); +// CUDA/CUPTI 10 have issues (leaks and crashes) with CuptiFinalize. 
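+// The gating below therefore enables finalization only on pre-CUDA-10
+// toolkits (and there only when concurrent-kernel tracing is off), and
+// unconditionally on CUDA 11 and newer.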
#if CUDA_VERSION < 10000 - if (!trace_concurrent_kernels) options_.cupti_finalize = true; + if (!options.trace_concurrent_kernels()) options_.cupti_finalize = true; +#elif CUDA_VERSION >= 11000 + options_.cupti_finalize = true; #endif CuptiTracerCollectorOptions collector_options; From 8984c6d97a4a8d5dd8eed3f8dd40e64664f13e45 Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Tue, 23 Jun 2020 13:18:22 -0700 Subject: [PATCH 31/66] Ensure that multi-tenant allocation is properly recorded. The RecordingMicroAllocator currently keeps track of logging buckets for a single model. To help support auditing efforts using multi-tenant APIs, this class needs to keep track of the sum of allocation in all buckets. To test this use case, I allocate the same model twice in a single arena. All allocations should be bucketed for auditing. PiperOrigin-RevId: 317928536 Change-Id: I61ce3cdb8a5e45b0cb6232f19de0bdeec7b5d7b0 --- .../lite/micro/recording_micro_allocator.cc | 69 +++++++----- .../lite/micro/recording_micro_allocator.h | 22 ++-- .../micro/recording_micro_allocator_test.cc | 105 +++++++++++++++--- 3 files changed, 142 insertions(+), 54 deletions(-) diff --git a/tensorflow/lite/micro/recording_micro_allocator.cc b/tensorflow/lite/micro/recording_micro_allocator.cc index 05ccdbdbfaa..e667e7db9a9 100644 --- a/tensorflow/lite/micro/recording_micro_allocator.cc +++ b/tensorflow/lite/micro/recording_micro_allocator.cc @@ -110,37 +110,54 @@ void RecordingMicroAllocator::PrintRecordedAllocation( TfLiteStatus RecordingMicroAllocator::AllocateTfLiteTensorArray( TfLiteContext* context, const SubGraph* subgraph) { - SnapshotAllocationUsage(recorded_tflite_tensor_array_data_); + RecordedAllocation allocations = SnapshotAllocationUsage(); TfLiteStatus status = MicroAllocator::AllocateTfLiteTensorArray(context, subgraph); - RecordAllocationUsage(recorded_tflite_tensor_array_data_); - recorded_tflite_tensor_array_data_.count = context->tensors_size; + RecordAllocationUsage(allocations, recorded_tflite_tensor_array_data_); + // The allocation for this recording will always be 1. This is because the + // parent class mallocs one large allocation for the number of tensors in the + // graph (e.g. sizeof(TfLiteTensor) * num_tensors). 
+  // To prevent extra overhead and potential for fragmentation, manually adjust
+  // the accounting by decrementing by 1 and adding the actual number of tensors
+  // used in the graph:
+  recorded_tflite_tensor_array_data_.count += context->tensors_size - 1;
   return status;
 }
 
 TfLiteStatus RecordingMicroAllocator::PopulateTfLiteTensorArrayFromFlatbuffer(
     const Model* model, TfLiteContext* context, const SubGraph* subgraph) {
-  SnapshotAllocationUsage(recorded_tflite_tensor_array_quantization_data_);
+  RecordedAllocation allocations = SnapshotAllocationUsage();
   TfLiteStatus status = MicroAllocator::PopulateTfLiteTensorArrayFromFlatbuffer(
       model, context, subgraph);
-  RecordAllocationUsage(recorded_tflite_tensor_array_quantization_data_);
+  RecordAllocationUsage(allocations,
+                        recorded_tflite_tensor_array_quantization_data_);
   return status;
 }
 
 TfLiteStatus RecordingMicroAllocator::AllocateNodeAndRegistrations(
     const SubGraph* subgraph, NodeAndRegistration** node_and_registrations) {
-  SnapshotAllocationUsage(recorded_node_and_registration_array_data_);
+  RecordedAllocation allocations = SnapshotAllocationUsage();
   TfLiteStatus status = MicroAllocator::AllocateNodeAndRegistrations(
       subgraph, node_and_registrations);
-  RecordAllocationUsage(recorded_node_and_registration_array_data_);
-  recorded_node_and_registration_array_data_.count =
-      subgraph->operators()->size();
+  RecordAllocationUsage(allocations,
+                        recorded_node_and_registration_array_data_);
+  // The allocation for this recording will always be 1. This is because the
+  // parent class mallocs one large allocation for the number of nodes in the
+  // graph (e.g. sizeof(NodeAndRegistration) * num_nodes).
+  // To prevent extra overhead and potential for fragmentation, manually adjust
+  // the accounting by decrementing by 1 and adding the actual number of nodes
+  // used in the graph:
+  recorded_node_and_registration_array_data_.count +=
+      subgraph->operators()->size() - 1;
   return status;
 }
 
@@ -149,43 +166,45 @@ RecordingMicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
     const Model* model, const SubGraph* subgraph,
     const MicroOpResolver& op_resolver,
    NodeAndRegistration* node_and_registrations) {
-  SnapshotAllocationUsage(recorded_op_data_);
+  RecordedAllocation allocations = SnapshotAllocationUsage();
   TfLiteStatus status =
       MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
           model, subgraph, op_resolver, node_and_registrations);
-  RecordAllocationUsage(recorded_op_data_);
+  RecordAllocationUsage(allocations, recorded_op_data_);
   return status;
 }
 
 TfLiteStatus RecordingMicroAllocator::AllocateVariables(
     TfLiteContext* context, const SubGraph* subgraph) {
-  SnapshotAllocationUsage(recorded_tflite_tensor_variable_buffer_data_);
+  RecordedAllocation allocations = SnapshotAllocationUsage();
   TfLiteStatus status = MicroAllocator::AllocateVariables(context, subgraph);
-  RecordAllocationUsage(recorded_tflite_tensor_variable_buffer_data_);
+  RecordAllocationUsage(allocations,
+                        recorded_tflite_tensor_variable_buffer_data_);
   return status;
 }
 
-void RecordingMicroAllocator::SnapshotAllocationUsage(
-    RecordedAllocation& recorded_allocation) {
-  recorded_allocation.requested_bytes =
-      recording_memory_allocator_->GetRequestedBytes();
-  recorded_allocation.used_bytes = recording_memory_allocator_->GetUsedBytes();
-  recorded_allocation.count = recording_memory_allocator_->GetAllocatedCount();
+RecordedAllocation RecordingMicroAllocator::SnapshotAllocationUsage() const {
+  return {/*requested_bytes=*/recording_memory_allocator_->GetRequestedBytes(),
+          /*used_bytes=*/recording_memory_allocator_->GetUsedBytes(),
+          /*count=*/recording_memory_allocator_->GetAllocatedCount()};
 }
 
 void RecordingMicroAllocator::RecordAllocationUsage(
+    const RecordedAllocation& snapshotted_allocation,
     RecordedAllocation& recorded_allocation) {
-  recorded_allocation.requested_bytes =
+  recorded_allocation.requested_bytes +=
       recording_memory_allocator_->GetRequestedBytes() -
-      recorded_allocation.requested_bytes;
-  recorded_allocation.used_bytes = recording_memory_allocator_->GetUsedBytes() -
-                                   recorded_allocation.used_bytes;
-  recorded_allocation.count = recording_memory_allocator_->GetAllocatedCount() -
-                              recorded_allocation.count;
+      snapshotted_allocation.requested_bytes;
+  recorded_allocation.used_bytes +=
+      recording_memory_allocator_->GetUsedBytes() -
+      snapshotted_allocation.used_bytes;
+  recorded_allocation.count +=
+      recording_memory_allocator_->GetAllocatedCount() -
+      snapshotted_allocation.count;
 }
 
 }  // namespace tflite
diff --git a/tensorflow/lite/micro/recording_micro_allocator.h b/tensorflow/lite/micro/recording_micro_allocator.h
index b30b045cc34..a5b97c7ef3a 100644
--- a/tensorflow/lite/micro/recording_micro_allocator.h
+++ b/tensorflow/lite/micro/recording_micro_allocator.h
@@ -36,12 +36,11 @@ enum class RecordedAllocationType {
 // type. Each recording contains the number of bytes requested, the actual bytes
 // allocated (can differ from requested by alignment), and the number of items
 // allocated.
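+// (Being a plain aggregate now, a RecordedAllocation can simply be
+// value-initialized: `RecordedAllocation r = {};` zeroes all three counters,
+// which is what the `= {}` member initializers below rely on in place of the
+// removed explicit constructor.)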
-typedef struct RecordedAllocation { - RecordedAllocation() : requested_bytes(0), used_bytes(0), count(0) {} +struct RecordedAllocation { size_t requested_bytes; size_t used_bytes; size_t count; -} RecordedAllocation; +}; // Utility subclass of MicroAllocator that records all allocations // inside the arena. A summary of allocations can be logged through the @@ -82,9 +81,6 @@ class RecordingMicroAllocator : public MicroAllocator { TfLiteStatus AllocateVariables(TfLiteContext* context, const SubGraph* subgraph) override; - void SnapshotAllocationUsage(RecordedAllocation& recorded_allocation); - void RecordAllocationUsage(RecordedAllocation& recorded_allocation); - private: RecordingMicroAllocator(RecordingSimpleMemoryAllocator* memory_allocator, ErrorReporter* error_reporter); @@ -93,13 +89,17 @@ class RecordingMicroAllocator : public MicroAllocator { const char* allocation_name, const char* allocation_description) const; + RecordedAllocation SnapshotAllocationUsage() const; + void RecordAllocationUsage(const RecordedAllocation& snapshotted_allocation, + RecordedAllocation& recorded_allocation); + const RecordingSimpleMemoryAllocator* recording_memory_allocator_; - RecordedAllocation recorded_tflite_tensor_array_data_; - RecordedAllocation recorded_tflite_tensor_array_quantization_data_; - RecordedAllocation recorded_tflite_tensor_variable_buffer_data_; - RecordedAllocation recorded_node_and_registration_array_data_; - RecordedAllocation recorded_op_data_; + RecordedAllocation recorded_tflite_tensor_array_data_ = {}; + RecordedAllocation recorded_tflite_tensor_array_quantization_data_ = {}; + RecordedAllocation recorded_tflite_tensor_variable_buffer_data_ = {}; + RecordedAllocation recorded_node_and_registration_array_data_ = {}; + RecordedAllocation recorded_op_data_ = {}; TF_LITE_REMOVE_VIRTUAL_DELETE }; diff --git a/tensorflow/lite/micro/recording_micro_allocator_test.cc b/tensorflow/lite/micro/recording_micro_allocator_test.cc index 775a2de2dfd..8b8eaa20638 100644 --- a/tensorflow/lite/micro/recording_micro_allocator_test.cc +++ b/tensorflow/lite/micro/recording_micro_allocator_test.cc @@ -43,12 +43,20 @@ TF_LITE_MICRO_TEST(TestRecordsTfLiteTensorArrayData) { tflite::RecordingMicroAllocator* micro_allocator = tflite::RecordingMicroAllocator::Create(arena, kTestConvArenaSize, micro_test::reporter); - TF_LITE_MICRO_EXPECT_NE(nullptr, micro_allocator); - TF_LITE_MICRO_EXPECT_GE(kTfLiteOk, micro_allocator->StartModelAllocation( - model, &context, all_ops_resolver, - &node_and_registration)); - TF_LITE_MICRO_EXPECT_GE( - kTfLiteOk, micro_allocator->FinishModelAllocation(model, &context)); + // TODO(b/158102673): ugly workaround for not having fatal assertions. Same + // throughout this file. 
+ TF_LITE_MICRO_EXPECT_NE(micro_allocator, nullptr); + if (micro_allocator == nullptr) return 1; + + TfLiteStatus status; + status = micro_allocator->StartModelAllocation( + model, &context, all_ops_resolver, &node_and_registration); + TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk); + if (status != kTfLiteOk) return 1; + + status = micro_allocator->FinishModelAllocation(model, &context); + TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk); + if (status != kTfLiteOk) return 1; tflite::RecordedAllocation recorded_allocation = micro_allocator->GetRecordedAllocation( @@ -70,12 +78,18 @@ TF_LITE_MICRO_TEST(TestRecordsTensorArrayQuantizationData) { tflite::RecordingMicroAllocator* micro_allocator = tflite::RecordingMicroAllocator::Create(arena, kTestConvArenaSize, micro_test::reporter); - TF_LITE_MICRO_EXPECT_NE(nullptr, micro_allocator); - TF_LITE_MICRO_EXPECT_GE(kTfLiteOk, micro_allocator->StartModelAllocation( - model, &context, all_ops_resolver, - &node_and_registration)); - TF_LITE_MICRO_EXPECT_GE( - kTfLiteOk, micro_allocator->FinishModelAllocation(model, &context)); + TF_LITE_MICRO_EXPECT_NE(micro_allocator, nullptr); + if (micro_allocator == nullptr) return 1; + + TfLiteStatus status; + status = micro_allocator->StartModelAllocation( + model, &context, all_ops_resolver, &node_and_registration); + TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk); + if (status != kTfLiteOk) return 1; + + status = micro_allocator->FinishModelAllocation(model, &context); + TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk); + if (status != kTfLiteOk) return 1; // Walk the model subgraph to find all tensors with quantization params and // keep a tally. @@ -124,12 +138,18 @@ TF_LITE_MICRO_TEST(TestRecordsNodeAndRegistrationArrayData) { tflite::RecordingMicroAllocator* micro_allocator = tflite::RecordingMicroAllocator::Create(arena, kTestConvArenaSize, micro_test::reporter); - TF_LITE_MICRO_EXPECT_NE(nullptr, micro_allocator); - TF_LITE_MICRO_EXPECT_GE(kTfLiteOk, micro_allocator->StartModelAllocation( - model, &context, all_ops_resolver, - &node_and_registration)); - TF_LITE_MICRO_EXPECT_GE( - kTfLiteOk, micro_allocator->FinishModelAllocation(model, &context)); + TF_LITE_MICRO_EXPECT_NE(micro_allocator, nullptr); + if (micro_allocator == nullptr) return 1; + + TfLiteStatus status; + status = micro_allocator->StartModelAllocation( + model, &context, all_ops_resolver, &node_and_registration); + TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk); + if (status != kTfLiteOk) return 1; + + status = micro_allocator->FinishModelAllocation(model, &context); + TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk); + if (status != kTfLiteOk) return 1; size_t num_ops = model->subgraphs()->Get(0)->operators()->size(); tflite::RecordedAllocation recorded_allocation = @@ -142,6 +162,55 @@ TF_LITE_MICRO_TEST(TestRecordsNodeAndRegistrationArrayData) { num_ops * NODE_AND_REGISTRATION_STRUCT_SIZE); } +TF_LITE_MICRO_TEST(TestRecordsMultiTenantAllocations) { + TfLiteContext context; + tflite::AllOpsResolver all_ops_resolver; + tflite::NodeAndRegistration* node_and_registration; + const tflite::Model* model = tflite::GetModel(kTestConvModelData); + + // Double the arena size to allocate two models inside of it: + uint8_t arena[kTestConvArenaSize * 2]; + + TfLiteStatus status; + + tflite::RecordingMicroAllocator* micro_allocator = + tflite::RecordingMicroAllocator::Create(arena, kTestConvArenaSize * 2, + micro_test::reporter); + TF_LITE_MICRO_EXPECT_NE(micro_allocator, nullptr); + if (micro_allocator == nullptr) return 1; + + // First allocation with the model in the arena: + 
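+  // (Both allocation passes below run against the same RecordingMicroAllocator,
+  // so each RecordedAllocation bucket accumulates across the two models; the
+  // expectations at the end check for exactly twice one model's counts.)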
status = micro_allocator->StartModelAllocation(
+      model, &context, all_ops_resolver, &node_and_registration);
+  TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk);
+  if (status != kTfLiteOk) return 1;
+
+  status = micro_allocator->FinishModelAllocation(model, &context);
+  TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk);
+  if (status != kTfLiteOk) return 1;
+
+  // Second allocation with the same model in the arena:
+  status = micro_allocator->StartModelAllocation(
+      model, &context, all_ops_resolver, &node_and_registration);
+  TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk);
+  if (status != kTfLiteOk) return 1;
+
+  status = micro_allocator->FinishModelAllocation(model, &context);
+  TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk);
+  if (status != kTfLiteOk) return 1;
+
+  tflite::RecordedAllocation recorded_allocation =
+      micro_allocator->GetRecordedAllocation(
+          tflite::RecordedAllocationType::kTfLiteTensorArray);
+  TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.count, context.tensors_size * 2);
+  TF_LITE_MICRO_EXPECT_EQ(
+      recorded_allocation.requested_bytes,
+      context.tensors_size * TF_LITE_TENSOR_STRUCT_SIZE * 2);
+  TF_LITE_MICRO_EXPECT_GE(
+      recorded_allocation.used_bytes,
+      context.tensors_size * TF_LITE_TENSOR_STRUCT_SIZE * 2);
+}
+
 // TODO(b/158124094): Find a way to audit OpData allocations on
 // cross-architectures.

From 89a5efab7d3fa63c6d2a34c78ef0914112fc8e7a Mon Sep 17 00:00:00 2001
From: Christian Sigg
Date: Tue, 23 Jun 2020 13:44:38 -0700
Subject: [PATCH 32/66] Disable failing
 //tensorflow/core/kernels:conv_ops_test_gpu on CUDA11.

PiperOrigin-RevId: 317932591
Change-Id: I70cbb7489ef8125d6cbe4b6f3e18229913c2c7c9
---
 tensorflow/core/kernels/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index e2ff5aed283..1e05ee90ff8 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -1762,6 +1762,7 @@ tf_cuda_cc_test(
     name = "conv_ops_test",
     size = "medium",
     srcs = ["conv_ops_test.cc"],
+    tags = ["no_cuda11"],  # b/159664089
     deps = [
         ":conv_ops",
         ":image",

From 77644192c476d39f2d8a31eb5dc12ec591d2a5c9 Mon Sep 17 00:00:00 2001
From: Marat Dukhan
Date: Tue, 23 Jun 2020 13:52:13 -0700
Subject: [PATCH 33/66] Add unit test for Prelu in XNNPACK delegate

- Add PreluTester class and unit test for XNNPACK-delegated Prelu operator
- Relax restrictions on the number of input/output dimensions in delegated
  Prelu operators

PiperOrigin-RevId: 317933686
Change-Id: Ie7bac6c8d6bd358ef8b5d79042d6ae1af07e1c49
---
 tensorflow/lite/delegates/xnnpack/BUILD       |  32 +
 .../delegates/xnnpack/leaky_relu_tester.h     |   1 -
 .../lite/delegates/xnnpack/prelu_test.cc      | 583 ++++++++++++++++++
 .../lite/delegates/xnnpack/prelu_tester.cc    | 237 +++++++
 .../lite/delegates/xnnpack/prelu_tester.h     |  88 +++
 .../delegates/xnnpack/xnnpack_delegate.cc     |   6 +-
 6 files changed, 944 insertions(+), 3 deletions(-)
 create mode 100644 tensorflow/lite/delegates/xnnpack/prelu_test.cc
 create mode 100644 tensorflow/lite/delegates/xnnpack/prelu_tester.cc
 create mode 100644 tensorflow/lite/delegates/xnnpack/prelu_tester.h

diff --git a/tensorflow/lite/delegates/xnnpack/BUILD b/tensorflow/lite/delegates/xnnpack/BUILD
index eaf7d8f6f03..e0d3d39f719 100644
--- a/tensorflow/lite/delegates/xnnpack/BUILD
+++ b/tensorflow/lite/delegates/xnnpack/BUILD
@@ -180,6 +180,23 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "prelu_tester",
+    testonly = 1,
+    srcs = ["prelu_tester.cc"],
+    hdrs = ["prelu_tester.h"],
+    deps = [
+        "//tensorflow/lite:framework",
+        "//tensorflow/lite:schema_fbs_version",
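+        # (The tester assembles a PReLU model in memory with flatbuffers and
+        # runs it through both the reference interpreter and the delegate,
+        # hence the framework, builtin-kernel, and schema deps in this list.)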
"//tensorflow/lite/c:common", + "//tensorflow/lite/kernels:builtin_ops", + "//tensorflow/lite/schema:schema_fbs", + "@FP16", + "@com_google_googletest//:gtest", + "@flatbuffers", + ], +) + cc_library( name = "reduce_tester", testonly = 1, @@ -527,6 +544,21 @@ cc_test( ], ) +cc_test( + name = "prelu_test", + srcs = ["prelu_test.cc"], + linkopts = select({ + "//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS, + "//conditions:default": [], + }), + deps = [ + ":prelu_tester", + ":test_main", + ":xnnpack_delegate_test_mode", + "@com_google_googletest//:gtest", + ], +) + cc_test( name = "relu_test", srcs = ["relu_test.cc"], diff --git a/tensorflow/lite/delegates/xnnpack/leaky_relu_tester.h b/tensorflow/lite/delegates/xnnpack/leaky_relu_tester.h index f1d9efd7209..191dc938e89 100644 --- a/tensorflow/lite/delegates/xnnpack/leaky_relu_tester.h +++ b/tensorflow/lite/delegates/xnnpack/leaky_relu_tester.h @@ -21,7 +21,6 @@ limitations under the License. #include #include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/schema/schema_generated.h" namespace tflite { namespace xnnpack { diff --git a/tensorflow/lite/delegates/xnnpack/prelu_test.cc b/tensorflow/lite/delegates/xnnpack/prelu_test.cc new file mode 100644 index 00000000000..10026915add --- /dev/null +++ b/tensorflow/lite/delegates/xnnpack/prelu_test.cc @@ -0,0 +1,583 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include +#include +#include + +#include +#include "tensorflow/lite/delegates/xnnpack/prelu_tester.h" +#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" + +namespace tflite { +namespace xnnpack { + +// TODO(b/159727692) +TEST(Prelu, DISABLED_4DBy4D) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto height = shape_rng(); + const auto width = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, height, width, channels}) + .SlopeShape({batch, height, width, channels}) + .Test(xnnpack_delegate.get()); +} + +TEST(Prelu, 4DBy4DBroadcastChannels) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto height = shape_rng(); + const auto width = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, height, width, channels}) + .SlopeShape({1, 1, 1, channels}) + .Test(xnnpack_delegate.get()); +} + +// TODO(b/159727692) +TEST(Prelu, DISABLED_4DBy4DBroadcastWidth) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto height = shape_rng(); + const auto width = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, height, width, channels}) + .SlopeShape({1, 1, width, 1}) + .Test(xnnpack_delegate.get()); +} + +// TODO(b/159727692) +TEST(Prelu, DISABLED_4DBy4DBroadcastHeight) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto height = shape_rng(); + const auto width = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, height, width, channels}) + .SlopeShape({1, height, 1, 1}) + .Test(xnnpack_delegate.get()); +} + +// TODO(b/159727692) +TEST(Prelu, DISABLED_4DBy4DBroadcastBatch) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto height = shape_rng(); + const auto width = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, height, width, channels}) + .SlopeShape({batch, 1, 1, 1}) + .Test(xnnpack_delegate.get()); +} + +// TODO(b/159727692) +TEST(Prelu, DISABLED_4DBy4DBroadcastHeightWidthChannels) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + 
TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto height = shape_rng(); + const auto width = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, height, width, channels}) + .SlopeShape({1, height, width, channels}) + .Test(xnnpack_delegate.get()); +} + +// TODO(b/159727692) +TEST(Prelu, DISABLED_4DBy3D) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto height = shape_rng(); + const auto width = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, height, width, channels}) + .SlopeShape({height, width, channels}) + .Test(xnnpack_delegate.get()); +} + +// TODO(b/159727692) +TEST(Prelu, DISABLED_4DBy2D) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto height = shape_rng(); + const auto width = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, height, width, channels}) + .SlopeShape({width, channels}) + .Test(xnnpack_delegate.get()); +} + +TEST(Prelu, 4DBy1D) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto height = shape_rng(); + const auto width = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, height, width, channels}) + .SlopeShape({channels}) + .Test(xnnpack_delegate.get()); +} + +// TODO(b/159727692) +TEST(Prelu, DISABLED_4DBy0D) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto height = shape_rng(); + const auto width = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, height, width, channels}) + .SlopeShape({}) + .Test(xnnpack_delegate.get()); +} + +// TODO(b/159727692) +TEST(Prelu, DISABLED_3DBy3D) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto width = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, width, channels}) + .SlopeShape({batch, width, channels}) + .Test(xnnpack_delegate.get()); +} + +TEST(Prelu, 3DBy3DBroadcastChannels) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + 
TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto width = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, width, channels}) + .SlopeShape({1, 1, channels}) + .Test(xnnpack_delegate.get()); +} + +// TODO(b/159727692) +TEST(Prelu, DISABLED_3DBy3DBroadcastWidth) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto width = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, width, channels}) + .SlopeShape({1, width, 1}) + .Test(xnnpack_delegate.get()); +} + +// TODO(b/159727692) +TEST(Prelu, DISABLED_3DBy3DBroadcastBatch) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto width = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, width, channels}) + .SlopeShape({batch, 1, 1}) + .Test(xnnpack_delegate.get()); +} + +// TODO(b/159727692) +TEST(Prelu, DISABLED_3DBy3DBroadcastWidthChannels) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto width = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, width, channels}) + .SlopeShape({1, width, channels}) + .Test(xnnpack_delegate.get()); +} + +// TODO(b/159727692) +TEST(Prelu, DISABLED_3DBy2D) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto width = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, width, channels}) + .SlopeShape({width, channels}) + .Test(xnnpack_delegate.get()); +} + +TEST(Prelu, 3DBy1D) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto width = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, width, channels}) + .SlopeShape({channels}) + .Test(xnnpack_delegate.get()); +} + +// TODO(b/159727692) +TEST(Prelu, DISABLED_3DBy0D) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), 
std::ref(rng)); + const auto batch = shape_rng(); + const auto width = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, width, channels}) + .SlopeShape({}) + .Test(xnnpack_delegate.get()); +} + +// TODO(b/159727692) +TEST(Prelu, DISABLED_2DBy2D) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, channels}) + .SlopeShape({batch, channels}) + .Test(xnnpack_delegate.get()); +} + +TEST(Prelu, 2DBy2DBroadcastChannels) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, channels}) + .SlopeShape({1, channels}) + .Test(xnnpack_delegate.get()); +} + +// TODO(b/159727692) +TEST(Prelu, DISABLED_2DBy2DBroadcastBatch) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, channels}) + .SlopeShape({batch, 1}) + .Test(xnnpack_delegate.get()); +} + +TEST(Prelu, 2DBy1D) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, channels}) + .SlopeShape({channels}) + .Test(xnnpack_delegate.get()); +} + +// TODO(b/159727692) +TEST(Prelu, DISABLED_2DBy0D) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, channels}) + .SlopeShape({}) + .Test(xnnpack_delegate.get()); +} + +TEST(Prelu, 1DBy1D) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + + PreluTester().InputShape({batch}).SlopeShape({batch}).Test( + xnnpack_delegate.get()); +} + +// TODO(b/159727692) +TEST(Prelu, DISABLED_1DBy0D) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = 
shape_rng(); + + PreluTester().InputShape({batch}).SlopeShape({}).Test(xnnpack_delegate.get()); +} + +TEST(Prelu, FP16Weights) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto height = shape_rng(); + const auto width = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, height, width, channels}) + .SlopeShape({channels}) + .FP16Weights() + .Test(xnnpack_delegate.get()); +} + +TEST(Prelu, SparseWeights) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto height = shape_rng(); + const auto width = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, height, width, channels}) + .SlopeShape({channels}) + .SparseWeights() + .Test(xnnpack_delegate.get()); +} + +TEST(Prelu, MultiThreading) { + TfLiteXNNPackDelegateOptions delegate_options = + TfLiteXNNPackDelegateOptionsDefault(); + delegate_options.num_threads = 2; + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto shape_rng = + std::bind(std::uniform_int_distribution(2, 5), std::ref(rng)); + const auto batch = shape_rng(); + const auto height = shape_rng(); + const auto width = shape_rng(); + const auto channels = shape_rng(); + + PreluTester() + .InputShape({batch, height, width, channels}) + .SlopeShape({channels}) + .Test(xnnpack_delegate.get()); +} + +} // namespace xnnpack +} // namespace tflite diff --git a/tensorflow/lite/delegates/xnnpack/prelu_tester.cc b/tensorflow/lite/delegates/xnnpack/prelu_tester.cc new file mode 100644 index 00000000000..ab20c2c51dc --- /dev/null +++ b/tensorflow/lite/delegates/xnnpack/prelu_tester.cc @@ -0,0 +1,237 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/lite/delegates/xnnpack/prelu_tester.h" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include "flatbuffers/flatbuffers.h" // from @flatbuffers +#include "tensorflow/lite/interpreter.h" +#include "tensorflow/lite/kernels/register.h" +#include "tensorflow/lite/model.h" +#include "tensorflow/lite/schema/schema_generated.h" +#include "tensorflow/lite/version.h" + +namespace tflite { +namespace xnnpack { + +void PreluTester::Test(TfLiteDelegate* delegate) const { + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto input_rng = std::bind(std::uniform_real_distribution(-1.0f, 1.0f), + std::ref(rng)); + + std::vector buffer = CreateTfLiteModel(); + const Model* model = GetModel(buffer.data()); + + std::unique_ptr delegate_interpreter; + ASSERT_EQ( + InterpreterBuilder(model, ::tflite::ops::builtin::BuiltinOpResolver())( + &delegate_interpreter), + kTfLiteOk); + std::unique_ptr default_interpreter; + ASSERT_EQ( + InterpreterBuilder(model, ::tflite::ops::builtin::BuiltinOpResolver())( + &default_interpreter), + kTfLiteOk); + + ASSERT_TRUE(delegate_interpreter); + ASSERT_TRUE(default_interpreter); + + ASSERT_EQ(delegate_interpreter->inputs().size(), 1); + ASSERT_EQ(default_interpreter->inputs().size(), 1); + + ASSERT_EQ(delegate_interpreter->outputs().size(), 1); + ASSERT_EQ(default_interpreter->outputs().size(), 1); + + ASSERT_EQ(delegate_interpreter->AllocateTensors(), kTfLiteOk); + ASSERT_EQ(default_interpreter->AllocateTensors(), kTfLiteOk); + + ASSERT_EQ(delegate_interpreter->ModifyGraphWithDelegate(delegate), kTfLiteOk); + + float* default_input_data = default_interpreter->typed_tensor( + default_interpreter->inputs()[0]); + std::generate(default_input_data, + default_input_data + ComputeSize(InputShape()), + std::ref(input_rng)); + + float* xnnpack_input_data = delegate_interpreter->typed_tensor( + delegate_interpreter->inputs()[0]); + std::copy(default_input_data, default_input_data + ComputeSize(InputShape()), + xnnpack_input_data); + + ASSERT_EQ(default_interpreter->Invoke(), kTfLiteOk); + ASSERT_EQ(delegate_interpreter->Invoke(), kTfLiteOk); + + float* default_output_data = default_interpreter->typed_tensor( + default_interpreter->outputs()[0]); + float* xnnpack_output_data = delegate_interpreter->typed_tensor( + delegate_interpreter->outputs()[0]); + + for (size_t i = 0; i < ComputeSize(OutputShape()); i++) { + ASSERT_EQ(default_output_data[i], xnnpack_output_data[i]); + } +} + +std::vector PreluTester::CreateTfLiteModel() const { + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto slope_rng = std::bind(std::uniform_real_distribution(0.25f, 0.5f), + std::ref(rng)); + + flatbuffers::FlatBufferBuilder builder; + std::vector> operator_codes{ + {CreateOperatorCode(builder, BuiltinOperator_PRELU)}}; + if (FP16Weights()) { + operator_codes.emplace_back( + CreateOperatorCode(builder, BuiltinOperator_DEQUANTIZE)); + } else if (SparseWeights()) { + operator_codes.emplace_back( + CreateOperatorCode(builder, BuiltinOperator_DENSIFY)); + } + + std::vector> buffers{{ + CreateBuffer(builder, builder.CreateVector({})), + }}; + + if (FP16Weights()) { + std::vector slope_data(ComputeSize(SlopeShape())); + std::generate(slope_data.begin(), slope_data.end(), + std::bind(fp16_ieee_from_fp32_value, slope_rng)); + + buffers.push_back(CreateBuffer( + builder, builder.CreateVector( + 
reinterpret_cast(slope_data.data()), + sizeof(uint16_t) * slope_data.size()))); + } else { + std::vector slope_data(ComputeSize(SlopeShape())); + std::generate(slope_data.begin(), slope_data.end(), slope_rng); + + buffers.push_back(CreateBuffer( + builder, builder.CreateVector( + reinterpret_cast(slope_data.data()), + sizeof(float) * slope_data.size()))); + } + + std::vector> tensors; + std::vector> operators; + if (FP16Weights()) { + tensors.emplace_back(CreateTensor( + builder, + builder.CreateVector(SlopeShape().data(), SlopeShape().size()), + TensorType_FLOAT16, /*buffer=*/1)); + } else if (SparseWeights()) { + const int dims_count = SlopeShape().size(); + std::vector> dim_metadata( + dims_count); + std::vector traversal_order(dims_count); + for (int i = 0; i < dims_count; i++) { + traversal_order[i] = i; + dim_metadata[i] = CreateDimensionMetadata(builder, DimensionType_DENSE, + SlopeShape()[i]); + } + const flatbuffers::Offset sparsity_param = + CreateSparsityParameters(builder, builder.CreateVector(traversal_order), + 0, builder.CreateVector(dim_metadata)); + tensors.emplace_back(CreateTensor( + builder, + builder.CreateVector(SlopeShape().data(), SlopeShape().size()), + TensorType_FLOAT32, /*buffer=*/1, /*name=*/0, /*quantization=*/0, + /*is_variable=*/false, /*sparsity=*/sparsity_param)); + } + if (FP16Weights()) { + const std::array dequantize_inputs{{0}}; + const std::array dequantize_outputs{{2}}; + operators.emplace_back(CreateOperator( + builder, /*opcode_index=*/1, + builder.CreateVector(dequantize_inputs.data(), + dequantize_inputs.size()), + builder.CreateVector(dequantize_outputs.data(), + dequantize_outputs.size()))); + } else if (SparseWeights()) { + const std::array densify_inputs{{0}}; + const std::array densify_outputs{{2}}; + operators.emplace_back( + CreateOperator(builder, /*opcode_index=*/1, + builder.CreateVector(densify_inputs.data(), + densify_inputs.size()), + builder.CreateVector(densify_outputs.data(), + densify_outputs.size()))); + } + tensors.emplace_back(CreateTensor( + builder, + builder.CreateVector(InputShape().data(), InputShape().size()), + TensorType_FLOAT32)); + tensors.emplace_back(CreateTensor( + builder, + builder.CreateVector(SlopeShape().data(), SlopeShape().size()), + TensorType_FLOAT32, + /*buffer=*/(FP16Weights() || SparseWeights()) ? 
0 : 1)); + tensors.emplace_back(CreateTensor( + builder, + builder.CreateVector(OutputShape().data(), OutputShape().size()), + TensorType_FLOAT32)); + + const std::array op_inputs{ + {static_cast(tensors.size()) - 3, + static_cast(tensors.size()) - 2}}; + const std::array op_outputs{ + {static_cast(tensors.size()) - 1}}; + operators.emplace_back(CreateOperator( + builder, /*opcode_index=*/0, + builder.CreateVector(op_inputs.data(), op_inputs.size()), + builder.CreateVector(op_outputs.data(), op_outputs.size()))); + + const std::array subgraph_inputs{ + {static_cast(tensors.size() - 3)}}; + const std::array subgraph_outputs{ + {static_cast(tensors.size()) - 1}}; + flatbuffers::Offset subgraph = CreateSubGraph( + builder, builder.CreateVector(tensors.data(), tensors.size()), + builder.CreateVector(subgraph_inputs.data(), + subgraph_inputs.size()), + builder.CreateVector(subgraph_outputs.data(), + subgraph_outputs.size()), + builder.CreateVector(operators.data(), operators.size())); + + flatbuffers::Offset description = + builder.CreateString("PReLU model"); + + flatbuffers::Offset model_buffer = CreateModel( + builder, TFLITE_SCHEMA_VERSION, + builder.CreateVector(operator_codes.data(), operator_codes.size()), + builder.CreateVector(&subgraph, 1), description, + builder.CreateVector(buffers.data(), buffers.size())); + + builder.Finish(model_buffer); + + return std::vector(builder.GetBufferPointer(), + builder.GetBufferPointer() + builder.GetSize()); +} + +int32_t PreluTester::ComputeSize(const std::vector& shape) { + return std::accumulate(shape.cbegin(), shape.cend(), 1, + std::multiplies()); +} + +} // namespace xnnpack +} // namespace tflite diff --git a/tensorflow/lite/delegates/xnnpack/prelu_tester.h b/tensorflow/lite/delegates/xnnpack/prelu_tester.h new file mode 100644 index 00000000000..e89bae6029b --- /dev/null +++ b/tensorflow/lite/delegates/xnnpack/prelu_tester.h @@ -0,0 +1,88 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_LITE_DELEGATES_XNNPACK_PRELU_TESTER_H_ +#define TENSORFLOW_LITE_DELEGATES_XNNPACK_PRELU_TESTER_H_ + +#include +#include + +#include +#include "tensorflow/lite/c/common.h" + +namespace tflite { +namespace xnnpack { + +class PreluTester { + public: + PreluTester() = default; + PreluTester(const PreluTester&) = delete; + PreluTester& operator=(const PreluTester&) = delete; + + inline PreluTester& InputShape(std::initializer_list shape) { + for (auto it = shape.begin(); it != shape.end(); ++it) { + EXPECT_GT(*it, 0); + } + input_shape_ = std::vector(shape.begin(), shape.end()); + return *this; + } + + inline const std::vector& InputShape() const { return input_shape_; } + + inline PreluTester& SlopeShape(std::initializer_list shape) { + for (auto it = shape.begin(); it != shape.end(); ++it) { + EXPECT_GT(*it, 0); + } + slope_shape_ = std::vector(shape.begin(), shape.end()); + return *this; + } + + inline const std::vector& SlopeShape() const { return slope_shape_; } + + inline const std::vector& OutputShape() const { + return InputShape(); + } + + inline PreluTester& FP16Weights() { + fp16_weights_ = true; + return *this; + } + + inline bool FP16Weights() const { return fp16_weights_; } + + inline PreluTester& SparseWeights() { + sparse_weights_ = true; + return *this; + } + + inline bool SparseWeights() const { return sparse_weights_; } + + void Test(TfLiteDelegate* delegate) const; + + private: + std::vector CreateTfLiteModel() const; + + static int32_t ComputeSize(const std::vector& shape); + + std::vector input_shape_; + std::vector slope_shape_; + bool fp16_weights_ = false; + bool sparse_weights_ = false; +}; + +} // namespace xnnpack +} // namespace tflite + +#endif // TENSORFLOW_LITE_DELEGATES_XNNPACK_PRELU_TESTER_H_ diff --git a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc index 0afc9c32122..31468ef7407 100644 --- a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc +++ b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc @@ -2266,7 +2266,8 @@ class Subgraph { const TfLiteTensor& input_tensor = tensors[node->inputs->data[0]]; TF_LITE_ENSURE_STATUS(CheckTensorFloatType( logging_context, input_tensor, node->inputs->data[0], node_index)); - TF_LITE_ENSURE_STATUS(CheckTensorShape(logging_context, input_tensor, 4, + TF_LITE_ENSURE_STATUS(CheckTensorShape(logging_context, input_tensor, 1, + XNN_MAX_TENSOR_DIMS, node->inputs->data[0])); TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation( logging_context, input_tensor, node->inputs->data[0], node_index)); @@ -2284,7 +2285,8 @@ class Subgraph { const TfLiteTensor& output_tensor = tensors[node->outputs->data[0]]; TF_LITE_ENSURE_STATUS(CheckTensorFloatType( logging_context, output_tensor, node->outputs->data[0], node_index)); - TF_LITE_ENSURE_STATUS(CheckTensorShape(logging_context, output_tensor, 4, + TF_LITE_ENSURE_STATUS(CheckTensorShape(logging_context, output_tensor, 1, + XNN_MAX_TENSOR_DIMS, node->outputs->data[0])); TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation( logging_context, output_tensor, node->outputs->data[0], node_index)); From 68fe003b85997020d7d0d9bac03400230ba4bf63 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Tue, 23 Jun 2020 13:59:05 -0700 Subject: [PATCH 34/66] Move the keras related CTL training test to keras/distribute. 
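
For context, the kind of Keras custom-training-loop (CTL) code the moved
test exercises follows the standard tf.distribute recipe. A minimal
sketch, illustrative only and not taken from the moved test file:

```python
import tensorflow as tf

# Create the model and optimizer under the strategy scope so their
# variables are mirrored across replicas.
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
  model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
  optimizer = tf.keras.optimizers.SGD(0.1)

dataset = tf.data.Dataset.from_tensor_slices(
    (tf.random.normal([8, 4]), tf.random.normal([8, 1]))).batch(4)
dist_dataset = strategy.experimental_distribute_dataset(dataset)

def step_fn(features, labels):
  with tf.GradientTape() as tape:
    loss = tf.reduce_mean(tf.square(model(features) - labels))
  grads = tape.gradient(loss, model.trainable_variables)
  optimizer.apply_gradients(zip(grads, model.trainable_variables))
  return loss

@tf.function
def train_step(inputs):
  # Run one step per replica, then average the per-replica losses.
  per_replica_loss = strategy.run(step_fn, args=inputs)
  return strategy.reduce(
      tf.distribute.ReduceOp.MEAN, per_replica_loss, axis=None)

for batch in dist_dataset:
  train_step(batch)
```
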
PiperOrigin-RevId: 317934704 Change-Id: Iafccd476f87c2ff04c8fbbe9910239ac6d2b7cf3 --- tensorflow/python/distribute/BUILD | 22 ---------------- tensorflow/python/keras/distribute/BUILD | 26 +++++++++++++++++++ .../custom_training_loop_models_test.py | 0 3 files changed, 26 insertions(+), 22 deletions(-) rename tensorflow/python/{ => keras}/distribute/custom_training_loop_models_test.py (100%) diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index 9900040a6e6..d2c46f64f18 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -1276,28 +1276,6 @@ distribute_py_test( ], ) -distribute_py_test( - name = "custom_training_loop_models_test", - srcs = ["custom_training_loop_models_test.py"], - main = "custom_training_loop_models_test.py", - tags = [ - "multi_and_single_gpu", - ], - tpu_tags = [ - "no_oss", # b/153615544. - ], - deps = [ - ":combinations", - ":strategy_combinations", - "//tensorflow/python:errors", - "//tensorflow/python:variables", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/eager:test", - "//tensorflow/python/keras", - "@absl_py//absl/testing:parameterized", - ], -) - distribute_py_test( name = "custom_training_loop_optimizer_test", srcs = ["custom_training_loop_optimizer_test.py"], diff --git a/tensorflow/python/keras/distribute/BUILD b/tensorflow/python/keras/distribute/BUILD index c6a8f2c5f91..4245d70b1f0 100644 --- a/tensorflow/python/keras/distribute/BUILD +++ b/tensorflow/python/keras/distribute/BUILD @@ -77,6 +77,32 @@ cuda_py_test( ], ) +distribute_py_test( + name = "custom_training_loop_models_test", + srcs = ["custom_training_loop_models_test.py"], + main = "custom_training_loop_models_test.py", + tags = [ + "multi_and_single_gpu", + ], + tpu_tags = [ + "no_oss", # b/153615544. + ], + deps = [ + "//tensorflow/python:math_ops", + "//tensorflow/python:util", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/distribute:combinations", + "//tensorflow/python/distribute:reduce_util", + "//tensorflow/python/distribute:strategy_combinations", + "//tensorflow/python/eager:backprop", + "//tensorflow/python/eager:def_function", + "//tensorflow/python/eager:test", + "//tensorflow/python/keras", + "//tensorflow/python/module", + "@absl_py//absl/testing:parameterized", + ], +) + py_library( name = "distribute_strategy_test_lib", srcs = [ diff --git a/tensorflow/python/distribute/custom_training_loop_models_test.py b/tensorflow/python/keras/distribute/custom_training_loop_models_test.py similarity index 100% rename from tensorflow/python/distribute/custom_training_loop_models_test.py rename to tensorflow/python/keras/distribute/custom_training_loop_models_test.py From cc9d951afa75936d533d6a011ad29d1728e3cc3a Mon Sep 17 00:00:00 2001 From: Wenhao Jia Date: Tue, 23 Jun 2020 14:03:51 -0700 Subject: [PATCH 35/66] Fix a TPU test failure. 
PiperOrigin-RevId: 317935580
Change-Id: Id0d06446f4d7159b375879ea97f251d10ee80195
---
 tensorflow/python/keras/layers/preprocessing/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/python/keras/layers/preprocessing/BUILD b/tensorflow/python/keras/layers/preprocessing/BUILD
index 6916712d52c..9adf97d1fa5 100644
--- a/tensorflow/python/keras/layers/preprocessing/BUILD
+++ b/tensorflow/python/keras/layers/preprocessing/BUILD
@@ -369,6 +369,7 @@ distribute_py_test(
     tags = [
         "multi_and_single_gpu",
     ],
+    tpu_tags = ["no_oss"],
     deps = [
         ":category_crossing",
         "//tensorflow/python/distribute:combinations",

From b88bebf1ed553f282c960ce3c4ffec7a3d35dcf5 Mon Sep 17 00:00:00 2001
From: Karim Nosir
Date: Tue, 23 Jun 2020 14:19:01 -0700
Subject: [PATCH 36/66] Add check for control flow v1 in converter.

If any Control Flow V1 ops are found in the graph after importing, an
error message is shown and failure is signaled.

PiperOrigin-RevId: 317938209
Change-Id: I58e14a25dad2f2337d8ad05de55aff757dfcd0b2
---
 .../compiler/mlir/lite/tests/end2end/BUILD    |   1 +
 .../lite/tests/end2end/control_flow_v1.pbtxt  | 257 ++++++++++++++++++
 .../compiler/mlir/lite/tf_tfl_translate.cc    |   2 +-
 .../mlir/lite/tf_to_tfl_flatbuffer.cc         |  41 ++-
 .../compiler/mlir/lite/tf_to_tfl_flatbuffer.h |   3 +-
 5 files changed, 297 insertions(+), 7 deletions(-)
 create mode 100644 tensorflow/compiler/mlir/lite/tests/end2end/control_flow_v1.pbtxt

diff --git a/tensorflow/compiler/mlir/lite/tests/end2end/BUILD b/tensorflow/compiler/mlir/lite/tests/end2end/BUILD
index cf584987d2d..25bd761f99e 100644
--- a/tensorflow/compiler/mlir/lite/tests/end2end/BUILD
+++ b/tensorflow/compiler/mlir/lite/tests/end2end/BUILD
@@ -26,6 +26,7 @@ filegroup(
         "//tensorflow/compiler/mlir/lite:flatbuffer_to_string",
         "//tensorflow/compiler/mlir/lite:tf_tfl_translate",
         "@llvm-project//llvm:FileCheck",
+        "@llvm-project//llvm:not",
     ],
 )

diff --git a/tensorflow/compiler/mlir/lite/tests/end2end/control_flow_v1.pbtxt b/tensorflow/compiler/mlir/lite/tests/end2end/control_flow_v1.pbtxt
new file mode 100644
index 00000000000..7b3a4d14fea
--- /dev/null
+++ b/tensorflow/compiler/mlir/lite/tests/end2end/control_flow_v1.pbtxt
@@ -0,0 +1,257 @@
+# RUN: not tf_tfl_translate -tf-upgrade-legacy=false -tf-input-arrays=Placeholder,Placeholder_1 -tf-input-shapes=1,2:1 -tf-output-arrays=cond/Merge -tf-enable-shape-inference-on-import=false -mlir-print-debuginfo -output-mlir %s -o - 2>&1 | FileCheck %s
+
+# CHECK: error: The graph has Control Flow V1 ops. TFLite converter doesn't support Control Flow V1 ops. Consider using Control Flow V2 ops instead.
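+
+# The GraphDef below is roughly what the following TF-1.x Python produces
+# (a hypothetical reconstruction for readability, not part of the test):
+#
+#   x = tf.compat.v1.placeholder(tf.float32, shape=[None, 2])
+#   pred = tf.compat.v1.placeholder(tf.bool, shape=[])
+#   w = tf.constant([[0.1, 0.2], [0.3, 0.4]])
+#   out = tf.cond(pred, lambda: tf.matmul(x, w), lambda: x + w)
+#
+# With control flow v2 disabled, tf.cond lowers to the Switch/Merge (v1)
+# ops that the converter now rejects.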
+ +node { + name: "Const" + op: "Const" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 2 + } + dim { + size: 2 + } + } + tensor_content: "\315\314\314=\315\314L>\232\231\231>\315\314\314>" + } + } + } +} +node { + name: "Placeholder" + op: "Placeholder" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: -1 + } + dim { + size: 2 + } + } + } + } +} +node { + name: "Placeholder_1" + op: "Placeholder" + attr { + key: "dtype" + value { + type: DT_BOOL + } + } + attr { + key: "shape" + value { + shape { + } + } + } +} +node { + name: "cond/Switch" + op: "Switch" + input: "Placeholder_1" + input: "Placeholder_1" + attr { + key: "T" + value { + type: DT_BOOL + } + } +} +node { + name: "cond/switch_t" + op: "Identity" + input: "cond/Switch:1" + attr { + key: "T" + value { + type: DT_BOOL + } + } +} +node { + name: "cond/switch_f" + op: "Identity" + input: "cond/Switch" + attr { + key: "T" + value { + type: DT_BOOL + } + } +} +node { + name: "cond/pred_id" + op: "Identity" + input: "Placeholder_1" + attr { + key: "T" + value { + type: DT_BOOL + } + } +} +node { + name: "cond/MatMul" + op: "MatMul" + input: "cond/MatMul/Switch:1" + input: "cond/MatMul/Switch_1:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "cond/MatMul/Switch" + op: "Switch" + input: "Placeholder" + input: "cond/pred_id" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@Placeholder" + } + } + } +} +node { + name: "cond/MatMul/Switch_1" + op: "Switch" + input: "Const" + input: "cond/pred_id" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@Const" + } + } + } +} +node { + name: "cond/Add" + op: "Add" + input: "cond/Add/Switch" + input: "cond/Add/Switch_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } +} +node { + name: "cond/Add/Switch" + op: "Switch" + input: "Placeholder" + input: "cond/pred_id" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@Placeholder" + } + } + } +} +node { + name: "cond/Add/Switch_1" + op: "Switch" + input: "Const" + input: "cond/pred_id" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@Const" + } + } + } +} +node { + name: "cond/Merge" + op: "Merge" + input: "cond/Add" + input: "cond/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } +} +node { + name: "init" + op: "NoOp" +} +versions { + producer: 134 +} diff --git a/tensorflow/compiler/mlir/lite/tf_tfl_translate.cc b/tensorflow/compiler/mlir/lite/tf_tfl_translate.cc index 31dad60c294..fcaebe82f74 100644 --- a/tensorflow/compiler/mlir/lite/tf_tfl_translate.cc +++ b/tensorflow/compiler/mlir/lite/tf_tfl_translate.cc @@ -172,7 +172,7 @@ int main(int argc, char **argv) { input_file_name, input_mlir, use_splatted_constant, custom_opdefs, debug_info_file, input_arrays, input_dtypes, input_shapes, output_arrays, - /*prune_unused_nodes=*/true, &source_mgr, &context); + /*prune_unused_nodes=*/true, upgrade_legacy, &source_mgr, &context); } // If errors occur, the library call in the above already logged the error diff --git 
a/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc b/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc
index 38b96cf833f..2e45953c5fa 100644
--- a/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc
+++ b/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include "llvm/Support/raw_ostream.h"
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Module.h"  // from @llvm-project
+#include "mlir/IR/Visitors.h"  // from @llvm-project
 #include "mlir/Parser.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
 #include "mlir/Support/FileUtilities.h"  // from @llvm-project
@@ -28,6 +29,7 @@ limitations under the License.
 #include "tensorflow/compiler/mlir/lite/flatbuffer_export.h"
 #include "tensorflow/compiler/mlir/lite/quantization/quantization_config.h"
 #include "tensorflow/compiler/mlir/lite/transforms/passes.h"
+#include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h"
 #include "tensorflow/compiler/mlir/tensorflow/transforms/decode_constant.h"
 #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h"
 #include "tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.h"
@@ -39,19 +41,47 @@ limitations under the License.
 #include "tensorflow/stream_executor/lib/statusor.h"
 
 namespace tensorflow {
-
+namespace {
 using mlir::MLIRContext;
 using mlir::ModuleOp;
+using mlir::Operation;
 using mlir::OwningModuleRef;
 using stream_executor::port::StatusOr;
 
+bool IsControlFlowV1Op(Operation* op) {
+  return mlir::isa<mlir::tf_executor::SwitchOp>(op) ||
+         mlir::isa<mlir::tf_executor::MergeOp>(op) ||
+         mlir::isa<mlir::tf_executor::EnterOp>(op) ||
+         mlir::isa<mlir::tf_executor::ExitOp>(op) ||
+         mlir::isa<mlir::tf_executor::NextIterationSinkOp>(op) ||
+         mlir::isa<mlir::tf_executor::NextIterationSourceOp>(op);
+}
+
+mlir::LogicalResult IsValidGraph(mlir::ModuleOp module) {
+  auto result = module.walk([&](Operation* op) {
+    return IsControlFlowV1Op(op) ? mlir::WalkResult::interrupt()
+                                 : mlir::WalkResult::advance();
+  });
+  if (result.wasInterrupted()) {
+    module.emitError(
+        "The graph has Control Flow V1 ops. TFLite converter doesn't support "
+        "Control Flow V1 ops. Consider using Control Flow V2 ops instead. See "
+        "https://www.tensorflow.org/api_docs/python/tf/compat/v1/"
+        "enable_control_flow_v2.");
+    return mlir::failure();
+  }
+  return mlir::success();
+}
+}  // namespace
+
 StatusOr<OwningModuleRef> LoadFromGraphdefOrMlirSource(
     const std::string& input_filename, bool input_mlir,
     bool use_splatted_constant, const std::vector<std::string>& extra_tf_opdefs,
     absl::string_view debug_info_file, absl::string_view input_arrays,
     absl::string_view input_dtypes, absl::string_view input_shapes,
     absl::string_view output_arrays, bool prune_unused_nodes,
-    llvm::SourceMgr* source_mgr, MLIRContext* context) {
+    bool enable_upgrade_legacy, llvm::SourceMgr* source_mgr,
+    MLIRContext* context) {
   // Set up the input file.
   std::string error_message;
   auto file = mlir::openInputFile(input_filename, &error_message);
@@ -86,14 +116,14 @@ StatusOr<OwningModuleRef> LoadFromGraphdefOrMlirSource(
         file->getBuffer(), debug_info_file, input_arrays, input_dtypes,
         input_shapes, output_arrays, /*control_output_arrays=*/"",
         prune_unused_nodes, /*convert_legacy_fed_inputs=*/true,
-        /*graph_as_function=*/false, /*upgrade_legacy=*/true,
+        /*graph_as_function=*/false, enable_upgrade_legacy,
         /*enable_shape_inference=*/false, context);
   }
   return tensorflow::GraphdefToMlirTranslateFunction(
       file->getBuffer(), debug_info_file, input_arrays, input_dtypes,
       input_shapes, output_arrays, /*control_output_arrays=*/"",
       prune_unused_nodes, /*convert_legacy_fed_inputs=*/true,
-      /*graph_as_function=*/false, /*upgrade_legacy=*/true,
+      /*graph_as_function=*/false, enable_upgrade_legacy,
       /*enable_shape_inference=*/false, context);
 }
 
@@ -104,7 +134,8 @@ Status ConvertTFExecutorToTFLOrFlatbuffer(
     mlir::PassManager* pass_manager) {
   mlir::StatusScopedDiagnosticHandler statusHandler(module.getContext(),
                                                     /*propagate=*/true);
-  if (failed(pass_manager->run(module))) {
+
+  if (failed(IsValidGraph(module)) || failed(pass_manager->run(module))) {
     return statusHandler.ConsumeStatus();
   }
 
diff --git a/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.h b/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.h
index d2c31a6b972..82cf9c9549b 100644
--- a/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.h
+++ b/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.h
@@ -41,7 +41,8 @@ LoadFromGraphdefOrMlirSource(
     absl::string_view debug_info_file, absl::string_view input_arrays,
     absl::string_view input_dtypes, absl::string_view input_shapes,
     absl::string_view output_arrays, bool prune_unused_nodes,
-    llvm::SourceMgr* source_mgr, mlir::MLIRContext* context);
+    bool enable_upgrade_legacy, llvm::SourceMgr* source_mgr,
+    mlir::MLIRContext* context);
 
 // Load Saved model (either v1 or v2) into MLIR.
 stream_executor::port::StatusOr<mlir::OwningModuleRef> ImportSavedModel(

From 4f6e48e9fd6fd8d536a96d78a6e6006e4ac0074c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Tue, 23 Jun 2020 14:28:35 -0700
Subject: [PATCH 37/66] Fixed shape inference in xlog1py. Modified binary ops
 tests to run on 1d & 2d inputs so that broadcasting is also tested
 implicitly. Verified that test failed on xlog1py prior to change in
 binary_ops.cc.
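
As a standalone illustration of the behavior this pins down (a
hypothetical NumPy check, not part of the patch): xlog1py computes
x * log1p(y), defined to be 0 wherever x == 0, and a rank-1 x broadcasts
against a rank-2 y into a rank-2 result:

```python
import numpy as np

x = np.array([0., 4., 3., 2., 1., 0.])         # shape (6,)
y = np.array([[-1., 5., 6., 7., 8., np.nan]])  # shape (1, 6)

# Broadcasting (6,) against (1, 6) gives a (1, 6) result, which is why
# the expected values in binary_ops_test.py are now two-dimensional.
# log1p(-1) and 0 * inf raise warnings but are masked out by the where.
with np.errstate(divide="ignore", invalid="ignore"):
  out = np.where(x == 0, 0.0, x * np.log1p(y))

print(out)  # [[0. 7.167038 5.83773 4.158883 2.197225 0.]]
```
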
PiperOrigin-RevId: 317939721 Change-Id: I6f1f8e501028b84933c152d7315fe67fee5b9b46 --- tensorflow/compiler/tests/binary_ops_test.py | 12 ++++++------ tensorflow/compiler/tf2xla/kernels/binary_ops.cc | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index 789309bb3bc..07a41d67520 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -229,16 +229,16 @@ class BinaryOpsTest(xla_test.XLATestCase): self._testBinary( gen_math_ops.xdivy, np.array([0, 4, 3, 2, 1, 0], dtype=dtype), - np.array([0, 5, 6, 7, 8, float("NaN")], dtype=dtype), - expected=np.array([0, 0.8, 0.5, 0.285714, 0.125, 0], dtype=dtype), + np.array([[0, 5, 6, 7, 8, float("NaN")]], dtype=dtype), + expected=np.array([[0, 0.8, 0.5, 0.285714, 0.125, 0]], dtype=dtype), rtol=1e-6, atol=1e-6) self._testBinary( gen_math_ops.xlogy, np.array([0, 4, 3, 2, 1, 0], dtype=dtype), - np.array([0, 5, 6, 7, 8, float("NaN")], dtype=dtype), - expected=np.array([0, 6.437752, 5.375278, 3.89182, 2.079442, 0], + np.array([[0, 5, 6, 7, 8, float("NaN")]], dtype=dtype), + expected=np.array([[0, 6.437752, 5.375278, 3.89182, 2.079442, 0]], dtype=dtype), rtol=1e-4, atol=1e-6) @@ -246,8 +246,8 @@ class BinaryOpsTest(xla_test.XLATestCase): self._testBinary( gen_math_ops.xlog1py, np.array([0, 4, 3, 2, 1, 0], dtype=dtype), - np.array([-1, 5, 6, 7, 8, float("NaN")], dtype=dtype), - expected=np.array([0, 7.167038, 5.837730, 4.158883, 2.197225, 0], + np.array([[-1, 5, 6, 7, 8, float("NaN")]], dtype=dtype), + expected=np.array([[0, 7.167038, 5.837730, 4.158883, 2.197225, 0]], dtype=dtype), rtol=1e-4, atol=1e-6) diff --git a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc index 0ea851e9325..88d7525e5d5 100644 --- a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc @@ -153,6 +153,7 @@ XLA_MAKE_BINARY(Xlogy, XlogyImpl(lhs, rhs, broadcast_helper)); xla::XlaOp Xlog1pyImpl(xla::XlaOp x, xla::XlaOp y, const BCast& broadcast_helper) { + std::tie(x, y) = XlaBinaryOp::Broadcast(x, y, broadcast_helper); auto non_zero = xla::Mul(x, xla::Log1p(y)); auto zero = xla::ZerosLike(non_zero); auto x_is_zero = xla::Eq(x, zero); From 00be49f0c049e75b2478bc1b691e11ca4d92644c Mon Sep 17 00:00:00 2001 From: Ken Franko Date: Tue, 23 Jun 2020 14:35:42 -0700 Subject: [PATCH 38/66] Extract code to utils for setting device for outside compilation host computation. 
PiperOrigin-RevId: 317940898 Change-Id: Id15913f3aa2ea42c2e2f2e78302d6fff60ee49c5 --- tensorflow/compiler/mlir/tensorflow/BUILD | 3 + ...u_extract_head_tail_outside_compilation.cc | 66 +------ .../utils/tpu_rewrite_device_util.cc | 55 ++++++ .../utils/tpu_rewrite_device_util.h | 10 + .../utils/tpu_rewrite_device_util_test.cc | 182 ++++++++++++++++++ 5 files changed, 254 insertions(+), 62 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index b159815d5eb..db31d4faf5f 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -1356,6 +1356,7 @@ cc_library( srcs = ["utils/tpu_rewrite_device_util.cc"], hdrs = ["utils/tpu_rewrite_device_util.h"], deps = [ + ":tensorflow", "//tensorflow/compiler/xla:array4d", "//tensorflow/compiler/xla:xla_data_proto_cc", "//tensorflow/compiler/xla/service:computation_placer", @@ -1366,6 +1367,7 @@ cc_library( "@com_google_absl//absl/strings", "@llvm-project//llvm:Support", "@llvm-project//mlir:IR", + "@llvm-project//mlir:Support", ], ) @@ -1374,6 +1376,7 @@ tf_cc_test( size = "small", srcs = ["utils/tpu_rewrite_device_util_test.cc"], deps = [ + ":device_util", ":tpu_rewrite_device_util", "//tensorflow/core:framework", "//tensorflow/core:test", diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_head_tail_outside_compilation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_head_tail_outside_compilation.cc index bdfe43fc9cb..2be6ee7a78c 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_head_tail_outside_compilation.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_head_tail_outside_compilation.cc @@ -113,64 +113,6 @@ tf_device::LaunchOp CreateLaunchForBlock(OpBuilder* builder, Operation* op, return launch; } -// Parses TPU compilation and execution devices from a TPU cluster and returns -// the host device for the head and tail computations. If the TPU computation is -// replicated, kTPUReplicatedHost is returned instead. -LogicalResult GetHostDeviceForHeadTailComputation( - mlir::TF::RuntimeDevices devices, tf_device::ClusterOp cluster, - std::string* host_device) { - auto replicate = cluster.getParentOfType(); - if (replicate) { - *host_device = tensorflow::kTPUReplicatedHost; - return success(); - } - - auto num_cores_per_replica_attr = - cluster.getAttrOfType(tensorflow::kNumCoresPerReplicaAttr); - if (!num_cores_per_replica_attr) - return cluster.emitOpError( - "cluster op missing `num_cores_per_replica` attribute"); - - if (num_cores_per_replica_attr.getInt() != 1) - return cluster.emitOpError( - "outside compilation is not supported with model parallelism."); - - auto topology_attr = - cluster.getAttrOfType(tensorflow::kTopologyAttr); - if (!topology_attr) - return cluster.emitOpError("cluster op missing `topology` attribute"); - - auto device_assignment_attr = - cluster.getAttrOfType(tensorflow::kDeviceAssignmentAttr); - if (!device_assignment_attr) - return cluster.emitOpError(llvm::formatv("requires attribute '{0}'", - tensorflow::kDeviceAssignmentAttr) - .str()); - - auto status_or_device_coodinates = - tensorflow::GetDeviceCoordinates(device_assignment_attr); - - if (!status_or_device_coodinates.ok()) - return cluster.emitError() - << "error in fetching tpu device coordinates: " - << status_or_device_coodinates.status().error_message(); - - // Determine compilation and execution devices. 
- auto status_or_tpu_device_assignment = - tensorflow::GetTPUCompilationAndExecutionDevices( - devices.device_names(), /*num_replicas=*/1, - /*num_cores_per_replica=*/1, topology_attr.getValue(), - status_or_device_coodinates.ConsumeValueOrDie()); - if (!status_or_tpu_device_assignment.ok()) - return cluster.emitError() - << "error in fetching TPU compilation/execution devices: " - << status_or_tpu_device_assignment.status().error_message(); - auto& tpu_device_assignment = status_or_tpu_device_assignment.ValueOrDie(); - - *host_device = tpu_device_assignment.tpu_devices[0][0].host; - return success(); -} - // Returns a set of ops that are outside compiled and can be extracted to before // the TPU computation. These ops are either connected to the inputs of the TPU // computation or other ops that can be extracted, and have no operands from @@ -232,8 +174,8 @@ mlir::LogicalResult LiftHeadOutsideCompiledOps( llvm::SmallVector head_outside_compiled_ops = FindOutsideCompiledOpsAtHead(cluster); if (head_outside_compiled_ops.empty()) return success(); - if (failed( - GetHostDeviceForHeadTailComputation(devices, cluster, host_device))) + if (failed(tensorflow::GetHostDeviceOutsideComputation(devices, cluster, + host_device))) return failure(); CreateHeadComputation(builder, cluster, head_outside_compiled_ops, @@ -361,8 +303,8 @@ mlir::LogicalResult LiftTailOutsideCompiledOps( if (tail_outside_compiled_ops.empty()) return success(); if (host_device.empty()) - if (failed(GetHostDeviceForHeadTailComputation(devices, *cluster, - &host_device))) + if (failed(tensorflow::GetHostDeviceOutsideComputation(devices, *cluster, + &host_device))) return failure(); // Forward all results of cluster first. These results will be remapped once diff --git a/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc index 282b7ad3139..f884b75bce1 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc @@ -484,4 +484,59 @@ std::string GetDeviceAliasForLogicalCore(int core_index) { return llvm::formatv("{0}_{1}", kTPUReplicatedCore, core_index).str(); } +mlir::LogicalResult GetHostDeviceOutsideComputation( + mlir::TF::RuntimeDevices devices, mlir::tf_device::ClusterOp cluster, + std::string* host_device) { + auto replicate = cluster.getParentOfType(); + if (replicate) { + *host_device = tensorflow::kTPUReplicatedHost; + return mlir::success(); + } + + auto num_cores_per_replica_attr = cluster.getAttrOfType( + tensorflow::kNumCoresPerReplicaAttr); + if (!num_cores_per_replica_attr) + return cluster.emitOpError( + "cluster op missing `num_cores_per_replica` attribute"); + + if (num_cores_per_replica_attr.getInt() != 1) + return cluster.emitOpError( + "outside compilation is not supported with model parallelism."); + + auto topology_attr = + cluster.getAttrOfType(tensorflow::kTopologyAttr); + if (!topology_attr) + return cluster.emitOpError("cluster op missing `topology` attribute"); + + auto device_assignment_attr = + cluster.getAttrOfType(tensorflow::kDeviceAssignmentAttr); + if (!device_assignment_attr) + return cluster.emitOpError(llvm::formatv("requires attribute '{0}'", + tensorflow::kDeviceAssignmentAttr) + .str()); + + auto status_or_device_coodinates = + tensorflow::GetDeviceCoordinates(device_assignment_attr); + + if (!status_or_device_coodinates.ok()) + return cluster.emitError() + << "error in fetching tpu device coordinates: " + 
<< status_or_device_coodinates.status().error_message(); + + // Determine compilation and execution devices. + auto status_or_tpu_device_assignment = + tensorflow::GetTPUCompilationAndExecutionDevices( + devices.device_names(), /*num_replicas=*/1, + /*num_cores_per_replica=*/1, topology_attr.getValue(), + status_or_device_coodinates.ConsumeValueOrDie()); + if (!status_or_tpu_device_assignment.ok()) + return cluster.emitError() + << "error in fetching TPU compilation/execution devices: " + << status_or_tpu_device_assignment.status().error_message(); + auto& tpu_device_assignment = status_or_tpu_device_assignment.ValueOrDie(); + + *host_device = tpu_device_assignment.tpu_devices[0][0].host; + return mlir::success(); +} + } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.h b/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.h index 6bb541ab683..96cc8d7877b 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.h @@ -23,6 +23,9 @@ limitations under the License. #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/Support/LogicalResult.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_structs.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/util/device_name_utils.h" @@ -237,6 +240,13 @@ StatusOr GetTPUCompilationAndExecutionDevices( // logical core. std::string GetDeviceAliasForLogicalCore(int core_index); +// Parses TPU compilation and execution devices from a TPU cluster and returns +// the host device for the head and tail computations. If the TPU computation is +// replicated, kTPUReplicatedHost is returned instead. +mlir::LogicalResult GetHostDeviceOutsideComputation( + mlir::TF::RuntimeDevices devices, mlir::tf_device::ClusterOp cluster, + std::string* host_device); + } // namespace tensorflow #endif // TENSORFLOW_COMPILER_MLIR_TENSORFLOW_UTILS_TPU_REWRITE_DEVICE_UTIL_H_ diff --git a/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util_test.cc b/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util_test.cc index a70e93a0195..49a8f704b30 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util_test.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util_test.cc @@ -21,6 +21,8 @@ limitations under the License. 
#include "llvm/Support/FormatVariadic.h" #include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/Module.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/utils/device_util.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/protobuf/tpu/topology.pb.h" @@ -622,5 +624,185 @@ TEST(TPURewriteDeviceUtilTest, TestInvalidAttrForDeviceAssignmentDisallowed) { "bad 'device_assignment' attribute at index 0, not an int"); } +TEST(TPURewriteDeviceUtilTest, TestGetHostFailDeviceMissingAttributes) { + mlir::registerDialect(); + mlir::MLIRContext context; + mlir::OwningModuleRef module_ref = + mlir::ModuleOp::create(mlir::UnknownLoc::get(&context)); + mlir::OpBuilder builder(module_ref->getBodyRegion()); + llvm::SmallVector result_types; + auto cluster = builder.create( + mlir::UnknownLoc::get(&context), result_types); + + mlir::TF::RuntimeDevices devices; + std::string host_device; + EXPECT_TRUE(mlir::failed( + GetHostDeviceOutsideComputation(devices, cluster, &host_device))); +} + +TEST(TPURewriteDeviceUtilTest, TestGetHostDeviceFailModelParallelism) { + mlir::registerDialect(); + mlir::MLIRContext context; + mlir::OwningModuleRef module_ref = + mlir::ModuleOp::create(mlir::UnknownLoc::get(&context)); + mlir::OpBuilder builder(module_ref->getBodyRegion()); + + llvm::SmallVector result_types; + auto cluster = builder.create( + mlir::UnknownLoc::get(&context), result_types); + cluster.setAttr(kNumCoresPerReplicaAttr, + builder.getIntegerAttr(builder.getIntegerType(64), 5)); + cluster.setAttr(kTopologyAttr, builder.getStringAttr("")); + cluster.setAttr(kDeviceAssignmentAttr, builder.getArrayAttr({})); + + mlir::TF::RuntimeDevices runtime_devices; + std::string host_device; + EXPECT_TRUE(mlir::failed( + GetHostDeviceOutsideComputation(runtime_devices, cluster, &host_device))); +} + +TEST(TPURewriteDeviceUtilTest, TestGetHostDeviceFailMissingTopology) { + mlir::registerDialect(); + mlir::MLIRContext context; + mlir::OwningModuleRef module_ref = + mlir::ModuleOp::create(mlir::UnknownLoc::get(&context)); + mlir::OpBuilder builder(module_ref->getBodyRegion()); + + llvm::SmallVector result_types; + auto cluster = builder.create( + mlir::UnknownLoc::get(&context), result_types); + cluster.setAttr(kNumCoresPerReplicaAttr, + builder.getIntegerAttr(builder.getIntegerType(64), 1)); + cluster.setAttr(kDeviceAssignmentAttr, builder.getArrayAttr({})); + + mlir::TF::RuntimeDevices runtime_devices; + std::string host_device; + EXPECT_TRUE(mlir::failed( + GetHostDeviceOutsideComputation(runtime_devices, cluster, &host_device))); +} + +TEST(TPURewriteDeviceUtilTest, TestGetHostDeviceFailMissingDeviceAssignment) { + mlir::registerDialect(); + mlir::MLIRContext context; + mlir::OwningModuleRef module_ref = + mlir::ModuleOp::create(mlir::UnknownLoc::get(&context)); + mlir::OpBuilder builder(module_ref->getBodyRegion()); + + llvm::SmallVector result_types; + auto cluster = builder.create( + mlir::UnknownLoc::get(&context), result_types); + cluster.setAttr(kNumCoresPerReplicaAttr, + builder.getIntegerAttr(builder.getIntegerType(64), 1)); + cluster.setAttr(kTopologyAttr, builder.getStringAttr("")); + + mlir::TF::RuntimeDevices runtime_devices; + std::string host_device; + EXPECT_TRUE(mlir::failed( + GetHostDeviceOutsideComputation(runtime_devices, cluster, &host_device))); +} + +TEST(TPURewriteDeviceUtilTest, TestGetHostDeviceFailBadDeviceAssignment) { + 
mlir::registerDialect(); + mlir::MLIRContext context; + mlir::OwningModuleRef module_ref = + mlir::ModuleOp::create(mlir::UnknownLoc::get(&context)); + mlir::OpBuilder builder(module_ref->getBodyRegion()); + + llvm::SmallVector result_types; + auto cluster = builder.create( + mlir::UnknownLoc::get(&context), result_types); + cluster.setAttr(kNumCoresPerReplicaAttr, + builder.getIntegerAttr(builder.getIntegerType(64), 1)); + cluster.setAttr(kTopologyAttr, builder.getStringAttr("")); + cluster.setAttr(kDeviceAssignmentAttr, + builder.getStrArrayAttr(llvm::ArrayRef( + {"bad_device_assigment"}))); + + mlir::TF::RuntimeDevices runtime_devices; + std::string host_device; + EXPECT_TRUE(mlir::failed( + GetHostDeviceOutsideComputation(runtime_devices, cluster, &host_device))); +} + +TEST(TPURewriteDeviceUtilTest, TestGetHostDeviceFailBadDeviceName) { + mlir::registerDialect(); + mlir::MLIRContext context; + mlir::OwningModuleRef module_ref = + mlir::ModuleOp::create(mlir::UnknownLoc::get(&context)); + mlir::OpBuilder builder(module_ref->getBodyRegion()); + module_ref->setAttr( + "tf.devices", builder.getStrArrayAttr( + llvm::ArrayRef({"bad_device_name"}))); + + llvm::SmallVector result_types; + auto cluster = builder.create( + mlir::UnknownLoc::get(&context), result_types); + cluster.setAttr(kNumCoresPerReplicaAttr, + builder.getIntegerAttr(builder.getIntegerType(64), 1)); + cluster.setAttr(kTopologyAttr, builder.getStringAttr("")); + cluster.setAttr(kDeviceAssignmentAttr, builder.getArrayAttr({})); + + mlir::TF::RuntimeDevices runtime_devices; + GetDevicesFromOp(*module_ref, &runtime_devices); + std::string host_device; + EXPECT_TRUE(mlir::failed( + GetHostDeviceOutsideComputation(runtime_devices, cluster, &host_device))); +} + +TEST(TPURewriteDeviceUtilTest, TestGetHostDeviceTPUReplicate) { + mlir::registerDialect(); + mlir::MLIRContext context; + mlir::OwningModuleRef module_ref = + mlir::ModuleOp::create(mlir::UnknownLoc::get(&context)); + mlir::OpBuilder builder(module_ref->getBodyRegion()); + + llvm::SmallDenseMap> + devices; + auto replicate = builder.create( + mlir::UnknownLoc::get(&context), /*num_replicas=*/2, devices, + llvm::ArrayRef, mlir::Type>>{}, + llvm::ArrayRef{}); + builder.setInsertionPoint(&replicate.body().front(), + replicate.body().front().begin()); + + llvm::SmallVector result_types; + auto cluster = builder.create( + mlir::UnknownLoc::get(&context), result_types); + + mlir::TF::RuntimeDevices runtime_devices; + std::string host_device; + EXPECT_TRUE(mlir::succeeded( + GetHostDeviceOutsideComputation(runtime_devices, cluster, &host_device))); + EXPECT_EQ(host_device, kTPUReplicatedHost); +} + +TEST(TPURewriteDeviceUtilTest, TestGetHostDeviceNotReplicated) { + mlir::registerDialect(); + mlir::MLIRContext context; + mlir::OwningModuleRef module_ref = + mlir::ModuleOp::create(mlir::UnknownLoc::get(&context)); + mlir::OpBuilder builder(module_ref->getBodyRegion()); + module_ref->setAttr( + "tf.devices", builder.getStrArrayAttr(llvm::ArrayRef( + {"/job:localhost/replica:0/task:0/device:TPU_SYSTEM:0", + "/job:localhost/replica:0/task:0/device:TPU:0", + "/job:worker/replica:0/task:0/device:CPU:0"}))); + + llvm::SmallVector result_types; + auto cluster = builder.create( + mlir::UnknownLoc::get(&context), result_types); + cluster.setAttr(kNumCoresPerReplicaAttr, + builder.getIntegerAttr(builder.getIntegerType(64), 1)); + cluster.setAttr(kTopologyAttr, builder.getStringAttr("")); + cluster.setAttr(kDeviceAssignmentAttr, builder.getArrayAttr({})); + + mlir::TF::RuntimeDevices 
runtime_devices; + GetDevicesFromOp(*module_ref, &runtime_devices); + std::string host_device; + EXPECT_TRUE(mlir::succeeded( + GetHostDeviceOutsideComputation(runtime_devices, cluster, &host_device))); + EXPECT_EQ(host_device, "/job:localhost/replica:0/task:0/device:CPU:0"); +} + } // anonymous namespace } // namespace tensorflow From 26dc5fc6534755a53f37d0a7f3e0bde0a6686cf3 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Tue, 23 Jun 2020 14:39:07 -0700 Subject: [PATCH 39/66] DepthwiseConv3x3 converted to new style. PiperOrigin-RevId: 317941480 Change-Id: I4c562fc8cc965dc16b34ed99b4840a81a64646cf --- .../gpu/cl/kernels/depthwise_conv_3x3.cc | 208 +++++++++--------- .../gpu/cl/kernels/depthwise_conv_3x3.h | 26 ++- .../lite/delegates/gpu/cl/tensor_type.cc | 41 ++++ .../lite/delegates/gpu/cl/tensor_type.h | 6 + 4 files changed, 162 insertions(+), 119 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc index c8ac82581c0..309ce4a9d87 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc @@ -28,55 +28,47 @@ namespace gpu { namespace cl { namespace { -std::string GenerateDepthwiseConvCode( - const OperationDef& op_def, - const std::vector& linked_operations, - const CLDevice& device, bool weights_are_buffer, bool local_mem_uploads) { - std::string c = GetCommonDefines(op_def.precision); - TensorCodeGenerator src_tensor( - "src_data", WHSPoint{"dst_size.x", "dst_size.y", "dst_size.z"}, - op_def.src_tensors[0]); - TensorCodeGenerator dst_tensor( - "dst_data", WHSPoint{"dst_size.x", "dst_size.y", "dst_size.z"}, - op_def.dst_tensors[0]); +std::string GenerateDepthwiseConvCode(const OperationDef& op_def, + const CLDevice& device, + bool weights_are_buffer, + bool local_mem_uploads, Arguments* args) { + auto src_desc = absl::make_unique(op_def.src_tensors[0]); + src_desc->SetTextureAddressMode(GetFastestZeroMode(device)); + args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); + args->AddObjectRef( + "dst_tensor", AccessType::WRITE, + absl::make_unique(op_def.dst_tensors[0])); const auto src_tensor_type = op_def.src_tensors[0].storage_type; - const auto mode = GetFastestZeroMode(device); - const bool manual_clamp = src_tensor_type == TensorStorageType::BUFFER || src_tensor_type == TensorStorageType::IMAGE_BUFFER; + std::string c = GetCommonDefines(op_def.precision); if (local_mem_uploads) { c += "__attribute__((reqd_work_group_size(8, 4, 1)))\n"; } c += "__kernel void main_function(\n"; - c += src_tensor.GetDeclaration(AccessType::READ) + ",\n"; - if (weights_are_buffer) { - c += " __global FLT4* filters\n"; - } else { - c += " __read_only image2d_t filters\n"; - } - c += GetArgsDeclaration(linked_operations); - c += dst_tensor.GetDeclaration(AccessType::WRITE) + ",\n"; - c += " int4 dst_size\n"; - c += ") {\n"; + c += "$0) {\n"; c += " int X = get_global_id(0) * 2;\n"; c += " int Y = get_global_id(1) * 2;\n"; - c += " int Z = get_global_id(2);\n"; + c += " int S = get_global_id(2);\n"; c += " ACCUM_FLT4 r0 = (ACCUM_FLT4)(0.0f);\n"; c += " ACCUM_FLT4 r1 = (ACCUM_FLT4)(0.0f);\n"; c += " ACCUM_FLT4 r2 = (ACCUM_FLT4)(0.0f);\n"; c += " ACCUM_FLT4 r3 = (ACCUM_FLT4)(0.0f);\n"; if (!local_mem_uploads) { - c += " if (X >= dst_size.x || Y >= dst_size.y || Z >= dst_size.z) " - "return;\n"; + c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() " + "|| S >= 
args.dst_tensor.Slices()) { \n"; + c += " return; \n"; + c += " } \n"; } if (local_mem_uploads) { c += " __local FLT4 f[10];\n"; - c += " event_t e = async_work_group_copy(f, filters + Z * 10, 10, 0);\n"; + c += " event_t e = async_work_group_copy(f, args.weights.GetPtr() + S * " + "10, 10, 0);\n"; c += " wait_group_events(1, &e);\n"; } else if (weights_are_buffer) { - c += " __global FLT4* f = filters + Z * 10;\n"; + c += " __global FLT4* f = args.weights.GetPtr() + S * 10;\n"; } c += " FLT4 s0;\n"; c += " FLT4 s1;\n"; @@ -87,15 +79,15 @@ std::string GenerateDepthwiseConvCode( std::string xc[4] = {"X - 1", "X", "X + 1", "X + 2"}; std::string yc[4] = {"Y - 1", "Y", "Y + 1", "Y + 2"}; if (!weights_are_buffer) { - c += " FLT4 f0 = READ_IMAGE(filters, smp_none, (int2)(0, Z));\n"; - c += " FLT4 f1 = READ_IMAGE(filters, smp_none, (int2)(1, Z));\n"; - c += " FLT4 f2 = READ_IMAGE(filters, smp_none, (int2)(2, Z));\n"; - c += " FLT4 f3 = READ_IMAGE(filters, smp_none, (int2)(3, Z));\n"; - c += " FLT4 f4 = READ_IMAGE(filters, smp_none, (int2)(4, Z));\n"; - c += " FLT4 f5 = READ_IMAGE(filters, smp_none, (int2)(5, Z));\n"; - c += " FLT4 f6 = READ_IMAGE(filters, smp_none, (int2)(6, Z));\n"; - c += " FLT4 f7 = READ_IMAGE(filters, smp_none, (int2)(7, Z));\n"; - c += " FLT4 f8 = READ_IMAGE(filters, smp_none, (int2)(8, Z));\n"; + c += " FLT4 f0 = args.weights.Read(0, S);\n"; + c += " FLT4 f1 = args.weights.Read(1, S);\n"; + c += " FLT4 f2 = args.weights.Read(2, S);\n"; + c += " FLT4 f3 = args.weights.Read(3, S);\n"; + c += " FLT4 f4 = args.weights.Read(4, S);\n"; + c += " FLT4 f5 = args.weights.Read(5, S);\n"; + c += " FLT4 f6 = args.weights.Read(6, S);\n"; + c += " FLT4 f7 = args.weights.Read(7, S);\n"; + c += " FLT4 f8 = args.weights.Read(8, S);\n"; } if (manual_clamp) { c += " int x0 = X - 1;\n"; @@ -106,25 +98,25 @@ std::string GenerateDepthwiseConvCode( c += " int y1 = Y;\n"; c += " int y2 = Y + 1;\n"; c += " int y3 = Y + 2;\n"; - c += " bool x0_in = x0 >= 0 && x0 < dst_size.x;\n"; - c += " bool x1_in = x1 >= 0 && x1 < dst_size.x;\n"; - c += " bool x2_in = x2 >= 0 && x2 < dst_size.x;\n"; - c += " bool x3_in = x3 >= 0 && x3 < dst_size.x;\n"; - c += " bool y0_in = y0 >= 0 && y0 < dst_size.y;\n"; - c += " bool y1_in = y1 >= 0 && y1 < dst_size.y;\n"; - c += " bool y2_in = y2 >= 0 && y2 < dst_size.y;\n"; - c += " bool y3_in = y3 >= 0 && y3 < dst_size.y;\n"; - c += " x0 = clamp(x0, 0, dst_size.x - 1);\n"; - c += " x1 = clamp(x1, 0, dst_size.x - 1);\n"; - c += " x2 = clamp(x2, 0, dst_size.x - 1);\n"; - c += " x3 = clamp(x3, 0, dst_size.x - 1);\n"; - c += " y0 = clamp(y0, 0, dst_size.y - 1);\n"; - c += " y1 = clamp(y1, 0, dst_size.y - 1);\n"; - c += " y2 = clamp(y2, 0, dst_size.y - 1);\n"; - c += " y3 = clamp(y3, 0, dst_size.y - 1);\n"; + c += " bool x0_in = x0 >= 0 && x0 < args.dst_tensor.Width();\n"; + c += " bool x1_in = x1 >= 0 && x1 < args.dst_tensor.Width();\n"; + c += " bool x2_in = x2 >= 0 && x2 < args.dst_tensor.Width();\n"; + c += " bool x3_in = x3 >= 0 && x3 < args.dst_tensor.Width();\n"; + c += " bool y0_in = y0 >= 0 && y0 < args.dst_tensor.Height();\n"; + c += " bool y1_in = y1 >= 0 && y1 < args.dst_tensor.Height();\n"; + c += " bool y2_in = y2 >= 0 && y2 < args.dst_tensor.Height();\n"; + c += " bool y3_in = y3 >= 0 && y3 < args.dst_tensor.Height();\n"; + c += " x0 = clamp(x0, 0, args.dst_tensor.Width() - 1);\n"; + c += " x1 = clamp(x1, 0, args.dst_tensor.Width() - 1);\n"; + c += " x2 = clamp(x2, 0, args.dst_tensor.Width() - 1);\n"; + c += " x3 = clamp(x3, 0, args.dst_tensor.Width() - 1);\n"; + c 
+= " y0 = clamp(y0, 0, args.dst_tensor.Height() - 1);\n"; + c += " y1 = clamp(y1, 0, args.dst_tensor.Height() - 1);\n"; + c += " y2 = clamp(y2, 0, args.dst_tensor.Height() - 1);\n"; + c += " y3 = clamp(y3, 0, args.dst_tensor.Height() - 1);\n"; if (src_tensor_type == TensorStorageType::BUFFER) { - c += " __global FLT4* src_loc = src_data + Z * dst_size.x * " - "dst_size.y;\n"; + c += " __global FLT4* src_loc = " + "args.src_tensor.GetPtrWithSliceOffset(S);\n"; } xc[0] = "x0"; xc[1] = "x1"; @@ -150,29 +142,29 @@ std::string GenerateDepthwiseConvCode( auto read_4x_line = [&](int y) { if (src_tensor_type == TensorStorageType::BUFFER) { const std::string y_in = "y" + std::to_string(y) + "_in"; - c += " s0 = src_loc[" + yc[y] + " * dst_size.x + " + xc[0] + - "] * (FLT)(x0_in && " + y_in + ");\n"; - c += " s1 = src_loc[" + yc[y] + " * dst_size.x + " + xc[1] + - "] * (FLT)(x1_in && " + y_in + ");\n"; - c += " s2 = src_loc[" + yc[y] + " * dst_size.x + " + xc[2] + - "] * (FLT)(x2_in && " + y_in + ");\n"; - c += " s3 = src_loc[" + yc[y] + " * dst_size.x + " + xc[3] + - "] * (FLT)(x3_in && " + y_in + ");\n"; + c += " s0 = src_loc[args.src_tensor.GetWHOffset(" + xc[0] + ", " + + yc[y] + ")] * (FLT)(x0_in && " + y_in + ");\n"; + c += " s1 = src_loc[args.src_tensor.GetWHOffset(" + xc[1] + ", " + + yc[y] + ")] * (FLT)(x1_in && " + y_in + ");\n"; + c += " s2 = src_loc[args.src_tensor.GetWHOffset(" + xc[2] + ", " + + yc[y] + ")] * (FLT)(x2_in && " + y_in + ");\n"; + c += " s3 = src_loc[args.src_tensor.GetWHOffset(" + xc[3] + ", " + + yc[y] + ")] * (FLT)(x3_in && " + y_in + ");\n"; } else if (src_tensor_type == TensorStorageType::IMAGE_BUFFER) { const std::string y_in = "y" + std::to_string(y) + "_in"; - c += " s0 = " + src_tensor.ReadWHS(xc[0], yc[y], "Z", mode) + - " * (FLT)(x0_in && " + y_in + ");\n"; - c += " s1 = " + src_tensor.ReadWHS(xc[1], yc[y], "Z", mode) + - " * (FLT)(x1_in && " + y_in + ");\n"; - c += " s2 = " + src_tensor.ReadWHS(xc[2], yc[y], "Z", mode) + - " * (FLT)(x2_in && " + y_in + ");\n"; - c += " s3 = " + src_tensor.ReadWHS(xc[3], yc[y], "Z", mode) + - " * (FLT)(x3_in && " + y_in + ");\n"; + c += " s0 = args.src_tensor.Read(" + xc[0] + ", " + yc[y] + + ", S) * (FLT)(x0_in && " + y_in + ");\n"; + c += " s1 = args.src_tensor.Read(" + xc[1] + ", " + yc[y] + + ", S) * (FLT)(x1_in && " + y_in + ");\n"; + c += " s2 = args.src_tensor.Read(" + xc[2] + ", " + yc[y] + + ", S) * (FLT)(x2_in && " + y_in + ");\n"; + c += " s3 = args.src_tensor.Read(" + xc[3] + ", " + yc[y] + + ", S) * (FLT)(x3_in && " + y_in + ");\n"; } else { - c += " s0 = " + src_tensor.ReadWHS(xc[0], yc[y], "Z", mode) + ";\n"; - c += " s1 = " + src_tensor.ReadWHS(xc[1], yc[y], "Z", mode) + ";\n"; - c += " s2 = " + src_tensor.ReadWHS(xc[2], yc[y], "Z", mode) + ";\n"; - c += " s3 = " + src_tensor.ReadWHS(xc[3], yc[y], "Z", mode) + ";\n"; + c += " s0 = args.src_tensor.Read(" + xc[0] + ", " + yc[y] + ", S);\n"; + c += " s1 = args.src_tensor.Read(" + xc[1] + ", " + yc[y] + ", S);\n"; + c += " s2 = args.src_tensor.Read(" + xc[2] + ", " + yc[y] + ", S);\n"; + c += " s3 = args.src_tensor.Read(" + xc[3] + ", " + yc[y] + ", S);\n"; } }; c += " {\n"; @@ -224,40 +216,38 @@ std::string GenerateDepthwiseConvCode( c += " r3 += TO_ACCUM_TYPE(" + W[8] + " * s3);\n"; c += " }\n"; if (!weights_are_buffer) { - c += " FLT4 bias = READ_IMAGE(filters, smp_none, (int2)(9, Z));\n"; + c += " FLT4 bias = args.weights.Read(9, S);\n"; } c += " r0 += TO_ACCUM_TYPE(" + bias + ");\n"; c += " r1 += TO_ACCUM_TYPE(" + bias + ");\n"; c += " r2 += TO_ACCUM_TYPE(" + 
bias + ");\n"; c += " r3 += TO_ACCUM_TYPE(" + bias + ");\n"; if (local_mem_uploads) { - c += " if (X >= dst_size.x || Y >= dst_size.y || Z >= dst_size.z) " - "return;\n"; + c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() " + "|| " + "S >= args.dst_tensor.Slices()) { \n"; + c += " return; \n"; + c += " } \n"; } - c += " if(X + 0 < dst_size.x && Y + 0 < dst_size.y) {\n"; + c += " if(X + 0 < args.dst_tensor.Width() && Y + 0 < " + "args.dst_tensor.Height()) {\n"; c += " FLT4 result = TO_FLT4(r0);\n"; - c += " " + dst_tensor.GetAddressWHS("address", "X + 0", "Y + 0", "Z") + "\n"; - LinkingContext context{"result", "X + 0", "Y + 0", "Z"}; - c += PostProcess(linked_operations, context); - c += " " + dst_tensor.WriteWHS("result", "X + 0", "Y + 0", "Z") + "\n"; + c += " args.dst_tensor.Write(result, X + 0, Y + 0, S)\n"; c += " }\n"; - c += " if(X + 1 < dst_size.x && Y + 0 < dst_size.y) {\n"; + c += " if(X + 1 < args.dst_tensor.Width() && Y + 0 < " + "args.dst_tensor.Height()) {\n"; c += " FLT4 result = TO_FLT4(r1);\n"; - context = {"result", "X + 1", "Y + 0", "Z"}; - c += PostProcess(linked_operations, context); - c += " " + dst_tensor.WriteWHS("result", "X + 1", "Y + 0", "Z") + "\n"; + c += " args.dst_tensor.Write(result, X + 1, Y + 0, S)\n"; c += " }\n"; - c += " if(X + 0 < dst_size.x && Y + 1 < dst_size.y) {\n"; + c += " if(X + 0 < args.dst_tensor.Width() && Y + 1 < " + "args.dst_tensor.Height()) {\n"; c += " FLT4 result = TO_FLT4(r2);\n"; - context = {"result", "X + 0", "Y + 1", "Z"}; - c += PostProcess(linked_operations, context); - c += " " + dst_tensor.WriteWHS("result", "X + 0", "Y + 1", "Z") + "\n"; + c += " args.dst_tensor.Write(result, X + 0, Y + 1, S)\n"; c += " }\n"; - c += " if(X + 1 < dst_size.x && Y + 1 < dst_size.y) {\n"; + c += " if(X + 1 < args.dst_tensor.Width() && Y + 1 < " + "args.dst_tensor.Height()) {\n"; c += " FLT4 result = TO_FLT4(r3);\n"; - context = {"result", "X + 1", "Y + 1", "Z"}; - c += PostProcess(linked_operations, context); - c += " " + dst_tensor.WriteWHS("result", "X + 1", "Y + 1", "Z") + "\n"; + c += " args.dst_tensor.Write(result, X + 1, Y + 1, S)\n"; c += " }\n"; c += "}\n"; @@ -277,9 +267,6 @@ DepthwiseConv3x3::DepthwiseConv3x3(DepthwiseConv3x3&& operation) : GPUOperation(std::move(operation)), weights_are_buffer_(operation.weights_are_buffer_), local_mem_uploads_(operation.local_mem_uploads_), - weights_tex2d_(std::move(operation.weights_tex2d_)), - weights_buf_(std::move(operation.weights_buf_)), - weights_(operation.weights_), kernel_(std::move(operation.kernel_)), work_group_size_(operation.work_group_size_) {} @@ -287,9 +274,6 @@ DepthwiseConv3x3& DepthwiseConv3x3::operator=(DepthwiseConv3x3&& operation) { if (this != &operation) { std::swap(weights_are_buffer_, operation.weights_are_buffer_); std::swap(local_mem_uploads_, operation.local_mem_uploads_); - weights_tex2d_ = std::move(operation.weights_tex2d_); - weights_buf_ = std::move(operation.weights_buf_); - std::swap(weights_, operation.weights_); kernel_ = std::move(operation.kernel_); std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); @@ -300,8 +284,15 @@ DepthwiseConv3x3& DepthwiseConv3x3::operator=(DepthwiseConv3x3&& operation) { absl::Status DepthwiseConv3x3::Compile( const CreationContext& creation_context) { std::string code = GenerateDepthwiseConvCode( - definition_, linked_operations_, *creation_context.device, - weights_are_buffer_, local_mem_uploads_); + definition_, *creation_context.device, 
weights_are_buffer_, + local_mem_uploads_, &args_); + std::string element_wise_code; + RETURN_IF_ERROR( + MergeOperations(linked_operations_, &args_, &element_wise_code)); + RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), + {{"dst_tensor", element_wise_code}}, + &code)); + std::vector options; if (definition_.precision == CalculationsPrecision::F16 && creation_context.device->IsPowerVR()) { @@ -313,13 +304,10 @@ absl::Status DepthwiseConv3x3::Compile( } absl::Status DepthwiseConv3x3::BindArguments() { - kernel_.ResetBindingCounter(); - RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr())); - RETURN_IF_ERROR(kernel_.SetMemoryAuto(weights_)); - RETURN_IF_ERROR(BindArgs(&kernel_, linked_operations_)); - RETURN_IF_ERROR(kernel_.SetMemoryAuto(dst_[0]->GetMemoryPtrForWriting())); - RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWHSB())); - return absl::OkStatus(); + RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); + RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); + RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); + return args_.Bind(kernel_.kernel()); } int3 DepthwiseConv3x3::GetGridSize() const { diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h index 1ab17e3048c..9cb2ac41c87 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h @@ -71,9 +71,6 @@ class DepthwiseConv3x3 : public GPUOperation { bool weights_are_buffer_; bool local_mem_uploads_; - Texture2D weights_tex2d_; - Buffer weights_buf_; - cl_mem weights_; CLKernel kernel_; int3 work_group_size_ = int3(8, 4, 1); @@ -90,17 +87,19 @@ absl::Status DepthwiseConv3x3::UploadWeightsAndBiases( const bool fp32_weights = definition_.precision == CalculationsPrecision::F32; const int float4_size = fp32_weights ? 16 : 8; + Texture2D weights_tex2d; + Buffer weights_buf; if (fp32_weights) { std::vector gpu_data(elements_count); RearrangeWeightsAndBiasesData(weights, biases, absl::MakeSpan(gpu_data)); if (weights_are_buffer_) { RETURN_IF_ERROR(CreateReadOnlyBuffer(float4_size * elements_count, gpu_data.data(), context, - &weights_buf_)); + &weights_buf)); } else { RETURN_IF_ERROR(CreateTexture2DRGBA( definition_.GetDataType(), texture_width, texture_height, - gpu_data.data(), context, &weights_tex2d_)); + gpu_data.data(), context, &weights_tex2d)); } } else { std::vector gpu_data(elements_count); @@ -108,18 +107,27 @@ absl::Status DepthwiseConv3x3::UploadWeightsAndBiases( if (weights_are_buffer_) { RETURN_IF_ERROR(CreateReadOnlyBuffer(float4_size * elements_count, gpu_data.data(), context, - &weights_buf_)); + &weights_buf)); } else { RETURN_IF_ERROR(CreateTexture2DRGBA( definition_.GetDataType(), texture_width, texture_height, - gpu_data.data(), context, &weights_tex2d_)); + gpu_data.data(), context, &weights_tex2d)); } } if (weights_are_buffer_) { - weights_ = weights_buf_.GetMemoryPtr(); + BufferDescriptor desc; + desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16; + desc.element_size = 4; + args_.AddObject("weights", AccessType::READ, + absl::make_unique(std::move(weights_buf)), + absl::make_unique(desc)); } else { - weights_ = weights_tex2d_.GetMemoryPtr(); + Texture2DDescriptor desc; + desc.element_type = fp32_weights ? 
DataType::FLOAT32 : DataType::FLOAT16; + args_.AddObject("weights", AccessType::READ, + absl::make_unique(std::move(weights_tex2d)), + absl::make_unique(desc)); } return absl::OkStatus(); diff --git a/tensorflow/lite/delegates/gpu/cl/tensor_type.cc b/tensorflow/lite/delegates/gpu/cl/tensor_type.cc index 0c3a1e3508c..ef49f67cf77 100644 --- a/tensorflow/lite/delegates/gpu/cl/tensor_type.cc +++ b/tensorflow/lite/delegates/gpu/cl/tensor_type.cc @@ -172,6 +172,10 @@ absl::Status TensorDescriptor::PerformSelector( return PerformWriteLinearSelector(args, result); } else if (selector == "GetAddress") { return PerformGetAddressSelector(args, result); + } else if (selector == "GetPtrWithSliceOffset") { + return PerformGetPtrWithSliceOffsetSelector(args, result); + } else if (selector == "GetWHOffset") { + return PerformGetWHOffsetSelector(args, result); } else { return absl::NotFoundError(absl::StrCat( "TensorDescriptor don't have selector with name - ", selector)); @@ -351,6 +355,43 @@ absl::Status TensorDescriptor::PerformGetAddressSelector( return absl::OkStatus(); } +absl::Status TensorDescriptor::PerformGetPtrWithSliceOffsetSelector( + const std::vector& args, std::string* result) const { + if (storage_type != TensorStorageType::BUFFER) { + return absl::InvalidArgumentError( + "GetPtrWithSliceOffset selector can be used only with BUFFER"); + } + if (args.size() != 1) { + return absl::NotFoundError(absl::StrCat( + "GetPtrWithSliceOffset require one argument(slice coordinate), but ", + args.size(), " was passed")); + } + const std::string width = IsBatchedWidth() ? "width_batched" : "width"; + if (HasAxis(Axis::DEPTH)) { + *result = + absl::StrCat("buffer + ", args[0], " * ", width, " * height * depth"); + } else { + *result = absl::StrCat("buffer + ", args[0], " * ", width, " * height"); + } + return absl::OkStatus(); +} + +absl::Status TensorDescriptor::PerformGetWHOffsetSelector( + const std::vector& args, std::string* result) const { + if (storage_type != TensorStorageType::BUFFER) { + return absl::InvalidArgumentError( + "GetWHOffset selector can be used only with BUFFER"); + } + if (args.size() != 2) { + return absl::NotFoundError(absl::StrCat( + "GetWHOffset require two arguments(X and Y coordinates), but ", + args.size(), " was passed")); + } + const std::string width = IsBatchedWidth() ? 
"width_batched" : "width"; + *result = absl::StrCat(args[1], " * ", width, " + ", args[0]); + return absl::OkStatus(); +} + std::string TensorDescriptor::DeclareAddress(const std::string& var_name, const std::string& address) const { return absl::StrCat(StorageTypeToAddressType(), " ", var_name, " = ", address, diff --git a/tensorflow/lite/delegates/gpu/cl/tensor_type.h b/tensorflow/lite/delegates/gpu/cl/tensor_type.h index 3a1d7abb01a..12c078f1025 100644 --- a/tensorflow/lite/delegates/gpu/cl/tensor_type.h +++ b/tensorflow/lite/delegates/gpu/cl/tensor_type.h @@ -85,6 +85,12 @@ struct TensorDescriptor : public GPUObjectDescriptor { absl::Status PerformGetAddressSelector(const std::vector& args, std::string* result) const; + absl::Status PerformGetPtrWithSliceOffsetSelector( + const std::vector& args, std::string* result) const; + + absl::Status PerformGetWHOffsetSelector(const std::vector& args, + std::string* result) const; + std::string DeclareAddress(const std::string& var_name, const std::string& address) const; From ce78d27ad1dd6aa5ba749aa40b9822a554bc1f74 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Tue, 23 Jun 2020 14:41:43 -0700 Subject: [PATCH 40/66] ConvolutionTransposed3x3Thin converted to new style. PiperOrigin-RevId: 317941931 Change-Id: Iccb665ff6a34eeb4b836fbf925a5b02aa38e371f --- .../convolution_transposed_3x3_thin.cc | 152 +++++++----------- .../kernels/convolution_transposed_3x3_thin.h | 57 +++++-- .../convolution_transposed_3x3_thin_test.cc | 2 +- 3 files changed, 101 insertions(+), 110 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc index d65ff071c7e..020a99852d7 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc @@ -27,21 +27,19 @@ namespace gpu { namespace cl { namespace { -std::string GenerateConvolutionTransposedCode( - const OperationDef& op_def, const LinearStorage& biases, int src_depth, - int dst_depth, const CLDevice& device, - const std::vector& linked_operations) { - TensorCodeGenerator src_tensor( - "src_data", - WHSBPoint{"src_size.x", "src_size.y", "src_size.z", "src_size.w"}, - op_def.src_tensors[0]); - TensorCodeGenerator dst_tensor( - "dst_data", - WHSBPoint{"dst_size.x", "dst_size.y", "dst_size.z", "dst_size.w"}, - op_def.dst_tensors[0]); +std::string GenerateConvolutionTransposedCode(const OperationDef& op_def, + int src_depth, int dst_depth, + const CLDevice& device, + Arguments* args) { + auto src_desc = absl::make_unique(op_def.src_tensors[0]); + src_desc->SetTextureAddressMode(GetFastestZeroMode(device)); + args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); + args->AddObjectRef( + "dst_tensor", AccessType::WRITE, + absl::make_unique(op_def.dst_tensors[0])); + const auto src_tensor_type = op_def.src_tensors[0].storage_type; - const std::string batch_id = op_def.IsBatchSupported() ? 
"B" : ""; std::string c = GetCommonDefines(op_def.precision); switch (op_def.precision) { @@ -61,23 +59,19 @@ std::string GenerateConvolutionTransposedCode( } c += "__kernel void main_function(\n"; - c += src_tensor.GetDeclaration(AccessType::READ) + ",\n"; - c += " __constant FLT4* filters, \n"; - c += biases.GetDeclaration(); - c += GetArgsDeclaration(linked_operations); - c += dst_tensor.GetDeclaration(AccessType::WRITE) + ",\n"; - c += " int4 src_size, \n"; - c += " int4 dst_size \n"; - c += ") {\n"; + c += "$0) {\n"; if (op_def.IsBatchSupported()) { c += " int linear_id = get_global_id(0);\n"; - c += " int X = linear_id / dst_size.w;\n"; - c += " int B = linear_id % dst_size.w;\n"; + c += " int X = linear_id / args.dst_tensor.Batch();\n"; + c += " int B = linear_id % args.dst_tensor.Batch();\n"; + c += " args.dst_tensor.SetBatchRef(B);\n"; + c += " args.src_tensor.SetBatchRef(B);\n"; } else { c += " int X = get_global_id(0);\n"; } c += " int Y = get_global_id(1);\n"; - c += " if (X >= src_size.x || Y >= src_size.y) return;\n"; + c += " if (X >= args.src_tensor.Width() || Y >= args.src_tensor.Height()) " + "return;\n"; for (int d = 0; d < dst_depth; ++d) { const std::string layer = std::to_string(d); c += " ACCUM_FLT4 r" + layer + "[2][2];\n"; @@ -91,61 +85,48 @@ std::string GenerateConvolutionTransposedCode( const std::string z = std::to_string(s); c += " {\n"; if (src_tensor_type == TensorStorageType::BUFFER) { - c += " bool x_in = X + 1 < src_size.x;\n"; - c += " bool y_in = Y + 1 < src_size.y;\n"; - c += - " FLT4 src0 = " + src_tensor.ReadWHSB("X", "Y", z, batch_id) + ";\n"; + c += " bool x_in = X + 1 < args.src_tensor.Width();\n"; + c += " bool y_in = Y + 1 < args.src_tensor.Height();\n"; + c += " FLT4 src0 = args.src_tensor.Read(X, Y, " + z + ");\n"; c += " FLT4 src1 = (FLT4)(0.0);\n"; c += " FLT4 src2 = (FLT4)(0.0);\n"; c += " FLT4 src3 = (FLT4)(0.0);\n"; c += " if (x_in) {\n"; - c += " src1 = " + src_tensor.ReadWHSB("X + 1", "Y", z, batch_id) + - ";\n"; + c += " src1 = args.src_tensor.Read(X + 1, Y, " + z + ");\n"; c += " }\n"; c += " if (y_in) {\n"; - c += " src2 = " + src_tensor.ReadWHSB("X", "Y + 1", z, batch_id) + - ";\n"; + c += " src2 = args.src_tensor.Read(X, Y + 1, " + z + ");\n"; c += " }\n"; c += " if (x_in && y_in) {\n"; - c += " src3 = " + src_tensor.ReadWHSB("X + 1", "Y + 1", z, batch_id) + - ";\n"; + c += " src3 = args.src_tensor.Read(X + 1, Y + 1, " + z + ");\n"; c += " }\n"; } else if (src_tensor_type == TensorStorageType::IMAGE_BUFFER) { - c += - " " + src_tensor.GetAddressWHSB("c0", "X", "Y", z, batch_id) + ";\n"; - c += " " + src_tensor.GetAddressWHSB("c1", "X + 1", "Y", z, batch_id) + - ";\n"; - c += " " + src_tensor.GetAddressWHSB("c2", "X", "Y + 1", z, batch_id) + - ";\n"; - c += " " + - src_tensor.GetAddressWHSB("c3", "X + 1", "Y + 1", z, batch_id) + - ";\n"; - c += " bool x_in = X + 1 < src_size.x;\n"; - c += " bool y_in = Y + 1 < src_size.y;\n"; + c += " args.src_tensor.GetAddress(c0, X, Y, " + z + ");\n"; + c += " args.src_tensor.GetAddress(c1, X + 1, Y, " + z + ");\n"; + c += " args.src_tensor.GetAddress(c2, X, Y + 1, " + z + ");\n"; + c += " args.src_tensor.GetAddress(c3, X + 1, Y + 1, " + z + ");\n"; + c += " bool x_in = X + 1 < args.src_tensor.Width();\n"; + c += " bool y_in = Y + 1 < args.src_tensor.Height();\n"; c += " c1 = select(-1, c1, x_in);\n"; c += " c2 = select(-1, c2, y_in);\n"; c += " c3 = select(-1, c3, x_in && y_in);\n"; - c += " FLT4 src0 = " + src_tensor.Read("c0") + ";\n"; - c += " FLT4 src1 = " + src_tensor.Read("c1") + ";\n"; - c 
+= " FLT4 src2 = " + src_tensor.Read("c2") + ";\n"; - c += " FLT4 src3 = " + src_tensor.Read("c3") + ";\n"; + c += " FLT4 src0 = args.src_tensor.Read(c0);\n"; + c += " FLT4 src1 = args.src_tensor.Read(c1);\n"; + c += " FLT4 src2 = args.src_tensor.Read(c2);\n"; + c += " FLT4 src3 = args.src_tensor.Read(c3);\n"; } else { - const auto mode = GetFastestZeroMode(device); - c += " FLT4 src0 = " + src_tensor.ReadWHSB("X", "Y", z, batch_id, mode) + - ";\n"; - c += " FLT4 src1 = " + - src_tensor.ReadWHSB("X + 1", "Y", z, batch_id, mode) + ";\n"; - c += " FLT4 src2 = " + - src_tensor.ReadWHSB("X", "Y + 1", z, batch_id, mode) + ";\n"; - c += " FLT4 src3 = " + - src_tensor.ReadWHSB("X + 1", "Y + 1", z, batch_id, mode) + ";\n"; + c += " FLT4 src0 = args.src_tensor.Read(X, Y, " + z + ");\n"; + c += " FLT4 src1 = args.src_tensor.Read(X + 1, Y, " + z + ");\n"; + c += " FLT4 src2 = args.src_tensor.Read(X, Y + 1, " + z + ");\n"; + c += " FLT4 src3 = args.src_tensor.Read(X + 1, Y + 1, " + z + ");\n"; } for (int d = 0; d < dst_depth; ++d) { const std::string layer = std::to_string(d); const std::string f_offset = std::to_string(filters_index); filters_index++; c += " {\n"; - c += " __constant FLT4* L0 = filters + 36 * " + f_offset + ";\n"; + c += " __constant FLT4* L0 = args.weights.GetPtr() + 36 * " + f_offset + + ";\n"; c += " CONV(r" + layer + "[0][0], src0, L0, 0);\n"; c += " CONV(r" + layer + "[0][1], src0, L0, 4);\n"; c += " CONV(r" + layer + "[0][1], src1, L0, 8);\n"; @@ -164,7 +145,8 @@ std::string GenerateConvolutionTransposedCode( for (int d = 0; d < dst_depth; ++d) { const std::string layer = std::to_string(d); c += " {\n"; - c += " FLT4 bias_val = " + biases.ReadLinearFLT4(layer) + ";\n"; + c += " FLT4 bias_val = args.weights.Read(" + + std::to_string(36 * filters_index + d) + ");\n"; for (int y = 0; y < 2; ++y) { for (int x = 0; x < 2; ++x) { const std::string x_coord = "X + " + std::to_string(x); @@ -172,14 +154,8 @@ std::string GenerateConvolutionTransposedCode( c += " {\n"; c += " FLT4 result = TO_FLT4(r" + layer + "[" + std::to_string(y) + "][" + std::to_string(x) + "]) + bias_val;\n"; - const std::string x_3dcoord = op_def.IsBatchSupported() - ? 
"(" + x_coord + ") * dst_size.w + B" - : x_coord; - const LinkingContext context{"result", x_3dcoord, y_coord, layer}; - c += PostProcess(linked_operations, context); - c += " " + - dst_tensor.WriteWHSB("result", x_coord, y_coord, layer, batch_id) + - "\n"; + c += " args.dst_tensor.Write(result, " + x_coord + ", " + y_coord + + ", " + layer + ");\n"; c += " }\n"; } } @@ -200,8 +176,6 @@ ConvolutionTransposed3x3Thin::ConvolutionTransposed3x3Thin( ConvolutionTransposed3x3Thin::ConvolutionTransposed3x3Thin( ConvolutionTransposed3x3Thin&& operation) : GPUOperation(std::move(operation)), - weights_(std::move(operation.weights_)), - biases_(std::move(operation.biases_)), src_channels_(operation.src_channels_), dst_channels_(operation.dst_channels_), kernel_(std::move(operation.kernel_)), @@ -210,8 +184,6 @@ ConvolutionTransposed3x3Thin::ConvolutionTransposed3x3Thin( ConvolutionTransposed3x3Thin& ConvolutionTransposed3x3Thin::operator=( ConvolutionTransposed3x3Thin&& operation) { if (this != &operation) { - weights_ = std::move(operation.weights_); - biases_ = std::move(operation.biases_); std::swap(src_channels_, operation.src_channels_); std::swap(dst_channels_, operation.dst_channels_); kernel_ = std::move(operation.kernel_); @@ -223,25 +195,25 @@ ConvolutionTransposed3x3Thin& ConvolutionTransposed3x3Thin::operator=( absl::Status ConvolutionTransposed3x3Thin::Compile( const CreationContext& creation_context) { - const auto code = GenerateConvolutionTransposedCode( - definition_, biases_, DivideRoundUp(src_channels_, 4), - DivideRoundUp(dst_channels_, 4), *creation_context.device, - linked_operations_); + std::string code = GenerateConvolutionTransposedCode( + definition_, DivideRoundUp(src_channels_, 4), + DivideRoundUp(dst_channels_, 4), *creation_context.device, &args_); + std::string element_wise_code; + RETURN_IF_ERROR( + MergeOperations(linked_operations_, &args_, &element_wise_code)); + RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), + {{"dst_tensor", element_wise_code}}, + &code)); return creation_context.cache->GetOrCreateCLKernel( code, "main_function", *creation_context.context, *creation_context.device, &kernel_); } absl::Status ConvolutionTransposed3x3Thin::BindArguments() { - kernel_.ResetBindingCounter(); - RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr())); - RETURN_IF_ERROR(kernel_.SetMemoryAuto(weights_.GetMemoryPtr())); - RETURN_IF_ERROR(kernel_.SetMemoryAuto(biases_.GetMemoryPtr())); - RETURN_IF_ERROR(BindArgs(&kernel_, linked_operations_)); - RETURN_IF_ERROR(kernel_.SetMemoryAuto(dst_[0]->GetMemoryPtrForWriting())); - RETURN_IF_ERROR(kernel_.SetBytesAuto(src_[0]->GetWHSB())); - RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWHSB())); - return absl::OkStatus(); + RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); + RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); + RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); + return args_.Bind(kernel_.kernel()); } int3 ConvolutionTransposed3x3Thin::GetGridSize() const { @@ -282,15 +254,7 @@ absl::Status CreateConvolutionTransposed3x3Thin( } *result = ConvolutionTransposed3x3Thin(definition, attr); RETURN_IF_ERROR( - result->UploadWeights(attr.weights, creation_context.context)); - LinearStorageCreateInfo create_info; - create_info.storage_type = - DeduceLinearStorageType(definition.GetPrimaryStorageType()); - create_info.data_type = definition.GetDataType(); - create_info.name = "biases"; - create_info.aligned_size = attr.weights.shape.o; - 
RETURN_IF_ERROR(CreateLinearStorage( - create_info, attr.bias, creation_context.context, &result->biases_)); + result->UploadData(attr.weights, attr.bias, creation_context.context)); return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h index 447afb621e2..e292f416796 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h @@ -59,8 +59,9 @@ class ConvolutionTransposed3x3Thin : public GPUOperation { const OperationDef& definition, const ConvolutionTransposedAttributes& attr); template - absl::Status UploadWeights(const tflite::gpu::Tensor& weights, - CLContext* context); + absl::Status UploadData(const tflite::gpu::Tensor& weights, + const tflite::gpu::Tensor& biases, + CLContext* context); template void RearrangeWeightsData(const tflite::gpu::Tensor& weights, @@ -69,9 +70,6 @@ class ConvolutionTransposed3x3Thin : public GPUOperation { absl::Status BindArguments(); int3 GetGridSize() const; - Buffer weights_; - LinearStorage biases_; - int src_channels_; int dst_channels_; @@ -80,29 +78,58 @@ class ConvolutionTransposed3x3Thin : public GPUOperation { }; template -absl::Status ConvolutionTransposed3x3Thin::UploadWeights( - const tflite::gpu::Tensor& weights, CLContext* context) { +absl::Status ConvolutionTransposed3x3Thin::UploadData( + const tflite::gpu::Tensor& weights, + const tflite::gpu::Tensor& biases, CLContext* context) { const int src_depth = DivideRoundUp(src_channels_, 4); const int dst_depth = DivideRoundUp(dst_channels_, 4); const int kernel_x = 3; // This operation support only 3x3 kernel const int kernel_y = 3; const int flt4_count = kernel_x * kernel_y * src_depth * dst_depth * 4; - const int flt4_size = definition_.precision == CalculationsPrecision::F32 - ? sizeof(float4) - : sizeof(half4); + const bool f32_weights = definition_.precision == CalculationsPrecision::F32; - if (definition_.GetDataType() == DataType::FLOAT32) { + BufferDescriptor desc; + desc.element_type = f32_weights ? DataType::FLOAT32 : DataType::FLOAT16; + desc.element_size = 4; + desc.memory_type = MemoryType::CONSTANT; + + Buffer weights_buffer; + if (f32_weights) { std::vector gpu_data(flt4_count); RearrangeWeightsData(weights, absl::MakeSpan(gpu_data)); - return CreateReadOnlyBuffer(flt4_size * flt4_count, gpu_data.data(), - context, &weights_); + for (int i = 0; i < dst_depth; ++i) { + float4 bias_value(0.0f); + for (int c = 0; c < 4; ++c) { + int ch = i * 4 + c; + bias_value[c] = ch < weights.shape.o ? biases.data[ch] : 0.0f; + } + gpu_data.push_back(bias_value); + } + RETURN_IF_ERROR(CreateReadOnlyBuffer(sizeof(float4) * gpu_data.size(), + gpu_data.data(), context, + &weights_buffer)); } else { std::vector gpu_data(flt4_count); RearrangeWeightsData(weights, absl::MakeSpan(gpu_data)); - return CreateReadOnlyBuffer(flt4_size * flt4_count, gpu_data.data(), - context, &weights_); + for (int i = 0; i < dst_depth; ++i) { + half4 bias_value(0.0f); + for (int c = 0; c < 4; ++c) { + int ch = i * 4 + c; + bias_value[c] = ch < weights.shape.o ? 
biases.data[ch] : 0.0f;
+      }
+      gpu_data.push_back(bias_value);
+    }
+    RETURN_IF_ERROR(CreateReadOnlyBuffer(sizeof(half4) * gpu_data.size(),
+                                         gpu_data.data(), context,
+                                         &weights_buffer));
   }
+
+  args_.AddObject("weights", AccessType::READ,
+                  absl::make_unique<Buffer>(std::move(weights_buffer)),
+                  absl::make_unique<BufferDescriptor>(desc));
+
+  return absl::OkStatus();
 }
 
 template
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin_test.cc
index 1d25605582a..82d4492866d 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin_test.cc
@@ -43,7 +43,7 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposed3x3ThinSimpleWeights) {
   attr.weights.shape = OHWI(1, 3, 3, 1);
   attr.weights.data = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
   attr.bias.shape = Linear(2);
-  attr.bias.data = {0.0f};
+  attr.bias.data = {0.0f, 0.0f};
 
   for (auto storage : env_.GetSupportedStorages()) {
     for (auto precision : env_.GetSupportedPrecisions()) {

From 86caeb05ba53d568df499f099245d581f1b8b5ca Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Tue, 23 Jun 2020 14:42:11 -0700
Subject: [PATCH 41/66] Ignore other graph inputs in custom gradient

Though the inputs should not be valid, we ignore them to make it easier
for v1 code to migrate to custom gradients.

PiperOrigin-RevId: 317942016
Change-Id: I374b97097cdd44093132f93f7098c8c52c877c90
---
 tensorflow/python/ops/custom_gradient.py | 32 +++---------------------
 tensorflow/python/ops/gradients_test.py  | 21 ----------------
 2 files changed, 4 insertions(+), 49 deletions(-)

diff --git a/tensorflow/python/ops/custom_gradient.py b/tensorflow/python/ops/custom_gradient.py
index ed666840436..5f4ee055621 100644
--- a/tensorflow/python/ops/custom_gradient.py
+++ b/tensorflow/python/ops/custom_gradient.py
@@ -336,38 +336,15 @@ def _graph_mode_decorator(f, args, kwargs):
         "All variables used by a function wrapped with @custom_gradient must "
         "be `ResourceVariable`s. Ensure that no `variable_scope` is created "
         "with `use_resource=False`.")
-
-  # It is possible for the caller to pass in an input that is from a different
-  # graph. Even though this is not valid we filter these out if they are not
-  # from the output graph to make it easier for some code to migrate to custom
-  # gradients.
-  inputs = nest.flatten(args)
-  outputs = nest.flatten(result)
-  graphs = {getattr(o, "graph", None) for o in outputs}
-  # Not all results may be tensors. However, we want to ensure that all outputs
-  # are from the same graph and use that to filter the inputs.
-  graphs.discard(None)  # Discard non-graph outputs
-  if graphs:
-    if len(graphs) > 1:
-      raise ValueError("All graph outputs should be from the same graph")
-    output_graph = graphs.pop()
-    filtered_inputs = []
-    for i in inputs:
-      if i.graph != output_graph:
-        logging.warn("%s does not belong to output graph %s", i, output_graph)
-      else:
-        filtered_inputs.append(i)
-
-    inputs = filtered_inputs
-
   # The variables that grad_fn needs to return gradients for are the set of
   # variables used that are *not* part of the inputs.
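  # Illustrative note, not part of the patch: with the filtering removed
  # above, a v1-style caller may pass a tensor captured from a different
  # tf.Graph into a @tf.custom_gradient function; such inputs are now kept
  # as ordinary args rather than dropped, easing migration of v1 code.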
+ inputs = args variables_in_tape = frozenset([ v.ref() for v in variable_watcher.watched_variables() ]) - frozenset(v.ref() for v in inputs) variables_in_subgraph = frozenset([ v.ref() - for v in get_dependent_variables(input_ops=inputs, output_ops=outputs) + for v in get_dependent_variables(input_ops=inputs, output_ops=result) ]) variables = list( [v.deref() for v in variables_in_subgraph.union(variables_in_tape)]) @@ -386,7 +363,7 @@ def _graph_mode_decorator(f, args, kwargs): flat_result = nest.flatten(result) flat_result_len = len(flat_result) - all_tensors = flat_result + inputs + variables + all_tensors = flat_result + args + variables def tape_grad_fn(*result_grads): """Custom grad fn wrapper.""" @@ -538,8 +515,7 @@ def recompute_grad(f): def transpose(*t_args, **t_kwargs): """Gradient function calculation for forward mode autodiff.""" - # Just throw an error since gradients / activations are not stored on - # tape for recompute. + # Just throw an error since gradients / activations are not stored on tape for recompute. raise NotImplementedError( "recompute_grad tried to transpose grad of {}. " "Consider not using recompute_grad in forward mode" diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py index 158253d1aab..fc5f38aedba 100644 --- a/tensorflow/python/ops/gradients_test.py +++ b/tensorflow/python/ops/gradients_test.py @@ -1197,27 +1197,6 @@ class CustomGradientTest(test_util.TensorFlowTestCase, parameterized.TestCase): dw = sess.run(math_ops.reduce_sum(grads[1])) self.assertEqual(12., dw) - def testCustomGradientOtherGraphVariables(self): - with ops.Graph().as_default(): - v = variables.Variable(1.0) - - @custom_gradient.custom_gradient - def MyMultiply(x1, x2, unused_y): - result = x1 * x2 - - def Grad(dy): - # Switched the ordering here. - return [dy * x1, dy * x2] - - return result, Grad - - with ops.Graph().as_default(): - x1 = constant(3.) - x2 = constant(5.) - y = MyMultiply(x1, x2, v) - dy = gradients.gradients(y, [x1, x2]) - self.assertAllEqual([3., 5.], self.evaluate(dy)) - def testCustomGradientWithVariablesNoFalsePositives(self): @custom_gradient.custom_gradient From 7fbee9ba492298cbbaeefb483ca2369e78b32783 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 23 Jun 2020 14:54:48 -0700 Subject: [PATCH 42/66] Remove unused `MakeTypeIndex<>()`. PiperOrigin-RevId: 317944111 Change-Id: I1dda4ebe9334b17bd8162d01977c4e8e7ff0c778 --- tensorflow/core/framework/type_index.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tensorflow/core/framework/type_index.h b/tensorflow/core/framework/type_index.h index fcf68677a12..e8f715bebda 100644 --- a/tensorflow/core/framework/type_index.h +++ b/tensorflow/core/framework/type_index.h @@ -95,11 +95,6 @@ class TypeIndex { const char* name_; }; -template -inline TypeIndex MakeTypeIndex() { - return TypeIndex::Make(); -} - } // namespace tensorflow #endif // TENSORFLOW_CORE_FRAMEWORK_TYPE_INDEX_H_ From 06a954905bfa5ff23cd23330880d7598558ffcb0 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 23 Jun 2020 15:07:18 -0700 Subject: [PATCH 43/66] Internal change PiperOrigin-RevId: 317946737 Change-Id: I39415c323b1d612d14f7a165ce6b188c4b84143d --- tensorflow/python/profiler/internal/python_hooks.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/profiler/internal/python_hooks.cc b/tensorflow/python/profiler/internal/python_hooks.cc index 73bc3731290..7ad15cd921d 100644 --- a/tensorflow/python/profiler/internal/python_hooks.cc +++ b/tensorflow/python/profiler/internal/python_hooks.cc @@ -200,8 +200,12 @@ void PythonHooks::ClearProfilerInAllThreads() { void PythonHooks::EnableTraceMe(bool enable) { const char* kModuleName = "tensorflow.python.profiler.trace"; - auto trace_module = py::module::import(kModuleName); - trace_module.attr("enabled") = py::bool_(enable); + try { + auto trace_module = py::module::import(kModuleName); + trace_module.attr("enabled") = py::bool_(enable); + } catch (const py::error_already_set& e) { + LOG(ERROR) << "Can't import " << kModuleName; + } } } // namespace profiler From 8535dafb37ec4ce5c7272ffa4b8b4c491d44e999 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 23 Jun 2020 15:27:04 -0700 Subject: [PATCH 44/66] Internal change PiperOrigin-RevId: 317950322 Change-Id: I83c81973a220b74c015a8571c4f1d50b4ede91db --- tensorflow/python/framework/func_graph.py | 2 +- .../experimental/autocast_variable.py | 103 +++++++++--------- .../experimental/autocast_variable_test.py | 31 ++---- 3 files changed, 58 insertions(+), 78 deletions(-) diff --git a/tensorflow/python/framework/func_graph.py b/tensorflow/python/framework/func_graph.py index 55a1a358458..e8e8fcbf081 100644 --- a/tensorflow/python/framework/func_graph.py +++ b/tensorflow/python/framework/func_graph.py @@ -187,7 +187,7 @@ class FuncGraph(ops.Graph): self.inputs = [] self.outputs = [] self.control_outputs = [] - self.control_captures = object_identity.ObjectIdentitySet() + self.control_captures = set() self.structured_input_signature = None self.structured_outputs = None self._weak_variables = [] diff --git a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py index b4415fb7f78..57e8ced65a0 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py +++ b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py @@ -188,87 +188,61 @@ class AutoCastVariable(variables.Variable, core.Tensor): def constraint(self): return self._variable.constraint - def _apply_assign_update(self, - update_fn, - value, - use_locking=None, - name=None, - read_value=True): - if ops.executing_eagerly_outside_functions(): - assign_op = update_fn(value, use_locking, name, False) - return self if read_value else assign_op - - # Fallback to wrapping the returned variable in graph mode if possible - assign_var = update_fn(value, use_locking, name, read_value) - if read_value and resource_variable_ops.is_resource_variable(assign_var): - return create_autocast_variable(assign_var) - return assign_var - - def _apply_update(self, update_fn, *args, **kwargs): - update_var = update_fn(*args, **kwargs) - if ops.executing_eagerly_outside_functions(): - return self - - # Fallback to wrapping the returned variable in graph mode if possible - if resource_variable_ops.is_resource_variable(update_var): - return create_autocast_variable(update_var) - return update_var - def assign(self, value, use_locking=None, 
name=None, read_value=True): - return self._apply_assign_update(self._variable.assign, value, use_locking, - name, read_value) + assign_op = self._variable.assign(value, use_locking, name, read_value) + return _maybe_wrap(assign_op, wrap=read_value) def assign_add(self, delta, use_locking=None, name=None, read_value=True): - return self._apply_assign_update(self._variable.assign_add, delta, - use_locking, name, read_value) + assign_op = self._variable.assign_add(delta, use_locking, name, read_value) + return _maybe_wrap(assign_op, wrap=read_value) def assign_sub(self, delta, use_locking=None, name=None, read_value=True): - return self._apply_assign_update(self._variable.assign_sub, delta, - use_locking, name, read_value) + assign_op = self._variable.assign_sub(delta, use_locking, name, read_value) + return _maybe_wrap(assign_op, wrap=read_value) def scatter_sub(self, sparse_delta, use_locking=False, name=None): - return self._apply_update(self._variable.scatter_sub, sparse_delta, - use_locking, name) + var = self._variable.scatter_sub(sparse_delta, use_locking, name) + return _maybe_wrap(var) def scatter_add(self, sparse_delta, use_locking=False, name=None): - return self._apply_update(self._variable.scatter_add, sparse_delta, - use_locking, name) + var = self._variable.scatter_add(sparse_delta, use_locking, name) + return _maybe_wrap(var) def scatter_max(self, sparse_delta, use_locking=False, name=None): - return self._apply_update(self._variable.scatter_max, sparse_delta, - use_locking, name) + var = self._variable.scatter_max(sparse_delta, use_locking, name) + return _maybe_wrap(var) def scatter_min(self, sparse_delta, use_locking=False, name=None): - return self._apply_update(self._variable.scatter_min, sparse_delta, - use_locking, name) + var = self._variable.scatter_min(sparse_delta, use_locking, name) + return _maybe_wrap(var) def scatter_mul(self, sparse_delta, use_locking=False, name=None): - return self._apply_update(self._variable.scatter_mul, sparse_delta, - use_locking, name) + var = self._variable.scatter_mul(sparse_delta, use_locking, name) + return _maybe_wrap(var) def scatter_div(self, sparse_delta, use_locking=False, name=None): - return self._apply_update(self._variable.scatter_div, sparse_delta, - use_locking, name) + var = self._variable.scatter_div(sparse_delta, use_locking, name) + return _maybe_wrap(var) def scatter_update(self, sparse_delta, use_locking=False, name=None): - return self._apply_update(self._variable.scatter_update, sparse_delta, - use_locking, name) + var = self._variable.scatter_update(sparse_delta, use_locking, name) + return _maybe_wrap(var) def batch_scatter_update(self, sparse_delta, use_locking=False, name=None): - return self._apply_update(self._variable.batch_scatter_update, sparse_delta, - use_locking, name) + var = self._variable.batch_scatter_update(sparse_delta, use_locking, name) + return _maybe_wrap(var) def scatter_nd_sub(self, indices, updates, name=None): - return self._apply_update(self._variable.scatter_nd_sub, indices, updates, - name) + var = self._variable.scatter_nd_sub(indices, updates, name) + return _maybe_wrap(var) def scatter_nd_add(self, indices, updates, name=None): - return self._apply_update(self._variable.scatter_nd_add, indices, updates, - name) + var = self._variable.scatter_nd_add(indices, updates, name) + return _maybe_wrap(var) def scatter_nd_update(self, indices, updates, name=None): - return self._apply_update(self._variable.scatter_nd_update, indices, - updates, name) + var = 
self._variable.scatter_nd_update(indices, updates, name) + return _maybe_wrap(var) def load(self, value, session=None): return self._variable.load(value, session) @@ -495,3 +469,24 @@ def create_autocast_variable(variable): # pylint: enable=missing-format-attribute return AutoCastDistributedVariable(variable) + + +def _maybe_wrap(variable, wrap=True): + """Creates an AutoCastVariable that wraps another variable if applicable. + + This function is used to wrap the return value of AutoCastVariable.assign. + Unfortunately MirroredVariable.assign will (incorrectly) return a Mirrored + value instead of a MirroredVariable. So we cannot properly wrap it in an + AutoCastVariable. We return the original variable in that case. + + Args: + variable: A tf.Variable or op. + wrap: A boolean to define whether to wrap the variable in an + AutoCastVariable or not. + + Returns: + An AutoCastVariable if wrap is True and variable is a resource variable. + """ + if wrap and resource_variable_ops.is_resource_variable(variable): + return create_autocast_variable(variable) + return variable diff --git a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py index ad5d782c2c6..964118136d4 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py +++ b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py @@ -305,8 +305,8 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): self.assertAllClose(3., self.evaluate(x.assign_sub(3.))) # Assign multiple times - # This currently doesn't work in graph mode if a strategy is used - if not ds_context.has_strategy() or context.executing_eagerly(): + # This currently only works if no strategy is used + if not ds_context.has_strategy(): assign = x.assign(1.) self.assertAllClose(1., self.evaluate(assign)) self.assertAllClose(0., self.evaluate(assign.assign(0.))) @@ -344,23 +344,6 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): # assign still expect float32 value even if in float16 scope run_and_check() - @combinations.generate(maybe_distribute) - def test_assign_tf_function(self, distribution): - if not context.executing_eagerly(): - self.skipTest('Test is not compatible with graph mode') - - with distribution.scope(): - x = get_var(0., dtypes.float32) - x = autocast_variable.create_autocast_variable(x) - - @def_function.function - def run_assign(): - return x.assign(1.).assign_add(3.).assign_add(3.).assign_sub(2.) - - with ops.get_default_graph()._enable_auto_casting_variables( - dtypes.float16): - self.assertAllClose(5., self.evaluate(run_assign())) - @combinations.generate(maybe_distribute) def test_assign_stays_in_true_dtype(self, distribution): with distribution.scope(): @@ -375,16 +358,18 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): dtypes.float16): # Variable should be increased, despite it appearing to be the same # float16 value. - self.evaluate(x.assign(1. + small_tensor)) + self.assertEqual(1. + small_val, + self.evaluate(x.assign(1. + small_tensor))) self.assertEqual(1., self.evaluate(x.value())) - self.assertEqual(1. + small_val, self.evaluate(x)) + self.assertEqual(1. + small_val, self.evaluate(x.value())) self.evaluate(x.assign(1.)) with ops.get_default_graph()._enable_auto_casting_variables( dtypes.float16): - self.evaluate(x.assign_add(small_tensor)) + self.assertEqual(1. 
+ small_val, + self.evaluate(x.assign_add(small_tensor))) self.assertEqual(1., self.evaluate(x.value())) - self.assertEqual(1. + small_val, self.evaluate(x)) + self.assertEqual(1. + small_val, self.evaluate(x.value())) @combinations.generate(maybe_distribute) def test_checkpoint(self, distribution): From ab05b8d7776e04e6e483c5b0bc7d7358df3ec967 Mon Sep 17 00:00:00 2001 From: Dero Gharibian Date: Tue, 23 Jun 2020 15:30:46 -0700 Subject: [PATCH 45/66] Replaced extern inline with static inline to mitigate duplicate symbols in cgo PiperOrigin-RevId: 317950897 Change-Id: Ia3cb17d5946a969187d8f1a81ff4c77844dcde3a --- tensorflow/core/platform/ctstring_internal.h | 32 ++++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tensorflow/core/platform/ctstring_internal.h b/tensorflow/core/platform/ctstring_internal.h index 69338e6e4b7..f75fd04f955 100644 --- a/tensorflow/core/platform/ctstring_internal.h +++ b/tensorflow/core/platform/ctstring_internal.h @@ -136,7 +136,7 @@ typedef struct TF_TString { // NOLINT // _Static_assert(CHAR_BIT == 8); // _Static_assert(sizeof(TF_TString) == 24); -extern inline TF_TString_Type TF_TString_GetType(const TF_TString *str) { +static inline TF_TString_Type TF_TString_GetType(const TF_TString *str) { return (TF_TString_Type)(str->u.raw.raw[0] & TF_TSTR_TYPE_MASK); // NOLINT } @@ -168,12 +168,12 @@ static inline size_t TF_TString_ToInternalSizeT(size_t size, #endif // TF_TSTRING_LITTLE_ENDIAN } -extern inline void TF_TString_Init(TF_TString *str) { +static inline void TF_TString_Init(TF_TString *str) { str->u.smll.size = 0; str->u.smll.str[0] = '\0'; } -extern inline void TF_TString_Dealloc(TF_TString *str) { +static inline void TF_TString_Dealloc(TF_TString *str) { if (TF_TString_GetType(str) == TF_TSTR_LARGE && str->u.large.ptr != NULL) { // NOLINT free(str->u.large.ptr); @@ -181,7 +181,7 @@ extern inline void TF_TString_Dealloc(TF_TString *str) { } } -extern inline size_t TF_TString_GetSize(const TF_TString *str) { +static inline size_t TF_TString_GetSize(const TF_TString *str) { switch (TF_TString_GetType(str)) { case TF_TSTR_SMALL: return str->u.smll.size >> 2; @@ -196,7 +196,7 @@ extern inline size_t TF_TString_GetSize(const TF_TString *str) { } } -extern inline size_t TF_TString_GetCapacity(const TF_TString *str) { +static inline size_t TF_TString_GetCapacity(const TF_TString *str) { switch (TF_TString_GetType(str)) { case TF_TSTR_SMALL: return TF_TString_SmallCapacity; @@ -209,7 +209,7 @@ extern inline size_t TF_TString_GetCapacity(const TF_TString *str) { } } -extern inline const char *TF_TString_GetDataPointer(const TF_TString *str) { +static inline const char *TF_TString_GetDataPointer(const TF_TString *str) { switch (TF_TString_GetType(str)) { case TF_TSTR_SMALL: return str->u.smll.str; @@ -225,7 +225,7 @@ extern inline const char *TF_TString_GetDataPointer(const TF_TString *str) { } } -extern inline char *TF_TString_ResizeUninitialized(TF_TString *str, +static inline char *TF_TString_ResizeUninitialized(TF_TString *str, size_t new_size) { size_t curr_size = TF_TString_GetSize(str); size_t copy_size = TF_min(new_size, curr_size); @@ -288,7 +288,7 @@ extern inline char *TF_TString_ResizeUninitialized(TF_TString *str, return str->u.large.ptr; } -extern inline char *TF_TString_GetMutableDataPointer(TF_TString *str) { +static inline char *TF_TString_GetMutableDataPointer(TF_TString *str) { switch (TF_TString_GetType(str)) { case TF_TSTR_SMALL: return str->u.smll.str; @@ -306,7 +306,7 @@ extern inline char 
*TF_TString_GetMutableDataPointer(TF_TString *str) { } } -extern inline void TF_TString_Reserve(TF_TString *str, size_t new_cap) { +static inline void TF_TString_Reserve(TF_TString *str, size_t new_cap) { TF_TString_Type curr_type = TF_TString_GetType(str); if (new_cap <= TF_TString_SmallCapacity) { @@ -347,7 +347,7 @@ extern inline void TF_TString_Reserve(TF_TString *str, size_t new_cap) { str->u.large.cap = new_cap; } -extern inline char *TF_TString_Resize(TF_TString *str, size_t new_size, +static inline char *TF_TString_Resize(TF_TString *str, size_t new_size, char c) { size_t curr_size = TF_TString_GetSize(str); char *cstr = TF_TString_ResizeUninitialized(str, new_size); @@ -359,7 +359,7 @@ extern inline char *TF_TString_Resize(TF_TString *str, size_t new_size, return cstr; } -extern inline void TF_TString_AssignView(TF_TString *dst, const char *src, +static inline void TF_TString_AssignView(TF_TString *dst, const char *src, size_t size) { TF_TString_Dealloc(dst); @@ -367,7 +367,7 @@ extern inline void TF_TString_AssignView(TF_TString *dst, const char *src, dst->u.view.ptr = src; } -extern inline void TF_TString_AppendN(TF_TString *dst, const char *src, +static inline void TF_TString_AppendN(TF_TString *dst, const char *src, size_t src_size) { if (!src_size) return; @@ -378,21 +378,21 @@ extern inline void TF_TString_AppendN(TF_TString *dst, const char *src, memcpy(dst_c + dst_size, src, src_size); } -extern inline void TF_TString_Append(TF_TString *dst, const TF_TString *src) { +static inline void TF_TString_Append(TF_TString *dst, const TF_TString *src) { const char *src_c = TF_TString_GetDataPointer(src); size_t size = TF_TString_GetSize(src); TF_TString_AppendN(dst, src_c, size); } -extern inline void TF_TString_Copy(TF_TString *dst, const char *src, +static inline void TF_TString_Copy(TF_TString *dst, const char *src, size_t size) { char *dst_c = TF_TString_ResizeUninitialized(dst, size); if (size) memcpy(dst_c, src, size); } -extern inline void TF_TString_Assign(TF_TString *dst, const TF_TString *src) { +static inline void TF_TString_Assign(TF_TString *dst, const TF_TString *src) { if (dst == src) return; TF_TString_Dealloc(dst); @@ -421,7 +421,7 @@ extern inline void TF_TString_Assign(TF_TString *dst, const TF_TString *src) { } } -extern inline void TF_TString_Move(TF_TString *dst, TF_TString *src) { +static inline void TF_TString_Move(TF_TString *dst, TF_TString *src) { if (dst == src) return; TF_TString_Dealloc(dst); From 83b4d04ae2621456172aaf7fa0fa54aea6fb2e81 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Tue, 23 Jun 2020 15:39:59 -0700 Subject: [PATCH 46/66] [XLA] Evaluate tf.sign for real arguments with fewer operations We can evaluate it as: (x > 0) - (x < 0) which should be cheaper than: x != x ? 0 : sign(x) PiperOrigin-RevId: 317952523 Change-Id: I7b848497c9ceedb8aba10185cdba8d9c3d3d6a3d --- tensorflow/compiler/tf2xla/kernels/unary_ops.cc | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/unary_ops.cc b/tensorflow/compiler/tf2xla/kernels/unary_ops.cc index 405c5e787da..66545fc72cf 100644 --- a/tensorflow/compiler/tf2xla/kernels/unary_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/unary_ops.cc @@ -24,6 +24,7 @@ limitations under the License. 
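// A scalar sketch of the identity this patch applies (illustration only;
// SignViaComparisons is a hypothetical helper, not part of the diff): for
// real x, (x > 0) - (x < 0) is -1, 0, or 1, and a NaN input compares false
// on both sides, yielding 0; this matches the previous
// x != x ? 0 : sign(x) formulation without the extra select.
static float SignViaComparisons(float x) {
  return static_cast<float>(x > 0.0f) - static_cast<float>(x < 0.0f);
}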
#include "tensorflow/compiler/xla/client/lib/constants.h" #include "tensorflow/compiler/xla/client/lib/math.h" #include "tensorflow/compiler/xla/client/xla_builder.h" +#include "tensorflow/compiler/xla/primitive_util.h" #include "tensorflow/core/framework/kernel_def_builder.h" namespace tensorflow { @@ -85,8 +86,20 @@ XLAJIT_MAKE_UNARY(Rsqrt, xla::Rsqrt(x)); XLAJIT_MAKE_UNARY(Sigmoid, xla::Logistic(x)); // Returns 0 if x is NaN, 0 if x is 0, -1 if x < 0 and 1 if x > 0. -XLAJIT_MAKE_UNARY(Sign, - xla::Select(xla::Ne(x, x), xla::ZerosLike(x), xla::Sign(x))); +static xla::XlaOp Sign(xla::XlaBuilder* b, xla::XlaOp x) { + return b->ReportErrorOrReturn([&]() -> xla::StatusOr { + TF_ASSIGN_OR_RETURN(auto shape, b->GetShape(x)); + if (xla::primitive_util::IsComplexType(shape.element_type())) { + return xla::Sign(x); + } + auto gt = xla::Gt(x, xla::ZerosLike(x)); + auto lt = xla::Lt(x, xla::ZerosLike(x)); + return xla::ConvertElementType(gt, shape.element_type()) - + xla::ConvertElementType(lt, shape.element_type()); + }); +} + +XLAJIT_MAKE_UNARY(Sign, Sign(b, x)); XLAJIT_MAKE_UNARY(Sinh, xla::Sinh(x)); static xla::XlaOp Softplus(xla::XlaBuilder* b, xla::XlaOp features) { From a912655abd2f8b55441c2a8396c2580ceee07a29 Mon Sep 17 00:00:00 2001 From: Xiao Yu Date: Tue, 23 Jun 2020 15:58:49 -0700 Subject: [PATCH 47/66] Fix a heap-use-after-free issue. PiperOrigin-RevId: 317955770 Change-Id: I843e4bcd9b5cac3c22893d4e0e9aa6867e18a8c4 --- tensorflow/core/common_runtime/eager/core.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/common_runtime/eager/core.cc b/tensorflow/core/common_runtime/eager/core.cc index 77d2b665f5e..0191527748b 100644 --- a/tensorflow/core/common_runtime/eager/core.cc +++ b/tensorflow/core/common_runtime/eager/core.cc @@ -197,7 +197,7 @@ Status EagerOperation::Execute(absl::Span retvals, if (device == kVariantDeviceNull) { bool pin_to_cpu; TF_RETURN_IF_ERROR(eager::MaybePinSmallOpsToCpu( - &pin_to_cpu, op_name(), + &pin_to_cpu, Name(), absl::MakeSpan( reinterpret_cast(inputs_.data()), inputs_.size()), From a24767dcaeac10dd87b01ac4de27f0f7ff1e3c55 Mon Sep 17 00:00:00 2001 From: Rick Chao Date: Tue, 23 Jun 2020 16:20:31 -0700 Subject: [PATCH 48/66] Skip testClusterResolverProperty for TPU cases and follow up with a fix. PiperOrigin-RevId: 317959686 Change-Id: I6ad671e2a5b03886e24d5db88d2cf57db35b3bd1 --- tensorflow/python/distribute/strategy_common_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/python/distribute/strategy_common_test.py b/tensorflow/python/distribute/strategy_common_test.py index 7070fbbf18f..7744364c544 100644 --- a/tensorflow/python/distribute/strategy_common_test.py +++ b/tensorflow/python/distribute/strategy_common_test.py @@ -156,6 +156,8 @@ class StrategyClusterResolverTest(test.TestCase, parameterized.TestCase): with strategy.scope(): self.assertIs(strategy.cluster_resolver, resolver) self.assertTrue(hasattr(resolver, 'cluster_spec')) + if isinstance(strategy, TPUStrategy): + self.skipTest('b/159747888') self.assertTrue(hasattr(resolver, 'environment')) self.assertTrue(hasattr(resolver, 'master')) self.assertTrue(hasattr(resolver, 'num_accelerators')) From dfe03768e01a3488e57b428e4c7f02ede66af555 Mon Sep 17 00:00:00 2001 From: Priya Gupta Date: Tue, 23 Jun 2020 16:50:55 -0700 Subject: [PATCH 49/66] Enhance the docstring for tf.distribute.Stategy.reduce API. 
PiperOrigin-RevId: 317965125 Change-Id: I46ce4c2e6a8d547d9d26c01ccb27b25394f1dc7d --- .../python/distribute/distribute_lib.py | 91 ++++++++++++++++--- 1 file changed, 79 insertions(+), 12 deletions(-) diff --git a/tensorflow/python/distribute/distribute_lib.py b/tensorflow/python/distribute/distribute_lib.py index f32427b88e0..d7893ae54f8 100644 --- a/tensorflow/python/distribute/distribute_lib.py +++ b/tensorflow/python/distribute/distribute_lib.py @@ -1217,20 +1217,85 @@ class StrategyBase(object): return self.run(fn, args=args, kwargs=kwargs, options=options) def reduce(self, reduce_op, value, axis): - """Reduce `value` across replicas. + """Reduce `value` across replicas and return result on current device. + + >>> strategy = tf.distribute.MirroredStrategy() + >>> def step_fn(): + ... i = tf.distribute.get_replica_context().replica_id_in_sync_group + ... return tf.identity(i) + >>> + >>> per_replica_result = strategy.run(step_fn) + >>> total = strategy.reduce("SUM", per_replica_result, axis=None) + >>> total + + + To see how this would look with multiple replicas, consider the same + example with MirroredStrategy with 2 GPUs: + + ```python + strategy = tf.distribute.MirroredStrategy(devices=["gpu:0", "gpu:1"]) + def step_fn(): + i = tf.distribute.get_replica_context().replica_id_in_sync_group + return tf.identity(i) + + per_replica_result = strategy.run(step_fn) + # Check devices on which per replica result is: + strategy.experimental_local_results(per_replica_result)[0].device + # /job:localhost/replica:0/task:0/device:GPU:0 + strategy.experimental_local_results(per_replica_result)[1].device + # /job:localhost/replica:0/task:0/device:GPU:1 + + total = strategy.reduce("SUM", per_replica_result, axis=None) + # Check device on which reduced result is: + total.device + # /job:localhost/replica:0/task:0/device:CPU:0 + + ``` + + This API is typically used for aggregating the results returned from + different replicas, for reporting etc. For example, loss computed from + different replicas can be averaged using this API before printing. + + Note: The result is copied to the "current" device - which would typically + be the CPU of the worker on which the program is running. For `TPUStrategy`, + it is the first TPU host. For multi client `MultiWorkerMirroredStrategy`, + this is CPU of each worker. + + There are a number of different tf.distribute APIs for reducing values + across replicas: + * `tf.distribute.ReplicaContext.all_reduce`: This differs from + `Strategy.reduce` in that it is for replica context and does + not copy the results to the host device. `all_reduce` should be typically + used for reductions inside the training step such as gradients. + * `tf.distribute.StrategyExtended.reduce_to` and + `tf.distribute.StrategyExtended.batch_reduce_to`: These APIs are more + advanced versions of `Strategy.reduce` as they allow customizing the + destination of the result. They are also called in cross replica context. + + _What should axis be?_ Given a per-replica value returned by `run`, say a per-example loss, the batch will be divided across all the replicas. This function allows you to aggregate across replicas and optionally also across - batch elements. For example, if you have a global batch size of 8 and 2 + batch elements by specifying the axis parameter accordingly. + + For example, if you have a global batch size of 8 and 2 replicas, values for examples `[0, 1, 2, 3]` will be on replica 0 and - `[4, 5, 6, 7]` will be on replica 1. 
By default, `reduce` will just - aggregate across replicas, returning `[0+4, 1+5, 2+6, 3+7]`. This is useful - when each replica is computing a scalar or some other value that doesn't - have a "batch" dimension (like a gradient). More often you will want to - aggregate across the global batch, which you can get by specifying the batch + `[4, 5, 6, 7]` will be on replica 1. With `axis=None`, `reduce` will + aggregate only across replicas, returning `[0+4, 1+5, 2+6, 3+7]`. + This is useful when each replica is computing a scalar or some other value + that doesn't have a "batch" dimension (like a gradient or loss). + ``` + strategy.reduce("sum", per_replica_result, axis=None) + ``` + + Sometimes, you will want to aggregate across both the global batch _and_ + all replicas. You can get this behavior by specifying the batch dimension as the `axis`, typically `axis=0`. In this case it would return a scalar `0+1+2+3+4+5+6+7`. + ``` + strategy.reduce("sum", per_replica_result, axis=0) + ``` If there is a last partial batch, you will need to specify an axis so that the resulting shape is consistent across replicas. So if the last @@ -1242,11 +1307,13 @@ class StrategyBase(object): which will weigh some values `1/8` and others `1/4`. Args: - reduce_op: A `tf.distribute.ReduceOp` value specifying how values should - be combined. - value: A "per replica" value, e.g. returned by `run` to - be combined into a single tensor. - axis: Specifies the dimension to reduce along within each + reduce_op: a `tf.distribute.ReduceOp` value specifying how values should + be combined. Allows using string representation of the enum such as + "SUM", "MEAN". + value: a `tf.distribute.DistributeValues` instance, e.g. returned by + `Strategy.run`, to be combined into a single tensor. It can also be a + regular tensor when used with `OneDeviceStrategy` or default strategy. + axis: specifies the dimension to reduce along within each replica's tensor. Should typically be set to the batch dimension, or `None` to only reduce across replicas (e.g. if the tensor has no batch dimension). From 7211f4c2b12fb0e4f4ce24e710900048c8a322a4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 23 Jun 2020 17:13:18 -0700 Subject: [PATCH 50/66] Add the "--define=no_tensorflow_py_deps=true" flag for the windows cpu release builds. 
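For context, a sketch of the bazel invocation these scripts end up running
(an assumption based on run.bat forwarding `--extra_build_flags` to bazel
unchanged; the target name is illustrative):

```
bazel build --config=v2 --define=no_tensorflow_py_deps=true \
    //tensorflow/tools/pip_package:build_pip_package
```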
PiperOrigin-RevId: 317968971 Change-Id: I7d4db21474d85620928f3a5ffb1e4cfebaa2be9f --- .../tools/ci_build/release/windows/cpu_py35_full/release.bat | 2 +- .../tools/ci_build/release/windows/cpu_py36_full/release.bat | 2 +- .../tools/ci_build/release/windows/cpu_py37_full/release.bat | 2 +- .../tools/ci_build/release/windows/cpu_py38_full/release.bat | 3 ++- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py35_full/release.bat b/tensorflow/tools/ci_build/release/windows/cpu_py35_full/release.bat index bd8c217ddef..02b12c7650a 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py35_full/release.bat +++ b/tensorflow/tools/ci_build/release/windows/cpu_py35_full/release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python35 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_build_flags "--config=v2 --define=no_tensorflow_py_deps=true" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py36_full/release.bat b/tensorflow/tools/ci_build/release/windows/cpu_py36_full/release.bat index 0a81a90a431..e44e6ca6e18 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py36_full/release.bat +++ b/tensorflow/tools/ci_build/release/windows/cpu_py36_full/release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python36 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_build_flags "--config=v2 --define=no_tensorflow_py_deps=true" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py37_full/release.bat b/tensorflow/tools/ci_build/release/windows/cpu_py37_full/release.bat index 9591d7aac34..c65167a5dc6 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py37_full/release.bat +++ b/tensorflow/tools/ci_build/release/windows/cpu_py37_full/release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python37 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_build_flags "--config=v2 --define=no_tensorflow_py_deps=true" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py38_full/release.bat b/tensorflow/tools/ci_build/release/windows/cpu_py38_full/release.bat index 7a7435b3713..06599fc0d8c 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py38_full/release.bat +++ b/tensorflow/tools/ci_build/release/windows/cpu_py38_full/release.bat @@ -17,4 +17,5 @@ SET PYTHON_DIRECTORY=Python38 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name 
"tensorflow_cpu" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_build_flags "--config=v2 --define=no_tensorflow_py_deps=true" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" + From 4dd7002d5697d729e281d3b05a140088361690e2 Mon Sep 17 00:00:00 2001 From: Rick Chao Date: Tue, 23 Jun 2020 17:20:39 -0700 Subject: [PATCH 51/66] MultiProcessRunner: Add more information regarding UnexpectedSubprocessExitError. PiperOrigin-RevId: 317970123 Change-Id: Ie2aff422fc7eff2bd48b6a82fab34e4b0c0bb930 --- .../python/distribute/multi_process_runner.py | 59 ++++++++++++++++--- .../distribute/multi_process_runner_test.py | 50 ++++++++++++++++ 2 files changed, 101 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/distribute/multi_process_runner.py b/tensorflow/python/distribute/multi_process_runner.py index 84b61be1ea2..4971eea93ad 100644 --- a/tensorflow/python/distribute/multi_process_runner.py +++ b/tensorflow/python/distribute/multi_process_runner.py @@ -144,6 +144,9 @@ class MultiProcessRunner(object): `signal.alarm()` api. Note that this is best effort at Python level since Python signal handler does not get executed when it runs lower level C/C++ code. So it can be delayed for arbitrarily long time. + If any of the child process is still running when `max_run_time` is up, + they will be force-terminated and a `UnexpectedSubprocessExitError` + may be raised at `join()`. grpc_fail_fast: Whether GRPC connection between processes should fail without retrying. Defaults to None, in which case the environment variable is not explicitly set. @@ -450,11 +453,19 @@ class MultiProcessRunner(object): from subprocesses' stdout and stderr. Raises: - SubprocessTimeoutError: if not all processes report status approximatelty - within `timeout` seconds. When this is raised, a - `MultiProcessRunnerResult` object can be retrieved by - `SubprocessTimeoutError`'s mpr_result attribute, which has the same - structure as above 'Returns' section describes. + SubprocessTimeoutError: if not all processes report status approximately + within `timeout` seconds. When this is raised, a + `MultiProcessRunnerResult` object can be retrieved by + `SubprocessTimeoutError`'s mpr_result attribute, which has the same + structure as above 'Returns' section describes. + UnexpectedSubprocessExitError: If any of the subprocesses did not exit + properly (for example, they exit on SIGTERM or SIGKILL signal). When + this is raised, a `MultiProcessRunnerResult` object can be retrieved by + `UnexpectedSubprocessExitError`'s mpr_result attribute, which has the + same structure as above 'Returns' section describes. If `max_run_time` + is not `None`, it is expected that some subprocesses may be + force-killed when `max_run_time` is up, and this is raised in those + cases. Exception: if there is an Exception propagated from any subprocess. """ if self._joined: @@ -478,14 +489,28 @@ class MultiProcessRunner(object): process_statuses = self._queue_to_list(self._process_status_queue) if not self._all_forced_terminated and len( process_statuses) != self._outstanding_subprocess_count: - raise RuntimeError( - 'missing statuses from %d subproceses.' % - (self._outstanding_subprocess_count - len(process_statuses))) + raise UnexpectedSubprocessExitError( + 'Missing status(es) from %d subprocess(es). See logs for details.' 
% + (self._outstanding_subprocess_count - len(process_statuses)), + self._get_mpr_result(process_statuses)) for process_status in process_statuses: assert isinstance(process_status, _ProcessStatusInfo) if not process_status.is_successful: six.reraise(*process_status.exc_info) + # Checking all the processes that are expected to exit properly. + for (task_type, task_id), p in self._processes.items(): + if self._dependence_on_chief and task_type != 'chief': + # If _dependence_on_chief, other processes may have been + # forced-terminated, which is expected. + continue + # Successfully exiting process has exit code 0. + if p.exitcode > 0: + raise UnexpectedSubprocessExitError( + 'Subprocess %s-%d exited with exit code %d. See logs for details.' % + (task_type, task_id, p.exitcode), + self._get_mpr_result(process_statuses)) + logging.info('Joining log reading threads.') for thread in self._reading_threads: thread.join() @@ -521,6 +546,8 @@ class MultiProcessRunner(object): for (task_type, task_id), p in self._processes.items(): try: os.kill(p.pid, sig) + logging.info('%s-%d terminated with signal %r.', task_type, task_id, + sig) except ProcessLookupError: logging.info('Attempting to kill %s-%d but it does not exist.', task_type, task_id) @@ -658,6 +685,9 @@ class _ProcFunc(object): self._close_streaming() + # Exit with code 0 as it's considered successful exit at this point. + sys.exit(0) + class MultiProcessPoolRunner(object): """A utility class to start a process pool to simulate a cluster. @@ -848,6 +878,19 @@ class SubprocessTimeoutError(RuntimeError): self.mpr_result = mpr_result +class UnexpectedSubprocessExitError(RuntimeError): + """An error indicating there is at least one subprocess with unexpected exit. + + When this is raised, a `MultiProcessRunnerResult` object can be retrieved by + `UnexpectedSubprocessExitError`'s mpr_result attribute. See + `MultiProcessRunner.join()` for more information. + """ + + def __init__(self, msg, mpr_result): + super(UnexpectedSubprocessExitError, self).__init__(msg) + self.mpr_result = mpr_result + + def _set_tf_config(task_type, task_id, cluster_spec, rpc_layer=None): """Set TF_CONFIG environment variable.""" tf_config_dict = { diff --git a/tensorflow/python/distribute/multi_process_runner_test.py b/tensorflow/python/distribute/multi_process_runner_test.py index 32d3ae6c84e..acec6d0c999 100644 --- a/tensorflow/python/distribute/multi_process_runner_test.py +++ b/tensorflow/python/distribute/multi_process_runner_test.py @@ -18,8 +18,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import ctypes import json import os +import sys import threading import time import unittest @@ -314,7 +316,55 @@ class MultiProcessRunnerTest(test.TestCase): self.assertTrue( any('something printed' in line for line in list_to_assert)) + def test_seg_fault_raises_error(self): + def proc_func_expected_to_seg_fault(): + ctypes.string_at(0) # Intentionally made seg fault. 
+ + with self.assertRaises( + multi_process_runner.UnexpectedSubprocessExitError) as cm: + multi_process_runner.run( + proc_func_expected_to_seg_fault, + multi_worker_test_base.create_cluster_spec(num_workers=1), + list_stdout=True) + self.assertIn('Missing status(es) from 1 subprocess(es).', + str(cm.exception)) + list_to_assert = cm.exception.mpr_result.stdout + self.assertTrue(any('SIGSEGV' in line for line in list_to_assert)) + + def test_seg_fault_in_chief_raises_error(self): + + def proc_func_expected_to_seg_fault(): + if multi_worker_test_base.get_task_type() == 'worker': + time.sleep(10000) + ctypes.string_at(0) # Intentionally made seg fault. + + with self.assertRaises( + multi_process_runner.UnexpectedSubprocessExitError) as cm: + multi_process_runner.run( + proc_func_expected_to_seg_fault, + multi_worker_test_base.create_cluster_spec( + has_chief=True, num_workers=1), + list_stdout=True) + self.assertIn('Subprocess chief-0 exited with exit code', + str(cm.exception)) + list_to_assert = cm.exception.mpr_result.stdout + self.assertTrue(any('SIGSEGV' in line for line in list_to_assert)) + + def test_non_zero_exit_code_raises_error(self): + + def proc_func_expected_to_exit_with_1(): + sys.exit(1) + + with self.assertRaises( + multi_process_runner.UnexpectedSubprocessExitError) as cm: + multi_process_runner.run( + proc_func_expected_to_exit_with_1, + multi_worker_test_base.create_cluster_spec(num_workers=1)) + self.assertIn('Missing status(es) from 1 subprocess(es).', + str(cm.exception)) + + class MultiProcessPoolRunnerTest(test.TestCase): def test_same_process_across_runs(self): From 422825f1a904b0cf0b82ccf804af7c433ca6b56a Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 23 Jun 2020 17:31:14 -0700 Subject: [PATCH 52/66] Fix Markdown table format to dispay correctly on GitHub GitHub requires a leading | for tables. PiperOrigin-RevId: 317971572 Change-Id: I0b0860e143d21fb8fa52a8421fa62b43fa9bfd04 --- tensorflow/compiler/mlir/g3doc/xla_gpu_codegen.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/mlir/g3doc/xla_gpu_codegen.md b/tensorflow/compiler/mlir/g3doc/xla_gpu_codegen.md index 06c55abf1fa..2fe109c1783 100644 --- a/tensorflow/compiler/mlir/g3doc/xla_gpu_codegen.md +++ b/tensorflow/compiler/mlir/g3doc/xla_gpu_codegen.md @@ -24,10 +24,10 @@ the codegen input. ## Tasks - | Host | Device -------------- | ------------------------ | ------------------------ -Input format | HloInstruction* (Task 1) | HloInstruction* (Task 1) -Output format | xla::Thunk (Task 2) | LLVM IR (Task 3) +| | Host | Device +| ------------- | ------------------------ | ------------------------ +| Input format | HloInstruction* (Task 1) | HloInstruction* (Task 1) +| Output format | xla::Thunk (Task 2) | LLVM IR (Task 3) * **Task 1** changes both host and device input format from HloInstruction* to LHLO. From 5a5679c8aa3645aae5a47582f40f6697a04efa9a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 23 Jun 2020 17:32:44 -0700 Subject: [PATCH 53/66] when python is not initialized , do nothing in python hooks. 
PiperOrigin-RevId: 317971811 Change-Id: Ib73f11e1c2a88dee6f11105c2ae8ab20599703a6 --- tensorflow/python/profiler/internal/python_hooks.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/profiler/internal/python_hooks.cc b/tensorflow/python/profiler/internal/python_hooks.cc index 7ad15cd921d..33e182f8de0 100644 --- a/tensorflow/python/profiler/internal/python_hooks.cc +++ b/tensorflow/python/profiler/internal/python_hooks.cc @@ -46,7 +46,7 @@ PythonHooks* PythonHooks::GetSingleton() { } void PythonHooks::Start(const PythonHooksOptions& option) { - DCHECK(Py_IsInitialized()); + if (!Py_IsInitialized()) return; if (option.enable_python_traceme || option.enable_trace_python_function) { PyGILState_STATE gil_state = PyGILState_Ensure(); if (option.enable_trace_python_function) { From 3252c965ee399aa795522f9f383805dc4aaec68f Mon Sep 17 00:00:00 2001 From: Nupur Garg Date: Tue, 23 Jun 2020 17:36:05 -0700 Subject: [PATCH 54/66] Add input array shape instructions to Keras model. PiperOrigin-RevId: 317972245 Change-Id: I9863d2e6beda85e4c0d016db541bb4341e739bc9 --- tensorflow/lite/g3doc/convert/python_api.md | 22 +++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/g3doc/convert/python_api.md b/tensorflow/lite/g3doc/convert/python_api.md index 3171306af13..0c43a795514 100644 --- a/tensorflow/lite/g3doc/convert/python_api.md +++ b/tensorflow/lite/g3doc/convert/python_api.md @@ -30,8 +30,8 @@ This document contains [example usages](#examples) of the API and ### Converting a SavedModel The following example shows how to convert a -[SavedModel](https://www.tensorflow.org/guide/saved_model) into a -TensorFlow Lite [`FlatBuffer`](https://google.github.io/flatbuffers/). +[SavedModel](https://www.tensorflow.org/guide/saved_model) into a TensorFlow +Lite [`FlatBuffer`](https://google.github.io/flatbuffers/). ```python import tensorflow as tf @@ -97,6 +97,24 @@ with tf.io.gfile.GFile('model.tflite', 'wb') as f: f.write(tflite_model) ``` +If your model requires specifying the input shape, use `tf.keras.layers.Input` +or `tf.keras.layers.InputLayer` to create a Keras model with a fixed input shape +as seen below or use the [`from_concrete_functions`](#concrete_function) +classmethod as shown in the prior section to set the shape of the input arrays +prior to conversion. + +```python +input = tf.keras.layers.Input(shape=(1), batch_size=1) +dense_layer = tf.keras.layers.Dense(units=1, input_shape=[1]) +model = tf.keras.Model(input, dense_layer(input)) +``` + +```python +model = tf.keras.models.Sequential( + [tf.keras.layers.InputLayer(input_shape=(1), batch_size=1), + tf.keras.layers.Dense(units=1, input_shape=[1])]) +``` + ### Converting a concrete function The following example shows how to convert a TensorFlow From 7db333e5545ccd6784b2e752a95b8119769e6696 Mon Sep 17 00:00:00 2001 From: Lluis-Miquel Munguia Date: Tue, 23 Jun 2020 17:44:58 -0700 Subject: [PATCH 55/66] Internal code refactoring. 
PiperOrigin-RevId: 317973409 Change-Id: Ic249b4e1380313b6c556022dc78826c3165f1d3f --- tensorflow/core/grappler/costs/BUILD | 1 + .../grappler/costs/op_level_cost_estimator.cc | 364 ++++++++++-------- .../grappler/costs/op_level_cost_estimator.h | 3 + 3 files changed, 215 insertions(+), 153 deletions(-) diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD index 02a26cdd390..257d77541e0 100644 --- a/tensorflow/core/grappler/costs/BUILD +++ b/tensorflow/core/grappler/costs/BUILD @@ -323,6 +323,7 @@ cc_library( ":cost_estimator", ":op_context", ":utils", + "@com_google_absl//absl/strings", "//third_party/eigen3", "//tensorflow/core:framework", "//tensorflow/core:protos_all_cc", diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index 6f57708a780..fb0d6ecf1d0 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/core/grappler/costs/op_level_cost_estimator.h" +#include "absl/strings/match.h" #include "third_party/eigen3/Eigen/Core" #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/attr_value_util.h" @@ -23,6 +24,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/grappler/clusters/utils.h" +#include "tensorflow/core/grappler/costs/op_context.h" #include "tensorflow/core/grappler/costs/utils.h" namespace tensorflow { @@ -101,16 +103,16 @@ static const Costs::Duration kMinComputeTime(1); namespace { -string GetDataFormat(const OpInfo& op_info) { - string data_format = "NHWC"; // Default format. +std::string GetDataFormat(const OpInfo& op_info) { + std::string data_format = "NHWC"; // Default format. if (op_info.attr().find("data_format") != op_info.attr().end()) { data_format = op_info.attr().at("data_format").s(); } return data_format; } -string GetFilterFormat(const OpInfo& op_info) { - string filter_format = "HWIO"; // Default format. +std::string GetFilterFormat(const OpInfo& op_info) { + std::string filter_format = "HWIO"; // Default format. if (op_info.attr().find("filter_format") != op_info.attr().end()) { filter_format = op_info.attr().at("filter_format").s(); } @@ -202,7 +204,7 @@ int64 CwiseOutputElementCount(const TensorShapeProto& input_shape_1, // Helper function for determining whether there are repeated indices in the // input Einsum equation. -bool CheckRepeatedDimensions(const string& dim_str) { +bool CheckRepeatedDimensions(const absl::string_view dim_str) { int str_size = dim_str.size(); for (int idx = 0; idx < str_size - 1; idx++) { if (dim_str.find(dim_str[idx], idx + 1) != std::string::npos) { @@ -212,6 +214,75 @@ bool CheckRepeatedDimensions(const string& dim_str) { return false; } +// Auxiliary function for determining whether OpLevelCostEstimator is compatible +// with a given Einsum. 
+bool IsEinsumCorrectlyFormed(const OpContext& einsum_context) { + const auto& op_info = einsum_context.op_info; + + auto it = op_info.attr().find("equation"); + if (it == op_info.attr().end()) return false; + const absl::string_view equation = it->second.s(); + std::vector equation_split = absl::StrSplit(equation, "->"); + + if (equation_split.empty()) { + LOG(WARNING) << "Einsum with malformed equation"; + return false; + } + std::vector input_split = + absl::StrSplit(equation_split[0], ','); + + // The current model covers Einsum operations with two operands and a RHS + if (op_info.inputs_size() != 2 || equation_split.size() != 2) { + VLOG(1) << "Missing accurate estimator for op: " << op_info.op(); + return false; + } + const auto& a_input = op_info.inputs(0); + const auto& b_input = op_info.inputs(1); + absl::string_view rhs_str = equation_split[1]; + absl::string_view a_input_str = input_split[0]; + absl::string_view b_input_str = input_split[1]; + + // Ellipsis are not currently supported + if (absl::StrContains(a_input_str, "...") || + absl::StrContains(b_input_str, "...")) { + VLOG(1) << "Missing accurate estimator for op: " << op_info.op() + << ", ellipsis not supported"; + return false; + } + + constexpr int kMatrixRank = 2; + + bool a_input_shape_unknown = false; + bool b_input_shape_unknown = false; + + TensorShapeProto a_input_shape = MaybeGetMinimumShape( + a_input.shape(), std::max(kMatrixRank, a_input.shape().dim_size()), + &a_input_shape_unknown); + TensorShapeProto b_input_shape = MaybeGetMinimumShape( + b_input.shape(), std::max(kMatrixRank, b_input.shape().dim_size()), + &b_input_shape_unknown); + + if (a_input_str.size() != static_cast(a_input_shape.dim_size()) || + b_input_str.size() != static_cast(b_input_shape.dim_size())) { + VLOG(1) << "Missing accurate estimator for op: " << op_info.op() + << ", equation subscripts don't match tensor rank."; + return false; + } + + // Subscripts where axis appears more than once for a single input are not yet + // supported + if (CheckRepeatedDimensions(a_input_str) || + CheckRepeatedDimensions(b_input_str) || + CheckRepeatedDimensions(rhs_str)) { + VLOG(1) << "Missing accurate estimator for op: " << op_info.op() + << ", Subscripts where axis appears more than once for a single " + "input are not yet supported"; + return false; + } + + return true; +} + } // namespace // Return a minimum shape if the shape is unknown. 
If known, return the original @@ -528,7 +599,7 @@ DeviceInfo OpLevelCostEstimator::GetDeviceInfo( } } } else if (device.type() == "GPU") { - const string architecture = device.environment().at("architecture"); + const std::string architecture = device.environment().at("architecture"); int cores_per_multiprocessor; if (architecture < "3") { // Fermi @@ -695,7 +766,7 @@ OpLevelCostEstimator::ConvolutionDimensionsFromInputs( VLOG(2) << "Original filter shape: " << original_filter_shape.DebugString(); int x_index, y_index, major_channel_index, minor_channel_index = -1; - const string& data_format = GetDataFormat(op_info); + const std::string& data_format = GetDataFormat(op_info); if (data_format == "NCHW") { major_channel_index = 1; y_index = 2; @@ -712,7 +783,7 @@ OpLevelCostEstimator::ConvolutionDimensionsFromInputs( x_index = 2; major_channel_index = 3; } - const string& filter_format = GetFilterFormat(op_info); + const std::string& filter_format = GetFilterFormat(op_info); int filter_x_index, filter_y_index, in_major_channel_index, out_channel_index, in_minor_channel_index = -1; if (filter_format == "HWIO") { @@ -906,6 +977,130 @@ int64 OpLevelCostEstimator::CountMatMulOperations(const OpInfo& op_info, return ops; } +bool OpLevelCostEstimator::GenerateBatchMatmulContextFromEinsum( + const OpContext& einsum_context, OpContext* batch_matmul_context, + bool* found_unknown_shapes) const { + // This auxiliary function transforms an einsum OpContext into its equivalent + // Batch Matmul OpContext. The function returns a boolean, which determines + // whether it was successful in generating the output OpContext or not. + + // Einsum computes a generalized contraction between tensors of arbitrary + // dimension as defined by the equation written in the Einstein summation + // convention. The number of tensors in the computation and the number of + // contractions can be arbitrarily long. The current model only contemplates + // Einsum equations, which can be translated into a single BatchMatMul + // operation. Einsum operations with more than two operands are not currently + // supported. Subscripts where an axis appears more than once for a single + // input and ellipsis are currently also excluded. See: + // https://www.tensorflow.org/api_docs/python/tf/einsum + // We distinguish four kinds of dimensions, depending on their placement in + // the equation: + // + B: Batch dimensions: Dimensions which appear in both operands and RHS. + // + K: Contracting dimensions: These appear in both inputs but not RHS. + // + M: Operand A dimensions: These appear in the first operand and the RHS. + // + N: Operand B dimensions: These appear in the second operand and the RHS. 
+ // Then, the operation to estimate is BatchMatMul([B,M,K],[B,K,N]) + + if (batch_matmul_context == nullptr) { + VLOG(1) << "Output context should not be a nullptr."; + return false; + } + if (!IsEinsumCorrectlyFormed(einsum_context)) return false; + const auto& op_info = einsum_context.op_info; + std::vector equation_split = + absl::StrSplit(op_info.attr().find("equation")->second.s(), "->"); + std::vector input_split = + absl::StrSplit(equation_split[0], ','); + const auto& a_input = op_info.inputs(0); + const auto& b_input = op_info.inputs(1); + absl::string_view rhs_str = equation_split[1]; + absl::string_view a_input_str = input_split[0]; + absl::string_view b_input_str = input_split[1]; + + constexpr int kMatrixRank = 2; + + bool a_input_shape_unknown = false; + bool b_input_shape_unknown = false; + + TensorShapeProto a_input_shape = MaybeGetMinimumShape( + a_input.shape(), std::max(kMatrixRank, a_input.shape().dim_size()), + &a_input_shape_unknown); + TensorShapeProto b_input_shape = MaybeGetMinimumShape( + b_input.shape(), std::max(kMatrixRank, b_input.shape().dim_size()), + &b_input_shape_unknown); + + *found_unknown_shapes = a_input_shape_unknown || b_input_shape_unknown || + (a_input.shape().dim_size() < kMatrixRank) || + (b_input.shape().dim_size() < kMatrixRank); + + OpInfo batch_matmul_op_info = op_info; + batch_matmul_op_info.mutable_inputs()->Clear(); + batch_matmul_op_info.set_op("BatchMatMul"); + + AttrValue transpose_attribute; + transpose_attribute.set_b(false); + (*batch_matmul_op_info.mutable_attr())["transpose_a"] = transpose_attribute; + (*batch_matmul_op_info.mutable_attr())["transpose_b"] = transpose_attribute; + + OpInfo::TensorProperties* a_matrix = batch_matmul_op_info.add_inputs(); + TensorShapeProto* a_matrix_shape = a_matrix->mutable_shape(); + a_matrix->set_dtype(a_input.dtype()); + + OpInfo::TensorProperties* b_matrix = batch_matmul_op_info.add_inputs(); + b_matrix->set_dtype(b_input.dtype()); + TensorShapeProto* b_matrix_shape = b_matrix->mutable_shape(); + + TensorShapeProto_Dim m_dim; + TensorShapeProto_Dim n_dim; + TensorShapeProto_Dim k_dim; + + m_dim.set_size(1); + n_dim.set_size(1); + k_dim.set_size(1); + + for (int i_idx = 0, a_input_str_size = a_input_str.size(); + i_idx < a_input_str_size; ++i_idx) { + if (b_input_str.find(a_input_str[i_idx]) == std::string::npos) { + if (rhs_str.find(a_input_str[i_idx]) == std::string::npos) { + VLOG(1) << "Missing accurate estimator for op: " << op_info.op(); + return false; + } + + m_dim.set_size(m_dim.size() * a_input_shape.dim(i_idx).size()); + continue; + } else if (rhs_str.find(a_input_str[i_idx]) == std::string::npos) { + // The dimension does not appear in the RHS, therefore it is a contracting + // dimension. + k_dim.set_size(k_dim.size() * a_input_shape.dim(i_idx).size()); + continue; + } + // It appears in both input operands, therefore we place it as an outer + // dimension for the Batch Matmul. + *(a_matrix_shape->add_dim()) = a_input_shape.dim(i_idx); + *(b_matrix_shape->add_dim()) = a_input_shape.dim(i_idx); + } + for (int i_idx = 0, b_input_str_size = b_input_str.size(); + i_idx < b_input_str_size; ++i_idx) { + if (a_input_str.find(b_input_str[i_idx]) == std::string::npos) { + if (rhs_str.find(b_input_str[i_idx]) == std::string::npos) { + VLOG(1) << "Missing accurate estimator for op: " << op_info.op(); + return false; + } + n_dim.set_size(n_dim.size() * b_input_shape.dim(i_idx).size()); + } + } + + // The two inner-most dimensions of the Batch Matmul are added. 
+ *(a_matrix_shape->add_dim()) = m_dim; + *(a_matrix_shape->add_dim()) = k_dim; + *(b_matrix_shape->add_dim()) = k_dim; + *(b_matrix_shape->add_dim()) = n_dim; + + *batch_matmul_context = einsum_context; + batch_matmul_context->op_info = batch_matmul_op_info; + return true; +} + int64 OpLevelCostEstimator::CountBatchMatMulOperations( const OpInfo& op_info, bool* found_unknown_shapes) { return CountBatchMatMulOperations(op_info, nullptr, found_unknown_shapes); @@ -1327,7 +1522,7 @@ Costs OpLevelCostEstimator::PredictFusedConv2DBiasActivation( // contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc // TODO(yaozhang): Support NHWC_VECT_W. - string data_format = GetDataFormat(op_context.op_info); + std::string data_format = GetDataFormat(op_context.op_info); if (data_format != "NCHW" && data_format != "NHWC" && data_format != "NCHW_VECT_C") { LOG(WARNING) << "unsupported data format: " << data_format; @@ -1335,7 +1530,7 @@ Costs OpLevelCostEstimator::PredictFusedConv2DBiasActivation( cost.inaccurate = true; return cost; } - string filter_format = GetFilterFormat(op_context.op_info); + std::string filter_format = GetFilterFormat(op_context.op_info); if (filter_format != "HWIO" && filter_format != "OIHW" && filter_format != "OIHW_VECT_I") { LOG(WARNING) << "unsupported filter format: " << filter_format; @@ -1405,154 +1600,17 @@ Costs OpLevelCostEstimator::PredictMatMul(const OpContext& op_context) const { } Costs OpLevelCostEstimator::PredictEinsum(const OpContext& op_context) const { - // Einsum computes a generalized contraction between tensors of arbitrary - // dimension as defined by the equation written in the Einstein summation - // convention. The number of tensors in the computation and the number of - // contractions can be arbitrarily long. The current model only contemplates - // Einsum equations, which can be translated into a single BatchMatMul - // operation. Einsum operations with more than two operands are not currently - // supported. Subscripts where an axis appears more than once for a single - // input and ellipsis are currently also excluded. See: - // https://www.tensorflow.org/api_docs/python/tf/einsum - // We distinguish four kinds of dimensions, depending on their placement in - // the equation: - // + B: Batch dimensions: Dimensions which appear in both operands and RHS. - // + K: Contracting dimensions: These appear in both inputs but not RHS. - // + M: Operand A dimensions: These appear in the first operand and the RHS. - // + N: Operand B dimensions: These appear in the second operand and the RHS. 
- // Then, the operation to estimate is BatchMatMul([B,M,K],[B,K,N]) const auto& op_info = op_context.op_info; auto it = op_info.attr().find("equation"); if (it == op_info.attr().end()) return Costs::ZeroCosts(/*inaccurate=*/true); - const string& equation = it->second.s(); - std::vector equation_split = absl::StrSplit(equation, "->"); - - if (equation_split.empty()) { - LOG(WARNING) << "Einsum with malformed equation"; - return PredictCostOfAnUnknownOp(op_context); - } - std::vector input_split = absl::StrSplit(equation_split[0], ','); - - // The current model covers Einsum operations with two operands and a RHS - if (op_info.inputs_size() != 2 || equation_split.size() != 2) { - VLOG(1) << "Missing accurate estimator for op: " << op_info.op(); - return PredictCostOfAnUnknownOp(op_context); - } - string rhs_str = equation_split[1]; - string a_input_str = input_split[0]; - string b_input_str = input_split[1]; - - // Ellipsis are not currently supported - if (a_input_str.find("...") != std::string::npos || - b_input_str.find("...") != std::string::npos) { - VLOG(1) << "Missing accurate estimator for op: " << op_info.op() - << ", ellipsis not supported"; - return PredictCostOfAnUnknownOp(op_context); - } - - const auto& a_input = op_info.inputs(0); - const auto& b_input = op_info.inputs(1); - const int matrix_rank = 2; - + OpContext batch_matmul_op_context; bool found_unknown_shapes = false; - bool a_input_shape_unknown = false; - bool b_input_shape_unknown = false; - - TensorShapeProto a_input_shape = MaybeGetMinimumShape( - a_input.shape(), std::max(matrix_rank, a_input.shape().dim_size()), - &a_input_shape_unknown); - TensorShapeProto b_input_shape = MaybeGetMinimumShape( - b_input.shape(), std::max(matrix_rank, b_input.shape().dim_size()), - &b_input_shape_unknown); - - found_unknown_shapes = a_input_shape_unknown || b_input_shape_unknown || - (a_input.shape().dim_size() < matrix_rank) || - (b_input.shape().dim_size() < matrix_rank); - - if (a_input_str.size() != static_cast(a_input_shape.dim_size()) || - b_input_str.size() != static_cast(b_input_shape.dim_size())) { - VLOG(1) << "Missing accurate estimator for op: " << op_info.op() - << ", equation subscripts don't match tensor rank."; + bool success = GenerateBatchMatmulContextFromEinsum( + op_context, &batch_matmul_op_context, &found_unknown_shapes); + if (!success) { return PredictCostOfAnUnknownOp(op_context); } - - // Subscripts where axis appears more than once for a single input are not yet - // supported - if (CheckRepeatedDimensions(a_input_str) || - CheckRepeatedDimensions(b_input_str) || - CheckRepeatedDimensions(rhs_str)) { - VLOG(1) << "Missing accurate estimator for op: " << op_info.op() - << ", Subscripts where axis appears more than once for a single " - "input are not yet supported"; - return PredictCostOfAnUnknownOp(op_context); - } - - OpInfo batch_matmul_op_info = op_info; - batch_matmul_op_info.mutable_inputs()->Clear(); - batch_matmul_op_info.set_op("BatchMatMul"); - - AttrValue transpose_attribute; - transpose_attribute.set_b(false); - (*batch_matmul_op_info.mutable_attr())["transpose_a"] = transpose_attribute; - (*batch_matmul_op_info.mutable_attr())["transpose_b"] = transpose_attribute; - - OpInfo::TensorProperties* a_matrix = batch_matmul_op_info.add_inputs(); - TensorShapeProto* a_matrix_shape = a_matrix->mutable_shape(); - a_matrix->set_dtype(a_input.dtype()); - - OpInfo::TensorProperties* b_matrix = batch_matmul_op_info.add_inputs(); - b_matrix->set_dtype(b_input.dtype()); - TensorShapeProto* b_matrix_shape = 
b_matrix->mutable_shape(); - - TensorShapeProto_Dim m_dim; - TensorShapeProto_Dim n_dim; - TensorShapeProto_Dim k_dim; - - m_dim.set_size(1); - n_dim.set_size(1); - k_dim.set_size(1); - - for (int i_idx = 0, a_input_str_size = a_input_str.size(); - i_idx < a_input_str_size; ++i_idx) { - if (b_input_str.find(a_input_str[i_idx]) == std::string::npos) { - if (rhs_str.find(a_input_str[i_idx]) == std::string::npos) { - VLOG(1) << "Missing accurate estimator for op: " << op_info.op(); - return PredictCostOfAnUnknownOp(op_context); - } - - m_dim.set_size(m_dim.size() * a_input_shape.dim(i_idx).size()); - continue; - } else if (rhs_str.find(a_input_str[i_idx]) == std::string::npos) { - // The dimension does not appear in the RHS, therefore it is a contracting - // dimension. - k_dim.set_size(k_dim.size() * a_input_shape.dim(i_idx).size()); - continue; - } - // It appears in both input operands, therefore we place it as an outer - // dimension for the Batch Matmul. - *(a_matrix_shape->add_dim()) = a_input_shape.dim(i_idx); - *(b_matrix_shape->add_dim()) = a_input_shape.dim(i_idx); - } - for (int i_idx = 0, b_input_str_size = b_input_str.size(); - i_idx < b_input_str_size; ++i_idx) { - if (a_input_str.find(b_input_str[i_idx]) == std::string::npos) { - if (rhs_str.find(b_input_str[i_idx]) == std::string::npos) { - VLOG(1) << "Missing accurate estimator for op: " << op_info.op(); - return PredictCostOfAnUnknownOp(op_context); - } - n_dim.set_size(n_dim.size() * b_input_shape.dim(i_idx).size()); - } - } - - // The two inner-most dimensions of the Batch Matmul are added. - *(a_matrix_shape->add_dim()) = m_dim; - *(a_matrix_shape->add_dim()) = k_dim; - *(b_matrix_shape->add_dim()) = k_dim; - *(b_matrix_shape->add_dim()) = n_dim; - - OpContext batch_matmul_op_context = op_context; - batch_matmul_op_context.op_info = batch_matmul_op_info; Costs costs = PredictCosts(batch_matmul_op_context); costs.inaccurate = costs.inaccurate || found_unknown_shapes; costs.num_ops_with_unknown_shapes = found_unknown_shapes; @@ -1772,7 +1830,7 @@ Costs OpLevelCostEstimator::PredictFusedOp( /* static */ OpContext OpLevelCostEstimator::FusedChildContext( - const OpContext& parent, const string& op_name, + const OpContext& parent, const std::string& op_name, const OpInfo::TensorProperties& output, const std::vector& inputs) { // Setup the base parameters of our new context. 
@@ -1821,7 +1879,7 @@ OpLevelCostEstimator::OpDimensionsFromInputs( VLOG(2) << "Image shape: " << image_shape.DebugString(); int x_index, y_index, channel_index; - const string& data_format = GetDataFormat(op_info); + const std::string& data_format = GetDataFormat(op_info); if (data_format == "NCHW") { channel_index = 1; y_index = 2; diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.h b/tensorflow/core/grappler/costs/op_level_cost_estimator.h index ad2df8fcdd5..2bf3c5bb920 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.h +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.h @@ -138,6 +138,9 @@ class OpLevelCostEstimator { static int64 CountMatMulOperations(const OpInfo& op_info, MatMulDimensions* mat_mul, bool* found_unknown_shapes); + bool GenerateBatchMatmulContextFromEinsum(const OpContext& einsum_context, + OpContext* batch_matmul_context, + bool* found_unknown_shapes) const; static int64 CountBatchMatMulOperations(const OpInfo& op_info, bool* found_unknown_shapes); static int64 CountBatchMatMulOperations(const OpInfo& op_info, From e213574acedc8810cf3eb753ff387d70c52b90a3 Mon Sep 17 00:00:00 2001 From: Ashwin Murthy Date: Tue, 23 Jun 2020 18:06:58 -0700 Subject: [PATCH 56/66] Add g3doc for TensorFlow composite operation fusion in the TensorFlow Lite converter PiperOrigin-RevId: 317976446 Change-Id: I5b9093f5290f14444cb1a64c1c17f3017996e5b5 --- tensorflow/lite/g3doc/_book.yaml | 2 + .../lite/g3doc/convert/operation_fusion.md | 270 ++++++++++++++++++ tensorflow/lite/g3doc/convert/rnn.md | 19 +- .../lite/g3doc/guide/ops_compatibility.md | 3 - .../lite/g3doc/images/convert/op_fusion.png | Bin 0 -> 39668 bytes .../g3doc/images/convert/op_fusion_banner.jpg | Bin 0 -> 90470 bytes 6 files changed, 285 insertions(+), 9 deletions(-) create mode 100644 tensorflow/lite/g3doc/convert/operation_fusion.md create mode 100644 tensorflow/lite/g3doc/images/convert/op_fusion.png create mode 100644 tensorflow/lite/g3doc/images/convert/op_fusion_banner.jpg diff --git a/tensorflow/lite/g3doc/_book.yaml b/tensorflow/lite/g3doc/_book.yaml index 6c454fab921..715e0c8431b 100644 --- a/tensorflow/lite/g3doc/_book.yaml +++ b/tensorflow/lite/g3doc/_book.yaml @@ -86,6 +86,8 @@ upper_tabs: path: /lite/convert/rnn - title: "Add metadata" path: /lite/convert/metadata + - title: "Composite operation fusion" + path: /lite/convert/operation_fusion - title: "1.x compatibility" path: /lite/convert/1x_compatibility diff --git a/tensorflow/lite/g3doc/convert/operation_fusion.md b/tensorflow/lite/g3doc/convert/operation_fusion.md new file mode 100644 index 00000000000..c8714179498 --- /dev/null +++ b/tensorflow/lite/g3doc/convert/operation_fusion.md @@ -0,0 +1,270 @@ +# TensorFlow operation fusion + +## Overview + +This page describes the design and steps needed to convert composite operations +in TensorFlow to fused operations in TensorFlow Lite. This infrastructure is +general purpose and supports conversion of any composite operation in TensorFlow +to a corresponding fused operation in TensorFlow Lite. + +An example use of this infrastructure is TensorFlow RNN operation fusion to +TensorFlow Lite, as detailed +[here](https://www.tensorflow.org/lite/convert/rnn). + +### What are fused operations + +![drawing](../images/convert/op_fusion_banner.jpg) + +TensorFlow operations can either be primitive ops e.g. +[tf.add](https://www.tensorflow.org/api_docs/python/tf/math/add) or they can be +composed from other primitive operations e.g. 
+[tf.einsum](https://www.tensorflow.org/api_docs/python/tf/einsum). A primitive +operation shows up as a single node in the TensorFlow graph while.a composite +operation is a collection of nodes in the TensorFlow graph. Executing a +composite operation is equivalent to executing each of its constituent primitive +operations. + +A fused operation corresponds to a single operation that subsumes all the +computation performed by each primitive operation within the corresponding +composite operation. + +### Benefits of fused operations + +Fused operations exist to maximize the performance of their underlying kernel +implementations, by optimizing the overall computation and reducing memory +footprint. This is very valuable, especially for low-latency inference workloads +and resource constrained mobile platforms. + +Fused operations also provide a higher level interface to define complex +transformations like quantization, which would otherwise be infeasible or very +hard to do at a more granular level. + +TensorFlow Lite has many instances of fused operations for the reasons +articulated above. These fused operations typically correspond to composite +operations in the source TensorFlow program. Examples of composite operations in +TensorFlow that are implemented as a single fused operation in TensorFlow Lite +include various RNN operations like Unidirectional and Bidirectional sequence +LSTM, convolution (conv2d, bias add, relu), fully connected (matmul, bias add, +relu) and more. In TensorFlow Lite, LSTM quantization is currently only +implemented in the fused LSTM operations. + +### Challenges with fused operations + +Converting composite operations from TensorFlow to fused operations in +TensorFlow Lite is a hard problem. This is because: + +1. Composite operations are represented in the TensorFlow graph as an + unstructured set of primitive operations. It can be very challenging to + identify (e.g. via pattern matching) the sub-graph corresponding to such a + composite operation. + +1. There may be more than one TensorFlow implementation targeting a fused + TensorFlow Lite operation. For example, there are many LSTM implementations + in TensorFlow (Keras, Babelfish/lingvo etc) and each of these is composed of + different primitive operations but they all could still be converted to the + same fused LSTM operation in TensorFlow Lite. + +As such, conversion of fused operations has proven quite challenging. + +## Converting from composite to fused operation + +The overall architecture for converting TensorFlow composite operations to +TensorFlow Lite fused operations is below: + +![drawing](../images/convert/op_fusion.png) + +### Wrap the composite operation in a `tf.function` + +In the TensorFlow model source code, identify and abstract out the composite +operation into a `tf.function` with the +[experimental\_implements](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/eager/function.py#L88) +function annotation. See an example of [embedding lookup](#composing_ops). The +function defines the interface and its arguments should be used to implement the +conversion logic. + +### Write conversion code + +The conversion code is written per the interface of the function with the +`implements` annotation. See an example fusion for +[embedding lookup](#fusion_code). Conceptually, the conversion code replaces the +composite implementation of this interface with the fused one. 
+ +In the prepare-composite-functions pass, plugin in your +[conversion code](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc#L108). + +In more advanced usages, it is possible to implement complex transformations of +the composite operation's operands in order to derive the operands of the fused +operation. See +[Keras LSTM](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc#L627). +conversion code as an example. + +### Convert to TensorFlow Lite + +Use the +[TFLiteConverter.from_saved_model](https://www.tensorflow.org/api_docs/python/tf/lite/TFLiteConverter#from_saved_model) +API to convert to TensorFlow Lite. + +## Under the hood + + + +We now describe high level details of the overall design in converting to fused +operations in TensorFlow Lite. + +### Composing operations in TensorFlow + + + +The use of `tf.function` with the +[experimental\_implements](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/eager/function.py#L88) +function attribute allows users to explicitly compose new operations using +TensorFlow primitive operations and specify the interface that the resultant +composite operation implements. This is very useful as it provides: + +1. A well-defined boundary for the composite operation in the underlying + TensorFlow graph. +1. Explicitly specify the interface that this operation implements. The + arguments of the `tf.function` correspond to the arguments of this + interface. + +As an example, let’s consider a composite operation defined in +[Lingvo/TensorFlow](https://github.com/tensorflow/lingvo) to implement embedding +lookup. This maps to a fused operation in TensorFlow Lite. + +```python + @tf.function( + experimental_implements="lingvo.embedding_lookup") + def EmbFprop(embs, ids_vec): + """Embedding forward prop. + + Effectively, it computes: + num = size of ids_vec + rets = zeros([num, embedding dim]) + for i in range(num): + rets[i, :] = embs[ids_vec[i], :] + return rets + + Args: + embs: The embedding matrix. + ids_vec: A vector of int32 embedding ids. + + Returns: + The result of embedding lookups. A matrix of shape + [num ids in ids_vec, embedding dims]. + """ + num = tf.shape(ids_vec)[0] + rets = inplace_ops.empty([num] + emb_shape_suf, py_utils.FPropDtype(p)) + + def EmbFpropLoop(i, embs, ids_vec, rets): + # row_id = ids_vec[i] + row_id = tf.gather(ids_vec, i) + # row = embs[row_id] + row = tf.reshape(tf.gather(embs, row_id), [1] + emb_shape_suf) + # rets[i] = row + rets = inplace_ops.alias_inplace_update(rets, [i], row) + return embs, ids_vec, rets + + _, _, rets = functional_ops.For( + start=0, + limit=num, + delta=1, + inputs=[embs, ids_vec, rets], + body=EmbFpropLoop, + rewrite_with_while=compiled) + if len(weight_shape) > 2: + rets = tf.reshape(rets, [num, symbolic.ToStatic(p.embedding_dim)]) + return rets +``` + +By making models use composite operations via `tf.function` as illustrated +above, it becomes possible to build a general infrastructure to **identify and +convert** such operations to fused TensorFlow Lite operations. + +### Extending the TensorFlow Lite converter + +The TensorFlow Lite converter that was released earlier this year only supported +importing TensorFlow models as a graph with all variables replaced with their +corresponding constant values. This does not work for operation fusion since +such graphs have all functions inlined so that the variables can be turned into +constants. 
+ +In order to leverage the `tf.function` with the `experimental_implements` +feature during the conversion process, the functions need to be preserved until +later in the conversion process. + +As such, we implemented a new workflow of importing and converting TensorFlow +models in the converter to support the composite operation fusion use case. +Specifically, the new features added are: + +1. Importing TensorFlow + [saved models into MLIR](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc#L3593) +1. [fuse composite operations](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc#L103) +1. [variable mutability analysis](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/compiler/mlir/tensorflow/transforms/optimize_global_tensors.cc#L43) +1. [freeze all read-only variables](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/compiler/mlir/tensorflow/transforms/freeze_global_tensors.cc#L44) + +This allows us to perform operation fusion using the functions representing the +composite operations prior to function inlining and variable freezing. + +### Implementing operation fusion + +Let’s look at the operation fusion pass in more detail. This pass does the +following: + +1. Loop through all functions in the MLIR module. +1. If a function has the tf.\_implements attribute, based on the attribute + value, calls the appropriate operation fusion utility. +1. The operation fusion utility operates on the function’s operands and + attributes (which serve as the interface for the conversion) and replaces + the body of the function with an equivalent function body containing the + fused operation. +1. In many cases, the replaced body will contain operations other than the + fused operation. These correspond to some static transforms on the + function’s operands in order to obtain the operands of the fused operation. + Since these computations can all be constant folded away, they would not be + present in the exported flatbuffer where only the fused operation would + exist. + +Here is code snippet from the pass showing the main workflow: + +``` +void PrepareCompositeFunctionsPass::ConvertTFImplements(FuncOp func, + StringAttr attr) { + if (attr.getValue() == "lingvo.embedding_lookup") { + func.eraseBody(); + func.addEntryBlock(); + // Convert the composite embedding_lookup function body to a + // TFLite fused embedding_lookup op. + ConvertEmbeddedLookupFunc convert_embedded_lookup(func); + if (failed(convert_embedded_lookup.VerifySignature())) { + return signalPassFailure(); + } + convert_embedded_lookup.RewriteFunc(); + } else if (attr.getValue() == mlir::TFL::kKerasLstm) { + func.eraseBody(); + func.addEntryBlock(); + OpBuilder builder(func.getBody()); + if (failed(ConvertKerasLSTMLayer(func, &builder))) { + return signalPassFailure(); + } + } else if (.....) /* Other fusions can plug in here */ +} +``` + +Here is code snippet showing mapping this composite operation to a fused +operation in TensorFlow Lite leveraging the function as a conversion interface. 
+ + + +```C++ +void RewriteFunc() { + Value lookup = func_.getArgument(1); + Value value = func_.getArgument(0); + auto output_type = func_.getType().getResult(0); + + OpBuilder builder(func_.getBody()); + auto op = builder.create( + func_.getLoc(), output_type, lookup, value); + + builder.create(func_.getLoc(), op.getResult()); + } +``` diff --git a/tensorflow/lite/g3doc/convert/rnn.md b/tensorflow/lite/g3doc/convert/rnn.md index 734992c0904..0954f13a4c7 100644 --- a/tensorflow/lite/g3doc/convert/rnn.md +++ b/tensorflow/lite/g3doc/convert/rnn.md @@ -23,15 +23,16 @@ two fold: ## Converter API -Currently this feature is available through the -[tf-nightly](https://pypi.org/project/tf-nightly/) pip or from head. This will -be available in the TensorFlow 2.3 release. +The feature is part of TensorFlow 2.3 release. It is also available through the +[tf-nightly](https://pypi.org/project/tf-nightly/) pip or from head. This conversion functionality is available when converting to TensorFlow Lite via a SavedModel or from the Keras model directly. See example usages. ### From saved model + + ``` # build a saved model. Here concrete_function is the exported function # corresponding to the TensorFlow model containing one or more @@ -64,6 +65,8 @@ illustrates the end to end usage with the TensorFlow Lite interpreter. ## TensorFlow RNNs APIs supported + + ### Keras LSTM conversion (recommended) We support out-of-the-box conversion of Keras LSTM to TensorFlow Lite. For @@ -75,13 +78,17 @@ details on how this works please refer to the Also important is to highlight the TensorFlow Lite’s LSTM contract with respect to the Keras operation definition: -1. The dimension 0 of the input tensor is the batch size. -1. The dimension 0 of the recurrent\_weight tensor is the number of outputs. +1. The dimension 0 of the **input** tensor is the batch size. +1. The dimension 0 of the **recurrent\_weight** tensor is the number of + outputs. 1. The **weight** and **recurrent\_kernel** tensors are transposed. -1. The transposed weight, transposed recurrent\_kernel and bias tensors are +1. The transposed weight, transposed recurrent\_kernel and **bias** tensors are split into 4 equal sized tensors along the dimension 0. These correspond to **input gate, forget gate, cell, and output gate**. +See the detailed conversion code from Keras LSTM to TensorFlow Lite +[here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc#L627). 
+ #### Keras LSTM Variants ##### Time major diff --git a/tensorflow/lite/g3doc/guide/ops_compatibility.md b/tensorflow/lite/g3doc/guide/ops_compatibility.md index 054b7e0e275..d1462cb09c7 100644 --- a/tensorflow/lite/g3doc/guide/ops_compatibility.md +++ b/tensorflow/lite/g3doc/guide/ops_compatibility.md @@ -1147,11 +1147,8 @@ models: * `CALL` * `CONCAT_EMBEDDINGS` * `CUSTOM` -* `EMBEDDING_LOOKUP` * `EMBEDDING_LOOKUP_SPARSE` * `HASHTABLE_LOOKUP` * `LSH_PROJECTION` -* `LSTM` -* `RNN` * `SKIP_GRAM` * `SVDF` diff --git a/tensorflow/lite/g3doc/images/convert/op_fusion.png b/tensorflow/lite/g3doc/images/convert/op_fusion.png new file mode 100644 index 0000000000000000000000000000000000000000..bfee4acb53ced941ec2e779d7082c0f91f9735e0 GIT binary patch literal 39668 zcmc$FWk6J4*ROV!_Xi|r!)c*Gjt6~Nsi<& z0|R#k{Xft9-uL-%zubGz2V$Rd_Fj9fwb%NsIMG@fO2oHmZ{N6agIGmbUgyRQ9OfH0 zunh_D&?8yi;jTAsgx*k*fBM3Aez%=4!&on~*fb#@v!Bs_fTLX0jx_o5aFI7C08MaU72X!%H|yqE_;@6rW1$$;|VDsXs=M z6|#f}ZM3~dMWm*-Yvz=_&?|ccJD2Qa_d0zUuyk5A1Yg*xZEcbN|8!Zg=#)>r5v~u> z#VOaX^oO|(Fu)rJm_jbTf6u$}JK3Isa~m}{LVoteitre>9RJuDGa{AoAgVO1v9J`_ zLxL{N*N0L~ZG$_jT3gRcK=F?D(3^ymYK`d%m<@S9&5AVhhS+ksWWCcA{N4@6$HNP5 zb^Xz;q$#$ZqBLD;G}E+OJ4~B*d0b3qdhwHr&S%k0Nfml^38$kIb}X4(@_{Zr_gQdi ztRHPX9C}R0gcwx1be(=_<0Q6`VHrFJfgBlr0i5IG<9EKUo*8r8UfCFfw9&F)E?pvQ z)SItR$OT+iU20a4vGa2Lz4Y-fp_$hZeqtHKH%;2kvp$~w*E=)SE`g{WQH`L8TrUsD z^$d(p%Fp*(UAi;kA4#39yp7wb?!4^ME7wOgy}W|eI`=;2#B7DX%MNeffkypo#76q) zNd2R*y6>tsh$rv|AzWnPx66q?YSC@G0U>9ruM3$ElyPA?K-m%*d77DmGlrzJY zH>V(WJyB}ul>JV;)amRwlkZBHik+m6(8BN8q14AAla7$N)p4)5`+{1yQ+iNnD>+ZV zWVx=co2ssZ(x=6M=TZc&xuT+!FK6S^9$z%Wn2bHJXy&Zo5PE z6p<|0C5MvU>_nZt;A8scSz#*leX5wP6wBOd zW#uPS;G@O!Iq5zhDRtxV$wSbg0-Ug@Lb0~D= z5kOu%btkc+T_9F)%}0KQ98w_)`%uNM6!YdGo|Oi8A90x$gAw2Ttf$CM&1oT~z2ibA zREc)=Oo)gI@t|zdOnby9W)3KLa!P&45O>4qcHy`l?rC6SDK8U~-F6aa`hj2Y1H@$v zXn%}o@JF9mI#t&d66E|`O*s|ITyV7p7}QGTWt0c(;#$(Xpx6D(OVdc{)g;L$^D(BS z^6~s6$2J>=_qx5Ig8OYpqntLf&MjLYJg1MccxzQGQJO;KaacdHgTCZnWLyF5JeWjw_*tJ2F5OrX%G<PeyaaC*^9UYOEP>D-x1P2FA= z>n`n@s^t;+NP^iK$~@JS$yqKwG+vT|rxd zXKHoM?Cb0=Exb25x9(M-V)HZyx8K{f0C8}hUHY)~M9;0>_!<`qy zR?Fdy2Vo?E`EFnp_WJocLYhP`W6gM3T(xqGwJ`0dK4VQlBrtXeLYC;)H|9|*sVwjX z2L~JQnqF^Bppe5yZibwdV@Hq~!;;#m(B8VD$JK4MHGzo3{wV{~CI6l?D}E*sXrPBB z+K$BybGJG=Ecx8WxvN`pN3BDqo|pN{?SrF3uFoVm;e5@VqLggb8s@L`eQae7SGxqO zeX9omELY!OF6iR8`zF2dA;xTDko%47GmKG*d&gzmQs;(PPuKiKMh57|7z@$USxdU& zZ1?A@@e=#98|TJ@q4J0fBJZ@8a)OHllStD++_XUpsKwr$9WB~ik6g&j4a9cd+0l1$JudO^{-JC#;pxPj}Cj7 zE?u8x#8-!)cH#)RoAUKUMs%Ddy+2^x&&V}!4}4`V{f3CO`sG8v?>qC}GZnRhlVPQr z04}xa!E_jj+P2V>@}2t;&2#J$KF%}aJWsOorrb1Ul4Hjp-Xo*i^qb$gRZP>~U&~Dh z=L6nFwl%x@2T(`-;}}syO^ALz97vrPJdTXVP+*BToYtwzlSA#wGTb%CBVrWbKYM0a2(iK zTI!G`UcifsW4zE&AE@ufC0f8sj5eQIDMA9n=}BoqeQ-6{eEZ=5`+Yv4l+i`&26C2gYi+BVDA&K80T9v*3^A$2%?hYfPdSYceI)#02h1<_X% z%Bj2av89yvR$xalFO6};gdcDxy3`C%;m85*KQ1pZr3QN5o~DSnFW$&YZ4{G-GumE1 zn6L_`vW~^5imfc$_`M2*(`eU?3MY@?Osr;cgkTaa*szu1QL2RcUDV!Zv>iC zyhAS3HoEl3b5v) z@g9U9v%4h?|fOKYz09P{snQ={Fxe-8&2&ahpb;+A@iIq(nd82 zo~a-v!gdnPY4~2m%OGiHJ=*QZBF{ZPsL5oX(6v$)V9~JS)V2_Si`fU9xv(#SVnZ*# zuC+GL)da+bN{fCvlO`J3j|ihyCK^Pk__b0guCVsmj0{)y9aOE&=6pkxVp({u$Dn}= zKUjzvaj!xm*}cF~ZD{V2@)~~PSp^ae3K>rk)@yQ%sEp=BJiUeE{^D>+_L~^|4W5;; zir7wU9?d7i+@KI;b1n<}Y0C)C`(skIb$WPKZw{7PezuLrMK@z4KsM(^Jcxm-?d|Oe z05|y3hm7d#dWtT@6SP7-18%So4YtcsC)Urwx5VPOI;*<1Tm!uiGQF78zWW13<7sH@ zajm=$mW=yqUgRA%Dr4-IIHF64`F zYW5dtJ~CE5W~Of)Hn%7D`o>axo}HqS&@SBHjNg#;ZY;R?A(TPv9ag4xU^-6$d#BXJ z6O7Iz#h1?PIA}=h)#bU&*Vzqakj&TmpKRkh)X#sV_F*c#r52E-8~Db!w=zN{!x$Gn zEFh3rbwC}BHJ6e>aJgid!O)cNun_U1g2MbB8(=mr(>zOW^6K|(9QS|^r=S8SNOBwO 
z37`D+S(qBhlL>*NQNF{d_)dkX`c3j@bj#<#Oxc3{t0=VR-MGBG#NRnV{?I+U zyT0%XgPNn%oMSj&o}@lDxLhqzhz$-o=%W&GZry8`uF@gLVE4`rw<$dXl=)Pdo)IPY zYhM?LB!iHssOXewOfeQl*^)|CH8sZ=2N(l(5$dQWw4NM5G-XiG>ZYGDc#t|YXyJ8! z^EN4?71q(})F_4Z9k8B~H7lB*gqCCfGw`Fcml{ena8`s#=`zD%S#g!)Q z$Z?(gso(2w1~A}xX#iN%?je{!v${FG}8CxC zm6qXS7Bh;G4RJCjaORwl)#;agV`kWTU}%1i7u$fSCp2;YYlZYt@{#3j{Zy=5LvpA-YaWXG0W4K$V#HVr${R7H^d zQZZjc)T1+JJG(=9g&b5Z>00O(t!rT8tb>;CNq6ps^6+^_-0-#IQe)zo zB7HpNBc$KqaR5V7o?aY<-z0oP^CSkqg3eXObXlX|Rjr80fs40=7~)QV)7c@Og)W~r z4K%*prD;z}5Q^0&zF503(l|)~V5ol)JsrH;qzIE8qg0O?j86#Mg+fe2e!Vpn>yzQe z(5K^cysxS+f}}!QCyeGOCb6)F-qbr;MqWuB4Usd#6Jjv)Opc3RR#1dc8!n#16+f`B z(xIpg-FH{UP?iN4LZ%RRUZP_CnXDSmx0SejiF~^mbYOKA%7Nb6^Eg?Xko}yv z%f5Cvjb4nWGH_&h=)Cu+SESLrBQ0TZu|&o1JN1)Nws5F2d%*dUa&1I#=+-!~+3t zu#^5>luSEdG7bf6?TPOfbm_MUR7t;?-1eRJ8fZKOGWJNNH%#=`44HBkq3sqeX;Hsk zF=}b3pD|$Ur%dkN@z7h7c~KZhh`i-;%zq!yzZ`Pe-C##BTBu$verD}-&H`@to4BH` z7eCGQH+-zQ&9~@bN9ROee&N#~?Rmr*Cvx234W>51OkRMcZIAy}CVZjzY63#5R3Y6& z5qh+i$G(rdja$znvoeNG5&CUIyj|*3z0->%d{;tM>}09y{l0&^GTK3^Hv}XGgIp9n zdQ~|8?)_=~@or;dxD3cRHK=neIH0jZ231xFdWY4ecEGd8tqR|j%B}YjwB@Hs{D#id z=%5WR`e$n*3-1QB7M5`7>az3A-&t0)?NPPaIr zqVlC9^a>0PtG_a#6%?>0mVZjLhLb%d`wkhx<%s?r6y9;)7NvuJjbsV2~X+`WUz5Oyr~1RK@2R zWFR8dA?FSa%_L%ZV{1yZRRJc!b;uvE<`YBUYM0IgGidkci@VPp=M?AOl?JFesdp@tsjir z7sv(`jCx32R3(smG@{=!#}i~Std_4qOyFlGJp?kJ;69A6E5*A1yC_cWM}Z0MLLgh7 zAP3v$D@~s4xia>!ARsGrF?1X{0iTQx{FWE29Jio69V$=vbN)9EqMtT{s{H~{DJiCb zpx0CyTk4C=-sFB8dkpXo5} zF!ND{slhh2?yfQn*6>%#MHrYbbNINaGEy!Y@4-` zxvI^c+wH;K8dVe|mo>@EoArz|o=kN??#h*zph>AG)Rob?Pw10myhlzq{ckC2e(dD5 zOj9bbM|NEjdn+g@egaS@4Om-a?5WS0aK<($65s=Wc4U*$upRTUL5IR^1#5`D!^^J(9+1JGbBdo-TxrBePGP z2t1USBKYRWa6V|V6247w*-^cGy)zjK^cp>-2_Ep0=gl#axZ@~Y90+!CMwbKxnytzYbf!6g}Ch) zfzg~geyx^dfs*S`G=bH7rLTUgU)s^6&}{JN)g@i73d{&Ft>5nBwl}vZL$Xzqvx$tj z%mtA_Y7gtBF83e(wH3GI6BqU$2BYgFSnLq9GciTwjI3TyMm4<1`y$x5#RpbWhZgep zi32Th3!j@)w>(m6s7jX|41=BX6qSsXy8DBQsq=rUGc$*R}#4t2et}V6V`$?{wFIw#Qio!zu7E1fg>-i~8 z#g~U(iaDWQ8F$IL%flpO`)MvBD~*>viF>;4GbM0$ z>9HCNrKZ`AcV9xWz3Cxt9Myx0NA~>iM7QW=C6jZ^`w<($R_z8k@66! 
zSELDdy0PWPR5-^6gQ@l}Hl=WR%juEr&eT&1!&Q&J!^!3rHWa?DFHL3xQ^eY1yOJmL zl7gz!^OHVq^;eC5SOVTwxbv}VRQ-9DY&<)4M=RV?-QVasgRv4I&_W1HuuIl1ojpfU zwJA@otph!Hl~%Q7)lcp4TjpLp{shhf%v>|}HehZKYtt(jj3HDfn&}z8-a@74DUh|i zN+_zi=|MWbWXxHd#Ne+iN|>VEY^xv%05>8baen=(RxVVwJx56Poi5Y;JU3;Q4UqD> zxdp%tB*!T!?cL#Ls*@QsP`!S5f-k*j>KvJ5oG*1=Man@wIiKiu4m%QhRer=ZiTr8; z4gDdQ=GwB|xuQD19GA6UW`w`n`E!u)&u*3(Z~v6<920VZ5vIfcI;$(^*PkW+(eA0w zDNX-qis<{?>tYuFH;;!Y_Wve73EaI9RLBVkuS4|vOX2nVmq@tG^ya9k!EFD1lINCi z!px8b!HTE8b{C)$;T|zc%N$If>$i1^{AhiRJ;?-B{4QCo@S+ED{e5d53gZiZu6JO( zPrH?{sx9}UBLN-1$@DGe;MT&Y&S4EO0!_W2XUKW9aZ;W#-FwnF(W}B-akKE-hy`_K ztR1FwZk=9HmJW)M%|5w-)T79j>p?7+W(G=WSwfd4V}#=6?j(ryy&l~&KRF#`6Q;qv z{+U0C;ZI&DPe%STzAKyKJv3i;99~{OxOqze7{-eg3vRJaT=&?zJz(un0|)j*LCeQy z=M>}APHJy0Ke|2#Q?3r>JNxKOUYRaMKFo=Wd0VUtf;nASR@?~^X*M^yGwUwV(w0gW z2`-7mU19mLr~Tp6_J`DSM8c^SE|uXd+ywoto2&Z8U&#l5znlJ<730ry2y}3GDvE~4_yixHqF<`;*%)cfQ+Xc*g&azYE{@ zzxwXytNvije4X(*P(3TUg56_3({N54@ZSYG_w7CqUv;As8I+2+BJbpt?~gsBR$43A zRK<$6S6rA;6kQqdJ`A`cwcI#2vneH=#xf3JB5+|!#pSEnG(81HFx#TMSPE{YAX#27 zW>-$ndL2Sl;hhtakoe(ENvP`MbD=Az{PkpHAX@%Nw&S-$|cx ze)8-B{W!a=Qw;64N}?!dp&lNdg!9@g(7bpYs?3}}^LGH5Wr(dr@lFtR*=RPb2h z<%_!c)2@0735{zI3IPk-g*m(C2nMfzA8&?fAyDUdsZdJ(NOLm zKT`kEBlU-V_m|Y2*B|p;LRbkQM>PQPQ&Z_)d@p|PkiWP6KQ%gkFRy<%ck27k7zg=^ zG_EH$ua6cT_+0i*5hiPtpx6vkdQrzn=9wPLj>$7_#^GN-aH^X0bjngjSSOBjMig&$m@9Y1HQx}$_ z+-gU84FbKKZZ4JSdEM#Qeg0}|GZuY4vd=IE2{w5EVs$@iW;Jpk;4R$=2lhi+wU?2; zAR#;ove{jGWr-hOC<-(lf@rcd_rNP1 ze4$15z4|x*p!?5G{Hayi&+hxDb}#%c$K3DV`rp<&e~*FX{~I+Q{Pi*O`xXA>?0=ii LhyFiXbL#&En8Mb4 literal 0 HcmV?d00001 From 40439cbf5f26623f189a237ca58f29394a43649d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 23 Jun 2020 18:53:48 -0700 Subject: [PATCH 57/66] Integrate LLVM at https://github.com/llvm/llvm-project/commit/4d1fd33561cf PiperOrigin-RevId: 317982154 Change-Id: Id9ebe544371760095fd1303b740760d46bf65fdb --- tensorflow/compiler/aot/BUILD | 1 + tensorflow/compiler/aot/compile.cc | 1 + .../compiler/mlir/xla/transforms/lhlo_legalize_to_llvm.cc | 2 +- tensorflow/workspace.bzl | 4 ++-- third_party/mlir/BUILD | 1 + 5 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/aot/BUILD b/tensorflow/compiler/aot/BUILD index eed796b4ec1..0c959e327a8 100644 --- a/tensorflow/compiler/aot/BUILD +++ b/tensorflow/compiler/aot/BUILD @@ -69,6 +69,7 @@ cc_library( "//tensorflow/core:protos_all_cc", "@llvm-project//llvm:ARMCodeGen", # fixdeps: keep "@llvm-project//llvm:PowerPCCodeGen", # fixdeps: keep + "@llvm-project//llvm:Support", "@llvm-project//llvm:Target", "@llvm-project//llvm:X86CodeGen", # fixdeps: keep "//tensorflow/core:regexp_internal", diff --git a/tensorflow/compiler/aot/compile.cc b/tensorflow/compiler/aot/compile.cc index a2cba5cdf9e..fe0d6d5a074 100644 --- a/tensorflow/compiler/aot/compile.cc +++ b/tensorflow/compiler/aot/compile.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include "absl/base/call_once.h" #include "llvm-c/Target.h" +#include "llvm/Support/ManagedStatic.h" #include "tensorflow/compiler/aot/codegen.h" #include "tensorflow/compiler/aot/flags.h" #include "tensorflow/compiler/aot/quantize.h" diff --git a/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_llvm.cc b/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_llvm.cc index 78a77dc3b4d..99d2c08aa98 100644 --- a/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_llvm.cc +++ b/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_llvm.cc @@ -129,7 +129,7 @@ struct DynamicMemRefCastOpConverter void PopulateLhloToLLVMConversionPatterns(LLVMTypeConverter *converter, OwningRewritePatternList *patterns) { patterns->insert( - *converter, LowerToLLVMOptions()); + *converter); } } // namespace xla_lhlo diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 98043c1658b..f2d0c028c5f 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -710,8 +710,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "f1c671925b1c60ded3e4e7b3c6b1ec984b2d9b93" - LLVM_SHA256 = "57fc8f0ab46bdfdff52b03c2196d658c094bc4179cd1cf9495becf6a8466123a" + LLVM_COMMIT = "4d1fd33561cf758be00bdbffab1b6a1a0e428fc0" + LLVM_SHA256 = "6d9851ea3c4ff3df57865e0cafc9793c983636cdb6dc9ff3df00816a778e9039" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), diff --git a/third_party/mlir/BUILD b/third_party/mlir/BUILD index 8fd0a94bf64..ba9b580e53f 100644 --- a/third_party/mlir/BUILD +++ b/third_party/mlir/BUILD @@ -1507,6 +1507,7 @@ cc_library( ":StandardToLLVM", ":Support", ":Transforms", + "@llvm-project//llvm:Support", ], ) From ef20289d996f85558817fa6aaf1d4786eee0c527 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 23 Jun 2020 19:51:24 -0700 Subject: [PATCH 58/66] Update LLVM OSS Bazel build file: add rule for auto-generated file PiperOrigin-RevId: 317987718 Change-Id: I25126724e4f335438b9ac66f6d02a38be1e3782d --- third_party/llvm/llvm.autogenerated.BUILD | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/third_party/llvm/llvm.autogenerated.BUILD b/third_party/llvm/llvm.autogenerated.BUILD index c70ff559165..50ff746b9f2 100644 --- a/third_party/llvm/llvm.autogenerated.BUILD +++ b/third_party/llvm/llvm.autogenerated.BUILD @@ -561,6 +561,7 @@ filegroup( name = "common_target_td_sources", srcs = glob([ "include/llvm/CodeGen/*.td", + "include/llvm/Frontend/Directive/*.td", "include/llvm/IR/Intrinsics*.td", "include/llvm/TableGen/*.td", "include/llvm/Target/*.td", @@ -666,6 +667,17 @@ cc_library( ], ) +gentbl( + name = "omp_gen", + tbl_outs = [("--gen-directive-decls", "include/llvm/Frontend/OpenMP/OMP.h.inc")], + tblgen = ":llvm-tblgen", + td_file = "include/llvm/Frontend/OpenMP/OMP.td", + td_srcs = glob([ + "include/llvm/Frontend/OpenMP/*.td", + "include/llvm/Frontend/Directive/*.td", + ]), +) + ########################## Begin generated content ########################## cc_library( name = "AArch64AsmParser", @@ -2053,6 +2065,7 @@ cc_library( ":Support", ":TransformUtils", ":config", + ":omp_gen", ], ) From a4f7dd5436885a8ecdc6ac34bc4689e7e04ed2af Mon Sep 17 00:00:00 2001 From: Chao Mei Date: Tue, 23 Jun 2020 20:04:29 -0700 Subject: [PATCH 59/66] Support to output unconsumed flags and exit the execution if 
cmdline flags fail to be parsed for tflite evaluation tools. PiperOrigin-RevId: 317989024 Change-Id: I52cc2249246b7d19c9c8a257ac1478d48f7de8fa --- .../evaluation_delegate_provider.cc | 8 +++- tensorflow/lite/tools/evaluation/tasks/BUILD | 4 ++ .../tasks/coco_object_detection/BUILD | 1 - .../tasks/coco_object_detection/run_eval.cc | 21 ++++----- .../tasks/imagenet_image_classification/BUILD | 1 - .../imagenet_image_classification/run_eval.cc | 21 ++++----- .../evaluation/tasks/inference_diff/BUILD | 1 - .../tasks/inference_diff/run_eval.cc | 22 ++++----- .../tools/evaluation/tasks/task_executor.cc | 47 +++++++++++++++++++ .../tools/evaluation/tasks/task_executor.h | 15 +++++- .../evaluation/tasks/task_executor_main.cc | 4 +- 11 files changed, 104 insertions(+), 41 deletions(-) create mode 100644 tensorflow/lite/tools/evaluation/tasks/task_executor.cc diff --git a/tensorflow/lite/tools/evaluation/evaluation_delegate_provider.cc b/tensorflow/lite/tools/evaluation/evaluation_delegate_provider.cc index 42f2666ba9b..fc40440b105 100644 --- a/tensorflow/lite/tools/evaluation/evaluation_delegate_provider.cc +++ b/tensorflow/lite/tools/evaluation/evaluation_delegate_provider.cc @@ -97,7 +97,13 @@ bool DelegateProviders::InitFromCmdlineArgs(int* argc, const char** argv) { auto one_flags = one->CreateFlags(¶ms_); flags.insert(flags.end(), one_flags.begin(), one_flags.end()); } - return Flags::Parse(argc, argv, flags); + + const bool parse_result = Flags::Parse(argc, argv, flags); + if (!parse_result) { + std::string usage = Flags::Usage(argv[0], flags); + TFLITE_LOG(ERROR) << usage; + } + return parse_result; } TfLiteDelegatePtr DelegateProviders::CreateDelegate( diff --git a/tensorflow/lite/tools/evaluation/tasks/BUILD b/tensorflow/lite/tools/evaluation/tasks/BUILD index d8daf170331..5272542f045 100644 --- a/tensorflow/lite/tools/evaluation/tasks/BUILD +++ b/tensorflow/lite/tools/evaluation/tasks/BUILD @@ -10,10 +10,14 @@ package( cc_library( name = "task_executor", + srcs = ["task_executor.cc"], hdrs = ["task_executor.h"], copts = tflite_copts(), linkopts = task_linkopts(), deps = [ + "//tensorflow/lite/tools:command_line_flags", + "//tensorflow/lite/tools:logging", + "//tensorflow/lite/tools/evaluation:evaluation_delegate_provider", "//tensorflow/lite/tools/evaluation/proto:evaluation_config_cc_proto", "@com_google_absl//absl/types:optional", ], diff --git a/tensorflow/lite/tools/evaluation/tasks/coco_object_detection/BUILD b/tensorflow/lite/tools/evaluation/tasks/coco_object_detection/BUILD index b8f77d72acb..dc5f8237f6a 100644 --- a/tensorflow/lite/tools/evaluation/tasks/coco_object_detection/BUILD +++ b/tensorflow/lite/tools/evaluation/tasks/coco_object_detection/BUILD @@ -26,7 +26,6 @@ cc_library( "//tensorflow/lite/c:common", "//tensorflow/lite/tools:command_line_flags", "//tensorflow/lite/tools:logging", - "//tensorflow/lite/tools/evaluation:evaluation_delegate_provider", "//tensorflow/lite/tools/evaluation:evaluation_stage", "//tensorflow/lite/tools/evaluation:utils", "//tensorflow/lite/tools/evaluation/proto:evaluation_config_cc_proto", diff --git a/tensorflow/lite/tools/evaluation/tasks/coco_object_detection/run_eval.cc b/tensorflow/lite/tools/evaluation/tasks/coco_object_detection/run_eval.cc index 765e8fc6465..73491457f38 100644 --- a/tensorflow/lite/tools/evaluation/tasks/coco_object_detection/run_eval.cc +++ b/tensorflow/lite/tools/evaluation/tasks/coco_object_detection/run_eval.cc @@ -21,7 +21,6 @@ limitations under the License. 
#include "absl/types/optional.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/tools/command_line_flags.h" -#include "tensorflow/lite/tools/evaluation/evaluation_delegate_provider.h" #include "tensorflow/lite/tools/evaluation/proto/evaluation_config.pb.h" #include "tensorflow/lite/tools/evaluation/proto/evaluation_stages.pb.h" #include "tensorflow/lite/tools/evaluation/stages/object_detection_stage.h" @@ -49,11 +48,14 @@ std::string GetNameFromPath(const std::string& str) { class CocoObjectDetection : public TaskExecutor { public: - CocoObjectDetection(int* argc, char* argv[]); + CocoObjectDetection() : debug_mode_(false), num_interpreter_threads_(1) {} ~CocoObjectDetection() override {} + protected: + std::vector GetFlags() final; + // If the run is successful, the latest metrics will be returned. - absl::optional Run() final; + absl::optional RunImpl() final; private: void OutputResult(const EvaluationStageMetrics& latest_metrics) const; @@ -68,8 +70,7 @@ class CocoObjectDetection : public TaskExecutor { DelegateProviders delegate_providers_; }; -CocoObjectDetection::CocoObjectDetection(int* argc, char* argv[]) - : debug_mode_(false), num_interpreter_threads_(1) { +std::vector CocoObjectDetection::GetFlags() { std::vector flag_list = { tflite::Flag::CreateFlag(kModelFileFlag, &model_file_path_, "Path to test tflite model file."), @@ -105,12 +106,10 @@ CocoObjectDetection::CocoObjectDetection(int* argc, char* argv[]) "Delegate to use for inference, if available. " "Must be one of {'nnapi', 'gpu', 'xnnpack', 'hexagon'}"), }; - tflite::Flags::Parse(argc, const_cast(argv), flag_list); - DelegateProviders delegate_providers; - delegate_providers.InitFromCmdlineArgs(argc, const_cast(argv)); + return flag_list; } -absl::optional CocoObjectDetection::Run() { +absl::optional CocoObjectDetection::RunImpl() { // Process images in filename-sorted order. std::vector image_paths; if (GetSortedFileNames(StripTrailingSlashes(ground_truth_images_path_), @@ -224,8 +223,8 @@ void CocoObjectDetection::OutputResult( << precision_metrics.overall_mean_average_precision(); } -std::unique_ptr CreateTaskExecutor(int* argc, char* argv[]) { - return std::unique_ptr(new CocoObjectDetection(argc, argv)); +std::unique_ptr CreateTaskExecutor() { + return std::unique_ptr(new CocoObjectDetection()); } } // namespace evaluation diff --git a/tensorflow/lite/tools/evaluation/tasks/imagenet_image_classification/BUILD b/tensorflow/lite/tools/evaluation/tasks/imagenet_image_classification/BUILD index de2a7f96311..941bbc0ff69 100644 --- a/tensorflow/lite/tools/evaluation/tasks/imagenet_image_classification/BUILD +++ b/tensorflow/lite/tools/evaluation/tasks/imagenet_image_classification/BUILD @@ -17,7 +17,6 @@ cc_library( "//tensorflow/lite/c:common", "//tensorflow/lite/tools:command_line_flags", "//tensorflow/lite/tools:logging", - "//tensorflow/lite/tools/evaluation:evaluation_delegate_provider", "//tensorflow/lite/tools/evaluation:evaluation_stage", "//tensorflow/lite/tools/evaluation:utils", "//tensorflow/lite/tools/evaluation/proto:evaluation_config_cc_proto", diff --git a/tensorflow/lite/tools/evaluation/tasks/imagenet_image_classification/run_eval.cc b/tensorflow/lite/tools/evaluation/tasks/imagenet_image_classification/run_eval.cc index 13eeb313ad4..fdc97d44abc 100644 --- a/tensorflow/lite/tools/evaluation/tasks/imagenet_image_classification/run_eval.cc +++ b/tensorflow/lite/tools/evaluation/tasks/imagenet_image_classification/run_eval.cc @@ -20,7 +20,6 @@ limitations under the License. 
#include "absl/types/optional.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/tools/command_line_flags.h" -#include "tensorflow/lite/tools/evaluation/evaluation_delegate_provider.h" #include "tensorflow/lite/tools/evaluation/proto/evaluation_config.pb.h" #include "tensorflow/lite/tools/evaluation/proto/evaluation_stages.pb.h" #include "tensorflow/lite/tools/evaluation/stages/image_classification_stage.h" @@ -50,11 +49,14 @@ std::vector GetFirstN(const std::vector& v, int n) { class ImagenetClassification : public TaskExecutor { public: - ImagenetClassification(int* argc, char* argv[]); + ImagenetClassification() : num_images_(0), num_interpreter_threads_(1) {} ~ImagenetClassification() override {} + protected: + std::vector GetFlags() final; + // If the run is successful, the latest metrics will be returned. - absl::optional Run() final; + absl::optional RunImpl() final; private: void OutputResult(const EvaluationStageMetrics& latest_metrics) const; @@ -67,11 +69,9 @@ class ImagenetClassification : public TaskExecutor { std::string delegate_; int num_images_; int num_interpreter_threads_; - DelegateProviders delegate_providers_; }; -ImagenetClassification::ImagenetClassification(int* argc, char* argv[]) - : num_images_(0), num_interpreter_threads_(1) { +std::vector ImagenetClassification::GetFlags() { std::vector flag_list = { tflite::Flag::CreateFlag(kModelFileFlag, &model_file_path_, "Path to test tflite model file."), @@ -107,11 +107,10 @@ ImagenetClassification::ImagenetClassification(int* argc, char* argv[]) "Delegate to use for inference, if available. " "Must be one of {'nnapi', 'gpu', 'hexagon', 'xnnpack'}"), }; - tflite::Flags::Parse(argc, const_cast(argv), flag_list); - delegate_providers_.InitFromCmdlineArgs(argc, const_cast(argv)); + return flag_list; } -absl::optional ImagenetClassification::Run() { +absl::optional ImagenetClassification::RunImpl() { // Process images in filename-sorted order. std::vector image_files, ground_truth_image_labels; if (GetSortedFileNames(StripTrailingSlashes(ground_truth_images_path_), @@ -203,8 +202,8 @@ void ImagenetClassification::OutputResult( } } -std::unique_ptr CreateTaskExecutor(int* argc, char* argv[]) { - return std::unique_ptr(new ImagenetClassification(argc, argv)); +std::unique_ptr CreateTaskExecutor() { + return std::unique_ptr(new ImagenetClassification()); } } // namespace evaluation diff --git a/tensorflow/lite/tools/evaluation/tasks/inference_diff/BUILD b/tensorflow/lite/tools/evaluation/tasks/inference_diff/BUILD index a53872b50cb..36606722caf 100644 --- a/tensorflow/lite/tools/evaluation/tasks/inference_diff/BUILD +++ b/tensorflow/lite/tools/evaluation/tasks/inference_diff/BUILD @@ -17,7 +17,6 @@ cc_library( "//tensorflow/lite/c:common", "//tensorflow/lite/tools:command_line_flags", "//tensorflow/lite/tools:logging", - "//tensorflow/lite/tools/evaluation:evaluation_delegate_provider", "//tensorflow/lite/tools/evaluation:evaluation_stage", "//tensorflow/lite/tools/evaluation/proto:evaluation_config_cc_proto", "//tensorflow/lite/tools/evaluation/proto:evaluation_stages_cc_proto", diff --git a/tensorflow/lite/tools/evaluation/tasks/inference_diff/run_eval.cc b/tensorflow/lite/tools/evaluation/tasks/inference_diff/run_eval.cc index 814ebe3b3bf..9a3fea0b8a3 100644 --- a/tensorflow/lite/tools/evaluation/tasks/inference_diff/run_eval.cc +++ b/tensorflow/lite/tools/evaluation/tasks/inference_diff/run_eval.cc @@ -19,7 +19,6 @@ limitations under the License. 
#include "absl/types/optional.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/tools/command_line_flags.h" -#include "tensorflow/lite/tools/evaluation/evaluation_delegate_provider.h" #include "tensorflow/lite/tools/evaluation/proto/evaluation_config.pb.h" #include "tensorflow/lite/tools/evaluation/proto/evaluation_stages.pb.h" #include "tensorflow/lite/tools/evaluation/stages/inference_profiler_stage.h" @@ -37,11 +36,14 @@ constexpr char kDelegateFlag[] = "delegate"; class InferenceDiff : public TaskExecutor { public: - InferenceDiff(int* argc, char* argv[]); + InferenceDiff() : num_runs_(50), num_interpreter_threads_(1) {} ~InferenceDiff() override {} + protected: + std::vector GetFlags() final; + // If the run is successful, the latest metrics will be returned. - absl::optional Run() final; + absl::optional RunImpl() final; private: void OutputResult(const EvaluationStageMetrics& latest_metrics) const; @@ -50,11 +52,9 @@ class InferenceDiff : public TaskExecutor { std::string delegate_; int num_runs_; int num_interpreter_threads_; - DelegateProviders delegate_providers_; }; -InferenceDiff::InferenceDiff(int* argc, char* argv[]) - : num_runs_(50), num_interpreter_threads_(1) { +std::vector InferenceDiff::GetFlags() { // Command Line Flags. std::vector flag_list = { tflite::Flag::CreateFlag(kModelFileFlag, &model_file_path_, @@ -72,11 +72,11 @@ InferenceDiff::InferenceDiff(int* argc, char* argv[]) "Delegate to use for test inference, if available. " "Must be one of {'nnapi', 'gpu', 'hexagon', 'xnnpack'}"), }; - tflite::Flags::Parse(argc, const_cast(argv), flag_list); - delegate_providers_.InitFromCmdlineArgs(argc, const_cast(argv)); + + return flag_list; } -absl::optional InferenceDiff::Run() { +absl::optional InferenceDiff::RunImpl() { // Initialize evaluation stage. EvaluationStageConfig eval_config; eval_config.set_name("inference_profiling"); @@ -137,8 +137,8 @@ void InferenceDiff::OutputResult( } } -std::unique_ptr CreateTaskExecutor(int* argc, char* argv[]) { - return std::unique_ptr(new InferenceDiff(argc, argv)); +std::unique_ptr CreateTaskExecutor() { + return std::unique_ptr(new InferenceDiff()); } } // namespace evaluation diff --git a/tensorflow/lite/tools/evaluation/tasks/task_executor.cc b/tensorflow/lite/tools/evaluation/tasks/task_executor.cc new file mode 100644 index 00000000000..e62793dc6ff --- /dev/null +++ b/tensorflow/lite/tools/evaluation/tasks/task_executor.cc @@ -0,0 +1,47 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/lite/tools/evaluation/tasks/task_executor.h" + +#include "absl/types/optional.h" +#include "tensorflow/lite/tools/logging.h" + +namespace tflite { +namespace evaluation { +absl::optional TaskExecutor::Run(int* argc, + char* argv[]) { + auto flag_list = GetFlags(); + bool parse_result = + tflite::Flags::Parse(argc, const_cast(argv), flag_list); + if (!parse_result) { + std::string usage = Flags::Usage(argv[0], flag_list); + TFLITE_LOG(ERROR) << usage; + return absl::nullopt; + } + parse_result = delegate_providers_.InitFromCmdlineArgs( + argc, const_cast(argv)); + if (!parse_result) { + return absl::nullopt; + } + + std::string unconsumed_args = + Flags::ArgsToString(*argc, const_cast(argv)); + if (!unconsumed_args.empty()) { + TFLITE_LOG(WARN) << "Unconsumed cmdline flags: " << unconsumed_args; + } + + return RunImpl(); +} +} // namespace evaluation +} // namespace tflite diff --git a/tensorflow/lite/tools/evaluation/tasks/task_executor.h b/tensorflow/lite/tools/evaluation/tasks/task_executor.h index b50e7d6d03f..caa84283098 100644 --- a/tensorflow/lite/tools/evaluation/tasks/task_executor.h +++ b/tensorflow/lite/tools/evaluation/tasks/task_executor.h @@ -16,6 +16,8 @@ limitations under the License. #define TENSORFLOW_LITE_TOOLS_EVALUATION_TASKS_TASK_EXECUTOR_H_ #include "absl/types/optional.h" +#include "tensorflow/lite/tools/command_line_flags.h" +#include "tensorflow/lite/tools/evaluation/evaluation_delegate_provider.h" #include "tensorflow/lite/tools/evaluation/proto/evaluation_config.pb.h" namespace tflite { @@ -25,13 +27,22 @@ namespace evaluation { class TaskExecutor { public: virtual ~TaskExecutor() {} + // If the run is successful, the latest metrics will be returned. - virtual absl::optional Run() = 0; + absl::optional Run(int* argc, char* argv[]); + + protected: + // Returns a list of commandline flags that this task defines. + virtual std::vector GetFlags() = 0; + + virtual absl::optional RunImpl() = 0; + + DelegateProviders delegate_providers_; }; // Just a declaration. In order to avoid the boilerpolate main-function code, // every evaluation task should define this function. -std::unique_ptr CreateTaskExecutor(int* argc, char* argv[]); +std::unique_ptr CreateTaskExecutor(); } // namespace evaluation } // namespace tflite diff --git a/tensorflow/lite/tools/evaluation/tasks/task_executor_main.cc b/tensorflow/lite/tools/evaluation/tasks/task_executor_main.cc index 6ef1a6724b7..97f8e263659 100644 --- a/tensorflow/lite/tools/evaluation/tasks/task_executor_main.cc +++ b/tensorflow/lite/tools/evaluation/tasks/task_executor_main.cc @@ -18,12 +18,12 @@ limitations under the License. // This could serve as the main function for all eval tools. int main(int argc, char* argv[]) { - auto task_executor = tflite::evaluation::CreateTaskExecutor(&argc, argv); + auto task_executor = tflite::evaluation::CreateTaskExecutor(); if (task_executor == nullptr) { TFLITE_LOG(ERROR) << "Could not create the task evaluation!"; return EXIT_FAILURE; } - const auto metrics = task_executor->Run(); + const auto metrics = task_executor->Run(&argc, argv); if (!metrics.has_value()) { TFLITE_LOG(ERROR) << "Could not run the task evaluation!"; return EXIT_FAILURE; From cb32cf0f0160d1f582787119d0480de3ba8b9b53 Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Tue, 23 Jun 2020 21:06:46 -0700 Subject: [PATCH 60/66] change the size of input to remedy OOM issue. 
PiperOrigin-RevId: 317995769 Change-Id: I1358449e989a41c5621e6a4d56e603387be0490d --- .../preprocessing/image_preprocessing_distribution_test.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/keras/layers/preprocessing/image_preprocessing_distribution_test.py b/tensorflow/python/keras/layers/preprocessing/image_preprocessing_distribution_test.py index 0b93c1d57c6..7fc2b42c919 100644 --- a/tensorflow/python/keras/layers/preprocessing/image_preprocessing_distribution_test.py +++ b/tensorflow/python/keras/layers/preprocessing/image_preprocessing_distribution_test.py @@ -40,9 +40,10 @@ class ImagePreprocessingDistributionTest( preprocessing_test_utils.PreprocessingLayerTest): def test_distribution(self, distribution): - np_images = np.random.random((1000, 32, 32, 3)).astype(np.float32) + # TODO(b/159738418): large image input causes OOM in ubuntu multi gpu. + np_images = np.random.random((32, 32, 32, 3)).astype(np.float32) image_dataset = dataset_ops.Dataset.from_tensor_slices(np_images).batch( - 32, drop_remainder=True) + 16, drop_remainder=True) with distribution.scope(): input_data = keras.Input(shape=(32, 32, 3), dtype=dtypes.float32) @@ -58,7 +59,7 @@ class ImagePreprocessingDistributionTest( output = flatten_layer(preprocessed_image) cls_layer = keras.layers.Dense(units=1, activation="sigmoid") output = cls_layer(output) - model = keras.Model(inputs=input_data, outputs=preprocessed_image) + model = keras.Model(inputs=input_data, outputs=output) model.compile(loss="binary_crossentropy") _ = model.predict(image_dataset) From 4dc9422e243f47d8a31db315f8a7ee8e204750d2 Mon Sep 17 00:00:00 2001 From: Ashwin Murthy Date: Tue, 23 Jun 2020 21:46:03 -0700 Subject: [PATCH 61/66] Small update to the operation fusion g3doc PiperOrigin-RevId: 317999506 Change-Id: I676b755eebea8189b6068ac8785b92b1391764fd --- tensorflow/lite/g3doc/convert/operation_fusion.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/lite/g3doc/convert/operation_fusion.md b/tensorflow/lite/g3doc/convert/operation_fusion.md index c8714179498..74af2a08b81 100644 --- a/tensorflow/lite/g3doc/convert/operation_fusion.md +++ b/tensorflow/lite/g3doc/convert/operation_fusion.md @@ -53,10 +53,10 @@ implemented in the fused LSTM operations. Converting composite operations from TensorFlow to fused operations in TensorFlow Lite is a hard problem. This is because: -1. Composite operations are represented in the TensorFlow graph as an - unstructured set of primitive operations. It can be very challenging to - identify (e.g. via pattern matching) the sub-graph corresponding to such a - composite operation. +1. Composite operations are represented in the TensorFlow graph as a set of + primitive operations without a well defined boundary. It can be very + challenging to identify (e.g. via pattern matching) the sub-graph + corresponding to such a composite operation. 1. There may be more than one TensorFlow implementation targeting a fused TensorFlow Lite operation. For example, there are many LSTM implementations From 55a1bd064482bbe59c7cbf8fa5bbdf7ee4828b5a Mon Sep 17 00:00:00 2001 From: Wenhao Jia Date: Tue, 23 Jun 2020 21:52:26 -0700 Subject: [PATCH 62/66] Simplify the build structure of TpuExecutor. 
PiperOrigin-RevId: 318000146 Change-Id: Ib2e95583653edcbcf85d3a1b17ef8b58ab570dce --- tensorflow/core/tpu/BUILD | 1 - tensorflow/core/tpu/kernels/BUILD | 12 +-- .../core/tpu/kernels/tpu_compile_c_api.h | 2 +- .../core/tpu/kernels/tpu_ops_common_c_api.h | 20 ---- .../core/tpu/kernels/tpu_program_c_api.h | 2 +- .../core/tpu/tpu_api_dlsym_initializer.cc | 1 - tensorflow/core/tpu/tpu_library_init_fns.inc | 1 + tensorflow/stream_executor/tpu/BUILD | 98 ++++--------------- .../stream_executor/tpu/status_helper.h | 1 + .../stream_executor/tpu/tpu_executor_c_api.h | 2 +- .../tpu/tpu_node_context_c_api.h | 3 +- .../stream_executor/tpu/tpu_platform.cc | 2 +- 12 files changed, 30 insertions(+), 115 deletions(-) delete mode 100644 tensorflow/core/tpu/kernels/tpu_ops_common_c_api.h diff --git a/tensorflow/core/tpu/BUILD b/tensorflow/core/tpu/BUILD index aa811f23672..589af63da52 100644 --- a/tensorflow/core/tpu/BUILD +++ b/tensorflow/core/tpu/BUILD @@ -141,7 +141,6 @@ cc_library( "//tensorflow/core/tpu/kernels:tpu_util_c_api_hdrs", "//tensorflow/stream_executor/tpu:tpu_executor_c_api_hdrs", "//tensorflow/stream_executor/tpu:tpu_node_context_c_api_hdrs", - "//tensorflow/stream_executor/tpu:tpu_platform_hdrs", ], ) diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index f69c97b81de..d82cf1b254b 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -1,4 +1,5 @@ # TPU Kernel Implementations + load( "//tensorflow/core/platform:build_config.bzl", "tf_proto_library_cc", @@ -86,8 +87,8 @@ cc_library( hdrs = ["tpu_compile_c_api.h"], deps = [ ":tpu_mesh_state_c_api_hdrs", - ":tpu_ops_common_c_api_hdrs", ":tpu_program_c_api_hdrs", + ":tpu_util_c_api_hdrs", "//tensorflow/core/tpu:libtftpu_header", "//tensorflow/stream_executor/tpu:proto_helper", ], @@ -367,7 +368,6 @@ cc_library( cc_library( name = "tpu_util_hdrs", - srcs = [], hdrs = ["tpu_util.h"], deps = [ ":tpu_compilation_cache_key", @@ -390,17 +390,11 @@ cc_library( alwayslink = True, ) -cc_library( - name = "tpu_ops_common_c_api_hdrs", - hdrs = ["tpu_ops_common_c_api.h"], - alwayslink = True, -) - cc_library( name = "tpu_program_c_api_hdrs", hdrs = ["tpu_program_c_api.h"], deps = [ - ":tpu_ops_common_c_api_hdrs", + ":tpu_util_c_api_hdrs", "//tensorflow/stream_executor/tpu:proto_helper", ], alwayslink = True, diff --git a/tensorflow/core/tpu/kernels/tpu_compile_c_api.h b/tensorflow/core/tpu/kernels/tpu_compile_c_api.h index eab53fe9da4..e82df78b3bd 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_c_api.h +++ b/tensorflow/core/tpu/kernels/tpu_compile_c_api.h @@ -16,8 +16,8 @@ limitations under the License. #define TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILE_C_API_H_ #include "tensorflow/core/tpu/kernels/tpu_mesh_state_c_api.h" -#include "tensorflow/core/tpu/kernels/tpu_ops_common_c_api.h" #include "tensorflow/core/tpu/kernels/tpu_program_c_api.h" +#include "tensorflow/core/tpu/kernels/tpu_util_c_api.h" #include "tensorflow/core/tpu/libtftpu.h" #include "tensorflow/stream_executor/tpu/proto_helper.h" diff --git a/tensorflow/core/tpu/kernels/tpu_ops_common_c_api.h b/tensorflow/core/tpu/kernels/tpu_ops_common_c_api.h deleted file mode 100644 index 987eb64925f..00000000000 --- a/tensorflow/core/tpu/kernels/tpu_ops_common_c_api.h +++ /dev/null @@ -1,20 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_OPS_COMMON_C_API_H_ -#define TENSORFLOW_CORE_TPU_KERNELS_TPU_OPS_COMMON_C_API_H_ - -typedef struct SE_Status SE_Status; - -#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_OPS_COMMON_C_API_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_program_c_api.h b/tensorflow/core/tpu/kernels/tpu_program_c_api.h index 43cbe37d258..254527e7a2a 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_c_api.h +++ b/tensorflow/core/tpu/kernels/tpu_program_c_api.h @@ -15,7 +15,7 @@ limitations under the License. #ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_PROGRAM_C_API_H_ #define TENSORFLOW_CORE_TPU_KERNELS_TPU_PROGRAM_C_API_H_ -#include "tensorflow/core/tpu/kernels/tpu_ops_common_c_api.h" +#include "tensorflow/core/tpu/kernels/tpu_util_c_api.h" #include "tensorflow/stream_executor/tpu/proto_helper.h" typedef struct XLA_TpuProgram XLA_TpuProgram; diff --git a/tensorflow/core/tpu/tpu_api_dlsym_initializer.cc b/tensorflow/core/tpu/tpu_api_dlsym_initializer.cc index c6666421327..495e6a2219b 100644 --- a/tensorflow/core/tpu/tpu_api_dlsym_initializer.cc +++ b/tensorflow/core/tpu/tpu_api_dlsym_initializer.cc @@ -21,7 +21,6 @@ limitations under the License. #include "tensorflow/core/platform/status.h" #include "tensorflow/core/tpu/tpu_api.h" #include "tensorflow/stream_executor/tpu/tpu_node_context_c_api.h" -#include "tensorflow/stream_executor/tpu/tpu_platform.h" #define TFTPU_SET_FN(Struct, FnName) \ Struct->FnName##Fn = \ diff --git a/tensorflow/core/tpu/tpu_library_init_fns.inc b/tensorflow/core/tpu/tpu_library_init_fns.inc index e21d7f195ad..29fdb42d95e 100644 --- a/tensorflow/core/tpu/tpu_library_init_fns.inc +++ b/tensorflow/core/tpu/tpu_library_init_fns.inc @@ -137,6 +137,7 @@ tensorflow::Status SetTpuNodeContextStructFns(void* library_handle) { TFTPU_SET_FN(node_context_fn, TpuNodeContext_Create); TFTPU_SET_FN(node_context_fn, TpuNodeContext_Free); + TFTPU_SET_FN(node_context_fn, TpuNodeContext_Initialize); TFTPU_SET_FN(node_context_fn, TpuNodeContext_StopChipHeartbeats); TFTPU_SET_FN(node_context_fn, TpuNodeContext_CloseTpuHost); diff --git a/tensorflow/stream_executor/tpu/BUILD b/tensorflow/stream_executor/tpu/BUILD index 720ba6bc0c3..71c2c728a17 100644 --- a/tensorflow/stream_executor/tpu/BUILD +++ b/tensorflow/stream_executor/tpu/BUILD @@ -16,7 +16,7 @@ cc_library( "//tensorflow/c:tf_attrtype", "//tensorflow/c:tf_status", "//tensorflow/core/tpu:libtftpu_header", - "//tensorflow/core/tpu/kernels:tpu_ops_common_c_api_hdrs", + "//tensorflow/core/tpu/kernels:tpu_util_c_api_hdrs", ], alwayslink = True, ) @@ -26,8 +26,8 @@ cc_library( hdrs = ["tpu_node_context_c_api.h"], visibility = ["//visibility:public"], deps = [ - ":tpu_executor_c_api_hdrs", "//tensorflow/core/tpu:libtftpu_header", + "//tensorflow/core/tpu/kernels:tpu_util_c_api_hdrs", ], alwayslink = True, ) @@ -38,6 +38,7 @@ cc_library( deps = [ ":tpu_executor_c_api_hdrs", "//tensorflow/core/platform:status", + "//tensorflow/core/tpu/kernels:tpu_util_c_api_hdrs", ], ) @@ -62,77 +63,34 @@ cc_library( deps = ["//tensorflow/core:lib"], ) -cc_library( - 
name = "tpu_stream", - hdrs = ["tpu_stream.h"], - deps = [ - ":c_api_conversions", - ":status_helper", - ":tpu_executor_c_api_hdrs", - ":tpu_stream_interface", - "//tensorflow/core/tpu:tpu_api", - "//tensorflow/stream_executor:stream", - ], -) - -cc_library( - name = "tpu_timer", - hdrs = ["tpu_timer.h"], - deps = [ - ":tpu_executor_c_api_hdrs", - "//tensorflow/core/platform:types", - "//tensorflow/core/tpu:tpu_api", - "//tensorflow/stream_executor:stream", - ], -) - cc_library( name = "tpu_executor", - srcs = ["tpu_executor.cc"], - hdrs = ["tpu_executor.h"], + srcs = [ + "tpu_executor.cc", + "tpu_platform.cc", + ], + hdrs = [ + "tpu_executor.h", + "tpu_platform.h", + "tpu_stream.h", + "tpu_timer.h", + ], deps = [ ":c_api_conversions", ":status_helper", ":tpu_executor_c_api_hdrs", ":tpu_executor_interface", - ":tpu_platform", ":tpu_platform_interface", - ":tpu_stream", - ":tpu_timer", + ":tpu_stream_interface", "//tensorflow/c:tf_status", "//tensorflow/core:lib", + "//tensorflow/core/platform:types", "//tensorflow/core/tpu:tpu_api", "//tensorflow/stream_executor:stream", "//tensorflow/stream_executor/lib", "@com_google_absl//absl/container:flat_hash_map", ], -) - -cc_library( - name = "tpu_executor_hdrs", - hdrs = ["tpu_executor.h"], - deps = [ - ":tpu_executor_c_api_hdrs", - ":tpu_executor_interface", - ":tpu_platform_hdrs", - ":tpu_platform_interface", - "//tensorflow/core/platform:types", - "//tensorflow/stream_executor:stream_header", - "//tensorflow/stream_executor/lib", - "@com_google_absl//absl/container:flat_hash_map", - ], -) - -cc_library( - name = "tpu_platform_hdrs", - hdrs = ["tpu_platform.h"], - deps = [ - ":tpu_executor_c_api_hdrs", - ":tpu_platform_interface", - "//tensorflow/core/platform:types", - "//tensorflow/stream_executor:stream_header", - "@com_google_absl//absl/container:flat_hash_map", - ], + alwayslink = True, ) cc_library( @@ -158,29 +116,11 @@ cc_library( ], ) -cc_library( - name = "tpu_platform", - srcs = ["tpu_platform.cc"], - hdrs = ["tpu_platform.h"], - deps = [ - ":status_helper", - ":tpu_executor_c_api_hdrs", - ":tpu_executor_hdrs", - ":tpu_platform_interface", - "//tensorflow/c:tf_status", - "//tensorflow/core/platform:types", - "//tensorflow/core/tpu:tpu_api", - "//tensorflow/stream_executor:stream", - "@com_google_absl//absl/container:flat_hash_map", - ], - alwayslink = True, -) - cc_library( name = "tpu_transfer_manager", srcs = ["tpu_transfer_manager_registration.cc"], deps = [ - ":tpu_platform", + ":tpu_executor", ":tpu_transfer_manager_base", "//tensorflow/compiler/xla/service:transfer_manager", ], @@ -194,8 +134,8 @@ cc_library( ":c_api_conversions", ":proto_helper", ":status_helper", + ":tpu_executor", ":tpu_executor_c_api_hdrs", - ":tpu_platform", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:xla_data_proto_cc", @@ -211,8 +151,8 @@ cc_library( srcs = ["tpu_computation_placer.cc"], hdrs = ["tpu_computation_placer.h"], deps = [ + ":tpu_executor", ":tpu_executor_c_api_hdrs", - ":tpu_platform", "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla/service:computation_placer", ], diff --git a/tensorflow/stream_executor/tpu/status_helper.h b/tensorflow/stream_executor/tpu/status_helper.h index 8fcf302edac..bc8820f5fef 100644 --- a/tensorflow/stream_executor/tpu/status_helper.h +++ b/tensorflow/stream_executor/tpu/status_helper.h @@ -17,6 +17,7 @@ limitations under the License. 
#define TENSORFLOW_STREAM_EXECUTOR_TPU_STATUS_HELPER_H_ #include "tensorflow/core/platform/status.h" +#include "tensorflow/core/tpu/kernels/tpu_util_c_api.h" #include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" struct StatusHelper { diff --git a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h index eee69a35b23..5911d651b66 100644 --- a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h +++ b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h @@ -21,7 +21,7 @@ limitations under the License. #include "tensorflow/c/tf_attrtype.h" #include "tensorflow/c/tf_status.h" -#include "tensorflow/core/tpu/kernels/tpu_ops_common_c_api.h" +#include "tensorflow/core/tpu/kernels/tpu_util_c_api.h" #include "tensorflow/core/tpu/libtftpu.h" typedef struct SE_Platform SE_Platform; diff --git a/tensorflow/stream_executor/tpu/tpu_node_context_c_api.h b/tensorflow/stream_executor/tpu/tpu_node_context_c_api.h index d47fdf37a46..e7ca506df72 100644 --- a/tensorflow/stream_executor/tpu/tpu_node_context_c_api.h +++ b/tensorflow/stream_executor/tpu/tpu_node_context_c_api.h @@ -15,8 +15,8 @@ limitations under the License. #ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_NODE_CONTEXT_C_API_H_ #define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_NODE_CONTEXT_C_API_H_ +#include "tensorflow/core/tpu/kernels/tpu_util_c_api.h" #include "tensorflow/core/tpu/libtftpu.h" -#include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" typedef struct XLA_TpuNodeContext XLA_TpuNodeContext; @@ -36,6 +36,7 @@ void TpuNodeContext_CloseTpuHost(SE_Status* status); struct TfTpu_NodeContextApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuNodeContext_Create); TFTPU_ADD_FN_IN_STRUCT(TpuNodeContext_Free); + TFTPU_ADD_FN_IN_STRUCT(TpuNodeContext_Initialize); TFTPU_ADD_FN_IN_STRUCT(TpuNodeContext_StopChipHeartbeats); TFTPU_ADD_FN_IN_STRUCT(TpuNodeContext_CloseTpuHost); }; diff --git a/tensorflow/stream_executor/tpu/tpu_platform.cc b/tensorflow/stream_executor/tpu/tpu_platform.cc index 97a97a63351..db6324ecaec 100644 --- a/tensorflow/stream_executor/tpu/tpu_platform.cc +++ b/tensorflow/stream_executor/tpu/tpu_platform.cc @@ -100,7 +100,7 @@ TpuPlatform::GetUncachedExecutor( return status.status(); } return std::make_unique( - this, absl::make_unique(this, executor), + this, std::make_unique(this, executor), config.ordinal); } From c3c7f0418e7a7ec8ae6f0e531d24d6b5724ac3bb Mon Sep 17 00:00:00 2001 From: Tian Lin Date: Tue, 23 Jun 2020 21:59:12 -0700 Subject: [PATCH 63/66] Add quantization description in Model Maker colab. 
PiperOrigin-RevId: 318000747 Change-Id: I6cf6356bd3c582f6906005ba1879e316878cc5f8 --- .../model_maker_image_classification.ipynb | 126 ++++++++++++++---- 1 file changed, 102 insertions(+), 24 deletions(-) diff --git a/tensorflow/lite/g3doc/tutorials/model_maker_image_classification.ipynb b/tensorflow/lite/g3doc/tutorials/model_maker_image_classification.ipynb index 464a5d1b5ef..37b2395dec6 100644 --- a/tensorflow/lite/g3doc/tutorials/model_maker_image_classification.ipynb +++ b/tensorflow/lite/g3doc/tutorials/model_maker_image_classification.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "cellView": "form", "colab": {}, @@ -49,7 +49,7 @@ "metadata": { "colab_type": "text", "id": "nDABAblytltI" - }, + }, "source": [ "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", " \u003ctd\u003e\n", @@ -93,7 +93,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -116,7 +116,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -131,6 +131,7 @@ "\n", "from tensorflow_examples.lite.model_maker.core.data_util.image_dataloader import ImageClassifierDataLoader\n", "from tensorflow_examples.lite.model_maker.core.task import image_classifier\n", + "from tensorflow_examples.lite.model_maker.core.task.configs import QuantizationConfig\n", "from tensorflow_examples.lite.model_maker.core.task.model_spec import mobilenet_v2_spec\n", "from tensorflow_examples.lite.model_maker.core.task.model_spec import ImageModelSpec\n", "\n", @@ -161,7 +162,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "cellView": "form", "colab": {}, @@ -221,7 +222,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -245,7 +246,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -268,7 +269,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -294,7 +295,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -370,7 +371,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -398,7 +399,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -421,7 +422,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -445,7 +446,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -478,7 +479,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -501,7 +502,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -526,7 +527,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -549,7 +550,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, 
"colab_type": "code", @@ -609,7 +610,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -644,7 +645,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -724,6 +725,83 @@ "In this section, we describe several advanced topics, including switching to a different image classification model, changing the training hyperparameters etc.\n" ] }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Gc4Jk8TvBQfm" + }, + "source": [ + "## Post-training quantization on the TensorFLow Lite model\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "tD8BOYrHBiDt" + }, + "source": [ + "[Post-training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization) is a conversion technique that can reduce model size and inference latency, while also improving CPU and hardware accelerator latency, with little degradation in model accuracy. Thus, it's widely used to optimize the model.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "iyIo0d5TCzE2" + }, + "source": [ + "Model Maker supports multiple post-training quantization options. Let's take full integer quantization as an instance. First, define the quantization config to enforce enforce full integer quantization for all ops including the input and output. The input type and output type are `uint8` by default. You may also change them to other types like `int8` by setting `inference_input_type` and `inference_output_type` in config." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "k8hL2mstCxQl" + }, + "outputs": [], + "source": [ + "config = QuantizationConfig.create_full_integer_quantization(representative_data=test_data, is_integer_only=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "K1gzx_rmFMOA" + }, + "source": [ + "Then we export TensorFlow Lite model with such configuration." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "WTJzFQnJFMjr" + }, + "outputs": [], + "source": [ + "model.export(export_dir='.', tflite_filename='model_quant.tflite', quantization_config=config)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Safo0e40wKZW" + }, + "source": [ + "In Colab, you can download the model named `model_quant.tflite` from the left sidebar, same as the uploading part mentioned above." 
+ ] + }, { "cell_type": "markdown", "metadata": { @@ -750,7 +828,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -773,7 +851,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -802,7 +880,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -871,7 +949,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -894,7 +972,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", From 867c19e99bd23ea6560c710f127bfcee5d9b1429 Mon Sep 17 00:00:00 2001 From: Ashwin Murthy Date: Tue, 23 Jun 2020 22:17:14 -0700 Subject: [PATCH 64/66] Change operators to operations in RNN doc PiperOrigin-RevId: 318002821 Change-Id: I04ffa3a58c695ac0b032f5664f5d5828fee2e66e --- tensorflow/lite/g3doc/convert/rnn.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/lite/g3doc/convert/rnn.md b/tensorflow/lite/g3doc/convert/rnn.md index 0954f13a4c7..ce9cf91f867 100644 --- a/tensorflow/lite/g3doc/convert/rnn.md +++ b/tensorflow/lite/g3doc/convert/rnn.md @@ -3,9 +3,9 @@ ## Overview TensorFlow Lite supports converting TensorFlow RNN models to TensorFlow Lite’s -fused LSTM operators. Fused operators exist to maximize the performance of their -underlying kernel implementations, as well as provide a higher level interface -to define complex transformations like quantizatization. +fused LSTM operations. Fused operations exist to maximize the performance of +their underlying kernel implementations, as well as provide a higher level +interface to define complex transformations like quantizatization. Since there are many variants of RNN APIs in TensorFlow, our approach has been two fold: @@ -105,7 +105,7 @@ forward and one for backward, see examples [here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/layers/wrappers.py#L381). Once we see the go\_backward attribute, we recognize it as backward LSTM, then we group forward & backward LSTM together. **This is future work.** Currently, -this creates two UnidirectionalSequenceLSTM operators in the TensorFlow Lite +this creates two UnidirectionalSequenceLSTM operations in the TensorFlow Lite model. ### User-defined LSTM conversion examples @@ -141,7 +141,7 @@ MLIR-pass [here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc#L108). The function’s interface should be treated like an API contract and should contain the arguments needed to convert to fused TensorFlow Lite LSTM -operators - i.e. input, bias, weights, projection, layer normalization, etc. It +operations - i.e. input, bias, weights, projection, layer normalization, etc. It is preferable for the tensors passed as arguments to this function to have known rank (i.e. RankedTensorType in MLIR). This makes it much easier to write conversion code that can assume these tensors as RankedTensorType and helps @@ -196,5 +196,5 @@ follows: the user program. Such a TensorFlow program can still be converted to TensorFlow Lite using the feature being described here. 1. Bidirectional LSTM is currently modelled as two UnidirectionalSequenceLSTM - operators in TensorFlow Lite. 
This will be replaced with a single + operations in TensorFlow Lite. This will be replaced with a single BidirectionalSequenceLSTM op. From 19e03663aa2358ec910748b7919674f39273e311 Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Tue, 23 Jun 2020 23:30:15 -0700 Subject: [PATCH 65/66] Stop reporting errors after 10 mismatching tensor values, cutting down log size on failing builds. PiperOrigin-RevId: 318010085 Change-Id: I9a8b70256b4f04134d9034deae606db61b1135fb --- tensorflow/core/framework/tensor_testutil.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/framework/tensor_testutil.cc b/tensorflow/core/framework/tensor_testutil.cc index 1a7812ce4eb..313451d6b83 100644 --- a/tensorflow/core/framework/tensor_testutil.cc +++ b/tensorflow/core/framework/tensor_testutil.cc @@ -42,11 +42,15 @@ void ExpectClose(const Tensor& x, const Tensor& y, double atol, double rtol) { << "typed_atol is negative: " << typed_atol; ASSERT_GE(typed_rtol, static_cast(0.0)) << "typed_rtol is negative: " << typed_rtol; + const int max_failures = 10; + int num_failures = 0; for (int i = 0; i < size; ++i) { EXPECT_TRUE( internal::Helper::IsClose(Tx[i], Ty[i], typed_atol, typed_rtol)) - << "index = " << i << " x = " << Tx[i] << " y = " << Ty[i] - << " typed_atol = " << typed_atol << " typed_rtol = " << typed_rtol; + << "index = " << (++num_failures, i) << " x = " << Tx[i] + << " y = " << Ty[i] << " typed_atol = " << typed_atol + << " typed_rtol = " << typed_rtol; + ASSERT_LT(num_failures, max_failures) << "Too many mismatches, giving up."; } } From 71aceb9ca98d238a6f883687d84fd9b62ed5c21e Mon Sep 17 00:00:00 2001 From: Tres Popp Date: Wed, 24 Jun 2020 00:38:41 -0700 Subject: [PATCH 66/66] Integrate LLVM at https://github.com/llvm/llvm-project/commit/6507bc56216b PiperOrigin-RevId: 318016873 Change-Id: Ia747d4f68f39773b9e1b74b0b5e2d727363bd61c --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index f2d0c028c5f..49cd146bed5 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -710,8 +710,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "4d1fd33561cf758be00bdbffab1b6a1a0e428fc0" - LLVM_SHA256 = "6d9851ea3c4ff3df57865e0cafc9793c983636cdb6dc9ff3df00816a778e9039" + LLVM_COMMIT = "6507bc56216ba4441790bc581a5b76d9c2ad9774" + LLVM_SHA256 = "d1749ab8a32110fae83881ca6c82383632516c6fd5ffbd2c5dd1b486db224e46" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT),