From b5831006af0ec62189e62d7f6be1c7ca8c8a2d97 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 9 Mar 2020 13:56:05 -0700 Subject: [PATCH 001/220] Add leaf_id field to boosted_trees.DebugOutput, and populate it in BoostedTreesExampleDebugOutputsOp. --- .../kernels/boosted_trees/boosted_trees.proto | 10 ++++----- .../kernels/boosted_trees/prediction_ops.cc | 1 + .../boosted_trees/prediction_ops_test.py | 21 +++++++++++++++++++ 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/kernels/boosted_trees/boosted_trees.proto b/tensorflow/core/kernels/boosted_trees/boosted_trees.proto index 0c54b357c22..1d325ee8af9 100644 --- a/tensorflow/core/kernels/boosted_trees/boosted_trees.proto +++ b/tensorflow/core/kernels/boosted_trees/boosted_trees.proto @@ -162,9 +162,7 @@ message TreeEnsemble { } // DebugOutput contains outputs useful for debugging/model interpretation, at -// the individual example-level. Debug outputs that are available to the user -// are: 1) Directional feature contributions (DFCs) 2) Node IDs for ensemble -// prediction path 3) Leaf node IDs. +// the individual example-level. message DebugOutput { // Return the logits and associated feature splits across prediction paths for // each tree, for every example, at predict time. We will use these values to @@ -173,7 +171,7 @@ message DebugOutput { // id. repeated int32 feature_ids = 1; repeated float logits_path = 2; - - // TODO(crawles): return 2) Node IDs for ensemble prediction path 3) Leaf node - // IDs. + // Return the node_id for each leaf node we reach in our prediction path. 
+ repeated int32 leaf_node_ids = 3; + // TODO(crawles): return 4) Node IDs for ensemble prediction path } diff --git a/tensorflow/core/kernels/boosted_trees/prediction_ops.cc b/tensorflow/core/kernels/boosted_trees/prediction_ops.cc index 19be606f184..418ff6c3dad 100644 --- a/tensorflow/core/kernels/boosted_trees/prediction_ops.cc +++ b/tensorflow/core/kernels/boosted_trees/prediction_ops.cc @@ -361,6 +361,7 @@ class BoostedTreesExampleDebugOutputsOp : public OpKernel { if (tree_id == 0 || node_id > 0) { past_trees_logit += tree_logit; } + example_debug_info.add_leaf_node_ids(node_id); ++tree_id; node_id = 0; } else { // Add to proto. diff --git a/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py b/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py index ed554ea9288..2ed7fd8ce10 100644 --- a/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py +++ b/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py @@ -2673,6 +2673,7 @@ class FeatureContribsOpsTest(test_util.TensorFlowTestCase): # Expected logits are computed by traversing the logit path and # subtracting child logits from parent logits. bias = 1.72 * 0.1 # Root node of tree_0. 
+ expected_leaf_ids = ((0,), (0,)) expected_feature_ids = ((), ()) expected_logits_paths = ((bias,), (bias,)) @@ -2688,14 +2689,17 @@ class FeatureContribsOpsTest(test_util.TensorFlowTestCase): serialized_examples_debug_outputs = session.run(debug_op) feature_ids = [] logits_paths = [] + leaf_ids = [] for example in serialized_examples_debug_outputs: example_debug_outputs = boosted_trees_pb2.DebugOutput() example_debug_outputs.ParseFromString(example) feature_ids.append(example_debug_outputs.feature_ids) logits_paths.append(example_debug_outputs.logits_path) + leaf_ids.append(example_debug_outputs.leaf_node_ids) self.assertAllClose(feature_ids, expected_feature_ids) self.assertAllClose(logits_paths, expected_logits_paths) + self.assertAllClose(expected_leaf_ids, leaf_ids) @test_util.run_deprecated_v1 def testContribsForOnlyABiasNodeMultiDimensionFeature(self): @@ -2734,6 +2738,7 @@ class FeatureContribsOpsTest(test_util.TensorFlowTestCase): # Expected logits are computed by traversing the logit path and # subtracting child logits from parent logits. bias = 1.72 * 0.1 # Root node of tree_0. 
+ expected_leaf_ids = ((0,), (0,)) expected_feature_ids = ((), ()) expected_logits_paths = ((bias,), (bias,)) @@ -2749,14 +2754,17 @@ class FeatureContribsOpsTest(test_util.TensorFlowTestCase): serialized_examples_debug_outputs = session.run(debug_op) feature_ids = [] logits_paths = [] + leaf_ids = [] for example in serialized_examples_debug_outputs: example_debug_outputs = boosted_trees_pb2.DebugOutput() example_debug_outputs.ParseFromString(example) feature_ids.append(example_debug_outputs.feature_ids) logits_paths.append(example_debug_outputs.logits_path) + leaf_ids.append(example_debug_outputs.leaf_node_ids) self.assertAllClose(feature_ids, expected_feature_ids) self.assertAllClose(logits_paths, expected_logits_paths) + self.assertAllClose(leaf_ids, expected_leaf_ids) @test_util.run_deprecated_v1 def testContribsMultipleTreeWhenFirstTreeIsABiasNode(self): @@ -2834,6 +2842,7 @@ class FeatureContribsOpsTest(test_util.TensorFlowTestCase): # example_0 : (bias, 0.1 * 5.5 + bias, 0.1 * 5. + bias) # example_1 : (bias, 0.1 * 7. 
+ bias ) expected_logits_paths = ((1.72, 2.27, 2.22), (1.72, 2.42)) + expected_leaf_ids = ((0, 3), (0, 2)) bucketized_features = [ feature_0_values, feature_1_values, feature_2_values @@ -2847,14 +2856,18 @@ class FeatureContribsOpsTest(test_util.TensorFlowTestCase): serialized_examples_debug_outputs = session.run(debug_op) feature_ids = [] logits_paths = [] + leaf_ids = [] + for example in serialized_examples_debug_outputs: example_debug_outputs = boosted_trees_pb2.DebugOutput() example_debug_outputs.ParseFromString(example) feature_ids.append(example_debug_outputs.feature_ids) logits_paths.append(example_debug_outputs.logits_path) + leaf_ids.append(example_debug_outputs.leaf_node_ids) self.assertAllClose(feature_ids, expected_feature_ids) self.assertAllClose(logits_paths, expected_logits_paths) + self.assertAllClose(leaf_ids, expected_leaf_ids) @test_util.run_deprecated_v1 def testContribsMultipleTreeWhenFirstTreeIsABiasNodeMultiDimFeature(self): @@ -2933,6 +2946,7 @@ class FeatureContribsOpsTest(test_util.TensorFlowTestCase): # example_0 : (bias, 0.1 * 5.5 + bias, 0.1 * 5. + bias) # example_1 : (bias, 0.1 * 7. 
+ bias ) expected_logits_paths = ((1.72, 2.27, 2.22), (1.72, 2.42)) + expected_leaf_ids = ((0, 3), (0, 2)) bucketized_features = [ feature_0_values, feature_1_values, feature_2_values @@ -2946,14 +2960,17 @@ class FeatureContribsOpsTest(test_util.TensorFlowTestCase): serialized_examples_debug_outputs = session.run(debug_op) feature_ids = [] logits_paths = [] + leaf_ids = [] for example in serialized_examples_debug_outputs: example_debug_outputs = boosted_trees_pb2.DebugOutput() example_debug_outputs.ParseFromString(example) feature_ids.append(example_debug_outputs.feature_ids) logits_paths.append(example_debug_outputs.logits_path) + leaf_ids.append(example_debug_outputs.leaf_node_ids) self.assertAllClose(feature_ids, expected_feature_ids) self.assertAllClose(logits_paths, expected_logits_paths) + self.assertAllClose(leaf_ids, expected_leaf_ids) @test_util.run_deprecated_v1 def testContribsMultipleTree(self): @@ -3075,6 +3092,7 @@ class FeatureContribsOpsTest(test_util.TensorFlowTestCase): # 1.0 * -7. 
+ 0.2 * 7 + .114) expected_logits_paths = ((bias, 0.114, 1.214, 1.114, 6.114), (bias, 0.114, 1.514, -5.486)) + expected_leaf_ids = ((1, 3, 2), (1, 2, 1)) bucketized_features = [ feature_0_values, feature_1_values, feature_2_values @@ -3088,14 +3106,17 @@ class FeatureContribsOpsTest(test_util.TensorFlowTestCase): serialized_examples_debug_outputs = session.run(debug_op) feature_ids = [] logits_paths = [] + leaf_ids = [] for example in serialized_examples_debug_outputs: example_debug_outputs = boosted_trees_pb2.DebugOutput() example_debug_outputs.ParseFromString(example) feature_ids.append(example_debug_outputs.feature_ids) logits_paths.append(example_debug_outputs.logits_path) + leaf_ids.append(example_debug_outputs.leaf_node_ids) self.assertAllClose(feature_ids, expected_feature_ids) self.assertAllClose(logits_paths, expected_logits_paths) + self.assertAllClose(expected_leaf_ids, leaf_ids) if __name__ == '__main__': From 5742350cb05c03b250648ceefaa9b637de233390 Mon Sep 17 00:00:00 2001 From: Thibaut Goetghebuer-Planchon Date: Mon, 7 Sep 2020 16:15:23 +0100 Subject: [PATCH 002/220] Add int16x8 support for ABS operator --- tensorflow/lite/kernels/elementwise.cc | 46 +++++++++------ tensorflow/lite/kernels/elementwise_test.cc | 57 ++++++++++++++++--- tensorflow/lite/kernels/register.cc | 2 +- .../lite/tools/versioning/op_version.cc | 9 +++ .../lite/tools/versioning/op_version_test.cc | 10 +++- .../lite/tools/versioning/runtime_version.cc | 1 + 6 files changed, 98 insertions(+), 27 deletions(-) diff --git a/tensorflow/lite/kernels/elementwise.cc b/tensorflow/lite/kernels/elementwise.cc index d23cdedc6c8..9212aeb6cd8 100644 --- a/tensorflow/lite/kernels/elementwise.cc +++ b/tensorflow/lite/kernels/elementwise.cc @@ -58,7 +58,7 @@ bool IsLogicalSupportedType(const TfLiteType type) { } bool IsAbsSupportedType(const TfLiteType type) { - return type == kTfLiteFloat32 || type == kTfLiteInt8; + return type == kTfLiteFloat32 || type == kTfLiteInt8 || type == kTfLiteInt16; } 
typedef bool (*IsSupportedType)(TfLiteType); @@ -81,7 +81,7 @@ TfLiteStatus AbsPrepare(TfLiteContext* context, TfLiteNode* node) { context, (GenericPrepare(context, node)), kTfLiteOk); const TfLiteTensor* input = GetInput(context, node, 0); - if (input->type == kTfLiteInt8) { + if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) { TfLiteTensor* output = GetOutput(context, node, 0); auto* op_data = static_cast(node->user_data); TF_LITE_ENSURE_EQ(context, input->quantization.type, @@ -102,6 +102,10 @@ TfLiteStatus AbsPrepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE(context, output_params->zero_point->size > 0); op_data->input_offset = input_params->zero_point->data[0]; op_data->output_offset = output_params->zero_point->data[0]; + if (input->type == kTfLiteInt16) { + TF_LITE_ENSURE_EQ(context, op_data->input_offset, 0); + TF_LITE_ENSURE_EQ(context, op_data->output_offset, 0); + } const float input_scale = input_params->scale->data[0]; const float output_scale = output_params->scale->data[0]; double scale = input_scale / output_scale; @@ -144,26 +148,34 @@ void AbsFree(TfLiteContext* context, void* buffer) { delete static_cast(buffer); } +template +TfLiteStatus AbsEvalQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteType type) { + const auto* op_data = static_cast(node->user_data); + const int kMin = std::numeric_limits::min(); + const int kMax = std::numeric_limits::max(); + + std::function func = [&](T i) { + const int32_t value = std::abs(i - op_data->input_offset); + const int32_t output = MultiplyByQuantizedMultiplier( + value, op_data->multiplier, op_data->shift) + + op_data->output_offset; + + return static_cast(std::min(std::max(output, kMin), kMax)); + }; + + return EvalImpl(context, node, func, type); +} + TfLiteStatus AbsEval(TfLiteContext* context, TfLiteNode* node) { const TfLiteType type = GetInput(context, node, 0)->type; switch (type) { case kTfLiteFloat32: return EvalImpl(context, node, std::abs, type); - case 
kTfLiteInt8: { - const auto* op_data = static_cast(node->user_data); - const int kMinInt8 = std::numeric_limits::min(); - const int kMaxInt8 = std::numeric_limits::max(); - std::function func = [&](int8_t i) { - const int32_t value = std::abs(i - op_data->input_offset); - return std::min( - std::max(op_data->output_offset + - MultiplyByQuantizedMultiplier( - value, op_data->multiplier, op_data->shift), - kMinInt8), - kMaxInt8); - }; - return EvalImpl(context, node, func, type); - } + case kTfLiteInt8: + return AbsEvalQuantized(context, node, type); + case kTfLiteInt16: + return AbsEvalQuantized(context, node, type); default: TF_LITE_KERNEL_LOG(context, "Current data type %s is not supported.", TfLiteTypeGetName(type)); diff --git a/tensorflow/lite/kernels/elementwise_test.cc b/tensorflow/lite/kernels/elementwise_test.cc index e0f198f8f9b..f03c4cefd24 100644 --- a/tensorflow/lite/kernels/elementwise_test.cc +++ b/tensorflow/lite/kernels/elementwise_test.cc @@ -47,12 +47,12 @@ class ElementWiseOpFloatModel : public ElementWiseOpBaseModel { } }; -class ElementWiseOpInt8Model : public ElementWiseOpBaseModel { +class ElementWiseOpQuantizedModel : public ElementWiseOpBaseModel { public: - ElementWiseOpInt8Model(BuiltinOperator op, TensorData input_tensor_data, - TensorData output_tensor_data) { - input_ = AddInput(input_tensor_data); - output_ = AddOutput(output_tensor_data); + ElementWiseOpQuantizedModel(BuiltinOperator op, TensorData input_tensor_data, + TensorData output_tensor_data) { + input_ = AddInput(SymmetricInt16Scaling(input_tensor_data)); + output_ = AddOutput(SymmetricInt16Scaling(output_tensor_data)); SetBuiltinOp(op, BuiltinOptions_NONE, 0); BuildInterpreter({input_tensor_data.shape}); } @@ -83,6 +83,24 @@ class ElementWiseOpInt8Model : public ElementWiseOpBaseModel { } return output; } + + private: + TensorData& SymmetricInt16Scaling(TensorData& tensor) { + // Symmetric range and null zero-point is required for INT16 tensors. 
As + // SingleOpModel::QuantizationParams calculates the scale on an asymmetric + // base [int_type::min, int_type::max], manually calculate the scale on a + // symmetric range [int_type::min+1, int_type::max] to ensure a null + // zero-point. + if (tensor.type == TensorType_INT16) { + CHECK_EQ(std::abs(tensor.min), tensor.max); + tensor.scale = tensor.max / std::numeric_limits::max(); + tensor.zero_point = 0; + tensor.min = 0; + tensor.max = 0; + } + + return tensor; + } }; class ElementWiseOpBoolModel : public ElementWiseOpBaseModel { @@ -96,6 +114,13 @@ class ElementWiseOpBoolModel : public ElementWiseOpBaseModel { } }; +template +float GetQuantizationStep(float min, float max) { + const float kQuantizedStep = (max - min) / (std::numeric_limits::max() - + std::numeric_limits::min()); + return kQuantizedStep; +} + TEST(ElementWise, Sin) { ElementWiseOpFloatModel m(BuiltinOperator_SIN, {1, 1, 4, 1}); m.PopulateTensor(m.input(), {0, 3.1415926, -3.1415926, 1}); @@ -123,7 +148,7 @@ TEST(ElementWise, Log) { EXPECT_THAT(m.GetTensorShape(m.output()), ElementsAreArray({1, 1, 4, 1})); } -TEST(FloatActivationsOpTest, Abs) { +TEST(ElementWise, Abs) { ElementWiseOpFloatModel m(BuiltinOperator_ABS, {1, 2, 4, 1}); m.PopulateTensor(m.input(), { 0.f, -6.2f, 2.f, 4.f, // @@ -136,7 +161,7 @@ TEST(FloatActivationsOpTest, Abs) { })); } -TEST(FloatActivationsOpTest, AbsInt8) { +TEST(ElementWise, AbsInt8) { std::vector data = {15., 46., 78., -142., -1., -17., -49., 113.}; std::vector abs_data(data.size()); for (int i = 0; i < abs_data.size(); i++) { @@ -148,7 +173,7 @@ TEST(FloatActivationsOpTest, AbsInt8) { const float kOutputScale = abs_max / 255.0; const int input_zero_point = 127 - *minmax.second; const int output_zero_point = -128; - ElementWiseOpInt8Model m( + ElementWiseOpQuantizedModel m( BuiltinOperator_ABS, {TensorType_INT8, {1, 8}, @@ -166,6 +191,22 @@ TEST(FloatActivationsOpTest, AbsInt8) { ElementsAreArray(ArrayFloatNear(abs_data, kInputScale))); } +TEST(ElementWise, 
AbsInt16) { + const float kQuantizedTolerance = GetQuantizationStep(-150, 150); + std::vector data = {15., 46., 78., -142., -1., -17., -49., 113.}; + std::vector abs_data(data.size()); + for (int i = 0; i < abs_data.size(); i++) { + abs_data[i] = std::abs(data[i]); + } + ElementWiseOpQuantizedModel m(BuiltinOperator_ABS, + {TensorType_INT16, {1, 8}, -142, 142}, + {TensorType_INT16, {1, 8}, -150, 150}); + m.QuantizeAndPopulate(m.input(), data); + m.Invoke(); + EXPECT_THAT(m.ExtractDequantVector(m.output()), + ElementsAreArray(ArrayFloatNear(abs_data, kQuantizedTolerance))); +} + TEST(ElementWise, Sqrt) { ElementWiseOpFloatModel m(BuiltinOperator_SQRT, {1, 1, 4, 1}); m.PopulateTensor(m.input(), {0, 1, 2, 4}); diff --git a/tensorflow/lite/kernels/register.cc b/tensorflow/lite/kernels/register.cc index e020298fc8f..27ae7da7970 100644 --- a/tensorflow/lite/kernels/register.cc +++ b/tensorflow/lite/kernels/register.cc @@ -35,7 +35,7 @@ namespace builtin { BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_ABS, Register_ABS(), /* min_version = */ 1, - /* max_version = */ 2); + /* max_version = */ 3); AddBuiltin(BuiltinOperator_HARD_SWISH, Register_HARD_SWISH()); AddBuiltin(BuiltinOperator_RELU, Register_RELU(), /* min_version = */ 1, /* max_version = */ 2); diff --git a/tensorflow/lite/tools/versioning/op_version.cc b/tensorflow/lite/tools/versioning/op_version.cc index 7edf459eb90..46b0bb06ceb 100644 --- a/tensorflow/lite/tools/versioning/op_version.cc +++ b/tensorflow/lite/tools/versioning/op_version.cc @@ -369,6 +369,15 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) { return 1; case BuiltinOperator_ABS: + if (op_sig.input_types.at(0) == TensorType_INT16) { + return 3; + } + if (op_sig.input_types.at(0) == TensorType_INT8 || + op_sig.input_types.at(0) == TensorType_UINT8) { + return 2; + } + return 1; + case BuiltinOperator_RELU: if (op_sig.input_types.at(0) == TensorType_INT8 || op_sig.input_types.at(0) == TensorType_UINT8) { diff --git 
a/tensorflow/lite/tools/versioning/op_version_test.cc b/tensorflow/lite/tools/versioning/op_version_test.cc index 82ebad701cd..aa834790091 100644 --- a/tensorflow/lite/tools/versioning/op_version_test.cc +++ b/tensorflow/lite/tools/versioning/op_version_test.cc @@ -721,10 +721,18 @@ TEST(OpVersionTest, VersioningAbsTest) { // int8 input is version 2. fake_op_sig = { - .op = BuiltinOperator_RESIZE_NEAREST_NEIGHBOR, + .op = BuiltinOperator_ABS, .input_types = std::vector{TensorType_INT8}, .output_types = std::vector{TensorType_INT8}, }; EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 2); + + // int16 input is version 3. + fake_op_sig = { + .op = BuiltinOperator_ABS, + .input_types = std::vector{TensorType_INT16}, + .output_types = std::vector{TensorType_INT16}, + }; + EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 3); } } // namespace tflite diff --git a/tensorflow/lite/tools/versioning/runtime_version.cc b/tensorflow/lite/tools/versioning/runtime_version.cc index a656356b84c..dd7cb89f020 100644 --- a/tensorflow/lite/tools/versioning/runtime_version.cc +++ b/tensorflow/lite/tools/versioning/runtime_version.cc @@ -306,6 +306,7 @@ std::string FindMinimumRuntimeVersionForOp(tflite::BuiltinOperator op_code, {{BuiltinOperator_ZEROS_LIKE, 1}, "1.12.0"}, {{BuiltinOperator_ABS, 1}, "1.13.0"}, {{BuiltinOperator_ABS, 2}, kPendingReleaseVersion}, + {{BuiltinOperator_ABS, 3}, kPendingReleaseVersion}, {{BuiltinOperator_HARD_SWISH, 1}, "1.15.0"}, {{BuiltinOperator_FILL, 1}, "1.13.0"}, {{BuiltinOperator_FILL, 2}, "2.3.0"}, From c03abbfd76cc85a9956cbd1bb68f8e99d1043347 Mon Sep 17 00:00:00 2001 From: Thibaut Goetghebuer-Planchon Date: Fri, 18 Sep 2020 12:24:16 +0100 Subject: [PATCH 003/220] Update ABS operator version supported by the reference kernel --- tensorflow/lite/kernels/register_ref.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/kernels/register_ref.cc b/tensorflow/lite/kernels/register_ref.cc index c8fb46adb96..45b8e36b655 100644 
--- a/tensorflow/lite/kernels/register_ref.cc +++ b/tensorflow/lite/kernels/register_ref.cc @@ -190,7 +190,7 @@ const TfLiteRegistration* BuiltinRefOpResolver::FindOp(const char* op, BuiltinRefOpResolver::BuiltinRefOpResolver() { AddBuiltin(BuiltinOperator_ABS, Register_ABS(), /* min_version = */ 1, - /* max_version = */ 2); + /* max_version = */ 3); AddBuiltin(BuiltinOperator_HARD_SWISH, Register_HARD_SWISH_REF()); AddBuiltin(BuiltinOperator_RELU, Register_RELU(), /* min_version = */ 1, /* max_version = */ 2); From 38230d4884e590e9024b61e701613fe1c62d7bbc Mon Sep 17 00:00:00 2001 From: xiaohong1031 Date: Thu, 24 Sep 2020 15:47:54 -0700 Subject: [PATCH 004/220] DNN 0.x code cleanup - fusedbatchnorm op --- .../kernels/mkl/mkl_fused_batch_norm_op.cc | 352 +++++------------- 1 file changed, 93 insertions(+), 259 deletions(-) diff --git a/tensorflow/core/kernels/mkl/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl/mkl_fused_batch_norm_op.cc index 3b2c4f84039..b40748b7522 100644 --- a/tensorflow/core/kernels/mkl/mkl_fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/mkl/mkl_fused_batch_norm_op.cc @@ -14,18 +14,17 @@ limitations under the License. 
==============================================================================*/ #ifdef INTEL_MKL #include "mkldnn.hpp" -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/kernels/fused_batch_norm_op.h" #include "tensorflow/core/kernels/no_op.h" -#include "tensorflow/core/util/mkl_types.h" #include "tensorflow/core/util/mkl_util.h" #include "tensorflow/core/util/tensor_format.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#define GET_FLAG(bn_flag) static_cast(BN_FLAGS::bn_flag) +#define GET_FLAG(bn_flag) static_cast(mkldnn::normalization_flags::bn_flag) #define IS_SET(cflag) (context_.flags & GET_FLAG(cflag)) using mkldnn::batch_normalization_backward; @@ -47,39 +46,24 @@ struct MklBatchNormFwdParams { float eps; bool training; FusedBNActivationMode activation_mode; -#ifndef ENABLE_MKLDNN_V1 - MEMORY_FORMAT src_format; -#else memory::desc src_md; -#endif // !ENABLE_MKLDNN_V1 MklBatchNormFwdParams(const memory::dims& src_dims, int depth, float eps, -#ifndef ENABLE_MKLDNN_V1 - bool training, MEMORY_FORMAT src_format, - FusedBNActivationMode activation_mode) -#else bool training, memory::desc src_md, FusedBNActivationMode activation_mode) -#endif // !ENABLE_MKLDNN_V1 : src_dims(src_dims), depth(depth), eps(eps), training(training), activation_mode(activation_mode), -#ifndef ENABLE_MKLDNN_V1 - src_format(src_format) { - } -#else - src_md(src_md) { - } -#endif // !ENABLE_MKLDNN_V1 + src_md(src_md) {} }; template class MklFusedBatchNormFwdPrimitive : public MklPrimitive { public: explicit MklFusedBatchNormFwdPrimitive(const MklBatchNormFwdParams& fwdParams) - : MklPrimitive(engine(ENGINE_CPU, 0)) { + : MklPrimitive(engine(engine::kind::cpu, 0)) { if (context_.bn_fwd == nullptr) Setup(fwdParams); } @@ -94,8 +78,8 @@ 
class MklFusedBatchNormFwdPrimitive : public MklPrimitive { void Execute(const T* src_data, const U* weights_data, T* dst_data, U* mean_data, U* variance_data, std::shared_ptr fwd_stream, U* workspace_data) { - // TODO: Create a common function and avoid the duplicate code #ifdef ENABLE_MKLDNN_THREADPOOL + // TODO: Create a common function and avoid the duplicate code context_.src_mem->set_data_handle( static_cast(const_cast(src_data)), *fwd_stream); context_.dst_mem->set_data_handle(static_cast(dst_data), @@ -133,13 +117,8 @@ class MklFusedBatchNormFwdPrimitive : public MklPrimitive { context_.ws_mem->set_data_handle(workspace_data); } #endif // ENABLE_MKLDNN_THREADPOOL -#ifdef ENABLE_MKLDNN_V1 // Execute batch-normalization forward primitives. execute_primitives(context_.fwd_primitives, fwd_stream, context_.net_args); -#else - fwd_stream.reset(new stream(stream::kind::eager_nostore)); - fwd_stream->submit(context_.fwd_primitives); -#endif // ENABLE_MKLDNN_V1 context_.src_mem->set_data_handle(DummyData); context_.dst_mem->set_data_handle(DummyData); @@ -158,19 +137,7 @@ class MklFusedBatchNormFwdPrimitive : public MklPrimitive { } } - MEMORY_PRIMITIVE_DESC GetDstPd() const { return context_.dst_mem->GET_DESC; } - -#ifndef ENABLE_MKLDNN_V1 - // In MKL-DNN v1.x, memory format tags only provide a partial description - // of the memory layout. Hence, these functions are disabled for v1.x. - mkldnn_memory_format_t GetSrcMemoryFormat() const { - return context_.src_mem->get_primitive_desc().desc().data.format; - } - - mkldnn_memory_format_t GetDstFmt() const { - return (*context_.dst_mem).get_primitive_desc().desc().data.format; - } -#endif // !ENABLE_MKLDNN_V1 + memory::desc GetDstPd() const { return context_.dst_mem->get_desc(); } std::shared_ptr GetBatchNormFwdPd() const { return context_.fwd_pd; @@ -199,10 +166,7 @@ class MklFusedBatchNormFwdPrimitive : public MklPrimitive { // BatchNorm forward primitive. 
std::shared_ptr bn_fwd; std::vector fwd_primitives; - -#ifdef ENABLE_MKLDNN_V1 std::vector> net_args; -#endif // ENABLE_MKLDNN_V1 BatchNormFwdContext() : flags(0), @@ -224,7 +188,6 @@ class MklFusedBatchNormFwdPrimitive : public MklPrimitive { context_.pkind = fwdParams.training ? prop_kind::forward_training : prop_kind::forward_scoring; -#ifdef ENABLE_MKLDNN_V1 if (fwdParams.activation_mode == FusedBNActivationMode::kRelu) { context_.flags |= GET_FLAG(fuse_norm_relu); } @@ -234,72 +197,52 @@ class MklFusedBatchNormFwdPrimitive : public MklPrimitive { auto fwd_desc = batch_normalization_forward::desc( context_.pkind, src_md, fwdParams.eps, static_cast(context_.flags)); -#else - // Memory descriptor - auto src_md = memory::desc({fwdParams.src_dims}, MklDnnType(), - fwdParams.src_format); - auto fwd_desc = batch_normalization_forward::desc( - context_.pkind, src_md, fwdParams.eps, context_.flags); -#endif // ENABLE_MKLDNN_V1 context_.fwd_pd.reset(new BatchNormFwdPd(fwd_desc, cpu_engine_)); // Create memory primitive based on dummy data - context_.src_mem.reset(new MEMORY_CONSTRUCTOR( - context_.fwd_pd->PRIMITIVE_DESC_SRC, cpu_engine_, DummyData)); - context_.dst_mem.reset(new MEMORY_CONSTRUCTOR( - context_.fwd_pd->PRIMITIVE_DESC_DST, cpu_engine_, DummyData)); + context_.src_mem.reset( + new memory(context_.fwd_pd->src_desc(), cpu_engine_, DummyData)); + context_.dst_mem.reset( + new memory(context_.fwd_pd->dst_desc(), cpu_engine_, DummyData)); memory::dims s_dims = {2, fwdParams.depth}; memory::dims m_dims = {1, fwdParams.depth}; if (IS_SET(use_scale_shift)) { - context_.weights_mem.reset(new MEMORY_CONSTRUCTOR_USING_MEM_PD( - s_dims, U, MEMORY_FORMAT::nc, cpu_engine_, DummyData)); + context_.weights_mem.reset( + new memory({{s_dims}, MklDnnType(), memory::format_tag::nc}, + cpu_engine_, DummyData)); } if (fwdParams.training || (IS_SET(use_global_stats))) { - context_.mean_mem.reset(new MEMORY_CONSTRUCTOR_USING_MEM_PD( - m_dims, U, MEMORY_FORMAT::nc, cpu_engine_, 
DummyData)); + context_.mean_mem.reset( + new memory({{m_dims}, MklDnnType(), memory::format_tag::nc}, + cpu_engine_, DummyData)); - context_.variance_mem.reset(new MEMORY_CONSTRUCTOR_USING_MEM_PD( - m_dims, U, MEMORY_FORMAT::nc, cpu_engine_, DummyData)); + context_.variance_mem.reset( + new memory({{m_dims}, MklDnnType(), memory::format_tag::nc}, + cpu_engine_, DummyData)); } -#ifdef ENABLE_MKLDNN_V1 if (IS_SET(fuse_norm_relu)) { - context_.ws_mem.reset(new MEMORY_CONSTRUCTOR( - context_.fwd_pd->workspace_desc(), cpu_engine_, DummyData)); + context_.ws_mem.reset(new memory(context_.fwd_pd->workspace_desc(), + cpu_engine_, DummyData)); } -#endif // ENABLE_MKLDNN_V1 // BatchNorm forward primitive. // TODO(intel-tf): Merge all the #ifdefs and simplify code if (!fwdParams.training && !(IS_SET(use_global_stats))) { -#ifdef ENABLE_MKLDNN_V1 if ((IS_SET(use_scale_shift)) && mkldnn_use_scaleshift) { context_.net_args.push_back( {{MKLDNN_ARG_SRC, *context_.src_mem}, {MKLDNN_ARG_WEIGHTS, *context_.weights_mem}, - { MKLDNN_ARG_DST, - *context_.dst_mem }}); + {MKLDNN_ARG_DST, *context_.dst_mem}}); } else { context_.net_args.push_back({{MKLDNN_ARG_SRC, *context_.src_mem}, - { MKLDNN_ARG_DST, - *context_.dst_mem }}); + {MKLDNN_ARG_DST, *context_.dst_mem}}); } context_.bn_fwd.reset(new batch_normalization_forward(*context_.fwd_pd)); -#else - if ((IS_SET(use_scale_shift)) && GET_FLAG(use_scale_shift)) { - context_.bn_fwd.reset(new batch_normalization_forward( - *context_.fwd_pd, *context_.src_mem, *context_.weights_mem, - *context_.dst_mem)); - } else { - context_.bn_fwd.reset(new batch_normalization_forward( - *context_.fwd_pd, *context_.src_mem, *context_.dst_mem)); - } -#endif // ENABLE_MKLDNN_V1 } else if (IS_SET(use_global_stats)) { -#ifdef ENABLE_MKLDNN_V1 if ((IS_SET(use_scale_shift)) && GET_FLAG(use_scale_shift)) { if (IS_SET(fuse_norm_relu)) { context_.net_args.push_back( @@ -308,16 +251,14 @@ class MklFusedBatchNormFwdPrimitive : public MklPrimitive { 
{MKLDNN_ARG_VARIANCE, *context_.variance_mem}, {MKLDNN_ARG_WEIGHTS, *context_.weights_mem}, {MKLDNN_ARG_DST, *context_.dst_mem}, - { MKLDNN_ARG_WORKSPACE, - *context_.ws_mem }}); + {MKLDNN_ARG_WORKSPACE, *context_.ws_mem}}); } else { context_.net_args.push_back( {{MKLDNN_ARG_SRC, *context_.src_mem}, {MKLDNN_ARG_MEAN, *context_.mean_mem}, {MKLDNN_ARG_VARIANCE, *context_.variance_mem}, {MKLDNN_ARG_WEIGHTS, *context_.weights_mem}, - { MKLDNN_ARG_DST, - *context_.dst_mem }}); + {MKLDNN_ARG_DST, *context_.dst_mem}}); } } else { if (IS_SET(fuse_norm_relu)) { @@ -326,34 +267,17 @@ class MklFusedBatchNormFwdPrimitive : public MklPrimitive { {MKLDNN_ARG_MEAN, *context_.mean_mem}, {MKLDNN_ARG_VARIANCE, *context_.variance_mem}, {MKLDNN_ARG_DST, *context_.dst_mem}, - { MKLDNN_ARG_WORKSPACE, - *context_.ws_mem }}); + {MKLDNN_ARG_WORKSPACE, *context_.ws_mem}}); } else { context_.net_args.push_back( {{MKLDNN_ARG_SRC, *context_.src_mem}, {MKLDNN_ARG_MEAN, *context_.mean_mem}, {MKLDNN_ARG_VARIANCE, *context_.variance_mem}, - { MKLDNN_ARG_DST, - *context_.dst_mem }}); + {MKLDNN_ARG_DST, *context_.dst_mem}}); } } context_.bn_fwd.reset(new batch_normalization_forward(*context_.fwd_pd)); -#else - if ((IS_SET(use_scale_shift)) && GET_FLAG(use_scale_shift)) { - context_.bn_fwd.reset(new batch_normalization_forward( - *context_.fwd_pd, *context_.src_mem, - (const primitive::at)*context_.mean_mem, - (const primitive::at)*context_.variance_mem, *context_.weights_mem, - *context_.dst_mem)); - } else { - context_.bn_fwd.reset(new batch_normalization_forward( - *context_.fwd_pd, *context_.src_mem, - (const primitive::at)*context_.mean_mem, - (const primitive::at)*context_.variance_mem, *context_.dst_mem)); - } -#endif // ENABLE_MKLDNN_V1 } else { -#ifdef ENABLE_MKLDNN_V1 if ((IS_SET(use_scale_shift)) && GET_FLAG(use_scale_shift)) { if (IS_SET(fuse_norm_relu)) { context_.net_args.push_back( @@ -362,16 +286,14 @@ class MklFusedBatchNormFwdPrimitive : public MklPrimitive { {MKLDNN_ARG_DST, 
*context_.dst_mem}, {MKLDNN_ARG_MEAN, *context_.mean_mem}, {MKLDNN_ARG_VARIANCE, *context_.variance_mem}, - { MKLDNN_ARG_WORKSPACE, - *context_.ws_mem }}); + {MKLDNN_ARG_WORKSPACE, *context_.ws_mem}}); } else { context_.net_args.push_back( {{MKLDNN_ARG_SRC, *context_.src_mem}, {MKLDNN_ARG_WEIGHTS, *context_.weights_mem}, {MKLDNN_ARG_DST, *context_.dst_mem}, {MKLDNN_ARG_MEAN, *context_.mean_mem}, - { MKLDNN_ARG_VARIANCE, - *context_.variance_mem }}); + {MKLDNN_ARG_VARIANCE, *context_.variance_mem}}); } } else { if (IS_SET(fuse_norm_relu)) { @@ -380,28 +302,16 @@ class MklFusedBatchNormFwdPrimitive : public MklPrimitive { {MKLDNN_ARG_DST, *context_.dst_mem}, {MKLDNN_ARG_MEAN, *context_.mean_mem}, {MKLDNN_ARG_VARIANCE, *context_.variance_mem}, - { MKLDNN_ARG_WORKSPACE, - *context_.ws_mem }}); + {MKLDNN_ARG_WORKSPACE, *context_.ws_mem}}); } else { - context_.net_args.push_back({{MKLDNN_ARG_SRC, *context_.src_mem}, - {MKLDNN_ARG_DST, *context_.dst_mem}, - {MKLDNN_ARG_MEAN, *context_.mean_mem}, - { MKLDNN_ARG_VARIANCE, - *context_.variance_mem }}); + context_.net_args.push_back( + {{MKLDNN_ARG_SRC, *context_.src_mem}, + {MKLDNN_ARG_DST, *context_.dst_mem}, + {MKLDNN_ARG_MEAN, *context_.mean_mem}, + {MKLDNN_ARG_VARIANCE, *context_.variance_mem}}); } } context_.bn_fwd.reset(new batch_normalization_forward(*context_.fwd_pd)); -#else - if ((IS_SET(use_scale_shift)) && GET_FLAG(use_scale_shift)) { - context_.bn_fwd.reset(new batch_normalization_forward( - *context_.fwd_pd, *context_.src_mem, *context_.weights_mem, - *context_.dst_mem, *context_.mean_mem, *context_.variance_mem)); - } else { - context_.bn_fwd.reset(new batch_normalization_forward( - *context_.fwd_pd, *context_.src_mem, *context_.dst_mem, - *context_.mean_mem, *context_.variance_mem)); - } -#endif // ENABLE_MKLDNN_V1 } context_.fwd_primitives.push_back(*context_.bn_fwd); @@ -469,25 +379,11 @@ struct MklBatchNormBwdParams { float eps; bool training; -#ifndef ENABLE_MKLDNN_V1 - MEMORY_FORMAT src_format; -#else 
memory::desc src_md; memory::desc diff_dst_md; -#endif // !ENABLE_MKLDNN_V1 MklBatchNormBwdParams(memory::dims src_dims, memory::dims diff_dst_dims, int depth, float eps, bool training, -#ifndef ENABLE_MKLDNN_V1 - MEMORY_FORMAT src_format) - : src_dims(src_dims), - diff_dst_dims(diff_dst_dims), - depth(depth), - eps(eps), - training(training), - src_format(src_format) { - } -#else memory::desc src_md, memory::desc diff_dst_md) : src_dims(src_dims), diff_dst_dims(diff_dst_dims), @@ -495,16 +391,14 @@ struct MklBatchNormBwdParams { eps(eps), training(training), src_md(src_md), - diff_dst_md(diff_dst_md) { - } -#endif // !ENABLE_MKLDNN_V1 + diff_dst_md(diff_dst_md) {} }; template class MklFusedBatchNormBwdPrimitive : public MklPrimitive { public: explicit MklFusedBatchNormBwdPrimitive(const MklBatchNormBwdParams& bwdParams) - : MklPrimitive(engine(ENGINE_CPU, 0)) { + : MklPrimitive(engine(engine::kind::cpu, 0)) { if (context_.bn_bwd == nullptr) Setup(bwdParams); } @@ -526,8 +420,8 @@ class MklFusedBatchNormBwdPrimitive : public MklPrimitive { const T* diff_dst_data, const U* weights_data, T* diff_src_data, U* diff_weights_data, U* res_space_data, std::shared_ptr bwd_stream) { - // TODO: Create a common function and avoid the duplicate code #ifdef ENABLE_MKLDNN_THREADPOOL + // TODO: Create a common function and avoid the duplicate code context_.src_mem->set_data_handle( static_cast(const_cast(src_data)), *bwd_stream); context_.mean_mem->set_data_handle( @@ -565,14 +459,10 @@ class MklFusedBatchNormBwdPrimitive : public MklPrimitive { context_.diff_src_mem->set_data_handle(static_cast(diff_src_data)); #endif // ENABLE_MKLDNN_THREADPOOL -#ifdef ENABLE_MKLDNN_V1 + // Execute backward batch-normalization primitives. 
DCHECK_EQ(context_.bwd_primitives.size(), context_.net_args.size()); execute_primitives(context_.bwd_primitives, bwd_stream, context_.net_args); -#else - bwd_stream.reset(new stream(stream::kind::eager_nostore)); - bwd_stream->submit(context_.bwd_primitives); -#endif // ENABLE_MKLDNN_V1 // After execution, set data handle back to DummyData. context_.src_mem->set_data_handle(DummyData); @@ -586,23 +476,11 @@ class MklFusedBatchNormBwdPrimitive : public MklPrimitive { context_.diff_src_mem->set_data_handle(DummyData); } -#ifndef ENABLE_MKLDNN_V1 - mkldnn_memory_format_t GetSrcMemoryFormat() const { - return context_.src_mem->get_primitive_desc().desc().data.format; - } - - mkldnn_memory_format_t GetDiffDstMemoryFormat() const { - return context_.diff_dst_mem->get_primitive_desc().desc().data.format; - } -#endif // !ENABLE_MKLDNN_V1 - std::shared_ptr GetBatchNormBwdPd() const { return context_.bwd_pd; } - MEMORY_PRIMITIVE_DESC GetDiffSrcPd() { - return GET_MEMORY_PRIMITIVE_DESC_FROM_MEM_PTR(context_.diff_src_mem); - } + memory::desc GetDiffSrcPd() { return context_.diff_src_mem->get_desc(); } private: struct BatchNormBwdContext { @@ -624,10 +502,7 @@ class MklFusedBatchNormBwdPrimitive : public MklPrimitive { // Backward batch-normalization primitive. std::shared_ptr bn_bwd; std::vector bwd_primitives; - -#ifdef ENABLE_MKLDNN_V1 std::vector> net_args; -#endif // ENABLE_MKLDNN_V1 BatchNormBwdContext() : src_mem(nullptr), @@ -645,30 +520,23 @@ class MklFusedBatchNormBwdPrimitive : public MklPrimitive { ? GET_FLAG(use_scale_shift) : (GET_FLAG(use_scale_shift) | GET_FLAG(use_global_stats)); -// Memory descriptors. -#ifndef ENABLE_MKLDNN_V1 - auto src_md = memory::desc({bwdParams.src_dims}, MklDnnType(), - bwdParams.src_format); - auto diff_dst_md = memory::desc({bwdParams.diff_dst_dims}, MklDnnType(), - bwdParams.src_format); -#else + // Memory descriptors. 
auto src_md = bwdParams.src_md; auto diff_dst_md = bwdParams.diff_dst_md; -#endif // !ENABLE_MKLDNN_V1 - auto variance_desc = - memory::desc({1, bwdParams.depth}, MklDnnType(), MEMORY_FORMAT::nc); - auto mean_desc = - memory::desc({1, bwdParams.depth}, MklDnnType(), MEMORY_FORMAT::nc); - auto weights_desc = - memory::desc({2, bwdParams.depth}, MklDnnType(), MEMORY_FORMAT::nc); + auto variance_desc = memory::desc({1, bwdParams.depth}, MklDnnType(), + memory::format_tag::nc); + auto mean_desc = memory::desc({1, bwdParams.depth}, MklDnnType(), + memory::format_tag::nc); + auto weights_desc = memory::desc({2, bwdParams.depth}, MklDnnType(), + memory::format_tag::nc); auto diff_weights_desc = weights_desc; // Forward batch-normalization descriptor and primitive descriptor. // Adding this back due to type difference with context.flags - auto bn_flags = - bwdParams.training - ? BN_FLAGS::use_scale_shift - : (BN_FLAGS::use_scale_shift | BN_FLAGS::use_global_stats); + auto bn_flags = bwdParams.training + ? mkldnn::normalization_flags::use_scale_shift + : (mkldnn::normalization_flags::use_scale_shift | + mkldnn::normalization_flags::use_global_stats); auto fwd_desc = batch_normalization_forward::desc( prop_kind::forward_training, src_md, bwdParams.eps, bn_flags); auto fwd_pd = BatchNormFwdPd(fwd_desc, cpu_engine_); @@ -683,37 +551,27 @@ class MklFusedBatchNormBwdPrimitive : public MklPrimitive { context_.bwd_pd.reset(new BatchNormBwdPd(bwd_desc, cpu_engine_, fwd_pd)); // Create memory primitives. 
- context_.src_mem.reset( - new MEMORY_CONSTRUCTOR_USING_MD(src_md, cpu_engine_, DummyData)); + context_.src_mem.reset(new memory(src_md, cpu_engine_, DummyData)); context_.diff_dst_mem.reset( - new MEMORY_CONSTRUCTOR_USING_MD(diff_dst_md, cpu_engine_, DummyData)); + new memory(diff_dst_md, cpu_engine_, DummyData)); context_.variance_mem.reset( - new MEMORY_CONSTRUCTOR_USING_MD(variance_desc, cpu_engine_, DummyData)); - context_.mean_mem.reset( - new MEMORY_CONSTRUCTOR_USING_MD(mean_desc, cpu_engine_, DummyData)); + new memory(variance_desc, cpu_engine_, DummyData)); + context_.mean_mem.reset(new memory(mean_desc, cpu_engine_, DummyData)); context_.weights_mem.reset( - new MEMORY_CONSTRUCTOR_USING_MD(weights_desc, cpu_engine_, DummyData)); - context_.diff_weights_mem.reset(new MEMORY_CONSTRUCTOR_USING_MD( - diff_weights_desc, cpu_engine_, DummyData)); - context_.diff_src_mem.reset( - new MEMORY_CONSTRUCTOR_USING_MD(src_md, cpu_engine_, DummyData)); + new memory(weights_desc, cpu_engine_, DummyData)); + context_.diff_weights_mem.reset( + new memory(diff_weights_desc, cpu_engine_, DummyData)); + context_.diff_src_mem.reset(new memory(src_md, cpu_engine_, DummyData)); -#ifdef ENABLE_MKLDNN_V1 context_.bn_bwd.reset(new batch_normalization_backward(*context_.bwd_pd)); - context_.net_args.push_back({{MKLDNN_ARG_SRC, *context_.src_mem}, - {MKLDNN_ARG_MEAN, *context_.mean_mem}, - {MKLDNN_ARG_VARIANCE, *context_.variance_mem}, - {MKLDNN_ARG_DIFF_DST, *context_.diff_dst_mem}, - {MKLDNN_ARG_WEIGHTS, *context_.weights_mem}, - {MKLDNN_ARG_DIFF_SRC, *context_.diff_src_mem}, - { MKLDNN_ARG_DIFF_WEIGHTS, - *context_.diff_weights_mem }}); -#else - context_.bn_bwd.reset(new batch_normalization_backward( - *context_.bwd_pd, *context_.src_mem, *context_.mean_mem, - *context_.variance_mem, *context_.diff_dst_mem, *context_.weights_mem, - *context_.diff_src_mem, *context_.diff_weights_mem)); -#endif // ENABLE_MKLDNN_V1 + context_.net_args.push_back( + {{MKLDNN_ARG_SRC, 
*context_.src_mem}, + {MKLDNN_ARG_MEAN, *context_.mean_mem}, + {MKLDNN_ARG_VARIANCE, *context_.variance_mem}, + {MKLDNN_ARG_DIFF_DST, *context_.diff_dst_mem}, + {MKLDNN_ARG_WEIGHTS, *context_.weights_mem}, + {MKLDNN_ARG_DIFF_SRC, *context_.diff_src_mem}, + {MKLDNN_ARG_DIFF_WEIGHTS, *context_.diff_weights_mem}}); context_.bwd_primitives.push_back(*context_.bn_bwd); } @@ -796,11 +654,6 @@ class MklFusedBatchNormOp : public OpKernel { mean_values_ = nullptr; variance_values_ = nullptr; -#ifndef ENABLE_MKLDNN_V1 - OP_REQUIRES(context, !is_batch_norm_ex, - errors::InvalidArgument( - "_MklFusedBatchNormEx is not supported in DNNL 0.x .")); -#endif if (!is_batch_norm_ex) { activation_mode_ = FusedBNActivationMode::kIdentity; } else { @@ -893,15 +746,15 @@ class MklFusedBatchNormOp : public OpKernel { MklDnnData weights(&cpu_engine_); MklDnnData wksp(&cpu_engine_); - MEMORY_FORMAT dnn_fmt; - MKL_TENSOR_FORMAT mkl_tensor_fmt; + memory::format_tag dnn_fmt; + MklTensorFormat mkl_tensor_fmt; if (dnn_shape_src.IsMklTensor()) { if (dnn_shape_src.IsTensorInNCHWFormat()) { - dnn_fmt = MEMORY_FORMAT::nchw; - mkl_tensor_fmt = MKL_TENSOR_FORMAT_NCHW; + dnn_fmt = memory::format_tag::nchw; + mkl_tensor_fmt = MklTensorFormat::FORMAT_NCHW; } else { - dnn_fmt = MEMORY_FORMAT::nhwc; - mkl_tensor_fmt = MKL_TENSOR_FORMAT_NHWC; + dnn_fmt = memory::format_tag::nhwc; + mkl_tensor_fmt = MklTensorFormat::FORMAT_NHWC; } } else { mkl_tensor_fmt = TFDataFormatToMklDnnDataFormat(tensor_format_); @@ -918,14 +771,8 @@ class MklFusedBatchNormOp : public OpKernel { ? dnn_shape_src.GetMklLayout() : memory::desc(src_dims, MklDnnType(), dnn_fmt); -#ifdef ENABLE_MKLDNN_V1 MklBatchNormFwdParams fwdParams(src_dims, depth_, epsilon_, is_training_, src_md, activation_mode_); -#else - MklBatchNormFwdParams fwdParams( - src_dims, depth_, epsilon_, is_training_, - static_cast(src_md.data.format), activation_mode_); -#endif // ENABLE_MKLDNN_V1 // Get forward batch-normalization op from the primitive caching pool. 
MklFusedBatchNormFwdPrimitive* bn_fwd = MklFusedBatchNormFwdPrimitiveFactory::Get(fwdParams); @@ -933,8 +780,7 @@ class MklFusedBatchNormOp : public OpKernel { // Allocate workspace tensor U* ws_data = nullptr; if (fwdParams.activation_mode == FusedBNActivationMode::kRelu) { -#ifdef ENABLE_MKLDNN_V1 - MEMORY_PRIMITIVE_DESC workspace_pd = + memory::desc workspace_pd = bn_fwd->GetBatchNormFwdPd()->workspace_desc(); size_t workspace_bytes = workspace_pd.get_size(); workspace_tf_shape.AddDim(workspace_bytes); @@ -947,7 +793,6 @@ class MklFusedBatchNormOp : public OpKernel { wksp.SetUsrMem(workspace_pd, reserved_space_tensor); ws_data = static_cast(wksp.GetOpMem().get_data_handle()); } -#endif // ENABLE_MKLDNN_V1 } else { // There is actually no workspace tensor out, so we make a dummy one. size_t workspace_bytes = 0; @@ -986,12 +831,9 @@ class MklFusedBatchNormOp : public OpKernel { // Check if reorder is needed for src. const T* src_data = nullptr; std::shared_ptr bn_fwd_pd = bn_fwd->GetBatchNormFwdPd(); - if (IS_SRC_REORDER_NEEDED(src_md, bn_fwd_pd, bn_fwd)) { + if (src_md != bn_fwd_pd->src_desc()) { src.SetUsrMem(src_md, &src_tensor); - src.CheckReorderToOpMem( - MEMORY_PD_WITHOUT_DATA(GET_SRC_DESC_FROM_OP_PD(bn_fwd_pd), - cpu_engine_), - context); + src.CheckReorderToOpMem(bn_fwd_pd->src_desc(), cpu_engine_, context); src_data = static_cast(src.GetOpMem().get_data_handle()); } else { src_data = static_cast(const_cast(src_tensor.flat().data())); @@ -1057,9 +899,9 @@ class MklFusedBatchNormOp : public OpKernel { std::memcpy(batch_variance_data, variance_data, depth_ * sizeof(U)); } } catch (mkldnn::error& e) { - string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + string(e.message) + ", in file " + - string(__FILE__) + ":" + std::to_string(__LINE__); + string error_msg = "Status: " + std::to_string(e.status) + ", message: " + + string(e.message) + ", in file " + string(__FILE__) + + ":" + std::to_string(__LINE__); OP_REQUIRES_OK( context, 
errors::Aborted("Operation received an exception:", error_msg)); @@ -1075,7 +917,7 @@ class MklFusedBatchNormOp : public OpKernel { U* variance_values_; size_t depth_; // Batch normalization is performed for per channel. FusedBNActivationMode activation_mode_; - engine cpu_engine_ = engine(ENGINE_CPU, 0); + engine cpu_engine_ = engine(engine::kind::cpu, 0); void ExtractParams(OpKernelContext* context) { const Tensor& input = MklGetInput(context, 0); @@ -1287,15 +1129,15 @@ class MklFusedBatchNormGradOp : public OpKernel { ExtractParams(context); } - MEMORY_FORMAT dnn_fmt; - MKL_TENSOR_FORMAT mkl_tensor_fmt; + memory::format_tag dnn_fmt; + MklTensorFormat mkl_tensor_fmt; if (dnn_shape_src.IsMklTensor()) { if (dnn_shape_src.IsTensorInNCHWFormat()) { - dnn_fmt = MEMORY_FORMAT::nchw; - mkl_tensor_fmt = MKL_TENSOR_FORMAT_NCHW; + dnn_fmt = memory::format_tag::nchw; + mkl_tensor_fmt = MklTensorFormat::FORMAT_NCHW; } else { - dnn_fmt = MEMORY_FORMAT::nhwc; - mkl_tensor_fmt = MKL_TENSOR_FORMAT_NHWC; + dnn_fmt = memory::format_tag::nhwc; + mkl_tensor_fmt = MklTensorFormat::FORMAT_NHWC; } } else { mkl_tensor_fmt = TFDataFormatToMklDnnDataFormat(tensor_format_); @@ -1339,14 +1181,8 @@ class MklFusedBatchNormGradOp : public OpKernel { diff_weights.AllocateBuffer(2 * depth_ * sizeof(U)); -#ifdef ENABLE_MKLDNN_V1 MklBatchNormBwdParams bwdParams(src_dims, diff_dst_dims, depth_, epsilon_, is_training_, src_md, diff_dst_md); -#else - MklBatchNormBwdParams bwdParams( - src_dims, diff_dst_dims, depth_, epsilon_, is_training_, - static_cast(src_md.data.format)); -#endif // ENABLE_MKLDNN_V1 MklFusedBatchNormBwdPrimitive* bn_bwd = MklFusedBatchNormBwdPrimitiveFactory::Get(bwdParams); @@ -1354,12 +1190,10 @@ class MklFusedBatchNormGradOp : public OpKernel { const T* diff_dst_data = diff_dst_tensor.flat().data(); // Check if diff_dst input needs to be reordered std::shared_ptr bn_bwd_pd = bn_bwd->GetBatchNormBwdPd(); - if (IS_DIFF_DST_REORDER_NEEDED(diff_dst_md, bn_bwd_pd, bn_bwd)) { + if 
(diff_dst_md != bn_bwd_pd->diff_dst_desc()) { diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor); - diff_dst.CheckReorderToOpMem( - MEMORY_PD_WITHOUT_DATA(GET_DIFF_DST_DESC_FROM_OP_PD(bn_bwd_pd), - cpu_engine_), - context); + diff_dst.CheckReorderToOpMem(bn_bwd_pd->diff_dst_desc(), cpu_engine_, + context); diff_dst_data = static_cast(diff_dst.GetOpMem().get_data_handle()); } else { diff_dst_data = @@ -1417,9 +1251,9 @@ class MklFusedBatchNormGradOp : public OpKernel { reinterpret_cast(diff_weights_data + depth_), depth_ * sizeof(U)); } catch (mkldnn::error& e) { - string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + string(e.message) + ", in file " + - string(__FILE__) + ":" + std::to_string(__LINE__); + string error_msg = "Status: " + std::to_string(e.status) + ", message: " + + string(e.message) + ", in file " + string(__FILE__) + + ":" + std::to_string(__LINE__); OP_REQUIRES_OK( context, errors::Aborted("Operation received an exception:", error_msg)); @@ -1431,7 +1265,7 @@ class MklFusedBatchNormGradOp : public OpKernel { TensorFormat tensor_format_; size_t depth_; // Batch normalization is performed for per channel. 
bool is_training_; - engine cpu_engine_ = engine(ENGINE_CPU, 0); + engine cpu_engine_ = engine(engine::kind::cpu, 0); void ExtractParams(OpKernelContext* context) { const Tensor& input = MklGetInput(context, 0); From 04237f354f67fa6a898b10a5b08bbb41ece6c7da Mon Sep 17 00:00:00 2001 From: Thibaut Goetghebuer-Planchon Date: Wed, 23 Sep 2020 11:21:59 +0100 Subject: [PATCH 005/220] Add int16x8 support for REDUCE_MIN and REDUCE_MAX operators --- tensorflow/lite/kernels/reduce.cc | 11 +- tensorflow/lite/kernels/reduce_test.cc | 436 ++++++++++-------- tensorflow/lite/kernels/register.cc | 4 +- tensorflow/lite/kernels/register_ref.cc | 4 +- tensorflow/lite/toco/tflite/op_version.cc | 2 + .../lite/tools/versioning/op_version.cc | 4 +- .../lite/tools/versioning/op_version_test.cc | 8 + .../lite/tools/versioning/runtime_version.cc | 2 + 8 files changed, 260 insertions(+), 211 deletions(-) diff --git a/tensorflow/lite/kernels/reduce.cc b/tensorflow/lite/kernels/reduce.cc index c3debef0f86..458c2273215 100644 --- a/tensorflow/lite/kernels/reduce.cc +++ b/tensorflow/lite/kernels/reduce.cc @@ -223,6 +223,11 @@ TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_TYPES_EQ(context, op_context.axis->type, kTfLiteInt32); TF_LITE_ENSURE_OK(context, InitializeTemporaries(context, node, &op_context)); + if (op_context.input->type == kTfLiteInt16) { + TF_LITE_ENSURE_EQ(context, op_context.input->params.zero_point, 0); + TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point, 0); + } + TfLiteTensor* resolved_axis; TF_LITE_ENSURE_OK( context, GetTemporarySafe(context, node, /*index=*/1, &resolved_axis)); @@ -535,7 +540,8 @@ TfLiteStatus EvalLogic(TfLiteContext* context, TfLiteNode* node, if (input->dims->data[i] == 0) return kTfLiteOk; } - if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) { + if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8 || + input->type == kTfLiteInt16) { TF_LITE_ENSURE_EQ(context, input->params.scale, 
op_context->output->params.scale); TF_LITE_ENSURE_EQ(context, input->params.zero_point, @@ -635,6 +641,9 @@ TfLiteStatus EvalGeneric(TfLiteContext* context, TfLiteNode* node) { case kTfLiteInt8: return EvalType(context, node, &op_context, reduce_type); break; + case kTfLiteInt16: + return EvalType(context, node, &op_context, reduce_type); + break; case kTfLiteBool: return EvalType(context, node, &op_context, reduce_type); break; diff --git a/tensorflow/lite/kernels/reduce_test.cc b/tensorflow/lite/kernels/reduce_test.cc index 2e724189fde..6296e722a2a 100644 --- a/tensorflow/lite/kernels/reduce_test.cc +++ b/tensorflow/lite/kernels/reduce_test.cc @@ -998,148 +998,162 @@ TEST(DynamicFloatMaxOpTest, Scale) { EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({9.527}))); } -TEST(ConstUint8MaxOpTest, NotKeepDims) { - float kQuantizedTolerance = GetTolerance(-1.0, 1.0); +template +void ConstMaxOpTestNotKeepDims() { + const float kMin = -1; + const float kMax = + std::numeric_limits::max() / + static_cast(std::numeric_limits::max() + 1); + const float kQuantizedTolerance = GetTolerance(-1.0, 1.0); std::vector data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6}; - MaxOpConstModel m({TensorType_UINT8, {1, 3, 2}, -1.0, 1.0}, - {TensorType_UINT8, {2}, -1.0, 1.0}, {1}, {1}, false); - m.QuantizeAndPopulate(m.Input(), data); + MaxOpConstModel m({tensor_type, {1, 3, 2}, 1.0 * kMin, 1.0 * kMax}, + {tensor_type, {2}, 1.0 * kMin, 1.0 * kMax}, {1}, {1}, + false); + m.QuantizeAndPopulate(m.Input(), data); m.Invoke(); EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2})); - EXPECT_THAT(m.GetDequantizedOutput(), - ElementsAreArray( - ArrayFloatNear({0.501961, 0.603922}, kQuantizedTolerance))); + EXPECT_THAT( + m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({0.5, 0.6}, kQuantizedTolerance))); +} + +TEST(ConstUint8MaxOpTest, NotKeepDims) { + ConstMaxOpTestNotKeepDims(); } TEST(ConstInt8MaxOpTest, NotKeepDims) { - float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + 
ConstMaxOpTestNotKeepDims(); +} + +TEST(ConstInt16MaxOpTest, NotKeepDims) { + ConstMaxOpTestNotKeepDims(); +} + +template +void ConstMaxOpTestKeepDims() { + const float kMin = -1; + const float kMax = + std::numeric_limits::max() / + static_cast(std::numeric_limits::max() + 1); + const float kQuantizedTolerance = GetTolerance(-1.0, 1.0); std::vector data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6}; - MaxOpConstModel m({TensorType_INT8, {1, 3, 2}, -1.0, 1.0}, - {TensorType_INT8, {2}, -1.0, 1.0}, {1}, {1}, false); - m.QuantizeAndPopulate(m.Input(), data); + MaxOpConstModel m({tensor_type, {3, 2}, 1.0 * kMin, 1.0 * kMax}, + {tensor_type, {3}, 1.0 * kMin, 1.0 * kMax}, {1}, {1}, true); + m.QuantizeAndPopulate(m.Input(), data); m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2})); - EXPECT_THAT(m.GetDequantizedOutput(), - ElementsAreArray( - ArrayFloatNear({0.501961, 0.603922}, kQuantizedTolerance))); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1})); + EXPECT_THAT( + m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({0.4, 0.4, 0.6}, kQuantizedTolerance))); } TEST(ConstUint8MaxOpTest, KeepDims) { - float kQuantizedTolerance = GetTolerance(-1.0, 1.0); - std::vector data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6}; - MaxOpConstModel m({TensorType_UINT8, {3, 2}, -1.0, 1.0}, - {TensorType_UINT8, {3}, -1.0, 1.0}, {1}, {1}, true); - m.QuantizeAndPopulate(m.Input(), data); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1})); - EXPECT_THAT(m.GetDequantizedOutput(), - ElementsAreArray( - ArrayFloatNear({0.4, 0.4, 0.603922}, kQuantizedTolerance))); + ConstMaxOpTestKeepDims(); } TEST(ConstInt8MaxOpTest, KeepDims) { - float kQuantizedTolerance = GetTolerance(-1.0, 1.0); - std::vector data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6}; - MaxOpConstModel m({TensorType_INT8, {3, 2}, -1.0, 1.0}, - {TensorType_INT8, {3}, -1.0, 1.0}, {1}, {1}, true); - m.QuantizeAndPopulate(m.Input(), data); + ConstMaxOpTestKeepDims(); +} + +TEST(ConstInt16MaxOpTest, KeepDims) { 
+ ConstMaxOpTestKeepDims(); +} + +template +void DynamicMaxOpTestNotKeepDims() { + const float kMin = -1; + const float kMax = + std::numeric_limits::max() / + static_cast(std::numeric_limits::max() + 1); + const float kQuantizedTolerance = GetTolerance(-5.0, 5.0); + std::vector data = {1.3, -4.8, -3.6, 0.24}; + MaxOpDynamicModel m({tensor_type, {2, 2}, 5.0 * kMin, 5.0 * kMax}, + {tensor_type, {2}, 5.0 * kMin, 5.0 * kMax}, + {TensorType_INT32, {1}}, false); + std::vector axis = {1}; + m.SetAxis(axis); + m.QuantizeAndPopulate(m.Input(), data); m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1})); - EXPECT_THAT(m.GetDequantizedOutput(), - ElementsAreArray( - ArrayFloatNear({0.4, 0.4, 0.603922}, kQuantizedTolerance))); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2})); + EXPECT_THAT( + m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({1.3, 0.24}, kQuantizedTolerance))); } TEST(DynamicUint8MaxOpTest, NotKeepDims) { - float kQuantizedTolerance = GetTolerance(-5.0, 2.0); - std::vector data = {1.3, -4.8, -3.6, 0.24}; - MaxOpDynamicModel m({TensorType_UINT8, {2, 2}, -5.0, 2.0}, - {TensorType_UINT8, {2}, -5.0, 2.0}, - {TensorType_INT32, {1}}, false); - std::vector axis = {1}; - m.SetAxis(axis); - m.QuantizeAndPopulate(m.Input(), data); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2})); - EXPECT_THAT(m.GetDequantizedOutput(), - ElementsAreArray( - ArrayFloatNear({1.2902, 0.247059}, kQuantizedTolerance))); + DynamicMaxOpTestNotKeepDims(); } TEST(DynamicInt8MaxOpTest, NotKeepDims) { - float kQuantizedTolerance = GetTolerance(-5.0, 2.0); - std::vector data = {1.3, -4.8, -3.6, 0.24}; - MaxOpDynamicModel m({TensorType_INT8, {2, 2}, -5.0, 2.0}, - {TensorType_INT8, {2}, -5.0, 2.0}, - {TensorType_INT32, {1}}, false); - std::vector axis = {1}; + DynamicMaxOpTestNotKeepDims(); +} + +TEST(DynamicInt16MaxOpTest, NotKeepDims) { + DynamicMaxOpTestNotKeepDims(); +} + +template +void DynamicMaxOpTestKeepDims() { + const float kMin = 
-1; + const float kMax = + std::numeric_limits::max() / + static_cast(std::numeric_limits::max() + 1); + const float kQuantizedTolerance = GetTolerance(-12.0, 12.0); + std::vector data = {11.14, -0.14, 7.423, 0.879}; + MaxOpDynamicModel m({tensor_type, {2, 2}, 12.0 * kMin, 12.0 * kMax}, + {tensor_type, {2}, 12.0 * kMin, 12.0 * kMax}, + {TensorType_INT32, {1}}, true); + std::vector axis = {0}; m.SetAxis(axis); - m.QuantizeAndPopulate(m.Input(), data); + m.QuantizeAndPopulate(m.Input(), data); m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2})); - EXPECT_THAT(m.GetDequantizedOutput(), - ElementsAreArray( - ArrayFloatNear({1.2902, 0.247059}, kQuantizedTolerance))); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2})); + EXPECT_THAT( + m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({11.14, 0.879}, kQuantizedTolerance))); } TEST(DynamicUint8MaxOpTest, KeepDims) { - float kQuantizedTolerance = GetTolerance(-10.0, 12.0); - std::vector data = {11.14, -0.14, 7.423, 0.879}; - MaxOpDynamicModel m({TensorType_UINT8, {2, 2}, -10.0, 12.0}, - {TensorType_UINT8, {2}, -10.0, 12.0}, - {TensorType_INT32, {1}}, true); - std::vector axis = {0}; - m.SetAxis(axis); - m.QuantizeAndPopulate(m.Input(), data); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2})); - EXPECT_THAT(m.GetDequantizedOutput(), - ElementsAreArray( - ArrayFloatNear({11.1294, 0.862745}, kQuantizedTolerance))); + DynamicMaxOpTestKeepDims(); } TEST(DynamicInt8MaxOpTest, KeepDims) { - float kQuantizedTolerance = GetTolerance(-10.0, 12.0); - std::vector data = {11.14, -0.14, 7.423, 0.879}; - MaxOpDynamicModel m({TensorType_INT8, {2, 2}, -10.0, 12.0}, - {TensorType_INT8, {2}, -10.0, 12.0}, + DynamicMaxOpTestKeepDims(); +} + +TEST(DynamicInt16MaxOpTest, KeepDims) { + DynamicMaxOpTestKeepDims(); +} + +template +void DynamicMaxOpTestScalar() { + const float kMin = -1; + const float kMax = + std::numeric_limits::max() / + static_cast(std::numeric_limits::max() + 1); + 
const float kQuantizedTolerance = GetTolerance(-12.0, 12.0); + std::vector data = {11.14}; + MaxOpDynamicModel m({tensor_type, {}, 12.0 * kMin, 12.0 * kMax}, + {tensor_type, {}, 12.0 * kMin, 12.0 * kMax}, {TensorType_INT32, {1}}, true); std::vector axis = {0}; - m.SetAxis(axis); - m.QuantizeAndPopulate(m.Input(), data); + m.QuantizeAndPopulate(m.Input(), data); m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2})); - EXPECT_THAT(m.GetDequantizedOutput(), - ElementsAreArray( - ArrayFloatNear({11.1294, 0.862745}, kQuantizedTolerance))); + EXPECT_THAT(m.GetOutputShape(), IsEmpty()); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({11.14}, kQuantizedTolerance))); } TEST(DynamicUint8MaxOpTest, Scalar) { - float kQuantizedTolerance = GetTolerance(-10.0, 12.0); - std::vector data = {11.14}; - MaxOpDynamicModel m({TensorType_UINT8, {}, -10.0, 12.0}, - {TensorType_UINT8, {}, -10.0, 12.0}, - {TensorType_INT32, {1}}, true); - std::vector axis = {0}; - m.QuantizeAndPopulate(m.Input(), data); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), IsEmpty()); - EXPECT_THAT(m.GetDequantizedOutput(), - ElementsAreArray(ArrayFloatNear({11.1294}, kQuantizedTolerance))); + DynamicMaxOpTestScalar(); } TEST(DynamicInt8MaxOpTest, Scalar) { - float kQuantizedTolerance = GetTolerance(-10.0, 12.0); - std::vector data = {11.14}; - MaxOpDynamicModel m({TensorType_INT8, {}, -10.0, 12.0}, - {TensorType_INT8, {}, -10.0, 12.0}, - {TensorType_INT32, {1}}, true); - std::vector axis = {0}; - m.QuantizeAndPopulate(m.Input(), data); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), IsEmpty()); - EXPECT_THAT(m.GetDequantizedOutput(), - ElementsAreArray(ArrayFloatNear({11.1294}, kQuantizedTolerance))); + DynamicMaxOpTestScalar(); +} + +TEST(DynamicInt16MaxOpTest, Scalar) { + DynamicMaxOpTestScalar(); } // Tests for reduce_min @@ -1221,148 +1235,162 @@ TEST(DynamicFloatMinOpTest, Scalar) { EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({9.527}))); } 
-TEST(ConstUint8MinOpTest, NotKeepDims) { - float kQuantizedTolerance = GetTolerance(-1.0, 1.0); +template +void ConstMinOpTestNotKeepDims() { + const float kMin = -1; + const float kMax = + std::numeric_limits::max() / + static_cast(std::numeric_limits::max() + 1); + const float kQuantizedTolerance = GetTolerance(-1.0, 1.0); std::vector data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6}; - MinOpConstModel m({TensorType_UINT8, {1, 3, 2}, -1.0, 1.0}, - {TensorType_UINT8, {2}, -1.0, 1.0}, {1}, {1}, false); - m.QuantizeAndPopulate(m.Input(), data); + MinOpConstModel m({tensor_type, {1, 3, 2}, 1.0 * kMin, 1.0 * kMax}, + {tensor_type, {2}, 1.0 * kMin, 1.0 * kMax}, {1}, {1}, + false); + m.QuantizeAndPopulate(m.Input(), data); m.Invoke(); EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2})); EXPECT_THAT( - m.GetDequantizedOutput(), - ElementsAreArray(ArrayFloatNear({0.294117, 0.2}, kQuantizedTolerance))); + m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({0.3, 0.2}, kQuantizedTolerance))); +} + +TEST(ConstUint8MinOpTest, NotKeepDims) { + ConstMinOpTestNotKeepDims(); } TEST(ConstInt8MinOpTest, NotKeepDims) { - float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + ConstMinOpTestNotKeepDims(); +} + +TEST(ConstInt16MinOpTest, NotKeepDims) { + ConstMinOpTestNotKeepDims(); +} + +template +void ConstMinOpTestKeepDims() { + const float kMin = -1; + const float kMax = + std::numeric_limits::max() / + static_cast(std::numeric_limits::max() + 1); + const float kQuantizedTolerance = GetTolerance(-1.0, 1.0); std::vector data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6}; - MinOpConstModel m({TensorType_INT8, {1, 3, 2}, -1.0, 1.0}, - {TensorType_INT8, {2}, -1.0, 1.0}, {1}, {1}, false); - m.QuantizeAndPopulate(m.Input(), data); + MinOpConstModel m({tensor_type, {3, 2}, 1.0 * kMin, 1.0 * kMax}, + {tensor_type, {3}, 1.0 * kMin, 1.0 * kMax}, {1}, {1}, true); + m.QuantizeAndPopulate(m.Input(), data); m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2})); + 
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1})); EXPECT_THAT( - m.GetDequantizedOutput(), - ElementsAreArray(ArrayFloatNear({0.294117, 0.2}, kQuantizedTolerance))); + m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({0.2, 0.3, 0.5}, kQuantizedTolerance))); } TEST(ConstUint8MinOpTest, KeepDims) { - float kQuantizedTolerance = GetTolerance(-1.0, 1.0); - std::vector data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6}; - MinOpConstModel m({TensorType_UINT8, {3, 2}, -1.0, 1.0}, - {TensorType_UINT8, {3}, -1.0, 1.0}, {1}, {1}, true); - m.QuantizeAndPopulate(m.Input(), data); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1})); - EXPECT_THAT( - m.GetDequantizedOutput(), - ElementsAreArray(ArrayFloatNear({0.2, 0.3, 0.5}, kQuantizedTolerance))); + ConstMinOpTestKeepDims(); } TEST(ConstInt8MinOpTest, KeepDims) { - float kQuantizedTolerance = GetTolerance(-1.0, 1.0); - std::vector data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6}; - MinOpConstModel m({TensorType_INT8, {3, 2}, -1.0, 1.0}, - {TensorType_INT8, {3}, -1.0, 1.0}, {1}, {1}, true); - m.QuantizeAndPopulate(m.Input(), data); + ConstMinOpTestKeepDims(); +} + +TEST(ConstInt16MinOpTest, KeepDims) { + ConstMinOpTestKeepDims(); +} + +template +void DynamicMinOpTestNotKeepDims() { + const float kMin = -1; + const float kMax = + std::numeric_limits::max() / + static_cast(std::numeric_limits::max() + 1); + const float kQuantizedTolerance = GetTolerance(-5.0, 5.0); + std::vector data = {1.3, -4.8, -3.6, 0.24}; + MinOpDynamicModel m({tensor_type, {2, 2}, 5.0 * kMin, 5.0 * kMax}, + {tensor_type, {2}, 5.0 * kMin, 5.0 * kMax}, + {TensorType_INT32, {1}}, false); + std::vector axis = {1}; + m.SetAxis(axis); + m.QuantizeAndPopulate(m.Input(), data); m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1})); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2})); EXPECT_THAT( - m.GetDequantizedOutput(), - ElementsAreArray(ArrayFloatNear({0.2, 0.3, 0.5}, kQuantizedTolerance))); + m.GetDequantizedOutput(), 
+ ElementsAreArray(ArrayFloatNear({-4.8, -3.6}, kQuantizedTolerance))); } TEST(DynamicUint8MinOpTest, NotKeepDims) { - float kQuantizedTolerance = GetTolerance(-5.0, 2.0); - std::vector data = {1.3, -4.8, -3.6, 0.24}; - MinOpDynamicModel m({TensorType_UINT8, {2, 2}, -5.0, 2.0}, - {TensorType_UINT8, {2}, -5.0, 2.0}, - {TensorType_INT32, {1}}, false); - std::vector axis = {1}; - m.SetAxis(axis); - m.QuantizeAndPopulate(m.Input(), data); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2})); - EXPECT_THAT( - m.GetDequantizedOutput(), - ElementsAreArray(ArrayFloatNear({-4.807843, -3.6}, kQuantizedTolerance))); + DynamicMinOpTestNotKeepDims(); } TEST(DynamicInt8MinOpTest, NotKeepDims) { - float kQuantizedTolerance = GetTolerance(-5.0, 2.0); - std::vector data = {1.3, -4.8, -3.6, 0.24}; - MinOpDynamicModel m({TensorType_INT8, {2, 2}, -5.0, 2.0}, - {TensorType_INT8, {2}, -5.0, 2.0}, - {TensorType_INT32, {1}}, false); - std::vector axis = {1}; + DynamicMinOpTestNotKeepDims(); +} + +TEST(DynamicInt16MinOpTest, NotKeepDims) { + DynamicMinOpTestNotKeepDims(); +} + +template +void DynamicMinOpTestKeepDims() { + const float kMin = -1; + const float kMax = + std::numeric_limits::max() / + static_cast(std::numeric_limits::max() + 1); + const float kQuantizedTolerance = GetTolerance(-12.0, 12.0); + std::vector data = {11.14, -0.14, 7.423, 0.879}; + MinOpDynamicModel m({tensor_type, {2, 2}, 12.0 * kMin, 12.0 * kMax}, + {tensor_type, {2}, 12.0 * kMin, 12.0 * kMax}, + {TensorType_INT32, {1}}, true); + std::vector axis = {0}; m.SetAxis(axis); - m.QuantizeAndPopulate(m.Input(), data); + m.QuantizeAndPopulate(m.Input(), data); m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2})); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2})); EXPECT_THAT( - m.GetDequantizedOutput(), - ElementsAreArray(ArrayFloatNear({-4.807843, -3.6}, kQuantizedTolerance))); + m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({7.423, -0.14}, 
kQuantizedTolerance))); } TEST(DynamicUint8MinOpTest, KeepDims) { - float kQuantizedTolerance = GetTolerance(-10.0, 12.0); - std::vector data = {11.14, -0.14, 7.423, 0.879}; - MinOpDynamicModel m({TensorType_UINT8, {2, 2}, -10.0, 12.0}, - {TensorType_UINT8, {2}, -10.0, 12.0}, - {TensorType_INT32, {1}}, true); - std::vector axis = {0}; - m.SetAxis(axis); - m.QuantizeAndPopulate(m.Input(), data); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2})); - EXPECT_THAT(m.GetDequantizedOutput(), - ElementsAreArray( - ArrayFloatNear({7.427451, -0.164706}, kQuantizedTolerance))); + DynamicMinOpTestKeepDims(); } TEST(DynamicInt8MinOpTest, KeepDims) { - float kQuantizedTolerance = GetTolerance(-10.0, 12.0); - std::vector data = {11.14, -0.14, 7.423, 0.879}; - MinOpDynamicModel m({TensorType_INT8, {2, 2}, -10.0, 12.0}, - {TensorType_INT8, {2}, -10.0, 12.0}, + DynamicMinOpTestKeepDims(); +} + +TEST(DynamicInt16MinOpTest, KeepDims) { + DynamicMinOpTestKeepDims(); +} + +template +void DynamicMinOpTestScalar() { + const float kMin = -1; + const float kMax = + std::numeric_limits::max() / + static_cast(std::numeric_limits::max() + 1); + const float kQuantizedTolerance = GetTolerance(-12.0, 12.0); + std::vector data = {11.14}; + MinOpDynamicModel m({tensor_type, {}, 12.0 * kMin, 12.0 * kMax}, + {tensor_type, {}, 12.0 * kMin, 12.0 * kMax}, {TensorType_INT32, {1}}, true); std::vector axis = {0}; - m.SetAxis(axis); - m.QuantizeAndPopulate(m.Input(), data); + m.QuantizeAndPopulate(m.Input(), data); m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2})); - EXPECT_THAT(m.GetDequantizedOutput(), - ElementsAreArray( - ArrayFloatNear({7.427451, -0.164706}, kQuantizedTolerance))); + EXPECT_THAT(m.GetOutputShape(), IsEmpty()); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({11.14}, kQuantizedTolerance))); } TEST(DynamicUint8MinOpTest, Scalar) { - float kQuantizedTolerance = GetTolerance(-10.0, 12.0); - std::vector data = {11.14}; - 
MinOpDynamicModel m({TensorType_UINT8, {}, -10.0, 12.0}, - {TensorType_UINT8, {}, -10.0, 12.0}, - {TensorType_INT32, {1}}, true); - std::vector axis = {0}; - m.QuantizeAndPopulate(m.Input(), data); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), IsEmpty()); - EXPECT_THAT(m.GetDequantizedOutput(), - ElementsAreArray(ArrayFloatNear({11.1294}, kQuantizedTolerance))); + DynamicMinOpTestScalar(); } TEST(DynamicInt8MinOpTest, Scalar) { - float kQuantizedTolerance = GetTolerance(-10.0, 12.0); - std::vector data = {11.14}; - MinOpDynamicModel m({TensorType_INT8, {}, -10.0, 12.0}, - {TensorType_INT8, {}, -10.0, 12.0}, - {TensorType_INT32, {1}}, true); - std::vector axis = {0}; - m.QuantizeAndPopulate(m.Input(), data); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), IsEmpty()); - EXPECT_THAT(m.GetDequantizedOutput(), - ElementsAreArray(ArrayFloatNear({11.1294}, kQuantizedTolerance))); + DynamicMinOpTestScalar(); +} + +TEST(DynamicInt16MinOpTest, Scalar) { + DynamicMinOpTestScalar(); } // Tests for reduce_any diff --git a/tensorflow/lite/kernels/register.cc b/tensorflow/lite/kernels/register.cc index 6e15f553fdb..06f3309d63c 100644 --- a/tensorflow/lite/kernels/register.cc +++ b/tensorflow/lite/kernels/register.cc @@ -218,10 +218,10 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_REDUCE_PROD, Register_REDUCE_PROD()); AddBuiltin(BuiltinOperator_REDUCE_MAX, Register_REDUCE_MAX(), /* min_version = */ 1, - /* max_version = */ 2); + /* max_version = */ 3); AddBuiltin(BuiltinOperator_REDUCE_MIN, Register_REDUCE_MIN(), /* min_version = */ 1, - /* max_version = */ 2); + /* max_version = */ 3); AddBuiltin(BuiltinOperator_REDUCE_ANY, Register_REDUCE_ANY()); AddBuiltin(BuiltinOperator_EXPAND_DIMS, Register_EXPAND_DIMS()); AddBuiltin(BuiltinOperator_SPARSE_TO_DENSE, Register_SPARSE_TO_DENSE(), diff --git a/tensorflow/lite/kernels/register_ref.cc b/tensorflow/lite/kernels/register_ref.cc index c8fb46adb96..40d8731256c 100644 --- 
a/tensorflow/lite/kernels/register_ref.cc +++ b/tensorflow/lite/kernels/register_ref.cc @@ -374,10 +374,10 @@ BuiltinRefOpResolver::BuiltinRefOpResolver() { AddBuiltin(BuiltinOperator_REDUCE_PROD, Register_REDUCE_PROD()); AddBuiltin(BuiltinOperator_REDUCE_MAX, Register_REDUCE_MAX(), /* min_version = */ 1, - /* max_version = */ 2); + /* max_version = */ 3); AddBuiltin(BuiltinOperator_REDUCE_MIN, Register_REDUCE_MIN(), /* min_version = */ 1, - /* max_version = */ 2); + /* max_version = */ 3); AddBuiltin(BuiltinOperator_REDUCE_ANY, Register_REDUCE_ANY()); AddBuiltin(BuiltinOperator_EXPAND_DIMS, Register_EXPAND_DIMS()); AddBuiltin(BuiltinOperator_SPARSE_TO_DENSE, Register_SPARSE_TO_DENSE(), diff --git a/tensorflow/lite/toco/tflite/op_version.cc b/tensorflow/lite/toco/tflite/op_version.cc index 72fc4eea1e7..46f817b1650 100644 --- a/tensorflow/lite/toco/tflite/op_version.cc +++ b/tensorflow/lite/toco/tflite/op_version.cc @@ -139,8 +139,10 @@ std::string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kSum, 2}, "1.15.0"}, {{OperatorType::kReduceMax, 1}, "1.11.0"}, {{OperatorType::kReduceMax, 2}, "1.14.0"}, + {{OperatorType::kReduceMax, 3}, kPendingReleaseOpVersion}, {{OperatorType::kReduceMin, 1}, "1.11.0"}, {{OperatorType::kReduceMin, 2}, "1.14.0"}, + {{OperatorType::kReduceMin, 3}, kPendingReleaseOpVersion}, {{OperatorType::kReduceProd, 1}, "1.11.0"}, {{OperatorType::kAny, 1}, "1.11.0"}, {{OperatorType::kRelu6, 1}, "1.5.0"}, diff --git a/tensorflow/lite/tools/versioning/op_version.cc b/tensorflow/lite/tools/versioning/op_version.cc index a15fc27d43a..d365af1c228 100644 --- a/tensorflow/lite/tools/versioning/op_version.cc +++ b/tensorflow/lite/tools/versioning/op_version.cc @@ -547,6 +547,8 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) { case BuiltinOperator_MEAN: case BuiltinOperator_PAD: case BuiltinOperator_PADV2: + case BuiltinOperator_REDUCE_MAX: + case BuiltinOperator_REDUCE_MIN: // In case of int16 inputs, the version is 3. 
if (op_sig.input_types.at(0) == TensorType_INT16) { return 3; @@ -573,8 +575,6 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) { case BuiltinOperator_SPACE_TO_DEPTH: case BuiltinOperator_SPLIT_V: case BuiltinOperator_SUM: - case BuiltinOperator_REDUCE_MAX: - case BuiltinOperator_REDUCE_MIN: case BuiltinOperator_RELU6: case BuiltinOperator_LOG_SOFTMAX: case BuiltinOperator_TOPK_V2: diff --git a/tensorflow/lite/tools/versioning/op_version_test.cc b/tensorflow/lite/tools/versioning/op_version_test.cc index f954ea6b6d2..f3f23e80cb1 100644 --- a/tensorflow/lite/tools/versioning/op_version_test.cc +++ b/tensorflow/lite/tools/versioning/op_version_test.cc @@ -306,6 +306,14 @@ TEST(OpVersionTest, VersioningSumTest) { SimpleVersioningTest(BuiltinOperator_SUM); } +TEST(OpVersionTest, VersioningReduceMinTest) { + SimpleVersioningTestExtended(BuiltinOperator_REDUCE_MIN); +} + +TEST(OpVersionTest, VersioningReduceMaxTest) { + SimpleVersioningTestExtended(BuiltinOperator_REDUCE_MAX); +} + TEST(OpVersionTest, VersioningAddTest) { SimpleVersioningTest(BuiltinOperator_ADD); } diff --git a/tensorflow/lite/tools/versioning/runtime_version.cc b/tensorflow/lite/tools/versioning/runtime_version.cc index cac6779c48a..17bf5713ccf 100644 --- a/tensorflow/lite/tools/versioning/runtime_version.cc +++ b/tensorflow/lite/tools/versioning/runtime_version.cc @@ -180,8 +180,10 @@ std::string FindMinimumRuntimeVersionForOp(tflite::BuiltinOperator op_code, {{BuiltinOperator_SUM, 2}, "1.15.0"}, {{BuiltinOperator_REDUCE_MAX, 1}, "1.11.0"}, {{BuiltinOperator_REDUCE_MAX, 2}, "1.14.0"}, + {{BuiltinOperator_REDUCE_MAX, 3}, kPendingReleaseVersion}, {{BuiltinOperator_REDUCE_MIN, 1}, "1.11.0"}, {{BuiltinOperator_REDUCE_MIN, 2}, "1.14.0"}, + {{BuiltinOperator_REDUCE_MIN, 3}, kPendingReleaseVersion}, {{BuiltinOperator_REDUCE_PROD, 1}, "1.11.0"}, {{BuiltinOperator_REDUCE_ANY, 1}, "1.11.0"}, {{BuiltinOperator_RELU6, 1}, "1.5.0"}, From 43b1e7396c8c911525be29c8602336e406fee121 Mon Sep 17 00:00:00 2001 
From: Thibaut Goetghebuer-Planchon Date: Wed, 23 Sep 2020 11:23:56 +0100 Subject: [PATCH 006/220] Use Register_*_REF for reduce operators in BuiltinRefOpResolver --- tensorflow/lite/kernels/register_ref.cc | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tensorflow/lite/kernels/register_ref.cc b/tensorflow/lite/kernels/register_ref.cc index 40d8731256c..8edbfc7f343 100644 --- a/tensorflow/lite/kernels/register_ref.cc +++ b/tensorflow/lite/kernels/register_ref.cc @@ -101,11 +101,11 @@ TfLiteRegistration* Register_LESS_EQUAL(); TfLiteRegistration* Register_FLOOR_REF(); TfLiteRegistration* Register_TILE(); TfLiteRegistration* Register_NEG(); -TfLiteRegistration* Register_SUM(); -TfLiteRegistration* Register_REDUCE_PROD(); -TfLiteRegistration* Register_REDUCE_MAX(); -TfLiteRegistration* Register_REDUCE_MIN(); -TfLiteRegistration* Register_REDUCE_ANY(); +TfLiteRegistration* Register_SUM_REF(); +TfLiteRegistration* Register_REDUCE_PROD_REF(); +TfLiteRegistration* Register_REDUCE_MAX_REF(); +TfLiteRegistration* Register_REDUCE_MIN_REF(); +TfLiteRegistration* Register_REDUCE_ANY_REF(); TfLiteRegistration* Register_SELECT(); TfLiteRegistration* Register_SLICE_REF(); TfLiteRegistration* Register_SIN(); @@ -368,17 +368,17 @@ BuiltinRefOpResolver::BuiltinRefOpResolver() { AddBuiltin(BuiltinOperator_TILE, Register_TILE(), /* min_version = */ 1, /* max_version = */ 2); - AddBuiltin(BuiltinOperator_SUM, Register_SUM(), + AddBuiltin(BuiltinOperator_SUM, Register_SUM_REF(), /* min_version = */ 1, /* max_version = */ 2); - AddBuiltin(BuiltinOperator_REDUCE_PROD, Register_REDUCE_PROD()); - AddBuiltin(BuiltinOperator_REDUCE_MAX, Register_REDUCE_MAX(), + AddBuiltin(BuiltinOperator_REDUCE_PROD, Register_REDUCE_PROD_REF()); + AddBuiltin(BuiltinOperator_REDUCE_MAX, Register_REDUCE_MAX_REF(), /* min_version = */ 1, /* max_version = */ 3); - AddBuiltin(BuiltinOperator_REDUCE_MIN, Register_REDUCE_MIN(), + AddBuiltin(BuiltinOperator_REDUCE_MIN, 
Register_REDUCE_MIN_REF(), /* min_version = */ 1, /* max_version = */ 3); - AddBuiltin(BuiltinOperator_REDUCE_ANY, Register_REDUCE_ANY()); + AddBuiltin(BuiltinOperator_REDUCE_ANY, Register_REDUCE_ANY_REF()); AddBuiltin(BuiltinOperator_EXPAND_DIMS, Register_EXPAND_DIMS()); AddBuiltin(BuiltinOperator_SPARSE_TO_DENSE, Register_SPARSE_TO_DENSE(), /* min_version = */ 1, From ecbfff1f320b0fbd28368daa27846ee62242e8a4 Mon Sep 17 00:00:00 2001 From: Abolfazl Shahbazi Date: Tue, 29 Sep 2020 17:17:41 -0700 Subject: [PATCH 007/220] Adding CentOS OneDNN partials and dockerfiles --- .../onednn/centos-8-devel-jupyter.Dockerfile | 107 +++++++++++++ ...tos-8-devel-mpi-horovod-jupyter.Dockerfile | 139 ++++++++++++++++ .../centos-8-devel-mpi-horovod.Dockerfile | 125 +++++++++++++++ ...s-8-devel-mpich-horovod-jupyter.Dockerfile | 138 ++++++++++++++++ .../centos-8-devel-mpich-horovod.Dockerfile | 124 +++++++++++++++ .../onednn/centos-8-devel.Dockerfile | 93 +++++++++++ .../onednn/centos-8-jupyter.Dockerfile | 72 +++++++++ .../centos-8-mpi-horovod-jupyter.Dockerfile | 113 +++++++++++++ .../onednn/centos-8-mpi-horovod.Dockerfile | 99 ++++++++++++ .../centos-8-mpich-horovod-jupyter.Dockerfile | 112 +++++++++++++ .../onednn/centos-8-mpich-horovod.Dockerfile | 98 ++++++++++++ .../dockerfiles/onednn/centos-8.Dockerfile | 58 +++++++ .../onednn/centos/bazel.partial.Dockerfile | 7 + .../onednn/centos/cpu.partial.Dockerfile | 1 + .../centos/devel-horovod.partial.Dockerfile | 4 + .../onednn/centos/devel.partial.Dockerfile | 39 +++++ .../onednn/centos/horovod.partial.Dockerfile | 13 ++ .../onednn/centos/mpi.partial.Dockerfile | 26 +++ .../onednn/centos/mpich.partial.Dockerfile | 25 +++ .../onednn/centos/python.partial.Dockerfile | 19 +++ .../onednn/centos/version.partial.Dockerfile | 1 + .../{ubuntu => }/jupyter.partial.Dockerfile | 0 tensorflow/tools/dockerfiles/spec.yml | 150 +++++++++++++++++- 23 files changed, 1560 insertions(+), 3 deletions(-) create mode 100644 
tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-devel-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-devel-mpi-horovod-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-devel-mpi-horovod.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-devel-mpich-horovod-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-devel-mpich-horovod.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-devel.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-mpi-horovod-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-mpi-horovod.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-mpich-horovod-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-mpich-horovod.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/onednn/centos/bazel.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/onednn/centos/cpu.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/onednn/centos/devel-horovod.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/onednn/centos/devel.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/onednn/centos/horovod.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/onednn/centos/mpi.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/onednn/centos/mpich.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/onednn/centos/python.partial.Dockerfile create mode 
100644 tensorflow/tools/dockerfiles/partials/onednn/centos/version.partial.Dockerfile rename tensorflow/tools/dockerfiles/partials/onednn/{ubuntu => }/jupyter.partial.Dockerfile (100%) diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-devel-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-devel-jupyter.Dockerfile new file mode 100644 index 00000000000..c51b7bfa625 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-devel-jupyter.Dockerfile @@ -0,0 +1,107 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG CENTOS_VERSION=8 + +FROM centos:${CENTOS_VERSION} AS base + +ARG CENTOS_VERSION=8 + +# Enable both PowerTools and EPEL otherwise some packages like hdf5-devel fail to install +RUN dnf install -y 'dnf-command(config-manager)' && \ + dnf config-manager --set-enabled PowerTools && \ + dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-"${CENTOS_VERSION}".noarch.rpm && \ + dnf clean all + +RUN yum update -y && \ + yum install -y \ + curl \ + freetype-devel \ + gcc \ + gcc-c++ \ + git \ + hdf5-devel \ + java-1.8.0-openjdk \ + java-1.8.0-openjdk-headless \ + libcurl-devel \ + make \ + pkg-config \ + rsync \ + sudo \ + unzip \ + zeromq-devel \ + zip \ + zlib-devel && \ + yum clean all + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 +ARG PYTHON=python3 + +RUN yum update -y && yum install -y \ + ${PYTHON} \ + ${PYTHON}-pip \ + which && \ + yum clean all + + +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \ + ln -sf $(which ${PYTHON}) /usr/bin/python + +# Install bazel +ARG BAZEL_VERSION=3.1.0 +RUN mkdir /bazel && \ + curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash 
/bazel/installer.sh && \ + rm -f /bazel/installer.sh + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc + +RUN python3 -m pip install --no-cache-dir jupyter matplotlib +# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422 +RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 +RUN jupyter serverextension enable --py jupyter_http_over_ws + +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +WORKDIR /tf +EXPOSE 8888 + +RUN python3 -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-devel-mpi-horovod-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-devel-mpi-horovod-jupyter.Dockerfile new file mode 100644 index 00000000000..35494c25716 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-devel-mpi-horovod-jupyter.Dockerfile @@ -0,0 +1,139 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG CENTOS_VERSION=8 + +FROM centos:${CENTOS_VERSION} AS base + +ARG CENTOS_VERSION=8 + +# Enable both PowerTools and EPEL otherwise some packages like hdf5-devel fail to install +RUN dnf install -y 'dnf-command(config-manager)' && \ + dnf config-manager --set-enabled PowerTools && \ + dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-"${CENTOS_VERSION}".noarch.rpm && \ + dnf clean all + +RUN yum update -y && \ + yum install -y \ + curl \ + freetype-devel \ + gcc \ + gcc-c++ \ + git \ + hdf5-devel \ + java-1.8.0-openjdk \ + java-1.8.0-openjdk-headless \ + libcurl-devel \ + make \ + pkg-config \ + rsync \ + sudo \ + unzip \ + zeromq-devel \ + zip \ + zlib-devel && \ + yum clean all + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 +ARG PYTHON=python3 + +RUN yum update -y && yum install -y \ + ${PYTHON} \ + ${PYTHON}-pip \ + which && \ + yum clean all + + +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \ + ln -sf $(which ${PYTHON}) /usr/bin/python + +# Install bazel +ARG BAZEL_VERSION=3.1.0 +RUN mkdir /bazel && \ + curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash 
/bazel/installer.sh && \ + rm -f /bazel/installer.sh + +RUN yum update -y && yum install -y \ + openmpi \ + openmpi-devel \ + openssh \ + openssh-server \ + which && \ + yum clean all + +ENV PATH="/usr/lib64/openmpi/bin:${PATH}" + +# Create a wrapper for OpenMPI to allow running as root by default +RUN mv -f $(which mpirun) /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Configure OpenMPI to run good defaults: +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi-x86_64/openmpi-mca-params.conf + +# Install OpenSSH for MPI to communicate between containers +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/sshd_config | grep -v StrictHostKeyChecking > /etc/ssh/sshd_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/sshd_config.new && \ + mv -f /etc/ssh/sshd_config.new /etc/ssh/sshd_config + +# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1 +ARG CHECKOUT_HOROVOD_SRC=0 +ARG HOROVOD_BRANCH=master +RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive https://github.com/uber/horovod.git /horovod_src || true + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc + +RUN python3 -m pip install --no-cache-dir jupyter matplotlib +# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422 +RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 +RUN jupyter serverextension enable --py jupyter_http_over_ws + +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +WORKDIR /tf +EXPOSE 8888 + +RUN python3 -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git 
a/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-devel-mpi-horovod.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-devel-mpi-horovod.Dockerfile new file mode 100644 index 00000000000..cd0f5f06e0c --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-devel-mpi-horovod.Dockerfile @@ -0,0 +1,125 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG CENTOS_VERSION=8 + +FROM centos:${CENTOS_VERSION} AS base + +ARG CENTOS_VERSION=8 + +# Enable both PowerTools and EPEL otherwise some packages like hdf5-devel fail to install +RUN dnf install -y 'dnf-command(config-manager)' && \ + dnf config-manager --set-enabled PowerTools && \ + dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-"${CENTOS_VERSION}".noarch.rpm && \ + dnf clean all + +RUN yum update -y && \ + yum install -y \ + curl \ + freetype-devel \ + gcc \ + gcc-c++ \ + git \ + hdf5-devel \ + java-1.8.0-openjdk \ + java-1.8.0-openjdk-headless \ + libcurl-devel \ + make \ + pkg-config \ + rsync \ + sudo \ + unzip \ + zeromq-devel \ + zip \ + zlib-devel && \ + yum clean all + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 +ARG PYTHON=python3 + +RUN yum update -y && yum install -y \ + ${PYTHON} \ + ${PYTHON}-pip \ + which && \ + yum clean all + + +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \ + ln -sf $(which ${PYTHON}) /usr/bin/python + +# Install bazel +ARG BAZEL_VERSION=3.1.0 +RUN mkdir /bazel && \ + curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash 
/bazel/installer.sh && \ + rm -f /bazel/installer.sh + +RUN yum update -y && yum install -y \ + openmpi \ + openmpi-devel \ + openssh \ + openssh-server \ + which && \ + yum clean all + +ENV PATH="/usr/lib64/openmpi/bin:${PATH}" + +# Create a wrapper for OpenMPI to allow running as root by default +RUN mv -f $(which mpirun) /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Configure OpenMPI to run good defaults: +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi-x86_64/openmpi-mca-params.conf + +# Install OpenSSH for MPI to communicate between containers +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/sshd_config | grep -v StrictHostKeyChecking > /etc/ssh/sshd_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/sshd_config.new && \ + mv -f /etc/ssh/sshd_config.new /etc/ssh/sshd_config + +# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1 +ARG CHECKOUT_HOROVOD_SRC=0 +ARG HOROVOD_BRANCH=master +RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive https://github.com/uber/horovod.git /horovod_src || true + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-devel-mpich-horovod-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-devel-mpich-horovod-jupyter.Dockerfile new file mode 100644 index 00000000000..f4fd26edb3e --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-devel-mpich-horovod-jupyter.Dockerfile @@ -0,0 +1,138 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG CENTOS_VERSION=8 + +FROM centos:${CENTOS_VERSION} AS base + +ARG CENTOS_VERSION=8 + +# Enable both PowerTools and EPEL otherwise some packages like hdf5-devel fail to install +RUN dnf install -y 'dnf-command(config-manager)' && \ + dnf config-manager --set-enabled PowerTools && \ + dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-"${CENTOS_VERSION}".noarch.rpm && \ + dnf clean all + +RUN yum update -y && \ + yum install -y \ + curl \ + freetype-devel \ + gcc \ + gcc-c++ \ + git \ + hdf5-devel \ + java-1.8.0-openjdk \ + java-1.8.0-openjdk-headless \ + libcurl-devel \ + make \ + pkg-config \ + rsync \ + sudo \ + unzip \ + zeromq-devel \ + zip \ + zlib-devel && \ + yum clean all + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 +ARG PYTHON=python3 + +RUN yum update -y && yum install -y 
\ + ${PYTHON} \ + ${PYTHON}-pip \ + which && \ + yum clean all + + +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \ + ln -sf $(which ${PYTHON}) /usr/bin/python + +# Install bazel +ARG BAZEL_VERSION=3.1.0 +RUN mkdir /bazel && \ + curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash /bazel/installer.sh && \ + rm -f /bazel/installer.sh + +# install mpich, openssh for MPI to communicate between containers +RUN yum update -y && yum install -y \ + mpich \ + mpich-devel \ + openssh \ + openssh-server \ + redhat-rpm-config \ + which && \ + yum clean all + +ENV PATH="/usr/lib64/mpich/bin:${PATH}" + +# Create a wrapper for MPICH to allow running as root by default +RUN mv -f $(which mpirun) /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Set up SSH +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/sshd_config | grep -v StrictHostKeyChecking > /etc/ssh/sshd_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/sshd_config.new && \ + mv -f /etc/ssh/sshd_config.new /etc/ssh/sshd_config + +# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1 +ARG CHECKOUT_HOROVOD_SRC=0 +ARG HOROVOD_BRANCH=master +RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive https://github.com/uber/horovod.git /horovod_src || true + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc + +RUN python3 -m pip install --no-cache-dir jupyter 
matplotlib +# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422 +RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 +RUN jupyter serverextension enable --py jupyter_http_over_ws + +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +WORKDIR /tf +EXPOSE 8888 + +RUN python3 -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-devel-mpich-horovod.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-devel-mpich-horovod.Dockerfile new file mode 100644 index 00000000000..751c093834b --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-devel-mpich-horovod.Dockerfile @@ -0,0 +1,124 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG CENTOS_VERSION=8 + +FROM centos:${CENTOS_VERSION} AS base + +ARG CENTOS_VERSION=8 + +# Enable both PowerTools and EPEL otherwise some packages like hdf5-devel fail to install +RUN dnf install -y 'dnf-command(config-manager)' && \ + dnf config-manager --set-enabled PowerTools && \ + dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-"${CENTOS_VERSION}".noarch.rpm && \ + dnf clean all + +RUN yum update -y && \ + yum install -y \ + curl \ + freetype-devel \ + gcc \ + gcc-c++ \ + git \ + hdf5-devel \ + java-1.8.0-openjdk \ + java-1.8.0-openjdk-headless \ + libcurl-devel \ + make \ + pkg-config \ + rsync \ + sudo \ + unzip \ + zeromq-devel \ + zip \ + zlib-devel && \ + yum clean all + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 +ARG PYTHON=python3 + +RUN yum update -y && yum install -y \ + ${PYTHON} \ + ${PYTHON}-pip \ + which && \ + yum clean all + + +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \ + ln -sf $(which ${PYTHON}) /usr/bin/python + +# Install bazel +ARG BAZEL_VERSION=3.1.0 +RUN mkdir /bazel && \ + curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash 
/bazel/installer.sh && \ + rm -f /bazel/installer.sh + +# install mpich, openssh for MPI to communicate between containers +RUN yum update -y && yum install -y \ + mpich \ + mpich-devel \ + openssh \ + openssh-server \ + redhat-rpm-config \ + which && \ + yum clean all + +ENV PATH="/usr/lib64/mpich/bin:${PATH}" + +# Create a wrapper for MPICH to allow running as root by default +RUN mv -f $(which mpirun) /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Set up SSH +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/sshd_config | grep -v StrictHostKeyChecking > /etc/ssh/sshd_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/sshd_config.new && \ + mv -f /etc/ssh/sshd_config.new /etc/ssh/sshd_config + +# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1 +ARG CHECKOUT_HOROVOD_SRC=0 +ARG HOROVOD_BRANCH=master +RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive https://github.com/uber/horovod.git /horovod_src || true + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-devel.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-devel.Dockerfile new file mode 100644 index 00000000000..a4d28ddb414 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-devel.Dockerfile @@ -0,0 +1,93 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG CENTOS_VERSION=8 + +FROM centos:${CENTOS_VERSION} AS base + +ARG CENTOS_VERSION=8 + +# Enable both PowerTools and EPEL otherwise some packages like hdf5-devel fail to install +RUN dnf install -y 'dnf-command(config-manager)' && \ + dnf config-manager --set-enabled PowerTools && \ + dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-"${CENTOS_VERSION}".noarch.rpm && \ + dnf clean all + +RUN yum update -y && \ + yum install -y \ + curl \ + freetype-devel \ + gcc \ + gcc-c++ \ + git \ + hdf5-devel \ + java-1.8.0-openjdk \ + java-1.8.0-openjdk-headless \ + libcurl-devel \ + make \ + pkg-config \ + rsync \ + sudo \ + unzip \ + zeromq-devel \ + zip \ + zlib-devel && \ + yum clean all + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 +ARG PYTHON=python3 + +RUN yum update -y && yum install -y 
\ + ${PYTHON} \ + ${PYTHON}-pip \ + which && \ + yum clean all + + +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \ + ln -sf $(which ${PYTHON}) /usr/bin/python + +# Install bazel +ARG BAZEL_VERSION=3.1.0 +RUN mkdir /bazel && \ + curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash /bazel/installer.sh && \ + rm -f /bazel/installer.sh + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-jupyter.Dockerfile new file mode 100644 index 00000000000..5f6a898127f --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-jupyter.Dockerfile @@ -0,0 +1,72 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. 
Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG CENTOS_VERSION=8 + +FROM centos:${CENTOS_VERSION} as base + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 +ARG PYTHON=python3 + +RUN yum update -y && yum install -y \ + ${PYTHON} \ + ${PYTHON}-pip \ + which && \ + yum clean all + + +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \ + ln -sf $(which ${PYTHON}) /usr/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version. +# Installs the latest version by default. +ARG TF_PACKAGE=tensorflow +ARG TF_PACKAGE_VERSION= +RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc + +RUN python3 -m pip install --no-cache-dir jupyter matplotlib +# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422 +RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 +RUN jupyter serverextension enable --py jupyter_http_over_ws + +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +WORKDIR /tf +EXPOSE 8888 + +RUN python3 -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-mpi-horovod-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-mpi-horovod-jupyter.Dockerfile new file mode 100644 index 00000000000..e995a7391de --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-mpi-horovod-jupyter.Dockerfile @@ -0,0 +1,113 @@ +# 
Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG CENTOS_VERSION=8 + +FROM centos:${CENTOS_VERSION} as base + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 +ARG PYTHON=python3 + +RUN yum update -y && yum install -y \ + ${PYTHON} \ + ${PYTHON}-pip \ + which && \ + yum clean all + + +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \ + ln -sf $(which ${PYTHON}) /usr/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version. +# Installs the latest version by default. 
+ARG TF_PACKAGE=tensorflow +ARG TF_PACKAGE_VERSION= +RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} + +RUN yum update -y && yum install -y \ + openmpi \ + openmpi-devel \ + openssh \ + openssh-server \ + which && \ + yum clean all + +ENV PATH="/usr/lib64/openmpi/bin:${PATH}" + +# Create a wrapper for OpenMPI to allow running as root by default +RUN mv -f $(which mpirun) /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Configure OpenMPI to run good defaults: +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi-x86_64/openmpi-mca-params.conf + +# Install OpenSSH for MPI to communicate between containers +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/sshd_config | grep -v StrictHostKeyChecking > /etc/ssh/sshd_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/sshd_config.new && \ + mv -f /etc/ssh/sshd_config.new /etc/ssh/sshd_config + +# Install Horovod +ARG HOROVOD_WITHOUT_PYTORCH=1 +ARG HOROVOD_WITHOUT_MXNET=1 +ARG HOROVOD_WITH_TENSORFLOW=1 +ARG HOROVOD_VERSION= + +RUN yum update -y && yum install -y \ + gcc \ + gcc-c++ \ + python36-devel && \ + yum clean all + +RUN ${PYTHON} -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}} + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc + +RUN python3 -m pip install --no-cache-dir jupyter matplotlib +# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422 +RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 +RUN jupyter serverextension enable --py jupyter_http_over_ws + +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +WORKDIR /tf +EXPOSE 8888 + +RUN python3 -m ipykernel.kernelspec + +CMD ["bash", "-c", "source 
/etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-mpi-horovod.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-mpi-horovod.Dockerfile new file mode 100644 index 00000000000..7e853ddaf94 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-mpi-horovod.Dockerfile @@ -0,0 +1,99 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG CENTOS_VERSION=8 + +FROM centos:${CENTOS_VERSION} as base + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 +ARG PYTHON=python3 + +RUN yum update -y && yum install -y \ + ${PYTHON} \ + ${PYTHON}-pip \ + which && \ + yum clean all + + +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \ + ln -sf $(which ${PYTHON}) /usr/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version. +# Installs the latest version by default. +ARG TF_PACKAGE=tensorflow +ARG TF_PACKAGE_VERSION= +RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} + +RUN yum update -y && yum install -y \ + openmpi \ + openmpi-devel \ + openssh \ + openssh-server \ + which && \ + yum clean all + +ENV PATH="/usr/lib64/openmpi/bin:${PATH}" + +# Create a wrapper for OpenMPI to allow running as root by default +RUN mv -f $(which mpirun) /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Configure OpenMPI to run good defaults: +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi-x86_64/openmpi-mca-params.conf + +# Install OpenSSH for MPI to communicate between containers +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/sshd_config | grep -v StrictHostKeyChecking > /etc/ssh/sshd_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/sshd_config.new && \ + mv -f /etc/ssh/sshd_config.new /etc/ssh/sshd_config + +# Install Horovod +ARG HOROVOD_WITHOUT_PYTORCH=1 +ARG HOROVOD_WITHOUT_MXNET=1 +ARG HOROVOD_WITH_TENSORFLOW=1 +ARG HOROVOD_VERSION= + +RUN yum update -y 
&& yum install -y \ + gcc \ + gcc-c++ \ + python36-devel && \ + yum clean all + +RUN ${PYTHON} -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}} + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-mpich-horovod-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-mpich-horovod-jupyter.Dockerfile new file mode 100644 index 00000000000..2e91c6b36ed --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-mpich-horovod-jupyter.Dockerfile @@ -0,0 +1,112 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG CENTOS_VERSION=8 + +FROM centos:${CENTOS_VERSION} as base + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 +ARG PYTHON=python3 + +RUN yum update -y && yum install -y \ + ${PYTHON} \ + ${PYTHON}-pip \ + which && \ + yum clean all + + +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \ + ln -sf $(which ${PYTHON}) /usr/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version. +# Installs the latest version by default. +ARG TF_PACKAGE=tensorflow +ARG TF_PACKAGE_VERSION= +RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} + +# install mpich, openssh for MPI to communicate between containers +RUN yum update -y && yum install -y \ + mpich \ + mpich-devel \ + openssh \ + openssh-server \ + redhat-rpm-config \ + which && \ + yum clean all + +ENV PATH="/usr/lib64/mpich/bin:${PATH}" + +# Create a wrapper for MPICH to allow running as root by default +RUN mv -f $(which mpirun) /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Set up SSH +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/sshd_config | grep -v StrictHostKeyChecking > /etc/ssh/sshd_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/sshd_config.new && \ + mv -f /etc/ssh/sshd_config.new /etc/ssh/sshd_config + +# Install Horovod +ARG HOROVOD_WITHOUT_PYTORCH=1 +ARG HOROVOD_WITHOUT_MXNET=1 +ARG HOROVOD_WITH_TENSORFLOW=1 +ARG HOROVOD_VERSION= + +RUN yum update -y && yum install -y \ + gcc \ + gcc-c++ \ + python36-devel && \ + yum clean all + +RUN ${PYTHON} -m pip install 
--no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}} + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc + +RUN python3 -m pip install --no-cache-dir jupyter matplotlib +# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422 +RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 +RUN jupyter serverextension enable --py jupyter_http_over_ws + +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +WORKDIR /tf +EXPOSE 8888 + +RUN python3 -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-mpich-horovod.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-mpich-horovod.Dockerfile new file mode 100644 index 00000000000..50b19bfba87 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8-mpich-horovod.Dockerfile @@ -0,0 +1,98 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG CENTOS_VERSION=8 + +FROM centos:${CENTOS_VERSION} as base + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 +ARG PYTHON=python3 + +RUN yum update -y && yum install -y \ + ${PYTHON} \ + ${PYTHON}-pip \ + which && \ + yum clean all + + +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \ + ln -sf $(which ${PYTHON}) /usr/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version. +# Installs the latest version by default. +ARG TF_PACKAGE=tensorflow +ARG TF_PACKAGE_VERSION= +RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} + +# install mpich, openssh for MPI to communicate between containers +RUN yum update -y && yum install -y \ + mpich \ + mpich-devel \ + openssh \ + openssh-server \ + redhat-rpm-config \ + which && \ + yum clean all + +ENV PATH="/usr/lib64/mpich/bin:${PATH}" + +# Create a wrapper for MPICH to allow running as root by default +RUN mv -f $(which mpirun) /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Set up SSH +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/sshd_config | grep -v StrictHostKeyChecking > /etc/ssh/sshd_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/sshd_config.new && \ + mv -f /etc/ssh/sshd_config.new /etc/ssh/sshd_config + +# Install Horovod +ARG HOROVOD_WITHOUT_PYTORCH=1 +ARG HOROVOD_WITHOUT_MXNET=1 +ARG HOROVOD_WITH_TENSORFLOW=1 +ARG HOROVOD_VERSION= + +RUN yum update -y && yum install -y \ + gcc \ + gcc-c++ \ + python36-devel && \ + yum clean all + +RUN ${PYTHON} -m pip install 
--no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}} + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8.Dockerfile new file mode 100644 index 00000000000..692c83eb65d --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/centos-8.Dockerfile @@ -0,0 +1,58 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG CENTOS_VERSION=8 + +FROM centos:${CENTOS_VERSION} as base + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 +ARG PYTHON=python3 + +RUN yum update -y && yum install -y \ + ${PYTHON} \ + ${PYTHON}-pip \ + which && \ + yum clean all + + +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \ + ln -sf $(which ${PYTHON}) /usr/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version. +# Installs the latest version by default. +ARG TF_PACKAGE=tensorflow +ARG TF_PACKAGE_VERSION= +RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/partials/onednn/centos/bazel.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/centos/bazel.partial.Dockerfile new file mode 100644 index 00000000000..d20f1244033 --- /dev/null +++ b/tensorflow/tools/dockerfiles/partials/onednn/centos/bazel.partial.Dockerfile @@ -0,0 +1,7 @@ +# Install bazel +ARG BAZEL_VERSION=3.1.0 +RUN mkdir /bazel && \ + curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash /bazel/installer.sh && \ + rm -f /bazel/installer.sh diff --git a/tensorflow/tools/dockerfiles/partials/onednn/centos/cpu.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/centos/cpu.partial.Dockerfile new file mode 100644 index 00000000000..b66f41b7fdf --- /dev/null +++ b/tensorflow/tools/dockerfiles/partials/onednn/centos/cpu.partial.Dockerfile @@ 
-0,0 +1 @@ +FROM centos:${CENTOS_VERSION} as base diff --git a/tensorflow/tools/dockerfiles/partials/onednn/centos/devel-horovod.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/centos/devel-horovod.partial.Dockerfile new file mode 100644 index 00000000000..3150c7a108b --- /dev/null +++ b/tensorflow/tools/dockerfiles/partials/onednn/centos/devel-horovod.partial.Dockerfile @@ -0,0 +1,4 @@ +# Check out the Horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1 is passed; the trailing "|| true" keeps the build going when the check is skipped, and also when the clone itself fails +ARG CHECKOUT_HOROVOD_SRC=0 +ARG HOROVOD_BRANCH=master +RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive https://github.com/uber/horovod.git /horovod_src || true diff --git a/tensorflow/tools/dockerfiles/partials/onednn/centos/devel.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/centos/devel.partial.Dockerfile new file mode 100644 index 00000000000..6b86b7fca4d --- /dev/null +++ b/tensorflow/tools/dockerfiles/partials/onednn/centos/devel.partial.Dockerfile @@ -0,0 +1,39 @@ +FROM centos:${CENTOS_VERSION} AS base + +ARG CENTOS_VERSION=8 + +# Enable both PowerTools and EPEL; otherwise some packages, such as hdf5-devel, fail to install +RUN dnf install -y 'dnf-command(config-manager)' && \ + dnf config-manager --set-enabled PowerTools && \ + dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-"${CENTOS_VERSION}".noarch.rpm && \ + dnf clean all + +RUN yum update -y && \ + yum install -y \ + curl \ + freetype-devel \ + gcc \ + gcc-c++ \ + git \ + hdf5-devel \ + java-1.8.0-openjdk \ + java-1.8.0-openjdk-headless \ + libcurl-devel \ + make \ + pkg-config \ + rsync \ + sudo \ + unzip \ + zeromq-devel \ + zip \ + zlib-devel && \ + yum clean all + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to force the commands after it to rerun; otherwise the TensorFlow clone below would be served from the build cache and would not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +ARG
CHECKOUT_TF_SRC=0 +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true diff --git a/tensorflow/tools/dockerfiles/partials/onednn/centos/horovod.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/centos/horovod.partial.Dockerfile new file mode 100644 index 00000000000..b5fa86db28c --- /dev/null +++ b/tensorflow/tools/dockerfiles/partials/onednn/centos/horovod.partial.Dockerfile @@ -0,0 +1,13 @@ +# Install Horovod from pip, pinned to HOROVOD_VERSION when that build arg is non-empty (presumably the HOROVOD_WITH*/WITHOUT* args below are read by Horovod's build - confirm) +ARG HOROVOD_WITHOUT_PYTORCH=1 +ARG HOROVOD_WITHOUT_MXNET=1 +ARG HOROVOD_WITH_TENSORFLOW=1 +ARG HOROVOD_VERSION= + +RUN yum update -y && yum install -y \ + gcc \ + gcc-c++ \ + python36-devel && \ + yum clean all + +RUN ${PYTHON} -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}} diff --git a/tensorflow/tools/dockerfiles/partials/onednn/centos/mpi.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/centos/mpi.partial.Dockerfile new file mode 100644 index 00000000000..2439eac6898 --- /dev/null +++ b/tensorflow/tools/dockerfiles/partials/onednn/centos/mpi.partial.Dockerfile @@ -0,0 +1,26 @@ +RUN yum update -y && yum install -y \ + openmpi \ + openmpi-devel \ + openssh \ + openssh-server \ + which && \ + yum clean all + +ENV PATH="/usr/lib64/openmpi/bin:${PATH}" + +# Create a wrapper for OpenMPI's mpirun that always passes --allow-run-as-root, so running as root works by default +RUN mv -f $(which mpirun) /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Configure OpenMPI to run with good defaults: exclude the lo and docker0 interfaces via btl_tcp_if_exclude +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi-x86_64/openmpi-mca-params.conf + +# Create /var/run/sshd for the OpenSSH server installed above, which MPI uses to communicate between containers +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation. NOTE(review): StrictHostKeyChecking is documented as an ssh client (ssh_config) option - confirm that sshd_config is the intended file +RUN cat /etc/ssh/sshd_config | grep -v StrictHostKeyChecking >
/etc/ssh/sshd_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/sshd_config.new && \ + mv -f /etc/ssh/sshd_config.new /etc/ssh/sshd_config diff --git a/tensorflow/tools/dockerfiles/partials/onednn/centos/mpich.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/centos/mpich.partial.Dockerfile new file mode 100644 index 00000000000..92f7ddabfce --- /dev/null +++ b/tensorflow/tools/dockerfiles/partials/onednn/centos/mpich.partial.Dockerfile @@ -0,0 +1,25 @@ +# Install MPICH, plus OpenSSH so that MPI can communicate between containers +RUN yum update -y && yum install -y \ + mpich \ + mpich-devel \ + openssh \ + openssh-server \ + redhat-rpm-config \ + which && \ + yum clean all + +ENV PATH="/usr/lib64/mpich/bin:${PATH}" + +# Create a wrapper around MPICH's mpirun; it forwards all arguments unchanged to mpirun.real (no run-as-root flag is added here) +RUN mv -f $(which mpirun) /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Set up SSH: create /var/run/sshd for the OpenSSH server installed above +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation. NOTE(review): StrictHostKeyChecking is documented as an ssh client (ssh_config) option - confirm that sshd_config is the intended file +RUN cat /etc/ssh/sshd_config | grep -v StrictHostKeyChecking > /etc/ssh/sshd_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/sshd_config.new && \ + mv -f /etc/ssh/sshd_config.new /etc/ssh/sshd_config diff --git a/tensorflow/tools/dockerfiles/partials/onednn/centos/python.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/centos/python.partial.Dockerfile new file mode 100644 index 00000000000..973348a104c --- /dev/null +++ b/tensorflow/tools/dockerfiles/partials/onednn/centos/python.partial.Dockerfile @@ -0,0 +1,19 @@ +# Set LANG to C.UTF-8; see http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 +ARG PYTHON=python3 + +RUN yum update -y && yum install -y \ + ${PYTHON} \ + ${PYTHON}-pip \ + which && \ + yum clean all + + +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -sf $(which ${PYTHON})
/usr/local/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \ + ln -sf $(which ${PYTHON}) /usr/bin/python diff --git a/tensorflow/tools/dockerfiles/partials/onednn/centos/version.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/centos/version.partial.Dockerfile new file mode 100644 index 00000000000..d96a332301b --- /dev/null +++ b/tensorflow/tools/dockerfiles/partials/onednn/centos/version.partial.Dockerfile @@ -0,0 +1 @@ +ARG CENTOS_VERSION=8 diff --git a/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/jupyter.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/jupyter.partial.Dockerfile similarity index 100% rename from tensorflow/tools/dockerfiles/partials/onednn/ubuntu/jupyter.partial.Dockerfile rename to tensorflow/tools/dockerfiles/partials/onednn/jupyter.partial.Dockerfile diff --git a/tensorflow/tools/dockerfiles/spec.yml b/tensorflow/tools/dockerfiles/spec.yml index 421f8c56bd1..005d6a78432 100644 --- a/tensorflow/tools/dockerfiles/spec.yml +++ b/tensorflow/tools/dockerfiles/spec.yml @@ -37,7 +37,7 @@ releases: versioned: tag_specs: - "{_TAG_PREFIX}{ubuntu}{jupyter}" - onednn: + ubuntu-onednn: tag_specs: - "{_TAG_PREFIX}{ubuntu-onednn}" - "{_TAG_PREFIX}{ubuntu-onednn}{onednn-jupyter}" @@ -51,6 +51,21 @@ releases: - "{_TAG_PREFIX}{ubuntu-onednn-mpich-horovod}{onednn-jupyter}" - "{_TAG_PREFIX}{ubuntu-devel-onednn-mpich-horovod}" - "{_TAG_PREFIX}{ubuntu-devel-onednn-mpich-horovod}{onednn-jupyter}" + centos-onednn: + tag_specs: + - "{_TAG_PREFIX}{centos-onednn}" + - "{_TAG_PREFIX}{centos-onednn}{onednn-jupyter}" + - "{_TAG_PREFIX}{centos-devel-onednn}" + - "{_TAG_PREFIX}{centos-devel-onednn}{onednn-jupyter}" + - "{_TAG_PREFIX}{centos-onednn-mpi-horovod}" + - "{_TAG_PREFIX}{centos-onednn-mpi-horovod}{onednn-jupyter}" + - "{_TAG_PREFIX}{centos-devel-onednn-mpi-horovod}" + - "{_TAG_PREFIX}{centos-devel-onednn-mpi-horovod}{onednn-jupyter}" + - "{_TAG_PREFIX}{centos-onednn-mpich-horovod}" + - 
"{_TAG_PREFIX}{centos-onednn-mpich-horovod}{onednn-jupyter}" + - "{_TAG_PREFIX}{centos-devel-onednn-mpich-horovod}" + - "{_TAG_PREFIX}{centos-devel-onednn-mpich-horovod}{onednn-jupyter}" + # Dockerfiles stored in the TF repo; not pushed anywhere dockerfiles: @@ -61,6 +76,7 @@ releases: - "{ubuntu-devel}{jupyter}" - "{ubuntu-ppc64le}{jupyter}" - "{ubuntu-devel-ppc64le}{jupyter}" + - "{ubuntu-devel-arm64v8}{jupyter}" - "{ubuntu-onednn}" - "{ubuntu-onednn}{onednn-jupyter}" - "{ubuntu-devel-onednn}" @@ -73,7 +89,18 @@ releases: - "{ubuntu-devel-onednn-mpich-horovod}" - "{ubuntu-onednn-mpich-horovod}{onednn-jupyter}" - "{ubuntu-devel-onednn-mpich-horovod}{onednn-jupyter}" - - "{ubuntu-devel-arm64v8}{jupyter}" + - "{centos-onednn}" + - "{centos-onednn}{onednn-jupyter}" + - "{centos-devel-onednn}" + - "{centos-devel-onednn}{onednn-jupyter}" + - "{centos-onednn-mpi-horovod}" + - "{centos-devel-onednn-mpi-horovod}" + - "{centos-onednn-mpi-horovod}{onednn-jupyter}" + - "{centos-devel-onednn-mpi-horovod}{onednn-jupyter}" + - "{centos-onednn-mpich-horovod}" + - "{centos-devel-onednn-mpich-horovod}" + - "{centos-onednn-mpich-horovod}{onednn-jupyter}" + - "{centos-devel-onednn-mpich-horovod}{onednn-jupyter}" slice_sets: @@ -87,7 +114,7 @@ slice_sets: - add_to_name: "" - add_to_name: "-jupyter" partials: - - onednn/ubuntu/jupyter + - onednn/jupyter ubuntu: - add_to_name: "" @@ -470,6 +497,123 @@ slice_sets: - CHECKOUT_TF_SRC=1 - TF_BRANCH=master + centos-onednn: + - add_to_name: "-8" + dockerfile_exclusive_name: "centos-8" + dockerfile_subdirectory: "onednn" + partials: + - onednn/centos/version + - onednn/centos/cpu + - onednn/centos/python + - tensorflow + - shell + tests: + - import-onednn.sh + args: + - TF_PACKAGE=intel-tensorflow + - CENTOS_VERSION=8 + - PYTHON=python3 + + centos-devel-onednn: + - add_to_name: "-8-devel" + dockerfile_exclusive_name: "centos-8-devel" + dockerfile_subdirectory: "onednn" + partials: + - onednn/centos/version + - onednn/centos/devel + - 
onednn/centos/python + - onednn/centos/bazel + - shell + tests: + - "" + args: + - CENTOS_VERSION=8 + - CHECKOUT_TF_SRC=1 + - TF_BRANCH=master + - PYTHON=python3 + + centos-onednn-mpi-horovod: + - add_to_name: "-8-mpi-horovod" + dockerfile_exclusive_name: "centos-8-mpi-horovod" + dockerfile_subdirectory: "onednn" + partials: + - onednn/centos/version + - onednn/centos/cpu + - onednn/centos/python + - tensorflow + - onednn/centos/mpi + - onednn/centos/horovod + - shell + tests: + - import-onednn-horovod.sh + args: + - CENTOS_VERSION=8 + - PYTHON=python3 + - HOROVOD_VERSION=0.19.5 + - TF_PACKAGE=intel-tensorflow + + centos-devel-onednn-mpi-horovod: + - add_to_name: "-8-devel-mpi-horovod" + dockerfile_exclusive_name: "centos-8-devel-mpi-horovod" + dockerfile_subdirectory: "onednn" + partials: + - onednn/centos/version + - onednn/centos/devel + - onednn/centos/python + - onednn/centos/bazel + - onednn/centos/mpi + - onednn/centos/devel-horovod + - shell + tests: + - "" + args: + - CENTOS_VERSION=8 + - PYTHON=python3 + - CHECKOUT_TF_SRC=1 + - CHECKOUT_HOROVOD_SRC=1 + - HOROVOD_BRANCH=master + + centos-onednn-mpich-horovod: + - add_to_name: "-8-mpich-horovod" + dockerfile_exclusive_name: "centos-8-mpich-horovod" + dockerfile_subdirectory: "onednn" + partials: + - onednn/centos/version + - onednn/centos/cpu + - onednn/centos/python + - tensorflow + - onednn/centos/mpich + - onednn/centos/horovod + - shell + tests: + - import-onednn-horovod.sh + args: + - CENTOS_VERSION=8 + - PYTHON=python3 + - HOROVOD_VERSION=0.19.5 + - TF_PACKAGE=intel-tensorflow + + centos-devel-onednn-mpich-horovod: + - add_to_name: "-8-devel-mpich-horovod" + dockerfile_exclusive_name: "centos-8-devel-mpich-horovod" + dockerfile_subdirectory: "onednn" + partials: + - onednn/centos/version + - onednn/centos/devel + - onednn/centos/python + - onednn/centos/bazel + - onednn/centos/mpich + - onednn/centos/devel-horovod + - shell + tests: + - "" + args: + - CENTOS_VERSION=8 + - PYTHON=python3 + - 
CHECKOUT_TF_SRC=1 + - CHECKOUT_HOROVOD_SRC=1 + - HOROVOD_BRANCH=master + ubuntu-ppc64le: - add_to_name: "-ppc64le" dockerfile_exclusive_name: "cpu-ppc64le" From 0b56f9ef6dab451f4f0fc8be78f0020a1df78c8c Mon Sep 17 00:00:00 2001 From: Koan-Sin Tan Date: Tue, 6 Oct 2020 23:00:04 +0800 Subject: [PATCH 008/220] fix grappler/costs:op_performance_data dependency 1. when tensorflow is used as third party, there is a dependency problem when building on macOS. E.g., https://github.com/mlperf/mobile_app/issues/90 2. op_performance_data.proto doesn't really need all the tf_additional_all_protos() --- tensorflow/core/grappler/costs/BUILD | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD index b205f5c3e56..b64f4c7dc94 100644 --- a/tensorflow/core/grappler/costs/BUILD +++ b/tensorflow/core/grappler/costs/BUILD @@ -31,7 +31,13 @@ tf_proto_library( srcs = ["op_performance_data.proto"], cc_api_version = 2, make_default_target_header_only = True, - protodeps = tf_additional_all_protos(), + protodeps = [ + "//tensorflow/core/framework:attr_value_proto", + "//tensorflow/core/framework:resource_handle_proto", + "//tensorflow/core/framework:tensor_proto", + "//tensorflow/core/framework:tensor_shape_proto", + "//tensorflow/core/protobuf:for_core_protos", + ], visibility = ["//visibility:public"], ) From 5f1adf36f5752e18dae49bb77db88918abc4abdc Mon Sep 17 00:00:00 2001 From: Thibaut Goetghebuer-Planchon Date: Wed, 7 Oct 2020 08:45:30 +0100 Subject: [PATCH 009/220] Remove unnecessary version increment of kReduceMin and kReduceMax in deprecated file --- tensorflow/lite/toco/tflite/op_version.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/lite/toco/tflite/op_version.cc b/tensorflow/lite/toco/tflite/op_version.cc index 46f817b1650..72fc4eea1e7 100644 --- a/tensorflow/lite/toco/tflite/op_version.cc +++ b/tensorflow/lite/toco/tflite/op_version.cc @@ -139,10 +139,8 @@ std::string 
GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kSum, 2}, "1.15.0"}, {{OperatorType::kReduceMax, 1}, "1.11.0"}, {{OperatorType::kReduceMax, 2}, "1.14.0"}, - {{OperatorType::kReduceMax, 3}, kPendingReleaseOpVersion}, {{OperatorType::kReduceMin, 1}, "1.11.0"}, {{OperatorType::kReduceMin, 2}, "1.14.0"}, - {{OperatorType::kReduceMin, 3}, kPendingReleaseOpVersion}, {{OperatorType::kReduceProd, 1}, "1.11.0"}, {{OperatorType::kAny, 1}, "1.11.0"}, {{OperatorType::kRelu6, 1}, "1.5.0"}, From 2ef80f22951331b80347dfe1ffe84fb1be1b62f4 Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Fri, 9 Oct 2020 11:28:09 +0100 Subject: [PATCH 010/220] Use TensorShape.assert_is_compatible_with instead of merge_with --- tensorflow/python/framework/sparse_tensor.py | 4 ++-- tensorflow/python/ops/clip_ops.py | 6 +++--- tensorflow/python/ops/functional_ops.py | 2 +- tensorflow/python/ops/map_fn.py | 2 +- tensorflow/python/ops/nn_impl.py | 6 +++--- tensorflow/python/ops/rnn.py | 6 +++--- tensorflow/python/ops/sparse_ops.py | 4 ++-- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tensorflow/python/framework/sparse_tensor.py b/tensorflow/python/framework/sparse_tensor.py index 5704563a92e..e7e8ea33b01 100644 --- a/tensorflow/python/framework/sparse_tensor.py +++ b/tensorflow/python/framework/sparse_tensor.py @@ -146,10 +146,10 @@ class SparseTensor(internal.NativeObject, composite_tensor.CompositeTensor): dense_shape_shape = dense_shape.shape.with_rank(1) # Assert number of rows in indices match the number of elements in values. - indices_shape.dims[0].merge_with(values_shape.dims[0]) + indices_shape.dims[0].assert_is_compatible_with(values_shape.dims[0]) # Assert number of columns in indices matches the number of elements in # dense_shape. 
- indices_shape.dims[1].merge_with(dense_shape_shape.dims[0]) + indices_shape.dims[1].assert_is_compatible_with(dense_shape_shape.dims[0]) def get_shape(self): """Get the `TensorShape` representing the shape of the dense tensor. diff --git a/tensorflow/python/ops/clip_ops.py b/tensorflow/python/ops/clip_ops.py index 1045ff692ea..3a42ae0ff45 100644 --- a/tensorflow/python/ops/clip_ops.py +++ b/tensorflow/python/ops/clip_ops.py @@ -111,10 +111,10 @@ def clip_by_value(t, clip_value_min, clip_value_max, t_min = math_ops.minimum(values, clip_value_max) # Assert that the shape is compatible with the initial shape, # to prevent unintentional broadcasting. - _ = values.shape.merge_with(t_min.shape) + values.shape.assert_is_compatible_with(t_min.shape) t_max = math_ops.maximum(t_min, clip_value_min, name=name) - _ = values.shape.merge_with(t_max.shape) + values.shape.assert_is_compatible_with(t_max.shape) if isinstance(t, ops.IndexedSlices): t_max = ops.IndexedSlices(t_max, t.indices, t.dense_shape) @@ -225,7 +225,7 @@ def clip_by_norm(t, clip_norm, axes=None, name=None): intermediate = values * clip_norm # Assert that the shape is compatible with the initial shape, # to prevent unintentional broadcasting. 
- _ = values.shape.merge_with(intermediate.shape) + values.shape.assert_is_compatible_with(intermediate.shape) values_clip = array_ops.identity( intermediate / math_ops.maximum(l2norm, clip_norm), name=name) diff --git a/tensorflow/python/ops/functional_ops.py b/tensorflow/python/ops/functional_ops.py index b51d1baa6c0..bdd20cda991 100644 --- a/tensorflow/python/ops/functional_ops.py +++ b/tensorflow/python/ops/functional_ops.py @@ -675,7 +675,7 @@ def scan(fn, tensor_shape.dimension_value( elems_flat[0].get_shape().with_rank_at_least(1)[0])) for elem in elems_flat[1:]: - n_static.merge_with( + n_static.assert_is_compatible_with( tensor_shape.Dimension( tensor_shape.dimension_value( elem.get_shape().with_rank_at_least(1)[0]))) diff --git a/tensorflow/python/ops/map_fn.py b/tensorflow/python/ops/map_fn.py index edea769f663..af681592e67 100644 --- a/tensorflow/python/ops/map_fn.py +++ b/tensorflow/python/ops/map_fn.py @@ -445,7 +445,7 @@ def map_fn(fn, tensor_shape.dimension_value( elems_batchable[0].get_shape().with_rank_at_least(1)[0])) for tensor in elems_batchable[1:]: - n_static.merge_with( + n_static.assert_is_compatible_with( tensor_shape.Dimension( tensor_shape.dimension_value( tensor.get_shape().with_rank_at_least(1)[0]))) diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index d22fbf3fa4e..5ec95b6646d 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -88,7 +88,7 @@ def log_poisson_loss(targets, log_input, compute_full_loss=False, name=None): log_input = ops.convert_to_tensor(log_input, name="log_input") targets = ops.convert_to_tensor(targets, name="targets") try: - targets.get_shape().merge_with(log_input.get_shape()) + targets.get_shape().assert_is_compatible_with(log_input.get_shape()) except ValueError: raise ValueError( "log_input and targets must have the same shape (%s vs %s)" % @@ -168,7 +168,7 @@ def sigmoid_cross_entropy_with_logits( # pylint: disable=invalid-name logits = 
ops.convert_to_tensor(logits, name="logits") labels = ops.convert_to_tensor(labels, name="labels") try: - labels.get_shape().merge_with(logits.get_shape()) + labels.get_shape().assert_is_compatible_with(logits.get_shape()) except ValueError: raise ValueError("logits and labels must have the same shape (%s vs %s)" % (logits.get_shape(), labels.get_shape())) @@ -304,7 +304,7 @@ def weighted_cross_entropy_with_logits_v2(labels, logits, pos_weight, logits = ops.convert_to_tensor(logits, name="logits") labels = ops.convert_to_tensor(labels, name="labels") try: - labels.get_shape().merge_with(logits.get_shape()) + labels.get_shape().assert_is_compatible_with(logits.get_shape()) except ValueError: raise ValueError("logits and labels must have the same shape (%s vs %s)" % (logits.get_shape(), labels.get_shape())) diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py index 6c11ebefb1c..32dc9e38cb0 100644 --- a/tensorflow/python/ops/rnn.py +++ b/tensorflow/python/ops/rnn.py @@ -318,7 +318,7 @@ def _reverse_seq(input_seq, lengths): for sequence in zip(*flat_input_seq): input_shape = tensor_shape.unknown_shape(rank=sequence[0].get_shape().rank) for input_ in sequence: - input_shape.merge_with(input_.get_shape()) + input_shape.assert_is_compatible_with(input_.get_shape()) input_.set_shape(input_shape) # Join into (time, batch_size, depth) @@ -1112,7 +1112,7 @@ def raw_rnn(cell, for input_shape_i in input_shape: # Static verification that batch sizes all match - static_batch_size.merge_with( + static_batch_size.assert_is_compatible_with( tensor_shape.dimension_at_index(input_shape_i, 0)) batch_size = tensor_shape.dimension_value(static_batch_size) @@ -1339,7 +1339,7 @@ def static_rnn(cell, input_shape = flat_input.get_shape().with_rank_at_least(2) batch_size, input_size = tensor_shape.dimension_at_index( input_shape, 0), input_shape[1:] - fixed_batch_size.merge_with(batch_size) + fixed_batch_size.assert_is_compatible_with(batch_size) for i, size in 
enumerate(input_size.dims): if tensor_shape.dimension_value(size) is None: raise ValueError( diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index 18b7561b113..3e3751e3ca6 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -1905,7 +1905,7 @@ def sparse_retain(sp_input, to_retain): retain_shape = to_retain.get_shape() retain_shape.assert_has_rank(1) if sp_input.values.get_shape().dims is not None: - sp_input.values.get_shape().dims[0].merge_with( + sp_input.values.get_shape().dims[0].assert_is_compatible_with( tensor_shape.dimension_at_index(retain_shape, 0)) where_true = array_ops.reshape(array_ops.where_v2(to_retain), [-1]) @@ -1993,7 +1993,7 @@ def sparse_reset_shape(sp_input, new_shape=None): # For cases when shape is known during graph construction, this catches the # error before the sparse_tensor.SparseTensor catches it. if output_shape_tensor.get_shape().rank is not None: - output_shape_tensor.get_shape().dims[0].merge_with( + output_shape_tensor.get_shape().dims[0].assert_is_compatible_with( in_shape.get_shape().dims[0]) output_shape_tensor_const = tensor_util.constant_value(output_shape_tensor) From 10b6814c36f718d18f5f215824a9fb20397b39f6 Mon Sep 17 00:00:00 2001 From: xiaohong1031 Date: Wed, 14 Oct 2020 15:42:14 -0700 Subject: [PATCH 011/220] rebase after native format PR being merged --- .../kernels/mkl/mkl_fused_batch_norm_op.cc | 192 +++++++++++++----- 1 file changed, 141 insertions(+), 51 deletions(-) diff --git a/tensorflow/core/kernels/mkl/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl/mkl_fused_batch_norm_op.cc index b40748b7522..8ef18ae9356 100644 --- a/tensorflow/core/kernels/mkl/mkl_fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/mkl/mkl_fused_batch_norm_op.cc @@ -117,6 +117,7 @@ class MklFusedBatchNormFwdPrimitive : public MklPrimitive { context_.ws_mem->set_data_handle(workspace_data); } #endif // ENABLE_MKLDNN_THREADPOOL + // Execute 
batch-normalization forward primitives. execute_primitives(context_.fwd_primitives, fwd_stream, context_.net_args); @@ -166,6 +167,7 @@ class MklFusedBatchNormFwdPrimitive : public MklPrimitive { // BatchNorm forward primitive. std::shared_ptr bn_fwd; std::vector fwd_primitives; + std::vector> net_args; BatchNormFwdContext() @@ -459,7 +461,6 @@ class MklFusedBatchNormBwdPrimitive : public MklPrimitive { context_.diff_src_mem->set_data_handle(static_cast(diff_src_data)); #endif // ENABLE_MKLDNN_THREADPOOL - // Execute backward batch-normalization primitives. DCHECK_EQ(context_.bwd_primitives.size(), context_.net_args.size()); execute_primitives(context_.bwd_primitives, bwd_stream, context_.net_args); @@ -502,6 +503,7 @@ class MklFusedBatchNormBwdPrimitive : public MklPrimitive { // Backward batch-normalization primitive. std::shared_ptr bn_bwd; std::vector bwd_primitives; + std::vector> net_args; BatchNormBwdContext() @@ -633,7 +635,7 @@ class MklFusedBatchNormBwdPrimitiveFactory : public MklPrimitiveFactory { // with MKL. This is different from default where the classes are // derived. Moves enabling to compile-time rather than runtime. template + bool is_batch_norm_ex = false, bool native_format = false> class MklFusedBatchNormOp : public OpKernel { public: explicit MklFusedBatchNormOp(OpKernelConstruction* context) @@ -688,7 +690,7 @@ class MklFusedBatchNormOp : public OpKernel { TensorShape tf_shape_src; MklDnnShape dnn_shape_src; - GetMklShape(context, kSrcIndex, &dnn_shape_src); + GetMklShape(context, kSrcIndex, &dnn_shape_src, native_format); if (dnn_shape_src.IsMklTensor()) { tf_shape_src = dnn_shape_src.GetTfShape(); @@ -773,6 +775,7 @@ class MklFusedBatchNormOp : public OpKernel { MklBatchNormFwdParams fwdParams(src_dims, depth_, epsilon_, is_training_, src_md, activation_mode_); + // Get forward batch-normalization op from the primitive caching pool. 
MklFusedBatchNormFwdPrimitive* bn_fwd = MklFusedBatchNormFwdPrimitiveFactory::Get(fwdParams); @@ -780,9 +783,9 @@ class MklFusedBatchNormOp : public OpKernel { // Allocate workspace tensor U* ws_data = nullptr; if (fwdParams.activation_mode == FusedBNActivationMode::kRelu) { - memory::desc workspace_pd = + memory::desc workspace_md = bn_fwd->GetBatchNormFwdPd()->workspace_desc(); - size_t workspace_bytes = workspace_pd.get_size(); + size_t workspace_bytes = workspace_md.get_size(); workspace_tf_shape.AddDim(workspace_bytes); AllocateTFOutputs(context, scale_tensor.shape(), workspace_tf_shape, @@ -790,7 +793,7 @@ class MklFusedBatchNormOp : public OpKernel { &saved_mean_tensor, &saved_variance_tensor, &reserved_space_tensor); if (reserved_space) { - wksp.SetUsrMem(workspace_pd, reserved_space_tensor); + wksp.SetUsrMem(workspace_md, reserved_space_tensor); ws_data = static_cast(wksp.GetOpMem().get_data_handle()); } } else { @@ -831,7 +834,7 @@ class MklFusedBatchNormOp : public OpKernel { // Check if reorder is needed for src. 
const T* src_data = nullptr; std::shared_ptr bn_fwd_pd = bn_fwd->GetBatchNormFwdPd(); - if (src_md != bn_fwd_pd->src_desc()) { + if (!native_format && src_md != bn_fwd_pd->src_desc()) { src.SetUsrMem(src_md, &src_tensor); src.CheckReorderToOpMem(bn_fwd_pd->src_desc(), cpu_engine_, context); src_data = static_cast(src.GetOpMem().get_data_handle()); @@ -839,7 +842,7 @@ class MklFusedBatchNormOp : public OpKernel { src_data = static_cast(const_cast(src_tensor.flat().data())); } - // Allocate output (dst) tensor; always set it as MKL-DNN layout + // Allocate output (dst) tensor MklDnnShape dnn_shape_dst; TensorShape tf_shape_dst; dnn_shape_dst.SetMklTensor(true); @@ -850,8 +853,11 @@ class MklFusedBatchNormOp : public OpKernel { : src_tensor.shape().dims(); dnn_shape_dst.SetTfLayout(ndims, src_dims, mkl_tensor_fmt); tf_shape_dst.AddDim(dst_pd.get_size() / sizeof(T)); + if (native_format) { + tf_shape_dst = dnn_shape_dst.GetTfShape(); + } AllocateOutputSetMklShape(context, kDstIndex, &dst_tensor, tf_shape_dst, - dnn_shape_dst); + dnn_shape_dst, native_format); U* weights_op_data = weights_data; U* mean_op_data = saved_mean_tensor->flat().data(); @@ -939,7 +945,7 @@ class MklFusedBatchNormOp : public OpKernel { MklDnnShape dnn_shape_dst; dnn_shape_dst.SetMklTensor(false); AllocateOutputSetMklShape(context, kDstIndex, dst_tensor, tf_shape_src, - dnn_shape_dst); + dnn_shape_dst, native_format); DCHECK(*dst_tensor); memset(const_cast((*dst_tensor)->tensor_data().data()), 0, (*dst_tensor)->tensor_data().size()); @@ -977,7 +983,8 @@ class MklFusedBatchNormOp : public OpKernel { MklDnnShape mkl_shape_batch_mean; mkl_shape_batch_mean.SetMklTensor(false); AllocateOutputSetMklShape(context, kBatchMeanIndex, batch_mean_tensor, - tf_shape_scale, mkl_shape_batch_mean); + tf_shape_scale, mkl_shape_batch_mean, + native_format); DCHECK(*batch_mean_tensor); // Set NAN mean value in case of empty input tensor @@ -990,7 +997,7 @@ class MklFusedBatchNormOp : public OpKernel { 
mkl_shape_batch_variance.SetMklTensor(false); AllocateOutputSetMklShape(context, kBatchVarianceIndex, batch_variance_tensor, tf_shape_scale, - mkl_shape_batch_variance); + mkl_shape_batch_variance, native_format); DCHECK(*batch_variance_tensor); // Set NAN variance value in case of empty input tensor @@ -1001,7 +1008,8 @@ class MklFusedBatchNormOp : public OpKernel { MklDnnShape mkl_shape_saved_mean; mkl_shape_saved_mean.SetMklTensor(false); AllocateOutputSetMklShape(context, kSavedMeanIndex, saved_mean_tensor, - tf_shape_scale, mkl_shape_saved_mean); + tf_shape_scale, mkl_shape_saved_mean, + native_format); DCHECK(*saved_mean_tensor); // Set 0 mean value in case of empty input tensor @@ -1012,7 +1020,7 @@ class MklFusedBatchNormOp : public OpKernel { mkl_shape_saved_variance.SetMklTensor(false); AllocateOutputSetMklShape(context, kSavedVarianceIndex, saved_variance_tensor, tf_shape_scale, - mkl_shape_saved_variance); + mkl_shape_saved_variance, native_format); DCHECK(*saved_variance_tensor); // Set 0 variance value in case of empty input tensor @@ -1027,13 +1035,14 @@ class MklFusedBatchNormOp : public OpKernel { mkl_shape_reserved_space.SetMklTensor(false); AllocateOutputSetMklShape(context, kReservedSpaceIndex, reserved_space_tensor, workspace_tf_shape, - mkl_shape_reserved_space); + mkl_shape_reserved_space, native_format); DCHECK((*reserved_space_tensor) != nullptr); } } }; -template +template class MklFusedBatchNormGradOp : public OpKernel { public: explicit MklFusedBatchNormGradOp(OpKernelConstruction* context) @@ -1069,8 +1078,8 @@ class MklFusedBatchNormGradOp : public OpKernel { : Tensor(); MklDnnShape dnn_shape_src, dnn_shape_diff_dst; - GetMklShape(context, kSrcIndex, &dnn_shape_src); - GetMklShape(context, kDiffDstIndex, &dnn_shape_diff_dst); + GetMklShape(context, kSrcIndex, &dnn_shape_src, native_format); + GetMklShape(context, kDiffDstIndex, &dnn_shape_diff_dst, native_format); TensorShape tf_shape_src, tf_shape_diff_dst; if 
(dnn_shape_diff_dst.IsMklTensor()) { @@ -1169,6 +1178,33 @@ class MklFusedBatchNormGradOp : public OpKernel { ? dnn_shape_diff_dst.GetMklLayout() : memory::desc(diff_dst_dims, MklDnnType(), dnn_fmt); + MklDnnData reorder_src(&cpu_engine_); + MklDnnData reorder_diff_dst(&cpu_engine_); + T* diff_dst_data = + static_cast(const_cast(diff_dst_tensor.flat().data())); + T* src_data = + static_cast(const_cast(src_tensor.flat().data())); + + if (!native_format) { + // MKL-DNN requires src and diff_dst to be in same memory layout, either + // blocked or native format. If these inputs are in different formats, + // convert the one in native format to blocked format as MKL-DNN gives + // better performance for blocked format. + if (dnn_shape_src.IsMklTensor() && !dnn_shape_diff_dst.IsMklTensor()) { + reorder_diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor); + reorder_diff_dst.CheckReorderToOpMem(src_md, cpu_engine_, context); + diff_dst_md = src_md; + diff_dst_data = + static_cast(reorder_diff_dst.GetOpMem().get_data_handle()); + } else if (!dnn_shape_src.IsMklTensor() && + dnn_shape_diff_dst.IsMklTensor()) { + reorder_src.SetUsrMem(src_md, &src_tensor); + reorder_src.CheckReorderToOpMem(diff_dst_md, cpu_engine_, context); + src_md = diff_dst_md; + src_data = static_cast(reorder_src.GetOpMem().get_data_handle()); + } + } + // weights -- MKL DNN packs scales/ shifts as weights in order // of scale, ..., scale, shift, ...., shift weights.AllocateBuffer(2 * depth_ * sizeof(U)); @@ -1186,18 +1222,19 @@ class MklFusedBatchNormGradOp : public OpKernel { MklFusedBatchNormBwdPrimitive* bn_bwd = MklFusedBatchNormBwdPrimitiveFactory::Get(bwdParams); - const T* src_data = src_tensor.flat().data(); - const T* diff_dst_data = diff_dst_tensor.flat().data(); // Check if diff_dst input needs to be reordered std::shared_ptr bn_bwd_pd = bn_bwd->GetBatchNormBwdPd(); - if (diff_dst_md != bn_bwd_pd->diff_dst_desc()) { - diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor); + if (!native_format && 
diff_dst_md != bn_bwd_pd->diff_dst_desc()) { + diff_dst.SetUsrMem(diff_dst_md, diff_dst_data); diff_dst.CheckReorderToOpMem(bn_bwd_pd->diff_dst_desc(), cpu_engine_, context); diff_dst_data = static_cast(diff_dst.GetOpMem().get_data_handle()); - } else { - diff_dst_data = - static_cast(const_cast(diff_dst_tensor.flat().data())); + } + + if (!native_format && (src_md != bn_bwd_pd->src_desc()) { + src.SetUsrMem(src_md, src_data); + src.CheckReorderToOpMem(bn_bwd_pd->src_desc(), cpu_engine_, context); + src_data = static_cast(src.GetOpMem().get_data_handle()); } // Indices of output tensors @@ -1213,8 +1250,12 @@ class MklFusedBatchNormGradOp : public OpKernel { dnn_shape_diff_src.SetTfLayout(src_dims.size(), src_dims, mkl_tensor_fmt); dnn_shape_diff_src.SetTfDimOrder(src_dims.size(), tensor_format_); tf_shape_diff_src.AddDim(diff_src_pd.get_size() / sizeof(T)); + if (native_format) { + tf_shape_diff_src = dnn_shape_diff_src.GetTfShape(); + } AllocateOutputSetMklShape(context, kDiffSrcIndex, &diff_src_tensor, - tf_shape_diff_src, dnn_shape_diff_src); + tf_shape_diff_src, dnn_shape_diff_src, + native_format); U* mean_data = static_cast(const_cast(saved_mean_tensor.flat().data())); @@ -1280,7 +1321,7 @@ class MklFusedBatchNormGradOp : public OpKernel { MklDnnShape dnn_shape_diff_src; dnn_shape_diff_src.SetMklTensor(false); AllocateOutputSetMklShape(context, kDiffSrcIndex, diff_src_tensor, - tf_shape_src, dnn_shape_diff_src); + tf_shape_src, dnn_shape_diff_src, native_format); auto diff_src_data = (*diff_src_tensor)->flat().data(); std::fill_n(diff_src_data, (*diff_src_tensor)->shape().num_elements(), static_cast(0)); @@ -1307,7 +1348,8 @@ class MklFusedBatchNormGradOp : public OpKernel { MklDnnShape mkl_shape_diff_scale; mkl_shape_diff_scale.SetMklTensor(false); AllocateOutputSetMklShape(context, kDiffScaleIndex, diff_scale_tensor, - tf_shape_scale_shift, mkl_shape_diff_scale); + tf_shape_scale_shift, mkl_shape_diff_scale, + native_format); DCHECK(*diff_scale_tensor); 
auto diff_scale_data = (*diff_scale_tensor)->flat().data(); @@ -1317,7 +1359,8 @@ class MklFusedBatchNormGradOp : public OpKernel { MklDnnShape mkl_shape_diff_shift; mkl_shape_diff_shift.SetMklTensor(false); AllocateOutputSetMklShape(context, kDiffShiftIndex, diff_shift_tensor, - tf_shape_scale_shift, mkl_shape_diff_shift); + tf_shape_scale_shift, mkl_shape_diff_shift, + native_format); DCHECK(*diff_shift_tensor); auto diff_shift_data = (*diff_shift_tensor)->flat().data(); @@ -1330,11 +1373,11 @@ class MklFusedBatchNormGradOp : public OpKernel { MklDnnShape mkl_shape_p; mkl_shape_p.SetMklTensor(false); AllocateOutputSetMklShape(context, kP1Index, &p1_tensor, TensorShape({}), - mkl_shape_p); + mkl_shape_p, native_format); std::fill_n(p1_tensor->flat().data(), p1_tensor->shape().num_elements(), static_cast(0)); AllocateOutputSetMklShape(context, kP2Index, &p2_tensor, TensorShape({}), - mkl_shape_p); + mkl_shape_p, native_format); std::fill_n(p2_tensor->flat().data(), p2_tensor->shape().num_elements(), static_cast(0)); } @@ -1348,7 +1391,13 @@ class MklFusedBatchNormGradOp : public OpKernel { .Device(DEVICE_CPU) \ .TypeConstraint("T") \ .Label(mkl_op_registry::kMklLayoutDependentOpLabel), \ - MklFusedBatchNormOp); + MklFusedBatchNormOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("_MklNativeFusedBatchNorm") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .Label(mkl_op_registry::kMklNameChangeOpLabel), \ + MklFusedBatchNormOp); TF_CALL_float(REGISTER_MKL_FUSED_BATCHNORM_CPU); TF_CALL_bfloat16(REGISTER_MKL_FUSED_BATCHNORM_CPU); @@ -1361,7 +1410,14 @@ TF_CALL_bfloat16(REGISTER_MKL_FUSED_BATCHNORM_CPU); .TypeConstraint("T") \ .TypeConstraint("U") \ .Label(mkl_op_registry::kMklLayoutDependentOpLabel), \ - MklFusedBatchNormOp); + MklFusedBatchNormOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("_MklNativeFusedBatchNormV2") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .TypeConstraint("U") \ + .Label(mkl_op_registry::kMklNameChangeOpLabel), \ + MklFusedBatchNormOp); 
REGISTER_MKL_FUSED_BATCHNORM_V2_CPU(float, float); REGISTER_MKL_FUSED_BATCHNORM_V2_CPU(bfloat16, float); @@ -1373,7 +1429,13 @@ REGISTER_MKL_FUSED_BATCHNORM_V2_CPU(bfloat16, float); .Device(DEVICE_CPU) \ .TypeConstraint("T") \ .Label(mkl_op_registry::kMklLayoutDependentOpLabel), \ - MklFusedBatchNormGradOp); + MklFusedBatchNormGradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("_MklNativeFusedBatchNormGrad") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .Label(mkl_op_registry::kMklNameChangeOpLabel), \ + MklFusedBatchNormGradOp); TF_CALL_float(REGISTER_MKL_FUSED_BATCHNORM_GRAD_CPU); TF_CALL_bfloat16(REGISTER_MKL_FUSED_BATCHNORM_GRAD_CPU); @@ -1386,7 +1448,14 @@ TF_CALL_bfloat16(REGISTER_MKL_FUSED_BATCHNORM_GRAD_CPU); .TypeConstraint("T") \ .TypeConstraint("U") \ .Label(mkl_op_registry::kMklLayoutDependentOpLabel), \ - MklFusedBatchNormGradOp); + MklFusedBatchNormGradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("_MklNativeFusedBatchNormGradV2") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .TypeConstraint("U") \ + .Label(mkl_op_registry::kMklNameChangeOpLabel), \ + MklFusedBatchNormGradOp); REGISTER_MKL_FUSED_BATCHNORM_GRAD_V2_CPU(float, float); REGISTER_MKL_FUSED_BATCHNORM_GRAD_V2_CPU(bfloat16, float); @@ -1395,21 +1464,35 @@ REGISTER_MKL_FUSED_BATCHNORM_GRAD_V2_CPU(bfloat16, float); // TODO: FusedBatchNormV3 has an additional output that is used to // hold intermediate results. This parameter functionality is // not implemented on CPU. 
-#define REGISTER_MKL_FUSED_BATCHNORM_V3_CPU(T, U) \ - REGISTER_KERNEL_BUILDER( \ - Name("_MklFusedBatchNormV3") \ - .Device(DEVICE_CPU) \ - .TypeConstraint("T") \ - .TypeConstraint("U") \ - .Label(mkl_op_registry::kMklLayoutDependentOpLabel), \ - MklFusedBatchNormOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("_MklFusedBatchNormEx") \ - .Device(DEVICE_CPU) \ - .TypeConstraint("T") \ - .TypeConstraint("U") \ - .Label(mkl_op_registry::kMklLayoutDependentOpLabel), \ - MklFusedBatchNormOp); +#define REGISTER_MKL_FUSED_BATCHNORM_V3_CPU(T, U) \ + REGISTER_KERNEL_BUILDER( \ + Name("_MklFusedBatchNormV3") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .TypeConstraint("U") \ + .Label(mkl_op_registry::kMklLayoutDependentOpLabel), \ + MklFusedBatchNormOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("_MklFusedBatchNormEx") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .TypeConstraint("U") \ + .Label(mkl_op_registry::kMklLayoutDependentOpLabel), \ + MklFusedBatchNormOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("_MklNativeFusedBatchNormV3") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .TypeConstraint("U") \ + .Label(mkl_op_registry::kMklNameChangeOpLabel), \ + MklFusedBatchNormOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("_MklNativeFusedBatchNormEx") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .TypeConstraint("U") \ + .Label(mkl_op_registry::kMklNameChangeOpLabel), \ + MklFusedBatchNormOp); REGISTER_MKL_FUSED_BATCHNORM_V3_CPU(float, float); REGISTER_MKL_FUSED_BATCHNORM_V3_CPU(bfloat16, float); @@ -1433,7 +1516,14 @@ REGISTER_KERNEL_BUILDER(Name("_FusedBatchNormEx") .TypeConstraint("T") \ .TypeConstraint("U") \ .Label(mkl_op_registry::kMklLayoutDependentOpLabel), \ - MklFusedBatchNormGradOp); + MklFusedBatchNormGradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("_MklNativeFusedBatchNormGradV3") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .TypeConstraint("U") \ + .Label(mkl_op_registry::kMklNameChangeOpLabel), \ + MklFusedBatchNormGradOp); 
REGISTER_MKL_FUSED_BATCHNORM_GRAD_V3_CPU(float, float); REGISTER_MKL_FUSED_BATCHNORM_GRAD_V3_CPU(bfloat16, float); From 3d801274d6090492467c3ca850879852579ff786 Mon Sep 17 00:00:00 2001 From: xiaohong1031 Date: Thu, 15 Oct 2020 13:57:44 -0700 Subject: [PATCH 012/220] fix an issue introduced during address a merge conflict --- tensorflow/core/kernels/mkl/mkl_fused_batch_norm_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/mkl/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl/mkl_fused_batch_norm_op.cc index 8ef18ae9356..d5c59f1372b 100644 --- a/tensorflow/core/kernels/mkl/mkl_fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/mkl/mkl_fused_batch_norm_op.cc @@ -1231,7 +1231,7 @@ class MklFusedBatchNormGradOp : public OpKernel { diff_dst_data = static_cast(diff_dst.GetOpMem().get_data_handle()); } - if (!native_format && (src_md != bn_bwd_pd->src_desc()) { + if (!native_format && (src_md != bn_bwd_pd->src_desc())) { src.SetUsrMem(src_md, src_data); src.CheckReorderToOpMem(bn_bwd_pd->src_desc(), cpu_engine_, context); src_data = static_cast(src.GetOpMem().get_data_handle()); From 04ff97cbb64909507d08273bf3e2499b46d5988a Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Thu, 29 Oct 2020 04:39:35 +0300 Subject: [PATCH 013/220] fix typos in lite directory --- tensorflow/lite/c/common.h | 2 +- tensorflow/lite/core/subgraph.cc | 4 ++-- tensorflow/lite/delegates/delegate_test.cc | 4 ++-- tensorflow/lite/delegates/gpu/api.h | 2 +- tensorflow/lite/delegates/gpu/gl/node_shader.h | 8 ++++---- tensorflow/lite/delegates/nnapi/nnapi_delegate.cc | 4 ++-- .../lite/delegates/utils/dummy_delegate/README.md | 2 +- .../experimental/writer/option_writer_generator.cc | 2 +- tensorflow/lite/g3doc/guide/build_arm64.md | 2 +- tensorflow/lite/g3doc/guide/build_rpi.md | 4 ++-- tensorflow/lite/g3doc/guide/ops_select.md | 2 +- .../task_library/customized_task_api.md | 4 ++-- tensorflow/lite/g3doc/performance/gpu.md | 8 ++++---- 
.../model_maker_image_classification.ipynb | 12 ++++++------ .../tutorials/model_maker_question_answer.ipynb | 2 +- .../model_maker_text_classification.ipynb | 2 +- tensorflow/lite/java/BUILD | 6 +++--- tensorflow/lite/kernels/transpose_conv.cc | 6 +++--- tensorflow/lite/kernels/variable_ops_test.cc | 2 +- .../lite/micro/examples/magic_wand/train/README.md | 2 +- .../lite/micro/examples/micro_speech/README.md | 4 ++-- .../lite/micro/examples/micro_speech/esp/ringbuf.c | 2 +- .../micro_features/no_feature_data_slice.h | 2 +- .../micro_features/yes_feature_data_slice.h | 2 +- .../simple_features/no_power_spectrum_data.h | 2 +- .../simple_features/simple_features_generator.cc | 2 +- .../simple_features/yes_power_spectrum_data.h | 2 +- .../lite/micro/examples/person_detection/README.md | 2 +- tensorflow/lite/micro/kernels/kernel_runner.h | 12 ++++++------ .../lite/micro/kernels/xtensa_hifimini/softmax.cc | 6 +++--- tensorflow/lite/micro/tools/make/Makefile | 6 +++--- .../make/templates/arc/README_ARC_EMSDP.md.tpl | 2 +- .../lite/profiling/profile_summary_formatter.h | 2 +- tensorflow/lite/python/optimize/calibrator.py | 2 +- .../tools/benchmark/experimental/c/c_api_types.h | 2 +- .../lite/tools/optimize/operator_property.cc | 14 +++++++------- tensorflow/lite/tools/versioning/op_version.cc | 2 +- 37 files changed, 73 insertions(+), 73 deletions(-) diff --git a/tensorflow/lite/c/common.h b/tensorflow/lite/c/common.h index e04e1a12cd4..389a08528f1 100644 --- a/tensorflow/lite/c/common.h +++ b/tensorflow/lite/c/common.h @@ -80,7 +80,7 @@ struct TfLiteRegistration; // An external context is a collection of information unrelated to the TF Lite // framework, but useful to a subset of the ops. TF Lite knows very little -// about about the actual contexts, but it keeps a list of them, and is able to +// about the actual contexts, but it keeps a list of them, and is able to // refresh them if configurations like the number of recommended threads // change. 
typedef struct TfLiteExternalContext { diff --git a/tensorflow/lite/core/subgraph.cc b/tensorflow/lite/core/subgraph.cc index 20d68590740..1deda07d397 100644 --- a/tensorflow/lite/core/subgraph.cc +++ b/tensorflow/lite/core/subgraph.cc @@ -219,7 +219,7 @@ Subgraph::Subgraph(ErrorReporter* error_reporter, // Reserve some space for the tensors to avoid excessive resizing. tensors_.reserve(kTensorsReservedCapacity); nodes_and_registration().reserve(kTensorsReservedCapacity); - // Invalid to call these these except from TfLiteDelegate + // Invalid to call these except from TfLiteDelegate SwitchToKernelContext(); } @@ -311,7 +311,7 @@ TfLiteDelegateParams* CreateDelegateParams(TfLiteDelegate* delegate, // Use `char*` for conveniently step through the allocated space by bytes. char* allocation = static_cast(malloc(allocation_size)); - // Step 3: Fill all data structures structures. + // Step 3: Fill all data structures. TfLiteDelegateParams* params = reinterpret_cast(allocation); params->delegate = delegate; diff --git a/tensorflow/lite/delegates/delegate_test.cc b/tensorflow/lite/delegates/delegate_test.cc index a51d5bc431a..857a94b67b4 100644 --- a/tensorflow/lite/delegates/delegate_test.cc +++ b/tensorflow/lite/delegates/delegate_test.cc @@ -196,7 +196,7 @@ class TestDelegate : public ::testing::Test { kTfLiteOk); if (simple->min_ops_per_subset() > 0) { - // Build a new vector of ops from subsets with atleast the minimum + // Build a new vector of ops from subsets with at least the minimum // size. std::vector allowed_ops; for (int idx = 0; idx < num_partitions; ++idx) { @@ -1304,7 +1304,7 @@ TEST_F(TestDelegateWithDynamicTensors, ShapePropagation_FlagNotSet) { // Input: 0, Output:12. // All constants are 2, so the function is: (x + 2 + 2) * 2 + 2 = 2x + 10 // -// Delegate only supports ADD, so can have upto two delegated partitions. +// Delegate only supports ADD, so can have up to two delegated partitions. 
// TODO(b/156707497): Add more cases here once we have landed CPU kernels // supporting FP16. class TestFP16Delegation : public ::testing::TestWithParam { diff --git a/tensorflow/lite/delegates/gpu/api.h b/tensorflow/lite/delegates/gpu/api.h index 7892d0ce2f6..075e66bef50 100644 --- a/tensorflow/lite/delegates/gpu/api.h +++ b/tensorflow/lite/delegates/gpu/api.h @@ -364,7 +364,7 @@ struct InferenceOptions { }; // Returns a position number for the priority. If priority is missing, -// then it it would return 'max num priorities + 1'. +// then it would return 'max num priorities + 1'. int GetPosition(const InferenceOptions& options, InferencePriority p); // Return true if options are valid. diff --git a/tensorflow/lite/delegates/gpu/gl/node_shader.h b/tensorflow/lite/delegates/gpu/gl/node_shader.h index 0575182f361..9abe41fa07b 100644 --- a/tensorflow/lite/delegates/gpu/gl/node_shader.h +++ b/tensorflow/lite/delegates/gpu/gl/node_shader.h @@ -44,10 +44,10 @@ enum class IOStructure { ONLY_DEFINITIONS, // For inputs: - // Source code runs computations using 'vec4 value_N' declared by - // the compiler, where where N is an index of the input. Each value comes - // from inputs using coordinates set by GlobalInvocationID and a dispatch - // method, therefore, source code should not explicitly read values. + // Source code runs computations using 'vec4 value_N' declared by the + // compiler, where N is an index of the input. Each value comes from inputs + // using coordinates set by GlobalInvocationID and a dispatch method, + // therefore, source code should not explicitly read values. 
// // For outputs: // Source code runs computations and leaves results in 'vec4 value_N' diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc index 913e35cb9d9..c12e9c0da9c 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc @@ -1359,7 +1359,7 @@ class NNAPIOpBuilder { if (tensor->allocation_type == kTfLiteMmapRo) { if (IsQuantized(tensor_type) && need_int8_conversion && nn_type != ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) { - // We need to to add a tensor and convert the weights into uint8. + // We need to add a tensor and convert the weights into uint8. // Currently this is only needed for fully_connected. The new_tensor is // needed for lifetime management for the converted weights. int new_tensor_index = -1; @@ -2488,7 +2488,7 @@ bool NNAPIDelegateKernel::Validate( context->tensors[node->inputs->data[1]].dims; Expect(TfLiteIntArrayEqual(condition_shape, input_shape), NNAPIValidationFailureType::kUnsupportedOperandValue, - "Condition and inputs tensors shuld have the same shape", + "Condition and inputs tensors should have the same shape", &val_ctx); } break; case kTfLiteBuiltinGather: { diff --git a/tensorflow/lite/delegates/utils/dummy_delegate/README.md b/tensorflow/lite/delegates/utils/dummy_delegate/README.md index d55ba421cba..6b394d12160 100644 --- a/tensorflow/lite/delegates/utils/dummy_delegate/README.md +++ b/tensorflow/lite/delegates/utils/dummy_delegate/README.md @@ -21,7 +21,7 @@ the ideas above. 
For more sophisticated examples, refer to [Flex delegate](https ## Testing & Tooling There are currently **two options** to plug in a newly created TFLite delegate -to reuse existing TFLite kernel tests and and tooling: +to reuse existing TFLite kernel tests and tooling: - Utilize the **[delegate registrar](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/delegates)** mechanism diff --git a/tensorflow/lite/experimental/writer/option_writer_generator.cc b/tensorflow/lite/experimental/writer/option_writer_generator.cc index 14d7219f304..e8cef523c60 100644 --- a/tensorflow/lite/experimental/writer/option_writer_generator.cc +++ b/tensorflow/lite/experimental/writer/option_writer_generator.cc @@ -119,7 +119,7 @@ class OpOptionData { const std::unordered_map& op_to_option() { return op_to_option_; } - // Maps from option to to C struct i.e. 'AddOptions' -> 'TfLiteAddOptions' + // Maps from option to C struct i.e. 'AddOptions' -> 'TfLiteAddOptions' const std::unordered_map& option_to_struct() { return option_to_struct_; } diff --git a/tensorflow/lite/g3doc/guide/build_arm64.md b/tensorflow/lite/g3doc/guide/build_arm64.md index c07c81cd69b..9a7bf12c1b8 100644 --- a/tensorflow/lite/g3doc/guide/build_arm64.md +++ b/tensorflow/lite/g3doc/guide/build_arm64.md @@ -126,7 +126,7 @@ page for the detail. bazel build --config=elinux_aarch64 -c opt //tensorflow/lite:libtensorflowlite.so ``` -You can find a shared library library in: +You can find a shared library in: `bazel-bin/tensorflow/lite/libtensorflowlite.so`. 
Currently, there is no straightforward way to extract all header files needed, diff --git a/tensorflow/lite/g3doc/guide/build_rpi.md b/tensorflow/lite/g3doc/guide/build_rpi.md index f43a81dd268..408a0f11856 100644 --- a/tensorflow/lite/g3doc/guide/build_rpi.md +++ b/tensorflow/lite/g3doc/guide/build_rpi.md @@ -119,7 +119,7 @@ cd tensorflow_src && ./tensorflow/lite/tools/make/download_dependencies.sh You can use [ARM GCC toolchains](https://github.com/tensorflow/tensorflow/tree/master/third_party/toolchains/embedded/arm-linux) -with Bazel to build an armhf shared library which is compatibile with Raspberry +with Bazel to build an armhf shared library which is compatible with Raspberry Pi 2, 3 and 4. Note: The generated shared library requires glibc 2.28 or higher to run. @@ -165,7 +165,7 @@ page for the detail. bazel build --config=elinux_armhf -c opt //tensorflow/lite:libtensorflowlite.so ``` -You can find a shared library library in: +You can find a shared library in: `bazel-bin/tensorflow/lite/libtensorflowlite.so`. Currently, there is no straightforward way to extract all header files needed, diff --git a/tensorflow/lite/g3doc/guide/ops_select.md b/tensorflow/lite/g3doc/guide/ops_select.md index 3aa81528c1f..73466791078 100644 --- a/tensorflow/lite/g3doc/guide/ops_select.md +++ b/tensorflow/lite/g3doc/guide/ops_select.md @@ -222,7 +222,7 @@ pip package version since 2.3 for Linux and 2.4 for other environments. ### Performance When using a mixture of both builtin and select TensorFlow ops, all of the same -TensorFlow Lite optimizations and optimized builtin ops will be be available and +TensorFlow Lite optimizations and optimized builtin ops will be available and usable with the converted model. 
The following table describes the average time taken to run inference on diff --git a/tensorflow/lite/g3doc/inference_with_metadata/task_library/customized_task_api.md b/tensorflow/lite/g3doc/inference_with_metadata/task_library/customized_task_api.md index 04f0477552e..d7b8d315365 100644 --- a/tensorflow/lite/g3doc/inference_with_metadata/task_library/customized_task_api.md +++ b/tensorflow/lite/g3doc/inference_with_metadata/task_library/customized_task_api.md @@ -110,7 +110,7 @@ To build an API object,you must provide the following information by extending std::vector, // OutputType const std::string&, const std::string& // InputTypes > { - // Convert API input into into tensors + // Convert API input into tensors absl::Status BertQuestionAnswerer::Preprocess( const std::vector& input_tensors, // input tensors of the model const std::string& context, const std::string& query // InputType of the API @@ -230,7 +230,7 @@ following information by extending [`BaseTaskApi`](https://github.com/tensorflow/tflite-support/blob/master/tensorflow_lite_support/java/src/java/org/tensorflow/lite/task/core/BaseTaskApi.java), which provides JNI handlings for all Java Task APIs. -* __Determine the API I/O__ - This usually mirriors the native interfaces. e.g +* __Determine the API I/O__ - This usually mirrors the native interfaces. e.g `BertQuestionAnswerer` takes `(String context, String question)` as input and outputs `List`. The implementation calls a private native function with similar signature, except it has an additional parameter `long diff --git a/tensorflow/lite/g3doc/performance/gpu.md b/tensorflow/lite/g3doc/performance/gpu.md index 077f88e1b12..e992518baf1 100644 --- a/tensorflow/lite/g3doc/performance/gpu.md +++ b/tensorflow/lite/g3doc/performance/gpu.md @@ -114,7 +114,7 @@ OR pod 'TensorFlowLiteSwift', '~> 0.0.1-nightly', :subspecs => ['Metal'] ``` -You can do similiarly for `TensorFlowLiteC` if you want to use the C API. 
+You can do similarly for `TensorFlowLiteC` if you want to use the C API. #### Step 3. Enable the GPU delegate @@ -154,9 +154,9 @@ Lastly make sure to select Release-only builds on 64-bit architecture. Under ### Android -Note: The TensorFlow Lite Interpreter must be created on the same thread as when -is is run. Otherwise, `TfLiteGpuDelegate Invoke: GpuDelegate must run on the -same thread where it was initialized.` may occur. +Note: The TensorFlow Lite Interpreter must be created on the same thread as +where it is run. Otherwise, `TfLiteGpuDelegate Invoke: GpuDelegate must run on +the same thread where it was initialized.` may occur. Look at the demo to see how to add the delegate. In your application, add the AAR as above, import `org.tensorflow.lite.gpu.GpuDelegate` module, and use diff --git a/tensorflow/lite/g3doc/tutorials/model_maker_image_classification.ipynb b/tensorflow/lite/g3doc/tutorials/model_maker_image_classification.ipynb index ef650c6b05b..f0da8e4b91a 100644 --- a/tensorflow/lite/g3doc/tutorials/model_maker_image_classification.ipynb +++ b/tensorflow/lite/g3doc/tutorials/model_maker_image_classification.ipynb @@ -84,7 +84,7 @@ "source": [ "## Prerequisites\n", "\n", - "To run this example, we first need to install serveral required packages, including Model Maker package that in github [repo](https://github.com/tensorflow/examples/tree/master/tensorflow_examples/lite/model_maker)." + "To run this example, we first need to install several required packages, including Model Maker package that in GitHub [repo](https://github.com/tensorflow/examples/tree/master/tensorflow_examples/lite/model_maker)." ] }, { @@ -346,7 +346,7 @@ "id": "NNRNv_mloS89" }, "source": [ - "If you prefer not to upload your images to the cloud, you could try to run the library locally following the [guide](https://github.com/tensorflow/examples/tree/master/tensorflow_examples/lite/model_maker) in github." 
+ "If you prefer not to upload your images to the cloud, you could try to run the library locally following the [guide](https://github.com/tensorflow/examples/tree/master/tensorflow_examples/lite/model_maker) in GitHub." ] }, { @@ -783,7 +783,7 @@ "id": "-4jQaxyT5_KV" }, "source": [ - "You can also evalute the tflite model with the `evaluate_tflite` method." + "You can also evaluate the tflite model with the `evaluate_tflite` method." ] }, { @@ -805,7 +805,7 @@ "source": [ "## Advanced Usage\n", "\n", - "The `create` function is the critical part of this library. It uses transfer learning with a pretrained model similiar to the [tutorial](https://www.tensorflow.org/tutorials/images/transfer_learning).\n", + "The `create` function is the critical part of this library. It uses transfer learning with a pretrained model similar to the [tutorial](https://www.tensorflow.org/tutorials/images/transfer_learning).\n", "\n", "The `create`function contains the following steps:\n", "\n", @@ -843,7 +843,7 @@ "id": "iyIo0d5TCzE2" }, "source": [ - "Model Maker supports multiple post-training quantization options. Let's take full integer quantization as an instance. First, define the quantization config to enforce enforce full integer quantization for all ops including the input and output. The input type and output type are `uint8` by default. You may also change them to other types like `int8` by setting `inference_input_type` and `inference_output_type` in config." + "Model Maker supports multiple post-training quantization options. Let's take full integer quantization as an instance. First, define the quantization config to enforce full integer quantization for all ops including the input and output. The input type and output type are `uint8` by default. You may also change them to other types like `int8` by setting `inference_input_type` and `inference_output_type` in config." ] }, { @@ -1018,7 +1018,7 @@ " `use_hub_library` is True. 
None by default.\n", "* `shuffle`: Boolean, whether the data should be shuffled. False by default.\n", "* `use_augmentation`: Boolean, use data augmentation for preprocessing. False by default.\n", - "* `use_hub_library`: Boolean, use `make_image_classifier_lib` from tensorflow hub to retrain the model. This training pipline could achieve better performance for complicated dataset with many categories. True by default. \n", + "* `use_hub_library`: Boolean, use `make_image_classifier_lib` from tensorflow hub to retrain the model. This training pipeline could achieve better performance for complicated dataset with many categories. True by default. \n", "* `warmup_steps`: Number of warmup steps for warmup schedule on learning rate. If None, the default warmup_steps is used which is the total training steps in two epochs. Only used when `use_hub_library` is False. None by default.\n", "* `model_dir`: Optional, the location of the model checkpoint files. Only used when `use_hub_library` is False. None by default.\n", "\n", diff --git a/tensorflow/lite/g3doc/tutorials/model_maker_question_answer.ipynb b/tensorflow/lite/g3doc/tutorials/model_maker_question_answer.ipynb index 06f534522c7..328f9d0cb70 100644 --- a/tensorflow/lite/g3doc/tutorials/model_maker_question_answer.ipynb +++ b/tensorflow/lite/g3doc/tutorials/model_maker_question_answer.ipynb @@ -665,7 +665,7 @@ "id": "HZKYthlVrTos" }, "source": [ - "You can also evalute the tflite model with the `evaluate_tflite` method. This step is expected to take a long time." + "You can also evaluate the tflite model with the `evaluate_tflite` method. This step is expected to take a long time." 
] }, { diff --git a/tensorflow/lite/g3doc/tutorials/model_maker_text_classification.ipynb b/tensorflow/lite/g3doc/tutorials/model_maker_text_classification.ipynb index ba6d266361b..2fc40f8a1f0 100644 --- a/tensorflow/lite/g3doc/tutorials/model_maker_text_classification.ipynb +++ b/tensorflow/lite/g3doc/tutorials/model_maker_text_classification.ipynb @@ -747,7 +747,7 @@ "id": "HZKYthlVrTos" }, "source": [ - "You can evalute the tflite model with `evaluate_tflite` method to get its accuracy." + "You can evaluate the tflite model with `evaluate_tflite` method to get its accuracy." ] }, { diff --git a/tensorflow/lite/java/BUILD b/tensorflow/lite/java/BUILD index 9bceb939c02..c86872b18e3 100644 --- a/tensorflow/lite/java/BUILD +++ b/tensorflow/lite/java/BUILD @@ -91,9 +91,9 @@ tflite_flex_android_library( visibility = ["//visibility:public"], ) -# EXPERIMENTAL: Android target target for GPU acceleration. Note that this -# library contains *only* the GPU delegate and its Java wrapper; clients must -# also include the core `tensorflowlite` runtime. +# EXPERIMENTAL: Android target for GPU acceleration. Note that this library +# contains *only* the GPU delegate and its Java wrapper; clients must also +# include the core `tensorflowlite` runtime. android_library( name = "tensorflowlite_gpu", srcs = ["//tensorflow/lite/delegates/gpu/java/src/main/java/org/tensorflow/lite/gpu:gpu_delegate"], diff --git a/tensorflow/lite/kernels/transpose_conv.cc b/tensorflow/lite/kernels/transpose_conv.cc index 52ee0414dd6..7a2b1a8dceb 100644 --- a/tensorflow/lite/kernels/transpose_conv.cc +++ b/tensorflow/lite/kernels/transpose_conv.cc @@ -22,10 +22,10 @@ limitations under the License. #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/cpu_backend_context.h" #include "tensorflow/lite/kernels/internal/compatibility.h" -// NOLINTNEXTLINE - This header file should't go to the top. +// NOLINTNEXTLINE - This header file shouldn't go to the top. 
#include "tensorflow/lite/kernels/internal/optimized/integer_ops/transpose_conv.h" #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h" -// NOLINTNEXTLINE - This header file should't go to the top. +// NOLINTNEXTLINE - This header file shouldn't go to the top. #include "tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h" #include "tensorflow/lite/kernels/internal/reference/reference_ops.h" #include "tensorflow/lite/kernels/internal/tensor.h" @@ -204,7 +204,7 @@ TfLiteStatus ResizeAndTransposeWeights(TfLiteContext* context, TF_LITE_ENSURE_STATUS(context->ResizeTensor(context, transposed_weights, transposed_weights_shape_array)); - // Transpose the weights from from OHWI order to HWOI order. + // Transpose the weights from OHWI order to HWOI order. TransposeParams transpose_params; transpose_params.perm_count = 4; transpose_params.perm[0] = 1; diff --git a/tensorflow/lite/kernels/variable_ops_test.cc b/tensorflow/lite/kernels/variable_ops_test.cc index 077a03df21d..1716f896805 100644 --- a/tensorflow/lite/kernels/variable_ops_test.cc +++ b/tensorflow/lite/kernels/variable_ops_test.cc @@ -44,7 +44,7 @@ class VariableOpsTest : public ::testing::Test { } void ConstructGraph() { - // Construct a graph like ths: + // Construct a graph like this: // Input: %0, %1, %2 // Output: %3 // variable_assign(%0, %2) diff --git a/tensorflow/lite/micro/examples/magic_wand/train/README.md b/tensorflow/lite/micro/examples/magic_wand/train/README.md index f85ca015a9f..0b562a322de 100644 --- a/tensorflow/lite/micro/examples/magic_wand/train/README.md +++ b/tensorflow/lite/micro/examples/magic_wand/train/README.md @@ -84,7 +84,7 @@ $ python train.py --model CNN --person true #### Model type -In the `--model` argument, you can can provide `CNN` or `LSTM`. The CNN +In the `--model` argument, you can provide `CNN` or `LSTM`. The CNN model has a smaller size and lower latency. 
## Collecting new data diff --git a/tensorflow/lite/micro/examples/micro_speech/README.md b/tensorflow/lite/micro/examples/micro_speech/README.md index f896e40de2e..8490458e3a9 100644 --- a/tensorflow/lite/micro/examples/micro_speech/README.md +++ b/tensorflow/lite/micro/examples/micro_speech/README.md @@ -223,7 +223,7 @@ make -f tensorflow/lite/micro/tools/make/Makefile TARGET=esp generate_micro_spee ### Building the example -Go the the example project directory +Go to the example project directory ``` cd tensorflow/lite/micro/tools/make/gen/esp_xtensa-esp32/prj/micro_speech/esp-idf ``` @@ -577,7 +577,7 @@ using [ARM Mbed](https://github.com/ARMmbed/mbed-cli). The following instructions will help you build and deploy this example to [HIMAX WE1 EVB](https://github.com/HimaxWiseEyePlus/bsp_tflu/tree/master/HIMAX_WE1_EVB_board_brief) -board. To undstand more about using this board, please check +board. To understand more about using this board, please check [HIMAX WE1 EVB user guide](https://github.com/HimaxWiseEyePlus/bsp_tflu/tree/master/HIMAX_WE1_EVB_user_guide). ### Initial Setup diff --git a/tensorflow/lite/micro/examples/micro_speech/esp/ringbuf.c b/tensorflow/lite/micro/examples/micro_speech/esp/ringbuf.c index b297069e80c..6bf1585ea5a 100644 --- a/tensorflow/lite/micro/examples/micro_speech/esp/ringbuf.c +++ b/tensorflow/lite/micro/examples/micro_speech/esp/ringbuf.c @@ -291,7 +291,7 @@ void rb_abort(ringbuf_t *rb) { } /** - * Reset the ringbuffer and keep keep rb_write aborted. + * Reset the ringbuffer and keep rb_write aborted. * Note that we are taking lock before even toggling `abort_write` variable. * This serves a special purpose to not allow this abort to be mixed with * rb_write. 
diff --git a/tensorflow/lite/micro/examples/micro_speech/micro_features/no_feature_data_slice.h b/tensorflow/lite/micro/examples/micro_speech/micro_features/no_feature_data_slice.h index 7c27379f6de..01e6605b844 100644 --- a/tensorflow/lite/micro/examples/micro_speech/micro_features/no_feature_data_slice.h +++ b/tensorflow/lite/micro/examples/micro_speech/micro_features/no_feature_data_slice.h @@ -16,7 +16,7 @@ limitations under the License. // This data was extracted from the larger feature data held in // no_features_data.cc and consists of the 29th spectrogram slice of 43 values. // This is the expected result of running the sample data in -// no_30ms_sample_data.cc through through the preprocessing pipeline. +// no_30ms_sample_data.cc through the preprocessing pipeline. #ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_NO_FEATURE_DATA_SLICE_H_ #define TENSORFLOW_LITE_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_NO_FEATURE_DATA_SLICE_H_ diff --git a/tensorflow/lite/micro/examples/micro_speech/micro_features/yes_feature_data_slice.h b/tensorflow/lite/micro/examples/micro_speech/micro_features/yes_feature_data_slice.h index 2427ee70063..18faadcf971 100644 --- a/tensorflow/lite/micro/examples/micro_speech/micro_features/yes_feature_data_slice.h +++ b/tensorflow/lite/micro/examples/micro_speech/micro_features/yes_feature_data_slice.h @@ -16,7 +16,7 @@ limitations under the License. // This data was extracted from the larger feature data held in // no_micro_features_data.cc and consists of the 26th spectrogram slice of 40 // values. This is the expected result of running the sample data in -// yes_30ms_sample_data.cc through through the preprocessing pipeline. +// yes_30ms_sample_data.cc through the preprocessing pipeline. 
#ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_YES_FEATURE_DATA_SLICE_H_ #define TENSORFLOW_LITE_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_YES_FEATURE_DATA_SLICE_H_ diff --git a/tensorflow/lite/micro/examples/micro_speech/simple_features/no_power_spectrum_data.h b/tensorflow/lite/micro/examples/micro_speech/simple_features/no_power_spectrum_data.h index 463a4951cf1..f20362349f2 100644 --- a/tensorflow/lite/micro/examples/micro_speech/simple_features/no_power_spectrum_data.h +++ b/tensorflow/lite/micro/examples/micro_speech/simple_features/no_power_spectrum_data.h @@ -16,7 +16,7 @@ limitations under the License. // This data was extracted from the larger feature data held in // no_features_data.cc and consists of the 29th spectrogram slice of 43 values. // This is the expected result of running the sample data in -// no_30ms_sample_data.cc through through the preprocessing pipeline. +// no_30ms_sample_data.cc through the preprocessing pipeline. #ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_NO_POWER_SPECTRUM_DATA_H_ #define TENSORFLOW_LITE_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_NO_POWER_SPECTRUM_DATA_H_ diff --git a/tensorflow/lite/micro/examples/micro_speech/simple_features/simple_features_generator.cc b/tensorflow/lite/micro/examples/micro_speech/simple_features/simple_features_generator.cc index 0de36b48e41..204bfc857d0 100644 --- a/tensorflow/lite/micro/examples/micro_speech/simple_features/simple_features_generator.cc +++ b/tensorflow/lite/micro/examples/micro_speech/simple_features/simple_features_generator.cc @@ -136,7 +136,7 @@ TfLiteStatus GenerateSimpleFeatures(tflite::ErrorReporter* error_reporter, // Quantize the result into eight bits, effectively multiplying by two. // The 127.5 constant here has to match the features_max value defined in // tensorflow/examples/speech_commands/input_data.py, and this also assumes - // that features_min is zero. It it wasn't, we'd have to subtract it first. 
+ // that features_min is zero. If it wasn't, we'd have to subtract it first. int quantized_average = roundf(average * (255.0f / 127.5f)); if (quantized_average < 0) { quantized_average = 0; diff --git a/tensorflow/lite/micro/examples/micro_speech/simple_features/yes_power_spectrum_data.h b/tensorflow/lite/micro/examples/micro_speech/simple_features/yes_power_spectrum_data.h index 7e0c146ace0..5264e6262fc 100644 --- a/tensorflow/lite/micro/examples/micro_speech/simple_features/yes_power_spectrum_data.h +++ b/tensorflow/lite/micro/examples/micro_speech/simple_features/yes_power_spectrum_data.h @@ -16,7 +16,7 @@ limitations under the License. // This data was extracted from the larger feature data held in // no_features_data.cc and consists of the 26th spectrogram slice of 43 values. // This is the expected result of running the sample data in -// yes_30ms_sample_data.cc through through the preprocessing pipeline. +// yes_30ms_sample_data.cc through the preprocessing pipeline. #ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_YES_POWER_SPECTRUM_DATA_H_ #define TENSORFLOW_LITE_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_YES_POWER_SPECTRUM_DATA_H_ diff --git a/tensorflow/lite/micro/examples/person_detection/README.md b/tensorflow/lite/micro/examples/person_detection/README.md index 8f437524ef0..7312582f9b9 100644 --- a/tensorflow/lite/micro/examples/person_detection/README.md +++ b/tensorflow/lite/micro/examples/person_detection/README.md @@ -311,7 +311,7 @@ make -f tensorflow/lite/micro/tools/make/Makefile TARGET=esp generate_person_det ### Building the example -Go the the example project directory +Go to the example project directory ``` cd tensorflow/lite/micro/tools/make/gen/esp_xtensa-esp32/prj/person_detection/esp-idf ``` diff --git a/tensorflow/lite/micro/kernels/kernel_runner.h b/tensorflow/lite/micro/kernels/kernel_runner.h index 45d107e7a37..064aabe97cf 100644 --- a/tensorflow/lite/micro/kernels/kernel_runner.h +++ 
b/tensorflow/lite/micro/kernels/kernel_runner.h @@ -23,12 +23,12 @@ limitations under the License. namespace tflite { namespace micro { -// Helper class to perform a simulated kernel (i.e. TfLiteRegistration) lifecyle -// (init, prepare, invoke). All internal allocations are handled by this class. -// Simply pass in the registration, list of required tensors, inputs array, -// outputs array, and any pre-builtin data. Calling Invoke() will automatically -// walk the kernl and outputs will be ready on the the TfLiteTensor output -// provided during construction. +// Helper class to perform a simulated kernel (i.e. TfLiteRegistration) +// lifecycle (init, prepare, invoke). All internal allocations are handled by +// this class. Simply pass in the registration, list of required tensors, inputs +// array, outputs array, and any pre-builtin data. Calling Invoke() will +// automatically walk the kernel and outputs will be ready on the TfLiteTensor +// output provided during construction. class KernelRunner { public: KernelRunner(const TfLiteRegistration& registration, TfLiteTensor* tensors, diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc index 79a44e2c670..75eb2838034 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc @@ -32,7 +32,7 @@ struct OpData { }; // Number of unique int8_t and int16_t values. Used in exponent lookup table -// conputation. +// computation. constexpr int kInt8Range = std::numeric_limits::max() - std::numeric_limits::min() + 1; constexpr int kInt16Range = std::numeric_limits::max() - @@ -52,7 +52,7 @@ constexpr int kMaxExponentValue = (1 << kExpFractionalBits); TfLiteStatus Softmax(OpData op_data, const RuntimeShape& input_shape, const int8_t* input_data, const RuntimeShape& output_shape, int16_t* output_data) { - // The last dimension is depth. 
Outer size is the the total input size + // The last dimension is depth. Outer size is the total input size // divided by depth. const int trailing_dim = input_shape.DimensionsCount() - 1; const int outer_size = @@ -75,7 +75,7 @@ TfLiteStatus Softmax(OpData op_data, const RuntimeShape& input_shape, input_diff == 0 ? kMaxExponentValue : op_data.exp_lut[input_diff]; } - // Ensure we cannnot overflow the full_range_output value. We need to + // Ensure we cannot overflow the full_range_output value. We need to // guarantee that kInt16Range * max(input_data) / sum_of_exps < kInt16Range. TFLITE_DCHECK(sum_of_exps >= kMaxExponentValue); diff --git a/tensorflow/lite/micro/tools/make/Makefile b/tensorflow/lite/micro/tools/make/Makefile index b28fd19d15e..1b28152bb17 100644 --- a/tensorflow/lite/micro/tools/make/Makefile +++ b/tensorflow/lite/micro/tools/make/Makefile @@ -6,9 +6,9 @@ endif TENSORFLOW_ROOT := MAKEFILE_DIR := tensorflow/lite/micro/tools/make -# Override this on make command line to to parse thirdy party downloads during project generation -# make -f tensorflow/lite/micro/tools/make/Makefile PARSE_THIRD_PARTY=true TARGET=apollo3evb generate_hello_world_make_project -PARSE_THIRD_PARTY := +# Override this on make command line to parse third party downloads during project generation +# make -f tensorflow/lite/micro/tools/make/Makefile PARSE_THIRD_PARTY=true TARGET=apollo3evb generate_hello_world_make_project +PARSE_THIRD_PARTY := # Pull in some convenience functions. diff --git a/tensorflow/lite/micro/tools/make/templates/arc/README_ARC_EMSDP.md.tpl b/tensorflow/lite/micro/tools/make/templates/arc/README_ARC_EMSDP.md.tpl index 9d2801ed6b7..766450253cc 100644 --- a/tensorflow/lite/micro/tools/make/templates/arc/README_ARC_EMSDP.md.tpl +++ b/tensorflow/lite/micro/tools/make/templates/arc/README_ARC_EMSDP.md.tpl @@ -1,6 +1,6 @@ # TensorFlow Lite Micro ARC Make Project for EM SDP Board. 
-This folder has been autogenerated by TensorFlow, and contains source, header, and project files needed to build a single TensorFlow Lite Micro target using make tool and and a Synopsys DesignWare ARC processor compatible toolchain, specifically the ARC MetaWare Development Toolkit (MWDT). +This folder has been autogenerated by TensorFlow, and contains source, header, and project files needed to build a single TensorFlow Lite Micro target using make tool and a Synopsys DesignWare ARC processor compatible toolchain, specifically the ARC MetaWare Development Toolkit (MWDT). This project has been generated for the ARC EM Software Development Platform (EM SDP). The built application can be run only on this platform. diff --git a/tensorflow/lite/profiling/profile_summary_formatter.h b/tensorflow/lite/profiling/profile_summary_formatter.h index 8f6f9f33e46..d19dfc8fdfa 100644 --- a/tensorflow/lite/profiling/profile_summary_formatter.h +++ b/tensorflow/lite/profiling/profile_summary_formatter.h @@ -38,7 +38,7 @@ class ProfileSummaryFormatter { const std::map>& stats_calculator_map, const tensorflow::StatsCalculator& delegate_stats_calculator) const = 0; - // Returns a string detailing the short summary of the the accumulated runtime + // Returns a string detailing the short summary of the accumulated runtime // stats in StatsCalculator of ProfileSummarizer. 
virtual std::string GetShortSummary( const std::map>& diff --git a/tensorflow/lite/python/optimize/calibrator.py b/tensorflow/lite/python/optimize/calibrator.py index e1758e87eeb..0527104329c 100644 --- a/tensorflow/lite/python/optimize/calibrator.py +++ b/tensorflow/lite/python/optimize/calibrator.py @@ -32,7 +32,7 @@ _calibration_wrapper = LazyLoader( def add_intermediate_tensors(model_content): - """Adds intermedaite tensors to fused op if needed.""" + """Adds intermediate tensors to fused op if needed.""" return _calibration_wrapper.AddIntermediateTensors(model_content) diff --git a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h index e04e1a12cd4..389a08528f1 100644 --- a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h +++ b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h @@ -80,7 +80,7 @@ struct TfLiteRegistration; // An external context is a collection of information unrelated to the TF Lite // framework, but useful to a subset of the ops. TF Lite knows very little -// about about the actual contexts, but it keeps a list of them, and is able to +// about the actual contexts, but it keeps a list of them, and is able to // refresh them if configurations like the number of recommended threads // change. typedef struct TfLiteExternalContext { diff --git a/tensorflow/lite/tools/optimize/operator_property.cc b/tensorflow/lite/tools/optimize/operator_property.cc index 6ec320c4144..75af0df34a4 100644 --- a/tensorflow/lite/tools/optimize/operator_property.cc +++ b/tensorflow/lite/tools/optimize/operator_property.cc @@ -239,7 +239,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index, property.quantizable = false; break; } - // TODO(jianlijianli): extend LSTM op spec to inlucde input, bias etc. + // TODO(jianlijianli): extend LSTM op spec to include input, bias etc. // LSTM needs 5 intermediate tensors. 
This agrees with the fully quantized // kernels in lstm_eval.cc if (op_variant.use_layer_norm && op_variant.use_projection && @@ -522,7 +522,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index, tensor_property_9.symmetric = true; // Without layer norm, we choose to quantize bias with the scale of // input and its corresponding weight. The other choice will - // be to ues the scale of recurrent and its corresponding weight but we + // be to use the scale of recurrent and its corresponding weight but we // choose to use the smaller scale, which means higher resolution. TensorProperty tensor_property_12; tensor_property_12.use_derived_scale = true; @@ -574,7 +574,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index, property.outputs = {{0, {}}}; property.intermediates = { // Without layer normalization, intermediate tensors 0, 1, 2, 3 are - // not used and and their quantization parameters are ignored. + // not used and their quantization parameters are ignored. {0, {}}, {1, {}}, {2, {}}, @@ -589,7 +589,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index, !op_variant.use_peephole) { // Without layer norm, we choose to quantize bias with the scale of // input and its corresponding weight. The other choice will - // be to ues the scale of recurrent and its corresponding weight but we + // be to use the scale of recurrent and its corresponding weight but we // choose to use the smaller scale, which means higher resolution. TensorProperty tensor_property_12; tensor_property_12.use_derived_scale = true; @@ -656,7 +656,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index, tensor_property_9.symmetric = true; // Without layer norm, we choose to quantize bias with the scale of // input and its corresponding weight. 
The other choice will - // be to ues the scale of recurrent and its corresponding weight but we + // be to use the scale of recurrent and its corresponding weight but we // choose to use the smaller scale, which means higher resolution. TensorProperty tensor_property_12; tensor_property_12.use_derived_scale = true; @@ -722,7 +722,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index, !op_variant.use_peephole) { // Without layer norm, we choose to quantize bias with the scale of // input and its corresponding weight. The other choice will - // be to ues the scale of recurrent and its corresponding weight but we + // be to use the scale of recurrent and its corresponding weight but we // choose to use the smaller scale, which means higher resolution. TensorProperty tensor_property_12; tensor_property_12.use_derived_scale = true; @@ -949,7 +949,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index, case BuiltinOperator_SVDF: { TensorProperty tensor_property_time; // Only 10bits are needed because 6bits are reserved for the reduce - // operation after elemement-wise multiplication between state and time + // operation after element-wise multiplication between state and time // weights. tensor_property_time.number_of_bits = 10; TensorProperty tensor_property_bias; diff --git a/tensorflow/lite/tools/versioning/op_version.cc b/tensorflow/lite/tools/versioning/op_version.cc index 8627c492c70..5668bc06f8c 100644 --- a/tensorflow/lite/tools/versioning/op_version.cc +++ b/tensorflow/lite/tools/versioning/op_version.cc @@ -168,7 +168,7 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) { return 3; } // For float and uint8 fixed point kernels, if the weight is - // Shuffled4x16Int8, is is version 2. + // Shuffled4x16Int8, it is version 2. 
if (op_sig.options.fully_connected.weights_format == FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8) { return 2; From 620a090a631bade18f685261b5f24cb348a4d8c5 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Thu, 29 Oct 2020 17:31:40 +0300 Subject: [PATCH 014/220] a bit more --- .../delegates/gpu/cl/kernels/conv_constants.cc | 4 ++-- .../lite/delegates/gpu/cl/kernels/conv_constants.h | 2 +- tensorflow/lite/micro/kernels/arc_mli/conv.cc | 12 ++++++------ .../lite/micro/kernels/arc_mli/depthwise_conv.cc | 14 +++++++------- .../lite/micro/kernels/arc_mli/fully_connected.cc | 6 +++--- tensorflow/lite/micro/kernels/arc_mli/pooling.cc | 2 +- .../lite/micro/kernels/arc_mli/scratch_buf_mgr.cc | 4 ++-- tensorflow/lite/micro/kernels/conv.cc | 2 +- tensorflow/lite/micro/kernels/depthwise_conv.cc | 2 +- .../lite/micro/kernels/vexriscv/depthwise_conv.cc | 2 +- tensorflow/lite/micro/kernels/xtensa_hifi/conv.cc | 2 +- .../micro/kernels/xtensa_hifi/depthwise_conv.cc | 2 +- .../lite/micro/kernels/xtensa_hifimini/conv.cc | 2 +- .../kernels/xtensa_hifimini/depthwise_conv.cc | 2 +- 14 files changed, 29 insertions(+), 29 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc index c3663634177..83ef72f35a8 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc @@ -170,7 +170,7 @@ std::string GenerateConvolutionConstantCode(const OperationDef& op_def, } for (int d = 0; d < out_z; ++d) { c += " " + s_conv + "(r[" + std::to_string(d) + - "], src, args.weigths.GetPtr(),"; + "], src, args.weights.GetPtr(),"; c += " " + std::to_string(filters_counter) + ");\n"; filters_counter += ch_count; } @@ -201,7 +201,7 @@ bool IsConvConstantsSupported(const DeviceInfo& device_info, if (device_info.IsAMD() && definition.precision != CalculationsPrecision::F32 && definition.src_tensors[0].storage_type != 
TensorStorageType::BUFFER) { - // BUG, some AMD gpus crashe without it + // BUG, some AMD GPUs crash without it return false; } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h index c341ecb5753..9f1b9c2343d 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h @@ -104,7 +104,7 @@ void UploadWeightsForConvConstants(const tflite::gpu::Tensor& weights, absl::MakeSpan(ptr, float_count / 4)); } - op->args_.AddObject("weigths", + op->args_.AddObject("weights", absl::make_unique(std::move(desc))); } diff --git a/tensorflow/lite/micro/kernels/arc_mli/conv.cc b/tensorflow/lite/micro/kernels/arc_mli/conv.cc index 4522421fa56..0069d41b67e 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/conv.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/conv.cc @@ -85,7 +85,7 @@ bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input, const TfLiteConvParams* params) { const auto* affine_quantization = reinterpret_cast(filter->quantization.params); - // MLI optimized version only supports int8_t dataype, dilation factor of 1 + // MLI optimized version only supports int8_t datatype, dilation factor of 1 // and per-axis quantization of weights (no broadcasting/per-tensor) bool ret_val = (filter->type == kTfLiteInt8) && (input->type == kTfLiteInt8) && (bias->type == kTfLiteInt32) && @@ -159,7 +159,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { int output_width = output->dims->data[2]; int output_height = output->dims->data[1]; - // Dynimically allocate per-channel quantization parameters. + // Dynamically allocate per-channel quantization parameters. 
const int num_channels = filter->dims->data[kConvQuantizedDimension]; data->per_channel_output_multiplier = reinterpret_cast(context->AllocatePersistentBuffer( @@ -241,7 +241,7 @@ TfLiteStatus EvalMliQuantizedPerChannel( const OpData& data, const TfLiteTensor* input, const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output) { // Run Conv MLI kernel - // MLI optimized version only supports int8_t dataype and dilation factor of 1 + // MLI optimized version only supports int8_t datatype and dilation factor of 1 if ((input->type == kTfLiteInt8) && (params->dilation_width_factor == 1) && (params->dilation_height_factor == 1)) { mli_tensor mli_in = {}; @@ -299,7 +299,7 @@ TfLiteStatus EvalMliQuantizedPerChannel( const int overlap = kernel_height - cfg.stride_height; // for weight slicing (on output channels) - // NHWC layout for weigths, output channel dimension is the first dimension. + // NHWC layout for weights, output channel dimension is the first dimension. const int weight_out_ch_dimension = 0; int slice_channels = static_cast(mli_weights.shape[weight_out_ch_dimension]); @@ -362,9 +362,9 @@ TfLiteStatus EvalMliQuantizedPerChannel( in_slice_height, cfg.padding_top, cfg.padding_bottom, overlap); - /* output tensor is alreade sliced in the output channel dimension. + /* output tensor is already sliced in the output channel dimension. out_ch_slice.Sub() is the tensor for the amount of output channels of this - itteration of the weight slice loop. This tensor needs to be further + iteration of the weight slice loop. This tensor needs to be further sliced over the batch and height dimension. 
*/ ops::micro::TensorSlicer out_slice(out_ch_slice.Sub(), height_dimension, out_slice_height); diff --git a/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc index 8fe5d307cdd..cdd5a945517 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc @@ -72,7 +72,7 @@ bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input, const int in_ch = SizeOfDimension(input, 3); const int filters_num = SizeOfDimension(filter, 3); - // MLI optimized version only supports int8_t dataype, dilation factor of 1 + // MLI optimized version only supports int8_t datatype, dilation factor of 1 // and per-axis quantization of weights (no broadcasting/per-tensor) (in_ch == // filters_num) || (in_ch == 1)) is a forbidding of channel multiplier logic // for multichannel input. @@ -150,7 +150,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // Per channel quantization is only needed for int8 inference. For other // quantized types, only a single scale and zero point is needed. const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension]; - // Dynimically allocate per-channel quantization parameters. + // Dynamically allocate per-channel quantization parameters. data->per_channel_output_multiplier = reinterpret_cast(context->AllocatePersistentBuffer( context, num_channels * sizeof(int32_t))); @@ -280,7 +280,7 @@ TfLiteStatus EvalMliQuantizedPerChannel( const int overlap = kernelHeight - cfg.stride_height; // for weight slicing (on output channels) - // HWCN layout for weigths, output channel dimension is the first dimension. + // HWCN layout for weights, output channel dimension is the first dimension. 
const int weight_out_ch_dimension = 3; // bias has only 1 dimension const int bias_out_ch_dimension = 0; @@ -345,9 +345,9 @@ TfLiteStatus EvalMliQuantizedPerChannel( mli_mov_tensor_sync(w_slice.Sub(), &copy_config, w_ptr); mli_mov_tensor_sync(b_slice.Sub(), &copy_config, b_ptr); - /* input tensor is alreade sliced in the channel dimension. + /* input tensor is already sliced in the channel dimension. out_ch_slice.Sub() is the tensor for the amount of channels of this - itteration of the weight slice loop. This tensor needs to be further + iteration of the weight slice loop. This tensor needs to be further sliced over the batch and height dimension. in_ch_slice.Sub() tensor contains batches of HWC tensors. so it is a 4 dimensional tensor. because the mli kernel will process one HWC tensor at a time, the 4 dimensional @@ -360,9 +360,9 @@ TfLiteStatus EvalMliQuantizedPerChannel( inSliceHeight, padding_top, padding_bottom, overlap); - /* output tensor is alreade sliced in the output channel dimension. + /* output tensor is already sliced in the output channel dimension. out_ch_slice.Sub() is the tensor for the amount of output channels of this - itteration of the weight slice loop. This tensor needs to be further + iteration of the weight slice loop. This tensor needs to be further sliced over the batch and height dimension. 
*/ ops::micro::TensorSlicer out_slice(out_ch_slice.Sub(), heightDimension, outSliceHeight); diff --git a/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc b/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc index ea5c6c6eaf3..d3638d14c40 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc @@ -52,7 +52,7 @@ constexpr int kOutputTensor = 0; bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input, const TfLiteTensor* filter, const TfLiteTensor* bias, const TfLiteFullyConnectedParams* params) { - // MLI optimized version only supports int8_t dataype and no fused Relu and + // MLI optimized version only supports int8_t datatype and no fused Relu and // symmetric per-tensor quantization of weights (not per-axis) bool ret_val = (filter->type == kTfLiteInt8) && (input->type == kTfLiteInt8) && (bias->type == kTfLiteInt32) && @@ -190,9 +190,9 @@ TfLiteStatus EvalMliQuantizedInt8(TfLiteContext* context, TfLiteNode* node, ops::micro::TensorSlicer in_slice(&mli_in, input_size_dimension, mli_in.shape[input_size_dimension]); - /* output tensor is alreade sliced in the output size dimension. + /* output tensor is already sliced in the output size dimension. out_ch_slice.Sub() is the tensor for the amount of output size of this - itteration of the weight slice loop. This tensor needs to be further + iteration of the weight slice loop. 
This tensor needs to be further sliced over the batch */ ops::micro::TensorSlicer out_slice(out_ch_slice.Sub(), out_tensor_dimension, slice_size); diff --git a/tensorflow/lite/micro/kernels/arc_mli/pooling.cc b/tensorflow/lite/micro/kernels/arc_mli/pooling.cc index 2194d3c71f2..e4dfa8b3146 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/pooling.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/pooling.cc @@ -43,7 +43,7 @@ enum MliPoolingType { AveragePooling = 0, MaxPooling = 1 }; bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input, const TfLitePoolParams* params) { - // MLI optimized version only supports int8_t dataype and no fused Relu + // MLI optimized version only supports int8_t datatype and no fused Relu return (input->type == kTfLiteInt8 && params->activation == kTfLiteActNone); } diff --git a/tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.cc b/tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.cc index aaf04154602..a0475524e52 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.cc @@ -163,7 +163,7 @@ TfLiteStatus get_arc_scratch_buffer_for_fully_connect_tensors( init_arc_scratch_buffers(); /* strategy for FC kernels: first allocate input, because this cannot be sliced. (in case of batch - processing, only a single input needs to be allocated) then weigths & bias + processing, only a single input needs to be allocated) then weights & bias because if fully loaded, they can be reused over batches. then output. The number of output channels (for weights slicing) depends on size of output and size of weights&bias */ @@ -275,7 +275,7 @@ TfLiteStatus arc_scratch_buffer_calc_slice_size_io( max_out_lines_for_input = (max_lines_in - kernel_height + 1) / stride_height; } - // Ten compute how many ouput lines fit into the output tensor. + // Then compute how many output lines fit into the output tensor. 
max_lines_out = std::min(out_height, static_cast(out->capacity) / line_size_out); // the smallest of the two determines the slice height for the output, and diff --git a/tensorflow/lite/micro/kernels/conv.cc b/tensorflow/lite/micro/kernels/conv.cc index 55efa486234..dc821df5418 100644 --- a/tensorflow/lite/micro/kernels/conv.cc +++ b/tensorflow/lite/micro/kernels/conv.cc @@ -141,7 +141,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { int output_width = output->dims->data[2]; int output_height = output->dims->data[1]; - // Dynimically allocate per-channel quantization parameters. + // Dynamically allocate per-channel quantization parameters. const int num_channels = filter->dims->data[kConvQuantizedDimension]; data->per_channel_output_multiplier = static_cast(context->AllocatePersistentBuffer( diff --git a/tensorflow/lite/micro/kernels/depthwise_conv.cc b/tensorflow/lite/micro/kernels/depthwise_conv.cc index 85b51233e90..08e969b17fa 100644 --- a/tensorflow/lite/micro/kernels/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/depthwise_conv.cc @@ -127,7 +127,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // Per channel quantization is only needed for int8_t inference. For other // quantized types, only a single scale and zero point is needed. const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension]; - // Dynimically allocate per-channel quantization parameters. + // Dynamically allocate per-channel quantization parameters. 
data->per_channel_output_multiplier = reinterpret_cast(context->AllocatePersistentBuffer( context, num_channels * sizeof(int32_t))); diff --git a/tensorflow/lite/micro/kernels/vexriscv/depthwise_conv.cc b/tensorflow/lite/micro/kernels/vexriscv/depthwise_conv.cc index 028c1111281..ef41504cd95 100644 --- a/tensorflow/lite/micro/kernels/vexriscv/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/vexriscv/depthwise_conv.cc @@ -362,7 +362,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // Per channel quantization is only needed for int8_t inference. For other // quantized types, only a single scale and zero point is needed. const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension]; - // Dynimically allocate per-channel quantization parameters. + // Dynamically allocate per-channel quantization parameters. data->per_channel_output_multiplier = reinterpret_cast(context->AllocatePersistentBuffer( context, num_channels * sizeof(int32_t))); diff --git a/tensorflow/lite/micro/kernels/xtensa_hifi/conv.cc b/tensorflow/lite/micro/kernels/xtensa_hifi/conv.cc index 2de3345bcbf..9d08709e1ce 100755 --- a/tensorflow/lite/micro/kernels/xtensa_hifi/conv.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifi/conv.cc @@ -157,7 +157,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { int output_width = output->dims->data[2]; int output_height = output->dims->data[1]; - // Dynimically allocate per-channel quantization parameters. + // Dynamically allocate per-channel quantization parameters. 
const int num_channels = filter->dims->data[kConvQuantizedDimension]; TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer( context, num_channels * sizeof(int32_t), diff --git a/tensorflow/lite/micro/kernels/xtensa_hifi/depthwise_conv.cc b/tensorflow/lite/micro/kernels/xtensa_hifi/depthwise_conv.cc index 2dd11ed060f..8b1a8cfb9f3 100755 --- a/tensorflow/lite/micro/kernels/xtensa_hifi/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifi/depthwise_conv.cc @@ -145,7 +145,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // Per channel quantization is only needed for int8_t inference. For other // quantized types, only a single scale and zero point is needed. const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension]; - // Dynimically allocate per-channel quantization parameters. + // Dynamically allocate per-channel quantization parameters. TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer( context, num_channels * sizeof(int32_t), reinterpret_cast(&data->per_channel_output_multiplier))); diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc index 2c3577d77be..de9820b82d9 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc @@ -325,7 +325,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // Per channel quantization is only needed for int8_t inference. For other // quantized types, only a single scale and zero point is needed. const int num_channels = filter->dims->data[kConvQuantizedDimension]; - // Dynimically allocate per-channel quantization parameters. + // Dynamically allocate per-channel quantization parameters. 
op_data->per_channel_output_multiplier = reinterpret_cast(context->AllocatePersistentBuffer( context, num_channels * sizeof(int32_t))); diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc index 4a37becbf4d..12410a94456 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc @@ -368,7 +368,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // Per channel quantization is only needed for int8_t inference. For other // quantized types, only a single scale and zero point is needed. const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension]; - // Dynimically allocate per-channel quantization parameters. + // Dynamically allocate per-channel quantization parameters. op_data->per_channel_output_multiplier = reinterpret_cast(context->AllocatePersistentBuffer( context, num_channels * sizeof(int32_t))); From 164a7f3d9b21b52829c63b894b6b762fe782aab7 Mon Sep 17 00:00:00 2001 From: Abolfazl Shahbazi Date: Thu, 29 Oct 2020 22:09:57 -0700 Subject: [PATCH 015/220] Remove some duplicates for 'onednn' on 'spec.yaml' per code review --- tensorflow/tools/dockerfiles/spec.yml | 466 ++++++++++++-------------- 1 file changed, 215 insertions(+), 251 deletions(-) diff --git a/tensorflow/tools/dockerfiles/spec.yml b/tensorflow/tools/dockerfiles/spec.yml index 005d6a78432..f6ed6af4ebc 100644 --- a/tensorflow/tools/dockerfiles/spec.yml +++ b/tensorflow/tools/dockerfiles/spec.yml @@ -37,70 +37,45 @@ releases: versioned: tag_specs: - "{_TAG_PREFIX}{ubuntu}{jupyter}" - ubuntu-onednn: + onednn: tag_specs: - - "{_TAG_PREFIX}{ubuntu-onednn}" - - "{_TAG_PREFIX}{ubuntu-onednn}{onednn-jupyter}" - - "{_TAG_PREFIX}{ubuntu-devel-onednn}" - - "{_TAG_PREFIX}{ubuntu-devel-onednn}{onednn-jupyter}" - - "{_TAG_PREFIX}{ubuntu-onednn-mpi-horovod}" - - 
"{_TAG_PREFIX}{ubuntu-onednn-mpi-horovod}{onednn-jupyter}" - - "{_TAG_PREFIX}{ubuntu-devel-onednn-mpi-horovod}" - - "{_TAG_PREFIX}{ubuntu-devel-onednn-mpi-horovod}{onednn-jupyter}" - - "{_TAG_PREFIX}{ubuntu-onednn-mpich-horovod}" - - "{_TAG_PREFIX}{ubuntu-onednn-mpich-horovod}{onednn-jupyter}" - - "{_TAG_PREFIX}{ubuntu-devel-onednn-mpich-horovod}" - - "{_TAG_PREFIX}{ubuntu-devel-onednn-mpich-horovod}{onednn-jupyter}" - centos-onednn: - tag_specs: - - "{_TAG_PREFIX}{centos-onednn}" - - "{_TAG_PREFIX}{centos-onednn}{onednn-jupyter}" - - "{_TAG_PREFIX}{centos-devel-onednn}" - - "{_TAG_PREFIX}{centos-devel-onednn}{onednn-jupyter}" - - "{_TAG_PREFIX}{centos-onednn-mpi-horovod}" - - "{_TAG_PREFIX}{centos-onednn-mpi-horovod}{onednn-jupyter}" - - "{_TAG_PREFIX}{centos-devel-onednn-mpi-horovod}" - - "{_TAG_PREFIX}{centos-devel-onednn-mpi-horovod}{onednn-jupyter}" - - "{_TAG_PREFIX}{centos-onednn-mpich-horovod}" - - "{_TAG_PREFIX}{centos-onednn-mpich-horovod}{onednn-jupyter}" - - "{_TAG_PREFIX}{centos-devel-onednn-mpich-horovod}" - - "{_TAG_PREFIX}{centos-devel-onednn-mpich-horovod}{onednn-jupyter}" - + - "{_TAG_PREFIX}{devel-onednn}" + - "{_TAG_PREFIX}{devel-onednn-mpich-horovod}" + - "{_TAG_PREFIX}{devel-onednn-mpich-horovod}{onednn-jupyter}" + - "{_TAG_PREFIX}{devel-onednn-mpi-horovod}" + - "{_TAG_PREFIX}{devel-onednn-mpi-horovod}{onednn-jupyter}" + - "{_TAG_PREFIX}{devel-onednn}{onednn-jupyter}" + - "{_TAG_PREFIX}{onednn}" + - "{_TAG_PREFIX}{onednn-mpich-horovod}" + - "{_TAG_PREFIX}{onednn-mpich-horovod}{onednn-jupyter}" + - "{_TAG_PREFIX}{onednn-mpi-horovod}" + - "{_TAG_PREFIX}{onednn-mpi-horovod}{onednn-jupyter}" + - "{_TAG_PREFIX}{onednn}{onednn-jupyter}" + - "{_TAG_PREFIX}{onednn}{onednn-jupyter}" # Dockerfiles stored in the TF repo; not pushed anywhere dockerfiles: is_dockerfiles: true upload_images: false tag_specs: - - "{ubuntu}{jupyter}" - - "{ubuntu-devel}{jupyter}" - - "{ubuntu-ppc64le}{jupyter}" - - "{ubuntu-devel-ppc64le}{jupyter}" + - "{devel-onednn}" + - 
"{devel-onednn-mpich-horovod}" + - "{devel-onednn-mpich-horovod}{onednn-jupyter}" + - "{devel-onednn-mpi-horovod}" + - "{devel-onednn-mpi-horovod}{onednn-jupyter}" + - "{devel-onednn}{onednn-jupyter}" + - "{devel-onednn}{onednn-jupyter}" + - "{onednn}" + - "{onednn-mpich-horovod}" + - "{onednn-mpich-horovod}{onednn-jupyter}" + - "{onednn-mpi-horovod}" + - "{onednn-mpi-horovod}{onednn-jupyter}" + - "{onednn}{onednn-jupyter}" - "{ubuntu-devel-arm64v8}{jupyter}" - - "{ubuntu-onednn}" - - "{ubuntu-onednn}{onednn-jupyter}" - - "{ubuntu-devel-onednn}" - - "{ubuntu-devel-onednn}{onednn-jupyter}" - - "{ubuntu-onednn-mpi-horovod}" - - "{ubuntu-devel-onednn-mpi-horovod}" - - "{ubuntu-onednn-mpi-horovod}{onednn-jupyter}" - - "{ubuntu-devel-onednn-mpi-horovod}{onednn-jupyter}" - - "{ubuntu-onednn-mpich-horovod}" - - "{ubuntu-devel-onednn-mpich-horovod}" - - "{ubuntu-onednn-mpich-horovod}{onednn-jupyter}" - - "{ubuntu-devel-onednn-mpich-horovod}{onednn-jupyter}" - - "{centos-onednn}" - - "{centos-onednn}{onednn-jupyter}" - - "{centos-devel-onednn}" - - "{centos-devel-onednn}{onednn-jupyter}" - - "{centos-onednn-mpi-horovod}" - - "{centos-devel-onednn-mpi-horovod}" - - "{centos-onednn-mpi-horovod}{onednn-jupyter}" - - "{centos-devel-onednn-mpi-horovod}{onednn-jupyter}" - - "{centos-onednn-mpich-horovod}" - - "{centos-devel-onednn-mpich-horovod}" - - "{centos-onednn-mpich-horovod}{onednn-jupyter}" - - "{centos-devel-onednn-mpich-horovod}{onednn-jupyter}" + - "{ubuntu-devel}{jupyter}" + - "{ubuntu-devel-ppc64le}{jupyter}" + - "{ubuntu}{jupyter}" + - "{ubuntu-ppc64le}{jupyter}" slice_sets: @@ -182,7 +157,132 @@ slice_sets: - UBUNTU_VERSION=18.04 - CHECKOUT_TF_SRC=1 - ubuntu-onednn-mpi-horovod: + onednn: + - add_to_name: "-16.04" + dockerfile_exclusive_name: "ubuntu-16.04" + dockerfile_subdirectory: "onednn" + partials: + - onednn/ubuntu/version + - onednn/ubuntu/cpu + - onednn/ubuntu/python + - tensorflow + - shell + tests: + - import-onednn.sh + args: + - 
TF_PACKAGE=intel-tensorflow + - UBUNTU_VERSION=16.04 + - add_to_name: "-18.04" + dockerfile_exclusive_name: "ubuntu-18.04" + dockerfile_subdirectory: "onednn" + partials: + - onednn/ubuntu/version + - onednn/ubuntu/cpu + - onednn/ubuntu/python + - tensorflow + - shell + tests: + - import-onednn.sh + args: + - TF_PACKAGE=intel-tensorflow + - UBUNTU_VERSION=18.04 + - add_to_name: "-20.04" + dockerfile_exclusive_name: "ubuntu-20.04" + dockerfile_subdirectory: "onednn" + partials: + - onednn/ubuntu/version + - onednn/ubuntu/cpu + - onednn/ubuntu/python3 + - tensorflow + - shell + tests: + - import-onednn.sh + args: + - TF_PACKAGE=intel-tensorflow + - UBUNTU_VERSION=20.04 + - PYTHON=python3.7 + - add_to_name: "-8" + dockerfile_exclusive_name: "centos-8" + dockerfile_subdirectory: "onednn" + partials: + - onednn/centos/version + - onednn/centos/cpu + - onednn/centos/python + - tensorflow + - shell + tests: + - import-onednn.sh + args: + - TF_PACKAGE=intel-tensorflow + - CENTOS_VERSION=8 + - PYTHON=python3 + + devel-onednn: + - add_to_name: "-16.04-devel" + dockerfile_exclusive_name: "ubuntu-16.04-devel" + dockerfile_subdirectory: "onednn" + partials: + - onednn/ubuntu/version + - onednn/ubuntu/devel + - onednn/ubuntu/python + - onednn/ubuntu/bazel + - shell + tests: + - "" + args: + - UBUNTU_VERSION=16.04 + - CHECKOUT_TF_SRC=1 + - TF_BRANCH=master + - add_to_name: "-18.04-devel" + dockerfile_exclusive_name: "ubuntu-18.04-devel" + dockerfile_subdirectory: "onednn" + partials: + - onednn/ubuntu/version + - onednn/ubuntu/devel + - onednn/ubuntu/python + - onednn/ubuntu/bazel + - shell + tests: + - "" + args: + - UBUNTU_VERSION=18.04 + - CHECKOUT_TF_SRC=1 + - TF_BRANCH=master + - add_to_name: "-20.04-devel" + dockerfile_exclusive_name: "ubuntu-20.04-devel" + dockerfile_subdirectory: "onednn" + partials: + - onednn/ubuntu/version + - onednn/ubuntu/devel + - onednn/ubuntu/python3 + - onednn/ubuntu/bazel + - shell + tests: + - "" + args: + - UBUNTU_VERSION=20.04 + - 
PYTHON=python3.7 + - CHECKOUT_TF_SRC=1 + - TF_BRANCH=master + - add_to_name: "-8-devel" + dockerfile_exclusive_name: "centos-8-devel" + dockerfile_subdirectory: "onednn" + partials: + - onednn/centos/version + - onednn/centos/devel + - onednn/centos/python + - onednn/centos/bazel + - shell + tests: + - "" + args: + - CENTOS_VERSION=8 + - CHECKOUT_TF_SRC=1 + - TF_BRANCH=master + - PYTHON=python3 + + + onednn-mpi-horovod: - add_to_name: "-16.04-mpi-horovod" dockerfile_exclusive_name: "ubuntu-16.04-mpi-horovod" dockerfile_subdirectory: "onednn" @@ -235,8 +335,26 @@ slice_sets: - PYTHON=python3.7 - DEBIAN_FRONTEND="noninteractive" - TF_PACKAGE=intel-tensorflow + - add_to_name: "-8-mpi-horovod" + dockerfile_exclusive_name: "centos-8-mpi-horovod" + dockerfile_subdirectory: "onednn" + partials: + - onednn/centos/version + - onednn/centos/cpu + - onednn/centos/python + - tensorflow + - onednn/centos/mpi + - onednn/centos/horovod + - shell + tests: + - import-onednn-horovod.sh + args: + - CENTOS_VERSION=8 + - PYTHON=python3 + - HOROVOD_VERSION=0.19.5 + - TF_PACKAGE=intel-tensorflow - ubuntu-devel-onednn-mpi-horovod: + devel-onednn-mpi-horovod: - add_to_name: "-16.04-devel-mpi-horovod" dockerfile_exclusive_name: "ubuntu-16.04-devel-mpi-horovod" dockerfile_subdirectory: "onednn" @@ -292,8 +410,27 @@ slice_sets: - CHECKOUT_TF_SRC=1 - CHECKOUT_HOROVOD_SRC=1 - HOROVOD_BRANCH=master + - add_to_name: "-8-devel-mpi-horovod" + dockerfile_exclusive_name: "centos-8-devel-mpi-horovod" + dockerfile_subdirectory: "onednn" + partials: + - onednn/centos/version + - onednn/centos/devel + - onednn/centos/python + - onednn/centos/bazel + - onednn/centos/mpi + - onednn/centos/devel-horovod + - shell + tests: + - "" + args: + - CENTOS_VERSION=8 + - PYTHON=python3 + - CHECKOUT_TF_SRC=1 + - CHECKOUT_HOROVOD_SRC=1 + - HOROVOD_BRANCH=master - ubuntu-onednn-mpich-horovod: + onednn-mpich-horovod: - add_to_name: "-16.04-mpich-horovod" dockerfile_exclusive_name: "ubuntu-16.04-mpich-horovod" 
dockerfile_subdirectory: "onednn" @@ -346,8 +483,26 @@ slice_sets: - PYTHON=python3.7 - DEBIAN_FRONTEND="noninteractive" - TF_PACKAGE=intel-tensorflow + - add_to_name: "-8-mpich-horovod" + dockerfile_exclusive_name: "centos-8-mpich-horovod" + dockerfile_subdirectory: "onednn" + partials: + - onednn/centos/version + - onednn/centos/cpu + - onednn/centos/python + - tensorflow + - onednn/centos/mpich + - onednn/centos/horovod + - shell + tests: + - import-onednn-horovod.sh + args: + - CENTOS_VERSION=8 + - PYTHON=python3 + - HOROVOD_VERSION=0.19.5 + - TF_PACKAGE=intel-tensorflow - ubuntu-devel-onednn-mpich-horovod: + devel-onednn-mpich-horovod: - add_to_name: "-16.04-devel-mpich-horovod" dockerfile_exclusive_name: "ubuntu-16.04-devel-mpich-horovod" dockerfile_subdirectory: "onednn" @@ -403,197 +558,6 @@ slice_sets: - CHECKOUT_TF_SRC=1 - CHECKOUT_HOROVOD_SRC=1 - HOROVOD_BRANCH=master - - ubuntu-onednn: - - add_to_name: "-16.04" - dockerfile_exclusive_name: "ubuntu-16.04" - dockerfile_subdirectory: "onednn" - partials: - - onednn/ubuntu/version - - onednn/ubuntu/cpu - - onednn/ubuntu/python - - tensorflow - - shell - tests: - - import-onednn.sh - args: - - TF_PACKAGE=intel-tensorflow - - UBUNTU_VERSION=16.04 - - add_to_name: "-18.04" - dockerfile_exclusive_name: "ubuntu-18.04" - dockerfile_subdirectory: "onednn" - partials: - - onednn/ubuntu/version - - onednn/ubuntu/cpu - - onednn/ubuntu/python - - tensorflow - - shell - tests: - - import-onednn.sh - args: - - TF_PACKAGE=intel-tensorflow - - UBUNTU_VERSION=18.04 - - add_to_name: "-20.04" - dockerfile_exclusive_name: "ubuntu-20.04" - dockerfile_subdirectory: "onednn" - partials: - - onednn/ubuntu/version - - onednn/ubuntu/cpu - - onednn/ubuntu/python3 - - tensorflow - - shell - tests: - - import-onednn.sh - args: - - TF_PACKAGE=intel-tensorflow - - UBUNTU_VERSION=20.04 - - PYTHON=python3.7 - - ubuntu-devel-onednn: - - add_to_name: "-16.04-devel" - dockerfile_exclusive_name: "ubuntu-16.04-devel" - dockerfile_subdirectory: 
"onednn" - partials: - - onednn/ubuntu/version - - onednn/ubuntu/devel - - onednn/ubuntu/python - - onednn/ubuntu/bazel - - shell - tests: - - "" - args: - - UBUNTU_VERSION=16.04 - - CHECKOUT_TF_SRC=1 - - TF_BRANCH=master - - add_to_name: "-18.04-devel" - dockerfile_exclusive_name: "ubuntu-18.04-devel" - dockerfile_subdirectory: "onednn" - partials: - - onednn/ubuntu/version - - onednn/ubuntu/devel - - onednn/ubuntu/python - - onednn/ubuntu/bazel - - shell - tests: - - "" - args: - - UBUNTU_VERSION=18.04 - - CHECKOUT_TF_SRC=1 - - TF_BRANCH=master - - add_to_name: "-20.04-devel" - dockerfile_exclusive_name: "ubuntu-20.04-devel" - dockerfile_subdirectory: "onednn" - partials: - - onednn/ubuntu/version - - onednn/ubuntu/devel - - onednn/ubuntu/python3 - - onednn/ubuntu/bazel - - shell - tests: - - "" - args: - - UBUNTU_VERSION=20.04 - - PYTHON=python3.7 - - CHECKOUT_TF_SRC=1 - - TF_BRANCH=master - - centos-onednn: - - add_to_name: "-8" - dockerfile_exclusive_name: "centos-8" - dockerfile_subdirectory: "onednn" - partials: - - onednn/centos/version - - onednn/centos/cpu - - onednn/centos/python - - tensorflow - - shell - tests: - - import-onednn.sh - args: - - TF_PACKAGE=intel-tensorflow - - CENTOS_VERSION=8 - - PYTHON=python3 - - centos-devel-onednn: - - add_to_name: "-8-devel" - dockerfile_exclusive_name: "centos-8-devel" - dockerfile_subdirectory: "onednn" - partials: - - onednn/centos/version - - onednn/centos/devel - - onednn/centos/python - - onednn/centos/bazel - - shell - tests: - - "" - args: - - CENTOS_VERSION=8 - - CHECKOUT_TF_SRC=1 - - TF_BRANCH=master - - PYTHON=python3 - - centos-onednn-mpi-horovod: - - add_to_name: "-8-mpi-horovod" - dockerfile_exclusive_name: "centos-8-mpi-horovod" - dockerfile_subdirectory: "onednn" - partials: - - onednn/centos/version - - onednn/centos/cpu - - onednn/centos/python - - tensorflow - - onednn/centos/mpi - - onednn/centos/horovod - - shell - tests: - - import-onednn-horovod.sh - args: - - CENTOS_VERSION=8 - - 
PYTHON=python3 - - HOROVOD_VERSION=0.19.5 - - TF_PACKAGE=intel-tensorflow - - centos-devel-onednn-mpi-horovod: - - add_to_name: "-8-devel-mpi-horovod" - dockerfile_exclusive_name: "centos-8-devel-mpi-horovod" - dockerfile_subdirectory: "onednn" - partials: - - onednn/centos/version - - onednn/centos/devel - - onednn/centos/python - - onednn/centos/bazel - - onednn/centos/mpi - - onednn/centos/devel-horovod - - shell - tests: - - "" - args: - - CENTOS_VERSION=8 - - PYTHON=python3 - - CHECKOUT_TF_SRC=1 - - CHECKOUT_HOROVOD_SRC=1 - - HOROVOD_BRANCH=master - - centos-onednn-mpich-horovod: - - add_to_name: "-8-mpich-horovod" - dockerfile_exclusive_name: "centos-8-mpich-horovod" - dockerfile_subdirectory: "onednn" - partials: - - onednn/centos/version - - onednn/centos/cpu - - onednn/centos/python - - tensorflow - - onednn/centos/mpich - - onednn/centos/horovod - - shell - tests: - - import-onednn-horovod.sh - args: - - CENTOS_VERSION=8 - - PYTHON=python3 - - HOROVOD_VERSION=0.19.5 - - TF_PACKAGE=intel-tensorflow - - centos-devel-onednn-mpich-horovod: - add_to_name: "-8-devel-mpich-horovod" dockerfile_exclusive_name: "centos-8-devel-mpich-horovod" dockerfile_subdirectory: "onednn" From 94b4bc26931abf9cdd99b6a18404e504dcee08ba Mon Sep 17 00:00:00 2001 From: Jakub Jatczak Date: Fri, 30 Oct 2020 09:57:53 +0100 Subject: [PATCH 016/220] Bluepill: generate binary tests in robot --- tensorflow/lite/micro/testing/bluepill.robot | 37 +++++++++++++++---- .../micro/testing/test_bluepill_binary.sh | 10 +---- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/tensorflow/lite/micro/testing/bluepill.robot b/tensorflow/lite/micro/testing/bluepill.robot index 8c14e7e9930..5a4c0b21b6e 100644 --- a/tensorflow/lite/micro/testing/bluepill.robot +++ b/tensorflow/lite/micro/testing/bluepill.robot @@ -1,5 +1,5 @@ *** Settings *** -Suite Setup Setup +Suite Setup Prepare Tests Suite Teardown Teardown Test Setup Reset Emulation Test Teardown Test Teardown @@ -8,15 +8,25 @@ Resource 
${RENODEKEYWORDS} *** Variables *** ${UART} sysbus.cpu.uartSemihosting -*** Test Cases *** -Should Run Bluepill Test - [Documentation] Runs a Bluepill test and waits for a specific string on the semihosting UART - [Tags] bluepill uart tensorflow arm - ${BIN} = Get Environment Variable BIN +*** Keywords *** +Prepare Tests + Setup ${SCRIPT} = Get Environment Variable SCRIPT ${LOGFILE} = Get Environment Variable LOGFILE ${EXPECTED} = Get Environment Variable EXPECTED - Execute Command $bin = @${BIN} + Set Suite Variable ${SCRIPT} + Set Suite Variable ${EXPECTED} + Set Suite Variable ${LOGFILE} + List All Test Binaries + +List All Test Binaries + Setup + ${BIN_DIR} = Get Environment Variable BIN_DIR + @{binaries} = List Files In Directory ${BIN_DIR} absolute=True + Set Suite Variable @{binaries} + +Test Binary + Remove File ${LOGFILE} Execute Command $logfile = @${LOGFILE} Execute Script ${SCRIPT} @@ -24,3 +34,16 @@ Should Run Bluepill Test Start Emulation Wait For Line On Uart ${EXPECTED} + +*** Test Cases *** +Should Run All Bluepill Tests + [Documentation] Runs Bluepill tests and waits for a specific string on the semihosting UART + [Tags] bluepill uart tensorflow arm + FOR ${BIN} IN @{binaries} + Execute Command $bin = @${BIN} + ${_} ${file} = Split Path ${BIN} + Test Binary + Execute Command Clear + + Log \t${file} - PASSED console=True + END diff --git a/tensorflow/lite/micro/testing/test_bluepill_binary.sh b/tensorflow/lite/micro/testing/test_bluepill_binary.sh index 6353299f4e3..2ff03f53849 100755 --- a/tensorflow/lite/micro/testing/test_bluepill_binary.sh +++ b/tensorflow/lite/micro/testing/test_bluepill_binary.sh @@ -46,17 +46,9 @@ then exit 1 fi - -# This check ensures that we only have a single $MICRO_LOG_FILENAME. Without it, -# renode will do a log rotation and there will be multiple files such as -# $MICRO_LOG_FILENAME.1 $MICRO_LOG_FILENAME.2 etc. -if [ -e $MICRO_LOG_FILENAME ]; then - rm $MICRO_LOG_FILENAME &> /dev/null -fi; - exit_code=0 -if ! 
BIN=${ROOT_DIR}/$1 \ +if ! BIN_DIR=${ROOT_DIR}/$1 \ SCRIPT=${ROOT_DIR}/tensorflow/lite/micro/testing/bluepill.resc \ LOGFILE=$MICRO_LOG_FILENAME \ EXPECTED="$2" \ From dcc61a4d47d4bb0131efe481dc2e3022d8c9c623 Mon Sep 17 00:00:00 2001 From: Jakub Jatczak Date: Fri, 30 Oct 2020 10:28:09 +0100 Subject: [PATCH 017/220] Bluepill Change output logs --- tensorflow/lite/micro/testing/bluepill.robot | 8 +++++++- tensorflow/lite/micro/testing/test_bluepill_binary.sh | 11 +++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/tensorflow/lite/micro/testing/bluepill.robot b/tensorflow/lite/micro/testing/bluepill.robot index 5a4c0b21b6e..56cf74746b2 100644 --- a/tensorflow/lite/micro/testing/bluepill.robot +++ b/tensorflow/lite/micro/testing/bluepill.robot @@ -2,10 +2,11 @@ Suite Setup Prepare Tests Suite Teardown Teardown Test Setup Reset Emulation -Test Teardown Test Teardown +Test Teardown Teardown With Custom Message Resource ${RENODEKEYWORDS} *** Variables *** +${CREATE_SNAPSHOT_ON_FAIL} False ${UART} sysbus.cpu.uartSemihosting *** Keywords *** @@ -19,6 +20,10 @@ Prepare Tests Set Suite Variable ${LOGFILE} List All Test Binaries +Teardown With Custom Message + Set Test Message ${file} - FAILED + Test Teardown + List All Test Binaries Setup ${BIN_DIR} = Get Environment Variable BIN_DIR @@ -42,6 +47,7 @@ Should Run All Bluepill Tests FOR ${BIN} IN @{binaries} Execute Command $bin = @${BIN} ${_} ${file} = Split Path ${BIN} + Set Test Variable ${file} Test Binary Execute Command Clear diff --git a/tensorflow/lite/micro/testing/test_bluepill_binary.sh b/tensorflow/lite/micro/testing/test_bluepill_binary.sh index 2ff03f53849..4a11b7a320f 100755 --- a/tensorflow/lite/micro/testing/test_bluepill_binary.sh +++ b/tensorflow/lite/micro/testing/test_bluepill_binary.sh @@ -54,18 +54,17 @@ if ! 
BIN_DIR=${ROOT_DIR}/$1 \ EXPECTED="$2" \ ${RENODE_TEST_SCRIPT} \ ${ROOT_DIR}/tensorflow/lite/micro/testing/bluepill.robot \ - -r $TEST_TMPDIR &> ${MICRO_LOG_PATH}robot_logs.txt + -r $TEST_TMPDIR then exit_code=1 fi -echo "LOGS:" -# Extract output from renode log -cat ${MICRO_LOG_FILENAME} |grep 'uartSemihosting' |sed 's/^.*from start] *//g' if [ $exit_code -eq 0 ] then - echo "$1: PASS" + echo "PASS" else - echo "$1: FAIL - '$2' not found in logs." + echo "UART LOGS:" + # Extract output from renode log + cat ${MICRO_LOG_FILENAME} |grep 'uartSemihosting' |sed 's/^.*from start] *//g' fi exit $exit_code From 8c8cd4d3b7ee040768914907e0bf70df52aa9d31 Mon Sep 17 00:00:00 2001 From: Jakub Jatczak Date: Wed, 4 Nov 2020 15:08:17 +0100 Subject: [PATCH 018/220] Add `Documentation` to custom Robot keywords --- tensorflow/lite/micro/testing/bluepill.robot | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/lite/micro/testing/bluepill.robot b/tensorflow/lite/micro/testing/bluepill.robot index 56cf74746b2..c216aa66438 100644 --- a/tensorflow/lite/micro/testing/bluepill.robot +++ b/tensorflow/lite/micro/testing/bluepill.robot @@ -11,6 +11,7 @@ ${UART} sysbus.cpu.uartSemihosting *** Keywords *** Prepare Tests + [Documentation] Make environment variables available in whole test suite and list files in ${BIN_DIR} Setup ${SCRIPT} = Get Environment Variable SCRIPT ${LOGFILE} = Get Environment Variable LOGFILE @@ -21,10 +22,12 @@ Prepare Tests List All Test Binaries Teardown With Custom Message + [Documentation] Replace robot fail message with shorter one to avoid duplicated UART output in log Set Test Message ${file} - FAILED Test Teardown List All Test Binaries + [Documentation] List all files in ${BIN_DIR} and make it available from test cases Setup ${BIN_DIR} = Get Environment Variable BIN_DIR @{binaries} = List Files In Directory ${BIN_DIR} absolute=True From 396fe082fdef7519fe6125b4c1927f113e04ffba Mon Sep 17 00:00:00 2001 From: Vladimir Silyaev Date: Thu, 17 Sep 2020
16:46:48 -0700 Subject: [PATCH 019/220] [lite] Use std::endl to force flush of output Change-Id: Id44c6731e93bbada4ba086db91974ac7e2d65c40 --- tensorflow/lite/examples/label_image/BUILD | 1 + .../examples/label_image/bitmap_helpers.cc | 9 ++- .../lite/examples/label_image/label_image.cc | 66 +++++++++---------- tensorflow/lite/examples/label_image/log.h | 41 ++++++++++++ 4 files changed, 78 insertions(+), 39 deletions(-) create mode 100644 tensorflow/lite/examples/label_image/log.h diff --git a/tensorflow/lite/examples/label_image/BUILD b/tensorflow/lite/examples/label_image/BUILD index e7d2d1a9672..1c770a83678 100644 --- a/tensorflow/lite/examples/label_image/BUILD +++ b/tensorflow/lite/examples/label_image/BUILD @@ -63,6 +63,7 @@ cc_library( "bitmap_helpers.h", "bitmap_helpers_impl.h", "label_image.h", + "log.h", ], deps = [ "//tensorflow/lite:builtin_op_data", diff --git a/tensorflow/lite/examples/label_image/bitmap_helpers.cc b/tensorflow/lite/examples/label_image/bitmap_helpers.cc index 0adad68ddca..93a077926df 100644 --- a/tensorflow/lite/examples/label_image/bitmap_helpers.cc +++ b/tensorflow/lite/examples/label_image/bitmap_helpers.cc @@ -22,8 +22,7 @@ limitations under the License. 
#include // NOLINT(build/include_order) #include "tensorflow/lite/examples/label_image/bitmap_helpers.h" - -#define LOG(x) std::cerr +#include "tensorflow/lite/examples/label_image/log.h" namespace tflite { namespace label_image { @@ -76,7 +75,7 @@ std::vector read_bmp(const std::string& input_bmp_name, int* width, std::ifstream file(input_bmp_name, std::ios::in | std::ios::binary); if (!file) { - LOG(FATAL) << "input file " << input_bmp_name << " not found\n"; + LOG(FATAL) << "input file " << input_bmp_name << " not found"; exit(-1); } @@ -85,7 +84,7 @@ std::vector read_bmp(const std::string& input_bmp_name, int* width, end = file.tellg(); size_t len = end - begin; - if (s->verbose) LOG(INFO) << "len: " << len << "\n"; + if (s->verbose) LOG(INFO) << "len: " << len; std::vector img_bytes(len); file.seekg(0, std::ios::beg); @@ -100,7 +99,7 @@ std::vector read_bmp(const std::string& input_bmp_name, int* width, if (s->verbose) LOG(INFO) << "width, height, channels: " << *width << ", " << *height - << ", " << *channels << "\n"; + << ", " << *channels; // there may be padding bytes when the width is not a multiple of 4 bytes // 8 * channels == bits per pixel diff --git a/tensorflow/lite/examples/label_image/label_image.cc b/tensorflow/lite/examples/label_image/label_image.cc index 4f6bcb4573c..9025cf220e7 100644 --- a/tensorflow/lite/examples/label_image/label_image.cc +++ b/tensorflow/lite/examples/label_image/label_image.cc @@ -52,7 +52,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/delegate.h" #endif -#define LOG(severity) (std::cerr << (#severity) << ": ") +#include "tensorflow/lite/examples/label_image/log.h" namespace tflite { namespace label_image { @@ -98,7 +98,7 @@ class DelegateProviders { // It's possible that a delegate of certain type won't be created as // user-specified benchmark params tells not to. 
if (ptr == nullptr) continue; - LOG(INFO) << delegate->GetName() << " delegate created.\n"; + LOG(INFO) << delegate->GetName() << " delegate created."; delegates_map.emplace(delegate->GetName(), std::move(ptr)); } return delegates_map; @@ -134,7 +134,7 @@ TfLiteDelegatePtrMap GetDelegates(Settings* s, if (s->gl_backend) { auto delegate = CreateGPUDelegate(s); if (!delegate) { - LOG(INFO) << "GPU acceleration is unsupported on this platform.\n"; + LOG(INFO) << "GPU acceleration is unsupported on this platform."; } else { delegates.emplace("GPU", std::move(delegate)); } @@ -145,7 +145,7 @@ TfLiteDelegatePtrMap GetDelegates(Settings* s, options.allow_fp16 = s->allow_fp16; auto delegate = evaluation::CreateNNAPIDelegate(options); if (!delegate) { - LOG(INFO) << "NNAPI acceleration is unsupported on this platform.\n"; + LOG(INFO) << "NNAPI acceleration is unsupported on this platform."; } else { delegates.emplace("NNAPI", std::move(delegate)); } @@ -157,7 +157,7 @@ TfLiteDelegatePtrMap GetDelegates(Settings* s, evaluation::CreateHexagonDelegate(libhexagon_path, s->profiling); if (!delegate) { - LOG(INFO) << "Hexagon acceleration is unsupported on this platform.\n"; + LOG(INFO) << "Hexagon acceleration is unsupported on this platform."; } else { delegates.emplace("Hexagon", std::move(delegate)); } @@ -166,7 +166,7 @@ TfLiteDelegatePtrMap GetDelegates(Settings* s, if (s->xnnpack_delegate) { auto delegate = evaluation::CreateXNNPACKDelegate(s->number_of_threads); if (!delegate) { - LOG(INFO) << "XNNPACK acceleration is unsupported on this platform.\n"; + LOG(INFO) << "XNNPACK acceleration is unsupported on this platform."; } else { delegates.emplace("XNNPACK", std::move(delegate)); } @@ -195,7 +195,7 @@ TfLiteStatus ReadLabelsFile(const string& file_name, size_t* found_label_count) { std::ifstream file(file_name); if (!file) { - LOG(ERROR) << "Labels file " << file_name << " not found\n"; + LOG(ERROR) << "Labels file " << file_name << " not found"; return kTfLiteError; } 
result->clear(); @@ -225,14 +225,13 @@ void PrintProfilingInfo(const profiling::ProfileEvent* e, << std::setprecision(3) << op_index << ", OpCode " << std::setw(3) << std::setprecision(3) << registration.builtin_code << ", " << EnumNameBuiltinOperator( - static_cast(registration.builtin_code)) - << "\n"; + static_cast(registration.builtin_code)); } void RunInference(Settings* settings, const DelegateProviders& delegate_providers) { if (!settings->model_name.c_str()) { - LOG(ERROR) << "no model file name\n"; + LOG(ERROR) << "no model file name"; exit(-1); } @@ -240,29 +239,29 @@ void RunInference(Settings* settings, std::unique_ptr interpreter; model = tflite::FlatBufferModel::BuildFromFile(settings->model_name.c_str()); if (!model) { - LOG(ERROR) << "\nFailed to mmap model " << settings->model_name << "\n"; + LOG(ERROR) << "Failed to mmap model " << settings->model_name; exit(-1); } settings->model = model.get(); - LOG(INFO) << "Loaded model " << settings->model_name << "\n"; + LOG(INFO) << "Loaded model " << settings->model_name; model->error_reporter(); - LOG(INFO) << "resolved reporter\n"; + LOG(INFO) << "resolved reporter"; tflite::ops::builtin::BuiltinOpResolver resolver; tflite::InterpreterBuilder(*model, resolver)(&interpreter); if (!interpreter) { - LOG(ERROR) << "Failed to construct interpreter\n"; + LOG(ERROR) << "Failed to construct interpreter"; exit(-1); } interpreter->SetAllowFp16PrecisionForFp32(settings->allow_fp16); if (settings->verbose) { - LOG(INFO) << "tensors size: " << interpreter->tensors_size() << "\n"; - LOG(INFO) << "nodes size: " << interpreter->nodes_size() << "\n"; - LOG(INFO) << "inputs: " << interpreter->inputs().size() << "\n"; - LOG(INFO) << "input(0) name: " << interpreter->GetInputName(0) << "\n"; + LOG(INFO) << "tensors size: " << interpreter->tensors_size(); + LOG(INFO) << "nodes size: " << interpreter->nodes_size(); + LOG(INFO) << "inputs: " << interpreter->inputs().size(); + LOG(INFO) << "input(0) name: " << 
interpreter->GetInputName(0); int t_size = interpreter->tensors_size(); for (int i = 0; i < t_size; i++) { @@ -271,7 +270,7 @@ void RunInference(Settings* settings, << interpreter->tensor(i)->bytes << ", " << interpreter->tensor(i)->type << ", " << interpreter->tensor(i)->params.scale << ", " - << interpreter->tensor(i)->params.zero_point << "\n"; + << interpreter->tensor(i)->params.zero_point; } } @@ -286,29 +285,29 @@ void RunInference(Settings* settings, &image_height, &image_channels, settings); int input = interpreter->inputs()[0]; - if (settings->verbose) LOG(INFO) << "input: " << input << "\n"; + if (settings->verbose) LOG(INFO) << "input: " << input; const std::vector inputs = interpreter->inputs(); const std::vector outputs = interpreter->outputs(); if (settings->verbose) { - LOG(INFO) << "number of inputs: " << inputs.size() << "\n"; - LOG(INFO) << "number of outputs: " << outputs.size() << "\n"; + LOG(INFO) << "number of inputs: " << inputs.size(); + LOG(INFO) << "number of outputs: " << outputs.size(); } auto delegates_ = GetDelegates(settings, delegate_providers); for (const auto& delegate : delegates_) { if (interpreter->ModifyGraphWithDelegate(delegate.second.get()) != kTfLiteOk) { - LOG(ERROR) << "Failed to apply " << delegate.first << " delegate.\n"; + LOG(ERROR) << "Failed to apply " << delegate.first << " delegate."; exit(-1); } else { - LOG(INFO) << "Applied " << delegate.first << " delegate.\n"; + LOG(INFO) << "Applied " << delegate.first << " delegate."; } } if (interpreter->AllocateTensors() != kTfLiteOk) { - LOG(ERROR) << "Failed to allocate tensors!\n"; + LOG(ERROR) << "Failed to allocate tensors!"; exit(-1); } @@ -340,7 +339,7 @@ void RunInference(Settings* settings, break; default: LOG(ERROR) << "cannot handle input type " - << interpreter->tensor(input)->type << " yet\n"; + << interpreter->tensor(input)->type << " yet"; exit(-1); } auto profiler = absl::make_unique( @@ -351,7 +350,7 @@ void RunInference(Settings* settings, if 
(settings->loop_count > 1) { for (int i = 0; i < settings->number_of_warmup_runs; i++) { if (interpreter->Invoke() != kTfLiteOk) { - LOG(ERROR) << "Failed to invoke tflite!\n"; + LOG(ERROR) << "Failed to invoke tflite!"; exit(-1); } } @@ -361,16 +360,16 @@ void RunInference(Settings* settings, gettimeofday(&start_time, nullptr); for (int i = 0; i < settings->loop_count; i++) { if (interpreter->Invoke() != kTfLiteOk) { - LOG(ERROR) << "Failed to invoke tflite!\n"; + LOG(ERROR) << "Failed to invoke tflite!"; exit(-1); } } gettimeofday(&stop_time, nullptr); - LOG(INFO) << "invoked\n"; + LOG(INFO) << "invoked"; LOG(INFO) << "average time: " << (get_us(stop_time) - get_us(start_time)) / (settings->loop_count * 1000) - << " ms \n"; + << " ms"; if (settings->profiling) { profiler->StopProfiling(); @@ -413,7 +412,7 @@ void RunInference(Settings* settings, break; default: LOG(ERROR) << "cannot handle output type " - << interpreter->tensor(output)->type << " yet\n"; + << interpreter->tensor(output)->type << " yet"; exit(-1); } @@ -427,7 +426,7 @@ void RunInference(Settings* settings, for (const auto& result : top_results) { const float confidence = result.first; const int index = result.second; - LOG(INFO) << confidence << ": " << index << " " << labels[index] << "\n"; + LOG(INFO) << confidence << ": " << index << " " << labels[index]; } } @@ -449,8 +448,7 @@ void display_usage() { << "--threads, -t: number of threads\n" << "--verbose, -v: [0|1] print more information\n" << "--warmup_runs, -w: number of warmup runs\n" - << "--xnnpack_delegate, -x [0:1]: xnnpack delegate\n" - << "\n"; + << "--xnnpack_delegate, -x [0:1]: xnnpack delegate\n"; } int Main(int argc, char** argv) { diff --git a/tensorflow/lite/examples/label_image/log.h b/tensorflow/lite/examples/label_image/log.h new file mode 100644 index 00000000000..01fa3e4ea0e --- /dev/null +++ b/tensorflow/lite/examples/label_image/log.h @@ -0,0 +1,41 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_EXAMPLES_LABEL_IMAGE_LOG_H_ +#define TENSORFLOW_LITE_EXAMPLES_LABEL_IMAGE_LOG_H_ + +#include + +namespace tflite { +namespace label_image { + +class Log { + std::stringstream stream_; +public: + Log(const char *severity) { + stream_ << severity << ": "; + } + std::stringstream& Stream() { return stream_; } + ~Log() { + std::cerr << stream_.str() << std::endl; + } +}; + +#define LOG(severity) tflite::label_image::Log(#severity).Stream() + +} // namespace label_image +} // namespace tflite + +#endif // TENSORFLOW_LITE_EXAMPLES_LABEL_IMAGE_LOG_H_ From 0b793fecd18ab858d205d26bbdda54af21fc5be5 Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Sat, 19 Sep 2020 01:22:00 +0000 Subject: [PATCH 020/220] Adding #defines for ROCm / MIOpen / HIP Runtime version numbers This PR/commit introduces the following #defines in the `rocm/rocm_config.h` file ``` #define TF_ROCM_VERSION #define TF_MIOPEN_VERSION #define TF_HIPRUNTIME_VERSION ``` These #defines should be used within TF code to add ROCm/MIOpen/HIP Runtime version specific code. Details on how we go about determining these version numbers can be found on the following wiki-page https://github.com/ROCmSoftwarePlatform/tensorflow-internal/wiki/How-to-add-ROCm-version-specific-code-changes-in-the-TensorFlow-code%3F A new script `find_rocm_config.py` is being added by this commit.
This script does all the work of determining the version number information and it is pretty easy to extend it to query more information about the ROCM install. The information collected by the script is available to `rocm_configure.bzl` and hence can be used to add version specific code in `rocm_configure.bzl` as well. --- third_party/gpus/find_rocm_config.py | 284 ++++++++++++++++++++++++ third_party/gpus/rocm/rocm_config.h.tpl | 4 + third_party/gpus/rocm_configure.bzl | 51 +++-- 3 files changed, 321 insertions(+), 18 deletions(-) create mode 100644 third_party/gpus/find_rocm_config.py diff --git a/third_party/gpus/find_rocm_config.py b/third_party/gpus/find_rocm_config.py new file mode 100644 index 00000000000..b838cd3cb71 --- /dev/null +++ b/third_party/gpus/find_rocm_config.py @@ -0,0 +1,284 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Prints ROCm library and header directories and versions found on the system. + +The script searches for ROCm library and header files on the system, inspects +them to determine their version and prints the configuration to stdout. +The path to inspect is specified through an environment variable (ROCM_PATH). +If no valid configuration is found, the script prints to stderr and +returns an error code. + +The script takes the directory specified by the ROCM_PATH environment variable.
+The script looks for headers and library files in a hard-coded set of +subdirectories from the base path of the specified directory. If ROCM_PATH is not +specified, then "/opt/rocm" is used as its default value + +""" + +import io +import os +import re +import sys + +class ConfigError(Exception): + pass + + +def _get_default_rocm_path(): + return "/opt/rocm" + + +def _get_rocm_install_path(): + """Determines and returns the ROCm installation path""" + rocm_install_path = _get_default_rocm_path() + if "ROCM_PATH" in os.environ: + rocm_install_path = os.environ["ROCM_PATH"] + # rocm_install_path = os.path.realpath(rocm_install_path) + return rocm_install_path + + +def _get_composite_version_number(major, minor, patch): + return 10000*major + 100*minor + patch + + +def _get_header_version(path, name): + """Returns preprocessor defines in C header file.""" + for line in io.open(path, "r", encoding="utf-8").readlines(): + match = re.match("#define %s +(\d+)" % name, line) + if match: + value = match.group(1) + return int(value) + + raise ConfigError( + '#define "{}" is either\n'.format(name) + + ' not present in file {} OR\n'.format(path) + + ' its value is not an integer literal') + + +def _find_rocm_config(rocm_install_path): + + def rocm_version_numbers(path): + version_file = os.path.join(path, ".info/version-dev") + if not os.path.exists(version_file): + raise ConfigError( + 'ROCm version file "{}" not found'.format(version_file)) + version_numbers = [] + with open(version_file) as f: + version_string = f.read().strip() + version_numbers = version_string.split(".") + major = int(version_numbers[0]) + minor = int(version_numbers[1]) + patch = int(version_numbers[2].split("-")[0]) + return major, minor, patch + + major, minor, patch = rocm_version_numbers(rocm_install_path) + + rocm_config = { + "rocm_version_number" : + _get_composite_version_number(major, minor, patch) + } + + return rocm_config + + +def _find_hipruntime_config(rocm_install_path): + + def
hipruntime_version_number(path): + version_file = os.path.join(path, "hip/include/hip/hip_version.h") + if not os.path.exists(version_file): + raise ConfigError( + 'HIP Runtime version file "{}" not found'.format(version_file)) + # This header file has an explicit #define for HIP_VERSION, whose value + # is (HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR) + # Retrieve the major + minor and re-calculate here, since we do not + # want to get into the business of parsing arith exprs + major = _get_header_version(version_file, "HIP_VERSION_MAJOR") + minor = _get_header_version(version_file, "HIP_VERSION_MINOR") + return 100*major + minor + + hipruntime_config = { + "hipruntime_version_number" : + hipruntime_version_number(rocm_install_path) + } + + return hipruntime_config + + +def _find_miopen_config(rocm_install_path): + + def miopen_version_numbers(path): + version_file = os.path.join(path, "miopen/include/miopen/version.h") + if not os.path.exists(version_file): + raise ConfigError( + 'MIOpen version file "{}" not found'.format(version_file)) + version_numbers = [] + major = _get_header_version(version_file, "MIOPEN_VERSION_MAJOR") + minor = _get_header_version(version_file, "MIOPEN_VERSION_MINOR") + patch = _get_header_version(version_file, "MIOPEN_VERSION_PATCH") + return major, minor, patch + + major, minor, patch = miopen_version_numbers(rocm_install_path) + + miopen_config = { + "miopen_version_number" : + _get_composite_version_number(major, minor, patch) + } + + return miopen_config + + +def _find_rocblas_config(rocm_install_path): + + def rocblas_version_numbers(path): + version_file = os.path.join(path, "rocblas/include/rocblas-version.h") + if not os.path.exists(version_file): + raise ConfigError( + 'rocblas version file "{}" not found'.format(version_file)) + version_numbers = [] + major = _get_header_version(version_file, "ROCBLAS_VERSION_MAJOR") + minor = _get_header_version(version_file, "ROCBLAS_VERSION_MINOR") + patch = _get_header_version(version_file,
"ROCBLAS_VERSION_PATCH") + return major, minor, patch + + major, minor, patch = rocblas_version_numbers(rocm_install_path) + + rocblas_config = { + "rocblas_version_number" : + _get_composite_version_number(major, minor, patch) + } + + return rocblas_config + + +def _find_rocrand_config(rocm_install_path): + + def rocrand_version_number(path): + version_file = os.path.join(path, "rocrand/include/rocrand_version.h") + if not os.path.exists(version_file): + raise ConfigError( + 'rocblas version file "{}" not found'.format(version_file)) + version_number = _get_header_version(version_file, "ROCRAND_VERSION") + return version_number + + rocrand_config = { + "rocrand_version_number" : rocrand_version_number(rocm_install_path) + } + + return rocrand_config + + +def _find_rocfft_config(rocm_install_path): + + def rocfft_version_numbers(path): + version_file = os.path.join(path, "rocfft/include/rocfft-version.h") + if not os.path.exists(version_file): + raise ConfigError( + 'rocfft version file "{}" not found'.format(version_file)) + version_numbers = [] + major = _get_header_version(version_file, "rocfft_version_major") + minor = _get_header_version(version_file, "rocfft_version_minor") + patch = _get_header_version(version_file, "rocfft_version_patch") + return major, minor, patch + + major, minor, patch = rocfft_version_numbers(rocm_install_path) + + rocfft_config = { + "rocfft_version_number" : + _get_composite_version_number(major, minor, patch) + } + + return rocfft_config + + +def _find_roctracer_config(rocm_install_path): + + def roctracer_version_numbers(path): + version_file = os.path.join(path, "roctracer/include/roctracer.h") + if not os.path.exists(version_file): + raise ConfigError( + 'roctracer version file "{}" not found'.format(version_file)) + version_numbers = [] + major = _get_header_version(version_file, "ROCTRACER_VERSION_MAJOR") + minor = _get_header_version(version_file, "ROCTRACER_VERSION_MINOR") + # roctracer header does not have a patch version 
number + patch = 0 + return major, minor, patch + + major, minor, patch = roctracer_version_numbers(rocm_install_path) + + roctracer_config = { + "roctracer_version_number" : + _get_composite_version_number(major, minor, patch) + } + + return roctracer_config + + +def _find_hipsparse_config(rocm_install_path): + + def hipsparse_version_numbers(path): + version_file = os.path.join(path, "hipsparse/include/hipsparse-version.h") + if not os.path.exists(version_file): + raise ConfigError( + 'hipsparse version file "{}" not found'.format(version_file)) + version_numbers = [] + major = _get_header_version(version_file, "hipsparseVersionMajor") + minor = _get_header_version(version_file, "hipsparseVersionMinor") + patch = _get_header_version(version_file, "hipsparseVersionPatch") + return major, minor, patch + + major, minor, patch = hipsparse_version_numbers(rocm_install_path) + + hipsparse_config = { + "hipsparse_version_number" : + _get_composite_version_number(major, minor, patch) + } + + return hipsparse_config + + +def find_rocm_config(): + """Returns a dictionary of ROCm components config info.""" + rocm_install_path = _get_rocm_install_path() + if not os.path.exists(rocm_install_path): + raise ConfigError( + 'Specified ROCM_PATH "{}" does not exist'.format(rocm_install_path)) + + result = {} + + result["rocm_toolkit_path"] = rocm_install_path + result.update(_find_rocm_config(rocm_install_path)) + result.update(_find_hipruntime_config(rocm_install_path)) + result.update(_find_miopen_config(rocm_install_path)) + result.update(_find_rocblas_config(rocm_install_path)) + result.update(_find_rocrand_config(rocm_install_path)) + result.update(_find_rocfft_config(rocm_install_path)) + result.update(_find_roctracer_config(rocm_install_path)) + result.update(_find_hipsparse_config(rocm_install_path)) + + return result + + +def main(): + try: + for key, value in sorted(find_rocm_config().items()): + print("%s: %s" % (key, value)) + except ConfigError as e: + 
sys.stderr.write("\nERROR: {}\n\n".format(str(e))) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/third_party/gpus/rocm/rocm_config.h.tpl b/third_party/gpus/rocm/rocm_config.h.tpl index 957413b9acd..ec26b00a5b5 100644 --- a/third_party/gpus/rocm/rocm_config.h.tpl +++ b/third_party/gpus/rocm/rocm_config.h.tpl @@ -18,4 +18,8 @@ limitations under the License. #define TF_ROCM_TOOLKIT_PATH "%{rocm_toolkit_path}" +#define TF_ROCM_VERSION %{rocm_version_number} +#define TF_MIOPEN_VERSION %{miopen_version_number} +#define TF_HIPRUNTIME_VERSION %{hipruntime_version_number} + #endif // ROCM_ROCM_CONFIG_H_ diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl index 05082795188..1215657f08a 100644 --- a/third_party/gpus/rocm_configure.bzl +++ b/third_party/gpus/rocm_configure.bzl @@ -27,6 +27,7 @@ load( "get_bash_bin", "get_cpu_value", "get_host_environ", + "get_python_bin", "raw_exec", "realpath", "which", @@ -212,20 +213,6 @@ def _enable_rocm(repository_ctx): return True return False -def _rocm_toolkit_path(repository_ctx, bash_bin): - """Finds the rocm toolkit directory. - - Args: - repository_ctx: The repository context. - - Returns: - A speculative real path of the rocm toolkit install directory. 
- """ - rocm_toolkit_path = get_host_environ(repository_ctx, _ROCM_TOOLKIT_PATH, _DEFAULT_ROCM_TOOLKIT_PATH) - if files_exist(repository_ctx, [rocm_toolkit_path], bash_bin) != [True]: - auto_configure_fail("Cannot find rocm toolkit path.") - return rocm_toolkit_path - def _amdgpu_targets(repository_ctx, rocm_toolkit_path, bash_bin): """Returns a list of strings representing AMDGPU targets.""" amdgpu_targets_str = get_host_environ(repository_ctx, _TF_ROCM_AMDGPU_TARGETS) @@ -402,7 +389,20 @@ def _find_libs(repository_ctx, rocm_config, bash_bin): return _select_rocm_lib_paths(repository_ctx, libs_paths, bash_bin) -def _get_rocm_config(repository_ctx, bash_bin): +def _exec_find_rocm_config(repository_ctx, script_path): + python_bin = get_python_bin(repository_ctx) + return execute(repository_ctx, [python_bin, script_path]) + +def find_rocm_config(repository_ctx, script_path): + """Returns ROCm config dictionary from running find_rocm_config.py""" + exec_result = _exec_find_rocm_config(repository_ctx, script_path) + if exec_result.return_code: + auto_configure_fail("Failed to run find_rocm_config.py: %s" % err_out(exec_result)) + + # Parse the dict from stdout. + return dict([tuple(x.split(": ")) for x in exec_result.stdout.splitlines()]) + +def _get_rocm_config(repository_ctx, bash_bin, find_rocm_config_script): """Detects and returns information about the ROCm installation on the system. Args: @@ -413,11 +413,21 @@ def _get_rocm_config(repository_ctx, bash_bin): A struct containing the following fields: rocm_toolkit_path: The ROCm toolkit installation directory. amdgpu_targets: A list of the system's AMDGPU targets. + rocm_version_number: The version of ROCm on the system. + miopen_version_number: The version of MIOpen on the system. + hipruntime_version_number: The version of HIP Runtime on the system. 
""" - rocm_toolkit_path = _rocm_toolkit_path(repository_ctx, bash_bin) + config = find_rocm_config(repository_ctx, find_rocm_config_script) + rocm_toolkit_path = config["rocm_toolkit_path"] + rocm_version_number = config["rocm_version_number"] + miopen_version_number = config["miopen_version_number"] + hipruntime_version_number = config["hipruntime_version_number"] return struct( - rocm_toolkit_path = rocm_toolkit_path, amdgpu_targets = _amdgpu_targets(repository_ctx, rocm_toolkit_path, bash_bin), + rocm_toolkit_path = rocm_toolkit_path, + rocm_version_number = rocm_version_number, + miopen_version_number = miopen_version_number, + hipruntime_version_number = hipruntime_version_number, ) def _tpl_path(repository_ctx, labelname): @@ -550,8 +560,10 @@ def _create_local_rocm_repository(repository_ctx): "rocm:rocm_config.h", ]} + find_rocm_config_script = repository_ctx.path(Label("@org_tensorflow//third_party/gpus:find_rocm_config.py")) + bash_bin = get_bash_bin(repository_ctx) - rocm_config = _get_rocm_config(repository_ctx, bash_bin) + rocm_config = _get_rocm_config(repository_ctx, bash_bin, find_rocm_config_script) # Copy header and library files to execroot. 
# rocm_toolkit_path @@ -749,6 +761,9 @@ def _create_local_rocm_repository(repository_ctx): ["\"%s\"" % c for c in rocm_config.amdgpu_targets], ), "%{rocm_toolkit_path}": rocm_config.rocm_toolkit_path, + "%{rocm_version_number}": rocm_config.rocm_version_number, + "%{miopen_version_number}": rocm_config.miopen_version_number, + "%{hipruntime_version_number}": rocm_config.hipruntime_version_number, }, ) From 467bdaf578e575e89fac41789e66b197d11232c0 Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Mon, 14 Sep 2020 22:18:55 +0000 Subject: [PATCH 021/220] removing references to unused env vars from the rocm_configure.bzl file --- third_party/gpus/rocm_configure.bzl | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl index 1215657f08a..3d2c7cc3fc6 100644 --- a/third_party/gpus/rocm_configure.bzl +++ b/third_party/gpus/rocm_configure.bzl @@ -4,11 +4,7 @@ * `TF_NEED_ROCM`: Whether to enable building with ROCm. * `GCC_HOST_COMPILER_PATH`: The GCC host compiler path - * `ROCM_TOOLKIT_PATH`: The path to the ROCm toolkit. Default is - `/opt/rocm`. - * `TF_ROCM_VERSION`: The version of the ROCm toolkit. If this is blank, then - use the system default. - * `TF_MIOPEN_VERSION`: The version of the MIOpen library. + * `ROCM_PATH`: The path to the ROCm toolkit. Default is `/opt/rocm`. * `TF_ROCM_AMDGPU_TARGETS`: The AMDGPU targets. 
""" @@ -36,13 +32,9 @@ load( _GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH" _GCC_HOST_COMPILER_PREFIX = "GCC_HOST_COMPILER_PREFIX" _ROCM_TOOLKIT_PATH = "ROCM_PATH" -_TF_ROCM_VERSION = "TF_ROCM_VERSION" -_TF_MIOPEN_VERSION = "TF_MIOPEN_VERSION" _TF_ROCM_AMDGPU_TARGETS = "TF_ROCM_AMDGPU_TARGETS" _TF_ROCM_CONFIG_REPO = "TF_ROCM_CONFIG_REPO" -_DEFAULT_ROCM_VERSION = "" -_DEFAULT_MIOPEN_VERSION = "" _DEFAULT_ROCM_TOOLKIT_PATH = "/opt/rocm" def verify_build_defines(params): @@ -828,8 +820,6 @@ _ENVIRONS = [ _GCC_HOST_COMPILER_PREFIX, "TF_NEED_ROCM", _ROCM_TOOLKIT_PATH, - _TF_ROCM_VERSION, - _TF_MIOPEN_VERSION, _TF_ROCM_AMDGPU_TARGETS, ] From 2dd90546365a6364b35988441fec6bf390cb7682 Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Thu, 5 Nov 2020 14:29:10 +0000 Subject: [PATCH 022/220] Update to make `find_rocm_config.py` to execute properly in RBE (remote build environment) --- third_party/gpus/compress_find_rocm_config.py | 37 +++++++++++++++++++ .../gpus/find_rocm_config.py.gz.base64 | 1 + third_party/gpus/rocm_configure.bzl | 23 +++++++++++- 3 files changed, 59 insertions(+), 2 deletions(-) create mode 100644 third_party/gpus/compress_find_rocm_config.py create mode 100644 third_party/gpus/find_rocm_config.py.gz.base64 diff --git a/third_party/gpus/compress_find_rocm_config.py b/third_party/gpus/compress_find_rocm_config.py new file mode 100644 index 00000000000..d89dc847368 --- /dev/null +++ b/third_party/gpus/compress_find_rocm_config.py @@ -0,0 +1,37 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Compresses the contents of 'find_rocm_config.py'. + +The compressed file is what is actually being used. It works around remote +config not being able to upload files yet. +""" +import base64 +import zlib + + +def main(): + with open('find_rocm_config.py', 'rb') as f: + data = f.read() + + compressed = zlib.compress(data) + b64encoded = base64.b64encode(compressed) + + with open('find_rocm_config.py.gz.base64', 'wb') as f: + f.write(b64encoded) + + +if __name__ == '__main__': + main() + diff --git a/third_party/gpus/find_rocm_config.py.gz.base64 b/third_party/gpus/find_rocm_config.py.gz.base64 new file mode 100644 index 00000000000..d938b98458c --- /dev/null +++ b/third_party/gpus/find_rocm_config.py.gz.base64 @@ -0,0 +1 @@ +eJzNWm1v2zYQ/q5fQSgoKrWOkvbTkCEfvDRDvbVJYGcbhiYwaIm2ucqiRlJxjaL/fXckJUuynDh1gi5AUVu6e3i8e+5Fog/ImchXks/mmrw9fntMrueMXLNMCflrKpakX+i5kCoi/TQlQxRTZMgUk3csibwD74B84DGIs4QUWcIk0aDfz2kM/7k7PfInk4qLjLyNjkmAAr675Yc/A8JKFGRBVyQTmhSKAQRXZMpTRtiXmOWa8IzEYpGnnGYxI0uu52YZBwJmkL8dhJhoCtIU5HP4Nq3LEaqNwfg31zo/OTpaLpcRNcZGQs6OUiuojj4Mzs4vRueHYLBR+SNLmVJEsn8LLmGrkxWhOdgT0wlYmdIlEZLQmWRwTwu0dym55tmsR5SY6iWVDFASrrTkk0I3nFVaB3uuC4C7aEb8/ogMRj75pT8ajHqA8dfg+v3lH9fkr/5w2L+4HpyPyOWQnF1evBtcDy4v4NuvpH/xN/l9cPGuRxi4CpZhX3KJ9oORHN1oQkdGjDUMmAprkMpZzKc8hn1ls4LOGJmJOyYz2A7JmVxwhcFUYF4CKClfcE21ubKxKVzm9En/PN/3ryTPkIaXZwtYfiKpXKExZM4orp9AiGItJGfGRnJn2QeUEmAgOtbscqU0W0Seh4RXseTAM8WoBC4o44pt8EhM1UTpQcTRa1p5cHGBFEiYRldlxsVclkYYoNzaj/qxyKZ8VkjjQNRTOhGFjoxVOUWiixIcGeJigzSbS1HM5kgSlt1xKbIFyzS5o5IbUgZg/8fxVf/6fRh5gykkF9xLedJakju39Ox2rB9KA405TEoTasl0IU3YCVwCB8UiYU3/afqZ2X2VMVjVLIakwVuVXZ12R3W8VIjPNhjW9zaeZUxsIEy2z6lMDtGeBGKoIe89VUzqPJhKsSATqpxTXWFY21bZGxHw1dpEcA9UJa8SNG6CtDwSuT6SIl74KFJg+aNgi4a4T2mR4n7SgnnIVs+DnBMSwifKT0KVn6AuuE/AJM+LUwppemYidI5ODs5NBYRIhSceAeMVSHmwCBnPmB671cZoyRh3FhgxG6q6lXUlIw
yc0jRNa0pg6ruStNbRZcRd1BbEKVnmoCZuD5ZrA5LTreaBOJ8Sv/KwjwEUKnJcQEO6Adcyn2ratyB/sE0eP0WS0dQsvSEUrj21ca/uL2w9QnHNxi6Jx1mxmDAZLOg/QvYIOAz/A7V4Xnf/m2P4e2WEyGv89spIwmcjWl/C0rvED9CCHsnogpWRGbpQQB3PwVgo5gAE6iZW4MGzenWKbFgwb1KsQHCfi0jkrET2pQ+9IYOEgZp+6hd6eviTH6KrElRQlhAEOirYCb6ULDIfA//ArkleKPI6uElehz55YQztmaVCowYBNuIWhNhcABhzMZpB3cqDN6G76ZwFBScwcqGHHqQcUrWeB0b6Zbm8//WbSTzb326ylxFsFuAD4zPy2koTM1Fg68MKA14wM8XXb9AxayqGCmsVDoXPGmxzH+sdGMdmDL0J+UHTl2EZOzAmsey2VbWDZCe4HxQ2t5oMUoETMV5yt4yRa/7+I3gVtohnU3HkBA8TdudXDkdLSxX2BeYIFdQBwzIWWzxrNm9yvGxUxgzjZ4Q2PaJyWQM5bFjvNgYb+HRrbphpzXCvoYXlcloRxN3B6QeGjFMyNVwMwgiv5EHYklsv0tSMFAw3OvAj5xebe6eWXE3dT8e3TsbkZLfMGyeTuzzoknl7W6566IcVqmM16SgRyIeOy5hmXQzpKFtlybWcA8WvZk2/Q98n1sePL2Sg9s1rVUi7YIP8c57LItN8wXZIgZpwy4LH5QHgHPEsTouEHeFn+FcCRvOnzYn3gysytDZ/b2ocwDMV1JJagYZ5xc5RX/AZAqaGsq5hyYYVx3+eD0cwzffIci7ANDtMWCxACmoi44/93+AB4BX2F+gsjTuDi8thaQI0EMn4nZ34y55kuW9b/WFM07iA3g7WMQn1XHHzsAWznDAzkMVZUiilQCbMBWHAJoXCnqFwqMop7B4yGIY5yHp87FCNROxqdnWPQWw39uY38/SxEOgEv5GUb2pd2aAiPTd4vE6rrawtk2s7rTtnjnpabSzbSK4Fx8K5Q2I5wf26iwWpEst9faa8+ji4BPQn7zaPIBpYcHV+sS/X2ig1upVl/fEoMNqevff36CRb+NDdSxosW9O+E+PJ+klj0fY4NYHHoN0mKiO5H+0dSsV79/3wmYjv4H8k82HO++VDf7Qv9Tdgvo/7bZi9yb+NFlsnqRrdGsNUB8pTzlO1ZdsJIKEn75YARnKvYcqB1PlfB/1f839nhg37F+9KhrW41UR0lKgFoEGJDncDJbYF4sH231ypzYLpVO9GAhTcuwgCSJ0D8PUZSyCg/8gK2PKZ0Xx8AWyjoObj618LxWjuWf66+LC1+q1Z1mD6JsZT1r71om3Sa0ljcNZOvHeye1Pf4tTZb688A/Et8A/u/tfD/tn58An6fxuoNgEcrANUHcwIZl+pzSk8iFLH2NIVrv7Ws+d4ryzYwo6tidBgXiMXOpGeMh0aS7ffryh8qt7x9YqT3S8jKpz6SxZ75blaQrXAj8yMygh3Xv3x+9rCBsz39YU2zNW+jWE7PbpTok29xhuRTqQnS4n20i4lNl63t49HKEl4jKdTeD4opvbUyhiSMTzRdBvB1+jRA2dXHadk3ja6d+XkvYx/OapOHtdnjYbsVY000BXjN1ewVYspPGyEwHxbf/1kXwRrIdLPXBtp/7Z8vdw45Co1oiJPqGbBLucZ4RatXV4Eb9N96D3XNr0HXxXco3j/I9Y9ivdO5ffoPTTY3OPYhzpAWO8mBsIlzIJCVTdk1HJlOYmvmD+zVa885MqIElKzJNhMrgiSd6GCsKrg5ncBgf9CnZAXCk/+gjWSsd/9XqfGeDznYVZfrVRkf08Q4a9jWODfZOfD4eXwBOh7k91kfsl2pWUAgGGlBsmg8cjQ8yABx2M85RuPyekp8cdj3ON47J+Yuofb9f4Dez+DwA== \ No newline at end of file diff --git 
a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl index 3d2c7cc3fc6..d47c559e60e 100644 --- a/third_party/gpus/rocm_configure.bzl +++ b/third_party/gpus/rocm_configure.bzl @@ -383,7 +383,26 @@ def _find_libs(repository_ctx, rocm_config, bash_bin): def _exec_find_rocm_config(repository_ctx, script_path): python_bin = get_python_bin(repository_ctx) - return execute(repository_ctx, [python_bin, script_path]) + # If used with remote execution then repository_ctx.execute() can't + # access files from the source tree. A trick is to read the contents + # of the file in Starlark and embed them as part of the command. In + # this case the trick is not sufficient as the find_rocm_config.py + # script has more than 8192 characters. 8192 is the command length + # limit of cmd.exe on Windows. Thus we additionally need to compress + # the contents locally and decompress them as part of the execute(). + compressed_contents = repository_ctx.read(script_path) + decompress_and_execute_cmd = ( + "from zlib import decompress;" + + "from base64 import b64decode;" + + "from os import system;" + + "script = decompress(b64decode('%s'));" % compressed_contents + + "f = open('script.py', 'wb');" + + "f.write(script);" + + "f.close();" + + "system('\"%s\" script.py');" % (python_bin) + ) + + return execute(repository_ctx, [python_bin, "-c", decompress_and_execute_cmd]) def find_rocm_config(repository_ctx, script_path): """Returns ROCm config dictionary from running find_rocm_config.py""" @@ -552,7 +571,7 @@ def _create_local_rocm_repository(repository_ctx): "rocm:rocm_config.h", ]} - find_rocm_config_script = repository_ctx.path(Label("@org_tensorflow//third_party/gpus:find_rocm_config.py")) + find_rocm_config_script = repository_ctx.path(Label("@org_tensorflow//third_party/gpus:find_rocm_config.py.gz.base64")) bash_bin = get_bash_bin(repository_ctx) rocm_config = _get_rocm_config(repository_ctx, bash_bin, find_rocm_config_script) From
94d54ad8a5335418f43327093324d159a128f16d Mon Sep 17 00:00:00 2001 From: Thibaut Goetghebuer-Planchon Date: Thu, 5 Nov 2020 17:43:50 +0000 Subject: [PATCH 023/220] [TFLite] Fix operators using the default register functions instead of the available reference ones in the reference register --- tensorflow/lite/kernels/register_ref.cc | 30 +++++++++++++------------ 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/tensorflow/lite/kernels/register_ref.cc b/tensorflow/lite/kernels/register_ref.cc index d5f5ba75833..c6e078aa735 100644 --- a/tensorflow/lite/kernels/register_ref.cc +++ b/tensorflow/lite/kernels/register_ref.cc @@ -83,15 +83,15 @@ TfLiteRegistration* Register_SPLIT(); TfLiteRegistration* Register_SPLIT_V(); TfLiteRegistration* Register_SQUEEZE(); TfLiteRegistration* Register_STRIDED_SLICE_REF(); -TfLiteRegistration* Register_EXP(); +TfLiteRegistration* Register_EXP_REF(); TfLiteRegistration* Register_TOPK_V2(); TfLiteRegistration* Register_LOG(); TfLiteRegistration* Register_LOG_SOFTMAX_REF(); TfLiteRegistration* Register_CAST(); -TfLiteRegistration* Register_DEQUANTIZE(); -TfLiteRegistration* Register_PRELU(); -TfLiteRegistration* Register_MAXIMUM(); -TfLiteRegistration* Register_MINIMUM(); +TfLiteRegistration* Register_DEQUANTIZE_REF(); +TfLiteRegistration* Register_PRELU_REF(); +TfLiteRegistration* Register_MAXIMUM_REF(); +TfLiteRegistration* Register_MINIMUM_REF(); TfLiteRegistration* Register_ARG_MAX(); TfLiteRegistration* Register_ARG_MIN(); TfLiteRegistration* Register_GREATER(); @@ -120,7 +120,7 @@ TfLiteRegistration* Register_RSQRT(); TfLiteRegistration* Register_SHAPE(); TfLiteRegistration* Register_RANK(); TfLiteRegistration* Register_POW(); -TfLiteRegistration* Register_FAKE_QUANT(); +TfLiteRegistration* Register_FAKE_QUANT_REF(); TfLiteRegistration* Register_PACK(); TfLiteRegistration* Register_ONE_HOT(); TfLiteRegistration* Register_LOGICAL_OR(); @@ -143,7 +143,7 @@ TfLiteRegistration* Register_GATHER_ND(); TfLiteRegistration* 
Register_WHERE(); TfLiteRegistration* Register_REVERSE_SEQUENCE(); TfLiteRegistration* Register_MATRIX_DIAG(); -TfLiteRegistration* Register_QUANTIZE(); +TfLiteRegistration* Register_QUANTIZE_REF(); TfLiteRegistration* Register_MATRIX_SET_DIAG(); TfLiteRegistration* Register_IF(); TfLiteRegistration* Register_WHILE(); @@ -321,7 +321,7 @@ BuiltinRefOpResolver::BuiltinRefOpResolver() { AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE_REF(), /* min_version = */ 1, /* max_version = */ 4); - AddBuiltin(BuiltinOperator_EXP, Register_EXP()); + AddBuiltin(BuiltinOperator_EXP, Register_EXP_REF()); AddBuiltin(BuiltinOperator_TOPK_V2, Register_TOPK_V2(), /* min_version = */ 1, /* max_version = */ 2); @@ -330,14 +330,14 @@ BuiltinRefOpResolver::BuiltinRefOpResolver() { /* min_version = */ 1, /* max_version = */ 2); AddBuiltin(BuiltinOperator_CAST, Register_CAST()); - AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE(), + AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE_REF(), /* min_version = */ 1, /* max_version = */ 4); - AddBuiltin(BuiltinOperator_PRELU, Register_PRELU()); - AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM(), + AddBuiltin(BuiltinOperator_PRELU, Register_PRELU_REF()); + AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM_REF(), /* min_version = */ 1, /* max_version = */ 4); - AddBuiltin(BuiltinOperator_MINIMUM, Register_MINIMUM(), + AddBuiltin(BuiltinOperator_MINIMUM, Register_MINIMUM_REF(), /* min_version = */ 1, /* max_version = */ 4); AddBuiltin(BuiltinOperator_ARG_MAX, Register_ARG_MAX(), @@ -401,7 +401,9 @@ BuiltinRefOpResolver::BuiltinRefOpResolver() { AddBuiltin(BuiltinOperator_SHAPE, Register_SHAPE()); AddBuiltin(BuiltinOperator_RANK, Register_RANK()); AddBuiltin(BuiltinOperator_POW, Register_POW()); - AddBuiltin(BuiltinOperator_FAKE_QUANT, Register_FAKE_QUANT(), 1, 2); + AddBuiltin(BuiltinOperator_FAKE_QUANT, Register_FAKE_QUANT_REF(), + /* min_version = */ 1, + /* max_version = */ 2); 
AddBuiltin(BuiltinOperator_PACK, Register_PACK(), /* min_version = */ 1, /* max_version = */ 3); @@ -440,7 +442,7 @@ BuiltinRefOpResolver::BuiltinRefOpResolver() { AddBuiltin(BuiltinOperator_WHERE, Register_WHERE()); AddBuiltin(BuiltinOperator_REVERSE_SEQUENCE, Register_REVERSE_SEQUENCE()); AddBuiltin(BuiltinOperator_MATRIX_DIAG, Register_MATRIX_DIAG()); - AddBuiltin(BuiltinOperator_QUANTIZE, Register_QUANTIZE(), + AddBuiltin(BuiltinOperator_QUANTIZE, Register_QUANTIZE_REF(), /* min_version = */ 1, /* max_version = */ 2); AddBuiltin(BuiltinOperator_MATRIX_SET_DIAG, Register_MATRIX_SET_DIAG()); From 9d8f486104c63a743a6402452bd3da56808a0f38 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 5 Nov 2020 16:57:14 -0500 Subject: [PATCH 024/220] Add a comment back --- tensorflow/core/kernels/boosted_trees/boosted_trees.proto | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/boosted_trees/boosted_trees.proto b/tensorflow/core/kernels/boosted_trees/boosted_trees.proto index 1d325ee8af9..19724aa88a2 100644 --- a/tensorflow/core/kernels/boosted_trees/boosted_trees.proto +++ b/tensorflow/core/kernels/boosted_trees/boosted_trees.proto @@ -162,7 +162,9 @@ message TreeEnsemble { } // DebugOutput contains outputs useful for debugging/model interpretation, at -// the individual example-level. +// the individual example-level. Debug outputs that are available to the user +// are: 1) Directional feature contributions (DFCs) 2) Node IDs for ensemble +// prediction paths 3) Leaf node IDs. message DebugOutput { // Return the logits and associated feature splits across prediction paths for // each tree, for every example, at predict time. We will use these values to From 34f31a179d63263d6e4d7445332642ca52805f9f Mon Sep 17 00:00:00 2001 From: Advait Jain Date: Thu, 5 Nov 2020 13:09:10 -0800 Subject: [PATCH 025/220] use the flatbuffers:runtime_cc bazel target instead of flatbuffers. 
--- tensorflow/lite/micro/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/micro/BUILD b/tensorflow/lite/micro/BUILD index d480d2e5f8c..44aabe3cd3e 100644 --- a/tensorflow/lite/micro/BUILD +++ b/tensorflow/lite/micro/BUILD @@ -66,7 +66,7 @@ cc_library( "//tensorflow/lite/core/api", "//tensorflow/lite/kernels/internal:reference", "//tensorflow/lite/schema:schema_fbs", - "@flatbuffers", + "@flatbuffers//:runtime_cc", ], ) From c7a5cbb069d9142b51fc77947391917d747386f3 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Fri, 6 Nov 2020 14:27:45 +0000 Subject: [PATCH 026/220] Go: make ReadTensor return error for shapes with negative dimention --- tensorflow/go/tensor.go | 9 ++++++++- tensorflow/go/tensor_test.go | 8 ++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/tensorflow/go/tensor.go b/tensorflow/go/tensor.go index d9036ced325..6d884f32f83 100644 --- a/tensorflow/go/tensor.go +++ b/tensorflow/go/tensor.go @@ -168,11 +168,18 @@ func ReadTensor(dataType DataType, shape []int64, r io.Reader) (*Tensor, error) if err := isTensorSerializable(dataType); err != nil { return nil, err } - nbytes := TypeOf(dataType, nil).Size() * uintptr(numElements(shape)) + var shapePtr *C.int64_t if len(shape) > 0 { + for _, dim := range shape { + if dim < 0 { + return nil, fmt.Errorf("all shape dimentions should be non-negative: %v", shape) + } + } shapePtr = (*C.int64_t)(unsafe.Pointer(&shape[0])) } + + nbytes := TypeOf(dataType, nil).Size() * uintptr(numElements(shape)) t := &Tensor{ c: C.TF_AllocateTensor(C.TF_DataType(dataType), shapePtr, C.int(len(shape)), C.size_t(nbytes)), shape: shape, diff --git a/tensorflow/go/tensor_test.go b/tensorflow/go/tensor_test.go index ebfbdecf6c8..15b2ea55ad8 100644 --- a/tensorflow/go/tensor_test.go +++ b/tensorflow/go/tensor_test.go @@ -276,6 +276,14 @@ func TestReadTensorReadAll(t *testing.T) { } } +func TestReadTensorNegativeDimention(t *testing.T) { + buf := new(bytes.Buffer) + _, err 
:= ReadTensor(Int32, []int64{-1, 1}, buf) + if err == nil { + t.Fatal("ReadTensor should failed if shape contains negative dimention") + } +} + func benchmarkNewTensor(b *testing.B, v interface{}) { b.ReportAllocs() for i := 0; i < b.N; i++ { From ab21c0fbb8d214002abcf855d1717b73d6bce860 Mon Sep 17 00:00:00 2001 From: Advait Jain Date: Thu, 5 Nov 2020 14:29:12 -0800 Subject: [PATCH 027/220] enable use of for profiling via -DTF_LITE_USE_CTIME --- tensorflow/lite/micro/BUILD | 4 +-- tensorflow/lite/micro/hexagon/micro_time.cc | 28 ------------------- tensorflow/lite/micro/micro_time.cc | 15 ++++++++++ tensorflow/lite/micro/posix/micro_time.cc | 28 ------------------- tensorflow/lite/micro/tools/make/Makefile | 6 ++++ .../tools/make/targets/hexagon_makefile.inc | 1 + .../make/targets/xtensa_hifimini_makefile.inc | 1 + .../lite/micro/xtensa_hifimini/micro_time.cc | 28 ------------------- 8 files changed, 25 insertions(+), 86 deletions(-) delete mode 100644 tensorflow/lite/micro/hexagon/micro_time.cc delete mode 100644 tensorflow/lite/micro/posix/micro_time.cc delete mode 100644 tensorflow/lite/micro/xtensa_hifimini/micro_time.cc diff --git a/tensorflow/lite/micro/BUILD b/tensorflow/lite/micro/BUILD index d480d2e5f8c..c814a261166 100644 --- a/tensorflow/lite/micro/BUILD +++ b/tensorflow/lite/micro/BUILD @@ -158,12 +158,12 @@ cc_library( cc_library( name = "micro_time", srcs = [ - "posix/micro_time.cc", + "micro_time.cc", ], hdrs = [ "micro_time.h", ], - copts = micro_copts(), + copts = micro_copts() + ["-DTF_LITE_USE_CTIME"], deps = ["//tensorflow/lite/c:common"], ) diff --git a/tensorflow/lite/micro/hexagon/micro_time.cc b/tensorflow/lite/micro/hexagon/micro_time.cc deleted file mode 100644 index 9baf77b5653..00000000000 --- a/tensorflow/lite/micro/hexagon/micro_time.cc +++ /dev/null @@ -1,28 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// Hexagon timer implementation. -// To include this with make, add TARGET=hexagon. -#include "tensorflow/lite/micro/micro_time.h" - -#include - -namespace tflite { - -int32_t ticks_per_second() { return CLOCKS_PER_SEC; } - -int32_t GetCurrentTimeTicks() { return clock(); } - -} // namespace tflite diff --git a/tensorflow/lite/micro/micro_time.cc b/tensorflow/lite/micro/micro_time.cc index 09119de8394..d7c51f90847 100644 --- a/tensorflow/lite/micro/micro_time.cc +++ b/tensorflow/lite/micro/micro_time.cc @@ -27,8 +27,14 @@ limitations under the License. #include "tensorflow/lite/micro/micro_time.h" +#if defined(TF_LITE_USE_CTIME) +#include +#endif + namespace tflite { +#if !defined(TF_LITE_USE_CTIME) + // Reference implementation of the ticks_per_second() function that's required // for a platform to support Tensorflow Lite for Microcontrollers profiling. // This returns 0 by default because timing is an optional feature that builds @@ -41,4 +47,13 @@ int32_t ticks_per_second() { return 0; } // that builds without errors on platforms that do not need it. int32_t GetCurrentTimeTicks() { return 0; } +#else // defined(TF_LITE_USE_CTIME) + +// For platforms that support ctime, we implement the micro_time interface in +// this central location.
+int32_t ticks_per_second() { return CLOCKS_PER_SEC; } + +int32_t GetCurrentTimeTicks() { return clock(); } +#endif + } // namespace tflite diff --git a/tensorflow/lite/micro/posix/micro_time.cc b/tensorflow/lite/micro/posix/micro_time.cc deleted file mode 100644 index f2d21e9b145..00000000000 --- a/tensorflow/lite/micro/posix/micro_time.cc +++ /dev/null @@ -1,28 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// Posix implementation of micro_timer. -// To include this with make, add TAGS=posix. -#include "tensorflow/lite/micro/micro_time.h" - -#include - -namespace tflite { - -int32_t ticks_per_second() { return CLOCKS_PER_SEC; } - -int32_t GetCurrentTimeTicks() { return clock(); } - -} // namespace tflite diff --git a/tensorflow/lite/micro/tools/make/Makefile b/tensorflow/lite/micro/tools/make/Makefile index 044d4669d54..0ef68df5d37 100644 --- a/tensorflow/lite/micro/tools/make/Makefile +++ b/tensorflow/lite/micro/tools/make/Makefile @@ -137,6 +137,12 @@ COMMON_FLAGS := \ $(OPTIMIZED_KERNEL_DEFINES) \ $(TAG_DEFINES) +ifeq ($(TARGET), $(HOST_OS)) + # If we are not doing a cross-compilation then -DTF_LITE_USE_CTIME is what we + # want to have by default. 
+ COMMON_FLAGS += -DTF_LITE_USE_CTIME +endif + CXXFLAGS := \ -std=c++11 \ -fno-rtti \ diff --git a/tensorflow/lite/micro/tools/make/targets/hexagon_makefile.inc b/tensorflow/lite/micro/tools/make/targets/hexagon_makefile.inc index 3bbe6f9aeb9..3a8d7b64710 100644 --- a/tensorflow/lite/micro/tools/make/targets/hexagon_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/hexagon_makefile.inc @@ -39,6 +39,7 @@ ifeq ($(TARGET), hexagon) PLATFORM_ARGS = \ -DTF_LITE_MCU_DEBUG_LOG \ + -DTF_LITE_USE_CTIME \ -DHEXAGON_ASM \ -DMALLOC_IN_STDLIB \ -DPTHREAD_STUBS \ diff --git a/tensorflow/lite/micro/tools/make/targets/xtensa_hifimini_makefile.inc b/tensorflow/lite/micro/tools/make/targets/xtensa_hifimini_makefile.inc index 05e6001e16d..1587ebcd034 100644 --- a/tensorflow/lite/micro/tools/make/targets/xtensa_hifimini_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/xtensa_hifimini_makefile.inc @@ -24,6 +24,7 @@ endif PLATFORM_FLAGS = \ -DTF_LITE_MCU_DEBUG_LOG \ + -DTF_LITE_USE_CTIME \ --xtensa-core=$(XTENSA_CORE) \ -mcoproc \ -DXTENSA \ diff --git a/tensorflow/lite/micro/xtensa_hifimini/micro_time.cc b/tensorflow/lite/micro/xtensa_hifimini/micro_time.cc deleted file mode 100644 index 22880657882..00000000000 --- a/tensorflow/lite/micro/xtensa_hifimini/micro_time.cc +++ /dev/null @@ -1,28 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -// Xtensa timer implementation. -// To include this with make, add TARGET=xtensa_hifimini. -#include "tensorflow/lite/micro/micro_time.h" - -#include - -namespace tflite { - -int32_t ticks_per_second() { return CLOCKS_PER_SEC; } - -int32_t GetCurrentTimeTicks() { return clock(); } - -} // namespace tflite From 4e4f1ea42e3381a6f568433f6f0cda0b7e132502 Mon Sep 17 00:00:00 2001 From: "Xiaoming (Jason) Cui" Date: Fri, 6 Nov 2020 10:08:40 -0800 Subject: [PATCH 028/220] [INTEL MKL] Fixed unit test failures of matmul_op_test, c_api_test and tensordot_op_test in MKL optimized tensorflow caused by recent matmul and batch_matmul op merge --- tensorflow/core/kernels/mkl/mkl_matmul_op.cc | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/kernels/mkl/mkl_matmul_op.cc b/tensorflow/core/kernels/mkl/mkl_matmul_op.cc index 81339489223..d08809ed2b5 100644 --- a/tensorflow/core/kernels/mkl/mkl_matmul_op.cc +++ b/tensorflow/core/kernels/mkl/mkl_matmul_op.cc @@ -55,18 +55,19 @@ class MklMatMulOp : public OpKernel { // Check that the dimensions of the two matrices are valid. OP_REQUIRES(ctx, TensorShapeUtils::IsMatrix(a.shape()), - errors::InvalidArgument("In[0] is not a matrix")); + errors::InvalidArgument("In[0] ndims must be >= 2")); OP_REQUIRES(ctx, TensorShapeUtils::IsMatrix(b.shape()), - errors::InvalidArgument("In[1] is not a matrix")); + errors::InvalidArgument("In[1] ndims must be >= 2")); Eigen::array, 1> dim_pair; dim_pair[0].first = transpose_a_ ? 0 : 1; dim_pair[0].second = transpose_b_ ? 
1 : 0; - OP_REQUIRES( - ctx, a.dim_size(dim_pair[0].first) == b.dim_size(dim_pair[0].second), - errors::InvalidArgument( - "Matrix size-incompatible: In[0]: ", a.shape().DebugString(), - ", In[1]: ", b.shape().DebugString())); + int d1 = a.dim_size(dim_pair[0].first); + int d2 = b.dim_size(dim_pair[0].second); + OP_REQUIRES(ctx, d1 == d2, + errors::InvalidArgument( + "In[0] mismatch In[1] shape: ", d1, " vs. ", d2, ": ", + a.shape().DebugString(), " ", b.shape().DebugString())); int a_dim_remaining = 1 - dim_pair[0].first; int b_dim_remaining = 1 - dim_pair[0].second; TensorShape out_shape( From 3cf1ea8bd3f8dd554ef57232b828cf3833e90160 Mon Sep 17 00:00:00 2001 From: mdfaijul Date: Fri, 6 Nov 2020 14:28:24 -0800 Subject: [PATCH 029/220] Removed duplicate kernel registration of BatchMatMulV2. --- tensorflow/core/kernels/mkl/mkl_tmp_bf16_ops.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tensorflow/core/kernels/mkl/mkl_tmp_bf16_ops.cc b/tensorflow/core/kernels/mkl/mkl_tmp_bf16_ops.cc index 9b2d09fb827..2e07dfc08be 100644 --- a/tensorflow/core/kernels/mkl/mkl_tmp_bf16_ops.cc +++ b/tensorflow/core/kernels/mkl/mkl_tmp_bf16_ops.cc @@ -56,9 +56,7 @@ namespace tensorflow { .TypeConstraint("U"), \ NoOp); \ REGISTER_KERNEL_BUILDER( \ - Name("_FusedMatMul").Device(DEVICE_CPU).TypeConstraint("T"), NoOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("BatchMatMulV2").Device(DEVICE_CPU).TypeConstraint("T"), NoOp); + Name("_FusedMatMul").Device(DEVICE_CPU).TypeConstraint("T"), NoOp); TF_CALL_bfloat16(REGISTER_CPU); #undef REGISTER_CPU From a82cdca23ecda25ecc1a5e5c47bd49c5f13dbd13 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 8 Nov 2020 18:46:45 +0000 Subject: [PATCH 030/220] Expose Logging C API in pip package While working on modular file systems, noticed that the logging C API headers are not included in tensorflow pip packages. This limits the ability for plugins to add logging in the file system. This PR adds logging C API header in pip package.
Signed-off-by: Yong Tang --- tensorflow/c/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD index 3f4d70ed60e..50825086443 100644 --- a/tensorflow/c/BUILD +++ b/tensorflow/c/BUILD @@ -199,6 +199,7 @@ tf_cuda_library( "//tensorflow/core:portable_tensorflow_lib_lite", ], "//conditions:default": [ + ":logging", ":tf_status", ":tf_tensor", "@com_google_absl//absl/strings", From ae5f78b6c460359b6b4d98aadb22be71a8bb56f1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Nov 2020 01:01:43 -0800 Subject: [PATCH 031/220] Update GraphDef version to 580. PiperOrigin-RevId: 341356330 Change-Id: Ife25d5d7a88a0879092502506b40d5d37d10060a --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 5553ff6fac0..32637ef237c 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 579 // Updated: 2020/11/8 +#define TF_GRAPH_DEF_VERSION 580 // Updated: 2020/11/9 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 7978215e252e786f152504ab8999879aa6850cc6 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 9 Nov 2020 01:40:19 -0800 Subject: [PATCH 032/220] [MLIR][KernelGen] Add unranked ceil kernel and microbenchmarks PiperOrigin-RevId: 341360359 Change-Id: I5717b9731b4fd5171d708bfee7c94e56079d61f3 --- tensorflow/core/kernels/cwise_op_ceil.cc | 3 +++ tensorflow/core/kernels/mlir_generated/BUILD | 2 ++ .../mlir_generated/unranked_op_gpu_ceil.cc | 25 +++++++++++++++++++ 3 files changed, 30 insertions(+) create mode 100644 tensorflow/core/kernels/mlir_generated/unranked_op_gpu_ceil.cc diff --git a/tensorflow/core/kernels/cwise_op_ceil.cc b/tensorflow/core/kernels/cwise_op_ceil.cc index 765e5b94949..b6748eaa92b 100644 --- a/tensorflow/core/kernels/cwise_op_ceil.cc +++ b/tensorflow/core/kernels/cwise_op_ceil.cc @@ -20,7 +20,10 @@ REGISTER4(UnaryOp, CPU, "Ceil", functor::ceil, float, Eigen::half, bfloat16, double); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \ + !defined(MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED) REGISTER3(UnaryOp, GPU, "Ceil", functor::ceil, float, Eigen::half, double); #endif +#endif } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/BUILD b/tensorflow/core/kernels/mlir_generated/BUILD index 91b9f49a16f..46d3e3028ae 100644 --- a/tensorflow/core/kernels/mlir_generated/BUILD +++ b/tensorflow/core/kernels/mlir_generated/BUILD @@ -41,6 +41,7 @@ filegroup( srcs = if_mlir_unranked_kernels_enabled( [ "unranked_op_gpu_abs.cc", + "unranked_op_gpu_ceil.cc", "unranked_op_gpu_tanh.cc", "unranked_op_gpu_base.h", "unranked_op_gpu_base.cc", @@ -60,6 +61,7 @@ cc_library( [ ":abs_unranked_kernels", ":addv2_unranked_kernels", + ":ceil_unranked_kernels", ":tanh_unranked_kernels", "//tensorflow/compiler/mlir/tools/kernel_gen:tf_cuda_runtime_wrappers", "//tensorflow/compiler/mlir/tools/kernel_gen:tf_framework_c_interface", diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_ceil.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_ceil.cc new 
file mode 100644 index 00000000000..3d0bc5928e5 --- /dev/null +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_ceil.cc @@ -0,0 +1,25 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h" + +namespace tensorflow { + +REGISTER_AND_GENERATE_KERNEL(Ceil, f16, DT_HALF, Eigen::half); +REGISTER_AND_GENERATE_KERNEL(Ceil, f32, DT_FLOAT, float); +REGISTER_AND_GENERATE_KERNEL(Ceil, f64, DT_DOUBLE, double); + +} // namespace tensorflow From 18f890059d5e28a825e055338458d69080a25d3e Mon Sep 17 00:00:00 2001 From: Tres Popp Date: Mon, 9 Nov 2020 03:46:38 -0800 Subject: [PATCH 033/220] Refactor kernel_gen UnrankedOp macros. This moves the primary tensor handling logic out of macros and into a common base class for unary and binary ops.
PiperOrigin-RevId: 341374759 Change-Id: Ic023888cd4c44d525c57a98e19bb83352ba3186a --- .../mlir_generated/cwise_op_gpu_abs.cc | 10 +- .../mlir_generated/cwise_op_gpu_base.h | 15 +- .../mlir_generated/cwise_op_gpu_tanh.cc | 6 +- .../mlir_generated/unranked_op_gpu_abs.cc | 10 +- .../mlir_generated/unranked_op_gpu_base.h | 147 ++++++++++-------- .../mlir_generated/unranked_op_gpu_tanh.cc | 6 +- 6 files changed, 102 insertions(+), 92 deletions(-) diff --git a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_abs.cc b/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_abs.cc index a8e780d6bb5..263c7eb0eac 100644 --- a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_abs.cc +++ b/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_abs.cc @@ -32,9 +32,9 @@ namespace { GENERATE_OP_KERNEL_BASE(Abs); } // namespace -REGISTER_AND_GENERATE_KERNEL(Abs, F16, Eigen::half); -REGISTER_AND_GENERATE_KERNEL(Abs, F32, float); -REGISTER_AND_GENERATE_KERNEL(Abs, F64, double); -REGISTER_AND_GENERATE_KERNEL(Abs, I32, int32); -REGISTER_AND_GENERATE_KERNEL(Abs, I64, int64); +REGISTER_AND_GENERATE_UNARY_KERNEL(Abs, F16, Eigen::half); +REGISTER_AND_GENERATE_UNARY_KERNEL(Abs, F32, float); +REGISTER_AND_GENERATE_UNARY_KERNEL(Abs, F64, double); +REGISTER_AND_GENERATE_UNARY_KERNEL(Abs, I32, int32); +REGISTER_AND_GENERATE_UNARY_KERNEL(Abs, I64, int64); } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.h b/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.h index 995aa5390e4..d2a5d2d9720 100644 --- a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.h +++ b/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.h @@ -61,13 +61,14 @@ class MlirGeneratedUnaryOp : public OpKernel { ##Op(ctx, k##kernel_name##data_type##Kernel) {} \ }; -#define REGISTER_AND_GENERATE_KERNEL(kernel_name, data_type, native_data_type) \ - namespace { \ - GENERATE_OP_KERNEL_FOR(kernel_name, data_type) \ - } \ - REGISTER_KERNEL_BUILDER(Name(#kernel_name) \ - 
.Device(DEVICE_GPU) \ - .TypeConstraint("T"), \ +#define REGISTER_AND_GENERATE_UNARY_KERNEL(kernel_name, data_type, \ + native_data_type) \ + namespace { \ + GENERATE_OP_KERNEL_FOR(kernel_name, data_type) \ + } \ + REGISTER_KERNEL_BUILDER(Name(#kernel_name) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T"), \ MlirGenerated##kernel_name##data_type##Op); } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_tanh.cc b/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_tanh.cc index 72469a33378..545071ff595 100644 --- a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_tanh.cc +++ b/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_tanh.cc @@ -30,7 +30,7 @@ namespace { GENERATE_OP_KERNEL_BASE(Tanh); } // namespace -REGISTER_AND_GENERATE_KERNEL(Tanh, F16, Eigen::half) -REGISTER_AND_GENERATE_KERNEL(Tanh, F32, float) -REGISTER_AND_GENERATE_KERNEL(Tanh, F64, double) +REGISTER_AND_GENERATE_UNARY_KERNEL(Tanh, F16, Eigen::half) +REGISTER_AND_GENERATE_UNARY_KERNEL(Tanh, F32, float) +REGISTER_AND_GENERATE_UNARY_KERNEL(Tanh, F64, double) } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_abs.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_abs.cc index 586d73171f6..3e1baa2dce0 100644 --- a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_abs.cc +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_abs.cc @@ -18,10 +18,10 @@ limitations under the License. 
namespace tensorflow { -REGISTER_AND_GENERATE_KERNEL(Abs, f16, DT_HALF, Eigen::half); -REGISTER_AND_GENERATE_KERNEL(Abs, f32, DT_FLOAT, float); -REGISTER_AND_GENERATE_KERNEL(Abs, f64, DT_DOUBLE, double); -REGISTER_AND_GENERATE_KERNEL(Abs, i32, DT_INT32, int32); -REGISTER_AND_GENERATE_KERNEL(Abs, i64, DT_INT64, int64); +REGISTER_AND_GENERATE_UNARY_KERNEL(Abs, f16, DT_HALF, Eigen::half); +REGISTER_AND_GENERATE_UNARY_KERNEL(Abs, f32, DT_FLOAT, float); +REGISTER_AND_GENERATE_UNARY_KERNEL(Abs, f64, DT_DOUBLE, double); +REGISTER_AND_GENERATE_UNARY_KERNEL(Abs, i32, DT_INT32, int32); +REGISTER_AND_GENERATE_UNARY_KERNEL(Abs, i64, DT_INT64, int64); } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h index c894b6e67e4..4bcfff21ea8 100644 --- a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h @@ -16,8 +16,12 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_KERNELS_MLIR_GENERATED_UNRANKED_OP_GPU_ABS_H_ #define TENSORFLOW_CORE_KERNELS_MLIR_GENERATED_UNRANKED_OP_GPU_ABS_H_ +#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/ArrayRef.h" +#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/SmallVector.h" #include "mlir/ExecutionEngine/CRunnerUtils.h" // from @llvm-project #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/op_requires.h" +#include "tensorflow/core/platform/errors.h" namespace tensorflow { @@ -72,90 +76,95 @@ Tensor ConvertDescriptorToTensor( return tensor; } +template +class MlirUnrankedOp : public OpKernel { + public: + explicit MlirUnrankedOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + llvm::SmallVector<::UnrankedMemRefType, 2> input_descs; + for (int i = 0, end = ctx->num_inputs(); i < end; ++i) { + input_descs.push_back( + std::move(ConvertTensorToDescriptor(ctx->input(i)))); + } + auto result_desc = Derived::Invoke(ctx, input_descs); + + for (const auto& input_desc : input_descs) { + free(input_desc.descriptor); + } + void* result_data_ptr = static_cast(result_desc.descriptor)[0]; + + for (int i = 0, end = ctx->num_inputs(); i < end; ++i) { + const Tensor& input = ctx->input(i); + if (input.data() == result_data_ptr) { + ctx->set_output(0, input); + free(result_desc.descriptor); + return; + } + } + tensorflow::AllocatorAttributes attrs; + auto* allocator = ctx->get_allocator(attrs); + Tensor result_tensor = ConvertDescriptorToTensor( + result_desc, tf_data_type, allocator); + free(result_desc.descriptor); + ctx->set_output(0, result_tensor); + } +}; + #define MLIR_FUNCTION(tf_op, mlir_type) _mlir_ciface_##tf_op##_##mlir_type - -// Generates a class derived from OpKernel with Compute function that converts -// input tensors to unranked memref descriptors and calls mlir-generated -// unranked kernel. 
The outputs are converted back to tensors using -// MlirTensorBuffer to take ownership of pre-allocated memory. -#define REGISTER_AND_GENERATE_KERNEL(tf_op, mlir_type, tf_data_type, \ - data_type) \ - extern "C" ::UnrankedMemRefType MLIR_FUNCTION(tf_op, mlir_type)( \ - tensorflow::OpKernelContext * ctx, \ - ::UnrankedMemRefType * arg); \ - \ - namespace { \ - class MlirUnranked##tf_op##mlir_type##Op : public OpKernel { \ - public: \ - MlirUnranked##tf_op##mlir_type##Op(OpKernelConstruction* ctx) \ - : OpKernel(ctx) {} \ - \ - void Compute(OpKernelContext* ctx) override { \ - const Tensor& input = ctx->input(0); \ - \ - auto input_desc = ConvertTensorToDescriptor(input); \ - auto result_desc = MLIR_FUNCTION(tf_op, mlir_type)(ctx, &input_desc); \ - free(input_desc.descriptor); \ - \ - /* Compare data pointers to detect forwarding. */ \ - void* result_data_ptr = static_cast(result_desc.descriptor)[0]; \ - if (input.data() == result_data_ptr) { \ - ctx->set_output(0, input); \ - } else { \ - tensorflow::AllocatorAttributes attrs; \ - auto* allocator = ctx->get_allocator(attrs); \ - Tensor result_tensor = ConvertDescriptorToTensor( \ - result_desc, tf_data_type, allocator); \ - ctx->set_output(0, result_tensor); \ - } \ - free(result_desc.descriptor); \ - } \ - }; \ - } \ - \ - REGISTER_KERNEL_BUILDER( \ - Name(#tf_op).Device(DEVICE_GPU).TypeConstraint("T"), \ - MlirUnranked##tf_op##mlir_type##Op); - -// OpKernel with Compute function that converts input tensors to unranked memref -// descriptors and calls mlir-generated unranked kernel. The outputs are -// converted back to tensors using MlirTensorBuffer to take ownership of +// OpKernel with Compute function that converts input tensors to unranked +// memref descriptors and calls mlir-generated unranked kernel. The outputs +// are converted back to tensors using MlirTensorBuffer to take ownership of // pre-allocated memory. 
#define REGISTER_AND_GENERATE_BINARY_KERNEL(tf_op, mlir_type, tf_data_type, \ data_type) \ extern "C" ::UnrankedMemRefType MLIR_FUNCTION(tf_op, mlir_type)( \ tensorflow::OpKernelContext * ctx, \ - ::UnrankedMemRefType * arg1, \ - ::UnrankedMemRefType * arg2); \ + const ::UnrankedMemRefType* arg1, \ + const ::UnrankedMemRefType* arg2); \ \ namespace { \ - class MlirUnranked##tf_op##mlir_type##Op : public OpKernel { \ + class MlirUnranked##tf_op##mlir_type##Op \ + : public MlirUnrankedOp { \ public: \ explicit MlirUnranked##tf_op##mlir_type##Op(OpKernelConstruction* ctx) \ - : OpKernel(ctx) {} \ + : MlirUnrankedOp(ctx) {} \ \ - void Compute(OpKernelContext* ctx) override { \ - const Tensor& input = ctx->input(0); \ - const Tensor& input2 = ctx->input(1); \ - \ - auto input_desc = ConvertTensorToDescriptor(input); \ - auto input_desc2 = ConvertTensorToDescriptor(input2); \ - auto result_desc = \ - MLIR_FUNCTION(tf_op, mlir_type)(ctx, &input_desc, &input_desc2); \ - free(input_desc.descriptor); \ - free(input_desc2.descriptor); \ - \ - tensorflow::AllocatorAttributes attrs; \ - auto* allocator = ctx->get_allocator(attrs); \ - \ - Tensor result_tensor = ConvertDescriptorToTensor( \ - result_desc, tf_data_type, allocator); \ - free(result_desc.descriptor); \ - ctx->set_output(0, result_tensor); \ + static ::UnrankedMemRefType Invoke( \ + OpKernelContext* ctx, \ + llvm::ArrayRef<::UnrankedMemRefType> args) { \ + return MLIR_FUNCTION(tf_op, mlir_type)(ctx, &args[0], &args[1]); \ } \ }; \ } \ + REGISTER_KERNEL_BUILDER( \ + Name(#tf_op).Device(DEVICE_GPU).TypeConstraint("T"), \ + MlirUnranked##tf_op##mlir_type##Op); + +#define REGISTER_AND_GENERATE_UNARY_KERNEL(tf_op, mlir_type, tf_data_type, \ + data_type) \ + extern "C" ::UnrankedMemRefType MLIR_FUNCTION(tf_op, mlir_type)( \ + tensorflow::OpKernelContext * ctx, \ + const ::UnrankedMemRefType* arg); \ \ + namespace { \ + class MlirUnranked##tf_op##mlir_type##Op \ + : public MlirUnrankedOp { \ + public: \ + explicit 
MlirUnranked##tf_op##mlir_type##Op(OpKernelConstruction* ctx) \ + : MlirUnrankedOp(ctx) {} \ + \ + static ::UnrankedMemRefType Invoke( \ + OpKernelContext* ctx, \ + llvm::ArrayRef<::UnrankedMemRefType> args) { \ + return MLIR_FUNCTION(tf_op, mlir_type)(ctx, &args[0]); \ + } \ + }; \ + } \ REGISTER_KERNEL_BUILDER( \ Name(#tf_op).Device(DEVICE_GPU).TypeConstraint("T"), \ MlirUnranked##tf_op##mlir_type##Op); diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_tanh.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_tanh.cc index 206c0756e9c..440ef1a2d97 100644 --- a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_tanh.cc +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_tanh.cc @@ -18,8 +18,8 @@ limitations under the License. namespace tensorflow { -REGISTER_AND_GENERATE_KERNEL(Tanh, f16, DT_HALF, Eigen::half); -REGISTER_AND_GENERATE_KERNEL(Tanh, f32, DT_FLOAT, float); -REGISTER_AND_GENERATE_KERNEL(Tanh, f64, DT_DOUBLE, double); +REGISTER_AND_GENERATE_UNARY_KERNEL(Tanh, f16, DT_HALF, Eigen::half); +REGISTER_AND_GENERATE_UNARY_KERNEL(Tanh, f32, DT_FLOAT, float); +REGISTER_AND_GENERATE_UNARY_KERNEL(Tanh, f64, DT_DOUBLE, double); } // namespace tensorflow From 3985f8497c27b14709993c160e25ad6eb13e07f5 Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Mon, 9 Nov 2020 04:23:54 -0800 Subject: [PATCH 034/220] [HLO] Fix HLO DynamicBroadcastInDimOp -> LHLO lowering. The conversion had a bug in computation of strides and sizes args for std.memref_reinterpret_cast. The previous version also relied on linalg::ReshapeOp to do broadcasting when the rank of the output was higher than the rank of the input. Now the broadcasting is entirely done via descriptor modification and linalg::ReshapeOp was replaced with CopyOp. 
PiperOrigin-RevId: 341379871 Change-Id: I8cf33d01c2ea5dcd416f288c5e160462ce56827b --- .../mhlo/transforms/hlo_legalize_to_lhlo.cc | 83 +++++++++++------ .../mlir/hlo/tests/hlo-legalize-to-lhlo.mlir | 93 +++++++------------ 2 files changed, 89 insertions(+), 87 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/hlo_legalize_to_lhlo.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/hlo_legalize_to_lhlo.cc index aca597764a5..6710d371cfd 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/hlo_legalize_to_lhlo.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/hlo_legalize_to_lhlo.cc @@ -194,8 +194,7 @@ struct HloToLhloDynamicBroadcastInDimOpConverter Value transformed_operand = InsertDynamicMemrefCastOp(op, operands.front(), &rewriter); - rewriter.create( - loc, transformed_operand, resultBuffer, op.broadcast_dimensions()); + rewriter.create(loc, transformed_operand, resultBuffer); rewriter.replaceOp(op, {resultBuffer}); @@ -211,48 +210,76 @@ struct HloToLhloDynamicBroadcastInDimOpConverter auto loc = op.getLoc(); auto operand_type = operand.getType().cast(); auto operand_shape = operand_type.getShape(); + auto operand_rank = operand_type.getRank(); - SmallVector sizes, strides; - sizes.reserve(operand_shape.size()); - strides.reserve(operand_shape.size()); + auto result_type = op.getType().cast(); + auto result_rank = result_type.getRank(); Value zero = b->create(loc, 0); Value one = b->create(loc, 1); - for (auto dim : llvm::enumerate(op.broadcast_dimensions())) { - Value broadcast_dim_value = - b->create(loc, dim.value().getSExtValue()); - Value result_dim_size = b->create( - loc, op.output_dimensions(), broadcast_dim_value); - Value operand_dim_size = - ShapedType::isDynamic(operand_shape[dim.index()]) - ? b->create(loc, operand, dim.index()).getResult() - : b->create(loc, operand_shape[dim.index()]) - .getResult(); - // TODO(pifon): Revisit if this cast is needed. 
Maybe we can use - // tensor for `output_dimensions` as well. + // Compute a reversed scan product. Compute the stride for the dimensions so + // far, working from minor to major dimensions. Additionally, save the + // operand shape Values to use in the next loop. + SmallVector operand_strides(operand_rank, one); + SmallVector operand_sizes(operand_rank, one); + Value stride_so_far = one; + for (int i = operand_rank - 1; i >= 0; --i) { + Value operand_dim_size = + ShapedType::isDynamic(operand_shape[i]) + ? b->create(loc, operand, i).getResult() + : b->create(loc, operand_shape[i]).getResult(); + operand_sizes[i] = operand_dim_size; + + operand_strides[i] = stride_so_far; + if (i > 0) { + stride_so_far = b->create(loc, stride_so_far, operand_dim_size); + } + } + + SmallVector sizes, strides; + sizes.reserve(result_rank); + strides.reserve(result_rank); + + DenseMap output_to_input_dim; + for (auto dim : llvm::enumerate(op.broadcast_dimensions())) { + output_to_input_dim[dim.value().getSExtValue()] = dim.index(); + } + for (int i = 0; i < result_rank; ++i) { + Value i_val = b->create(loc, i); + Value result_dim_size = + b->create(loc, op.output_dimensions(), i_val); if (!result_dim_size.getType().isIndex()) { result_dim_size = b->create(loc, result_dim_size, b->getIndexType()); } + sizes.push_back(result_dim_size); + + auto it = output_to_input_dim.find(i); + // If the rank of the output is greater than the rank of the input, i.e. + // there was no output dimension in the inverse broadcast_dimensions map + // we also set stride to 0 to emulate padding of the shape with 1s and the + // corresponding expansion. + if (it == output_to_input_dim.end()) { + strides.push_back(zero); + continue; + } // There can be two cases: - // 1) Operand dim == result dim => expansion is not needed => stride := 1. + // 1) Operand dim == result dim => expansion is not needed + // => stride flattened buffer stride // 2) Operand dim < result dim => expansion is needed => stride := 0. 
- Value is_expansion = b->create(loc, CmpIPredicate::slt, - operand_dim_size, result_dim_size); - strides.push_back( - b->create(loc, is_expansion, zero, one)); - - // Size of input dim can be set to the size of the corresponding output - // dimension for both cases. - sizes.push_back(result_dim_size); + int dim = it->second; + Value is_expansion = b->create( + loc, CmpIPredicate::slt, operand_sizes[dim], result_dim_size); + strides.push_back(b->create(loc, is_expansion, zero, + operand_strides[dim])); } // Type-erased memref type with static rank, dynamic sizes and strides. - SmallVector dynamic_layout(operand_shape.size(), + SmallVector dynamic_layout(result_rank, MemRefType::kDynamicStrideOrOffset); - SmallVector dynamic_shape(operand_shape.size(), + SmallVector dynamic_shape(result_rank, MemRefType::kDynamicSize); auto type_erased_memref_type = MemRefType::get( dynamic_shape, operand_type.getElementType(), diff --git a/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-lhlo.mlir b/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-lhlo.mlir index 399ec9e2e2d..910129ce612 100644 --- a/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-lhlo.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-lhlo.mlir @@ -1,4 +1,6 @@ -// RUN: mlir-hlo-opt -hlo-legalize-to-lhlo -buffer-hoisting -buffer-deallocation -split-input-file %s -o - | FILECHECK_OPTS="" FileCheck %s +// RUN: mlir-hlo-opt -hlo-legalize-to-lhlo -buffer-hoisting \ +// RUN: -buffer-deallocation -split-input-file -cse %s -o - \ +// RUN: | FILECHECK_OPTS="" FileCheck %s // CHECK-LABEL: func @attrs func @attrs_copy(%operand: memref<2x2xf32>, %result: memref<2x2xf32>) { @@ -153,64 +155,41 @@ func @broadcast(%operand: memref<5xf32>, %result: memref<10x5xf32>) { // ----- -func @external_func() -> tensor<3xi64> - -// CHECK: #[[MAP:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s0 + d1 * s1)> +// CHECK: #[[MAP:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s0 + d1 * s1 + d2 * s2)> // CHECK-LABEL: func 
@dyn_broadcast -func @dyn_broadcast(%operand: memref) { - // CHECK-SAME: (%[[OPERAND:.*]]: memref) +func @dyn_broadcast(%operand: memref) -> index { + // CHECK-SAME: %[[OPERAND:.*]]: memref %tensor_operand = tensor_load %operand : memref %c1 = constant 1 : i64 %shape = tensor_from_elements %c1, %c1, %c1 : tensor<3xi64> %tensor_result = "mhlo.dynamic_broadcast_in_dim"(%tensor_operand, %shape) { broadcast_dimensions = dense<[1, 2]> : tensor<2xi64> } : (tensor, tensor<3xi64>) -> tensor - // CHECK: %[[SHAPE:.*]] = tensor_from_elements - // CHECK: %[[C0:.*]] = constant 0 : index - // CHECK: %[[EL0:.*]] = extract_element %[[SHAPE]][%[[C0]]] : tensor<3xi64> - // CHECK: %[[IC0:.*]] = index_cast %[[EL0]] : i64 to index - // CHECK: %[[C1:.*]] = constant 1 : index - // CHECK: %[[EL1:.*]] = extract_element %[[SHAPE]][%[[C1]]] : tensor<3xi64> - // CHECK: %[[IC1:.*]] = index_cast %[[EL1]] : i64 to index - // CHECK: %[[C2:.*]] = constant 2 : index - // CHECK: %[[EL2:.*]] = extract_element %[[SHAPE]][%[[C2]]] : tensor<3xi64> - // CHECK: %[[IC2:.*]] = index_cast %[[EL2]] : i64 to index - // CHECK: %[[RESULT:.*]] = alloc(%[[IC0]], %[[IC1]], %[[IC2]]) - - // CHECK: %[[C0_:.*]] = constant 0 : index - // CHECK: %[[C1_:.*]] = constant 1 : index - - // CHECK: %[[C1__:.*]] = constant 1 : index - // CHECK: %[[EL1_:.*]] = extract_element %[[SHAPE]]{{\[}}%[[C1__]]] : tensor<3xi64> - // CHECK: %[[C0___:.*]] = constant 0 : index - // CHECK: %[[OPERAND_DIM_0:.*]] = dim %[[OPERAND]], %[[C0___]] : memref - // CHECK: %[[RESULT_DIM_1:.*]] = index_cast %[[EL1_]] : i64 to index - // CHECK: %[[EXPAND_0:.*]] = cmpi "slt", %[[OPERAND_DIM_0]], %[[RESULT_DIM_1]] - // CHECK: %[[STRIDE_0:.*]] = select %[[EXPAND_0]], %[[C0_]], %[[C1_]] : index - - // CHECK: %[[C2_:.*]] = constant 2 : index - // CHECK: %[[EL2_:.*]] = extract_element %[[SHAPE]]{{\[}}%[[C2_]]] : tensor<3xi64> - // CHECK: %[[C1___:.*]] = constant 1 : index - // CHECK: %[[OPERAND_DIM_1:.*]] = dim %[[OPERAND]], %[[C1___]] : memref - // CHECK: 
%[[RESULT_DIM_2:.*]] = index_cast %[[EL2_]] : i64 to index - // CHECK: %[[EXPAND_1:.*]] = cmpi "slt", %[[OPERAND_DIM_1]], %[[RESULT_DIM_2]] - // CHECK: %[[STRIDE_1:.*]] = select %[[EXPAND_1]], %[[C0_]], %[[C1_]] : index - - // CHECK: %[[TRANSFORMED_MEMREF:.*]] = memref_reinterpret_cast %[[OPERAND]] to - // CHECK-SAME: offset: [0], - // CHECK-SAME: sizes: {{\[}}%[[RESULT_DIM_1]], %[[RESULT_DIM_2]]] - // CHECK-SAME: strides: {{\[}}%[[STRIDE_0]], %[[STRIDE_1]]] - // CHECK-SAME: : memref to memref - - // CHECK: "lmhlo.broadcast_in_dim"(%[[TRANSFORMED_MEMREF]], %[[RESULT]]) { - // CHECK-SAME: broadcast_dimensions = dense<[1, 2]> : tensor<2xi64> - // CHECK-SAME: } : (memref, memref) -> () - - // Do not store the value back to avoid the tensor-store being rewritten to - // a copy into the pre-allocated argument. - return + %rank = rank %tensor_result : tensor + return %rank : index } +// CHECK: %[[SHAPE:.*]] = tensor_from_elements +// CHECK: %[[C0:.*]] = constant 0 : index +// CHECK: %[[EL0:.*]] = extract_element %[[SHAPE]]{{\[}}%[[C0]]] : tensor<3xi64> +// CHECK: %[[SIZE_0:.*]] = index_cast %[[EL0]] : i64 to index +// CHECK: %[[C1:.*]] = constant 1 : index +// CHECK: %[[EL1:.*]] = extract_element %[[SHAPE]]{{\[}}%[[C1]]] : tensor<3xi64> +// CHECK: %[[SIZE_1:.*]] = index_cast %[[EL1]] : i64 to index +// CHECK: %[[C2:.*]] = constant 2 : index +// CHECK: %[[EL2:.*]] = extract_element %[[SHAPE]]{{\[}}%[[C2]]] : tensor<3xi64> +// CHECK: %[[SIZE_2:.*]] = index_cast %[[EL2]] : i64 to index +// CHECK: %[[RESULT:.*]] = alloc(%[[SIZE_0]], %[[SIZE_1]], %[[SIZE_2]]) : memref +// CHECK: %[[OPER_DIM_1:.*]] = dim %[[OPERAND]], %[[C1]] : memref +// CHECK: %[[OP_STRIDE_0:.*]] = muli %[[C1]], %[[OPER_DIM_1]] : index +// CHECK: %[[OPER_DIM_0:.*]] = dim %[[OPERAND]], %[[C0]] : memref +// CHECK: %[[EXPAND_1:.*]] = cmpi "slt", %[[OPER_DIM_0]], %[[SIZE_1]] : index +// CHECK: %[[STRIDE_1:.*]] = select %[[EXPAND_1]], %[[C0]], %[[OP_STRIDE_0]] : index +// CHECK: %[[EXPAND_2:.*]] = cmpi "slt", 
%[[OPER_DIM_1]], %[[SIZE_2]] : index +// CHECK: %[[STRIDE_2:.*]] = select %[[EXPAND_2]], %[[C0]], %[[C1]] : index +// CHECK: %[[TRANSFORMED_MEMREF:.*]] = memref_reinterpret_cast %[[OPERAND]] to offset: [0], sizes: {{\[}}%[[SIZE_0]], %[[SIZE_1]], %[[SIZE_2]]], strides: {{\[}}%[[C0]], %[[STRIDE_1]], %[[STRIDE_2]]]: memref to memref +// CHECK: "lmhlo.copy"(%[[TRANSFORMED_MEMREF]], %[[RESULT]]) : (memref, memref) -> () +// CHECK: dealloc %[[RESULT]] : memref // ----- @@ -483,11 +462,9 @@ func @add_dyn(%lhs: tensor, %rhs: tensor) { // CHECK: %[[DIM1:.*]] = dim %arg0, %[[C1]] : memref // CHECK: %[[IC1:.*]] = index_cast %[[DIM1]] : index to i64 // CHECK: %[[SHAPE:.*]] = tensor_from_elements %[[IC0]], %[[IC1]] : tensor<2xi64> - // CHECK: %[[C0_:.*]] = constant 0 : index - // CHECK: %[[EE0:.*]] = extract_element %[[SHAPE]][%[[C0_]]] : tensor<2xi64> + // CHECK: %[[EE0:.*]] = extract_element %[[SHAPE]][%[[C0]]] : tensor<2xi64> // CHECK: %[[ICS0:.*]] = index_cast %[[EE0]] : i64 to index - // CHECK: %[[C1_:.*]] = constant 1 : index - // CHECK: %[[EE1:.*]] = extract_element %[[SHAPE]][%[[C1_]]] : tensor<2xi64> + // CHECK: %[[EE1:.*]] = extract_element %[[SHAPE]][%[[C1]]] : tensor<2xi64> // CHECK: %[[ICS1:.*]] = index_cast %[[EE1]] : i64 to index // CHECK: %[[RESULT:.*]] = alloc(%[[ICS0]], %[[ICS1]]) // CHECK: "lmhlo.add"(%arg0, %arg1, %[[RESULT]]) : (memref, memref, memref) -> () @@ -508,11 +485,9 @@ func @tanh_dyn(%arg0: tensor) { // CHECK: %[[DIM1:.*]] = dim %arg0, %[[C1]] : memref // CHECK: %[[IC1:.*]] = index_cast %[[DIM1]] : index to i64 // CHECK: %[[SHAPE:.*]] = tensor_from_elements %[[IC0]], %[[IC1]] : tensor<2xi64> - // CHECK: %[[C0_:.*]] = constant 0 : index - // CHECK: %[[EE0:.*]] = extract_element %[[SHAPE]][%[[C0_]]] : tensor<2xi64> + // CHECK: %[[EE0:.*]] = extract_element %[[SHAPE]][%[[C0]]] : tensor<2xi64> // CHECK: %[[ICS0:.*]] = index_cast %[[EE0]] : i64 to index - // CHECK: %[[C1_:.*]] = constant 1 : index - // CHECK: %[[EE1:.*]] = extract_element 
%[[SHAPE]][%[[C1_]]] : tensor<2xi64> + // CHECK: %[[EE1:.*]] = extract_element %[[SHAPE]][%[[C1]]] : tensor<2xi64> // CHECK: %[[ICS1:.*]] = index_cast %[[EE1]] : i64 to index // CHECK: %[[RESULT:.*]] = alloc(%[[ICS0]], %[[ICS1]]) // CHECK: "lmhlo.tanh"(%arg0, %[[RESULT]]) : (memref, memref) -> () @@ -645,7 +620,7 @@ func @shape_assuming_memref(%arg0: tensor) -> tensor { %4 = tensor_cast %3 : tensor to tensor<1xindex> %5 = "mhlo.dynamic_broadcast_in_dim"(%0, %4) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor, tensor<1xindex>) -> tensor %6 = "mhlo.dynamic_broadcast_in_dim"(%arg0, %4) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor, tensor<1xindex>) -> tensor - // CHECK: "lmhlo.maximum"(%6, %9, %20) : (memref, memref, memref) -> () + // CHECK: "lmhlo.maximum"(%{{.*}}, %{{.*}}, %{{.*}}) : (memref, memref, memref) -> () %7 = mhlo.maximum %5, %6 : tensor // CHECK: shape.assuming_yield %{{.*}} : memref shape.assuming_yield %7 : tensor From 9619f1cba1c250edcc682b37427f8102cc4410bb Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 9 Nov 2020 04:51:40 -0800 Subject: [PATCH 035/220] [MLIR][KernelGen] Add unranked MLIR-generated kernels and microbenchmarks Add unranked MLIR-generated versions for the following simple unary kernels: - cos - exp - floor - log - rsqrt - sqrt PiperOrigin-RevId: 341382592 Change-Id: I7b6c79204f793262fb1ec1e6cebcd4a944857e86 --- tensorflow/core/kernels/cwise_op_cos.cc | 3 +++ tensorflow/core/kernels/cwise_op_exp.cc | 3 +++ tensorflow/core/kernels/cwise_op_floor.cc | 3 +++ tensorflow/core/kernels/cwise_op_log.cc | 3 +++ tensorflow/core/kernels/cwise_op_rsqrt.cc | 3 +++ tensorflow/core/kernels/cwise_op_sqrt.cc | 4 ++- tensorflow/core/kernels/mlir_generated/BUILD | 12 +++++++++ .../mlir_generated/unranked_op_gpu_cos.cc | 25 +++++++++++++++++++ .../mlir_generated/unranked_op_gpu_exp.cc | 25 +++++++++++++++++++ .../mlir_generated/unranked_op_gpu_floor.cc | 25 +++++++++++++++++++ .../mlir_generated/unranked_op_gpu_log.cc | 25 +++++++++++++++++++ .../mlir_generated/unranked_op_gpu_rsqrt.cc | 25 +++++++++++++++++++ .../mlir_generated/unranked_op_gpu_sqrt.cc | 25 +++++++++++++++++++ 13 files changed, 180 insertions(+), 1 deletion(-) create mode 100644 tensorflow/core/kernels/mlir_generated/unranked_op_gpu_cos.cc create mode 100644 tensorflow/core/kernels/mlir_generated/unranked_op_gpu_exp.cc create mode 100644 tensorflow/core/kernels/mlir_generated/unranked_op_gpu_floor.cc create mode 100644 tensorflow/core/kernels/mlir_generated/unranked_op_gpu_log.cc create mode 100644 tensorflow/core/kernels/mlir_generated/unranked_op_gpu_rsqrt.cc create mode 100644 tensorflow/core/kernels/mlir_generated/unranked_op_gpu_sqrt.cc diff --git a/tensorflow/core/kernels/cwise_op_cos.cc b/tensorflow/core/kernels/cwise_op_cos.cc index 64e9fabfc2b..5bf127f0fb8 100644 --- a/tensorflow/core/kernels/cwise_op_cos.cc +++ b/tensorflow/core/kernels/cwise_op_cos.cc @@ -20,7 +20,10 @@ REGISTER6(UnaryOp, CPU, "Cos", functor::cos, float, Eigen::half, bfloat16, double, 
complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \ + !defined(MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED) REGISTER3(UnaryOp, GPU, "Cos", functor::cos, float, Eigen::half, double); #endif +#endif } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_exp.cc b/tensorflow/core/kernels/cwise_op_exp.cc index 28ace80431b..24d098a0984 100644 --- a/tensorflow/core/kernels/cwise_op_exp.cc +++ b/tensorflow/core/kernels/cwise_op_exp.cc @@ -20,8 +20,11 @@ REGISTER6(UnaryOp, CPU, "Exp", functor::exp, float, Eigen::half, bfloat16, double, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \ + !defined(MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED) REGISTER5(UnaryOp, GPU, "Exp", functor::exp, float, Eigen::half, double, complex64, complex128); #endif +#endif } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_floor.cc b/tensorflow/core/kernels/cwise_op_floor.cc index da5619b3df9..57296f9a41a 100644 --- a/tensorflow/core/kernels/cwise_op_floor.cc +++ b/tensorflow/core/kernels/cwise_op_floor.cc @@ -20,6 +20,9 @@ REGISTER4(UnaryOp, CPU, "Floor", functor::floor, float, Eigen::half, bfloat16, double); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \ + !defined(MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED) REGISTER3(UnaryOp, GPU, "Floor", functor::floor, float, Eigen::half, double); #endif +#endif } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_log.cc b/tensorflow/core/kernels/cwise_op_log.cc index 236f95dfa77..f0ece6c24ca 100644 --- a/tensorflow/core/kernels/cwise_op_log.cc +++ b/tensorflow/core/kernels/cwise_op_log.cc @@ -20,7 +20,10 @@ REGISTER6(UnaryOp, CPU, "Log", functor::log, float, Eigen::half, double, bfloat16, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \ + 
!defined(MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED) REGISTER3(UnaryOp, GPU, "Log", functor::log, float, Eigen::half, double); #endif +#endif } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_rsqrt.cc b/tensorflow/core/kernels/cwise_op_rsqrt.cc index 21e3bf4d33f..cb6c1efd548 100644 --- a/tensorflow/core/kernels/cwise_op_rsqrt.cc +++ b/tensorflow/core/kernels/cwise_op_rsqrt.cc @@ -20,8 +20,11 @@ REGISTER5(UnaryOp, CPU, "Rsqrt", functor::rsqrt, float, Eigen::half, double, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \ + !defined(MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED) REGISTER3(UnaryOp, GPU, "Rsqrt", functor::rsqrt, float, Eigen::half, double); #endif +#endif REGISTER5(SimpleBinaryOp, CPU, "RsqrtGrad", functor::rsqrt_grad, float, Eigen::half, double, complex64, complex128); diff --git a/tensorflow/core/kernels/cwise_op_sqrt.cc b/tensorflow/core/kernels/cwise_op_sqrt.cc index 2e33297a305..32c78e4666a 100644 --- a/tensorflow/core/kernels/cwise_op_sqrt.cc +++ b/tensorflow/core/kernels/cwise_op_sqrt.cc @@ -20,9 +20,11 @@ REGISTER6(UnaryOp, CPU, "Sqrt", functor::sqrt, float, Eigen::half, double, bfloat16, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \ + !defined(MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED) REGISTER3(UnaryOp, GPU, "Sqrt", functor::sqrt, float, Eigen::half, double); #endif - +#endif REGISTER6(SimpleBinaryOp, CPU, "SqrtGrad", functor::sqrt_grad, float, Eigen::half, bfloat16, double, complex64, complex128); diff --git a/tensorflow/core/kernels/mlir_generated/BUILD b/tensorflow/core/kernels/mlir_generated/BUILD index 46d3e3028ae..c3e700d1d2f 100644 --- a/tensorflow/core/kernels/mlir_generated/BUILD +++ b/tensorflow/core/kernels/mlir_generated/BUILD @@ -42,6 +42,12 @@ filegroup( [ "unranked_op_gpu_abs.cc", "unranked_op_gpu_ceil.cc", + "unranked_op_gpu_cos.cc", + "unranked_op_gpu_exp.cc", + 
"unranked_op_gpu_floor.cc", + "unranked_op_gpu_log.cc", + "unranked_op_gpu_rsqrt.cc", + "unranked_op_gpu_sqrt.cc", "unranked_op_gpu_tanh.cc", "unranked_op_gpu_base.h", "unranked_op_gpu_base.cc", @@ -62,6 +68,12 @@ cc_library( ":abs_unranked_kernels", ":addv2_unranked_kernels", ":ceil_unranked_kernels", + ":cos_unranked_kernels", + ":exp_unranked_kernels", + ":floor_unranked_kernels", + ":log_unranked_kernels", + ":rsqrt_unranked_kernels", + ":sqrt_unranked_kernels", ":tanh_unranked_kernels", "//tensorflow/compiler/mlir/tools/kernel_gen:tf_cuda_runtime_wrappers", "//tensorflow/compiler/mlir/tools/kernel_gen:tf_framework_c_interface", diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_cos.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_cos.cc new file mode 100644 index 00000000000..b21f8351608 --- /dev/null +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_cos.cc @@ -0,0 +1,25 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h" + +namespace tensorflow { + +REGISTER_AND_GENERATE_KERNEL(Cos, f16, DT_HALF, Eigen::half); +REGISTER_AND_GENERATE_KERNEL(Cos, f32, DT_FLOAT, float); +REGISTER_AND_GENERATE_KERNEL(Cos, f64, DT_DOUBLE, double); + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_exp.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_exp.cc new file mode 100644 index 00000000000..70f33066819 --- /dev/null +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_exp.cc @@ -0,0 +1,25 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h" + +namespace tensorflow { + +REGISTER_AND_GENERATE_KERNEL(Exp, f16, DT_HALF, Eigen::half); +REGISTER_AND_GENERATE_KERNEL(Exp, f32, DT_FLOAT, float); +REGISTER_AND_GENERATE_KERNEL(Exp, f64, DT_DOUBLE, double); + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_floor.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_floor.cc new file mode 100644 index 00000000000..80b05d0e1e2 --- /dev/null +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_floor.cc @@ -0,0 +1,25 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h" + +namespace tensorflow { + +REGISTER_AND_GENERATE_KERNEL(Floor, f16, DT_HALF, Eigen::half); +REGISTER_AND_GENERATE_KERNEL(Floor, f32, DT_FLOAT, float); +REGISTER_AND_GENERATE_KERNEL(Floor, f64, DT_DOUBLE, double); + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_log.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_log.cc new file mode 100644 index 00000000000..5314e4afe2b --- /dev/null +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_log.cc @@ -0,0 +1,25 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h" + +namespace tensorflow { + +REGISTER_AND_GENERATE_KERNEL(Log, f16, DT_HALF, Eigen::half); +REGISTER_AND_GENERATE_KERNEL(Log, f32, DT_FLOAT, float); +REGISTER_AND_GENERATE_KERNEL(Log, f64, DT_DOUBLE, double); + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_rsqrt.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_rsqrt.cc new file mode 100644 index 00000000000..448b2427ad1 --- /dev/null +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_rsqrt.cc @@ -0,0 +1,25 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h" + +namespace tensorflow { + +REGISTER_AND_GENERATE_KERNEL(Rsqrt, f16, DT_HALF, Eigen::half); +REGISTER_AND_GENERATE_KERNEL(Rsqrt, f32, DT_FLOAT, float); +REGISTER_AND_GENERATE_KERNEL(Rsqrt, f64, DT_DOUBLE, double); + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_sqrt.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_sqrt.cc new file mode 100644 index 00000000000..98f1342a9a8 --- /dev/null +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_sqrt.cc @@ -0,0 +1,25 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h" + +namespace tensorflow { + +REGISTER_AND_GENERATE_KERNEL(Sqrt, f16, DT_HALF, Eigen::half); +REGISTER_AND_GENERATE_KERNEL(Sqrt, f32, DT_FLOAT, float); +REGISTER_AND_GENERATE_KERNEL(Sqrt, f64, DT_DOUBLE, double); + +} // namespace tensorflow From c86b90dc9e3dc51dfc95964449570160cb12c1fe Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Mon, 9 Nov 2020 07:45:24 -0800 Subject: [PATCH 036/220] Add a debug string to executable. PiperOrigin-RevId: 341403664 Change-Id: I50211247e449a1715c11be844a4d48d5456c9d23 --- tensorflow/compiler/xla/service/compiler.h | 2 +- tensorflow/compiler/xla/service/executable.h | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h index 253caac195c..9e169fd8210 100644 --- a/tensorflow/compiler/xla/service/compiler.h +++ b/tensorflow/compiler/xla/service/compiler.h @@ -135,7 +135,7 @@ class AotCompilationMetadata { public: AotCompilationMetadata(const AotCompilationMetadata&) = delete; AotCompilationMetadata& operator=(AotCompilationMetadata const&) = delete; - + virtual std::string ToString() const { return ""; } virtual ~AotCompilationMetadata() = default; protected: diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h index 51763b79959..62d3614ab1f 100644 --- a/tensorflow/compiler/xla/service/executable.h +++ b/tensorflow/compiler/xla/service/executable.h @@ -375,6 +375,10 @@ class Executable { bool dumping_snapshot() const { return hlo_proto_ != nullptr; } HloProto const* hlo_proto() const { return hlo_proto_.get(); } + std::string& debug_info() { return debug_info_; } + void set_debug_info(const std::string& debug_info) { + debug_info_ = debug_info; + } // Gather 
unused but donated buffers, return them to the caller of this API. // We don't free buffers inside this function since the caller could have // different preferences for buffer deallocation. For example, in TensorFlow, @@ -399,6 +403,9 @@ class Executable { std::unique_ptr hlo_profile_printer_data_; std::unique_ptr hlo_profile_index_map_; + + // Generic debug information as a string. + std::string debug_info_; }; } // namespace xla From 55a311cb735689a431c6aa9a6c765c5c5c034ede Mon Sep 17 00:00:00 2001 From: Jian Li Date: Mon, 9 Nov 2020 08:58:46 -0800 Subject: [PATCH 037/220] Add RISC Conv Op register. PiperOrigin-RevId: 341415820 Change-Id: Ibd5f4c939e22be2af61e434e6927898b74e523a5 --- .../api_def/base_api/api_def_RiscConv.pbtxt | 54 +++++++++++++++++++ .../core/kernels/risc/experimental/BUILD | 11 ++++ .../kernels/risc/experimental/risc_conv_op.cc | 50 +++++++++++++++++ tensorflow/core/ops/risc_ops.cc | 11 ++++ tensorflow/python/ops/risc/risc_grad.py | 7 +++ tensorflow/python/ops/risc/risc_ops.py | 17 +++++- 6 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_RiscConv.pbtxt create mode 100644 tensorflow/core/kernels/risc/experimental/risc_conv_op.cc diff --git a/tensorflow/core/api_def/base_api/api_def_RiscConv.pbtxt b/tensorflow/core/api_def/base_api/api_def_RiscConv.pbtxt new file mode 100644 index 00000000000..a78ee1d2b89 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_RiscConv.pbtxt @@ -0,0 +1,54 @@ +op { + graph_op_name: "RiscConv" + visibility: HIDDEN + in_arg { + name: "input" + description: < 1, there will be k-1 skipped cells between each +filter element on that dimension. The dimension order is determined by the +value of `data_format`, see above for details. Dilations in the batch and +depth dimensions must be 1. +END + } + summary: "Computes a 2-D convolution given 4-D `input` and `filter` tensors." 
+} diff --git a/tensorflow/core/kernels/risc/experimental/BUILD b/tensorflow/core/kernels/risc/experimental/BUILD index a16c0b66271..d0e94be3120 100644 --- a/tensorflow/core/kernels/risc/experimental/BUILD +++ b/tensorflow/core/kernels/risc/experimental/BUILD @@ -17,9 +17,20 @@ tf_kernel_library( ], ) +tf_kernel_library( + name = "risc_conv_op", + srcs = ["risc_conv_op.cc"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + ], +) + tf_kernel_library( name = "experimental", deps = [ ":risc_add_op", + ":risc_conv_op", ], ) diff --git a/tensorflow/core/kernels/risc/experimental/risc_conv_op.cc b/tensorflow/core/kernels/risc/experimental/risc_conv_op.cc new file mode 100644 index 00000000000..58c5ee98eae --- /dev/null +++ b/tensorflow/core/kernels/risc/experimental/risc_conv_op.cc @@ -0,0 +1,50 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { +namespace risc { +namespace experimental { + +typedef Eigen::ThreadPoolDevice CPUDevice; + +template +class RiscConvOp : public OpKernel { + public: + explicit RiscConvOp(OpKernelConstruction* context) : OpKernel(context) { + // TODO(b/171294012): Implement RiscConv op. + } + + void Compute(OpKernelContext* context) override { + // TODO(b/171294012): Implement RiscConv op. + } +}; + +#define REGISTER_CPU(T) \ + REGISTER_KERNEL_BUILDER( \ + Name("RiscConv").Device(DEVICE_CPU).TypeConstraint("T"), \ + RiscConvOp); + +REGISTER_CPU(float); +REGISTER_CPU(double); + +} // namespace experimental +} // namespace risc +} // namespace tensorflow diff --git a/tensorflow/core/ops/risc_ops.cc b/tensorflow/core/ops/risc_ops.cc index 1d90a645965..a5b1e37fa84 100644 --- a/tensorflow/core/ops/risc_ops.cc +++ b/tensorflow/core/ops/risc_ops.cc @@ -30,4 +30,15 @@ REGISTER_OP("RiscAdd") .SetIsAggregate() .SetIsCommutative(); +// TODO(b/171294012): change shape function. 
+REGISTER_OP("RiscConv") + .Input("input: T") + .Input("filter: T") + .Output("output: T") + .Attr("T: {float, double}") + .Attr("strides: list(int)") + .Attr(GetConvnetDataFormatAttrString()) + .SetShapeFn(shape_inference::UnknownShape) + .Attr("dilations: list(int) = [1, 1, 1, 1]"); + } // namespace tensorflow diff --git a/tensorflow/python/ops/risc/risc_grad.py b/tensorflow/python/ops/risc/risc_grad.py index b125aab895a..5c0f76ba3a4 100644 --- a/tensorflow/python/ops/risc/risc_grad.py +++ b/tensorflow/python/ops/risc/risc_grad.py @@ -28,3 +28,10 @@ def _RiscAddGrad(_, grad): # pylint: disable=unused-argument # TODO(b/171294012): Implement gradient of RISC with RISC ops. return None, None + + +@ops.RegisterGradient("RiscConv") +def _RiscConvGrad(_, grad): + # pylint: disable=unused-argument + # TODO(b/171294012): Implement gradient of RISC with RISC ops. + return None, None diff --git a/tensorflow/python/ops/risc/risc_ops.py b/tensorflow/python/ops/risc/risc_ops.py index 8682ebdd269..f59e42dbf6e 100644 --- a/tensorflow/python/ops/risc/risc_ops.py +++ b/tensorflow/python/ops/risc/risc_ops.py @@ -30,5 +30,20 @@ from tensorflow.python.ops.risc_ops_gen import * def risc_add( input_lhs, input_rhs, - name="RISC_ADD"): + name='RISC_ADD'): return gen_risc_ops.risc_add(input_lhs, input_rhs, name=name) + + +def risc_conv(x, + kernel, + strides, + data_format='NHWC', + dilations=None, + name='RISC_CONV'): + return gen_risc_ops.risc_conv( + x, + kernel, + strides, + data_format=data_format, + dilations=dilations, + name=name) From 8266d57ad759edbdc2fcdea9db90da1336df4e8f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Nov 2020 10:18:25 -0800 Subject: [PATCH 038/220] Update ops-related pbtxt files. 
PiperOrigin-RevId: 341432816 Change-Id: Iac76b3fdc7c8a1de3b49255d628d0ded6750ffc8 --- .../ops/compat/ops_history_v2/RiscConv.pbtxt | 54 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 54 +++++++++++++++++++ 2 files changed, 108 insertions(+) create mode 100644 tensorflow/core/ops/compat/ops_history_v2/RiscConv.pbtxt diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscConv.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscConv.pbtxt new file mode 100644 index 00000000000..9828ec5a949 --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscConv.pbtxt @@ -0,0 +1,54 @@ +op { + name: "RiscConv" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "filter" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + } + } + } + attr { + name: "strides" + type: "list(int)" + } + attr { + name: "data_format" + type: "string" + default_value { + s: "NHWC" + } + allowed_values { + list { + s: "NHWC" + s: "NCHW" + } + } + } + attr { + name: "dilations" + type: "list(int)" + default_value { + list { + i: 1 + i: 1 + i: 1 + i: 1 + } + } + } +} diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 23d3ba8019c..9a3ea84fc18 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -41345,6 +41345,60 @@ op { is_aggregate: true is_commutative: true } +op { + name: "RiscConv" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "filter" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + } + } + } + attr { + name: "strides" + type: "list(int)" + } + attr { + name: "data_format" + type: "string" + default_value { + s: "NHWC" + } + allowed_values { + list { + s: "NHWC" + s: "NCHW" + } + } + } + attr { + name: "dilations" + type: "list(int)" 
+ default_value { + list { + i: 1 + i: 1 + i: 1 + i: 1 + } + } + } +} op { name: "RngReadAndSkip" input_arg { From fb31adb04fb65869fdc985687da5ae453e3c62e8 Mon Sep 17 00:00:00 2001 From: Edward Loper Date: Mon, 9 Nov 2020 10:20:17 -0800 Subject: [PATCH 039/220] Update RaggedTensor.merge_dims to return the ragged tensor as-is if outer_axis==inner_axis. I.e.: `rt.merge_dims(x, x) == rt`. (Previously, it raised an exception for outer_axis==inner_axis.) PiperOrigin-RevId: 341433226 Change-Id: I0f02a782a9feb000ab00dd25583e0de360cd4e4b --- .../ops/ragged/ragged_merge_dims_op_test.py | 32 +++++++++---------- tensorflow/python/ops/ragged/ragged_tensor.py | 4 +-- .../ops/structured/structured_tensor.py | 4 +-- .../ops/structured/structured_tensor_test.py | 4 +-- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/tensorflow/python/ops/ragged/ragged_merge_dims_op_test.py b/tensorflow/python/ops/ragged/ragged_merge_dims_op_test.py index 5e810e1b49c..30ae6feb601 100644 --- a/tensorflow/python/ops/ragged/ragged_merge_dims_op_test.py +++ b/tensorflow/python/ops/ragged/ragged_merge_dims_op_test.py @@ -164,6 +164,20 @@ class RaggedMergeDimsOpTest(test_util.TensorFlowTestCase, 'inner_axis': 3, 'expected': [[[1, 2], [3, 4], [5, 6], [7, 8]], [[9, 10], [11, 12]]], }, + { + 'testcase_name': 'OuterEqualsInner', + 'rt': [[1], [2], [3, 4]], + 'outer_axis': 0, + 'inner_axis': 0, + 'expected': [[1], [2], [3, 4]], + }, + { + 'testcase_name': 'OuterEqualsInnerWithNegativeAxis', + 'rt': [[1], [2], [3, 4]], + 'outer_axis': 1, + 'inner_axis': -1, + 'expected': [[1], [2], [3, 4]], + }, ]) # pyformat: disable def testRaggedMergeDims(self, rt, @@ -227,33 +241,19 @@ class RaggedMergeDimsOpTest(test_util.TensorFlowTestCase, 'exception': ValueError, 'message': 'inner_axis=-3 out of bounds: expected -2<=inner_axis<2', }, - { - 'rt': [[1]], - 'outer_axis': 0, - 'inner_axis': 0, - 'exception': ValueError, - 'message': 'Expected outer_axis .* to be less than inner_axis .*', - }, { 'rt': [[1]], 
'outer_axis': 1, 'inner_axis': 0, 'exception': ValueError, - 'message': 'Expected outer_axis .* to be less than inner_axis .*', + 'message': 'Expected outer_axis .* to be less than or equal to .*', }, { 'rt': [[1]], 'outer_axis': -1, 'inner_axis': -2, 'exception': ValueError, - 'message': 'Expected outer_axis .* to be less than inner_axis .*', - }, - { - 'rt': [[1]], - 'outer_axis': 1, - 'inner_axis': -1, - 'exception': ValueError, - 'message': 'Expected outer_axis .* to be less than inner_axis .*', + 'message': 'Expected outer_axis .* to be less than or equal to .*', }, ]) # pyformat: disable def testRaggedMergeDimsError(self, diff --git a/tensorflow/python/ops/ragged/ragged_tensor.py b/tensorflow/python/ops/ragged/ragged_tensor.py index 69740bd93f2..cf14e0f8801 100644 --- a/tensorflow/python/ops/ragged/ragged_tensor.py +++ b/tensorflow/python/ops/ragged/ragged_tensor.py @@ -1426,8 +1426,8 @@ class RaggedTensor(composite_tensor.CompositeTensor, self.shape.rank, axis_name="inner_axis", ndims_name="rank(self)") - if not outer_axis < inner_axis: - raise ValueError("Expected outer_axis (%d) to be less than " + if not outer_axis <= inner_axis: + raise ValueError("Expected outer_axis (%d) to be less than or equal to " "inner_axis (%d)" % (outer_axis, inner_axis)) return merge_dims(self, outer_axis, inner_axis) diff --git a/tensorflow/python/ops/structured/structured_tensor.py b/tensorflow/python/ops/structured/structured_tensor.py index 5b50cf42c56..20e54730d73 100644 --- a/tensorflow/python/ops/structured/structured_tensor.py +++ b/tensorflow/python/ops/structured/structured_tensor.py @@ -944,8 +944,8 @@ class StructuredTensor(composite_tensor.CompositeTensor): self.shape.rank, axis_name='inner_axis', ndims_name='rank(self)') - if not outer_axis < inner_axis: - raise ValueError('Expected outer_axis (%d) to be less than ' + if not outer_axis <= inner_axis: + raise ValueError('Expected outer_axis (%d) to be less than or equal to ' 'inner_axis (%d)' % (outer_axis, 
inner_axis)) return _merge_dims(self, outer_axis, inner_axis) diff --git a/tensorflow/python/ops/structured/structured_tensor_test.py b/tensorflow/python/ops/structured/structured_tensor_test.py index f4218042cc2..e1c414ae96e 100644 --- a/tensorflow/python/ops/structured/structured_tensor_test.py +++ b/tensorflow/python/ops/structured/structured_tensor_test.py @@ -916,8 +916,8 @@ class StructuredTensorTest(test_util.TensorFlowTestCase, def testMergeDimsError(self): st = StructuredTensor.from_pyval([[[{"a": 5}]]]) with self.assertRaisesRegex( - ValueError, - r"Expected outer_axis \(2\) to be less than inner_axis \(1\)"): + ValueError, r"Expected outer_axis \(2\) to be less than " + r"or equal to inner_axis \(1\)"): st.merge_dims(2, 1) def testTupleFieldValue(self): From e67f6b03f7bfadba608918e3bb2e8cd4a513e810 Mon Sep 17 00:00:00 2001 From: Jose Baiocchi Date: Mon, 9 Nov 2020 10:31:06 -0800 Subject: [PATCH 040/220] Add SetOrAddStat to XStatsBuilder Use it instead of FindOrAddMutableStat. When merging XPlane(s), correctly handles ref_value. 
PiperOrigin-RevId: 341435781 Change-Id: I4c6b5beeafc413a0d6e3f30566e1bced1b654e22 --- .../core/profiler/utils/xplane_builder.h | 88 +++++++++++++------ .../core/profiler/utils/xplane_utils.cc | 8 +- 2 files changed, 63 insertions(+), 33 deletions(-) diff --git a/tensorflow/core/profiler/utils/xplane_builder.h b/tensorflow/core/profiler/utils/xplane_builder.h index 2504f4b5c48..df4c3023df0 100644 --- a/tensorflow/core/profiler/utils/xplane_builder.h +++ b/tensorflow/core/profiler/utils/xplane_builder.h @@ -70,8 +70,8 @@ class XStatsBuilder { void AddStatValue(const XStatMetadata& metadata, std::string&& value) { AddStat(metadata)->set_str_value(std::move(value)); } - void AddStatValue(const XStatMetadata& key, const XStatMetadata& value) { - AddStat(key)->set_ref_value(value.id()); + void AddStatValue(const XStatMetadata& metadata, const XStatMetadata& value) { + AddStat(metadata)->set_ref_value(value.id()); } void AddStatValue(const XStatMetadata& metadata, const protobuf::MessageLite& proto) { @@ -79,27 +79,17 @@ class XStatsBuilder { proto.SerializeToString(bytes); } - void AddStat(const XStatMetadata& key, const XStat& stat, const XPlane& src) { - if (stat.value_case() == XStat::kRefValue) { - const auto& stat_metadata_map = src.stat_metadata(); - const auto it = stat_metadata_map.find(stat.ref_value()); - if (TF_PREDICT_TRUE(it != stat_metadata_map.end())) { - AddStatRefValue(key, it->second.name()); - } - } else { - XStat* new_stat = stats_owner_->add_stats(); - *new_stat = stat; - new_stat->set_metadata_id(key.id()); - } + // Adds a stat by copying a stat from another XPlane. Does not check if a stat + // with the same metadata already exists in the event. To avoid duplicated + // stats, use the variant below. 
+ void AddStat(const XStatMetadata& metadata, const XStat& src_stat, + const XPlane& src_plane) { + CopyStatValue(src_stat, src_plane, AddStat(metadata)); } - - XStat* FindOrAddMutableStat(int64 metadata_id) { - for (auto& stat : *stats_owner_->mutable_stats()) { - if (stat.metadata_id() == metadata_id) { - return &stat; - } - } - return stats_owner_->add_stats(); + // Same as above but overrides an existing stat with the same metadata. + void SetOrAddStat(const XStatMetadata& metadata, const XStat& src_stat, + const XPlane& src_plane) { + CopyStatValue(src_stat, src_plane, FindOrAddStat(metadata)); } void ParseAndAddStatValue(const XStatMetadata& metadata, @@ -114,9 +104,10 @@ class XStatsBuilder { } else if (absl::SimpleAtod(value, &double_value)) { AddStatValue(metadata, double_value); } else { - AddStatRefValue(metadata, value); + AddStatValue(metadata, GetOrCreateStatMetadata(value)); } } + void ReserveStats(size_t num_stats) { stats_owner_->mutable_stats()->Reserve(num_stats); } @@ -128,7 +119,48 @@ class XStatsBuilder { return stat; } - void AddStatRefValue(const XStatMetadata& metadata, absl::string_view value); + XStat* FindOrAddStat(const XStatMetadata& metadata) { + for (auto& stat : *stats_owner_->mutable_stats()) { + if (stat.metadata_id() == metadata.id()) { + return &stat; + } + } + return AddStat(metadata); + } + + void CopyStatValue(const XStat& src_stat, const XPlane& src_plane, + XStat* dst_stat) { + switch (src_stat.value_case()) { + case XStat::VALUE_NOT_SET: + break; + case XStat::kInt64Value: + dst_stat->set_int64_value(src_stat.int64_value()); + break; + case XStat::kUint64Value: + dst_stat->set_uint64_value(src_stat.uint64_value()); + break; + case XStat::kDoubleValue: + dst_stat->set_double_value(src_stat.double_value()); + break; + case XStat::kStrValue: + dst_stat->set_str_value(src_stat.str_value()); + break; + case XStat::kRefValue: { + const auto& stat_metadata_by_id = src_plane.stat_metadata(); + const auto it = 
stat_metadata_by_id.find(src_stat.ref_value()); + if (TF_PREDICT_TRUE(it != stat_metadata_by_id.end())) { + absl::string_view value = it->second.name(); + dst_stat->set_ref_value(GetOrCreateStatMetadata(value).id()); + } + break; + } + case XStat::kBytesValue: + dst_stat->set_bytes_value(src_stat.bytes_value()); + break; + } + } + + const XStatMetadata& GetOrCreateStatMetadata(absl::string_view value); T* stats_owner_; XPlaneBuilder* stats_metadata_owner_; @@ -293,11 +325,9 @@ class XPlaneBuilder : public XStatsBuilder { }; template -void XStatsBuilder::AddStatRefValue(const XStatMetadata& metadata, - absl::string_view value) { - const XStatMetadata* ref_value = - stats_metadata_owner_->GetOrCreateStatMetadata(value); - AddStatValue(metadata, *ref_value); +const XStatMetadata& XStatsBuilder::GetOrCreateStatMetadata( + absl::string_view value) { + return *stats_metadata_owner_->GetOrCreateStatMetadata(value); } } // namespace profiler diff --git a/tensorflow/core/profiler/utils/xplane_utils.cc b/tensorflow/core/profiler/utils/xplane_utils.cc index 68f7c127e2f..96cf4fc117c 100644 --- a/tensorflow/core/profiler/utils/xplane_utils.cc +++ b/tensorflow/core/profiler/utils/xplane_utils.cc @@ -185,10 +185,8 @@ void MergePlanes(const XPlane& src_plane, XPlane* dst_plane) { XPlaneBuilder dst(dst_plane); src.ForEachStat([&](const tensorflow::profiler::XStatVisitor& stat) { XStatMetadata* stat_metadata = dst.GetOrCreateStatMetadata(stat.Name()); - XStat* new_stat = dst.FindOrAddMutableStat(stat_metadata->id()); - // Add or override the existing stat value except the metadata id. - *new_stat = stat.RawStat(); - new_stat->set_metadata_id(stat_metadata->id()); + // Use SetOrAddStat to avoid duplicating stats in dst_plane. 
+ dst.SetOrAddStat(*stat_metadata, stat.RawStat(), src_plane); }); src.ForEachLine([&](const tensorflow::profiler::XLineVisitor& line) { XLineBuilder dst_line = dst.GetOrCreateLine(line.Id()); @@ -231,6 +229,8 @@ void MergePlanes(const XPlane& src_plane, XPlane* dst_plane) { dst_event.SetNumOccurrences(event.NumOccurrences()); } event.ForEachStat([&](const tensorflow::profiler::XStatVisitor& stat) { + // Here we can call AddStat instead of SetOrAddStat because dst_event + // was just added. dst_event.AddStat(*dst.GetOrCreateStatMetadata(stat.Name()), stat.RawStat(), src_plane); }); From d0d33f6d0431ef4feecda52bc3fd0813412b5905 Mon Sep 17 00:00:00 2001 From: Jay Shi Date: Mon, 9 Nov 2020 10:33:48 -0800 Subject: [PATCH 041/220] [tf.data] Increase the roll out percentage of optimization `map_parallelization` to 20%. PiperOrigin-RevId: 341436467 Change-Id: Ieeefda39f9896d7c34f089c9e6eccf07b904d222 --- tensorflow/core/kernels/data/optimize_dataset_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/data/optimize_dataset_op.cc b/tensorflow/core/kernels/data/optimize_dataset_op.cc index 821204f8908..b3df18a53c7 100644 --- a/tensorflow/core/kernels/data/optimize_dataset_op.cc +++ b/tensorflow/core/kernels/data/optimize_dataset_op.cc @@ -85,7 +85,7 @@ void OptimizeDatasetOp::MakeDataset(OpKernelContext* ctx, DatasetBase* input, // clang-format off absl::flat_hash_map live_experiments = { {"enable_gradient_descent", 100}, - {"map_parallelization", 5} + {"map_parallelization", 20} }; // clang-format on auto hash_func = [](const string& str) { return Hash64(str); }; From 382678628baa2de3688a429bd18f0b4dd9bb8d88 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Nov 2020 10:48:27 -0800 Subject: [PATCH 042/220] Go: Update generated wrapper functions for TensorFlow ops. 
PiperOrigin-RevId: 341439473 Change-Id: I71fa373a670a291f35dc21d71fe2f1cef3829fa5 --- tensorflow/go/op/wrappers.go | 215 ++++++++++++++++++++++------------- 1 file changed, 139 insertions(+), 76 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index f8c4149e1ab..d65bd33f48f 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -14570,6 +14570,21 @@ func WholeFileReaderV2(scope *Scope, optional ...WholeFileReaderV2Attr) (reader_ return op.Output(0) } +// Generate a glob pattern matching all sharded file names. +func ShardedFilespec(scope *Scope, basename tf.Output, num_shards tf.Output) (filename tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ShardedFilespec", + Input: []tf.Input{ + basename, num_shards, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Saves the input tensors to disk. // // The size of `tensor_names` must match the number of tensors in `data`. `data[i]` @@ -22697,6 +22712,69 @@ func TanhGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { return op.Output(0) } +// RiscConvAttr is an optional argument to RiscConv. +type RiscConvAttr func(optionalAttr) + +// RiscConvDataFormat sets the optional data_format attribute to value. +// +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, height, width, channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, channels, height, width]. +// If not specified, defaults to "NHWC" +func RiscConvDataFormat(value string) RiscConvAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// RiscConvDilations sets the optional dilations attribute to value. +// +// value: 1-D tensor of length 4. The dilation factor for each dimension of +// `input`. 
If set to k > 1, there will be k-1 skipped cells between each +// filter element on that dimension. The dimension order is determined by the +// value of `data_format`, see above for details. Dilations in the batch and +// depth dimensions must be 1. +// If not specified, defaults to +func RiscConvDilations(value []int64) RiscConvAttr { + return func(m optionalAttr) { + m["dilations"] = value + } +} + +// Computes a 2-D convolution given 4-D `input` and `filter` tensors. +// +// Arguments: +// input: A 4-D tensor. The dimension order is interpreted according to the value +// of `data_format`, see below for details. +// filter: A 4-D tensor of shape +// `[filter_height, filter_width, in_channels, out_channels]` +// strides: 1-D tensor of length 4. The stride of the sliding window for each +// dimension of `input`. The dimension order is determined by the value of +// `data_format`, see below for details. +// +// Returns A 4-D tensor. The dimension order is determined by the value of +// `data_format`, see below for details. +func RiscConv(scope *Scope, input tf.Output, filter tf.Output, strides []int64, optional ...RiscConvAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"strides": strides} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RiscConv", + Input: []tf.Input{ + input, filter, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Computes hyperbolic tangent of `x` element-wise. // // Given an input tensor, this function computes hyperbolic tangent of every @@ -29742,6 +29820,67 @@ func RandomGammaGrad(scope *Scope, alpha tf.Output, sample tf.Output) (output tf return op.Output(0) } +// RandomShuffleAttr is an optional argument to RandomShuffle. +type RandomShuffleAttr func(optionalAttr) + +// RandomShuffleSeed sets the optional seed attribute to value. 
+// +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func RandomShuffleSeed(value int64) RandomShuffleAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// RandomShuffleSeed2 sets the optional seed2 attribute to value. +// +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomShuffleSeed2(value int64) RandomShuffleAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Randomly shuffles a tensor along its first dimension. +// +// The tensor is shuffled along dimension 0, such that each `value[j]` is mapped +// to one and only one `output[i]`. For example, a mapping that might occur for a +// 3x2 tensor is: +// +// ``` +// [[1, 2], [[5, 6], +// [3, 4], ==> [1, 2], +// [5, 6]] [3, 4]] +// ``` +// +// Arguments: +// value: The tensor to be shuffled. +// +// Returns A tensor of same shape and type as `value`, shuffled along its first +// dimension. +func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RandomShuffle", + Input: []tf.Input{ + value, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Creates a dataset that takes a Bernoulli sample of the contents of another dataset. // // There is no transformation in the `tf.data` Python API for creating this dataset. @@ -36066,21 +36205,6 @@ func SparseCross(scope *Scope, indices []tf.Output, values []tf.Output, shapes [ return op.Output(0), op.Output(1), op.Output(2) } -// Generate a glob pattern matching all sharded file names. 
-func ShardedFilespec(scope *Scope, basename tf.Output, num_shards tf.Output) (filename tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ShardedFilespec", - Input: []tf.Input{ - basename, num_shards, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Writes a scalar summary. // // Writes scalar `value` at `step` with `tag` using summary `writer`. @@ -37137,67 +37261,6 @@ func SparseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf return op.Output(0), op.Output(1), op.Output(2) } -// RandomShuffleAttr is an optional argument to RandomShuffle. -type RandomShuffleAttr func(optionalAttr) - -// RandomShuffleSeed sets the optional seed attribute to value. -// -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func RandomShuffleSeed(value int64) RandomShuffleAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// RandomShuffleSeed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomShuffleSeed2(value int64) RandomShuffleAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Randomly shuffles a tensor along its first dimension. -// -// The tensor is shuffled along dimension 0, such that each `value[j]` is mapped -// to one and only one `output[i]`. For example, a mapping that might occur for a -// 3x2 tensor is: -// -// ``` -// [[1, 2], [[5, 6], -// [3, 4], ==> [1, 2], -// [5, 6]] [3, 4]] -// ``` -// -// Arguments: -// value: The tensor to be shuffled. -// -// Returns A tensor of same shape and type as `value`, shuffled along its first -// dimension. 
-func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RandomShuffle", - Input: []tf.Input{ - value, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Selects elements from `x` or `y`, depending on `condition`. // // The `x`, and `y` tensors must all have the same shape, and the From f3f2221eea3747aad87f677e24d27c5efceea7d2 Mon Sep 17 00:00:00 2001 From: Yanhui Liang Date: Mon, 9 Nov 2020 10:59:06 -0800 Subject: [PATCH 043/220] Replace the usages of `profiler.start` and `profiler.stop` with TF public APIs. PiperOrigin-RevId: 341441807 Change-Id: I2693bc6aa59ea40867b6d0886e29491e1b976898 --- .../benchmarks/model_components_benchmarks_test.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/keras/benchmarks/model_components_benchmarks_test.py b/tensorflow/python/keras/benchmarks/model_components_benchmarks_test.py index c0a8a255f57..2e1f5ea2be3 100644 --- a/tensorflow/python/keras/benchmarks/model_components_benchmarks_test.py +++ b/tensorflow/python/keras/benchmarks/model_components_benchmarks_test.py @@ -25,9 +25,9 @@ from six.moves import xrange # pylint: disable=redefined-builtin import tensorflow as tf from tensorflow.python.eager import context -from tensorflow.python.eager import profiler from tensorflow.python.eager.context import get_executor from tensorflow.python.platform import test +from tensorflow.python.profiler import profiler_v2 as profiler class SubclassedKerasModel(tf.keras.Model): @@ -213,11 +213,11 @@ class KerasComponentsBenchmarks(test.Benchmark): self._benchmark_keras_model_fit(model) def benchmark_keras_model_functional_fit_graph_mode_with_profiler(self): - profiler.start() + profiler.start("") with context.graph_mode(): model = 
make_keras_model(initializer="glorot_uniform") self._benchmark_keras_model_fit(model) - result = profiler.stop() + result = profiler.stop(save=False) assert result is not None def benchmark_keras_model_functional_fit_run_model_eagerly(self): @@ -226,10 +226,10 @@ class KerasComponentsBenchmarks(test.Benchmark): def benchmark_keras_model_functional_fit_run_model_eagerly_with_profiler( self): - profiler.start() + profiler.start("") model = make_keras_model(initializer="glorot_uniform") self._benchmark_keras_model_fit(model, run_eagerly=True) - result = profiler.stop() + result = profiler.stop(save=False) assert result is not None def benchmark_keras_model_sequential_fit(self): From ed36a74434942dc005fc80418f3d5d377604f3f7 Mon Sep 17 00:00:00 2001 From: Marcello Maggioni Date: Mon, 9 Nov 2020 11:01:13 -0800 Subject: [PATCH 044/220] [XLA] Unconstraining MakeConvertHlo methods to accept converting to same type. If that happens return the same HLO unmodified. This allows writing algorithms involving not knowing the type is gonna be passed in advance in an easier way. 
PiperOrigin-RevId: 341442319 Change-Id: Ie2a939ee2d806371b2d537ce85e1fdf9dcdafa39 --- tensorflow/compiler/xla/service/hlo_creation_utils.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils.cc b/tensorflow/compiler/xla/service/hlo_creation_utils.cc index 4aeeb6d27ac..a90d2828000 100644 --- a/tensorflow/compiler/xla/service/hlo_creation_utils.cc +++ b/tensorflow/compiler/xla/service/hlo_creation_utils.cc @@ -248,7 +248,9 @@ StatusOr MakeConcatHlo( } HloInstruction* MakeConvertToHlo(HloInstruction* hlo, PrimitiveType type) { - CHECK_NE(hlo->shape().element_type(), type); + if (hlo->shape().element_type() == type) { + return hlo; + } Shape shape = ShapeUtil::ChangeElementType(hlo->shape(), type); hlo = hlo->parent()->AddInstruction(HloInstruction::CreateConvert(shape, hlo)); @@ -258,7 +260,9 @@ HloInstruction* MakeConvertToHlo(HloInstruction* hlo, PrimitiveType type) { HloInstruction* MakeBitcastConvertToHlo(HloInstruction* hlo, PrimitiveType type) { - CHECK_NE(hlo->shape().element_type(), type); + if (hlo->shape().element_type() == type) { + return hlo; + } Shape shape = ShapeUtil::ChangeElementType(hlo->shape(), type); // PRED are stored as one byte, PRED have a BitWidth of 1, avoid this problem // by using a convert instead of bitcast convert. From 0ee580cb41bd87c2afd56cdbc2b459c7d4c5b533 Mon Sep 17 00:00:00 2001 From: Jeremy Lau Date: Mon, 9 Nov 2020 11:01:42 -0800 Subject: [PATCH 045/220] Disable parse_headers for core/lib/jpeg. portable_jpeg_internal was moved from core/, which disables parse_headers. portable_jpeg_internal currently does not build with parse_headers enabled. 
PiperOrigin-RevId: 341442420 Change-Id: Idf86de03849b8663faf8466cea86d601a2201000 --- tensorflow/core/lib/jpeg/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/lib/jpeg/BUILD b/tensorflow/core/lib/jpeg/BUILD index a254fa80397..30dfb625f7e 100644 --- a/tensorflow/core/lib/jpeg/BUILD +++ b/tensorflow/core/lib/jpeg/BUILD @@ -15,6 +15,7 @@ load( package( default_visibility = ["//tensorflow/core:__pkg__"], + features = ["-parse_headers"], licenses = ["notice"], # Apache 2.0 ) From 1afab854847b26ea7e01bacff3c5c8b2d9cd2f93 Mon Sep 17 00:00:00 2001 From: Jay Shi Date: Mon, 9 Nov 2020 11:09:47 -0800 Subject: [PATCH 046/220] [tf.data] Apply gradient descent method as default algorithm for autotuning optimization. PiperOrigin-RevId: 341444332 Change-Id: I359e8269166d5e8e89514f5fe8e53f0733dab456 --- .../core/kernels/data/optimize_dataset_op.cc | 4 +- .../kernel_tests/optimize_dataset_test.py | 122 ++++++++++-------- .../experimental/ops/optimization_options.py | 4 +- 3 files changed, 73 insertions(+), 57 deletions(-) diff --git a/tensorflow/core/kernels/data/optimize_dataset_op.cc b/tensorflow/core/kernels/data/optimize_dataset_op.cc index b3df18a53c7..15a035e808a 100644 --- a/tensorflow/core/kernels/data/optimize_dataset_op.cc +++ b/tensorflow/core/kernels/data/optimize_dataset_op.cc @@ -84,7 +84,6 @@ void OptimizeDatasetOp::MakeDataset(OpKernelContext* ctx, DatasetBase* input, // of the Borg jobs, the experiments will be randomly turned on. // clang-format off absl::flat_hash_map live_experiments = { - {"enable_gradient_descent", 100}, {"map_parallelization", 20} }; // clang-format on @@ -111,6 +110,9 @@ void OptimizeDatasetOp::MakeDataset(OpKernelContext* ctx, DatasetBase* input, // The vector stores the graduated experiment names which will be turned on // for all input pipelines. + // + // Note some of the graduated experiments may be hard coded, so not listed + // below. 
// clang-format off std::vector graduated_experiments = {"disable_intra_op_parallelism"}; // clang-format on diff --git a/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py index 51bd5fbcbaf..f731a714cab 100644 --- a/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py @@ -245,38 +245,6 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): self.assertDatasetProduces(dataset, expected_output=expected_output) - @combinations.generate( - combinations.times( - test_base.default_test_combinations(), - combinations.combine(autotune=False, autotune_buffers=False) + - combinations.combine(autotune=True, autotune_buffers=False) + - combinations.combine(autotune=True, autotune_buffers=True), - combinations.combine(set_env=[False, True]))) - def testOptimizationEnableGradientDescent(self, autotune, autotune_buffers, - set_env): - if set_env: - os.environ["TF_DATA_EXPERIMENT_OPT_IN"] = "enable_gradient_descent" - os.environ["TF_JOB_NAME"] = "test_job" - - dataset = dataset_ops.Dataset.range(5) - dataset = dataset.prefetch(buffer_size=-1) - dataset = dataset.map(lambda x: x + 1, num_parallel_calls=2) - dataset = dataset.map(lambda x: x + 1, num_parallel_calls=-1) - dataset = dataset.prefetch(buffer_size=3) - dataset = dataset.map(lambda x: x + 1, num_parallel_calls=-1) - dataset = dataset.prefetch(buffer_size=1) - - options = dataset_ops.Options() - options.experimental_optimization.autotune = autotune - options.experimental_optimization.autotune_buffers = autotune_buffers - dataset = dataset.with_options(options) - - self.assertDatasetProduces(dataset, expected_output=list(range(3, 8))) - - if set_env: - del os.environ["TF_DATA_EXPERIMENT_OPT_IN"] - del os.environ["TF_JOB_NAME"] - @combinations.generate( combinations.times( 
test_base.default_test_combinations(), @@ -533,34 +501,80 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): self.assertEqual(set(graph_rewrites.default), set(expected_optimizations_default)) - @combinations.generate(test_base.default_test_combinations()) - def testAutotuningDefaults(self): + @combinations.generate( + combinations.times( + test_base.default_test_combinations(), + combinations.combine(autotune=[True, False, None]), + combinations.combine(autotune_buffers=[True, False, None]))) + def testAutotuningSettings(self, autotune, autotune_buffers): options = dataset_ops.Options() + if autotune is not None: + options.experimental_optimization.autotune = autotune + if autotune_buffers is not None: + options.experimental_optimization.autotune_buffers = autotune_buffers # Check defaults - autotune, algorithm, cpu_budget, ram_budget = options._autotune_settings() - self.assertTrue(autotune) - self.assertEqual(algorithm, - optimization_options._AutotuneAlgorithm.HILL_CLIMB) - self.assertEqual(cpu_budget, 0) - self.assertEqual(ram_budget, 0) + autotune_settings = options._autotune_settings() + autotune_val = autotune_settings[0] + autotune_buffers_val = options.experimental_optimization._autotune_buffers() - @combinations.generate(test_base.default_test_combinations()) - def testAutotuningSettings(self): + if autotune is not False: # pylint: disable=g-bool-id-comparison + self.assertTrue(autotune_val) + else: + self.assertFalse(autotune_val) + if autotune_buffers is True: # pylint: disable=g-bool-id-comparison + self.assertTrue(autotune_buffers_val) + else: + self.assertFalse(autotune_buffers_val) + + @combinations.generate( + combinations.times( + test_base.default_test_combinations(), + combinations.combine(autotune_buffers=[True, False, None]))) + def testAutotuneBuffersSettings(self, autotune_buffers): options = dataset_ops.Options() - options.experimental_optimization.autotune_cpu_budget = 1000 - 
options.experimental_optimization.autotune_ram_budget = 999999999 - options.experimental_optimization.autotune_buffers = True - self.assertIn("autotune_buffer_sizes", options._graph_rewrites().enabled) - self.assertIn("disable_prefetch_legacy_autotune", - options._graph_rewrites().enabled) + if autotune_buffers is not None: + options.experimental_optimization.autotune_buffers = autotune_buffers - autotune, algorithm, cpu_budget, ram_budget = options._autotune_settings() - self.assertTrue(autotune) - self.assertEqual(algorithm, - optimization_options._AutotuneAlgorithm.GRADIENT_DESCENT) - self.assertEqual(cpu_budget, 1000) - self.assertEqual(ram_budget, 999999999) + graph_rewrites = options._graph_rewrites() + autotune_settings = options._autotune_settings() + algorithm = autotune_settings[1] + + if autotune_buffers is True: # pylint: disable=g-bool-id-comparison + self.assertIn("autotune_buffer_sizes", graph_rewrites.enabled) + self.assertIn("disable_prefetch_legacy_autotune", graph_rewrites.enabled) + else: + self.assertNotIn("autotune_buffer_sizes", graph_rewrites.enabled) + self.assertNotIn("disable_prefetch_legacy_autotune", + graph_rewrites.enabled) + if autotune_buffers is False: # pylint: disable=g-bool-id-comparison + self.assertEqual(algorithm, + optimization_options._AutotuneAlgorithm.HILL_CLIMB) + else: + self.assertEqual(algorithm, + optimization_options._AutotuneAlgorithm.GRADIENT_DESCENT) + + @combinations.generate( + combinations.times( + test_base.default_test_combinations(), + combinations.combine(set_budget=[True, False]), + )) + def testResourceBudgets(self, set_budget): + options = dataset_ops.Options() + if set_budget: + options.experimental_optimization.autotune_cpu_budget = 1000 + options.experimental_optimization.autotune_ram_budget = 999999999 + + autotune_settings = options._autotune_settings() + cpu_budget = autotune_settings[2] + ram_budget = autotune_settings[3] + + if set_budget: + self.assertEqual(cpu_budget, 1000) + 
self.assertEqual(ram_budget, 999999999) + else: + self.assertEqual(cpu_budget, 0) + self.assertEqual(ram_budget, 0) if __name__ == "__main__": diff --git a/tensorflow/python/data/experimental/ops/optimization_options.py b/tensorflow/python/data/experimental/ops/optimization_options.py index 5c69855e15f..a2d6c77cfb7 100644 --- a/tensorflow/python/data/experimental/ops/optimization_options.py +++ b/tensorflow/python/data/experimental/ops/optimization_options.py @@ -228,8 +228,8 @@ class OptimizationOptions(options.OptionsBase): # If autotune_buffers is enabled, we use the GRADIENT_DESCENT algorithm by # default, which is more performant for tuning heterogeneous parameters. algorithm = ( - _AutotuneAlgorithm.GRADIENT_DESCENT - if self._autotune_buffers() else _AutotuneAlgorithm.HILL_CLIMB) + _AutotuneAlgorithm.HILL_CLIMB if self.autotune_buffers is False # pylint: disable=g-bool-id-comparison + else _AutotuneAlgorithm.GRADIENT_DESCENT) cpu_budget = 0 # Indicates that all CPU cores should be used by default. ram_budget = 0 # Indicates that default value of RAM budget should be used. 
From 600e892d02c12196d3b741fbb3cd8ded285cde85 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Mon, 9 Nov 2020 11:22:41 -0800 Subject: [PATCH 047/220] [XLA:SPMD] Support tuple reduce on sharded dimensions PiperOrigin-RevId: 341447073 Change-Id: I24a8479287e53ce1041c490eda09a322adaf5095 --- .../xla/service/spmd/spmd_partitioner.cc | 66 +++++++++++++------ .../xla/service/spmd/spmd_partitioner_test.cc | 29 +++++--- 2 files changed, 66 insertions(+), 29 deletions(-) diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc index 891cbe0ea53..da187e85889 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc @@ -2954,23 +2954,26 @@ Status SpmdPartitioningVisitor::HandleReduce(HloInstruction* hlo) { std::vector inputs; std::vector inits; + std::vector preserved_dims; + for (int64 i = 0; i < hlo->operand(0)->shape().rank(); ++i) { + if (!absl::c_linear_search(hlo->dimensions(), i)) { + preserved_dims.push_back(i); + } + } + for (int64 operand_id = 0; operand_id < input_count; ++operand_id) { inits.push_back(GetPartitionedHlo(hlo->operand(operand_id + input_count)) .Reshard(HloSharding::Replicate()) .hlo()); inputs.push_back(GetPartitionedHlo(hlo->operand(operand_id))); - if (hlo->shape().IsTuple() && operand_id == 0) { - // We cannot do tuple-reduce where partitioned dimensions are reduced. - // Partially replicate on those dims. - inputs[0] = inputs[0].Reshard( - hlo_sharding_util::PartiallyReplicateTiledShardingOnDims( - inputs[0].sharding(), hlo->dimensions())); - } else { + if (operand_id > 0) { // Make sure all operands are sharded in the same way. 
inputs.back() = inputs.back().Reshard(inputs[0].sharding()); } if (!inputs[0].sharding().IsTileMaximal()) { - inputs.back() = inputs.back().PadWithValue(inits[operand_id]); + inputs.back() = + inputs.back().PadWithValue(inits[operand_id], /*left_padded_dims=*/{}, + /*skipped_dims=*/preserved_dims); } } @@ -3001,28 +3004,53 @@ Status SpmdPartitioningVisitor::HandleReduce(HloInstruction* hlo) { return inputs[0].sharding().tile_assignment().dim(i) > 1; }); if (reduce_sharded_dimension) { - CHECK(local_reduce->shape().IsArray()); - std::vector preserved_dims; - for (int64 i = 0; i < inputs[0].base_shape().rank(); ++i) { - if (!absl::c_linear_search(hlo->dimensions(), i)) { - preserved_dims.push_back(i); - } - } if (inputs[0].sharding().ReplicateOnLastTileDim()) { preserved_dims.push_back(inputs[0].base_shape().rank()); } auto grouped = GroupShardingOnDims(inputs[0].sharding(), preserved_dims); auto grouped_state = CreatePerGroupPartitioningState( inputs[0].state(), grouped.device_groups, &b_); - reduce = grouped_state.collective_ops_creator - .create_cross_partition_all_reduce( - &b_, local_reduce, hlo->to_apply(), {}, NewChannel()); + if (local_reduce->shape().IsArray()) { + reduce = grouped_state.collective_ops_creator + .create_cross_partition_all_reduce( + &b_, local_reduce, hlo->to_apply(), {}, NewChannel()); + } else { + std::vector all_gathered_partial_results(input_count); + for (int64 i = 0; i < input_count; ++i) { + auto gte = b_.AddInstruction(HloInstruction::CreateGetTupleElement( + ShapeUtil::GetTupleElementShape(reduce_shape, i), local_reduce, + i)); + auto expanded_shape = input_hlos[i]->shape(); + auto all_gather_shape = input_hlos[i]->shape(); + for (int64 dim : hlo->dimensions()) { + expanded_shape.set_dimensions(dim, 1); + all_gather_shape.set_dimensions( + dim, inputs[0].sharding().tile_assignment().dim(i)); + } + auto reshape = b_.AddInstruction( + HloInstruction::CreateReshape(expanded_shape, gte)); + // Replicate per group. 
+ reshape->set_sharding(grouped.sharding); + all_gathered_partial_results[i] = + PartitionedHlo(reshape, all_gather_shape, grouped_state) + .Replicate() + .hlo(); + } + reduce = b_.AddInstruction(HloInstruction::CreateReduce( + reduce_shape, all_gathered_partial_results, inits, + hlo->dimensions(), hlo->to_apply())); + } } auto sharding = hlo_sharding_util::RemoveShapeDimensions( hlo_sharding_util::PartiallyReplicateTiledShardingOnDims( inputs[0].sharding(), hlo->dimensions()), hlo->dimensions()); - reduce->set_sharding(sharding); + if (local_reduce->shape().IsArray()) { + reduce->set_sharding(sharding); + } else { + reduce->set_sharding(HloSharding::Tuple( + reduce->shape(), std::vector(input_count, sharding))); + } return PartitionedHlo(reduce, hlo->shape(), MakePartitioningState()) .Reshard(hlo->sharding()) .hlo(); diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc index cebcfa3cadc..91a0c44b51a 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc @@ -3123,7 +3123,7 @@ ENTRY %main { op::Shape("(f32[14], s32[14])"))); } -TEST_F(SpmdPartitioningTest, TiledToTiledTupleReduce2) { +TEST_F(SpmdPartitioningTest, TiledToPartiallyTiledTupleReduce) { const char* const hlo_string = R"( HloModule module @@ -3153,16 +3153,25 @@ ENTRY %main { PartitionComputation(hlo_string, /*num_devices=*/4)); VLOG(1) << module->ToString(); - auto lhs = - AllOf(op::Shape("f32[14,10]"), - op::AllReduce(op::DynamicUpdateSlice(_, op::Parameter(0), _, _))); - auto rhs = - AllOf(op::Shape("s32[14,10]"), - op::AllReduce(op::DynamicUpdateSlice(_, op::Parameter(1), _, _))); + auto lhs = AllOf(op::Shape("f32[14,5]"), op::Parameter(0)); + auto rhs = AllOf(op::Shape("s32[14,5]"), op::Parameter(1)); + auto local_reduce = + AllOf(op::Reduce(lhs, rhs, op::Parameter(2), op::Parameter(3)), + op::Shape("(f32[14], s32[14])")); + auto 
reshape_l = AllOf(op::Reshape(op::GetTupleElement(local_reduce)), + op::Shape("f32[14,1]")); + auto reshape_r = AllOf(op::Reshape(op::GetTupleElement(local_reduce)), + op::Shape("s32[14,1]")); + auto broadcast_l = + AllOf(op::AllReduce(op::DynamicUpdateSlice(_, reshape_l, _, _)), + op::Shape("f32[14,2]")); + auto broadcast_r = + AllOf(op::AllReduce(op::DynamicUpdateSlice(_, reshape_r, _, _)), + op::Shape("s32[14,2]")); auto root = module->entry_computation()->root_instruction(); - EXPECT_THAT(root, - AllOf(op::Reduce(lhs, rhs, op::Parameter(2), op::Parameter(3)), - op::Shape("(f32[14], s32[14])"))); + EXPECT_THAT(root, AllOf(op::Reduce(broadcast_l, broadcast_r, op::Parameter(2), + op::Parameter(3)), + op::Shape("(f32[14], s32[14])"))); } TEST_F(SpmdPartitioningTest, TiledToTiledReduceOutputReshard) { From d2d9e0814fd15148bfdb9ab4b6b5b7445c617162 Mon Sep 17 00:00:00 2001 From: Kuangyuan Chen Date: Mon, 9 Nov 2020 11:28:43 -0800 Subject: [PATCH 048/220] Import restore graphs in SavedModelSignatureDefImporterLite. This CL also changes the session_initializer's initializer attribute from one symbol reference to an array of symbol references as there will be more than one init functions with a restricted initialization order. SavedModelSignatureDefImporter's output is not changed as importing restore graph is disabled in SavedModelSignatureDefImporter. 
PiperOrigin-RevId: 341448438 Change-Id: Ia179a63bcf88c0d0541247b677caf03773435914 --- .../mlir/tensorflow/ir/tf_saved_model.cc | 98 ++++++++++++------- .../mlir/tensorflow/ir/tf_saved_model.h | 2 +- .../mlir/tensorflow/ir/tf_saved_model_ops.td | 16 +-- .../tf_saved_model/hash_table_asset_v1.py | 4 +- .../tests/tf_saved_model/hash_table_v1.py | 6 +- .../tests/tf_saved_model/import_restore_v1.py | 80 +++++++++++++++ .../tensorflow/tests/tf_saved_model_ops.mlir | 2 +- .../tests/tf_saved_model_ops_invalid.mlir | 18 ++-- ...el_remove_vars_in_session_initializer.mlir | 10 +- .../remove_vars_in_session_initializer.cc | 34 ++++--- .../mlir/tensorflow/translate/import_model.cc | 86 +++++++++++----- 11 files changed, 255 insertions(+), 101 deletions(-) create mode 100644 tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/import_restore_v1.py diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc index 1eaf997ab69..fdf95d370fe 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc @@ -82,25 +82,28 @@ static LogicalResult Verify(SessionInitializerOp session_initializer) { mlir::SymbolTable symbol_table( session_initializer.getParentOfType()); - auto init_func_op = - symbol_table.lookup(session_initializer.initializer()); - if (!init_func_op) - return session_initializer.emitOpError() - << "the initializer function does not exist"; + for (auto sym_ref : session_initializer.initializers()) { + auto init_func_op = symbol_table.lookup( + sym_ref.cast().getValue()); - if (!init_func_op.getType().getResults().empty()) - return session_initializer.emitOpError() - << "the initializer function should have no output"; + if (!init_func_op) + return session_initializer.emitOpError() + << "the initializer function does not exist"; - auto exported_names = GetExportedNames(init_func_op); + if 
(!init_func_op.getType().getResults().empty()) + return session_initializer.emitOpError() + << "the initializer function should have no output"; - if (exported_names.empty()) - return session_initializer.emitOpError() - << "the initializer function should be exported"; + auto exported_names = GetExportedNames(init_func_op); - if (exported_names.size() != 1) - return session_initializer.emitOpError() - << "the initializer function should have only one exported names"; + if (exported_names.empty()) + return session_initializer.emitOpError() + << "the initializer function should be exported"; + + if (exported_names.size() != 1) + return session_initializer.emitOpError() + << "the initializer function should have only one exported names"; + } return success(); } @@ -291,7 +294,11 @@ static LogicalResult VerifySavedModelModule( auto is_init = [&session_initializers](mlir::FuncOp func) { if (session_initializers.empty()) return false; - return (*session_initializers.begin()).initializer() == func.getName(); + auto init_syms = (*session_initializers.begin()).initializers(); + return std::any_of( + init_syms.begin(), init_syms.end(), [&](Attribute sym_ref) { + return sym_ref.cast().getValue() == func.getName(); + }); }; SymbolTable symbol_table(module); @@ -450,22 +457,36 @@ class OptimizeSessionInitializerPattern LogicalResult matchAndRewrite(SessionInitializerOp op, PatternRewriter &rewriter) const override { SymbolTable symbol_table(op.getParentOfType()); - auto init_func_op = symbol_table.lookup(op.initializer()); - // The init function can only be referenced from the SessionInitializerOp. - // And there is at most one SessionInitializerOp in the module. So if both - // ops have no other uses or have one NoOp only, they can be simply erased. 
- auto &operations = init_func_op.front().getOperations(); - if ((operations.size() == 1 && operations.front().isKnownTerminator()) || - (operations.size() == 2 && - dyn_cast(operations.front()) && - operations.back().isKnownTerminator())) { - rewriter.eraseOp(init_func_op); - rewriter.eraseOp(op); - return success(); + SmallVector to_remove; + SmallVector to_keep; + for (auto sym_ref : op.initializers()) { + auto init_func_op = symbol_table.lookup( + sym_ref.cast().getValue()); + + // The init function can only be referenced from the SessionInitializerOp. + // And there is at most one SessionInitializerOp in the module. So if both + // ops have no other uses or have one NoOp only, they can be simply + // erased. + auto &operations = init_func_op.front().getOperations(); + if ((operations.size() == 1 && operations.front().isKnownTerminator()) || + (operations.size() == 2 && + dyn_cast(operations.front()) && + operations.back().isKnownTerminator())) { + to_remove.push_back(init_func_op); + } else { + to_keep.push_back(sym_ref); + } } - return failure(); + for (auto func_op : to_remove) rewriter.eraseOp(func_op); + + if (to_keep.empty()) + rewriter.eraseOp(op); + else + op.setAttr("initializers", rewriter.getArrayAttr(to_keep)); + + return success(); } }; @@ -474,15 +495,22 @@ void SessionInitializerOp::getCanonicalizationPatterns( results.insert(context); } -llvm::Optional GetSessionInitializerExportedName(ModuleOp op) { +SmallVector GetSessionInitializerExportedName(ModuleOp op) { auto session_initializer_op = GetSessionInitializerOp(op); - if (!session_initializer_op) return llvm::None; + if (!session_initializer_op) return {}; SymbolTable symbol_table(op); - auto init_func_op = - symbol_table.lookup(session_initializer_op.initializer()); - auto exported_names = GetExportedNames(init_func_op); - return exported_names[0]; + + SmallVector results; + for (auto sym_ref : session_initializer_op.initializers()) { + auto init_func_op = symbol_table.lookup( + 
sym_ref.cast().getValue()); + auto exported_names = GetExportedNames(init_func_op); + assert(exported_names.size() == 1); + results.push_back(exported_names[0]); + } + + return results; } } // namespace tf_saved_model diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.h index c8518a9ca02..ee2ef6c994f 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.h @@ -81,7 +81,7 @@ Type GetBoundInputArgTypeFor(mlir::Operation *op); SessionInitializerOp GetSessionInitializerOp(mlir::ModuleOp op); // Returns the exported name for the session initializer function. -llvm::Optional GetSessionInitializerExportedName(mlir::ModuleOp op); +SmallVector GetSessionInitializerExportedName(mlir::ModuleOp op); } // namespace tf_saved_model } // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model_ops.td index 753e2368d6e..c35e752a2c5 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model_ops.td @@ -132,13 +132,13 @@ def TfSavedModel_GlobalTensorOp : TfSavedModel_Op<"global_tensor"> { def TfSavedModel_SessionInitializerOp: TfSavedModel_Op<"session_initializer"> { let summary = "Initializes TensorFlow session state."; let description = [{ - The session initializer op marks a function that must be called by an - external agent exactly once to initialize TensorFlow session state, and this - must happen before any other exported functions are called. There must be no - more than one session initializer in a saved model. + The session initializer op marks one or more functions that must be called + by an external agent exactly once to initialize TensorFlow session state, + and this must happen before any other exported functions are called. 
There + must be no more than one session initializer op in a saved model. - The `initializer` represents the initialization function. The function have - no output and this function should be only called once. + The `initializers` attribute lists the initialization functions. Each + function must have no output and should be called only once. This is used, for example, to initialize hash tables stored in resources and accessed by resource name (rather than as resource handles or bound inputs @@ -146,7 +146,7 @@ def TfSavedModel_SessionInitializerOp: TfSavedModel_Op<"session_initializer"> { }]; let arguments = (ins - FlatSymbolRefAttr:$initializer + SymbolRefArrayAttr:$initializers ); @@ -160,7 +160,7 @@ def TfSavedModel_AssetOp: TfSavedModel_Op<"asset", [Symbol]> { let description = [{ Represents an asset in the saved model that points to an external file. It is a scalar string tensor and it is passed as an argument to the session - initializer function. + initializer functions. The `sym_name` represents the symbol table name used for internal IR references. 
diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/hash_table_asset_v1.py b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/hash_table_asset_v1.py index 3714c610afd..6dc68671158 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/hash_table_asset_v1.py +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/hash_table_asset_v1.py @@ -26,11 +26,11 @@ import tempfile import tensorflow.compat.v1 as tf from tensorflow.compiler.mlir.tensorflow.tests.tf_saved_model import common_v1 -# CHECK: "tf_saved_model.session_initializer"() {initializer = [[init:@.*]]} : () -> () +# CHECK: "tf_saved_model.session_initializer"() {initializers = [@[[init:.*]]]} : () -> () # CHECK: "tf_saved_model.asset"() {filename = {{.*}}, sym_name = "[[asset1:__tf_saved_model_asset1_.*]]"} # CHECK: "tf_saved_model.asset"() {filename = {{.*}}, sym_name = "[[asset0:__tf_saved_model_asset0_.*]]"} -# CHECK: func [[init]] +# CHECK: func @[[init]] # CHECK-SAME: [[ARG0:%.*]]: tensor {tf_saved_model.bound_input = @[[asset0]]} # CHECK-SAME: [[ARG1:%.*]]: tensor {tf_saved_model.bound_input = @[[asset1]]} # CHECK-NEXT: [[R0:%.*]] = "tf.HashTableV2"() diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/hash_table_v1.py b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/hash_table_v1.py index ef16b5243c0..c09b9854677 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/hash_table_v1.py +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/hash_table_v1.py @@ -34,9 +34,9 @@ from tensorflow.compiler.mlir.tensorflow.tests.tf_saved_model import common_v1 # CHECK-SAME: producer # CHECK: "tf_saved_model.global_tensor"() -# CHECK: "tf_saved_model.session_initializer"() {initializer = [[init:@.*]]} : () -> () +# CHECK: "tf_saved_model.session_initializer"() {initializers = [@[[init:.*]]]} : () -> () -# CHECK: func [[init]] +# CHECK: func @[[init]] # CHECK-NEXT: [[R5:%.*]] = "tf.Const"() # CHECK-NEXT: [[R6:%.*]] = 
"tf.Const"() # CHECK-NEXT: [[R7:%.*]] = "tf.HashTableV2"() @@ -89,4 +89,4 @@ def Test(): if __name__ == '__main__': common_v1.set_tf_options() - common_v1.do_test(Test) + common_v1.do_test(Test, canonicalize=True) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/import_restore_v1.py b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/import_restore_v1.py new file mode 100644 index 00000000000..299f002e314 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/import_restore_v1.py @@ -0,0 +1,80 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +# RUN: %p/import_restore_v1 | FileCheck %s + +# pylint: disable=missing-docstring,line-too-long +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow.compat.v1 as tf +from tensorflow.compiler.mlir.tensorflow.tests.tf_saved_model import common_v1 + +# Verify that the tf.versions attribute exists. It is difficult to enforce +# contents, since the version numbers change over time. The conversion logic +# itself is verified in the common graphdef converter, so here just assert +# it is being invoked. 
+# CHECK: module +# CHECK-SAME: tf.versions +# CHECK-SAME: bad_consumers +# CHECK-SAME: min_consumer +# CHECK-SAME: producer + +# CHECK: tf_saved_model.session_initializer +# CHECK-SAME: initializers = [@[[restore:.*]]] + +# CHECK: "tf_saved_model.asset"() +# CHECK-SAME: {filename = [[filename:.*]], sym_name = "[[sym_name:.*]]"} : () -> () + +# CHECK: func @[[restore]]( +# CHECK-SAME: [[variable_path:%.*]]: tensor {tf_saved_model.bound_input = @[[sym_name]]} +# CHECK-SAME: tf_saved_model.exported_names = ["{{__tf_saved_model_session_initializer.*}}"] +# CHECK: [[v0:%.*]] = "tf.RestoreV2"([[variable_path]] +# CHECK: [[v1:%.*]] = "tf.Identity"([[v0]]) +# CHECK: [[handle:%.*]] = "tf.VarHandleOp" +# CHECK-SAME: shared_name = [[shared_name:".*"]] +# CHECK: "tf.AssignVariableOp"([[handle]], [[v1]]) + +# CHECK: func {{@[a-zA-Z_0-9]+}}( +# CHECK-SAME: tf_saved_model.exported_names = ["key"] +# CHECK: tf.VarHandleOp +# CHECK-SAME: shared_name = [[shared_name]] + + +def Test(): + + x = tf.constant([[1.0], [1.0], [1.0]]) + y = tf.compat.v1.get_variable( + name='y', + shape=(1, 3), + initializer=tf.random_normal_initializer(), + trainable=True) + r = tf.matmul(x, y) + + tensor_info_x = tf.compat.v1.saved_model.utils.build_tensor_info(x) + tensor_info_r = tf.compat.v1.saved_model.utils.build_tensor_info(r) + + return { + 'key': (tf.compat.v1.saved_model.signature_def_utils.build_signature_def( + inputs={'x': tensor_info_x}, + outputs={'r': tensor_info_r}, + method_name='some_function')) + }, None, None + + +if __name__ == '__main__': + common_v1.set_tf_options() + common_v1.do_test(Test, use_lite=True) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops.mlir index d2c5509b52d..60330d03ec7 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops.mlir @@ -4,7 +4,7 @@ module attributes 
{tf_saved_model.semantics} { // CHECK: tf_saved_model.session_initializer "tf_saved_model.session_initializer"() { - initializer = @init + initializers = [@init] } : () -> () // CHECK: tf_saved_model.asset diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops_invalid.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops_invalid.mlir index 714c8908825..de7cbc3798b 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops_invalid.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops_invalid.mlir @@ -277,7 +277,7 @@ module attributes {tf_saved_model.semantics} { module attributes {tf_saved_model.semantics} { // expected-error@+1 {{the initializer function does not exist}} - "tf_saved_model.session_initializer"() { initializer = @init } : () -> () + "tf_saved_model.session_initializer"() { initializers = [@init] } : () -> () } // ----- @@ -285,7 +285,7 @@ module attributes {tf_saved_model.semantics} { module attributes {tf_saved_model.semantics} { // expected-error@+1 {{the initializer function should have no output}} - "tf_saved_model.session_initializer"() { initializer = @init } : () -> () + "tf_saved_model.session_initializer"() { initializers = [@init] } : () -> () func @init() -> tensor<1xf32> attributes {sym_visibility = "private"} { %0 = "tf.Const"() {value = dense<[1.0]> : tensor<1xf32> } : () -> tensor<1xf32> return %0 : tensor<1xf32> @@ -298,7 +298,7 @@ module attributes {tf_saved_model.semantics} { "tf_saved_model.session_initializer"() { initializer = @init } : () -> () // expected-error@+1 {{there must be no more than one session_initializer op}} - "tf_saved_model.session_initializer"() { initializer = @init } : () -> () + "tf_saved_model.session_initializer"() { initializers = [@init] } : () -> () func @init() -> tensor<1xf32> attributes {sym_visibility = "private"} { %0 = "tf.Const"() {value = dense<[1.0]> : tensor<1xf32> } : () -> tensor<1xf32> return %0 : tensor<1xf32> @@ -336,7 
+336,7 @@ module attributes {tf_saved_model.semantics} { module attributes {tf_saved_model.semantics} { // expected-error@+1 {{the initializer function does not exist}} - "tf_saved_model.session_initializer"() { initializer = @init } : () -> () + "tf_saved_model.session_initializer"() { initializers = [@init] } : () -> () } // ----- @@ -344,7 +344,7 @@ module attributes {tf_saved_model.semantics} { module attributes {tf_saved_model.semantics} { // expected-error@+1 {{the initializer function should have no output}} - "tf_saved_model.session_initializer"() { initializer = @init } : () -> () + "tf_saved_model.session_initializer"() { initializers = [@init] } : () -> () func @init() -> (tensor<1xf32> {tf_saved_model.index_path = ["output"]}) attributes { tf_saved_model.exported_names = ["__tf_saved_model_session_initializer"] } { %0 = "tf.Const"() {value = dense<[1.0]> : tensor<1xf32> } : () -> tensor<1xf32> @@ -356,9 +356,9 @@ module attributes {tf_saved_model.semantics} { module attributes {tf_saved_model.semantics} { - "tf_saved_model.session_initializer"() { initializer = @init } : () -> () + "tf_saved_model.session_initializer"() { initializers = [@init] } : () -> () // expected-error@+1 {{there must be no more than one session_initializer op}} - "tf_saved_model.session_initializer"() { initializer = @init } : () -> () + "tf_saved_model.session_initializer"() { initializers = [@init] } : () -> () func @init() -> (tensor<1xf32> {tf_saved_model.index_path = ["output"]}) attributes { tf_saved_model.exported_names = ["__tf_saved_model_session_initializer"] } { %0 = "tf.Const"() {value = dense<[1.0]> : tensor<1xf32> } : () -> tensor<1xf32> @@ -371,7 +371,7 @@ module attributes {tf_saved_model.semantics} { module attributes {tf_saved_model.semantics} { // expected-error@+1 {{the initializer function should be exported}} - "tf_saved_model.session_initializer"() { initializer = @init } : () -> () + "tf_saved_model.session_initializer"() { initializers = [@init] } : () -> 
() func @init() attributes {sym_visibility = "private"} { return } @@ -382,7 +382,7 @@ module attributes {tf_saved_model.semantics} { module attributes {tf_saved_model.semantics} { // expected-error@+1 {{the initializer function should have only one exported name}} - "tf_saved_model.session_initializer"() { initializer = @init } : () -> () + "tf_saved_model.session_initializer"() { initializers = [@init] } : () -> () func @init() attributes { tf_saved_model.exported_names = ["a", "b"] } { return } diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_remove_vars_in_session_initializer.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_remove_vars_in_session_initializer.mlir index a2eed45690e..254c32bdab0 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_remove_vars_in_session_initializer.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_remove_vars_in_session_initializer.mlir @@ -9,14 +9,14 @@ module attributes {tf_saved_model.semantics} { module attributes {tf_saved_model.semantics} { // Test case: No matching function for the given session initializer. // expected-error@+1 {{'tf_saved_model.session_initializer' op the initializer function does not exist}} - "tf_saved_model.session_initializer"() { initializer = @init } : () -> () + "tf_saved_model.session_initializer"() { initializers = [@init] } : () -> () } // ----- module attributes {tf_saved_model.semantics} { // Test case: Invalid multiple blocks in the initializer funcion. 
- "tf_saved_model.session_initializer"() { initializer = @init } : () -> () + "tf_saved_model.session_initializer"() { initializers = [@init] } : () -> () // expected-error@+1 {{expects exactly one block in the MLIR function}} func @init() attributes {tf_saved_model.exported_names = ["__tf_saved_model_session_initializer"]} { br ^bb1 @@ -32,7 +32,7 @@ module attributes {tf_saved_model.semantics} { // CHECK: func @init() // CHECK: tf.Const // CHECK: return - "tf_saved_model.session_initializer"() { initializer = @init } : () -> () + "tf_saved_model.session_initializer"() { initializers = [@init] } : () -> () func @init() attributes {tf_saved_model.exported_names = ["__tf_saved_model_session_initializer"]} { "tf.Const"() {value = dense<[1.0]> : tensor<1xf32> } : () -> tensor<1xf32> return @@ -48,7 +48,7 @@ module attributes {tf_saved_model.semantics, tf_saved_model.under_construction} // CHECK-NOT: tf.Const // CHECK-NOT: tf.AssignAddVariableOp // CHECK: return - "tf_saved_model.session_initializer"() { initializer = @init } : () -> () + "tf_saved_model.session_initializer"() { initializers = [@init] } : () -> () func @init() attributes {tf_saved_model.exported_names = ["__tf_saved_model_session_initializer"]} { %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> %1 = "tf.VarHandleOp"() {container = "c", shared_name = "w"} : () -> tensor<*x!tf.resource>> @@ -69,7 +69,7 @@ module attributes {tf_saved_model.semantics, tf_saved_model.under_construction} // CHECK-NOT: tf.Const // CHECK-NOT: tf.AssignAddVariableOp // CHECK: return - "tf_saved_model.session_initializer"() { initializer = @init } : () -> () + "tf_saved_model.session_initializer"() { initializers = [@init] } : () -> () func @init() attributes {tf_saved_model.exported_names = ["__tf_saved_model_session_initializer"]} { %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> %1 = "tf.VarHandleOp"() {container = "c", shared_name = "w"} 
: () -> tensor<*x!tf.resource>> diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/remove_vars_in_session_initializer.cc b/tensorflow/compiler/mlir/tensorflow/transforms/remove_vars_in_session_initializer.cc index f916706a597..09495daeb33 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/remove_vars_in_session_initializer.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/remove_vars_in_session_initializer.cc @@ -82,23 +82,27 @@ void RemoveVariablesInSessionInitializerPass::runOnOperation() { if (!session_init_op) return; SymbolTable symbol_table(module); - FuncOp init_func_op = - symbol_table.lookup(session_init_op.initializer()); - if (!init_func_op) { - module.emitError("no session initializer function found"); - return signalPassFailure(); + for (auto sym_ref : session_init_op.initializers()) { + FuncOp init_func_op = symbol_table.lookup( + sym_ref.cast().getValue()); + + if (!init_func_op) { + module.emitError("no session initializer function found"); + return signalPassFailure(); + } + + if (init_func_op.getBlocks().size() != 1) { + init_func_op.emitError("expects exactly one block in the MLIR function"); + return signalPassFailure(); + } + + auto var_handle_ops = + init_func_op.getBlocks().front().getOps(); + llvm::SmallVector init_vars(var_handle_ops.begin(), + var_handle_ops.end()); + RemoveVariables(init_vars); } - - if (init_func_op.getBlocks().size() != 1) { - init_func_op.emitError("expects exactly one block in the MLIR function"); - return signalPassFailure(); - } - - auto var_handle_ops = init_func_op.getBlocks().front().getOps(); - llvm::SmallVector init_vars(var_handle_ops.begin(), - var_handle_ops.end()); - RemoveVariables(init_vars); } } // namespace diff --git a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc index ddc74fe922a..8c977a6db4c 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc +++ 
b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc @@ -113,6 +113,7 @@ limitations under the License. #include "tensorflow/core/protobuf/graph_debug_info.pb.h" #include "tensorflow/core/protobuf/meta_graph.pb.h" #include "tensorflow/core/protobuf/saved_object_graph.pb.h" +#include "tensorflow/core/protobuf/saver.pb.h" #include "tensorflow/core/protobuf/struct.pb.h" #include "tensorflow/core/protobuf/trackable_object_graph.pb.h" #include "tensorflow/stream_executor/lib/statusor.h" @@ -3278,13 +3279,22 @@ class SavedModelSignatureDefImporterLite { public: // Main entry point: converts all functions (specified by SignatureDefs) in // the given meta graph to an MLIR Module. + // + // `import_restore` controls whether the restore graph is imported; it is + // set to false by, e.g., SavedModelSignatureDefImporter. Ideally this + // option would not be needed, as the restore graph should always be imported. + // However, right now, SavedModelSignatureDefImporter cannot handle the + // restore graph correctly. + // + // TODO(chky): Remove import_restore once the restore graph is correctly + // handled in SavedModelSignatureDefImporter. 
static StatusOr Convert( const MetaGraphDef& meta_graph_def, const GraphDebugInfo& debug_info, absl::Span exported_names, mlir::MLIRContext* context, - bool upgrade_legacy) { + bool upgrade_legacy, bool import_restore = true) { LoadImporterDialects(*context); - SavedModelSignatureDefImporterLite importer(meta_graph_def, debug_info, - exported_names, context); + SavedModelSignatureDefImporterLite importer( + meta_graph_def, debug_info, exported_names, context, import_restore); TF_RETURN_IF_ERROR(importer.InitializeGraph(upgrade_legacy)); TF_ASSIGN_OR_RETURN(auto module, importer.ConvertSignatures()); @@ -3294,18 +3304,20 @@ class SavedModelSignatureDefImporterLite { return module; } + private: SavedModelSignatureDefImporterLite(const MetaGraphDef& meta_graph_def, const GraphDebugInfo& debug_info, absl::Span exported_names, - mlir::MLIRContext* context) + mlir::MLIRContext* context, + bool import_restore) : meta_graph_def_(meta_graph_def), debug_info_(debug_info), graph_(std::make_unique(OpRegistry::Global())), exported_names_(exported_names), module_(mlir::ModuleOp::create(mlir::UnknownLoc::get(context))), - symbol_table_(module_.get()) {} + symbol_table_(module_.get()), + import_restore_(import_restore) {} - private: // Initializes Graph from saved model GraphDef. If `upgrade_legacy` is set, // functionalization is ran on the Graph. Status InitializeGraph(bool upgrade_legacy); @@ -3322,7 +3334,8 @@ class SavedModelSignatureDefImporterLite { }; StatusOr> ConvertAssets(); // Converts the initialization graph in the SavedModel to an MLIR function. - Status ConvertInitializer(const std::vector& assets); + Status ConvertInitializer(const std::string& target_node_name, + const std::vector& assets); // Converts a graph with feeds and fetches to an MLIR function. 
StatusOr ConvertGraph( @@ -3348,6 +3361,7 @@ class SavedModelSignatureDefImporterLite { absl::Span exported_names_; mlir::OwningModuleRef module_; mlir::SymbolTable symbol_table_; + bool import_restore_ = true; }; Status SavedModelSignatureDefImporterLite::InitializeGraph( @@ -3408,12 +3422,7 @@ void SavedModelSignatureDefImporterLite::MoveConvertedFunctionsToModule( } Status SavedModelSignatureDefImporterLite::ConvertInitializer( - const std::vector& assets) { - std::string init_node_name; - TF_RETURN_IF_ERROR(internal::GetInitOp("", meta_graph_def_, &init_node_name)); - - if (init_node_name.empty()) return Status::OK(); - + const std::string& target_node_name, const std::vector& assets) { std::vector> inputs; inputs.reserve(assets.size()); for (const auto& asset : assets) { @@ -3423,12 +3432,12 @@ Status SavedModelSignatureDefImporterLite::ConvertInitializer( inputs.push_back({asset.tensor_name, tensor_info}); } - TF_ASSIGN_OR_RETURN(auto sub_module, ConvertGraph(init_node_name, inputs, {}, - {init_node_name})); + TF_ASSIGN_OR_RETURN(auto sub_module, ConvertGraph(target_node_name, inputs, + {}, {target_node_name})); mlir::SymbolTable sub_symbol_table(*sub_module); - auto init_func_op = sub_symbol_table.lookup(init_node_name); + auto init_func_op = sub_symbol_table.lookup(target_node_name); init_func_op.removeAttr("tf.entry_function"); mlir::OpBuilder builder(module_->getBodyRegion()); @@ -3445,10 +3454,8 @@ Status SavedModelSignatureDefImporterLite::ConvertInitializer( // tf_saved_model. init_func_op.setAttr( "tf_saved_model.exported_names", - builder.getStrArrayAttr({"__tf_saved_model_session_initializer"})); - - builder.create( - module_->getLoc(), builder.getSymbolRefAttr(init_func_op.getName())); + builder.getStrArrayAttr({absl::StrCat( + "__tf_saved_model_session_initializer_", target_node_name)})); // Move the converted functions to top level MLIR module. 
MoveConvertedFunctionsToModule(*sub_module); @@ -3570,9 +3577,44 @@ SavedModelSignatureDefImporterLite::ConvertSignatures() { } TF_ASSIGN_OR_RETURN(auto assets, ConvertAssets()); - TF_RETURN_IF_ERROR(ConvertInitializer(assets)); mlir::OpBuilder builder(module_->getBodyRegion()); + llvm::SmallVector init_sym_refs; + + if (import_restore_ && meta_graph_def_.has_saver_def()) { + std::vector variable_and_assets; + + // Create an AssetOp for the variable checkpoint files. The relative + // filename is used here. + auto variable_filename_op = builder.create( + module_->getLoc(), + /*sym_name=*/ + builder.getStringAttr("__tf_saved_model_variables"), + /*filename=*/ + builder.getStringAttr(io::JoinPath(kSavedModelVariablesDirectory, + kSavedModelVariablesFilename))); + variable_and_assets.push_back( + {meta_graph_def_.saver_def().filename_tensor_name(), + variable_filename_op}); + variable_and_assets.insert(variable_and_assets.end(), assets.begin(), + assets.end()); + + const auto& restore_op_name = meta_graph_def_.saver_def().restore_op_name(); + TF_RETURN_IF_ERROR( + ConvertInitializer(restore_op_name, variable_and_assets)); + init_sym_refs.push_back(builder.getSymbolRefAttr(restore_op_name)); + } + + std::string init_op_name; + TF_RETURN_IF_ERROR(internal::GetInitOp("", meta_graph_def_, &init_op_name)); + if (!init_op_name.empty()) { + TF_RETURN_IF_ERROR(ConvertInitializer(init_op_name, assets)); + init_sym_refs.push_back(builder.getSymbolRefAttr(init_op_name)); + } + + builder.create( + module_->getLoc(), builder.getArrayAttr(init_sym_refs)); + module_->setAttr("tf_saved_model.semantics", builder.getUnitAttr()); SortSavedModelModule(*module_); @@ -3600,7 +3642,7 @@ class SavedModelSignatureDefImporter { TF_ASSIGN_OR_RETURN(auto module, SavedModelSignatureDefImporterLite::Convert( bundle.meta_graph_def, debug_info, exported_names, - context, upgrade_legacy)); + context, upgrade_legacy, /*import_restore=*/false)); mlir::OpBuilder builder(module->getContext()); 
module->setAttr("tf_saved_model.under_construction", builder.getUnitAttr()); From d5c34d529fd18e158c73cf6ce408403c0c0d5ad3 Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Mon, 9 Nov 2020 12:28:14 -0800 Subject: [PATCH 049/220] [MLIR] Add MHLO -> HloOpcode conversion function. PiperOrigin-RevId: 341461697 Change-Id: I18950b109d1c61b270dfadb9afb64995060f670e --- .../compiler/mlir/xla/mlir_hlo_to_hlo.cc | 196 ++++++++++++++++++ .../compiler/mlir/xla/mlir_hlo_to_hlo.h | 2 + 2 files changed, 198 insertions(+) diff --git a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc index 0e904c153bb..c96bcfea63a 100644 --- a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc +++ b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc @@ -1763,4 +1763,200 @@ DenseIntElementsAttr GetLayoutFromMlirHlo(mlir::Operation* op) { return op->getAttrOfType("minor_to_major"); } +StatusOr<::xla::HloOpcode> MhloToHloOpcode(mlir::Operation* op) { + if (mlir::isa(op)) { + return xla::HloOpcode::kConstant; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kIota; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kConvert; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kAdd; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kAtan2; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kDivide; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kMaximum; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kMinimum; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kMultiply; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kPower; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kRemainder; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kShiftLeft; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kShiftRightArithmetic; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kShiftRightLogical; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kSubtract; + } else if (mlir::isa(op)) { + return 
xla::HloOpcode::kXor; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kInfeed; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kOutfeed; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kSend; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kRecv; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kReplicaId; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kAfterAll; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kAllReduce; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kAllToAll; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kTuple; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kBatchNormGrad; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kBatchNormInference; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kBatchNormTraining; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kBitcastConvert; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kBroadcast; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kCholesky; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kClamp; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kConcatenate; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kConvolution; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kSort; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kRngBitGenerator; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kFusion; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kBitcast; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kAbs; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kCbrt; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kCeil; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kClz; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kCos; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kExp; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kExpm1; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kFloor; + } else 
if (mlir::isa(op)) { + return xla::HloOpcode::kImag; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kIsFinite; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kLog; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kLog1p; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kLogistic; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kNot; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kNegate; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kPopulationCount; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kReal; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kRoundNearestAfz; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kRsqrt; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kSign; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kSin; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kSqrt; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kTanh; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kComplex; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kAnd; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kOr; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kWhile; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kReduce; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kGetTupleElement; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kCompare; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kSlice; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kDynamicSlice; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kDynamicUpdateSlice; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kCollectivePermute; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kCopy; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kCustomCall; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kDot; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kFft; + } else if (mlir::isa(op)) { + return 
xla::HloOpcode::kGather; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kGetDimensionSize; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kMap; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kReshape; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kDynamicReshape; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kScatter; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kSelect; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kSelectAndScatter; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kSetDimensionSize; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kReverse; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kPad; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kTrace; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kTranspose; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kTriangularSolve; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kReduceWindow; + } else if (mlir::isa(op)) { + return xla::HloOpcode::kReducePrecision; + } else { + std::string s; + { + llvm::raw_string_ostream os(s); + op->print(os); + } + return tensorflow::errors::Unimplemented( + "Unimplemented MHLO -> HloOpcode: %s", s); + } +} + } // namespace mlir diff --git a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h index a727f60084c..45b3cb4d272 100644 --- a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h +++ b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h @@ -66,6 +66,8 @@ llvm::Optional<::xla::XlaOp> CreateXlaOperator( mlir::DenseIntElementsAttr GetLayoutFromMlirHlo(mlir::Operation* op); +::xla::StatusOr<::xla::HloOpcode> MhloToHloOpcode(mlir::Operation* op); + } // namespace mlir #endif // TENSORFLOW_COMPILER_MLIR_XLA_MLIR_HLO_TO_HLO_H_ From b49237e3bd3fa2c17f9858ae22ed1fd3260aaa8f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Nov 2020 12:28:39 -0800 Subject: [PATCH 050/220] Add RISC Conv Op register. 
PiperOrigin-RevId: 341461773 Change-Id: I0dfe6b4fc57e611a1b875dbf5da32fbf6e781d2d --- .../api_def/base_api/api_def_RiscConv.pbtxt | 54 ------------------- .../core/kernels/risc/experimental/BUILD | 11 ---- .../kernels/risc/experimental/risc_conv_op.cc | 50 ----------------- tensorflow/core/ops/risc_ops.cc | 11 ---- tensorflow/python/ops/risc/risc_grad.py | 7 --- tensorflow/python/ops/risc/risc_ops.py | 17 +----- 6 files changed, 1 insertion(+), 149 deletions(-) delete mode 100644 tensorflow/core/api_def/base_api/api_def_RiscConv.pbtxt delete mode 100644 tensorflow/core/kernels/risc/experimental/risc_conv_op.cc diff --git a/tensorflow/core/api_def/base_api/api_def_RiscConv.pbtxt b/tensorflow/core/api_def/base_api/api_def_RiscConv.pbtxt deleted file mode 100644 index a78ee1d2b89..00000000000 --- a/tensorflow/core/api_def/base_api/api_def_RiscConv.pbtxt +++ /dev/null @@ -1,54 +0,0 @@ -op { - graph_op_name: "RiscConv" - visibility: HIDDEN - in_arg { - name: "input" - description: < 1, there will be k-1 skipped cells between each -filter element on that dimension. The dimension order is determined by the -value of `data_format`, see above for details. Dilations in the batch and -depth dimensions must be 1. -END - } - summary: "Computes a 2-D convolution given 4-D `input` and `filter` tensors." 
-} diff --git a/tensorflow/core/kernels/risc/experimental/BUILD b/tensorflow/core/kernels/risc/experimental/BUILD index d0e94be3120..a16c0b66271 100644 --- a/tensorflow/core/kernels/risc/experimental/BUILD +++ b/tensorflow/core/kernels/risc/experimental/BUILD @@ -17,20 +17,9 @@ tf_kernel_library( ], ) -tf_kernel_library( - name = "risc_conv_op", - srcs = ["risc_conv_op.cc"], - deps = [ - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - ], -) - tf_kernel_library( name = "experimental", deps = [ ":risc_add_op", - ":risc_conv_op", ], ) diff --git a/tensorflow/core/kernels/risc/experimental/risc_conv_op.cc b/tensorflow/core/kernels/risc/experimental/risc_conv_op.cc deleted file mode 100644 index 58c5ee98eae..00000000000 --- a/tensorflow/core/kernels/risc/experimental/risc_conv_op.cc +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/core/framework/common_shape_fns.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/framework/shape_inference.h" - -namespace tensorflow { -namespace risc { -namespace experimental { - -typedef Eigen::ThreadPoolDevice CPUDevice; - -template -class RiscConvOp : public OpKernel { - public: - explicit RiscConvOp(OpKernelConstruction* context) : OpKernel(context) { - // TODO(b/171294012): Implement RiscConv op. - } - - void Compute(OpKernelContext* context) override { - // TODO(b/171294012): Implement RiscConv op. - } -}; - -#define REGISTER_CPU(T) \ - REGISTER_KERNEL_BUILDER( \ - Name("RiscConv").Device(DEVICE_CPU).TypeConstraint("T"), \ - RiscConvOp); - -REGISTER_CPU(float); -REGISTER_CPU(double); - -} // namespace experimental -} // namespace risc -} // namespace tensorflow diff --git a/tensorflow/core/ops/risc_ops.cc b/tensorflow/core/ops/risc_ops.cc index a5b1e37fa84..1d90a645965 100644 --- a/tensorflow/core/ops/risc_ops.cc +++ b/tensorflow/core/ops/risc_ops.cc @@ -30,15 +30,4 @@ REGISTER_OP("RiscAdd") .SetIsAggregate() .SetIsCommutative(); -// TODO(b/171294012): change shape function. 
-REGISTER_OP("RiscConv") - .Input("input: T") - .Input("filter: T") - .Output("output: T") - .Attr("T: {float, double}") - .Attr("strides: list(int)") - .Attr(GetConvnetDataFormatAttrString()) - .SetShapeFn(shape_inference::UnknownShape) - .Attr("dilations: list(int) = [1, 1, 1, 1]"); - } // namespace tensorflow diff --git a/tensorflow/python/ops/risc/risc_grad.py b/tensorflow/python/ops/risc/risc_grad.py index 5c0f76ba3a4..b125aab895a 100644 --- a/tensorflow/python/ops/risc/risc_grad.py +++ b/tensorflow/python/ops/risc/risc_grad.py @@ -28,10 +28,3 @@ def _RiscAddGrad(_, grad): # pylint: disable=unused-argument # TODO(b/171294012): Implement gradient of RISC with RISC ops. return None, None - - -@ops.RegisterGradient("RiscConv") -def _RiscConvGrad(_, grad): - # pylint: disable=unused-argument - # TODO(b/171294012): Implement gradient of RISC with RISC ops. - return None, None diff --git a/tensorflow/python/ops/risc/risc_ops.py b/tensorflow/python/ops/risc/risc_ops.py index f59e42dbf6e..8682ebdd269 100644 --- a/tensorflow/python/ops/risc/risc_ops.py +++ b/tensorflow/python/ops/risc/risc_ops.py @@ -30,20 +30,5 @@ from tensorflow.python.ops.risc_ops_gen import * def risc_add( input_lhs, input_rhs, - name='RISC_ADD'): + name="RISC_ADD"): return gen_risc_ops.risc_add(input_lhs, input_rhs, name=name) - - -def risc_conv(x, - kernel, - strides, - data_format='NHWC', - dilations=None, - name='RISC_CONV'): - return gen_risc_ops.risc_conv( - x, - kernel, - strides, - data_format=data_format, - dilations=dilations, - name=name) From 6ab9a206b824aa64ed80fbdcf1b5722e36baf391 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Nov 2020 12:32:40 -0800 Subject: [PATCH 051/220] Add a mechanism to do space-to-batch propagation in the whole graph. 
PiperOrigin-RevId: 341462532 Change-Id: I66a7c103d8c90fdc1b75759d945a2563dfcaf405 --- tensorflow/compiler/xla/service/BUILD | 7 + .../xla/service/space_to_batch_converter.cc | 1286 ++++++++++++++--- .../service/space_to_batch_converter_test.cc | 28 +- 3 files changed, 1102 insertions(+), 219 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 151afb81cb5..f1fa2ce3a52 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -2454,6 +2454,7 @@ cc_library( ":hlo", ":hlo_creation_utils", ":hlo_pass", + ":shape_inference", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", @@ -2462,8 +2463,14 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto_cc", "//tensorflow/core:lib", + "//tensorflow/stream_executor/lib", + "@com_google_absl//absl/algorithm", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:span", ], ) diff --git a/tensorflow/compiler/xla/service/space_to_batch_converter.cc b/tensorflow/compiler/xla/service/space_to_batch_converter.cc index e9fd1c1aa3d..5a05378a07f 100644 --- a/tensorflow/compiler/xla/service/space_to_batch_converter.cc +++ b/tensorflow/compiler/xla/service/space_to_batch_converter.cc @@ -14,9 +14,20 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/compiler/xla/service/space_to_batch_converter.h" +#include +#include +#include #include +#include +#include +#include +#include "absl/algorithm/algorithm.h" +#include "absl/algorithm/container.h" +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/memory/memory.h" +#include "absl/types/span.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" @@ -25,6 +36,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_instructions.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/shape_inference.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/types.h" @@ -34,6 +46,7 @@ limitations under the License. #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/stream_executor/lib/statusor.h" namespace xla { @@ -42,78 +55,165 @@ namespace { // ConvolutionVisitor traverses the HLO computation and rewrites Convolution // operations with small batch counts into convolutions with larger batch // counts by moving space to batch. -class ConvolutionVisitor : public DfsHloVisitorWithDefault { +class ConvolutionVisitor { public: - // Default visitor action is to do nothing and return OK. - Status DefaultAction(HloInstruction* /*hlo_instruction*/) override { - return Status::OK(); - } + // Top-level function to begin space-to-batch conversion. 
+ Status PerformSpaceToBatchOnConvolution(HloInstruction* convolution); - Status HandleConvolution(HloInstruction* convolution) override; + // Function that determines if space-to-batch can be propagated into the + // consumer. Such propagation is only possible when all required operands are + // space-to-batch'ed. + bool CanPropagate(HloInstruction* consumer, HloInstruction* producer); + + // This function checks if the HLO instrution supports propagation. + bool SupportedOpForPropagation(HloInstruction* consumer, + HloInstruction* producer); + + // Method that checks validity of Broadcast propagation. + bool IsBroadcastPropagatable(HloInstruction* broadcast, + HloInstruction* old_other_op); + + // Propagates space-to-batch on the op, and returns a bool that indicates if + // the users of the op need to be propagated through. + StatusOr Propagate(HloInstruction* consumer, HloInstruction* producer); + + // Perform space-to-batch propagation on the convolution. Assumes the + // activations were already space-to-batched. + Status PropagateOnConv(HloInstruction* convolution); + + // Method that checks validity of space-to-batch on a given convolution. + bool IsConvSuitableForSpaceToBatch(HloInstruction* convolution); + + // Once a convolution has been space-to-batch'ed, this function will + // transitively propagate the space-to-batch-ness on rest of the graph. + Status PropagateOnUsers(HloInstruction* old_conv); + + // Generates masked output with valid data. This is useful when larger shapes + // are generated due to space-to-batch. + StatusOr SelectValidPortion( + HloInstruction* new_instr, HloInstruction* old_instr, + HloInstruction* select_val, int64 new_batch_dim, int64 new_space_dim, + int64 old_batch_dim, int64 old_space_dim); + + // Performs tranposition so that space dimension follows the batch dimension. 
+ StatusOr BringSpaceNextToBatch( + HloInstruction* activations, ConvolutionDimensionNumbers& dim_numbers, + int64& spatial_dimension_to_split, int64& activations_batch_dim); + + // Function that converts spaced-to-batch shape back to the original. + StatusOr BatchToSpace(HloInstruction* old_instr); + + // Duplicates elements at boundaries. + StatusOr HaloDuplicateWithSlice( + HloInstruction* activations, int64 spatial_dimension_to_split, + int64 activations_batch_dim, int64 old_batch_size, int64 low_padding, + int64 high_padding, int64 halo_size, int64 original_split_dim_size, + HloInstruction* pad_val = nullptr); // Runs the visitor on a computation. - static bool Run(int64 limit_on_batch_size, HloComputation* computation); + StatusOr Run(); // Returns whether any convolution ops were rewritten. const bool changed() const { return changed_; } - ~ConvolutionVisitor() override = default; + ~ConvolutionVisitor() = default; + + explicit ConvolutionVisitor(int64 limit_on_batch_size, + HloComputation* computation); + + int64 get_chosen_spatial_dim(HloInstruction* convolution) { + return convolution->convolution_dimension_numbers() + .input_spatial_dimensions_size() - + 1; + } + + int64 DimLookUp(absl::Span permute_dims, int64 id) { + return permute_dims[id]; + } private: - explicit ConvolutionVisitor(int64 limit_on_batch_size, - HloComputation* computation) - : computation_(computation), limit_on_batch_size_(limit_on_batch_size) {} - // Current HloComputation instance the ConvolutionVisitor is traversing. HloComputation* computation_; + absl::flat_hash_set convs_to_visit_; + std::vector conv_visitor_list_; + absl::flat_hash_set non_propagatable_instrs_; + // Map from a given spaced-to-batch instruction to its batched-to-space + // version. + absl::flat_hash_map batch_to_space_map_; + + // Map from old (non space-to-batch) instructions to space-to-batch'ed + // instructions. 
+ absl::flat_hash_map old_to_new_instrs_; + + // Map from instruction to dimensions of the shape (first is batch, second is + // space). This is with respect to the old instruction. + absl::flat_hash_map> + instr_to_dim_map_; + + // Map from space-to-batch'ed instruction to its permute dims. + absl::flat_hash_map> + instr_to_dim_permute_map_; + // Whether rewrite has occurred. bool changed_ = false; // Limit on batch size to apply this technique on. int64 limit_on_batch_size_; + + // We choose the new batch size to be a constant so that space-to-batch + // propagation through several convolutional layers is consistent. + static constexpr int64 kNewBatchSize = 8; }; -bool ConvolutionVisitor::Run(int64 limit_on_batch_size, - HloComputation* computation) { - ConvolutionVisitor visitor(limit_on_batch_size, computation); - TF_CHECK_OK(computation->Accept(&visitor)); - return visitor.changed_; +ConvolutionVisitor::ConvolutionVisitor(int64 limit_on_batch_size, + HloComputation* computation) { + computation_ = computation; + limit_on_batch_size_ = limit_on_batch_size; + for (HloInstruction* inst : computation->instructions()) { + if (inst->opcode() != HloOpcode::kConvolution) { + continue; + } + + auto convolution = inst; + // Perform legality checks. + if (!IsConvSuitableForSpaceToBatch(convolution)) { + VLOG(1) << "Conv not suitable for space-to-batch " + << convolution->ToString(); + continue; + } + convs_to_visit_.insert(convolution); + conv_visitor_list_.push_back(convolution); + } } -Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) { - VLOG(1) << "Handling conv " << convolution->ToString(); - changed_ = false; - +bool ConvolutionVisitor::IsConvSuitableForSpaceToBatch( + HloInstruction* convolution) { ConvolutionDimensionNumbers dim_numbers = convolution->convolution_dimension_numbers(); // If there are no spatial dims, we return. 
if (dim_numbers.input_spatial_dimensions_size() < 1) { - return Status::OK(); + return false; } - // This is the spatial dimension we choose to spilt. - constexpr int64 kChosenSpatialDim = 0; - // We choose the new batch size to be a constant so that space-to-batch - // propagation through several convolutional layers is consistent. - constexpr int64 kNewBatchSize = 8; - // Batch in batch_group_count has different semantics (it isn't true batch). // Consider supporting this case in future if needed. if (convolution->batch_group_count() != 1) { - return Status::OK(); + return false; } - if (convolution->window().dimensions(kChosenSpatialDim).window_dilation() != - 1) { - return Status::OK(); + if (convolution->window() + .dimensions(get_chosen_spatial_dim(convolution)) + .window_dilation() != 1) { + return false; } // TODO(b/168316428): Support base dilations. - if (convolution->window().dimensions(kChosenSpatialDim).base_dilation() != - 1) { - return Status::OK(); + if (convolution->window() + .dimensions(get_chosen_spatial_dim(convolution)) + .base_dilation() != 1) { + return false; } int64 activations_batch_dim = dim_numbers.input_batch_dimension(); @@ -122,31 +222,29 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) { convolution->operand(0)->shape().dimensions(activations_batch_dim); if (old_batch_size > limit_on_batch_size_) { - return Status::OK(); + return false; } auto kernel = convolution->mutable_operand(1); const auto& kernel_shape = kernel->shape(); - const int64 kernel_spatial_dim_size = kernel_shape.dimensions( - dim_numbers.kernel_spatial_dimensions(kChosenSpatialDim)); + const int64 kernel_spatial_dim_size = + kernel_shape.dimensions(dim_numbers.kernel_spatial_dimensions( + get_chosen_spatial_dim(convolution))); auto activations = convolution->mutable_operand(0); - int64 spatial_dimension_to_split = - dim_numbers.input_spatial_dimensions(kChosenSpatialDim); - - const int64 input_dim_size = activations->shape().dimensions( 
- dim_numbers.input_spatial_dimensions(kChosenSpatialDim)); + const int64 input_dim_size = + activations->shape().dimensions(dim_numbers.input_spatial_dimensions( + get_chosen_spatial_dim(convolution))); const int64 inherent_low_padding = - convolution->window().dimensions(kChosenSpatialDim).padding_low(); + convolution->window() + .dimensions(get_chosen_spatial_dim(convolution)) + .padding_low(); const int64 inherent_high_padding = - convolution->window().dimensions(kChosenSpatialDim).padding_high(); - const bool inherent_padding_needed = - inherent_low_padding != 0 || inherent_high_padding != 0; - - const int64 stride = - convolution->window().dimensions(kChosenSpatialDim).stride(); + convolution->window() + .dimensions(get_chosen_spatial_dim(convolution)) + .padding_high(); const int64 spatial_size = input_dim_size + inherent_low_padding + inherent_high_padding; @@ -156,23 +254,88 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) { // We currently only cater to evenly divisible cases. if (kNewBatchSize % old_batch_size != 0) { - return Status::OK(); + return false; } // Splitting will be incorrect in these cases. if (spatial_size < num_splits || input_dim_size / num_splits < kernel_spatial_dim_size) { - return Status::OK(); + return false; } + VLOG(1) << "Legal space-to-batch convolution " << convolution->ToString(); + return true; +} - // By now, we are certain that the space-to-batch transormation is going to - // take place. 
+StatusOr ConvolutionVisitor::HaloDuplicateWithSlice( + HloInstruction* activations, int64 spatial_dimension_to_split, + int64 activations_batch_dim, int64 old_batch_size, int64 low_padding, + int64 high_padding, int64 halo_size, int64 original_split_dim_size, + HloInstruction* pad_val) { + const int64 rank = activations->shape().rank(); + const int64 spatial_split_size = + activations->shape().dimensions(spatial_dimension_to_split); + const int64 batch_size = + activations->shape().dimensions(activations_batch_dim); + CHECK_LT(low_padding, spatial_split_size); - // Create the new convolution dim numbers. - auto new_dim_numbers = dim_numbers; + VLOG(1) << "In HaloDuplicateWithSlice with activations " + << activations->ToString() << " batch_size " << batch_size + << " spatial_split_size " << spatial_split_size << " low_padding " + << low_padding << " halo size " << halo_size; + std::vector start_indices(rank, 0), + end_indices(activations->shape().dimensions().begin(), + activations->shape().dimensions().end()), + strides(rank, 1); + start_indices[spatial_dimension_to_split] = spatial_split_size - low_padding; + end_indices[activations_batch_dim] = batch_size - 1; + end_indices[spatial_dimension_to_split] = spatial_split_size; - // We'd need transposition of activations here such that batch and space dim - // that is being split are adjacent (in that order). + TF_ASSIGN_OR_RETURN( + HloInstruction * first_slice, + MakeSliceHlo(activations, start_indices, end_indices, strides)); + VLOG(1) << "first slice " << first_slice->ToString(); + PaddingConfig padding_config = + MakeNoPaddingConfig(first_slice->shape().dimensions_size()); + padding_config.mutable_dimensions(activations_batch_dim) + ->set_edge_padding_low(1); + HloInstruction* padding = + pad_val == nullptr + ? 
computation_->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(activations->shape().element_type()))) + : pad_val; + TF_ASSIGN_OR_RETURN(first_slice, + MakePadHlo(first_slice, padding, padding_config)); + + std::vector start_indices_halo(rank, 0), + end_indices_halo(activations->shape().dimensions().begin(), + activations->shape().dimensions().end()); + + start_indices_halo[activations_batch_dim] = 1; + end_indices_halo[spatial_dimension_to_split] = halo_size - low_padding; + + TF_ASSIGN_OR_RETURN( + HloInstruction * halo_region, + MakeSliceHlo(activations, start_indices_halo, end_indices_halo, strides)); + + VLOG(1) << "halo_region " << halo_region->ToString(); + PaddingConfig padding_config_halo = + MakeNoPaddingConfig(halo_region->shape().dimensions_size()); + padding_config_halo.mutable_dimensions(activations_batch_dim) + ->set_edge_padding_high(1); + TF_ASSIGN_OR_RETURN(halo_region, + MakePadHlo(halo_region, padding, padding_config_halo)); + + TF_ASSIGN_OR_RETURN(activations, + MakeConcatHlo({first_slice, activations, halo_region}, + spatial_dimension_to_split)); + + return activations; +} + +StatusOr ConvolutionVisitor::BringSpaceNextToBatch( + HloInstruction* activations, ConvolutionDimensionNumbers& dim_numbers, + int64& spatial_dimension_to_split, int64& activations_batch_dim) { + ConvolutionDimensionNumbers new_dim_numbers = dim_numbers; if (spatial_dimension_to_split != activations_batch_dim + 1) { int64 pushed_counter = 0; std::vector transpose_dims; @@ -208,8 +371,632 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) { MakeTransposeHlo(activations, transpose_dims)); } + new_dim_numbers.set_input_batch_dimension(activations_batch_dim); + dim_numbers = new_dim_numbers; + + return activations; +} + +StatusOr ConvolutionVisitor::Run() { + for (auto conv : conv_visitor_list_) { + if (convs_to_visit_.count(conv) > 0) { + TF_CHECK_OK(PerformSpaceToBatchOnConvolution(conv)); + } + } + conv_visitor_list_.clear(); + 
convs_to_visit_.clear(); + // Iterate through all instructions that we could not propagate through, and + // turn their operands from batch-to-space as needed. + for (auto instr : non_propagatable_instrs_) { + absl::flat_hash_map operand_map; + for (int64 i = 0; i < instr->operand_count(); ++i) { + if (old_to_new_instrs_.count(instr->mutable_operand(i))) { + TF_ASSIGN_OR_RETURN(operand_map[i], + BatchToSpace(instr->mutable_operand(i))); + } + } + for (auto entry : operand_map) { + TF_CHECK_OK(instr->ReplaceOperandWith(entry.first, entry.second)); + } + } + non_propagatable_instrs_.clear(); + return changed_; +} + +bool IsTrivialElementwise(HloInstruction* hlo) { + if (hlo->opcode() == HloOpcode::kFusion || hlo->opcode() == HloOpcode::kRng || + hlo->opcode() == HloOpcode::kCopy || + hlo->opcode() == HloOpcode::kConstant || + hlo->opcode() == HloOpcode::kIota) { + return false; + } + return hlo->IsElementwise(); +} + +bool ConvolutionVisitor::CanPropagate(HloInstruction* consumer, + HloInstruction* producer) { + for (int64 i = 0; i < consumer->operand_count(); ++i) { + auto old_producer = consumer->mutable_operand(i); + if (IsTrivialElementwise(consumer)) { + if (old_to_new_instrs_.count(old_producer) <= 0 && + old_producer->opcode() != HloOpcode::kConstant && + !(old_producer->opcode() == HloOpcode::kBroadcast && + IsBroadcastPropagatable(old_producer, producer))) { + VLOG(1) << "Cannot propagate on elementwise op " + << consumer->ToString(); + return false; + } + } + if (consumer->opcode() == HloOpcode::kConvolution || + consumer->opcode() == HloOpcode::kReduceWindow || + consumer->opcode() == HloOpcode::kReduce) { + if (i == 0 && !old_to_new_instrs_.contains(old_producer)) { + return false; + } + } + } + return true; +} + +bool ConvolutionVisitor::IsBroadcastPropagatable(HloInstruction* broadcast, + HloInstruction* old_other_op) { + CHECK_EQ(broadcast->opcode(), HloOpcode::kBroadcast); + CHECK(instr_to_dim_map_.contains(old_other_op)); + + auto result = 
instr_to_dim_map_[old_other_op]; + const int64 batch_dim = result.first; + const int64 space_dim = result.second; + auto broadcast_dims = broadcast->dimensions(); + return !absl::c_linear_search(broadcast_dims, batch_dim) && + !absl::c_linear_search(broadcast_dims, space_dim); +} + +bool ConvolutionVisitor::SupportedOpForPropagation(HloInstruction* consumer, + HloInstruction* producer) { + if (IsTrivialElementwise(consumer)) { + for (int64 i = 0; i < consumer->operand_count(); ++i) { + if (consumer->operand(i)->opcode() == HloOpcode::kBroadcast) { + if (!IsBroadcastPropagatable(consumer->mutable_operand(i), producer)) { + VLOG(2) << "Could not propagate through broadcast"; + return false; + } + } + } + return true; + } + + if (consumer->opcode() == HloOpcode::kConvolution) { + VLOG(1) << "Checking if conv is supported for propagation"; + return IsConvSuitableForSpaceToBatch(consumer); + } + + if (consumer->opcode() == HloOpcode::kReduce) { + // Support only the trivial case where both batch and split spatial dim are + // being reduced + + auto reduce_dims = consumer->dimensions(); + auto result = instr_to_dim_map_[consumer->mutable_operand(0)]; + const int64 batch_dim = result.first; + const int64 space_dim = result.second; + VLOG(1) << "Checking if reduce is supported batch_dim " << batch_dim + << " space_dim " << space_dim << " reduce " + << consumer->ToString(); + return absl::c_linear_search(reduce_dims, batch_dim) && + absl::c_linear_search(reduce_dims, space_dim); + } + + if (consumer->opcode() == HloOpcode::kReduceWindow) { + auto first_operand = consumer->mutable_operand(0); + auto reduce_window = consumer->window(); + if (instr_to_dim_map_.count(first_operand) <= 0) { + VLOG(1) << "Dim map not found on reducewindow operand. 
Window dim count " + << reduce_window.dimensions().size(); + return false; + } + // Disallow windowing on on the batch dim + auto result = instr_to_dim_map_[first_operand]; + const int64 old_batch_dim = result.first; + const int64 old_space_dim = result.second; + if (reduce_window.dimensions(old_batch_dim).size() != 1) { + return false; + } + + // Only allow no-low-padding cases. + if (reduce_window.dimensions(old_space_dim).padding_low() != 0) { + return false; + } + + // Only allow small high pads. + if (reduce_window.dimensions(old_space_dim).padding_high() > + reduce_window.dimensions(old_space_dim).size()) { + return false; + } + + // Operand 0 must have been propagated through + if (old_to_new_instrs_.count(first_operand) <= 0) { + return false; + } + + auto new_operand = old_to_new_instrs_[first_operand]; + auto permute_dims = instr_to_dim_permute_map_[new_operand]; + const int64 new_space_dim = DimLookUp(permute_dims, old_space_dim); + + // Make sure that the stride lines up. + if (reduce_window.dimensions(old_space_dim).size() != 1) { + if (new_operand->shape().dimensions(new_space_dim) % + reduce_window.dimensions(old_space_dim).stride() != + 0) { + return false; + } + } + + return true; + } + + return false; +} + +StatusOr ConvolutionVisitor::Propagate(HloInstruction* consumer, + HloInstruction* producer) { + auto computation = consumer->parent(); + if (IsTrivialElementwise(consumer)) { + auto dim_map_val = instr_to_dim_map_[producer]; + auto new_consumer = computation->AddInstruction(consumer->Clone()); + for (int64 i = 0; i < consumer->operand_count(); ++i) { + if (consumer->operand(i)->opcode() == HloOpcode::kBroadcast) { + CHECK(old_to_new_instrs_.contains(producer)); + auto new_producer = old_to_new_instrs_[producer]; + auto permute_dims = instr_to_dim_permute_map_[new_producer]; + std::vector broadcast_dims; + for (auto j : consumer->operand(i)->dimensions()) { + broadcast_dims.push_back(DimLookUp(permute_dims, j)); + } + auto new_broadcast = 
MakeBroadcastHlo( + consumer->mutable_operand(i)->mutable_operand(0), broadcast_dims, + new_producer->shape().dimensions()); + VLOG(1) << "Created broadcast " << new_broadcast->ToString(); + TF_CHECK_OK( + new_consumer->ReplaceOperandWithDifferentShape(i, new_broadcast)); + } else { + CHECK(old_to_new_instrs_.contains(consumer->mutable_operand(i))); + TF_CHECK_OK(new_consumer->ReplaceOperandWithDifferentShape( + i, old_to_new_instrs_[consumer->mutable_operand(i)])); + } + } + auto old_type = new_consumer->mutable_shape()->element_type(); + *(new_consumer->mutable_shape()) = old_to_new_instrs_[producer]->shape(); + + // The element type needs to be retained. + new_consumer->mutable_shape()->set_element_type(old_type); + + old_to_new_instrs_[consumer] = new_consumer; + instr_to_dim_map_[consumer] = dim_map_val; + CHECK(instr_to_dim_permute_map_.contains(old_to_new_instrs_[producer])); + instr_to_dim_permute_map_[new_consumer] = std::vector( + instr_to_dim_permute_map_[old_to_new_instrs_[producer]]); + + VLOG(2) << " new_consumer " << new_consumer->ToString() + << " old_to_new_instrs_[producer] " + << old_to_new_instrs_[producer]->ToString() << " permute dims " + << instr_to_dim_permute_map_.count(new_consumer); + + return true; + } + + if (consumer->opcode() == HloOpcode::kConvolution) { + TF_CHECK_OK(PropagateOnConv(consumer)); + return true; + } + + if (consumer->opcode() == HloOpcode::kReduce) { + auto new_consumer = computation->AddInstruction(consumer->Clone()); + auto first_operand = old_to_new_instrs_[consumer->mutable_operand(0)]; + + auto dim_map_val = instr_to_dim_map_[consumer->mutable_operand(0)]; + const int64 old_batch_dim = dim_map_val.first; + const int64 old_space_dim = dim_map_val.second; + auto permute_dims = instr_to_dim_permute_map_[first_operand]; + const int64 new_batch_dim = DimLookUp(permute_dims, old_batch_dim); + const int64 new_space_dim = DimLookUp(permute_dims, old_space_dim); + + TF_ASSIGN_OR_RETURN( + first_operand, + 
SelectValidPortion(first_operand, consumer->mutable_operand(0), + consumer->mutable_operand(1), new_batch_dim, + new_space_dim, old_batch_dim, old_space_dim)); + + std::vector changed_dims(new_consumer->dimensions().size()); + for (int64 i = 0; i < new_consumer->dimensions().size(); ++i) { + changed_dims[i] = DimLookUp(permute_dims, new_consumer->dimensions(i)); + } + *(new_consumer->mutable_dimensions()) = changed_dims; + // Replace operand 0. + TF_CHECK_OK( + new_consumer->ReplaceOperandWithDifferentShape(0, first_operand)); + // We do not set instr_to_dim_permute_map_ here because no further + // propagation is needed here. + old_to_new_instrs_[consumer] = new_consumer; + instr_to_dim_map_[consumer] = dim_map_val; + + // Since the resultant ordering of dimension is the same as before, no + // further propagation is needed. + return false; + } + + if (consumer->opcode() == HloOpcode::kReduceWindow) { + auto first_operand = old_to_new_instrs_[consumer->mutable_operand(0)]; + + auto dim_map_val = instr_to_dim_map_[consumer->mutable_operand(0)]; + const int64 old_batch_dim = dim_map_val.first; + const int64 old_space_dim = dim_map_val.second; + auto permute_dims = instr_to_dim_permute_map_[first_operand]; + const int64 new_batch_dim = DimLookUp(permute_dims, old_batch_dim); + const int64 new_space_dim = DimLookUp(permute_dims, old_space_dim); + + TF_ASSIGN_OR_RETURN( + first_operand, + SelectValidPortion(first_operand, consumer->mutable_operand(0), + consumer->mutable_operand(1), new_batch_dim, + new_space_dim, old_batch_dim, old_space_dim)); + + // Calculate the required halo size + auto new_shape = first_operand->shape(); + auto old_shape = consumer->mutable_operand(0)->shape(); + + const int64 new_batch_size = new_shape.dimensions(new_batch_dim); + const int64 new_space_size = new_shape.dimensions(new_space_dim); + const int64 stride = consumer->window().dimensions(old_space_dim).stride(); + const int64 window_size = + 
consumer->window().dimensions(old_space_dim).size(); + const int64 last_overlap_point = ((new_space_size - 1) / stride) * stride; + VLOG(1) << "last_overlap_point " << last_overlap_point << " window_size " + << window_size << " new_space_size " << new_space_size; + if (last_overlap_point + window_size > new_space_size) { + const int64 halo_size = last_overlap_point + window_size - new_space_size; + TF_ASSIGN_OR_RETURN( + first_operand, + HaloDuplicateWithSlice(first_operand, new_space_dim, new_batch_dim, + new_batch_size, + /*low_padding=*/0, + /*high_padding=*/0, halo_size, new_space_size, + consumer->mutable_operand(1))); + } + + Window new_win; + for (int64 i = 0; i < consumer->window().dimensions().size(); ++i) { + auto dim = DimLookUp(permute_dims, i); + new_win.add_dimensions(); + new_win.mutable_dimensions(i)->set_stride( + consumer->window().dimensions(dim).stride()); + new_win.mutable_dimensions(i)->set_size( + consumer->window().dimensions(dim).size()); + if (i == old_space_dim) { + new_win.mutable_dimensions(i)->set_padding_high(0); + new_win.mutable_dimensions(i)->set_padding_low(0); + } else { + new_win.mutable_dimensions(i)->set_padding_high( + consumer->window().dimensions(dim).padding_high()); + new_win.mutable_dimensions(i)->set_padding_low( + consumer->window().dimensions(dim).padding_low()); + } + new_win.mutable_dimensions(i)->set_window_dilation( + consumer->window().dimensions(dim).window_dilation()); + new_win.mutable_dimensions(i)->set_base_dilation( + consumer->window().dimensions(dim).base_dilation()); + new_win.mutable_dimensions(i)->set_window_reversal( + consumer->window().dimensions(dim).window_reversal()); + } + auto init_val = consumer->mutable_operand(1); + auto reduce_comp = consumer->to_apply(); + + new_shape = first_operand->shape(); + + TF_ASSIGN_OR_RETURN(auto new_reduce_window_shape, + ShapeInference::InferReduceWindowShape( + new_shape, init_val->shape(), new_win)); + HloInstruction* new_consumer = + 
computation_->AddInstruction(HloInstruction::CreateReduceWindow( + new_reduce_window_shape, first_operand, init_val, new_win, + reduce_comp)); + + // Replace operand 0. + TF_CHECK_OK( + new_consumer->ReplaceOperandWithDifferentShape(0, first_operand)); + VLOG(1) << "New reduce window " << new_consumer->ToString(); + // We do not set instr_to_dim_permute_map_ here because no further + // propagation is needed here. + old_to_new_instrs_[consumer] = new_consumer; + instr_to_dim_map_[consumer] = dim_map_val; + + instr_to_dim_permute_map_[new_consumer] = std::vector( + instr_to_dim_permute_map_[old_to_new_instrs_[consumer->mutable_operand( + 0)]]); + + return true; + } + + LOG(FATAL) << "Trying to propagate through an unsupported instruction " + << consumer->ToString(); + return true; +} + +StatusOr ConvolutionVisitor::SelectValidPortion( + HloInstruction* new_instr, HloInstruction* old_instr, + HloInstruction* select_val, int64 new_batch_dim, int64 new_space_dim, + int64 old_batch_dim, int64 old_space_dim) { + auto new_shape = new_instr->shape(); + auto old_shape = old_instr->shape(); + VLOG(1) << "In SelectValidPortion new_batch_dim " << new_batch_dim + << " new_space_dim " << new_space_dim << " old_batch_dim " + << old_batch_dim << " old_space_dim " << old_space_dim; + const int64 new_batch_size = new_shape.dimensions(new_batch_dim); + const int64 new_space_size = new_shape.dimensions(new_space_dim); + const int64 old_batch_size = old_shape.dimensions(old_batch_dim); + const int64 old_space_size = old_shape.dimensions(old_space_dim); + CHECK_EQ(new_batch_size % old_batch_size, 0); + const int64 num_splits = new_batch_size / old_batch_size; + // Build a constant PRED to decide which elements in the split dimension + // are from halo. 
+ tensorflow::core::Bitmap b(new_batch_size * new_space_size); + for (int k = 0; k < new_batch_size * new_space_size; ++k) { + const int64 space_index = k % new_space_size; + const int64 batch_index = (k / new_space_size) % num_splits; + if (batch_index * new_space_size + space_index < old_space_size) { + b.set(k); + } else { + b.clear(k); + } + } + + auto arg_literal = LiteralUtil::CreateR1(b); + HloInstruction* slice_mask = computation_->AddInstruction( + HloInstruction::CreateConstant(std::move(arg_literal))); + + std::vector slice_mask_reshape_dims(2); + slice_mask_reshape_dims[0] = new_batch_size; + slice_mask_reshape_dims[1] = new_space_size; + + TF_ASSIGN_OR_RETURN(HloInstruction * slice_mask_reshaped, + MakeReshapeHlo(slice_mask_reshape_dims, slice_mask)); + + // Broadcast the mask in all dimensions of the activations. + HloInstruction* shape_mask = + MakeBroadcastHlo(slice_mask_reshaped, {new_batch_dim, new_space_dim}, + new_instr->shape().dimensions()); + + VLOG(1) << "Shape mask made " << shape_mask->ToString(); + + HloInstruction* zeroes = + MakeBroadcastHlo(select_val, {}, new_instr->shape().dimensions()); + + TF_ASSIGN_OR_RETURN(new_instr, MakeSelectHlo(shape_mask, new_instr, zeroes)); + + return new_instr; +} + +StatusOr ConvolutionVisitor::BatchToSpace( + HloInstruction* old_instr) { + if (batch_to_space_map_.count(old_instr)) { + return batch_to_space_map_[old_instr]; + } + auto result = instr_to_dim_map_[old_instr]; + const int64 old_batch_dim = result.first; + const int64 old_space_dim = result.second; + + const int64 old_batch_size = old_instr->shape().dimensions(old_batch_dim); + CHECK(old_to_new_instrs_.contains(old_instr)); + auto new_instr = old_to_new_instrs_[old_instr]; + VLOG(2) << "old_batch_dim " << old_batch_dim << " old_space_dim " + << old_space_dim << " new_instr " << new_instr->ToString() + << " permute dims " << instr_to_dim_permute_map_.count(new_instr); + CHECK(instr_to_dim_permute_map_.contains(new_instr)); + auto permute_dims 
= instr_to_dim_permute_map_[new_instr]; + const int64 batch_dim = DimLookUp(permute_dims, old_batch_dim); + const int64 space_dim = DimLookUp(permute_dims, old_space_dim); + const int64 batch_size = new_instr->shape().dimensions(batch_dim); + + std::vector new_dimensions(new_instr->shape().dimensions().begin(), + new_instr->shape().dimensions().end()); + new_dimensions[space_dim] *= (batch_size / old_batch_size); + new_dimensions[batch_dim] = old_batch_size; + // Reshape the output of the new conv into the old convolutions shape. + TF_ASSIGN_OR_RETURN(HloInstruction * reshape, + MakeReshapeHlo(new_dimensions, new_instr)); + + const int64 rank = old_instr->shape().rank(); + std::vector start_indices(rank, 0), + end_indices(new_dimensions.begin(), new_dimensions.end()), + strides(rank, 1); + end_indices[space_dim] = old_instr->shape().dimensions(old_space_dim); + + // This slicing is getting rid of the padding we added to evenly divide space. + TF_ASSIGN_OR_RETURN( + HloInstruction * output_slice, + MakeSliceHlo(reshape, start_indices, end_indices, strides)); + VLOG(1) << "Batch to space slice " << output_slice->ToString(); + std::vector transpose_dims(permute_dims); + TF_ASSIGN_OR_RETURN(HloInstruction * output_transpose, + MakeTransposeHlo(output_slice, transpose_dims)); + + old_instr->SetupDerivedInstruction(output_transpose); + + batch_to_space_map_[old_instr] = output_transpose; + return output_transpose; +} + +Status ConvolutionVisitor::PropagateOnUsers(HloInstruction* old_conv) { + std::queue> propagation_worklist; + + if (old_conv->user_count() == 0) { + TF_ASSIGN_OR_RETURN(HloInstruction * batch_to_space, + BatchToSpace(old_conv)); + VLOG(1) << "Replacing the root instruction to " + << batch_to_space->ToString(); + TF_CHECK_OK(computation_->ReplaceInstruction(old_conv, batch_to_space)); + VLOG(1) << "Replacement successful"; + return Status::OK(); + } + + int64 iteration_count = 0; + propagation_worklist.push( + std::make_pair(old_conv, 
old_conv->mutable_operand(0))); + + while (!propagation_worklist.empty()) { + auto top = propagation_worklist.front(); + auto node = top.first; + auto parent = top.second; + VLOG(1) << "Traversing for propagation operating on " << node->ToString(); + propagation_worklist.pop(); + + // Don't work on the same node again. + if (old_to_new_instrs_.count(node) > 0 && iteration_count != 0) { + continue; + } + + bool needs_further_propagation = true; + if (iteration_count != 0) { + // Do the space-to-batch propagation on this node. + TF_ASSIGN_OR_RETURN(needs_further_propagation, Propagate(node, parent)); + } + iteration_count++; + // If this is the root, no room for further propagation. + if (node->parent()->root_instruction() == node) { + // The below case does not need going back to space. + if (!needs_further_propagation) { + VLOG(1) << "Replacing the root instruction to " + << old_to_new_instrs_[node]->ToString(); + TF_CHECK_OK( + computation_->ReplaceInstruction(node, old_to_new_instrs_[node])); + continue; + } + + TF_ASSIGN_OR_RETURN(HloInstruction * batch_to_space, BatchToSpace(node)); + VLOG(1) << "Replacing the root instruction to " + << batch_to_space->ToString(); + TF_CHECK_OK(computation_->ReplaceInstruction(node, batch_to_space)); + } else { + if (!needs_further_propagation) { + TF_CHECK_OK( + computation_->ReplaceInstruction(node, old_to_new_instrs_[node])); + continue; + } + // Insert all users into the queue, as long as the ops are supported and + // the op is ready for propagation. If the op is unsupported, do + // batch-to-space. If not ready, mark as non-propagatable. 
+ for (auto user : node->users()) { + if (!SupportedOpForPropagation(user, node)) { + TF_ASSIGN_OR_RETURN(HloInstruction * batch_to_space, + BatchToSpace(node)); + for (int64 i = 0; i < user->operand_count(); ++i) { + if (user->operand(i) == node) { + TF_CHECK_OK(user->ReplaceOperandWith(i, batch_to_space)); + } + } + continue; + } + // If the instruction is ready for propagation, add it to the queue. + if (CanPropagate(user, node)) { + non_propagatable_instrs_.erase(user); + propagation_worklist.push(std::make_pair(user, node)); + } else { + // Mark it as non-propagatable for now, for later revisiting. + non_propagatable_instrs_.insert(user); + } + } + } + } + return Status::OK(); +} + +Status ConvolutionVisitor::PropagateOnConv(HloInstruction* convolution) { + auto activations_old = convolution->mutable_operand(0); + + CHECK(old_to_new_instrs_.contains(activations_old)); + auto activations_new = old_to_new_instrs_[activations_old]; + auto permute_dims = instr_to_dim_permute_map_[activations_new]; + + auto original_conv_dims = convolution->convolution_dimension_numbers(); + + const int64 old_space_dim = original_conv_dims.input_spatial_dimensions( + get_chosen_spatial_dim(convolution)); + const int64 old_split_dim_size = + convolution->mutable_operand(0)->shape().dimensions(old_space_dim); + + auto permuted_conv_dims_numbers = original_conv_dims; + + int64 activations_batch_dim = + DimLookUp(permute_dims, original_conv_dims.input_batch_dimension()); + int64 activations_feature_dim = + DimLookUp(permute_dims, original_conv_dims.input_feature_dimension()); + permuted_conv_dims_numbers.set_input_batch_dimension(activations_batch_dim); + permuted_conv_dims_numbers.set_input_feature_dimension( + activations_feature_dim); + + for (int64 i = 0; i < original_conv_dims.input_spatial_dimensions_size(); + ++i) { + permuted_conv_dims_numbers.set_input_spatial_dimensions( + i, DimLookUp(permute_dims, + original_conv_dims.input_spatial_dimensions(i))); + } + + int64 
spatial_dimension_to_split = + permuted_conv_dims_numbers.input_spatial_dimensions( + get_chosen_spatial_dim(convolution)); + + const int64 old_batch_dim = original_conv_dims.input_batch_dimension(); + const int64 old_batch_size = + activations_old->shape().dimensions(old_batch_dim); + + const int64 input_dim_size = activations_old->shape().dimensions( + permuted_conv_dims_numbers.input_spatial_dimensions( + get_chosen_spatial_dim(convolution))); + + VLOG(1) << "Propagating on conv activations_batch_dim " + << activations_batch_dim << " spatial_dimension_to_split " + << spatial_dimension_to_split << " old_batch_size " << old_batch_size; + TF_ASSIGN_OR_RETURN( + activations_new, + BringSpaceNextToBatch(activations_new, permuted_conv_dims_numbers, + spatial_dimension_to_split, activations_batch_dim)); + + auto select_val = computation_->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(activations_new->shape().element_type()))); + + TF_ASSIGN_OR_RETURN( + activations_new, + SelectValidPortion(activations_new, activations_old, select_val, + activations_batch_dim, spatial_dimension_to_split, + old_batch_dim, old_space_dim)); + // Create the new convolution dim numbers. 
+ auto new_dim_numbers = permuted_conv_dims_numbers; + + auto kernel = convolution->mutable_operand(1); + const auto& kernel_shape = kernel->shape(); + const int64 kernel_spatial_dim_size = kernel_shape.dimensions( + permuted_conv_dims_numbers.kernel_spatial_dimensions( + get_chosen_spatial_dim(convolution))); + + const int64 inherent_low_padding = + convolution->window() + .dimensions(get_chosen_spatial_dim(convolution)) + .padding_low(); + const int64 inherent_high_padding = + convolution->window() + .dimensions(get_chosen_spatial_dim(convolution)) + .padding_high(); + const int64 stride = convolution->window() + .dimensions(get_chosen_spatial_dim(convolution)) + .stride(); + + const int64 spatial_size = + input_dim_size + inherent_low_padding + inherent_high_padding; + VLOG(1) << "spatial size " << spatial_size; + + const int64 num_splits = kNewBatchSize / old_batch_size; + const int64 output_offsets = convolution->shape().dimensions( - dim_numbers.output_spatial_dimensions(kChosenSpatialDim)); + permuted_conv_dims_numbers.output_spatial_dimensions( + get_chosen_spatial_dim(convolution))); const int64 output_offsets_per_split = CeilOfRatio(output_offsets, num_splits); @@ -221,7 +1008,206 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) { } const int64 slice_size = - spatial_split_size + kernel_spatial_dim_size - stride; + spatial_split_size + + std::max(kernel_spatial_dim_size - stride, static_cast(0)); + + VLOG(1) << "spatial_split_size " << spatial_split_size << " slice_size " + << slice_size; + + const int64 new_batch_size = + activations_new->shape().dimensions(activations_batch_dim); + const int64 new_space_size = + activations_new->shape().dimensions(spatial_dimension_to_split); + // In the below case, we cannot use the activations directly for Halo + // Duplication. We must reshape them. 
+ if (new_space_size != spatial_split_size) { + std::vector new_dimensions( + activations_new->shape().dimensions().begin(), + activations_new->shape().dimensions().end()); + const int64 reshaped_space_size = + new_space_size * new_batch_size / old_batch_size; + new_dimensions[spatial_dimension_to_split] = reshaped_space_size; + new_dimensions[activations_batch_dim] = old_batch_size; + + // Reshape the output of the new conv into the old convolutions shape. + TF_ASSIGN_OR_RETURN(HloInstruction * reshaped_activations, + MakeReshapeHlo(new_dimensions, activations_new)); + + PaddingConfig padding_config = + MakeNoPaddingConfig(reshaped_activations->shape().dimensions_size()); + padding_config.mutable_dimensions(spatial_dimension_to_split) + ->set_edge_padding_high(spatial_split_size * new_batch_size - + reshaped_space_size); + padding_config.mutable_dimensions(spatial_dimension_to_split) + ->set_edge_padding_low(0); + HloInstruction* padding = + computation_->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(reshaped_activations->shape().element_type()))); + + TF_ASSIGN_OR_RETURN( + reshaped_activations, + MakePadHlo(reshaped_activations, padding, padding_config)); + + std::vector reshape_back_dims( + reshaped_activations->shape().dimensions().begin(), + reshaped_activations->shape().dimensions().end()); + + reshape_back_dims[spatial_dimension_to_split] = spatial_split_size; + reshape_back_dims[activations_batch_dim] = new_batch_size; + + TF_ASSIGN_OR_RETURN( + reshaped_activations, + MakeReshapeHlo(reshape_back_dims, reshaped_activations)); + + TF_ASSIGN_OR_RETURN( + activations_new, + HaloDuplicateWithSlice(reshaped_activations, spatial_dimension_to_split, + activations_batch_dim, old_batch_size, + /*low_padding=*/inherent_low_padding, + /*high_padding=*/inherent_high_padding, + slice_size - spatial_split_size, + old_split_dim_size)); + } else { + TF_ASSIGN_OR_RETURN( + activations_new, + HaloDuplicateWithSlice(activations_new, 
spatial_dimension_to_split, + activations_batch_dim, old_batch_size, + /*low_padding=*/inherent_low_padding, + /*high_padding=*/inherent_high_padding, + slice_size - spatial_split_size, + old_split_dim_size)); + } + + // We will generate output such that batch is followed by the split spatial + // dimension. + const int64 rank = (convolution->shape().rank()); + std::vector transpose_dims(rank); + int dim_count = 0; + std::map dim_map; + + for (int j = 0; + j < permuted_conv_dims_numbers.output_spatial_dimensions_size(); ++j) { + if (j == get_chosen_spatial_dim(convolution)) { + dim_map[permuted_conv_dims_numbers.output_batch_dimension()] = dim_count; + new_dim_numbers.set_output_batch_dimension(dim_count++); + } + dim_map[permuted_conv_dims_numbers.output_spatial_dimensions(j)] = + dim_count; + new_dim_numbers.set_output_spatial_dimensions(j, dim_count); + dim_count++; + } + + dim_map[permuted_conv_dims_numbers.output_feature_dimension()] = dim_count; + new_dim_numbers.set_output_feature_dimension(dim_count); + + int p = 0; + for (const auto& entry : dim_map) { + transpose_dims[p] = entry.second; + p++; + } + + auto new_window = convolution->window(); + new_window.mutable_dimensions(get_chosen_spatial_dim(convolution)) + ->set_padding_high(0); + new_window.mutable_dimensions(get_chosen_spatial_dim(convolution)) + ->set_padding_low(0); + TF_ASSIGN_OR_RETURN( + HloInstruction * new_conv, + MakeConvolveHlo(activations_new, /*rhs=*/convolution->mutable_operand(1), + convolution->feature_group_count(), + convolution->batch_group_count(), new_window, + new_dim_numbers, convolution->precision_config())); + convolution->SetupDerivedInstruction(new_conv); + + old_to_new_instrs_[convolution] = new_conv; + VLOG(1) << "Space-to-batched convolution " << new_conv->ToString(); + + instr_to_dim_map_[convolution] = + std::make_pair(original_conv_dims.output_batch_dimension(), + original_conv_dims.output_spatial_dimensions( + get_chosen_spatial_dim(convolution))); + + 
instr_to_dim_permute_map_[new_conv] = std::vector(transpose_dims); + + convs_to_visit_.erase(convolution); + return Status::OK(); +} + +Status ConvolutionVisitor::PerformSpaceToBatchOnConvolution( + HloInstruction* convolution) { + VLOG(1) << "Handling conv " << convolution->ToString(); + changed_ = false; + + ConvolutionDimensionNumbers dim_numbers = + convolution->convolution_dimension_numbers(); + + int64 activations_batch_dim = dim_numbers.input_batch_dimension(); + + const int64 old_batch_size = + convolution->operand(0)->shape().dimensions(activations_batch_dim); + + auto kernel = convolution->mutable_operand(1); + const auto& kernel_shape = kernel->shape(); + const int64 kernel_spatial_dim_size = + kernel_shape.dimensions(dim_numbers.kernel_spatial_dimensions( + get_chosen_spatial_dim(convolution))); + + auto activations = convolution->mutable_operand(0); + + int64 spatial_dimension_to_split = + dim_numbers.input_spatial_dimensions(get_chosen_spatial_dim(convolution)); + + const int64 input_dim_size = + activations->shape().dimensions(dim_numbers.input_spatial_dimensions( + get_chosen_spatial_dim(convolution))); + + const int64 inherent_low_padding = + convolution->window() + .dimensions(get_chosen_spatial_dim(convolution)) + .padding_low(); + const int64 inherent_high_padding = + convolution->window() + .dimensions(get_chosen_spatial_dim(convolution)) + .padding_high(); + const bool inherent_padding_needed = + inherent_low_padding != 0 || inherent_high_padding != 0; + + const int64 stride = convolution->window() + .dimensions(get_chosen_spatial_dim(convolution)) + .stride(); + + const int64 spatial_size = + input_dim_size + inherent_low_padding + inherent_high_padding; + VLOG(1) << "spatial size " << spatial_size; + + const int64 num_splits = kNewBatchSize / old_batch_size; + auto original_conv = convolution; + + // We'd need transposition of activations here such that batch and space dim + // that is being split are adjacent (in that order). 
+ TF_ASSIGN_OR_RETURN( + activations, + BringSpaceNextToBatch(activations, dim_numbers, + spatial_dimension_to_split, activations_batch_dim)); + // Create the new convolution dim numbers. + auto new_dim_numbers = dim_numbers; + + const int64 output_offsets = + convolution->shape().dimensions(dim_numbers.output_spatial_dimensions( + get_chosen_spatial_dim(convolution))); + const int64 output_offsets_per_split = + CeilOfRatio(output_offsets, num_splits); + + int64 spatial_split_size = output_offsets_per_split * stride; + // Keep increasing the split size so that overall size isn't smaller than the + // original spatial dimension. + while (spatial_split_size * num_splits - spatial_size < 0) { + spatial_split_size += stride; + } + + const int64 slice_size = + spatial_split_size + + std::max(kernel_spatial_dim_size - stride, static_cast(0)); // Pad spatial dim. const int64 pad_size = spatial_split_size * num_splits - spatial_size; @@ -276,118 +1262,26 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) { VLOG(1) << "First reshape done " << batch_increased_reshape->ToString(); - // Create a gather HLO. We extract slices for given spatial and batch - // dimensions. 
- std::vector slice_sizes(activations->shape().dimensions().begin(), - activations->shape().dimensions().end()); - slice_sizes[spatial_dimension_to_split] = 1; - slice_sizes[activations_batch_dim] = 1; - - const int64 rank = activations->shape().dimensions_size(); - std::vector offset_dims; - std::vector collapsed_dims(2); - int64 collapsed_dim_counter = 0; - bool seen_collapsed = false; - for (int j = 0; j < rank; ++j) { - if (j == activations_batch_dim || j == spatial_dimension_to_split) { - collapsed_dims[collapsed_dim_counter++] = j; - seen_collapsed = true; - } else { - if (seen_collapsed) { - offset_dims.push_back(j - 1); - } else { - offset_dims.push_back(j); - } - } - } - std::vector start_index(2); - start_index[0] = activations_batch_dim; - start_index[1] = spatial_dimension_to_split; - - xla::GatherDimensionNumbers gather_dim_numbers = - HloGatherInstruction::MakeGatherDimNumbers( - /*offset_dims=*/offset_dims, - /*collapsed_slice_dims=*/collapsed_dims, - /*start_index_map=*/start_index, - /*index_vector_dim=*/1); - - // Create a static index for the gather. 
- auto arg_array = absl::make_unique>( - slice_size * old_batch_size * num_splits, 2); - auto generate_cell = [&](int64 i, int64 j, int32* value) { - const int64 row_number = i / (num_splits * old_batch_size); - if (row_number >= spatial_split_size) { - if (j == 0) { - *value = i % (num_splits * old_batch_size) + 1; - if (num_splits * old_batch_size <= - i % (num_splits * old_batch_size) + 1) { - *value = 0; - } - } else { - *value = row_number - spatial_split_size; - } - } else { - if (j == 0) { - *value = i % (num_splits * old_batch_size); - } else { - *value = row_number; - } - } - }; - - arg_array->Each(generate_cell); - - auto arg_literal = LiteralUtil::CreateR2FromArray2D(*arg_array); - VLOG(1) << " arg_literal " << arg_literal.ToString(); - HloInstruction* index = computation_->AddInstruction( - HloInstruction::CreateConstant(std::move(arg_literal))); - - VLOG(1) << "slice_size " << slice_size; - std::vector gather_output_shape_dims( - activations->shape().dimensions().begin(), - activations->shape().dimensions().end()); - - gather_output_shape_dims[activations_batch_dim] = - slice_size * old_batch_size * num_splits; - gather_output_shape_dims.erase(gather_output_shape_dims.begin() + - spatial_dimension_to_split); - - auto gather_shape = ShapeUtil::MakeShape(activations->shape().element_type(), - gather_output_shape_dims); - - HloInstruction* gather = computation_->AddInstruction( - HloInstruction::CreateGather(gather_shape, batch_increased_reshape, index, - gather_dim_numbers, slice_sizes, false)); - - std::vector gather_reshape_dimensions( - activations->shape().dimensions().begin(), - activations->shape().dimensions().end()); - - gather_reshape_dimensions[activations_batch_dim] = slice_size; - gather_reshape_dimensions[spatial_dimension_to_split] = - old_batch_size * num_splits; - - // Reshape the gather so that batch is split out. 
TF_ASSIGN_OR_RETURN(activations, - MakeReshapeHlo(gather_reshape_dimensions, gather)); + HaloDuplicateWithSlice( + batch_increased_reshape, spatial_dimension_to_split, + activations_batch_dim, old_batch_size, + /*low_padding=*/0, /*high_padding=*/0, + slice_size - spatial_split_size, input_dim_size)); VLOG(1) << "Batch merge done " << activations->ToString(); // Now, we rewrite the convolution with a larger batch. - // Set the batch and spatial dimensions for the new convolution. - new_dim_numbers.set_input_batch_dimension(spatial_dimension_to_split); - new_dim_numbers.set_input_spatial_dimensions(kChosenSpatialDim, - activations_batch_dim); - // We will generate output such that batch is followed by the split spatial // dimension. - std::vector transpose_dims(convolution->shape().rank()); + const int64 rank = convolution->shape().rank(); + std::vector transpose_dims(rank); int dim_count = 0; std::map dim_map; for (int j = 0; j < dim_numbers.output_spatial_dimensions_size(); ++j) { - if (j == kChosenSpatialDim) { + if (j == get_chosen_spatial_dim(convolution)) { dim_map[dim_numbers.output_batch_dimension()] = dim_count; new_dim_numbers.set_output_batch_dimension(dim_count++); } @@ -404,10 +1298,13 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) { transpose_dims[p] = entry.second; p++; } - + VLOG(1) << "New dim numbers " << new_dim_numbers.DebugString() + << " batch dim " << new_dim_numbers.input_batch_dimension(); auto new_window = convolution->window(); - new_window.mutable_dimensions(kChosenSpatialDim)->set_padding_high(0); - new_window.mutable_dimensions(kChosenSpatialDim)->set_padding_low(0); + new_window.mutable_dimensions(get_chosen_spatial_dim(convolution)) + ->set_padding_high(0); + new_window.mutable_dimensions(get_chosen_spatial_dim(convolution)) + ->set_padding_low(0); TF_ASSIGN_OR_RETURN( HloInstruction * new_conv, MakeConvolveHlo(activations, /*rhs=*/convolution->mutable_operand(1), @@ -416,52 +1313,38 @@ Status 
ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) { new_dim_numbers, convolution->precision_config())); convolution->SetupDerivedInstruction(new_conv); - VLOG(1) << "new_conv " << new_conv->ToString(); + VLOG(1) << "Space-to-batched convolution " << new_conv->ToString(); const int64 output_split_spatial_dim = - new_dim_numbers.output_spatial_dimensions(kChosenSpatialDim); + new_dim_numbers.output_spatial_dimensions( + get_chosen_spatial_dim(convolution)); const int64 output_batch_dim = new_dim_numbers.output_batch_dimension(); + VLOG(1) << "output_batch_dim " << output_batch_dim + << " output_split_spatial_dim " << output_split_spatial_dim; - Shape new_shape = new_conv->shape(); - const int64 new_batch_size = new_shape.dimensions(output_batch_dim); - const int64 new_spatial_dim_size = - new_shape.dimensions(output_split_spatial_dim); + auto select_val = computation_->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(new_conv->shape().element_type()))); - CHECK_EQ(new_batch_size % old_batch_size, 0); - - const int64 output_split_batch_size = new_batch_size / old_batch_size; - - std::vector new_dimensions(new_conv->shape().dimensions().begin(), - new_conv->shape().dimensions().end()); - new_dimensions[output_split_spatial_dim] = - output_split_batch_size * new_spatial_dim_size; - new_dimensions[new_dim_numbers.output_batch_dimension()] = old_batch_size; - - // Reshape the output of the new conv into the old convolutions shape. - TF_ASSIGN_OR_RETURN(HloInstruction * reshape, - MakeReshapeHlo(new_dimensions, new_conv)); - convolution->SetupDerivedInstruction(reshape); - - std::vector start_indices(rank, 0), - end_indices(new_dimensions.begin(), new_dimensions.end()), - strides(rank, 1); - end_indices[output_split_spatial_dim] = convolution->shape().dimensions( - dim_numbers.output_spatial_dimensions(kChosenSpatialDim)); - - // This slicing is getting rid of the padding we added to evenly divide space. 
TF_ASSIGN_OR_RETURN( - HloInstruction * output_slice, - MakeSliceHlo(reshape, start_indices, end_indices, strides)); - convolution->SetupDerivedInstruction(output_slice); + new_conv, SelectValidPortion(new_conv, original_conv, select_val, + output_batch_dim, output_split_spatial_dim, + dim_numbers.output_batch_dimension(), + dim_numbers.output_spatial_dimensions( + get_chosen_spatial_dim(original_conv)))); + old_to_new_instrs_[original_conv] = new_conv; - TF_ASSIGN_OR_RETURN(HloInstruction * output_transpose, - MakeTransposeHlo(output_slice, transpose_dims)); - convolution->SetupDerivedInstruction(output_transpose); + instr_to_dim_map_[original_conv] = + std::make_pair(dim_numbers.output_batch_dimension(), + dim_numbers.output_spatial_dimensions( + get_chosen_spatial_dim(original_conv))); - VLOG(1) << "output_transpose " << output_transpose->ToString(); + instr_to_dim_permute_map_[new_conv] = std::vector(transpose_dims); + + TF_CHECK_OK(PropagateOnUsers(original_conv)); changed_ = true; - return computation_->ReplaceInstruction(convolution, output_transpose); + + return Status::OK(); } } // namespace @@ -470,10 +1353,13 @@ StatusOr ConvolutionSpaceToBatchConverter::Run(HloModule* module) { XLA_VLOG_LINES(2, "ConvolutionSpaceToBatchConverter::Run(), before:\n" + module->ToString()); bool changed = false; + for (auto* comp : module->MakeNonfusionComputations()) { - if (ConvolutionVisitor::Run(limit_on_batch_size_, comp)) { + ConvolutionVisitor visitor(limit_on_batch_size_, comp); + if (visitor.Run().ValueOrDie()) { changed = true; } + VLOG(1) << "Done operating on computation"; } XLA_VLOG_LINES(2, "ConvolutionSpaceToBatchConverter::Run(), after:\n" + module->ToString()); diff --git a/tensorflow/compiler/xla/service/space_to_batch_converter_test.cc b/tensorflow/compiler/xla/service/space_to_batch_converter_test.cc index bbc3882cde9..d53bb7d75f3 100644 --- a/tensorflow/compiler/xla/service/space_to_batch_converter_test.cc +++ 
b/tensorflow/compiler/xla/service/space_to_batch_converter_test.cc @@ -55,8 +55,9 @@ ENTRY computation { EXPECT_THAT(root->operand(0), op::Slice()); auto reshape = root->operand(0)->operand(0); EXPECT_THAT(reshape, op::Reshape()); - EXPECT_THAT(reshape->operand(0), op::Convolution()); + EXPECT_THAT(reshape->operand(0)->operand(1), op::Convolution()); const int64 batch_dim = reshape->operand(0) + ->operand(1) ->convolution_dimension_numbers() .output_batch_dimension(); // Verify that the transform has increased the batch size. @@ -77,30 +78,18 @@ TEST_F(ConvolutionSpaceToBatchConverterTest, SimpleBatch2) { TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, ParseAndReturnVerifiedModule(hlo_string)); - ConvolutionSpaceToBatchConverter converter(/*limit_on_batch_size=*/2); - ASSERT_TRUE(converter.Run(module.get()).ValueOrDie()); - auto computation = module->entry_computation(); - HloInstruction* root = computation->root_instruction(); - EXPECT_THAT(root, op::Transpose()); - EXPECT_THAT(root->operand(0), op::Slice()); - auto reshape = root->operand(0)->operand(0); - EXPECT_THAT(reshape, op::Reshape()); - EXPECT_THAT(reshape->operand(0), op::Convolution()); - const int64 batch_dim = reshape->operand(0) - ->convolution_dimension_numbers() - .output_batch_dimension(); - // Verify that the transform has increased the batch size. 
- EXPECT_GT(reshape->operand(0)->shape().dimensions(batch_dim), 1); + ConvolutionSpaceToBatchConverter converter; + ASSERT_FALSE(converter.Run(module.get()).ValueOrDie()); } -TEST_F(ConvolutionSpaceToBatchConverterTest, Batch4WithStrideAndPad) { +TEST_F(ConvolutionSpaceToBatchConverterTest, Batch1WithStrideAndPad) { string hlo_string = R"( HloModule module ENTRY computation { - %p0 = bf16[4,224,224,3]{3,2,1,0} parameter(0) + %p0 = bf16[1,224,224,3]{3,2,1,0} parameter(0) %p1 = bf16[7,7,3,64]{3,2,1,0} parameter(1) - ROOT %convolution.3 = bf16[4,112,112,64]{3,2,1,0} convolution(%p0, %p1), + ROOT %convolution.3 = bf16[1,112,112,64]{3,2,1,0} convolution(%p0, %p1), window={size=7x7 stride=2x2 pad=3_3x3_3}, dim_labels=b01f_01io->b01f } )"; @@ -115,8 +104,9 @@ TEST_F(ConvolutionSpaceToBatchConverterTest, Batch4WithStrideAndPad) { EXPECT_THAT(root->operand(0), op::Slice()); auto reshape = root->operand(0)->operand(0); EXPECT_THAT(reshape, op::Reshape()); - EXPECT_THAT(reshape->operand(0), op::Convolution()); + EXPECT_THAT(reshape->operand(0)->operand(1), op::Convolution()); const int64 batch_dim = reshape->operand(0) + ->operand(1) ->convolution_dimension_numbers() .output_batch_dimension(); From 5d3f9b173a84cf6f66da5d050ead977388843da4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Nov 2020 12:46:44 -0800 Subject: [PATCH 052/220] Go: Update generated wrapper functions for TensorFlow ops. 
PiperOrigin-RevId: 341465118 Change-Id: Icbaba2855dbbc7b5fcd9e209e70d1a323cd37efc --- tensorflow/go/op/wrappers.go | 215 +++++++++++++---------------------- 1 file changed, 76 insertions(+), 139 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index d65bd33f48f..f8c4149e1ab 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -14570,21 +14570,6 @@ func WholeFileReaderV2(scope *Scope, optional ...WholeFileReaderV2Attr) (reader_ return op.Output(0) } -// Generate a glob pattern matching all sharded file names. -func ShardedFilespec(scope *Scope, basename tf.Output, num_shards tf.Output) (filename tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ShardedFilespec", - Input: []tf.Input{ - basename, num_shards, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Saves the input tensors to disk. // // The size of `tensor_names` must match the number of tensors in `data`. `data[i]` @@ -22712,69 +22697,6 @@ func TanhGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { return op.Output(0) } -// RiscConvAttr is an optional argument to RiscConv. -type RiscConvAttr func(optionalAttr) - -// RiscConvDataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, height, width, channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, channels, height, width]. -// If not specified, defaults to "NHWC" -func RiscConvDataFormat(value string) RiscConvAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// RiscConvDilations sets the optional dilations attribute to value. -// -// value: 1-D tensor of length 4. The dilation factor for each dimension of -// `input`. 
If set to k > 1, there will be k-1 skipped cells between each -// filter element on that dimension. The dimension order is determined by the -// value of `data_format`, see above for details. Dilations in the batch and -// depth dimensions must be 1. -// If not specified, defaults to -func RiscConvDilations(value []int64) RiscConvAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes a 2-D convolution given 4-D `input` and `filter` tensors. -// -// Arguments: -// input: A 4-D tensor. The dimension order is interpreted according to the value -// of `data_format`, see below for details. -// filter: A 4-D tensor of shape -// `[filter_height, filter_width, in_channels, out_channels]` -// strides: 1-D tensor of length 4. The stride of the sliding window for each -// dimension of `input`. The dimension order is determined by the value of -// `data_format`, see below for details. -// -// Returns A 4-D tensor. The dimension order is determined by the value of -// `data_format`, see below for details. -func RiscConv(scope *Scope, input tf.Output, filter tf.Output, strides []int64, optional ...RiscConvAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RiscConv", - Input: []tf.Input{ - input, filter, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Computes hyperbolic tangent of `x` element-wise. // // Given an input tensor, this function computes hyperbolic tangent of every @@ -29820,67 +29742,6 @@ func RandomGammaGrad(scope *Scope, alpha tf.Output, sample tf.Output) (output tf return op.Output(0) } -// RandomShuffleAttr is an optional argument to RandomShuffle. -type RandomShuffleAttr func(optionalAttr) - -// RandomShuffleSeed sets the optional seed attribute to value. 
-// -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func RandomShuffleSeed(value int64) RandomShuffleAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// RandomShuffleSeed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomShuffleSeed2(value int64) RandomShuffleAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Randomly shuffles a tensor along its first dimension. -// -// The tensor is shuffled along dimension 0, such that each `value[j]` is mapped -// to one and only one `output[i]`. For example, a mapping that might occur for a -// 3x2 tensor is: -// -// ``` -// [[1, 2], [[5, 6], -// [3, 4], ==> [1, 2], -// [5, 6]] [3, 4]] -// ``` -// -// Arguments: -// value: The tensor to be shuffled. -// -// Returns A tensor of same shape and type as `value`, shuffled along its first -// dimension. -func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RandomShuffle", - Input: []tf.Input{ - value, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Creates a dataset that takes a Bernoulli sample of the contents of another dataset. // // There is no transformation in the `tf.data` Python API for creating this dataset. @@ -36205,6 +36066,21 @@ func SparseCross(scope *Scope, indices []tf.Output, values []tf.Output, shapes [ return op.Output(0), op.Output(1), op.Output(2) } +// Generate a glob pattern matching all sharded file names. 
+func ShardedFilespec(scope *Scope, basename tf.Output, num_shards tf.Output) (filename tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ShardedFilespec", + Input: []tf.Input{ + basename, num_shards, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Writes a scalar summary. // // Writes scalar `value` at `step` with `tag` using summary `writer`. @@ -37261,6 +37137,67 @@ func SparseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf return op.Output(0), op.Output(1), op.Output(2) } +// RandomShuffleAttr is an optional argument to RandomShuffle. +type RandomShuffleAttr func(optionalAttr) + +// RandomShuffleSeed sets the optional seed attribute to value. +// +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func RandomShuffleSeed(value int64) RandomShuffleAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// RandomShuffleSeed2 sets the optional seed2 attribute to value. +// +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomShuffleSeed2(value int64) RandomShuffleAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Randomly shuffles a tensor along its first dimension. +// +// The tensor is shuffled along dimension 0, such that each `value[j]` is mapped +// to one and only one `output[i]`. For example, a mapping that might occur for a +// 3x2 tensor is: +// +// ``` +// [[1, 2], [[5, 6], +// [3, 4], ==> [1, 2], +// [5, 6]] [3, 4]] +// ``` +// +// Arguments: +// value: The tensor to be shuffled. +// +// Returns A tensor of same shape and type as `value`, shuffled along its first +// dimension. 
+func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RandomShuffle", + Input: []tf.Input{ + value, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Selects elements from `x` or `y`, depending on `condition`. // // The `x`, and `y` tensors must all have the same shape, and the From 6ef526e75814688a008586a95846214b6ffd931b Mon Sep 17 00:00:00 2001 From: Marcello Maggioni Date: Mon, 9 Nov 2020 12:52:38 -0800 Subject: [PATCH 053/220] [XLA] Stop the bf16 conversion folding from converting unused tuple outputs. The BF16 conversion folder thinks that an unused tuple output is a candidate for conversion folding (even if it's not used by any convert). Stop it from doing that. Also constrain_layout() AllReduce shouldn't be optimized by bf16 conversion folding. Also add some extra AllReduce test cases. PiperOrigin-RevId: 341466256 Change-Id: I3f3cf2fb2fb7bb6c301af4e50171f36ea9ddb56e --- .../service/bfloat16_conversion_folding.cc | 8 +- .../compiler/xla/tests/all_reduce_test.cc | 83 +++++++++++++++++++ 2 files changed, 89 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc b/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc index 4303b0f5763..29ad2943f2a 100644 --- a/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc +++ b/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc @@ -188,8 +188,8 @@ Status BFloat16ConversionFoldingVisitor::DefaultAction(HloInstruction* hlo) { } Status BFloat16ConversionFoldingVisitor::HandleAllReduce(HloInstruction* crs) { - if (crs->IsCrossModuleAllReduce()) { - // Cross-module all-reduce has side effect. + if (crs->HasSideEffectNoRecurse()) { + // Do not perform optimization on side-effected AllReduce.
return Status::OK(); } // First use DefaultAction() to handle the operands. It can't handle @@ -226,6 +226,10 @@ Status BFloat16ConversionFoldingVisitor::HandleAllReduce(HloInstruction* crs) { // Fold conversions only when all the get-tuple-elements' users are // conversions from F32 to BF16. auto all_gte_users_are_bf16_convert = [&per_tuple_element_gtes, i]() { + // If no uses then return false. (As no uses are bf16 converts). + if (per_tuple_element_gtes[i].empty()) { + return false; + } for (auto gte : per_tuple_element_gtes[i]) { if (!AllUsersAreF32ToBF16Converts(gte)) { return false; diff --git a/tensorflow/compiler/xla/tests/all_reduce_test.cc b/tensorflow/compiler/xla/tests/all_reduce_test.cc index 33a8db8de32..9f1f0030860 100644 --- a/tensorflow/compiler/xla/tests/all_reduce_test.cc +++ b/tensorflow/compiler/xla/tests/all_reduce_test.cc @@ -100,5 +100,88 @@ XLA_TEST_F(TrivialAllReduceTest, ConstantOperand) { ExecuteAndTransfer(std::move(module), {&literal0})); } +XLA_TEST_F(TrivialAllReduceTest, AllReduceU8) { + const char* module_str = R"( +HloModule test + +%AddComputation.15 { + %x.16 = u8[] parameter(0) + %y.17 = u8[] parameter(1) + ROOT %add.18 = u8[] add(u8[] %x.16, u8[] %y.17) +} + +ENTRY %test_computation { + %constant.4 = u8[] constant(0), metadata={op_type="prim::Constant" source_file="main@test_all_reduce_int.py" source_line=17} + %reshape.5 = u8[1]{0} reshape(u8[] %constant.4), metadata={op_type="aten::expand" source_file="main@test_all_reduce_int.py" source_line=17} + %broadcast.6 = u8[1]{0} broadcast(u8[1]{0} %reshape.5), dimensions={0}, metadata={op_type="aten::expand" source_file="main@test_all_reduce_int.py" source_line=17} + %reshape.7 = u8[] reshape(u8[1]{0} %broadcast.6), metadata={op_type="aten::expand" source_file="main@test_all_reduce_int.py" source_line=17} + %broadcast.8 = u8[8]{0} broadcast(u8[] %reshape.7), dimensions={}, metadata={op_type="aten::expand" source_file="main@test_all_reduce_int.py" source_line=17} + %constant.2 = 
u8[] constant(1), metadata={op_type="prim::Constant" source_file="main@test_all_reduce_int.py" source_line=18} + %reshape.3 = u8[1]{0} reshape(u8[] %constant.2), metadata={op_type="aten::view" source_file="__format__@tensor.py" source_line=563} + %constant.9 = s64[] constant(0), metadata={op_type="xla::update_slice" source_file="__format__@tensor.py" source_line=563} + %dynamic-update-slice.10 = u8[8]{0} dynamic-update-slice(u8[8]{0} %broadcast.8, u8[1]{0} %reshape.3, s64[] %constant.9), metadata={op_type="xla::update_slice" source_file="__format__@tensor.py" source_line=563} + %p0.1 = f32[] parameter(0), metadata={op_type="xla::device_data" source_file="_get_all_reduce_token@xla_model.py" source_line=463} + %convert.11 = u8[] convert(f32[] %p0.1), metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" source_line=560} + %tuple.12 = (u8[8]{0}, u8[]) tuple(u8[8]{0} %dynamic-update-slice.10, u8[] %convert.11), metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" source_line=560} + %get-tuple-element.13 = u8[8]{0} get-tuple-element((u8[8]{0}, u8[]) %tuple.12), index=0, metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" source_line=560} + %get-tuple-element.14 = u8[] get-tuple-element((u8[8]{0}, u8[]) %tuple.12), index=1, metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" source_line=560} + %all-reduce.19 = (u8[8]{0}, u8[]) all-reduce(u8[8]{0} %get-tuple-element.13, u8[] %get-tuple-element.14), replica_groups={}, constrain_layout=true, to_apply=%AddComputation.15, metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" source_line=560} + %get-tuple-element.21 = u8[] get-tuple-element((u8[8]{0}, u8[]) %all-reduce.19), index=1, metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" source_line=560} + %convert.22 = f32[] convert(u8[] %get-tuple-element.21), metadata={op_type="xla::cross_replica_sum" 
source_file="all_reduce@xla_model.py" source_line=560} + %get-tuple-element.20 = u8[8]{0} get-tuple-element((u8[8]{0}, u8[]) %all-reduce.19), index=0, metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" source_line=560} + ROOT %tuple.23 = (u8[8]{0}) tuple(u8[8]{0} %get-tuple-element.20) +})"; + + auto module = + ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest()) + .ValueOrDie(); + auto literal_in = LiteralUtil::CreateR0(0); + auto literal0 = LiteralUtil::CreateR1({1, 0, 0, 0, 0, 0, 0, 0}); + EXPECT_EQ(LiteralUtil::MakeTuple({&literal0}), + ExecuteAndTransfer(std::move(module), {&literal_in})); +} + +XLA_TEST_F(TrivialAllReduceTest, AllReduceS32) { + const char* module_str = R"( + +HloModule test + +%AddComputation.15 { + %x.16 = s32[] parameter(0) + %y.17 = s32[] parameter(1) + ROOT %add.18 = s32[] add(s32[] %x.16, s32[] %y.17) +} + +ENTRY %test_computation { + %constant.4 = s32[] constant(0), metadata={op_type="prim::Constant" source_file="main@test_all_reduce_int.py" source_line=17} + %reshape.5 = s32[1]{0} reshape(s32[] %constant.4), metadata={op_type="aten::expand" source_file="main@test_all_reduce_int.py" source_line=17} + %broadcast.6 = s32[1]{0} broadcast(s32[1]{0} %reshape.5), dimensions={0}, metadata={op_type="aten::expand" source_file="main@test_all_reduce_int.py" source_line=17} + %reshape.7 = s32[] reshape(s32[1]{0} %broadcast.6), metadata={op_type="aten::expand" source_file="main@test_all_reduce_int.py" source_line=17} + %broadcast.8 = s32[8]{0} broadcast(s32[] %reshape.7), dimensions={}, metadata={op_type="aten::expand" source_file="main@test_all_reduce_int.py" source_line=17} + %constant.2 = s32[] constant(1), metadata={op_type="prim::Constant" source_file="main@test_all_reduce_int.py" source_line=18} + %reshape.3 = s32[1]{0} reshape(s32[] %constant.2), metadata={op_type="aten::view" source_file="__format__@tensor.py" source_line=563} + %constant.9 = s64[] constant(0), metadata={op_type="xla::update_slice" 
source_file="__format__@tensor.py" source_line=563} + %dynamic-update-slice.10 = s32[8]{0} dynamic-update-slice(s32[8]{0} %broadcast.8, s32[1]{0} %reshape.3, s64[] %constant.9), metadata={op_type="xla::update_slice" source_file="__format__@tensor.py" source_line=563} + %p0.1 = f32[] parameter(0), metadata={op_type="xla::device_data" source_file="_get_all_reduce_token@xla_model.py" source_line=463} + %convert.11 = s32[] convert(f32[] %p0.1), metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" source_line=560} + %tuple.12 = (s32[8]{0}, s32[]) tuple(s32[8]{0} %dynamic-update-slice.10, s32[] %convert.11), metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" source_line=560} + %get-tuple-element.13 = s32[8]{0} get-tuple-element((s32[8]{0}, s32[]) %tuple.12), index=0, metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" source_line=560} + %get-tuple-element.14 = s32[] get-tuple-element((s32[8]{0}, s32[]) %tuple.12), index=1, metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" source_line=560} + %all-reduce.19 = (s32[8]{0}, s32[]) all-reduce(s32[8]{0} %get-tuple-element.13, s32[] %get-tuple-element.14), replica_groups={}, constrain_layout=true, to_apply=%AddComputation.15, metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" source_line=560} + %get-tuple-element.21 = s32[] get-tuple-element((s32[8]{0}, s32[]) %all-reduce.19), index=1, metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" source_line=560} + %convert.22 = f32[] convert(s32[] %get-tuple-element.21), metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" source_line=560} + %get-tuple-element.20 = s32[8]{0} get-tuple-element((s32[8]{0}, s32[]) %all-reduce.19), index=0, metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" source_line=560} + ROOT %tuple.23 = (s32[8]{0}) tuple(s32[8]{0} 
%get-tuple-element.20) +})"; + + auto module = + ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest()) + .ValueOrDie(); + auto literal_in = LiteralUtil::CreateR0(0); + auto literal0 = LiteralUtil::CreateR1({1, 0, 0, 0, 0, 0, 0, 0}); + EXPECT_EQ(LiteralUtil::MakeTuple({&literal0}), + ExecuteAndTransfer(std::move(module), {&literal_in})); +} + } // namespace } // namespace xla From 4cf039a9877a5e7a73cf3bc189af0bd03d2cb422 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Mon, 9 Nov 2020 12:58:18 -0800 Subject: [PATCH 054/220] Update Cython to 0.29.21. This appears to be necessary for Python 3.9. PiperOrigin-RevId: 341467392 Change-Id: I729ee9fd6e6fc397788c4b865699908c11859108 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 614b39cf91f..8f1c14d947b 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -895,12 +895,12 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): name = "cython", build_file = clean_dep("//third_party:cython.BUILD"), delete = ["BUILD.bazel"], - sha256 = "bccc9aa050ea02595b2440188813b936eaf345e85fb9692790cecfe095cf91aa", - strip_prefix = "cython-0.28.4", + sha256 = "e2e38e1f0572ca54d6085df3dec8b607d20e81515fb80215aed19c81e8fe2079", + strip_prefix = "cython-0.29.21", system_build_file = clean_dep("//third_party/systemlibs:cython.BUILD"), urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/github.com/cython/cython/archive/0.28.4.tar.gz", - "https://github.com/cython/cython/archive/0.28.4.tar.gz", + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/cython/cython/archive/0.29.21.tar.gz", + "https://github.com/cython/cython/archive/0.29.21.tar.gz", ], ) From 215aab70e4716f0d691d29fa81a1251764eb56da Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Mon, 9 Nov 2020 13:32:49 -0800 Subject: [PATCH 055/220] Remove usage of default_graph_seed internal TF symbol from Keras. 
PiperOrigin-RevId: 341474366 Change-Id: I221cc471ddcc086981c8597f8b49cd94f55330f6 --- tensorflow/python/keras/layers/gru_v2_test.py | 3 +-- tensorflow/python/keras/layers/lstm_v2_test.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/keras/layers/gru_v2_test.py b/tensorflow/python/keras/layers/gru_v2_test.py index db2b0a2e7b9..0422ce1a5f5 100644 --- a/tensorflow/python/keras/layers/gru_v2_test.py +++ b/tensorflow/python/keras/layers/gru_v2_test.py @@ -34,7 +34,6 @@ from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.framework import random_seed from tensorflow.python.keras import combinations from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras import testing_utils @@ -165,7 +164,7 @@ class GRUV2Test(keras_parameterized.TestCase): test_samples=0, input_shape=(timestep, input_shape), num_classes=rnn_state_size, - random_seed=random_seed.DEFAULT_GRAPH_SEED) + random_seed=87654321) y_train = np_utils.to_categorical(y_train, rnn_state_size) # For the last batch item of the test data, we filter out the last # timestep to simulate the variable length sequence and masking test. 
diff --git a/tensorflow/python/keras/layers/lstm_v2_test.py b/tensorflow/python/keras/layers/lstm_v2_test.py index c6cb9208357..4c2bbad2bba 100644 --- a/tensorflow/python/keras/layers/lstm_v2_test.py +++ b/tensorflow/python/keras/layers/lstm_v2_test.py @@ -35,7 +35,6 @@ from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.framework import random_seed from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras import testing_utils from tensorflow.python.keras.layers import recurrent as rnn_v1 @@ -339,7 +338,7 @@ class LSTMV2Test(keras_parameterized.TestCase): test_samples=0, input_shape=(timestep, input_shape), num_classes=rnn_state_size, - random_seed=random_seed.DEFAULT_GRAPH_SEED) + random_seed=87654321) y_train = np_utils.to_categorical(y_train, rnn_state_size) # For the last batch item of the test data, we filter out the last # timestep to simulate the variable length sequence and masking test. From 218bd071c8cf18642da93e2848421bc1a130f403 Mon Sep 17 00:00:00 2001 From: Kuangyuan Chen Date: Mon, 9 Nov 2020 13:43:35 -0800 Subject: [PATCH 056/220] Add Assign op to TF ODS. PiperOrigin-RevId: 341476721 Change-Id: Idae1996f08f29015912e457273acd0d8091895fc --- .../mlir/tensorflow/ir/tf_generated_ops.td | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index c20431f39b0..4d1af95c395 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -604,6 +604,29 @@ If `condition` evaluates to false, print the list of tensors in `data`. 
let hasCanonicalizer = 1; } +def TF_AssignOp : TF_Op<"Assign", [NoSideEffect]> { + let summary = "Update 'ref' by assigning 'value' to it."; + + let description = [{ +This operation outputs "ref" after the assignment is done. +This makes it easier to chain operations that need to use the reset value. + }]; + + let arguments = (ins + TF_Tensor:$ref, + TF_Tensor:$value, + + DefaultValuedAttr:$validate_shape, + DefaultValuedAttr:$use_locking + ); + + let results = (outs + TF_Tensor:$output_ref + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + def TF_AssignAddVariableOp : TF_Op<"AssignAddVariableOp", []> { let summary = "Adds a value to the current value of a variable."; From 4b1bec735b535aefbc2931474041ddb0f9816387 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Mon, 9 Nov 2020 13:50:57 -0800 Subject: [PATCH 057/220] Added GpuSpatialTensor interface. Using GpuSpatialTensor in GPUOperation, instead of cl::Tensor. cl::Tensor implements GpuSpatialTensor. PiperOrigin-RevId: 341478259 Change-Id: Id5ddc2435061df669204d697a2b2c85f1465fe95 --- tensorflow/lite/delegates/gpu/cl/BUILD | 2 + .../lite/delegates/gpu/cl/inference_context.h | 1 + .../lite/delegates/gpu/cl/kernels/BUILD | 1 + .../delegates/gpu/cl/kernels/gpu_operation.cc | 19 +++++++-- .../delegates/gpu/cl/kernels/gpu_operation.h | 10 ++--- tensorflow/lite/delegates/gpu/cl/tensor.h | 17 ++++---- .../lite/delegates/gpu/common/task/BUILD | 5 +++ .../delegates/gpu/common/task/gpu_tensor.h | 42 +++++++++++++++++++ 8 files changed, 80 insertions(+), 17 deletions(-) create mode 100644 tensorflow/lite/delegates/gpu/common/task/gpu_tensor.h diff --git a/tensorflow/lite/delegates/gpu/cl/BUILD b/tensorflow/lite/delegates/gpu/cl/BUILD index 4c402ca8ddf..96a28e3d484 100644 --- a/tensorflow/lite/delegates/gpu/cl/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/BUILD @@ -386,6 +386,7 @@ cc_library( ":opencl_wrapper", ":serialization_cc_fbs", ":storage_type_util", + ":tensor",
"//tensorflow/lite/delegates/gpu/cl/kernels:gpu_operation", "//tensorflow/lite/delegates/gpu/cl/selectors:operation_selector", "//tensorflow/lite/delegates/gpu/cl/selectors:special_selector", @@ -518,6 +519,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:tensor", "//tensorflow/lite/delegates/gpu/common:types", + "//tensorflow/lite/delegates/gpu/common/task:gpu_tensor", "//tensorflow/lite/delegates/gpu/common/task:tensor_desc", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.h b/tensorflow/lite/delegates/gpu/cl/inference_context.h index 0ea69f70548..b427462614b 100644 --- a/tensorflow/lite/delegates/gpu/cl/inference_context.h +++ b/tensorflow/lite/delegates/gpu/cl/inference_context.h @@ -31,6 +31,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/cl/model_hints.h" #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h" #include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor.h" #include "tensorflow/lite/delegates/gpu/common/model.h" #include "tensorflow/lite/delegates/gpu/common/precision.h" #include "tensorflow/lite/delegates/gpu/common/status.h" diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD index dc02045195a..7c9d2755962 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD @@ -641,6 +641,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/common/task:arguments", + "//tensorflow/lite/delegates/gpu/common/task:gpu_tensor", "//tensorflow/lite/delegates/gpu/common/task:tensor_desc", "@com_google_absl//absl/strings", ], diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc 
b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc index 9d6bc59f716..eb53840f841 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc @@ -18,6 +18,7 @@ limitations under the License. #include "absl/strings/substitute.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/util.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor.h" #include "tensorflow/lite/delegates/gpu/common/access_type.h" namespace tflite { @@ -106,14 +107,14 @@ bool OperationDef::IsBatchSupported() const { GPUOperation::GPUOperation(const OperationDef& definition) : definition_(definition) {} -void GPUOperation::SetSrc(Tensor* ptr, int index) { +void GPUOperation::SetSrc(GpuSpatialTensor* ptr, int index) { if (index >= src_.size()) { src_.resize(index + 1, nullptr); } src_[index] = ptr; } -void GPUOperation::SetDst(Tensor* ptr, int index) { +void GPUOperation::SetDst(GpuSpatialTensor* ptr, int index) { if (index >= dst_.size()) { dst_.resize(index + 1, nullptr); } @@ -213,10 +214,20 @@ void GPUOperation::AddDstTensor(const std::string& tensor_name, absl::Status GPUOperation::UpdateParams() { for (int i = 0; i < src_tensors_names_.size(); ++i) { - RETURN_IF_ERROR(cl_args_.SetObjectRef(src_tensors_names_[i], src_[i])); + const auto* cl_spatial_tensor = dynamic_cast(src_[i]); + if (!cl_spatial_tensor) { + return absl::InvalidArgumentError("Expected CLSpatialTensor."); + } + RETURN_IF_ERROR( + cl_args_.SetObjectRef(src_tensors_names_[i], cl_spatial_tensor)); } for (int i = 0; i < dst_tensors_names_.size(); ++i) { - RETURN_IF_ERROR(cl_args_.SetObjectRef(dst_tensors_names_[i], dst_[i])); + const auto* cl_spatial_tensor = dynamic_cast(dst_[i]); + if (!cl_spatial_tensor) { + return absl::InvalidArgumentError("Expected CLSpatialTensor."); + } + RETURN_IF_ERROR( + cl_args_.SetObjectRef(dst_tensors_names_[i], cl_spatial_tensor)); } 
RETURN_IF_ERROR(BindArguments(&cl_args_)); grid_size_ = GetGridSize(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h index b3f3da01598..49188469c92 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h @@ -30,11 +30,11 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h" #include "tensorflow/lite/delegates/gpu/cl/program_cache.h" #include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h" -#include "tensorflow/lite/delegates/gpu/cl/tensor.h" #include "tensorflow/lite/delegates/gpu/common/data_type.h" #include "tensorflow/lite/delegates/gpu/common/precision.h" #include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/common/task/arguments.h" +#include "tensorflow/lite/delegates/gpu/common/task/gpu_tensor.h" #include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h" #include "tensorflow/lite/delegates/gpu/common/types.h" @@ -114,8 +114,8 @@ class GPUOperation { absl::Status AddOperation(GPUOperation* operation); - void SetSrc(Tensor* ptr, int index = 0); - void SetDst(Tensor* ptr, int index = 0); + void SetSrc(GpuSpatialTensor* ptr, int index = 0); + void SetDst(GpuSpatialTensor* ptr, int index = 0); // should be called after changes of inputs/outputs. absl::Status UpdateParams(); @@ -186,8 +186,8 @@ class GPUOperation { // Defines operation calculation precision and format of src/dst tensors. 
OperationDef definition_; - std::vector<Tensor*> src_; - std::vector<Tensor*> dst_; + std::vector<GpuSpatialTensor*> src_; + std::vector<GpuSpatialTensor*> dst_; int grid_dimension_ = 3; // can be 1, 2 or 3 int3 work_group_launch_order_ = int3(0, 1, 2); int3 grid_size_ = int3(0, 0, 0); diff --git a/tensorflow/lite/delegates/gpu/cl/tensor.h b/tensorflow/lite/delegates/gpu/cl/tensor.h index f97b3ba2d66..331adf03296 100644 --- a/tensorflow/lite/delegates/gpu/cl/tensor.h +++ b/tensorflow/lite/delegates/gpu/cl/tensor.h @@ -29,6 +29,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/common/data_type.h" #include "tensorflow/lite/delegates/gpu/common/shape.h" #include "tensorflow/lite/delegates/gpu/common/status.h" +#include "tensorflow/lite/delegates/gpu/common/task/gpu_tensor.h" #include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h" #include "tensorflow/lite/delegates/gpu/common/tensor.h" #include "tensorflow/lite/delegates/gpu/common/types.h" @@ -37,7 +38,7 @@ namespace tflite { namespace gpu { namespace cl { -class Tensor : public GPUObject { +class Tensor : public GPUObject, public GpuSpatialTensor { public: Tensor() : memory_(nullptr), image_buffer_memory_(nullptr), memory_owner_(true) {} @@ -56,17 +57,17 @@ class Tensor : public GPUObject { Tensor(const Tensor&) = delete; Tensor& operator=(const Tensor&) = delete; - virtual ~Tensor() { Release(); } + ~Tensor() override { Release(); } absl::Status GetGPUResources(const GPUObjectDescriptor* obj_ptr, GPUResourcesWithValue* resources) const override; - int Width() const { return shape_.w; } - int Height() const { return shape_.h; } - int Depth() const { return shape_.d; } - int Channels() const { return shape_.c; } - int Slices() const { return DivideRoundUp(shape_.c, 4); } - int Batch() const { return shape_.b; } + int Width() const override { return shape_.w; } + int Height() const override { return shape_.h; } + int Depth() const override { return shape_.d; } + int Channels() const override { return shape_.c; } + int Slices() const 
override { return DivideRoundUp(shape_.c, 4); } + int Batch() const override { return shape_.b; } TensorDescriptor GetDescriptor() const { return descriptor_; } DataType GetDataType() const { return descriptor_.data_type; } diff --git a/tensorflow/lite/delegates/gpu/common/task/BUILD b/tensorflow/lite/delegates/gpu/common/task/BUILD index 7cc8cf0966b..ac69fe8b944 100644 --- a/tensorflow/lite/delegates/gpu/common/task/BUILD +++ b/tensorflow/lite/delegates/gpu/common/task/BUILD @@ -45,6 +45,11 @@ cc_library( ], ) +cc_library( + name = "gpu_tensor", + hdrs = ["gpu_tensor.h"], +) + flatbuffer_cc_library( name = "serialization_base_cc_fbs", srcs = ["serialization_base.fbs"], diff --git a/tensorflow/lite/delegates/gpu/common/task/gpu_tensor.h b/tensorflow/lite/delegates/gpu/common/task/gpu_tensor.h new file mode 100644 index 00000000000..8a6120339c2 --- /dev/null +++ b/tensorflow/lite/delegates/gpu/common/task/gpu_tensor.h @@ -0,0 +1,42 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_GPU_TENSOR_H_ +#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_GPU_TENSOR_H_ + +namespace tflite { +namespace gpu { + +// Interface for GpuSpatialTensor. 
+// Spatial means that it has Width/Height/Depth dimensions(or their combination) +// and Channels dimension +// Batch dimension optional +class GpuSpatialTensor { + public: + GpuSpatialTensor() = default; + virtual ~GpuSpatialTensor() = default; + + virtual int Width() const = 0; + virtual int Height() const = 0; + virtual int Depth() const = 0; + virtual int Channels() const = 0; + virtual int Slices() const = 0; + virtual int Batch() const = 0; +}; + +} // namespace gpu +} // namespace tflite + +#endif // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_GPU_TENSOR_H_ From eabaf8facfbebb1c07f7585bd5c5f7bd299b41b0 Mon Sep 17 00:00:00 2001 From: Abdurrahman Akkas Date: Mon, 9 Nov 2020 13:55:24 -0800 Subject: [PATCH 058/220] Remove stale TODO in HLO legalizer. PiperOrigin-RevId: 341479116 Change-Id: I46be654a6331e84b98bdaa288c6ef946e21a4912 --- .../compiler/mlir/tensorflow/transforms/legalize_hlo_patterns.td | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo_patterns.td b/tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo_patterns.td index d5c13242e0e..bbc662b5b86 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo_patterns.td +++ b/tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo_patterns.td @@ -49,7 +49,6 @@ def : Pat<(HLO_ConstOp $value), (TF_ConstOp $value)>; // context, getting to these ops may require some raising. //===----------------------------------------------------------------------===// -// TODO(b/158025719): Properly handle broadcast_dimensions. foreach fromToBinPair = [[HLO_AddOp, HLOClient_BroadcastAddOp, TF_AddV2Op], [HLO_DivOp, HLOClient_BroadcastDivOp, TF_DivOp], [HLO_ShiftLeftOp, HLOClient_BroadcastShiftLeftOp, TF_LeftShiftOp], From c8b67259c9f48895ec3dad4a1850201ba7b76bdb Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Mon, 9 Nov 2020 14:02:20 -0800 Subject: [PATCH 059/220] [KERNEL_GEN] Update macro name in unary kernels. 
PiperOrigin-RevId: 341480697 Change-Id: I23210f9c89224715c741a6b0727096c7d57a9271 --- .../core/kernels/mlir_generated/unranked_op_gpu_ceil.cc | 6 +++--- .../core/kernels/mlir_generated/unranked_op_gpu_cos.cc | 6 +++--- .../core/kernels/mlir_generated/unranked_op_gpu_exp.cc | 6 +++--- .../core/kernels/mlir_generated/unranked_op_gpu_floor.cc | 6 +++--- .../core/kernels/mlir_generated/unranked_op_gpu_log.cc | 6 +++--- .../core/kernels/mlir_generated/unranked_op_gpu_rsqrt.cc | 6 +++--- .../core/kernels/mlir_generated/unranked_op_gpu_sqrt.cc | 6 +++--- 7 files changed, 21 insertions(+), 21 deletions(-) diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_ceil.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_ceil.cc index 3d0bc5928e5..d1c51860355 100644 --- a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_ceil.cc +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_ceil.cc @@ -18,8 +18,8 @@ limitations under the License. namespace tensorflow { -REGISTER_AND_GENERATE_KERNEL(Ceil, f16, DT_HALF, Eigen::half); -REGISTER_AND_GENERATE_KERNEL(Ceil, f32, DT_FLOAT, float); -REGISTER_AND_GENERATE_KERNEL(Ceil, f64, DT_DOUBLE, double); +REGISTER_AND_GENERATE_UNARY_KERNEL(Ceil, f16, DT_HALF, Eigen::half); +REGISTER_AND_GENERATE_UNARY_KERNEL(Ceil, f32, DT_FLOAT, float); +REGISTER_AND_GENERATE_UNARY_KERNEL(Ceil, f64, DT_DOUBLE, double); } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_cos.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_cos.cc index b21f8351608..fdac05fd4ce 100644 --- a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_cos.cc +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_cos.cc @@ -18,8 +18,8 @@ limitations under the License. 
namespace tensorflow { -REGISTER_AND_GENERATE_KERNEL(Cos, f16, DT_HALF, Eigen::half); -REGISTER_AND_GENERATE_KERNEL(Cos, f32, DT_FLOAT, float); -REGISTER_AND_GENERATE_KERNEL(Cos, f64, DT_DOUBLE, double); +REGISTER_AND_GENERATE_UNARY_KERNEL(Cos, f16, DT_HALF, Eigen::half); +REGISTER_AND_GENERATE_UNARY_KERNEL(Cos, f32, DT_FLOAT, float); +REGISTER_AND_GENERATE_UNARY_KERNEL(Cos, f64, DT_DOUBLE, double); } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_exp.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_exp.cc index 70f33066819..4288bba7eea 100644 --- a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_exp.cc +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_exp.cc @@ -18,8 +18,8 @@ limitations under the License. namespace tensorflow { -REGISTER_AND_GENERATE_KERNEL(Exp, f16, DT_HALF, Eigen::half); -REGISTER_AND_GENERATE_KERNEL(Exp, f32, DT_FLOAT, float); -REGISTER_AND_GENERATE_KERNEL(Exp, f64, DT_DOUBLE, double); +REGISTER_AND_GENERATE_UNARY_KERNEL(Exp, f16, DT_HALF, Eigen::half); +REGISTER_AND_GENERATE_UNARY_KERNEL(Exp, f32, DT_FLOAT, float); +REGISTER_AND_GENERATE_UNARY_KERNEL(Exp, f64, DT_DOUBLE, double); } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_floor.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_floor.cc index 80b05d0e1e2..40f2682db4e 100644 --- a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_floor.cc +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_floor.cc @@ -18,8 +18,8 @@ limitations under the License. 
namespace tensorflow { -REGISTER_AND_GENERATE_KERNEL(Floor, f16, DT_HALF, Eigen::half); -REGISTER_AND_GENERATE_KERNEL(Floor, f32, DT_FLOAT, float); -REGISTER_AND_GENERATE_KERNEL(Floor, f64, DT_DOUBLE, double); +REGISTER_AND_GENERATE_UNARY_KERNEL(Floor, f16, DT_HALF, Eigen::half); +REGISTER_AND_GENERATE_UNARY_KERNEL(Floor, f32, DT_FLOAT, float); +REGISTER_AND_GENERATE_UNARY_KERNEL(Floor, f64, DT_DOUBLE, double); } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_log.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_log.cc index 5314e4afe2b..8ade178691d 100644 --- a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_log.cc +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_log.cc @@ -18,8 +18,8 @@ limitations under the License. namespace tensorflow { -REGISTER_AND_GENERATE_KERNEL(Log, f16, DT_HALF, Eigen::half); -REGISTER_AND_GENERATE_KERNEL(Log, f32, DT_FLOAT, float); -REGISTER_AND_GENERATE_KERNEL(Log, f64, DT_DOUBLE, double); +REGISTER_AND_GENERATE_UNARY_KERNEL(Log, f16, DT_HALF, Eigen::half); +REGISTER_AND_GENERATE_UNARY_KERNEL(Log, f32, DT_FLOAT, float); +REGISTER_AND_GENERATE_UNARY_KERNEL(Log, f64, DT_DOUBLE, double); } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_rsqrt.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_rsqrt.cc index 448b2427ad1..6e8974bd64a 100644 --- a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_rsqrt.cc +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_rsqrt.cc @@ -18,8 +18,8 @@ limitations under the License. 
namespace tensorflow { -REGISTER_AND_GENERATE_KERNEL(Rsqrt, f16, DT_HALF, Eigen::half); -REGISTER_AND_GENERATE_KERNEL(Rsqrt, f32, DT_FLOAT, float); -REGISTER_AND_GENERATE_KERNEL(Rsqrt, f64, DT_DOUBLE, double); +REGISTER_AND_GENERATE_UNARY_KERNEL(Rsqrt, f16, DT_HALF, Eigen::half); +REGISTER_AND_GENERATE_UNARY_KERNEL(Rsqrt, f32, DT_FLOAT, float); +REGISTER_AND_GENERATE_UNARY_KERNEL(Rsqrt, f64, DT_DOUBLE, double); } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_sqrt.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_sqrt.cc index 98f1342a9a8..97f9825d173 100644 --- a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_sqrt.cc +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_sqrt.cc @@ -18,8 +18,8 @@ limitations under the License. namespace tensorflow { -REGISTER_AND_GENERATE_KERNEL(Sqrt, f16, DT_HALF, Eigen::half); -REGISTER_AND_GENERATE_KERNEL(Sqrt, f32, DT_FLOAT, float); -REGISTER_AND_GENERATE_KERNEL(Sqrt, f64, DT_DOUBLE, double); +REGISTER_AND_GENERATE_UNARY_KERNEL(Sqrt, f16, DT_HALF, Eigen::half); +REGISTER_AND_GENERATE_UNARY_KERNEL(Sqrt, f32, DT_FLOAT, float); +REGISTER_AND_GENERATE_UNARY_KERNEL(Sqrt, f64, DT_DOUBLE, double); } // namespace tensorflow From 68c24b5672eede61abbb9e8b68aad8924f2354da Mon Sep 17 00:00:00 2001 From: Yanhui Liang Date: Mon, 9 Nov 2020 14:14:13 -0800 Subject: [PATCH 060/220] Replace the usages of `sparse_tensor_dense_mat_mul ` with its TF public api. 
PiperOrigin-RevId: 341483364 Change-Id: I916944fbe69546f052d5aa6cbacffe48fb9cf524 --- tensorflow/python/keras/layers/local.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/keras/layers/local.py b/tensorflow/python/keras/layers/local.py index 88c8fede08c..b0d287e302e 100644 --- a/tensorflow/python/keras/layers/local.py +++ b/tensorflow/python/keras/layers/local.py @@ -30,8 +30,8 @@ from tensorflow.python.keras.engine.input_spec import InputSpec from tensorflow.python.keras.utils import conv_utils from tensorflow.python.keras.utils import tf_utils from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_sparse_ops from tensorflow.python.ops import math_ops -from tensorflow.python.ops import sparse_ops from tensorflow.python.util.tf_export import keras_export @@ -810,8 +810,9 @@ def local_conv_sparse_matmul(inputs, kernel, kernel_idxs, kernel_shape, Output (N+2)-D dense tensor with shape `output_shape`. """ inputs_flat = K.reshape(inputs, (K.shape(inputs)[0], -1)) - output_flat = sparse_ops.sparse_tensor_dense_mat_mul( - kernel_idxs, kernel, kernel_shape, inputs_flat, adjoint_b=True) + output_flat = gen_sparse_ops.SparseTensorDenseMatMul( + a_indices=kernel_idxs, a_values=kernel, a_shape=kernel_shape, + b=inputs_flat, adjoint_b=True) output_flat_transpose = K.transpose(output_flat) output_reshaped = K.reshape( From 25c77fc422a3825e2c6e207266f8cc6f9bb10806 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Mon, 9 Nov 2020 14:18:27 -0800 Subject: [PATCH 061/220] ReduceAttributes extended to support set of axis for reduction. 
PiperOrigin-RevId: 341484205 Change-Id: I40e09302756a12592e17dbd6ff0c3257f1e22dae --- .../lite/delegates/gpu/cl/selectors/operation_selector.cc | 2 +- tensorflow/lite/delegates/gpu/common/model_builder.cc | 4 +++- tensorflow/lite/delegates/gpu/common/operations.h | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc index 6ab977753fc..ed63288b8da 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc @@ -465,7 +465,7 @@ absl::Status GPUOperationFromNode(const DeviceInfo& device_info, case OperationType::REDUCE_PRODUCT: case OperationType::REDUCE_SUM: { auto attr = absl::any_cast(node.operation.attributes); - if (attr.axis != Axis::CHANNELS) { + if (attr.dims != std::set({Axis::CHANNELS})) { return absl::UnimplementedError( "Currently we can reduce only in channels dimension."); } diff --git a/tensorflow/lite/delegates/gpu/common/model_builder.cc b/tensorflow/lite/delegates/gpu/common/model_builder.cc index c200f0926aa..5371ddc9268 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder.cc +++ b/tensorflow/lite/delegates/gpu/common/model_builder.cc @@ -1530,7 +1530,9 @@ class ReduceOperationParser : public TFLiteOperationParser { RETURN_IF_ERROR(reader->ReadTensor(1, &axes)); const TfLiteTensor* input = reader->GetInputTensor(0); ReduceAttributes attr; - RETURN_IF_ERROR(ExtractAxisFromIndex(*input, axes.data[0], &attr.axis)); + Axis axis; + RETURN_IF_ERROR(ExtractAxisFromIndex(*input, axes.data[0], &axis)); + attr.dims = {axis}; node->operation.attributes = attr; return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/common/operations.h b/tensorflow/lite/delegates/gpu/common/operations.h index a93f63a02b7..d4af64a2d14 100644 --- a/tensorflow/lite/delegates/gpu/common/operations.h +++ 
b/tensorflow/lite/delegates/gpu/common/operations.h @@ -372,7 +372,7 @@ struct PReLUAttributes { }; struct ReduceAttributes { - Axis axis = Axis::UNKNOWN; + std::set dims; }; struct SoftmaxAttributes { From 2386de254c38e88622b2ebdf7765f22c7cc87315 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Nov 2020 14:28:46 -0800 Subject: [PATCH 062/220] Better handle convolution propagation cases by reducing reshape requirements PiperOrigin-RevId: 341486263 Change-Id: Ib1fa76053844af768ba1584ed4a47deee6fd12d7 --- .../xla/service/space_to_batch_converter.cc | 86 ++++++++++++++++--- 1 file changed, 74 insertions(+), 12 deletions(-) diff --git a/tensorflow/compiler/xla/service/space_to_batch_converter.cc b/tensorflow/compiler/xla/service/space_to_batch_converter.cc index 5a05378a07f..74cbfdd12a7 100644 --- a/tensorflow/compiler/xla/service/space_to_batch_converter.cc +++ b/tensorflow/compiler/xla/service/space_to_batch_converter.cc @@ -415,21 +415,68 @@ bool IsTrivialElementwise(HloInstruction* hlo) { bool ConvolutionVisitor::CanPropagate(HloInstruction* consumer, HloInstruction* producer) { - for (int64 i = 0; i < consumer->operand_count(); ++i) { - auto old_producer = consumer->mutable_operand(i); - if (IsTrivialElementwise(consumer)) { - if (old_to_new_instrs_.count(old_producer) <= 0 && - old_producer->opcode() != HloOpcode::kConstant && - !(old_producer->opcode() == HloOpcode::kBroadcast && - IsBroadcastPropagatable(old_producer, producer))) { + if (IsTrivialElementwise(consumer)) { + VLOG(2) << "Doing propagation check on elementwise op: " + << consumer->ToString(); + + HloInstruction* pivot_operand = nullptr; + for (int64 i = 0; i < consumer->operand_count(); ++i) { + auto old_producer = consumer->mutable_operand(i); + const bool broadcast_or_constant = + (old_producer->opcode() == HloOpcode::kConstant) || + (old_producer->opcode() == HloOpcode::kBroadcast && + IsBroadcastPropagatable(old_producer, producer)); + + if 
(!old_to_new_instrs_.contains(old_producer) && + !broadcast_or_constant) { VLOG(1) << "Cannot propagate on elementwise op " << consumer->ToString(); return false; + } else { + if (broadcast_or_constant) { + VLOG(2) << "Skipping on " << old_producer->ToString(); + continue; + } + + CHECK(old_to_new_instrs_.contains(old_producer)); + + CHECK(instr_to_dim_map_.contains(old_producer)); + if (pivot_operand == nullptr) { + pivot_operand = old_producer; + VLOG(2) << "Elementwise op: pivot " << old_producer->ToString(); + } else { + VLOG(2) << "Elementwise op: checking for shape equivalence " + << consumer->ToString(); + if (instr_to_dim_map_[pivot_operand] != + instr_to_dim_map_[old_producer]) { + return false; + } + auto pivot_new_instr = old_to_new_instrs_[pivot_operand]; + auto pivot_permute_dims = instr_to_dim_permute_map_[pivot_new_instr]; + auto new_instr = old_to_new_instrs_[old_producer]; + auto permute_dims = instr_to_dim_permute_map_[new_instr]; + for (int j = 0; j < pivot_permute_dims.size(); ++j) { + // Ensure the dimension mapping is the same. + if (pivot_permute_dims[j] != permute_dims[j]) { + return false; + } + + // Make sure all other dimensions are of the same size. 
+ if (pivot_new_instr->shape().dimensions(j) != + new_instr->shape().dimensions(j)) { + return false; + } + } + } } } - if (consumer->opcode() == HloOpcode::kConvolution || - consumer->opcode() == HloOpcode::kReduceWindow || - consumer->opcode() == HloOpcode::kReduce) { + } + + if (consumer->opcode() == HloOpcode::kConvolution || + consumer->opcode() == HloOpcode::kReduceWindow || + consumer->opcode() == HloOpcode::kReduce) { + for (int64 i = 0; i < consumer->operand_count(); ++i) { + auto old_producer = consumer->mutable_operand(i); if (i == 0 && !old_to_new_instrs_.contains(old_producer)) { return false; } @@ -1007,7 +1054,7 @@ Status ConvolutionVisitor::PropagateOnConv(HloInstruction* convolution) { spatial_split_size += stride; } - const int64 slice_size = + int64 slice_size = spatial_split_size + std::max(kernel_spatial_dim_size - stride, static_cast(0)); @@ -1020,7 +1067,7 @@ Status ConvolutionVisitor::PropagateOnConv(HloInstruction* convolution) { activations_new->shape().dimensions(spatial_dimension_to_split); // In the below case, we cannot use the activations directly for Halo // Duplication. We must reshape them. - if (new_space_size != spatial_split_size) { + if (spatial_split_size > new_space_size) { std::vector new_dimensions( activations_new->shape().dimensions().begin(), activations_new->shape().dimensions().end()); @@ -1068,6 +1115,21 @@ Status ConvolutionVisitor::PropagateOnConv(HloInstruction* convolution) { slice_size - spatial_split_size, old_split_dim_size)); } else { + // If the ideal spatial_split_size was smaller than the incoming spatial + // dimension size, we don't need reshaping. Instead, we determine the + // additional space available, and adjust the required slice size (and + // thereby the halo size).'t need reshaping. Instead, we determine the + // additional space available, and adjust the required slice size (and + // thereby the halo size). 
+ if (spatial_split_size < new_space_size) { + const int64 additional_space_present = spatial_split_size % stride; + spatial_split_size = new_space_size; + slice_size = + spatial_split_size + + std::max(kernel_spatial_dim_size - stride - additional_space_present, + static_cast(0)); + } + TF_ASSIGN_OR_RETURN( activations_new, HaloDuplicateWithSlice(activations_new, spatial_dimension_to_split, From 970817f42eff82200459419fb0a743325661da23 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Nov 2020 14:46:38 -0800 Subject: [PATCH 063/220] Internal change PiperOrigin-RevId: 341489804 Change-Id: Ic9f483469436267afdd9ec2a05d291621c02a76d --- .../api_def/base_api/api_def_RiscConv.pbtxt | 54 +++++++++++++++++++ .../core/kernels/risc/experimental/BUILD | 11 ++++ .../kernels/risc/experimental/risc_conv_op.cc | 50 +++++++++++++++++ tensorflow/core/ops/risc_ops.cc | 11 ++++ tensorflow/python/ops/risc/risc_grad.py | 7 +++ tensorflow/python/ops/risc/risc_ops.py | 17 +++++- 6 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_RiscConv.pbtxt create mode 100644 tensorflow/core/kernels/risc/experimental/risc_conv_op.cc diff --git a/tensorflow/core/api_def/base_api/api_def_RiscConv.pbtxt b/tensorflow/core/api_def/base_api/api_def_RiscConv.pbtxt new file mode 100644 index 00000000000..a78ee1d2b89 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_RiscConv.pbtxt @@ -0,0 +1,54 @@ +op { + graph_op_name: "RiscConv" + visibility: HIDDEN + in_arg { + name: "input" + description: < 1, there will be k-1 skipped cells between each +filter element on that dimension. The dimension order is determined by the +value of `data_format`, see above for details. Dilations in the batch and +depth dimensions must be 1. +END + } + summary: "Computes a 2-D convolution given 4-D `input` and `filter` tensors." 
+} diff --git a/tensorflow/core/kernels/risc/experimental/BUILD b/tensorflow/core/kernels/risc/experimental/BUILD index a16c0b66271..d0e94be3120 100644 --- a/tensorflow/core/kernels/risc/experimental/BUILD +++ b/tensorflow/core/kernels/risc/experimental/BUILD @@ -17,9 +17,20 @@ tf_kernel_library( ], ) +tf_kernel_library( + name = "risc_conv_op", + srcs = ["risc_conv_op.cc"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + ], +) + tf_kernel_library( name = "experimental", deps = [ ":risc_add_op", + ":risc_conv_op", ], ) diff --git a/tensorflow/core/kernels/risc/experimental/risc_conv_op.cc b/tensorflow/core/kernels/risc/experimental/risc_conv_op.cc new file mode 100644 index 00000000000..58c5ee98eae --- /dev/null +++ b/tensorflow/core/kernels/risc/experimental/risc_conv_op.cc @@ -0,0 +1,50 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { +namespace risc { +namespace experimental { + +typedef Eigen::ThreadPoolDevice CPUDevice; + +template +class RiscConvOp : public OpKernel { + public: + explicit RiscConvOp(OpKernelConstruction* context) : OpKernel(context) { + // TODO(b/171294012): Implement RiscConv op. + } + + void Compute(OpKernelContext* context) override { + // TODO(b/171294012): Implement RiscConv op. + } +}; + +#define REGISTER_CPU(T) \ + REGISTER_KERNEL_BUILDER( \ + Name("RiscConv").Device(DEVICE_CPU).TypeConstraint("T"), \ + RiscConvOp); + +REGISTER_CPU(float); +REGISTER_CPU(double); + +} // namespace experimental +} // namespace risc +} // namespace tensorflow diff --git a/tensorflow/core/ops/risc_ops.cc b/tensorflow/core/ops/risc_ops.cc index 1d90a645965..a5b1e37fa84 100644 --- a/tensorflow/core/ops/risc_ops.cc +++ b/tensorflow/core/ops/risc_ops.cc @@ -30,4 +30,15 @@ REGISTER_OP("RiscAdd") .SetIsAggregate() .SetIsCommutative(); +// TODO(b/171294012): change shape function. 
+REGISTER_OP("RiscConv") + .Input("input: T") + .Input("filter: T") + .Output("output: T") + .Attr("T: {float, double}") + .Attr("strides: list(int)") + .Attr(GetConvnetDataFormatAttrString()) + .SetShapeFn(shape_inference::UnknownShape) + .Attr("dilations: list(int) = [1, 1, 1, 1]"); + } // namespace tensorflow diff --git a/tensorflow/python/ops/risc/risc_grad.py b/tensorflow/python/ops/risc/risc_grad.py index b125aab895a..5c0f76ba3a4 100644 --- a/tensorflow/python/ops/risc/risc_grad.py +++ b/tensorflow/python/ops/risc/risc_grad.py @@ -28,3 +28,10 @@ def _RiscAddGrad(_, grad): # pylint: disable=unused-argument # TODO(b/171294012): Implement gradient of RISC with RISC ops. return None, None + + +@ops.RegisterGradient("RiscConv") +def _RiscConvGrad(_, grad): + # pylint: disable=unused-argument + # TODO(b/171294012): Implement gradient of RISC with RISC ops. + return None, None diff --git a/tensorflow/python/ops/risc/risc_ops.py b/tensorflow/python/ops/risc/risc_ops.py index 8682ebdd269..f59e42dbf6e 100644 --- a/tensorflow/python/ops/risc/risc_ops.py +++ b/tensorflow/python/ops/risc/risc_ops.py @@ -30,5 +30,20 @@ from tensorflow.python.ops.risc_ops_gen import * def risc_add( input_lhs, input_rhs, - name="RISC_ADD"): + name='RISC_ADD'): return gen_risc_ops.risc_add(input_lhs, input_rhs, name=name) + + +def risc_conv(x, + kernel, + strides, + data_format='NHWC', + dilations=None, + name='RISC_CONV'): + return gen_risc_ops.risc_conv( + x, + kernel, + strides, + data_format=data_format, + dilations=dilations, + name=name) From aed0ad49164ced8522789901012a0a8fa9bf94f9 Mon Sep 17 00:00:00 2001 From: Yanhui Liang Date: Mon, 9 Nov 2020 14:55:13 -0800 Subject: [PATCH 064/220] Expose `XLAControlFlowContext` as a tf.__internal__ API. 
PiperOrigin-RevId: 341491334 Change-Id: I7388588488ed8ad777612d52afe89447ae72a0da --- .../python/keras/layers/normalization.py | 21 ++----------------- tensorflow/python/ops/control_flow_ops.py | 18 ++++++++++++++++ .../golden/v2/tensorflow.__internal__.pbtxt | 4 ++++ 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/tensorflow/python/keras/layers/normalization.py b/tensorflow/python/keras/layers/normalization.py index 0737fe11712..9a324d90987 100644 --- a/tensorflow/python/keras/layers/normalization.py +++ b/tensorflow/python/keras/layers/normalization.py @@ -30,12 +30,12 @@ from tensorflow.python.keras.engine.base_layer import Layer from tensorflow.python.keras.engine.input_spec import InputSpec from tensorflow.python.keras.utils import control_flow_util from tensorflow.python.ops import array_ops -from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn from tensorflow.python.ops import state_ops from tensorflow.python.ops import variables as tf_variables +from tensorflow.python.ops.control_flow_ops import get_enclosing_xla_context from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import keras_export @@ -534,7 +534,7 @@ class BatchNormalizationBase(Layer): use_fused_avg_updates = ( ops.executing_eagerly_outside_functions() and isinstance(self.momentum, (float, int)) and - enclosing_xla_context() is None) + get_enclosing_xla_context() is None) if use_fused_avg_updates: exponential_avg_factor = 1.0 - self.momentum else: @@ -953,23 +953,6 @@ def replace_in_base_docstring(replacements): return string -def enclosing_xla_context(): - """Recursively find and return the XLAControlFlowContext.""" - graph = ops.get_default_graph() - while graph is not None: - # pylint: disable=protected-access - context_ = graph._get_control_flow_context() - # pylint: enable=protected-access - while context_ is 
not None: - if isinstance(context_, control_flow_ops.XLAControlFlowContext): - return context_ - context_ = context_.outer_context - # This may be a FuncGraph due to defuns or v2 control flow. We need to - # find the original graph with the XLAControlFlowContext. - graph = getattr(graph, 'outer_graph', None) - return None - - # pylint: disable=missing-docstring @keras_export(v1=['keras.layers.BatchNormalization']) class BatchNormalization(BatchNormalizationBase): diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index c75b910058b..0ffef6c8b47 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -3693,6 +3693,24 @@ class XLAControlFlowContext(ControlFlowContext): return False +@tf_export("__internal__.get_enclosing_xla_context", v1=[]) +def get_enclosing_xla_context(): + """Recursively find and return the XLAControlFlowContext.""" + graph = ops.get_default_graph() + while graph is not None: + # pylint: disable=protected-access + context_ = graph._get_control_flow_context() + # pylint: enable=protected-access + while context_ is not None: + if isinstance(context_, XLAControlFlowContext): + return context_ + context_ = context_.outer_context + # This may be a FuncGraph due to defuns or v2 control flow. We need to + # find the original graph with the XLAControlFlowContext. + graph = getattr(graph, "outer_graph", None) + return None + + def from_control_flow_context_def(context_def, import_scope=None): """Deserializes `context_def` into the appropriate ControlFlowContext. 
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.__internal__.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.__internal__.pbtxt index b4978fe6a54..af6f1a5ce93 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.__internal__.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.__internal__.pbtxt @@ -52,4 +52,8 @@ tf_module { name: "execute_fn_for_device" argspec: "args=[\'device_branch_fns\', \'default_fn\', \'name\'], varargs=None, keywords=None, defaults=[\'execute_fn\'], " } + member_method { + name: "get_enclosing_xla_context" + argspec: "args=[], varargs=None, keywords=None, defaults=None" + } } From 38c53e2f5953e8b8fd94ba07de6c4bb2c15b0824 Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Mon, 9 Nov 2020 14:57:06 -0800 Subject: [PATCH 065/220] [TF2XLA] Support must-be-constant resource variables for compilation Performs an explicit copy at runtime from device to host if needed. PiperOrigin-RevId: 341491694 Change-Id: If4a6c0c76a1110637a06e96595c6013c8fac17e5 --- tensorflow/compiler/jit/get_compiler_ir.cc | 2 +- tensorflow/compiler/jit/kernels/xla_ops.cc | 7 +- .../compiler/jit/xla_compilation_cache.cc | 1 + .../compiler/jit/xla_compile_on_demand_op.cc | 3 +- tensorflow/compiler/jit/xla_launch_util.cc | 49 +++++++++---- tensorflow/compiler/jit/xla_launch_util.h | 3 +- tensorflow/compiler/tf2xla/graph_compiler.cc | 2 +- tensorflow/compiler/tf2xla/xla_argument.h | 3 + tensorflow/compiler/tf2xla/xla_compiler.cc | 13 +++- tensorflow/compiler/tf2xla/xla_expression.cc | 28 +++++--- tensorflow/compiler/tf2xla/xla_expression.h | 17 ++++- .../compiler/tf2xla/xla_expression_test.cc | 18 ++++- tensorflow/compiler/tf2xla/xla_op_kernel.cc | 7 ++ tensorflow/compiler/tf2xla/xla_resource.cc | 2 + tensorflow/compiler/tf2xla/xla_resource.h | 3 + .../python/eager/def_function_xla_jit_test.py | 71 +++++++++++++++++++ 16 files changed, 193 insertions(+), 36 deletions(-) diff --git a/tensorflow/compiler/jit/get_compiler_ir.cc 
b/tensorflow/compiler/jit/get_compiler_ir.cc index 08b3bea1084..1685bec6706 100644 --- a/tensorflow/compiler/jit/get_compiler_ir.cc +++ b/tensorflow/compiler/jit/get_compiler_ir.cc @@ -115,7 +115,7 @@ xla::StatusOr GetCompilerIr( xla::StatusOr> args = XlaComputationLaunchContext::BuildXlaCompilerArguments( - constant_arg_indices, inputs, variable_infos); + constant_arg_indices, inputs, variable_infos, dev); TF_RETURN_IF_ERROR(args.status()); switch (stage) { diff --git a/tensorflow/compiler/jit/kernels/xla_ops.cc b/tensorflow/compiler/jit/kernels/xla_ops.cc index 0f0f43cbad6..563423b7755 100644 --- a/tensorflow/compiler/jit/kernels/xla_ops.cc +++ b/tensorflow/compiler/jit/kernels/xla_ops.cc @@ -206,8 +206,9 @@ static Status CompileToLocalExecutable( may_alias_resource_update; xla::StatusOr> args = - XlaComputationLaunchContext::BuildXlaCompilerArguments(constants, inputs, - variable_infos); + XlaComputationLaunchContext::BuildXlaCompilerArguments( + constants, inputs, variable_infos, + static_cast(ctx->device())); TF_RETURN_IF_ERROR(args.status()); return cache->Compile(options, function, *args, compile_options, lazy ? XlaCompilationCache::CompileMode::kLazy @@ -246,8 +247,6 @@ void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) { se::Stream* stream = ctx->op_device_context() ? 
ctx->op_device_context()->stream() : nullptr; - VLOG(1) << "Executing XLA Computation..."; - absl::optional tf_allocator_adapter; se::DeviceMemoryAllocator* allocator = GetAllocator( &tf_allocator_adapter, ctx->device(), diff --git a/tensorflow/compiler/jit/xla_compilation_cache.cc b/tensorflow/compiler/jit/xla_compilation_cache.cc index ea39331c4fb..6251f0353de 100644 --- a/tensorflow/compiler/jit/xla_compilation_cache.cc +++ b/tensorflow/compiler/jit/xla_compilation_cache.cc @@ -140,6 +140,7 @@ XlaCompilationCache::BuildSignature( for (const XlaCompiler::Argument& arg : args) { switch (arg.kind) { case XlaCompiler::Argument::kConstant: + case XlaCompiler::Argument::kConstantResource: signature.arg_values.push_back(arg.constant_value); break; case XlaCompiler::Argument::kParameter: diff --git a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc index fa32a04a026..4005d0bf0cb 100644 --- a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc +++ b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc @@ -153,7 +153,8 @@ Status XlaCompileOnDemandOp::Compile( ctx, variables_indices, variable_infos, variable_args)); args = XlaComputationLaunchContext::BuildXlaCompilerArguments( - constant_input_indices, inputs, variable_infos); + constant_input_indices, inputs, variable_infos, + static_cast(ctx->device())); TF_RETURN_IF_ERROR(args.status()); } diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc index 1c5581eb4ab..b7f83301d2d 100644 --- a/tensorflow/compiler/jit/xla_launch_util.cc +++ b/tensorflow/compiler/jit/xla_launch_util.cc @@ -564,11 +564,26 @@ xla::StatusOr> XlaComputationLaunchContext::BuildXlaCompilerArguments( absl::Span must_be_constant_idxs, absl::Span inputs, - absl::Span variable_args) { + absl::Span variable_args, Device* device) { CHECK(absl::c_is_sorted(must_be_constant_idxs)); std::vector out; out.resize(inputs.size()); + // TODO(cheshire): Avoid 
duplication with framework/op_kernel.h + DeviceContext* device_context = nullptr; + TF_RETURN_IF_ERROR(device->TryGetDeviceContext(&device_context)); + bool using_default_context = false; + auto cleanup = xla::MakeCleanup([&] { + if (device_context != nullptr && !using_default_context) { + device_context->Unref(); + } + }); + if (device_context == nullptr) { + using_default_context = true; + auto* dev_info = device->tensorflow_gpu_device_info(); + if (dev_info) device_context = dev_info->default_context; + } + absl::flat_hash_map variable_info_lookup; for (const VariableInfo& info : variable_args) { CHECK(!info.var() || info.lock_held()) @@ -581,18 +596,7 @@ XlaComputationLaunchContext::BuildXlaCompilerArguments( const Tensor* input = inputs[input_num]; XlaCompiler::Argument& arg = out[input_num]; - if (absl::c_binary_search(must_be_constant_idxs, input_num)) { - // Handles compile-time constants. - - // TODO(b/157241314): Support constants located in resource variables. - TF_RET_CHECK(input->dtype() != DT_RESOURCE) - << "tf2xla bridge does not support must-be-constants located in " - "resource variables; try moving them to a tensor"; - arg.kind = XlaCompiler::Argument::kConstant; - arg.type = input->dtype(); - arg.shape = input->shape(); - arg.constant_value = *input; - } else if (variable_info_lookup.count(input_num)) { + if (variable_info_lookup.count(input_num)) { // Handles resource variables. 
TF_RET_CHECK(input->dtype() == DT_RESOURCE); const VariableInfo& variable = *variable_info_lookup[input_num]; @@ -613,6 +617,25 @@ XlaComputationLaunchContext::BuildXlaCompilerArguments( arg.type = DT_INVALID; arg.shape = TensorShape(); } + + if (absl::c_binary_search(must_be_constant_idxs, input_num)) { + TF_RET_CHECK(variable.var() && variable.var()->is_initialized); + const Tensor* value = variable.var()->tensor(); + Tensor value_on_host(value->dtype(), value->shape()); + if (!device_context) { + value_on_host = *value; + } else { + TF_RETURN_IF_ERROR(device_context->CopyDeviceTensorToCPUSync( + value, "", device, &value_on_host)); + } + arg.kind = XlaCompiler::Argument::kConstantResource; + arg.constant_value = value_on_host; + } + } else if (absl::c_binary_search(must_be_constant_idxs, input_num)) { + arg.kind = XlaCompiler::Argument::kConstant; + arg.type = input->dtype(); + arg.shape = input->shape(); + arg.constant_value = *input; } else { // Normal inputs. TF_RET_CHECK(input->dtype() != DT_RESOURCE); diff --git a/tensorflow/compiler/jit/xla_launch_util.h b/tensorflow/compiler/jit/xla_launch_util.h index ac085a022c8..8b939365ee5 100644 --- a/tensorflow/compiler/jit/xla_launch_util.h +++ b/tensorflow/compiler/jit/xla_launch_util.h @@ -143,7 +143,8 @@ class XlaComputationLaunchContext { static xla::StatusOr> BuildXlaCompilerArguments(absl::Span must_be_constant_idxs, absl::Span inputs, - absl::Span variable_args); + absl::Span variable_args, + Device* device); // Add all inputs within `ctx` as XLA arguments (returned by arguments()). // `variables` is a map from TensorFlow argument number to resource variable. 
diff --git a/tensorflow/compiler/tf2xla/graph_compiler.cc b/tensorflow/compiler/tf2xla/graph_compiler.cc index 30a7e94775b..2cf10974176 100644 --- a/tensorflow/compiler/tf2xla/graph_compiler.cc +++ b/tensorflow/compiler/tf2xla/graph_compiler.cc @@ -73,7 +73,7 @@ Status PrepareArguments(XlaOpKernelContext* ctx, Graph* graph, switch (expressions[i]->kind()) { case XlaExpression::Kind::kConstant: arg.kind = XlaCompiler::Argument::kConstant; - arg.constant_value = expressions[i]->constant_value(); + arg.constant_value = *expressions[i]->constant_value(); break; case XlaExpression::Kind::kXlaOp: if (arg_must_be_compile_time_constant[i]) { diff --git a/tensorflow/compiler/tf2xla/xla_argument.h b/tensorflow/compiler/tf2xla/xla_argument.h index e2cd634e1d5..c304c479f87 100644 --- a/tensorflow/compiler/tf2xla/xla_argument.h +++ b/tensorflow/compiler/tf2xla/xla_argument.h @@ -39,6 +39,9 @@ struct XlaArgument { // associated runtime parameter iff `initialized` is true. kResource, + // A resource variable with a constant value known at compile time. + kConstantResource, + // Argument is a run-time parameter. 
kParameter, diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc index 3d6a66c6ebc..56a7e9dd5d8 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler.cc @@ -207,7 +207,7 @@ Status BuildComputation( switch (retval.kind()) { case XlaExpression::Kind::kConstant: output.is_constant = true; - output.constant_value = retval.constant_value(); + output.constant_value = *retval.constant_value(); output.shape = output.constant_value.shape(); break; @@ -446,6 +446,9 @@ string XlaCompiler::Argument::HumanString() const { case kConstant: return absl::StrCat("kind=constant", common, " value=", constant_value.DebugString()); + case kConstantResource: + return absl::StrCat("kind=constant-resource", common, + " value=", constant_value.DebugString()); case kResource: { string output = absl::StrCat( "kind=resource", common, @@ -856,6 +859,7 @@ Status XlaCompiler::XLAShapeForArgument( *xla_shape = absl::get(arg.shape); return Status::OK(); } + case XlaCompiler::Argument::kConstantResource: case XlaCompiler::Argument::kResource: { TF_RET_CHECK(arg.initialized); @@ -959,6 +963,7 @@ Status XlaCompiler::BuildArguments( const XlaCompiler::Argument& arg = args[i]; XlaExpression& arg_expression = (*arg_expressions)[i]; switch (arg.kind) { + case XlaCompiler::Argument::kConstantResource: case XlaCompiler::Argument::kResource: { TF_RET_CHECK(arg.resource_kind != XlaResource::kInvalid); TF_RET_CHECK(absl::holds_alternative(arg.shape)); @@ -971,7 +976,10 @@ Status XlaCompiler::BuildArguments( /*max_array_size=*/arg.max_array_size, /*tensor_array_gradients=*/arg.tensor_array_gradients, /*tensor_array_multiple_writes_aggregate=*/true)); - arg_expression = XlaExpression::Resource(resource); + arg_expression = + arg.kind == XlaCompiler::Argument::kResource + ? 
XlaExpression::Resource(resource) + : XlaExpression::ConstantResource(arg.constant_value, resource); if (arg.initialized) { input_to_args->push_back(i); } @@ -1124,6 +1132,7 @@ Status XlaCompiler::BuildArguments( arg_shardings.at(i).DebugString())); XlaExpression& arg_expression = (*arg_expressions)[input_to_args->at(i)]; switch (arg.kind) { + case XlaCompiler::Argument::kConstantResource: case XlaCompiler::Argument::kResource: { TF_RET_CHECK(arg.initialized); XlaResource* resource = arg_expression.resource(); diff --git a/tensorflow/compiler/tf2xla/xla_expression.cc b/tensorflow/compiler/tf2xla/xla_expression.cc index f0cc8d26709..40b154b496e 100644 --- a/tensorflow/compiler/tf2xla/xla_expression.cc +++ b/tensorflow/compiler/tf2xla/xla_expression.cc @@ -38,6 +38,16 @@ XlaExpression XlaExpression::Constant(Tensor value) { return e; } +XlaExpression XlaExpression::ConstantResource(Tensor value, + XlaResource* resource) { + XlaExpression e; + e.kind_ = Kind::kResource; + e.dtype_ = DT_RESOURCE; + e.resource_ = resource; + e.constant_value_ = value; + return e; +} + XlaExpression XlaExpression::XlaOp(xla::XlaOp value, DataType dtype) { XlaExpression e; e.kind_ = Kind::kXlaOp; @@ -83,7 +93,7 @@ xla::XlaOp XlaExpression::AsXlaOp(xla::XlaBuilder* builder) const { case Kind::kConstant: { xla::BorrowingLiteral literal; TF_RETURN_IF_ERROR( - HostTensorToBorrowingLiteral(constant_value_, &literal)); + HostTensorToBorrowingLiteral(*constant_value_, &literal)); return xla::ConstantLiteral(builder, literal); } case Kind::kTensorList: @@ -106,7 +116,7 @@ xla::StatusOr XlaExpression::ResolveDynamism( switch (kind()) { case Kind::kConstant: { // Constant values are considered static. 
- Tensor constant_false(DT_BOOL, constant_value().shape()); + Tensor constant_false(DT_BOOL, constant_value()->shape()); auto flat = constant_false.flat(); for (int64 i = 0; i < flat.size(); ++i) flat(i) = false; return constant_false; @@ -147,13 +157,12 @@ xla::StatusOr> XlaExpression::ResolveConstant( xla::Client* client, bool dynamic_dimension_is_minus_one) const { switch (kind()) { case Kind::kConstant: - return {constant_value()}; + case Kind::kResource: + return constant_value(); case Kind::kXlaOp: break; case Kind::kTensorList: TF_FALLTHROUGH_INTENDED; - case Kind::kResource: - TF_FALLTHROUGH_INTENDED; case Kind::kInvalid: return errors::InvalidArgument( "ResolveConstant called on XlaExpression: ", HumanString()); @@ -187,7 +196,12 @@ xla::StatusOr> XlaExpression::ResolveConstant( xla::StatusOr XlaExpression::GetShape() const { switch (kind_) { case Kind::kConstant: - return constant_value().shape(); + return constant_value()->shape(); + case Kind::kResource: + if (constant_value()) { + return constant_value()->shape(); + } + return TensorShape({}); case Kind::kXlaOp: { TF_ASSIGN_OR_RETURN(xla::Shape xla_shape, handle().builder()->GetShape(handle())); @@ -197,8 +211,6 @@ xla::StatusOr XlaExpression::GetShape() const { } case Kind::kTensorList: return TensorShape({}); - case Kind::kResource: - return TensorShape({}); case Kind::kInvalid: return errors::InvalidArgument( "GetShape() called on invalid XlaExpression"); diff --git a/tensorflow/compiler/tf2xla/xla_expression.h b/tensorflow/compiler/tf2xla/xla_expression.h index 3546368ff7b..fd6b311ae6e 100644 --- a/tensorflow/compiler/tf2xla/xla_expression.h +++ b/tensorflow/compiler/tf2xla/xla_expression.h @@ -74,6 +74,9 @@ class XlaExpression { // Builds a resource expression. static XlaExpression Resource(XlaResource* resource); + // Builds a resource whose value is known at a compile time. 
+ static XlaExpression ConstantResource(Tensor value, XlaResource* resource); + Kind kind() const { return kind_; } DataType dtype() const { return dtype_; } @@ -81,7 +84,15 @@ class XlaExpression { // handle() returns the XlaOp that backs a kXlaOp expression. const xla::XlaOp& handle() const { return handle_; } - const Tensor& constant_value() const { return constant_value_; } + // Return a constant value associated with this expression. Always set for + // constants, might be set for resources. + absl::optional constant_value() const { + if (kind_ == Kind::kResource && resource_->IsOverwritten()) { + // The constant is no longer available if the value was overwritten. + return absl::nullopt; + } + return constant_value_; + } XlaResource* resource() const { return resource_; } @@ -124,8 +135,8 @@ class XlaExpression { // a tuple expression if kind_ == kTensorList. xla::XlaOp handle_; - // The value of the constant, if kind_ == kConstant. - Tensor constant_value_; + // The value of the constant, if available. + absl::optional constant_value_; // The resource, if kind_ == kResource. Not owned. 
XlaResource* resource_ = nullptr; diff --git a/tensorflow/compiler/tf2xla/xla_expression_test.cc b/tensorflow/compiler/tf2xla/xla_expression_test.cc index 84202c93139..6e4c4cf675f 100644 --- a/tensorflow/compiler/tf2xla/xla_expression_test.cc +++ b/tensorflow/compiler/tf2xla/xla_expression_test.cc @@ -110,8 +110,10 @@ TEST_F(XlaExpressionTest, GetShape) { TEST_F(XlaExpressionTest, ResolveConstant) { EXPECT_FALSE(XlaExpression().ResolveConstant(client_).ok()); EXPECT_FALSE(XlaExpression::Invalid().ResolveConstant(client_).ok()); - EXPECT_FALSE( - XlaExpression::Resource(resource_.get()).ResolveConstant(client_).ok()); + + EXPECT_FALSE(XlaExpression::Resource(resource_.get()) + .ResolveConstant(client_) + ->has_value()); TF_ASSERT_OK_AND_ASSIGN( absl::optional op_constant, @@ -131,5 +133,17 @@ TEST_F(XlaExpressionTest, ResolveConstant) { test::ExpectTensorEqual(constant_, *constant_constant); } +TEST_F(XlaExpressionTest, ResolveConstantOnResource) { + XlaExpression constant_resource = + XlaExpression::ConstantResource(constant_, resource_.get()); + EXPECT_TRUE(constant_resource.ResolveConstant(client_).ok()); + EXPECT_TRUE(resource_->SetZeroValue(builder_.get()).ok()); + LOG(ERROR) << "Resource is overwritten: " << resource_->IsOverwritten(); + xla::StatusOr> resolved_constant = + constant_resource.ResolveConstant(client_); + EXPECT_TRUE(resolved_constant.ok()); + EXPECT_FALSE(resolved_constant->has_value()); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.cc b/tensorflow/compiler/tf2xla/xla_op_kernel.cc index c2d1906e47a..1d382fe5b9c 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.cc +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.cc @@ -477,6 +477,13 @@ Status ReadVariableInputTensor(const Tensor& tensor, DataType type, *shape = variable->shape(); } + if (!variable->IsOverwritten() && expression->constant_value()) { + TF_ASSIGN_OR_RETURN(xla::Literal literal, + 
HostTensorToLiteral(*expression->constant_value())); + *value = xla::ConstantLiteral(ctx->builder(), literal); + return Status::OK(); + } + TF_ASSIGN_OR_RETURN(xla::Shape representation_shape, ctx->compiler()->options().shape_representation_fn( variable->shape(), variable->type(), diff --git a/tensorflow/compiler/tf2xla/xla_resource.cc b/tensorflow/compiler/tf2xla/xla_resource.cc index bec0b46611d..8730c6dad54 100644 --- a/tensorflow/compiler/tf2xla/xla_resource.cc +++ b/tensorflow/compiler/tf2xla/xla_resource.cc @@ -116,10 +116,12 @@ Status XlaResource::SetValue(const xla::XlaOp& value) { "' must be initialized with a valid type before use."); } value_ = value; + is_overwritten_ = true; return Status::OK(); } Status XlaResource::SetZeroValue(xla::XlaBuilder* builder) { + is_overwritten_ = true; if (type_ == DT_INVALID) { return errors::InvalidArgument( "Resource '", name_, diff --git a/tensorflow/compiler/tf2xla/xla_resource.h b/tensorflow/compiler/tf2xla/xla_resource.h index ab3a5bdd9bc..d7b9d2f16d3 100644 --- a/tensorflow/compiler/tf2xla/xla_resource.h +++ b/tensorflow/compiler/tf2xla/xla_resource.h @@ -135,6 +135,8 @@ class XlaResource { Status SetFromPack(const std::set& gradient_sources, const xla::XlaOp& pack, xla::XlaBuilder* builder); + bool IsOverwritten() { return is_overwritten_; } + // TensorArray and Stack specific fields // TODO(phawkins): refactor this code to use subclasses, rather than putting // kind-specific fields in XlaResource. 
@@ -179,6 +181,7 @@ class XlaResource { bool tensor_array_multiple_writes_aggregate_ = false; std::map> tensor_array_gradients_; + bool is_overwritten_ = false; }; } // namespace tensorflow diff --git a/tensorflow/python/eager/def_function_xla_jit_test.py b/tensorflow/python/eager/def_function_xla_jit_test.py index 5820bec31be..281ff142dd6 100644 --- a/tensorflow/python/eager/def_function_xla_jit_test.py +++ b/tensorflow/python/eager/def_function_xla_jit_test.py @@ -656,6 +656,77 @@ class DefFunctionTest(xla_test.XLATestCase): self.assertIn('tuple', f.experimental_get_compiler_ir(l)()) + @test_util.disable_mlir_bridge('TODO(b/172845417): MLIR bridge does not ' + 'support getting constants out of resources') + def testGetConstantOutOfResourceVariable(self): + with ops.device('device:{}:0'.format(self.device)): + + # Use floats to force device placement. + a = variables.Variable(50.0) + b = variables.Variable(2.0) + + @def_function.function(jit_compile=True) + def f(x): + return array_ops.reshape( + x, [math_ops.cast(a, dtypes.int32), + math_ops.cast(b, dtypes.int32)]) + + # OK since the value is known at compile time. + out = f(random_ops.random_normal([10, 10])) + self.assertEqual(out.shape[0], 50) + self.assertEqual(out.shape[1], 2) + + @test_util.disable_mlir_bridge('TODO(b/172845417): MLIR bridge does not ' + 'support getting constants out of resources') + def testGetConstantOutOfResourceVariableAfterWrite(self): + with ops.device('device:{}:0'.format(self.device)): + + # Use floats to force device placement. 
+ a = variables.Variable(50.0) + b = variables.Variable(2.0) + + @def_function.function(jit_compile=True) + def f(x, val1, val2): + a.assign(math_ops.cast(val1, dtypes.float32)) + b.assign(math_ops.cast(val2, dtypes.float32)) + return array_ops.reshape( + x, [math_ops.cast(a, dtypes.int32), + math_ops.cast(b, dtypes.int32)]) + + val1 = constant_op.constant(2) + val2 = constant_op.constant(50) + + # Returns an error, since the value known at compile time was overriden. + with self.assertRaisesRegex(errors.InvalidArgumentError, + 'concrete values at compile time'): + f(random_ops.random_normal([10, 10]), val1, val2) + + @test_util.disable_mlir_bridge('TODO(b/172845417): MLIR bridge does not ' + 'support getting constants out of resources') + def testGetConstantOutOfResourceVariableBeforeWrite(self): + with ops.device('device:{}:0'.format(self.device)): + + # Use floats to force device placement. + a = variables.Variable(50.0) + b = variables.Variable(2.0) + + @def_function.function(jit_compile=True) + def f(x, val1, val2): + out = array_ops.reshape( + x, [math_ops.cast(a, dtypes.int32), + math_ops.cast(b, dtypes.int32)]) + a.assign(math_ops.cast(val1, dtypes.float32)) + b.assign(math_ops.cast(val2, dtypes.float32)) + return out + + val1 = constant_op.constant(2) + val2 = constant_op.constant(50) + + # OK since the write happens after the reshape. + out = f(random_ops.random_normal([10, 10]), val1, val2) + self.assertEqual(out.shape[0], 50) + self.assertEqual(out.shape[1], 2) + if __name__ == '__main__': ops.enable_eager_execution() From a368ce42aece0bf7fa26ec7a0220df4225e220d4 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Mon, 9 Nov 2020 14:58:48 -0800 Subject: [PATCH 066/220] Added support of GPUObjects to MetalArguments. Using BufferDescriptor in FullyConnected. 
PiperOrigin-RevId: 341492017 Change-Id: I6aceb361df91dcae2a12863965d0566e18c48d9b --- tensorflow/lite/delegates/gpu/metal/BUILD | 2 + .../lite/delegates/gpu/metal/compute_task.mm | 2 +- .../lite/delegates/gpu/metal/kernels/BUILD | 1 + .../gpu/metal/kernels/fully_connected.cc | 41 ++- .../delegates/gpu/metal/metal_arguments.h | 38 ++- .../delegates/gpu/metal/metal_arguments.mm | 233 +++++++++++++++++- 6 files changed, 297 insertions(+), 20 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/metal/BUILD b/tensorflow/lite/delegates/gpu/metal/BUILD index c1b0eb38f91..8d00eeee03f 100644 --- a/tensorflow/lite/delegates/gpu/metal/BUILD +++ b/tensorflow/lite/delegates/gpu/metal/BUILD @@ -255,6 +255,8 @@ objc_library( copts = DEFAULT_COPTS, sdk_frameworks = ["Metal"], deps = [ + ":buffer", + ":gpu_object", "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/common/task:arguments", diff --git a/tensorflow/lite/delegates/gpu/metal/compute_task.mm b/tensorflow/lite/delegates/gpu/metal/compute_task.mm index 060a08aae09..95ac3f6d1d2 100644 --- a/tensorflow/lite/delegates/gpu/metal/compute_task.mm +++ b/tensorflow/lite/delegates/gpu/metal/compute_task.mm @@ -79,7 +79,7 @@ struct UniformBuffer { runtimeOptions:(const RuntimeOptions&)options { size_t offset = desc->input_buffers.size() + desc->uniform_buffers.size() + desc->immutable_buffers.size() + 1; - RETURN_IF_ERROR(_metal_args.Init(offset, &desc->args, &desc->shader_source)); + RETURN_IF_ERROR(_metal_args.Init(device, offset, &desc->args, &desc->shader_source)); NSString* barrier; // simdgroup_barrier is supported on macOS 10.13+ and Metal shading language version 2.0 if (@available(macOS 10.13, iOS 10.0, tvOS 10.0, *)) { diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD index 867919d6f0a..4033784fa34 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD +++ 
b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD @@ -269,6 +269,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/common:util", + "//tensorflow/lite/delegates/gpu/common/task:buffer_desc", "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", "//tensorflow/lite/delegates/gpu/metal:device_info", "//tensorflow/lite/delegates/gpu/metal:runtime_options", diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.cc b/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.cc index 2e6fa290670..00754b09dd4 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.cc @@ -26,6 +26,7 @@ limitations under the License. #include "absl/strings/substitute.h" #include "tensorflow/lite/delegates/gpu/common/model.h" #include "tensorflow/lite/delegates/gpu/common/operations.h" +#include "tensorflow/lite/delegates/gpu/common/task/buffer_desc.h" #include "tensorflow/lite/delegates/gpu/common/types.h" #include "tensorflow/lite/delegates/gpu/common/util.h" #include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h" @@ -68,7 +69,7 @@ std::string GetFullyConnectedCode(const DeviceInfo& device_info, FLT4(0.0f) : vector[j * 32 + tid_index]; $1(mem_flags::mem_threadgroup); for (uint i = 0, counter = j * 32 + tid.y * 8; i < 8; ++i, ++counter) { - summa += dot(local_vector[tid.y * 8 + i], matrix[counter * args.dst_channels_alignedx8 + ugid.x]); + summa += dot(local_vector[tid.y * 8 + i], args.weights.Read(counter * args.dst_channels_alignedx8 + ugid.x)); } $1(mem_flags::mem_none); } @@ -82,8 +83,8 @@ std::string GetFullyConnectedCode(const DeviceInfo& device_info, if (src_depth % 4 != 0) { code << " if (counter >= args.src_slices) continue;" << std::endl; } - code << " summa += dot(vector[counter], matrix[counter * " - "args.dst_channels_alignedx8 + ugid.x]);" + code 
<< " summa += dot(vector[counter], args.weights.Read(counter * " + "args.dst_channels_alignedx8 + ugid.x));" << std::endl; code << " }" << std::endl; } @@ -102,7 +103,7 @@ std::string GetFullyConnectedCode(const DeviceInfo& device_info, if (tid.y == 0 && tid.x % 4 == 0 && ugid.x < args.dst_channels) { const int linear_index = ugid.x / 4; FLT4 value = FLT4(temp[tid.x][0], temp[tid.x + 1][0], temp[tid.x + 2][0], temp[tid.x + 3][0]) + - biases[linear_index]; + args.bias.Read(linear_index); uint3 gid = uint3(0u, 0u, uint(linear_index)); $$2 result[linear_index] = value; @@ -164,13 +165,31 @@ std::vector FullyConnected( } } - desc->immutable_buffers = { - {"device FLT4* const matrix", - GetByteBufferConverted(filters_reordered, options.storage_precision)}, - {"device FLT4* const biases", - GetByteBufferConvertedResized(attr.bias.data, options.storage_precision, - attr.weights.shape.o)}, - }; + BufferDescriptor weights_desc; + weights_desc.element_type = + options.storage_precision == RuntimeOptions::Precision::FP32 + ? DataType::FLOAT32 + : DataType::FLOAT16; + weights_desc.element_size = 4; + weights_desc.data = + GetByteBufferConverted(filters_reordered, options.storage_precision); + weights_desc.size = weights_desc.data.size(); + + desc->args.AddObject( + "weights", absl::make_unique(std::move(weights_desc))); + + BufferDescriptor bias_desc; + bias_desc.element_type = + options.storage_precision == RuntimeOptions::Precision::FP32 + ? 
DataType::FLOAT32 + : DataType::FLOAT16; + bias_desc.element_size = 4; + bias_desc.data = GetByteBufferConvertedResized( + attr.bias.data, options.storage_precision, attr.weights.shape.o); + bias_desc.size = bias_desc.data.size(); + + desc->args.AddObject( + "bias", absl::make_unique(std::move(bias_desc))); desc->resize_function = [attr](const std::map& buffers) { const uint3 groups_size{8, 4, 1}; diff --git a/tensorflow/lite/delegates/gpu/metal/metal_arguments.h b/tensorflow/lite/delegates/gpu/metal/metal_arguments.h index 8ae5850b008..ef2fa1edfeb 100644 --- a/tensorflow/lite/delegates/gpu/metal/metal_arguments.h +++ b/tensorflow/lite/delegates/gpu/metal/metal_arguments.h @@ -24,22 +24,18 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/common/task/arguments.h" #include "tensorflow/lite/delegates/gpu/common/task/gpu_object_desc.h" +#include "tensorflow/lite/delegates/gpu/metal/gpu_object.h" namespace tflite { namespace gpu { namespace metal { -struct GPUResourcesWithValue { - std::vector> ints; - std::vector> floats; - std::vector>> buffers; -}; - class MetalArguments : public ArgumentsBinder { public: MetalArguments() = default; - absl::Status Init(int buffer_offset, Arguments* args, std::string* code); + absl::Status Init(id device, int buffer_offset, Arguments* args, + std::string* code); // Move only MetalArguments(MetalArguments&& args) = default; @@ -54,6 +50,12 @@ class MetalArguments : public ArgumentsBinder { void Encode(id encoder, int buffer_offset) const; private: + absl::Status AllocateObjects(const Arguments& args, id device); + absl::Status AddObjectArgs(Arguments* args); + + void AddGPUResources(const std::string& name, const GPUResources& resources, + Arguments* args); + std::string GetListOfArgs(int buffer_offset); absl::Status SetGPUResources(const std::string& name, @@ -63,6 +65,25 @@ class MetalArguments : public ArgumentsBinder { absl::Status SetBuffer(const 
std::string& name, id handle); + absl::Status SetObjectsResources(const Arguments& args); + + absl::Status ResolveSelectorsPass( + const Arguments& args, + const std::map& linkables, std::string* code); + + absl::Status ResolveSelector( + const Arguments& args, + const std::map& linkables, + const std::string& object_name, const std::string& selector, + const std::vector& function_args, + const std::vector& template_args, std::string* result); + + void ResolveObjectNames(const std::string& object_name, + const std::vector& member_names, + std::string* code); + + void ResolveArgsPass(std::string* code); + static constexpr char kArgsPrefix[] = "args."; struct IntValue { int value; @@ -94,6 +115,9 @@ class MetalArguments : public ArgumentsBinder { id handle; }; std::map buffers_; + + std::map object_refs_; + std::vector objects_; }; } // namespace metal diff --git a/tensorflow/lite/delegates/gpu/metal/metal_arguments.mm b/tensorflow/lite/delegates/gpu/metal/metal_arguments.mm index 5d06bb32540..d5c7671e2cc 100644 --- a/tensorflow/lite/delegates/gpu/metal/metal_arguments.mm +++ b/tensorflow/lite/delegates/gpu/metal/metal_arguments.mm @@ -17,6 +17,7 @@ limitations under the License. 
#include #include "absl/strings/substitute.h" +#include "tensorflow/lite/delegates/gpu/metal/buffer.h" #include "tensorflow/lite/delegates/gpu/common/util.h" #include "tensorflow/lite/delegates/gpu/common/task/util.h" @@ -45,18 +46,100 @@ void ReplaceAllWords(const std::string& old_word, const std::string& new_word, } } +std::string GetNextWord(const std::string& code, size_t first_position) { + size_t pos = first_position; + char t = code[pos]; + while (IsWordSymbol(t)) { + pos++; + t = code[pos]; + } + return code.substr(first_position, pos - first_position); +} + +size_t FindEnclosingBracket(const std::string& text, size_t first_pos, + char bracket) { + const std::map brackets = { + {'(', ')'}, + {'{', '}'}, + {'[', ']'}, + {'<', '>'}, + }; + char b_open = bracket; + auto it = brackets.find(b_open); + if (it == brackets.end()) { + return -1; + } + char b_close = it->second; + size_t pos = first_pos; + int opened = 1; + int closed = 0; + while (opened != closed && pos < text.size()) { + if (text[pos] == b_open) { + opened++; + } else if (text[pos] == b_close) { + closed++; + } + pos++; + } + if (opened == closed) { + return pos; + } else { + return -1; + } +} + +absl::Status ParseArgsInsideBrackets(const std::string& text, + size_t open_bracket_pos, + size_t* close_bracket_pos, + std::vector* args) { + *close_bracket_pos = + FindEnclosingBracket(text, open_bracket_pos + 1, text[open_bracket_pos]); + if (*close_bracket_pos == -1) { + return absl::NotFoundError("Not found enclosing bracket"); + } + std::string str_args = text.substr(open_bracket_pos + 1, + *close_bracket_pos - open_bracket_pos - 2); + std::vector words = absl::StrSplit(str_args, ','); + args->reserve(words.size()); + for (const auto& word : words) { + absl::string_view arg = absl::StripAsciiWhitespace(word); + if (!arg.empty()) { + args->push_back(std::string(arg)); + } + } + return absl::OkStatus(); +} + void AppendArgument(const std::string& arg, std::string* args) { if (!args->empty()) { 
absl::StrAppend(args, ",\n"); } absl::StrAppend(args, arg); } + +absl::Status CreateMetalObject(id device, GPUObjectDescriptor* desc, + GPUObjectPtr* result) { + const auto* buffer_desc = dynamic_cast(desc); + if (buffer_desc) { + Buffer gpu_buffer; + RETURN_IF_ERROR( + gpu_buffer.CreateFromBufferDescriptor(*buffer_desc, device)); + *result = absl::make_unique(std::move(gpu_buffer)); + return absl::OkStatus(); + } + + return absl::InvalidArgumentError("Unknown GPU descriptor."); +} } // namespace // Static constexpr char MetalArguments::kArgsPrefix[]; -absl::Status MetalArguments::Init(int buffer_offset, Arguments* args, std::string* code) { +absl::Status MetalArguments::Init(id device, int buffer_offset, Arguments* args, std::string* code) { + RETURN_IF_ERROR(AllocateObjects(*args, device)); + RETURN_IF_ERROR(AddObjectArgs(args)); + RETURN_IF_ERROR(ResolveSelectorsPass(*args, {}, code)); + RETURN_IF_ERROR(SetObjectsResources(*args)); args->GetActiveArguments(kArgsPrefix, *code); std::string struct_desc = "struct uniforms_buffer {\n"; int pos = 0; @@ -104,6 +187,7 @@ absl::Status MetalArguments::Init(int buffer_offset, Arguments* args, std::strin } else { struct_desc = ""; } + ResolveArgsPass(code); *code = absl::Substitute(*code, struct_desc, GetListOfArgs(buffer_offset)); return absl::OkStatus(); } @@ -140,11 +224,36 @@ absl::Status MetalArguments::SetHalf(const std::string& name, half value) { } void MetalArguments::Encode(id encoder, int buffer_offset) const { + for (auto& b : buffers_) { + [encoder setBuffer:b.second.handle offset:0 atIndex:buffer_offset]; + buffer_offset++; + } if (!const_data_.empty()) { [encoder setBytes:const_data_.data() length:const_data_.size() atIndex:buffer_offset]; } } +absl::Status MetalArguments::AllocateObjects(const Arguments& args, + id device) { + objects_.resize(args.objects_.size()); + int i = 0; + for (auto& t : args.objects_) { + RETURN_IF_ERROR(CreateMetalObject(device, t.second.get(), &objects_[i])); + i++; + } + return 
absl::OkStatus(); +} + +absl::Status MetalArguments::AddObjectArgs(Arguments* args) { + for (auto& t : args->objects_) { + AddGPUResources(t.first, t.second->GetGPUResources(), args); + } + for (auto& t : args->object_refs_) { + AddGPUResources(t.first, t.second->GetGPUResources(), args); + } + return absl::OkStatus(); +} + std::string MetalArguments::GetListOfArgs(int buffer_offset) { std::string result; for (auto& t : buffers_) { @@ -190,6 +299,20 @@ void MetalArguments::AddBuffer(const std::string& name, const GPUBufferDescripto buffers_[name].desc = desc; } +void MetalArguments::AddGPUResources(const std::string& name, + const GPUResources& resources, + Arguments* args) { + for (const auto& r : resources.ints) { + args->AddInt(absl::StrCat(name, "_", r)); + } + for (const auto& r : resources.floats) { + args->AddFloat(absl::StrCat(name, "_", r)); + } + for (const auto& r : resources.buffers) { + AddBuffer(absl::StrCat(name, "_", r.first), r.second); + } +} + absl::Status MetalArguments::SetBuffer(const std::string& name, id handle) { auto it = buffers_.find(name); if (it == buffers_.end()) { @@ -200,6 +323,114 @@ absl::Status MetalArguments::SetBuffer(const std::string& name, id ha return absl::OkStatus(); } +absl::Status MetalArguments::ResolveSelectorsPass( + const Arguments& args, const std::map& linkables, + std::string* code) { + std::string result; + size_t position = 0; + size_t next_position = code->find(kArgsPrefix); + while (next_position != std::string::npos) { + size_t arg_pos = next_position; + next_position += strlen(kArgsPrefix); + std::string object_name = GetNextWord(*code, next_position); + char next = (*code)[next_position + object_name.size()]; + if (next == '.') { + next_position += object_name.size() + 1; + std::string selector_name = GetNextWord(*code, next_position); + next_position += selector_name.size(); + next = (*code)[next_position]; + std::vector template_args; + if (next == '<') { + size_t close_bracket_pos; + 
RETURN_IF_ERROR(ParseArgsInsideBrackets( + *code, next_position, &close_bracket_pos, &template_args)); + next_position = close_bracket_pos; + next = (*code)[next_position]; + } + if (next != '(') { + return absl::NotFoundError(absl::StrCat( + "Expected ( after ", object_name, ".", selector_name, " call")); + } + std::vector function_args; + size_t close_bracket_pos; + RETURN_IF_ERROR(ParseArgsInsideBrackets( + *code, next_position, &close_bracket_pos, &function_args)); + for (auto& arg : function_args) { + RETURN_IF_ERROR(ResolveSelectorsPass(args, {}, &arg)); + } + std::string patch; + RETURN_IF_ERROR(ResolveSelector(args, linkables, object_name, + selector_name, function_args, + template_args, &patch)); + code->replace(arg_pos, close_bracket_pos - arg_pos, patch); + position = arg_pos + patch.size(); + } else { + position = arg_pos + strlen(kArgsPrefix); + } + next_position = code->find(kArgsPrefix, position); + } + return absl::OkStatus(); +} + +absl::Status MetalArguments::ResolveSelector( + const Arguments& args, const std::map& linkables, + const std::string& object_name, const std::string& selector, + const std::vector& function_args, + const std::vector& template_args, std::string* result) { + const GPUObjectDescriptor* desc_ptr; + auto it_ref = args.object_refs_.find(object_name); + auto it_obj = args.objects_.find(object_name); + if (it_ref != args.object_refs_.end()) { + desc_ptr = it_ref->second.get(); + } else if (it_obj != args.objects_.end()) { + desc_ptr = it_obj->second.get(); + } else { + return absl::NotFoundError( + absl::StrCat("No object with name - ", object_name)); + } + auto names = desc_ptr->GetGPUResources().GetNames(); + std::string patch; + RETURN_IF_ERROR(desc_ptr->PerformSelector(selector, function_args, + template_args, &patch)); + ResolveObjectNames(object_name, names, &patch); + *result += patch; + return absl::OkStatus(); +} + +void MetalArguments::ResolveObjectNames( + const std::string& object_name, + const std::vector& 
member_names, std::string* code) { + for (const auto& member_name : member_names) { + const std::string new_name = kArgsPrefix + object_name + "_" + member_name; + ReplaceAllWords(member_name, new_name, code); + } +} + +void MetalArguments::ResolveArgsPass(std::string* code) { + size_t position = 0; + size_t next_position = code->find(kArgsPrefix); + while (next_position != std::string::npos) { + size_t arg_pos = next_position; + next_position += strlen(kArgsPrefix); + std::string object_name = GetNextWord(*code, next_position); + std::string new_name = object_name; + code->replace(arg_pos, object_name.size() + strlen(kArgsPrefix), new_name); + position = arg_pos + new_name.size(); + next_position = code->find(kArgsPrefix, position); + } +} + +absl::Status MetalArguments::SetObjectsResources(const Arguments& args) { + int i = 0; + for (const auto& t : args.objects_) { + GPUResourcesWithValue resources; + RETURN_IF_ERROR(objects_[i]->GetGPUResources(t.second.get(), &resources)); + RETURN_IF_ERROR(SetGPUResources(t.first, resources)); + i++; + } + return absl::OkStatus(); +} + } // namespace metal } // namespace gpu } // namespace tflite From 5f7d4b6a54455e2b90288869718a5aa8a76353b1 Mon Sep 17 00:00:00 2001 From: Yanhui Liang Date: Mon, 9 Nov 2020 14:58:52 -0800 Subject: [PATCH 067/220] Expose the symbol `function.defun_with_attributes` as tf.__internal__ APIs. 
PiperOrigin-RevId: 341492026 Change-Id: I30132df3b8e4f15a4e9704135b16d43351fcec93 --- tensorflow/python/eager/function.py | 2 ++ tensorflow/python/tools/api/generator/api_init_files.bzl | 1 + .../api/golden/v2/tensorflow.__internal__.function.pbtxt | 7 +++++++ .../tools/api/golden/v2/tensorflow.__internal__.pbtxt | 4 ++++ 4 files changed, 14 insertions(+) create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.__internal__.function.pbtxt diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 09cf404828d..a41dc8e4765 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -75,6 +75,7 @@ from tensorflow.python.util import nest from tensorflow.python.util import object_identity from tensorflow.python.util import tf_decorator from tensorflow.python.util import tf_inspect +from tensorflow.python.util.tf_export import tf_export # Loaded lazily due to a circular dependency (roughly # tf.function->autograph->->dataset->tf.function). 
@@ -3769,6 +3770,7 @@ def defun(func=None, experimental_relax_shapes=experimental_relax_shapes) +@tf_export("__internal__.function.defun_with_attributes", v1=[]) def defun_with_attributes(func=None, input_signature=None, attributes=None, diff --git a/tensorflow/python/tools/api/generator/api_init_files.bzl b/tensorflow/python/tools/api/generator/api_init_files.bzl index 6dd3f88694f..359a6b17ab8 100644 --- a/tensorflow/python/tools/api/generator/api_init_files.bzl +++ b/tensorflow/python/tools/api/generator/api_init_files.bzl @@ -11,6 +11,7 @@ TENSORFLOW_API_INIT_FILES = [ "__internal__/distribute/combinations/__init__.py", "__internal__/distribute/multi_process_runner/__init__.py", "__internal__/eager_context/__init__.py", + "__internal__/function/__init__.py", "__internal__/nest/__init__.py", "__internal__/ops/__init__.py", "__internal__/test/__init__.py", diff --git a/tensorflow/tools/api/golden/v2/tensorflow.__internal__.function.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.__internal__.function.pbtxt new file mode 100644 index 00000000000..4b5187c119a --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.__internal__.function.pbtxt @@ -0,0 +1,7 @@ +path: "tensorflow.__internal__.function" +tf_module { + member_method { + name: "defun_with_attributes" + argspec: "args=[\'func\', \'input_signature\', \'attributes\', \'autograph\', \'experimental_autograph_options\', \'jit_compile\', \'experimental_relax_shapes\', \'experimental_follow_type_hints\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'False\', \'False\'], " + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.__internal__.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.__internal__.pbtxt index af6f1a5ce93..2c967520b30 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.__internal__.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.__internal__.pbtxt @@ -28,6 +28,10 @@ tf_module { name: "eager_context" mtype: "" } + 
member { + name: "function" + mtype: "" + } member { name: "nest" mtype: "" From e396a0ce019612a8263e20bcdb980c1233cc64b8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Nov 2020 15:02:43 -0800 Subject: [PATCH 068/220] Fix a typo for conv attribute -> "dilations" -> "dilation". PiperOrigin-RevId: 341492824 Change-Id: I1429f25ddf91735a6a74fc3599a8314342caf2c6 --- tensorflow/compiler/mlir/xla/hlo_function_importer.cc | 4 ++-- tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/mlir/xla/hlo_function_importer.cc b/tensorflow/compiler/mlir/xla/hlo_function_importer.cc index 66cf96b2f8a..2497bba0784 100644 --- a/tensorflow/compiler/mlir/xla/hlo_function_importer.cc +++ b/tensorflow/compiler/mlir/xla/hlo_function_importer.cc @@ -663,9 +663,9 @@ StatusOr HloFunctionImporter::ImportInstructionImpl( builder_->getNamedAttr("window_strides", Convert(strides))); attributes.push_back(ConvertPadding(paddings)); attributes.push_back( - builder_->getNamedAttr("lhs_dilations", Convert(lhs_dilations))); + builder_->getNamedAttr("lhs_dilation", Convert(lhs_dilations))); attributes.push_back( - builder_->getNamedAttr("rhs_dilations", Convert(rhs_dilations))); + builder_->getNamedAttr("rhs_dilation", Convert(rhs_dilations))); attributes.push_back(builder_->getNamedAttr( "dimension_numbers", ConvertConvDimensionNumbers( diff --git a/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt b/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt index d5844facd87..55e845e0d85 100644 --- a/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt +++ b/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt @@ -232,10 +232,10 @@ add { // CHECK-SAME: output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64> // CHECK-SAME: } // CHECK-SAME: feature_group_count = 1 : i64 - // CHECK-SAME: lhs_dilations = dense<1> : tensor<2xi64> + // CHECK-SAME: lhs_dilation = 
dense<1> : tensor<2xi64> // CHECK-SAME: padding = dense<{{\[\[}}44, 45], [60, 60]]> : tensor<2x2xi64> // CHECK-SAME: precision_config = ["DEFAULT", "DEFAULT"] - // CHECK-SAME: rhs_dilations = dense<[2, 3]> : tensor<2xi64> + // CHECK-SAME: rhs_dilation = dense<[2, 3]> : tensor<2xi64> // CHECK-SAME: window_strides = dense<[4, 5]> : tensor<2xi64> // CHECK-SAME: } // CHECK-SAME: (tensor<256x32x32x6xf32>, tensor<2x2x1x1xf32>) -> tensor<16x30x30x256xf32> From 51dc6eca9cbab5cc2e907d1b1a0244f87d6b8633 Mon Sep 17 00:00:00 2001 From: Yanhui Liang Date: Mon, 9 Nov 2020 15:11:04 -0800 Subject: [PATCH 069/220] Replace the usages of `cudnn_rnn`, `cudnn_rnnv2` and `cudnn_rnnv3` with TF public apis. PiperOrigin-RevId: 341494517 Change-Id: I9ffcbfb5bc5f0be58ab5a557e67ecf93a8fe7e6b --- .../python/keras/layers/cudnn_recurrent.py | 2 +- tensorflow/python/keras/layers/recurrent_v2.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/keras/layers/cudnn_recurrent.py b/tensorflow/python/keras/layers/cudnn_recurrent.py index e970232de40..7ecc7ac1996 100644 --- a/tensorflow/python/keras/layers/cudnn_recurrent.py +++ b/tensorflow/python/keras/layers/cudnn_recurrent.py @@ -504,7 +504,7 @@ class CuDNNLSTM(_CuDNNRNN): 'is_training': True, } - outputs, h, c, _, _ = gen_cudnn_rnn_ops.cudnn_rnnv2(**args) + outputs, h, c, _, _ = gen_cudnn_rnn_ops.CudnnRNNV2(**args) if self.stateful or self.return_state: h = h[0] diff --git a/tensorflow/python/keras/layers/recurrent_v2.py b/tensorflow/python/keras/layers/recurrent_v2.py index 263a341ea61..79889602186 100644 --- a/tensorflow/python/keras/layers/recurrent_v2.py +++ b/tensorflow/python/keras/layers/recurrent_v2.py @@ -698,9 +698,9 @@ def gpu_gru(inputs, init_h, kernel, recurrent_kernel, bias, mask, time_major, if go_backwards: # Reverse axis 0 since the input is already convert to time major. 
inputs = array_ops.reverse(inputs, axis=[0]) - outputs, h, _, _ = gen_cudnn_rnn_ops.cudnn_rnn( - inputs, input_h=init_h, input_c=0, params=params, is_training=True, - rnn_mode='gru') + outputs, h, _, _ = gen_cudnn_rnn_ops.CudnnRNN( + input=inputs, input_h=init_h, input_c=0, params=params, + is_training=True, rnn_mode='gru') last_output = outputs[-1] if not time_major and mask is None: @@ -1486,8 +1486,8 @@ def gpu_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias, mask, # expected_output = [0, 0, 6, 5 ,4] inputs = array_ops.reverse_sequence_v2( inputs, sequence_lengths, seq_axis=seq_axis, batch_axis=batch_axis) - outputs, h, c, _, _ = gen_cudnn_rnn_ops.cudnn_rnnv3( - inputs, + outputs, h, c, _, _ = gen_cudnn_rnn_ops.CudnnRNNV3( + input=inputs, input_h=init_h, input_c=init_c, params=params, @@ -1506,9 +1506,9 @@ def gpu_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias, mask, if go_backwards: # Reverse axis 0 since the input is already convert to time major. inputs = array_ops.reverse(inputs, axis=[0]) - outputs, h, c, _ = gen_cudnn_rnn_ops.cudnn_rnn( - inputs, input_h=init_h, input_c=init_c, params=params, is_training=True, - rnn_mode='lstm') + outputs, h, c, _ = gen_cudnn_rnn_ops.CudnnRNN( + input=inputs, input_h=init_h, input_c=init_c, params=params, + is_training=True, rnn_mode='lstm') last_output = outputs[-1] if not time_major and mask is None: From b7687e53d1c50b7be49ca2479af465926005be00 Mon Sep 17 00:00:00 2001 From: Yanhui Liang Date: Mon, 9 Nov 2020 15:20:48 -0800 Subject: [PATCH 070/220] Replace the usages of `array_ops.pack` with its TF public api. 
PiperOrigin-RevId: 341496338 Change-Id: If79be29825fbb67d53be04a37505661df108ac62 --- tensorflow/python/keras/tests/tracking_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/tests/tracking_test.py b/tensorflow/python/keras/tests/tracking_test.py index f3818190902..0e1384f52d8 100644 --- a/tensorflow/python/keras/tests/tracking_test.py +++ b/tensorflow/python/keras/tests/tracking_test.py @@ -34,6 +34,7 @@ from tensorflow.python.keras.layers import core from tensorflow.python.keras.layers import normalization from tensorflow.python.module import module from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -226,7 +227,7 @@ class ListTests(keras_parameterized.TestCase): self.assertAllEqual( [1., 2., 3.], - self.evaluate(array_ops.pack(ListToTensor().l))) + self.evaluate(gen_array_ops.Pack(values=ListToTensor().l))) class ListWrapperTest(test.TestCase): @@ -540,7 +541,7 @@ class TupleTests(keras_parameterized.TestCase): self.assertAllEqual( (1., 2., 3.), - self.evaluate(array_ops.pack(TupleToTensor().l))) + self.evaluate(gen_array_ops.Pack(values=TupleToTensor().l))) class InterfaceTests(keras_parameterized.TestCase): From e6ffdb7c6c208187ec63dff375502f01054887d6 Mon Sep 17 00:00:00 2001 From: Nat Jeffries Date: Mon, 9 Nov 2020 15:23:30 -0800 Subject: [PATCH 071/220] Fix quantize kernel to prepare quantization for int16->int32 requant. Previously the kernel would run but quantized multiplier and shift were not generated properly during Prepare. 
PiperOrigin-RevId: 341496881 Change-Id: Id9d2534b09c91b8353e4364bfdb1af5ef3a81f82 --- tensorflow/lite/micro/kernels/quantize.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/lite/micro/kernels/quantize.cc b/tensorflow/lite/micro/kernels/quantize.cc index f6d8c927949..8b9bf7e5fb1 100644 --- a/tensorflow/lite/micro/kernels/quantize.cc +++ b/tensorflow/lite/micro/kernels/quantize.cc @@ -70,9 +70,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { output->type == kTfLiteInt16 || output->type == kTfLiteInt32); - if (((input->type == kTfLiteInt16 || input->type == kTfLiteInt8) && - output->type == kTfLiteInt8) || - (input->type == kTfLiteInt16 && output->type == kTfLiteInt16)) { + if ((input->type == kTfLiteInt16 && output->type == kTfLiteInt8) || + (input->type == kTfLiteInt8 && output->type == kTfLiteInt8) || + (input->type == kTfLiteInt16 && output->type == kTfLiteInt16) || + (input->type == kTfLiteInt16 && output->type == kTfLiteInt32)) { double effective_scale = static_cast(input->params.scale) / static_cast(output->params.scale); From ffc145f4cc54455115e7f3b4b7cf88a3142dac5c Mon Sep 17 00:00:00 2001 From: Jose Baiocchi Date: Mon, 9 Nov 2020 15:29:15 -0800 Subject: [PATCH 072/220] Add CreateEventMetadata and CreateStatMetadata to XPlaneBuilder Modify the constructor to not populate by_name maps with empty names. 
PiperOrigin-RevId: 341497976 Change-Id: I4c1d881db5c293f38575b5ec591762c0d85bae04 --- .../core/profiler/utils/xplane_builder.cc | 34 +++++++++++++------ .../core/profiler/utils/xplane_builder.h | 8 +++++ 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/profiler/utils/xplane_builder.cc b/tensorflow/core/profiler/utils/xplane_builder.cc index 480b6b7a9ef..20408966504 100644 --- a/tensorflow/core/profiler/utils/xplane_builder.cc +++ b/tensorflow/core/profiler/utils/xplane_builder.cc @@ -29,15 +29,21 @@ namespace profiler { XPlaneBuilder::XPlaneBuilder(XPlane* plane) : XStatsBuilder(plane, this), plane_(plane) { - for (auto& iter : *plane->mutable_event_metadata()) { + for (auto& id_and_metadata : *plane->mutable_event_metadata()) { + auto& metadata = id_and_metadata.second; last_event_metadata_id_ = - std::max(last_event_metadata_id_, iter.second.id()); - event_metadata_by_name_.try_emplace(iter.second.name(), &iter.second); + std::max(last_event_metadata_id_, metadata.id()); + if (!metadata.name().empty()) { + event_metadata_by_name_.try_emplace(metadata.name(), &metadata); + } } - for (auto& iter : *plane->mutable_stat_metadata()) { + for (auto& id_and_metadata : *plane->mutable_stat_metadata()) { + auto& metadata = id_and_metadata.second; last_stat_metadata_id_ = - std::max(last_stat_metadata_id_, iter.second.id()); - stat_metadata_by_name_.try_emplace(iter.second.name(), &iter.second); + std::max(last_stat_metadata_id_, metadata.id()); + if (!metadata.name().empty()) { + stat_metadata_by_name_.try_emplace(metadata.name(), &metadata); + } } for (XLine& line : *plane->mutable_lines()) { lines_by_id_.try_emplace(line.id(), &line); @@ -50,11 +56,15 @@ XEventMetadata* XPlaneBuilder::GetOrCreateEventMetadata(int64 metadata_id) { return &metadata; } +XEventMetadata* XPlaneBuilder::CreateEventMetadata() { + return GetOrCreateEventMetadata(++last_event_metadata_id_); +} + XEventMetadata* XPlaneBuilder::GetOrCreateEventMetadata( 
absl::string_view name) { XEventMetadata*& metadata = event_metadata_by_name_[name]; if (metadata == nullptr) { - metadata = GetOrCreateEventMetadata(++last_event_metadata_id_); + metadata = CreateEventMetadata(); metadata->set_name(std::string(name)); } return metadata; @@ -63,7 +73,7 @@ XEventMetadata* XPlaneBuilder::GetOrCreateEventMetadata( XEventMetadata* XPlaneBuilder::GetOrCreateEventMetadata(std::string&& name) { XEventMetadata*& metadata = event_metadata_by_name_[name]; if (metadata == nullptr) { - metadata = GetOrCreateEventMetadata(++last_event_metadata_id_); + metadata = CreateEventMetadata(); metadata->set_name(std::move(name)); } return metadata; @@ -75,10 +85,14 @@ XStatMetadata* XPlaneBuilder::GetOrCreateStatMetadata(int64 metadata_id) { return &metadata; } +XStatMetadata* XPlaneBuilder::CreateStatMetadata() { + return GetOrCreateStatMetadata(++last_stat_metadata_id_); +} + XStatMetadata* XPlaneBuilder::GetOrCreateStatMetadata(absl::string_view name) { XStatMetadata*& metadata = stat_metadata_by_name_[name]; if (metadata == nullptr) { - metadata = GetOrCreateStatMetadata(++last_stat_metadata_id_); + metadata = CreateStatMetadata(); metadata->set_name(std::string(name)); } return metadata; @@ -87,7 +101,7 @@ XStatMetadata* XPlaneBuilder::GetOrCreateStatMetadata(absl::string_view name) { XStatMetadata* XPlaneBuilder::GetOrCreateStatMetadata(std::string&& name) { XStatMetadata*& metadata = stat_metadata_by_name_[name]; if (metadata == nullptr) { - metadata = GetOrCreateStatMetadata(++last_stat_metadata_id_); + metadata = CreateStatMetadata(); metadata->set_name(std::move(name)); } return metadata; diff --git a/tensorflow/core/profiler/utils/xplane_builder.h b/tensorflow/core/profiler/utils/xplane_builder.h index df4c3023df0..505340e445a 100644 --- a/tensorflow/core/profiler/utils/xplane_builder.h +++ b/tensorflow/core/profiler/utils/xplane_builder.h @@ -279,6 +279,10 @@ class XPlaneBuilder : public XStatsBuilder { // id was unused, otherwise the 
builder will add events to an existing line. XLineBuilder GetOrCreateLine(int64 line_id); + // Returns a new event metadata with an automatically generated metadata_id. + // WARNING: If calling this function, don't call GetOrCreateEventMetadata. + XEventMetadata* CreateEventMetadata(); + // Returns event metadata with the given id. Creates a new metadata if the id // was unused. // WARNING: If calling this function, don't call the string overloads below @@ -296,6 +300,10 @@ class XPlaneBuilder : public XStatsBuilder { return GetOrCreateEventMetadata(absl::string_view(name)); } + // Returns a new stat metadata with an automatically generated metadata_id. + // WARNING: If calling this function, don't call GetOrCreateEventMetadata. + XStatMetadata* CreateStatMetadata(); + // Returns stat metadata with the given id. Creates a new metadata if the id // was unused. // WARNING: If calling this function, don't call the string overloads below From 47388e6e562b3589950ec2ae4a5a5e9a5563b916 Mon Sep 17 00:00:00 2001 From: Luca Versari Date: Mon, 9 Nov 2020 15:32:45 -0800 Subject: [PATCH 073/220] Change hash function for Compress. ((a*b)>>18) & mask has higher throughput than (a*b)>>shift, and produces the same results when the hash table size is 2**14. In other cases, the hash function is still good, but it's not as necessary for that to be the case as the input is small anyway. This speeds up in encoding, especially in cases where hashing is a significant part of the encoding critical path (small or uncompressible files). 
PiperOrigin-RevId: 341498741 Change-Id: I359cbf5a38f680dd16fe5828fab20aae291915ef --- tensorflow/core/lib/io/snappy/snappy_test.cc | 47 ++++++++++++-------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/tensorflow/core/lib/io/snappy/snappy_test.cc b/tensorflow/core/lib/io/snappy/snappy_test.cc index b7d5eae6cc5..a391b820bbb 100644 --- a/tensorflow/core/lib/io/snappy/snappy_test.cc +++ b/tensorflow/core/lib/io/snappy/snappy_test.cc @@ -22,10 +22,13 @@ limitations under the License. namespace tensorflow { -// The current implementation of snappy compresses the below block to 619 bytes. -// We use this to validate the error messages. Please change this number if -// a new snappy implementation compresses to a different size. -const int COMPRESSED_RECORD_SIZE = 619; +static void CheckPrefixSuffix(const string& str, const string& prefix, + const string& suffix) { + CHECK_GE(str.size(), prefix.size()); + CHECK_GE(str.size(), suffix.size()); + CHECK_EQ(str.substr(0, prefix.length()), prefix); + CHECK_EQ(str.substr(str.length() - suffix.length()), suffix); +} static string GetRecord() { static const string lorem_ipsum = @@ -315,10 +318,12 @@ TEST(SnappyBuffers, SmallUncompressInputBuffer) { fprintf(stderr, "skipping compression tests\n"); return; } - CHECK_EQ(TestMultipleWrites(10000, 10000, 10, 10000, 2, true), - errors::ResourceExhausted("Input buffer(size: 10 bytes) too small. ", - "Should be larger than ", - COMPRESSED_RECORD_SIZE, " bytes.")); + Status status = TestMultipleWrites(10000, 10000, 10, 10000, 2, true); + CHECK_EQ(status.code(), error::Code::RESOURCE_EXHAUSTED); + CheckPrefixSuffix( + status.error_message(), + "Input buffer(size: 10 bytes) too small. 
Should be larger than ", + " bytes."); } TEST(SnappyBuffers, SmallUncompressInputStream) { @@ -337,9 +342,11 @@ TEST(SnappyBuffers, CorruptBlock) { fprintf(stderr, "skipping compression tests\n"); return; } - CHECK_EQ(TestMultipleWrites(10000, 10000, 700, 10000, 2, true, 1, true), - errors::DataLoss("Failed to read ", COMPRESSED_RECORD_SIZE, - " bytes from file. ", "Possible data corruption.")); + Status status = + TestMultipleWrites(10000, 10000, 700, 10000, 2, true, 1, true); + CHECK_EQ(status.code(), error::Code::DATA_LOSS); + CheckPrefixSuffix(status.error_message(), "Failed to read ", + " bytes from file. Possible data corruption."); } TEST(SnappyBuffers, CorruptBlockInputStream) { @@ -347,10 +354,11 @@ TEST(SnappyBuffers, CorruptBlockInputStream) { fprintf(stderr, "skipping compression tests\n"); return; } - CHECK_EQ( - TestMultipleWritesInputStream(10000, 10000, 700, 10000, 2, true, 1, true), - errors::DataLoss("Failed to read ", COMPRESSED_RECORD_SIZE, - " bytes from file. ", "Possible data corruption.")); + Status status = + TestMultipleWritesInputStream(10000, 10000, 700, 10000, 2, true, 1, true); + CHECK_EQ(status.code(), error::Code::DATA_LOSS); + CheckPrefixSuffix(status.error_message(), "Failed to read ", + " bytes from file. Possible data corruption."); } TEST(SnappyBuffers, CorruptBlockLargeInputBuffer) { @@ -367,10 +375,11 @@ TEST(SnappyBuffers, CorruptBlockLargeInputStream) { fprintf(stderr, "skipping compression tests\n"); return; } - CHECK_EQ(TestMultipleWritesInputStream(10000, 10000, 2000, 10000, 2, true, 1, - true), - errors::DataLoss("Failed to read ", COMPRESSED_RECORD_SIZE, - " bytes from file. Possible data corruption.")); + Status status = TestMultipleWritesInputStream(10000, 10000, 2000, 10000, 2, + true, 1, true); + CHECK_EQ(status.code(), error::Code::DATA_LOSS); + CheckPrefixSuffix(status.error_message(), "Failed to read ", + " bytes from file. 
Possible data corruption."); } TEST(SnappyBuffers, Tell) { From 30b69242f8b64ebe7d30057dd6adcd519ea0ed9f Mon Sep 17 00:00:00 2001 From: Edward Loper Date: Mon, 9 Nov 2020 15:33:53 -0800 Subject: [PATCH 074/220] Class to hold precomputed information about a TensorFlow Python API, to allow the API to be executed rapidly. PiperOrigin-RevId: 341499017 Change-Id: I875ea89efcd86a7fe9e2f8fcefab1cbd3aa2c0e9 --- tensorflow/python/BUILD | 84 +++ .../python/framework/python_api_info.cc | 508 ++++++++++++++++++ tensorflow/python/framework/python_api_info.h | 298 ++++++++++ .../python/framework/python_api_info_test.py | 254 +++++++++ .../framework/python_api_info_wrapper.cc | 75 +++ .../tools/def_file_filter/symbols_pybind.txt | 3 + 6 files changed, 1222 insertions(+) create mode 100644 tensorflow/python/framework/python_api_info.cc create mode 100644 tensorflow/python/framework/python_api_info.h create mode 100644 tensorflow/python/framework/python_api_info_test.py create mode 100644 tensorflow/python/framework/python_api_info_wrapper.cc diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 1435b6852a8..55480f5b95e 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1371,6 +1371,7 @@ py_library( ":_pywrap_py_exception_registry", ":_pywrap_py_func", # TODO(b/142001480): remove once the bug is fixed. 
":_pywrap_python_api_dispatcher", + ":_pywrap_python_api_info", ":_pywrap_python_op_gen", ":_pywrap_quantize_training", ":_pywrap_stacktrace_handler", @@ -1696,6 +1697,7 @@ cc_library( ":cpp_python_util", ":safe_pyobject_ptr", "//tensorflow/core:protos_all_cc", + "//third_party/python_runtime:headers", # buildcleaner: keep "@com_google_absl//absl/strings", ], ) @@ -1739,6 +1741,86 @@ tf_py_test( tags = ["no_pip"], ) +cc_library( + name = "python_api_info", + srcs = ["framework/python_api_info.cc"], + hdrs = ["framework/python_api_info.h"], + deps = [ + ":cpp_python_util", + ":op_def_util_cc", + ":python_tensor_converter", + ":safe_pyobject_ptr", + "//tensorflow/core:framework", + "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/platform:status", + "//tensorflow/python/eager:pywrap_tfe_lib", + "//third_party/python_runtime:headers", # buildcleaner: keep + "@com_google_absl//absl/strings", + ], +) + +# Note: this target is only used by python_api_info_test. 
+tf_python_pybind_extension( + name = "_pywrap_python_api_info", + srcs = ["framework/python_api_info_wrapper.cc"], + hdrs = [ + "framework/op_def_util.h", + "framework/python_api_info.h", + "framework/python_tensor_converter.h", + "lib/core/numpy.h", + "//tensorflow/c:headers", + "//tensorflow/c/eager:pywrap_required_hdrs", + "//tensorflow/c/experimental/ops:pywrap_required_hdrs", + "//tensorflow/core/common_runtime/eager:pywrap_required_hdrs", + "//tensorflow/core/distributed_runtime:pywrap_required_hdrs", + "//tensorflow/core/distributed_runtime/eager:pywrap_required_hdrs", + "//tensorflow/python/eager:pywrap_required_hdrs", + ], + module_name = "_pywrap_python_api_info", + deps = [ + ":safe_pyobject_ptr_required_hdrs", + "@com_google_absl//absl/types:optional", + "@com_google_absl//absl/hash", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/strings", + "@pybind11", + "//third_party/python_runtime:headers", # buildcleaner: keep + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:lib", + "//tensorflow/core:framework", + "//tensorflow/core/common_runtime:core_cpu_headers_lib", + "//tensorflow/core:lib_headers_for_pybind", + "//third_party/py/numpy:headers", + "//tensorflow/c:pywrap_required_hdrs", + "@com_google_absl//absl/types:span", + ] + if_static( + extra_deps = [ + "//tensorflow/core/protobuf:eager_service_proto_cc", + "//tensorflow/core/protobuf:master_proto_cc", + "//tensorflow/core/protobuf:worker_proto_cc", + ], + otherwise = [ + "//tensorflow/core/protobuf:eager_service_proto_cc_headers_only", + "//tensorflow/core/protobuf:master_proto_cc_headers_only", + "//tensorflow/core/protobuf:worker_proto_cc_headers_only", + ], + ), +) + +tf_py_test( + name = "python_api_info_test", + srcs = ["framework/python_api_info_test.py"], + python_version = "PY3", + tags = ["no_pip"], + deps = [ + ":_pywrap_python_api_info", + ":_pywrap_python_tensor_converter", + ":client_testlib", + ], +) + 
cc_library( name = "python_api_dispatcher", srcs = ["framework/python_api_dispatcher.cc"], @@ -6109,6 +6191,7 @@ pywrap_tensorflow_macro( ":pybind11_status", ":pybind11_proto", ":python_api_dispatcher", + ":python_api_info", ":python_op_gen", ":python_tensor_converter", ":safe_pyobject_ptr", @@ -6181,6 +6264,7 @@ filegroup( ":py_exception_registry", # py_exception_registry ":py_func_lib", # py_func ":python_api_dispatcher", # python_api_dispatcher + ":python_api_info", # python_api_info ":python_tensor_converter", # python_tensor_converter ":python_op_gen", # python_op_gen ":safe_ptr", # checkpoint_reader diff --git a/tensorflow/python/framework/python_api_info.cc b/tensorflow/python/framework/python_api_info.cc new file mode 100644 index 00000000000..7c93afe0757 --- /dev/null +++ b/tensorflow/python/framework/python_api_info.cc @@ -0,0 +1,508 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/python/framework/python_api_info.h" + +#include + +#include "absl/strings/str_cat.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/python/eager/pywrap_tensor.h" +#include "tensorflow/python/eager/pywrap_tfe.h" +#include "tensorflow/python/framework/op_def_util.h" +#include "tensorflow/python/lib/core/safe_pyobject_ptr.h" +#include "tensorflow/python/util/util.h" + +namespace tensorflow { + +#if PY_MAJOR_VERSION < 3 +// Python 2.x: +#define PY_STRING_CHECK(x) (PyString_Check(x) || PyUnicode_Check(x)) +#define PY_INT_AS_LONG(x) (PyInt_AsLong(x)) +#define PY_STRING_FROMSTRING(x) (PyString_FromString(x)) +#define PY_STRING_INTERN_FROM_STRING(x) (PyString_InternFromString(x)) +#define PY_STRING_AS_CSTR(x) (PyString_AsString(x)) +#else +// Python 3.x: +#define PY_STRING_CHECK(x) (PyBytes_Check(x) || PyUnicode_Check(x)) +#define PY_INT_AS_LONG(x) (PyLong_AsLong(x)) +#define PY_STRING_FROMSTRING(x) (PyUnicode_FromString(x)) +#define PY_STRING_INTERN_FROM_STRING(x) (PyUnicode_InternFromString(x)) +#define PY_STRING_AS_CSTR(x) (PyUnicode_AsUTF8AndSize((x), nullptr)) +#endif + +namespace { + +// Converts the given object to an interned Python string, and returns its +// data pointer. (This means we don't need to worry about ownership for +// this string.) +const char* InternPyString(const std::string& s) { + Safe_PyObjectPtr interned(PY_STRING_INTERN_FROM_STRING(s.c_str())); + return PY_STRING_AS_CSTR(interned.get()); +} + +template +void RemoveIf(UnaryPredicate p, std::vector* vec) { + vec->erase(std::remove_if(vec->begin(), vec->end(), p), vec->end()); +} + +struct DataTypeFormatter { + void operator()(std::string* out, DataType dtype) const { + out->append(DataType_Name(dtype)); + } +}; + +// Populates `param_names` and `defaults_tuple` based on the given OpDef. 
+void GetOpDefNamesAndDefaults(const tensorflow::OpDef& op_def, + std::vector& param_names, + Safe_PyObjectPtr& defaults_tuple) { + param_names.reserve(op_def.input_arg_size() + op_def.attr_size()); + std::set inferred_attrs; + + // Input parameters come first, in the order they occur in the OpDef. + for (const auto& input : op_def.input_arg()) { + param_names.push_back(input.name()); + if (!input.type_attr().empty()) { + inferred_attrs.insert(input.type_attr()); + } + if (!input.type_list_attr().empty()) { + inferred_attrs.insert(input.type_list_attr()); + } + if (!input.number_attr().empty()) { + inferred_attrs.insert(input.number_attr()); + } + } + + // Next come attribute params without defaults, followed by attributes with + // defaults (but inferred attributes are not included). + std::vector param_names_with_default; + std::vector defaults; + for (const auto& attr : op_def.attr()) { + if (inferred_attrs.count(attr.name()) == 0) { + if (attr.has_default_value()) { + param_names_with_default.push_back(attr.name()); + defaults.push_back(AttrValueToPyObject(attr.default_value())); + } else { + param_names.push_back(attr.name()); + } + } + } + param_names.insert(param_names.end(), param_names_with_default.begin(), + param_names_with_default.end()); + + // Finally, the 'name' parameter comes at the end, and its default value + // is the operation's name. + param_names.push_back("name"); + defaults.emplace_back(PY_STRING_FROMSTRING(op_def.name().c_str())); + + defaults_tuple.reset(PyTuple_New(defaults.size())); + for (int i = 0; i < defaults.size(); ++i) { + PyTuple_SET_ITEM(defaults_tuple.get(), i, defaults[i].release()); + } +} + +} // namespace + +PythonAPIInfo::PythonAPIInfo(const std::string& api_name) + : api_name_(InternPyString(api_name)) {} + +Status PythonAPIInfo::Initialize(const OpDef& op_def, + const std::vector param_names, + PyObject* defaults_tuple) { + // Intern the parameter names. 
+ param_names_.reserve(param_names.size()); + for (const auto& param_name : param_names) { + param_names_.push_back(InternPyString(param_name)); + } + + Py_INCREF(defaults_tuple); + defaults_tuple_.reset(defaults_tuple); + + // Build an index to look up parameter index by name. (Does not include + // inferred attributes.) + std::map param_name_to_index; + for (int i = 0; i < param_names_.size(); ++i) { + param_name_to_index[param_names_[i]] = i; + } + + // Initialize each attribute & input parameter. + attributes_.reserve(op_def.attr_size()); + for (const auto& attr_def : op_def.attr()) { + TF_RETURN_IF_ERROR(InitializeAttribute(attr_def, param_name_to_index)); + } + + inputs_.reserve(op_def.input_arg_size()); + for (const auto& arg_def : op_def.input_arg()) { + TF_RETURN_IF_ERROR(InitializeInput(arg_def, param_name_to_index)); + } + + TF_RETURN_IF_ERROR(CheckParamNames()); + + // Filter out any unused entries from inputs_with_*_attrs_. + RemoveIf( + [](const InputsWithTypeAttr& input) { + return input.tensor_params.empty() && input.tensor_list_params.empty(); + }, + &inputs_with_type_attrs_); + RemoveIf( + [](const InputsWithTypeListAttr& input) { + return input.tensor_list_params.empty(); + }, + &inputs_with_type_list_attrs_); + RemoveIf( + [](const InputsWithNumberAttr& input) { + return input.tensor_list_params.empty(); + }, + &inputs_with_number_attrs_); + + return Status::OK(); +} + +Status PythonAPIInfo::CheckParamNames() const { + std::vector param_found(param_names_.size()); + for (const auto& attr : attributes_) { + if (attr.index != -1) { + param_found[attr.index] = true; + } + } + for (const auto& input : inputs_) { + param_found[input.index] = true; + } + + for (int i = 0; i < param_names_.size(); ++i) { + if (param_names_[i] == std::string("name")) { + continue; + } + if (!param_found[i]) { + return errors::InvalidArgument( + api_name_, ": missing specification for parameter ", param_names_[i]); + } + } + return Status::OK(); +} + +Status 
PythonAPIInfo::InitializeFromRegisteredOp(const std::string& op_name) { + const tensorflow::OpDef* op_def = nullptr; + TF_RETURN_IF_ERROR( + tensorflow::OpRegistry::Global()->LookUpOpDef(op_name, &op_def)); + std::vector param_names; + Safe_PyObjectPtr defaults_tuple; + GetOpDefNamesAndDefaults(*op_def, param_names, defaults_tuple); + TF_RETURN_IF_ERROR(Initialize(*op_def, param_names, defaults_tuple.get())); + return Status::OK(); +} + +Status PythonAPIInfo::InitializeFromParamSpecs( + const std::map& input_specs, + const std::map& attr_specs, + const std::vector param_names, PyObject* defaults_tuple) { + OpDefBuilder op_def_builder(api_name_); + op_def_builder.AllowAttrTypeAny(); + for (const auto& attr_spec : attr_specs) { + op_def_builder.Attr(absl::StrCat(attr_spec.first, ": ", attr_spec.second)); + } + for (const auto& input_spec : input_specs) { + op_def_builder.Input( + absl::StrCat(input_spec.first, ": ", input_spec.second)); + } + OpRegistrationData op_reg_data; + TF_RETURN_IF_ERROR(op_def_builder.Finalize(&op_reg_data)); + + TF_RETURN_IF_ERROR( + Initialize(op_reg_data.op_def, param_names, defaults_tuple)); + + return Status::OK(); +} + +Status PythonAPIInfo::InitializeAttribute( + const OpDef::AttrDef& attr_def, + const std::map& param_name_to_index) { + if (attr_def.name() == "name") { + return errors::InvalidArgument( + api_name_, ": Reserved parameter `name` was used as an attribute."); + } + const char* name = InternPyString(attr_def.name()); + + const int param_index = + gtl::FindWithDefault(param_name_to_index, attr_def.name(), -1); + const AttributeType dtype = AttributeTypeFromName(attr_def.type()); + const int inferred_index = -1; + attributes_.push_back({param_index, dtype, name, inferred_index}); + Attribute& attr = attributes_.back(); + if (attr.type == AttributeType::UNKNOWN) { + return errors::InvalidArgument(api_name_, ": Bad attribute type for ", + attr_def.name(), ": '", attr_def.type(), + "'"); + } + std::vector* ok_dtypes = nullptr; + 
+ if (attr.type == AttributeType::DTYPE) { + DataType default_dtype = attr_def.has_default_value() + ? attr_def.default_value().type() + : DT_INVALID; + inputs_with_type_attrs_.push_back({&attr, default_dtype}); + ok_dtypes = &inputs_with_type_attrs_.back().ok_dtypes; + + } else if (attr.type == AttributeType::LIST_DTYPE) { + inputs_with_type_list_attrs_.push_back({&attr}); + for (int d : attr_def.default_value().list().type()) { + inputs_with_type_list_attrs_.back().default_dtypes.push_back( + static_cast(d)); + } + ok_dtypes = &inputs_with_type_list_attrs_.back().ok_dtypes; + } + + if (attr_def.has_allowed_values() && ok_dtypes) { + const auto& dtypes = attr_def.allowed_values().list(); + for (int i = 0; i < dtypes.type_size(); ++i) { + ok_dtypes->push_back(dtypes.type(i)); + } + } + + if (attr.type == AttributeType::INT) { + int64 default_len = + attr_def.has_default_value() ? attr_def.default_value().i() : -1; + inputs_with_number_attrs_.push_back({&attr, default_len}); + } + + // If this is an inferred attribute, then record its name and index. + if (attr.index == -1) { + std::vector* inferred_attr_names = + attr.type == AttributeType::DTYPE ? &inferred_type_attrs_ + : attr.type == AttributeType::LIST_DTYPE ? &inferred_type_list_attrs_ + : attr.type == AttributeType::INT ? 
&inferred_length_attrs_ + : nullptr; + if (inferred_attr_names == nullptr) { + return errors::InvalidArgument( + api_name_, ": Missing specification for parameter ", attr_def.name()); + } else { + attr.inferred_index = inferred_attr_names->size(); + inferred_attr_names->push_back(attr.name); + } + } + + return Status::OK(); +} + +Status PythonAPIInfo::InitializeInput( + const OpDef::ArgDef& arg_def, + const std::map& param_name_to_index) { + if (arg_def.name() == "name") { + return errors::InvalidArgument( + api_name_, ": Reserved parameter `name` was used as a tensor input."); + } + const ParamIndex param_index = + gtl::FindWithDefault(param_name_to_index, arg_def.name(), -1); + if (param_index == -1) { + return errors::InvalidArgument( + api_name_, ": Missing specification for parameter ", arg_def.name()); + } + if (arg_def.is_ref()) { + // TODO(b/164980194): Support reference parameters. + // - Pass as_ref to convert_to_tensor + // - Check that values for ref inputs have ref types. + return errors::InvalidArgument(api_name_, + ": PythonAPIInfo doesn't support reference " + "parameters yet."); + } + bool is_list = + !arg_def.number_attr().empty() || !arg_def.type_list_attr().empty(); + inputs_.push_back({param_index, is_list}); + + if (!arg_def.type_list_attr().empty()) { + // list(input) with dtypes specified by a `list(type)` attribute. + InputsWithTypeListAttr* input = + FindInputsWithTypeListAttr(arg_def.type_list_attr()); + if (!input) { + return errors::InvalidArgument( + api_name_, ": Type attribute ", arg_def.type_list_attr(), + " for parameter ", arg_def.name(), " not found."); + } + input->tensor_list_params.push_back(param_index); + } else if (!arg_def.type_attr().empty()) { + InputsWithTypeAttr* input = FindInputsWithTypeAttr(arg_def.type_attr()); + // input or list(input) with dtype specified by a `type` attribute. 
+ if (!input) { + return errors::InvalidArgument(api_name_, ": Type attribute ", + arg_def.type_attr(), " for parameter ", + arg_def.name(), " not found."); + } + if (arg_def.number_attr().empty()) { + input->tensor_params.push_back(param_index); + } else { + input->tensor_list_params.push_back(param_index); + } + } else { + // input or list(input) with fixed dtype + inputs_with_fixed_dtype_.push_back({arg_def.type(), param_index, is_list}); + } + + if (!arg_def.number_attr().empty()) { + InputsWithNumberAttr* input = + FindInputsWithNumberAttr(arg_def.number_attr()); + if (!input) { + return errors::InvalidArgument(api_name_, ": Length attribute ", + arg_def.number_attr(), " for parameter ", + arg_def.name(), " not found."); + } + input->tensor_list_params.push_back(param_index); + } + + return Status::OK(); +} + +PythonAPIInfo::InputsWithTypeAttr* PythonAPIInfo::FindInputsWithTypeAttr( + const string& name) { + for (auto& input : inputs_with_type_attrs_) { + if (name == input.type_attr->name) { + return &input; + } + } + return nullptr; +} + +PythonAPIInfo::InputsWithTypeListAttr* +PythonAPIInfo::FindInputsWithTypeListAttr(const string& name) { + for (auto& input : inputs_with_type_list_attrs_) { + if (name == input.type_list_attr->name) { + return &input; + } + } + return nullptr; +} + +PythonAPIInfo::InputsWithNumberAttr* PythonAPIInfo::FindInputsWithNumberAttr( + const string& name) { + for (auto& input : inputs_with_number_attrs_) { + if (name == input.number_attr->name) { + return &input; + } + } + return nullptr; +} + +string PythonAPIInfo::DebugInfo() const { + string s = absl::StrCat("DebugInfo for ", api_name_, ":\n"); + absl::StrAppend(&s, " param_names=[", absl::StrJoin(param_names_, ", "), + "]\n"); + Safe_PyObjectPtr defaults_repr(PyObject_Repr(defaults_tuple_.get())); + absl::StrAppend( + &s, " defaults_tuple=", TFE_GetPythonString(defaults_repr.get()), "\n"); + if (!attributes_.empty()) { + absl::StrAppend(&s, " attributes=["); + for (const auto& 
attrib : attributes_) { + if (attrib.index != -1) { + absl::StrAppend(&s, "\n {index=", attrib.index); + DCHECK_EQ(attrib.inferred_index, -1); + } else { + absl::StrAppend(&s, "\n {inferred_index=", attrib.inferred_index); + } + absl::StrAppend(&s, ", name=", attrib.name, + ", type=", AttributeTypeToName(attrib.type), "},"); + } + absl::StrAppend(&s, "]\n"); + } + if (!inputs_.empty()) { + absl::StrAppend(&s, " inputs=["); + for (const auto& input : inputs_) { + absl::StrAppend(&s, "\n {index=", input.index, + ", name=", param_names_[input.index], + ", is_list=", input.is_list, "},"); + } + absl::StrAppend(&s, "]\n"); + } + if (!inputs_with_fixed_dtype_.empty()) { + absl::StrAppend(&s, " inputs_with_fixed_dtype=["); + for (const auto& input : inputs_with_fixed_dtype_) { + absl::StrAppend(&s, "\n {index=", input.index, + ", dtype=", DataType_Name(input.dtype), + ", is_list=", input.is_list, "},"); + } + absl::StrAppend(&s, "]\n"); + } + if (!inputs_with_type_attrs_.empty()) { + absl::StrAppend(&s, " inputs_with_type_attr=["); + for (const auto& input : inputs_with_type_attrs_) { + absl::StrAppend(&s, "\n {type_attr=", input.type_attr->name); + if (input.default_dtype != DT_INVALID) { + absl::StrAppend(&s, + ", default_dtype=", DataType_Name(input.default_dtype)); + } + if (!input.tensor_params.empty()) { + absl::StrAppend(&s, ", tensor_params=[", + absl::StrJoin(input.tensor_params, ", "), "]"); + } + if (!input.tensor_list_params.empty()) { + absl::StrAppend(&s, ", tensor_list_params=[", + absl::StrJoin(input.tensor_list_params, ", "), "]"); + } + if (!input.ok_dtypes.empty()) { + absl::StrAppend( + &s, ", ok_dtypes=[", + absl::StrJoin(input.ok_dtypes, ", ", DataTypeFormatter()), "]"); + } + absl::StrAppend(&s, "},"); + } + absl::StrAppend(&s, "]\n"); + } + if (!inputs_with_type_list_attrs_.empty()) { + absl::StrAppend(&s, " inputs_with_type_list_attrs=["); + for (const auto& input : inputs_with_type_list_attrs_) { + absl::StrAppend(&s, "\n {type_list_attr=", 
input.type_list_attr->name); + if (!input.default_dtypes.empty()) { + absl::StrAppend( + &s, ", default_dtypes=[", + absl::StrJoin(input.default_dtypes, ", ", DataTypeFormatter()), + "]"); + } + if (!input.tensor_list_params.empty()) { + absl::StrAppend(&s, ", tensor_list_params=[", + absl::StrJoin(input.tensor_list_params, ", "), "]"); + } + if (!input.ok_dtypes.empty()) { + absl::StrAppend( + &s, ", ok_dtypes=[", + absl::StrJoin(input.ok_dtypes, ", ", DataTypeFormatter()), "]"); + } + absl::StrAppend(&s, "},"); + } + absl::StrAppend(&s, "]\n"); + } + if (!inputs_with_number_attrs_.empty()) { + absl::StrAppend(&s, " inputs_with_number_attrs=["); + for (const auto& input : inputs_with_number_attrs_) { + absl::StrAppend(&s, "\n {number_attr=", input.number_attr->name, + ", default_length=", input.default_length, + ", tensor_list_params=[", + absl::StrJoin(input.tensor_list_params, ", "), "],\n"); + } + absl::StrAppend(&s, "]\n"); + } + if (!inferred_type_attrs_.empty()) { + absl::StrAppend(&s, " inferred_type_attrs=[", + absl::StrJoin(inferred_type_attrs_, ", "), "]\n"); + } + if (!inferred_type_list_attrs_.empty()) { + absl::StrAppend(&s, " inferred_type_list_attrs=[", + absl::StrJoin(inferred_type_list_attrs_, ", "), "]\n"); + } + if (!inferred_length_attrs_.empty()) { + absl::StrAppend(&s, " inferred_length_attrs=[", + absl::StrJoin(inferred_length_attrs_, ", "), "]\n"); + } + return s; +} + +} // namespace tensorflow diff --git a/tensorflow/python/framework/python_api_info.h b/tensorflow/python/framework/python_api_info.h new file mode 100644 index 00000000000..4da710fbbd9 --- /dev/null +++ b/tensorflow/python/framework/python_api_info.h @@ -0,0 +1,298 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_PYTHON_UTIL_PYTHON_API_INFO_H_ +#define TENSORFLOW_PYTHON_UTIL_PYTHON_API_INFO_H_ + +#include + +#include +#include +#include + +#include "absl/types/span.h" +#include "tensorflow/core/framework/op_def.pb.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow/python/framework/op_def_util.h" +#include "tensorflow/python/framework/python_tensor_converter.h" +#include "tensorflow/python/lib/core/safe_pyobject_ptr.h" + +namespace tensorflow { + +// Precomputed information about a TensorFlow Python API. +// +// PythonAPIInfo records information about a single TensorFlow Python API, +// in order to allow calls to the API to be executed more efficiently. This +// information includes: +// +// * The name of the API. (E.g. "tf.math.add") +// +// * The name of the registered op that implements the API, if applicable +// (e.g. "AddV2"). +// +// * Information about the API's parameters. Parameters are divided into two +// "kinds": inputs and attributes. An *input* is a parameter that +// expects a Tensor or list of Tensors, and it is described by an `ArgDef`. +// An *attribute* is a parameter that expects any other value type, and it is +// described by an `AttrDef`. +// +// * Default values for the API's attribute parameters. +// +// * Information about "inferred attributes" -- attributes whose values are +// inferred from `input` parameters. 
There are two kinds of inferred +// attributes: Tensor dtypes, which are inferred from tensor and list(tensor) +// parameters; and list lengths, which are inferred from list(tensor) +// parameters. +class PythonAPIInfo { + public: + // The index of a parameter in the canonicalized parameter list. The + // canonicalized parameter list includes inputs and attributes (but does + // not include inferred attributes). `-1` is used for inferred attributes. + using ParamIndex = int; + + // Information about a parameter that expects a non-Tensor value. + struct Attribute { + ParamIndex index; // -1 if this is an inferred attribute + AttributeType type; + const char* name; // Interned python string + int inferred_index; // index to store attribute in InferredAttributes + }; + + // Information about a parameter that expects a Tensor or list(Tensor). + // Additional information about tensor parameters is stored in types + // defined below, in order to simplify dtype/length inference: + // * InputWithFixedDType: inputs with fixed dtypes. + // * InputsWithTypeAttr: groups inputs that use a type_attr for dtype. + // * InputsWithTypeListAttr: groups inputs that use a type_list_attr. + // * InputsWithNumberAttr: groups inputs by a number_attr for length. + struct Input { + ParamIndex index; + bool is_list; + }; + + // Information about a Tensor parameter w/ fixed dtype. + struct InputWithFixedDType { + DataType dtype; + ParamIndex index; + bool is_list; + }; + + // Information about Tensor parameters whose DType is specified by a single + // `type_attr` attribute. + struct InputsWithTypeAttr { + Attribute* type_attr; // not owned. + DataType default_dtype; // DT_INVALID if no default. + std::vector tensor_params; // single-tensor inputs. + std::vector tensor_list_params; // list(tensor) inputs. + std::vector ok_dtypes; + }; + + // Information about Tensor parameters whose DType is specified by a single + // `type_list_attr` attribute. 
+ struct InputsWithTypeListAttr { + Attribute* type_list_attr; // not owned. + std::vector default_dtypes; // empty if no default. + std::vector tensor_list_params; // list(tensor) inputs. + std::vector ok_dtypes; + }; + + // Information about Tensor-list parameters whose length is specified by a + // single `int` attribute. + struct InputsWithNumberAttr { + Attribute* number_attr; // not owned. + int64 default_length; // -1 for no default. + std::vector tensor_list_params; // list(tensor) inputs. + }; + + // Structure used to return inferred attribute values. + // * types[i] is the inferred value for inferred_type_attrs()[i] + // * type_lists[i] is the inferred value for inferred_type_list_attrs()[i] + // * lengths[i] is the inferred value for inferred_length_attrs()[i] + struct InferredAttributes { + std::vector types; + std::vector> type_lists; + std::vector lengths; + }; + + // Constructs a new PythonAPIInfo. + // + // Note: One of the `Initialize()` functions must be called before the + // `PythonAPIInfo` is used. + // + // Args: + // api_name: The fully-qualified name of the python API (e.g., tf.math.sum). + explicit PythonAPIInfo(const std::string& api_name); + + // Initializes this PythonAPIInfo. + // + // Args: + // op_def: Contains information about the parameters. + // param_names: The argument names for the python API, in canonical order. + // defaults_tuple: Tuple containing default values for the parameters, + // right-aligned with `param_names` -- i.e., `defaults[-i]` is the default + // for `param_names[-i]`. + Status Initialize(const OpDef& op_def, const std::vector param_names, + PyObject* defaults_tuple); + + // Initialize this PythonAPIInfo based on the registered OpDef for the given + // operation. + // + // Args: + // op_name: The registered name of the operation (e.g. "AddV2"). + Status InitializeFromRegisteredOp(const std::string& op_name); + + // Initializes this PythonAPIInfo based on a set of parameter specifications. 
+ // + // Args: + // input_specs: Mapping from parameter name to specification string for + // each input (parameter that expects a tensor value). + // attr_specs: Mapping from parameter name to specification string for + // each attribute (parameter that expects a non-tensor value). + // param_names: The argument names for the python API, in canonical order. + // defaults_tuple: Tuple containing default values for the parameters, + // right-aligned with `param_names` -- i.e., `defaults[-i]` is the default + // for `param_names[-i]`. + // + // Note: the `name` parameter should not be included in `input_specs` or + // `attr_specs`. + Status InitializeFromParamSpecs( + const std::map& input_specs, + const std::map& attr_specs, + const std::vector param_names, PyObject* defaults_tuple); + + // The name of the API that is described by this PythonAPIInfo. + const char* api_name() const { return api_name_; } + + // The ordered names of the canonical parameters that this API expects. + const std::vector& param_names() const { return param_names_; } + + // A Python tuple containing the default values for parameters. This is + // right-aligned with `param_names` -- i.e., `defaults[-i]` is the default + // for `param_names[-i]`. + const PyObject* defaults_tuple() const { return defaults_tuple_.get(); } + + // Information about the attribute (non-tensor) parameters for this API. + const std::vector& attributes() const { return attributes_; } + + // Information about the input (tensor) parameters for this API. 
+ const std::vector& inputs() const { return inputs_; } + const std::vector& inputs_with_fixed_dtype() const { + return inputs_with_fixed_dtype_; + } + const std::vector& inputs_with_type_attrs() const { + return inputs_with_type_attrs_; + } + const std::vector& inputs_with_type_list_attrs() + const { + return inputs_with_type_list_attrs_; + } + const std::vector& inputs_with_number_attrs() const { + return inputs_with_number_attrs_; + } + + // Names of inferred attributes. + const std::vector& inferred_type_attrs() const { + return inferred_type_attrs_; + } + const std::vector& inferred_type_list_attrs() const { + return inferred_type_list_attrs_; + } + const std::vector& inferred_length_attrs() const { + return inferred_length_attrs_; + } + + // Returns a string summarizing the internal state of this PythonAPIInfo. + string DebugInfo() const; + + private: + // Adds an entry to the attributes_ vector based on the given `AttrDef`. + // + // If `attr_def` describes a type attribute, then adds a value to + // inputs_with_type_attrs_ or inputs_with_type_list_attrs_ (to record any + // tensor inputs that use this dtype). + // + // If `attr_def` describes an int attribute, then adds a value to + // inputs_with_number_attrs_ (to record any tensor inputs that use this + // value as a list length). + Status InitializeAttribute( + const OpDef::AttrDef& attr_def, + const std::map& param_name_to_index); + + // Adds an entry to the inputs_ vector based on the given `ArgDef`. + // + // If `arg_def` has a fixed dtype, then adds a value to + // `inputs_with_fixed_dtype_`. + // + // If `arg_def`'s dtype is described by a `type` attr, then updates the + // appropriate value in `inputs_with_type_attrs_` with information about the + // `arg_def`. + // + // If `arg_def`'s dtype is described by a `list(type)` attr, then updates the + // appropriate value in `inputs_with_type_list_attrs_` with information about + // the `arg_def`. 
+ Status InitializeInput(const OpDef::ArgDef& arg_def, + const std::map& param_name_to_index); + + // Checks that the OpDef used to initialize this PythonAPIInfo + // had an AttrDef or ArgDef specification for each parameter. + Status CheckParamNames() const; + + // Searches inputs_with_type_attrs_ for an input with the given name. + InputsWithTypeAttr* FindInputsWithTypeAttr(const string& name); + + // Searches inputs_with_type_list_attrs_ for an input with the given name. + InputsWithTypeListAttr* FindInputsWithTypeListAttr(const string& name); + + // Searches inputs_with_number_attrs_ for an input with the given name. + InputsWithNumberAttr* FindInputsWithNumberAttr(const string& name); + + ABSL_MUST_USE_RESULT + bool InferLengthAttributes(const absl::Span params, + std::vector& inferred_length_attrs) const; + + // ========================================================================== + // Member Variables + // ========================================================================== + + // The name of the API that is described by this PythonAPIInfo. + // (Interned python string). + const char* api_name_; + + // The names of the parameters that this API expects. + // (Interned python strings.) + std::vector param_names_; + + // Tuple containing default values for the parameters, right-aligned with + // `param_names` -- i.e., `defaults[-i]` is the default for `param_names[-i]`. + Safe_PyObjectPtr defaults_tuple_; + + // Information about the non-tensor-valued parameters that this API expects. + std::vector attributes_; + + // Information about the tensor-valued parameters that this API expects. + std::vector inputs_; + std::vector inputs_with_fixed_dtype_; + std::vector inputs_with_type_attrs_; + std::vector inputs_with_type_list_attrs_; + std::vector inputs_with_number_attrs_; + + // Names of inferred attributes. (Interned python strings.) 
+ std::vector inferred_type_attrs_; + std::vector inferred_type_list_attrs_; + std::vector inferred_length_attrs_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_PYTHON_UTIL_PYTHON_API_INFO_H_ diff --git a/tensorflow/python/framework/python_api_info_test.py b/tensorflow/python/framework/python_api_info_test.py new file mode 100644 index 00000000000..f8c9df1beaf --- /dev/null +++ b/tensorflow/python/framework/python_api_info_test.py @@ -0,0 +1,254 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for tensorflow.python.framework.python_api_info.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized + +from tensorflow.python import _pywrap_python_api_info +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import test_util +from tensorflow.python.platform import googletest + +# pylint: disable=g-long-lambda + + +# Helper function to make expected output in examples more compact: +def Const(x): + return constant_op.constant(x) + + +@test_util.run_all_in_graph_and_eager_modes +class PythonAPIInfoTest(test_util.TensorFlowTestCase, parameterized.TestCase): + + def setUp(self): + context.ensure_initialized() + super(PythonAPIInfoTest, self).setUp() + + def makeConverterForGenOp(self, op_name): + """Returns a PythonAPIInfo for the given gen_op.""" + api_info = _pywrap_python_api_info.PythonAPIInfo(op_name) + api_info.InitializeFromRegisteredOp(op_name) + return api_info + + def makeConverterFromParamSpecs(self, + api_name, + param_names, + input_specs, + attr_specs, + defaults=()): + """Returns a PythonAPIInfo built from the given specs.""" + api_info = _pywrap_python_api_info.PythonAPIInfo(api_name) + api_info.InitializeFromParamSpecs(input_specs, attr_specs, param_names, + defaults) + return api_info + + # This test initializes a PythonAPIInfo from a registered + # op, and then uses DebugInfo() to check that the internal state is + # correct. + @parameterized.named_parameters([ + # An op whose inputs have fixed dtypes. 
+ ("RegexFullMatch", "RegexFullMatch", "DebugInfo for RegexFullMatch:\n" + " param_names=[input, pattern, name]\n" + " defaults_tuple=('RegexFullMatch',)\n" + " inputs=[\n" + " {index=0, name=input, is_list=0},\n" + " {index=1, name=pattern, is_list=0},]\n" + " inputs_with_fixed_dtype=[\n" + " {index=0, dtype=DT_STRING, is_list=0},\n" + " {index=1, dtype=DT_STRING, is_list=0},]\n"), + # An op whose input has a variable dtype. + ("Abs", "Abs", "DebugInfo for Abs:\n" + " param_names=[x, name]\n" + " defaults_tuple=('Abs',)\n" + " attributes=[\n" + " {inferred_index=0, name=T, type=type},]\n" + " inputs=[\n" + " {index=0, name=x, is_list=0},]\n" + " inputs_with_type_attr=[\n" + " {type_attr=T, tensor_params=[0], ok_dtypes=[DT_BFLOAT16, DT_HALF, " + "DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64]},]\n" + " inferred_type_attrs=[T]\n"), + # An op with two inputs that have the same (variable) dtype. + ("AddV2", "AddV2", "DebugInfo for AddV2:\n" + " param_names=[x, y, name]\n" + " defaults_tuple=('AddV2',)\n" + " attributes=[\n" + " {inferred_index=0, name=T, type=type},]\n" + " inputs=[\n" + " {index=0, name=x, is_list=0},\n" + " {index=1, name=y, is_list=0},]\n" + " inputs_with_type_attr=[\n" + " {type_attr=T, tensor_params=[0, 1], ok_dtypes=[DT_BFLOAT16, " + "DT_HALF, DT_FLOAT, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_UINT32, " + "DT_INT32, DT_INT64, DT_COMPLEX64, DT_COMPLEX128]},]\n" + " inferred_type_attrs=[T]\n"), + # An op with an int attribute. 
+ ("GatherV2", "GatherV2", "DebugInfo for GatherV2:\n" + " param_names=[params, indices, axis, batch_dims, name]\n" + " defaults_tuple=(0, 'GatherV2')\n" + " attributes=[\n" + " {index=3, name=batch_dims, type=int},\n" + " {inferred_index=0, name=Tparams, type=type},\n" + " {inferred_index=1, name=Tindices, type=type},\n" + " {inferred_index=2, name=Taxis, type=type},]\n" + " inputs=[\n" + " {index=0, name=params, is_list=0},\n" + " {index=1, name=indices, is_list=0},\n" + " {index=2, name=axis, is_list=0},]\n" + " inputs_with_type_attr=[\n" + " {type_attr=Tparams, tensor_params=[0]},\n" + " {type_attr=Tindices, tensor_params=[1], " + "ok_dtypes=[DT_INT32, DT_INT64]},\n" + " {type_attr=Taxis, tensor_params=[2], " + "ok_dtypes=[DT_INT32, DT_INT64]},]\n" + " inferred_type_attrs=[Tparams, Tindices, Taxis]\n"), + # An op with default attrib values. + ("ReduceJoin", "ReduceJoin", "DebugInfo for ReduceJoin:\n" + " param_names=[inputs, reduction_indices, keep_dims, separator, name]\n" + " defaults_tuple=(False, '', 'ReduceJoin')\n" + " attributes=[\n" + " {index=2, name=keep_dims, type=bool},\n" + " {index=3, name=separator, type=string},]\n" + " inputs=[\n" + " {index=0, name=inputs, is_list=0},\n" + " {index=1, name=reduction_indices, is_list=0},]\n" + " inputs_with_fixed_dtype=[\n" + " {index=0, dtype=DT_STRING, is_list=0},\n" + " {index=1, dtype=DT_INT32, is_list=0},]\n"), + # An op with a variable-dtype list input, and an int attribute. 
+ ("ParseExampleV2", "ParseExampleV2", "DebugInfo for ParseExampleV2:\n" + " param_names=[serialized, names, sparse_keys, dense_keys, " + "ragged_keys, dense_defaults, num_sparse, sparse_types, " + "ragged_value_types, ragged_split_types, dense_shapes, name]\n" + " defaults_tuple=('ParseExampleV2',)\n" + " attributes=[\n" + " {inferred_index=0, name=Tdense, type=list(type)},\n" + " {index=6, name=num_sparse, type=int},\n" + " {index=7, name=sparse_types, type=list(type)},\n" + " {index=8, name=ragged_value_types, type=list(type)},\n" + " {index=9, name=ragged_split_types, type=list(type)},\n" + " {index=10, name=dense_shapes, type=list(shape)},]\n" + " inputs=[\n" + " {index=0, name=serialized, is_list=0},\n" + " {index=1, name=names, is_list=0},\n" + " {index=2, name=sparse_keys, is_list=0},\n" + " {index=3, name=dense_keys, is_list=0},\n" + " {index=4, name=ragged_keys, is_list=0},\n" + " {index=5, name=dense_defaults, is_list=1},]\n" + " inputs_with_fixed_dtype=[\n" + " {index=0, dtype=DT_STRING, is_list=0},\n" + " {index=1, dtype=DT_STRING, is_list=0},\n" + " {index=2, dtype=DT_STRING, is_list=0},\n" + " {index=3, dtype=DT_STRING, is_list=0},\n" + " {index=4, dtype=DT_STRING, is_list=0},]\n" + " inputs_with_type_list_attrs=[\n" + " {type_list_attr=Tdense, tensor_list_params=[5], " + "ok_dtypes=[DT_FLOAT, DT_INT64, DT_STRING]},]\n" + " inferred_type_list_attrs=[Tdense]\n"), + # An op with a default dtype + ("BroadcastArgs", "BroadcastArgs", "DebugInfo for BroadcastArgs:\n" + " param_names=[s0, s1, name]\n" + " defaults_tuple=('BroadcastArgs',)\n" + " attributes=[\n" + " {inferred_index=0, name=T, type=type},]\n" + " inputs=[\n" + " {index=0, name=s0, is_list=0},\n" + " {index=1, name=s1, is_list=0},]\n" + " inputs_with_type_attr=[\n" + " {type_attr=T, default_dtype=DT_INT32, tensor_params=[0, 1], " + "ok_dtypes=[DT_INT32, DT_INT64]},]\n" + " inferred_type_attrs=[T]\n"), + ]) + def testInitializeFromRegisteredOp(self, op_name, debug_info): + api_info = 
self.makeConverterForGenOp(op_name) + self.assertEqual(api_info.DebugInfo().strip(), debug_info.strip()) + + # This test initializes a PythonAPIInfo from parameter specs, + # and then uses DebugInfo() to check that the internal state is correct. + @parameterized.named_parameters([ + ("NoParams", "NoParams", [], {}, {}, "DebugInfo for NoParams:\n" + " param_names=[]\n" + " defaults_tuple=()\n"), + ("OnlyNameParam", "OnlyNameParam", ["name"], {}, {}, + "DebugInfo for OnlyNameParam:\n" + " param_names=[name]\n" + " defaults_tuple=()\n"), + ("SomeBinaryOp", "SomeBinaryOp", ["x", "y"], dict(x="T", y="T"), + dict(T="type"), "DebugInfo for SomeBinaryOp:\n" + " param_names=[x, y]\n" + " defaults_tuple=()\n" + " attributes=[\n" + " {inferred_index=0, name=T, type=type},]\n" + " inputs=[\n" + " {index=0, name=x, is_list=0},\n" + " {index=1, name=y, is_list=0},]\n" + " inputs_with_type_attr=[\n" + " {type_attr=T, tensor_params=[0, 1]},]\n" + " inferred_type_attrs=[T]\n"), + ("AllAttributeTypes", "AllAttributeTypes", [ + "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", + "o", "p" + ], {}, + dict( + a="any", + b="float", + c="int", + d="string", + e="bool", + f="type", + g="shape", + h="tensor", + i="list(any)", + j="list(float)", + k="list(int)", + l="list(string)", + m="list(bool)", + n="list(type)", + o="list(shape)", + p="list(tensor)"), "DebugInfo for AllAttributeTypes:\n" + " param_names=[a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p]\n" + " defaults_tuple=()\n" + " attributes=[\n" + " {index=0, name=a, type=any},\n" + " {index=1, name=b, type=float},\n" + " {index=2, name=c, type=int},\n" + " {index=3, name=d, type=string},\n" + " {index=4, name=e, type=bool},\n" + " {index=5, name=f, type=type},\n" + " {index=6, name=g, type=shape},\n" + " {index=7, name=h, type=tensor},\n" + " {index=8, name=i, type=list(any)},\n" + " {index=9, name=j, type=list(float)},\n" + " {index=10, name=k, type=list(int)},\n" + " {index=11, name=l, type=list(string)},\n" + 
" {index=12, name=m, type=list(bool)},\n" + " {index=13, name=n, type=list(type)},\n" + " {index=14, name=o, type=list(shape)},\n" + " {index=15, name=p, type=list(tensor)},]\n"), + ]) + def testInitializeFromParamSpecs(self, api_name, param_names, input_specs, + attr_specs, debug_info): + api_info = self.makeConverterFromParamSpecs(api_name, param_names, + input_specs, attr_specs) + self.assertEqual(api_info.DebugInfo().strip(), debug_info.strip()) + + +if __name__ == "__main__": + googletest.main() diff --git a/tensorflow/python/framework/python_api_info_wrapper.cc b/tensorflow/python/framework/python_api_info_wrapper.cc new file mode 100644 index 00000000000..483e475401f --- /dev/null +++ b/tensorflow/python/framework/python_api_info_wrapper.cc @@ -0,0 +1,75 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Note: This library is only used by python_api_info_test. It +// is not meant to be used in other circumstances. 
+ +#include "pybind11/pybind11.h" +#include "pybind11/pytypes.h" +#include "pybind11/stl.h" +#include "tensorflow/python/framework/python_api_info.h" + +namespace py = pybind11; + +namespace tensorflow { +namespace { + +void InitializeFromRegisteredOp(PythonAPIInfo* api_info, + const std::string& op_name) { + auto result = api_info->InitializeFromRegisteredOp(op_name); + if (!result.ok()) { + PyErr_SetString(PyExc_ValueError, result.ToString().c_str()); + throw py::error_already_set(); + } +} + +void InitializeFromParamSpecs( + PythonAPIInfo* api_info, + const std::map& input_specs, + const std::map& attr_specs, + const std::vector& param_names, py::handle defaults_tuple) { + auto result = api_info->InitializeFromParamSpecs( + input_specs, attr_specs, param_names, defaults_tuple.ptr()); + if (!result.ok()) { + PyErr_SetString(PyExc_ValueError, result.ToString().c_str()); + throw py::error_already_set(); + } +} + +std::string DebugInfo(PythonAPIInfo* api_info) { return api_info->DebugInfo(); } + +} // namespace +} // namespace tensorflow + +using PythonAPIInfo = tensorflow::PythonAPIInfo; +using InferredAttributes = tensorflow::PythonAPIInfo::InferredAttributes; + +PYBIND11_MODULE(_pywrap_python_api_info, m) { + py::class_(m, "PythonAPIInfo") + .def(py::init()) + .def("InitializeFromRegisteredOp", + &tensorflow::InitializeFromRegisteredOp) + .def("InitializeFromParamSpecs", &tensorflow::InitializeFromParamSpecs) + .def("DebugInfo", &tensorflow::DebugInfo) + .def("InferredTypeAttrs", + [](PythonAPIInfo* self) { return self->inferred_type_attrs(); }) + .def("InferredTypeListAttrs", + [](PythonAPIInfo* self) { return self->inferred_type_list_attrs(); }) + .def("InferredLengthAttrs", + [](PythonAPIInfo* self) { return self->inferred_length_attrs(); }); + py::class_(m, "InferredAttributes") + .def_readonly("types", &InferredAttributes::types) + .def_readonly("type_lists", &InferredAttributes::type_lists) + .def_readonly("lengths", &InferredAttributes::lengths); +} diff 
--git a/tensorflow/tools/def_file_filter/symbols_pybind.txt b/tensorflow/tools/def_file_filter/symbols_pybind.txt index ebe1427ba71..a6082788413 100644 --- a/tensorflow/tools/def_file_filter/symbols_pybind.txt +++ b/tensorflow/tools/def_file_filter/symbols_pybind.txt @@ -404,3 +404,6 @@ tensorflow::PythonAPIDispatcher [python_tensor_converter] # python_tensor_converter tensorflow::PythonTensorConverter + +[python_api_info] # python_api_info +tensorflow::PythonAPIInfo From db1293e8956cac48ada1b4a976470461bfa2a7e3 Mon Sep 17 00:00:00 2001 From: Monica Song Date: Mon, 9 Nov 2020 15:37:57 -0800 Subject: [PATCH 075/220] Update save_options.function_aliases docstring. PiperOrigin-RevId: 341499805 Change-Id: I70b6795d73a50fdf225f5b25bb5ad4ca70c6ab6b --- tensorflow/python/saved_model/save_options.py | 32 ++++++++----------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/tensorflow/python/saved_model/save_options.py b/tensorflow/python/saved_model/save_options.py index f6330848441..30795ac7929 100644 --- a/tensorflow/python/saved_model/save_options.py +++ b/tensorflow/python/saved_model/save_options.py @@ -126,26 +126,22 @@ class SaveOptions(object): by a single tf.function you can use the `function_aliases` argument to store a map from the alias name to all concrete function names. E.g. - ```python - class MyModel: - @tf.function - def func(): - ... - @tf.function - def serve(): - ... - func() + >>> class Adder(tf.Module): + ... @tf.function + ... def double(self, x): + ... return x + x + + >>> model = Adder() + >>> model.double.get_concrete_function( + ... tf.TensorSpec(shape=[], dtype=tf.float32, name="float_input")) + >>> model.double.get_concrete_function( + ... tf.TensorSpec(shape=[], dtype=tf.string, name="string_input")) + + >>> options = tf.saved_model.SaveOptions( + ... 
function_aliases={'double': model.double}) + >>> tf.saved_model.save(model, '/tmp/adder', options=options) - model = MyModel() - signatures = { - 'serving_default': model.serve.get_concrete_function(), - } - options = tf.saved_model.SaveOptions(function_aliases={ - 'my_func': func, - }) - tf.saved_model.save(model, export_dir, signatures, options) - ``` experimental_io_device: string. Applies in a distributed setting. Tensorflow device to use to access the filesystem. If `None` (default) then for each variable the filesystem is accessed from the CPU:0 device From b3d45cd17cda6d0df210c3d99ec86fbe6bf5301b Mon Sep 17 00:00:00 2001 From: Jay Shi Date: Mon, 9 Nov 2020 15:38:16 -0800 Subject: [PATCH 076/220] [tf.data] Apply gradient descent method as default algorithm for autotuning optimization. PiperOrigin-RevId: 341499875 Change-Id: Ie2eab5ed5e85e0c9afac1fb5b612057e51bd0e12 --- .../core/kernels/data/optimize_dataset_op.cc | 4 +- .../kernel_tests/optimize_dataset_test.py | 38 +++++++++++++++++-- .../experimental/ops/optimization_options.py | 4 +- 3 files changed, 37 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/kernels/data/optimize_dataset_op.cc b/tensorflow/core/kernels/data/optimize_dataset_op.cc index 15a035e808a..b3df18a53c7 100644 --- a/tensorflow/core/kernels/data/optimize_dataset_op.cc +++ b/tensorflow/core/kernels/data/optimize_dataset_op.cc @@ -84,6 +84,7 @@ void OptimizeDatasetOp::MakeDataset(OpKernelContext* ctx, DatasetBase* input, // of the Borg jobs, the experiments will be randomly turned on. // clang-format off absl::flat_hash_map live_experiments = { + {"enable_gradient_descent", 100}, {"map_parallelization", 20} }; // clang-format on @@ -110,9 +111,6 @@ void OptimizeDatasetOp::MakeDataset(OpKernelContext* ctx, DatasetBase* input, // The vector stores the graduated experiment names which will be turned on // for all input pipelines. - // - // Note some of the graduated experiments may be hard coded, so not listed - // below. 
// clang-format off std::vector graduated_experiments = {"disable_intra_op_parallelism"}; // clang-format on diff --git a/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py index f731a714cab..ef22a5ba6f5 100644 --- a/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py @@ -245,6 +245,38 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): self.assertDatasetProduces(dataset, expected_output=expected_output) + @combinations.generate( + combinations.times( + test_base.default_test_combinations(), + combinations.combine(autotune=False, autotune_buffers=False) + + combinations.combine(autotune=True, autotune_buffers=False) + + combinations.combine(autotune=True, autotune_buffers=True), + combinations.combine(set_env=[False, True]))) + def testOptimizationEnableGradientDescent(self, autotune, autotune_buffers, + set_env): + if set_env: + os.environ["TF_DATA_EXPERIMENT_OPT_IN"] = "enable_gradient_descent" + os.environ["TF_JOB_NAME"] = "test_job" + + dataset = dataset_ops.Dataset.range(5) + dataset = dataset.prefetch(buffer_size=-1) + dataset = dataset.map(lambda x: x + 1, num_parallel_calls=2) + dataset = dataset.map(lambda x: x + 1, num_parallel_calls=-1) + dataset = dataset.prefetch(buffer_size=3) + dataset = dataset.map(lambda x: x + 1, num_parallel_calls=-1) + dataset = dataset.prefetch(buffer_size=1) + + options = dataset_ops.Options() + options.experimental_optimization.autotune = autotune + options.experimental_optimization.autotune_buffers = autotune_buffers + dataset = dataset.with_options(options) + + self.assertDatasetProduces(dataset, expected_output=list(range(3, 8))) + + if set_env: + del os.environ["TF_DATA_EXPERIMENT_OPT_IN"] + del os.environ["TF_JOB_NAME"] + @combinations.generate( combinations.times( 
test_base.default_test_combinations(), @@ -543,16 +575,14 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): if autotune_buffers is True: # pylint: disable=g-bool-id-comparison self.assertIn("autotune_buffer_sizes", graph_rewrites.enabled) self.assertIn("disable_prefetch_legacy_autotune", graph_rewrites.enabled) + self.assertEqual(algorithm, + optimization_options._AutotuneAlgorithm.GRADIENT_DESCENT) else: self.assertNotIn("autotune_buffer_sizes", graph_rewrites.enabled) self.assertNotIn("disable_prefetch_legacy_autotune", graph_rewrites.enabled) - if autotune_buffers is False: # pylint: disable=g-bool-id-comparison self.assertEqual(algorithm, optimization_options._AutotuneAlgorithm.HILL_CLIMB) - else: - self.assertEqual(algorithm, - optimization_options._AutotuneAlgorithm.GRADIENT_DESCENT) @combinations.generate( combinations.times( diff --git a/tensorflow/python/data/experimental/ops/optimization_options.py b/tensorflow/python/data/experimental/ops/optimization_options.py index a2d6c77cfb7..5c69855e15f 100644 --- a/tensorflow/python/data/experimental/ops/optimization_options.py +++ b/tensorflow/python/data/experimental/ops/optimization_options.py @@ -228,8 +228,8 @@ class OptimizationOptions(options.OptionsBase): # If autotune_buffers is enabled, we use the GRADIENT_DESCENT algorithm by # default, which is more performant for tuning heterogeneous parameters. algorithm = ( - _AutotuneAlgorithm.HILL_CLIMB if self.autotune_buffers is False # pylint: disable=g-bool-id-comparison - else _AutotuneAlgorithm.GRADIENT_DESCENT) + _AutotuneAlgorithm.GRADIENT_DESCENT + if self._autotune_buffers() else _AutotuneAlgorithm.HILL_CLIMB) cpu_budget = 0 # Indicates that all CPU cores should be used by default. ram_budget = 0 # Indicates that default value of RAM budget should be used. 
From af3f3f91112c9d71efa9d2ee42263b3d058b5137 Mon Sep 17 00:00:00 2001 From: Robert Suderman Date: Mon, 9 Nov 2020 15:57:42 -0800 Subject: [PATCH 077/220] Correct SpaceToBatch result shape inference Results of SpaceToBatch Tf-to-Tf lowering did not infer all result types. Updated pass to correct this. PiperOrigin-RevId: 341503610 Change-Id: Iac5e909556a8bf96886fa28c50f1af7da46c333c --- .../mlir/tensorflow/tests/lower_tf.mlir | 19 ++++++-- .../mlir/tensorflow/transforms/lower_tf.cc | 43 ++++++++++++++----- 2 files changed, 48 insertions(+), 14 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir b/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir index ff480ef4980..2de6c3c16d3 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir @@ -231,11 +231,11 @@ func @fourdim_space_to_batch_nd(%input: tensor<3x5x7x10xf32>, %block_shape: tens // CHECK-DAG: [[PAD00:%.+]] = "tf.Const"() {value = dense<0> : tensor<1x2xi64>} // CHECK-DAG: [[ZERO_I32:%.+]] = "tf.Const"() {value = dense<0> : tensor} // CHECK-DAG: [[ZERO_I64:%.+]] = "tf.Const"() {value = dense<0> : tensor} - // CHECK-DAG: [[ONE_I64:%.+]] = "tf.Const"() {value = dense<1> : tensor} // CHECK-DAG: [[FULL_PADDINGS:%.+]] = "tf.ConcatV2"([[PAD00]], %arg2, [[PAD00]], [[ZERO_I64]]) // CHECK-DAG: [[PAD_DEFAULT:%.+]] = "tf.Const"() {value = dense<0.000000e+00> : tensor} // CHECK-DAG: [[PADDED:%.+]] = "tf.PadV2"(%arg0, [[FULL_PADDINGS]], [[PAD_DEFAULT]]) - // CHECK-DAG: [[PADDINGS_SUM:%.+]] = "tf.Sum"([[FULL_PADDINGS]], [[ONE_I64]]) + // CHECK-DAG: [[PADDINGS:%.+]]:2 = "tf.Unpack"([[FULL_PADDINGS]]) {axis = 1 : i64} + // CHECK-DAG: [[PADDINGS_SUM:%.+]] = "tf.Add"([[PADDINGS]]#0, [[PADDINGS]]#1) // CHECK-DAG: [[INPUT_SHAPE:%.+]] = "tf.Const"() {value = dense<[3, 5, 7, 10]> : tensor<4xi64>} // CHECK-DAG: [[PADDED_SHAPE:%.+]] = "tf.Add"([[PADDINGS_SUM]], [[INPUT_SHAPE]]) // CHECK-DAG: [[PADDED_SHAPE_SPLITS:%.+]]:4 = 
"tf.Split"([[ZERO_I32]], [[PADDED_SHAPE]]) @@ -256,14 +256,25 @@ func @fourdim_space_to_batch_nd(%input: tensor<3x5x7x10xf32>, %block_shape: tens } // Verify the result shape for the tf.PadV2 op. +// CHECK-LABEL: const_paddings_space_to_batch_nd func @const_paddings_space_to_batch_nd(%arg0: tensor<1x8x2xf32>) -> (tensor<3x5x2xf32>) { %0 = "tf.Const"() {value = dense<3> : tensor<1xi32>} : () -> tensor<1xi32> %1 = "tf.Const"() {value = dense<[[3, 4]]> : tensor<1x2xi32>} : () -> tensor<1x2xi32> - // CHECK: "tf.PadV2" - // CHECK-SAME: tensor<1x5x2xf32> + + // CHECK-DAG: [[VAL0:%.+]] = "tf.Const"() {value = dense<[3, 5, 2]> : tensor<3xi64>} + // CHECK-DAG: [[VAL1:%.+]] = "tf.Const"() {value = dense<[1, 5, 3, 2]> : tensor<4xi64>} + // CHECK-DAG: [[VAL2:%.+]] = "tf.Const"() {value = dense<{{\[\[}}0, 0], [3, 4], [0, 0{{\]\]}}> : tensor<3x2xi64>} + // CHECK-DAG: [[VAL3:%.+]] = "tf.Const"() {value = dense<0.000000e+00> : tensor} + // CHECK-DAG: [[VAL4:%.+]] = "tf.Const"() {value = dense<[2, 0, 1, 3]> : tensor<4xi64>} + // CHECK-DAG: [[VAL5:%.+]] = "tf.PadV2"(%arg0, [[VAL2]], [[VAL3]]) + // CHECK-SAME: tensor<1x15x2xf32> + // CHECK-DAG: [[VAL6:%.+]] = "tf.Reshape"([[VAL5]], [[VAL1]]) + // CHECK-DAG: [[VAL7:%.+]] = "tf.Transpose"([[VAL6]], [[VAL4]]) + // CHECK-DAG: [[VAL8:%.+]] = "tf.Reshape"([[VAL7]], [[VAL0]]) %2 = "tf.SpaceToBatchND"(%arg0, %0, %1) : (tensor<1x8x2xf32>, tensor<1xi32>, tensor<1x2xi32>) -> tensor<3x5x2xf32> + // CHECK: return [[VAL8]] return %2 : tensor<3x5x2xf32> } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc index 1b8f7d2f596..53a73ce89e2 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc @@ -26,6 +26,7 @@ limitations under the License. 
#include "mlir/IR/StandardTypes.h" // from @llvm-project #include "mlir/IR/TypeUtilities.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" #include "tensorflow/core/util/tensor_format.h" @@ -805,8 +806,8 @@ class LowerSpaceToBatchNDOp : public RewritePattern { ConcatV2Op::getOperationName(), AddOp::getOperationName(), PadOp::getOperationName(), - SumOp::getOperationName(), SplitOp::getOperationName(), + UnpackOp::getOperationName(), DivOp::getOperationName(), MulOp::getOperationName(), ReshapeOp::getOperationName(), @@ -867,6 +868,7 @@ class LowerSpaceToBatchNDOp : public RewritePattern { // full_paddings won't be available as a constant for shape inference. ElementsAttr block_shape; ElementsAttr paddings; + llvm::SmallVector block_shape_ints; auto padded_shape = llvm::to_vector<4>(input_shape); if (matchPattern(op.block_shape(), m_Constant(&block_shape)) && matchPattern(op.paddings(), m_Constant(&paddings))) { @@ -876,13 +878,14 @@ class LowerSpaceToBatchNDOp : public RewritePattern { paddings.getValue({i, 1}).cast().getInt(); int64_t block_shape_i = block_shape.getValue({i}).cast().getInt(); - padded_shape[i + 1] = - (paddings_sum + padded_shape[i + 1]) / block_shape_i; + padded_shape[i + 1] = (paddings_sum + input_shape[i + 1]); + block_shape_ints.push_back(block_shape_i); } } else { for (int i = 0; i < block_rank; i++) { padded_shape[i + 1] = ShapedType::kDynamicSize; } + block_shape_ints.resize(block_shape_type.getNumElements(), -1); } auto padded_type = @@ -893,13 +896,13 @@ class LowerSpaceToBatchNDOp : public RewritePattern { auto paddings_sum_type = RankedTensorType::get({input_rank}, rewriter.getIntegerType(64)); - auto one_i64 = rewriter.create( - loc, GetScalarOfType(rewriter.getIntegerType(64), 1)); // paddings_sum = 
paddings[*,0] + paddings[*,1] - auto paddings_sum = - rewriter.create(loc, paddings_sum_type, full_paddings, one_i64); + auto paddings_split = rewriter.create( + loc, TypeRange({paddings_sum_type, paddings_sum_type}), full_paddings, + rewriter.getI64IntegerAttr(1)); + auto paddings_sum = rewriter.create(loc, paddings_split.getResult(0), + paddings_split.getResult(1)); - // input_shape_tensor = input.shape auto input_shape_tensor = rewriter.create( loc, DenseElementsAttr::get( @@ -928,25 +931,46 @@ class LowerSpaceToBatchNDOp : public RewritePattern { block_shape_i64) .output()); + SmallVector outer_shape_ints; SmallVector outer_shape_vals; for (int64_t i = 0; i < block_rank; ++i) { // TODO(b/157475606): Insert tf.Assert that the following division has // remainder 0. outer_shape_vals.push_back(rewriter.create( loc, padded_shape_splits[1 + i], block_shape_splits[i])); + + auto padded_shape_i = padded_shape[1 + i]; + auto block_shape_ints_i = block_shape_ints[i]; + + // Compute the outer_shape constant values to infer the reshape. 
+ if (padded_shape_i == -1 || block_shape_ints_i == -1) { + outer_shape_ints.push_back(-1); + } else { + outer_shape_ints.push_back(padded_shape_i / block_shape_ints_i); + } } SmallVector reshaped_shape_vals{padded_shape_splits[0]}; + SmallVector reshaped_shape_ints{padded_shape[0]}; for (int64_t i = 0; i < block_rank; ++i) { reshaped_shape_vals.push_back(outer_shape_vals[i]); reshaped_shape_vals.push_back(block_shape_splits[i]); + + reshaped_shape_ints.push_back(outer_shape_ints[i]); + reshaped_shape_ints.push_back(block_shape_ints[i]); } for (int64_t i = 1 + block_rank; i < input_rank; ++i) { reshaped_shape_vals.push_back(padded_shape_splits[i]); + reshaped_shape_ints.push_back(padded_shape[i]); } auto reshaped_shape = ValuesToRank1( rewriter, loc, rewriter.getIntegerType(64), reshaped_shape_vals); + auto reshaped = rewriter.create( + loc, + RankedTensorType::get(reshaped_shape_ints, input_type.getElementType()), + padded, reshaped_shape); + SmallVector permutation_vals; for (int64_t i = 0; i < block_rank; ++i) { permutation_vals.push_back(2 + 2 * i); @@ -961,6 +985,7 @@ class LowerSpaceToBatchNDOp : public RewritePattern { auto permutation = rewriter.create( loc, GetI64ElementsAttr(permutation_vals, &rewriter)); + auto permuted = rewriter.create(loc, reshaped, permutation); auto output_batch = padded_shape_splits[0]; for (int64_t i = 0; i < block_rank; ++i) { output_batch = @@ -975,8 +1000,6 @@ class LowerSpaceToBatchNDOp : public RewritePattern { } auto output_shape = ValuesToRank1( rewriter, loc, rewriter.getIntegerType(64), output_shape_vals); - auto reshaped = rewriter.create(loc, padded, reshaped_shape); - auto permuted = rewriter.create(loc, reshaped, permutation); // Sometimes the result type is more specific than what the reshape builder // can infer. 
From 99118cceb86180920e939aca9ba4a960e4872d03 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 9 Nov 2020 16:45:08 -0800 Subject: [PATCH 078/220] Add examples + explanation to `tf.gather` Add example code for image, move image to the top. PiperOrigin-RevId: 341512111 Change-Id: Ieeee2acdf9ebc878187f6ca16c7c102544d1c0a6 --- tensorflow/python/ops/array_ops.py | 199 +++++++++++++++++++++++------ 1 file changed, 159 insertions(+), 40 deletions(-) diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 843021ae046..284bc5147a3 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -4734,6 +4734,11 @@ def reverse_sequence_v2(input, @tf_export(v1=["gather"]) +@deprecation.deprecated_args(None, + ("The `validate_indices` argument has no effect. " + "Indices are always validated on CPU and never " + "validated on GPU."), + "validate_indices") @dispatch.add_dispatch_support def gather(params, indices, @@ -4743,62 +4748,176 @@ def gather(params, batch_dims=0): # pylint: disable=g-doc-args r"""Gather slices from params axis `axis` according to indices. - Gather slices from params axis `axis` according to `indices`. `indices` must - be an integer tensor of any dimension (usually 0-D or 1-D). + Gather slices from `params` axis `axis` according to `indices`. `indices` + must be an integer tensor of any dimension (often 1-D). - For 0-D (scalar) `indices`: + `Tensor.__getitem__` works for scalars, `tf.newaxis`, and + [python slices](https://numpy.org/doc/stable/reference/arrays.indexing.html#basic-slicing-and-indexing) - $$\begin{align*} - output[p_0, ..., p_{axis-1}, && &&& p_{axis + 1}, ..., p_{N-1}] = \\ - params[p_0, ..., p_{axis-1}, && indices, &&& p_{axis + 1}, ..., p_{N-1}] - \end{align*}$$ + `tf.gather` extends indexing to handle tensors of indices. - Where *N* = `ndims(params)`. 
+ In the simplest case it's identical to scalar indexing: - For 1-D (vector) `indices` with `batch_dims=0`: + >>> params = tf.constant(['p0', 'p1', 'p2', 'p3', 'p4', 'p5']) + >>> params[3].numpy() + b'p3' + >>> tf.gather(params, 3).numpy() + b'p3' - $$\begin{align*} - output[p_0, ..., p_{axis-1}, && &i, &&p_{axis + 1}, ..., p_{N-1}] =\\ - params[p_0, ..., p_{axis-1}, && indices[&i], &&p_{axis + 1}, ..., p_{N-1}] - \end{align*}$$ + The most common case is to pass a single axis tensor of indices (this + can't be expressed as a python slice because the indices are not sequential): - In the general case, produces an output tensor where: - - $$\begin{align*} - output[p_0, &..., p_{axis-1}, & - &i_{B}, ..., i_{M-1}, & - p_{axis + 1}, &..., p_{N-1}] = \\ - params[p_0, &..., p_{axis-1}, & - indices[p_0, ..., p_{B-1}, &i_{B}, ..., i_{M-1}], & - p_{axis + 1}, &..., p_{N-1}] - \end{align*}$$ - - Where *N* = `ndims(params)`, *M* = `ndims(indices)`, and *B* = `batch_dims`. - Note that `params.shape[:batch_dims]` must be identical to - `indices.shape[:batch_dims]`. - - The shape of the output tensor is: - - > `output.shape = params.shape[:axis] + indices.shape[batch_dims:] + - > params.shape[axis + 1:]`. - - Note that on CPU, if an out of bound index is found, an error is returned. - On GPU, if an out of bound index is found, a 0 is stored in the corresponding - output value. - - See also `tf.gather_nd`. + >>> indices = [2, 0, 2, 5] + >>> tf.gather(params, indices).numpy() + array([b'p2', b'p0', b'p2', b'p5'], dtype=object)
+ The indices can have any shape. When the `params` has 1 axis, the + output shape is equal to the input shape: + + >>> tf.gather(params, [[2, 0], [2, 5]]).numpy() + array([[b'p2', b'p0'], + [b'p2', b'p5']], dtype=object) + + The `params` may also have any shape. `gather` can select slices + across any axis depending on the `axis` argument (which defaults to 0). + Below it is used to gather first rows, then columns from a matrix: + + >>> params = tf.constant([[0, 1.0, 2.0], + ... [10.0, 11.0, 12.0], + ... [20.0, 21.0, 22.0], + ... [30.0, 31.0, 32.0]]) + >>> tf.gather(params, indices=[3,1]).numpy() + array([[30., 31., 32.], + [10., 11., 12.]], dtype=float32) + >>> tf.gather(params, indices=[2,1], axis=1).numpy() + array([[ 2., 1.], + [12., 11.], + [22., 21.], + [32., 31.]], dtype=float32) + + More generally: The output shape has the same shape as the input, with the + indexed-axis replaced by the shape of the indices. + + >>> def result_shape(p_shape, i_shape, axis=0): + ... return p_shape[:axis] + i_shape + p_shape[axis+1:] + >>> + >>> result_shape([1, 2, 3], [], axis=1) + [1, 3] + >>> result_shape([1, 2, 3], [7], axis=1) + [1, 7, 3] + >>> result_shape([1, 2, 3], [7, 5], axis=1) + [1, 7, 5, 3] + + Here are some examples: + + >>> params.shape.as_list() + [4, 3] + >>> indices = tf.constant([[0, 2]]) + >>> tf.gather(params, indices=indices, axis=0).shape.as_list() + [1, 2, 3] + >>> tf.gather(params, indices=indices, axis=1).shape.as_list() + [4, 1, 2] + + >>> params = tf.random.normal(shape=(5, 6, 7, 8)) + >>> indices = tf.random.uniform(shape=(10, 11), maxval=7, dtype=tf.int32) + >>> result = tf.gather(params, indices, axis=2) + >>> result.shape.as_list() + [5, 6, 10, 11, 8] + + This is because each index takes a slice from `params`, and + places it at the corresponding location in the output. For the above example + + >>> # For any location in indices + >>> a, b = 0, 1 + >>> tf.reduce_all( + ... # the corresponding slice of the result + ... 
result[:, :, a, b, :] == + ... # is equal to the slice of `params` along `axis` at the index. + ... params[:, :, indices[a, b], :] + ... ).numpy() + True + + ### Batching: + + The `batch_dims` argument lets you gather different items from each element + of a batch. + + Using `batch_dims=1` is equivalent to having an outer loop over the first + axis of `params` and `indices`: + + >>> params = tf.constant([ + ... [0, 0, 1, 0, 2], + ... [3, 0, 0, 0, 4], + ... [0, 5, 0, 6, 0]]) + >>> indices = tf.constant([ + ... [2, 4], + ... [0, 4], + ... [1, 3]]) + + >>> tf.gather(params, indices, axis=1, batch_dims=1).numpy() + array([[1, 2], + [3, 4], + [5, 6]], dtype=int32) + + This is equivalent to: + + >>> def manually_batched_gather(params, indices, axis): + ... batch_dims=1 + ... result = [] + ... for p,i in zip(params, indices): + ... r = tf.gather(p, i, axis=axis-batch_dims) + ... result.append(r) + ... return tf.stack(result) + >>> manually_batched_gather(params, indices, axis=1).numpy() + array([[1, 2], + [3, 4], + [5, 6]], dtype=int32) + + Higher values of `batch_dims` are equivalent to multiple nested loops over + the outer axes of `params` and `indices`. So the overall shape function is + + >>> def batched_result_shape(p_shape, i_shape, axis=0, batch_dims=0): + ... return p_shape[:axis] + i_shape[batch_dims:] + p_shape[axis+1:] + >>> + >>> batched_result_shape( + ... p_shape=params.shape.as_list(), + ... i_shape=indices.shape.as_list(), + ... axis=1, + ... 
batch_dims=1) + [3, 2] + + >>> tf.gather(params, indices, axis=1, batch_dims=1).shape.as_list() + [3, 2] + + See also: + + * `tf.Tensor.__getitem__`: The direct tensor index operation (`t[]`), handles + scalars and python-slices `tensor[..., 7, 1:-1]` + * `tf.scatter`: A collection of operations similar to `__setitem__` + (`t[i] = x`) + * `tf.gather_nd`: An operation similar to `tf.gather` but gathers across + multiple axes at once (it can gather elements of a matrix instead of rows + or columns) + * `tf.boolean_mask`, `tf.where`: Binary indexing. + * `tf.slice` and `tf.strided_slice`: For lower level access to the + implementation of `__getitem__`'s python-slice handling (`t[1:-1:2]`) + Args: params: The `Tensor` from which to gather values. Must be at least rank `axis + 1`. indices: The index `Tensor`. Must be one of the following types: `int32`, - `int64`. Must be in range `[0, params.shape[axis])`. - validate_indices: Deprecated, does nothing. + `int64`. The values must be in range `[0, params.shape[axis])`. + validate_indices: Deprecated, does nothing. Indices are always validated on + CPU, never validated on GPU. + + Caution: On CPU, if an out of bound index is found, an error is raised. + On GPU, if an out of bound index is found, a 0 is stored in the + corresponding output value. axis: A `Tensor`. Must be one of the following types: `int32`, `int64`. The `axis` in `params` to gather `indices` from. Must be greater than or equal to `batch_dims`. Defaults to the first non-batch dimension. Supports From d99d8b10dc91d9f12a4d8af91a7deb6f217711cd Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Nov 2020 16:46:30 -0800 Subject: [PATCH 079/220] Go: Update generated wrapper functions for TensorFlow ops. 
PiperOrigin-RevId: 341512329 Change-Id: I75cd65e79ffcc95f057d583d892843046c12d2b6 --- tensorflow/go/op/wrappers.go | 215 ++++++++++++++++++++++------------- 1 file changed, 139 insertions(+), 76 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index f8c4149e1ab..d65bd33f48f 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -14570,6 +14570,21 @@ func WholeFileReaderV2(scope *Scope, optional ...WholeFileReaderV2Attr) (reader_ return op.Output(0) } +// Generate a glob pattern matching all sharded file names. +func ShardedFilespec(scope *Scope, basename tf.Output, num_shards tf.Output) (filename tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ShardedFilespec", + Input: []tf.Input{ + basename, num_shards, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Saves the input tensors to disk. // // The size of `tensor_names` must match the number of tensors in `data`. `data[i]` @@ -22697,6 +22712,69 @@ func TanhGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { return op.Output(0) } +// RiscConvAttr is an optional argument to RiscConv. +type RiscConvAttr func(optionalAttr) + +// RiscConvDataFormat sets the optional data_format attribute to value. +// +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, height, width, channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, channels, height, width]. +// If not specified, defaults to "NHWC" +func RiscConvDataFormat(value string) RiscConvAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// RiscConvDilations sets the optional dilations attribute to value. +// +// value: 1-D tensor of length 4. The dilation factor for each dimension of +// `input`. 
If set to k > 1, there will be k-1 skipped cells between each +// filter element on that dimension. The dimension order is determined by the +// value of `data_format`, see above for details. Dilations in the batch and +// depth dimensions must be 1. +// If not specified, defaults to +func RiscConvDilations(value []int64) RiscConvAttr { + return func(m optionalAttr) { + m["dilations"] = value + } +} + +// Computes a 2-D convolution given 4-D `input` and `filter` tensors. +// +// Arguments: +// input: A 4-D tensor. The dimension order is interpreted according to the value +// of `data_format`, see below for details. +// filter: A 4-D tensor of shape +// `[filter_height, filter_width, in_channels, out_channels]` +// strides: 1-D tensor of length 4. The stride of the sliding window for each +// dimension of `input`. The dimension order is determined by the value of +// `data_format`, see below for details. +// +// Returns A 4-D tensor. The dimension order is determined by the value of +// `data_format`, see below for details. +func RiscConv(scope *Scope, input tf.Output, filter tf.Output, strides []int64, optional ...RiscConvAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"strides": strides} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RiscConv", + Input: []tf.Input{ + input, filter, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Computes hyperbolic tangent of `x` element-wise. // // Given an input tensor, this function computes hyperbolic tangent of every @@ -29742,6 +29820,67 @@ func RandomGammaGrad(scope *Scope, alpha tf.Output, sample tf.Output) (output tf return op.Output(0) } +// RandomShuffleAttr is an optional argument to RandomShuffle. +type RandomShuffleAttr func(optionalAttr) + +// RandomShuffleSeed sets the optional seed attribute to value. 
+// +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func RandomShuffleSeed(value int64) RandomShuffleAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// RandomShuffleSeed2 sets the optional seed2 attribute to value. +// +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomShuffleSeed2(value int64) RandomShuffleAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Randomly shuffles a tensor along its first dimension. +// +// The tensor is shuffled along dimension 0, such that each `value[j]` is mapped +// to one and only one `output[i]`. For example, a mapping that might occur for a +// 3x2 tensor is: +// +// ``` +// [[1, 2], [[5, 6], +// [3, 4], ==> [1, 2], +// [5, 6]] [3, 4]] +// ``` +// +// Arguments: +// value: The tensor to be shuffled. +// +// Returns A tensor of same shape and type as `value`, shuffled along its first +// dimension. +func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RandomShuffle", + Input: []tf.Input{ + value, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Creates a dataset that takes a Bernoulli sample of the contents of another dataset. // // There is no transformation in the `tf.data` Python API for creating this dataset. @@ -36066,21 +36205,6 @@ func SparseCross(scope *Scope, indices []tf.Output, values []tf.Output, shapes [ return op.Output(0), op.Output(1), op.Output(2) } -// Generate a glob pattern matching all sharded file names. 
-func ShardedFilespec(scope *Scope, basename tf.Output, num_shards tf.Output) (filename tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ShardedFilespec", - Input: []tf.Input{ - basename, num_shards, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Writes a scalar summary. // // Writes scalar `value` at `step` with `tag` using summary `writer`. @@ -37137,67 +37261,6 @@ func SparseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf return op.Output(0), op.Output(1), op.Output(2) } -// RandomShuffleAttr is an optional argument to RandomShuffle. -type RandomShuffleAttr func(optionalAttr) - -// RandomShuffleSeed sets the optional seed attribute to value. -// -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func RandomShuffleSeed(value int64) RandomShuffleAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// RandomShuffleSeed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomShuffleSeed2(value int64) RandomShuffleAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Randomly shuffles a tensor along its first dimension. -// -// The tensor is shuffled along dimension 0, such that each `value[j]` is mapped -// to one and only one `output[i]`. For example, a mapping that might occur for a -// 3x2 tensor is: -// -// ``` -// [[1, 2], [[5, 6], -// [3, 4], ==> [1, 2], -// [5, 6]] [3, 4]] -// ``` -// -// Arguments: -// value: The tensor to be shuffled. -// -// Returns A tensor of same shape and type as `value`, shuffled along its first -// dimension. 
-func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RandomShuffle", - Input: []tf.Input{ - value, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Selects elements from `x` or `y`, depending on `condition`. // // The `x`, and `y` tensors must all have the same shape, and the From ef94af1f7c911f04c7602a5ea27909bb8ea1fd66 Mon Sep 17 00:00:00 2001 From: Yuefeng Zhou Date: Mon, 9 Nov 2020 17:03:37 -0800 Subject: [PATCH 080/220] Add the unit test for a fixed big. PiperOrigin-RevId: 341515086 Change-Id: Ia6ba3e17a44d8d619748006f414bb217bc703ba7 --- .../coordinator/cluster_coordinator_test.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tensorflow/python/distribute/coordinator/cluster_coordinator_test.py b/tensorflow/python/distribute/coordinator/cluster_coordinator_test.py index 53f26d8736a..b38688ebaf5 100644 --- a/tensorflow/python/distribute/coordinator/cluster_coordinator_test.py +++ b/tensorflow/python/distribute/coordinator/cluster_coordinator_test.py @@ -658,6 +658,26 @@ class ClusterCoordinatorTest(TestCaseWithErrorReportingThread): 'only accepts a `tf.function` or a concrete function.'): self.coordinator.schedule(func, args=(1,)) + def testDatasetPartiallyCreatedOnCoordinator(self): + dataset = dataset_ops.DatasetV2.range(1, 10) + + @def_function.function + def input_fn(): + return dataset.shuffle(9) + + @def_function.function + def worker_fn(iterator): + x = next(iterator) + return x + + per_worker_dataset = self.coordinator.create_per_worker_dataset(input_fn) + self.coordinator.schedule(worker_fn, args=(iter(per_worker_dataset),)) + + with self.assertRaisesRegexp( + coordinator_lib.InputError, + 'error message is Failed copying input tensor from'): + self.coordinator.join() + class 
LimitedClosureQueueSizeBasicTest(ClusterCoordinatorTest): """Test basic functionality works with explicit maximum closure queue size. From 8ecbafd84c3ce33a32e77154b43d4719471ed05f Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Mon, 9 Nov 2020 17:03:39 -0800 Subject: [PATCH 081/220] Create BUILD files and corresponding targets for `tensorflow/core/user_ops/BUILD`. PiperOrigin-RevId: 341515091 Change-Id: I931a075b29d9859e7ae72021b3f6e090ba7716f8 --- tensorflow/core/BUILD | 10 +++------- tensorflow/core/user_ops/BUILD | 28 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 7 deletions(-) create mode 100644 tensorflow/core/user_ops/BUILD diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 6b0c0b86b8c..35bdbce34aa 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -537,15 +537,11 @@ cc_library( ], ) -# One target for all user ops -cc_library( +# One target for all user ops. +alias( name = "user_ops_op_lib", - srcs = glob(["user_ops/**/*.cc"]), - copts = tf_copts(), - linkstatic = 1, + actual = "//tensorflow/core/user_ops:user_ops_op_lib", visibility = ["//visibility:public"], - deps = [":framework"], - alwayslink = 1, ) cc_library( diff --git a/tensorflow/core/user_ops/BUILD b/tensorflow/core/user_ops/BUILD new file mode 100644 index 00000000000..3f7d5096e42 --- /dev/null +++ b/tensorflow/core/user_ops/BUILD @@ -0,0 +1,28 @@ +# User ops. + +load( + "//tensorflow/core/platform:rules_cc.bzl", + "cc_library", +) +load( + "//tensorflow:tensorflow.bzl", + "tf_copts", +) + +package( + default_visibility = [ + "//tensorflow/core:__pkg__", + ], + licenses = ["notice"], # Apache 2.0 +) + +# One target for all user ops. 
+cc_library( + name = "user_ops_op_lib", + srcs = glob(["*.cc"]), + copts = tf_copts(), + linkstatic = 1, + visibility = ["//tensorflow/core:__pkg__"], + deps = ["//tensorflow/core:framework"], + alwayslink = 1, +) From 9def0ed7a4dc9c40d63424ed69baa2f54af1ff50 Mon Sep 17 00:00:00 2001 From: Jose Baiocchi Date: Mon, 9 Nov 2020 17:05:37 -0800 Subject: [PATCH 082/220] Add GetStatMetadataByType to XPlaneVisitor Remove GetStatMetadataId. Remove GetEventType(const XEvent&) and GetStatType(const XStat&) overloads. Remove unused event_type_map. Rename other maps according to value_by_key convention. Add XStatVisitor constructor for use in StatsOwner::GetStat avoiding two unnecessary lookups. PiperOrigin-RevId: 341515412 Change-Id: I47f26fbbf4ece88ed5a6560716d28eef7b03e885 --- .../core/profiler/utils/group_events.cc | 20 +++--- tensorflow/core/profiler/utils/group_events.h | 2 + .../core/profiler/utils/group_events_test.cc | 20 +++--- .../core/profiler/utils/xplane_utils.cc | 6 +- tensorflow/core/profiler/utils/xplane_utils.h | 2 +- .../core/profiler/utils/xplane_visitor.cc | 67 ++++++++++--------- .../core/profiler/utils/xplane_visitor.h | 46 +++++++------ 7 files changed, 88 insertions(+), 75 deletions(-) diff --git a/tensorflow/core/profiler/utils/group_events.cc b/tensorflow/core/profiler/utils/group_events.cc index 7c32b3d0753..d5b44fb4510 100644 --- a/tensorflow/core/profiler/utils/group_events.cc +++ b/tensorflow/core/profiler/utils/group_events.cc @@ -403,11 +403,15 @@ std::string EventNode::GetGroupName() const { return name; } +XStat* EventNode::FindOrAddStatByType(int64 stat_type) { + const XStatMetadata* stat_metadata = plane_->GetStatMetadataByType(stat_type); + DCHECK(stat_metadata != nullptr); + return FindOrAddMutableStat(*stat_metadata, raw_event_); +} + void EventNode::SetGroupId(int64 group_id) { group_id_ = group_id; - FindOrAddMutableStat(*plane_->GetStatMetadataId(StatType::kGroupId), - raw_event_) - ->set_int64_value(group_id); + 
FindOrAddStatByType(StatType::kGroupId)->set_int64_value(group_id); } void EventNode::PropagateGroupId(int64 group_id, @@ -436,8 +440,7 @@ void EventNode::PropagateGroupId(int64 group_id, } void EventNode::AddStepName(absl::string_view step_name) { - FindOrAddMutableStat(*plane_->GetStatMetadataId(StatType::kStepName), - raw_event_) + FindOrAddStatByType(StatType::kStepName) ->set_str_value(step_name.data(), step_name.size()); } @@ -452,16 +455,13 @@ void EventNode::AddSelectedGroupIds( group_metadata.parents.end()); group_ids.insert(group_ids.end(), group_metadata.children.begin(), group_metadata.children.end()); - FindOrAddMutableStat(*plane_->GetStatMetadataId(StatType::kSelectedGroupIds), - raw_event_) + FindOrAddStatByType(StatType::kSelectedGroupIds) ->set_str_value( absl::StrCat("?selected_group_ids=", absl::StrJoin(group_ids, ","))); } void EventNode::SetIsEager(bool is_eager) { - FindOrAddMutableStat(*plane_->GetStatMetadataId(StatType::kIsEager), - raw_event_) - ->set_int64_value(is_eager ? 1 : 0); + FindOrAddStatByType(StatType::kIsEager)->set_int64_value(is_eager ? 
1 : 0); } bool EventNode::IsEager() { diff --git a/tensorflow/core/profiler/utils/group_events.h b/tensorflow/core/profiler/utils/group_events.h index 57519f361a6..706a2cbf67d 100644 --- a/tensorflow/core/profiler/utils/group_events.h +++ b/tensorflow/core/profiler/utils/group_events.h @@ -126,6 +126,8 @@ class EventNode { bool StartsBefore(const EventNode& other) const; private: + XStat* FindOrAddStatByType(int64 stat_type); + const XPlaneVisitor* plane_; XEventVisitor visitor_; XLine* raw_line_; diff --git a/tensorflow/core/profiler/utils/group_events_test.cc b/tensorflow/core/profiler/utils/group_events_test.cc index 604485f03e5..2a72f017619 100644 --- a/tensorflow/core/profiler/utils/group_events_test.cc +++ b/tensorflow/core/profiler/utils/group_events_test.cc @@ -75,7 +75,7 @@ TEST(GroupEventsTest, GroupGpuTraceLegacyRootTest) { XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane); EXPECT_EQ(device_plane->lines(0).events(0).stats_size(), 3); EXPECT_EQ(device_plane_visitor.GetStatType( - device_plane->lines(0).events(0).stats(1)), + device_plane->lines(0).events(0).stats(1).metadata_id()), StatType::kGroupId); EXPECT_EQ(group_metadata_map.size(), 1); EXPECT_EQ(group_metadata_map.at(0).name, "train 123"); @@ -118,7 +118,7 @@ TEST(GroupEventsTest, GroupGpuTraceTest) { XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane); EXPECT_EQ(device_plane->lines(0).events(0).stats_size(), 3); EXPECT_EQ(device_plane_visitor.GetStatType( - device_plane->lines(0).events(0).stats(1)), + device_plane->lines(0).events(0).stats(1).metadata_id()), StatType::kGroupId); EXPECT_EQ(group_metadata_map.size(), 1); EXPECT_EQ(group_metadata_map.at(0).name, "train 123"); @@ -158,7 +158,7 @@ TEST(GroupEventsTest, GroupTensorFlowLoopTest) { XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane); EXPECT_EQ(device_plane->lines(0).events(0).stats_size(), 3); EXPECT_EQ(device_plane_visitor.GetStatType( - 
device_plane->lines(0).events(0).stats(1)), + device_plane->lines(0).events(0).stats(1).metadata_id()), StatType::kGroupId); EXPECT_EQ(device_plane->lines(0).events(0).stats(1).int64_value(), 10); EXPECT_EQ(group_metadata_map.size(), 1); @@ -289,14 +289,16 @@ TEST(GroupEventsTest, EagerOpTest) { XPlaneVisitor host_plane_visitor = CreateTfXPlaneVisitor(host_plane); const XEvent& eager_cpu_tf_op = host_plane->lines(0).events(3); EXPECT_EQ(eager_cpu_tf_op.stats_size(), 1); - EXPECT_EQ(host_plane_visitor.GetStatType(eager_cpu_tf_op.stats(0)), - StatType::kIsEager); + EXPECT_EQ( + host_plane_visitor.GetStatType(eager_cpu_tf_op.stats(0).metadata_id()), + StatType::kIsEager); EXPECT_EQ(eager_cpu_tf_op.stats(0).int64_value(), 1); XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane); const XEvent& eager_gpu_kernel = device_plane->lines(0).events(0); EXPECT_EQ(eager_gpu_kernel.stats_size(), 2); - EXPECT_EQ(device_plane_visitor.GetStatType(eager_gpu_kernel.stats(1)), - StatType::kIsEager); + EXPECT_EQ( + device_plane_visitor.GetStatType(eager_gpu_kernel.stats(1).metadata_id()), + StatType::kIsEager); EXPECT_EQ(eager_gpu_kernel.stats(1).int64_value(), 1); } @@ -341,13 +343,13 @@ TEST(GroupEventsTest, FunctionOpTest) { XPlaneVisitor host_plane_visitor = CreateTfXPlaneVisitor(host_plane); const XEvent& cpu_tf_op = host_plane->lines(1).events(2); EXPECT_EQ(cpu_tf_op.stats_size(), 2); - EXPECT_EQ(host_plane_visitor.GetStatType(cpu_tf_op.stats(1)), + EXPECT_EQ(host_plane_visitor.GetStatType(cpu_tf_op.stats(1).metadata_id()), StatType::kIsEager); EXPECT_EQ(cpu_tf_op.stats(1).int64_value(), 0); XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane); const XEvent& gpu_kernel = device_plane->lines(0).events(0); EXPECT_EQ(gpu_kernel.stats_size(), 3); - EXPECT_EQ(device_plane_visitor.GetStatType(gpu_kernel.stats(2)), + EXPECT_EQ(device_plane_visitor.GetStatType(gpu_kernel.stats(2).metadata_id()), StatType::kIsEager); 
EXPECT_EQ(gpu_kernel.stats(2).int64_value(), 0); } diff --git a/tensorflow/core/profiler/utils/xplane_utils.cc b/tensorflow/core/profiler/utils/xplane_utils.cc index 96cf4fc117c..e73b96806ad 100644 --- a/tensorflow/core/profiler/utils/xplane_utils.cc +++ b/tensorflow/core/profiler/utils/xplane_utils.cc @@ -117,14 +117,14 @@ bool IsNested(const XEvent& event, const XEvent& parent) { return XEventTimespan(parent).Includes(XEventTimespan(event)); } -XStat* FindOrAddMutableStat(int64 metadata_id, XEvent* event) { +XStat* FindOrAddMutableStat(const XStatMetadata& stat_metadata, XEvent* event) { for (auto& stat : *event->mutable_stats()) { - if (stat.metadata_id() == metadata_id) { + if (stat.metadata_id() == stat_metadata.id()) { return &stat; } } XStat* stat = event->add_stats(); - stat->set_metadata_id(metadata_id); + stat->set_metadata_id(stat_metadata.id()); return stat; } diff --git a/tensorflow/core/profiler/utils/xplane_utils.h b/tensorflow/core/profiler/utils/xplane_utils.h index 2183c1151dc..286469c5a65 100644 --- a/tensorflow/core/profiler/utils/xplane_utils.h +++ b/tensorflow/core/profiler/utils/xplane_utils.h @@ -44,7 +44,7 @@ std::vector FindMutablePlanesWithPrefix(XSpace* space, bool IsNested(const tensorflow::profiler::XEvent& event, const tensorflow::profiler::XEvent& parent); -XStat* FindOrAddMutableStat(int64 metadata_id, XEvent* event); +XStat* FindOrAddMutableStat(const XStatMetadata& stat_metadata, XEvent* event); void RemovePlane(XSpace* space, const XPlane* plane); diff --git a/tensorflow/core/profiler/utils/xplane_visitor.cc b/tensorflow/core/profiler/utils/xplane_visitor.cc index 626657a5c2d..a62614ec38c 100644 --- a/tensorflow/core/profiler/utils/xplane_visitor.cc +++ b/tensorflow/core/profiler/utils/xplane_visitor.cc @@ -29,10 +29,13 @@ namespace tensorflow { namespace profiler { XStatVisitor::XStatVisitor(const XPlaneVisitor* plane, const XStat* stat) - : stat_(stat), - metadata_(plane->GetStatMetadata(stat->metadata_id())), - plane_(plane), 
- type_(plane->GetStatType(stat->metadata_id())) {} + : XStatVisitor(plane, stat, plane->GetStatMetadata(stat->metadata_id()), + plane->GetStatType(stat->metadata_id())) {} + +XStatVisitor::XStatVisitor(const XPlaneVisitor* plane, const XStat* stat, + const XStatMetadata* metadata, + absl::optional type) + : stat_(stat), metadata_(metadata), plane_(plane), type_(type) {} std::string XStatVisitor::ToString() const { switch (stat_->value_case()) { @@ -93,15 +96,29 @@ void XPlaneVisitor::BuildEventTypeMap( for (const auto& event_type_getter : event_type_getter_list) { absl::optional event_type = event_type_getter(metadata.name()); if (event_type.has_value()) { - auto result = event_metadata_id_map_.emplace(metadata_id, *event_type); + auto result = event_type_by_id_.emplace(metadata_id, *event_type); DCHECK(result.second); // inserted - event_type_map_.emplace(*event_type, &metadata); break; } } } } +const XEventMetadata* XPlaneVisitor::GetEventMetadata( + int64 event_metadata_id) const { + const auto& event_metadata_by_id = plane_->event_metadata(); + const auto it = event_metadata_by_id.find(event_metadata_id); + if (it != event_metadata_by_id.end()) return &it->second; + return &XEventMetadata::default_instance(); +} + +absl::optional XPlaneVisitor::GetEventType( + int64 event_metadata_id) const { + const auto it = event_type_by_id_.find(event_metadata_id); + if (it != event_type_by_id_.end()) return it->second; + return absl::nullopt; +} + void XPlaneVisitor::BuildStatTypeMap( const XPlane* plane, const TypeGetterList& stat_type_getter_list) { for (const auto& stat_metadata : plane->stat_metadata()) { @@ -110,9 +127,9 @@ void XPlaneVisitor::BuildStatTypeMap( for (const auto& stat_type_getter : stat_type_getter_list) { absl::optional stat_type = stat_type_getter(metadata.name()); if (stat_type.has_value()) { - auto result = stat_metadata_id_map_.emplace(metadata_id, *stat_type); + auto result = stat_type_by_id_.emplace(metadata_id, *stat_type); 
DCHECK(result.second); // inserted - stat_type_map_.emplace(*stat_type, &metadata); + stat_metadata_by_type_.emplace(*stat_type, &metadata); break; } } @@ -121,37 +138,23 @@ void XPlaneVisitor::BuildStatTypeMap( const XStatMetadata* XPlaneVisitor::GetStatMetadata( int64 stat_metadata_id) const { - const auto& stat_metadata_map = plane_->stat_metadata(); - const auto it = stat_metadata_map.find(stat_metadata_id); - if (it != stat_metadata_map.end()) return &it->second; + const auto& stat_metadata_by_id = plane_->stat_metadata(); + const auto it = stat_metadata_by_id.find(stat_metadata_id); + if (it != stat_metadata_by_id.end()) return &it->second; return &XStatMetadata::default_instance(); } absl::optional XPlaneVisitor::GetStatType(int64 stat_metadata_id) const { - const auto it = stat_metadata_id_map_.find(stat_metadata_id); - if (it != stat_metadata_id_map_.end()) return it->second; + const auto it = stat_type_by_id_.find(stat_metadata_id); + if (it != stat_type_by_id_.end()) return it->second; return absl::nullopt; } -absl::optional XPlaneVisitor::GetStatMetadataId(int64 stat_type) const { - const auto it = stat_type_map_.find(stat_type); - if (it != stat_type_map_.end()) return it->second->id(); - return absl::nullopt; -} - -const XEventMetadata* XPlaneVisitor::GetEventMetadata( - int64 event_metadata_id) const { - const auto& event_metadata_map = plane_->event_metadata(); - const auto it = event_metadata_map.find(event_metadata_id); - if (it != event_metadata_map.end()) return &it->second; - return &XEventMetadata::default_instance(); -} - -absl::optional XPlaneVisitor::GetEventType( - int64 event_metadata_id) const { - const auto it = event_metadata_id_map_.find(event_metadata_id); - if (it != event_metadata_id_map_.end()) return it->second; - return absl::nullopt; +const XStatMetadata* XPlaneVisitor::GetStatMetadataByType( + int64 stat_type) const { + const auto it = stat_metadata_by_type_.find(stat_type); + if (it != stat_metadata_by_type_.end()) return 
it->second; + return nullptr; } } // namespace profiler diff --git a/tensorflow/core/profiler/utils/xplane_visitor.h b/tensorflow/core/profiler/utils/xplane_visitor.h index 93830c0852a..e7ac97f3098 100644 --- a/tensorflow/core/profiler/utils/xplane_visitor.h +++ b/tensorflow/core/profiler/utils/xplane_visitor.h @@ -39,6 +39,10 @@ class XStatVisitor { // REQUIRED: plane and stat cannot be nullptr. XStatVisitor(const XPlaneVisitor* plane, const XStat* stat); + // REQUIRED: plane, stat and metadata cannot be nullptr. + XStatVisitor(const XPlaneVisitor* plane, const XStat* stat, + const XStatMetadata* metadata, absl::optional type); + int64 Id() const { return stat_->metadata_id(); } absl::string_view Name() const { return metadata_->name(); } @@ -224,18 +228,21 @@ class XPlaneVisitor : public XStatsOwner { } } - // TODO(jiesun): use single map look up for both StatMetadata and StatType. - const XStatMetadata* GetStatMetadata(int64 stat_metadata_id) const; - absl::optional GetStatType(int64 stat_metadata_id) const; - absl::optional GetStatType(const XStat& stat) const { - return GetStatType(stat.metadata_id()); - } - absl::optional GetStatMetadataId(int64 stat_type) const; + // Returns event metadata given its id. Returns a default value if not found. const XEventMetadata* GetEventMetadata(int64 event_metadata_id) const; + + // Returns the type of an event given its id. absl::optional GetEventType(int64 event_metadata_id) const; - absl::optional GetEventType(const XEvent& event) const { - return GetEventType(event.metadata_id()); - } + + // Returns stat metadata given its id. Returns a default value if not found. + const XStatMetadata* GetStatMetadata(int64 stat_metadata_id) const; + + // Returns stat metadata given its type. Returns nullptr if not found. + // Use as an alternative to GetStatMetadata above. + const XStatMetadata* GetStatMetadataByType(int64 stat_type) const; + + // Returns the type of a stat given its id. 
+ absl::optional GetStatType(int64 stat_metadata_id) const; private: void BuildEventTypeMap(const XPlane* plane, @@ -245,22 +252,21 @@ class XPlaneVisitor : public XStatsOwner { const XPlane* plane_; - absl::flat_hash_map - stat_metadata_id_map_; - absl::flat_hash_map stat_type_map_; absl::flat_hash_map - event_metadata_id_map_; - absl::flat_hash_map - event_type_map_; + event_type_by_id_; + absl::flat_hash_map + stat_type_by_id_; + absl::flat_hash_map + stat_metadata_by_type_; }; template absl::optional XStatsOwner::GetStat(int64 stat_type) const { - if (absl::optional stat_metadata_id = - metadata_->GetStatMetadataId(stat_type)) { + const auto* stat_metadata = metadata_->GetStatMetadataByType(stat_type); + if (stat_metadata != nullptr) { for (const XStat& stat : stats_owner_->stats()) { - if (stat.metadata_id() == *stat_metadata_id) { - return XStatVisitor(metadata_, &stat); + if (stat.metadata_id() == stat_metadata->id()) { + return XStatVisitor(metadata_, &stat, stat_metadata, stat_type); } } } From e54e804bc788488be0c49a4cad7f55c4f864d6f7 Mon Sep 17 00:00:00 2001 From: Fergus Henderson Date: Mon, 9 Nov 2020 17:16:14 -0800 Subject: [PATCH 083/220] (lite) Fix typo in build target name. 
PiperOrigin-RevId: 341516982 Change-Id: I9d0e5986db29b58762854b2dfc10f19e2e738fb2 --- tensorflow/lite/java/BUILD | 2 +- tensorflow/lite/testing/BUILD | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/java/BUILD b/tensorflow/lite/java/BUILD index 9bceb939c02..d7e1d747bd9 100644 --- a/tensorflow/lite/java/BUILD +++ b/tensorflow/lite/java/BUILD @@ -364,7 +364,7 @@ java_test( test_class = "org.tensorflow.lite.InterpreterCustomizedAndroidBuildTest", visibility = ["//visibility:private"], deps = [ - "//tensorflow/lite/testing:customtized_tflite_for_add_ops", + "//tensorflow/lite/testing:customized_tflite_for_add_ops", "@com_google_truth", "@junit", ], diff --git a/tensorflow/lite/testing/BUILD b/tensorflow/lite/testing/BUILD index 02cd86b61f0..74380bd9d4c 100644 --- a/tensorflow/lite/testing/BUILD +++ b/tensorflow/lite/testing/BUILD @@ -591,7 +591,7 @@ pybind_extension( tflite_portable_test_suite() tflite_custom_android_library( - name = "customtized_tflite_for_add_ops", + name = "customized_tflite_for_add_ops", models = ["//tensorflow/lite:testdata/add.bin"], visibility = ["//visibility:public"], ) From 647752a840de7322cad7f4c7094a2cdba2fd8f46 Mon Sep 17 00:00:00 2001 From: Marissa Ikonomidis Date: Mon, 9 Nov 2020 17:18:56 -0800 Subject: [PATCH 084/220] Pass tensorflow::Graph into GetMlirBridgeRolloutPolicy Enable selectively enabling the new bridge using properties of the graph. 
PiperOrigin-RevId: 341517370 Change-Id: I31a1b02641b28f578e96d37d6429861e173bdcd4 --- tensorflow/compiler/jit/BUILD | 2 +- .../compiler/jit/xla_compilation_cache.cc | 2 +- tensorflow/compiler/jit/xla_kernel_creator.cc | 28 ++++++++++--------- .../mlir/mlir_bridge_rollout_policy.cc | 2 +- .../mlir/mlir_bridge_rollout_policy.h | 2 ++ .../mlir/mlir_graph_optimization_pass.cc | 7 +++-- .../mlir/mlir_graph_optimization_pass.h | 10 ++++--- .../transforms/graph_optimization_pass.cc | 3 +- .../transforms/graph_optimization_pass.h | 6 ++-- .../tfr/integration/graph_decompose_pass.cc | 7 +++-- .../tfr/integration/graph_decompose_pass.h | 6 ++-- .../compiler/tf2xla/mlir_bridge_pass.cc | 22 +++++++++++++-- tensorflow/compiler/tf2xla/mlir_bridge_pass.h | 17 ++++++----- 13 files changed, 73 insertions(+), 41 deletions(-) diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index d2c61370e15..51c93a750cc 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -508,7 +508,7 @@ cc_library( ":flags", ":jit_compilation_passes", "//tensorflow/compiler/jit/kernels:xla_ops_no_jit_rewrite_registration", - "//tensorflow/compiler/mlir:mlir_bridge_rollout_policy", + "//tensorflow/compiler/tf2xla:mlir_bridge_pass", "//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_op_registry", "//tensorflow/core:core_cpu_internal", diff --git a/tensorflow/compiler/jit/xla_compilation_cache.cc b/tensorflow/compiler/jit/xla_compilation_cache.cc index 6251f0353de..005332a22e2 100644 --- a/tensorflow/compiler/jit/xla_compilation_cache.cc +++ b/tensorflow/compiler/jit/xla_compilation_cache.cc @@ -289,7 +289,7 @@ Status XlaCompilationCache::CompileSingleOp( const ConfigProto* config = ctx->function_library()->config_proto(); // TODO(b/171039585): Support tf.VarIsInitializedOp using MLIR. 
bool use_mlir = config && - GetMlirBridgeRolloutPolicy(*config) == + GetMlirBridgeRolloutPolicy(*graph, *config) == MlirBridgeRolloutPolicy::kEnabledByUser && node_def.op() != "VarIsInitializedOp"; #ifdef LIBTPU_ON_GCE diff --git a/tensorflow/compiler/jit/xla_kernel_creator.cc b/tensorflow/compiler/jit/xla_kernel_creator.cc index 444059e598d..a549c99d9b7 100644 --- a/tensorflow/compiler/jit/xla_kernel_creator.cc +++ b/tensorflow/compiler/jit/xla_kernel_creator.cc @@ -14,7 +14,6 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/compiler/jit/xla_kernel_creator.h" -#include "tensorflow/compiler/mlir/mlir_bridge_rollout_policy.h" #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" @@ -23,6 +22,7 @@ limitations under the License. #include "tensorflow/compiler/jit/flags.h" #include "tensorflow/compiler/jit/kernels/xla_ops.h" #include "tensorflow/compiler/tf2xla/const_analysis.h" +#include "tensorflow/compiler/tf2xla/mlir_bridge_pass.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/framework/node_def_builder.h" @@ -89,10 +89,21 @@ static Status CreateXlaKernel(FunctionLibraryRuntime* flr, // Make sure that kernels have been registered on the JIT device. XlaOpRegistry::RegisterCompilationKernels(); + // Get function body, constant args, and resource args. + NameAttrList function; + TF_RETURN_IF_ERROR(NameAndAttrsFromFunctionCall(node_def, &function)); + const FunctionBody* fbody = nullptr; + std::vector constant_arg_indices; + std::vector resource_arg_indices; + TF_RETURN_IF_ERROR(GetBodyAndConstantsAndResources( + flr, function, &fbody, &constant_arg_indices, &resource_arg_indices)); + // Only check for compilability if the MLIR bridge is not enabled. 
- MlirBridgeRolloutPolicy policy = GetMlirBridgeRolloutPolicy(absl::nullopt); - if (policy == MlirBridgeRolloutPolicy::kDisabledByUser || - policy == MlirBridgeRolloutPolicy::kDisabledAfterGraphAnalysis) { + absl::optional config_proto; + if (flr->config_proto()) { + config_proto = *flr->config_proto(); + } + if (!IsMlirBridgePassEnabled(*fbody->graph, config_proto)) { RecursiveCompilabilityChecker::UncompilableNodesMap uncompilable_nodes_map; if (!IsCompilable(flr, node_def, &uncompilable_nodes_map)) { std::vector @@ -121,15 +132,6 @@ static Status CreateXlaKernel(FunctionLibraryRuntime* flr, } } - // Get function body, constant args, and resource args. - NameAttrList function; - TF_RETURN_IF_ERROR(NameAndAttrsFromFunctionCall(node_def, &function)); - const FunctionBody* fbody = nullptr; - std::vector constant_arg_indices; - std::vector resource_arg_indices; - TF_RETURN_IF_ERROR(GetBodyAndConstantsAndResources( - flr, function, &fbody, &constant_arg_indices, &resource_arg_indices)); - MemoryTypeVector input_memory_types = GetInputMemoryTypes(fbody, constant_arg_indices, resource_arg_indices); MemoryTypeVector output_memory_types = GetOutputMemoryTypes(fbody); diff --git a/tensorflow/compiler/mlir/mlir_bridge_rollout_policy.cc b/tensorflow/compiler/mlir/mlir_bridge_rollout_policy.cc index 9ffbf8202fc..2bf0b28439e 100644 --- a/tensorflow/compiler/mlir/mlir_bridge_rollout_policy.cc +++ b/tensorflow/compiler/mlir/mlir_bridge_rollout_policy.cc @@ -51,7 +51,7 @@ static ConfigProto::Experimental::MlirBridgeRollout GetUserRequest( } MlirBridgeRolloutPolicy GetMlirBridgeRolloutPolicy( - absl::optional config_proto) { + const tensorflow::Graph& graph, absl::optional config_proto) { switch (GetUserRequest(config_proto)) { case ConfigProto::Experimental::MLIR_BRIDGE_ROLLOUT_ENABLED: return MlirBridgeRolloutPolicy::kEnabledByUser; diff --git a/tensorflow/compiler/mlir/mlir_bridge_rollout_policy.h b/tensorflow/compiler/mlir/mlir_bridge_rollout_policy.h index 
cb805ea4434..6aa61c86230 100644 --- a/tensorflow/compiler/mlir/mlir_bridge_rollout_policy.h +++ b/tensorflow/compiler/mlir/mlir_bridge_rollout_policy.h @@ -17,6 +17,7 @@ limitations under the License. #define THIRD_PARTY_TENSORFLOW_COMPILER_MLIR_MLIR_BRIDGE_ROLLOUT_POLICY_H_ #include "absl/types/optional.h" +#include "tensorflow/core/graph/graph.h" #include "tensorflow/core/protobuf/config.pb.h" namespace tensorflow { @@ -46,6 +47,7 @@ enum class MlirBridgeRolloutPolicy { // The config_proto param is a required input for all TF1 graphs but it is // redundant for TF2 graphs. MlirBridgeRolloutPolicy GetMlirBridgeRolloutPolicy( + const tensorflow::Graph& graph, absl::optional config_proto); } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc b/tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc index 58b57d89dbf..ff611bac943 100644 --- a/tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc +++ b/tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc @@ -109,7 +109,7 @@ Status MlirFunctionOptimizationPass::Run( // Skip conversion from Graph to MLIR if none of the passes are enabled. 
const bool is_enabled = llvm::any_of(registry_->passes(), [&](auto& pass_registration) -> bool { - return pass_registration.pass->IsEnabled(config_proto); + return pass_registration.pass->IsEnabled(config_proto, **graph); }); if (!is_enabled) { @@ -144,7 +144,8 @@ Status MlirFunctionOptimizationPass::Run( DumpModule(*module_ref, llvm::formatv("mlir_{0}_before_", name)); } - TF_RETURN_IF_ERROR(pass_registration.pass->Run(config_proto, *module_ref)); + TF_RETURN_IF_ERROR( + pass_registration.pass->Run(config_proto, *module_ref, **graph)); if (VLOG_IS_ON(1)) { DumpModule(*module_ref, llvm::formatv("mlir_{0}_after_", name)); @@ -183,7 +184,7 @@ Status MlirV1CompatGraphOptimizationPass::Run( const bool is_enabled = absl::c_any_of(registry_->passes(), [&](auto& pass_registration) -> bool { return pass_registration.pass->IsEnabled( - options.session_options->config); + options.session_options->config, **options.graph); }); if (!is_enabled) { diff --git a/tensorflow/compiler/mlir/mlir_graph_optimization_pass.h b/tensorflow/compiler/mlir/mlir_graph_optimization_pass.h index b405bcd6913..3130805633b 100644 --- a/tensorflow/compiler/mlir/mlir_graph_optimization_pass.h +++ b/tensorflow/compiler/mlir/mlir_graph_optimization_pass.h @@ -34,10 +34,11 @@ class MlirOptimizationPass { public: virtual ~MlirOptimizationPass() = default; virtual llvm::StringRef name() const = 0; - virtual bool IsEnabled(const ConfigProto& config_proto) const = 0; + virtual bool IsEnabled(const ConfigProto& config_proto, + const Graph& graph) const = 0; - virtual Status Run(const ConfigProto& config_proto, - mlir::ModuleOp module) = 0; + virtual Status Run(const ConfigProto& config_proto, mlir::ModuleOp module, + const Graph& graph) = 0; }; class MlirOptimizationPassRegistry { @@ -100,7 +101,8 @@ class MlirV1CompatOptimizationPass { public: virtual ~MlirV1CompatOptimizationPass() = default; virtual llvm::StringRef name() const = 0; - virtual bool IsEnabled(const ConfigProto& config_proto) const = 0; + 
virtual bool IsEnabled(const ConfigProto& config_proto, + const Graph& graph) const = 0; virtual Status Run(const GraphOptimizationPassOptions& options, mlir::ModuleOp module) = 0; diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/graph_optimization_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/graph_optimization_pass.cc index 769a97ac2b5..8a0a999fc24 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/graph_optimization_pass.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/graph_optimization_pass.cc @@ -28,10 +28,11 @@ namespace TF { namespace { using Status = ::tensorflow::Status; using ConfigProto = ::tensorflow::ConfigProto; +using Graph = ::tensorflow::Graph; } // namespace Status MlirGraphOptimizationPass::Run(const ConfigProto& config_proto, - ModuleOp module) { + ModuleOp module, const Graph& graph) { if (!config_proto.experimental().enable_mlir_graph_optimization()) { VLOG(1) << "Skipping MLIR Graph Optimization Pass" << ", session flag not enabled"; diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/graph_optimization_pass.h b/tensorflow/compiler/mlir/tensorflow/transforms/graph_optimization_pass.h index 5bab0ffab7e..9272574b7a5 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/graph_optimization_pass.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/graph_optimization_pass.h @@ -27,12 +27,14 @@ class MlirGraphOptimizationPass : public ::tensorflow::MlirOptimizationPass { public: llvm::StringRef name() const override { return "graph_optimization"; } - bool IsEnabled(const ::tensorflow::ConfigProto& config_proto) const override { + bool IsEnabled(const ::tensorflow::ConfigProto& config_proto, + const tensorflow::Graph& graph) const override { return config_proto.experimental().enable_mlir_graph_optimization(); } ::tensorflow::Status Run(const ::tensorflow::ConfigProto& config_proto, - ModuleOp module) override; + ModuleOp module, + const ::tensorflow::Graph& graph) override; }; } // namespace TF 
diff --git a/tensorflow/compiler/mlir/tfr/integration/graph_decompose_pass.cc b/tensorflow/compiler/mlir/tfr/integration/graph_decompose_pass.cc index 7041545637a..06e649ffe09 100644 --- a/tensorflow/compiler/mlir/tfr/integration/graph_decompose_pass.cc +++ b/tensorflow/compiler/mlir/tfr/integration/graph_decompose_pass.cc @@ -29,14 +29,15 @@ auto* tf_core_op_expansion_graph_counter = namespace tfr { -bool GraphDecomposePass::IsEnabled(const ConfigProto& config_proto) const { +bool GraphDecomposePass::IsEnabled(const ConfigProto& config_proto, + const Graph& graph) const { const char* tfr_lib_env_val = getenv(std::string(kTFRLibEnv).c_str()); return tfr_lib_env_val != nullptr; } Status GraphDecomposePass::Run(const ConfigProto& config_proto, - mlir::ModuleOp module) { - if (!IsEnabled(config_proto)) { + mlir::ModuleOp module, const Graph& graph) { + if (!IsEnabled(config_proto, graph)) { LOG_FIRST_N(INFO, 1) << "Skipping Graph Decomposition Pass, decompositin " "library was not found"; return Status::OK(); diff --git a/tensorflow/compiler/mlir/tfr/integration/graph_decompose_pass.h b/tensorflow/compiler/mlir/tfr/integration/graph_decompose_pass.h index dd93e99f04b..37685f39779 100644 --- a/tensorflow/compiler/mlir/tfr/integration/graph_decompose_pass.h +++ b/tensorflow/compiler/mlir/tfr/integration/graph_decompose_pass.h @@ -33,11 +33,13 @@ class GraphDecomposePass : public MlirOptimizationPass { // Whether to run this pass. If this is enabled, the GraphDef will be imported // to MLIR even no tf composition file is found. - bool IsEnabled(const ConfigProto& config_proto) const override; + bool IsEnabled(const ConfigProto& config_proto, + const Graph& graph) const override; // This should be used as a thin mapper around mlir::ModulePass::runOnModule // API integrated with the Tensorflow runtime. 
- Status Run(const ConfigProto& config_proto, mlir::ModuleOp module) override; + Status Run(const ConfigProto& config_proto, mlir::ModuleOp module, + const Graph& graph) override; }; } // namespace tfr diff --git a/tensorflow/compiler/tf2xla/mlir_bridge_pass.cc b/tensorflow/compiler/tf2xla/mlir_bridge_pass.cc index 6c328b095ce..28e3c99fa3d 100644 --- a/tensorflow/compiler/tf2xla/mlir_bridge_pass.cc +++ b/tensorflow/compiler/tf2xla/mlir_bridge_pass.cc @@ -48,6 +48,22 @@ auto* mlir_bridge_gauge_v2 = monitoring::Gauge::New( "/tensorflow/config/experimental/enable_mlir_bridge_gauge_v2", "Tracks usage of the MLIR-based TF2XLA bridge among TF2 models"); +// Analyzes the user requested policy as well as the contents of the graph and +// determines whether the MLIR Bridge should be run. +// +// If the user explicitly requests the bridge be enabled or disabled, this +// function will respect the request. If the user does not explicitly request +// enabled or disabled, it will decide whether or not to run the bridge. +// +// The config_proto param is a required input for all TF1 graphs but it is +// redundant for TF2 graphs. +bool IsMlirBridgePassEnabled(const Graph& graph, + const absl::optional& config_proto) { + MlirBridgeRolloutPolicy policy = + GetMlirBridgeRolloutPolicy(graph, config_proto); + return policy == MlirBridgeRolloutPolicy::kEnabledByUser; +} + // This runs the first phase of the "bridge", transforming the graph in a form // that can be executed with delegation of some computations to an accelerator. // This builds on the model of XLA where a subset of the graph is encapsulated @@ -55,8 +71,8 @@ auto* mlir_bridge_gauge_v2 = monitoring::Gauge::New( // operation. The kernel for these operations is responsible to lower the // encapsulated graph to a particular device. 
Status MlirBridgePass::Run(const ConfigProto& config_proto, - mlir::ModuleOp module) { - if (!IsEnabled(config_proto)) { + mlir::ModuleOp module, const Graph& graph) { + if (!IsEnabled(config_proto, graph)) { VLOG(0) << "Skipping MLIR TPU Bridge, session flag not enabled"; mlir_bridge_gauge_v2->GetCell()->Set(false); return Status::OK(); @@ -80,7 +96,7 @@ Status MlirBridgeV1CompatPass::Run(const GraphOptimizationPassOptions& options, // Skip function graphs as MlirBridgePass will be used instead. if (options.is_function_graph) return Status::OK(); - if (!IsEnabled(options.session_options->config)) { + if (!IsEnabled(options.session_options->config, **options.graph)) { VLOG(0) << "Skipping MLIR TPU Bridge V1 Compat, session flag not enabled"; mlir_bridge_gauge_v1->GetCell()->Set(false); return Status::OK(); diff --git a/tensorflow/compiler/tf2xla/mlir_bridge_pass.h b/tensorflow/compiler/tf2xla/mlir_bridge_pass.h index 2e23b040c73..350c198ee70 100644 --- a/tensorflow/compiler/tf2xla/mlir_bridge_pass.h +++ b/tensorflow/compiler/tf2xla/mlir_bridge_pass.h @@ -23,6 +23,8 @@ limitations under the License. namespace tensorflow { +bool IsMlirBridgePassEnabled(const Graph& graph, + const absl::optional& config_proto); // This pass uses MLIR to implement all the conversion steps to target XLA from // a TensorFlow Function Graph. It is meant to expose a very limited set of // functionalities during the bring-up of MLIR-based bridge. 
@@ -30,14 +32,15 @@ class MlirBridgePass : public MlirOptimizationPass { public: llvm::StringRef name() const override { return "bridge"; } - bool IsEnabled(const ConfigProto& config_proto) const override { - MlirBridgeRolloutPolicy policy = GetMlirBridgeRolloutPolicy(config_proto); - return policy == MlirBridgeRolloutPolicy::kEnabledByUser; + bool IsEnabled(const ConfigProto& config_proto, + const Graph& graph) const override { + return IsMlirBridgePassEnabled(graph, config_proto); } // This should be used as a thin mapper around mlir::ModulePass::runOnModule // API integrated with the Tensorflow runtime. - Status Run(const ConfigProto& config_proto, mlir::ModuleOp module) override; + Status Run(const ConfigProto& config_proto, mlir::ModuleOp module, + const Graph& graph) override; }; // This pass uses MLIR to implement all the conversion steps to target XLA from @@ -47,9 +50,9 @@ class MlirBridgeV1CompatPass : public MlirV1CompatOptimizationPass { public: llvm::StringRef name() const override { return "bridge"; } - bool IsEnabled(const ConfigProto& config_proto) const override { - MlirBridgeRolloutPolicy policy = GetMlirBridgeRolloutPolicy(config_proto); - return policy == MlirBridgeRolloutPolicy::kEnabledByUser; + bool IsEnabled(const ConfigProto& config_proto, + const Graph& graph) const override { + return IsMlirBridgePassEnabled(graph, config_proto); } // This should be used as a thin mapper around mlir::ModulePass::runOnModule From eb17b4716f4273bfea29259a8023f79ef18c3610 Mon Sep 17 00:00:00 2001 From: Nat Jeffries Date: Mon, 9 Nov 2020 17:22:20 -0800 Subject: [PATCH 085/220] Update keyword benchmark to use new int16->int32 quantize op instead of dequantize for compatibility with tflite PiperOrigin-RevId: 341517855 Change-Id: I3193d2952ed5576425109b957ddaa262aad388bf --- .../keyword_scrambled_model_data.cc | 5665 ++++++++--------- 1 file changed, 2806 insertions(+), 2859 deletions(-) diff --git 
a/tensorflow/lite/micro/benchmarks/keyword_scrambled_model_data.cc b/tensorflow/lite/micro/benchmarks/keyword_scrambled_model_data.cc index 834f44ca5ab..254e194b5d4 100644 --- a/tensorflow/lite/micro/benchmarks/keyword_scrambled_model_data.cc +++ b/tensorflow/lite/micro/benchmarks/keyword_scrambled_model_data.cc @@ -17,2882 +17,2829 @@ limitations under the License. // Keep model aligned to 8 bytes to guarantee aligned 64-bit accesses. alignas(8) const unsigned char g_keyword_scrambled_model_data[] = { - 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, - 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, - 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0xd0, 0x6e, 0x00, 0x00, - 0xe4, 0x85, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0xbc, 0x6e, 0x00, 0x00, 0xac, 0x56, 0x00, 0x00, 0x9c, 0x52, 0x00, 0x00, - 0x8c, 0x51, 0x00, 0x00, 0x7c, 0x4d, 0x00, 0x00, 0x2c, 0x4d, 0x00, 0x00, - 0x1c, 0x49, 0x00, 0x00, 0x0c, 0x45, 0x00, 0x00, 0xfc, 0x43, 0x00, 0x00, - 0xec, 0x3f, 0x00, 0x00, 0x9c, 0x3f, 0x00, 0x00, 0x8c, 0x3b, 0x00, 0x00, - 0x7c, 0x37, 0x00, 0x00, 0x6c, 0x36, 0x00, 0x00, 0x5c, 0x32, 0x00, 0x00, - 0x0c, 0x32, 0x00, 0x00, 0xfc, 0x2d, 0x00, 0x00, 0xec, 0x29, 0x00, 0x00, - 0xdc, 0x28, 0x00, 0x00, 0xcc, 0x24, 0x00, 0x00, 0x7c, 0x24, 0x00, 0x00, - 0x6c, 0x22, 0x00, 0x00, 0x5c, 0x1a, 0x00, 0x00, 0xcc, 0x19, 0x00, 0x00, - 0xbc, 0x15, 0x00, 0x00, 0xac, 0x0d, 0x00, 0x00, 0x1c, 0x0d, 0x00, 0x00, - 0x0c, 0x09, 0x00, 0x00, 0xfc, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, - 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x2a, 0x91, 0xff, 0xff, - 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x34, 0xe1, 0x4f, 0xa1, - 0x63, 0xa4, 0x62, 0xbf, 0x3e, 0x91, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, - 0x40, 0x00, 0x00, 0x00, 0xa3, 0xb2, 0x8f, 0xee, 0x35, 0xe6, 0xf2, 0xcc, - 0x68, 0xa0, 0x33, 0xc4, 0x7d, 0x4e, 0xbb, 0xa9, 0x10, 0x32, 0x8e, 0x3d, - 0x76, 0x14, 0x1c, 0x33, 0x0e, 0x77, 0xf7, 0xc8, 0x7b, 0x45, 0xc7, 0xdb, - 0xcf, 
0x87, 0xc7, 0x70, 0xa9, 0x29, 0xfd, 0x70, 0x32, 0x96, 0x35, 0x7d, - 0xe9, 0xac, 0x6d, 0x9b, 0xfd, 0xe4, 0xbc, 0x4a, 0x57, 0xcd, 0x43, 0xcc, - 0x73, 0x72, 0xdf, 0x07, 0x68, 0xc5, 0x67, 0xbd, 0x8a, 0x91, 0xff, 0xff, - 0x04, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0xb0, 0xfb, 0x5f, 0xdf, - 0x0e, 0xb9, 0xa2, 0xfd, 0x66, 0x86, 0x13, 0x1b, 0x6d, 0x1d, 0x53, 0xdb, - 0x83, 0xbf, 0x44, 0x29, 0x3f, 0x93, 0xee, 0x42, 0x9a, 0xf4, 0x31, 0x6e, - 0xc3, 0x15, 0x7e, 0x48, 0x72, 0x50, 0xc3, 0x53, 0xef, 0x35, 0x1f, 0xc2, - 0x29, 0x42, 0xb4, 0xd7, 0x4b, 0xd7, 0x98, 0x60, 0xb9, 0x3e, 0xbb, 0x31, - 0x35, 0xc3, 0xf6, 0x15, 0x7a, 0x9a, 0x2c, 0xfd, 0xff, 0x04, 0xd9, 0x04, - 0x57, 0x52, 0xae, 0x99, 0xa3, 0x95, 0xae, 0x6a, 0x66, 0x52, 0x5f, 0x91, - 0x17, 0x83, 0x0d, 0x27, 0x16, 0x02, 0x06, 0x64, 0x80, 0x05, 0x99, 0x1c, - 0x6c, 0xab, 0xb1, 0xa1, 0x0e, 0x44, 0x1f, 0x63, 0xe9, 0xc1, 0xab, 0x8d, - 0x08, 0x79, 0x56, 0xe0, 0x90, 0xa5, 0xb8, 0x3b, 0xc4, 0x1e, 0xa5, 0x1f, - 0x64, 0xe4, 0x0b, 0x72, 0x62, 0x19, 0x5f, 0x66, 0xc0, 0x9b, 0x7b, 0xc4, - 0xe5, 0x9f, 0x82, 0xa7, 0x16, 0x92, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x08, 0x00, 0x00, 0x3e, 0x3d, 0xf4, 0x61, 0x45, 0x2a, 0x48, 0x53, - 0x1f, 0x22, 0x74, 0x65, 0xea, 0x5a, 0x00, 0x83, 0x68, 0xf9, 0xbb, 0xa3, - 0xc2, 0x1a, 0x8f, 0xe1, 0xfb, 0x76, 0x6a, 0xe9, 0x1a, 0x0e, 0x4d, 0x32, - 0xc6, 0xf3, 0x8d, 0x85, 0x54, 0xa1, 0xe9, 0xb8, 0x35, 0xee, 0xba, 0x53, - 0x40, 0xa2, 0xea, 0x7f, 0xc3, 0x99, 0x71, 0x17, 0xdd, 0xd5, 0xfe, 0xdf, - 0x5e, 0x15, 0xa0, 0x73, 0xf8, 0x78, 0x49, 0x73, 0xcc, 0xf0, 0x18, 0x12, - 0x06, 0x81, 0xd6, 0x19, 0x2c, 0xa8, 0xd7, 0x80, 0x19, 0x19, 0xbf, 0x1e, - 0x50, 0xb1, 0xfb, 0xb3, 0xa6, 0x56, 0x6f, 0x52, 0xa6, 0xc0, 0xdd, 0x3f, - 0xbb, 0x13, 0x6e, 0x04, 0xdf, 0x79, 0xca, 0x8b, 0xa5, 0x9c, 0xa1, 0x78, - 0x49, 0xca, 0xe5, 0x29, 0xbb, 0x29, 0x7c, 0x96, 0xc6, 0x29, 0x06, 0x99, - 0xec, 0x50, 0xd1, 0xe8, 0x9b, 0xb7, 0x53, 0xd2, 0x36, 0x89, 0xb1, 0x5c, - 0x38, 0xf4, 0x2f, 0xa1, 0xda, 0x6f, 0xd8, 0xd1, 0x62, 0xd2, 0xd4, 0x97, - 0xce, 
0xf1, 0xbd, 0x73, 0x2d, 0x92, 0xdb, 0x62, 0x0c, 0xb0, 0x77, 0xed, - 0x32, 0x3a, 0xfc, 0x59, 0x94, 0xef, 0x2b, 0x48, 0x60, 0xb2, 0x82, 0xa2, - 0xb6, 0x51, 0xdb, 0x51, 0x47, 0x99, 0x4c, 0x50, 0x93, 0x53, 0x9d, 0xa9, - 0x3c, 0x94, 0x34, 0x9f, 0xa6, 0x3e, 0x4f, 0x87, 0xd4, 0xa0, 0x40, 0xeb, - 0x7b, 0xfa, 0x1b, 0x7d, 0x03, 0xa8, 0xf8, 0x8b, 0xa5, 0x32, 0x3a, 0xaf, - 0x7e, 0x6b, 0x25, 0x08, 0x97, 0x71, 0x8d, 0x0c, 0x30, 0xc9, 0xa7, 0x23, - 0xe3, 0x51, 0xb3, 0xf2, 0x86, 0xad, 0x12, 0xe2, 0x79, 0x94, 0x7f, 0xf3, - 0xf7, 0x88, 0x67, 0x3e, 0x8e, 0x8e, 0x04, 0x5e, 0x4f, 0x01, 0x6f, 0x1d, - 0x78, 0x42, 0x9e, 0x47, 0x81, 0xdf, 0x03, 0x39, 0x3d, 0x9b, 0xbd, 0xb6, - 0x06, 0x21, 0x82, 0xfe, 0xf2, 0x50, 0xe1, 0x14, 0xbc, 0xe3, 0x5e, 0xe1, - 0xbd, 0x8f, 0xfa, 0x35, 0x31, 0x4e, 0x66, 0xeb, 0x67, 0x49, 0x1c, 0x07, - 0x88, 0xb6, 0x22, 0x0c, 0xeb, 0xd9, 0x9f, 0x9b, 0x8b, 0xe0, 0x9c, 0x3c, - 0xf7, 0x91, 0xab, 0x98, 0x5b, 0x0e, 0x09, 0xdd, 0xe3, 0x0b, 0x14, 0x55, - 0xe9, 0xe4, 0x42, 0xd8, 0xce, 0xd7, 0xfd, 0x4c, 0x20, 0x9f, 0x44, 0x93, - 0xa6, 0x17, 0x8a, 0x68, 0x8f, 0xec, 0x62, 0xd1, 0x97, 0x9c, 0xcc, 0xc4, - 0xd9, 0x42, 0xda, 0xf1, 0x34, 0x04, 0xc6, 0xb6, 0x0f, 0xc7, 0xe6, 0x2d, - 0x26, 0x6e, 0x6f, 0x92, 0x7e, 0xd9, 0xd4, 0x40, 0xc6, 0x70, 0xfa, 0x12, - 0x2a, 0x1b, 0xbc, 0x50, 0xeb, 0x3b, 0x24, 0x96, 0x8d, 0x7c, 0xae, 0xbe, - 0xc3, 0x27, 0xce, 0x97, 0xcf, 0xcd, 0x10, 0x13, 0x01, 0xc6, 0x48, 0x6a, - 0x99, 0x38, 0x79, 0xb9, 0x1c, 0xc9, 0x09, 0xac, 0x96, 0x8c, 0xf7, 0x82, - 0x8f, 0xb8, 0x17, 0x94, 0x2c, 0x5f, 0x40, 0xcc, 0x80, 0xf4, 0x9f, 0xaa, - 0xcb, 0x83, 0x13, 0x7b, 0x3a, 0x78, 0x0a, 0x9f, 0x79, 0x9e, 0xfc, 0x0e, - 0x8f, 0x98, 0x60, 0x39, 0x86, 0x44, 0x8e, 0x4b, 0xc4, 0xad, 0xe6, 0x98, - 0x92, 0x08, 0x84, 0x48, 0x8f, 0x1d, 0x78, 0x10, 0x9e, 0xf7, 0xb8, 0x61, - 0x65, 0x46, 0xdb, 0x4a, 0xcf, 0xc5, 0x37, 0xe3, 0x77, 0x76, 0xcf, 0x0a, - 0x7e, 0x72, 0x3f, 0xe4, 0x51, 0x30, 0x28, 0x57, 0x13, 0xfd, 0xdb, 0x7e, - 0xd6, 0xa3, 0xdd, 0x64, 0xdd, 0x00, 0xd0, 0x7f, 0xbc, 0x48, 0x1d, 0xaf, - 0xde, 
0x0e, 0x45, 0xc4, 0xc9, 0xfa, 0xf6, 0xb2, 0xb7, 0x9a, 0x42, 0x8b, - 0x18, 0x08, 0xed, 0xdb, 0xa9, 0xc3, 0x32, 0xf1, 0x9c, 0xcf, 0x16, 0x74, - 0x57, 0xce, 0xe9, 0x44, 0x21, 0xdb, 0x8a, 0x45, 0x89, 0x70, 0x41, 0x5c, - 0xbf, 0x10, 0xdf, 0x83, 0x4a, 0xe4, 0x4c, 0xd8, 0xc9, 0x2e, 0x5b, 0xa3, - 0x05, 0xed, 0x73, 0xb1, 0xb0, 0xb7, 0xc4, 0xd7, 0x0d, 0xea, 0xf6, 0xb4, - 0xc1, 0x5e, 0x12, 0x54, 0x30, 0x73, 0x5c, 0x93, 0xd9, 0xf7, 0xc9, 0x24, - 0x43, 0x8f, 0x4f, 0x8e, 0x94, 0x95, 0xb6, 0xfd, 0xa3, 0x14, 0x42, 0x50, - 0xb8, 0x66, 0xfb, 0xc4, 0xed, 0x72, 0xcf, 0x7b, 0xa9, 0x73, 0xeb, 0xc4, - 0x4a, 0x05, 0xea, 0xb4, 0x47, 0xca, 0x21, 0x56, 0x28, 0xa8, 0x87, 0xb8, - 0x87, 0x0b, 0xe3, 0x8d, 0xfd, 0x70, 0xf7, 0x33, 0x76, 0xf0, 0x3d, 0xa4, - 0x3b, 0x83, 0xab, 0x14, 0x01, 0xe1, 0xb0, 0xa9, 0x44, 0xe8, 0xd7, 0x50, - 0x26, 0x0b, 0xbb, 0x2d, 0x57, 0x39, 0x82, 0x7c, 0x71, 0xd8, 0x12, 0xaf, - 0xf3, 0x9f, 0x46, 0xbd, 0x62, 0xd6, 0x61, 0xf5, 0xb7, 0x04, 0x94, 0xbf, - 0x87, 0xea, 0xc4, 0xc4, 0x33, 0xcf, 0x36, 0x3b, 0x4f, 0xc7, 0x71, 0xf1, - 0x98, 0xe6, 0xb0, 0x96, 0x25, 0xd7, 0xac, 0x75, 0xfc, 0x92, 0xe0, 0x69, - 0x72, 0x37, 0x8d, 0x40, 0x31, 0xaa, 0x2c, 0x86, 0xfb, 0x95, 0x3f, 0x9c, - 0x23, 0xd4, 0x39, 0x99, 0xff, 0xea, 0x95, 0x79, 0xb9, 0x2e, 0xb0, 0x33, - 0xf1, 0xe8, 0xd0, 0x42, 0xb5, 0x70, 0x5c, 0xca, 0x69, 0x48, 0x28, 0x23, - 0x58, 0xb4, 0x07, 0xfc, 0x3e, 0x15, 0x29, 0x00, 0xa9, 0x22, 0x44, 0x70, - 0xd0, 0xc7, 0x01, 0x0d, 0x3e, 0xfc, 0x57, 0xb7, 0x54, 0x3a, 0xc3, 0x43, - 0xd6, 0x2f, 0x55, 0x09, 0x52, 0x4a, 0x6b, 0x8e, 0x4c, 0x82, 0xbb, 0x4e, - 0x3e, 0x38, 0xe1, 0x9e, 0x72, 0x83, 0xec, 0x40, 0xf5, 0xf7, 0x0e, 0x3c, - 0x24, 0xed, 0xda, 0xf2, 0x39, 0x6c, 0xad, 0xeb, 0xff, 0xfb, 0x4a, 0x38, - 0x50, 0x49, 0x28, 0x3d, 0x05, 0xb2, 0x98, 0x44, 0x2b, 0x61, 0xa2, 0x9b, - 0x3a, 0x3c, 0xad, 0xd9, 0x8c, 0xef, 0x3c, 0x72, 0x50, 0x74, 0x13, 0x80, - 0xc4, 0x7e, 0x6e, 0xf3, 0xc9, 0xdf, 0x63, 0xf6, 0x41, 0xb2, 0x08, 0x78, - 0x9b, 0x7c, 0xa9, 0x13, 0xd1, 0x21, 0xe7, 0x5e, 0x6a, 0x0d, 0x64, 0xf7, - 0x52, 
0x75, 0xf2, 0x80, 0x69, 0xbe, 0x43, 0xf8, 0xd4, 0xad, 0x49, 0xfc, - 0x97, 0x76, 0x1c, 0xb6, 0x43, 0x9e, 0xcb, 0x45, 0x4d, 0x75, 0x07, 0xae, - 0xdb, 0xbf, 0xf5, 0x8a, 0xeb, 0xb9, 0x6b, 0x12, 0x06, 0xbf, 0x94, 0xad, - 0x77, 0x29, 0xb1, 0xae, 0x24, 0x9b, 0x4d, 0xdc, 0xe1, 0x5e, 0xd7, 0x57, - 0xec, 0xd1, 0xd8, 0xad, 0xf0, 0x06, 0x08, 0x43, 0x33, 0x99, 0xd2, 0x04, - 0xfc, 0xc8, 0xf6, 0x53, 0x3d, 0x73, 0xd4, 0x36, 0xd3, 0x8e, 0x4a, 0xcd, - 0xb1, 0xe9, 0xcb, 0x3a, 0x5f, 0x54, 0xbc, 0xde, 0x16, 0xa2, 0x85, 0xde, - 0x35, 0x27, 0x99, 0x32, 0x4f, 0xb9, 0x2c, 0x16, 0xa2, 0x6e, 0xae, 0x75, - 0x60, 0x77, 0xe9, 0x08, 0x0f, 0x08, 0xc4, 0xd0, 0x62, 0xc7, 0xd2, 0x1f, - 0x3b, 0x29, 0xdd, 0xb7, 0xea, 0xa3, 0x58, 0xaf, 0x4c, 0x05, 0xd2, 0x82, - 0x6a, 0xe0, 0xc4, 0xe9, 0x70, 0x7e, 0xf2, 0xca, 0x82, 0x6a, 0xae, 0xc1, - 0x9a, 0x42, 0x5d, 0x46, 0x4a, 0xb7, 0x8f, 0x4d, 0x33, 0xfe, 0x6f, 0x47, - 0xb5, 0x49, 0xb3, 0x89, 0x51, 0x31, 0x74, 0x68, 0x14, 0xda, 0x0a, 0x41, - 0x3d, 0x1f, 0x8e, 0x30, 0x8c, 0x77, 0xd1, 0xa9, 0x36, 0x41, 0x78, 0x34, - 0xb7, 0x7e, 0x4e, 0x7a, 0x77, 0x12, 0x43, 0x97, 0x43, 0xba, 0xd6, 0x28, - 0x14, 0x2a, 0x9f, 0x98, 0xb4, 0x39, 0x08, 0x5c, 0xb7, 0xb8, 0x03, 0x63, - 0x62, 0x68, 0xc6, 0x9a, 0x4d, 0xf5, 0xdc, 0x7c, 0x0f, 0x7e, 0x77, 0xdc, - 0x85, 0x53, 0x31, 0x8c, 0x53, 0x8b, 0x27, 0xc4, 0xb7, 0x3d, 0xd0, 0x94, - 0x9b, 0x7e, 0x59, 0x59, 0x03, 0x09, 0x8c, 0x30, 0x70, 0x7d, 0x9c, 0x73, - 0x89, 0x6c, 0x5f, 0xbf, 0xf9, 0xc7, 0x72, 0x76, 0x12, 0x98, 0xe3, 0xbe, - 0xc3, 0x67, 0xdf, 0xa1, 0x76, 0xa3, 0xec, 0x44, 0x30, 0x70, 0x2f, 0x6a, - 0x86, 0x28, 0xb9, 0x9d, 0x7f, 0x93, 0xf2, 0x4a, 0x34, 0x48, 0x1f, 0x2e, - 0x2e, 0x95, 0x88, 0xdb, 0x1f, 0x2c, 0x19, 0x46, 0x2e, 0x91, 0x5f, 0x81, - 0x0d, 0x08, 0x9d, 0x03, 0x0b, 0xaf, 0x59, 0x0a, 0x41, 0xad, 0x4d, 0x6c, - 0x09, 0x0e, 0x9f, 0xd1, 0xc4, 0xdb, 0xac, 0x59, 0x27, 0x04, 0x1c, 0x73, - 0xe9, 0xf3, 0xe8, 0x54, 0xd9, 0x11, 0x31, 0xb2, 0xed, 0x2d, 0x8c, 0xeb, - 0x99, 0x26, 0x48, 0x9e, 0xac, 0x88, 0x96, 0xcb, 0x19, 0x49, 0xfa, 0x4a, - 0x82, 
0xd5, 0x5d, 0xb8, 0x0f, 0x22, 0x3f, 0xb6, 0x5c, 0x02, 0x2a, 0xb9, - 0xd9, 0xfe, 0x4d, 0x9d, 0xdb, 0x85, 0x90, 0x19, 0x7f, 0x1a, 0x44, 0xa3, - 0x74, 0x68, 0xbf, 0xa2, 0x3b, 0xb4, 0x3b, 0xeb, 0xab, 0x99, 0xc2, 0x46, - 0x50, 0x7e, 0xec, 0xa9, 0xb4, 0x86, 0xfa, 0x50, 0xcb, 0x71, 0x7e, 0x75, - 0xa5, 0xca, 0xa6, 0x2f, 0x40, 0x1d, 0xa1, 0x4a, 0x5c, 0x91, 0xd7, 0x2a, - 0xa6, 0x17, 0x11, 0x4d, 0x19, 0x2b, 0xb3, 0x0f, 0xf0, 0xb3, 0x06, 0x70, - 0x51, 0x5c, 0x52, 0x8c, 0xdf, 0xe3, 0x19, 0x92, 0x08, 0x40, 0xa2, 0xb4, - 0xc0, 0xf2, 0xe8, 0x44, 0xcc, 0x36, 0xaa, 0xf9, 0xf8, 0xfc, 0x2d, 0x83, - 0x79, 0xc6, 0x58, 0xc1, 0xdf, 0x32, 0xb7, 0xde, 0x0f, 0x3e, 0xc0, 0xa8, - 0x7e, 0xeb, 0xf2, 0x30, 0x16, 0xdf, 0x38, 0xcb, 0x69, 0xd9, 0x44, 0x0d, - 0x44, 0xf4, 0x45, 0x9c, 0x81, 0xc8, 0xe7, 0x06, 0xae, 0x95, 0xaf, 0xff, - 0x17, 0x3b, 0x1c, 0x3f, 0xda, 0xa5, 0xf8, 0xfd, 0x9c, 0xf1, 0x0a, 0xca, - 0xda, 0xc0, 0xfa, 0x02, 0xc4, 0xce, 0x78, 0xfb, 0x35, 0x8c, 0xfe, 0x55, - 0xad, 0x0d, 0x9b, 0xeb, 0x10, 0xf1, 0x7b, 0xb1, 0x09, 0xf8, 0xef, 0xfc, - 0xde, 0x7a, 0x69, 0x74, 0x76, 0xef, 0x91, 0x64, 0x33, 0xc4, 0x08, 0x15, - 0x73, 0x85, 0x56, 0xae, 0x9c, 0xf6, 0xdd, 0x55, 0x19, 0x96, 0xe6, 0x41, - 0x12, 0xc9, 0x87, 0x91, 0x9e, 0xc6, 0x18, 0xe8, 0xbf, 0xa0, 0x59, 0xfd, - 0x20, 0xab, 0xb5, 0xcf, 0x0f, 0x6e, 0x30, 0xd3, 0xc5, 0x70, 0xf2, 0x50, - 0xa4, 0x2a, 0xdf, 0xb0, 0x45, 0xfc, 0x82, 0x1a, 0x3b, 0xfe, 0x0c, 0xad, - 0x41, 0x95, 0xf1, 0xd6, 0x85, 0xa2, 0xc9, 0xff, 0xbe, 0x3a, 0x64, 0x70, - 0x43, 0xc0, 0xc5, 0xc8, 0x80, 0x11, 0x0d, 0x20, 0xcd, 0xf2, 0xa2, 0xbb, - 0x43, 0x68, 0x0e, 0xf4, 0x01, 0xb3, 0x73, 0x79, 0x9f, 0x68, 0x41, 0x63, - 0x3e, 0xda, 0xf9, 0xf4, 0x23, 0x57, 0x97, 0x84, 0x99, 0xe8, 0x5e, 0xdb, - 0xaa, 0x24, 0xab, 0x9c, 0x40, 0x83, 0xf9, 0x3f, 0x4f, 0x5a, 0x53, 0xa6, - 0xf1, 0xe8, 0x95, 0xcf, 0xcb, 0x50, 0x13, 0x51, 0xa7, 0x8c, 0x71, 0x1d, - 0xff, 0xcc, 0x66, 0xab, 0xff, 0xca, 0xc5, 0xc3, 0x73, 0x45, 0xb7, 0x21, - 0x1d, 0x65, 0x7a, 0xe5, 0x1f, 0x3f, 0x1a, 0x58, 0x23, 0x28, 0xc8, 0xf3, - 0xbf, 
0x98, 0x25, 0xc0, 0x83, 0x68, 0xf0, 0x62, 0x63, 0x90, 0xcf, 0x1f, - 0x20, 0xb8, 0x04, 0x5c, 0xc4, 0x80, 0x5b, 0xf4, 0x6d, 0xdc, 0xe9, 0xac, - 0xd8, 0x13, 0x3b, 0x42, 0xf8, 0x4e, 0xa2, 0x1c, 0xce, 0x3f, 0x8d, 0x15, - 0xd3, 0x87, 0x1b, 0x44, 0x79, 0x52, 0x34, 0x4b, 0x63, 0x4d, 0xbf, 0x95, - 0xec, 0xae, 0xf9, 0xc6, 0x7b, 0x7b, 0x85, 0x8c, 0x4f, 0x20, 0x58, 0x9d, - 0x48, 0x03, 0x2f, 0x77, 0x2e, 0x8b, 0x6f, 0x66, 0x76, 0xb9, 0xb8, 0xb7, - 0x34, 0x5a, 0x63, 0x06, 0x85, 0x82, 0x5f, 0x23, 0x8f, 0x8d, 0x0c, 0x92, - 0x3b, 0xd2, 0x8a, 0x1b, 0x39, 0xee, 0x6a, 0xbc, 0xf6, 0x94, 0x2a, 0xc6, - 0x73, 0xa6, 0x99, 0x98, 0xdc, 0x96, 0xd7, 0xc1, 0xfe, 0x9b, 0xc8, 0xfb, - 0x86, 0x5a, 0xad, 0xce, 0xf8, 0xd5, 0x32, 0x62, 0x96, 0x63, 0xaf, 0x4c, - 0x4a, 0xae, 0xec, 0x26, 0x3d, 0x84, 0x69, 0x50, 0x5f, 0x37, 0x9b, 0x29, - 0xac, 0x15, 0x76, 0x3d, 0x33, 0x96, 0x06, 0xde, 0xc1, 0x6d, 0xa2, 0xc7, - 0xc3, 0x8a, 0x20, 0x2e, 0xf7, 0x08, 0x55, 0x83, 0x23, 0x9c, 0x23, 0x2d, - 0x3a, 0xa1, 0x32, 0xbc, 0x47, 0x48, 0xd5, 0x6a, 0x71, 0xb9, 0xcc, 0x2d, - 0x99, 0xa0, 0x37, 0x07, 0x46, 0x45, 0xbe, 0xf0, 0x27, 0x5a, 0x25, 0x72, - 0x58, 0x47, 0x6d, 0xbf, 0x23, 0xdc, 0x48, 0x44, 0x45, 0x95, 0xb1, 0x62, - 0xf1, 0x7e, 0x4c, 0x95, 0x1c, 0xb4, 0x17, 0x8b, 0x59, 0x2e, 0xf3, 0x4f, - 0x45, 0x3b, 0x5d, 0x67, 0x92, 0x52, 0xd8, 0xc1, 0x91, 0xfa, 0x53, 0xaa, - 0x87, 0xc0, 0xa7, 0xb0, 0x9f, 0x10, 0xe8, 0xac, 0x45, 0x52, 0xbb, 0x17, - 0xee, 0xf6, 0x18, 0xbe, 0x02, 0x70, 0xce, 0x79, 0x66, 0x72, 0xf9, 0xf6, - 0xca, 0x66, 0xff, 0xa4, 0x9a, 0xd9, 0xb7, 0x07, 0xa9, 0xc1, 0x23, 0x7e, - 0x7b, 0x9c, 0xe3, 0x02, 0x7a, 0xcc, 0xa3, 0x67, 0xb7, 0xb0, 0x37, 0xba, - 0xae, 0x12, 0xda, 0x48, 0x6e, 0x7f, 0xde, 0x5f, 0x75, 0x15, 0xca, 0xd2, - 0x46, 0xdd, 0xb0, 0x82, 0xbf, 0x6d, 0xe9, 0x51, 0x66, 0xa5, 0x9e, 0x0c, - 0xd5, 0x03, 0xbd, 0x97, 0x0e, 0x1b, 0x88, 0xf6, 0x61, 0x5a, 0x8b, 0xe0, - 0xdd, 0x3e, 0x59, 0x4c, 0x35, 0xfd, 0xb0, 0x3b, 0x79, 0x8c, 0x1c, 0x96, - 0x97, 0x35, 0x62, 0x36, 0x62, 0x4c, 0x4b, 0x46, 0xb1, 0x21, 0xf7, 0xf0, - 0x34, 
0xdc, 0xd9, 0x9f, 0xf8, 0x53, 0x7d, 0xca, 0xbc, 0x4d, 0xaf, 0xf4, - 0xb7, 0x2f, 0xa7, 0x5d, 0x18, 0xf9, 0x3b, 0xa9, 0xb0, 0xbb, 0xdf, 0xfa, - 0x28, 0x2b, 0x58, 0xce, 0x46, 0x01, 0x3f, 0x76, 0xf2, 0x39, 0x45, 0x8b, - 0x3c, 0xda, 0x62, 0x2b, 0x6b, 0xe1, 0x5f, 0x14, 0xfc, 0x79, 0x17, 0x2d, - 0xe2, 0xe5, 0x8c, 0xc5, 0xde, 0x91, 0xfd, 0xf5, 0x6d, 0x9b, 0x6b, 0xbb, - 0xb0, 0x13, 0xae, 0xbe, 0x1e, 0xa8, 0x8f, 0x3c, 0xfd, 0x24, 0xbe, 0xb8, - 0x39, 0x80, 0x03, 0x06, 0x8b, 0xff, 0xca, 0x90, 0x88, 0x0f, 0x45, 0xc4, - 0xeb, 0x50, 0x52, 0xf5, 0x00, 0x8c, 0x16, 0x9d, 0x26, 0xaa, 0xec, 0xb1, - 0x44, 0xd6, 0xfe, 0x67, 0xa3, 0xc1, 0xec, 0x4a, 0x12, 0xa6, 0x7c, 0x7c, - 0xc3, 0x46, 0x1c, 0x64, 0x61, 0x67, 0xec, 0xce, 0x1e, 0xa2, 0xb4, 0xdd, - 0x6e, 0x7f, 0x02, 0x14, 0xf4, 0x1c, 0x17, 0xa7, 0x31, 0x9f, 0xc2, 0xc6, - 0xc0, 0x21, 0x41, 0x88, 0x61, 0xd8, 0xca, 0x06, 0xa5, 0xe4, 0xef, 0xa4, - 0xaa, 0x4d, 0xa3, 0xad, 0x5f, 0xd4, 0x0c, 0x6b, 0x14, 0x38, 0x2e, 0xe8, - 0x87, 0x5a, 0x68, 0x10, 0x51, 0xd8, 0xbb, 0xa6, 0xd9, 0xdc, 0xd3, 0x7f, - 0x1f, 0xea, 0xa8, 0xcc, 0x3f, 0x43, 0xa4, 0x04, 0x95, 0xb4, 0xde, 0x2f, - 0x07, 0x5d, 0x91, 0x1c, 0x8e, 0xc3, 0xbc, 0xaa, 0x46, 0x8a, 0xa8, 0x42, - 0xa7, 0x2c, 0x0f, 0x1f, 0xb3, 0xe2, 0x8a, 0x0b, 0xa0, 0x3f, 0xfb, 0x87, - 0x9e, 0x42, 0xa5, 0x60, 0xce, 0x5a, 0x54, 0x91, 0x26, 0x51, 0xea, 0x81, - 0x6f, 0xf1, 0x54, 0x93, 0xe7, 0xa0, 0xf8, 0x64, 0xab, 0x1d, 0x0d, 0x9d, - 0x64, 0x6a, 0xd5, 0x19, 0x03, 0xbb, 0x94, 0x7f, 0x0a, 0xb8, 0x6b, 0x87, - 0xc3, 0x1a, 0x38, 0xe5, 0xe8, 0xba, 0x13, 0x17, 0xeb, 0x13, 0xcc, 0xac, - 0xcb, 0x1f, 0x96, 0x4c, 0x3b, 0x18, 0xfb, 0xe8, 0x5c, 0x54, 0xce, 0x1a, - 0x91, 0x44, 0xf5, 0x49, 0x6c, 0x38, 0x2a, 0x92, 0x8a, 0x0d, 0x3d, 0x08, - 0xc2, 0x5f, 0x6c, 0xac, 0x48, 0xb3, 0xdc, 0x2e, 0xa6, 0x5a, 0xa8, 0xee, - 0x22, 0x9a, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, - 0x96, 0xc5, 0x3a, 0x4e, 0x42, 0x7d, 0x27, 0xce, 0x44, 0x84, 0xf1, 0x67, - 0x8c, 0xc5, 0xdd, 0x75, 0x3b, 0x8a, 0xed, 0x2e, 0x29, 0x62, 0x7b, 0xb0, - 0xe6, 
0xa3, 0xb4, 0x61, 0x73, 0x10, 0xff, 0x0e, 0x0c, 0x98, 0x74, 0xef, - 0xbb, 0xc4, 0xca, 0x03, 0x88, 0xa4, 0x96, 0x61, 0xef, 0x36, 0x6d, 0xa2, - 0xb1, 0xc8, 0xf0, 0xac, 0xf1, 0xb2, 0x08, 0x56, 0xc7, 0x99, 0xcf, 0xae, - 0x0a, 0x37, 0x85, 0x60, 0x78, 0x2d, 0x14, 0xda, 0xb1, 0xa7, 0x00, 0xb6, - 0x00, 0x04, 0x76, 0x80, 0x0e, 0x9f, 0x2a, 0x30, 0x8b, 0x85, 0xd9, 0xc1, - 0xaf, 0xee, 0x27, 0x80, 0x20, 0xed, 0xef, 0x25, 0x5c, 0x98, 0x6b, 0xcc, - 0xf8, 0x72, 0xfb, 0x3f, 0x13, 0xe6, 0x9b, 0x47, 0xee, 0xa1, 0x18, 0x55, - 0xa0, 0x68, 0xbe, 0xd4, 0x21, 0x59, 0x72, 0xa8, 0xa4, 0xd2, 0x33, 0x57, - 0x50, 0xfc, 0x6b, 0xa8, 0x49, 0x1b, 0x74, 0xdb, 0x5a, 0x16, 0xb8, 0x52, - 0x0c, 0xda, 0xa0, 0xa3, 0xff, 0x33, 0x56, 0x82, 0x0f, 0x0a, 0x90, 0x82, - 0xee, 0xf1, 0x1b, 0xb3, 0x05, 0x44, 0x39, 0x01, 0xf7, 0x1e, 0xff, 0xcb, - 0xea, 0xd0, 0xb6, 0x20, 0xbc, 0x84, 0xb1, 0xf9, 0xa2, 0xc1, 0x56, 0xe6, - 0xfa, 0x47, 0xc9, 0xfd, 0x45, 0x77, 0x51, 0x8e, 0x01, 0xe4, 0x17, 0x20, - 0x6f, 0x99, 0xe3, 0x90, 0x2f, 0xcc, 0xaf, 0xd9, 0x61, 0x32, 0x91, 0x62, - 0x58, 0xf4, 0x98, 0xf5, 0xf4, 0xeb, 0x13, 0xeb, 0xdc, 0x8a, 0xac, 0xb2, - 0x9e, 0xcf, 0xe7, 0xa7, 0xd4, 0x97, 0x22, 0x12, 0x08, 0x10, 0x6d, 0x40, - 0xea, 0x26, 0xea, 0x42, 0x29, 0x6e, 0x75, 0x62, 0x47, 0x08, 0x17, 0xa8, - 0x69, 0x0f, 0xf7, 0x35, 0x59, 0x23, 0x86, 0x83, 0xfd, 0xb5, 0x61, 0x98, - 0x9c, 0x4d, 0x37, 0xda, 0x9f, 0xfc, 0xfb, 0x16, 0xb7, 0x6c, 0x52, 0xee, - 0xa8, 0x9c, 0x3e, 0x93, 0x43, 0xc5, 0x2b, 0xd4, 0xd0, 0x9f, 0x69, 0x2c, - 0xc9, 0x1f, 0x2e, 0xdf, 0x5b, 0xe6, 0xc6, 0x5f, 0x71, 0xd1, 0xd7, 0xb2, - 0x8f, 0x3a, 0xba, 0x60, 0x75, 0x3d, 0x34, 0x41, 0x43, 0x9b, 0x13, 0xc0, - 0x3b, 0x30, 0xc5, 0xe9, 0x84, 0x81, 0xde, 0x85, 0x4e, 0x65, 0x7b, 0x21, - 0x37, 0xb8, 0xef, 0x24, 0x19, 0xaa, 0x26, 0x0c, 0x27, 0xa7, 0xd9, 0x29, - 0x47, 0x1a, 0x15, 0x42, 0x1e, 0x30, 0x79, 0x79, 0x96, 0x09, 0x62, 0x26, - 0xad, 0x98, 0x8b, 0xcb, 0x3d, 0xeb, 0x66, 0x83, 0x77, 0xd9, 0x79, 0x4d, - 0x05, 0x81, 0x72, 0xe9, 0xe0, 0x6f, 0x13, 0x00, 0x7e, 0xa3, 0x92, 0x82, - 0x1c, 
0x90, 0x83, 0x4b, 0x15, 0x97, 0x0f, 0x92, 0xe2, 0xd3, 0x3d, 0xd7, - 0x6c, 0xb9, 0x60, 0x9a, 0x23, 0x52, 0xbe, 0x59, 0xc9, 0x36, 0x9e, 0xf7, - 0x77, 0x09, 0x79, 0x01, 0xcc, 0xec, 0x17, 0xd1, 0x74, 0xbc, 0x58, 0x65, - 0x45, 0x3c, 0x86, 0xf1, 0xbc, 0xbd, 0x95, 0x54, 0x46, 0x45, 0x7b, 0x4c, - 0xa2, 0xea, 0x2a, 0x6e, 0xa8, 0xd1, 0x66, 0x03, 0xb2, 0x6a, 0xe0, 0xd3, - 0x07, 0x8d, 0xe0, 0x09, 0x81, 0x42, 0xe3, 0x97, 0xc4, 0xe7, 0x37, 0xc5, - 0x82, 0xcf, 0xb1, 0xec, 0xba, 0xbd, 0xf4, 0xb6, 0x41, 0xb2, 0xb8, 0xa6, - 0x3a, 0x85, 0x4b, 0x4f, 0x46, 0x48, 0xe9, 0x9b, 0x72, 0xf5, 0xb0, 0x64, - 0x66, 0x75, 0x42, 0xb4, 0x00, 0xbe, 0x11, 0x6d, 0x86, 0x93, 0x07, 0x50, - 0xa7, 0xef, 0x55, 0x42, 0xcf, 0xe8, 0x61, 0xd0, 0x9b, 0x11, 0x84, 0x8c, - 0x74, 0xe4, 0xb8, 0x3f, 0x48, 0xb3, 0x61, 0xe3, 0xea, 0x66, 0x86, 0x94, - 0x95, 0x12, 0x77, 0x26, 0x75, 0x30, 0xb5, 0xd3, 0x7a, 0xad, 0x2d, 0x58, - 0x46, 0x1b, 0x4b, 0xd9, 0x2d, 0x1e, 0x0b, 0xff, 0xd7, 0x03, 0x56, 0x3b, - 0xbd, 0x65, 0xb0, 0xf9, 0xfe, 0x43, 0x1c, 0x9c, 0x18, 0x82, 0x78, 0x5e, - 0x06, 0x02, 0x21, 0x70, 0xb2, 0x7f, 0xb5, 0x63, 0x71, 0x85, 0x95, 0x79, - 0xae, 0x1e, 0xc6, 0x62, 0x7a, 0x7c, 0x63, 0x46, 0x70, 0x1c, 0x58, 0x72, - 0x1d, 0xde, 0xca, 0xb4, 0xfc, 0xc8, 0x56, 0x38, 0x32, 0xf4, 0x0b, 0x56, - 0x87, 0x6b, 0x5b, 0x53, 0xd2, 0x2c, 0x35, 0xef, 0x5b, 0x33, 0x59, 0x13, - 0x76, 0x82, 0x30, 0x80, 0x23, 0x10, 0x07, 0x4c, 0x3f, 0xac, 0x9c, 0x58, - 0x2d, 0x04, 0xe6, 0x6a, 0xd3, 0x5c, 0xf9, 0xb6, 0x59, 0x4e, 0x85, 0xfe, - 0x01, 0x71, 0xf0, 0xf7, 0xf2, 0x1f, 0x46, 0xd5, 0x20, 0x3c, 0x9b, 0xc2, - 0x1e, 0x73, 0x1c, 0x56, 0x9c, 0x76, 0x8c, 0x12, 0x95, 0x51, 0xd4, 0x6f, - 0x5b, 0x3a, 0xa7, 0x5f, 0xa7, 0xe4, 0xfa, 0xb7, 0x1a, 0xdd, 0xb6, 0x4c, - 0x01, 0x02, 0xae, 0x9c, 0x02, 0x0d, 0x66, 0x2f, 0x40, 0x87, 0xa1, 0xbc, - 0xf3, 0xde, 0xf4, 0xdb, 0x65, 0xee, 0xcc, 0xca, 0xe1, 0x7a, 0xa2, 0xf4, - 0xf7, 0xf5, 0x7c, 0x2a, 0x3f, 0xa4, 0x67, 0xbb, 0x07, 0x50, 0x7a, 0x29, - 0x8a, 0xcf, 0x2c, 0x7a, 0x0e, 0x0d, 0xc7, 0x95, 0x8b, 0xf4, 0xe2, 0x50, - 0xe1, 
0xc1, 0x40, 0x16, 0x99, 0x5c, 0x72, 0xe7, 0xe4, 0x01, 0xeb, 0x29, - 0x6a, 0x99, 0xf2, 0x67, 0x23, 0x46, 0x1f, 0xaa, 0xea, 0xc1, 0x51, 0x30, - 0xeb, 0x7d, 0x34, 0x52, 0x91, 0x37, 0x2d, 0xc6, 0x5c, 0x3a, 0x7c, 0x54, - 0xc0, 0x79, 0xdc, 0xf9, 0xbf, 0x08, 0x2a, 0xf6, 0xe1, 0x1e, 0xee, 0xc6, - 0xd2, 0xe9, 0x30, 0x27, 0x60, 0x0c, 0xa2, 0x63, 0x16, 0x06, 0x3d, 0xe2, - 0xf5, 0x6f, 0xea, 0xe4, 0x4d, 0x9f, 0x2d, 0x36, 0x62, 0x95, 0x47, 0x5d, - 0x00, 0x22, 0x9f, 0x0c, 0xbb, 0x71, 0xad, 0xea, 0xe7, 0x62, 0x59, 0x21, - 0xd1, 0xaf, 0x04, 0x5a, 0xfc, 0x1f, 0x28, 0x6b, 0x6f, 0x71, 0xec, 0xd4, - 0xbd, 0x9c, 0x88, 0xfb, 0x3f, 0x04, 0xea, 0xd6, 0xb2, 0x24, 0xe5, 0x28, - 0xfe, 0xc5, 0x3e, 0x15, 0x00, 0x8c, 0xa2, 0xdf, 0x18, 0x3d, 0x10, 0x9a, - 0xb1, 0xcd, 0x64, 0xda, 0x87, 0x41, 0xc8, 0xa1, 0x1c, 0x97, 0xd5, 0x44, - 0xd9, 0x51, 0xd2, 0x96, 0xed, 0xad, 0x28, 0x1f, 0x03, 0x89, 0x21, 0xbd, - 0x79, 0x91, 0x48, 0x9c, 0x8e, 0x17, 0xfd, 0x36, 0x72, 0xf6, 0x69, 0x4f, - 0x3f, 0x02, 0x57, 0xcc, 0x3f, 0x1c, 0x49, 0x82, 0x00, 0x45, 0x9e, 0x29, - 0x83, 0x14, 0x12, 0xbb, 0xd2, 0xd0, 0x1a, 0x66, 0x0f, 0x57, 0x24, 0xd4, - 0x9f, 0x46, 0x0c, 0xf4, 0xb8, 0x28, 0x85, 0x52, 0xe2, 0xa1, 0xc2, 0x3a, - 0x8c, 0x34, 0x4a, 0x81, 0xe3, 0xbc, 0xa2, 0x67, 0x67, 0x12, 0x13, 0xc4, - 0xe7, 0xd7, 0x2c, 0x4e, 0xa9, 0xf5, 0xed, 0x63, 0xf2, 0x18, 0x9c, 0x0c, - 0xe2, 0x4d, 0x25, 0x23, 0x30, 0x3e, 0x49, 0x29, 0xa6, 0x37, 0xdf, 0xc2, - 0xdc, 0xf6, 0x5e, 0xae, 0x45, 0xd7, 0x8d, 0x56, 0xba, 0x29, 0x4f, 0xee, - 0xc9, 0x26, 0xd7, 0xbf, 0x10, 0x4d, 0x0a, 0x3b, 0x3d, 0x1f, 0xd5, 0x72, - 0xe1, 0xe6, 0xf5, 0x23, 0x4a, 0x17, 0x2d, 0xe4, 0x40, 0x55, 0x9b, 0x39, - 0x66, 0x36, 0xe4, 0x6d, 0x6d, 0xb6, 0x8d, 0x2a, 0x7e, 0x76, 0x73, 0xa5, - 0x86, 0x20, 0x3d, 0x18, 0xa0, 0x6c, 0x35, 0x59, 0xc8, 0x1c, 0xef, 0x0f, - 0x36, 0x1d, 0x6f, 0xba, 0x89, 0xb9, 0x9e, 0x7a, 0x58, 0x1d, 0x43, 0xad, - 0x85, 0x8b, 0x6b, 0xcc, 0x25, 0xb8, 0xe4, 0xdd, 0xa1, 0x35, 0xd9, 0xef, - 0xc4, 0xb1, 0xf6, 0x99, 0x27, 0x17, 0xb7, 0xbe, 0xd1, 0x4f, 0xa1, 0x81, - 0x4e, 
0xb6, 0x19, 0xcd, 0xa0, 0x92, 0xeb, 0x56, 0x41, 0x4f, 0x37, 0xca, - 0x3b, 0x43, 0x85, 0x86, 0xdf, 0x5d, 0x5a, 0x8c, 0xd4, 0x5b, 0xc4, 0x28, - 0xdb, 0x16, 0xea, 0x3a, 0x2e, 0x9e, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, - 0x80, 0x00, 0x00, 0x00, 0xea, 0x59, 0x40, 0xc4, 0x40, 0x8b, 0x6a, 0x8a, - 0xb8, 0x7f, 0x1e, 0x0b, 0xfe, 0xab, 0xa4, 0xac, 0x42, 0x91, 0xc5, 0xfa, - 0x2c, 0x7e, 0xb4, 0xf9, 0x5c, 0xd5, 0x4c, 0x6a, 0x74, 0x82, 0x90, 0x81, - 0x96, 0xb0, 0xf4, 0xd4, 0xba, 0xc9, 0xa3, 0x2e, 0x26, 0x0a, 0xc9, 0x55, - 0x65, 0xac, 0xde, 0x83, 0x37, 0xec, 0x0e, 0xf6, 0xdc, 0x8c, 0x34, 0xe6, - 0x57, 0xde, 0x32, 0x0a, 0x02, 0x62, 0x4f, 0x6a, 0x92, 0xa5, 0xb4, 0x40, - 0xde, 0x57, 0xf4, 0xd1, 0xa3, 0x1c, 0xd3, 0xf7, 0x4a, 0x15, 0xcc, 0x27, - 0x26, 0x00, 0xba, 0xf3, 0xfa, 0x4e, 0xc6, 0xe9, 0xc3, 0x05, 0x3d, 0x3a, - 0x89, 0x96, 0x7d, 0x41, 0xac, 0xca, 0x28, 0x7f, 0x69, 0x02, 0x40, 0x03, - 0x93, 0x86, 0x85, 0x85, 0x73, 0x00, 0x09, 0x5a, 0xcf, 0x5f, 0x1d, 0xaa, - 0x46, 0x41, 0x9d, 0x08, 0xbf, 0xea, 0x45, 0x9b, 0x93, 0xda, 0x9e, 0x81, - 0xba, 0x9e, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, - 0x6a, 0x1f, 0x9b, 0x03, 0xdd, 0xe4, 0x16, 0x07, 0x7f, 0x5b, 0xb0, 0xee, - 0xac, 0x55, 0xc4, 0x50, 0xe6, 0x2b, 0x17, 0xed, 0x7f, 0x50, 0x4d, 0x71, - 0x73, 0xae, 0xe0, 0x4d, 0xce, 0x08, 0xd9, 0x8b, 0x83, 0x2c, 0x01, 0x48, - 0x02, 0xd3, 0xbb, 0xca, 0x86, 0xd7, 0xca, 0x5f, 0xc7, 0xce, 0x59, 0xdf, - 0xc1, 0xcc, 0xf7, 0x7b, 0x54, 0xf8, 0x0d, 0x4f, 0x81, 0x9e, 0x50, 0x6a, - 0x65, 0x66, 0x4a, 0xec, 0x7a, 0x1b, 0x92, 0xb2, 0x39, 0x8f, 0x5d, 0x41, - 0x33, 0xcf, 0xe6, 0x1b, 0x34, 0x5d, 0xe1, 0xf6, 0xef, 0xcb, 0xa0, 0x55, - 0x7e, 0x1f, 0x45, 0x38, 0xb9, 0x56, 0x15, 0x3b, 0x70, 0xab, 0xc8, 0x2f, - 0x1c, 0xb9, 0x7d, 0x37, 0xe1, 0xb4, 0x03, 0x44, 0x5a, 0xf6, 0x57, 0x97, - 0x03, 0x54, 0x4c, 0x22, 0x88, 0xc3, 0x82, 0xfd, 0x91, 0xc1, 0xf1, 0x63, - 0xb4, 0x50, 0x46, 0x11, 0x64, 0x07, 0xfd, 0x85, 0xe5, 0x78, 0x57, 0xdd, - 0x19, 0x2a, 0x6b, 0x64, 0x3e, 0xec, 0xb8, 0xf3, 0xb5, 0x95, 0x29, 0x72, - 0xf1, 
0x9d, 0xdd, 0xb9, 0xad, 0xd0, 0x78, 0x26, 0x86, 0x10, 0x10, 0x19, - 0xe4, 0x79, 0xae, 0xdc, 0x56, 0xb7, 0x54, 0x4f, 0x94, 0xc6, 0x26, 0x9a, - 0x93, 0xa8, 0x2e, 0x1b, 0x1c, 0xda, 0x87, 0x3a, 0xa2, 0x44, 0xb9, 0x0b, - 0x0f, 0xab, 0x70, 0x3b, 0xb7, 0x6c, 0xbf, 0x58, 0x67, 0x32, 0x7d, 0xa3, - 0x2a, 0xcb, 0x4e, 0x02, 0x92, 0xa1, 0x26, 0x0e, 0x20, 0x5e, 0xb3, 0xec, - 0xc4, 0x04, 0x5b, 0x7f, 0xe5, 0xbd, 0x30, 0xeb, 0xc8, 0xdd, 0xf1, 0x72, - 0x5a, 0x7e, 0xcb, 0x93, 0x22, 0xa0, 0x01, 0x9f, 0xbb, 0x24, 0x9f, 0x50, - 0x01, 0x1f, 0x24, 0x02, 0x85, 0x6d, 0xe6, 0x4d, 0x55, 0xc4, 0x07, 0xe9, - 0x87, 0x38, 0xbf, 0x1a, 0x3b, 0x05, 0x82, 0xc4, 0x73, 0x4b, 0x87, 0x3c, - 0xb4, 0x0a, 0x48, 0x8c, 0x06, 0x67, 0xe7, 0xbf, 0xcc, 0xe7, 0xe5, 0xc3, - 0xb2, 0x81, 0x60, 0xe2, 0xd1, 0xb1, 0x8f, 0x98, 0xbd, 0x7d, 0xbd, 0x4e, - 0x9a, 0xca, 0xbe, 0xcb, 0x81, 0x47, 0x25, 0xaa, 0xfa, 0x91, 0xcf, 0x78, - 0xce, 0xcb, 0x1a, 0x11, 0x79, 0xcf, 0x97, 0xa3, 0x95, 0x95, 0x6f, 0xd7, - 0xae, 0x80, 0xc9, 0xd5, 0x95, 0xb7, 0xcf, 0xe2, 0x9d, 0x98, 0x65, 0x80, - 0xfd, 0x2e, 0xee, 0x46, 0x5e, 0x46, 0x8c, 0xde, 0x52, 0xb4, 0xdc, 0xce, - 0xa8, 0xab, 0x4e, 0x0c, 0x12, 0x9f, 0x89, 0x9c, 0x84, 0x80, 0xfe, 0x08, - 0x64, 0x12, 0x12, 0x95, 0x62, 0xea, 0x65, 0xcc, 0x34, 0x80, 0xcf, 0x92, - 0x5f, 0xc2, 0xae, 0x76, 0xe7, 0x2f, 0xbb, 0xa8, 0xdb, 0x6a, 0x66, 0x60, - 0xaf, 0x88, 0xba, 0x65, 0x32, 0xcf, 0xf7, 0x6e, 0xd8, 0xd0, 0x69, 0xb0, - 0x12, 0x23, 0xd6, 0xc2, 0x32, 0xe5, 0x8e, 0x51, 0xc5, 0x61, 0x28, 0x45, - 0xf7, 0xf9, 0xea, 0x73, 0xce, 0x04, 0x2d, 0x56, 0x43, 0x10, 0x8b, 0x4f, - 0x6b, 0xfa, 0x32, 0xa8, 0x92, 0x8f, 0xd9, 0xb4, 0xfd, 0xa4, 0x74, 0xa8, - 0xea, 0xca, 0xd3, 0x84, 0xbb, 0x5a, 0x34, 0x57, 0xf9, 0xda, 0x25, 0x40, - 0x1f, 0x5e, 0xc2, 0x66, 0x43, 0x05, 0xdd, 0x13, 0x88, 0x91, 0x60, 0xa1, - 0x75, 0xd3, 0xc4, 0x27, 0xff, 0xda, 0x24, 0x3d, 0xd9, 0xd7, 0x47, 0x46, - 0x30, 0xd0, 0x76, 0xc4, 0x9e, 0x97, 0xe3, 0x43, 0xd7, 0x45, 0xaf, 0x49, - 0x36, 0xf2, 0x18, 0xdd, 0x3f, 0x86, 0x9a, 0xec, 0x9a, 0x70, 0xeb, 0x5a, - 0xe2, 
0xa0, 0x4b, 0x45, 0x21, 0xb3, 0x32, 0x3d, 0x0c, 0x8c, 0x03, 0x13, - 0xae, 0x46, 0xb5, 0x1a, 0x0a, 0x03, 0x36, 0xfe, 0xfe, 0xfa, 0xc9, 0x4d, - 0x46, 0xf8, 0xfe, 0x6f, 0x99, 0x8c, 0xe4, 0x77, 0x0c, 0x27, 0x59, 0xf7, - 0xc3, 0xfc, 0x32, 0xb3, 0xa5, 0xae, 0xdc, 0x49, 0xac, 0x31, 0x27, 0xa6, - 0x14, 0x92, 0xfb, 0xe3, 0x69, 0x35, 0x8d, 0xa0, 0x50, 0x55, 0x09, 0x90, - 0xdf, 0x67, 0x08, 0x4c, 0x0e, 0xaf, 0x71, 0xc2, 0xe8, 0xb8, 0xdc, 0x45, - 0xe3, 0x6d, 0x58, 0x3f, 0x19, 0x8d, 0xcd, 0xeb, 0xe3, 0x02, 0x49, 0xd8, - 0xc8, 0x8b, 0x29, 0xb3, 0xef, 0x2b, 0xf0, 0x39, 0x5c, 0x11, 0xaa, 0x52, - 0x44, 0x0d, 0x1a, 0x3a, 0x7a, 0x62, 0xda, 0x6d, 0xe3, 0xdd, 0x03, 0x30, - 0x6d, 0x3e, 0x18, 0x30, 0x1d, 0xc0, 0xd0, 0x05, 0x67, 0x98, 0xf5, 0x2a, - 0xc7, 0xa1, 0x58, 0xd7, 0xf8, 0x6f, 0x7d, 0x07, 0x59, 0x27, 0x95, 0xb9, - 0x8d, 0x4d, 0xd7, 0xc8, 0x5e, 0x8b, 0x89, 0x14, 0xb7, 0x1b, 0x35, 0xaa, - 0x72, 0x02, 0x39, 0x3c, 0x41, 0x7c, 0x91, 0x93, 0x81, 0xe1, 0xad, 0xbe, - 0x77, 0x28, 0x80, 0xa2, 0x9c, 0xa8, 0x00, 0x18, 0xa5, 0x70, 0xec, 0xec, - 0x96, 0x95, 0x37, 0xa3, 0xee, 0x15, 0xa0, 0x69, 0x0e, 0x05, 0xb5, 0xb4, - 0xb6, 0xa7, 0x8b, 0xb9, 0x41, 0x88, 0x4f, 0x56, 0x39, 0xa7, 0xbe, 0x24, - 0xce, 0x4c, 0xe0, 0x9c, 0x24, 0x5a, 0xa1, 0xab, 0xcd, 0x82, 0xf1, 0x16, - 0x3f, 0xc0, 0xaf, 0xe1, 0x42, 0xe0, 0x7d, 0x1b, 0xd9, 0x8f, 0xb8, 0x04, - 0xa1, 0x88, 0xd9, 0xc3, 0xaf, 0x4f, 0xda, 0xfd, 0x0b, 0x5c, 0xc3, 0x04, - 0xf3, 0xdb, 0xe6, 0x76, 0x6e, 0xe9, 0xdc, 0xea, 0x6f, 0xa2, 0xa5, 0x75, - 0x2c, 0xc7, 0x91, 0x7d, 0x4b, 0xd5, 0x68, 0x55, 0xbb, 0x2d, 0x14, 0xdb, - 0x06, 0x76, 0xf7, 0xcc, 0x0a, 0x88, 0x6c, 0x2b, 0xa1, 0x57, 0xd6, 0x15, - 0x9c, 0x46, 0xcf, 0x5b, 0x6f, 0x9e, 0x7e, 0xc5, 0x39, 0xda, 0x97, 0x26, - 0x5e, 0xf5, 0x25, 0x06, 0xed, 0x8e, 0x9b, 0x1d, 0x1b, 0x91, 0x07, 0x89, - 0x08, 0xce, 0xd7, 0x38, 0x43, 0x64, 0x8e, 0xf5, 0x3a, 0x52, 0x4a, 0xfb, - 0x3e, 0xff, 0x2c, 0xb3, 0x78, 0x40, 0xb5, 0xdd, 0xb2, 0x8a, 0xd3, 0x6a, - 0xc5, 0xb0, 0xa3, 0x4a, 0xb8, 0xe7, 0x27, 0xa0, 0x5a, 0x8f, 0x0f, 0xda, - 0x53, 
0x49, 0xc9, 0x77, 0x2a, 0xef, 0x78, 0xc6, 0xec, 0xaf, 0x10, 0xe5, - 0x71, 0xc5, 0x7a, 0x85, 0xdf, 0xb2, 0x85, 0x02, 0xe3, 0x55, 0x7a, 0x91, - 0x3a, 0x68, 0xb2, 0x9d, 0x3d, 0xd9, 0x01, 0xc5, 0x5f, 0x3c, 0xa8, 0x1d, - 0x99, 0xc6, 0xe7, 0xad, 0x09, 0xd1, 0x39, 0x3a, 0x92, 0xc5, 0x77, 0x9c, - 0xdf, 0x99, 0x56, 0x9f, 0xfe, 0xf8, 0xfd, 0xc8, 0x4f, 0x19, 0xa3, 0xa0, - 0xdf, 0xff, 0x17, 0xac, 0xa9, 0x03, 0x32, 0x85, 0x4c, 0x29, 0xca, 0x89, - 0x58, 0xdc, 0x88, 0xdd, 0xeb, 0x79, 0x68, 0x5e, 0x0f, 0x37, 0x1a, 0xf7, - 0x05, 0xfd, 0x39, 0x91, 0x25, 0x61, 0xf3, 0x04, 0xda, 0x97, 0xfc, 0x7b, - 0xcc, 0x40, 0x63, 0xfd, 0x5b, 0x3b, 0x27, 0x8e, 0x92, 0x6d, 0x98, 0x0f, - 0xcc, 0x9c, 0x9b, 0xda, 0xb2, 0xc6, 0xca, 0x56, 0xff, 0x7e, 0xcc, 0xa2, - 0xc0, 0x45, 0x3e, 0xf6, 0xdf, 0xa7, 0xe8, 0x2a, 0xef, 0x0c, 0xde, 0xec, - 0xa4, 0x1d, 0x2c, 0x3e, 0x03, 0xfd, 0xa4, 0x44, 0x60, 0x4a, 0xf5, 0x83, - 0x8f, 0x09, 0x2d, 0xe8, 0xd5, 0x46, 0xf6, 0x1c, 0x2d, 0x39, 0x28, 0x0c, - 0xdf, 0xa1, 0x2b, 0x05, 0x6e, 0x3c, 0x36, 0xdd, 0x91, 0x81, 0x52, 0xf1, - 0x56, 0xdc, 0xbb, 0x79, 0x62, 0xd8, 0x2e, 0x27, 0x5d, 0x9f, 0x3c, 0xce, - 0x81, 0x5c, 0x70, 0xe5, 0x4d, 0x33, 0x06, 0xd5, 0x14, 0x04, 0xb7, 0xbc, - 0x7b, 0x7a, 0xb4, 0xf7, 0x4a, 0x48, 0x8f, 0x97, 0x85, 0x96, 0x69, 0xc9, - 0x40, 0x52, 0xb1, 0x1c, 0x28, 0x82, 0xb3, 0x63, 0xee, 0x94, 0x2f, 0xcb, - 0x40, 0xad, 0xd7, 0x78, 0xb1, 0xc4, 0x21, 0x05, 0x36, 0xd9, 0x46, 0xf0, - 0x83, 0xcd, 0xee, 0x52, 0x7a, 0xa6, 0xa4, 0x40, 0xb0, 0x2f, 0xf0, 0x1c, - 0xfa, 0x42, 0x98, 0x54, 0x5b, 0xfe, 0x5e, 0xd6, 0x84, 0x73, 0xca, 0x39, - 0xbe, 0x87, 0xf2, 0x92, 0xee, 0x3d, 0x21, 0xcc, 0x69, 0x81, 0xe5, 0xe8, - 0x8a, 0xc3, 0x23, 0x64, 0x98, 0xd5, 0x1d, 0xcd, 0x5c, 0x6c, 0x37, 0xc8, - 0x8b, 0x08, 0x22, 0x12, 0x9f, 0x85, 0xc9, 0xed, 0xb4, 0xa6, 0x07, 0xe1, - 0x62, 0x79, 0x35, 0x5d, 0x26, 0x11, 0x4a, 0x6b, 0x33, 0x37, 0x91, 0x78, - 0xe8, 0xe2, 0xba, 0x8b, 0x8a, 0xb7, 0xbb, 0x0f, 0xd2, 0xb3, 0xa2, 0x02, - 0x0c, 0x57, 0x35, 0x99, 0x88, 0x6b, 0x9b, 0x64, 0x79, 0x1f, 0x4a, 0x48, - 0xd4, 
0x3b, 0x5c, 0xeb, 0xb4, 0x83, 0xc3, 0xad, 0x9c, 0x6a, 0xb0, 0xcf, - 0x7f, 0x70, 0xe8, 0x22, 0x46, 0x25, 0xfe, 0x7e, 0x02, 0x44, 0x83, 0x02, - 0xb3, 0x08, 0x2e, 0x34, 0x08, 0x4b, 0xff, 0xa2, 0xc1, 0x60, 0xbb, 0xd8, - 0x89, 0x16, 0xf8, 0xaa, 0xab, 0xea, 0xf7, 0xa0, 0x10, 0x9a, 0xc9, 0xe9, - 0xa4, 0x81, 0xa7, 0x87, 0x32, 0x5b, 0xc1, 0xd0, 0xd9, 0x70, 0x6f, 0xb6, - 0x7c, 0x65, 0xd5, 0x0e, 0x65, 0x93, 0xfe, 0x6d, 0x66, 0xaa, 0xab, 0xd0, - 0x03, 0x07, 0xf2, 0xbe, 0x39, 0xd6, 0xc8, 0xac, 0xf2, 0x06, 0x58, 0x58, - 0x46, 0xc0, 0x1a, 0xbd, 0xa4, 0x96, 0x38, 0x31, 0x32, 0x89, 0x04, 0xdf, - 0xcd, 0x3c, 0x2e, 0x98, 0xb8, 0x39, 0xba, 0xe2, 0xca, 0x6b, 0xd0, 0x53, - 0xce, 0x4a, 0xc8, 0x95, 0x81, 0x84, 0x17, 0xce, 0x7f, 0x1d, 0xc1, 0x5a, - 0xc4, 0xc2, 0x73, 0x30, 0x6d, 0x0b, 0x8c, 0xf8, 0x66, 0x38, 0x4e, 0xa3, - 0x14, 0x84, 0x15, 0x36, 0x9e, 0x0d, 0x56, 0x6b, 0xa6, 0x77, 0x65, 0xa4, - 0x2c, 0x77, 0x00, 0x8b, 0x43, 0x57, 0xc6, 0x25, 0xc5, 0xd0, 0x17, 0x79, - 0x6b, 0x5d, 0xbc, 0xcd, 0xc8, 0x25, 0x8f, 0x20, 0x09, 0xcc, 0xbd, 0x80, - 0x10, 0xdf, 0x35, 0xf6, 0x9c, 0x04, 0x80, 0x23, 0xdc, 0x97, 0xe0, 0xba, - 0x29, 0x48, 0x2e, 0x95, 0x0f, 0xb1, 0x9b, 0xc7, 0xe6, 0x0b, 0x89, 0x16, - 0xe2, 0x81, 0x3b, 0x32, 0x69, 0xc4, 0xde, 0xc6, 0x12, 0x09, 0x47, 0xff, - 0x50, 0xe4, 0x45, 0xb7, 0x35, 0xd2, 0x61, 0x9b, 0x52, 0x6e, 0xbe, 0xaf, - 0xd2, 0xeb, 0x0c, 0x50, 0xf1, 0x57, 0x9f, 0x59, 0xe1, 0xc1, 0x4f, 0x8c, - 0x79, 0x07, 0x05, 0xce, 0x8d, 0x64, 0xb2, 0xf0, 0xd3, 0x4f, 0xe1, 0x7b, - 0xfa, 0x30, 0x0a, 0xc2, 0x5d, 0x0c, 0x47, 0x6c, 0x17, 0x77, 0x1f, 0xe5, - 0xd8, 0x14, 0xfd, 0xc1, 0x01, 0x70, 0x51, 0x60, 0xb2, 0x20, 0xfd, 0x86, - 0xbc, 0x19, 0x5e, 0x01, 0xa6, 0x19, 0x3a, 0x21, 0xa5, 0x0a, 0x1c, 0xd9, - 0xa9, 0x78, 0xbb, 0xc9, 0x01, 0x65, 0xe4, 0xb3, 0x48, 0xb8, 0xe1, 0xe7, - 0xb5, 0xf4, 0x4e, 0xa9, 0xb6, 0xe2, 0x5b, 0xeb, 0xf5, 0x76, 0x06, 0x1a, - 0xd9, 0x08, 0x40, 0xff, 0x72, 0xb2, 0xe3, 0x01, 0x50, 0xb1, 0xad, 0xb3, - 0xa3, 0xf6, 0xef, 0x72, 0x05, 0x0c, 0xf4, 0xce, 0x24, 0x2c, 0x63, 0x89, - 0x63, 
0x9e, 0x21, 0xb8, 0xb0, 0xbe, 0xc7, 0x45, 0xae, 0x47, 0x2b, 0x9e, - 0x61, 0x81, 0x4c, 0x76, 0x96, 0x7b, 0x18, 0x37, 0x74, 0xcb, 0x00, 0xef, - 0x38, 0x72, 0x24, 0x0a, 0x63, 0xc1, 0x64, 0xd6, 0x41, 0xc8, 0x6a, 0xf1, - 0xe7, 0x11, 0x20, 0x4b, 0xc2, 0x95, 0x70, 0xb8, 0xf8, 0x8f, 0xd9, 0xae, - 0x8c, 0x12, 0xd8, 0x6f, 0x63, 0x30, 0xca, 0x56, 0x46, 0x11, 0xda, 0x49, - 0x1f, 0x84, 0x3d, 0xae, 0xab, 0x78, 0x29, 0x02, 0x6c, 0x43, 0xa3, 0xef, - 0x9d, 0x97, 0x59, 0x15, 0x53, 0xcd, 0xc7, 0x47, 0x65, 0x30, 0xc7, 0xae, - 0x31, 0x4a, 0x41, 0xb4, 0x66, 0x9c, 0xbb, 0x51, 0x0b, 0xbd, 0xe2, 0x7d, - 0x41, 0x2c, 0xd0, 0x75, 0x57, 0x93, 0xce, 0x2e, 0xeb, 0x31, 0x7f, 0x56, - 0xb2, 0xa4, 0x2b, 0x9f, 0xcc, 0xef, 0x6f, 0xf0, 0x77, 0x19, 0xad, 0x4d, - 0x2e, 0x37, 0x00, 0x75, 0x53, 0xae, 0x22, 0x44, 0x69, 0x1c, 0x8a, 0x90, - 0xf2, 0xcd, 0x0f, 0x6b, 0x37, 0xdb, 0xfd, 0x71, 0x64, 0x80, 0xd8, 0x57, - 0x1b, 0x8f, 0xff, 0x14, 0xd4, 0x5f, 0xe1, 0xd1, 0x0f, 0x06, 0x13, 0x61, - 0x29, 0xa9, 0x80, 0x9d, 0xc7, 0x8a, 0xa0, 0xb5, 0xaa, 0xfc, 0xe0, 0xb4, - 0xb4, 0xf0, 0x31, 0xf0, 0xec, 0x78, 0x03, 0x28, 0xb9, 0xf7, 0xd9, 0xa7, - 0xc8, 0xad, 0x2e, 0x16, 0xb8, 0x18, 0x82, 0x43, 0x66, 0x8b, 0xae, 0xb2, - 0x45, 0x2b, 0x0c, 0x9d, 0x69, 0xbd, 0x1b, 0xc5, 0x20, 0xc6, 0x41, 0xe7, - 0x4f, 0x4b, 0x7b, 0x46, 0x3d, 0x7a, 0x6d, 0x9f, 0x13, 0x2e, 0x0f, 0xf3, - 0x85, 0x3e, 0x5b, 0x12, 0xe5, 0xbf, 0x1b, 0x20, 0xc3, 0x5f, 0x6b, 0xf7, - 0xf7, 0xa3, 0xd7, 0x33, 0xd2, 0xcb, 0x18, 0xa5, 0xa4, 0xa2, 0xd3, 0x59, - 0x91, 0x9a, 0x04, 0xfa, 0x9d, 0xa5, 0x55, 0xad, 0x09, 0x5a, 0x1e, 0x0b, - 0x10, 0xd0, 0x46, 0x18, 0xe4, 0x09, 0xe8, 0x1b, 0x44, 0xd3, 0x78, 0x45, - 0xc0, 0xdf, 0xa2, 0xef, 0xfc, 0x59, 0x8a, 0x1b, 0x22, 0x60, 0xc9, 0x58, - 0x7d, 0x65, 0x45, 0xa9, 0xac, 0xd5, 0xd4, 0xc4, 0x44, 0xd3, 0x08, 0x44, - 0x40, 0x4d, 0x3d, 0x7e, 0x39, 0x81, 0x72, 0x15, 0x49, 0xd7, 0x2c, 0xda, - 0x33, 0xaf, 0xc5, 0xb5, 0x8a, 0x3c, 0xbf, 0x81, 0x88, 0x4f, 0x12, 0xe4, - 0xe8, 0xe6, 0x00, 0xb6, 0xd9, 0xcd, 0xb2, 0x70, 0x08, 0x15, 0x72, 0xf6, - 0x46, 
0xc7, 0x98, 0x7c, 0x1d, 0x54, 0xd0, 0x66, 0x2d, 0xa1, 0xd8, 0xda, - 0xb0, 0xe5, 0x9f, 0xa3, 0x2f, 0x2c, 0xfb, 0x34, 0xb3, 0x21, 0x8b, 0x61, - 0xf4, 0xce, 0x60, 0x2b, 0xb5, 0x5e, 0x3d, 0x14, 0x2c, 0xbe, 0x19, 0x9d, - 0x5f, 0x01, 0xe1, 0x21, 0x34, 0x11, 0x6b, 0x10, 0xd4, 0x17, 0x58, 0xb3, - 0x0a, 0x30, 0xe4, 0x17, 0x51, 0x0b, 0xf2, 0xbb, 0xa6, 0xb7, 0x00, 0xa2, - 0xe8, 0xa5, 0xa3, 0x41, 0x1d, 0x65, 0x2d, 0x26, 0x93, 0x26, 0x7d, 0xdc, - 0xad, 0x6f, 0x83, 0xeb, 0x66, 0x55, 0xde, 0x60, 0x21, 0x56, 0x19, 0x4f, - 0x9b, 0x7b, 0x26, 0x4a, 0x80, 0xf5, 0xab, 0x8b, 0xbf, 0xe4, 0xb1, 0xa1, - 0xd6, 0x33, 0x32, 0xbf, 0x86, 0x8c, 0x3c, 0xd0, 0x12, 0x03, 0xd4, 0xb9, - 0x23, 0x54, 0x1b, 0x94, 0x2f, 0xa5, 0x34, 0x4d, 0x59, 0x18, 0x33, 0x8e, - 0x8c, 0xf7, 0x1f, 0xc9, 0x6d, 0x75, 0xfb, 0x2a, 0x22, 0x6c, 0x64, 0xb7, - 0x79, 0xd8, 0x3b, 0xf6, 0x4e, 0x98, 0xd8, 0xa8, 0x2c, 0x06, 0xd1, 0x92, - 0x32, 0x44, 0xec, 0x38, 0x40, 0x3b, 0x53, 0x16, 0x40, 0x8f, 0x92, 0x72, - 0x87, 0xa8, 0xb8, 0xc0, 0x8f, 0x25, 0x4c, 0x4f, 0x24, 0xfc, 0x8d, 0xc6, - 0xa6, 0xeb, 0x2f, 0xdf, 0x2f, 0x0d, 0x2f, 0xd3, 0x6e, 0x70, 0x71, 0xfe, - 0xf0, 0x2e, 0xe9, 0x84, 0xd3, 0xc1, 0xd1, 0x70, 0x4b, 0x8f, 0x7b, 0x60, - 0xb0, 0xb7, 0xe3, 0x79, 0x52, 0x6a, 0x6b, 0x26, 0x03, 0x8f, 0x6a, 0x0f, - 0x8d, 0x85, 0xd7, 0x5f, 0xf7, 0x39, 0x31, 0x0e, 0x26, 0x73, 0x84, 0x3f, - 0x9b, 0x10, 0x6f, 0x29, 0x63, 0x14, 0x36, 0xa2, 0xec, 0x44, 0x7d, 0x84, - 0xc6, 0x4a, 0xec, 0xfe, 0xac, 0xcb, 0xe4, 0xfa, 0xf6, 0x68, 0x83, 0x68, - 0xe0, 0x8f, 0xd3, 0x8a, 0x60, 0x73, 0xf1, 0x5c, 0x71, 0x02, 0x0c, 0xa2, - 0x88, 0x2c, 0xa2, 0x35, 0x35, 0x5c, 0x3f, 0xb1, 0xbe, 0xb3, 0x6b, 0x5c, - 0xe1, 0x78, 0x75, 0x40, 0x20, 0x87, 0x67, 0xca, 0x07, 0x1c, 0x9c, 0x02, - 0xc7, 0xf2, 0x9d, 0x1c, 0xda, 0x1b, 0x86, 0x1b, 0xc6, 0xa6, 0xff, 0xff, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x93, 0xca, 0x30, 0xae, - 0xea, 0x26, 0x6a, 0x1b, 0x15, 0x46, 0x0a, 0xe3, 0x57, 0x23, 0x4c, 0x0c, - 0x98, 0x8e, 0x3e, 0xbb, 0x43, 0x14, 0x73, 0xdf, 0x17, 0x91, 0xe2, 0xee, - 0x39, 
0xf9, 0xc2, 0x2f, 0xdc, 0xad, 0x0e, 0x00, 0xf5, 0xdd, 0xe3, 0x97, - 0xba, 0x8c, 0xee, 0x53, 0xc4, 0x70, 0x37, 0x46, 0xcf, 0x04, 0xc3, 0xc8, - 0x56, 0x38, 0x2e, 0x39, 0x75, 0x32, 0x6d, 0x98, 0xc4, 0x14, 0xae, 0xa4, - 0x29, 0xa3, 0xc6, 0xb6, 0x66, 0x45, 0x48, 0xdf, 0xc0, 0xa9, 0x4b, 0x4f, - 0xef, 0xb9, 0xb4, 0x89, 0x0d, 0x64, 0x00, 0x5c, 0xd1, 0xc8, 0x2b, 0xf7, - 0xc5, 0x1a, 0x1b, 0x06, 0xb7, 0x49, 0xb1, 0xe3, 0x4d, 0x87, 0xf9, 0x3f, - 0xba, 0x39, 0xa3, 0x56, 0x7f, 0x43, 0xcc, 0x15, 0x9c, 0x3d, 0xba, 0x71, - 0x7b, 0xeb, 0x45, 0x0f, 0x15, 0x1b, 0x6c, 0x84, 0x75, 0x6d, 0x43, 0x0b, - 0x27, 0x12, 0x6b, 0xbc, 0x0a, 0x6d, 0xe4, 0xf6, 0x4f, 0xc7, 0xbb, 0x9e, - 0x91, 0xb5, 0x09, 0x5f, 0x79, 0x2a, 0xbf, 0xda, 0x34, 0x91, 0x44, 0x47, - 0x52, 0x64, 0x00, 0x89, 0x27, 0x17, 0x5c, 0xe9, 0x90, 0x8b, 0xcb, 0xbe, - 0x21, 0x47, 0x65, 0x1c, 0x54, 0x61, 0x48, 0x17, 0x66, 0xb7, 0xa1, 0x60, - 0x27, 0x31, 0x04, 0x42, 0x3b, 0x33, 0x3d, 0xda, 0xf7, 0x61, 0x3d, 0x4b, - 0x91, 0xa5, 0x74, 0x4b, 0xde, 0x16, 0xf2, 0x79, 0x3e, 0xf7, 0x89, 0x87, - 0xb3, 0xdd, 0xa2, 0x49, 0xd7, 0x54, 0x1b, 0x39, 0xff, 0xb5, 0xec, 0x9d, - 0x1d, 0x09, 0x7e, 0x5a, 0x3c, 0xd1, 0xdc, 0x0e, 0x2a, 0x0e, 0x2c, 0x40, - 0x4e, 0xa5, 0x8c, 0x9d, 0xc8, 0x9b, 0xa5, 0xb2, 0x40, 0xa4, 0xaa, 0x3b, - 0xac, 0x93, 0x19, 0xf7, 0xa1, 0x8b, 0xf8, 0x4a, 0x40, 0x08, 0x5d, 0x1d, - 0xb0, 0xae, 0x0f, 0x67, 0xa7, 0x21, 0xaf, 0xe3, 0xb1, 0xfc, 0xff, 0xa0, - 0x95, 0x66, 0x2b, 0xf7, 0x82, 0x2d, 0x8a, 0x26, 0x0f, 0xc3, 0xed, 0x62, - 0xb6, 0xcb, 0x4c, 0x86, 0xe9, 0x20, 0x78, 0x3f, 0x08, 0x53, 0x8f, 0x41, - 0xf1, 0xa1, 0x04, 0x77, 0xd9, 0xe6, 0xea, 0x26, 0x6d, 0x33, 0x48, 0xb3, - 0xbb, 0xed, 0xfc, 0xd7, 0xa3, 0x2b, 0xe2, 0x39, 0xcf, 0x78, 0x4e, 0x11, - 0x26, 0xad, 0x39, 0x83, 0x6e, 0x72, 0xbf, 0xc6, 0x34, 0x23, 0x97, 0x5d, - 0x7b, 0x64, 0x1e, 0x78, 0x00, 0x34, 0x92, 0x5d, 0x3f, 0x23, 0x28, 0x60, - 0x7f, 0x88, 0xf0, 0xca, 0x96, 0x4a, 0x15, 0xbf, 0x8a, 0xb7, 0xd0, 0xd9, - 0x99, 0x8b, 0xdb, 0x26, 0xdc, 0x7e, 0x8d, 0x35, 0x53, 0x60, 0x07, 0x85, - 0x80, 
0xc4, 0x9c, 0x0d, 0x81, 0xe2, 0x93, 0x85, 0x76, 0x2d, 0x85, 0x21, - 0x6e, 0xda, 0x29, 0xe5, 0xb1, 0x08, 0x46, 0x09, 0x1b, 0x8a, 0xd9, 0xd2, - 0xd7, 0x16, 0x74, 0xee, 0x26, 0x3e, 0xc4, 0x8c, 0x2e, 0x6b, 0x0c, 0xbc, - 0x95, 0xea, 0x4a, 0xb2, 0xd6, 0x6f, 0x43, 0xd1, 0x3a, 0x8f, 0xbd, 0x77, - 0xb4, 0x67, 0x63, 0x6b, 0xd2, 0xe0, 0xf0, 0x81, 0x74, 0xb7, 0xc5, 0x11, - 0x60, 0x10, 0x6b, 0xc6, 0x0f, 0xfd, 0x84, 0x2e, 0x5c, 0x8f, 0x3b, 0xf5, - 0x68, 0xa7, 0x62, 0xc6, 0x4f, 0xa6, 0xee, 0x19, 0x44, 0xea, 0xc0, 0xe4, - 0x64, 0x12, 0x71, 0x2f, 0xfb, 0xa3, 0x4d, 0xb0, 0x8e, 0x5e, 0xe1, 0x79, - 0x65, 0xd4, 0xf3, 0xed, 0x73, 0x04, 0xf1, 0x6d, 0xc6, 0x75, 0x54, 0x28, - 0x13, 0xe2, 0xd6, 0xa1, 0x26, 0xf9, 0xa4, 0x29, 0x20, 0x5b, 0xd0, 0x3c, - 0x3d, 0xf3, 0x7a, 0x18, 0x9a, 0x3d, 0xec, 0x6a, 0x4c, 0xfd, 0xa5, 0x00, - 0xdf, 0xec, 0xfd, 0x64, 0x38, 0x66, 0xa7, 0xba, 0x59, 0xb3, 0x9b, 0x9c, - 0x44, 0xfb, 0x10, 0x08, 0xb8, 0x79, 0xea, 0x85, 0xbf, 0xa4, 0x14, 0xce, - 0xce, 0x85, 0x22, 0x3f, 0x16, 0x00, 0x1c, 0x57, 0xc8, 0x5a, 0x1b, 0xf5, - 0xff, 0xde, 0x7e, 0xa9, 0xcc, 0xf3, 0xb5, 0x1d, 0x57, 0x06, 0xda, 0xbb, - 0x6c, 0x0a, 0x1e, 0xd4, 0x09, 0x74, 0x84, 0x1d, 0xfa, 0xdf, 0x33, 0x1e, - 0xe2, 0x8f, 0x10, 0xf7, 0x73, 0xab, 0x71, 0xb8, 0x64, 0xce, 0xc0, 0x49, - 0xc0, 0x36, 0xd3, 0x39, 0x31, 0x4c, 0x12, 0x5b, 0xf3, 0xf9, 0xb4, 0x2c, - 0x88, 0xba, 0xd4, 0x1a, 0xbd, 0x0c, 0x99, 0xbd, 0x0e, 0xad, 0x51, 0xe0, - 0xca, 0xdb, 0x25, 0x66, 0x83, 0xe0, 0x55, 0x18, 0xeb, 0xa6, 0x4e, 0x56, - 0xcb, 0x2f, 0xa5, 0xf2, 0x42, 0x7a, 0xa1, 0x05, 0xf0, 0x3a, 0x71, 0x5a, - 0x78, 0x3a, 0x7a, 0x6d, 0x12, 0x9f, 0x43, 0xc5, 0xcc, 0xb3, 0xfd, 0xf2, - 0xbf, 0x05, 0x16, 0xef, 0x07, 0xf9, 0xde, 0x0d, 0x51, 0xf0, 0x33, 0x86, - 0x43, 0x57, 0x40, 0xbc, 0xa9, 0xbd, 0xa0, 0x23, 0xff, 0xbb, 0xe6, 0x15, - 0xa1, 0xeb, 0xe9, 0x78, 0x0d, 0x72, 0x76, 0xf2, 0xb6, 0x6e, 0x46, 0xe2, - 0x86, 0xab, 0x3c, 0x52, 0x2c, 0xc6, 0x77, 0xdd, 0x57, 0xf7, 0x4d, 0x36, - 0xbb, 0x41, 0x08, 0x21, 0xaa, 0xe6, 0x44, 0x50, 0xed, 0xaf, 0x18, 0xb3, - 0xdd, 
0x6b, 0x57, 0x46, 0x9e, 0x44, 0x93, 0x20, 0xe0, 0x62, 0x95, 0xcd, - 0xcf, 0xe4, 0x96, 0x92, 0xc3, 0x0d, 0x16, 0xb2, 0xc3, 0xf4, 0x0f, 0x3f, - 0x87, 0x17, 0xb9, 0x7b, 0x60, 0x60, 0xfa, 0xfb, 0x81, 0x5c, 0xb3, 0xb7, - 0x89, 0x73, 0xf7, 0x35, 0xf7, 0x27, 0xf1, 0x0e, 0xa4, 0xa1, 0xba, 0xea, - 0x6a, 0xe3, 0x5c, 0x0f, 0xf7, 0x15, 0xbc, 0x28, 0x57, 0x27, 0x8f, 0xd8, - 0xca, 0x82, 0x19, 0xd0, 0xa3, 0x9d, 0xe5, 0xe0, 0x44, 0xbf, 0x78, 0xa4, - 0x09, 0x69, 0x27, 0xa0, 0x69, 0xb5, 0xd4, 0xbe, 0x00, 0xe6, 0x03, 0x97, - 0xbc, 0x8b, 0xfc, 0x25, 0x70, 0xb3, 0x49, 0x30, 0xe3, 0x24, 0x19, 0x77, - 0xb4, 0x93, 0x46, 0x03, 0xe6, 0x22, 0xaf, 0x76, 0xd2, 0x90, 0x00, 0x05, - 0x46, 0xb8, 0xa4, 0xf5, 0x4c, 0xaa, 0x04, 0x63, 0xa0, 0x57, 0xe0, 0x20, - 0x6e, 0x1a, 0xed, 0x21, 0x86, 0xd0, 0x38, 0x5b, 0xe6, 0xa7, 0xb0, 0xe7, - 0x75, 0xe3, 0x76, 0xb3, 0x15, 0x8b, 0xdc, 0x10, 0x52, 0x15, 0x21, 0x7b, - 0xd0, 0xc4, 0x75, 0x26, 0x1d, 0x6e, 0x0d, 0x4c, 0x08, 0x5b, 0x95, 0x9a, - 0xd0, 0xda, 0xbe, 0x23, 0x98, 0xde, 0x60, 0x2a, 0xe9, 0xa4, 0x92, 0xf0, - 0x92, 0x84, 0xdc, 0x86, 0x60, 0xf5, 0x23, 0x31, 0xf5, 0xe9, 0xd6, 0x00, - 0xc1, 0x78, 0xab, 0x05, 0x94, 0xd3, 0x47, 0x4d, 0x32, 0x0f, 0x82, 0xa0, - 0x99, 0x0b, 0xfe, 0x6b, 0x58, 0xf9, 0x24, 0xf6, 0x17, 0xa0, 0x5f, 0x24, - 0x6a, 0xc6, 0x01, 0xa8, 0xfa, 0xca, 0xdc, 0xb6, 0x83, 0xcb, 0xd2, 0x3b, - 0xb7, 0x0b, 0x04, 0x3e, 0x6a, 0xaf, 0x23, 0x17, 0x3e, 0x14, 0xce, 0x52, - 0x1c, 0xe3, 0x06, 0x66, 0x29, 0x17, 0x6f, 0x7e, 0x66, 0x06, 0xa9, 0x68, - 0x7f, 0xca, 0xad, 0xa8, 0xb7, 0x2d, 0xa4, 0x5d, 0xa6, 0x16, 0xcd, 0xed, - 0xee, 0x14, 0x96, 0xc8, 0x12, 0x69, 0x4e, 0x70, 0x72, 0x2a, 0x75, 0x82, - 0x08, 0x3f, 0x3e, 0x27, 0xa0, 0xea, 0x43, 0x84, 0xa9, 0x9a, 0x91, 0x87, - 0x4f, 0x20, 0x61, 0x55, 0x8d, 0x70, 0xad, 0x6c, 0x59, 0x5d, 0x13, 0x80, - 0xbb, 0x52, 0x55, 0x81, 0x8b, 0x59, 0x94, 0x0f, 0xc2, 0x54, 0x79, 0x59, - 0xe8, 0x9d, 0x58, 0xe5, 0x91, 0x10, 0xb3, 0xef, 0x1c, 0xda, 0xaa, 0xdd, - 0x91, 0x0b, 0xb0, 0x14, 0x3b, 0xad, 0x02, 0x98, 0x40, 0x3c, 0x54, 0xc4, - 0x23, 
0xb9, 0x40, 0x54, 0x7e, 0x88, 0x10, 0x3e, 0x24, 0xe5, 0xf6, 0xdf, - 0x5c, 0x9e, 0x7a, 0x9f, 0xd0, 0xff, 0x5e, 0x9c, 0xb6, 0x30, 0x17, 0x94, - 0xd2, 0xaa, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, - 0x96, 0xff, 0x2f, 0x01, 0x60, 0x2c, 0x1b, 0xe3, 0xc6, 0xcb, 0xa4, 0x41, - 0xa1, 0x44, 0x13, 0x14, 0xe2, 0x44, 0x77, 0x1c, 0x96, 0xe8, 0xe6, 0x4f, - 0x70, 0x99, 0x3a, 0xef, 0xa1, 0x6f, 0x1f, 0x7f, 0xb9, 0xe9, 0x1e, 0x35, - 0x37, 0x5b, 0x94, 0x90, 0x78, 0xcc, 0x8d, 0xcd, 0x6c, 0x9f, 0xf6, 0x73, - 0xed, 0x23, 0xa2, 0x28, 0x64, 0x58, 0x50, 0x64, 0x05, 0xbc, 0xc9, 0x9b, - 0x5a, 0xec, 0x3f, 0x2b, 0x61, 0xcf, 0xa7, 0x35, 0x56, 0x8c, 0x77, 0x68, - 0xd6, 0xcf, 0x9b, 0xc5, 0x62, 0xee, 0x3a, 0xb2, 0xfe, 0x78, 0xba, 0x02, - 0xe7, 0x26, 0x8a, 0x89, 0x30, 0x19, 0xcc, 0xb0, 0x98, 0xbf, 0x30, 0x2c, - 0xae, 0x13, 0x6c, 0x93, 0x86, 0x19, 0x84, 0x13, 0x01, 0x2f, 0x39, 0x4e, - 0x33, 0xd1, 0x15, 0x99, 0xf7, 0x1e, 0xb8, 0x86, 0xdb, 0xb6, 0xf9, 0x56, - 0x42, 0x0e, 0x4a, 0xb1, 0x5e, 0xf0, 0x9a, 0x06, 0x5e, 0xab, 0xff, 0xff, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0xcd, 0xde, 0xad, 0x40, - 0x34, 0xcd, 0x79, 0x0a, 0x29, 0x84, 0x05, 0x3f, 0xb5, 0xbe, 0x49, 0x84, - 0x43, 0xcc, 0xa6, 0xe3, 0xe9, 0xdc, 0x84, 0x14, 0xe7, 0xb3, 0x1b, 0x96, - 0xe8, 0xda, 0x35, 0x15, 0x38, 0xf5, 0xb3, 0xb5, 0x91, 0xc3, 0xc3, 0x94, - 0xc6, 0x79, 0xeb, 0xf5, 0x22, 0x78, 0xf0, 0x0b, 0xda, 0xb0, 0x91, 0xa7, - 0x43, 0x71, 0x8e, 0xa6, 0x52, 0x0f, 0x81, 0x06, 0xc8, 0xdf, 0xb5, 0x1f, - 0x92, 0xb0, 0xfe, 0x93, 0x38, 0x4c, 0xf4, 0x17, 0x66, 0x31, 0xea, 0x08, - 0x72, 0xb9, 0xaa, 0xfd, 0x40, 0x8d, 0xbf, 0x56, 0x19, 0xb1, 0xb5, 0x8e, - 0x4e, 0x4e, 0x73, 0x7f, 0x4b, 0x0c, 0x70, 0x94, 0x7c, 0x9f, 0xfc, 0x23, - 0x35, 0xba, 0xd2, 0x23, 0x88, 0x1d, 0x83, 0x28, 0x45, 0xd7, 0x1b, 0x63, - 0xfb, 0x36, 0x86, 0x06, 0xf3, 0x99, 0x81, 0x6e, 0xd7, 0xf1, 0xd4, 0x53, - 0x6d, 0x30, 0x3c, 0x8d, 0xac, 0xc6, 0x9a, 0xd5, 0xe8, 0x4f, 0x11, 0x58, - 0xba, 0xfd, 0x67, 0x06, 0xe7, 0x1a, 0xb4, 0xa1, 0x45, 0x13, 0xf2, 0x3b, - 0xdc, 
0x71, 0xf0, 0xc6, 0x53, 0xfc, 0x8b, 0x2f, 0x14, 0xe4, 0xe0, 0xd6, - 0x8c, 0x96, 0x4c, 0x48, 0xc0, 0x30, 0x6e, 0x00, 0x0f, 0x42, 0xfe, 0xa7, - 0x9d, 0x0f, 0xf2, 0x52, 0x58, 0xf9, 0x35, 0x33, 0x99, 0xda, 0xd5, 0x9d, - 0x61, 0x26, 0x6b, 0x80, 0xff, 0x08, 0x51, 0x54, 0x26, 0xfa, 0x8d, 0xfc, - 0x67, 0x60, 0x93, 0x0e, 0xcd, 0x78, 0x41, 0x5a, 0x31, 0x47, 0x14, 0xb0, - 0x65, 0x89, 0x30, 0xcb, 0x0c, 0xc5, 0xa0, 0x37, 0xa8, 0xe0, 0xcf, 0x24, - 0xa4, 0x2f, 0xad, 0xa7, 0x9c, 0xa2, 0xe8, 0x81, 0x17, 0xbe, 0x2f, 0xd5, - 0xd1, 0xa8, 0xff, 0x9d, 0x5e, 0x7f, 0xd9, 0x6c, 0x56, 0xe6, 0xc4, 0x60, - 0x8d, 0xa5, 0x47, 0x5e, 0x43, 0x1e, 0x34, 0x23, 0xb3, 0x6a, 0xdf, 0x6c, - 0xf8, 0xd1, 0x85, 0x11, 0xaa, 0x74, 0x85, 0x71, 0x27, 0xc5, 0x80, 0x37, - 0x60, 0xb4, 0x2b, 0x53, 0x5a, 0xc4, 0x35, 0xd1, 0xe8, 0x4b, 0x01, 0x58, - 0x1f, 0xdb, 0x73, 0xf3, 0x2c, 0x8b, 0xbb, 0x17, 0x36, 0x76, 0x35, 0x6b, - 0xa0, 0x82, 0x47, 0xf5, 0x16, 0x21, 0x41, 0x43, 0xc9, 0x1f, 0x53, 0xf9, - 0xe9, 0x47, 0xf0, 0x9c, 0x6d, 0xe3, 0x23, 0x59, 0x74, 0xdc, 0x1a, 0x8f, - 0x4e, 0x6c, 0x71, 0x83, 0x7e, 0xd0, 0x2b, 0x50, 0x44, 0x86, 0x5f, 0xbf, - 0x60, 0x92, 0xeb, 0x9a, 0x9b, 0xa2, 0xc9, 0x2b, 0xa8, 0xc4, 0x77, 0x4e, - 0x3f, 0xf8, 0xa6, 0x39, 0x50, 0x5c, 0x7e, 0x2a, 0x70, 0xb0, 0x5d, 0x28, - 0xb2, 0x81, 0xa9, 0xaf, 0x16, 0x5e, 0x27, 0xeb, 0x03, 0x0e, 0x82, 0xad, - 0x28, 0x51, 0x16, 0xd1, 0xf4, 0x58, 0x75, 0x1a, 0xf9, 0x6a, 0xbf, 0x73, - 0xd7, 0x84, 0x07, 0x7f, 0x4c, 0x4e, 0x29, 0x02, 0x9b, 0x60, 0x81, 0x85, - 0xa9, 0xbf, 0xc7, 0xa0, 0x8f, 0x8a, 0xdc, 0xa4, 0xc5, 0x17, 0x51, 0x24, - 0x15, 0x28, 0x9e, 0x5e, 0x78, 0x84, 0x21, 0x02, 0xca, 0x26, 0x61, 0x4e, - 0x95, 0xa6, 0x8d, 0xa6, 0x98, 0x7d, 0x1f, 0x84, 0x19, 0x24, 0x8b, 0x31, - 0x76, 0x89, 0x2a, 0x5f, 0xa9, 0xfb, 0xaa, 0x8a, 0x8c, 0xce, 0xe4, 0x30, - 0xd6, 0xec, 0x5b, 0x39, 0xb7, 0x09, 0x80, 0x23, 0x4c, 0xe1, 0x6e, 0x8f, - 0x7c, 0x10, 0xe8, 0x8a, 0x60, 0x35, 0xd7, 0xa3, 0xe0, 0x5f, 0xcd, 0xfa, - 0x3d, 0x8f, 0xd8, 0x5d, 0xec, 0xc9, 0xc5, 0xa0, 0x73, 0x41, 0x89, 0xe5, - 0x39, 
0xf2, 0x42, 0xff, 0x08, 0xa0, 0x12, 0xb7, 0x4a, 0x5e, 0x46, 0x06, - 0x31, 0xbd, 0x88, 0x5e, 0x9e, 0x05, 0x17, 0x51, 0xb3, 0xe7, 0x88, 0x10, - 0x19, 0x32, 0xff, 0x8a, 0x1e, 0xce, 0x66, 0xbc, 0x84, 0x1f, 0xed, 0x52, - 0x52, 0x77, 0xe1, 0x5e, 0xa6, 0x21, 0xe4, 0xad, 0x59, 0xca, 0xa3, 0x77, - 0xea, 0x66, 0x28, 0x15, 0x73, 0x3a, 0xfd, 0xe4, 0x75, 0x46, 0x99, 0x59, - 0x5c, 0x7a, 0x9b, 0x9d, 0x11, 0xb4, 0x76, 0x45, 0x06, 0x45, 0x41, 0x1e, - 0x94, 0xb7, 0xd9, 0xb8, 0xcb, 0xbf, 0x71, 0xec, 0xba, 0x9f, 0x4a, 0x1b, - 0xbc, 0xfd, 0x5c, 0x06, 0x64, 0xfd, 0x31, 0x52, 0xc0, 0xe4, 0xa7, 0x21, - 0x2f, 0x22, 0x92, 0xf0, 0x51, 0x33, 0x92, 0x1d, 0x40, 0x3c, 0x01, 0x81, - 0x3b, 0xa8, 0x2e, 0x4e, 0xb6, 0x60, 0xcd, 0xd4, 0x36, 0x3b, 0x2e, 0x1d, - 0x5e, 0x43, 0xd9, 0x94, 0xf1, 0x51, 0xd3, 0x59, 0x94, 0x6a, 0xd5, 0x5f, - 0x1f, 0xd3, 0xa6, 0x55, 0xda, 0x15, 0xf1, 0x3e, 0x2c, 0x60, 0xb8, 0xc3, - 0xda, 0x0e, 0x56, 0x53, 0xea, 0xcd, 0x39, 0x27, 0x94, 0x86, 0x94, 0xb2, - 0x5b, 0xd8, 0x9a, 0x12, 0x94, 0xb0, 0xb6, 0x77, 0x28, 0xba, 0xde, 0xb6, - 0x60, 0x4d, 0x2b, 0x6e, 0x3d, 0xf6, 0xf1, 0x48, 0xf7, 0x77, 0xa1, 0x49, - 0xe0, 0x9f, 0x1e, 0xc9, 0xe6, 0xcb, 0x95, 0x26, 0x61, 0x5a, 0xc9, 0xed, - 0x49, 0x40, 0x17, 0x57, 0x15, 0xfc, 0x3c, 0xb8, 0x28, 0x79, 0xb8, 0x42, - 0x2a, 0xf9, 0xd4, 0x19, 0xb9, 0x5f, 0x41, 0xc2, 0x25, 0xd7, 0x88, 0x34, - 0xb3, 0x25, 0x4e, 0xca, 0xff, 0x9e, 0x59, 0x9a, 0x33, 0xc8, 0x12, 0xf9, - 0xd5, 0x70, 0xc0, 0x8b, 0x43, 0x13, 0xc4, 0x8d, 0x45, 0x99, 0xaa, 0xd7, - 0xeb, 0xb1, 0xe9, 0xb7, 0x5b, 0xab, 0x48, 0xd1, 0x26, 0x60, 0x8c, 0x13, - 0x55, 0x8a, 0x41, 0xd3, 0x68, 0x58, 0xd4, 0xa6, 0x30, 0x6e, 0x88, 0x3e, - 0x81, 0x6e, 0x61, 0x06, 0x13, 0x66, 0xd5, 0x8e, 0x5d, 0x87, 0x4f, 0xd9, - 0xb1, 0x66, 0xb3, 0xc5, 0x88, 0xa9, 0xc0, 0x73, 0xcb, 0x7f, 0x42, 0xec, - 0x96, 0x64, 0xad, 0x72, 0x85, 0x72, 0xaf, 0xeb, 0xa9, 0xc4, 0x17, 0x86, - 0xab, 0xe7, 0x23, 0xd7, 0x96, 0xf7, 0xb2, 0xb3, 0x51, 0xe1, 0x9a, 0x3b, - 0x0e, 0xaf, 0x89, 0xca, 0x7b, 0xf1, 0x70, 0x7b, 0xc7, 0x82, 0xfc, 0xc7, - 0x6c, 
0x37, 0xd9, 0x7b, 0x82, 0x0f, 0x94, 0xcf, 0xd1, 0xa9, 0x33, 0xc2, - 0xa4, 0xab, 0xed, 0xad, 0xee, 0x64, 0x5d, 0x04, 0xf2, 0xcb, 0x8e, 0x99, - 0x22, 0x33, 0x69, 0x85, 0x85, 0xb6, 0x1a, 0x9b, 0x09, 0x18, 0xbe, 0xcd, - 0x63, 0xf6, 0x5d, 0x52, 0xbc, 0x26, 0x99, 0x3e, 0x52, 0xe5, 0x0c, 0xc5, - 0xee, 0xdd, 0xbb, 0x07, 0xbc, 0x38, 0xc1, 0x67, 0x96, 0x8c, 0xe6, 0xe4, - 0x18, 0xfa, 0x07, 0x91, 0x48, 0xef, 0x9c, 0x70, 0x9d, 0x5b, 0x1c, 0x0e, - 0xd5, 0xd3, 0x59, 0xee, 0x44, 0x13, 0xf7, 0x00, 0xa6, 0x20, 0xad, 0x65, - 0x1d, 0xb7, 0x96, 0x2f, 0x79, 0x7b, 0x04, 0xa3, 0x10, 0x90, 0x29, 0x8c, - 0xa3, 0x2e, 0x14, 0x39, 0xd3, 0xe4, 0x6e, 0x46, 0xf7, 0x6e, 0x96, 0x68, - 0xd9, 0xef, 0x45, 0xf7, 0x3c, 0xcd, 0xc7, 0xca, 0x33, 0x64, 0x8e, 0x31, - 0x80, 0x48, 0x7b, 0x7c, 0x81, 0x9a, 0x48, 0xff, 0xd5, 0x0d, 0x74, 0xe7, - 0x77, 0x46, 0x61, 0x9b, 0xde, 0xed, 0x83, 0xe9, 0x4f, 0x92, 0xc1, 0x16, - 0xad, 0x44, 0x40, 0x23, 0xce, 0x04, 0x31, 0xbf, 0xcf, 0xe2, 0x5a, 0x68, - 0x5a, 0xf4, 0x0f, 0xe1, 0x87, 0x79, 0xb0, 0x32, 0x0b, 0x09, 0x6b, 0x72, - 0x2b, 0x16, 0x06, 0x67, 0x82, 0x0b, 0x92, 0x35, 0xdb, 0x4c, 0xe2, 0x4a, - 0x60, 0x99, 0xaf, 0x52, 0x10, 0x4b, 0xa5, 0xcf, 0xac, 0x66, 0x49, 0x56, - 0x04, 0xc0, 0xd6, 0x6f, 0x62, 0x53, 0x6f, 0xcb, 0x62, 0xe9, 0xa5, 0xca, - 0x18, 0x8e, 0x86, 0x3f, 0x36, 0xfd, 0xea, 0x55, 0x16, 0x6d, 0x6c, 0x6a, - 0x8f, 0xa7, 0x9c, 0x70, 0x15, 0xd7, 0xf4, 0x57, 0x68, 0x04, 0x84, 0x60, - 0x3b, 0xb0, 0x32, 0xc4, 0xea, 0x9d, 0x70, 0xb9, 0xa6, 0x34, 0xe5, 0xfa, - 0xa1, 0x24, 0x54, 0x7f, 0xef, 0xac, 0xb4, 0x5f, 0xa0, 0xc0, 0x40, 0x3f, - 0x73, 0xdf, 0x56, 0xa6, 0xd9, 0x17, 0xf4, 0xff, 0x50, 0xae, 0x21, 0x0d, - 0x5a, 0xe0, 0xb0, 0xf9, 0x5b, 0x7a, 0x61, 0x6e, 0xa6, 0x85, 0x85, 0xbf, - 0x19, 0x03, 0xe2, 0x74, 0x1f, 0x03, 0x70, 0x76, 0x3c, 0xed, 0x02, 0x7d, - 0xfa, 0xf9, 0x1e, 0x17, 0xdd, 0x42, 0x30, 0xf0, 0x32, 0x47, 0x46, 0xae, - 0xf5, 0x64, 0xe6, 0x5e, 0x2b, 0x40, 0x86, 0x97, 0xb1, 0x24, 0x52, 0x69, - 0x67, 0x79, 0x8e, 0x0d, 0xcc, 0x07, 0xcb, 0x72, 0x29, 0xe9, 0xba, 0x2d, - 0xf7, 
0xcb, 0xe3, 0x86, 0x06, 0xaa, 0x6d, 0x79, 0xf8, 0xb6, 0x93, 0x0a, - 0x9c, 0x97, 0xef, 0x47, 0x37, 0x13, 0x2e, 0x6b, 0xfd, 0x59, 0x0c, 0xc9, - 0x5e, 0x5e, 0xcd, 0x71, 0x6f, 0x99, 0x0d, 0x88, 0x9d, 0xbb, 0x7c, 0x2b, - 0x22, 0xd5, 0xbe, 0xee, 0x26, 0x1c, 0xe1, 0xad, 0xc8, 0x4d, 0x5f, 0x6b, - 0xd1, 0xf4, 0x30, 0x4d, 0x46, 0x1d, 0x54, 0x11, 0x4b, 0xa0, 0x7f, 0x94, - 0x71, 0xc0, 0x44, 0x4a, 0x42, 0x11, 0xf5, 0x89, 0xec, 0xb5, 0x24, 0x45, - 0xf1, 0xf0, 0x30, 0x54, 0xf8, 0x62, 0xdb, 0x58, 0x3d, 0x7c, 0x2a, 0x82, - 0xe5, 0xbe, 0x13, 0xcf, 0xdc, 0x88, 0xfb, 0xd3, 0x1e, 0x4d, 0xa5, 0x3e, - 0xad, 0x95, 0xa2, 0xe6, 0x48, 0x73, 0xb2, 0xbe, 0x96, 0xef, 0x8e, 0x0b, - 0x28, 0xf9, 0xbe, 0x2a, 0xd6, 0x68, 0x9e, 0x9c, 0x7b, 0x5a, 0xaf, 0x20, - 0xf6, 0xa5, 0x3f, 0x99, 0x61, 0x57, 0xe8, 0x1c, 0xb2, 0xc3, 0xd0, 0x7f, - 0x2c, 0xb5, 0xe9, 0x66, 0x8e, 0x88, 0xec, 0x13, 0x51, 0xbc, 0x8e, 0xb6, - 0xe2, 0x91, 0xbf, 0x5e, 0x8c, 0x1c, 0xdd, 0x0e, 0x0a, 0x13, 0x06, 0xc6, - 0x62, 0x1c, 0x41, 0x8d, 0xa1, 0xc0, 0xf2, 0xfa, 0x76, 0x35, 0xaa, 0x77, - 0x06, 0x3f, 0x76, 0x50, 0xf6, 0x43, 0xf2, 0x25, 0x00, 0x79, 0xde, 0xca, - 0xa1, 0x06, 0x6f, 0xb4, 0x17, 0x4b, 0x99, 0x5a, 0x00, 0x32, 0xd6, 0xb0, - 0x1f, 0x80, 0x53, 0x16, 0xaa, 0x87, 0x72, 0xa2, 0x34, 0xaf, 0x90, 0x3d, - 0x60, 0xde, 0x0e, 0x6d, 0x83, 0xda, 0xb2, 0x11, 0x2f, 0x39, 0xdc, 0x1a, - 0xfe, 0x51, 0x74, 0x10, 0x3c, 0x41, 0xd5, 0x41, 0x65, 0x4a, 0xa0, 0x11, - 0xde, 0x95, 0x34, 0xef, 0xa0, 0xc9, 0xa8, 0xd3, 0xcb, 0xb9, 0x7d, 0x51, - 0x7d, 0xff, 0x26, 0x88, 0xd8, 0x29, 0x0e, 0xa0, 0xd4, 0xa7, 0x07, 0x33, - 0xe7, 0x7d, 0x59, 0x9f, 0x35, 0xc1, 0xb5, 0xf7, 0x78, 0x78, 0x84, 0xf0, - 0x20, 0x41, 0x3f, 0x02, 0x7d, 0x41, 0x90, 0x01, 0x8d, 0xa4, 0xd8, 0xd7, - 0xeb, 0x56, 0x7f, 0x38, 0xbc, 0x1e, 0x15, 0xdf, 0xfc, 0x34, 0xe7, 0x99, - 0xd4, 0x92, 0xd5, 0xf3, 0x9e, 0x16, 0x0b, 0x5c, 0xeb, 0xb6, 0x78, 0xac, - 0x84, 0x06, 0x8e, 0xfe, 0xd0, 0x7c, 0xce, 0x4a, 0x43, 0x49, 0x3b, 0xe1, - 0xab, 0x57, 0xc0, 0x12, 0xd6, 0x9d, 0xa4, 0xee, 0x91, 0x10, 0x81, 0xe2, - 0xfc, 
0x02, 0x26, 0x7a, 0xca, 0x81, 0x5b, 0x2f, 0x34, 0x51, 0xdd, 0x25, - 0x4d, 0xc8, 0xf9, 0x3e, 0x59, 0x0f, 0x3d, 0x64, 0x51, 0xbf, 0x42, 0xc4, - 0x92, 0x9d, 0x8f, 0x39, 0x8a, 0x31, 0x09, 0x24, 0x19, 0x44, 0xc0, 0xf4, - 0xea, 0xca, 0x59, 0xcb, 0x86, 0x6c, 0x02, 0x7a, 0xe5, 0x30, 0x79, 0xe2, - 0x2c, 0x76, 0x08, 0x8f, 0x98, 0x0d, 0x4d, 0x12, 0xc3, 0x98, 0xb4, 0x24, - 0x04, 0x4f, 0x51, 0xec, 0x4e, 0xec, 0xbd, 0x8c, 0xc4, 0x79, 0x51, 0x7f, - 0xe1, 0xce, 0x76, 0x28, 0x0b, 0x7b, 0xc5, 0x3f, 0x5b, 0x48, 0x19, 0x76, - 0x68, 0x31, 0x8e, 0x28, 0xff, 0x18, 0x24, 0xe3, 0x91, 0xe7, 0x49, 0x0d, - 0x10, 0xbd, 0x00, 0xc6, 0x58, 0xfd, 0xb6, 0x88, 0x63, 0xbd, 0xb4, 0x4b, - 0xb8, 0xed, 0xdd, 0xb7, 0x53, 0xce, 0x89, 0xdb, 0x7f, 0xf4, 0xc3, 0x21, - 0x31, 0xad, 0x20, 0x78, 0x06, 0x71, 0xaf, 0xc0, 0xe3, 0xdc, 0xb8, 0xf4, - 0x80, 0xc8, 0x33, 0x1d, 0x8b, 0xff, 0x5a, 0x92, 0x68, 0x4d, 0xc1, 0x5b, - 0x58, 0x3e, 0xf6, 0x7f, 0xba, 0x42, 0xa5, 0x6d, 0xec, 0x03, 0x36, 0xc9, - 0x3f, 0x83, 0x1f, 0x0c, 0x33, 0x57, 0x6a, 0x43, 0x5f, 0x11, 0x72, 0x19, - 0x2c, 0xda, 0x71, 0x58, 0xf2, 0x50, 0x50, 0x06, 0x97, 0xd0, 0xdf, 0xd1, - 0x4f, 0x0b, 0x00, 0x1a, 0xea, 0x85, 0x3b, 0x37, 0x2f, 0xf0, 0x40, 0x52, - 0xd9, 0x2a, 0xe8, 0x54, 0xa5, 0xee, 0x0f, 0x49, 0x74, 0x39, 0x96, 0x5d, - 0x60, 0x8f, 0x14, 0x59, 0x86, 0x59, 0x86, 0xfb, 0x67, 0x71, 0x5c, 0x26, - 0x5f, 0xe9, 0xab, 0x32, 0x77, 0x83, 0xdf, 0x02, 0x19, 0x85, 0xae, 0x4d, - 0x7d, 0x9c, 0x8d, 0x4f, 0x61, 0x05, 0x3c, 0x0c, 0xc6, 0x74, 0x9e, 0x36, - 0x33, 0xb8, 0x14, 0x85, 0xab, 0xa2, 0x0b, 0x5d, 0x22, 0xf2, 0x50, 0x3e, - 0xa4, 0x88, 0xac, 0x67, 0xf9, 0x06, 0xe5, 0x30, 0x8e, 0xf9, 0x67, 0x34, - 0xd5, 0x94, 0x5b, 0x35, 0xb7, 0x3d, 0x39, 0x5f, 0x4e, 0xae, 0xfe, 0xf7, - 0x57, 0xd3, 0x95, 0x7b, 0x0a, 0xd9, 0x92, 0x4a, 0x66, 0x29, 0xa0, 0x18, - 0x35, 0x54, 0x14, 0x44, 0x79, 0x72, 0xc3, 0xbc, 0xa8, 0x1a, 0xd3, 0xa3, - 0xbe, 0x6f, 0x9e, 0xcc, 0x68, 0xb6, 0x5f, 0xd4, 0x42, 0xab, 0xe8, 0x09, - 0x60, 0x57, 0x2e, 0xb2, 0x9a, 0x5b, 0x62, 0x38, 0xfb, 0x0a, 0x35, 0x9c, - 0x4f, 
0xf7, 0xe0, 0xd2, 0x06, 0x04, 0x1f, 0x79, 0x7f, 0xa7, 0x7b, 0xd3, - 0x63, 0xc9, 0xbd, 0x16, 0x58, 0x38, 0x7b, 0xaa, 0x08, 0xf3, 0x14, 0x6c, - 0x25, 0xf8, 0xa5, 0xe9, 0x4b, 0x45, 0x34, 0x89, 0x76, 0x74, 0xcb, 0x41, - 0x9c, 0x2a, 0xd9, 0xca, 0xb3, 0x12, 0x46, 0x6d, 0x85, 0x4d, 0x63, 0x2d, - 0x24, 0x1b, 0x19, 0x6b, 0x3f, 0x61, 0x6b, 0x4b, 0x15, 0x83, 0x2d, 0x8f, - 0x61, 0xab, 0xd1, 0x55, 0x93, 0x4e, 0x26, 0xd6, 0x7a, 0x0a, 0x8a, 0xff, - 0x58, 0x44, 0xf7, 0x39, 0x31, 0x1a, 0xab, 0xa6, 0x98, 0x31, 0x41, 0x03, - 0xb6, 0xc9, 0xf5, 0x50, 0xe3, 0x7b, 0xc0, 0x59, 0x74, 0x60, 0x91, 0xb4, - 0x79, 0x02, 0x25, 0xc1, 0xb5, 0xbd, 0xcb, 0x6e, 0x40, 0x61, 0xfe, 0x68, - 0x29, 0x83, 0x1b, 0xd2, 0x49, 0xe1, 0x31, 0xde, 0xdd, 0x53, 0xb0, 0xb8, - 0x96, 0xa2, 0xce, 0xea, 0x8b, 0x66, 0x2c, 0x5a, 0x80, 0x51, 0x0b, 0xc1, - 0x2d, 0x9a, 0xfa, 0x9d, 0xc6, 0xcc, 0x2b, 0xbb, 0xaa, 0xce, 0x98, 0xaa, - 0x26, 0x15, 0x8f, 0x4a, 0xe7, 0xdb, 0x17, 0x6c, 0xe5, 0x58, 0xc9, 0xae, - 0xe4, 0x9c, 0x1d, 0xab, 0x59, 0x84, 0x3e, 0x27, 0x76, 0x03, 0xe3, 0x82, - 0x64, 0x6f, 0x6e, 0x6f, 0x63, 0xd2, 0x12, 0x84, 0xe3, 0x9b, 0x9d, 0x7e, - 0x53, 0x1a, 0x54, 0x8d, 0xc1, 0xf0, 0x94, 0xae, 0xad, 0x8f, 0x6a, 0x12, - 0x4e, 0xa7, 0x30, 0xdb, 0x55, 0xbe, 0x09, 0xe2, 0x56, 0x08, 0xc4, 0x3a, - 0xb0, 0x55, 0xb0, 0x24, 0x96, 0xa6, 0x3e, 0x28, 0xd0, 0x35, 0xfb, 0x58, - 0x47, 0xba, 0x2d, 0x51, 0xbb, 0x72, 0x20, 0x59, 0xd2, 0xdd, 0x9c, 0xe2, - 0xb5, 0x31, 0x90, 0xac, 0x74, 0x5d, 0x9f, 0x3d, 0x8c, 0x1c, 0x96, 0xc0, - 0x60, 0x61, 0xa8, 0xbb, 0x3c, 0xb3, 0x6d, 0x6d, 0x92, 0x4a, 0xca, 0xbb, - 0x60, 0x5e, 0x82, 0x0d, 0x7f, 0xab, 0x4b, 0x36, 0x4c, 0x93, 0x0d, 0x88, - 0x71, 0xaf, 0xb6, 0x53, 0xb0, 0x38, 0xb4, 0x1c, 0xb4, 0x7b, 0xd4, 0x13, - 0x32, 0x6c, 0xe4, 0xee, 0x6a, 0xb3, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x02, 0x00, 0x00, 0x88, 0x83, 0x91, 0x4c, 0x2e, 0x1e, 0xbe, 0xa4, - 0xb5, 0x96, 0xff, 0x67, 0x50, 0xe9, 0x81, 0x0e, 0x5d, 0x0e, 0xad, 0xc4, - 0x1f, 0xeb, 0x98, 0x38, 0xcc, 0x54, 0x9d, 0x27, 0xa6, 0xf1, 0x37, 0x23, - 0xce, 
0xb4, 0x5b, 0xff, 0x12, 0xb1, 0xb8, 0x35, 0x5e, 0x03, 0x02, 0x04, - 0xad, 0xa6, 0x6f, 0x43, 0xfc, 0xe4, 0xbe, 0x0c, 0xe0, 0x93, 0xd5, 0xef, - 0x09, 0xfa, 0x04, 0xe9, 0x5a, 0x22, 0xd4, 0x81, 0xc1, 0x27, 0x4f, 0x5f, - 0x6e, 0x83, 0x5a, 0x8a, 0x2d, 0xbb, 0x8f, 0xa4, 0x91, 0xcc, 0x82, 0x37, - 0x3b, 0x14, 0x98, 0x58, 0x86, 0x44, 0xb7, 0xa9, 0x58, 0xf3, 0x3d, 0x49, - 0x71, 0x7a, 0x37, 0xcd, 0xc5, 0xb9, 0xc9, 0x46, 0xd5, 0xd4, 0x17, 0x60, - 0x1a, 0xbf, 0x93, 0xa9, 0xe9, 0x08, 0x25, 0x40, 0xd1, 0x65, 0xae, 0xdd, - 0x85, 0xa6, 0xcc, 0x06, 0xca, 0x91, 0xe1, 0x63, 0xf9, 0x6b, 0x15, 0xa8, - 0x04, 0x61, 0xd2, 0xa6, 0x59, 0x21, 0x1a, 0x1c, 0xc9, 0xa9, 0xa9, 0xc8, - 0x54, 0x86, 0xac, 0xa5, 0xd6, 0x95, 0x39, 0x83, 0x4b, 0x6b, 0x69, 0xa6, - 0x94, 0xd8, 0xc0, 0xfb, 0x66, 0x0f, 0x3a, 0xbe, 0xc7, 0xf3, 0xcc, 0xd5, - 0xb7, 0x1b, 0x60, 0x02, 0x95, 0x45, 0x4a, 0x12, 0xc9, 0xfe, 0x75, 0x7c, - 0x1b, 0xb2, 0x86, 0x96, 0x28, 0x07, 0xa2, 0x18, 0x7a, 0x6c, 0x90, 0x6f, - 0x32, 0x0c, 0xc8, 0x34, 0xbc, 0x75, 0x4d, 0x96, 0x03, 0xa6, 0x0f, 0x3d, - 0x35, 0x1b, 0x64, 0x76, 0x95, 0x55, 0xff, 0x25, 0xd4, 0x71, 0xcf, 0x8a, - 0x73, 0x6d, 0x9b, 0x74, 0xfe, 0xff, 0x9e, 0x31, 0x9e, 0x5e, 0x89, 0x5a, - 0x1a, 0xeb, 0x8d, 0x06, 0x3b, 0xf2, 0xf6, 0x06, 0x5d, 0xc3, 0xba, 0x04, - 0xca, 0x0f, 0x07, 0x2c, 0xbd, 0x54, 0x52, 0xd9, 0x1c, 0x2f, 0x0e, 0x13, - 0x5e, 0x25, 0x13, 0xe5, 0xd7, 0x8e, 0x19, 0x42, 0x1b, 0x52, 0x2e, 0xd2, - 0x8f, 0xc5, 0x8e, 0x1c, 0x34, 0x2e, 0x4d, 0xd5, 0x51, 0x7d, 0x91, 0x64, - 0xbc, 0xb4, 0x0d, 0xc9, 0xe7, 0x1c, 0x6c, 0x47, 0xe9, 0xbb, 0x67, 0x9a, - 0x96, 0xde, 0xad, 0xff, 0xba, 0x35, 0x25, 0x6d, 0x57, 0xa1, 0x93, 0xfe, - 0xe2, 0x8d, 0x02, 0xeb, 0xf0, 0x2f, 0x54, 0xfd, 0x46, 0xc0, 0x8f, 0xea, - 0x32, 0x7b, 0x57, 0xda, 0xe0, 0x29, 0x1c, 0x19, 0xba, 0xa4, 0xa6, 0x1c, - 0x6e, 0xeb, 0x7a, 0xa8, 0x8a, 0xe1, 0xc6, 0x12, 0xf5, 0xa3, 0x24, 0x1a, - 0x96, 0xe1, 0x02, 0xc0, 0xf4, 0x7d, 0x14, 0x72, 0xd6, 0x12, 0x8e, 0x6c, - 0x8c, 0xd2, 0xfd, 0x88, 0x78, 0x48, 0xf3, 0x74, 0x38, 0x86, 0x04, 0x68, - 0x6d, 
0x7c, 0xf4, 0x4c, 0x40, 0x17, 0xf6, 0x8f, 0xb2, 0x6c, 0xd7, 0x66, - 0x66, 0x3b, 0x38, 0xa1, 0xbb, 0x1e, 0xff, 0x72, 0x1f, 0x64, 0x56, 0xc2, - 0x53, 0x1c, 0x6f, 0x84, 0x2b, 0xbd, 0x23, 0xd9, 0xb4, 0x6b, 0x87, 0x79, - 0x99, 0xec, 0x81, 0x8d, 0x1a, 0x58, 0x00, 0xf0, 0x2c, 0xc1, 0xc4, 0x57, - 0x74, 0x0f, 0xce, 0x32, 0xe2, 0x5e, 0xae, 0x02, 0x1c, 0xe8, 0x94, 0xc6, - 0x44, 0xaa, 0x7b, 0x9a, 0x32, 0xb5, 0x33, 0xac, 0xfc, 0x41, 0x65, 0xf2, - 0xca, 0xcc, 0xc6, 0x74, 0x36, 0xb2, 0xc9, 0x0e, 0x26, 0x73, 0xae, 0x68, - 0x98, 0xa4, 0x36, 0xe8, 0x98, 0x39, 0xad, 0x05, 0x3f, 0xca, 0x12, 0xcc, - 0x86, 0xfd, 0xc6, 0x57, 0xf0, 0x02, 0x4e, 0x45, 0xcb, 0x54, 0x34, 0xdd, - 0x66, 0x26, 0xab, 0xda, 0x95, 0xa5, 0x85, 0xec, 0x02, 0x03, 0xb6, 0x29, - 0x30, 0x11, 0x40, 0x54, 0x9a, 0x6a, 0x87, 0x2e, 0x97, 0xa1, 0x7e, 0xeb, - 0x34, 0x39, 0x78, 0x3b, 0xbc, 0x5f, 0x8e, 0xc5, 0x0e, 0x21, 0x29, 0x4b, - 0xb7, 0x1b, 0xe7, 0x14, 0x08, 0x34, 0xb7, 0x9a, 0x0a, 0xb2, 0x6c, 0x25, - 0x76, 0xb5, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, - 0xe2, 0x7d, 0x48, 0xdd, 0x1a, 0xcb, 0xb6, 0x5c, 0x6f, 0xbe, 0x32, 0x9d, - 0xd2, 0x2b, 0x9e, 0x10, 0x65, 0xd7, 0x1e, 0xec, 0xc8, 0xb5, 0x10, 0x64, - 0x8f, 0x5d, 0xef, 0xfe, 0x9b, 0x6c, 0x9b, 0x02, 0x6a, 0x6d, 0xf7, 0x98, - 0x7b, 0xf7, 0x17, 0xfd, 0x49, 0x1b, 0x6a, 0xc5, 0x3c, 0xa0, 0xfc, 0xa8, - 0x94, 0x95, 0xed, 0x48, 0x81, 0x04, 0x53, 0x8c, 0xbe, 0xe4, 0x4e, 0xaf, - 0xc1, 0x9d, 0xc3, 0xdf, 0xc2, 0xb5, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x04, 0x00, 0x00, 0xae, 0xb0, 0x67, 0x5b, 0x99, 0x26, 0x07, 0xfb, - 0x6c, 0x98, 0xfe, 0xbb, 0x35, 0xf1, 0x5b, 0x02, 0xc6, 0x03, 0xfc, 0x97, - 0x21, 0x16, 0x8d, 0x48, 0xd4, 0x4f, 0x03, 0xd9, 0x7c, 0x9f, 0xa6, 0x1e, - 0x6f, 0x5a, 0x58, 0x17, 0x6d, 0x26, 0xb4, 0xc5, 0x4c, 0xe5, 0x93, 0x0a, - 0x9c, 0xb2, 0x40, 0xbc, 0x60, 0xc7, 0x2b, 0xdb, 0x3b, 0xc0, 0x3c, 0x5c, - 0x44, 0x4b, 0xdd, 0x58, 0xbe, 0xdc, 0xc5, 0xb5, 0x6a, 0xf9, 0x5e, 0x73, - 0x07, 0x58, 0x8f, 0x45, 0x7b, 0xac, 0xba, 0x82, 0x96, 0x49, 0x4d, 0x22, - 0x70, 
0x7a, 0x3d, 0x69, 0x26, 0x8b, 0x88, 0x13, 0xf1, 0x8d, 0xfc, 0xdf, - 0x73, 0xd5, 0x20, 0x3c, 0x52, 0x92, 0x16, 0xb1, 0x6e, 0xb7, 0x41, 0xbe, - 0x23, 0x9b, 0x51, 0xf7, 0xc9, 0x38, 0x8a, 0xc7, 0x6e, 0x68, 0x82, 0xd1, - 0x59, 0x50, 0x09, 0x4b, 0x44, 0x3b, 0x28, 0x06, 0x60, 0x75, 0x7a, 0xe5, - 0xa1, 0x36, 0xbb, 0x62, 0x44, 0xe3, 0xd0, 0x68, 0x14, 0xea, 0xad, 0xf9, - 0x18, 0xcc, 0xd5, 0x42, 0x5d, 0x18, 0x53, 0xe6, 0x4a, 0xfe, 0xde, 0x32, - 0xe1, 0xe7, 0xf8, 0x8c, 0x9d, 0x35, 0xf4, 0x4a, 0xcb, 0x23, 0x2f, 0x91, - 0xb5, 0xb0, 0xb2, 0x01, 0x5c, 0x22, 0x8c, 0x42, 0x42, 0xd5, 0xf0, 0x82, - 0x6f, 0x9f, 0x64, 0xe5, 0x99, 0x4d, 0x36, 0x0b, 0xfc, 0x78, 0x38, 0x30, - 0x47, 0x8f, 0x0b, 0x57, 0x86, 0x4f, 0x1b, 0xc9, 0x05, 0x0e, 0x08, 0xc4, - 0xf4, 0xab, 0x9e, 0x90, 0xb4, 0x4f, 0x36, 0x54, 0xe8, 0xa1, 0x3f, 0x90, - 0xd2, 0xf3, 0xb4, 0xb4, 0xdd, 0xf3, 0x43, 0x2f, 0xc4, 0x43, 0xbb, 0x99, - 0x8e, 0xb8, 0x61, 0x59, 0x5e, 0xfa, 0x1b, 0x3c, 0xc1, 0xeb, 0x9d, 0x35, - 0x62, 0x34, 0x82, 0x45, 0xef, 0x41, 0xe9, 0xfc, 0x35, 0xae, 0xb4, 0x0b, - 0xce, 0x52, 0x5b, 0x40, 0x7d, 0xdd, 0x86, 0x83, 0x52, 0x74, 0x77, 0x11, - 0xc2, 0x9b, 0x8c, 0xa3, 0x63, 0xc2, 0x2d, 0xdd, 0x8c, 0x76, 0x13, 0xc5, - 0xc0, 0xde, 0x3e, 0x6b, 0xe1, 0x0f, 0xeb, 0x0f, 0x0a, 0x25, 0x41, 0x2f, - 0x8b, 0x4a, 0x98, 0x30, 0xcb, 0x1a, 0x43, 0xa3, 0xc1, 0xcc, 0x44, 0x9a, - 0x6c, 0xdc, 0x92, 0x40, 0xc4, 0x7a, 0x1f, 0x8a, 0x6f, 0x74, 0xf3, 0xf5, - 0x52, 0x72, 0xf7, 0x81, 0x6e, 0x74, 0x75, 0xe6, 0xea, 0xd9, 0x57, 0x91, - 0xae, 0xf2, 0x3f, 0x35, 0x4b, 0x99, 0xd9, 0x3f, 0x85, 0xe0, 0x92, 0xaa, - 0x35, 0xac, 0x28, 0xbf, 0x43, 0xb8, 0xad, 0xc7, 0xc5, 0xf6, 0x15, 0x2f, - 0x7c, 0xfb, 0x34, 0x48, 0xf3, 0x04, 0x12, 0xf4, 0x2f, 0x92, 0x74, 0xc8, - 0xea, 0xbc, 0x24, 0x6e, 0x3b, 0x0e, 0x9e, 0xf0, 0xaf, 0x02, 0x97, 0x95, - 0xbc, 0x90, 0x7f, 0xc4, 0xf8, 0xe2, 0x04, 0x9a, 0x8f, 0xfc, 0xbc, 0x50, - 0xfe, 0xf7, 0x89, 0x17, 0x2c, 0xdb, 0xd6, 0x5e, 0xbf, 0xd9, 0x8e, 0x89, - 0x8b, 0x06, 0x1d, 0x0b, 0x81, 0x2a, 0x55, 0x5c, 0x5f, 0xb6, 0xa6, 0xa5, - 0xd2, 
0xaa, 0x79, 0x9c, 0x39, 0x31, 0x76, 0x03, 0x98, 0x42, 0xd6, 0xb7, - 0x37, 0x1f, 0xc8, 0x51, 0x8a, 0x1c, 0x5d, 0xcd, 0x9c, 0x78, 0xa4, 0x22, - 0x6e, 0x12, 0x10, 0x0a, 0x33, 0xc9, 0xe0, 0xfe, 0xfc, 0xe8, 0x15, 0xe7, - 0xef, 0xd8, 0x6d, 0xc7, 0xc9, 0xc2, 0x8e, 0x18, 0x82, 0x2f, 0xa6, 0x09, - 0x8a, 0xdc, 0x41, 0x6b, 0x89, 0xea, 0xd9, 0xd6, 0x96, 0xfd, 0xba, 0x6e, - 0xae, 0x2d, 0x0c, 0xf9, 0x3c, 0x4c, 0x1a, 0xfa, 0x98, 0x83, 0x51, 0x45, - 0x9d, 0x1e, 0xa5, 0xc1, 0x81, 0x54, 0x37, 0x5d, 0x28, 0xca, 0xa6, 0xfe, - 0x48, 0xf4, 0x77, 0x17, 0x92, 0x1d, 0x0c, 0xb3, 0x39, 0x77, 0x22, 0xd9, - 0xc7, 0xc2, 0xaf, 0x70, 0x0a, 0xd3, 0xa6, 0x57, 0x69, 0xfb, 0xb9, 0xe0, - 0xc4, 0x73, 0x7a, 0x68, 0xee, 0x27, 0x6e, 0x3a, 0x6e, 0xae, 0x32, 0xf6, - 0x09, 0xb3, 0x0b, 0x40, 0x72, 0xc6, 0x26, 0x6e, 0xc5, 0x88, 0x6b, 0xce, - 0x99, 0x88, 0x60, 0x6f, 0x6e, 0xa9, 0xe6, 0xd7, 0x35, 0x5e, 0x3b, 0x36, - 0x0d, 0x14, 0xb8, 0x2f, 0xde, 0x67, 0xc8, 0x2e, 0x52, 0xc1, 0xf1, 0x58, - 0x87, 0x32, 0x2a, 0x52, 0x21, 0x27, 0x1e, 0x04, 0xed, 0xc4, 0x82, 0xd7, - 0xeb, 0x85, 0x12, 0x3e, 0xea, 0xd0, 0x07, 0xa0, 0x80, 0x48, 0xe9, 0xbd, - 0x9b, 0x3a, 0x8e, 0x8b, 0xa0, 0xfc, 0x07, 0xf0, 0x69, 0x4e, 0xc7, 0x1d, - 0xd9, 0x9a, 0x73, 0x18, 0x63, 0xb8, 0xe6, 0x4a, 0xa0, 0x81, 0xf0, 0xdb, - 0xb9, 0x88, 0xf4, 0x2b, 0x1f, 0x0d, 0xda, 0x31, 0xc0, 0xb0, 0x55, 0x79, - 0x56, 0x48, 0x22, 0xbb, 0x49, 0x7f, 0xb1, 0xf1, 0xf6, 0x6f, 0x42, 0xd3, - 0xba, 0x68, 0x3a, 0x8f, 0xe7, 0xac, 0x53, 0x30, 0x96, 0xec, 0x51, 0x7d, - 0xfc, 0xc0, 0x35, 0xe9, 0x59, 0xe7, 0x0e, 0xed, 0x29, 0x46, 0x50, 0x3c, - 0x4b, 0x36, 0xc6, 0x2a, 0xaa, 0x3b, 0xbe, 0xce, 0xd3, 0xda, 0x4d, 0x65, - 0xb0, 0xe8, 0x52, 0x68, 0xf0, 0x23, 0xde, 0x02, 0x77, 0xb3, 0xcc, 0xce, - 0x78, 0xdd, 0x8c, 0xf8, 0xbe, 0x5d, 0x0d, 0xa9, 0xb6, 0x96, 0x85, 0xbf, - 0x92, 0x2a, 0x6b, 0x1b, 0xe8, 0x76, 0x05, 0x13, 0x30, 0xd8, 0x3d, 0x80, - 0xaa, 0xa2, 0xa3, 0xbc, 0x07, 0xba, 0x9c, 0x75, 0x5b, 0x42, 0x03, 0xd8, - 0xde, 0x42, 0x44, 0xf7, 0x29, 0x43, 0x29, 0x0d, 0x48, 0x2b, 0x02, 0xd0, - 0xcc, 
0xe9, 0x17, 0x47, 0x23, 0x73, 0x6d, 0xc5, 0x91, 0x6d, 0x4e, 0xc5, - 0xcf, 0xc3, 0x58, 0xaf, 0x6e, 0xa2, 0x9e, 0xe7, 0xe1, 0x88, 0xac, 0x62, - 0xff, 0xbc, 0x69, 0x57, 0xad, 0x0f, 0x08, 0xf8, 0x32, 0xfd, 0x79, 0xcb, - 0x30, 0xbc, 0xd2, 0xe5, 0x20, 0xd9, 0x0f, 0xd1, 0x33, 0xbf, 0xe4, 0x49, - 0x7a, 0x2b, 0x5c, 0xb3, 0x63, 0x13, 0x4d, 0xed, 0x17, 0xe7, 0x5b, 0xf4, - 0x36, 0x9d, 0x3c, 0x4e, 0x51, 0xb2, 0xf7, 0xf2, 0xcd, 0xfb, 0xec, 0x42, - 0x79, 0x46, 0xae, 0x18, 0x50, 0xdf, 0xbf, 0x5b, 0xb1, 0x9a, 0x49, 0x22, - 0xae, 0xe9, 0xf3, 0x86, 0x3f, 0xe0, 0xb4, 0xc6, 0x9c, 0x08, 0xd6, 0xd9, - 0xf4, 0x68, 0xbb, 0x33, 0x0e, 0x59, 0x3d, 0x76, 0xf0, 0xd7, 0x54, 0x04, - 0x19, 0x66, 0xee, 0x61, 0x11, 0x0d, 0x48, 0x10, 0x21, 0x16, 0x7c, 0xac, - 0x49, 0xab, 0xe0, 0x19, 0x85, 0x93, 0x48, 0x65, 0x7c, 0x5e, 0x6c, 0x1a, - 0xf5, 0xb0, 0xc6, 0x80, 0xa1, 0x2a, 0xd5, 0x71, 0x42, 0xec, 0x2f, 0x25, - 0xf7, 0xb8, 0x84, 0xcd, 0xf0, 0x5c, 0xcd, 0xee, 0x44, 0xcb, 0xeb, 0x74, - 0x96, 0x3c, 0xb0, 0x56, 0xcb, 0xaf, 0x7e, 0x9e, 0x4a, 0x12, 0x06, 0xae, - 0x57, 0x43, 0x2d, 0xb2, 0x11, 0x96, 0x05, 0xdb, 0xb3, 0x1a, 0x01, 0xa7, - 0x1d, 0x02, 0x81, 0x1c, 0x36, 0x41, 0x65, 0xf0, 0x67, 0xd6, 0xd0, 0x0f, - 0xec, 0x34, 0x7d, 0xd3, 0x89, 0xac, 0x60, 0x67, 0x95, 0x81, 0x84, 0xe7, - 0xbb, 0x9a, 0x59, 0x36, 0x3b, 0xde, 0xa4, 0x88, 0xda, 0xf2, 0xd2, 0xa2, - 0x0c, 0xba, 0xfb, 0x93, 0xbf, 0xc8, 0xad, 0xe8, 0x57, 0xa0, 0x2b, 0xbb, - 0x4e, 0xa9, 0x38, 0xe7, 0x86, 0x6b, 0x95, 0x34, 0x24, 0x96, 0xc0, 0x09, - 0xd9, 0xfd, 0x5f, 0x1c, 0x93, 0xd9, 0x72, 0xfa, 0xc4, 0x14, 0x72, 0x9c, - 0x19, 0x6f, 0xee, 0x12, 0x17, 0xee, 0x65, 0xb4, 0x8c, 0x83, 0x39, 0x3c, - 0x0f, 0xbf, 0x25, 0xcf, 0xee, 0x05, 0x8c, 0x6a, 0x56, 0x18, 0xf0, 0x20, - 0x72, 0xc1, 0xbf, 0xe4, 0xce, 0x37, 0xbf, 0x2b, 0xba, 0x70, 0x1e, 0xc2, - 0xc8, 0xcd, 0x58, 0xb9, 0x60, 0xc7, 0xfb, 0xd0, 0xce, 0xb9, 0xff, 0xff, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x7c, 0x63, 0x50, 0x90, - 0xcb, 0x9c, 0xce, 0x59, 0xb1, 0x47, 0xb0, 0x49, 0x9b, 0xfc, 0xfb, 0x3d, - 0x3d, 
0x62, 0xcf, 0x58, 0x4c, 0x2a, 0x79, 0xf0, 0x72, 0x7f, 0x81, 0x41, - 0xac, 0x82, 0x2d, 0xa9, 0xf0, 0x0e, 0x4d, 0xd2, 0xe0, 0xbd, 0xca, 0x17, - 0xb7, 0x59, 0x9f, 0xdb, 0xfe, 0x51, 0x90, 0x88, 0xb9, 0xeb, 0x4e, 0xac, - 0x80, 0x30, 0x64, 0xc4, 0x49, 0xd1, 0xb6, 0x65, 0x67, 0xef, 0x9d, 0x5c, - 0x04, 0xe0, 0x9d, 0xbe, 0x47, 0x75, 0x9b, 0x6e, 0x30, 0x76, 0xad, 0x37, - 0x9a, 0x56, 0xff, 0xcd, 0x40, 0x26, 0x3e, 0xe2, 0x7d, 0x30, 0x55, 0x09, - 0x92, 0x25, 0x36, 0x2f, 0xf8, 0x55, 0xb8, 0x9b, 0x66, 0x49, 0x41, 0x9d, - 0x78, 0x6d, 0x3f, 0x54, 0x41, 0x01, 0x93, 0x9c, 0x5e, 0x0c, 0x4a, 0x38, - 0x79, 0x76, 0xb4, 0x98, 0xae, 0xf9, 0x99, 0x21, 0x05, 0x6a, 0xfb, 0xbc, - 0x44, 0xf7, 0xdc, 0x85, 0x5e, 0x5f, 0x18, 0x49, 0x22, 0x11, 0x6d, 0xa5, - 0x9e, 0x6b, 0x59, 0x60, 0xf8, 0x73, 0x8b, 0xcb, 0x38, 0xbb, 0xc9, 0xbf, - 0x49, 0x0e, 0x57, 0x65, 0x48, 0x41, 0x41, 0xa2, 0x40, 0x67, 0x91, 0x1d, - 0x54, 0xac, 0xa7, 0xef, 0x16, 0x8b, 0xc7, 0xd1, 0xe6, 0xdb, 0xc5, 0x9c, - 0xd4, 0x04, 0x67, 0xd8, 0x75, 0x21, 0x2b, 0x1d, 0x11, 0xc1, 0x79, 0x45, - 0xb4, 0x91, 0x7a, 0x97, 0x00, 0xde, 0xc6, 0xc5, 0x8a, 0xd1, 0xd7, 0xea, - 0xc1, 0x22, 0xe1, 0x58, 0x61, 0xf2, 0x89, 0x3d, 0xdb, 0x04, 0x3d, 0xe4, - 0xe9, 0xe7, 0xbf, 0x4b, 0x99, 0x8a, 0xc6, 0xf2, 0x09, 0xc4, 0xe2, 0x6d, - 0x0b, 0xda, 0x13, 0xfb, 0xff, 0xbf, 0x0b, 0xfc, 0x78, 0x33, 0xb8, 0x7b, - 0x3e, 0xd8, 0xba, 0x27, 0xba, 0xae, 0xdf, 0xce, 0xea, 0x80, 0x08, 0x38, - 0xd8, 0x33, 0x00, 0xa9, 0xb6, 0x88, 0x48, 0xa9, 0x3b, 0x54, 0xf0, 0x95, - 0xda, 0xba, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, - 0xb1, 0xd7, 0x8d, 0x6c, 0xb9, 0x96, 0xdc, 0x64, 0x9b, 0x0c, 0x74, 0x54, - 0x59, 0x82, 0xf6, 0x6e, 0x7c, 0x4e, 0x23, 0x83, 0x04, 0x2e, 0x49, 0xfb, - 0x56, 0x4b, 0xcd, 0x0d, 0x76, 0x29, 0xb1, 0xce, 0x40, 0xa3, 0xd0, 0x02, - 0x16, 0x8e, 0x1c, 0x0a, 0x00, 0x5b, 0x8c, 0x06, 0xf9, 0x07, 0x97, 0x12, - 0x0c, 0x33, 0xd5, 0x48, 0x6d, 0xae, 0x7d, 0x2c, 0x8f, 0x74, 0x32, 0x24, - 0xcf, 0x91, 0xd7, 0xbe, 0xb2, 0x05, 0xcf, 0x2f, 0x93, 0xd5, 0x43, 0x90, - 0xce, 
0x02, 0x97, 0xf8, 0x51, 0xb3, 0xba, 0x56, 0x5d, 0x94, 0x41, 0xa4, - 0x11, 0xf3, 0x21, 0xc0, 0xcc, 0x28, 0xf8, 0x5a, 0x00, 0x0a, 0xd4, 0x53, - 0xdd, 0xac, 0xfe, 0x25, 0x03, 0xea, 0x2b, 0x6b, 0x9d, 0x7e, 0x1a, 0xe1, - 0x5f, 0x5c, 0xa7, 0x47, 0xa2, 0x72, 0x4f, 0x92, 0x60, 0x25, 0x7c, 0x1c, - 0xa5, 0x34, 0xa6, 0x86, 0x0e, 0xda, 0x8f, 0x3f, 0xec, 0xe2, 0xe4, 0xad, - 0xa9, 0x41, 0xcc, 0x3d, 0x94, 0x43, 0xfd, 0x28, 0xd8, 0xb0, 0x0f, 0x05, - 0x9e, 0x2b, 0x27, 0x3f, 0xe0, 0x84, 0xbc, 0x9e, 0x7a, 0xa5, 0x83, 0x3d, - 0x3b, 0xac, 0x83, 0xd3, 0x16, 0x92, 0x8c, 0xd2, 0x4a, 0x81, 0xdd, 0xba, - 0x0a, 0xb7, 0xc5, 0x9f, 0x83, 0x0f, 0x78, 0xb8, 0xab, 0x2d, 0xca, 0xf8, - 0x6c, 0x06, 0xd7, 0x82, 0xb8, 0x61, 0x7d, 0x2a, 0x31, 0x3a, 0x39, 0x97, - 0x5f, 0xc7, 0x00, 0x6e, 0x46, 0xf2, 0xc5, 0x12, 0x71, 0x55, 0x5b, 0x10, - 0xaf, 0xbb, 0x07, 0x4c, 0x2f, 0xa3, 0x51, 0x53, 0x22, 0x20, 0xab, 0xed, - 0x02, 0x95, 0xc6, 0x5f, 0xaa, 0xb8, 0xc0, 0xcb, 0xe5, 0xe0, 0x25, 0x97, - 0xf7, 0xda, 0x1d, 0xd8, 0x5a, 0xff, 0x76, 0x0c, 0x3e, 0x33, 0x1b, 0x7a, - 0x15, 0xb8, 0x34, 0x75, 0xcf, 0xe9, 0xf3, 0x53, 0x61, 0x03, 0x2d, 0x52, - 0x29, 0x69, 0x3a, 0xc3, 0xd9, 0x22, 0xc0, 0x2d, 0x80, 0xed, 0x66, 0xc4, - 0xf4, 0x89, 0x60, 0x14, 0xdb, 0xec, 0x7d, 0xcc, 0x99, 0x5c, 0x94, 0x27, - 0xab, 0xed, 0xd2, 0x17, 0xf4, 0x36, 0xfc, 0x7e, 0x99, 0x98, 0xb6, 0x86, - 0xb6, 0x7c, 0x54, 0xd6, 0xec, 0xb5, 0xad, 0x62, 0xcc, 0xb0, 0xf7, 0x8c, - 0x52, 0x99, 0xf2, 0x44, 0x27, 0x3a, 0xb0, 0xff, 0x8f, 0x09, 0xae, 0xe1, - 0x61, 0xd8, 0x9f, 0xdd, 0x2f, 0x6b, 0xea, 0xd0, 0x12, 0x70, 0x8c, 0x9d, - 0x8f, 0x4c, 0x36, 0x98, 0x1e, 0x2e, 0xb5, 0x50, 0x63, 0x33, 0x9c, 0x4b, - 0xc3, 0xd4, 0xa0, 0xe6, 0x96, 0x96, 0x75, 0xfd, 0x8a, 0xc4, 0x0c, 0xa7, - 0xea, 0x9d, 0xf1, 0x23, 0x9e, 0x38, 0xff, 0x1a, 0x67, 0x36, 0x5f, 0x5f, - 0x17, 0x88, 0x1a, 0x43, 0x25, 0xea, 0x76, 0xb5, 0xcd, 0xce, 0x43, 0xf8, - 0x71, 0x2b, 0xdb, 0xf0, 0xcd, 0x76, 0xbd, 0x94, 0x57, 0xdb, 0x77, 0xcd, - 0xb2, 0x8f, 0xd1, 0xc0, 0xeb, 0x00, 0x61, 0x7f, 0x66, 0xb0, 0x43, 0x6e, - 0xe0, 
0x9f, 0x11, 0x0e, 0x65, 0xf7, 0x4e, 0x00, 0x74, 0xc3, 0xeb, 0xb1, - 0xeb, 0x0c, 0x24, 0x5d, 0x15, 0x56, 0x16, 0x47, 0x87, 0xcf, 0x34, 0xbe, - 0x2a, 0xdd, 0x77, 0x55, 0xa4, 0x09, 0x15, 0x79, 0x8c, 0xaa, 0xce, 0x32, - 0x90, 0x9b, 0x16, 0x40, 0x94, 0x7f, 0x19, 0x27, 0xbc, 0xbf, 0x45, 0x4b, - 0xa5, 0xf0, 0xd0, 0x9e, 0x5b, 0xb9, 0x46, 0x6e, 0x72, 0x8f, 0x49, 0x3b, - 0x7a, 0xc1, 0x92, 0xb0, 0xd5, 0x25, 0x1b, 0x0b, 0xf3, 0xd0, 0x8a, 0x47, - 0x8b, 0xbe, 0xa4, 0xf9, 0x6a, 0x09, 0x84, 0x9a, 0x5b, 0x5b, 0xea, 0xbb, - 0x6f, 0xd8, 0xaf, 0xcd, 0x67, 0x9b, 0x79, 0x7c, 0x8f, 0xcc, 0xd7, 0x5f, - 0x3a, 0xc3, 0xd0, 0xb7, 0xba, 0x28, 0x83, 0x81, 0x4a, 0x05, 0x51, 0xaf, - 0xa0, 0x52, 0x34, 0xe3, 0x4f, 0xec, 0x82, 0xdc, 0x97, 0xd8, 0x69, 0xb2, - 0x0d, 0x68, 0x35, 0x87, 0x58, 0xc0, 0xcf, 0x58, 0x0d, 0xf6, 0x6b, 0x6d, - 0x2a, 0xc0, 0x72, 0xe4, 0x90, 0x8c, 0x7b, 0x45, 0xba, 0xf1, 0x13, 0x6f, - 0x8c, 0xd2, 0xdd, 0xc5, 0x8e, 0xc8, 0xec, 0xf9, 0xfb, 0xde, 0xe5, 0xaa, - 0xcb, 0xc0, 0xff, 0x77, 0x2d, 0x99, 0xb1, 0x69, 0x7f, 0xe3, 0x38, 0x61, - 0x35, 0xb6, 0x45, 0xdd, 0x73, 0x45, 0x84, 0x89, 0x1b, 0x96, 0x7e, 0x6a, - 0x1d, 0xd9, 0xe6, 0x76, 0xa8, 0x16, 0x0f, 0x42, 0xc9, 0x41, 0xec, 0x5d, - 0x25, 0x01, 0xb0, 0x45, 0xa6, 0xaa, 0x69, 0x87, 0x11, 0xa1, 0xb8, 0x9e, - 0x68, 0x48, 0x68, 0xe9, 0xb5, 0xc2, 0xff, 0x83, 0x8f, 0x71, 0xb9, 0xd7, - 0xbb, 0xae, 0x59, 0x8b, 0x1b, 0x4c, 0x44, 0xd8, 0xe3, 0xce, 0xab, 0x88, - 0xfb, 0x64, 0xd9, 0x61, 0x5a, 0x7d, 0xce, 0x3a, 0x27, 0xb5, 0xa3, 0xfd, - 0x5d, 0xa3, 0xb8, 0xa1, 0x15, 0x63, 0x0b, 0x75, 0x39, 0xc3, 0xa4, 0xfb, - 0x60, 0x53, 0xfd, 0x11, 0x21, 0x35, 0x0f, 0x19, 0x28, 0x14, 0xcd, 0x8a, - 0xcf, 0x33, 0xaa, 0x4f, 0x6a, 0x1e, 0x56, 0x87, 0xd5, 0x6e, 0x43, 0x9b, - 0xa3, 0x72, 0x95, 0x8c, 0x34, 0xa2, 0xac, 0x11, 0x76, 0x95, 0xd7, 0xdd, - 0xbf, 0x10, 0xf4, 0x0f, 0x2a, 0x64, 0xd2, 0x4d, 0x7b, 0xc6, 0x9b, 0x7d, - 0xf7, 0xa5, 0xb3, 0x84, 0x9a, 0x9a, 0x5e, 0xcf, 0x7f, 0x95, 0x6d, 0x44, - 0xd1, 0xb2, 0x19, 0xbb, 0xed, 0x37, 0x42, 0x4b, 0x4b, 0x6d, 0xb7, 0x10, - 0x02, 
0x5f, 0x00, 0x1f, 0x24, 0xce, 0xb2, 0x8b, 0x3e, 0x7d, 0xc6, 0x6e, - 0x6c, 0x90, 0x75, 0xad, 0x3f, 0x9d, 0x63, 0x04, 0x76, 0x20, 0x7a, 0x56, - 0x48, 0xa1, 0x6a, 0x37, 0x74, 0xd2, 0xb7, 0x4f, 0xa3, 0x64, 0x62, 0xaa, - 0xce, 0x75, 0x8c, 0x15, 0x75, 0x79, 0xa0, 0xbd, 0xdd, 0x01, 0x46, 0xca, - 0xa0, 0x31, 0x1a, 0x16, 0x1f, 0xef, 0x8b, 0xc6, 0x54, 0x57, 0xfa, 0x6e, - 0x43, 0xdf, 0xb0, 0x99, 0xed, 0xa4, 0xcb, 0xeb, 0x91, 0x35, 0x14, 0x0c, - 0xa9, 0x1d, 0xb5, 0xa9, 0x32, 0x99, 0xe3, 0x89, 0x74, 0xaa, 0xa4, 0x65, - 0x1e, 0x82, 0x47, 0xfa, 0x37, 0x23, 0xe5, 0x86, 0xb6, 0xc0, 0xb6, 0x89, - 0x9a, 0xd9, 0xae, 0x29, 0x39, 0x7b, 0x66, 0xc7, 0x5b, 0x02, 0x08, 0x86, - 0xd4, 0xf0, 0x75, 0xc2, 0x05, 0x86, 0xc3, 0x75, 0xd2, 0x2a, 0x1e, 0xec, - 0x6e, 0x75, 0x29, 0x58, 0x8c, 0x25, 0x3b, 0x95, 0x21, 0xde, 0x42, 0xd5, - 0xb7, 0x15, 0x30, 0x09, 0x49, 0x78, 0x55, 0xd5, 0xf2, 0x30, 0x80, 0x93, - 0x8a, 0xce, 0x84, 0x27, 0xdb, 0x4a, 0x09, 0x30, 0x0c, 0x7f, 0x4d, 0xd1, - 0x0f, 0xda, 0x66, 0x58, 0xe1, 0x01, 0xfd, 0x75, 0x83, 0xf5, 0x39, 0x2e, - 0xe2, 0x6b, 0xde, 0xff, 0x20, 0x8a, 0xf7, 0xcc, 0x81, 0x8e, 0x99, 0xb4, - 0xeb, 0x76, 0x74, 0x38, 0x2b, 0xe0, 0x6d, 0x61, 0x8f, 0x39, 0x59, 0x10, - 0x7d, 0xb5, 0xd3, 0x14, 0x96, 0x04, 0x1d, 0x22, 0x89, 0xef, 0x15, 0x7c, - 0x28, 0x5a, 0xd6, 0x8d, 0xf3, 0xb7, 0x6a, 0x9a, 0xce, 0x21, 0x77, 0xfd, - 0x4f, 0x22, 0x26, 0x28, 0xb8, 0xb5, 0xb3, 0x73, 0xfd, 0x2a, 0x7b, 0x42, - 0x26, 0x77, 0x41, 0x93, 0xed, 0xf9, 0x8f, 0xa9, 0x92, 0xd5, 0x9f, 0x2e, - 0x60, 0xec, 0x60, 0x98, 0xf1, 0xd5, 0x11, 0xe2, 0xe0, 0xd7, 0x45, 0xa7, - 0xe4, 0xf2, 0x82, 0x61, 0x2f, 0x41, 0x1b, 0xd9, 0x8e, 0x78, 0xd5, 0x6b, - 0x68, 0x74, 0xf0, 0xc3, 0x83, 0x01, 0x16, 0x60, 0x6e, 0x34, 0x88, 0x45, - 0x8a, 0x86, 0x44, 0x5b, 0xa5, 0xa8, 0x55, 0xbc, 0xfa, 0x8f, 0xbd, 0x93, - 0x95, 0x3f, 0xab, 0x19, 0x54, 0x8f, 0x06, 0x8e, 0xca, 0x0b, 0x4a, 0x18, - 0x3f, 0x7a, 0x9c, 0x3f, 0xe6, 0xbe, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x04, 0x00, 0x00, 0x81, 0x32, 0x41, 0x46, 0x59, 0x26, 0xf4, 0xef, - 0x93, 
0x9f, 0x04, 0xc2, 0x67, 0x13, 0x32, 0x45, 0xc0, 0x79, 0x70, 0x27, - 0x21, 0x2b, 0xaf, 0x35, 0xf3, 0xc4, 0x88, 0x52, 0x28, 0xea, 0xca, 0x8a, - 0x08, 0x01, 0x6f, 0x61, 0xab, 0x10, 0xa3, 0xf0, 0x6b, 0x3b, 0x54, 0x64, - 0xf1, 0x63, 0x83, 0x38, 0x2b, 0x26, 0x18, 0x5a, 0x67, 0xc4, 0x67, 0x38, - 0x3f, 0x2c, 0x9a, 0xc9, 0x48, 0x33, 0x77, 0xb4, 0xb2, 0xc2, 0xc7, 0x08, - 0x21, 0x5e, 0xc4, 0x19, 0x59, 0xe1, 0xfa, 0x32, 0xa4, 0x4c, 0x3e, 0xba, - 0x65, 0x92, 0x98, 0x39, 0x71, 0x2f, 0x99, 0x08, 0xf8, 0xb3, 0x7a, 0x03, - 0x53, 0xd7, 0x68, 0xb2, 0x5e, 0xb0, 0xef, 0xe0, 0x1e, 0x7d, 0xb2, 0x23, - 0x5d, 0x2b, 0xd7, 0x09, 0xa6, 0x78, 0xa4, 0x7c, 0x08, 0xed, 0x8a, 0xf6, - 0x96, 0xa0, 0x10, 0x17, 0x62, 0x8b, 0x8a, 0xa0, 0xac, 0x22, 0x67, 0x02, - 0xa8, 0x66, 0x1a, 0xb5, 0x02, 0xde, 0xa5, 0xfa, 0x69, 0x29, 0x5f, 0x24, - 0x89, 0x46, 0x68, 0xd6, 0x51, 0x2a, 0xfe, 0x88, 0xf0, 0x40, 0xde, 0xd1, - 0x12, 0x2e, 0xed, 0x13, 0x7b, 0x49, 0xf6, 0xe1, 0x7a, 0xcf, 0x61, 0xcb, - 0x70, 0x9d, 0xaa, 0x51, 0x07, 0xc2, 0x54, 0x76, 0x89, 0x29, 0x94, 0x29, - 0x8b, 0x0e, 0xf5, 0xe8, 0x81, 0xc7, 0xdb, 0x59, 0x1e, 0x75, 0xda, 0x6a, - 0x94, 0x18, 0x16, 0xae, 0xbb, 0x43, 0x87, 0x56, 0x66, 0x8b, 0x84, 0xe9, - 0xa9, 0xd0, 0xd2, 0x8f, 0x5b, 0xbf, 0x1d, 0x24, 0x3a, 0xb7, 0x64, 0xff, - 0xe9, 0x22, 0x21, 0x65, 0xaf, 0x2b, 0x45, 0x8d, 0x28, 0xea, 0xbc, 0x07, - 0x10, 0x6e, 0xfb, 0x4d, 0x6f, 0x35, 0xe5, 0xeb, 0x5d, 0x29, 0x72, 0xe1, - 0x94, 0xad, 0xed, 0x25, 0xd7, 0x39, 0x63, 0x32, 0x37, 0x0b, 0xb2, 0xd7, - 0x54, 0x1f, 0xe4, 0x0d, 0xe7, 0xb3, 0xd1, 0xa6, 0x2a, 0xcf, 0x8e, 0x97, - 0xf1, 0xa8, 0xfc, 0xb1, 0x61, 0xdc, 0xb4, 0x8f, 0x29, 0xa2, 0x68, 0x4a, - 0xe6, 0x2f, 0x8a, 0x69, 0x2c, 0xa1, 0x1d, 0xe2, 0x9e, 0x65, 0x71, 0xb7, - 0x83, 0xef, 0x63, 0xf5, 0x36, 0xdc, 0xa0, 0x94, 0x5a, 0x45, 0x8a, 0x85, - 0x5e, 0x28, 0x86, 0x21, 0xd2, 0xbf, 0x7a, 0x2f, 0x76, 0x1c, 0x2a, 0x15, - 0xb2, 0xe8, 0xaf, 0x63, 0x37, 0xbe, 0xd8, 0x0a, 0xef, 0x54, 0xee, 0xe6, - 0xd9, 0xb3, 0xdb, 0x41, 0x55, 0xba, 0xd8, 0x14, 0x7c, 0x10, 0x61, 0x06, - 0x40, 
0x45, 0x69, 0x37, 0x60, 0xf7, 0x6a, 0x7a, 0x23, 0x70, 0x30, 0x57, - 0x3e, 0xe5, 0x12, 0x24, 0xbc, 0x5e, 0x82, 0x89, 0xd8, 0x37, 0xc9, 0x33, - 0xb9, 0x38, 0xa5, 0xba, 0xed, 0xdd, 0x93, 0x58, 0x81, 0x15, 0xec, 0x15, - 0x70, 0x2f, 0x30, 0xfa, 0xaf, 0xf7, 0xf5, 0xcb, 0x41, 0x74, 0xea, 0xc0, - 0x91, 0xbe, 0x53, 0x4c, 0xc2, 0x74, 0x1b, 0x5b, 0x8c, 0x74, 0xd8, 0xc3, - 0x4a, 0x12, 0xaa, 0x57, 0xd6, 0x61, 0xb1, 0xb8, 0x81, 0x5d, 0x81, 0x37, - 0x1e, 0x5b, 0x3d, 0x5a, 0xbc, 0xa6, 0xb2, 0x27, 0xe3, 0x01, 0x4c, 0xf0, - 0xad, 0x7b, 0xdf, 0x50, 0xf9, 0xd7, 0xb7, 0xcc, 0xa8, 0x5c, 0x3d, 0x9a, - 0xb7, 0x60, 0x3e, 0x63, 0x3f, 0x6a, 0x08, 0x0b, 0x82, 0xdc, 0x3e, 0xfa, - 0x24, 0x33, 0xd3, 0x01, 0xbf, 0xef, 0xeb, 0x52, 0x3f, 0x91, 0x61, 0xda, - 0xe2, 0x26, 0x10, 0xdf, 0xe4, 0x9b, 0x77, 0x91, 0x22, 0xc5, 0x4e, 0x9c, - 0x0b, 0x32, 0xff, 0x27, 0x85, 0x85, 0x0c, 0x99, 0x50, 0x8f, 0xad, 0x5d, - 0x06, 0x18, 0x52, 0xb4, 0x64, 0x09, 0xc4, 0xa4, 0x84, 0xd4, 0x81, 0x07, - 0x0a, 0x97, 0x55, 0xf8, 0x96, 0x52, 0xb2, 0x9a, 0xf4, 0x06, 0x2c, 0x9a, - 0x3b, 0x8b, 0xaa, 0x67, 0x18, 0x3a, 0xee, 0xbc, 0xca, 0x8f, 0x46, 0xf6, - 0x4a, 0x33, 0x5b, 0x56, 0x09, 0xb2, 0x72, 0x87, 0xdb, 0xbb, 0x57, 0x67, - 0x53, 0x82, 0x77, 0x31, 0x66, 0xbb, 0xf1, 0x33, 0x6d, 0x55, 0x82, 0xaa, - 0x80, 0xd4, 0x4d, 0xb8, 0xab, 0xbd, 0x2a, 0xda, 0x10, 0x3a, 0xc8, 0xf0, - 0x14, 0x1e, 0xcb, 0x8e, 0x76, 0x6c, 0xc8, 0x74, 0x05, 0xb3, 0x51, 0xbd, - 0x63, 0x06, 0x69, 0x05, 0x2a, 0x21, 0xd6, 0x2f, 0xe4, 0x38, 0xae, 0xf8, - 0xd4, 0xe9, 0xa7, 0xe8, 0xc8, 0x5a, 0x65, 0x7d, 0x54, 0x34, 0x33, 0x0d, - 0xf6, 0x07, 0xd6, 0x8c, 0xe5, 0x72, 0x9b, 0xfb, 0x60, 0x49, 0xd2, 0xaf, - 0xb4, 0x17, 0xc4, 0x74, 0x8d, 0xe5, 0x54, 0xda, 0x96, 0x56, 0x7d, 0x97, - 0x62, 0xe8, 0xec, 0x0d, 0x2b, 0x02, 0x2e, 0x59, 0xf8, 0xa1, 0x06, 0x6a, - 0xb6, 0x3e, 0x15, 0xeb, 0x64, 0x1a, 0x48, 0x3d, 0x53, 0x2c, 0x42, 0x3b, - 0x97, 0xa1, 0x3f, 0x47, 0x8b, 0x74, 0x87, 0x8b, 0x96, 0x63, 0x08, 0x4c, - 0x99, 0x38, 0x5a, 0xb6, 0x93, 0xa8, 0xcc, 0xee, 0x62, 0x3a, 0x00, 0x6d, - 0x5c, 
0xab, 0x77, 0x3c, 0x46, 0xae, 0x6e, 0xeb, 0xf1, 0xf9, 0x63, 0xf1, - 0xa2, 0x31, 0x21, 0x38, 0xc3, 0x4f, 0xe2, 0x3a, 0x33, 0x7f, 0xe7, 0xc6, - 0x69, 0xd5, 0x1c, 0x7e, 0x5b, 0x4f, 0xb1, 0x50, 0x3b, 0xbe, 0x31, 0xa7, - 0x42, 0xa3, 0x97, 0x7b, 0xe3, 0x90, 0xd0, 0x07, 0xfd, 0x05, 0xb9, 0xf2, - 0x47, 0xc4, 0xc8, 0xdd, 0x1c, 0x3c, 0xa4, 0x22, 0x96, 0x04, 0xca, 0x28, - 0x17, 0xcc, 0x5c, 0x49, 0x7e, 0xc6, 0x93, 0x98, 0xd3, 0x8b, 0xd2, 0xf6, - 0x4a, 0xb6, 0xbe, 0x8d, 0xa2, 0xdd, 0xb6, 0x7c, 0x66, 0x0c, 0x29, 0xcb, - 0x1d, 0x98, 0xf6, 0xe4, 0xe5, 0x30, 0x4c, 0x84, 0xbf, 0x6f, 0x71, 0x4e, - 0xc2, 0x12, 0x9f, 0x35, 0xd6, 0xf8, 0xc6, 0x30, 0xe9, 0x9e, 0x1a, 0x8a, - 0x2f, 0xd1, 0x96, 0xb3, 0x3c, 0x0f, 0xf5, 0x78, 0xa7, 0xe0, 0xbd, 0x4b, - 0xe0, 0xd8, 0x3d, 0x57, 0xa5, 0x44, 0xa0, 0xd9, 0x10, 0x79, 0xd2, 0x10, - 0x50, 0xc7, 0x77, 0x73, 0x09, 0xf8, 0xb4, 0xcf, 0x66, 0xe3, 0x0c, 0xfb, - 0x96, 0xf8, 0x52, 0xb3, 0x7e, 0x44, 0xf0, 0x03, 0x54, 0xd4, 0xa2, 0x57, - 0x38, 0x8a, 0x96, 0xfc, 0x7c, 0x4c, 0x9f, 0x3a, 0xf2, 0xa2, 0x48, 0xbb, - 0x3e, 0xd1, 0x11, 0x2c, 0xab, 0xdf, 0x53, 0x96, 0xac, 0x58, 0x33, 0xb9, - 0xdd, 0xd2, 0x4f, 0x8a, 0x0a, 0x89, 0x0e, 0xd3, 0x6f, 0x58, 0x8c, 0xa1, - 0x0a, 0x0b, 0xa7, 0xd7, 0x1f, 0x0a, 0x70, 0xe3, 0x43, 0x12, 0x56, 0xb8, - 0x6c, 0xf8, 0x75, 0x4e, 0x2b, 0xb0, 0x17, 0x29, 0xe4, 0x95, 0x85, 0xd8, - 0x85, 0x95, 0x63, 0x55, 0xa8, 0x82, 0xf0, 0xe7, 0x7d, 0xf3, 0xf1, 0x78, - 0x66, 0xd1, 0x92, 0x71, 0x99, 0xad, 0x30, 0x94, 0xe9, 0x54, 0x2c, 0xe1, - 0x57, 0xf3, 0x6a, 0xe6, 0x0c, 0x5e, 0xc7, 0x58, 0xba, 0xb7, 0x61, 0xd3, - 0x74, 0x72, 0x96, 0x06, 0x0b, 0x01, 0x3d, 0xc2, 0xa1, 0xb4, 0x38, 0x81, - 0x19, 0x44, 0xbc, 0x84, 0x52, 0x22, 0xc9, 0x67, 0x81, 0x99, 0xfb, 0x0a, - 0xc2, 0xff, 0x50, 0x67, 0xbe, 0x38, 0x5e, 0x13, 0x16, 0x60, 0x83, 0x35, - 0xb9, 0x2f, 0xa9, 0x55, 0xbb, 0x30, 0x6b, 0x19, 0xfc, 0x2a, 0x40, 0x24, - 0x74, 0x20, 0x57, 0x78, 0xb9, 0x55, 0xb7, 0x70, 0x86, 0x65, 0x43, 0x1c, - 0x76, 0x2e, 0x91, 0x83, 0x5e, 0x33, 0xc2, 0xd4, 0xcc, 0xb5, 0x1c, 0x45, - 0xaf, 
0xa3, 0x87, 0x95, 0x9b, 0x77, 0x50, 0x44, 0x7e, 0xdd, 0xca, 0x3f, - 0x51, 0x21, 0xae, 0xf2, 0x15, 0xa9, 0x32, 0x94, 0xca, 0xde, 0x3b, 0x97, - 0x13, 0x6b, 0xff, 0xe0, 0x79, 0x39, 0x40, 0xf0, 0x66, 0x7d, 0x5e, 0xef, - 0xec, 0x0a, 0x35, 0xd2, 0x0d, 0x09, 0x19, 0x13, 0xf2, 0xc2, 0xff, 0xff, - 0x04, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0xdc, 0x07, 0x2e, 0x46, - 0xab, 0x4d, 0x6d, 0xf7, 0x24, 0xba, 0x02, 0xe3, 0xc5, 0xe3, 0xed, 0x64, - 0xc6, 0x77, 0x5a, 0x14, 0xae, 0x38, 0x52, 0x8c, 0x16, 0x2c, 0x52, 0x0e, - 0xf6, 0x65, 0x99, 0xcc, 0xf6, 0x9f, 0x77, 0xcc, 0x2e, 0xaf, 0x14, 0xd1, - 0xf0, 0x0f, 0xa7, 0x3e, 0x5b, 0x74, 0xff, 0xb9, 0xd3, 0x30, 0x02, 0x5e, - 0x52, 0xc8, 0x6f, 0x57, 0xef, 0x28, 0xf5, 0xfa, 0x9e, 0x70, 0x00, 0xfc, - 0x3e, 0xc3, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, - 0xaa, 0x9f, 0x86, 0xb0, 0x6d, 0xa1, 0x0c, 0xfa, 0xef, 0xb3, 0x6a, 0x50, - 0xa6, 0xfe, 0xff, 0xa9, 0x61, 0x0b, 0x18, 0x72, 0xee, 0xc6, 0xcd, 0x3a, - 0x34, 0x5e, 0xa8, 0x81, 0x31, 0x54, 0x25, 0x05, 0xc1, 0xd9, 0x66, 0x3d, - 0x17, 0xbb, 0x03, 0x21, 0x07, 0x69, 0x3a, 0x37, 0xe8, 0xd4, 0x6a, 0x68, - 0xe1, 0xa3, 0x19, 0x5a, 0x8d, 0x14, 0x11, 0x09, 0xef, 0xae, 0xfe, 0x94, - 0x19, 0x8a, 0xe4, 0xb9, 0x6e, 0xe8, 0xfa, 0x12, 0x2a, 0x5d, 0x00, 0x29, - 0x27, 0x6d, 0x5a, 0xa5, 0x09, 0x34, 0x79, 0x2b, 0xa8, 0xcc, 0x42, 0xb4, - 0xde, 0xe0, 0x91, 0xb9, 0x06, 0x0c, 0x11, 0x17, 0x25, 0x7a, 0x35, 0x57, - 0x51, 0x40, 0xf3, 0xc7, 0xc6, 0x4a, 0x69, 0x98, 0x2b, 0x2b, 0x3e, 0x5d, - 0x32, 0xd8, 0x8f, 0xb0, 0x1d, 0xee, 0x77, 0xe3, 0xaf, 0x4f, 0x71, 0x05, - 0x04, 0xd2, 0xff, 0x51, 0xed, 0xa4, 0x69, 0x50, 0x24, 0x2a, 0xe5, 0xaa, - 0xbb, 0xc6, 0x7a, 0x7f, 0xb2, 0xdf, 0x1d, 0xc2, 0x02, 0x2e, 0x52, 0xd1, - 0xd9, 0x5b, 0xe7, 0x6c, 0x50, 0x31, 0x4e, 0xdf, 0x8e, 0x3f, 0x37, 0xfc, - 0xf5, 0x34, 0x0e, 0xdb, 0x4c, 0x5d, 0x7d, 0xc8, 0xe4, 0x72, 0x40, 0xcb, - 0x95, 0xa5, 0x41, 0xeb, 0x78, 0x5f, 0x64, 0x20, 0x55, 0x19, 0xc7, 0xf9, - 0x9c, 0x71, 0x40, 0x8f, 0xcc, 0x2d, 0x86, 0xc0, 0xf4, 0x36, 0x2b, 0x0e, - 0x28, 
0xb4, 0xad, 0x1b, 0xde, 0x60, 0x67, 0x03, 0x0f, 0x7c, 0x18, 0xd9, - 0xc3, 0x73, 0x67, 0x0d, 0x44, 0x3d, 0xbe, 0x7c, 0xcf, 0x96, 0x22, 0x0b, - 0x0e, 0x3a, 0x0b, 0xcf, 0x04, 0x95, 0x92, 0x7d, 0x4b, 0xa2, 0x6a, 0x0b, - 0x47, 0x72, 0x73, 0xa8, 0x9b, 0x96, 0x3d, 0xc6, 0x03, 0x34, 0xb1, 0x69, - 0xc2, 0x50, 0x60, 0x89, 0x8c, 0x55, 0x8f, 0x8e, 0x74, 0xa8, 0x9e, 0x25, - 0xe4, 0x0e, 0x73, 0xef, 0x4f, 0x51, 0xbe, 0xed, 0x5c, 0x14, 0xd3, 0xfa, - 0x94, 0x58, 0x8d, 0x5c, 0xa0, 0xb1, 0xfc, 0x37, 0x6e, 0x9c, 0x9e, 0x61, - 0xe5, 0x12, 0x13, 0xb2, 0x88, 0xc6, 0xcf, 0x60, 0x3f, 0x0d, 0x51, 0x33, - 0x22, 0xfa, 0xfb, 0x2d, 0x2b, 0x8d, 0x43, 0x9b, 0x3d, 0x1e, 0x88, 0x24, - 0x50, 0x78, 0xf7, 0x7e, 0x45, 0xb1, 0x0f, 0xa9, 0xe6, 0x77, 0xf8, 0x78, - 0xff, 0x57, 0x6a, 0x05, 0x06, 0x0c, 0x7e, 0x1e, 0x7f, 0xe9, 0x90, 0xe8, - 0x61, 0x68, 0xbc, 0x9e, 0xc4, 0xe5, 0x06, 0x04, 0x76, 0xcc, 0x01, 0x57, - 0x1a, 0x55, 0x9e, 0x45, 0x26, 0xd6, 0xd8, 0xc2, 0x50, 0x25, 0xfc, 0x72, - 0x4e, 0x18, 0xbe, 0xf2, 0x2f, 0xc0, 0x1b, 0xc8, 0x14, 0xeb, 0x24, 0xda, - 0x15, 0x0a, 0x83, 0x38, 0xc5, 0xdd, 0xc9, 0xd7, 0x12, 0x35, 0x55, 0xdf, - 0x2c, 0x23, 0xea, 0x17, 0xca, 0xbf, 0x18, 0xc9, 0x80, 0x63, 0x4b, 0x77, - 0x8b, 0x17, 0x01, 0x05, 0x1b, 0xa3, 0x0b, 0x0f, 0xdd, 0xc6, 0xe0, 0xdf, - 0xc9, 0xa6, 0x8c, 0x50, 0x95, 0x8d, 0x6c, 0x96, 0x67, 0xff, 0x88, 0x38, - 0x3b, 0x76, 0x72, 0x11, 0x35, 0xa0, 0x1c, 0xc8, 0x96, 0x9c, 0xe5, 0x90, - 0x79, 0x0e, 0x62, 0x57, 0x00, 0xd9, 0x57, 0xf8, 0xa4, 0xc2, 0xc2, 0x0a, - 0x17, 0x8e, 0xd7, 0x03, 0x6d, 0x4d, 0x14, 0xb6, 0x96, 0x8a, 0x76, 0x67, - 0x58, 0xce, 0x9c, 0xb3, 0x10, 0x49, 0x06, 0xeb, 0x56, 0x43, 0x40, 0xcb, - 0xd4, 0xd7, 0x59, 0x42, 0xa4, 0xd7, 0x21, 0x6a, 0x51, 0x3d, 0x1c, 0x54, - 0xd7, 0xd6, 0xa2, 0xcf, 0xf8, 0xf6, 0x72, 0x35, 0x04, 0xa6, 0xe3, 0x53, - 0xca, 0xc5, 0x62, 0xee, 0xa9, 0xc3, 0x6d, 0x1b, 0xc4, 0xc5, 0xd9, 0xa7, - 0x37, 0xc2, 0x04, 0x01, 0xc9, 0x4a, 0x2e, 0x26, 0xdd, 0x12, 0x6e, 0x41, - 0x64, 0xb4, 0xe8, 0xe8, 0xc7, 0xf8, 0xab, 0x8a, 0xab, 0x1d, 0x7f, 0x2d, - 0x58, 
0xc2, 0xc4, 0xf0, 0x5d, 0x11, 0x35, 0x52, 0x88, 0xbc, 0x0f, 0x44, - 0x6e, 0x91, 0x1e, 0x87, 0xb4, 0xb1, 0x91, 0x52, 0x32, 0xe4, 0x38, 0x6d, - 0x5e, 0x8d, 0x30, 0xf0, 0xbc, 0xc3, 0x15, 0x80, 0x47, 0x36, 0x35, 0xb0, - 0x93, 0xf3, 0xc4, 0x82, 0xc7, 0x73, 0xc1, 0x67, 0x0c, 0x7a, 0x31, 0x36, - 0xbc, 0x73, 0x67, 0x66, 0xae, 0x48, 0x82, 0x27, 0x6e, 0x14, 0xd0, 0xd5, - 0x12, 0x10, 0xce, 0x5e, 0x37, 0xcd, 0x7e, 0xa5, 0xcb, 0xff, 0x91, 0xf0, - 0x62, 0xdb, 0x95, 0x74, 0x0c, 0x8c, 0x1e, 0x78, 0x11, 0x02, 0xb3, 0x02, - 0x0b, 0x31, 0xe7, 0x4e, 0x8b, 0x58, 0x6a, 0xde, 0x20, 0x93, 0x8b, 0x8e, - 0x62, 0x03, 0x24, 0xc9, 0xca, 0xf8, 0x44, 0x1d, 0x0c, 0x1b, 0xd8, 0x5d, - 0xcc, 0xe2, 0x8e, 0x02, 0xc6, 0x5c, 0x06, 0x45, 0xe6, 0x94, 0x8f, 0xa2, - 0x3e, 0xf5, 0xe9, 0xf5, 0x88, 0x87, 0xb2, 0x84, 0x1e, 0xb6, 0xb6, 0xfc, - 0x9f, 0x8e, 0x79, 0xf5, 0x4b, 0x24, 0x81, 0x3e, 0x5d, 0xf4, 0x10, 0x6e, - 0xdd, 0x8c, 0x8c, 0xae, 0xc6, 0x2c, 0x26, 0xb2, 0xfc, 0xf3, 0x99, 0xe8, - 0x8c, 0x65, 0x5d, 0x6c, 0xa8, 0x1d, 0x6f, 0x1e, 0x32, 0x0a, 0xee, 0x87, - 0xf6, 0xe1, 0xdd, 0x5e, 0x7f, 0x7a, 0x90, 0x8c, 0x3f, 0xe8, 0x47, 0x95, - 0x9b, 0xc8, 0x2c, 0x49, 0xc9, 0xe4, 0x2d, 0xea, 0x58, 0xfc, 0x29, 0x1a, - 0xb7, 0xa1, 0xf9, 0xb8, 0x84, 0x41, 0xa0, 0xf1, 0x77, 0x83, 0x56, 0x73, - 0x86, 0xea, 0xf4, 0xf5, 0x2a, 0xa6, 0x6b, 0x00, 0x64, 0x39, 0x08, 0x8f, - 0xf0, 0x22, 0x1a, 0x4c, 0xf2, 0x5a, 0xd0, 0xaa, 0x39, 0xae, 0x8a, 0xbc, - 0x03, 0x99, 0xf7, 0xcc, 0x80, 0xdf, 0x2b, 0x85, 0xbe, 0x1a, 0x97, 0x28, - 0x63, 0x04, 0x72, 0x75, 0x75, 0xb4, 0x9c, 0xd3, 0x17, 0xcc, 0x1e, 0xa1, - 0xd2, 0x47, 0x18, 0x45, 0xad, 0xb4, 0x0a, 0x32, 0x31, 0x36, 0x64, 0x48, - 0x3f, 0x7b, 0x4b, 0xc0, 0xd6, 0x78, 0x46, 0xaa, 0x90, 0x89, 0xf9, 0x36, - 0x3d, 0xb4, 0xb3, 0x50, 0x51, 0xd9, 0x55, 0x6f, 0xa9, 0xe7, 0x25, 0xaf, - 0xa0, 0xca, 0x9d, 0x45, 0x83, 0xc3, 0x0b, 0x2a, 0x0c, 0xf9, 0x3f, 0xe4, - 0x08, 0xf4, 0xbd, 0x23, 0x45, 0x85, 0xcf, 0x41, 0x93, 0xd3, 0x21, 0x5f, - 0x53, 0xa2, 0x5b, 0xa9, 0xf5, 0xe9, 0x8f, 0x2a, 0x2d, 0x53, 0x3c, 0x36, - 0x17, 
0xce, 0x37, 0x35, 0x3e, 0x9e, 0x6b, 0xbc, 0xba, 0xaa, 0xa5, 0x61, - 0x79, 0x98, 0x8e, 0xbd, 0x19, 0xf4, 0x5f, 0xa9, 0xb8, 0x96, 0xa2, 0xce, - 0x32, 0x00, 0xab, 0x51, 0xcb, 0xfa, 0x30, 0x3a, 0x83, 0x92, 0x91, 0xad, - 0x08, 0x61, 0x62, 0x51, 0x7f, 0x19, 0xa9, 0x2a, 0x84, 0xf2, 0xab, 0x7e, - 0x5e, 0xa7, 0x5a, 0x54, 0x7f, 0x68, 0x2a, 0x7b, 0x4f, 0xde, 0x45, 0x1d, - 0xef, 0x73, 0x5f, 0xc0, 0x40, 0x6e, 0xec, 0x6c, 0xe9, 0xa5, 0x6b, 0x46, - 0x54, 0x7c, 0x24, 0x8b, 0xa4, 0xe5, 0xb4, 0x82, 0x31, 0x1f, 0x3e, 0x79, - 0x2e, 0x21, 0x8c, 0xf1, 0xbd, 0xad, 0x7c, 0x28, 0xcc, 0xbd, 0x58, 0x72, - 0xe9, 0x6a, 0x04, 0x56, 0x67, 0x0f, 0x62, 0x98, 0x5a, 0x97, 0x4b, 0xe2, - 0x67, 0x70, 0xbb, 0x17, 0xb1, 0x84, 0x5b, 0xd4, 0x6e, 0xab, 0x90, 0x29, - 0x20, 0x93, 0x34, 0xa8, 0x03, 0x0f, 0xed, 0x1a, 0xf0, 0x1b, 0x92, 0x87, - 0x43, 0xa5, 0x6a, 0x1c, 0xdc, 0xd7, 0x22, 0x68, 0x83, 0x98, 0x74, 0x2a, - 0x4c, 0x51, 0xef, 0x71, 0x19, 0xd5, 0x3d, 0x05, 0x19, 0x61, 0xb2, 0x52, - 0xa8, 0x6e, 0xda, 0x72, 0x51, 0x66, 0x9f, 0xf0, 0x12, 0xf6, 0x18, 0x60, - 0xcc, 0xd7, 0x2f, 0x2e, 0x83, 0x14, 0x09, 0xdb, 0x55, 0x1c, 0xf2, 0xaf, - 0xfd, 0xa4, 0x40, 0xf1, 0x4a, 0xc7, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x01, 0x00, 0x00, 0x9c, 0x52, 0xff, 0x48, 0x06, 0x61, 0x76, 0x6d, - 0xd7, 0x44, 0xb1, 0x0c, 0x32, 0x62, 0x15, 0xa1, 0xc3, 0x97, 0x03, 0xdd, - 0xed, 0x20, 0x3c, 0x3a, 0x09, 0x16, 0xe5, 0x7d, 0x8c, 0xf9, 0x7b, 0x22, - 0x5e, 0x3a, 0xdd, 0xf0, 0xc6, 0xf0, 0x3a, 0xd4, 0x94, 0x85, 0x1c, 0x60, - 0x74, 0x91, 0xa3, 0xe2, 0x8a, 0xe5, 0x3e, 0xd4, 0x95, 0x28, 0x8b, 0x1a, - 0x7b, 0xbe, 0x07, 0xc0, 0xe3, 0x6b, 0xb9, 0x85, 0x82, 0x0b, 0x24, 0xba, - 0x1c, 0xfc, 0xc0, 0x0a, 0x21, 0x33, 0xad, 0x00, 0x19, 0xce, 0xb5, 0x8f, - 0x73, 0x05, 0xf1, 0xac, 0x03, 0xbe, 0x1f, 0x22, 0xd5, 0x32, 0x5e, 0x50, - 0xe3, 0xe0, 0x62, 0x26, 0xf4, 0xb0, 0x85, 0xd8, 0xf7, 0xa7, 0xf4, 0xa7, - 0xff, 0x10, 0xb8, 0xbc, 0xe0, 0x3e, 0x4d, 0xcb, 0x37, 0x74, 0xcc, 0x85, - 0xed, 0xa0, 0x34, 0x6c, 0xfa, 0x37, 0x84, 0x6a, 0x94, 0x55, 0x3b, 0x1e, - 0x14, 
0xab, 0x26, 0x7b, 0x3e, 0xac, 0xc3, 0x79, 0xcd, 0x1b, 0x00, 0x02, - 0xb3, 0x01, 0xc3, 0x10, 0xdd, 0x56, 0x7d, 0x0e, 0x69, 0x39, 0x3c, 0x17, - 0xa3, 0xae, 0x9c, 0x2d, 0xc7, 0x5a, 0x0b, 0x7c, 0xd0, 0xac, 0xa1, 0x91, - 0x6a, 0x6d, 0xc0, 0x3f, 0x98, 0xf1, 0x21, 0xf5, 0xa5, 0x7c, 0xbc, 0x70, - 0x0d, 0x7b, 0x2f, 0x0d, 0x5a, 0xa5, 0x4a, 0x5a, 0xff, 0x51, 0xbf, 0x7f, - 0xb5, 0x4f, 0x2c, 0xba, 0xa9, 0x46, 0x81, 0x6b, 0xac, 0xc6, 0x62, 0x2d, - 0xd7, 0xb5, 0x04, 0x5f, 0xd4, 0x5f, 0x1f, 0x6b, 0x11, 0x7d, 0xe3, 0x58, - 0x1f, 0xb5, 0xbf, 0x16, 0x43, 0x88, 0x05, 0xf5, 0xa4, 0x7b, 0xb5, 0x0e, - 0xf4, 0x01, 0xb6, 0x90, 0x69, 0x52, 0x0a, 0x5e, 0x9b, 0x87, 0x51, 0x5e, - 0xd5, 0xed, 0x2c, 0xcc, 0x58, 0xad, 0xe6, 0x77, 0xa2, 0xc5, 0x7c, 0x1e, - 0xc5, 0x92, 0xbe, 0xed, 0x3a, 0x9a, 0x97, 0xed, 0x56, 0xc8, 0xff, 0xff, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x16, 0xe8, 0x24, 0xe3, - 0x82, 0x36, 0x8e, 0x50, 0x45, 0xbe, 0xc6, 0x10, 0x02, 0xb9, 0x6d, 0xf9, - 0xed, 0x8f, 0x64, 0x35, 0x4d, 0x2c, 0x9f, 0x99, 0xdc, 0xee, 0xfa, 0x63, - 0x99, 0xc4, 0xb8, 0x3d, 0x77, 0xea, 0xda, 0xd5, 0x95, 0x8b, 0x8e, 0x76, - 0x02, 0x9c, 0x62, 0xa0, 0xad, 0xfe, 0x80, 0x61, 0x72, 0x59, 0xd6, 0x9f, - 0x16, 0x2e, 0x09, 0x71, 0xb8, 0xd7, 0x65, 0x25, 0xc2, 0x5b, 0x40, 0x67, - 0x8e, 0xd6, 0xf8, 0xdf, 0x67, 0x29, 0x19, 0xa2, 0xa6, 0x07, 0xf3, 0xc8, - 0x91, 0x7d, 0xf2, 0x50, 0x71, 0xba, 0x5c, 0x2d, 0xa7, 0xae, 0xc4, 0xd5, - 0xeb, 0xb9, 0x0d, 0x2d, 0x23, 0xe5, 0x8c, 0x65, 0xf5, 0xf8, 0x97, 0x69, - 0xde, 0x25, 0x6f, 0xea, 0x12, 0x72, 0x3e, 0xb9, 0xa7, 0x8d, 0xcf, 0xa5, - 0x66, 0xee, 0x4e, 0x2e, 0x66, 0x6b, 0xec, 0x77, 0x7f, 0x53, 0xdc, 0x29, - 0x73, 0x5e, 0xe9, 0x2f, 0x79, 0xac, 0x8d, 0x0f, 0x44, 0x09, 0x5d, 0x25, - 0x1d, 0x78, 0xb6, 0xe9, 0xd0, 0xfa, 0x8f, 0x5f, 0x9c, 0xf0, 0xe0, 0xfc, - 0x62, 0x9f, 0x52, 0x6b, 0x5b, 0x8e, 0x3f, 0xdf, 0xb4, 0xf1, 0xdf, 0x35, - 0xd0, 0x8f, 0x5a, 0xc9, 0x1f, 0x08, 0x86, 0xaa, 0x5a, 0x9e, 0xe8, 0xb0, - 0xaa, 0xd4, 0xcd, 0x2a, 0x5b, 0x4f, 0x7f, 0x39, 0x9f, 0x7f, 0x21, 0xf2, - 0xfd, 
0x05, 0x96, 0x53, 0x09, 0xfd, 0x36, 0x4c, 0xcd, 0x98, 0x74, 0xf5, - 0xbd, 0xcd, 0x9e, 0x14, 0x15, 0x05, 0xb9, 0x3d, 0x5f, 0x8a, 0x02, 0x86, - 0x10, 0xd7, 0xd4, 0x01, 0x20, 0xd9, 0x8c, 0x65, 0x7d, 0x9d, 0x39, 0x25, - 0xbc, 0xce, 0x1a, 0xb1, 0x76, 0x92, 0xc3, 0x03, 0xed, 0xa2, 0x41, 0x31, - 0x0d, 0xc0, 0x40, 0x94, 0x01, 0xbc, 0x9b, 0xe9, 0x5e, 0x3e, 0x8c, 0x49, - 0xf6, 0x98, 0x0c, 0x39, 0x79, 0xdc, 0xd1, 0x1b, 0xc5, 0xb2, 0x20, 0xb4, - 0x6c, 0xb4, 0x4f, 0xce, 0xf4, 0x6c, 0x0b, 0xef, 0x85, 0xf2, 0x7d, 0x9a, - 0x90, 0x58, 0x1b, 0x51, 0x56, 0x52, 0xac, 0x75, 0x9f, 0x17, 0xe6, 0x48, - 0xaf, 0x18, 0x4c, 0xd8, 0x67, 0xe8, 0xd2, 0x61, 0xbc, 0xa0, 0x95, 0xc9, - 0x78, 0xd8, 0xa2, 0x1d, 0x47, 0x59, 0x30, 0xcf, 0xf3, 0x79, 0x06, 0xd4, - 0x25, 0xf8, 0x9c, 0x5c, 0x28, 0xee, 0xb0, 0xd2, 0xb6, 0xaf, 0x34, 0x0e, - 0xe5, 0xe4, 0x16, 0x2e, 0x05, 0x45, 0x23, 0xc1, 0x88, 0x90, 0x4a, 0x8f, - 0xff, 0xfb, 0xe2, 0xc0, 0xb7, 0xae, 0xb5, 0x50, 0xc9, 0x26, 0xf0, 0xa2, - 0xf5, 0x21, 0x23, 0x79, 0x23, 0xb6, 0x8f, 0x57, 0x64, 0xd1, 0x27, 0xc2, - 0x07, 0x63, 0xa6, 0x54, 0x1f, 0x2f, 0xca, 0x16, 0xb8, 0x28, 0x51, 0x2a, - 0x92, 0xe0, 0x06, 0x36, 0x55, 0x00, 0x6c, 0x99, 0x31, 0xa7, 0x56, 0xb3, - 0x7b, 0x15, 0xcd, 0xc1, 0x32, 0x3a, 0xc0, 0x37, 0x1f, 0xea, 0x29, 0xb6, - 0x75, 0xdf, 0x8a, 0x17, 0x09, 0x45, 0xc2, 0x6e, 0xe2, 0x4c, 0xa5, 0x93, - 0x9b, 0x17, 0x08, 0x27, 0x75, 0x33, 0xdb, 0x1f, 0xab, 0x37, 0xad, 0x8e, - 0xaa, 0xef, 0x0b, 0x82, 0xaa, 0xa7, 0xae, 0x2c, 0x43, 0x4d, 0x8f, 0xa0, - 0x43, 0xd7, 0xa1, 0x34, 0xeb, 0xc0, 0x4e, 0xbd, 0x64, 0xfc, 0xc8, 0x6a, - 0x56, 0xa8, 0xfc, 0x9e, 0x2d, 0x5f, 0x7a, 0xa3, 0x72, 0x06, 0x79, 0x38, - 0x33, 0x05, 0xa7, 0xf0, 0x09, 0x48, 0x55, 0xfe, 0x3f, 0xab, 0x25, 0x8e, - 0x76, 0x1d, 0x12, 0x5a, 0x20, 0x68, 0xfb, 0x51, 0x51, 0x33, 0x40, 0x37, - 0x0c, 0x90, 0x98, 0x6f, 0x66, 0x3f, 0x40, 0xa2, 0x2e, 0x3c, 0xd1, 0x22, - 0x51, 0x54, 0x25, 0x7e, 0x4c, 0x5d, 0x96, 0xb2, 0x65, 0x0f, 0xa3, 0xdf, - 0x8e, 0x97, 0xfe, 0xeb, 0xe7, 0xc6, 0x22, 0x2a, 0x47, 0x3a, 0x78, 0x1b, - 0x39, 
0x2e, 0xd6, 0xbc, 0x35, 0xb4, 0xf4, 0xc3, 0xf2, 0x6a, 0x12, 0xc9, - 0xe7, 0x6c, 0x9a, 0xfc, 0xed, 0xbc, 0x11, 0xc7, 0x71, 0x09, 0x8f, 0x56, - 0xc1, 0xd8, 0xb6, 0x92, 0x35, 0x97, 0x8e, 0x71, 0xd2, 0xbb, 0xb4, 0xed, - 0xf0, 0x7e, 0xff, 0x58, 0xd9, 0x95, 0x26, 0xea, 0xa9, 0x4d, 0x38, 0x8d, - 0x4e, 0x8e, 0x53, 0xae, 0x7e, 0xe6, 0xe6, 0x82, 0x35, 0x96, 0xab, 0x0f, - 0x04, 0x0f, 0xf2, 0xac, 0x1b, 0xcd, 0x07, 0x17, 0x1b, 0x25, 0x2f, 0x92, - 0xaf, 0x19, 0xa2, 0x1b, 0xa0, 0x7a, 0xc7, 0x4f, 0xb8, 0x1b, 0x89, 0x21, - 0xb5, 0xe2, 0x24, 0xe9, 0x78, 0xae, 0x7d, 0xd7, 0xcc, 0x8e, 0x3f, 0xa7, - 0xe9, 0xbe, 0xe6, 0x79, 0x0f, 0xdf, 0x86, 0xe9, 0xb9, 0xcd, 0x82, 0x7b, - 0xf5, 0x04, 0x89, 0xa0, 0x73, 0x5d, 0xa2, 0x4e, 0xd6, 0xa0, 0x60, 0x21, - 0xe2, 0xfe, 0xd3, 0xf4, 0x19, 0x8b, 0x6a, 0x03, 0x12, 0x9c, 0x51, 0x9a, - 0x41, 0x4e, 0xf6, 0xb4, 0x6e, 0x0c, 0x43, 0xf5, 0x00, 0x00, 0x78, 0x12, - 0xdd, 0x21, 0xa8, 0xc7, 0x21, 0xa1, 0x4e, 0x44, 0x10, 0xd0, 0xdb, 0x6f, - 0x0b, 0x4c, 0xe7, 0x7a, 0x8c, 0x0c, 0xaa, 0xb6, 0x9a, 0x7d, 0xa9, 0xff, - 0x5a, 0x2e, 0x15, 0x9e, 0x6f, 0xea, 0xe1, 0x42, 0x0c, 0x9c, 0x5a, 0x3b, - 0xd5, 0xe6, 0xde, 0x23, 0x3f, 0x9c, 0x45, 0x20, 0x67, 0x96, 0x50, 0x16, - 0x80, 0x42, 0xe7, 0x67, 0x7d, 0x24, 0xdc, 0x00, 0xaa, 0x01, 0x8a, 0xa3, - 0x61, 0xfe, 0x9a, 0xce, 0xc1, 0xe5, 0x2e, 0x19, 0x85, 0x04, 0xe6, 0x7b, - 0xe8, 0x7a, 0xbc, 0x9d, 0xfe, 0x71, 0x29, 0x1d, 0x17, 0xae, 0x6b, 0x1a, - 0x64, 0xd7, 0xfe, 0x18, 0x29, 0x07, 0x9b, 0x49, 0x43, 0xba, 0x29, 0x37, - 0xa8, 0xb0, 0x26, 0x27, 0x6b, 0x7d, 0xde, 0x49, 0x12, 0x90, 0x05, 0xe2, - 0x2c, 0xd8, 0x08, 0xd0, 0x5d, 0x74, 0xa7, 0x15, 0xbe, 0x34, 0x34, 0x6d, - 0xad, 0xfb, 0xa8, 0x01, 0x4a, 0x6c, 0x98, 0xba, 0x84, 0x38, 0xbd, 0x05, - 0xe8, 0x87, 0x27, 0x91, 0x3f, 0xb8, 0xe9, 0x06, 0x27, 0xda, 0x56, 0x07, - 0xaa, 0xea, 0xf4, 0x80, 0x5c, 0x12, 0x44, 0xbe, 0x23, 0xb3, 0x63, 0x9f, - 0x5f, 0x37, 0xa7, 0x53, 0x4c, 0xfc, 0x4d, 0x87, 0xeb, 0x91, 0xe8, 0xd7, - 0x5a, 0xd6, 0xca, 0x67, 0x2d, 0x2f, 0x5a, 0x0e, 0xc7, 0x82, 0x78, 0xa4, - 0xf3, 
0x56, 0x07, 0xa5, 0xab, 0x6d, 0x09, 0xd2, 0x0d, 0x08, 0x6b, 0x6e, - 0x1f, 0xc1, 0xf2, 0x91, 0x1a, 0x39, 0xfe, 0x14, 0x56, 0x3f, 0xeb, 0x9f, - 0x14, 0xc2, 0xb3, 0xb2, 0xc2, 0x8d, 0xc2, 0xee, 0x7e, 0xf0, 0x7d, 0x92, - 0xd2, 0xc3, 0x57, 0x3e, 0x2c, 0x07, 0x1b, 0x6a, 0x9b, 0x3b, 0x79, 0x59, - 0xc9, 0x22, 0x96, 0x6c, 0x3e, 0x37, 0xd3, 0x0e, 0x5c, 0xf6, 0x8f, 0xa9, - 0xaa, 0xc9, 0xa4, 0x4b, 0xaf, 0x5d, 0x1a, 0xb6, 0xf3, 0x91, 0x32, 0x4f, - 0xca, 0x72, 0xa0, 0x42, 0x01, 0x51, 0xaf, 0x19, 0x89, 0xc4, 0xcc, 0x9b, - 0xf3, 0x52, 0xe9, 0xa6, 0xf2, 0x71, 0x6f, 0x5a, 0x38, 0x02, 0xb8, 0x75, - 0x88, 0x5f, 0x8d, 0x12, 0xc5, 0x55, 0x4f, 0xd1, 0xba, 0xf2, 0x24, 0xdc, - 0x63, 0x5f, 0x93, 0xc7, 0xf3, 0xe7, 0x59, 0xac, 0xc3, 0xed, 0xbc, 0x02, - 0xe3, 0xad, 0xb2, 0x8e, 0x2c, 0x2d, 0x47, 0xb4, 0x34, 0x8d, 0xae, 0x44, - 0xc8, 0x5f, 0x14, 0xe8, 0x8e, 0x7b, 0xc3, 0x60, 0x53, 0x9a, 0x51, 0xea, - 0x7f, 0x2f, 0xb6, 0x62, 0x61, 0xf7, 0xc0, 0x18, 0x0f, 0x20, 0x79, 0x13, - 0x5c, 0xe8, 0xca, 0x04, 0x29, 0x5f, 0x70, 0x4d, 0x88, 0xa2, 0x43, 0x20, - 0x57, 0x33, 0x04, 0x74, 0x8e, 0x7c, 0x89, 0xd4, 0x56, 0x8f, 0x93, 0x86, - 0x81, 0x6c, 0x11, 0xfc, 0x32, 0x0e, 0xb0, 0x3e, 0xe5, 0x13, 0xbf, 0x76, - 0x62, 0xcc, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, - 0x0e, 0xf8, 0x8f, 0xde, 0xfd, 0xfd, 0xcf, 0xd1, 0x6f, 0x9f, 0xf2, 0xb6, - 0xb6, 0x59, 0xb2, 0x73, 0x1c, 0x3c, 0x0d, 0xb0, 0x4d, 0xb8, 0x96, 0xc6, - 0xeb, 0xe5, 0xf8, 0x0d, 0x3e, 0xd7, 0x0c, 0xbd, 0x9c, 0xaa, 0xd5, 0x1c, - 0x19, 0x9a, 0x4c, 0x8e, 0xfa, 0xac, 0x68, 0x74, 0x16, 0x06, 0xb5, 0x49, - 0xe7, 0xd5, 0x6f, 0x4f, 0xcc, 0xd9, 0x02, 0x74, 0xd6, 0x08, 0x73, 0x7c, - 0xa9, 0xfa, 0x3e, 0x50, 0x87, 0xf7, 0xfb, 0xa6, 0x94, 0xdc, 0xb1, 0x40, - 0xec, 0xa7, 0xa9, 0x39, 0xff, 0x40, 0x4a, 0x97, 0x9b, 0xcc, 0x57, 0x66, - 0x68, 0xd6, 0xa8, 0x4d, 0x13, 0x06, 0x0e, 0x03, 0xc4, 0xdf, 0x7a, 0xe4, - 0x2f, 0x0e, 0xd7, 0x54, 0xe0, 0xbd, 0x93, 0xeb, 0x82, 0xd8, 0x05, 0x2d, - 0xa2, 0xf0, 0x4e, 0xd0, 0xf9, 0x3e, 0x3e, 0x6b, 0x3d, 0x08, 0x39, 0x4e, - 0x35, 
0x13, 0x7b, 0x3b, 0x39, 0x2c, 0x47, 0x2c, 0x61, 0x9f, 0xfd, 0x59, - 0x88, 0x5f, 0x65, 0x08, 0xa9, 0x66, 0xec, 0xb5, 0x21, 0xf3, 0xe9, 0xba, - 0x11, 0x63, 0x24, 0x6c, 0xf4, 0x50, 0x3a, 0xe5, 0x0c, 0x06, 0x39, 0x69, - 0x2f, 0xca, 0x0f, 0x48, 0xbe, 0x95, 0x7d, 0x13, 0x3d, 0xa5, 0x75, 0x69, - 0x85, 0xc8, 0xb3, 0x72, 0x72, 0x3c, 0x4f, 0x96, 0xe7, 0xb7, 0xbd, 0xe7, - 0x76, 0xba, 0xac, 0xc0, 0x07, 0x4d, 0xc1, 0xed, 0xb9, 0xf0, 0x91, 0x2e, - 0x36, 0xb7, 0x5b, 0x1c, 0xb7, 0xd6, 0xb3, 0x45, 0x7d, 0x0a, 0xf5, 0x43, - 0xdd, 0x7a, 0x8b, 0x4e, 0x18, 0xf2, 0xf3, 0x19, 0xcd, 0x4a, 0xda, 0x3c, - 0x1b, 0x05, 0x27, 0x67, 0x43, 0xa9, 0x8e, 0xe7, 0x4a, 0x95, 0xa9, 0xad, - 0x6c, 0x8c, 0xb2, 0x2e, 0x12, 0xcb, 0xf3, 0xeb, 0x65, 0x26, 0xf4, 0x3e, - 0x86, 0xee, 0x7e, 0xd9, 0xba, 0xce, 0x8d, 0x15, 0x3e, 0xa8, 0x40, 0x59, - 0x1d, 0x27, 0x78, 0x75, 0xf0, 0xf9, 0x33, 0xb5, 0x32, 0xa9, 0x66, 0xe6, - 0x2e, 0x2e, 0x3d, 0xf5, 0x4a, 0xf0, 0x97, 0x2d, 0xe7, 0x43, 0x85, 0x43, - 0x61, 0x25, 0x15, 0x13, 0x9e, 0x8e, 0xf6, 0x78, 0xe8, 0x67, 0xba, 0xc2, - 0x6d, 0xda, 0x46, 0x25, 0x76, 0xd9, 0x9b, 0x69, 0x95, 0x4b, 0x50, 0x8c, - 0xb7, 0x36, 0x49, 0xbc, 0xd7, 0x39, 0x69, 0xb9, 0xc1, 0x5f, 0x5f, 0xcc, - 0x83, 0x4c, 0x16, 0xb8, 0x0c, 0x85, 0xf1, 0xa4, 0x57, 0x6c, 0x22, 0x1f, - 0x60, 0x0c, 0xff, 0xb6, 0xc9, 0xf7, 0x21, 0x2d, 0x35, 0x78, 0x31, 0x79, - 0xd0, 0x6d, 0x61, 0xec, 0x61, 0x04, 0x75, 0x5c, 0x06, 0xc3, 0x53, 0x1b, - 0xb5, 0xdc, 0x23, 0xb9, 0xd9, 0x07, 0xd1, 0xd0, 0xb3, 0xa5, 0xab, 0xd9, - 0xbe, 0xb7, 0xdc, 0xae, 0x3f, 0x3e, 0xd7, 0x2a, 0x79, 0x3f, 0x9c, 0x27, - 0x81, 0x8d, 0x61, 0xe8, 0x46, 0x8f, 0x05, 0xf4, 0x9c, 0x30, 0x35, 0x9a, - 0x2f, 0x62, 0x84, 0x7c, 0xa5, 0x95, 0x68, 0x34, 0xe6, 0xf0, 0xb9, 0x42, - 0xd4, 0x37, 0xc6, 0xd2, 0x35, 0x1f, 0x7b, 0xe0, 0xa6, 0x92, 0xcf, 0xf7, - 0x0f, 0x08, 0x10, 0x79, 0xbd, 0xa8, 0x7c, 0x4e, 0xef, 0xf1, 0x01, 0x8d, - 0x1b, 0x0c, 0x98, 0x46, 0x28, 0xdc, 0xd5, 0xa8, 0xcf, 0x67, 0x7d, 0x87, - 0x2a, 0x8f, 0xdd, 0x52, 0x43, 0x5a, 0x55, 0x80, 0x88, 0xa6, 0xcd, 0x9c, - 0x5d, 
0x36, 0xae, 0xef, 0x61, 0x43, 0xec, 0xf0, 0x7f, 0x92, 0x21, 0x1f, - 0xa2, 0xa3, 0x76, 0x0e, 0x5d, 0xf3, 0xa7, 0xe7, 0x7d, 0xb0, 0x2c, 0x94, - 0x36, 0x95, 0x34, 0x4e, 0x04, 0xfb, 0x51, 0xf9, 0xe6, 0x7e, 0x56, 0x7a, - 0x59, 0xce, 0x0a, 0x45, 0x7e, 0xeb, 0xc4, 0xbc, 0xfd, 0x20, 0xaa, 0x34, - 0x6b, 0xee, 0x3b, 0x09, 0xe8, 0x00, 0x4b, 0xfc, 0x68, 0x24, 0x43, 0xdb, - 0x09, 0x58, 0xd0, 0xb6, 0xbf, 0xaf, 0x1d, 0x7f, 0x8a, 0x4c, 0x9e, 0x51, - 0x97, 0x97, 0xe1, 0x0c, 0x0d, 0xaf, 0xd1, 0x1e, 0x62, 0xad, 0x70, 0xa5, - 0x8a, 0x24, 0x2f, 0x4a, 0xa6, 0x55, 0xb1, 0x44, 0x09, 0x88, 0xab, 0xa5, - 0x45, 0x28, 0xa0, 0x34, 0x9e, 0x14, 0x2c, 0xf9, 0x0f, 0xb8, 0x33, 0x8f, - 0xcc, 0xba, 0x50, 0x34, 0x4c, 0x96, 0x89, 0x09, 0xb9, 0xa8, 0xfb, 0xac, - 0x59, 0x73, 0xea, 0x61, 0xbc, 0x0d, 0x24, 0x3a, 0x20, 0xc2, 0x76, 0xfc, - 0x2e, 0xce, 0xfb, 0x75, 0x00, 0xca, 0x58, 0xbd, 0xab, 0x61, 0x9b, 0x13, - 0x2b, 0xa3, 0xf6, 0x15, 0x55, 0x83, 0x23, 0xc4, 0xf3, 0x4c, 0x89, 0xc5, - 0x4a, 0x18, 0x5c, 0x8d, 0x41, 0xcc, 0x06, 0x7b, 0xe3, 0x2a, 0x1f, 0x6a, - 0x57, 0xbc, 0x54, 0x61, 0x0c, 0xf2, 0xec, 0xbf, 0xb0, 0xf0, 0x21, 0xde, - 0xfc, 0xe4, 0xef, 0xce, 0x47, 0xc8, 0xdc, 0x11, 0xc7, 0x8a, 0x12, 0x97, - 0x68, 0x1d, 0x9e, 0x9a, 0xbf, 0xad, 0x62, 0x7e, 0x4b, 0x88, 0xd7, 0x20, - 0x22, 0xce, 0x5e, 0xe3, 0x87, 0x12, 0xa3, 0x05, 0xef, 0x1f, 0x05, 0xb1, - 0xbd, 0x1b, 0x80, 0x43, 0x84, 0x33, 0x8b, 0x87, 0xa5, 0xc2, 0xe1, 0x49, - 0xa8, 0x75, 0x49, 0x9b, 0x1b, 0x64, 0x8a, 0xd0, 0x86, 0x10, 0xa8, 0x72, - 0xeb, 0x2e, 0xe7, 0x3f, 0xaa, 0x6b, 0x4a, 0x22, 0xae, 0x17, 0x8f, 0x10, - 0x22, 0x03, 0x66, 0x67, 0x35, 0x40, 0x29, 0x1e, 0xf2, 0x05, 0x36, 0xd5, - 0xed, 0xe2, 0x2a, 0xcc, 0x77, 0xe2, 0x16, 0xef, 0xa7, 0x9b, 0xe1, 0x1b, - 0xba, 0xf3, 0xf5, 0x74, 0x6c, 0x2a, 0x98, 0x8a, 0x14, 0xaf, 0x2c, 0xab, - 0xfb, 0x51, 0x53, 0x75, 0x17, 0xcb, 0x5c, 0x86, 0xb5, 0x60, 0x70, 0x29, - 0x65, 0x69, 0x49, 0x42, 0x4f, 0x42, 0x6b, 0xc7, 0xdb, 0x98, 0x7d, 0x1e, - 0xf8, 0x45, 0xb2, 0x33, 0xd6, 0x34, 0x26, 0xa6, 0x7f, 0x76, 0x31, 0x13, - 0x13, 
0x9d, 0xd2, 0xb0, 0x30, 0x0b, 0x0b, 0x3e, 0x1a, 0x84, 0xb0, 0xbd, - 0x81, 0x34, 0x25, 0x73, 0x99, 0x87, 0x1a, 0xc8, 0x44, 0x34, 0x9d, 0x1a, - 0x3d, 0x76, 0x44, 0x1d, 0xe2, 0x22, 0xad, 0x3d, 0xb2, 0xa3, 0x1c, 0xd5, - 0x27, 0x8c, 0xc6, 0x84, 0xdf, 0x33, 0xbe, 0xb2, 0xa7, 0xb9, 0xc5, 0x6e, - 0x48, 0xdc, 0xe9, 0xf8, 0xef, 0xfc, 0xaa, 0x1f, 0x5e, 0x41, 0x48, 0x1e, - 0xe0, 0xb9, 0xd6, 0x6e, 0x7a, 0x9c, 0xa3, 0x98, 0x4b, 0xfa, 0x90, 0xa4, - 0x58, 0x33, 0x85, 0x3b, 0x11, 0x44, 0x83, 0x4b, 0x1e, 0x0e, 0x5d, 0x11, - 0x36, 0x15, 0xe1, 0xbf, 0x15, 0x04, 0x8e, 0x88, 0xc6, 0x18, 0x53, 0xc3, - 0x8d, 0x28, 0x86, 0x25, 0xef, 0x55, 0x7b, 0xf6, 0x85, 0xf8, 0xed, 0x3b, - 0xcf, 0x5d, 0xa6, 0xc7, 0x66, 0xb7, 0xbe, 0x14, 0xf0, 0x62, 0x89, 0x1f, - 0x32, 0x1e, 0x86, 0x2a, 0x93, 0xd5, 0xca, 0x37, 0x03, 0x0b, 0xf8, 0x0f, - 0xca, 0x50, 0x6c, 0x16, 0x2b, 0xf0, 0x77, 0xca, 0xbb, 0x8e, 0x95, 0x11, - 0xef, 0x5b, 0xbe, 0x2f, 0x62, 0x50, 0xb8, 0x3d, 0xff, 0xfa, 0x30, 0x21, - 0xb2, 0x86, 0x3f, 0x50, 0x57, 0x98, 0x79, 0x15, 0xce, 0x3e, 0xbf, 0x49, - 0x58, 0xb0, 0xb5, 0xd7, 0xbe, 0x01, 0x55, 0xee, 0x60, 0x14, 0x9d, 0x5b, - 0x57, 0x48, 0x05, 0x72, 0x6a, 0x23, 0x29, 0xeb, 0xf3, 0x36, 0x2a, 0xc1, - 0xda, 0x5e, 0x4a, 0x63, 0xc4, 0x6b, 0x04, 0xe8, 0xe8, 0xc1, 0xb5, 0xc4, - 0x2d, 0x60, 0x1f, 0xa0, 0x2b, 0x33, 0xa5, 0xb7, 0x82, 0x59, 0x21, 0xba, - 0x13, 0xda, 0x79, 0xda, 0x5a, 0xb1, 0x82, 0x5b, 0x52, 0x7f, 0x0c, 0x70, - 0x75, 0x65, 0xe0, 0x44, 0xb3, 0xca, 0xd0, 0x09, 0x38, 0x24, 0x83, 0x8e, - 0x0c, 0x4c, 0xef, 0x96, 0xe4, 0x04, 0x30, 0x46, 0x23, 0x6a, 0x28, 0x13, - 0x1d, 0x37, 0x14, 0x75, 0x6e, 0xd0, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, - 0x40, 0x00, 0x00, 0x00, 0x21, 0xa2, 0xf0, 0x7d, 0x29, 0x8f, 0x62, 0x2e, - 0xf4, 0x0e, 0x14, 0x9b, 0x60, 0x38, 0xc0, 0x95, 0xfb, 0x3c, 0x90, 0x5a, - 0xa0, 0x1f, 0x30, 0x09, 0xfc, 0x6d, 0xa9, 0xd1, 0x7b, 0x0b, 0x7c, 0x78, - 0xf9, 0xf6, 0xa8, 0x5e, 0xa6, 0x7a, 0xf6, 0x1c, 0xab, 0x1b, 0x0e, 0xa9, - 0x08, 0xfd, 0xd9, 0x97, 0x08, 0x24, 0x2b, 0xda, 0x08, 0x8b, 0x0c, 0x07, - 0x70, 
0x15, 0xa8, 0x0c, 0x86, 0xfc, 0xd1, 0x84, 0xba, 0xd0, 0xff, 0xff, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x35, 0x7a, 0xab, 0xaa, - 0xbe, 0xd7, 0xad, 0x22, 0x99, 0x46, 0xbb, 0x78, 0xfd, 0x47, 0x8f, 0x2a, - 0x4a, 0xa6, 0x2f, 0x8d, 0x15, 0x07, 0xed, 0x26, 0x1d, 0xb3, 0x12, 0xd3, - 0x88, 0x0f, 0xf1, 0x75, 0x2a, 0x07, 0x62, 0xac, 0xbf, 0x52, 0x4a, 0xc3, - 0x12, 0xe5, 0x3c, 0xea, 0xa6, 0x1e, 0x57, 0x90, 0x56, 0x60, 0x7d, 0xcf, - 0x4b, 0x65, 0xaf, 0xee, 0x17, 0x56, 0xbe, 0xd2, 0x38, 0x3f, 0xd6, 0xbc, - 0xef, 0xa7, 0x32, 0xb7, 0x10, 0xe9, 0xbd, 0x97, 0x45, 0x92, 0x3c, 0xd3, - 0x35, 0x2e, 0x59, 0x37, 0x65, 0x5c, 0x7f, 0xd0, 0x99, 0x9c, 0x01, 0xe9, - 0x1f, 0x65, 0xe9, 0xec, 0x0f, 0x2d, 0x46, 0xbc, 0xd4, 0x8f, 0x51, 0x1c, - 0xa0, 0xa4, 0x9b, 0x4f, 0x95, 0x54, 0xb0, 0x50, 0x74, 0xfa, 0x0f, 0xe6, - 0x55, 0x81, 0xce, 0x0f, 0xd1, 0x25, 0x56, 0xc8, 0x2f, 0x3a, 0x65, 0xd4, - 0x86, 0x4a, 0x8e, 0xff, 0x5a, 0xcc, 0x67, 0x96, 0xcc, 0x65, 0x0d, 0x20, - 0xee, 0xba, 0x6b, 0xcb, 0xde, 0x10, 0x2f, 0xbf, 0x67, 0x6d, 0xbe, 0xef, - 0x72, 0xfc, 0x25, 0x62, 0xbf, 0xbb, 0xc5, 0xe0, 0x7b, 0x4c, 0x32, 0xc5, - 0xdb, 0x9f, 0xb5, 0xe2, 0x75, 0x8a, 0xba, 0xbb, 0x69, 0x28, 0xb6, 0x41, - 0x25, 0x83, 0x67, 0x35, 0x1b, 0xd7, 0xb3, 0xd7, 0x58, 0x54, 0x8a, 0x0b, - 0x7c, 0xf3, 0x05, 0xcf, 0x2c, 0x78, 0x70, 0xc6, 0xed, 0x7e, 0x56, 0xb6, - 0x4e, 0x48, 0xaa, 0x57, 0xc4, 0xb0, 0xb2, 0xa0, 0xca, 0x50, 0xe1, 0xc7, - 0x41, 0xea, 0xac, 0x5f, 0x18, 0x13, 0xe5, 0x85, 0x78, 0x3f, 0x05, 0xf3, - 0xfd, 0x74, 0x7a, 0x42, 0x61, 0x91, 0x19, 0xc6, 0x19, 0xe9, 0xd2, 0x78, - 0x2c, 0xb1, 0xa3, 0x7f, 0x62, 0xea, 0x2a, 0x35, 0x1c, 0x55, 0xa3, 0xf7, - 0xdc, 0xec, 0x48, 0x23, 0x99, 0x8d, 0xe1, 0x4d, 0x45, 0xad, 0x92, 0xc6, - 0xf4, 0xa2, 0xe5, 0xe6, 0x58, 0xe4, 0xd5, 0x37, 0xd0, 0x47, 0x0b, 0x64, - 0x68, 0x48, 0x7e, 0xeb, 0xbe, 0x5e, 0x74, 0xd1, 0xc4, 0xa5, 0x60, 0xd0, - 0x30, 0x62, 0xbc, 0x81, 0xc4, 0x01, 0x68, 0x18, 0xf3, 0xac, 0x9d, 0xb1, - 0x4d, 0xdd, 0x8b, 0xd2, 0x54, 0x5d, 0xd1, 0x1c, 0xee, 0x75, 0x9e, 0x99, - 0x42, 
0x69, 0x38, 0xcc, 0x66, 0x24, 0xd9, 0x8f, 0x70, 0x98, 0xc3, 0x5e, - 0x08, 0xf0, 0xd8, 0x2d, 0xe6, 0x52, 0x48, 0xdf, 0xd0, 0x03, 0x04, 0x92, - 0xab, 0xa1, 0xa1, 0x2f, 0x7d, 0x84, 0xb2, 0x82, 0x51, 0x56, 0x74, 0x4a, - 0x94, 0xff, 0xd2, 0xe4, 0x4e, 0x1a, 0xbd, 0x18, 0xab, 0x33, 0x68, 0x0e, - 0x4f, 0x99, 0x1d, 0x7e, 0x02, 0x3f, 0x1f, 0x50, 0x05, 0xf8, 0x59, 0x47, - 0x97, 0x98, 0x60, 0xb1, 0x30, 0xb1, 0x14, 0xac, 0x2c, 0x0a, 0xa8, 0x97, - 0x83, 0xf5, 0x5a, 0x5c, 0x87, 0xe5, 0x36, 0x26, 0xec, 0xb4, 0x94, 0x46, - 0x9a, 0xad, 0x2b, 0x9a, 0xb7, 0xac, 0xc4, 0x1a, 0x55, 0x53, 0xc0, 0x16, - 0x91, 0x1c, 0xd6, 0xaa, 0x6b, 0xdd, 0x85, 0x6a, 0x54, 0xec, 0x7c, 0xa1, - 0xd5, 0x18, 0x00, 0x74, 0xd2, 0xf1, 0x7e, 0xad, 0x7c, 0xa8, 0x85, 0x9b, - 0xc0, 0x9f, 0x4f, 0x3b, 0xd9, 0x08, 0xc8, 0x9d, 0x31, 0x22, 0x7a, 0x53, - 0xa8, 0xbd, 0x00, 0xdf, 0xe8, 0x39, 0x52, 0xe9, 0x14, 0x74, 0x7b, 0x53, - 0xf9, 0xbd, 0x29, 0x8e, 0x5d, 0xf2, 0x35, 0x3b, 0xe3, 0x48, 0xbf, 0xa0, - 0xc4, 0x3d, 0x40, 0xb4, 0xf2, 0x7c, 0xd0, 0xe3, 0x17, 0x11, 0x5b, 0xd6, - 0x55, 0xd2, 0x54, 0xcf, 0x20, 0x8d, 0x74, 0x4a, 0x6b, 0xe9, 0x5d, 0xfe, - 0x72, 0x14, 0x6a, 0x11, 0x8b, 0x14, 0x19, 0xba, 0x63, 0xe4, 0x6b, 0x39, - 0xb4, 0x90, 0x67, 0x79, 0x56, 0x31, 0xd3, 0xb5, 0xeb, 0x9e, 0x95, 0x4b, - 0x1e, 0x04, 0x20, 0xd8, 0xbe, 0xe8, 0x1c, 0xd7, 0x95, 0xcb, 0x57, 0x60, - 0xe6, 0x11, 0x35, 0x42, 0x90, 0xfd, 0xb2, 0xe4, 0x9b, 0x24, 0x70, 0xc0, - 0xc3, 0xa9, 0x8a, 0xc9, 0x46, 0xd0, 0xea, 0xc9, 0x93, 0x7d, 0x9f, 0x64, - 0x12, 0x54, 0x09, 0xb7, 0xc2, 0x4d, 0x6e, 0xcc, 0x60, 0x07, 0x36, 0x31, - 0x64, 0x3d, 0x1e, 0xd3, 0x86, 0x47, 0x47, 0x42, 0x76, 0xb6, 0xf0, 0xe5, - 0xb4, 0xe7, 0xbe, 0x47, 0x91, 0x78, 0xbe, 0x06, 0xf1, 0x6e, 0x58, 0xce, - 0x32, 0x13, 0x26, 0x34, 0x92, 0xae, 0xb2, 0x29, 0xd0, 0x30, 0x55, 0xfd, - 0x89, 0x6a, 0xbf, 0x3e, 0xdf, 0x11, 0x39, 0xe4, 0xfd, 0x56, 0xd7, 0x2f, - 0x89, 0x96, 0x08, 0x54, 0xaa, 0xab, 0x8b, 0xfa, 0x65, 0xe5, 0x64, 0xff, - 0x24, 0x25, 0x8f, 0x7d, 0xf6, 0xb1, 0x7f, 0x2f, 0xa6, 0xf6, 0x46, 0xab, - 0x61, 
0xfd, 0x47, 0xad, 0x6d, 0x38, 0x6d, 0xc1, 0xe9, 0x4a, 0xf1, 0x85, - 0x05, 0x0e, 0x69, 0x48, 0x7c, 0xa6, 0x76, 0x61, 0xe3, 0x94, 0xf2, 0xd6, - 0x7a, 0x9c, 0x79, 0xc0, 0x2a, 0x51, 0x23, 0xc6, 0xaf, 0x29, 0x04, 0x0f, - 0x47, 0xc2, 0x93, 0xd7, 0x64, 0xe5, 0x37, 0x2e, 0x53, 0x3b, 0xb7, 0x7c, - 0x9c, 0xb4, 0x63, 0x13, 0xc7, 0x56, 0x90, 0xe9, 0x53, 0xd5, 0x86, 0x2b, - 0x96, 0x41, 0x42, 0x56, 0xc5, 0x16, 0xd7, 0x9e, 0x30, 0xce, 0xa1, 0x0d, - 0x93, 0x5d, 0x11, 0x07, 0xb2, 0x95, 0xfd, 0xf6, 0x0b, 0x28, 0x95, 0x1a, - 0x8f, 0xfa, 0xe1, 0x57, 0x7e, 0x06, 0xff, 0x18, 0xaf, 0xe3, 0x4f, 0x3c, - 0x34, 0x5b, 0xd4, 0x46, 0x1a, 0xd1, 0xd1, 0x7e, 0x55, 0xba, 0x5d, 0x2a, - 0x1f, 0x42, 0x49, 0x95, 0x75, 0x5f, 0x80, 0x60, 0x02, 0x01, 0xdb, 0x36, - 0xad, 0x68, 0x69, 0x1e, 0x0b, 0x90, 0x3f, 0xa6, 0xb6, 0x2f, 0x66, 0xa6, - 0x7d, 0x81, 0x8c, 0xa0, 0xee, 0x05, 0x95, 0xbc, 0xb3, 0x7c, 0x18, 0xd4, - 0x1b, 0x40, 0x96, 0xf5, 0x05, 0x9d, 0x27, 0x3b, 0x78, 0xfc, 0x19, 0x18, - 0xc0, 0x61, 0xa0, 0xd6, 0xf9, 0xc0, 0x3f, 0xe5, 0x48, 0x35, 0x0f, 0x8b, - 0x0d, 0xfb, 0x31, 0xb7, 0x32, 0x40, 0x1d, 0x69, 0x12, 0x5a, 0x23, 0xf0, - 0xce, 0xe9, 0x5e, 0xa6, 0x68, 0x6b, 0xe1, 0xe2, 0x68, 0x07, 0x02, 0x0d, - 0x7a, 0xc2, 0x0a, 0x40, 0x10, 0x5e, 0x94, 0xba, 0x77, 0x1d, 0xf7, 0xac, - 0xec, 0x79, 0xa9, 0xa1, 0x8a, 0xb8, 0x49, 0x32, 0x08, 0xe0, 0x18, 0xa8, - 0x3d, 0x69, 0x41, 0x5d, 0x30, 0x3b, 0xb6, 0x91, 0x46, 0x8d, 0x81, 0x10, - 0xb0, 0xc2, 0xed, 0xa0, 0x4e, 0x59, 0x48, 0xd8, 0x64, 0x7d, 0x2d, 0x46, - 0xf2, 0x8a, 0x2e, 0x5d, 0x0c, 0x4d, 0x9f, 0xfe, 0x7b, 0x5e, 0xbf, 0x1a, - 0x78, 0xdf, 0xfc, 0x0f, 0x04, 0x37, 0x72, 0x1a, 0x09, 0xb8, 0x6e, 0x1b, - 0xf1, 0x18, 0x7d, 0x83, 0x44, 0xaa, 0x9b, 0x71, 0xe1, 0x03, 0x04, 0x83, - 0xe5, 0xaa, 0xc0, 0xd4, 0xa7, 0x80, 0x10, 0x35, 0x09, 0xae, 0xf7, 0xe1, - 0x5e, 0x7c, 0x31, 0x20, 0x43, 0x82, 0xda, 0x07, 0x39, 0xfe, 0x8f, 0x9d, - 0x70, 0x3c, 0x57, 0x43, 0x01, 0x51, 0x37, 0x2e, 0x97, 0xef, 0xcf, 0x05, - 0x44, 0x75, 0x69, 0xf7, 0xdb, 0xda, 0x80, 0x78, 0x0c, 0xcc, 0xc1, 0x49, - 0xac, 
0x3b, 0x7e, 0x27, 0x6a, 0xbb, 0xdf, 0x45, 0x5b, 0x3b, 0x29, 0xf6, - 0x1b, 0xa9, 0x25, 0xf9, 0x2f, 0xcf, 0x37, 0x71, 0x33, 0xb4, 0x90, 0xd7, - 0x9b, 0x87, 0x41, 0x15, 0xd1, 0xa6, 0x39, 0xa7, 0xa9, 0xcd, 0x66, 0x29, - 0x59, 0xb4, 0x53, 0x12, 0xa1, 0x20, 0xd5, 0x04, 0xca, 0x40, 0x31, 0xfa, - 0x6f, 0xbb, 0x92, 0x04, 0xf3, 0xc2, 0x10, 0x0d, 0xc1, 0x19, 0x78, 0x8c, - 0x82, 0xed, 0x92, 0x3a, 0x6b, 0xd1, 0x3d, 0xe8, 0xac, 0x55, 0xe4, 0x8c, - 0xc6, 0xd4, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, - 0xc2, 0x1d, 0x86, 0xe4, 0xf6, 0xa1, 0xbe, 0xf5, 0xf3, 0x36, 0x9d, 0x32, - 0x80, 0x17, 0x3b, 0x1f, 0x18, 0x21, 0xed, 0xa7, 0xf5, 0xaf, 0xf1, 0x94, - 0xe2, 0xa7, 0x08, 0xd5, 0xca, 0x18, 0x45, 0xf5, 0x68, 0x94, 0x82, 0x61, - 0xf7, 0xb7, 0xb2, 0xfa, 0xd4, 0x5e, 0x32, 0xd0, 0xf0, 0x20, 0x66, 0x83, - 0xd1, 0x6b, 0x3c, 0xdf, 0x73, 0xeb, 0x73, 0x82, 0x09, 0x9b, 0xd0, 0xc5, - 0xb0, 0x9f, 0x01, 0x77, 0x85, 0xcc, 0x6e, 0x23, 0xb7, 0x00, 0x45, 0xe0, - 0xa6, 0x01, 0x29, 0x1d, 0x8b, 0xc4, 0xe0, 0xc2, 0xe0, 0x4f, 0x3b, 0x07, - 0xd5, 0xac, 0x6b, 0x88, 0xb8, 0xa4, 0xe2, 0x5c, 0x19, 0xe9, 0x98, 0x72, - 0xa5, 0x6b, 0xf5, 0xa4, 0xf7, 0x15, 0xaf, 0xfb, 0xb4, 0x80, 0x9a, 0xe3, - 0xa5, 0x35, 0x2f, 0x45, 0x81, 0xf1, 0x8b, 0x2d, 0x26, 0x5c, 0x65, 0xa9, - 0x5b, 0x6e, 0x83, 0xc3, 0x62, 0x2f, 0x84, 0xef, 0x11, 0xa5, 0x58, 0x48, - 0xe9, 0x67, 0x7e, 0xd3, 0x0b, 0x5d, 0x51, 0x80, 0x39, 0x08, 0x8e, 0xc1, - 0x0d, 0x04, 0x11, 0x5f, 0x72, 0x64, 0x1f, 0x83, 0xf8, 0xd3, 0x09, 0x38, - 0xb6, 0x7f, 0x50, 0x78, 0x27, 0x20, 0xe5, 0xbd, 0x16, 0xbf, 0x51, 0xd8, - 0x4f, 0x67, 0x60, 0xf6, 0x9e, 0xff, 0x08, 0xfe, 0xc6, 0x96, 0xd6, 0x64, - 0x94, 0x28, 0xc6, 0x9a, 0x09, 0x1a, 0x34, 0x08, 0x31, 0x4b, 0x0b, 0x97, - 0x5a, 0x18, 0x72, 0x49, 0xe9, 0x1d, 0xbb, 0x9c, 0xed, 0x7e, 0xb5, 0xc5, - 0xa7, 0xf4, 0x25, 0x7a, 0x26, 0xe9, 0x15, 0x61, 0x85, 0x32, 0xc9, 0xb3, - 0xcf, 0x95, 0xbf, 0x35, 0x10, 0x2d, 0x71, 0xfe, 0x03, 0xd6, 0x69, 0x75, - 0x8d, 0xb7, 0x16, 0xa7, 0x3d, 0x0e, 0xb7, 0x55, 0x6d, 0xa7, 0x9f, 0x10, - 0x7e, 
0x7e, 0xff, 0x39, 0xee, 0x8e, 0xa7, 0x81, 0x7d, 0x11, 0xea, 0xa9, - 0xd6, 0xed, 0x54, 0xf8, 0xd2, 0xd5, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x04, 0x00, 0x00, 0xf9, 0xde, 0x41, 0xe7, 0xa6, 0x88, 0x53, 0x76, - 0x5a, 0x26, 0xc3, 0x5c, 0xf2, 0x58, 0x68, 0x9c, 0xc7, 0x4e, 0x53, 0x18, - 0x53, 0x67, 0x39, 0x23, 0x96, 0xb0, 0xef, 0x58, 0x29, 0xe1, 0x68, 0xd8, - 0xce, 0xc0, 0x41, 0xc2, 0x35, 0x5f, 0x74, 0xfa, 0xdf, 0xc7, 0x0f, 0x80, - 0x50, 0xd1, 0xf6, 0x5a, 0x3a, 0x81, 0xe0, 0xd9, 0x9b, 0x47, 0x96, 0xcd, - 0xc5, 0x0f, 0x91, 0x12, 0x81, 0x77, 0x1e, 0xef, 0x2e, 0xba, 0x16, 0x51, - 0x70, 0x78, 0xdc, 0xa3, 0x84, 0x12, 0x7c, 0x9e, 0x21, 0x7d, 0xa3, 0x5f, - 0xce, 0xa1, 0x25, 0x84, 0x99, 0xa4, 0x2d, 0xa6, 0x0f, 0x95, 0xef, 0xef, - 0x31, 0xe6, 0xf2, 0x18, 0x08, 0x47, 0xd2, 0x5a, 0x39, 0x01, 0x7a, 0xca, - 0xd3, 0x03, 0xb1, 0xc2, 0x48, 0xf4, 0x1f, 0x6d, 0xc2, 0x8c, 0x5c, 0xda, - 0xf5, 0x10, 0xed, 0xfc, 0x2e, 0x0c, 0xb3, 0x52, 0xaa, 0xa9, 0xed, 0xbc, - 0x41, 0xcc, 0xd4, 0x4b, 0x1c, 0xd0, 0xa3, 0x1d, 0xf4, 0xe7, 0x48, 0x34, - 0x4e, 0xcf, 0x3b, 0xb3, 0x71, 0x06, 0xbe, 0x0c, 0x35, 0xbb, 0xb4, 0x17, - 0xd8, 0x8b, 0xba, 0xdd, 0x32, 0x30, 0x51, 0xb1, 0xb1, 0xd6, 0x3a, 0xdc, - 0x3b, 0x25, 0x9a, 0x57, 0xc7, 0x4d, 0xd3, 0x75, 0x93, 0x59, 0x3e, 0x9b, - 0x10, 0xcf, 0xdb, 0x38, 0x75, 0x51, 0xb2, 0x2a, 0x48, 0x78, 0xfc, 0xaa, - 0xe3, 0x91, 0xe7, 0x93, 0xe7, 0x0a, 0x07, 0x2c, 0xf8, 0x88, 0x93, 0xde, - 0x2f, 0xba, 0x7b, 0x72, 0xcd, 0x92, 0xdd, 0xb1, 0xac, 0x1e, 0xe4, 0xe3, - 0x5d, 0xa4, 0x7f, 0x86, 0xa7, 0xcb, 0xb5, 0x81, 0x86, 0xf1, 0xf5, 0xad, - 0xd6, 0x36, 0x08, 0x09, 0x9f, 0x75, 0x6f, 0x4a, 0x5b, 0x30, 0xf8, 0xaf, - 0xd2, 0xbc, 0xb5, 0xbe, 0xf2, 0xeb, 0x9b, 0xbc, 0x11, 0xd4, 0x0c, 0x14, - 0xa6, 0x6f, 0x43, 0xd3, 0xc9, 0x4e, 0xca, 0x9b, 0x4e, 0x46, 0x60, 0x4c, - 0x63, 0xcc, 0x07, 0x36, 0x8c, 0xf2, 0xd1, 0x93, 0x7a, 0x51, 0x49, 0x15, - 0xbf, 0xbf, 0x9e, 0x82, 0x21, 0x06, 0xa0, 0x39, 0x11, 0x1d, 0x6c, 0x41, - 0x72, 0xcd, 0x2a, 0x8a, 0x4a, 0xd0, 0x13, 0x6c, 0x56, 0xf4, 0x00, 0x48, - 0xaf, 
0xab, 0xdf, 0xa9, 0xe9, 0xa6, 0xaa, 0x06, 0x61, 0x79, 0xc4, 0x57, - 0x42, 0xca, 0x12, 0x18, 0xcf, 0x81, 0xec, 0x79, 0x19, 0xd2, 0xd2, 0xe3, - 0x1d, 0xc6, 0x6c, 0xd0, 0xd6, 0x0a, 0xfb, 0x70, 0x42, 0x28, 0x25, 0x23, - 0xb6, 0x23, 0x15, 0x28, 0x5e, 0x9f, 0x49, 0xf2, 0x7b, 0x69, 0x74, 0xa5, - 0xb9, 0x26, 0x81, 0xfe, 0x39, 0x3e, 0x3f, 0xc8, 0x7e, 0x9e, 0x5e, 0x8e, - 0xf2, 0xdb, 0x6b, 0xfd, 0xe1, 0xc3, 0x01, 0x4a, 0xba, 0x8f, 0x33, 0x71, - 0x09, 0x80, 0x5d, 0x9c, 0x58, 0x64, 0xb7, 0x90, 0x13, 0x2a, 0xe9, 0x1d, - 0x07, 0x2c, 0x06, 0x70, 0x43, 0x0d, 0xb6, 0x57, 0x02, 0x3c, 0xbe, 0x3c, - 0x42, 0xab, 0x77, 0x15, 0x0e, 0x98, 0xfb, 0xf2, 0x1d, 0x14, 0xd9, 0xb8, - 0xd1, 0x59, 0x2a, 0x67, 0x6f, 0xfc, 0x59, 0x39, 0x33, 0xe0, 0x49, 0x0b, - 0x4e, 0x65, 0x81, 0x9f, 0x71, 0xf2, 0xa5, 0x90, 0x4f, 0x24, 0xc7, 0x05, - 0xfb, 0x77, 0x1e, 0x14, 0xca, 0x2f, 0xfc, 0xac, 0xec, 0xbf, 0xa2, 0x69, - 0x15, 0x0a, 0x6b, 0xa9, 0xa0, 0x74, 0xee, 0xad, 0xa9, 0x50, 0x4d, 0x4d, - 0xab, 0x6e, 0xc1, 0xb3, 0xda, 0xbb, 0xbd, 0xab, 0x00, 0x05, 0x14, 0xc1, - 0xc4, 0x53, 0x7b, 0x78, 0x97, 0x68, 0x3c, 0x05, 0xf2, 0xed, 0x87, 0xca, - 0x86, 0xd1, 0xdf, 0xda, 0xb3, 0x2f, 0x17, 0x87, 0x87, 0x2f, 0xd8, 0xe9, - 0xb2, 0x96, 0xdc, 0x7f, 0x22, 0xf1, 0x2a, 0x9f, 0xfe, 0x54, 0x55, 0xa1, - 0x96, 0xab, 0x9f, 0x61, 0x74, 0xcd, 0x4d, 0x77, 0x38, 0x02, 0x23, 0x29, - 0x28, 0x5b, 0xfc, 0x86, 0x17, 0x40, 0xd4, 0x42, 0x2a, 0x9b, 0x84, 0xf7, - 0x67, 0x2b, 0x3a, 0xc1, 0x31, 0x89, 0x4b, 0x67, 0xd1, 0x7d, 0x6b, 0x36, - 0xec, 0x69, 0x6b, 0x24, 0xca, 0xd6, 0x2d, 0xbb, 0x21, 0xc8, 0x0c, 0x53, - 0x41, 0x29, 0x0b, 0xc1, 0xfe, 0xd5, 0xa3, 0x4c, 0x66, 0x2f, 0xc7, 0xf1, - 0xa8, 0xc0, 0x3d, 0x9a, 0xb9, 0x09, 0x50, 0x3f, 0x09, 0x87, 0xa4, 0x3f, - 0x7a, 0x33, 0xef, 0xf0, 0xfb, 0x77, 0x02, 0x7d, 0x92, 0xaf, 0x73, 0xaa, - 0xcc, 0x3f, 0x66, 0x56, 0xd0, 0x21, 0xd1, 0xe8, 0x0e, 0x47, 0x03, 0x5e, - 0x3b, 0xe9, 0xa2, 0xe3, 0x83, 0x0b, 0x73, 0xd3, 0xaa, 0x94, 0x80, 0xef, - 0x7c, 0xdf, 0xde, 0x86, 0xc3, 0xa9, 0x62, 0x34, 0x76, 0xee, 0x4d, 0x15, - 0x73, 
0x7b, 0xd7, 0x6d, 0xd4, 0x21, 0x05, 0xd4, 0xcf, 0xf3, 0x54, 0xdc, - 0x49, 0x5f, 0x5a, 0x2a, 0x37, 0x19, 0x89, 0x61, 0x1d, 0x95, 0x17, 0x8b, - 0x09, 0x95, 0x5d, 0x9f, 0xde, 0x86, 0x03, 0x93, 0x76, 0xec, 0x54, 0xec, - 0x13, 0xc3, 0xf9, 0x38, 0x8f, 0xa9, 0x11, 0xf0, 0x9a, 0x0e, 0x5e, 0x38, - 0x69, 0xeb, 0x62, 0x41, 0x9e, 0xd0, 0x1b, 0x59, 0x8c, 0xfd, 0x16, 0xfa, - 0xd8, 0x99, 0x0d, 0x83, 0x7e, 0xba, 0x5b, 0xc6, 0x59, 0xe1, 0xae, 0xba, - 0xb9, 0xb8, 0xba, 0xa5, 0x4d, 0x20, 0x00, 0xc9, 0x0c, 0xe1, 0x77, 0xdf, - 0xc4, 0x95, 0xca, 0x7c, 0xa5, 0xef, 0x0a, 0xed, 0x9b, 0x31, 0x06, 0xe1, - 0xc9, 0xa3, 0x88, 0x0a, 0xcc, 0x3d, 0xc8, 0xb6, 0x01, 0xe2, 0xa9, 0x29, - 0x03, 0x8a, 0x28, 0xf8, 0x0d, 0x70, 0x77, 0xb9, 0xe1, 0x1b, 0x06, 0x19, - 0x86, 0xc1, 0xd3, 0xcf, 0x6b, 0x9c, 0x09, 0x70, 0x50, 0xed, 0xb5, 0xf6, - 0x69, 0xcc, 0xac, 0x30, 0x6a, 0x1f, 0x1d, 0xe6, 0x75, 0x33, 0xab, 0x55, - 0x48, 0xfa, 0x81, 0xb8, 0x06, 0x3a, 0x78, 0xee, 0xde, 0xef, 0xe2, 0x17, - 0xc4, 0x3e, 0xe5, 0x22, 0xa7, 0xd1, 0x45, 0x5b, 0x57, 0xb0, 0xde, 0x69, - 0x30, 0xd1, 0x9a, 0xd7, 0x6b, 0x0e, 0x7a, 0x30, 0x0d, 0xb5, 0xec, 0x60, - 0xa7, 0x05, 0x87, 0x42, 0x4b, 0x92, 0x1f, 0x68, 0x8e, 0x1a, 0x90, 0x84, - 0x27, 0x2a, 0xc0, 0xd2, 0xff, 0xbc, 0x8e, 0x34, 0x53, 0x9d, 0x04, 0x50, - 0xcb, 0x79, 0xd9, 0x55, 0xd5, 0x4d, 0x3c, 0xe2, 0xb4, 0x9b, 0x57, 0x07, - 0x1f, 0xce, 0xd0, 0xa7, 0x84, 0xe1, 0xb7, 0x3a, 0xaf, 0xc5, 0x67, 0x64, - 0xbc, 0x02, 0xbe, 0xb0, 0x65, 0x7e, 0xb0, 0x4c, 0xc2, 0x2d, 0xcd, 0xf8, - 0x60, 0xcb, 0xfe, 0xd1, 0x8d, 0x14, 0x5a, 0xd3, 0x38, 0xd4, 0x71, 0x5a, - 0xca, 0xbb, 0xfe, 0x0e, 0x54, 0xf9, 0xb4, 0x25, 0xa5, 0x71, 0x13, 0x95, - 0x14, 0xdc, 0x86, 0xb8, 0x21, 0xa7, 0x2e, 0x13, 0xc6, 0x2f, 0xce, 0xe7, - 0x6c, 0xb8, 0x0d, 0xc9, 0xe4, 0xc4, 0x64, 0x12, 0x78, 0x1c, 0x95, 0x92, - 0xc2, 0xec, 0xaa, 0xd3, 0xc3, 0x3a, 0xd2, 0xe8, 0x95, 0xf0, 0x6b, 0x03, - 0x8c, 0xcf, 0x6b, 0xdb, 0x21, 0xa0, 0xcf, 0xf4, 0x05, 0xc8, 0xe7, 0x77, - 0x05, 0x55, 0x7b, 0x6b, 0xfa, 0x96, 0xf1, 0x7c, 0x30, 0x62, 0x75, 0xbe, - 0x6e, 
0xea, 0xba, 0x9f, 0x40, 0x2e, 0x9a, 0x86, 0x93, 0xcc, 0x38, 0xf7, - 0xee, 0xd8, 0xbb, 0x24, 0xcd, 0x85, 0x3e, 0x85, 0x16, 0x8c, 0x33, 0x23, - 0x73, 0xe6, 0x43, 0xc4, 0x67, 0xbf, 0xef, 0x85, 0xb1, 0x44, 0xf9, 0x55, - 0x93, 0x4d, 0x0b, 0x8e, 0xc1, 0x42, 0x13, 0xc6, 0xc8, 0x09, 0x63, 0xab, - 0xb3, 0xc7, 0xc4, 0xa4, 0x8b, 0x72, 0xfb, 0xa5, 0x99, 0xa1, 0x5d, 0x07, - 0x02, 0x82, 0x56, 0x11, 0x3c, 0xc2, 0x5a, 0x55, 0xf9, 0x3a, 0x93, 0x61, - 0x89, 0x46, 0xb7, 0x6a, 0x42, 0x76, 0x1e, 0x70, 0xde, 0xd9, 0xff, 0xff, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x32, 0xc1, 0x61, 0xaa, - 0xdb, 0xe9, 0xae, 0x88, 0xcb, 0xf7, 0x28, 0xdd, 0x82, 0x62, 0x61, 0x41, - 0x4e, 0xbb, 0xf9, 0xb7, 0xe8, 0x81, 0x99, 0x18, 0xe2, 0xa7, 0xb4, 0x7c, - 0xb7, 0x08, 0x44, 0x6f, 0x24, 0xb3, 0xda, 0x57, 0x62, 0x29, 0xc7, 0xa6, - 0x84, 0xb1, 0x5d, 0xc5, 0x00, 0x4c, 0x30, 0x16, 0xf0, 0x0a, 0x74, 0x73, - 0xec, 0xaf, 0xb5, 0xde, 0xb0, 0xa7, 0x75, 0x22, 0x8f, 0x9e, 0x43, 0x01, - 0x68, 0xae, 0x91, 0xeb, 0x46, 0x52, 0x3f, 0x2c, 0x4e, 0xc5, 0xd0, 0xc8, - 0x15, 0xea, 0x99, 0xc2, 0x37, 0x5b, 0x68, 0xb5, 0xce, 0x41, 0x92, 0xbf, - 0xd6, 0xdb, 0x85, 0xad, 0x08, 0xd1, 0x11, 0x93, 0xe8, 0xd4, 0x78, 0x43, - 0x3b, 0x7d, 0xcb, 0x42, 0x84, 0xf3, 0x61, 0x88, 0x9e, 0x6a, 0x73, 0xb9, - 0x78, 0x17, 0x9a, 0x9f, 0xfb, 0x97, 0xcb, 0xd6, 0xb5, 0x3f, 0x00, 0x41, - 0xb0, 0x30, 0x2f, 0x6f, 0x89, 0xdd, 0xfa, 0x13, 0xd1, 0x07, 0xbe, 0x2f, - 0xea, 0x91, 0x62, 0xaa, 0xed, 0xcb, 0xfd, 0x07, 0x82, 0xbb, 0x3f, 0xf4, - 0xa6, 0x94, 0x66, 0x71, 0x20, 0x61, 0xac, 0x84, 0x04, 0x70, 0xf2, 0xd3, - 0xdf, 0xac, 0x44, 0xfd, 0x47, 0x26, 0x81, 0x64, 0xb3, 0xa6, 0x90, 0x2b, - 0xd2, 0x2c, 0xd0, 0x77, 0x81, 0x53, 0x45, 0x78, 0x5f, 0x30, 0x77, 0x91, - 0x83, 0x13, 0x33, 0xd1, 0x91, 0xa6, 0x35, 0x21, 0xcb, 0x26, 0x54, 0x0a, - 0xf7, 0x70, 0x5e, 0xdb, 0xd8, 0x92, 0xc7, 0xdf, 0xf9, 0x2a, 0x46, 0x91, - 0x22, 0x3b, 0xe6, 0xe1, 0x91, 0xeb, 0xa6, 0x78, 0x81, 0x57, 0xf3, 0x04, - 0xdf, 0x34, 0x55, 0x74, 0x0a, 0xfe, 0xf2, 0xbd, 0xb3, 0xeb, 0xa3, 0x8e, - 0x71, 
0x15, 0xa9, 0x2f, 0x53, 0xe2, 0xa1, 0x45, 0xdf, 0xe8, 0x29, 0x40, - 0xf1, 0x4b, 0x23, 0xdb, 0x8e, 0xee, 0x19, 0xa8, 0xd4, 0x15, 0x90, 0x8c, - 0x04, 0x46, 0x81, 0x49, 0x92, 0xe5, 0xe1, 0xfe, 0x99, 0x06, 0xfc, 0x3e, - 0x43, 0x58, 0x3b, 0x19, 0x7f, 0xd2, 0x13, 0x65, 0xc2, 0x64, 0x27, 0x6d, - 0x93, 0x6a, 0xcf, 0x48, 0x2a, 0x3d, 0xdd, 0x79, 0x9f, 0x05, 0x32, 0xeb, - 0xfd, 0xb4, 0xd2, 0x1d, 0x16, 0x61, 0x3d, 0x17, 0x4c, 0xb8, 0xad, 0x63, - 0x0e, 0x6b, 0x8a, 0x4a, 0x34, 0x4c, 0xb5, 0x3c, 0x0f, 0x05, 0x28, 0x8c, - 0x8b, 0xdf, 0xf4, 0xa0, 0x49, 0xbf, 0x34, 0x6c, 0x6a, 0x5f, 0x40, 0x95, - 0x48, 0x4b, 0x93, 0x1e, 0x61, 0x6d, 0x58, 0xc3, 0x86, 0x98, 0x70, 0x11, - 0x4e, 0x44, 0x65, 0xc1, 0x0d, 0xea, 0x2f, 0xda, 0x38, 0x16, 0xbd, 0xd4, - 0x7b, 0x3e, 0x31, 0xee, 0x42, 0x4c, 0xdc, 0xe9, 0x8b, 0x1f, 0xa9, 0xcf, - 0xab, 0x60, 0xb5, 0xb1, 0xd2, 0xf2, 0x6a, 0xe9, 0xbc, 0xcc, 0xcb, 0x60, - 0x4a, 0xca, 0x70, 0x79, 0x64, 0x9d, 0x07, 0x1e, 0xdb, 0xef, 0x34, 0xaf, - 0x17, 0x93, 0x6b, 0x60, 0x73, 0x2d, 0x8c, 0x08, 0x27, 0x1e, 0x46, 0x9f, - 0xcb, 0x33, 0xdd, 0x76, 0xef, 0x17, 0x58, 0x9a, 0x5f, 0x82, 0x78, 0x0f, - 0xbf, 0xe7, 0x0f, 0x3a, 0x1e, 0xa8, 0x30, 0xbf, 0xff, 0xc7, 0xc7, 0x82, - 0x8b, 0xc3, 0x65, 0x04, 0xfd, 0x45, 0xc9, 0x88, 0x99, 0x8e, 0x44, 0xc5, - 0x23, 0x1e, 0xbf, 0xf1, 0x95, 0x70, 0x35, 0xe6, 0x56, 0x4a, 0x53, 0xb2, - 0xac, 0x0c, 0xfd, 0xf5, 0x61, 0x26, 0x5b, 0x70, 0xd6, 0x4c, 0xfc, 0x0f, - 0xcc, 0x53, 0x6e, 0x25, 0xca, 0x1d, 0x0c, 0x56, 0xf7, 0x9c, 0x95, 0xf6, - 0x3c, 0x08, 0x0c, 0x64, 0xb1, 0x1c, 0x5c, 0xe6, 0x25, 0xa4, 0xa3, 0xb7, - 0xaf, 0x8b, 0xbc, 0xe1, 0x68, 0xdf, 0x10, 0xab, 0xbb, 0xd5, 0x30, 0x64, - 0x42, 0xf6, 0xe6, 0x9a, 0xb5, 0x59, 0x12, 0x76, 0x92, 0xac, 0x29, 0xe9, - 0x45, 0xdb, 0x2e, 0x62, 0x22, 0x58, 0x24, 0x89, 0xc8, 0x6a, 0x2a, 0xa7, - 0x3f, 0x04, 0x53, 0x4e, 0x07, 0x41, 0x4e, 0x5f, 0x95, 0x5f, 0x6e, 0x14, - 0x5b, 0xa7, 0xa7, 0xd3, 0x5a, 0xa2, 0x95, 0x4a, 0xc8, 0xe9, 0x3c, 0x5a, - 0x84, 0x50, 0xbc, 0xe1, 0x9c, 0x7a, 0x16, 0xe5, 0xc7, 0x04, 0x9d, 0x60, - 0x2e, 
0x7d, 0xb3, 0x77, 0x5d, 0x86, 0x2e, 0xac, 0x57, 0x2a, 0x31, 0x26, - 0x23, 0x6e, 0xcc, 0x7f, 0xb8, 0x36, 0x29, 0xa9, 0xa8, 0xd9, 0xc6, 0x75, - 0xee, 0x16, 0x23, 0x27, 0x0f, 0xe1, 0xb0, 0x3d, 0x91, 0x3a, 0x26, 0x4a, - 0x60, 0x72, 0x14, 0xf9, 0x3c, 0x66, 0x66, 0xe8, 0x7d, 0x4a, 0x6f, 0x7e, - 0x63, 0x58, 0x6a, 0x28, 0x78, 0x50, 0xef, 0x3b, 0x9d, 0xeb, 0xb6, 0x4b, - 0x5d, 0x55, 0x80, 0x84, 0x97, 0x9b, 0x74, 0x4b, 0x5c, 0x09, 0x1d, 0xe7, - 0x57, 0xfc, 0x40, 0x3f, 0xa9, 0xbd, 0xdf, 0x61, 0x2a, 0x89, 0x62, 0x51, - 0xfc, 0x24, 0xee, 0xee, 0x97, 0x10, 0xca, 0xb6, 0x0e, 0x8e, 0x71, 0x67, - 0x2a, 0x79, 0x4f, 0xc4, 0xe6, 0x3e, 0x27, 0xc2, 0x9b, 0x85, 0xfd, 0xde, - 0xfb, 0x58, 0x75, 0xf3, 0x1c, 0x31, 0xa2, 0x56, 0x3e, 0xdc, 0x24, 0xf4, - 0x4f, 0xcb, 0x5a, 0x1a, 0x77, 0x5c, 0x28, 0xd1, 0x5a, 0x55, 0xa9, 0x8c, - 0xb5, 0xdd, 0x77, 0x93, 0x58, 0xd8, 0x2f, 0x7d, 0x5a, 0x67, 0xa1, 0x95, - 0x0a, 0xd2, 0x6a, 0x93, 0xa6, 0xf0, 0x5f, 0x7f, 0x0a, 0x29, 0xdb, 0x1d, - 0x8c, 0xa7, 0x12, 0x0a, 0xf4, 0xc9, 0xcd, 0x70, 0xd1, 0xbd, 0x48, 0xd4, - 0x9a, 0xbb, 0xbb, 0x24, 0xbf, 0x52, 0x25, 0xb9, 0x75, 0xc2, 0x17, 0x36, - 0x6f, 0x4a, 0xc0, 0x53, 0x6d, 0x38, 0xfb, 0x7a, 0x60, 0xc8, 0x5d, 0x03, - 0xc1, 0x1c, 0x0c, 0x31, 0xf0, 0x59, 0xed, 0x0a, 0x5f, 0x84, 0xf2, 0x89, - 0x6c, 0xb4, 0xd5, 0x24, 0x2d, 0x2a, 0xda, 0xbe, 0x74, 0x1d, 0x22, 0xe2, - 0xc6, 0xf0, 0x9b, 0x98, 0x5a, 0x41, 0x11, 0x4c, 0x51, 0x97, 0x16, 0xa7, - 0xc9, 0xd8, 0x53, 0x12, 0x53, 0xdd, 0x22, 0xa9, 0xf2, 0xae, 0x52, 0x49, - 0x02, 0xf9, 0x5c, 0x78, 0x00, 0xa2, 0x64, 0xff, 0x91, 0x62, 0x20, 0x6a, - 0x87, 0x6a, 0x40, 0x01, 0x85, 0x30, 0xf5, 0xdd, 0xa7, 0x64, 0x0a, 0x85, - 0x8d, 0x37, 0x99, 0xcb, 0x03, 0xc8, 0x29, 0x56, 0x7e, 0x75, 0x4f, 0xa1, - 0xc3, 0x76, 0xce, 0xdb, 0xa3, 0xb4, 0x7e, 0x91, 0x95, 0xbe, 0x53, 0x0e, - 0x20, 0xc9, 0xe7, 0x71, 0x78, 0xad, 0x3d, 0x4c, 0xbb, 0x59, 0xb9, 0x77, - 0xcf, 0x7d, 0x7b, 0xff, 0x15, 0xdb, 0x1d, 0xae, 0x1f, 0xbe, 0x33, 0x88, - 0x01, 0x04, 0x95, 0xe5, 0xe9, 0x6a, 0x1c, 0xbf, 0xc8, 0xc3, 0x33, 0x3b, - 0xd8, 
0x2f, 0x75, 0x4a, 0xc3, 0x6f, 0x09, 0x88, 0x26, 0x46, 0x90, 0x89, - 0x53, 0x12, 0x27, 0xc2, 0x7d, 0x23, 0x6b, 0xc4, 0xe3, 0x0a, 0x0f, 0xc2, - 0x86, 0x6d, 0x20, 0x35, 0x82, 0x33, 0xec, 0xdd, 0xa7, 0x6a, 0xc3, 0xa8, - 0x11, 0xdc, 0x02, 0xd9, 0x05, 0x1b, 0x04, 0x75, 0x92, 0x6c, 0x08, 0x9e, - 0x38, 0x72, 0xd9, 0x7d, 0x9b, 0xbc, 0xfd, 0xca, 0xb8, 0x06, 0x0e, 0x24, - 0x89, 0x90, 0xde, 0x52, 0xe4, 0xd1, 0xcc, 0x99, 0x87, 0x0b, 0x87, 0xbb, - 0x5c, 0xa9, 0xab, 0xec, 0xb5, 0xe4, 0xdd, 0x5d, 0xfa, 0xb1, 0x97, 0x5f, - 0x61, 0xf7, 0x58, 0xd6, 0x08, 0x02, 0xf2, 0x51, 0x7c, 0x7a, 0xe6, 0xf1, - 0xcb, 0x43, 0xd0, 0x21, 0x09, 0xb8, 0x82, 0xa9, 0x52, 0xd9, 0xa8, 0x7f, - 0x2b, 0xe1, 0x0f, 0x31, 0xbc, 0x16, 0xa2, 0xce, 0x35, 0x55, 0x2e, 0xd6, - 0xda, 0x38, 0xd9, 0xc2, 0x5e, 0xca, 0x27, 0xd9, 0xa6, 0xd6, 0x4b, 0xa2, - 0x73, 0xc4, 0xce, 0x66, 0x30, 0x60, 0xa2, 0x01, 0xfa, 0xc1, 0xd6, 0xc8, - 0xea, 0xdd, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, - 0x70, 0xe2, 0x62, 0x68, 0xff, 0x60, 0x67, 0x64, 0x88, 0xdd, 0x81, 0x79, - 0x82, 0xf5, 0x46, 0xf9, 0x7e, 0x0e, 0xa9, 0x26, 0xf6, 0xcf, 0x5d, 0xef, - 0x10, 0x11, 0xe1, 0x71, 0x72, 0x77, 0xcf, 0x02, 0x7b, 0xf1, 0x6e, 0xc4, - 0xb4, 0xfa, 0x2a, 0x12, 0xfe, 0x7e, 0x3c, 0x66, 0xef, 0x41, 0x98, 0x3a, - 0x1f, 0xa9, 0x14, 0x8f, 0x46, 0x22, 0xa0, 0xc2, 0xee, 0x93, 0x25, 0x34, - 0xf2, 0xb7, 0x6d, 0x0a, 0x36, 0xde, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x04, 0x00, 0x00, 0xd4, 0x17, 0x62, 0x25, 0xfd, 0x5b, 0x75, 0xeb, - 0xec, 0x06, 0xc9, 0x39, 0x86, 0x6d, 0xc5, 0x60, 0x2d, 0x33, 0x3d, 0xce, - 0x6a, 0x9f, 0x07, 0x3b, 0xb9, 0x70, 0x0f, 0xc7, 0x13, 0x46, 0x35, 0x46, - 0x26, 0xe4, 0xbc, 0x6e, 0x54, 0x89, 0x29, 0xd5, 0xa4, 0x94, 0xa0, 0x3a, - 0x7a, 0x61, 0xcf, 0xd1, 0x48, 0x27, 0x7a, 0x72, 0x95, 0xde, 0x93, 0xd1, - 0x19, 0x1f, 0xc9, 0xc8, 0x8f, 0x0d, 0xce, 0x34, 0x03, 0x39, 0x0a, 0x92, - 0x16, 0x09, 0xc4, 0x49, 0xf9, 0x30, 0x2e, 0x19, 0xd1, 0x69, 0x7e, 0x78, - 0x00, 0x25, 0x30, 0x6f, 0x6b, 0xe1, 0xbe, 0xad, 0xb2, 0x05, 0xde, 0xc7, - 0xc2, 
0xf7, 0xd5, 0xa7, 0x4d, 0x03, 0x6f, 0x6b, 0xcd, 0xcb, 0x42, 0xfa, - 0x88, 0x16, 0xd5, 0xa6, 0x60, 0x08, 0xd4, 0xa5, 0x5b, 0x3b, 0x7b, 0xa2, - 0xca, 0xa3, 0xa2, 0x5d, 0x63, 0x7f, 0xc0, 0x37, 0xc5, 0x7e, 0x99, 0x04, - 0x5d, 0x9a, 0xb9, 0xa5, 0xac, 0xd1, 0xe2, 0x5d, 0xb2, 0x2b, 0x7e, 0xbb, - 0xb9, 0x66, 0x13, 0xa7, 0x30, 0xbf, 0x80, 0x0c, 0x2b, 0x8d, 0x45, 0xe1, - 0x8d, 0x96, 0x25, 0x27, 0x47, 0x3d, 0x21, 0x7d, 0x1c, 0x42, 0xac, 0x31, - 0x26, 0x47, 0x59, 0xb3, 0x44, 0x85, 0xf2, 0x8e, 0x7d, 0x01, 0x96, 0x6d, - 0xb2, 0x64, 0xc3, 0xfc, 0xa7, 0x82, 0x06, 0x4a, 0x87, 0x75, 0x9b, 0x99, - 0x47, 0x7e, 0xa6, 0x4d, 0x2c, 0x36, 0xff, 0xac, 0x2b, 0x77, 0x96, 0x52, - 0x14, 0x8d, 0x07, 0x0d, 0x28, 0x9d, 0x84, 0xa2, 0xda, 0xd6, 0x45, 0x3a, - 0xd4, 0xe6, 0xb7, 0x9a, 0xf3, 0x34, 0xe3, 0xda, 0x39, 0xdf, 0x35, 0x9c, - 0xe4, 0x87, 0x55, 0xc8, 0x43, 0xd0, 0x61, 0x46, 0x52, 0x2f, 0x75, 0x63, - 0xbb, 0x98, 0x97, 0xeb, 0xfb, 0x15, 0xaf, 0x8e, 0x96, 0xdc, 0xff, 0x0a, - 0x90, 0xda, 0x09, 0x63, 0x28, 0x7b, 0x92, 0x73, 0x0b, 0xd4, 0x2b, 0x72, - 0x2a, 0x86, 0x32, 0xc3, 0xc1, 0x3e, 0xe4, 0x2c, 0x07, 0x89, 0x53, 0xb7, - 0xfe, 0x78, 0x6c, 0x95, 0xb4, 0x62, 0x4d, 0x4b, 0xfe, 0x6c, 0xfc, 0x5e, - 0x4e, 0xa7, 0x8c, 0x07, 0x4f, 0x85, 0x27, 0xe0, 0x7b, 0xd9, 0x7a, 0xe5, - 0x1d, 0xbc, 0x36, 0xda, 0x8e, 0x21, 0xff, 0xb3, 0x60, 0x2c, 0x5e, 0x23, - 0x0f, 0xde, 0x3f, 0xae, 0xa5, 0x3a, 0x50, 0xa9, 0x99, 0x39, 0x45, 0xaf, - 0xd3, 0x5f, 0x4a, 0x15, 0xad, 0x9c, 0x66, 0x7f, 0x92, 0xe0, 0x02, 0x81, - 0x3e, 0x06, 0x6a, 0x5e, 0xd0, 0x0c, 0x42, 0xe7, 0xcf, 0xe2, 0xeb, 0xa3, - 0xe0, 0xf7, 0x2d, 0x8a, 0x21, 0xdb, 0x64, 0x28, 0x2a, 0xb3, 0x2b, 0xc4, - 0xc9, 0xd5, 0x60, 0xaf, 0xfc, 0x15, 0xa1, 0x44, 0x9c, 0x96, 0x04, 0x42, - 0x1c, 0x55, 0x8c, 0xa5, 0xce, 0x80, 0xce, 0x75, 0x64, 0xa9, 0xf6, 0xa5, - 0x5a, 0x0f, 0x8a, 0x4b, 0x8b, 0x72, 0xcf, 0x3e, 0xd7, 0xeb, 0xe1, 0xd0, - 0xd3, 0x2d, 0x04, 0x6c, 0x9e, 0x02, 0x75, 0x43, 0x5c, 0xc1, 0x57, 0x66, - 0xd9, 0x14, 0x5b, 0x08, 0x10, 0x44, 0x8d, 0x8e, 0x89, 0xd1, 0x65, 0x27, - 0x2a, 
0x0b, 0x99, 0x6f, 0x09, 0xa6, 0x20, 0xa5, 0x75, 0x24, 0xe4, 0xf7, - 0xf5, 0xe0, 0xed, 0x79, 0x37, 0x18, 0x13, 0x1c, 0xd9, 0xd1, 0xf5, 0x69, - 0x0c, 0xa5, 0x02, 0xdf, 0x6a, 0xfd, 0x2e, 0x35, 0x8e, 0xd0, 0x41, 0x91, - 0x61, 0x0f, 0x5c, 0xdd, 0x70, 0xbf, 0x1c, 0x49, 0xcb, 0xe9, 0xc9, 0x33, - 0xc4, 0x99, 0x1e, 0x8b, 0x75, 0x48, 0xc2, 0x58, 0xa4, 0x70, 0x1f, 0xbb, - 0xcd, 0xd3, 0x0e, 0x79, 0x25, 0xbe, 0x53, 0xfa, 0x32, 0x32, 0xf6, 0xb9, - 0xf0, 0x0a, 0x52, 0x5b, 0xe0, 0x69, 0xff, 0x43, 0xda, 0x98, 0x1f, 0xee, - 0x54, 0x60, 0xf8, 0x24, 0x43, 0xc5, 0x37, 0x72, 0xd1, 0xfc, 0x99, 0x9a, - 0x3e, 0x24, 0xe0, 0xd9, 0xc2, 0x61, 0x47, 0xb3, 0x26, 0x09, 0x85, 0x74, - 0xa1, 0x2b, 0x4a, 0x70, 0xd0, 0x1b, 0x90, 0x03, 0x25, 0xd9, 0x22, 0xc2, - 0x16, 0x22, 0x3a, 0x62, 0x20, 0xd4, 0x13, 0xce, 0xa2, 0xc7, 0x02, 0xfb, - 0x9a, 0xbf, 0xf1, 0x1c, 0x80, 0x01, 0x97, 0x90, 0x7f, 0x5a, 0x98, 0x70, - 0x30, 0x61, 0x77, 0xe5, 0xd4, 0x3b, 0x03, 0x42, 0x57, 0x31, 0x5e, 0xc6, - 0x64, 0xe1, 0xf4, 0x64, 0x77, 0x21, 0x9b, 0x44, 0x1c, 0xd9, 0x8c, 0x95, - 0x8a, 0xf1, 0xcb, 0x82, 0xac, 0xc1, 0x26, 0x31, 0xf2, 0x22, 0x41, 0xab, - 0xbb, 0x23, 0xd3, 0x8d, 0xcc, 0x5c, 0x9d, 0x9b, 0x1d, 0x9c, 0x4d, 0xf3, - 0x62, 0xde, 0x15, 0x6a, 0x94, 0x8d, 0x24, 0xe7, 0x52, 0x8d, 0x2a, 0xa4, - 0x1d, 0x54, 0x5a, 0xda, 0xaf, 0xab, 0x05, 0x27, 0x4b, 0xbb, 0xb4, 0xda, - 0x0c, 0xb9, 0x20, 0xb3, 0xaf, 0x4a, 0xeb, 0x37, 0xe5, 0x43, 0xe4, 0xc1, - 0xf6, 0x9e, 0xf8, 0x6c, 0xd8, 0xa1, 0x0c, 0xf9, 0xd1, 0x4b, 0x96, 0xa0, - 0x6d, 0x38, 0x64, 0x41, 0xd3, 0x14, 0xfb, 0xad, 0x89, 0xa9, 0xf7, 0x36, - 0x01, 0x0f, 0xbe, 0x8e, 0xd7, 0x76, 0xc6, 0x70, 0x22, 0x32, 0x8b, 0x08, - 0xca, 0x95, 0xbf, 0xcf, 0x5e, 0xb8, 0xc0, 0x3f, 0xd9, 0xaa, 0x84, 0xab, - 0x30, 0x5b, 0xe3, 0x7a, 0x61, 0x32, 0xe5, 0x54, 0x01, 0x5e, 0xb6, 0x1c, - 0x9c, 0x78, 0x52, 0x2a, 0xa7, 0xf5, 0x29, 0xa6, 0x0f, 0x14, 0xa5, 0x3a, - 0x34, 0xd4, 0xf5, 0xc2, 0xb2, 0x8d, 0x12, 0x7b, 0x8a, 0x64, 0x00, 0xfd, - 0x02, 0x0e, 0x02, 0x26, 0x5a, 0xb9, 0xeb, 0xfd, 0x30, 0xce, 0x51, 0xec, - 0x5f, 
0xbc, 0xee, 0x53, 0x21, 0xec, 0x0e, 0xee, 0xc4, 0x28, 0x1a, 0xec, - 0x2a, 0x39, 0x4e, 0xe1, 0x50, 0x11, 0x3f, 0x16, 0xdd, 0xbf, 0xaf, 0x3e, - 0xbe, 0xd4, 0xfe, 0x34, 0x1e, 0x62, 0x3f, 0x5a, 0xea, 0x05, 0xfc, 0xd5, - 0x45, 0x08, 0x47, 0xce, 0x38, 0x3f, 0x75, 0x7e, 0x0c, 0x3a, 0x2a, 0x14, - 0xa7, 0x61, 0xba, 0x3a, 0xa1, 0x41, 0xa2, 0x72, 0x19, 0xfa, 0x33, 0x43, - 0xa7, 0xf4, 0x4e, 0x5b, 0xf9, 0xb1, 0x45, 0x16, 0x57, 0x8e, 0xb1, 0xad, - 0x7d, 0x88, 0xd3, 0x93, 0xa2, 0x08, 0xf3, 0x96, 0x4d, 0x84, 0x63, 0x08, - 0xfa, 0x9d, 0xf3, 0x04, 0x33, 0xbd, 0x7e, 0x7a, 0xc7, 0x63, 0xc5, 0x31, - 0x5a, 0x82, 0x33, 0x90, 0x56, 0x44, 0xe9, 0xd3, 0xc4, 0xd4, 0x76, 0x29, - 0x2f, 0xdb, 0xa3, 0x9d, 0xff, 0xd4, 0xd2, 0xb1, 0xce, 0xf1, 0xcb, 0x7f, - 0x10, 0x3b, 0x90, 0xa4, 0x1b, 0xa0, 0x9b, 0xa7, 0xfa, 0x27, 0x40, 0x11, - 0x35, 0xc9, 0x7f, 0x01, 0x97, 0x76, 0x9f, 0x33, 0xc5, 0xd6, 0x8d, 0x20, - 0x07, 0x73, 0x93, 0x0b, 0x24, 0x88, 0x4e, 0x73, 0x68, 0x79, 0x92, 0x20, - 0x2a, 0x71, 0xed, 0x22, 0x0b, 0xfb, 0x42, 0xb5, 0xd9, 0xc3, 0xaa, 0xed, - 0x45, 0x03, 0x64, 0xde, 0x6f, 0x25, 0x8e, 0x3b, 0x9a, 0xef, 0xc5, 0x63, - 0xc2, 0x7f, 0x34, 0xd0, 0x1b, 0x20, 0xa3, 0xab, 0x9d, 0x54, 0x41, 0x0e, - 0x7b, 0x2e, 0x96, 0x12, 0x75, 0x58, 0xdf, 0xd5, 0xaa, 0x3c, 0xf2, 0x26, - 0xc1, 0xf1, 0x18, 0x37, 0x56, 0xf2, 0xd2, 0x86, 0x6f, 0xd4, 0x9f, 0x57, - 0x2b, 0x32, 0xe9, 0x08, 0x94, 0x53, 0x40, 0xc5, 0x4d, 0x77, 0x39, 0xc6, - 0x4c, 0x63, 0x53, 0xf9, 0xbf, 0x35, 0x08, 0xc5, 0x0d, 0xd0, 0x89, 0x82, - 0xa7, 0x2d, 0x6a, 0xb4, 0x22, 0xb1, 0x10, 0x7f, 0xcf, 0x2e, 0x21, 0x27, - 0x9c, 0x12, 0xc6, 0x0e, 0xca, 0xd2, 0x32, 0xb1, 0x6d, 0xfd, 0x59, 0x12, - 0x23, 0x60, 0x46, 0x89, 0xe0, 0x75, 0x5e, 0xc9, 0xf4, 0x3d, 0x8a, 0x89, - 0xd4, 0x23, 0xc2, 0xbe, 0x30, 0x32, 0x4a, 0x95, 0x42, 0xe2, 0xff, 0xff, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0xa7, 0x0b, 0x48, 0xe2, - 0xeb, 0xd7, 0x12, 0x42, 0x4c, 0x71, 0xfb, 0x25, 0x17, 0x23, 0x0e, 0x01, - 0xa6, 0x21, 0xb9, 0x17, 0x6e, 0xf0, 0x24, 0x66, 0x9e, 0x9d, 0x0f, 0x71, - 0xf8, 
0x5b, 0x79, 0xb0, 0x1b, 0x1f, 0xe7, 0xa2, 0xc0, 0x17, 0x16, 0x08, - 0x5e, 0x24, 0x7b, 0xf9, 0x7a, 0x1e, 0x70, 0xe2, 0x05, 0x40, 0x16, 0x56, - 0xe7, 0x79, 0xf2, 0x30, 0xa3, 0xdc, 0xe3, 0x7a, 0x7e, 0x22, 0x88, 0xc0, - 0xf7, 0xc8, 0x5c, 0x93, 0x95, 0x86, 0x02, 0x6c, 0x73, 0x76, 0xef, 0x03, - 0x2d, 0xcb, 0xa5, 0x22, 0xfe, 0x05, 0xbb, 0xe6, 0xfd, 0x19, 0x8c, 0x8b, - 0x67, 0x58, 0x81, 0x81, 0x2d, 0x36, 0xd0, 0xc1, 0x20, 0xb2, 0x87, 0x87, - 0xdb, 0xe4, 0xe5, 0xd1, 0xd1, 0xd5, 0x81, 0x34, 0x4c, 0xd6, 0x09, 0xa2, - 0x5d, 0xcc, 0x99, 0x12, 0xa5, 0x06, 0x0f, 0x06, 0x7e, 0xbb, 0x67, 0x26, - 0x69, 0x15, 0x6e, 0x5f, 0xb1, 0x8e, 0xd6, 0x34, 0xfc, 0x4d, 0xd9, 0x03, - 0xb7, 0x5a, 0xf4, 0xaa, 0x03, 0x00, 0x88, 0x6b, 0x5a, 0xc9, 0xf2, 0xfb, - 0x67, 0x72, 0xbc, 0xf7, 0xb9, 0xdc, 0x97, 0xdf, 0x80, 0x91, 0xfa, 0x30, - 0x18, 0x02, 0x89, 0xc7, 0xc9, 0x62, 0x1d, 0xc0, 0x0b, 0xa6, 0xfe, 0x7e, - 0xb9, 0xa9, 0x1f, 0x11, 0x71, 0xe1, 0xd1, 0xfe, 0x8d, 0x90, 0x2c, 0x09, - 0x82, 0x2e, 0x36, 0x79, 0xa5, 0x75, 0x54, 0xfb, 0xd3, 0x3c, 0xb4, 0x18, - 0x2f, 0x4e, 0x3f, 0x37, 0xc4, 0xf8, 0xc5, 0x59, 0xa3, 0xfd, 0x0c, 0x62, - 0x9e, 0xa8, 0x7a, 0x56, 0xc5, 0x97, 0x89, 0x35, 0xc7, 0xb0, 0x29, 0x87, - 0xbf, 0x6a, 0xdc, 0xb1, 0x2f, 0x01, 0xf4, 0x0d, 0x7c, 0x25, 0x95, 0x39, - 0x81, 0xdd, 0x1a, 0x81, 0x36, 0xc0, 0x6b, 0xbf, 0x6b, 0x4d, 0xea, 0x23, - 0xc0, 0x3e, 0x5c, 0x39, 0xe5, 0x6b, 0x59, 0xa0, 0x50, 0x02, 0x99, 0xdf, - 0x4e, 0xe3, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, - 0x17, 0x88, 0xf8, 0xda, 0x3d, 0x57, 0x83, 0x63, 0x76, 0xa0, 0x5c, 0x13, - 0x1a, 0x00, 0x64, 0x30, 0x19, 0xfd, 0x2e, 0x9c, 0x64, 0xb6, 0xda, 0x51, - 0x7b, 0x55, 0xe8, 0xc4, 0x67, 0x1b, 0xda, 0xfc, 0x4c, 0xd0, 0x27, 0x58, - 0x56, 0xa1, 0x52, 0xd2, 0xb8, 0xd8, 0xd5, 0x94, 0x69, 0xcf, 0xd0, 0xd5, - 0x72, 0xeb, 0x2b, 0x05, 0xf3, 0x12, 0xa6, 0xac, 0xa6, 0xf7, 0x90, 0x24, - 0x1f, 0x22, 0x97, 0x5e, 0x8b, 0x7c, 0x2c, 0x30, 0x61, 0x11, 0x9b, 0xdf, - 0x83, 0x2b, 0x10, 0x09, 0x42, 0x77, 0x2b, 0xd9, 0x43, 0xb3, 0x27, 0x69, - 0x75, 
0xf2, 0x2e, 0x72, 0xed, 0x50, 0xea, 0xbf, 0x7f, 0x47, 0x39, 0x9c, - 0xf8, 0x1e, 0xce, 0x6f, 0xdd, 0xe8, 0x40, 0xc5, 0x14, 0x01, 0x7e, 0xbb, - 0x0f, 0x43, 0x2d, 0x36, 0x70, 0x54, 0xc6, 0xbe, 0x69, 0x24, 0xd1, 0x65, - 0x49, 0x77, 0xf0, 0xd2, 0x99, 0xb4, 0x50, 0x8d, 0x98, 0xcb, 0xbf, 0x7a, - 0x7c, 0x65, 0xd3, 0x46, 0xcf, 0x90, 0x69, 0x56, 0x15, 0xa2, 0xae, 0x11, - 0x94, 0x60, 0xf9, 0x45, 0x17, 0x54, 0x6b, 0xbd, 0xeb, 0xd8, 0x74, 0x41, - 0x5c, 0xf6, 0x49, 0x0a, 0x14, 0xce, 0x43, 0x1f, 0x67, 0xc3, 0x6c, 0xf4, - 0x01, 0xce, 0x3f, 0x85, 0xed, 0x19, 0xa1, 0xf7, 0x1b, 0xf8, 0x46, 0x45, - 0xb4, 0xe9, 0xa7, 0x1f, 0x2a, 0x65, 0x00, 0x2a, 0xd3, 0x8b, 0x6a, 0x3b, - 0xac, 0x78, 0xab, 0xf4, 0xc8, 0x62, 0x76, 0xc8, 0x24, 0xf8, 0xf8, 0x08, - 0xe0, 0x64, 0x00, 0x64, 0x74, 0x9e, 0x55, 0x2e, 0xf8, 0xc9, 0xc8, 0x58, - 0x0e, 0x1f, 0x27, 0x32, 0xfd, 0x30, 0x24, 0x68, 0xc8, 0xa4, 0x8c, 0x1c, - 0xf3, 0xa7, 0x32, 0xae, 0x84, 0x0a, 0x8a, 0x1e, 0x11, 0xce, 0xb2, 0x02, - 0xf1, 0xb3, 0x5f, 0x7d, 0x5e, 0x54, 0x8c, 0xe0, 0xeb, 0x46, 0x6e, 0x8a, - 0x5f, 0x3f, 0x71, 0x47, 0x2a, 0x8a, 0xe6, 0xf0, 0xb0, 0x04, 0x49, 0x64, - 0xb3, 0x7e, 0x16, 0x09, 0x83, 0x5f, 0x12, 0xe0, 0x85, 0xb7, 0x36, 0xc0, - 0x8a, 0xa5, 0xcd, 0xae, 0xc0, 0xb4, 0xa2, 0x62, 0x9b, 0xfa, 0x64, 0x18, - 0x16, 0x8e, 0xb6, 0x50, 0xf2, 0x9b, 0xc4, 0x7d, 0x0c, 0x4c, 0x8b, 0x58, - 0xcf, 0x9b, 0x87, 0x09, 0xb1, 0x37, 0xbb, 0xaf, 0xa7, 0x72, 0x79, 0x81, - 0x09, 0x55, 0xa1, 0x6a, 0x87, 0xb0, 0x7d, 0xc8, 0xb0, 0xc1, 0xa4, 0xa9, - 0xdf, 0xcf, 0x95, 0x77, 0x36, 0x8e, 0x2b, 0xae, 0xeb, 0x4b, 0xf9, 0x2a, - 0x83, 0x6c, 0x53, 0x3c, 0x89, 0xa6, 0x08, 0xae, 0x00, 0x4e, 0xb8, 0xf6, - 0x34, 0x7c, 0xc6, 0x76, 0x87, 0x1a, 0x02, 0xb0, 0x89, 0xa3, 0x0f, 0x00, - 0xc6, 0x7b, 0xeb, 0xf7, 0x95, 0x40, 0xc5, 0x0d, 0x6f, 0x74, 0xd8, 0x21, - 0x2f, 0x9f, 0x24, 0xac, 0x43, 0xdb, 0x3a, 0x39, 0x6c, 0x34, 0x59, 0x62, - 0x66, 0xbc, 0x28, 0x7f, 0x8c, 0x64, 0x62, 0x8c, 0x28, 0x6c, 0xf5, 0x79, - 0x24, 0xb1, 0x00, 0x9c, 0x58, 0x6b, 0x09, 0xef, 0xb0, 0x73, 0xcd, 0x47, - 0xbb, 
0x52, 0xfd, 0x26, 0x6a, 0xff, 0xb9, 0xf1, 0xd5, 0x82, 0x59, 0x01, - 0xfa, 0x87, 0x14, 0x24, 0x10, 0xb0, 0xf7, 0xdf, 0xf9, 0x3f, 0x67, 0x19, - 0xbd, 0xc7, 0x85, 0xb0, 0xad, 0x47, 0xa8, 0x4c, 0x3e, 0xb6, 0x2e, 0x8a, - 0xb3, 0xcc, 0x35, 0xa0, 0x48, 0xc7, 0x90, 0x81, 0xb7, 0x53, 0x1c, 0x38, - 0x63, 0xf2, 0x2f, 0xa0, 0x71, 0x82, 0xe2, 0x56, 0xdb, 0x68, 0xe8, 0x5f, - 0xf8, 0x42, 0xf2, 0xf6, 0xb8, 0x10, 0x6b, 0x54, 0x21, 0xa0, 0xc1, 0xfe, - 0xcb, 0xce, 0x12, 0xa2, 0x49, 0x51, 0x86, 0x53, 0x56, 0xec, 0x33, 0xb3, - 0x72, 0xce, 0xa4, 0x46, 0xe3, 0x37, 0xcb, 0xc0, 0x95, 0xaa, 0xe2, 0xa3, - 0xc5, 0xe9, 0x36, 0x40, 0xfe, 0xf7, 0xe2, 0x5a, 0x6d, 0x58, 0x39, 0xb2, - 0x41, 0x5d, 0xe2, 0x71, 0x72, 0xd0, 0xf0, 0x5c, 0x16, 0x88, 0x95, 0x30, - 0x0a, 0xfb, 0x8d, 0xda, 0x14, 0x80, 0xf4, 0x15, 0xf2, 0xf6, 0xac, 0xf3, - 0xd8, 0x8d, 0x13, 0x24, 0x2c, 0x74, 0x60, 0x6e, 0x8c, 0xa1, 0x59, 0xcf, - 0x74, 0x7c, 0x2d, 0x0b, 0xbb, 0x06, 0x5c, 0x9d, 0xcd, 0xf3, 0x1e, 0x4a, - 0xba, 0x3f, 0x9c, 0x4a, 0xc4, 0xd7, 0xf9, 0xf0, 0xa5, 0x56, 0x7f, 0xb0, - 0xa2, 0x57, 0xd0, 0xc3, 0xaa, 0xa7, 0xd0, 0x49, 0xe2, 0x28, 0x9b, 0xc4, - 0x64, 0x0c, 0xe0, 0x71, 0x9c, 0x05, 0x04, 0x95, 0x00, 0x1f, 0x7b, 0xa9, - 0xb9, 0xb3, 0x2b, 0x8f, 0x0b, 0x45, 0x1e, 0x23, 0xaa, 0x27, 0x89, 0x4a, - 0xb0, 0x7d, 0x03, 0xdf, 0xae, 0xdb, 0xcb, 0xc4, 0xec, 0x3b, 0x02, 0xe2, - 0x85, 0x3a, 0xb7, 0x25, 0xfb, 0xab, 0xca, 0xc1, 0x33, 0x00, 0x5b, 0xd2, - 0xcf, 0xb0, 0x11, 0x1d, 0x51, 0xb5, 0x5b, 0xea, 0x94, 0xf7, 0xa0, 0x98, - 0x33, 0xba, 0x58, 0xfc, 0x12, 0xea, 0xdd, 0x89, 0xbd, 0x63, 0x03, 0xbe, - 0x7e, 0x3b, 0x69, 0xc4, 0x9d, 0x57, 0x0f, 0xd6, 0xbe, 0xea, 0x5b, 0xd0, - 0x97, 0x63, 0x89, 0xb0, 0xa0, 0xc0, 0xd6, 0x39, 0xc1, 0x69, 0x12, 0x6a, - 0xfb, 0xac, 0x74, 0x7f, 0xfb, 0xf4, 0x7f, 0x38, 0x44, 0x4c, 0x8a, 0xa2, - 0x41, 0x15, 0xc0, 0x54, 0xc0, 0xed, 0x14, 0x83, 0xef, 0xbc, 0x9c, 0xc7, - 0xdd, 0x21, 0xd6, 0xf0, 0x9b, 0x7f, 0x09, 0xd5, 0x96, 0xe5, 0xf7, 0xc5, - 0xa9, 0xb3, 0x41, 0xb0, 0x9d, 0xeb, 0x49, 0x68, 0x9d, 0x2b, 0xea, 0x47, - 0x80, 
0x3b, 0x54, 0xb8, 0xf4, 0x14, 0x5e, 0xd6, 0x66, 0x89, 0x04, 0xb3, - 0x00, 0xa3, 0xa8, 0x32, 0x62, 0x2e, 0xc3, 0x15, 0xc6, 0x93, 0x7d, 0x40, - 0x32, 0xb1, 0x6b, 0x60, 0xd3, 0x52, 0xdf, 0x09, 0x8c, 0x80, 0x2b, 0x01, - 0xe7, 0x97, 0x8d, 0xbb, 0x14, 0xd6, 0x10, 0x15, 0x64, 0x00, 0x4a, 0x2c, - 0x67, 0xca, 0xd0, 0xa1, 0x37, 0x33, 0x7b, 0xa1, 0x2a, 0x5b, 0x5b, 0x78, - 0xf8, 0x2f, 0xdd, 0x76, 0xab, 0x8a, 0xc3, 0xe3, 0x37, 0x00, 0xd1, 0x29, - 0xb0, 0x96, 0x1d, 0x18, 0xbe, 0x5d, 0x32, 0x7e, 0xb7, 0x11, 0xa9, 0x78, - 0x72, 0xa2, 0x2d, 0x29, 0x1c, 0x32, 0xa4, 0xff, 0xc7, 0xce, 0xfe, 0xaf, - 0xb7, 0x17, 0x43, 0xe5, 0x2f, 0xae, 0x45, 0xd3, 0xaf, 0x10, 0xe3, 0xd0, - 0x58, 0xb6, 0xee, 0xee, 0x7a, 0xb5, 0x06, 0x70, 0x26, 0x7e, 0x2d, 0x5b, - 0xd5, 0xe1, 0x7b, 0x9a, 0x37, 0x02, 0xfc, 0x1d, 0x08, 0x4f, 0x1a, 0xf5, - 0x44, 0x63, 0xde, 0x4b, 0x14, 0x68, 0x54, 0x0b, 0x6a, 0x22, 0x4e, 0x02, - 0x65, 0xcd, 0xf4, 0x04, 0xec, 0xcc, 0x8a, 0x0b, 0xe0, 0x59, 0xf8, 0x65, - 0x25, 0x63, 0xed, 0x0f, 0xa6, 0xc5, 0x3c, 0xcb, 0x5d, 0xc5, 0xd8, 0x9f, - 0x5a, 0xd3, 0x88, 0x3d, 0xd4, 0x2c, 0xb3, 0x04, 0xf6, 0x97, 0xc7, 0xe2, - 0xfd, 0xb6, 0xf4, 0x7d, 0x0d, 0xb9, 0x75, 0x7e, 0x9d, 0x81, 0xdc, 0xdf, - 0x8e, 0x90, 0x40, 0x0c, 0x7b, 0x45, 0xfe, 0x68, 0xfd, 0xff, 0x1c, 0xf1, - 0x16, 0x09, 0x33, 0x74, 0x27, 0x7b, 0x4d, 0xd9, 0x9b, 0x48, 0x6d, 0x84, - 0xeb, 0x96, 0x8f, 0x4b, 0x82, 0x73, 0xd5, 0x69, 0x7d, 0x14, 0x45, 0x8c, - 0xb8, 0x71, 0x87, 0x70, 0x09, 0x26, 0xfc, 0x89, 0x6f, 0x0f, 0xb6, 0xc1, - 0xd6, 0xe1, 0xbf, 0xdb, 0x85, 0x8f, 0x94, 0xad, 0x94, 0x01, 0x01, 0xbb, - 0x3f, 0xc0, 0xb5, 0xff, 0xf5, 0xbb, 0x4f, 0x50, 0x09, 0xca, 0x7d, 0x36, - 0x47, 0x66, 0x9a, 0x8c, 0xee, 0x84, 0x73, 0x9a, 0x1f, 0x49, 0x75, 0xb4, - 0xab, 0x66, 0xf7, 0x3b, 0xfe, 0x81, 0x67, 0xc9, 0xd1, 0x16, 0xde, 0x1f, - 0xc2, 0x24, 0xed, 0x6a, 0x5a, 0xe7, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x18, 0x00, 0x00, 0xc5, 0xd7, 0x14, 0x84, 0xf8, 0xcf, 0x9b, 0xf4, - 0xb7, 0x6f, 0x47, 0x90, 0x47, 0x30, 0x80, 0x4b, 0x9e, 0x32, 0x25, 0xa9, - 0xf1, 
0x33, 0xb5, 0xde, 0xa1, 0x68, 0xf4, 0xe2, 0x85, 0x1f, 0x07, 0x2f, - 0xcc, 0x00, 0xfc, 0xaa, 0x7c, 0xa6, 0x20, 0x61, 0x71, 0x7a, 0x48, 0xe5, - 0x2e, 0x29, 0xa3, 0xfa, 0x37, 0x9a, 0x95, 0x3f, 0xaa, 0x68, 0x93, 0xe3, - 0x2e, 0xc5, 0xa2, 0x7b, 0x94, 0x5e, 0x60, 0x5f, 0x10, 0x85, 0xf3, 0x23, - 0x2d, 0x42, 0x4c, 0x13, 0x29, 0xc8, 0x8d, 0x78, 0x6e, 0xd6, 0x8c, 0xe6, - 0xfc, 0xb6, 0x2a, 0xa6, 0x3b, 0xf9, 0xab, 0x61, 0x7c, 0x08, 0x8a, 0x3b, - 0x70, 0xbe, 0x57, 0xaa, 0xda, 0x1f, 0x33, 0x4a, 0x70, 0x17, 0x25, 0x0d, - 0x3f, 0x60, 0x3d, 0xc8, 0x2e, 0xbd, 0x3b, 0x12, 0x0b, 0x63, 0x5e, 0x3f, - 0xf5, 0x6b, 0x1f, 0x0b, 0xd9, 0x33, 0x85, 0x23, 0x71, 0x24, 0x9a, 0xb3, - 0xdf, 0x5c, 0x1f, 0xef, 0x14, 0x33, 0xc8, 0x66, 0x85, 0xb7, 0xf0, 0x56, - 0x68, 0x1d, 0x51, 0x52, 0xaf, 0x80, 0x3c, 0xe2, 0x59, 0x06, 0xf1, 0xd1, - 0x9f, 0xb6, 0xc6, 0x80, 0x4e, 0x06, 0xea, 0x28, 0xab, 0x17, 0x8f, 0x45, - 0x7a, 0xf6, 0xb4, 0x93, 0xb7, 0x43, 0x9e, 0xc6, 0xd4, 0x29, 0x00, 0x62, - 0xab, 0x51, 0x7a, 0x72, 0xe5, 0xc1, 0xd4, 0x10, 0xcd, 0xd6, 0x17, 0x54, - 0xe4, 0x20, 0x84, 0x50, 0xe4, 0xf9, 0x00, 0x13, 0xfd, 0xa6, 0x9f, 0xef, - 0x19, 0xd4, 0x60, 0x2a, 0x42, 0x07, 0xcd, 0xd5, 0xa1, 0x01, 0x6d, 0x07, - 0x01, 0x32, 0x61, 0x3c, 0x65, 0x9a, 0x8f, 0x5d, 0x33, 0xf3, 0xcb, 0x29, - 0x0b, 0x8c, 0xe7, 0x3b, 0x83, 0x44, 0xb1, 0x3a, 0x4f, 0x8e, 0x09, 0x15, - 0x14, 0x69, 0x84, 0xa1, 0xbb, 0x15, 0xfd, 0xea, 0xde, 0xbe, 0x5b, 0x6a, - 0xc0, 0x95, 0x04, 0x46, 0x4d, 0x8a, 0xaa, 0xac, 0xbc, 0x2f, 0xad, 0x12, - 0x15, 0x8a, 0x53, 0x4c, 0x94, 0xb8, 0xca, 0x42, 0x96, 0x3a, 0xf4, 0x7a, - 0x18, 0x9d, 0x5b, 0x24, 0x9a, 0xce, 0xa8, 0x99, 0xd4, 0x37, 0x32, 0xf6, - 0xf2, 0xac, 0xaf, 0x3f, 0xf5, 0x3b, 0xfe, 0xda, 0x13, 0x9a, 0xab, 0x4f, - 0x55, 0xc0, 0x2c, 0x21, 0x2b, 0x65, 0x71, 0x1f, 0xc5, 0x04, 0x32, 0xc9, - 0x94, 0xe5, 0xfa, 0x6f, 0xd8, 0x2a, 0xbc, 0x70, 0x85, 0x55, 0xdc, 0x62, - 0xb7, 0x3a, 0x20, 0x0e, 0xe7, 0x67, 0x3c, 0xfe, 0xcb, 0x83, 0x6a, 0x15, - 0x6e, 0x4a, 0x35, 0x65, 0xea, 0xc1, 0xb9, 0x4d, 0x35, 0xf9, 0x4b, 0xcf, - 0xd8, 
0xfd, 0xa5, 0xff, 0xff, 0x67, 0x70, 0x04, 0xae, 0xa2, 0xa4, 0x12, - 0x4b, 0x83, 0x4f, 0xc2, 0x96, 0xf0, 0x21, 0x2b, 0x14, 0x21, 0x73, 0x42, - 0x14, 0x99, 0x07, 0xe5, 0xa9, 0x52, 0x4c, 0xeb, 0xbe, 0xc3, 0x11, 0x2e, - 0x27, 0xda, 0x69, 0x94, 0xd5, 0xf6, 0xc6, 0x77, 0x0a, 0x00, 0x5d, 0x9a, - 0x82, 0xaa, 0x21, 0xfc, 0x86, 0x9b, 0xd0, 0xc4, 0xc4, 0x1f, 0x53, 0x41, - 0x7a, 0x92, 0xab, 0x1c, 0x12, 0xf6, 0xd5, 0x48, 0xfb, 0x29, 0x4d, 0xb4, - 0xd2, 0x12, 0xee, 0xc5, 0xea, 0x18, 0x33, 0xf1, 0x4d, 0x0a, 0x10, 0x43, - 0xa5, 0x35, 0xb1, 0x63, 0xc4, 0xfb, 0x38, 0x1e, 0xef, 0xac, 0x3f, 0x97, - 0x41, 0xc6, 0x96, 0x3e, 0x60, 0x13, 0xc8, 0xe3, 0xbe, 0x61, 0xe9, 0xb6, - 0x26, 0x16, 0x14, 0xf8, 0x82, 0x0d, 0x6e, 0x75, 0x2f, 0xd7, 0x9c, 0x3a, - 0x4a, 0xda, 0xd8, 0x2b, 0x35, 0xd4, 0x20, 0x32, 0xd4, 0x4f, 0x0f, 0xe4, - 0xdc, 0xd5, 0x0f, 0xfe, 0xa6, 0x81, 0x28, 0xb4, 0x24, 0x3e, 0xb7, 0x0f, - 0xb0, 0xb2, 0x5b, 0x05, 0x76, 0xbb, 0x24, 0x49, 0x6a, 0x01, 0x68, 0x3f, - 0x03, 0x96, 0xbc, 0x0c, 0x77, 0x48, 0x5f, 0xe8, 0x39, 0xf4, 0xb0, 0x84, - 0x42, 0x0e, 0x6a, 0xb9, 0xab, 0xf2, 0x95, 0x97, 0xa7, 0x5e, 0x29, 0x34, - 0x9d, 0x50, 0xc0, 0x4b, 0x40, 0x72, 0xa1, 0x7c, 0x79, 0x5e, 0x95, 0xbe, - 0xd6, 0x17, 0x43, 0x0a, 0xc9, 0x27, 0x25, 0x43, 0xd7, 0x99, 0xd5, 0x48, - 0xd8, 0x98, 0xb5, 0x2b, 0x7f, 0xe3, 0xbd, 0x1d, 0xc0, 0xd1, 0x04, 0xd5, - 0xa4, 0xe1, 0x68, 0xbe, 0x96, 0xf1, 0x2e, 0x5e, 0x37, 0x8d, 0x39, 0x4e, - 0xe4, 0xcc, 0x5e, 0xd7, 0xdd, 0x59, 0x7e, 0xe8, 0xae, 0x48, 0xb5, 0xec, - 0x2c, 0xf7, 0x68, 0x96, 0x00, 0xe5, 0xec, 0x03, 0x6f, 0x98, 0x3a, 0x9a, - 0x4f, 0xd9, 0xf1, 0x2f, 0xfe, 0x76, 0xcf, 0x8f, 0x0b, 0x3d, 0x8a, 0x14, - 0x00, 0x83, 0xcb, 0xca, 0xe3, 0x34, 0x81, 0xb5, 0x91, 0x64, 0x2b, 0x12, - 0x24, 0x86, 0x9c, 0xae, 0x3c, 0x7f, 0x53, 0x22, 0xd4, 0x94, 0x90, 0x44, - 0x6b, 0x35, 0xd2, 0xce, 0x8e, 0x95, 0xe2, 0xbe, 0x46, 0x50, 0x3f, 0x3d, - 0xc3, 0xcd, 0xef, 0x47, 0x99, 0xb5, 0xf2, 0xd4, 0x6f, 0xf4, 0xfa, 0xa2, - 0xfc, 0x1e, 0xe3, 0x99, 0x49, 0xfd, 0x1a, 0x6e, 0x0d, 0xb5, 0xf1, 0xc8, - 0x05, 
0x22, 0x29, 0xca, 0x03, 0xb8, 0x15, 0x3b, 0x01, 0x8a, 0x95, 0x74, - 0x48, 0x93, 0x61, 0x35, 0xde, 0xeb, 0xa9, 0xc4, 0x56, 0xa9, 0xd7, 0xde, - 0x4b, 0xe5, 0x4b, 0xa1, 0x42, 0x6a, 0x5f, 0xe3, 0xb2, 0xc7, 0xda, 0xfb, - 0xc7, 0x70, 0x64, 0xe0, 0x68, 0x19, 0xc6, 0x11, 0x77, 0x2b, 0x5f, 0xba, - 0x1d, 0x58, 0x77, 0x98, 0x2c, 0x91, 0xb4, 0xd2, 0xea, 0x1b, 0xdc, 0xe8, - 0xfa, 0x82, 0xf3, 0x6e, 0xac, 0x88, 0x15, 0x16, 0x1a, 0x53, 0xb3, 0x01, - 0x94, 0x03, 0x47, 0x20, 0xdb, 0x71, 0xcb, 0x71, 0xe8, 0x62, 0xad, 0x34, - 0x2b, 0xa3, 0xa5, 0xe9, 0xa6, 0x82, 0x0e, 0x16, 0x61, 0xbc, 0x29, 0x6b, - 0xb1, 0x60, 0x67, 0x80, 0x9a, 0x9f, 0xc4, 0x82, 0xf6, 0xb0, 0x7a, 0x16, - 0x9c, 0x25, 0x04, 0xeb, 0xfd, 0xe0, 0x18, 0xd3, 0xfc, 0xeb, 0xe1, 0x3c, - 0x2b, 0x29, 0x7b, 0x32, 0x4e, 0xd3, 0x6d, 0xe1, 0x27, 0xda, 0xc9, 0x14, - 0x5c, 0x7f, 0xfa, 0x70, 0x41, 0x8e, 0xb4, 0xa3, 0xde, 0x36, 0x92, 0x67, - 0x97, 0xe2, 0xec, 0x85, 0x8b, 0x76, 0x08, 0x3c, 0x32, 0x58, 0xd4, 0x7f, - 0x6f, 0x91, 0x03, 0xdb, 0x19, 0x3e, 0xc4, 0x8b, 0x3c, 0xb7, 0x75, 0x90, - 0x71, 0x7a, 0x21, 0x9d, 0xa7, 0x77, 0xbf, 0xf5, 0x92, 0x57, 0x46, 0x07, - 0xa7, 0xbb, 0x0c, 0x42, 0xca, 0x4f, 0x5a, 0x27, 0x45, 0x69, 0xfe, 0x6d, - 0x78, 0x43, 0x77, 0xc4, 0xb4, 0x43, 0xff, 0x37, 0x0d, 0xb7, 0xfa, 0xe9, - 0x9e, 0x06, 0x70, 0x53, 0xfd, 0xf6, 0xa0, 0x28, 0x84, 0x46, 0xcd, 0x61, - 0xa2, 0x95, 0xc4, 0x1e, 0x6a, 0x13, 0xa1, 0x7f, 0xaf, 0xe1, 0x73, 0x85, - 0xb0, 0x53, 0x9c, 0x08, 0xb6, 0x1d, 0x4d, 0xb4, 0x0b, 0xfb, 0x1f, 0x0c, - 0x7b, 0x17, 0x06, 0x73, 0xa7, 0x22, 0x1f, 0xb0, 0xd8, 0x45, 0x6e, 0xe5, - 0xde, 0x48, 0xb7, 0x9f, 0x5a, 0xa8, 0xd1, 0xc3, 0x04, 0xd1, 0x87, 0xec, - 0x15, 0x3e, 0xd1, 0xc7, 0x57, 0x01, 0x46, 0x4b, 0x28, 0xa8, 0x79, 0x5a, - 0x7e, 0x0b, 0x56, 0x56, 0x28, 0xda, 0x35, 0xea, 0x4c, 0x14, 0x81, 0xae, - 0xc0, 0x0d, 0x12, 0xfe, 0x2d, 0xb7, 0x95, 0x4d, 0xea, 0x78, 0xb6, 0x53, - 0xcf, 0xac, 0x8a, 0xfc, 0xc9, 0x07, 0x9f, 0x93, 0xf0, 0x11, 0x86, 0x13, - 0xe9, 0xca, 0x3d, 0xce, 0xb1, 0xfd, 0x1a, 0x0a, 0x8b, 0x11, 0x82, 0x94, - 0x6a, 
0xae, 0xc5, 0x80, 0x6a, 0x3b, 0xa8, 0x7c, 0xb4, 0x53, 0x4e, 0xa9, - 0x04, 0x1a, 0x4f, 0xb0, 0xb9, 0x95, 0x96, 0xa5, 0xfd, 0xce, 0xdc, 0x57, - 0x00, 0x48, 0x16, 0xe2, 0x40, 0xae, 0x04, 0xf5, 0x83, 0x60, 0x23, 0xd9, - 0x8e, 0x59, 0x56, 0x20, 0x50, 0x38, 0xc4, 0xde, 0x88, 0x9f, 0x91, 0x06, - 0xdb, 0x8f, 0x84, 0xa2, 0xaf, 0x61, 0xdd, 0x48, 0x03, 0x4f, 0xc4, 0xb8, - 0xed, 0x12, 0xd2, 0x74, 0x08, 0xb9, 0x51, 0x63, 0xb5, 0xfe, 0x09, 0x7f, - 0x7b, 0x8c, 0x5e, 0xd7, 0x27, 0xe5, 0x79, 0xe6, 0x33, 0x60, 0x54, 0xe1, - 0x21, 0xda, 0xca, 0x8b, 0x81, 0xdf, 0xb6, 0xa7, 0x2e, 0x9d, 0x0f, 0xfc, - 0x05, 0x80, 0x67, 0xcb, 0xc5, 0xdf, 0xc7, 0x13, 0xee, 0xb5, 0x40, 0x8e, - 0xa7, 0x0c, 0xcb, 0xf2, 0x45, 0x15, 0x29, 0xb1, 0xb8, 0x02, 0x23, 0x61, - 0x38, 0xf1, 0x16, 0xa1, 0x0c, 0xa1, 0xc9, 0x40, 0x8c, 0xd0, 0x48, 0x4b, - 0xce, 0x9c, 0x1e, 0x53, 0x40, 0x44, 0xf6, 0x17, 0x16, 0xc6, 0x5c, 0xb0, - 0x2a, 0x29, 0x59, 0x87, 0x67, 0x85, 0xa7, 0x81, 0x84, 0xe9, 0x4f, 0xe5, - 0x4e, 0x13, 0x5a, 0x11, 0xa1, 0x24, 0x62, 0xe9, 0x7a, 0xea, 0x51, 0xaa, - 0x45, 0xf3, 0x1d, 0x2a, 0xaf, 0x01, 0x28, 0x35, 0xda, 0xb4, 0xe7, 0xab, - 0xc1, 0xb9, 0x3c, 0x45, 0xa2, 0x0b, 0x5d, 0x40, 0x09, 0xac, 0x62, 0x16, - 0xd3, 0x1f, 0x9f, 0xc7, 0x1a, 0x56, 0xb7, 0x27, 0xd1, 0x1b, 0xe1, 0xb5, - 0x82, 0x9e, 0xe8, 0xd3, 0x5c, 0x0f, 0xe8, 0x87, 0x61, 0xc6, 0x20, 0xb7, - 0x31, 0x3f, 0x0d, 0xb3, 0x0a, 0x5a, 0xce, 0x06, 0xa5, 0xe9, 0xfd, 0xf3, - 0x29, 0x1a, 0xcd, 0x86, 0x0e, 0x31, 0x29, 0xaa, 0xb7, 0x32, 0xf1, 0x10, - 0x4e, 0x92, 0x12, 0x00, 0xc0, 0xac, 0x50, 0x4b, 0x52, 0x59, 0x51, 0x7c, - 0xa8, 0x0c, 0xf7, 0xcb, 0x16, 0x73, 0x7b, 0x90, 0xa8, 0x57, 0x79, 0xb4, - 0x73, 0x53, 0xd7, 0xed, 0xba, 0x46, 0xc5, 0x06, 0x53, 0x02, 0xc7, 0x58, - 0x4c, 0x09, 0x0c, 0xa5, 0x01, 0x13, 0x18, 0x39, 0x4b, 0x4e, 0xc2, 0x0d, - 0xd6, 0xdf, 0xaa, 0x7e, 0x46, 0xba, 0x6e, 0xcc, 0x25, 0x42, 0xd0, 0xb3, - 0x31, 0xdc, 0xdf, 0x7d, 0xf1, 0xc3, 0x73, 0xca, 0x7a, 0xf6, 0xcb, 0x23, - 0x81, 0x8d, 0xbe, 0x0b, 0xf2, 0x79, 0x8d, 0x14, 0xa4, 0xc8, 0x36, 0x18, - 0x49, 
0xc8, 0x0d, 0xd7, 0xc9, 0xdd, 0x35, 0xeb, 0xec, 0x52, 0x56, 0xae, - 0xf2, 0xd2, 0x51, 0x91, 0x39, 0xbc, 0xb0, 0x49, 0xb7, 0xf2, 0x1b, 0x64, - 0x83, 0x5a, 0xa6, 0x97, 0xc2, 0x15, 0x95, 0xdc, 0x11, 0xd2, 0x89, 0xc0, - 0x6a, 0xb1, 0x44, 0x43, 0x38, 0xb6, 0x54, 0x0f, 0xdc, 0xcb, 0xed, 0x26, - 0x27, 0xd9, 0x46, 0x56, 0x4e, 0x6a, 0x54, 0x74, 0x0f, 0x45, 0xfc, 0xb6, - 0x93, 0xab, 0x3c, 0xd1, 0x86, 0x51, 0xaf, 0xa9, 0x4a, 0xc0, 0x9c, 0x78, - 0xc1, 0xb1, 0xc7, 0xf1, 0x9c, 0xd1, 0xd0, 0x32, 0x4e, 0x4b, 0x02, 0x36, - 0x68, 0x38, 0x88, 0x56, 0xc0, 0x2b, 0x12, 0x05, 0x3b, 0xb9, 0xf6, 0xa2, - 0x37, 0xe7, 0xbc, 0x81, 0xf9, 0x75, 0x51, 0x27, 0x56, 0x0d, 0x55, 0xd1, - 0x6a, 0xe0, 0xcf, 0x87, 0x0a, 0x44, 0xc6, 0x57, 0xe1, 0x1b, 0xc0, 0x2c, - 0xcf, 0xab, 0x77, 0xe9, 0x14, 0xf5, 0x34, 0x89, 0xfb, 0xc9, 0xf2, 0x87, - 0x5c, 0x75, 0xba, 0x51, 0x9a, 0x49, 0xe9, 0x23, 0x23, 0xf4, 0xc9, 0xd1, - 0x2f, 0x87, 0xf6, 0x75, 0x38, 0x97, 0x48, 0xb8, 0x30, 0x46, 0x1d, 0x46, - 0x65, 0x03, 0x10, 0xcf, 0xfb, 0x36, 0xf2, 0xb1, 0xaf, 0x31, 0x02, 0x7b, - 0x74, 0xfe, 0x9f, 0x8c, 0x73, 0x04, 0xfd, 0xb5, 0xae, 0x2e, 0x27, 0x9c, - 0xd8, 0x73, 0xbc, 0xc3, 0x4a, 0x76, 0x93, 0x66, 0xf6, 0xb7, 0x90, 0xc4, - 0x42, 0x3d, 0xcd, 0xb5, 0xf1, 0x75, 0xbf, 0xb7, 0xdd, 0x8e, 0xb7, 0xcd, - 0x90, 0x35, 0xf5, 0x95, 0x3d, 0xe4, 0x4e, 0xb0, 0x7c, 0x5f, 0xad, 0xff, - 0x75, 0x38, 0xc4, 0xc7, 0xed, 0xec, 0x70, 0xcc, 0x9f, 0xf9, 0x77, 0xa1, - 0x00, 0x2f, 0xf1, 0xa2, 0xc9, 0x74, 0xdc, 0x18, 0x14, 0xd0, 0x2f, 0x86, - 0x66, 0xa7, 0x5b, 0x39, 0x5c, 0xba, 0x0e, 0x77, 0x16, 0x04, 0xc3, 0x02, - 0x42, 0x3b, 0x66, 0x29, 0xee, 0x65, 0x00, 0xd4, 0x22, 0x5a, 0x77, 0x74, - 0xd4, 0xc3, 0xf3, 0x00, 0xdf, 0x6b, 0xc3, 0x15, 0x89, 0x0e, 0xb1, 0xbc, - 0xac, 0xe8, 0x44, 0x2f, 0x80, 0x34, 0x34, 0x8b, 0x0c, 0x48, 0x45, 0xc2, - 0x6a, 0xa3, 0x67, 0xd7, 0x3d, 0x36, 0xf3, 0x3f, 0xe5, 0xf0, 0x5b, 0xe8, - 0xad, 0x41, 0xd5, 0x82, 0xc1, 0x28, 0xab, 0x77, 0xe8, 0x7f, 0xb3, 0xf6, - 0xd2, 0x0c, 0xe4, 0x03, 0xcf, 0xe4, 0x72, 0xdb, 0x7b, 0x81, 0xf4, 0xf3, - 0x48, 
0x74, 0xe1, 0x91, 0xb8, 0xf8, 0x4c, 0x2c, 0x60, 0x99, 0x3e, 0x1e, - 0x4f, 0xaf, 0x12, 0xab, 0x52, 0xef, 0xc7, 0x60, 0xd2, 0xfe, 0x62, 0x55, - 0xc8, 0x18, 0xad, 0x60, 0xa7, 0x5d, 0xde, 0x4d, 0xfc, 0x6d, 0xe1, 0x10, - 0x7c, 0xf9, 0xa2, 0x64, 0x00, 0x16, 0x1f, 0x44, 0x7c, 0xe2, 0x72, 0x37, - 0xd9, 0x92, 0xad, 0xfc, 0x62, 0x53, 0xbe, 0xb6, 0xe0, 0xc8, 0xe0, 0xa2, - 0xef, 0x22, 0x4b, 0x70, 0x3a, 0x4f, 0xc9, 0xed, 0x6b, 0xbc, 0x17, 0x0a, - 0xcf, 0x6a, 0x2c, 0xd3, 0xd2, 0x6b, 0x02, 0x45, 0xfa, 0x9e, 0xc2, 0x21, - 0x28, 0xfc, 0x07, 0x68, 0xd6, 0xb8, 0x9f, 0x2a, 0x0b, 0x7a, 0x0e, 0xbc, - 0x4e, 0xee, 0x84, 0x38, 0xe4, 0x8e, 0x70, 0xc3, 0xc4, 0xad, 0x74, 0x87, - 0x2d, 0x16, 0x4f, 0xa1, 0xf8, 0x20, 0xf5, 0xde, 0xa3, 0xc5, 0x0c, 0x3b, - 0xde, 0x44, 0x48, 0x0f, 0x3c, 0xdc, 0x7e, 0x10, 0x8b, 0x87, 0xc4, 0x3b, - 0xb0, 0x95, 0xbf, 0x61, 0x1e, 0xad, 0x07, 0x52, 0xfd, 0x0b, 0x84, 0xa9, - 0x46, 0xb0, 0x32, 0xd5, 0x22, 0x80, 0x35, 0x26, 0x41, 0xf8, 0x11, 0x72, - 0xb1, 0x31, 0x6f, 0x5a, 0x75, 0xcc, 0x67, 0xe0, 0xb2, 0x50, 0x89, 0xb2, - 0x66, 0x6e, 0xee, 0xa0, 0x41, 0x8d, 0x00, 0x2a, 0xa7, 0x9d, 0xa5, 0x11, - 0x2b, 0x07, 0x95, 0x3a, 0x55, 0x8c, 0x67, 0xb1, 0xe5, 0x2d, 0xd4, 0xd1, - 0x3e, 0x29, 0xed, 0xa5, 0x59, 0x97, 0x7b, 0xdf, 0x92, 0x10, 0x0b, 0x04, - 0x89, 0x27, 0xa0, 0xa2, 0x93, 0x18, 0x7f, 0x47, 0x84, 0x1c, 0xc6, 0xd6, - 0x8f, 0x73, 0x81, 0xa0, 0xfa, 0xe5, 0x3e, 0xd8, 0xbf, 0x56, 0x1a, 0x76, - 0xf4, 0xc4, 0x0f, 0x7a, 0x29, 0x9d, 0x32, 0x5d, 0x41, 0xe0, 0x07, 0xb9, - 0xd3, 0x3f, 0x7e, 0xff, 0x90, 0x89, 0xce, 0xdc, 0xf1, 0x1d, 0x54, 0xb6, - 0x67, 0x7f, 0x4d, 0x71, 0x9a, 0x4a, 0x5f, 0x80, 0x0d, 0x5c, 0x77, 0xd5, - 0x50, 0x7c, 0x41, 0x56, 0x7e, 0x99, 0x0a, 0xeb, 0x66, 0x1f, 0xd2, 0x55, - 0xc3, 0xc6, 0x6c, 0xc5, 0xfc, 0x34, 0x40, 0x2c, 0x05, 0x29, 0x05, 0x7c, - 0xca, 0xe6, 0x8d, 0xd3, 0xb0, 0xca, 0x84, 0x27, 0x50, 0x7c, 0x6b, 0x17, - 0x1b, 0x22, 0xe4, 0x7f, 0xe6, 0x44, 0x94, 0x06, 0x4b, 0xb3, 0xb7, 0xbb, - 0x98, 0x81, 0x44, 0x0b, 0xf5, 0x66, 0xcb, 0xad, 0xf2, 0x9a, 0xe1, 0x47, - 0xf3, 
0x97, 0xa9, 0xb2, 0xc2, 0xca, 0xcd, 0x98, 0x78, 0x60, 0xdc, 0x6e, - 0x87, 0x55, 0x47, 0xf3, 0xae, 0x84, 0xdd, 0x9a, 0xe9, 0x1a, 0x63, 0x83, - 0xea, 0x23, 0x09, 0x67, 0x34, 0x83, 0x00, 0x6e, 0x5e, 0x58, 0xb8, 0x89, - 0x04, 0x08, 0x0a, 0x55, 0x9e, 0x78, 0xc9, 0xff, 0xb9, 0xb5, 0x2c, 0xdd, - 0x3b, 0x0c, 0x58, 0x07, 0x8b, 0xb4, 0x6a, 0xc4, 0x64, 0xa3, 0x5e, 0x5b, - 0xfe, 0x4d, 0xd0, 0x74, 0x01, 0x1b, 0xdf, 0x10, 0x45, 0x2b, 0xd6, 0x9e, - 0xa9, 0x60, 0x1f, 0xad, 0x46, 0xa1, 0x8c, 0xf8, 0xf6, 0xa9, 0x8a, 0x27, - 0xea, 0x51, 0x37, 0x84, 0xcf, 0xe5, 0xd7, 0x51, 0xd6, 0x40, 0x39, 0x39, - 0x5f, 0xf6, 0x96, 0x33, 0xd9, 0x86, 0x8d, 0x38, 0xb6, 0x26, 0x04, 0x14, - 0x07, 0x46, 0x3e, 0xd0, 0xc5, 0xf6, 0x0d, 0xa0, 0x47, 0x2b, 0xc8, 0x73, - 0x18, 0x6b, 0xd3, 0x0e, 0x18, 0xcc, 0x43, 0x98, 0xd0, 0xcf, 0x1c, 0xe4, - 0x4a, 0x41, 0x6a, 0x56, 0x2d, 0xf0, 0x93, 0x89, 0x81, 0x6c, 0xce, 0x04, - 0x1a, 0x23, 0x05, 0x91, 0x4f, 0x48, 0x44, 0x3a, 0xaa, 0x03, 0xa5, 0x4a, - 0xa9, 0x20, 0x2c, 0xbe, 0x6a, 0x81, 0xe6, 0xa9, 0xf8, 0xf0, 0x2b, 0x29, - 0xa1, 0xe0, 0xc4, 0xce, 0xf5, 0xda, 0x25, 0x70, 0x49, 0xcc, 0xa0, 0x4b, - 0x24, 0x49, 0x4f, 0x11, 0xc4, 0x3b, 0x22, 0x89, 0x9a, 0xb4, 0xf4, 0xcd, - 0xa3, 0xee, 0xb0, 0x76, 0x13, 0xc4, 0xbb, 0xaf, 0x03, 0x7f, 0x27, 0xf3, - 0x38, 0xbc, 0xde, 0x7c, 0x0c, 0x39, 0x14, 0xb7, 0x14, 0xbb, 0x5c, 0xae, - 0x89, 0xf8, 0xf7, 0xd6, 0x00, 0x78, 0xf4, 0xb0, 0x52, 0x16, 0xf5, 0x54, - 0xc5, 0x93, 0xf7, 0x6d, 0x0d, 0xe8, 0x58, 0xe2, 0xa1, 0xa7, 0xdc, 0x49, - 0xdb, 0xc8, 0x79, 0xbc, 0xc3, 0x97, 0x7b, 0x6c, 0x82, 0x7b, 0xbe, 0xe9, - 0x79, 0xac, 0x4a, 0xa4, 0x7c, 0x49, 0x83, 0x58, 0x3a, 0xe4, 0xf5, 0x68, - 0x5c, 0xb7, 0x7f, 0x2d, 0xfe, 0x6b, 0x96, 0xc7, 0x8b, 0x67, 0xb5, 0xd0, - 0xa1, 0x0a, 0x16, 0x62, 0x64, 0x53, 0xea, 0x29, 0x80, 0x93, 0xf9, 0xd6, - 0xa0, 0xc5, 0x1b, 0x3a, 0x1e, 0xab, 0x51, 0x88, 0xe0, 0x9e, 0xd4, 0xf6, - 0xbf, 0x70, 0x2d, 0x29, 0x2e, 0x08, 0xa9, 0x31, 0x78, 0x0a, 0x15, 0x30, - 0x9f, 0x2e, 0xc8, 0x41, 0x65, 0x8e, 0x97, 0x51, 0x5e, 0x73, 0x46, 0x42, - 0x74, 
0x84, 0xfd, 0x9b, 0x4a, 0x8a, 0x68, 0x28, 0x45, 0xd0, 0x5d, 0x65, - 0x08, 0xb3, 0xf5, 0x40, 0x8a, 0x29, 0x8e, 0x70, 0x02, 0x49, 0x6a, 0x01, - 0xd6, 0x41, 0x4a, 0xf8, 0x15, 0xa3, 0x70, 0x59, 0xe9, 0xa2, 0xe2, 0x76, - 0x8c, 0x60, 0x33, 0xb3, 0xfa, 0x8b, 0xb4, 0x90, 0x6f, 0x92, 0xc8, 0x21, - 0x59, 0xc0, 0x3a, 0x30, 0x46, 0xeb, 0x49, 0xd8, 0x85, 0x63, 0x5a, 0x23, - 0x87, 0xe1, 0xa7, 0xc0, 0x1a, 0xb0, 0xc7, 0xc4, 0x40, 0x4d, 0x11, 0x9c, - 0xe3, 0xd4, 0x6b, 0xef, 0x68, 0xc8, 0x2c, 0x31, 0xcd, 0x3e, 0xee, 0x55, - 0x10, 0x67, 0x77, 0x7b, 0x30, 0xc1, 0xd0, 0x23, 0x6c, 0x65, 0x6f, 0xfb, - 0x2e, 0x62, 0x33, 0x42, 0x63, 0xdc, 0xca, 0x86, 0xf1, 0x0e, 0xb3, 0xb0, - 0x69, 0x11, 0x65, 0xe1, 0x6e, 0x6c, 0x03, 0x49, 0x79, 0xe8, 0xf1, 0x2e, - 0x8d, 0x94, 0xc8, 0xa8, 0x98, 0x2d, 0x3f, 0xfe, 0xbd, 0x2d, 0x75, 0x45, - 0xd1, 0x7a, 0x09, 0xf8, 0x90, 0x49, 0xbd, 0x4a, 0x3b, 0xa4, 0xa3, 0x26, - 0xb8, 0x62, 0x66, 0x97, 0xd9, 0xc1, 0xca, 0x12, 0x49, 0xe1, 0x27, 0x93, - 0x4f, 0x60, 0xfa, 0xb3, 0x4f, 0x4c, 0xdb, 0x87, 0x6c, 0x3b, 0x50, 0x47, - 0xe2, 0xd8, 0x5b, 0x13, 0x99, 0xf0, 0x2b, 0xbb, 0x32, 0x33, 0xfd, 0x7d, - 0x15, 0x0f, 0x2c, 0xee, 0x85, 0x83, 0xc0, 0x53, 0x79, 0x3e, 0x51, 0xfe, - 0x7c, 0x06, 0x73, 0x49, 0x49, 0x4f, 0x5a, 0x22, 0x36, 0x8f, 0x30, 0x8a, - 0xef, 0x84, 0xd6, 0x15, 0x26, 0x48, 0xe7, 0x1e, 0xb1, 0xaa, 0x82, 0xd0, - 0xc7, 0x0b, 0x97, 0x7b, 0x6c, 0x2d, 0x49, 0x7e, 0x6d, 0xe7, 0xa3, 0x05, - 0x80, 0xd7, 0x42, 0xa9, 0xc6, 0x66, 0x98, 0x30, 0xe3, 0x8a, 0x79, 0x86, - 0x9c, 0x2b, 0xbc, 0x4a, 0xe6, 0x0d, 0xc5, 0xe5, 0x1a, 0x92, 0xd9, 0xef, - 0x63, 0x52, 0x03, 0x88, 0x36, 0xc5, 0x83, 0x65, 0xf8, 0xf1, 0x87, 0xce, - 0x43, 0xfe, 0x89, 0x58, 0x07, 0x6a, 0xad, 0x85, 0x37, 0x0f, 0xdf, 0x9e, - 0xa5, 0x62, 0xa9, 0xd2, 0x41, 0x3f, 0x7f, 0xb7, 0xf1, 0xe2, 0x58, 0xb5, - 0xda, 0xdf, 0xd1, 0xba, 0x36, 0x2c, 0xe7, 0x43, 0x31, 0x07, 0xc5, 0xf5, - 0x79, 0xc9, 0x31, 0xd7, 0x1d, 0x97, 0x57, 0x9a, 0x8e, 0x3f, 0xac, 0x00, - 0x49, 0x00, 0x2f, 0xad, 0xac, 0xe7, 0x65, 0x7c, 0xbf, 0xec, 0x85, 0x57, - 0xe6, 
0xcc, 0x07, 0x34, 0x02, 0x36, 0xa8, 0x6a, 0x9f, 0x3a, 0x9a, 0x2f, - 0x34, 0x93, 0x1f, 0x7d, 0x38, 0x54, 0xe3, 0x54, 0x54, 0xee, 0x84, 0x55, - 0xe1, 0x0d, 0xc1, 0x08, 0x3e, 0x33, 0x9e, 0x2a, 0xc3, 0x6a, 0x83, 0xc4, - 0x75, 0xed, 0xbc, 0x5f, 0xd9, 0x04, 0xd7, 0x77, 0x91, 0xb1, 0xa0, 0xf2, - 0xef, 0x81, 0xb0, 0x8b, 0x53, 0x5f, 0x71, 0xec, 0xa5, 0x0b, 0xbe, 0xf2, - 0x92, 0x7e, 0x0a, 0x34, 0xeb, 0x5d, 0x65, 0xc7, 0xa9, 0x44, 0x10, 0xfb, - 0xd3, 0xef, 0xe1, 0xbc, 0x06, 0x65, 0x68, 0x22, 0xfb, 0x43, 0x2c, 0xcf, - 0x8e, 0x6a, 0x28, 0xdb, 0x0b, 0xf4, 0xaf, 0x01, 0x65, 0x97, 0xd6, 0xe5, - 0x91, 0x20, 0x13, 0x2c, 0xb1, 0xc2, 0xd3, 0xc3, 0x76, 0x90, 0xf8, 0xcd, - 0x00, 0xde, 0x93, 0xf8, 0x4e, 0xcc, 0xdc, 0xca, 0x9a, 0xf0, 0xbd, 0x9b, - 0xd6, 0x57, 0xb1, 0x13, 0xd9, 0xe0, 0xe1, 0x9e, 0x21, 0x74, 0xa9, 0x76, - 0xc0, 0x0c, 0xad, 0x4f, 0x5d, 0xfe, 0x23, 0x32, 0x5a, 0x10, 0x75, 0x5b, - 0x05, 0xdf, 0xdc, 0x5b, 0x94, 0xcb, 0xe1, 0x9f, 0x13, 0x51, 0xf5, 0x50, - 0x36, 0x3b, 0xf2, 0x90, 0x9c, 0x9a, 0xc8, 0x10, 0x88, 0xa9, 0xec, 0x22, - 0x1e, 0x96, 0x70, 0xe8, 0x9e, 0x69, 0xc1, 0x22, 0xd9, 0x14, 0x15, 0x2e, - 0xbc, 0x03, 0x96, 0x9e, 0x1d, 0x00, 0x10, 0x16, 0x4f, 0x56, 0xf0, 0x29, - 0x47, 0x0a, 0x45, 0x34, 0x27, 0x21, 0x3b, 0x67, 0x33, 0xf9, 0xdd, 0x29, - 0x3a, 0xf2, 0xe4, 0x56, 0x34, 0x46, 0xbe, 0xd8, 0x42, 0x29, 0x11, 0x7f, - 0x30, 0xc1, 0xbe, 0xa5, 0xc8, 0x9d, 0x7b, 0x2e, 0x4e, 0xcf, 0xba, 0x91, - 0xb4, 0xbf, 0x0a, 0x04, 0x00, 0x49, 0x83, 0x6b, 0x46, 0x5f, 0x3b, 0xfa, - 0xf7, 0x40, 0x8d, 0x85, 0x47, 0x14, 0x58, 0xb3, 0xa5, 0x66, 0x30, 0xfd, - 0x4a, 0x80, 0xa4, 0x61, 0x3b, 0x7c, 0xb4, 0xcc, 0x34, 0x8c, 0xc6, 0xb6, - 0x10, 0xa9, 0x76, 0xc9, 0x11, 0xd7, 0x8a, 0x51, 0x86, 0x17, 0x89, 0x28, - 0xab, 0xd5, 0x03, 0x88, 0x74, 0x5b, 0x81, 0xbd, 0x3a, 0x57, 0xfe, 0x66, - 0x25, 0xd0, 0x92, 0x15, 0x84, 0x02, 0x0f, 0x51, 0xa8, 0x58, 0xcf, 0x77, - 0x65, 0x10, 0x61, 0xe8, 0xe6, 0xab, 0xb1, 0xba, 0x3b, 0x08, 0xd6, 0xba, - 0x5f, 0xf5, 0x74, 0xc5, 0x07, 0x60, 0xfd, 0xd3, 0xc8, 0x52, 0x4e, 0xdb, - 0xc3, 
0xe3, 0x6d, 0x81, 0x20, 0x51, 0x01, 0x9a, 0x5e, 0x32, 0x4e, 0x80, - 0x5a, 0xcb, 0x83, 0xd7, 0xa4, 0xd9, 0xfb, 0xed, 0x3d, 0x80, 0xa1, 0x83, - 0x81, 0x91, 0xc0, 0x0b, 0xff, 0x67, 0xd8, 0x8b, 0xd0, 0x12, 0x0b, 0xd4, - 0x2b, 0x8e, 0x0d, 0x0f, 0xfc, 0xc7, 0xb3, 0xf1, 0xe3, 0xf3, 0x5e, 0x0c, - 0xb6, 0x6b, 0x9d, 0xdc, 0x22, 0x70, 0x31, 0x54, 0xe8, 0x41, 0xfe, 0xa1, - 0xe1, 0x4f, 0xfa, 0x81, 0xfb, 0xae, 0x72, 0x16, 0xb8, 0x87, 0xc9, 0x31, - 0x9d, 0x42, 0x47, 0x4a, 0x20, 0xae, 0x63, 0x16, 0x0d, 0xfa, 0xf1, 0x27, - 0x19, 0x47, 0xee, 0x45, 0x84, 0x29, 0x9a, 0xb6, 0x42, 0xef, 0xbd, 0x15, - 0xa8, 0x34, 0x33, 0x38, 0x9c, 0x9d, 0xbb, 0x5c, 0x03, 0xf3, 0xcf, 0xcf, - 0x6d, 0x2e, 0xd5, 0x88, 0xf8, 0xdd, 0xfc, 0xc0, 0x4a, 0xdb, 0x69, 0xd9, - 0x62, 0x89, 0x24, 0x46, 0xee, 0xa4, 0xb9, 0x95, 0xe6, 0xaf, 0x7d, 0x53, - 0xec, 0x41, 0xae, 0x70, 0xfe, 0x4f, 0x31, 0xe3, 0xa2, 0x59, 0x2c, 0xa1, - 0x53, 0x8b, 0xb6, 0x3b, 0x39, 0xc1, 0xa4, 0xa7, 0x9e, 0xaa, 0x00, 0x60, - 0x9a, 0x5f, 0x56, 0x51, 0xf3, 0x7b, 0x28, 0x84, 0x36, 0x1a, 0xc1, 0x2d, - 0xc8, 0xed, 0xf8, 0x48, 0x48, 0x1d, 0x39, 0x4d, 0x3d, 0xce, 0x30, 0x90, - 0x29, 0x33, 0x6f, 0x9a, 0xce, 0x58, 0xe7, 0x88, 0xac, 0x59, 0xce, 0x85, - 0x5a, 0x52, 0x2b, 0x6c, 0xb7, 0xe9, 0x2e, 0xa9, 0xd9, 0x9a, 0xea, 0x1c, - 0x47, 0xb2, 0x59, 0xff, 0x73, 0x76, 0x21, 0x40, 0xe1, 0xde, 0x32, 0xb8, - 0x73, 0x3d, 0xa5, 0x44, 0x66, 0x79, 0xa1, 0xfe, 0xaf, 0xf6, 0x8a, 0x97, - 0x09, 0x5c, 0x8b, 0x64, 0x38, 0x9f, 0xe1, 0x59, 0x38, 0x18, 0xe9, 0xc0, - 0xd6, 0xa2, 0xac, 0x74, 0xa9, 0xfd, 0x4a, 0x0d, 0xf6, 0x47, 0x00, 0x2b, - 0x09, 0x46, 0x38, 0x1c, 0xa4, 0x9f, 0x63, 0x20, 0x18, 0x75, 0x5a, 0xb8, - 0xc4, 0xbc, 0xd6, 0x6b, 0xc8, 0x14, 0x72, 0x03, 0xe4, 0x05, 0xd4, 0x4e, - 0x66, 0x20, 0x42, 0xa2, 0x8f, 0x96, 0xe7, 0xaf, 0xd3, 0xfb, 0xa8, 0x88, - 0x9b, 0xe3, 0xaa, 0xcd, 0xab, 0xce, 0x8f, 0x07, 0x6d, 0xef, 0x98, 0xce, - 0xdb, 0x42, 0x5b, 0xf4, 0x61, 0x57, 0x62, 0x27, 0x8a, 0x53, 0x5e, 0xf8, - 0x3e, 0xf6, 0x7f, 0xde, 0x5e, 0x3b, 0x1b, 0x13, 0x2e, 0x30, 0x46, 0x4b, - 0x6b, 
0xb7, 0xbb, 0x33, 0x31, 0xc0, 0xfa, 0x40, 0xab, 0x68, 0x72, 0xe3, - 0x92, 0x30, 0x47, 0xd6, 0x30, 0x60, 0x42, 0x5b, 0x88, 0x8d, 0xa6, 0x56, - 0xe4, 0xac, 0x33, 0x2e, 0xca, 0x05, 0x1f, 0x60, 0xaf, 0xde, 0x7f, 0xa9, - 0xda, 0x3f, 0xa8, 0x21, 0xf6, 0xfc, 0x98, 0x7d, 0xc4, 0x1e, 0xb0, 0xa9, - 0x56, 0x2d, 0x8d, 0xea, 0x03, 0x51, 0x48, 0xac, 0xe8, 0x22, 0xc7, 0x8b, - 0xef, 0x91, 0x0e, 0xcf, 0x0c, 0xe9, 0x38, 0x43, 0x99, 0xa8, 0x98, 0x4f, - 0xfa, 0xe3, 0x03, 0xa6, 0x4f, 0xd4, 0x0d, 0x98, 0x5b, 0x50, 0x28, 0xd7, - 0xe7, 0x46, 0xd7, 0xad, 0x43, 0xb8, 0x56, 0x2a, 0x2f, 0x7c, 0x39, 0x67, - 0xf4, 0x62, 0x0e, 0xc0, 0xa8, 0x87, 0xb5, 0x81, 0xe2, 0x13, 0x9f, 0xe4, - 0xdd, 0x72, 0xf2, 0x07, 0xca, 0xac, 0x6d, 0xb2, 0x96, 0x53, 0x5a, 0x8f, - 0x66, 0x3c, 0xb4, 0xc1, 0x4f, 0x9a, 0x82, 0x55, 0xcf, 0x0e, 0x27, 0x5f, - 0xc7, 0xd2, 0x28, 0x27, 0x7f, 0x22, 0x6e, 0xa5, 0xe7, 0x32, 0x56, 0x51, - 0x18, 0xe0, 0x85, 0x6d, 0x1f, 0xfc, 0x25, 0x08, 0x18, 0x60, 0x57, 0xfc, - 0x66, 0x94, 0x2c, 0x4c, 0xbe, 0x00, 0xab, 0x9e, 0x73, 0x9b, 0x06, 0xd3, - 0xb5, 0x24, 0xa8, 0x8f, 0xb1, 0x33, 0x99, 0x4c, 0xb4, 0x13, 0x07, 0xcd, - 0x04, 0xdd, 0x77, 0xdc, 0xee, 0x96, 0x02, 0x59, 0xe8, 0x22, 0x07, 0x16, - 0x2e, 0x41, 0xc9, 0xc4, 0x59, 0x70, 0x37, 0x0f, 0x14, 0xc9, 0xcf, 0x90, - 0x57, 0xc2, 0x0d, 0xa3, 0xd7, 0x66, 0xb6, 0x7d, 0x10, 0xd4, 0xfc, 0x18, - 0x66, 0xad, 0xea, 0x5e, 0x64, 0x6c, 0x12, 0x66, 0x3d, 0x96, 0xa5, 0xa8, - 0x9c, 0x49, 0x5c, 0xd4, 0x8d, 0x1c, 0xc3, 0x38, 0xfe, 0x53, 0xc2, 0x71, - 0xd1, 0xc6, 0x41, 0xe2, 0xb9, 0x17, 0x74, 0x6e, 0xcc, 0xf8, 0x72, 0x28, - 0x38, 0x4e, 0x54, 0x9b, 0x0e, 0xa3, 0x3a, 0x43, 0x5c, 0xd5, 0x83, 0x06, - 0xbb, 0x46, 0x16, 0x6e, 0xe3, 0x8a, 0xd5, 0x1e, 0x7f, 0x88, 0x62, 0xac, - 0x35, 0x89, 0xfb, 0xbe, 0x96, 0x1d, 0x87, 0x37, 0xb7, 0x91, 0x63, 0xae, - 0x77, 0x7b, 0x66, 0x60, 0xc1, 0x3e, 0x80, 0x56, 0xb1, 0xc8, 0x0d, 0x16, - 0xde, 0x38, 0x82, 0x66, 0x99, 0x2b, 0x35, 0xd8, 0xb4, 0x5b, 0x4b, 0x3e, - 0x93, 0x96, 0x59, 0xf8, 0x96, 0x7e, 0x7b, 0x27, 0xf4, 0x62, 0xb7, 0xda, - 0x89, 
0xa7, 0x34, 0x47, 0xed, 0xb3, 0x42, 0x20, 0xeb, 0xcd, 0xf6, 0xa3, - 0x9f, 0xf7, 0x48, 0x91, 0x17, 0xd2, 0x21, 0xed, 0x5a, 0x22, 0x39, 0xc9, - 0x76, 0x95, 0x36, 0xd9, 0x97, 0x0f, 0x19, 0xce, 0xd3, 0xbc, 0x74, 0x7d, - 0x53, 0x37, 0x3b, 0x4a, 0x97, 0xb7, 0xf8, 0x7e, 0xdd, 0x4c, 0x5f, 0xae, - 0x5c, 0x0b, 0xab, 0x4c, 0x34, 0xa1, 0x7e, 0x34, 0x35, 0xf4, 0xfc, 0x92, - 0xab, 0x2e, 0x6a, 0x15, 0xce, 0x84, 0xae, 0x70, 0xae, 0x85, 0x21, 0xe6, - 0x41, 0x13, 0x31, 0xe0, 0x8f, 0xab, 0x82, 0xe3, 0x09, 0xaf, 0xa4, 0x7c, - 0xb4, 0xb9, 0xb7, 0xc0, 0x67, 0x08, 0xc9, 0x9d, 0xcd, 0x0b, 0x3c, 0xa0, - 0x0c, 0xde, 0x49, 0x2f, 0x40, 0x19, 0x95, 0x64, 0xb9, 0x7c, 0x2a, 0x72, - 0xdd, 0xa2, 0x92, 0x0a, 0x21, 0xeb, 0x8c, 0xc3, 0x6d, 0x52, 0xe7, 0x05, - 0x50, 0x01, 0x55, 0x19, 0x2f, 0xbd, 0x1b, 0x72, 0x73, 0xfe, 0x82, 0x9f, - 0xbf, 0xa0, 0xfe, 0x19, 0x7c, 0x42, 0x6d, 0x76, 0x32, 0x47, 0x36, 0x15, - 0x2e, 0xde, 0xe8, 0xe6, 0xca, 0x07, 0xa3, 0x6b, 0x40, 0x99, 0x96, 0xcd, - 0x19, 0xea, 0x7e, 0xc9, 0x87, 0x9d, 0x3d, 0xa0, 0x82, 0x88, 0xe7, 0xe4, - 0x34, 0x9f, 0xa5, 0x27, 0xdf, 0xae, 0x03, 0x37, 0xa8, 0x35, 0x64, 0x02, - 0x09, 0x09, 0x9e, 0xec, 0x38, 0x0a, 0xff, 0x79, 0x8c, 0x9a, 0x87, 0x66, - 0xcd, 0xe4, 0xf4, 0x9d, 0xa9, 0x07, 0x96, 0x36, 0xae, 0x2e, 0x4e, 0xc5, - 0xe9, 0x86, 0xb2, 0x8e, 0x71, 0x5d, 0xe8, 0xee, 0x84, 0xf3, 0x30, 0x2a, - 0x58, 0x1a, 0x80, 0xb8, 0xaa, 0xb8, 0x1d, 0xc4, 0xae, 0x59, 0x91, 0xf3, - 0x16, 0x9b, 0xa3, 0x8a, 0xa3, 0x26, 0xb2, 0x0a, 0xe5, 0x58, 0xb7, 0x96, - 0x87, 0xfb, 0x00, 0xe4, 0x50, 0x7c, 0xb1, 0x77, 0x3a, 0x18, 0xc2, 0xe3, - 0xc1, 0x12, 0xa6, 0x0d, 0x06, 0xeb, 0x80, 0x6c, 0x5a, 0xee, 0x34, 0xcc, - 0x1c, 0x87, 0x35, 0x46, 0x1d, 0x05, 0x83, 0xd8, 0x91, 0x22, 0xaa, 0xf6, - 0xad, 0x87, 0xab, 0x76, 0x18, 0x79, 0xe2, 0x09, 0xc3, 0xa3, 0x15, 0x67, - 0x3a, 0x7c, 0x0f, 0xa0, 0x4c, 0x7b, 0xfc, 0xfc, 0xdd, 0x5c, 0xe4, 0x86, - 0x58, 0x13, 0xb8, 0x97, 0xae, 0x8c, 0x75, 0xc8, 0x02, 0x1e, 0x33, 0x45, - 0xa9, 0x54, 0x09, 0x15, 0x53, 0x4f, 0x28, 0x47, 0x4d, 0x5f, 0xd0, 0xc7, - 0x09, 
0xbd, 0x93, 0xb0, 0x08, 0x79, 0x05, 0xbc, 0xbc, 0xaf, 0x2c, 0xbd, - 0xbb, 0x21, 0xd1, 0x60, 0xb8, 0x81, 0x4c, 0x6c, 0x5e, 0x45, 0x39, 0xa3, - 0x31, 0x54, 0xb7, 0x82, 0xef, 0x86, 0xe4, 0x5e, 0xca, 0xd6, 0xb8, 0x31, - 0xa2, 0x4c, 0x84, 0x5b, 0xac, 0xe5, 0x29, 0xbf, 0xbf, 0x89, 0xb4, 0x4c, - 0xd3, 0x69, 0x66, 0x50, 0xeb, 0xda, 0x7d, 0x00, 0xbb, 0x45, 0x0f, 0xe1, - 0xd1, 0x30, 0x1a, 0xc6, 0x94, 0x66, 0xdc, 0x01, 0x75, 0xce, 0xf8, 0xfc, - 0xd9, 0xce, 0xcf, 0x1f, 0x9e, 0x5a, 0x55, 0xa4, 0x3e, 0xe6, 0x51, 0xc7, - 0x74, 0x40, 0x82, 0x09, 0xea, 0xa0, 0xf5, 0xb2, 0x70, 0x9f, 0x0e, 0xfb, - 0x46, 0x8a, 0x69, 0xbf, 0x07, 0x92, 0xdc, 0x74, 0x03, 0x70, 0xc6, 0x44, - 0x81, 0x66, 0x40, 0xc7, 0xf5, 0xb8, 0xf0, 0x45, 0x0f, 0xca, 0xd8, 0xb0, - 0x9e, 0x48, 0x94, 0xff, 0x85, 0xcb, 0x7b, 0xec, 0x67, 0x5d, 0xfe, 0xe9, - 0x13, 0xd1, 0x67, 0x95, 0xd9, 0x35, 0x9e, 0x8a, 0x53, 0x4d, 0x6b, 0x9d, - 0x42, 0x53, 0xb1, 0x6b, 0x51, 0x1e, 0x35, 0x40, 0x81, 0x92, 0x91, 0x5f, - 0x1f, 0x8e, 0xbe, 0x37, 0xd3, 0x85, 0xab, 0x85, 0x37, 0x1c, 0x0f, 0xae, - 0xd9, 0xf7, 0xa2, 0x75, 0x3d, 0xd9, 0xd7, 0x2a, 0x80, 0xb0, 0x4c, 0x14, - 0x04, 0x40, 0xc5, 0xba, 0x0e, 0xbe, 0xab, 0xcc, 0x38, 0x35, 0x62, 0x6c, - 0xa5, 0xce, 0x49, 0x15, 0x2a, 0x10, 0xb5, 0x6a, 0xd2, 0x3b, 0xd2, 0x6a, - 0xad, 0x2e, 0x34, 0x46, 0x8b, 0x78, 0x57, 0x6e, 0xc4, 0xde, 0x65, 0x68, - 0x05, 0x8f, 0xd6, 0x6e, 0x34, 0xb9, 0xaa, 0x80, 0x77, 0xff, 0x6c, 0x1a, - 0x37, 0x87, 0xdd, 0x33, 0x13, 0x33, 0xa7, 0xa9, 0x3a, 0x90, 0x32, 0x7b, - 0x9b, 0x21, 0x31, 0xc8, 0xf5, 0x4c, 0xa6, 0x73, 0x42, 0x79, 0x46, 0x14, - 0x1b, 0xef, 0xf4, 0x78, 0xd9, 0x7e, 0x6f, 0x31, 0xaa, 0x59, 0x97, 0x34, - 0xe5, 0xe6, 0x67, 0xf3, 0x86, 0xf5, 0x61, 0xe7, 0x51, 0x6d, 0xce, 0xb3, - 0xdc, 0x86, 0xc7, 0x55, 0x43, 0xfa, 0x38, 0x78, 0xb0, 0x8d, 0x03, 0x9c, - 0xe4, 0x6c, 0xca, 0x73, 0x94, 0xa1, 0x0c, 0xb8, 0x11, 0xda, 0x0c, 0x0b, - 0x18, 0x1b, 0xd0, 0x99, 0xe7, 0xa9, 0x0d, 0xc3, 0x36, 0xd7, 0x8c, 0x16, - 0xad, 0x16, 0x1f, 0xb2, 0x3c, 0x07, 0x32, 0x11, 0x6c, 0xd2, 0x8f, 0x33, - 0x37, 
0x5c, 0x3e, 0x4f, 0x7a, 0x76, 0xf7, 0x85, 0xcc, 0x68, 0x1a, 0xf9, - 0x26, 0x74, 0x42, 0xc9, 0xea, 0x21, 0x7e, 0x74, 0x3c, 0x4f, 0xde, 0xfb, - 0xd7, 0x83, 0x62, 0x12, 0xc7, 0x4f, 0xfc, 0x47, 0x18, 0x9d, 0xc5, 0xf5, - 0xe9, 0xd7, 0xaa, 0x76, 0x20, 0x99, 0x79, 0xae, 0x9b, 0x7a, 0xde, 0x8b, - 0x95, 0xc2, 0xa5, 0xa3, 0x6a, 0x30, 0x9b, 0x99, 0x63, 0x34, 0x7c, 0xd1, - 0x53, 0xa1, 0x6c, 0xd6, 0xed, 0x7d, 0x8c, 0xba, 0xc8, 0x21, 0xf3, 0xe1, - 0x31, 0x55, 0x3d, 0x88, 0x87, 0x04, 0xc7, 0xc9, 0x65, 0x0c, 0x53, 0x1e, - 0xd4, 0xd9, 0xaa, 0xda, 0xc2, 0x14, 0x88, 0xf2, 0x07, 0x2c, 0x12, 0x4d, - 0x79, 0x54, 0xaa, 0xd9, 0x47, 0x95, 0xf9, 0x7e, 0x26, 0x89, 0x4b, 0x63, - 0x7e, 0x44, 0x06, 0x0e, 0xe2, 0x8d, 0x9a, 0x0a, 0xc3, 0xee, 0x55, 0x13, - 0x55, 0x04, 0xcc, 0xb5, 0x2e, 0xa0, 0x0d, 0xec, 0x76, 0x84, 0xc1, 0x1e, - 0xdd, 0xe6, 0xfa, 0x54, 0x6e, 0x38, 0x30, 0x6f, 0xcc, 0xa4, 0x8d, 0x76, - 0x1e, 0xa3, 0x8e, 0x2c, 0x5e, 0x37, 0xeb, 0x0b, 0xf4, 0xb5, 0x80, 0xde, - 0x58, 0x13, 0x5a, 0x52, 0xdc, 0x65, 0x99, 0x1a, 0x1b, 0x75, 0x0c, 0xbd, - 0x83, 0xe8, 0x90, 0x8e, 0xa9, 0xbf, 0x42, 0x22, 0xe1, 0x3a, 0x31, 0x4e, - 0x54, 0xad, 0xd4, 0x6f, 0x80, 0xb4, 0xb5, 0x82, 0x05, 0x20, 0xd7, 0x38, - 0xd7, 0xeb, 0x25, 0x33, 0xe9, 0x4b, 0xc3, 0x5e, 0xd1, 0x11, 0xb0, 0xd9, - 0x8e, 0x90, 0x48, 0x2a, 0xe3, 0xa0, 0x60, 0x16, 0x70, 0xe3, 0xd1, 0x45, - 0x11, 0x64, 0x91, 0x69, 0x87, 0x1c, 0xbb, 0x91, 0xc4, 0x43, 0x12, 0x62, - 0x99, 0x69, 0xe5, 0x96, 0x01, 0x15, 0xdb, 0xdf, 0x05, 0x55, 0x34, 0xbb, - 0xd6, 0x76, 0x89, 0xcd, 0xb5, 0x4f, 0x2e, 0xa7, 0x6e, 0x15, 0xc9, 0xc0, - 0x8e, 0xa8, 0x63, 0x79, 0x12, 0xfb, 0x7e, 0x69, 0x8f, 0x52, 0x5e, 0xe7, - 0x76, 0x16, 0x28, 0x76, 0xca, 0xcb, 0xd8, 0x0e, 0x4a, 0x93, 0x9d, 0x16, - 0x68, 0x98, 0xf8, 0xc3, 0x39, 0xb2, 0x2d, 0xea, 0xba, 0x72, 0x16, 0x33, - 0xb7, 0xec, 0x61, 0x9e, 0x94, 0x32, 0x01, 0x22, 0xde, 0x66, 0xfd, 0x68, - 0xfa, 0xcf, 0xf2, 0x52, 0x4f, 0x02, 0xe8, 0x25, 0xd3, 0xa3, 0x5b, 0x29, - 0xae, 0xe9, 0x62, 0xfa, 0xd6, 0x1a, 0x50, 0x80, 0x95, 0x96, 0xdf, 0x00, - 0xfc, 
0x23, 0xf1, 0x95, 0xef, 0xbb, 0xf5, 0x23, 0x9d, 0x6b, 0xd6, 0xed, - 0xb4, 0xe2, 0x4a, 0xf6, 0xb8, 0x20, 0x83, 0x6b, 0x45, 0x92, 0x29, 0x5a, - 0x02, 0xe9, 0xf7, 0x8e, 0x5c, 0x02, 0xde, 0xb4, 0x9a, 0xdf, 0x18, 0x10, - 0x17, 0x7f, 0xd8, 0x2e, 0x17, 0xc0, 0xf0, 0x6b, 0x3b, 0x88, 0x09, 0x58, - 0xf2, 0x18, 0x22, 0x09, 0x80, 0x4a, 0xe0, 0x51, 0x6f, 0x7a, 0x70, 0x09, - 0x1f, 0xe5, 0xfa, 0xa9, 0x4d, 0x24, 0x1f, 0x18, 0x1c, 0x74, 0xcd, 0x87, - 0x04, 0xfd, 0x85, 0x33, 0x4c, 0x28, 0xbd, 0xa3, 0x66, 0x6c, 0x99, 0x7e, - 0x50, 0x5e, 0xb5, 0x22, 0x33, 0x92, 0xd4, 0xd8, 0x82, 0x4e, 0x38, 0xbe, - 0xcb, 0x3d, 0x5f, 0x19, 0xd1, 0x0f, 0x8b, 0xa1, 0x78, 0x08, 0x1c, 0x10, - 0x0b, 0x77, 0xa7, 0x39, 0x2e, 0x91, 0x83, 0xee, 0x1d, 0x36, 0xd8, 0x77, - 0x87, 0x8a, 0x38, 0x45, 0x3c, 0xbd, 0xb9, 0x88, 0xbb, 0x1b, 0x20, 0xd1, - 0x95, 0xb9, 0x8f, 0x03, 0x46, 0xfa, 0xab, 0x70, 0x68, 0x26, 0xd9, 0xb1, - 0x25, 0x52, 0x5a, 0x77, 0x2d, 0x92, 0xc2, 0x1d, 0xb6, 0x6e, 0xec, 0x67, - 0xef, 0x34, 0xe2, 0x64, 0xb3, 0xa0, 0xae, 0x0c, 0xd9, 0x36, 0xa1, 0xc7, - 0xd8, 0xbf, 0x7a, 0x43, 0xbf, 0xc0, 0xc6, 0x90, 0x60, 0x6a, 0x23, 0xc0, - 0x6a, 0x5d, 0x62, 0x18, 0xac, 0xc1, 0x20, 0x35, 0x17, 0xba, 0x4e, 0x54, - 0xb7, 0xec, 0xd4, 0xad, 0x99, 0x94, 0xa4, 0xda, 0x57, 0xe7, 0x46, 0xed, - 0x47, 0xd1, 0xb4, 0xa2, 0x3e, 0x0f, 0x4a, 0xb6, 0xa6, 0x68, 0x3e, 0x94, - 0xb9, 0x18, 0x30, 0xe0, 0x75, 0x08, 0xe8, 0xf3, 0x21, 0x79, 0x26, 0x68, - 0x6a, 0x65, 0xb6, 0xbe, 0x03, 0x98, 0x8f, 0x04, 0xad, 0x1e, 0xb0, 0x54, - 0xd2, 0x28, 0xdd, 0x4a, 0xe9, 0xf3, 0xa0, 0x06, 0xbf, 0x0b, 0x2a, 0xee, - 0xf8, 0x03, 0x7e, 0x1d, 0x37, 0xc1, 0x32, 0xd1, 0x41, 0xf4, 0x9b, 0xc5, - 0x02, 0x10, 0x6f, 0x55, 0x5a, 0xec, 0x5b, 0xe7, 0x61, 0x05, 0x17, 0xf0, - 0xf8, 0xc6, 0x89, 0xe8, 0xad, 0x32, 0x57, 0x14, 0xe5, 0xf8, 0xf5, 0x88, - 0xd9, 0x73, 0x17, 0x10, 0xa7, 0xc3, 0xf8, 0x78, 0x0b, 0x66, 0xab, 0x63, - 0x4f, 0x96, 0x5d, 0xdf, 0x36, 0x83, 0xc4, 0x6f, 0x20, 0xbd, 0xcb, 0x4c, - 0xd2, 0xfa, 0x35, 0x87, 0xd8, 0xb6, 0xbb, 0xcc, 0xb6, 0xd2, 0x85, 0x03, - 0x6a, 
0xea, 0xbb, 0x6d, 0x2f, 0xa2, 0x06, 0xc0, 0xd6, 0x68, 0xd9, 0x7f, - 0xd6, 0xa2, 0x3b, 0x08, 0x6a, 0x98, 0x26, 0x6d, 0x9a, 0x2b, 0x68, 0x51, - 0x78, 0xde, 0xa6, 0x96, 0x50, 0x7b, 0xfc, 0x03, 0x43, 0xf8, 0x21, 0x01, - 0x9d, 0xe2, 0x89, 0x65, 0x47, 0xae, 0x9c, 0x45, 0x5e, 0xa5, 0xce, 0x97, - 0xb3, 0xe6, 0xf6, 0xd4, 0x5a, 0xe8, 0x6b, 0x87, 0xd6, 0xdf, 0xfb, 0x1f, - 0xaf, 0xfb, 0xaf, 0x19, 0xa5, 0xfd, 0xba, 0xe0, 0x22, 0x2f, 0x91, 0x97, - 0xdf, 0xae, 0xe9, 0x39, 0xb1, 0xe4, 0xd3, 0x10, 0xcb, 0xb3, 0x03, 0xb5, - 0x0b, 0xf0, 0xd9, 0x70, 0x1e, 0x9c, 0x63, 0x6f, 0x3a, 0xcf, 0x3c, 0x1b, - 0x86, 0xa3, 0xad, 0x1a, 0xe7, 0x4c, 0x09, 0xd0, 0x80, 0xf6, 0x8b, 0x72, - 0x96, 0x53, 0x7e, 0x66, 0xfb, 0x7c, 0x7c, 0x8a, 0xb0, 0x60, 0xa6, 0x4c, - 0x20, 0xc4, 0x63, 0x69, 0x6a, 0xc3, 0x53, 0xf8, 0x9a, 0x28, 0x30, 0x9d, - 0x6f, 0x0e, 0x1b, 0xb2, 0x2c, 0xe6, 0x94, 0x9f, 0xfc, 0xc0, 0x8d, 0x71, - 0xbe, 0x37, 0xa6, 0xc9, 0xbd, 0x3c, 0x4a, 0xf3, 0xc4, 0xb3, 0x88, 0x4c, - 0x45, 0x26, 0x4e, 0x2f, 0x83, 0x16, 0x70, 0xb6, 0xc7, 0xb2, 0x36, 0xf0, - 0x0c, 0x67, 0xd2, 0x0a, 0xd3, 0xd9, 0x7c, 0x35, 0x29, 0xac, 0xd4, 0x9c, - 0x6d, 0xfc, 0xec, 0x58, 0x92, 0xf0, 0xba, 0x32, 0x00, 0xae, 0xb1, 0xeb, - 0x4d, 0x8c, 0x1a, 0x20, 0xe7, 0x5c, 0xfc, 0x9a, 0x4d, 0x51, 0x24, 0x7b, - 0x52, 0xeb, 0x13, 0x3d, 0xb4, 0xab, 0xda, 0xb3, 0x74, 0x39, 0xd2, 0xf8, - 0x2d, 0xef, 0x9b, 0x0f, 0xae, 0xf5, 0x3c, 0x99, 0x34, 0xbe, 0x15, 0x5c, - 0x9f, 0x5d, 0xae, 0xf4, 0x72, 0xc2, 0xac, 0x06, 0xbe, 0xad, 0xe4, 0x68, - 0xea, 0xd5, 0xa1, 0xdc, 0xdb, 0xf4, 0x61, 0x51, 0xf5, 0x1a, 0x62, 0x15, - 0xfd, 0x00, 0x51, 0x35, 0x53, 0x6c, 0x39, 0x3e, 0xdb, 0x60, 0x0a, 0x52, - 0xc1, 0x52, 0x3c, 0xd7, 0xab, 0x73, 0xea, 0x1e, 0x38, 0x38, 0x65, 0x35, - 0x35, 0x2b, 0x28, 0x04, 0x5c, 0x82, 0xea, 0x4a, 0x9e, 0x96, 0x72, 0xa4, - 0x8e, 0x42, 0xfd, 0x55, 0xa8, 0x66, 0x7a, 0x40, 0xc9, 0xf2, 0xc2, 0x1e, - 0x5d, 0x09, 0x90, 0x32, 0x18, 0xdb, 0x11, 0x4c, 0x6c, 0x9c, 0x27, 0x62, - 0x0a, 0xe6, 0xc1, 0xdf, 0xf2, 0x6a, 0x8c, 0x26, 0xb4, 0xfb, 0xda, 0xa9, - 0x08, 
0x10, 0x3a, 0xf0, 0xe1, 0x64, 0xe5, 0x03, 0x81, 0x7d, 0x15, 0x74, - 0xa1, 0x8d, 0x10, 0xc8, 0xbb, 0x6a, 0x7c, 0x60, 0xa1, 0x09, 0x35, 0x19, - 0x2d, 0x70, 0xb5, 0x36, 0xc8, 0x8b, 0x66, 0x5f, 0xe0, 0xe7, 0xea, 0x70, - 0x2f, 0x5d, 0x3f, 0xae, 0x5e, 0x25, 0x84, 0xdd, 0x9b, 0x69, 0x44, 0x37, - 0x7c, 0x6b, 0x9e, 0x81, 0x18, 0x36, 0x4b, 0xff, 0x86, 0x44, 0x2a, 0x39, - 0x66, 0x7f, 0x71, 0x43, 0xe7, 0x65, 0xfe, 0xfd, 0x34, 0xb9, 0xd9, 0x5a, - 0x00, 0xd1, 0x41, 0x43, 0xc7, 0xbc, 0x65, 0x68, 0xb7, 0x73, 0xff, 0x19, - 0xd3, 0xed, 0x15, 0xa4, 0x67, 0xa1, 0x53, 0x0e, 0xa6, 0xfb, 0x25, 0xce, - 0x9d, 0x5b, 0x73, 0x08, 0xf3, 0x3b, 0x69, 0xe4, 0x94, 0x9b, 0x94, 0x03, - 0xb3, 0x8a, 0x2e, 0x07, 0x0c, 0xef, 0x18, 0x4c, 0x2b, 0x1c, 0x83, 0x9f, - 0x25, 0x20, 0x29, 0x72, 0x11, 0xa0, 0xaa, 0xed, 0x0c, 0xf9, 0xce, 0x94, - 0x0d, 0x7a, 0xb6, 0xb3, 0xa4, 0x57, 0xd6, 0x61, 0xca, 0x1a, 0x0e, 0x89, - 0x6d, 0x99, 0x4d, 0x06, 0xcd, 0x83, 0x7e, 0x09, 0x14, 0x5b, 0xe7, 0x4c, - 0x72, 0xa8, 0x98, 0xc8, 0x27, 0xf3, 0x70, 0x89, 0x87, 0x11, 0xbb, 0x98, - 0x82, 0x77, 0x9d, 0xaa, 0x95, 0x8c, 0xc1, 0xf8, 0x39, 0x27, 0xd5, 0x64, - 0x59, 0x6a, 0x8c, 0xbe, 0xe2, 0xe1, 0xd1, 0x6b, 0xe3, 0xaf, 0x30, 0x6f, - 0xf4, 0x9e, 0x35, 0x0b, 0x10, 0x24, 0x77, 0xd8, 0xa4, 0x30, 0x2e, 0xf7, - 0x97, 0xfd, 0xef, 0x1e, 0x9e, 0xf2, 0xbd, 0xf2, 0x41, 0x73, 0x19, 0xe6, - 0x7b, 0x7f, 0x74, 0x11, 0x91, 0x38, 0xc5, 0xac, 0xd5, 0xb0, 0x48, 0xc4, - 0xe9, 0x41, 0xd4, 0x50, 0x76, 0x13, 0xbf, 0xec, 0xe8, 0x3a, 0xa8, 0x84, - 0x42, 0x98, 0x12, 0x64, 0x95, 0x85, 0x79, 0x29, 0xea, 0x3a, 0xf9, 0xa4, - 0x5c, 0x9c, 0x35, 0x01, 0x68, 0x71, 0xb9, 0x5b, 0xbe, 0xaa, 0x76, 0x9e, - 0x63, 0x1c, 0xc1, 0x83, 0x94, 0xc6, 0x89, 0x2b, 0x1d, 0x00, 0x43, 0x74, - 0x00, 0x41, 0x93, 0x58, 0x52, 0xf9, 0x13, 0xfe, 0x9f, 0x7a, 0xb7, 0x3d, - 0x6b, 0x70, 0x4e, 0x4f, 0x8f, 0xf4, 0x9c, 0xe4, 0x97, 0x62, 0xaf, 0x69, - 0x45, 0xec, 0xf4, 0x53, 0x71, 0xdc, 0xc7, 0x8d, 0x6f, 0xb2, 0x9d, 0xec, - 0x43, 0xdd, 0xc0, 0xe5, 0xd1, 0x6c, 0x1a, 0x82, 0x19, 0xf6, 0x18, 0xd3, - 0x59, 
0x0e, 0x07, 0x81, 0x5a, 0x23, 0x10, 0x8b, 0xaa, 0x0b, 0x99, 0xc8, - 0x34, 0xc2, 0xd0, 0xa9, 0x69, 0x7f, 0x54, 0xe3, 0xc4, 0xa0, 0xe7, 0x4b, - 0x31, 0x90, 0xe7, 0x3b, 0x45, 0x9b, 0x7f, 0xae, 0xd2, 0xab, 0x22, 0xb9, - 0xfc, 0x07, 0x39, 0x4b, 0x45, 0x83, 0x8d, 0x41, 0x7a, 0x52, 0xb2, 0xae, - 0x71, 0x78, 0x17, 0x63, 0xfa, 0xbe, 0x59, 0xca, 0xf0, 0xfd, 0x68, 0xe5, - 0xc4, 0x9a, 0x74, 0x3d, 0xec, 0xd4, 0x8b, 0xa1, 0x2c, 0x31, 0x4d, 0x73, - 0xfd, 0x5c, 0x1e, 0xeb, 0x5f, 0xf6, 0x42, 0x0d, 0x79, 0x5f, 0x64, 0x10, - 0xae, 0xb2, 0xf6, 0x9e, 0xa8, 0xab, 0xa5, 0x2b, 0x9a, 0xcf, 0x25, 0xfa, - 0xa2, 0xb3, 0xdc, 0x30, 0x3d, 0x08, 0x4e, 0xbb, 0x7b, 0x0c, 0x28, 0x34, - 0x9d, 0xda, 0xc4, 0x94, 0xa4, 0xf4, 0x1e, 0x78, 0x8b, 0xa9, 0xd3, 0xa7, - 0x1c, 0x2a, 0x27, 0x14, 0xa0, 0x44, 0x1a, 0x9a, 0x87, 0x72, 0xa5, 0x6d, - 0x69, 0x46, 0xe5, 0xc1, 0x4f, 0x29, 0x87, 0xc0, 0xa7, 0xa8, 0x96, 0xde, - 0xa9, 0x63, 0x08, 0xd8, 0x4a, 0xa1, 0x25, 0x43, 0x76, 0x41, 0xf7, 0x9f, - 0x17, 0xe3, 0xe1, 0x4b, 0xc6, 0x2b, 0x79, 0xea, 0xd5, 0xa7, 0x72, 0x16, - 0x0a, 0x8c, 0xcd, 0x49, 0x70, 0x75, 0xd4, 0x59, 0x4a, 0x19, 0x7b, 0x31, - 0x02, 0x7a, 0x3a, 0x20, 0x15, 0x62, 0x7e, 0x4e, 0x6f, 0xac, 0xd0, 0xd1, - 0x29, 0xbd, 0x2d, 0xa1, 0xc6, 0x3e, 0xa6, 0x1a, 0x26, 0x18, 0x96, 0x98, - 0x12, 0x56, 0x37, 0xbf, 0xb4, 0x91, 0x57, 0xe8, 0xda, 0x61, 0x7c, 0x2f, - 0x3e, 0xd4, 0x51, 0xfe, 0xe8, 0x5b, 0x00, 0x30, 0x08, 0xf6, 0x4e, 0x69, - 0xa8, 0x1a, 0x2b, 0x82, 0x41, 0x85, 0xa9, 0xd9, 0x3c, 0xc8, 0x02, 0x91, - 0x99, 0xd4, 0xa2, 0xfd, 0x9d, 0x1b, 0x08, 0xfc, 0x41, 0x3e, 0x10, 0x6b, - 0x80, 0x74, 0x3d, 0x72, 0x61, 0x97, 0xdd, 0x96, 0xec, 0xf4, 0xd6, 0x6d, - 0x68, 0x02, 0x6e, 0xbb, 0x55, 0x9d, 0x6f, 0x11, 0xde, 0xd1, 0xad, 0x6d, - 0x42, 0x96, 0x2c, 0x42, 0x1e, 0xa9, 0x19, 0x42, 0x22, 0x38, 0x38, 0x18, - 0x3c, 0x4b, 0xc1, 0x9c, 0x0f, 0xe1, 0x34, 0x61, 0x06, 0x77, 0x54, 0x04, - 0xe0, 0x87, 0x94, 0x5c, 0xc9, 0xa1, 0x35, 0x55, 0x3d, 0x4a, 0xf2, 0x4f, - 0x05, 0x11, 0x98, 0x6f, 0x3c, 0x85, 0x84, 0xe6, 0xf8, 0x71, 0x8a, 0xdf, - 0xe9, 
0x9a, 0xe3, 0x70, 0xd6, 0x36, 0xd6, 0xc8, 0x66, 0x3e, 0xba, 0x7c, - 0x0a, 0x23, 0x0a, 0xd0, 0xb6, 0x66, 0x68, 0xa8, 0xdf, 0x37, 0x17, 0xfb, - 0xdd, 0x9c, 0x8b, 0xc7, 0x8e, 0xc4, 0x4f, 0x40, 0x08, 0x23, 0x58, 0x15, - 0xa2, 0xba, 0xef, 0xdf, 0x67, 0xcd, 0x1f, 0xb6, 0xc4, 0xea, 0xce, 0x81, - 0x38, 0x58, 0x92, 0x57, 0xcf, 0x83, 0x47, 0x29, 0x9f, 0xde, 0x9b, 0xde, - 0x01, 0xfe, 0x68, 0x91, 0x67, 0x06, 0x9d, 0x31, 0xd0, 0xb9, 0xc3, 0xbb, - 0xc3, 0x6b, 0xa0, 0x04, 0x1e, 0x34, 0xd5, 0x38, 0xd4, 0xac, 0x70, 0xae, - 0xab, 0xb2, 0xbd, 0x4b, 0xa0, 0xad, 0x2b, 0x82, 0xaf, 0x8c, 0x90, 0x4d, - 0xd3, 0xca, 0x71, 0x35, 0x75, 0x89, 0xe5, 0x42, 0x91, 0x46, 0x8d, 0x18, - 0x04, 0x7a, 0xb9, 0xaa, 0x3b, 0xe7, 0x1e, 0x8c, 0x4e, 0xf9, 0x6e, 0x74, - 0xaa, 0x2e, 0x36, 0x86, 0xfb, 0xef, 0x9c, 0xd7, 0xba, 0x5e, 0x2e, 0x3c, - 0x40, 0xce, 0x8b, 0x2b, 0x94, 0x55, 0xf2, 0xd4, 0x7d, 0xbf, 0x8c, 0x8a, - 0xa8, 0x59, 0x84, 0x6f, 0x32, 0x95, 0xc5, 0xcc, 0xad, 0xee, 0x30, 0x23, - 0x7c, 0x54, 0xea, 0x60, 0xb8, 0x88, 0x12, 0x45, 0x03, 0xbc, 0xe3, 0x92, - 0x9f, 0xa8, 0x5b, 0x07, 0x97, 0x53, 0x0d, 0xe1, 0xe3, 0x3d, 0xdf, 0xf2, - 0x2a, 0x12, 0xee, 0xdf, 0x73, 0x8d, 0x41, 0xf4, 0xe4, 0x2c, 0xb4, 0xd4, - 0x9e, 0xfe, 0xf2, 0xe6, 0xa0, 0x9e, 0x2a, 0x3a, 0x36, 0x26, 0x7e, 0xd9, - 0xe1, 0x22, 0xee, 0x0b, 0x5b, 0x48, 0xd2, 0xa9, 0x55, 0xab, 0x50, 0x7c, - 0xf6, 0xc8, 0x56, 0x31, 0xbb, 0x51, 0xe9, 0x31, 0x4d, 0xaa, 0x13, 0x3a, - 0x99, 0x9f, 0x8c, 0x59, 0x6a, 0xc9, 0xf1, 0x0a, 0x89, 0xcc, 0x39, 0x98, - 0xbd, 0xc3, 0x93, 0x97, 0x28, 0xe5, 0x73, 0x94, 0xf2, 0x0a, 0x7a, 0x09, - 0x38, 0x0b, 0xab, 0xd8, 0x49, 0x98, 0x14, 0x34, 0x32, 0x9d, 0xef, 0x9d, - 0x47, 0xdb, 0x82, 0xb9, 0x84, 0xd6, 0xd7, 0x9f, 0xf7, 0xdf, 0x79, 0x5b, - 0xe8, 0x92, 0x44, 0x31, 0x5d, 0x42, 0x80, 0x90, 0x8d, 0x36, 0xa2, 0x39, - 0x02, 0x64, 0x21, 0xa2, 0xb8, 0xfc, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x4c, 0xe9, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, - 0xd8, 0x03, 0x00, 0x00, 0xdc, 0x03, 0x00, 0x00, 0xe0, 0x03, 0x00, 0x00, - 0x0f, 
0x00, 0x00, 0x00, 0xa8, 0x03, 0x00, 0x00, 0x50, 0x03, 0x00, 0x00, - 0x04, 0x03, 0x00, 0x00, 0xac, 0x02, 0x00, 0x00, 0x74, 0x02, 0x00, 0x00, - 0x2c, 0x02, 0x00, 0x00, 0xf4, 0x01, 0x00, 0x00, 0xac, 0x01, 0x00, 0x00, - 0x74, 0x01, 0x00, 0x00, 0x2c, 0x01, 0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, - 0x9c, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x9e, 0xfc, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x35, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, - 0x5e, 0xfd, 0xff, 0xff, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, - 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x96, 0xfd, 0xff, 0xff, - 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x88, 0xfd, 0xff, 0xff, - 0x01, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x2f, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, - 0xca, 0xfd, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, - 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, - 0x78, 0xfd, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, - 0x2d, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x0e, 0xfe, 0xff, 0xff, - 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x18, 0x00, 0x00, 0x00, - 0x1c, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0xbc, 0xfd, 0xff, 0xff, - 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x2a, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x26, 
0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x29, 0x00, 0x00, 0x00, 0x52, 0xfe, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x06, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, - 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x96, 0xfe, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, - 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x88, 0xfe, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, - 0x1f, 0x00, 0x00, 0x00, 0xca, 0xfe, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x06, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x78, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, - 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x1a, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x0e, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, - 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x00, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x42, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x06, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0xf0, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, - 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x86, 
0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, - 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x78, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, - 0x0f, 0x00, 0x00, 0x00, 0xba, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x06, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x68, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, - 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, - 0x0b, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x08, 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, - 0x0b, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x06, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0c, 0x00, - 0x08, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x1c, 
0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, + 0x00, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0xd0, 0x6e, 0x00, 0x00, 0x60, 0x83, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0xbc, 0x6e, 0x00, 0x00, 0xac, 0x56, 0x00, 0x00, + 0x9c, 0x52, 0x00, 0x00, 0x8c, 0x51, 0x00, 0x00, 0x7c, 0x4d, 0x00, 0x00, + 0x2c, 0x4d, 0x00, 0x00, 0x1c, 0x49, 0x00, 0x00, 0x0c, 0x45, 0x00, 0x00, + 0xfc, 0x43, 0x00, 0x00, 0xec, 0x3f, 0x00, 0x00, 0x9c, 0x3f, 0x00, 0x00, + 0x8c, 0x3b, 0x00, 0x00, 0x7c, 0x37, 0x00, 0x00, 0x6c, 0x36, 0x00, 0x00, + 0x5c, 0x32, 0x00, 0x00, 0x0c, 0x32, 0x00, 0x00, 0xfc, 0x2d, 0x00, 0x00, + 0xec, 0x29, 0x00, 0x00, 0xdc, 0x28, 0x00, 0x00, 0xcc, 0x24, 0x00, 0x00, + 0x7c, 0x24, 0x00, 0x00, 0x6c, 0x22, 0x00, 0x00, 0x5c, 0x1a, 0x00, 0x00, + 0xcc, 0x19, 0x00, 0x00, 0xbc, 0x15, 0x00, 0x00, 0xac, 0x0d, 0x00, 0x00, + 0x1c, 0x0d, 0x00, 0x00, 0x0c, 0x09, 0x00, 0x00, 0xfc, 0x00, 0x00, 0x00, + 0x6c, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x2a, 0x91, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x34, 0xe1, 0x4f, 0xa1, 0x63, 0xa4, 0x62, 0xbf, 0x3e, 0x91, 0xff, 0xff, + 0x04, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0xa3, 0xb2, 0x8f, 0xee, + 0x35, 0xe6, 0xf2, 0xcc, 0x68, 0xa0, 0x33, 0xc4, 0x7d, 0x4e, 0xbb, 0xa9, + 0x10, 0x32, 0x8e, 0x3d, 0x76, 0x14, 0x1c, 0x33, 0x0e, 0x77, 0xf7, 0xc8, + 0x7b, 0x45, 0xc7, 0xdb, 0xcf, 0x87, 0xc7, 0x70, 0xa9, 0x29, 0xfd, 0x70, + 0x32, 0x96, 0x35, 0x7d, 0xe9, 0xac, 0x6d, 0x9b, 0xfd, 0xe4, 0xbc, 0x4a, + 0x57, 0xcd, 0x43, 0xcc, 0x73, 0x72, 0xdf, 0x07, 0x68, 0xc5, 0x67, 0xbd, + 0x8a, 0x91, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, + 0xb0, 0xfb, 0x5f, 0xdf, 0x0e, 0xb9, 0xa2, 0xfd, 0x66, 0x86, 0x13, 0x1b, + 0x6d, 0x1d, 0x53, 0xdb, 0x83, 0xbf, 0x44, 0x29, 0x3f, 0x93, 0xee, 0x42, + 0x9a, 0xf4, 0x31, 0x6e, 0xc3, 0x15, 0x7e, 0x48, 0x72, 0x50, 0xc3, 0x53, + 0xef, 
0x35, 0x1f, 0xc2, 0x29, 0x42, 0xb4, 0xd7, 0x4b, 0xd7, 0x98, 0x60, + 0xb9, 0x3e, 0xbb, 0x31, 0x35, 0xc3, 0xf6, 0x15, 0x7a, 0x9a, 0x2c, 0xfd, + 0xff, 0x04, 0xd9, 0x04, 0x57, 0x52, 0xae, 0x99, 0xa3, 0x95, 0xae, 0x6a, + 0x66, 0x52, 0x5f, 0x91, 0x17, 0x83, 0x0d, 0x27, 0x16, 0x02, 0x06, 0x64, + 0x80, 0x05, 0x99, 0x1c, 0x6c, 0xab, 0xb1, 0xa1, 0x0e, 0x44, 0x1f, 0x63, + 0xe9, 0xc1, 0xab, 0x8d, 0x08, 0x79, 0x56, 0xe0, 0x90, 0xa5, 0xb8, 0x3b, + 0xc4, 0x1e, 0xa5, 0x1f, 0x64, 0xe4, 0x0b, 0x72, 0x62, 0x19, 0x5f, 0x66, + 0xc0, 0x9b, 0x7b, 0xc4, 0xe5, 0x9f, 0x82, 0xa7, 0x16, 0x92, 0xff, 0xff, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x3e, 0x3d, 0xf4, 0x61, + 0x45, 0x2a, 0x48, 0x53, 0x1f, 0x22, 0x74, 0x65, 0xea, 0x5a, 0x00, 0x83, + 0x68, 0xf9, 0xbb, 0xa3, 0xc2, 0x1a, 0x8f, 0xe1, 0xfb, 0x76, 0x6a, 0xe9, + 0x1a, 0x0e, 0x4d, 0x32, 0xc6, 0xf3, 0x8d, 0x85, 0x54, 0xa1, 0xe9, 0xb8, + 0x35, 0xee, 0xba, 0x53, 0x40, 0xa2, 0xea, 0x7f, 0xc3, 0x99, 0x71, 0x17, + 0xdd, 0xd5, 0xfe, 0xdf, 0x5e, 0x15, 0xa0, 0x73, 0xf8, 0x78, 0x49, 0x73, + 0xcc, 0xf0, 0x18, 0x12, 0x06, 0x81, 0xd6, 0x19, 0x2c, 0xa8, 0xd7, 0x80, + 0x19, 0x19, 0xbf, 0x1e, 0x50, 0xb1, 0xfb, 0xb3, 0xa6, 0x56, 0x6f, 0x52, + 0xa6, 0xc0, 0xdd, 0x3f, 0xbb, 0x13, 0x6e, 0x04, 0xdf, 0x79, 0xca, 0x8b, + 0xa5, 0x9c, 0xa1, 0x78, 0x49, 0xca, 0xe5, 0x29, 0xbb, 0x29, 0x7c, 0x96, + 0xc6, 0x29, 0x06, 0x99, 0xec, 0x50, 0xd1, 0xe8, 0x9b, 0xb7, 0x53, 0xd2, + 0x36, 0x89, 0xb1, 0x5c, 0x38, 0xf4, 0x2f, 0xa1, 0xda, 0x6f, 0xd8, 0xd1, + 0x62, 0xd2, 0xd4, 0x97, 0xce, 0xf1, 0xbd, 0x73, 0x2d, 0x92, 0xdb, 0x62, + 0x0c, 0xb0, 0x77, 0xed, 0x32, 0x3a, 0xfc, 0x59, 0x94, 0xef, 0x2b, 0x48, + 0x60, 0xb2, 0x82, 0xa2, 0xb6, 0x51, 0xdb, 0x51, 0x47, 0x99, 0x4c, 0x50, + 0x93, 0x53, 0x9d, 0xa9, 0x3c, 0x94, 0x34, 0x9f, 0xa6, 0x3e, 0x4f, 0x87, + 0xd4, 0xa0, 0x40, 0xeb, 0x7b, 0xfa, 0x1b, 0x7d, 0x03, 0xa8, 0xf8, 0x8b, + 0xa5, 0x32, 0x3a, 0xaf, 0x7e, 0x6b, 0x25, 0x08, 0x97, 0x71, 0x8d, 0x0c, + 0x30, 0xc9, 0xa7, 0x23, 0xe3, 0x51, 0xb3, 0xf2, 0x86, 0xad, 0x12, 0xe2, + 0x79, 
0x94, 0x7f, 0xf3, 0xf7, 0x88, 0x67, 0x3e, 0x8e, 0x8e, 0x04, 0x5e, + 0x4f, 0x01, 0x6f, 0x1d, 0x78, 0x42, 0x9e, 0x47, 0x81, 0xdf, 0x03, 0x39, + 0x3d, 0x9b, 0xbd, 0xb6, 0x06, 0x21, 0x82, 0xfe, 0xf2, 0x50, 0xe1, 0x14, + 0xbc, 0xe3, 0x5e, 0xe1, 0xbd, 0x8f, 0xfa, 0x35, 0x31, 0x4e, 0x66, 0xeb, + 0x67, 0x49, 0x1c, 0x07, 0x88, 0xb6, 0x22, 0x0c, 0xeb, 0xd9, 0x9f, 0x9b, + 0x8b, 0xe0, 0x9c, 0x3c, 0xf7, 0x91, 0xab, 0x98, 0x5b, 0x0e, 0x09, 0xdd, + 0xe3, 0x0b, 0x14, 0x55, 0xe9, 0xe4, 0x42, 0xd8, 0xce, 0xd7, 0xfd, 0x4c, + 0x20, 0x9f, 0x44, 0x93, 0xa6, 0x17, 0x8a, 0x68, 0x8f, 0xec, 0x62, 0xd1, + 0x97, 0x9c, 0xcc, 0xc4, 0xd9, 0x42, 0xda, 0xf1, 0x34, 0x04, 0xc6, 0xb6, + 0x0f, 0xc7, 0xe6, 0x2d, 0x26, 0x6e, 0x6f, 0x92, 0x7e, 0xd9, 0xd4, 0x40, + 0xc6, 0x70, 0xfa, 0x12, 0x2a, 0x1b, 0xbc, 0x50, 0xeb, 0x3b, 0x24, 0x96, + 0x8d, 0x7c, 0xae, 0xbe, 0xc3, 0x27, 0xce, 0x97, 0xcf, 0xcd, 0x10, 0x13, + 0x01, 0xc6, 0x48, 0x6a, 0x99, 0x38, 0x79, 0xb9, 0x1c, 0xc9, 0x09, 0xac, + 0x96, 0x8c, 0xf7, 0x82, 0x8f, 0xb8, 0x17, 0x94, 0x2c, 0x5f, 0x40, 0xcc, + 0x80, 0xf4, 0x9f, 0xaa, 0xcb, 0x83, 0x13, 0x7b, 0x3a, 0x78, 0x0a, 0x9f, + 0x79, 0x9e, 0xfc, 0x0e, 0x8f, 0x98, 0x60, 0x39, 0x86, 0x44, 0x8e, 0x4b, + 0xc4, 0xad, 0xe6, 0x98, 0x92, 0x08, 0x84, 0x48, 0x8f, 0x1d, 0x78, 0x10, + 0x9e, 0xf7, 0xb8, 0x61, 0x65, 0x46, 0xdb, 0x4a, 0xcf, 0xc5, 0x37, 0xe3, + 0x77, 0x76, 0xcf, 0x0a, 0x7e, 0x72, 0x3f, 0xe4, 0x51, 0x30, 0x28, 0x57, + 0x13, 0xfd, 0xdb, 0x7e, 0xd6, 0xa3, 0xdd, 0x64, 0xdd, 0x00, 0xd0, 0x7f, + 0xbc, 0x48, 0x1d, 0xaf, 0xde, 0x0e, 0x45, 0xc4, 0xc9, 0xfa, 0xf6, 0xb2, + 0xb7, 0x9a, 0x42, 0x8b, 0x18, 0x08, 0xed, 0xdb, 0xa9, 0xc3, 0x32, 0xf1, + 0x9c, 0xcf, 0x16, 0x74, 0x57, 0xce, 0xe9, 0x44, 0x21, 0xdb, 0x8a, 0x45, + 0x89, 0x70, 0x41, 0x5c, 0xbf, 0x10, 0xdf, 0x83, 0x4a, 0xe4, 0x4c, 0xd8, + 0xc9, 0x2e, 0x5b, 0xa3, 0x05, 0xed, 0x73, 0xb1, 0xb0, 0xb7, 0xc4, 0xd7, + 0x0d, 0xea, 0xf6, 0xb4, 0xc1, 0x5e, 0x12, 0x54, 0x30, 0x73, 0x5c, 0x93, + 0xd9, 0xf7, 0xc9, 0x24, 0x43, 0x8f, 0x4f, 0x8e, 0x94, 0x95, 0xb6, 0xfd, + 0xa3, 
0x14, 0x42, 0x50, 0xb8, 0x66, 0xfb, 0xc4, 0xed, 0x72, 0xcf, 0x7b, + 0xa9, 0x73, 0xeb, 0xc4, 0x4a, 0x05, 0xea, 0xb4, 0x47, 0xca, 0x21, 0x56, + 0x28, 0xa8, 0x87, 0xb8, 0x87, 0x0b, 0xe3, 0x8d, 0xfd, 0x70, 0xf7, 0x33, + 0x76, 0xf0, 0x3d, 0xa4, 0x3b, 0x83, 0xab, 0x14, 0x01, 0xe1, 0xb0, 0xa9, + 0x44, 0xe8, 0xd7, 0x50, 0x26, 0x0b, 0xbb, 0x2d, 0x57, 0x39, 0x82, 0x7c, + 0x71, 0xd8, 0x12, 0xaf, 0xf3, 0x9f, 0x46, 0xbd, 0x62, 0xd6, 0x61, 0xf5, + 0xb7, 0x04, 0x94, 0xbf, 0x87, 0xea, 0xc4, 0xc4, 0x33, 0xcf, 0x36, 0x3b, + 0x4f, 0xc7, 0x71, 0xf1, 0x98, 0xe6, 0xb0, 0x96, 0x25, 0xd7, 0xac, 0x75, + 0xfc, 0x92, 0xe0, 0x69, 0x72, 0x37, 0x8d, 0x40, 0x31, 0xaa, 0x2c, 0x86, + 0xfb, 0x95, 0x3f, 0x9c, 0x23, 0xd4, 0x39, 0x99, 0xff, 0xea, 0x95, 0x79, + 0xb9, 0x2e, 0xb0, 0x33, 0xf1, 0xe8, 0xd0, 0x42, 0xb5, 0x70, 0x5c, 0xca, + 0x69, 0x48, 0x28, 0x23, 0x58, 0xb4, 0x07, 0xfc, 0x3e, 0x15, 0x29, 0x00, + 0xa9, 0x22, 0x44, 0x70, 0xd0, 0xc7, 0x01, 0x0d, 0x3e, 0xfc, 0x57, 0xb7, + 0x54, 0x3a, 0xc3, 0x43, 0xd6, 0x2f, 0x55, 0x09, 0x52, 0x4a, 0x6b, 0x8e, + 0x4c, 0x82, 0xbb, 0x4e, 0x3e, 0x38, 0xe1, 0x9e, 0x72, 0x83, 0xec, 0x40, + 0xf5, 0xf7, 0x0e, 0x3c, 0x24, 0xed, 0xda, 0xf2, 0x39, 0x6c, 0xad, 0xeb, + 0xff, 0xfb, 0x4a, 0x38, 0x50, 0x49, 0x28, 0x3d, 0x05, 0xb2, 0x98, 0x44, + 0x2b, 0x61, 0xa2, 0x9b, 0x3a, 0x3c, 0xad, 0xd9, 0x8c, 0xef, 0x3c, 0x72, + 0x50, 0x74, 0x13, 0x80, 0xc4, 0x7e, 0x6e, 0xf3, 0xc9, 0xdf, 0x63, 0xf6, + 0x41, 0xb2, 0x08, 0x78, 0x9b, 0x7c, 0xa9, 0x13, 0xd1, 0x21, 0xe7, 0x5e, + 0x6a, 0x0d, 0x64, 0xf7, 0x52, 0x75, 0xf2, 0x80, 0x69, 0xbe, 0x43, 0xf8, + 0xd4, 0xad, 0x49, 0xfc, 0x97, 0x76, 0x1c, 0xb6, 0x43, 0x9e, 0xcb, 0x45, + 0x4d, 0x75, 0x07, 0xae, 0xdb, 0xbf, 0xf5, 0x8a, 0xeb, 0xb9, 0x6b, 0x12, + 0x06, 0xbf, 0x94, 0xad, 0x77, 0x29, 0xb1, 0xae, 0x24, 0x9b, 0x4d, 0xdc, + 0xe1, 0x5e, 0xd7, 0x57, 0xec, 0xd1, 0xd8, 0xad, 0xf0, 0x06, 0x08, 0x43, + 0x33, 0x99, 0xd2, 0x04, 0xfc, 0xc8, 0xf6, 0x53, 0x3d, 0x73, 0xd4, 0x36, + 0xd3, 0x8e, 0x4a, 0xcd, 0xb1, 0xe9, 0xcb, 0x3a, 0x5f, 0x54, 0xbc, 0xde, + 0x16, 
0xa2, 0x85, 0xde, 0x35, 0x27, 0x99, 0x32, 0x4f, 0xb9, 0x2c, 0x16, + 0xa2, 0x6e, 0xae, 0x75, 0x60, 0x77, 0xe9, 0x08, 0x0f, 0x08, 0xc4, 0xd0, + 0x62, 0xc7, 0xd2, 0x1f, 0x3b, 0x29, 0xdd, 0xb7, 0xea, 0xa3, 0x58, 0xaf, + 0x4c, 0x05, 0xd2, 0x82, 0x6a, 0xe0, 0xc4, 0xe9, 0x70, 0x7e, 0xf2, 0xca, + 0x82, 0x6a, 0xae, 0xc1, 0x9a, 0x42, 0x5d, 0x46, 0x4a, 0xb7, 0x8f, 0x4d, + 0x33, 0xfe, 0x6f, 0x47, 0xb5, 0x49, 0xb3, 0x89, 0x51, 0x31, 0x74, 0x68, + 0x14, 0xda, 0x0a, 0x41, 0x3d, 0x1f, 0x8e, 0x30, 0x8c, 0x77, 0xd1, 0xa9, + 0x36, 0x41, 0x78, 0x34, 0xb7, 0x7e, 0x4e, 0x7a, 0x77, 0x12, 0x43, 0x97, + 0x43, 0xba, 0xd6, 0x28, 0x14, 0x2a, 0x9f, 0x98, 0xb4, 0x39, 0x08, 0x5c, + 0xb7, 0xb8, 0x03, 0x63, 0x62, 0x68, 0xc6, 0x9a, 0x4d, 0xf5, 0xdc, 0x7c, + 0x0f, 0x7e, 0x77, 0xdc, 0x85, 0x53, 0x31, 0x8c, 0x53, 0x8b, 0x27, 0xc4, + 0xb7, 0x3d, 0xd0, 0x94, 0x9b, 0x7e, 0x59, 0x59, 0x03, 0x09, 0x8c, 0x30, + 0x70, 0x7d, 0x9c, 0x73, 0x89, 0x6c, 0x5f, 0xbf, 0xf9, 0xc7, 0x72, 0x76, + 0x12, 0x98, 0xe3, 0xbe, 0xc3, 0x67, 0xdf, 0xa1, 0x76, 0xa3, 0xec, 0x44, + 0x30, 0x70, 0x2f, 0x6a, 0x86, 0x28, 0xb9, 0x9d, 0x7f, 0x93, 0xf2, 0x4a, + 0x34, 0x48, 0x1f, 0x2e, 0x2e, 0x95, 0x88, 0xdb, 0x1f, 0x2c, 0x19, 0x46, + 0x2e, 0x91, 0x5f, 0x81, 0x0d, 0x08, 0x9d, 0x03, 0x0b, 0xaf, 0x59, 0x0a, + 0x41, 0xad, 0x4d, 0x6c, 0x09, 0x0e, 0x9f, 0xd1, 0xc4, 0xdb, 0xac, 0x59, + 0x27, 0x04, 0x1c, 0x73, 0xe9, 0xf3, 0xe8, 0x54, 0xd9, 0x11, 0x31, 0xb2, + 0xed, 0x2d, 0x8c, 0xeb, 0x99, 0x26, 0x48, 0x9e, 0xac, 0x88, 0x96, 0xcb, + 0x19, 0x49, 0xfa, 0x4a, 0x82, 0xd5, 0x5d, 0xb8, 0x0f, 0x22, 0x3f, 0xb6, + 0x5c, 0x02, 0x2a, 0xb9, 0xd9, 0xfe, 0x4d, 0x9d, 0xdb, 0x85, 0x90, 0x19, + 0x7f, 0x1a, 0x44, 0xa3, 0x74, 0x68, 0xbf, 0xa2, 0x3b, 0xb4, 0x3b, 0xeb, + 0xab, 0x99, 0xc2, 0x46, 0x50, 0x7e, 0xec, 0xa9, 0xb4, 0x86, 0xfa, 0x50, + 0xcb, 0x71, 0x7e, 0x75, 0xa5, 0xca, 0xa6, 0x2f, 0x40, 0x1d, 0xa1, 0x4a, + 0x5c, 0x91, 0xd7, 0x2a, 0xa6, 0x17, 0x11, 0x4d, 0x19, 0x2b, 0xb3, 0x0f, + 0xf0, 0xb3, 0x06, 0x70, 0x51, 0x5c, 0x52, 0x8c, 0xdf, 0xe3, 0x19, 0x92, + 0x08, 
0x40, 0xa2, 0xb4, 0xc0, 0xf2, 0xe8, 0x44, 0xcc, 0x36, 0xaa, 0xf9, + 0xf8, 0xfc, 0x2d, 0x83, 0x79, 0xc6, 0x58, 0xc1, 0xdf, 0x32, 0xb7, 0xde, + 0x0f, 0x3e, 0xc0, 0xa8, 0x7e, 0xeb, 0xf2, 0x30, 0x16, 0xdf, 0x38, 0xcb, + 0x69, 0xd9, 0x44, 0x0d, 0x44, 0xf4, 0x45, 0x9c, 0x81, 0xc8, 0xe7, 0x06, + 0xae, 0x95, 0xaf, 0xff, 0x17, 0x3b, 0x1c, 0x3f, 0xda, 0xa5, 0xf8, 0xfd, + 0x9c, 0xf1, 0x0a, 0xca, 0xda, 0xc0, 0xfa, 0x02, 0xc4, 0xce, 0x78, 0xfb, + 0x35, 0x8c, 0xfe, 0x55, 0xad, 0x0d, 0x9b, 0xeb, 0x10, 0xf1, 0x7b, 0xb1, + 0x09, 0xf8, 0xef, 0xfc, 0xde, 0x7a, 0x69, 0x74, 0x76, 0xef, 0x91, 0x64, + 0x33, 0xc4, 0x08, 0x15, 0x73, 0x85, 0x56, 0xae, 0x9c, 0xf6, 0xdd, 0x55, + 0x19, 0x96, 0xe6, 0x41, 0x12, 0xc9, 0x87, 0x91, 0x9e, 0xc6, 0x18, 0xe8, + 0xbf, 0xa0, 0x59, 0xfd, 0x20, 0xab, 0xb5, 0xcf, 0x0f, 0x6e, 0x30, 0xd3, + 0xc5, 0x70, 0xf2, 0x50, 0xa4, 0x2a, 0xdf, 0xb0, 0x45, 0xfc, 0x82, 0x1a, + 0x3b, 0xfe, 0x0c, 0xad, 0x41, 0x95, 0xf1, 0xd6, 0x85, 0xa2, 0xc9, 0xff, + 0xbe, 0x3a, 0x64, 0x70, 0x43, 0xc0, 0xc5, 0xc8, 0x80, 0x11, 0x0d, 0x20, + 0xcd, 0xf2, 0xa2, 0xbb, 0x43, 0x68, 0x0e, 0xf4, 0x01, 0xb3, 0x73, 0x79, + 0x9f, 0x68, 0x41, 0x63, 0x3e, 0xda, 0xf9, 0xf4, 0x23, 0x57, 0x97, 0x84, + 0x99, 0xe8, 0x5e, 0xdb, 0xaa, 0x24, 0xab, 0x9c, 0x40, 0x83, 0xf9, 0x3f, + 0x4f, 0x5a, 0x53, 0xa6, 0xf1, 0xe8, 0x95, 0xcf, 0xcb, 0x50, 0x13, 0x51, + 0xa7, 0x8c, 0x71, 0x1d, 0xff, 0xcc, 0x66, 0xab, 0xff, 0xca, 0xc5, 0xc3, + 0x73, 0x45, 0xb7, 0x21, 0x1d, 0x65, 0x7a, 0xe5, 0x1f, 0x3f, 0x1a, 0x58, + 0x23, 0x28, 0xc8, 0xf3, 0xbf, 0x98, 0x25, 0xc0, 0x83, 0x68, 0xf0, 0x62, + 0x63, 0x90, 0xcf, 0x1f, 0x20, 0xb8, 0x04, 0x5c, 0xc4, 0x80, 0x5b, 0xf4, + 0x6d, 0xdc, 0xe9, 0xac, 0xd8, 0x13, 0x3b, 0x42, 0xf8, 0x4e, 0xa2, 0x1c, + 0xce, 0x3f, 0x8d, 0x15, 0xd3, 0x87, 0x1b, 0x44, 0x79, 0x52, 0x34, 0x4b, + 0x63, 0x4d, 0xbf, 0x95, 0xec, 0xae, 0xf9, 0xc6, 0x7b, 0x7b, 0x85, 0x8c, + 0x4f, 0x20, 0x58, 0x9d, 0x48, 0x03, 0x2f, 0x77, 0x2e, 0x8b, 0x6f, 0x66, + 0x76, 0xb9, 0xb8, 0xb7, 0x34, 0x5a, 0x63, 0x06, 0x85, 0x82, 0x5f, 0x23, + 0x8f, 
0x8d, 0x0c, 0x92, 0x3b, 0xd2, 0x8a, 0x1b, 0x39, 0xee, 0x6a, 0xbc, + 0xf6, 0x94, 0x2a, 0xc6, 0x73, 0xa6, 0x99, 0x98, 0xdc, 0x96, 0xd7, 0xc1, + 0xfe, 0x9b, 0xc8, 0xfb, 0x86, 0x5a, 0xad, 0xce, 0xf8, 0xd5, 0x32, 0x62, + 0x96, 0x63, 0xaf, 0x4c, 0x4a, 0xae, 0xec, 0x26, 0x3d, 0x84, 0x69, 0x50, + 0x5f, 0x37, 0x9b, 0x29, 0xac, 0x15, 0x76, 0x3d, 0x33, 0x96, 0x06, 0xde, + 0xc1, 0x6d, 0xa2, 0xc7, 0xc3, 0x8a, 0x20, 0x2e, 0xf7, 0x08, 0x55, 0x83, + 0x23, 0x9c, 0x23, 0x2d, 0x3a, 0xa1, 0x32, 0xbc, 0x47, 0x48, 0xd5, 0x6a, + 0x71, 0xb9, 0xcc, 0x2d, 0x99, 0xa0, 0x37, 0x07, 0x46, 0x45, 0xbe, 0xf0, + 0x27, 0x5a, 0x25, 0x72, 0x58, 0x47, 0x6d, 0xbf, 0x23, 0xdc, 0x48, 0x44, + 0x45, 0x95, 0xb1, 0x62, 0xf1, 0x7e, 0x4c, 0x95, 0x1c, 0xb4, 0x17, 0x8b, + 0x59, 0x2e, 0xf3, 0x4f, 0x45, 0x3b, 0x5d, 0x67, 0x92, 0x52, 0xd8, 0xc1, + 0x91, 0xfa, 0x53, 0xaa, 0x87, 0xc0, 0xa7, 0xb0, 0x9f, 0x10, 0xe8, 0xac, + 0x45, 0x52, 0xbb, 0x17, 0xee, 0xf6, 0x18, 0xbe, 0x02, 0x70, 0xce, 0x79, + 0x66, 0x72, 0xf9, 0xf6, 0xca, 0x66, 0xff, 0xa4, 0x9a, 0xd9, 0xb7, 0x07, + 0xa9, 0xc1, 0x23, 0x7e, 0x7b, 0x9c, 0xe3, 0x02, 0x7a, 0xcc, 0xa3, 0x67, + 0xb7, 0xb0, 0x37, 0xba, 0xae, 0x12, 0xda, 0x48, 0x6e, 0x7f, 0xde, 0x5f, + 0x75, 0x15, 0xca, 0xd2, 0x46, 0xdd, 0xb0, 0x82, 0xbf, 0x6d, 0xe9, 0x51, + 0x66, 0xa5, 0x9e, 0x0c, 0xd5, 0x03, 0xbd, 0x97, 0x0e, 0x1b, 0x88, 0xf6, + 0x61, 0x5a, 0x8b, 0xe0, 0xdd, 0x3e, 0x59, 0x4c, 0x35, 0xfd, 0xb0, 0x3b, + 0x79, 0x8c, 0x1c, 0x96, 0x97, 0x35, 0x62, 0x36, 0x62, 0x4c, 0x4b, 0x46, + 0xb1, 0x21, 0xf7, 0xf0, 0x34, 0xdc, 0xd9, 0x9f, 0xf8, 0x53, 0x7d, 0xca, + 0xbc, 0x4d, 0xaf, 0xf4, 0xb7, 0x2f, 0xa7, 0x5d, 0x18, 0xf9, 0x3b, 0xa9, + 0xb0, 0xbb, 0xdf, 0xfa, 0x28, 0x2b, 0x58, 0xce, 0x46, 0x01, 0x3f, 0x76, + 0xf2, 0x39, 0x45, 0x8b, 0x3c, 0xda, 0x62, 0x2b, 0x6b, 0xe1, 0x5f, 0x14, + 0xfc, 0x79, 0x17, 0x2d, 0xe2, 0xe5, 0x8c, 0xc5, 0xde, 0x91, 0xfd, 0xf5, + 0x6d, 0x9b, 0x6b, 0xbb, 0xb0, 0x13, 0xae, 0xbe, 0x1e, 0xa8, 0x8f, 0x3c, + 0xfd, 0x24, 0xbe, 0xb8, 0x39, 0x80, 0x03, 0x06, 0x8b, 0xff, 0xca, 0x90, + 0x88, 
0x0f, 0x45, 0xc4, 0xeb, 0x50, 0x52, 0xf5, 0x00, 0x8c, 0x16, 0x9d, + 0x26, 0xaa, 0xec, 0xb1, 0x44, 0xd6, 0xfe, 0x67, 0xa3, 0xc1, 0xec, 0x4a, + 0x12, 0xa6, 0x7c, 0x7c, 0xc3, 0x46, 0x1c, 0x64, 0x61, 0x67, 0xec, 0xce, + 0x1e, 0xa2, 0xb4, 0xdd, 0x6e, 0x7f, 0x02, 0x14, 0xf4, 0x1c, 0x17, 0xa7, + 0x31, 0x9f, 0xc2, 0xc6, 0xc0, 0x21, 0x41, 0x88, 0x61, 0xd8, 0xca, 0x06, + 0xa5, 0xe4, 0xef, 0xa4, 0xaa, 0x4d, 0xa3, 0xad, 0x5f, 0xd4, 0x0c, 0x6b, + 0x14, 0x38, 0x2e, 0xe8, 0x87, 0x5a, 0x68, 0x10, 0x51, 0xd8, 0xbb, 0xa6, + 0xd9, 0xdc, 0xd3, 0x7f, 0x1f, 0xea, 0xa8, 0xcc, 0x3f, 0x43, 0xa4, 0x04, + 0x95, 0xb4, 0xde, 0x2f, 0x07, 0x5d, 0x91, 0x1c, 0x8e, 0xc3, 0xbc, 0xaa, + 0x46, 0x8a, 0xa8, 0x42, 0xa7, 0x2c, 0x0f, 0x1f, 0xb3, 0xe2, 0x8a, 0x0b, + 0xa0, 0x3f, 0xfb, 0x87, 0x9e, 0x42, 0xa5, 0x60, 0xce, 0x5a, 0x54, 0x91, + 0x26, 0x51, 0xea, 0x81, 0x6f, 0xf1, 0x54, 0x93, 0xe7, 0xa0, 0xf8, 0x64, + 0xab, 0x1d, 0x0d, 0x9d, 0x64, 0x6a, 0xd5, 0x19, 0x03, 0xbb, 0x94, 0x7f, + 0x0a, 0xb8, 0x6b, 0x87, 0xc3, 0x1a, 0x38, 0xe5, 0xe8, 0xba, 0x13, 0x17, + 0xeb, 0x13, 0xcc, 0xac, 0xcb, 0x1f, 0x96, 0x4c, 0x3b, 0x18, 0xfb, 0xe8, + 0x5c, 0x54, 0xce, 0x1a, 0x91, 0x44, 0xf5, 0x49, 0x6c, 0x38, 0x2a, 0x92, + 0x8a, 0x0d, 0x3d, 0x08, 0xc2, 0x5f, 0x6c, 0xac, 0x48, 0xb3, 0xdc, 0x2e, + 0xa6, 0x5a, 0xa8, 0xee, 0x22, 0x9a, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x04, 0x00, 0x00, 0x96, 0xc5, 0x3a, 0x4e, 0x42, 0x7d, 0x27, 0xce, + 0x44, 0x84, 0xf1, 0x67, 0x8c, 0xc5, 0xdd, 0x75, 0x3b, 0x8a, 0xed, 0x2e, + 0x29, 0x62, 0x7b, 0xb0, 0xe6, 0xa3, 0xb4, 0x61, 0x73, 0x10, 0xff, 0x0e, + 0x0c, 0x98, 0x74, 0xef, 0xbb, 0xc4, 0xca, 0x03, 0x88, 0xa4, 0x96, 0x61, + 0xef, 0x36, 0x6d, 0xa2, 0xb1, 0xc8, 0xf0, 0xac, 0xf1, 0xb2, 0x08, 0x56, + 0xc7, 0x99, 0xcf, 0xae, 0x0a, 0x37, 0x85, 0x60, 0x78, 0x2d, 0x14, 0xda, + 0xb1, 0xa7, 0x00, 0xb6, 0x00, 0x04, 0x76, 0x80, 0x0e, 0x9f, 0x2a, 0x30, + 0x8b, 0x85, 0xd9, 0xc1, 0xaf, 0xee, 0x27, 0x80, 0x20, 0xed, 0xef, 0x25, + 0x5c, 0x98, 0x6b, 0xcc, 0xf8, 0x72, 0xfb, 0x3f, 0x13, 0xe6, 0x9b, 0x47, + 0xee, 
0xa1, 0x18, 0x55, 0xa0, 0x68, 0xbe, 0xd4, 0x21, 0x59, 0x72, 0xa8, + 0xa4, 0xd2, 0x33, 0x57, 0x50, 0xfc, 0x6b, 0xa8, 0x49, 0x1b, 0x74, 0xdb, + 0x5a, 0x16, 0xb8, 0x52, 0x0c, 0xda, 0xa0, 0xa3, 0xff, 0x33, 0x56, 0x82, + 0x0f, 0x0a, 0x90, 0x82, 0xee, 0xf1, 0x1b, 0xb3, 0x05, 0x44, 0x39, 0x01, + 0xf7, 0x1e, 0xff, 0xcb, 0xea, 0xd0, 0xb6, 0x20, 0xbc, 0x84, 0xb1, 0xf9, + 0xa2, 0xc1, 0x56, 0xe6, 0xfa, 0x47, 0xc9, 0xfd, 0x45, 0x77, 0x51, 0x8e, + 0x01, 0xe4, 0x17, 0x20, 0x6f, 0x99, 0xe3, 0x90, 0x2f, 0xcc, 0xaf, 0xd9, + 0x61, 0x32, 0x91, 0x62, 0x58, 0xf4, 0x98, 0xf5, 0xf4, 0xeb, 0x13, 0xeb, + 0xdc, 0x8a, 0xac, 0xb2, 0x9e, 0xcf, 0xe7, 0xa7, 0xd4, 0x97, 0x22, 0x12, + 0x08, 0x10, 0x6d, 0x40, 0xea, 0x26, 0xea, 0x42, 0x29, 0x6e, 0x75, 0x62, + 0x47, 0x08, 0x17, 0xa8, 0x69, 0x0f, 0xf7, 0x35, 0x59, 0x23, 0x86, 0x83, + 0xfd, 0xb5, 0x61, 0x98, 0x9c, 0x4d, 0x37, 0xda, 0x9f, 0xfc, 0xfb, 0x16, + 0xb7, 0x6c, 0x52, 0xee, 0xa8, 0x9c, 0x3e, 0x93, 0x43, 0xc5, 0x2b, 0xd4, + 0xd0, 0x9f, 0x69, 0x2c, 0xc9, 0x1f, 0x2e, 0xdf, 0x5b, 0xe6, 0xc6, 0x5f, + 0x71, 0xd1, 0xd7, 0xb2, 0x8f, 0x3a, 0xba, 0x60, 0x75, 0x3d, 0x34, 0x41, + 0x43, 0x9b, 0x13, 0xc0, 0x3b, 0x30, 0xc5, 0xe9, 0x84, 0x81, 0xde, 0x85, + 0x4e, 0x65, 0x7b, 0x21, 0x37, 0xb8, 0xef, 0x24, 0x19, 0xaa, 0x26, 0x0c, + 0x27, 0xa7, 0xd9, 0x29, 0x47, 0x1a, 0x15, 0x42, 0x1e, 0x30, 0x79, 0x79, + 0x96, 0x09, 0x62, 0x26, 0xad, 0x98, 0x8b, 0xcb, 0x3d, 0xeb, 0x66, 0x83, + 0x77, 0xd9, 0x79, 0x4d, 0x05, 0x81, 0x72, 0xe9, 0xe0, 0x6f, 0x13, 0x00, + 0x7e, 0xa3, 0x92, 0x82, 0x1c, 0x90, 0x83, 0x4b, 0x15, 0x97, 0x0f, 0x92, + 0xe2, 0xd3, 0x3d, 0xd7, 0x6c, 0xb9, 0x60, 0x9a, 0x23, 0x52, 0xbe, 0x59, + 0xc9, 0x36, 0x9e, 0xf7, 0x77, 0x09, 0x79, 0x01, 0xcc, 0xec, 0x17, 0xd1, + 0x74, 0xbc, 0x58, 0x65, 0x45, 0x3c, 0x86, 0xf1, 0xbc, 0xbd, 0x95, 0x54, + 0x46, 0x45, 0x7b, 0x4c, 0xa2, 0xea, 0x2a, 0x6e, 0xa8, 0xd1, 0x66, 0x03, + 0xb2, 0x6a, 0xe0, 0xd3, 0x07, 0x8d, 0xe0, 0x09, 0x81, 0x42, 0xe3, 0x97, + 0xc4, 0xe7, 0x37, 0xc5, 0x82, 0xcf, 0xb1, 0xec, 0xba, 0xbd, 0xf4, 0xb6, + 0x41, 
0xb2, 0xb8, 0xa6, 0x3a, 0x85, 0x4b, 0x4f, 0x46, 0x48, 0xe9, 0x9b, + 0x72, 0xf5, 0xb0, 0x64, 0x66, 0x75, 0x42, 0xb4, 0x00, 0xbe, 0x11, 0x6d, + 0x86, 0x93, 0x07, 0x50, 0xa7, 0xef, 0x55, 0x42, 0xcf, 0xe8, 0x61, 0xd0, + 0x9b, 0x11, 0x84, 0x8c, 0x74, 0xe4, 0xb8, 0x3f, 0x48, 0xb3, 0x61, 0xe3, + 0xea, 0x66, 0x86, 0x94, 0x95, 0x12, 0x77, 0x26, 0x75, 0x30, 0xb5, 0xd3, + 0x7a, 0xad, 0x2d, 0x58, 0x46, 0x1b, 0x4b, 0xd9, 0x2d, 0x1e, 0x0b, 0xff, + 0xd7, 0x03, 0x56, 0x3b, 0xbd, 0x65, 0xb0, 0xf9, 0xfe, 0x43, 0x1c, 0x9c, + 0x18, 0x82, 0x78, 0x5e, 0x06, 0x02, 0x21, 0x70, 0xb2, 0x7f, 0xb5, 0x63, + 0x71, 0x85, 0x95, 0x79, 0xae, 0x1e, 0xc6, 0x62, 0x7a, 0x7c, 0x63, 0x46, + 0x70, 0x1c, 0x58, 0x72, 0x1d, 0xde, 0xca, 0xb4, 0xfc, 0xc8, 0x56, 0x38, + 0x32, 0xf4, 0x0b, 0x56, 0x87, 0x6b, 0x5b, 0x53, 0xd2, 0x2c, 0x35, 0xef, + 0x5b, 0x33, 0x59, 0x13, 0x76, 0x82, 0x30, 0x80, 0x23, 0x10, 0x07, 0x4c, + 0x3f, 0xac, 0x9c, 0x58, 0x2d, 0x04, 0xe6, 0x6a, 0xd3, 0x5c, 0xf9, 0xb6, + 0x59, 0x4e, 0x85, 0xfe, 0x01, 0x71, 0xf0, 0xf7, 0xf2, 0x1f, 0x46, 0xd5, + 0x20, 0x3c, 0x9b, 0xc2, 0x1e, 0x73, 0x1c, 0x56, 0x9c, 0x76, 0x8c, 0x12, + 0x95, 0x51, 0xd4, 0x6f, 0x5b, 0x3a, 0xa7, 0x5f, 0xa7, 0xe4, 0xfa, 0xb7, + 0x1a, 0xdd, 0xb6, 0x4c, 0x01, 0x02, 0xae, 0x9c, 0x02, 0x0d, 0x66, 0x2f, + 0x40, 0x87, 0xa1, 0xbc, 0xf3, 0xde, 0xf4, 0xdb, 0x65, 0xee, 0xcc, 0xca, + 0xe1, 0x7a, 0xa2, 0xf4, 0xf7, 0xf5, 0x7c, 0x2a, 0x3f, 0xa4, 0x67, 0xbb, + 0x07, 0x50, 0x7a, 0x29, 0x8a, 0xcf, 0x2c, 0x7a, 0x0e, 0x0d, 0xc7, 0x95, + 0x8b, 0xf4, 0xe2, 0x50, 0xe1, 0xc1, 0x40, 0x16, 0x99, 0x5c, 0x72, 0xe7, + 0xe4, 0x01, 0xeb, 0x29, 0x6a, 0x99, 0xf2, 0x67, 0x23, 0x46, 0x1f, 0xaa, + 0xea, 0xc1, 0x51, 0x30, 0xeb, 0x7d, 0x34, 0x52, 0x91, 0x37, 0x2d, 0xc6, + 0x5c, 0x3a, 0x7c, 0x54, 0xc0, 0x79, 0xdc, 0xf9, 0xbf, 0x08, 0x2a, 0xf6, + 0xe1, 0x1e, 0xee, 0xc6, 0xd2, 0xe9, 0x30, 0x27, 0x60, 0x0c, 0xa2, 0x63, + 0x16, 0x06, 0x3d, 0xe2, 0xf5, 0x6f, 0xea, 0xe4, 0x4d, 0x9f, 0x2d, 0x36, + 0x62, 0x95, 0x47, 0x5d, 0x00, 0x22, 0x9f, 0x0c, 0xbb, 0x71, 0xad, 0xea, + 0xe7, 
0x62, 0x59, 0x21, 0xd1, 0xaf, 0x04, 0x5a, 0xfc, 0x1f, 0x28, 0x6b, + 0x6f, 0x71, 0xec, 0xd4, 0xbd, 0x9c, 0x88, 0xfb, 0x3f, 0x04, 0xea, 0xd6, + 0xb2, 0x24, 0xe5, 0x28, 0xfe, 0xc5, 0x3e, 0x15, 0x00, 0x8c, 0xa2, 0xdf, + 0x18, 0x3d, 0x10, 0x9a, 0xb1, 0xcd, 0x64, 0xda, 0x87, 0x41, 0xc8, 0xa1, + 0x1c, 0x97, 0xd5, 0x44, 0xd9, 0x51, 0xd2, 0x96, 0xed, 0xad, 0x28, 0x1f, + 0x03, 0x89, 0x21, 0xbd, 0x79, 0x91, 0x48, 0x9c, 0x8e, 0x17, 0xfd, 0x36, + 0x72, 0xf6, 0x69, 0x4f, 0x3f, 0x02, 0x57, 0xcc, 0x3f, 0x1c, 0x49, 0x82, + 0x00, 0x45, 0x9e, 0x29, 0x83, 0x14, 0x12, 0xbb, 0xd2, 0xd0, 0x1a, 0x66, + 0x0f, 0x57, 0x24, 0xd4, 0x9f, 0x46, 0x0c, 0xf4, 0xb8, 0x28, 0x85, 0x52, + 0xe2, 0xa1, 0xc2, 0x3a, 0x8c, 0x34, 0x4a, 0x81, 0xe3, 0xbc, 0xa2, 0x67, + 0x67, 0x12, 0x13, 0xc4, 0xe7, 0xd7, 0x2c, 0x4e, 0xa9, 0xf5, 0xed, 0x63, + 0xf2, 0x18, 0x9c, 0x0c, 0xe2, 0x4d, 0x25, 0x23, 0x30, 0x3e, 0x49, 0x29, + 0xa6, 0x37, 0xdf, 0xc2, 0xdc, 0xf6, 0x5e, 0xae, 0x45, 0xd7, 0x8d, 0x56, + 0xba, 0x29, 0x4f, 0xee, 0xc9, 0x26, 0xd7, 0xbf, 0x10, 0x4d, 0x0a, 0x3b, + 0x3d, 0x1f, 0xd5, 0x72, 0xe1, 0xe6, 0xf5, 0x23, 0x4a, 0x17, 0x2d, 0xe4, + 0x40, 0x55, 0x9b, 0x39, 0x66, 0x36, 0xe4, 0x6d, 0x6d, 0xb6, 0x8d, 0x2a, + 0x7e, 0x76, 0x73, 0xa5, 0x86, 0x20, 0x3d, 0x18, 0xa0, 0x6c, 0x35, 0x59, + 0xc8, 0x1c, 0xef, 0x0f, 0x36, 0x1d, 0x6f, 0xba, 0x89, 0xb9, 0x9e, 0x7a, + 0x58, 0x1d, 0x43, 0xad, 0x85, 0x8b, 0x6b, 0xcc, 0x25, 0xb8, 0xe4, 0xdd, + 0xa1, 0x35, 0xd9, 0xef, 0xc4, 0xb1, 0xf6, 0x99, 0x27, 0x17, 0xb7, 0xbe, + 0xd1, 0x4f, 0xa1, 0x81, 0x4e, 0xb6, 0x19, 0xcd, 0xa0, 0x92, 0xeb, 0x56, + 0x41, 0x4f, 0x37, 0xca, 0x3b, 0x43, 0x85, 0x86, 0xdf, 0x5d, 0x5a, 0x8c, + 0xd4, 0x5b, 0xc4, 0x28, 0xdb, 0x16, 0xea, 0x3a, 0x2e, 0x9e, 0xff, 0xff, + 0x04, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0xea, 0x59, 0x40, 0xc4, + 0x40, 0x8b, 0x6a, 0x8a, 0xb8, 0x7f, 0x1e, 0x0b, 0xfe, 0xab, 0xa4, 0xac, + 0x42, 0x91, 0xc5, 0xfa, 0x2c, 0x7e, 0xb4, 0xf9, 0x5c, 0xd5, 0x4c, 0x6a, + 0x74, 0x82, 0x90, 0x81, 0x96, 0xb0, 0xf4, 0xd4, 0xba, 0xc9, 0xa3, 0x2e, + 0x26, 
0x0a, 0xc9, 0x55, 0x65, 0xac, 0xde, 0x83, 0x37, 0xec, 0x0e, 0xf6, + 0xdc, 0x8c, 0x34, 0xe6, 0x57, 0xde, 0x32, 0x0a, 0x02, 0x62, 0x4f, 0x6a, + 0x92, 0xa5, 0xb4, 0x40, 0xde, 0x57, 0xf4, 0xd1, 0xa3, 0x1c, 0xd3, 0xf7, + 0x4a, 0x15, 0xcc, 0x27, 0x26, 0x00, 0xba, 0xf3, 0xfa, 0x4e, 0xc6, 0xe9, + 0xc3, 0x05, 0x3d, 0x3a, 0x89, 0x96, 0x7d, 0x41, 0xac, 0xca, 0x28, 0x7f, + 0x69, 0x02, 0x40, 0x03, 0x93, 0x86, 0x85, 0x85, 0x73, 0x00, 0x09, 0x5a, + 0xcf, 0x5f, 0x1d, 0xaa, 0x46, 0x41, 0x9d, 0x08, 0xbf, 0xea, 0x45, 0x9b, + 0x93, 0xda, 0x9e, 0x81, 0xba, 0x9e, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x08, 0x00, 0x00, 0x6a, 0x1f, 0x9b, 0x03, 0xdd, 0xe4, 0x16, 0x07, + 0x7f, 0x5b, 0xb0, 0xee, 0xac, 0x55, 0xc4, 0x50, 0xe6, 0x2b, 0x17, 0xed, + 0x7f, 0x50, 0x4d, 0x71, 0x73, 0xae, 0xe0, 0x4d, 0xce, 0x08, 0xd9, 0x8b, + 0x83, 0x2c, 0x01, 0x48, 0x02, 0xd3, 0xbb, 0xca, 0x86, 0xd7, 0xca, 0x5f, + 0xc7, 0xce, 0x59, 0xdf, 0xc1, 0xcc, 0xf7, 0x7b, 0x54, 0xf8, 0x0d, 0x4f, + 0x81, 0x9e, 0x50, 0x6a, 0x65, 0x66, 0x4a, 0xec, 0x7a, 0x1b, 0x92, 0xb2, + 0x39, 0x8f, 0x5d, 0x41, 0x33, 0xcf, 0xe6, 0x1b, 0x34, 0x5d, 0xe1, 0xf6, + 0xef, 0xcb, 0xa0, 0x55, 0x7e, 0x1f, 0x45, 0x38, 0xb9, 0x56, 0x15, 0x3b, + 0x70, 0xab, 0xc8, 0x2f, 0x1c, 0xb9, 0x7d, 0x37, 0xe1, 0xb4, 0x03, 0x44, + 0x5a, 0xf6, 0x57, 0x97, 0x03, 0x54, 0x4c, 0x22, 0x88, 0xc3, 0x82, 0xfd, + 0x91, 0xc1, 0xf1, 0x63, 0xb4, 0x50, 0x46, 0x11, 0x64, 0x07, 0xfd, 0x85, + 0xe5, 0x78, 0x57, 0xdd, 0x19, 0x2a, 0x6b, 0x64, 0x3e, 0xec, 0xb8, 0xf3, + 0xb5, 0x95, 0x29, 0x72, 0xf1, 0x9d, 0xdd, 0xb9, 0xad, 0xd0, 0x78, 0x26, + 0x86, 0x10, 0x10, 0x19, 0xe4, 0x79, 0xae, 0xdc, 0x56, 0xb7, 0x54, 0x4f, + 0x94, 0xc6, 0x26, 0x9a, 0x93, 0xa8, 0x2e, 0x1b, 0x1c, 0xda, 0x87, 0x3a, + 0xa2, 0x44, 0xb9, 0x0b, 0x0f, 0xab, 0x70, 0x3b, 0xb7, 0x6c, 0xbf, 0x58, + 0x67, 0x32, 0x7d, 0xa3, 0x2a, 0xcb, 0x4e, 0x02, 0x92, 0xa1, 0x26, 0x0e, + 0x20, 0x5e, 0xb3, 0xec, 0xc4, 0x04, 0x5b, 0x7f, 0xe5, 0xbd, 0x30, 0xeb, + 0xc8, 0xdd, 0xf1, 0x72, 0x5a, 0x7e, 0xcb, 0x93, 0x22, 0xa0, 0x01, 0x9f, + 0xbb, 
0x24, 0x9f, 0x50, 0x01, 0x1f, 0x24, 0x02, 0x85, 0x6d, 0xe6, 0x4d, + 0x55, 0xc4, 0x07, 0xe9, 0x87, 0x38, 0xbf, 0x1a, 0x3b, 0x05, 0x82, 0xc4, + 0x73, 0x4b, 0x87, 0x3c, 0xb4, 0x0a, 0x48, 0x8c, 0x06, 0x67, 0xe7, 0xbf, + 0xcc, 0xe7, 0xe5, 0xc3, 0xb2, 0x81, 0x60, 0xe2, 0xd1, 0xb1, 0x8f, 0x98, + 0xbd, 0x7d, 0xbd, 0x4e, 0x9a, 0xca, 0xbe, 0xcb, 0x81, 0x47, 0x25, 0xaa, + 0xfa, 0x91, 0xcf, 0x78, 0xce, 0xcb, 0x1a, 0x11, 0x79, 0xcf, 0x97, 0xa3, + 0x95, 0x95, 0x6f, 0xd7, 0xae, 0x80, 0xc9, 0xd5, 0x95, 0xb7, 0xcf, 0xe2, + 0x9d, 0x98, 0x65, 0x80, 0xfd, 0x2e, 0xee, 0x46, 0x5e, 0x46, 0x8c, 0xde, + 0x52, 0xb4, 0xdc, 0xce, 0xa8, 0xab, 0x4e, 0x0c, 0x12, 0x9f, 0x89, 0x9c, + 0x84, 0x80, 0xfe, 0x08, 0x64, 0x12, 0x12, 0x95, 0x62, 0xea, 0x65, 0xcc, + 0x34, 0x80, 0xcf, 0x92, 0x5f, 0xc2, 0xae, 0x76, 0xe7, 0x2f, 0xbb, 0xa8, + 0xdb, 0x6a, 0x66, 0x60, 0xaf, 0x88, 0xba, 0x65, 0x32, 0xcf, 0xf7, 0x6e, + 0xd8, 0xd0, 0x69, 0xb0, 0x12, 0x23, 0xd6, 0xc2, 0x32, 0xe5, 0x8e, 0x51, + 0xc5, 0x61, 0x28, 0x45, 0xf7, 0xf9, 0xea, 0x73, 0xce, 0x04, 0x2d, 0x56, + 0x43, 0x10, 0x8b, 0x4f, 0x6b, 0xfa, 0x32, 0xa8, 0x92, 0x8f, 0xd9, 0xb4, + 0xfd, 0xa4, 0x74, 0xa8, 0xea, 0xca, 0xd3, 0x84, 0xbb, 0x5a, 0x34, 0x57, + 0xf9, 0xda, 0x25, 0x40, 0x1f, 0x5e, 0xc2, 0x66, 0x43, 0x05, 0xdd, 0x13, + 0x88, 0x91, 0x60, 0xa1, 0x75, 0xd3, 0xc4, 0x27, 0xff, 0xda, 0x24, 0x3d, + 0xd9, 0xd7, 0x47, 0x46, 0x30, 0xd0, 0x76, 0xc4, 0x9e, 0x97, 0xe3, 0x43, + 0xd7, 0x45, 0xaf, 0x49, 0x36, 0xf2, 0x18, 0xdd, 0x3f, 0x86, 0x9a, 0xec, + 0x9a, 0x70, 0xeb, 0x5a, 0xe2, 0xa0, 0x4b, 0x45, 0x21, 0xb3, 0x32, 0x3d, + 0x0c, 0x8c, 0x03, 0x13, 0xae, 0x46, 0xb5, 0x1a, 0x0a, 0x03, 0x36, 0xfe, + 0xfe, 0xfa, 0xc9, 0x4d, 0x46, 0xf8, 0xfe, 0x6f, 0x99, 0x8c, 0xe4, 0x77, + 0x0c, 0x27, 0x59, 0xf7, 0xc3, 0xfc, 0x32, 0xb3, 0xa5, 0xae, 0xdc, 0x49, + 0xac, 0x31, 0x27, 0xa6, 0x14, 0x92, 0xfb, 0xe3, 0x69, 0x35, 0x8d, 0xa0, + 0x50, 0x55, 0x09, 0x90, 0xdf, 0x67, 0x08, 0x4c, 0x0e, 0xaf, 0x71, 0xc2, + 0xe8, 0xb8, 0xdc, 0x45, 0xe3, 0x6d, 0x58, 0x3f, 0x19, 0x8d, 0xcd, 0xeb, + 0xe3, 
0x02, 0x49, 0xd8, 0xc8, 0x8b, 0x29, 0xb3, 0xef, 0x2b, 0xf0, 0x39, + 0x5c, 0x11, 0xaa, 0x52, 0x44, 0x0d, 0x1a, 0x3a, 0x7a, 0x62, 0xda, 0x6d, + 0xe3, 0xdd, 0x03, 0x30, 0x6d, 0x3e, 0x18, 0x30, 0x1d, 0xc0, 0xd0, 0x05, + 0x67, 0x98, 0xf5, 0x2a, 0xc7, 0xa1, 0x58, 0xd7, 0xf8, 0x6f, 0x7d, 0x07, + 0x59, 0x27, 0x95, 0xb9, 0x8d, 0x4d, 0xd7, 0xc8, 0x5e, 0x8b, 0x89, 0x14, + 0xb7, 0x1b, 0x35, 0xaa, 0x72, 0x02, 0x39, 0x3c, 0x41, 0x7c, 0x91, 0x93, + 0x81, 0xe1, 0xad, 0xbe, 0x77, 0x28, 0x80, 0xa2, 0x9c, 0xa8, 0x00, 0x18, + 0xa5, 0x70, 0xec, 0xec, 0x96, 0x95, 0x37, 0xa3, 0xee, 0x15, 0xa0, 0x69, + 0x0e, 0x05, 0xb5, 0xb4, 0xb6, 0xa7, 0x8b, 0xb9, 0x41, 0x88, 0x4f, 0x56, + 0x39, 0xa7, 0xbe, 0x24, 0xce, 0x4c, 0xe0, 0x9c, 0x24, 0x5a, 0xa1, 0xab, + 0xcd, 0x82, 0xf1, 0x16, 0x3f, 0xc0, 0xaf, 0xe1, 0x42, 0xe0, 0x7d, 0x1b, + 0xd9, 0x8f, 0xb8, 0x04, 0xa1, 0x88, 0xd9, 0xc3, 0xaf, 0x4f, 0xda, 0xfd, + 0x0b, 0x5c, 0xc3, 0x04, 0xf3, 0xdb, 0xe6, 0x76, 0x6e, 0xe9, 0xdc, 0xea, + 0x6f, 0xa2, 0xa5, 0x75, 0x2c, 0xc7, 0x91, 0x7d, 0x4b, 0xd5, 0x68, 0x55, + 0xbb, 0x2d, 0x14, 0xdb, 0x06, 0x76, 0xf7, 0xcc, 0x0a, 0x88, 0x6c, 0x2b, + 0xa1, 0x57, 0xd6, 0x15, 0x9c, 0x46, 0xcf, 0x5b, 0x6f, 0x9e, 0x7e, 0xc5, + 0x39, 0xda, 0x97, 0x26, 0x5e, 0xf5, 0x25, 0x06, 0xed, 0x8e, 0x9b, 0x1d, + 0x1b, 0x91, 0x07, 0x89, 0x08, 0xce, 0xd7, 0x38, 0x43, 0x64, 0x8e, 0xf5, + 0x3a, 0x52, 0x4a, 0xfb, 0x3e, 0xff, 0x2c, 0xb3, 0x78, 0x40, 0xb5, 0xdd, + 0xb2, 0x8a, 0xd3, 0x6a, 0xc5, 0xb0, 0xa3, 0x4a, 0xb8, 0xe7, 0x27, 0xa0, + 0x5a, 0x8f, 0x0f, 0xda, 0x53, 0x49, 0xc9, 0x77, 0x2a, 0xef, 0x78, 0xc6, + 0xec, 0xaf, 0x10, 0xe5, 0x71, 0xc5, 0x7a, 0x85, 0xdf, 0xb2, 0x85, 0x02, + 0xe3, 0x55, 0x7a, 0x91, 0x3a, 0x68, 0xb2, 0x9d, 0x3d, 0xd9, 0x01, 0xc5, + 0x5f, 0x3c, 0xa8, 0x1d, 0x99, 0xc6, 0xe7, 0xad, 0x09, 0xd1, 0x39, 0x3a, + 0x92, 0xc5, 0x77, 0x9c, 0xdf, 0x99, 0x56, 0x9f, 0xfe, 0xf8, 0xfd, 0xc8, + 0x4f, 0x19, 0xa3, 0xa0, 0xdf, 0xff, 0x17, 0xac, 0xa9, 0x03, 0x32, 0x85, + 0x4c, 0x29, 0xca, 0x89, 0x58, 0xdc, 0x88, 0xdd, 0xeb, 0x79, 0x68, 0x5e, + 0x0f, 
0x37, 0x1a, 0xf7, 0x05, 0xfd, 0x39, 0x91, 0x25, 0x61, 0xf3, 0x04, + 0xda, 0x97, 0xfc, 0x7b, 0xcc, 0x40, 0x63, 0xfd, 0x5b, 0x3b, 0x27, 0x8e, + 0x92, 0x6d, 0x98, 0x0f, 0xcc, 0x9c, 0x9b, 0xda, 0xb2, 0xc6, 0xca, 0x56, + 0xff, 0x7e, 0xcc, 0xa2, 0xc0, 0x45, 0x3e, 0xf6, 0xdf, 0xa7, 0xe8, 0x2a, + 0xef, 0x0c, 0xde, 0xec, 0xa4, 0x1d, 0x2c, 0x3e, 0x03, 0xfd, 0xa4, 0x44, + 0x60, 0x4a, 0xf5, 0x83, 0x8f, 0x09, 0x2d, 0xe8, 0xd5, 0x46, 0xf6, 0x1c, + 0x2d, 0x39, 0x28, 0x0c, 0xdf, 0xa1, 0x2b, 0x05, 0x6e, 0x3c, 0x36, 0xdd, + 0x91, 0x81, 0x52, 0xf1, 0x56, 0xdc, 0xbb, 0x79, 0x62, 0xd8, 0x2e, 0x27, + 0x5d, 0x9f, 0x3c, 0xce, 0x81, 0x5c, 0x70, 0xe5, 0x4d, 0x33, 0x06, 0xd5, + 0x14, 0x04, 0xb7, 0xbc, 0x7b, 0x7a, 0xb4, 0xf7, 0x4a, 0x48, 0x8f, 0x97, + 0x85, 0x96, 0x69, 0xc9, 0x40, 0x52, 0xb1, 0x1c, 0x28, 0x82, 0xb3, 0x63, + 0xee, 0x94, 0x2f, 0xcb, 0x40, 0xad, 0xd7, 0x78, 0xb1, 0xc4, 0x21, 0x05, + 0x36, 0xd9, 0x46, 0xf0, 0x83, 0xcd, 0xee, 0x52, 0x7a, 0xa6, 0xa4, 0x40, + 0xb0, 0x2f, 0xf0, 0x1c, 0xfa, 0x42, 0x98, 0x54, 0x5b, 0xfe, 0x5e, 0xd6, + 0x84, 0x73, 0xca, 0x39, 0xbe, 0x87, 0xf2, 0x92, 0xee, 0x3d, 0x21, 0xcc, + 0x69, 0x81, 0xe5, 0xe8, 0x8a, 0xc3, 0x23, 0x64, 0x98, 0xd5, 0x1d, 0xcd, + 0x5c, 0x6c, 0x37, 0xc8, 0x8b, 0x08, 0x22, 0x12, 0x9f, 0x85, 0xc9, 0xed, + 0xb4, 0xa6, 0x07, 0xe1, 0x62, 0x79, 0x35, 0x5d, 0x26, 0x11, 0x4a, 0x6b, + 0x33, 0x37, 0x91, 0x78, 0xe8, 0xe2, 0xba, 0x8b, 0x8a, 0xb7, 0xbb, 0x0f, + 0xd2, 0xb3, 0xa2, 0x02, 0x0c, 0x57, 0x35, 0x99, 0x88, 0x6b, 0x9b, 0x64, + 0x79, 0x1f, 0x4a, 0x48, 0xd4, 0x3b, 0x5c, 0xeb, 0xb4, 0x83, 0xc3, 0xad, + 0x9c, 0x6a, 0xb0, 0xcf, 0x7f, 0x70, 0xe8, 0x22, 0x46, 0x25, 0xfe, 0x7e, + 0x02, 0x44, 0x83, 0x02, 0xb3, 0x08, 0x2e, 0x34, 0x08, 0x4b, 0xff, 0xa2, + 0xc1, 0x60, 0xbb, 0xd8, 0x89, 0x16, 0xf8, 0xaa, 0xab, 0xea, 0xf7, 0xa0, + 0x10, 0x9a, 0xc9, 0xe9, 0xa4, 0x81, 0xa7, 0x87, 0x32, 0x5b, 0xc1, 0xd0, + 0xd9, 0x70, 0x6f, 0xb6, 0x7c, 0x65, 0xd5, 0x0e, 0x65, 0x93, 0xfe, 0x6d, + 0x66, 0xaa, 0xab, 0xd0, 0x03, 0x07, 0xf2, 0xbe, 0x39, 0xd6, 0xc8, 0xac, + 0xf2, 
0x06, 0x58, 0x58, 0x46, 0xc0, 0x1a, 0xbd, 0xa4, 0x96, 0x38, 0x31, + 0x32, 0x89, 0x04, 0xdf, 0xcd, 0x3c, 0x2e, 0x98, 0xb8, 0x39, 0xba, 0xe2, + 0xca, 0x6b, 0xd0, 0x53, 0xce, 0x4a, 0xc8, 0x95, 0x81, 0x84, 0x17, 0xce, + 0x7f, 0x1d, 0xc1, 0x5a, 0xc4, 0xc2, 0x73, 0x30, 0x6d, 0x0b, 0x8c, 0xf8, + 0x66, 0x38, 0x4e, 0xa3, 0x14, 0x84, 0x15, 0x36, 0x9e, 0x0d, 0x56, 0x6b, + 0xa6, 0x77, 0x65, 0xa4, 0x2c, 0x77, 0x00, 0x8b, 0x43, 0x57, 0xc6, 0x25, + 0xc5, 0xd0, 0x17, 0x79, 0x6b, 0x5d, 0xbc, 0xcd, 0xc8, 0x25, 0x8f, 0x20, + 0x09, 0xcc, 0xbd, 0x80, 0x10, 0xdf, 0x35, 0xf6, 0x9c, 0x04, 0x80, 0x23, + 0xdc, 0x97, 0xe0, 0xba, 0x29, 0x48, 0x2e, 0x95, 0x0f, 0xb1, 0x9b, 0xc7, + 0xe6, 0x0b, 0x89, 0x16, 0xe2, 0x81, 0x3b, 0x32, 0x69, 0xc4, 0xde, 0xc6, + 0x12, 0x09, 0x47, 0xff, 0x50, 0xe4, 0x45, 0xb7, 0x35, 0xd2, 0x61, 0x9b, + 0x52, 0x6e, 0xbe, 0xaf, 0xd2, 0xeb, 0x0c, 0x50, 0xf1, 0x57, 0x9f, 0x59, + 0xe1, 0xc1, 0x4f, 0x8c, 0x79, 0x07, 0x05, 0xce, 0x8d, 0x64, 0xb2, 0xf0, + 0xd3, 0x4f, 0xe1, 0x7b, 0xfa, 0x30, 0x0a, 0xc2, 0x5d, 0x0c, 0x47, 0x6c, + 0x17, 0x77, 0x1f, 0xe5, 0xd8, 0x14, 0xfd, 0xc1, 0x01, 0x70, 0x51, 0x60, + 0xb2, 0x20, 0xfd, 0x86, 0xbc, 0x19, 0x5e, 0x01, 0xa6, 0x19, 0x3a, 0x21, + 0xa5, 0x0a, 0x1c, 0xd9, 0xa9, 0x78, 0xbb, 0xc9, 0x01, 0x65, 0xe4, 0xb3, + 0x48, 0xb8, 0xe1, 0xe7, 0xb5, 0xf4, 0x4e, 0xa9, 0xb6, 0xe2, 0x5b, 0xeb, + 0xf5, 0x76, 0x06, 0x1a, 0xd9, 0x08, 0x40, 0xff, 0x72, 0xb2, 0xe3, 0x01, + 0x50, 0xb1, 0xad, 0xb3, 0xa3, 0xf6, 0xef, 0x72, 0x05, 0x0c, 0xf4, 0xce, + 0x24, 0x2c, 0x63, 0x89, 0x63, 0x9e, 0x21, 0xb8, 0xb0, 0xbe, 0xc7, 0x45, + 0xae, 0x47, 0x2b, 0x9e, 0x61, 0x81, 0x4c, 0x76, 0x96, 0x7b, 0x18, 0x37, + 0x74, 0xcb, 0x00, 0xef, 0x38, 0x72, 0x24, 0x0a, 0x63, 0xc1, 0x64, 0xd6, + 0x41, 0xc8, 0x6a, 0xf1, 0xe7, 0x11, 0x20, 0x4b, 0xc2, 0x95, 0x70, 0xb8, + 0xf8, 0x8f, 0xd9, 0xae, 0x8c, 0x12, 0xd8, 0x6f, 0x63, 0x30, 0xca, 0x56, + 0x46, 0x11, 0xda, 0x49, 0x1f, 0x84, 0x3d, 0xae, 0xab, 0x78, 0x29, 0x02, + 0x6c, 0x43, 0xa3, 0xef, 0x9d, 0x97, 0x59, 0x15, 0x53, 0xcd, 0xc7, 0x47, + 0x65, 
0x30, 0xc7, 0xae, 0x31, 0x4a, 0x41, 0xb4, 0x66, 0x9c, 0xbb, 0x51, + 0x0b, 0xbd, 0xe2, 0x7d, 0x41, 0x2c, 0xd0, 0x75, 0x57, 0x93, 0xce, 0x2e, + 0xeb, 0x31, 0x7f, 0x56, 0xb2, 0xa4, 0x2b, 0x9f, 0xcc, 0xef, 0x6f, 0xf0, + 0x77, 0x19, 0xad, 0x4d, 0x2e, 0x37, 0x00, 0x75, 0x53, 0xae, 0x22, 0x44, + 0x69, 0x1c, 0x8a, 0x90, 0xf2, 0xcd, 0x0f, 0x6b, 0x37, 0xdb, 0xfd, 0x71, + 0x64, 0x80, 0xd8, 0x57, 0x1b, 0x8f, 0xff, 0x14, 0xd4, 0x5f, 0xe1, 0xd1, + 0x0f, 0x06, 0x13, 0x61, 0x29, 0xa9, 0x80, 0x9d, 0xc7, 0x8a, 0xa0, 0xb5, + 0xaa, 0xfc, 0xe0, 0xb4, 0xb4, 0xf0, 0x31, 0xf0, 0xec, 0x78, 0x03, 0x28, + 0xb9, 0xf7, 0xd9, 0xa7, 0xc8, 0xad, 0x2e, 0x16, 0xb8, 0x18, 0x82, 0x43, + 0x66, 0x8b, 0xae, 0xb2, 0x45, 0x2b, 0x0c, 0x9d, 0x69, 0xbd, 0x1b, 0xc5, + 0x20, 0xc6, 0x41, 0xe7, 0x4f, 0x4b, 0x7b, 0x46, 0x3d, 0x7a, 0x6d, 0x9f, + 0x13, 0x2e, 0x0f, 0xf3, 0x85, 0x3e, 0x5b, 0x12, 0xe5, 0xbf, 0x1b, 0x20, + 0xc3, 0x5f, 0x6b, 0xf7, 0xf7, 0xa3, 0xd7, 0x33, 0xd2, 0xcb, 0x18, 0xa5, + 0xa4, 0xa2, 0xd3, 0x59, 0x91, 0x9a, 0x04, 0xfa, 0x9d, 0xa5, 0x55, 0xad, + 0x09, 0x5a, 0x1e, 0x0b, 0x10, 0xd0, 0x46, 0x18, 0xe4, 0x09, 0xe8, 0x1b, + 0x44, 0xd3, 0x78, 0x45, 0xc0, 0xdf, 0xa2, 0xef, 0xfc, 0x59, 0x8a, 0x1b, + 0x22, 0x60, 0xc9, 0x58, 0x7d, 0x65, 0x45, 0xa9, 0xac, 0xd5, 0xd4, 0xc4, + 0x44, 0xd3, 0x08, 0x44, 0x40, 0x4d, 0x3d, 0x7e, 0x39, 0x81, 0x72, 0x15, + 0x49, 0xd7, 0x2c, 0xda, 0x33, 0xaf, 0xc5, 0xb5, 0x8a, 0x3c, 0xbf, 0x81, + 0x88, 0x4f, 0x12, 0xe4, 0xe8, 0xe6, 0x00, 0xb6, 0xd9, 0xcd, 0xb2, 0x70, + 0x08, 0x15, 0x72, 0xf6, 0x46, 0xc7, 0x98, 0x7c, 0x1d, 0x54, 0xd0, 0x66, + 0x2d, 0xa1, 0xd8, 0xda, 0xb0, 0xe5, 0x9f, 0xa3, 0x2f, 0x2c, 0xfb, 0x34, + 0xb3, 0x21, 0x8b, 0x61, 0xf4, 0xce, 0x60, 0x2b, 0xb5, 0x5e, 0x3d, 0x14, + 0x2c, 0xbe, 0x19, 0x9d, 0x5f, 0x01, 0xe1, 0x21, 0x34, 0x11, 0x6b, 0x10, + 0xd4, 0x17, 0x58, 0xb3, 0x0a, 0x30, 0xe4, 0x17, 0x51, 0x0b, 0xf2, 0xbb, + 0xa6, 0xb7, 0x00, 0xa2, 0xe8, 0xa5, 0xa3, 0x41, 0x1d, 0x65, 0x2d, 0x26, + 0x93, 0x26, 0x7d, 0xdc, 0xad, 0x6f, 0x83, 0xeb, 0x66, 0x55, 0xde, 0x60, + 0x21, 
0x56, 0x19, 0x4f, 0x9b, 0x7b, 0x26, 0x4a, 0x80, 0xf5, 0xab, 0x8b, + 0xbf, 0xe4, 0xb1, 0xa1, 0xd6, 0x33, 0x32, 0xbf, 0x86, 0x8c, 0x3c, 0xd0, + 0x12, 0x03, 0xd4, 0xb9, 0x23, 0x54, 0x1b, 0x94, 0x2f, 0xa5, 0x34, 0x4d, + 0x59, 0x18, 0x33, 0x8e, 0x8c, 0xf7, 0x1f, 0xc9, 0x6d, 0x75, 0xfb, 0x2a, + 0x22, 0x6c, 0x64, 0xb7, 0x79, 0xd8, 0x3b, 0xf6, 0x4e, 0x98, 0xd8, 0xa8, + 0x2c, 0x06, 0xd1, 0x92, 0x32, 0x44, 0xec, 0x38, 0x40, 0x3b, 0x53, 0x16, + 0x40, 0x8f, 0x92, 0x72, 0x87, 0xa8, 0xb8, 0xc0, 0x8f, 0x25, 0x4c, 0x4f, + 0x24, 0xfc, 0x8d, 0xc6, 0xa6, 0xeb, 0x2f, 0xdf, 0x2f, 0x0d, 0x2f, 0xd3, + 0x6e, 0x70, 0x71, 0xfe, 0xf0, 0x2e, 0xe9, 0x84, 0xd3, 0xc1, 0xd1, 0x70, + 0x4b, 0x8f, 0x7b, 0x60, 0xb0, 0xb7, 0xe3, 0x79, 0x52, 0x6a, 0x6b, 0x26, + 0x03, 0x8f, 0x6a, 0x0f, 0x8d, 0x85, 0xd7, 0x5f, 0xf7, 0x39, 0x31, 0x0e, + 0x26, 0x73, 0x84, 0x3f, 0x9b, 0x10, 0x6f, 0x29, 0x63, 0x14, 0x36, 0xa2, + 0xec, 0x44, 0x7d, 0x84, 0xc6, 0x4a, 0xec, 0xfe, 0xac, 0xcb, 0xe4, 0xfa, + 0xf6, 0x68, 0x83, 0x68, 0xe0, 0x8f, 0xd3, 0x8a, 0x60, 0x73, 0xf1, 0x5c, + 0x71, 0x02, 0x0c, 0xa2, 0x88, 0x2c, 0xa2, 0x35, 0x35, 0x5c, 0x3f, 0xb1, + 0xbe, 0xb3, 0x6b, 0x5c, 0xe1, 0x78, 0x75, 0x40, 0x20, 0x87, 0x67, 0xca, + 0x07, 0x1c, 0x9c, 0x02, 0xc7, 0xf2, 0x9d, 0x1c, 0xda, 0x1b, 0x86, 0x1b, + 0xc6, 0xa6, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, + 0x93, 0xca, 0x30, 0xae, 0xea, 0x26, 0x6a, 0x1b, 0x15, 0x46, 0x0a, 0xe3, + 0x57, 0x23, 0x4c, 0x0c, 0x98, 0x8e, 0x3e, 0xbb, 0x43, 0x14, 0x73, 0xdf, + 0x17, 0x91, 0xe2, 0xee, 0x39, 0xf9, 0xc2, 0x2f, 0xdc, 0xad, 0x0e, 0x00, + 0xf5, 0xdd, 0xe3, 0x97, 0xba, 0x8c, 0xee, 0x53, 0xc4, 0x70, 0x37, 0x46, + 0xcf, 0x04, 0xc3, 0xc8, 0x56, 0x38, 0x2e, 0x39, 0x75, 0x32, 0x6d, 0x98, + 0xc4, 0x14, 0xae, 0xa4, 0x29, 0xa3, 0xc6, 0xb6, 0x66, 0x45, 0x48, 0xdf, + 0xc0, 0xa9, 0x4b, 0x4f, 0xef, 0xb9, 0xb4, 0x89, 0x0d, 0x64, 0x00, 0x5c, + 0xd1, 0xc8, 0x2b, 0xf7, 0xc5, 0x1a, 0x1b, 0x06, 0xb7, 0x49, 0xb1, 0xe3, + 0x4d, 0x87, 0xf9, 0x3f, 0xba, 0x39, 0xa3, 0x56, 0x7f, 0x43, 0xcc, 0x15, + 0x9c, 
0x3d, 0xba, 0x71, 0x7b, 0xeb, 0x45, 0x0f, 0x15, 0x1b, 0x6c, 0x84, + 0x75, 0x6d, 0x43, 0x0b, 0x27, 0x12, 0x6b, 0xbc, 0x0a, 0x6d, 0xe4, 0xf6, + 0x4f, 0xc7, 0xbb, 0x9e, 0x91, 0xb5, 0x09, 0x5f, 0x79, 0x2a, 0xbf, 0xda, + 0x34, 0x91, 0x44, 0x47, 0x52, 0x64, 0x00, 0x89, 0x27, 0x17, 0x5c, 0xe9, + 0x90, 0x8b, 0xcb, 0xbe, 0x21, 0x47, 0x65, 0x1c, 0x54, 0x61, 0x48, 0x17, + 0x66, 0xb7, 0xa1, 0x60, 0x27, 0x31, 0x04, 0x42, 0x3b, 0x33, 0x3d, 0xda, + 0xf7, 0x61, 0x3d, 0x4b, 0x91, 0xa5, 0x74, 0x4b, 0xde, 0x16, 0xf2, 0x79, + 0x3e, 0xf7, 0x89, 0x87, 0xb3, 0xdd, 0xa2, 0x49, 0xd7, 0x54, 0x1b, 0x39, + 0xff, 0xb5, 0xec, 0x9d, 0x1d, 0x09, 0x7e, 0x5a, 0x3c, 0xd1, 0xdc, 0x0e, + 0x2a, 0x0e, 0x2c, 0x40, 0x4e, 0xa5, 0x8c, 0x9d, 0xc8, 0x9b, 0xa5, 0xb2, + 0x40, 0xa4, 0xaa, 0x3b, 0xac, 0x93, 0x19, 0xf7, 0xa1, 0x8b, 0xf8, 0x4a, + 0x40, 0x08, 0x5d, 0x1d, 0xb0, 0xae, 0x0f, 0x67, 0xa7, 0x21, 0xaf, 0xe3, + 0xb1, 0xfc, 0xff, 0xa0, 0x95, 0x66, 0x2b, 0xf7, 0x82, 0x2d, 0x8a, 0x26, + 0x0f, 0xc3, 0xed, 0x62, 0xb6, 0xcb, 0x4c, 0x86, 0xe9, 0x20, 0x78, 0x3f, + 0x08, 0x53, 0x8f, 0x41, 0xf1, 0xa1, 0x04, 0x77, 0xd9, 0xe6, 0xea, 0x26, + 0x6d, 0x33, 0x48, 0xb3, 0xbb, 0xed, 0xfc, 0xd7, 0xa3, 0x2b, 0xe2, 0x39, + 0xcf, 0x78, 0x4e, 0x11, 0x26, 0xad, 0x39, 0x83, 0x6e, 0x72, 0xbf, 0xc6, + 0x34, 0x23, 0x97, 0x5d, 0x7b, 0x64, 0x1e, 0x78, 0x00, 0x34, 0x92, 0x5d, + 0x3f, 0x23, 0x28, 0x60, 0x7f, 0x88, 0xf0, 0xca, 0x96, 0x4a, 0x15, 0xbf, + 0x8a, 0xb7, 0xd0, 0xd9, 0x99, 0x8b, 0xdb, 0x26, 0xdc, 0x7e, 0x8d, 0x35, + 0x53, 0x60, 0x07, 0x85, 0x80, 0xc4, 0x9c, 0x0d, 0x81, 0xe2, 0x93, 0x85, + 0x76, 0x2d, 0x85, 0x21, 0x6e, 0xda, 0x29, 0xe5, 0xb1, 0x08, 0x46, 0x09, + 0x1b, 0x8a, 0xd9, 0xd2, 0xd7, 0x16, 0x74, 0xee, 0x26, 0x3e, 0xc4, 0x8c, + 0x2e, 0x6b, 0x0c, 0xbc, 0x95, 0xea, 0x4a, 0xb2, 0xd6, 0x6f, 0x43, 0xd1, + 0x3a, 0x8f, 0xbd, 0x77, 0xb4, 0x67, 0x63, 0x6b, 0xd2, 0xe0, 0xf0, 0x81, + 0x74, 0xb7, 0xc5, 0x11, 0x60, 0x10, 0x6b, 0xc6, 0x0f, 0xfd, 0x84, 0x2e, + 0x5c, 0x8f, 0x3b, 0xf5, 0x68, 0xa7, 0x62, 0xc6, 0x4f, 0xa6, 0xee, 0x19, + 0x44, 
0xea, 0xc0, 0xe4, 0x64, 0x12, 0x71, 0x2f, 0xfb, 0xa3, 0x4d, 0xb0, + 0x8e, 0x5e, 0xe1, 0x79, 0x65, 0xd4, 0xf3, 0xed, 0x73, 0x04, 0xf1, 0x6d, + 0xc6, 0x75, 0x54, 0x28, 0x13, 0xe2, 0xd6, 0xa1, 0x26, 0xf9, 0xa4, 0x29, + 0x20, 0x5b, 0xd0, 0x3c, 0x3d, 0xf3, 0x7a, 0x18, 0x9a, 0x3d, 0xec, 0x6a, + 0x4c, 0xfd, 0xa5, 0x00, 0xdf, 0xec, 0xfd, 0x64, 0x38, 0x66, 0xa7, 0xba, + 0x59, 0xb3, 0x9b, 0x9c, 0x44, 0xfb, 0x10, 0x08, 0xb8, 0x79, 0xea, 0x85, + 0xbf, 0xa4, 0x14, 0xce, 0xce, 0x85, 0x22, 0x3f, 0x16, 0x00, 0x1c, 0x57, + 0xc8, 0x5a, 0x1b, 0xf5, 0xff, 0xde, 0x7e, 0xa9, 0xcc, 0xf3, 0xb5, 0x1d, + 0x57, 0x06, 0xda, 0xbb, 0x6c, 0x0a, 0x1e, 0xd4, 0x09, 0x74, 0x84, 0x1d, + 0xfa, 0xdf, 0x33, 0x1e, 0xe2, 0x8f, 0x10, 0xf7, 0x73, 0xab, 0x71, 0xb8, + 0x64, 0xce, 0xc0, 0x49, 0xc0, 0x36, 0xd3, 0x39, 0x31, 0x4c, 0x12, 0x5b, + 0xf3, 0xf9, 0xb4, 0x2c, 0x88, 0xba, 0xd4, 0x1a, 0xbd, 0x0c, 0x99, 0xbd, + 0x0e, 0xad, 0x51, 0xe0, 0xca, 0xdb, 0x25, 0x66, 0x83, 0xe0, 0x55, 0x18, + 0xeb, 0xa6, 0x4e, 0x56, 0xcb, 0x2f, 0xa5, 0xf2, 0x42, 0x7a, 0xa1, 0x05, + 0xf0, 0x3a, 0x71, 0x5a, 0x78, 0x3a, 0x7a, 0x6d, 0x12, 0x9f, 0x43, 0xc5, + 0xcc, 0xb3, 0xfd, 0xf2, 0xbf, 0x05, 0x16, 0xef, 0x07, 0xf9, 0xde, 0x0d, + 0x51, 0xf0, 0x33, 0x86, 0x43, 0x57, 0x40, 0xbc, 0xa9, 0xbd, 0xa0, 0x23, + 0xff, 0xbb, 0xe6, 0x15, 0xa1, 0xeb, 0xe9, 0x78, 0x0d, 0x72, 0x76, 0xf2, + 0xb6, 0x6e, 0x46, 0xe2, 0x86, 0xab, 0x3c, 0x52, 0x2c, 0xc6, 0x77, 0xdd, + 0x57, 0xf7, 0x4d, 0x36, 0xbb, 0x41, 0x08, 0x21, 0xaa, 0xe6, 0x44, 0x50, + 0xed, 0xaf, 0x18, 0xb3, 0xdd, 0x6b, 0x57, 0x46, 0x9e, 0x44, 0x93, 0x20, + 0xe0, 0x62, 0x95, 0xcd, 0xcf, 0xe4, 0x96, 0x92, 0xc3, 0x0d, 0x16, 0xb2, + 0xc3, 0xf4, 0x0f, 0x3f, 0x87, 0x17, 0xb9, 0x7b, 0x60, 0x60, 0xfa, 0xfb, + 0x81, 0x5c, 0xb3, 0xb7, 0x89, 0x73, 0xf7, 0x35, 0xf7, 0x27, 0xf1, 0x0e, + 0xa4, 0xa1, 0xba, 0xea, 0x6a, 0xe3, 0x5c, 0x0f, 0xf7, 0x15, 0xbc, 0x28, + 0x57, 0x27, 0x8f, 0xd8, 0xca, 0x82, 0x19, 0xd0, 0xa3, 0x9d, 0xe5, 0xe0, + 0x44, 0xbf, 0x78, 0xa4, 0x09, 0x69, 0x27, 0xa0, 0x69, 0xb5, 0xd4, 0xbe, + 0x00, 
0xe6, 0x03, 0x97, 0xbc, 0x8b, 0xfc, 0x25, 0x70, 0xb3, 0x49, 0x30, + 0xe3, 0x24, 0x19, 0x77, 0xb4, 0x93, 0x46, 0x03, 0xe6, 0x22, 0xaf, 0x76, + 0xd2, 0x90, 0x00, 0x05, 0x46, 0xb8, 0xa4, 0xf5, 0x4c, 0xaa, 0x04, 0x63, + 0xa0, 0x57, 0xe0, 0x20, 0x6e, 0x1a, 0xed, 0x21, 0x86, 0xd0, 0x38, 0x5b, + 0xe6, 0xa7, 0xb0, 0xe7, 0x75, 0xe3, 0x76, 0xb3, 0x15, 0x8b, 0xdc, 0x10, + 0x52, 0x15, 0x21, 0x7b, 0xd0, 0xc4, 0x75, 0x26, 0x1d, 0x6e, 0x0d, 0x4c, + 0x08, 0x5b, 0x95, 0x9a, 0xd0, 0xda, 0xbe, 0x23, 0x98, 0xde, 0x60, 0x2a, + 0xe9, 0xa4, 0x92, 0xf0, 0x92, 0x84, 0xdc, 0x86, 0x60, 0xf5, 0x23, 0x31, + 0xf5, 0xe9, 0xd6, 0x00, 0xc1, 0x78, 0xab, 0x05, 0x94, 0xd3, 0x47, 0x4d, + 0x32, 0x0f, 0x82, 0xa0, 0x99, 0x0b, 0xfe, 0x6b, 0x58, 0xf9, 0x24, 0xf6, + 0x17, 0xa0, 0x5f, 0x24, 0x6a, 0xc6, 0x01, 0xa8, 0xfa, 0xca, 0xdc, 0xb6, + 0x83, 0xcb, 0xd2, 0x3b, 0xb7, 0x0b, 0x04, 0x3e, 0x6a, 0xaf, 0x23, 0x17, + 0x3e, 0x14, 0xce, 0x52, 0x1c, 0xe3, 0x06, 0x66, 0x29, 0x17, 0x6f, 0x7e, + 0x66, 0x06, 0xa9, 0x68, 0x7f, 0xca, 0xad, 0xa8, 0xb7, 0x2d, 0xa4, 0x5d, + 0xa6, 0x16, 0xcd, 0xed, 0xee, 0x14, 0x96, 0xc8, 0x12, 0x69, 0x4e, 0x70, + 0x72, 0x2a, 0x75, 0x82, 0x08, 0x3f, 0x3e, 0x27, 0xa0, 0xea, 0x43, 0x84, + 0xa9, 0x9a, 0x91, 0x87, 0x4f, 0x20, 0x61, 0x55, 0x8d, 0x70, 0xad, 0x6c, + 0x59, 0x5d, 0x13, 0x80, 0xbb, 0x52, 0x55, 0x81, 0x8b, 0x59, 0x94, 0x0f, + 0xc2, 0x54, 0x79, 0x59, 0xe8, 0x9d, 0x58, 0xe5, 0x91, 0x10, 0xb3, 0xef, + 0x1c, 0xda, 0xaa, 0xdd, 0x91, 0x0b, 0xb0, 0x14, 0x3b, 0xad, 0x02, 0x98, + 0x40, 0x3c, 0x54, 0xc4, 0x23, 0xb9, 0x40, 0x54, 0x7e, 0x88, 0x10, 0x3e, + 0x24, 0xe5, 0xf6, 0xdf, 0x5c, 0x9e, 0x7a, 0x9f, 0xd0, 0xff, 0x5e, 0x9c, + 0xb6, 0x30, 0x17, 0x94, 0xd2, 0xaa, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x00, 0x00, 0x96, 0xff, 0x2f, 0x01, 0x60, 0x2c, 0x1b, 0xe3, + 0xc6, 0xcb, 0xa4, 0x41, 0xa1, 0x44, 0x13, 0x14, 0xe2, 0x44, 0x77, 0x1c, + 0x96, 0xe8, 0xe6, 0x4f, 0x70, 0x99, 0x3a, 0xef, 0xa1, 0x6f, 0x1f, 0x7f, + 0xb9, 0xe9, 0x1e, 0x35, 0x37, 0x5b, 0x94, 0x90, 0x78, 0xcc, 0x8d, 0xcd, + 0x6c, 
0x9f, 0xf6, 0x73, 0xed, 0x23, 0xa2, 0x28, 0x64, 0x58, 0x50, 0x64, + 0x05, 0xbc, 0xc9, 0x9b, 0x5a, 0xec, 0x3f, 0x2b, 0x61, 0xcf, 0xa7, 0x35, + 0x56, 0x8c, 0x77, 0x68, 0xd6, 0xcf, 0x9b, 0xc5, 0x62, 0xee, 0x3a, 0xb2, + 0xfe, 0x78, 0xba, 0x02, 0xe7, 0x26, 0x8a, 0x89, 0x30, 0x19, 0xcc, 0xb0, + 0x98, 0xbf, 0x30, 0x2c, 0xae, 0x13, 0x6c, 0x93, 0x86, 0x19, 0x84, 0x13, + 0x01, 0x2f, 0x39, 0x4e, 0x33, 0xd1, 0x15, 0x99, 0xf7, 0x1e, 0xb8, 0x86, + 0xdb, 0xb6, 0xf9, 0x56, 0x42, 0x0e, 0x4a, 0xb1, 0x5e, 0xf0, 0x9a, 0x06, + 0x5e, 0xab, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, + 0xcd, 0xde, 0xad, 0x40, 0x34, 0xcd, 0x79, 0x0a, 0x29, 0x84, 0x05, 0x3f, + 0xb5, 0xbe, 0x49, 0x84, 0x43, 0xcc, 0xa6, 0xe3, 0xe9, 0xdc, 0x84, 0x14, + 0xe7, 0xb3, 0x1b, 0x96, 0xe8, 0xda, 0x35, 0x15, 0x38, 0xf5, 0xb3, 0xb5, + 0x91, 0xc3, 0xc3, 0x94, 0xc6, 0x79, 0xeb, 0xf5, 0x22, 0x78, 0xf0, 0x0b, + 0xda, 0xb0, 0x91, 0xa7, 0x43, 0x71, 0x8e, 0xa6, 0x52, 0x0f, 0x81, 0x06, + 0xc8, 0xdf, 0xb5, 0x1f, 0x92, 0xb0, 0xfe, 0x93, 0x38, 0x4c, 0xf4, 0x17, + 0x66, 0x31, 0xea, 0x08, 0x72, 0xb9, 0xaa, 0xfd, 0x40, 0x8d, 0xbf, 0x56, + 0x19, 0xb1, 0xb5, 0x8e, 0x4e, 0x4e, 0x73, 0x7f, 0x4b, 0x0c, 0x70, 0x94, + 0x7c, 0x9f, 0xfc, 0x23, 0x35, 0xba, 0xd2, 0x23, 0x88, 0x1d, 0x83, 0x28, + 0x45, 0xd7, 0x1b, 0x63, 0xfb, 0x36, 0x86, 0x06, 0xf3, 0x99, 0x81, 0x6e, + 0xd7, 0xf1, 0xd4, 0x53, 0x6d, 0x30, 0x3c, 0x8d, 0xac, 0xc6, 0x9a, 0xd5, + 0xe8, 0x4f, 0x11, 0x58, 0xba, 0xfd, 0x67, 0x06, 0xe7, 0x1a, 0xb4, 0xa1, + 0x45, 0x13, 0xf2, 0x3b, 0xdc, 0x71, 0xf0, 0xc6, 0x53, 0xfc, 0x8b, 0x2f, + 0x14, 0xe4, 0xe0, 0xd6, 0x8c, 0x96, 0x4c, 0x48, 0xc0, 0x30, 0x6e, 0x00, + 0x0f, 0x42, 0xfe, 0xa7, 0x9d, 0x0f, 0xf2, 0x52, 0x58, 0xf9, 0x35, 0x33, + 0x99, 0xda, 0xd5, 0x9d, 0x61, 0x26, 0x6b, 0x80, 0xff, 0x08, 0x51, 0x54, + 0x26, 0xfa, 0x8d, 0xfc, 0x67, 0x60, 0x93, 0x0e, 0xcd, 0x78, 0x41, 0x5a, + 0x31, 0x47, 0x14, 0xb0, 0x65, 0x89, 0x30, 0xcb, 0x0c, 0xc5, 0xa0, 0x37, + 0xa8, 0xe0, 0xcf, 0x24, 0xa4, 0x2f, 0xad, 0xa7, 0x9c, 0xa2, 0xe8, 0x81, + 0x17, 
0xbe, 0x2f, 0xd5, 0xd1, 0xa8, 0xff, 0x9d, 0x5e, 0x7f, 0xd9, 0x6c, + 0x56, 0xe6, 0xc4, 0x60, 0x8d, 0xa5, 0x47, 0x5e, 0x43, 0x1e, 0x34, 0x23, + 0xb3, 0x6a, 0xdf, 0x6c, 0xf8, 0xd1, 0x85, 0x11, 0xaa, 0x74, 0x85, 0x71, + 0x27, 0xc5, 0x80, 0x37, 0x60, 0xb4, 0x2b, 0x53, 0x5a, 0xc4, 0x35, 0xd1, + 0xe8, 0x4b, 0x01, 0x58, 0x1f, 0xdb, 0x73, 0xf3, 0x2c, 0x8b, 0xbb, 0x17, + 0x36, 0x76, 0x35, 0x6b, 0xa0, 0x82, 0x47, 0xf5, 0x16, 0x21, 0x41, 0x43, + 0xc9, 0x1f, 0x53, 0xf9, 0xe9, 0x47, 0xf0, 0x9c, 0x6d, 0xe3, 0x23, 0x59, + 0x74, 0xdc, 0x1a, 0x8f, 0x4e, 0x6c, 0x71, 0x83, 0x7e, 0xd0, 0x2b, 0x50, + 0x44, 0x86, 0x5f, 0xbf, 0x60, 0x92, 0xeb, 0x9a, 0x9b, 0xa2, 0xc9, 0x2b, + 0xa8, 0xc4, 0x77, 0x4e, 0x3f, 0xf8, 0xa6, 0x39, 0x50, 0x5c, 0x7e, 0x2a, + 0x70, 0xb0, 0x5d, 0x28, 0xb2, 0x81, 0xa9, 0xaf, 0x16, 0x5e, 0x27, 0xeb, + 0x03, 0x0e, 0x82, 0xad, 0x28, 0x51, 0x16, 0xd1, 0xf4, 0x58, 0x75, 0x1a, + 0xf9, 0x6a, 0xbf, 0x73, 0xd7, 0x84, 0x07, 0x7f, 0x4c, 0x4e, 0x29, 0x02, + 0x9b, 0x60, 0x81, 0x85, 0xa9, 0xbf, 0xc7, 0xa0, 0x8f, 0x8a, 0xdc, 0xa4, + 0xc5, 0x17, 0x51, 0x24, 0x15, 0x28, 0x9e, 0x5e, 0x78, 0x84, 0x21, 0x02, + 0xca, 0x26, 0x61, 0x4e, 0x95, 0xa6, 0x8d, 0xa6, 0x98, 0x7d, 0x1f, 0x84, + 0x19, 0x24, 0x8b, 0x31, 0x76, 0x89, 0x2a, 0x5f, 0xa9, 0xfb, 0xaa, 0x8a, + 0x8c, 0xce, 0xe4, 0x30, 0xd6, 0xec, 0x5b, 0x39, 0xb7, 0x09, 0x80, 0x23, + 0x4c, 0xe1, 0x6e, 0x8f, 0x7c, 0x10, 0xe8, 0x8a, 0x60, 0x35, 0xd7, 0xa3, + 0xe0, 0x5f, 0xcd, 0xfa, 0x3d, 0x8f, 0xd8, 0x5d, 0xec, 0xc9, 0xc5, 0xa0, + 0x73, 0x41, 0x89, 0xe5, 0x39, 0xf2, 0x42, 0xff, 0x08, 0xa0, 0x12, 0xb7, + 0x4a, 0x5e, 0x46, 0x06, 0x31, 0xbd, 0x88, 0x5e, 0x9e, 0x05, 0x17, 0x51, + 0xb3, 0xe7, 0x88, 0x10, 0x19, 0x32, 0xff, 0x8a, 0x1e, 0xce, 0x66, 0xbc, + 0x84, 0x1f, 0xed, 0x52, 0x52, 0x77, 0xe1, 0x5e, 0xa6, 0x21, 0xe4, 0xad, + 0x59, 0xca, 0xa3, 0x77, 0xea, 0x66, 0x28, 0x15, 0x73, 0x3a, 0xfd, 0xe4, + 0x75, 0x46, 0x99, 0x59, 0x5c, 0x7a, 0x9b, 0x9d, 0x11, 0xb4, 0x76, 0x45, + 0x06, 0x45, 0x41, 0x1e, 0x94, 0xb7, 0xd9, 0xb8, 0xcb, 0xbf, 0x71, 0xec, + 0xba, 
0x9f, 0x4a, 0x1b, 0xbc, 0xfd, 0x5c, 0x06, 0x64, 0xfd, 0x31, 0x52, + 0xc0, 0xe4, 0xa7, 0x21, 0x2f, 0x22, 0x92, 0xf0, 0x51, 0x33, 0x92, 0x1d, + 0x40, 0x3c, 0x01, 0x81, 0x3b, 0xa8, 0x2e, 0x4e, 0xb6, 0x60, 0xcd, 0xd4, + 0x36, 0x3b, 0x2e, 0x1d, 0x5e, 0x43, 0xd9, 0x94, 0xf1, 0x51, 0xd3, 0x59, + 0x94, 0x6a, 0xd5, 0x5f, 0x1f, 0xd3, 0xa6, 0x55, 0xda, 0x15, 0xf1, 0x3e, + 0x2c, 0x60, 0xb8, 0xc3, 0xda, 0x0e, 0x56, 0x53, 0xea, 0xcd, 0x39, 0x27, + 0x94, 0x86, 0x94, 0xb2, 0x5b, 0xd8, 0x9a, 0x12, 0x94, 0xb0, 0xb6, 0x77, + 0x28, 0xba, 0xde, 0xb6, 0x60, 0x4d, 0x2b, 0x6e, 0x3d, 0xf6, 0xf1, 0x48, + 0xf7, 0x77, 0xa1, 0x49, 0xe0, 0x9f, 0x1e, 0xc9, 0xe6, 0xcb, 0x95, 0x26, + 0x61, 0x5a, 0xc9, 0xed, 0x49, 0x40, 0x17, 0x57, 0x15, 0xfc, 0x3c, 0xb8, + 0x28, 0x79, 0xb8, 0x42, 0x2a, 0xf9, 0xd4, 0x19, 0xb9, 0x5f, 0x41, 0xc2, + 0x25, 0xd7, 0x88, 0x34, 0xb3, 0x25, 0x4e, 0xca, 0xff, 0x9e, 0x59, 0x9a, + 0x33, 0xc8, 0x12, 0xf9, 0xd5, 0x70, 0xc0, 0x8b, 0x43, 0x13, 0xc4, 0x8d, + 0x45, 0x99, 0xaa, 0xd7, 0xeb, 0xb1, 0xe9, 0xb7, 0x5b, 0xab, 0x48, 0xd1, + 0x26, 0x60, 0x8c, 0x13, 0x55, 0x8a, 0x41, 0xd3, 0x68, 0x58, 0xd4, 0xa6, + 0x30, 0x6e, 0x88, 0x3e, 0x81, 0x6e, 0x61, 0x06, 0x13, 0x66, 0xd5, 0x8e, + 0x5d, 0x87, 0x4f, 0xd9, 0xb1, 0x66, 0xb3, 0xc5, 0x88, 0xa9, 0xc0, 0x73, + 0xcb, 0x7f, 0x42, 0xec, 0x96, 0x64, 0xad, 0x72, 0x85, 0x72, 0xaf, 0xeb, + 0xa9, 0xc4, 0x17, 0x86, 0xab, 0xe7, 0x23, 0xd7, 0x96, 0xf7, 0xb2, 0xb3, + 0x51, 0xe1, 0x9a, 0x3b, 0x0e, 0xaf, 0x89, 0xca, 0x7b, 0xf1, 0x70, 0x7b, + 0xc7, 0x82, 0xfc, 0xc7, 0x6c, 0x37, 0xd9, 0x7b, 0x82, 0x0f, 0x94, 0xcf, + 0xd1, 0xa9, 0x33, 0xc2, 0xa4, 0xab, 0xed, 0xad, 0xee, 0x64, 0x5d, 0x04, + 0xf2, 0xcb, 0x8e, 0x99, 0x22, 0x33, 0x69, 0x85, 0x85, 0xb6, 0x1a, 0x9b, + 0x09, 0x18, 0xbe, 0xcd, 0x63, 0xf6, 0x5d, 0x52, 0xbc, 0x26, 0x99, 0x3e, + 0x52, 0xe5, 0x0c, 0xc5, 0xee, 0xdd, 0xbb, 0x07, 0xbc, 0x38, 0xc1, 0x67, + 0x96, 0x8c, 0xe6, 0xe4, 0x18, 0xfa, 0x07, 0x91, 0x48, 0xef, 0x9c, 0x70, + 0x9d, 0x5b, 0x1c, 0x0e, 0xd5, 0xd3, 0x59, 0xee, 0x44, 0x13, 0xf7, 0x00, + 0xa6, 
0x20, 0xad, 0x65, 0x1d, 0xb7, 0x96, 0x2f, 0x79, 0x7b, 0x04, 0xa3, + 0x10, 0x90, 0x29, 0x8c, 0xa3, 0x2e, 0x14, 0x39, 0xd3, 0xe4, 0x6e, 0x46, + 0xf7, 0x6e, 0x96, 0x68, 0xd9, 0xef, 0x45, 0xf7, 0x3c, 0xcd, 0xc7, 0xca, + 0x33, 0x64, 0x8e, 0x31, 0x80, 0x48, 0x7b, 0x7c, 0x81, 0x9a, 0x48, 0xff, + 0xd5, 0x0d, 0x74, 0xe7, 0x77, 0x46, 0x61, 0x9b, 0xde, 0xed, 0x83, 0xe9, + 0x4f, 0x92, 0xc1, 0x16, 0xad, 0x44, 0x40, 0x23, 0xce, 0x04, 0x31, 0xbf, + 0xcf, 0xe2, 0x5a, 0x68, 0x5a, 0xf4, 0x0f, 0xe1, 0x87, 0x79, 0xb0, 0x32, + 0x0b, 0x09, 0x6b, 0x72, 0x2b, 0x16, 0x06, 0x67, 0x82, 0x0b, 0x92, 0x35, + 0xdb, 0x4c, 0xe2, 0x4a, 0x60, 0x99, 0xaf, 0x52, 0x10, 0x4b, 0xa5, 0xcf, + 0xac, 0x66, 0x49, 0x56, 0x04, 0xc0, 0xd6, 0x6f, 0x62, 0x53, 0x6f, 0xcb, + 0x62, 0xe9, 0xa5, 0xca, 0x18, 0x8e, 0x86, 0x3f, 0x36, 0xfd, 0xea, 0x55, + 0x16, 0x6d, 0x6c, 0x6a, 0x8f, 0xa7, 0x9c, 0x70, 0x15, 0xd7, 0xf4, 0x57, + 0x68, 0x04, 0x84, 0x60, 0x3b, 0xb0, 0x32, 0xc4, 0xea, 0x9d, 0x70, 0xb9, + 0xa6, 0x34, 0xe5, 0xfa, 0xa1, 0x24, 0x54, 0x7f, 0xef, 0xac, 0xb4, 0x5f, + 0xa0, 0xc0, 0x40, 0x3f, 0x73, 0xdf, 0x56, 0xa6, 0xd9, 0x17, 0xf4, 0xff, + 0x50, 0xae, 0x21, 0x0d, 0x5a, 0xe0, 0xb0, 0xf9, 0x5b, 0x7a, 0x61, 0x6e, + 0xa6, 0x85, 0x85, 0xbf, 0x19, 0x03, 0xe2, 0x74, 0x1f, 0x03, 0x70, 0x76, + 0x3c, 0xed, 0x02, 0x7d, 0xfa, 0xf9, 0x1e, 0x17, 0xdd, 0x42, 0x30, 0xf0, + 0x32, 0x47, 0x46, 0xae, 0xf5, 0x64, 0xe6, 0x5e, 0x2b, 0x40, 0x86, 0x97, + 0xb1, 0x24, 0x52, 0x69, 0x67, 0x79, 0x8e, 0x0d, 0xcc, 0x07, 0xcb, 0x72, + 0x29, 0xe9, 0xba, 0x2d, 0xf7, 0xcb, 0xe3, 0x86, 0x06, 0xaa, 0x6d, 0x79, + 0xf8, 0xb6, 0x93, 0x0a, 0x9c, 0x97, 0xef, 0x47, 0x37, 0x13, 0x2e, 0x6b, + 0xfd, 0x59, 0x0c, 0xc9, 0x5e, 0x5e, 0xcd, 0x71, 0x6f, 0x99, 0x0d, 0x88, + 0x9d, 0xbb, 0x7c, 0x2b, 0x22, 0xd5, 0xbe, 0xee, 0x26, 0x1c, 0xe1, 0xad, + 0xc8, 0x4d, 0x5f, 0x6b, 0xd1, 0xf4, 0x30, 0x4d, 0x46, 0x1d, 0x54, 0x11, + 0x4b, 0xa0, 0x7f, 0x94, 0x71, 0xc0, 0x44, 0x4a, 0x42, 0x11, 0xf5, 0x89, + 0xec, 0xb5, 0x24, 0x45, 0xf1, 0xf0, 0x30, 0x54, 0xf8, 0x62, 0xdb, 0x58, + 0x3d, 
0x7c, 0x2a, 0x82, 0xe5, 0xbe, 0x13, 0xcf, 0xdc, 0x88, 0xfb, 0xd3, + 0x1e, 0x4d, 0xa5, 0x3e, 0xad, 0x95, 0xa2, 0xe6, 0x48, 0x73, 0xb2, 0xbe, + 0x96, 0xef, 0x8e, 0x0b, 0x28, 0xf9, 0xbe, 0x2a, 0xd6, 0x68, 0x9e, 0x9c, + 0x7b, 0x5a, 0xaf, 0x20, 0xf6, 0xa5, 0x3f, 0x99, 0x61, 0x57, 0xe8, 0x1c, + 0xb2, 0xc3, 0xd0, 0x7f, 0x2c, 0xb5, 0xe9, 0x66, 0x8e, 0x88, 0xec, 0x13, + 0x51, 0xbc, 0x8e, 0xb6, 0xe2, 0x91, 0xbf, 0x5e, 0x8c, 0x1c, 0xdd, 0x0e, + 0x0a, 0x13, 0x06, 0xc6, 0x62, 0x1c, 0x41, 0x8d, 0xa1, 0xc0, 0xf2, 0xfa, + 0x76, 0x35, 0xaa, 0x77, 0x06, 0x3f, 0x76, 0x50, 0xf6, 0x43, 0xf2, 0x25, + 0x00, 0x79, 0xde, 0xca, 0xa1, 0x06, 0x6f, 0xb4, 0x17, 0x4b, 0x99, 0x5a, + 0x00, 0x32, 0xd6, 0xb0, 0x1f, 0x80, 0x53, 0x16, 0xaa, 0x87, 0x72, 0xa2, + 0x34, 0xaf, 0x90, 0x3d, 0x60, 0xde, 0x0e, 0x6d, 0x83, 0xda, 0xb2, 0x11, + 0x2f, 0x39, 0xdc, 0x1a, 0xfe, 0x51, 0x74, 0x10, 0x3c, 0x41, 0xd5, 0x41, + 0x65, 0x4a, 0xa0, 0x11, 0xde, 0x95, 0x34, 0xef, 0xa0, 0xc9, 0xa8, 0xd3, + 0xcb, 0xb9, 0x7d, 0x51, 0x7d, 0xff, 0x26, 0x88, 0xd8, 0x29, 0x0e, 0xa0, + 0xd4, 0xa7, 0x07, 0x33, 0xe7, 0x7d, 0x59, 0x9f, 0x35, 0xc1, 0xb5, 0xf7, + 0x78, 0x78, 0x84, 0xf0, 0x20, 0x41, 0x3f, 0x02, 0x7d, 0x41, 0x90, 0x01, + 0x8d, 0xa4, 0xd8, 0xd7, 0xeb, 0x56, 0x7f, 0x38, 0xbc, 0x1e, 0x15, 0xdf, + 0xfc, 0x34, 0xe7, 0x99, 0xd4, 0x92, 0xd5, 0xf3, 0x9e, 0x16, 0x0b, 0x5c, + 0xeb, 0xb6, 0x78, 0xac, 0x84, 0x06, 0x8e, 0xfe, 0xd0, 0x7c, 0xce, 0x4a, + 0x43, 0x49, 0x3b, 0xe1, 0xab, 0x57, 0xc0, 0x12, 0xd6, 0x9d, 0xa4, 0xee, + 0x91, 0x10, 0x81, 0xe2, 0xfc, 0x02, 0x26, 0x7a, 0xca, 0x81, 0x5b, 0x2f, + 0x34, 0x51, 0xdd, 0x25, 0x4d, 0xc8, 0xf9, 0x3e, 0x59, 0x0f, 0x3d, 0x64, + 0x51, 0xbf, 0x42, 0xc4, 0x92, 0x9d, 0x8f, 0x39, 0x8a, 0x31, 0x09, 0x24, + 0x19, 0x44, 0xc0, 0xf4, 0xea, 0xca, 0x59, 0xcb, 0x86, 0x6c, 0x02, 0x7a, + 0xe5, 0x30, 0x79, 0xe2, 0x2c, 0x76, 0x08, 0x8f, 0x98, 0x0d, 0x4d, 0x12, + 0xc3, 0x98, 0xb4, 0x24, 0x04, 0x4f, 0x51, 0xec, 0x4e, 0xec, 0xbd, 0x8c, + 0xc4, 0x79, 0x51, 0x7f, 0xe1, 0xce, 0x76, 0x28, 0x0b, 0x7b, 0xc5, 0x3f, + 0x5b, 
0x48, 0x19, 0x76, 0x68, 0x31, 0x8e, 0x28, 0xff, 0x18, 0x24, 0xe3, + 0x91, 0xe7, 0x49, 0x0d, 0x10, 0xbd, 0x00, 0xc6, 0x58, 0xfd, 0xb6, 0x88, + 0x63, 0xbd, 0xb4, 0x4b, 0xb8, 0xed, 0xdd, 0xb7, 0x53, 0xce, 0x89, 0xdb, + 0x7f, 0xf4, 0xc3, 0x21, 0x31, 0xad, 0x20, 0x78, 0x06, 0x71, 0xaf, 0xc0, + 0xe3, 0xdc, 0xb8, 0xf4, 0x80, 0xc8, 0x33, 0x1d, 0x8b, 0xff, 0x5a, 0x92, + 0x68, 0x4d, 0xc1, 0x5b, 0x58, 0x3e, 0xf6, 0x7f, 0xba, 0x42, 0xa5, 0x6d, + 0xec, 0x03, 0x36, 0xc9, 0x3f, 0x83, 0x1f, 0x0c, 0x33, 0x57, 0x6a, 0x43, + 0x5f, 0x11, 0x72, 0x19, 0x2c, 0xda, 0x71, 0x58, 0xf2, 0x50, 0x50, 0x06, + 0x97, 0xd0, 0xdf, 0xd1, 0x4f, 0x0b, 0x00, 0x1a, 0xea, 0x85, 0x3b, 0x37, + 0x2f, 0xf0, 0x40, 0x52, 0xd9, 0x2a, 0xe8, 0x54, 0xa5, 0xee, 0x0f, 0x49, + 0x74, 0x39, 0x96, 0x5d, 0x60, 0x8f, 0x14, 0x59, 0x86, 0x59, 0x86, 0xfb, + 0x67, 0x71, 0x5c, 0x26, 0x5f, 0xe9, 0xab, 0x32, 0x77, 0x83, 0xdf, 0x02, + 0x19, 0x85, 0xae, 0x4d, 0x7d, 0x9c, 0x8d, 0x4f, 0x61, 0x05, 0x3c, 0x0c, + 0xc6, 0x74, 0x9e, 0x36, 0x33, 0xb8, 0x14, 0x85, 0xab, 0xa2, 0x0b, 0x5d, + 0x22, 0xf2, 0x50, 0x3e, 0xa4, 0x88, 0xac, 0x67, 0xf9, 0x06, 0xe5, 0x30, + 0x8e, 0xf9, 0x67, 0x34, 0xd5, 0x94, 0x5b, 0x35, 0xb7, 0x3d, 0x39, 0x5f, + 0x4e, 0xae, 0xfe, 0xf7, 0x57, 0xd3, 0x95, 0x7b, 0x0a, 0xd9, 0x92, 0x4a, + 0x66, 0x29, 0xa0, 0x18, 0x35, 0x54, 0x14, 0x44, 0x79, 0x72, 0xc3, 0xbc, + 0xa8, 0x1a, 0xd3, 0xa3, 0xbe, 0x6f, 0x9e, 0xcc, 0x68, 0xb6, 0x5f, 0xd4, + 0x42, 0xab, 0xe8, 0x09, 0x60, 0x57, 0x2e, 0xb2, 0x9a, 0x5b, 0x62, 0x38, + 0xfb, 0x0a, 0x35, 0x9c, 0x4f, 0xf7, 0xe0, 0xd2, 0x06, 0x04, 0x1f, 0x79, + 0x7f, 0xa7, 0x7b, 0xd3, 0x63, 0xc9, 0xbd, 0x16, 0x58, 0x38, 0x7b, 0xaa, + 0x08, 0xf3, 0x14, 0x6c, 0x25, 0xf8, 0xa5, 0xe9, 0x4b, 0x45, 0x34, 0x89, + 0x76, 0x74, 0xcb, 0x41, 0x9c, 0x2a, 0xd9, 0xca, 0xb3, 0x12, 0x46, 0x6d, + 0x85, 0x4d, 0x63, 0x2d, 0x24, 0x1b, 0x19, 0x6b, 0x3f, 0x61, 0x6b, 0x4b, + 0x15, 0x83, 0x2d, 0x8f, 0x61, 0xab, 0xd1, 0x55, 0x93, 0x4e, 0x26, 0xd6, + 0x7a, 0x0a, 0x8a, 0xff, 0x58, 0x44, 0xf7, 0x39, 0x31, 0x1a, 0xab, 0xa6, + 0x98, 
0x31, 0x41, 0x03, 0xb6, 0xc9, 0xf5, 0x50, 0xe3, 0x7b, 0xc0, 0x59, + 0x74, 0x60, 0x91, 0xb4, 0x79, 0x02, 0x25, 0xc1, 0xb5, 0xbd, 0xcb, 0x6e, + 0x40, 0x61, 0xfe, 0x68, 0x29, 0x83, 0x1b, 0xd2, 0x49, 0xe1, 0x31, 0xde, + 0xdd, 0x53, 0xb0, 0xb8, 0x96, 0xa2, 0xce, 0xea, 0x8b, 0x66, 0x2c, 0x5a, + 0x80, 0x51, 0x0b, 0xc1, 0x2d, 0x9a, 0xfa, 0x9d, 0xc6, 0xcc, 0x2b, 0xbb, + 0xaa, 0xce, 0x98, 0xaa, 0x26, 0x15, 0x8f, 0x4a, 0xe7, 0xdb, 0x17, 0x6c, + 0xe5, 0x58, 0xc9, 0xae, 0xe4, 0x9c, 0x1d, 0xab, 0x59, 0x84, 0x3e, 0x27, + 0x76, 0x03, 0xe3, 0x82, 0x64, 0x6f, 0x6e, 0x6f, 0x63, 0xd2, 0x12, 0x84, + 0xe3, 0x9b, 0x9d, 0x7e, 0x53, 0x1a, 0x54, 0x8d, 0xc1, 0xf0, 0x94, 0xae, + 0xad, 0x8f, 0x6a, 0x12, 0x4e, 0xa7, 0x30, 0xdb, 0x55, 0xbe, 0x09, 0xe2, + 0x56, 0x08, 0xc4, 0x3a, 0xb0, 0x55, 0xb0, 0x24, 0x96, 0xa6, 0x3e, 0x28, + 0xd0, 0x35, 0xfb, 0x58, 0x47, 0xba, 0x2d, 0x51, 0xbb, 0x72, 0x20, 0x59, + 0xd2, 0xdd, 0x9c, 0xe2, 0xb5, 0x31, 0x90, 0xac, 0x74, 0x5d, 0x9f, 0x3d, + 0x8c, 0x1c, 0x96, 0xc0, 0x60, 0x61, 0xa8, 0xbb, 0x3c, 0xb3, 0x6d, 0x6d, + 0x92, 0x4a, 0xca, 0xbb, 0x60, 0x5e, 0x82, 0x0d, 0x7f, 0xab, 0x4b, 0x36, + 0x4c, 0x93, 0x0d, 0x88, 0x71, 0xaf, 0xb6, 0x53, 0xb0, 0x38, 0xb4, 0x1c, + 0xb4, 0x7b, 0xd4, 0x13, 0x32, 0x6c, 0xe4, 0xee, 0x6a, 0xb3, 0xff, 0xff, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x88, 0x83, 0x91, 0x4c, + 0x2e, 0x1e, 0xbe, 0xa4, 0xb5, 0x96, 0xff, 0x67, 0x50, 0xe9, 0x81, 0x0e, + 0x5d, 0x0e, 0xad, 0xc4, 0x1f, 0xeb, 0x98, 0x38, 0xcc, 0x54, 0x9d, 0x27, + 0xa6, 0xf1, 0x37, 0x23, 0xce, 0xb4, 0x5b, 0xff, 0x12, 0xb1, 0xb8, 0x35, + 0x5e, 0x03, 0x02, 0x04, 0xad, 0xa6, 0x6f, 0x43, 0xfc, 0xe4, 0xbe, 0x0c, + 0xe0, 0x93, 0xd5, 0xef, 0x09, 0xfa, 0x04, 0xe9, 0x5a, 0x22, 0xd4, 0x81, + 0xc1, 0x27, 0x4f, 0x5f, 0x6e, 0x83, 0x5a, 0x8a, 0x2d, 0xbb, 0x8f, 0xa4, + 0x91, 0xcc, 0x82, 0x37, 0x3b, 0x14, 0x98, 0x58, 0x86, 0x44, 0xb7, 0xa9, + 0x58, 0xf3, 0x3d, 0x49, 0x71, 0x7a, 0x37, 0xcd, 0xc5, 0xb9, 0xc9, 0x46, + 0xd5, 0xd4, 0x17, 0x60, 0x1a, 0xbf, 0x93, 0xa9, 0xe9, 0x08, 0x25, 0x40, + 0xd1, 
0x65, 0xae, 0xdd, 0x85, 0xa6, 0xcc, 0x06, 0xca, 0x91, 0xe1, 0x63, + 0xf9, 0x6b, 0x15, 0xa8, 0x04, 0x61, 0xd2, 0xa6, 0x59, 0x21, 0x1a, 0x1c, + 0xc9, 0xa9, 0xa9, 0xc8, 0x54, 0x86, 0xac, 0xa5, 0xd6, 0x95, 0x39, 0x83, + 0x4b, 0x6b, 0x69, 0xa6, 0x94, 0xd8, 0xc0, 0xfb, 0x66, 0x0f, 0x3a, 0xbe, + 0xc7, 0xf3, 0xcc, 0xd5, 0xb7, 0x1b, 0x60, 0x02, 0x95, 0x45, 0x4a, 0x12, + 0xc9, 0xfe, 0x75, 0x7c, 0x1b, 0xb2, 0x86, 0x96, 0x28, 0x07, 0xa2, 0x18, + 0x7a, 0x6c, 0x90, 0x6f, 0x32, 0x0c, 0xc8, 0x34, 0xbc, 0x75, 0x4d, 0x96, + 0x03, 0xa6, 0x0f, 0x3d, 0x35, 0x1b, 0x64, 0x76, 0x95, 0x55, 0xff, 0x25, + 0xd4, 0x71, 0xcf, 0x8a, 0x73, 0x6d, 0x9b, 0x74, 0xfe, 0xff, 0x9e, 0x31, + 0x9e, 0x5e, 0x89, 0x5a, 0x1a, 0xeb, 0x8d, 0x06, 0x3b, 0xf2, 0xf6, 0x06, + 0x5d, 0xc3, 0xba, 0x04, 0xca, 0x0f, 0x07, 0x2c, 0xbd, 0x54, 0x52, 0xd9, + 0x1c, 0x2f, 0x0e, 0x13, 0x5e, 0x25, 0x13, 0xe5, 0xd7, 0x8e, 0x19, 0x42, + 0x1b, 0x52, 0x2e, 0xd2, 0x8f, 0xc5, 0x8e, 0x1c, 0x34, 0x2e, 0x4d, 0xd5, + 0x51, 0x7d, 0x91, 0x64, 0xbc, 0xb4, 0x0d, 0xc9, 0xe7, 0x1c, 0x6c, 0x47, + 0xe9, 0xbb, 0x67, 0x9a, 0x96, 0xde, 0xad, 0xff, 0xba, 0x35, 0x25, 0x6d, + 0x57, 0xa1, 0x93, 0xfe, 0xe2, 0x8d, 0x02, 0xeb, 0xf0, 0x2f, 0x54, 0xfd, + 0x46, 0xc0, 0x8f, 0xea, 0x32, 0x7b, 0x57, 0xda, 0xe0, 0x29, 0x1c, 0x19, + 0xba, 0xa4, 0xa6, 0x1c, 0x6e, 0xeb, 0x7a, 0xa8, 0x8a, 0xe1, 0xc6, 0x12, + 0xf5, 0xa3, 0x24, 0x1a, 0x96, 0xe1, 0x02, 0xc0, 0xf4, 0x7d, 0x14, 0x72, + 0xd6, 0x12, 0x8e, 0x6c, 0x8c, 0xd2, 0xfd, 0x88, 0x78, 0x48, 0xf3, 0x74, + 0x38, 0x86, 0x04, 0x68, 0x6d, 0x7c, 0xf4, 0x4c, 0x40, 0x17, 0xf6, 0x8f, + 0xb2, 0x6c, 0xd7, 0x66, 0x66, 0x3b, 0x38, 0xa1, 0xbb, 0x1e, 0xff, 0x72, + 0x1f, 0x64, 0x56, 0xc2, 0x53, 0x1c, 0x6f, 0x84, 0x2b, 0xbd, 0x23, 0xd9, + 0xb4, 0x6b, 0x87, 0x79, 0x99, 0xec, 0x81, 0x8d, 0x1a, 0x58, 0x00, 0xf0, + 0x2c, 0xc1, 0xc4, 0x57, 0x74, 0x0f, 0xce, 0x32, 0xe2, 0x5e, 0xae, 0x02, + 0x1c, 0xe8, 0x94, 0xc6, 0x44, 0xaa, 0x7b, 0x9a, 0x32, 0xb5, 0x33, 0xac, + 0xfc, 0x41, 0x65, 0xf2, 0xca, 0xcc, 0xc6, 0x74, 0x36, 0xb2, 0xc9, 0x0e, + 0x26, 
0x73, 0xae, 0x68, 0x98, 0xa4, 0x36, 0xe8, 0x98, 0x39, 0xad, 0x05, + 0x3f, 0xca, 0x12, 0xcc, 0x86, 0xfd, 0xc6, 0x57, 0xf0, 0x02, 0x4e, 0x45, + 0xcb, 0x54, 0x34, 0xdd, 0x66, 0x26, 0xab, 0xda, 0x95, 0xa5, 0x85, 0xec, + 0x02, 0x03, 0xb6, 0x29, 0x30, 0x11, 0x40, 0x54, 0x9a, 0x6a, 0x87, 0x2e, + 0x97, 0xa1, 0x7e, 0xeb, 0x34, 0x39, 0x78, 0x3b, 0xbc, 0x5f, 0x8e, 0xc5, + 0x0e, 0x21, 0x29, 0x4b, 0xb7, 0x1b, 0xe7, 0x14, 0x08, 0x34, 0xb7, 0x9a, + 0x0a, 0xb2, 0x6c, 0x25, 0x76, 0xb5, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0xe2, 0x7d, 0x48, 0xdd, 0x1a, 0xcb, 0xb6, 0x5c, + 0x6f, 0xbe, 0x32, 0x9d, 0xd2, 0x2b, 0x9e, 0x10, 0x65, 0xd7, 0x1e, 0xec, + 0xc8, 0xb5, 0x10, 0x64, 0x8f, 0x5d, 0xef, 0xfe, 0x9b, 0x6c, 0x9b, 0x02, + 0x6a, 0x6d, 0xf7, 0x98, 0x7b, 0xf7, 0x17, 0xfd, 0x49, 0x1b, 0x6a, 0xc5, + 0x3c, 0xa0, 0xfc, 0xa8, 0x94, 0x95, 0xed, 0x48, 0x81, 0x04, 0x53, 0x8c, + 0xbe, 0xe4, 0x4e, 0xaf, 0xc1, 0x9d, 0xc3, 0xdf, 0xc2, 0xb5, 0xff, 0xff, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0xae, 0xb0, 0x67, 0x5b, + 0x99, 0x26, 0x07, 0xfb, 0x6c, 0x98, 0xfe, 0xbb, 0x35, 0xf1, 0x5b, 0x02, + 0xc6, 0x03, 0xfc, 0x97, 0x21, 0x16, 0x8d, 0x48, 0xd4, 0x4f, 0x03, 0xd9, + 0x7c, 0x9f, 0xa6, 0x1e, 0x6f, 0x5a, 0x58, 0x17, 0x6d, 0x26, 0xb4, 0xc5, + 0x4c, 0xe5, 0x93, 0x0a, 0x9c, 0xb2, 0x40, 0xbc, 0x60, 0xc7, 0x2b, 0xdb, + 0x3b, 0xc0, 0x3c, 0x5c, 0x44, 0x4b, 0xdd, 0x58, 0xbe, 0xdc, 0xc5, 0xb5, + 0x6a, 0xf9, 0x5e, 0x73, 0x07, 0x58, 0x8f, 0x45, 0x7b, 0xac, 0xba, 0x82, + 0x96, 0x49, 0x4d, 0x22, 0x70, 0x7a, 0x3d, 0x69, 0x26, 0x8b, 0x88, 0x13, + 0xf1, 0x8d, 0xfc, 0xdf, 0x73, 0xd5, 0x20, 0x3c, 0x52, 0x92, 0x16, 0xb1, + 0x6e, 0xb7, 0x41, 0xbe, 0x23, 0x9b, 0x51, 0xf7, 0xc9, 0x38, 0x8a, 0xc7, + 0x6e, 0x68, 0x82, 0xd1, 0x59, 0x50, 0x09, 0x4b, 0x44, 0x3b, 0x28, 0x06, + 0x60, 0x75, 0x7a, 0xe5, 0xa1, 0x36, 0xbb, 0x62, 0x44, 0xe3, 0xd0, 0x68, + 0x14, 0xea, 0xad, 0xf9, 0x18, 0xcc, 0xd5, 0x42, 0x5d, 0x18, 0x53, 0xe6, + 0x4a, 0xfe, 0xde, 0x32, 0xe1, 0xe7, 0xf8, 0x8c, 0x9d, 0x35, 0xf4, 0x4a, + 0xcb, 
0x23, 0x2f, 0x91, 0xb5, 0xb0, 0xb2, 0x01, 0x5c, 0x22, 0x8c, 0x42, + 0x42, 0xd5, 0xf0, 0x82, 0x6f, 0x9f, 0x64, 0xe5, 0x99, 0x4d, 0x36, 0x0b, + 0xfc, 0x78, 0x38, 0x30, 0x47, 0x8f, 0x0b, 0x57, 0x86, 0x4f, 0x1b, 0xc9, + 0x05, 0x0e, 0x08, 0xc4, 0xf4, 0xab, 0x9e, 0x90, 0xb4, 0x4f, 0x36, 0x54, + 0xe8, 0xa1, 0x3f, 0x90, 0xd2, 0xf3, 0xb4, 0xb4, 0xdd, 0xf3, 0x43, 0x2f, + 0xc4, 0x43, 0xbb, 0x99, 0x8e, 0xb8, 0x61, 0x59, 0x5e, 0xfa, 0x1b, 0x3c, + 0xc1, 0xeb, 0x9d, 0x35, 0x62, 0x34, 0x82, 0x45, 0xef, 0x41, 0xe9, 0xfc, + 0x35, 0xae, 0xb4, 0x0b, 0xce, 0x52, 0x5b, 0x40, 0x7d, 0xdd, 0x86, 0x83, + 0x52, 0x74, 0x77, 0x11, 0xc2, 0x9b, 0x8c, 0xa3, 0x63, 0xc2, 0x2d, 0xdd, + 0x8c, 0x76, 0x13, 0xc5, 0xc0, 0xde, 0x3e, 0x6b, 0xe1, 0x0f, 0xeb, 0x0f, + 0x0a, 0x25, 0x41, 0x2f, 0x8b, 0x4a, 0x98, 0x30, 0xcb, 0x1a, 0x43, 0xa3, + 0xc1, 0xcc, 0x44, 0x9a, 0x6c, 0xdc, 0x92, 0x40, 0xc4, 0x7a, 0x1f, 0x8a, + 0x6f, 0x74, 0xf3, 0xf5, 0x52, 0x72, 0xf7, 0x81, 0x6e, 0x74, 0x75, 0xe6, + 0xea, 0xd9, 0x57, 0x91, 0xae, 0xf2, 0x3f, 0x35, 0x4b, 0x99, 0xd9, 0x3f, + 0x85, 0xe0, 0x92, 0xaa, 0x35, 0xac, 0x28, 0xbf, 0x43, 0xb8, 0xad, 0xc7, + 0xc5, 0xf6, 0x15, 0x2f, 0x7c, 0xfb, 0x34, 0x48, 0xf3, 0x04, 0x12, 0xf4, + 0x2f, 0x92, 0x74, 0xc8, 0xea, 0xbc, 0x24, 0x6e, 0x3b, 0x0e, 0x9e, 0xf0, + 0xaf, 0x02, 0x97, 0x95, 0xbc, 0x90, 0x7f, 0xc4, 0xf8, 0xe2, 0x04, 0x9a, + 0x8f, 0xfc, 0xbc, 0x50, 0xfe, 0xf7, 0x89, 0x17, 0x2c, 0xdb, 0xd6, 0x5e, + 0xbf, 0xd9, 0x8e, 0x89, 0x8b, 0x06, 0x1d, 0x0b, 0x81, 0x2a, 0x55, 0x5c, + 0x5f, 0xb6, 0xa6, 0xa5, 0xd2, 0xaa, 0x79, 0x9c, 0x39, 0x31, 0x76, 0x03, + 0x98, 0x42, 0xd6, 0xb7, 0x37, 0x1f, 0xc8, 0x51, 0x8a, 0x1c, 0x5d, 0xcd, + 0x9c, 0x78, 0xa4, 0x22, 0x6e, 0x12, 0x10, 0x0a, 0x33, 0xc9, 0xe0, 0xfe, + 0xfc, 0xe8, 0x15, 0xe7, 0xef, 0xd8, 0x6d, 0xc7, 0xc9, 0xc2, 0x8e, 0x18, + 0x82, 0x2f, 0xa6, 0x09, 0x8a, 0xdc, 0x41, 0x6b, 0x89, 0xea, 0xd9, 0xd6, + 0x96, 0xfd, 0xba, 0x6e, 0xae, 0x2d, 0x0c, 0xf9, 0x3c, 0x4c, 0x1a, 0xfa, + 0x98, 0x83, 0x51, 0x45, 0x9d, 0x1e, 0xa5, 0xc1, 0x81, 0x54, 0x37, 0x5d, + 0x28, 
0xca, 0xa6, 0xfe, 0x48, 0xf4, 0x77, 0x17, 0x92, 0x1d, 0x0c, 0xb3, + 0x39, 0x77, 0x22, 0xd9, 0xc7, 0xc2, 0xaf, 0x70, 0x0a, 0xd3, 0xa6, 0x57, + 0x69, 0xfb, 0xb9, 0xe0, 0xc4, 0x73, 0x7a, 0x68, 0xee, 0x27, 0x6e, 0x3a, + 0x6e, 0xae, 0x32, 0xf6, 0x09, 0xb3, 0x0b, 0x40, 0x72, 0xc6, 0x26, 0x6e, + 0xc5, 0x88, 0x6b, 0xce, 0x99, 0x88, 0x60, 0x6f, 0x6e, 0xa9, 0xe6, 0xd7, + 0x35, 0x5e, 0x3b, 0x36, 0x0d, 0x14, 0xb8, 0x2f, 0xde, 0x67, 0xc8, 0x2e, + 0x52, 0xc1, 0xf1, 0x58, 0x87, 0x32, 0x2a, 0x52, 0x21, 0x27, 0x1e, 0x04, + 0xed, 0xc4, 0x82, 0xd7, 0xeb, 0x85, 0x12, 0x3e, 0xea, 0xd0, 0x07, 0xa0, + 0x80, 0x48, 0xe9, 0xbd, 0x9b, 0x3a, 0x8e, 0x8b, 0xa0, 0xfc, 0x07, 0xf0, + 0x69, 0x4e, 0xc7, 0x1d, 0xd9, 0x9a, 0x73, 0x18, 0x63, 0xb8, 0xe6, 0x4a, + 0xa0, 0x81, 0xf0, 0xdb, 0xb9, 0x88, 0xf4, 0x2b, 0x1f, 0x0d, 0xda, 0x31, + 0xc0, 0xb0, 0x55, 0x79, 0x56, 0x48, 0x22, 0xbb, 0x49, 0x7f, 0xb1, 0xf1, + 0xf6, 0x6f, 0x42, 0xd3, 0xba, 0x68, 0x3a, 0x8f, 0xe7, 0xac, 0x53, 0x30, + 0x96, 0xec, 0x51, 0x7d, 0xfc, 0xc0, 0x35, 0xe9, 0x59, 0xe7, 0x0e, 0xed, + 0x29, 0x46, 0x50, 0x3c, 0x4b, 0x36, 0xc6, 0x2a, 0xaa, 0x3b, 0xbe, 0xce, + 0xd3, 0xda, 0x4d, 0x65, 0xb0, 0xe8, 0x52, 0x68, 0xf0, 0x23, 0xde, 0x02, + 0x77, 0xb3, 0xcc, 0xce, 0x78, 0xdd, 0x8c, 0xf8, 0xbe, 0x5d, 0x0d, 0xa9, + 0xb6, 0x96, 0x85, 0xbf, 0x92, 0x2a, 0x6b, 0x1b, 0xe8, 0x76, 0x05, 0x13, + 0x30, 0xd8, 0x3d, 0x80, 0xaa, 0xa2, 0xa3, 0xbc, 0x07, 0xba, 0x9c, 0x75, + 0x5b, 0x42, 0x03, 0xd8, 0xde, 0x42, 0x44, 0xf7, 0x29, 0x43, 0x29, 0x0d, + 0x48, 0x2b, 0x02, 0xd0, 0xcc, 0xe9, 0x17, 0x47, 0x23, 0x73, 0x6d, 0xc5, + 0x91, 0x6d, 0x4e, 0xc5, 0xcf, 0xc3, 0x58, 0xaf, 0x6e, 0xa2, 0x9e, 0xe7, + 0xe1, 0x88, 0xac, 0x62, 0xff, 0xbc, 0x69, 0x57, 0xad, 0x0f, 0x08, 0xf8, + 0x32, 0xfd, 0x79, 0xcb, 0x30, 0xbc, 0xd2, 0xe5, 0x20, 0xd9, 0x0f, 0xd1, + 0x33, 0xbf, 0xe4, 0x49, 0x7a, 0x2b, 0x5c, 0xb3, 0x63, 0x13, 0x4d, 0xed, + 0x17, 0xe7, 0x5b, 0xf4, 0x36, 0x9d, 0x3c, 0x4e, 0x51, 0xb2, 0xf7, 0xf2, + 0xcd, 0xfb, 0xec, 0x42, 0x79, 0x46, 0xae, 0x18, 0x50, 0xdf, 0xbf, 0x5b, + 0xb1, 
0x9a, 0x49, 0x22, 0xae, 0xe9, 0xf3, 0x86, 0x3f, 0xe0, 0xb4, 0xc6, + 0x9c, 0x08, 0xd6, 0xd9, 0xf4, 0x68, 0xbb, 0x33, 0x0e, 0x59, 0x3d, 0x76, + 0xf0, 0xd7, 0x54, 0x04, 0x19, 0x66, 0xee, 0x61, 0x11, 0x0d, 0x48, 0x10, + 0x21, 0x16, 0x7c, 0xac, 0x49, 0xab, 0xe0, 0x19, 0x85, 0x93, 0x48, 0x65, + 0x7c, 0x5e, 0x6c, 0x1a, 0xf5, 0xb0, 0xc6, 0x80, 0xa1, 0x2a, 0xd5, 0x71, + 0x42, 0xec, 0x2f, 0x25, 0xf7, 0xb8, 0x84, 0xcd, 0xf0, 0x5c, 0xcd, 0xee, + 0x44, 0xcb, 0xeb, 0x74, 0x96, 0x3c, 0xb0, 0x56, 0xcb, 0xaf, 0x7e, 0x9e, + 0x4a, 0x12, 0x06, 0xae, 0x57, 0x43, 0x2d, 0xb2, 0x11, 0x96, 0x05, 0xdb, + 0xb3, 0x1a, 0x01, 0xa7, 0x1d, 0x02, 0x81, 0x1c, 0x36, 0x41, 0x65, 0xf0, + 0x67, 0xd6, 0xd0, 0x0f, 0xec, 0x34, 0x7d, 0xd3, 0x89, 0xac, 0x60, 0x67, + 0x95, 0x81, 0x84, 0xe7, 0xbb, 0x9a, 0x59, 0x36, 0x3b, 0xde, 0xa4, 0x88, + 0xda, 0xf2, 0xd2, 0xa2, 0x0c, 0xba, 0xfb, 0x93, 0xbf, 0xc8, 0xad, 0xe8, + 0x57, 0xa0, 0x2b, 0xbb, 0x4e, 0xa9, 0x38, 0xe7, 0x86, 0x6b, 0x95, 0x34, + 0x24, 0x96, 0xc0, 0x09, 0xd9, 0xfd, 0x5f, 0x1c, 0x93, 0xd9, 0x72, 0xfa, + 0xc4, 0x14, 0x72, 0x9c, 0x19, 0x6f, 0xee, 0x12, 0x17, 0xee, 0x65, 0xb4, + 0x8c, 0x83, 0x39, 0x3c, 0x0f, 0xbf, 0x25, 0xcf, 0xee, 0x05, 0x8c, 0x6a, + 0x56, 0x18, 0xf0, 0x20, 0x72, 0xc1, 0xbf, 0xe4, 0xce, 0x37, 0xbf, 0x2b, + 0xba, 0x70, 0x1e, 0xc2, 0xc8, 0xcd, 0x58, 0xb9, 0x60, 0xc7, 0xfb, 0xd0, + 0xce, 0xb9, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, + 0x7c, 0x63, 0x50, 0x90, 0xcb, 0x9c, 0xce, 0x59, 0xb1, 0x47, 0xb0, 0x49, + 0x9b, 0xfc, 0xfb, 0x3d, 0x3d, 0x62, 0xcf, 0x58, 0x4c, 0x2a, 0x79, 0xf0, + 0x72, 0x7f, 0x81, 0x41, 0xac, 0x82, 0x2d, 0xa9, 0xf0, 0x0e, 0x4d, 0xd2, + 0xe0, 0xbd, 0xca, 0x17, 0xb7, 0x59, 0x9f, 0xdb, 0xfe, 0x51, 0x90, 0x88, + 0xb9, 0xeb, 0x4e, 0xac, 0x80, 0x30, 0x64, 0xc4, 0x49, 0xd1, 0xb6, 0x65, + 0x67, 0xef, 0x9d, 0x5c, 0x04, 0xe0, 0x9d, 0xbe, 0x47, 0x75, 0x9b, 0x6e, + 0x30, 0x76, 0xad, 0x37, 0x9a, 0x56, 0xff, 0xcd, 0x40, 0x26, 0x3e, 0xe2, + 0x7d, 0x30, 0x55, 0x09, 0x92, 0x25, 0x36, 0x2f, 0xf8, 0x55, 0xb8, 0x9b, + 0x66, 
0x49, 0x41, 0x9d, 0x78, 0x6d, 0x3f, 0x54, 0x41, 0x01, 0x93, 0x9c, + 0x5e, 0x0c, 0x4a, 0x38, 0x79, 0x76, 0xb4, 0x98, 0xae, 0xf9, 0x99, 0x21, + 0x05, 0x6a, 0xfb, 0xbc, 0x44, 0xf7, 0xdc, 0x85, 0x5e, 0x5f, 0x18, 0x49, + 0x22, 0x11, 0x6d, 0xa5, 0x9e, 0x6b, 0x59, 0x60, 0xf8, 0x73, 0x8b, 0xcb, + 0x38, 0xbb, 0xc9, 0xbf, 0x49, 0x0e, 0x57, 0x65, 0x48, 0x41, 0x41, 0xa2, + 0x40, 0x67, 0x91, 0x1d, 0x54, 0xac, 0xa7, 0xef, 0x16, 0x8b, 0xc7, 0xd1, + 0xe6, 0xdb, 0xc5, 0x9c, 0xd4, 0x04, 0x67, 0xd8, 0x75, 0x21, 0x2b, 0x1d, + 0x11, 0xc1, 0x79, 0x45, 0xb4, 0x91, 0x7a, 0x97, 0x00, 0xde, 0xc6, 0xc5, + 0x8a, 0xd1, 0xd7, 0xea, 0xc1, 0x22, 0xe1, 0x58, 0x61, 0xf2, 0x89, 0x3d, + 0xdb, 0x04, 0x3d, 0xe4, 0xe9, 0xe7, 0xbf, 0x4b, 0x99, 0x8a, 0xc6, 0xf2, + 0x09, 0xc4, 0xe2, 0x6d, 0x0b, 0xda, 0x13, 0xfb, 0xff, 0xbf, 0x0b, 0xfc, + 0x78, 0x33, 0xb8, 0x7b, 0x3e, 0xd8, 0xba, 0x27, 0xba, 0xae, 0xdf, 0xce, + 0xea, 0x80, 0x08, 0x38, 0xd8, 0x33, 0x00, 0xa9, 0xb6, 0x88, 0x48, 0xa9, + 0x3b, 0x54, 0xf0, 0x95, 0xda, 0xba, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x04, 0x00, 0x00, 0xb1, 0xd7, 0x8d, 0x6c, 0xb9, 0x96, 0xdc, 0x64, + 0x9b, 0x0c, 0x74, 0x54, 0x59, 0x82, 0xf6, 0x6e, 0x7c, 0x4e, 0x23, 0x83, + 0x04, 0x2e, 0x49, 0xfb, 0x56, 0x4b, 0xcd, 0x0d, 0x76, 0x29, 0xb1, 0xce, + 0x40, 0xa3, 0xd0, 0x02, 0x16, 0x8e, 0x1c, 0x0a, 0x00, 0x5b, 0x8c, 0x06, + 0xf9, 0x07, 0x97, 0x12, 0x0c, 0x33, 0xd5, 0x48, 0x6d, 0xae, 0x7d, 0x2c, + 0x8f, 0x74, 0x32, 0x24, 0xcf, 0x91, 0xd7, 0xbe, 0xb2, 0x05, 0xcf, 0x2f, + 0x93, 0xd5, 0x43, 0x90, 0xce, 0x02, 0x97, 0xf8, 0x51, 0xb3, 0xba, 0x56, + 0x5d, 0x94, 0x41, 0xa4, 0x11, 0xf3, 0x21, 0xc0, 0xcc, 0x28, 0xf8, 0x5a, + 0x00, 0x0a, 0xd4, 0x53, 0xdd, 0xac, 0xfe, 0x25, 0x03, 0xea, 0x2b, 0x6b, + 0x9d, 0x7e, 0x1a, 0xe1, 0x5f, 0x5c, 0xa7, 0x47, 0xa2, 0x72, 0x4f, 0x92, + 0x60, 0x25, 0x7c, 0x1c, 0xa5, 0x34, 0xa6, 0x86, 0x0e, 0xda, 0x8f, 0x3f, + 0xec, 0xe2, 0xe4, 0xad, 0xa9, 0x41, 0xcc, 0x3d, 0x94, 0x43, 0xfd, 0x28, + 0xd8, 0xb0, 0x0f, 0x05, 0x9e, 0x2b, 0x27, 0x3f, 0xe0, 0x84, 0xbc, 0x9e, + 0x7a, 
0xa5, 0x83, 0x3d, 0x3b, 0xac, 0x83, 0xd3, 0x16, 0x92, 0x8c, 0xd2, + 0x4a, 0x81, 0xdd, 0xba, 0x0a, 0xb7, 0xc5, 0x9f, 0x83, 0x0f, 0x78, 0xb8, + 0xab, 0x2d, 0xca, 0xf8, 0x6c, 0x06, 0xd7, 0x82, 0xb8, 0x61, 0x7d, 0x2a, + 0x31, 0x3a, 0x39, 0x97, 0x5f, 0xc7, 0x00, 0x6e, 0x46, 0xf2, 0xc5, 0x12, + 0x71, 0x55, 0x5b, 0x10, 0xaf, 0xbb, 0x07, 0x4c, 0x2f, 0xa3, 0x51, 0x53, + 0x22, 0x20, 0xab, 0xed, 0x02, 0x95, 0xc6, 0x5f, 0xaa, 0xb8, 0xc0, 0xcb, + 0xe5, 0xe0, 0x25, 0x97, 0xf7, 0xda, 0x1d, 0xd8, 0x5a, 0xff, 0x76, 0x0c, + 0x3e, 0x33, 0x1b, 0x7a, 0x15, 0xb8, 0x34, 0x75, 0xcf, 0xe9, 0xf3, 0x53, + 0x61, 0x03, 0x2d, 0x52, 0x29, 0x69, 0x3a, 0xc3, 0xd9, 0x22, 0xc0, 0x2d, + 0x80, 0xed, 0x66, 0xc4, 0xf4, 0x89, 0x60, 0x14, 0xdb, 0xec, 0x7d, 0xcc, + 0x99, 0x5c, 0x94, 0x27, 0xab, 0xed, 0xd2, 0x17, 0xf4, 0x36, 0xfc, 0x7e, + 0x99, 0x98, 0xb6, 0x86, 0xb6, 0x7c, 0x54, 0xd6, 0xec, 0xb5, 0xad, 0x62, + 0xcc, 0xb0, 0xf7, 0x8c, 0x52, 0x99, 0xf2, 0x44, 0x27, 0x3a, 0xb0, 0xff, + 0x8f, 0x09, 0xae, 0xe1, 0x61, 0xd8, 0x9f, 0xdd, 0x2f, 0x6b, 0xea, 0xd0, + 0x12, 0x70, 0x8c, 0x9d, 0x8f, 0x4c, 0x36, 0x98, 0x1e, 0x2e, 0xb5, 0x50, + 0x63, 0x33, 0x9c, 0x4b, 0xc3, 0xd4, 0xa0, 0xe6, 0x96, 0x96, 0x75, 0xfd, + 0x8a, 0xc4, 0x0c, 0xa7, 0xea, 0x9d, 0xf1, 0x23, 0x9e, 0x38, 0xff, 0x1a, + 0x67, 0x36, 0x5f, 0x5f, 0x17, 0x88, 0x1a, 0x43, 0x25, 0xea, 0x76, 0xb5, + 0xcd, 0xce, 0x43, 0xf8, 0x71, 0x2b, 0xdb, 0xf0, 0xcd, 0x76, 0xbd, 0x94, + 0x57, 0xdb, 0x77, 0xcd, 0xb2, 0x8f, 0xd1, 0xc0, 0xeb, 0x00, 0x61, 0x7f, + 0x66, 0xb0, 0x43, 0x6e, 0xe0, 0x9f, 0x11, 0x0e, 0x65, 0xf7, 0x4e, 0x00, + 0x74, 0xc3, 0xeb, 0xb1, 0xeb, 0x0c, 0x24, 0x5d, 0x15, 0x56, 0x16, 0x47, + 0x87, 0xcf, 0x34, 0xbe, 0x2a, 0xdd, 0x77, 0x55, 0xa4, 0x09, 0x15, 0x79, + 0x8c, 0xaa, 0xce, 0x32, 0x90, 0x9b, 0x16, 0x40, 0x94, 0x7f, 0x19, 0x27, + 0xbc, 0xbf, 0x45, 0x4b, 0xa5, 0xf0, 0xd0, 0x9e, 0x5b, 0xb9, 0x46, 0x6e, + 0x72, 0x8f, 0x49, 0x3b, 0x7a, 0xc1, 0x92, 0xb0, 0xd5, 0x25, 0x1b, 0x0b, + 0xf3, 0xd0, 0x8a, 0x47, 0x8b, 0xbe, 0xa4, 0xf9, 0x6a, 0x09, 0x84, 0x9a, + 0x5b, 
0x5b, 0xea, 0xbb, 0x6f, 0xd8, 0xaf, 0xcd, 0x67, 0x9b, 0x79, 0x7c, + 0x8f, 0xcc, 0xd7, 0x5f, 0x3a, 0xc3, 0xd0, 0xb7, 0xba, 0x28, 0x83, 0x81, + 0x4a, 0x05, 0x51, 0xaf, 0xa0, 0x52, 0x34, 0xe3, 0x4f, 0xec, 0x82, 0xdc, + 0x97, 0xd8, 0x69, 0xb2, 0x0d, 0x68, 0x35, 0x87, 0x58, 0xc0, 0xcf, 0x58, + 0x0d, 0xf6, 0x6b, 0x6d, 0x2a, 0xc0, 0x72, 0xe4, 0x90, 0x8c, 0x7b, 0x45, + 0xba, 0xf1, 0x13, 0x6f, 0x8c, 0xd2, 0xdd, 0xc5, 0x8e, 0xc8, 0xec, 0xf9, + 0xfb, 0xde, 0xe5, 0xaa, 0xcb, 0xc0, 0xff, 0x77, 0x2d, 0x99, 0xb1, 0x69, + 0x7f, 0xe3, 0x38, 0x61, 0x35, 0xb6, 0x45, 0xdd, 0x73, 0x45, 0x84, 0x89, + 0x1b, 0x96, 0x7e, 0x6a, 0x1d, 0xd9, 0xe6, 0x76, 0xa8, 0x16, 0x0f, 0x42, + 0xc9, 0x41, 0xec, 0x5d, 0x25, 0x01, 0xb0, 0x45, 0xa6, 0xaa, 0x69, 0x87, + 0x11, 0xa1, 0xb8, 0x9e, 0x68, 0x48, 0x68, 0xe9, 0xb5, 0xc2, 0xff, 0x83, + 0x8f, 0x71, 0xb9, 0xd7, 0xbb, 0xae, 0x59, 0x8b, 0x1b, 0x4c, 0x44, 0xd8, + 0xe3, 0xce, 0xab, 0x88, 0xfb, 0x64, 0xd9, 0x61, 0x5a, 0x7d, 0xce, 0x3a, + 0x27, 0xb5, 0xa3, 0xfd, 0x5d, 0xa3, 0xb8, 0xa1, 0x15, 0x63, 0x0b, 0x75, + 0x39, 0xc3, 0xa4, 0xfb, 0x60, 0x53, 0xfd, 0x11, 0x21, 0x35, 0x0f, 0x19, + 0x28, 0x14, 0xcd, 0x8a, 0xcf, 0x33, 0xaa, 0x4f, 0x6a, 0x1e, 0x56, 0x87, + 0xd5, 0x6e, 0x43, 0x9b, 0xa3, 0x72, 0x95, 0x8c, 0x34, 0xa2, 0xac, 0x11, + 0x76, 0x95, 0xd7, 0xdd, 0xbf, 0x10, 0xf4, 0x0f, 0x2a, 0x64, 0xd2, 0x4d, + 0x7b, 0xc6, 0x9b, 0x7d, 0xf7, 0xa5, 0xb3, 0x84, 0x9a, 0x9a, 0x5e, 0xcf, + 0x7f, 0x95, 0x6d, 0x44, 0xd1, 0xb2, 0x19, 0xbb, 0xed, 0x37, 0x42, 0x4b, + 0x4b, 0x6d, 0xb7, 0x10, 0x02, 0x5f, 0x00, 0x1f, 0x24, 0xce, 0xb2, 0x8b, + 0x3e, 0x7d, 0xc6, 0x6e, 0x6c, 0x90, 0x75, 0xad, 0x3f, 0x9d, 0x63, 0x04, + 0x76, 0x20, 0x7a, 0x56, 0x48, 0xa1, 0x6a, 0x37, 0x74, 0xd2, 0xb7, 0x4f, + 0xa3, 0x64, 0x62, 0xaa, 0xce, 0x75, 0x8c, 0x15, 0x75, 0x79, 0xa0, 0xbd, + 0xdd, 0x01, 0x46, 0xca, 0xa0, 0x31, 0x1a, 0x16, 0x1f, 0xef, 0x8b, 0xc6, + 0x54, 0x57, 0xfa, 0x6e, 0x43, 0xdf, 0xb0, 0x99, 0xed, 0xa4, 0xcb, 0xeb, + 0x91, 0x35, 0x14, 0x0c, 0xa9, 0x1d, 0xb5, 0xa9, 0x32, 0x99, 0xe3, 0x89, + 0x74, 
0xaa, 0xa4, 0x65, 0x1e, 0x82, 0x47, 0xfa, 0x37, 0x23, 0xe5, 0x86, + 0xb6, 0xc0, 0xb6, 0x89, 0x9a, 0xd9, 0xae, 0x29, 0x39, 0x7b, 0x66, 0xc7, + 0x5b, 0x02, 0x08, 0x86, 0xd4, 0xf0, 0x75, 0xc2, 0x05, 0x86, 0xc3, 0x75, + 0xd2, 0x2a, 0x1e, 0xec, 0x6e, 0x75, 0x29, 0x58, 0x8c, 0x25, 0x3b, 0x95, + 0x21, 0xde, 0x42, 0xd5, 0xb7, 0x15, 0x30, 0x09, 0x49, 0x78, 0x55, 0xd5, + 0xf2, 0x30, 0x80, 0x93, 0x8a, 0xce, 0x84, 0x27, 0xdb, 0x4a, 0x09, 0x30, + 0x0c, 0x7f, 0x4d, 0xd1, 0x0f, 0xda, 0x66, 0x58, 0xe1, 0x01, 0xfd, 0x75, + 0x83, 0xf5, 0x39, 0x2e, 0xe2, 0x6b, 0xde, 0xff, 0x20, 0x8a, 0xf7, 0xcc, + 0x81, 0x8e, 0x99, 0xb4, 0xeb, 0x76, 0x74, 0x38, 0x2b, 0xe0, 0x6d, 0x61, + 0x8f, 0x39, 0x59, 0x10, 0x7d, 0xb5, 0xd3, 0x14, 0x96, 0x04, 0x1d, 0x22, + 0x89, 0xef, 0x15, 0x7c, 0x28, 0x5a, 0xd6, 0x8d, 0xf3, 0xb7, 0x6a, 0x9a, + 0xce, 0x21, 0x77, 0xfd, 0x4f, 0x22, 0x26, 0x28, 0xb8, 0xb5, 0xb3, 0x73, + 0xfd, 0x2a, 0x7b, 0x42, 0x26, 0x77, 0x41, 0x93, 0xed, 0xf9, 0x8f, 0xa9, + 0x92, 0xd5, 0x9f, 0x2e, 0x60, 0xec, 0x60, 0x98, 0xf1, 0xd5, 0x11, 0xe2, + 0xe0, 0xd7, 0x45, 0xa7, 0xe4, 0xf2, 0x82, 0x61, 0x2f, 0x41, 0x1b, 0xd9, + 0x8e, 0x78, 0xd5, 0x6b, 0x68, 0x74, 0xf0, 0xc3, 0x83, 0x01, 0x16, 0x60, + 0x6e, 0x34, 0x88, 0x45, 0x8a, 0x86, 0x44, 0x5b, 0xa5, 0xa8, 0x55, 0xbc, + 0xfa, 0x8f, 0xbd, 0x93, 0x95, 0x3f, 0xab, 0x19, 0x54, 0x8f, 0x06, 0x8e, + 0xca, 0x0b, 0x4a, 0x18, 0x3f, 0x7a, 0x9c, 0x3f, 0xe6, 0xbe, 0xff, 0xff, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x81, 0x32, 0x41, 0x46, + 0x59, 0x26, 0xf4, 0xef, 0x93, 0x9f, 0x04, 0xc2, 0x67, 0x13, 0x32, 0x45, + 0xc0, 0x79, 0x70, 0x27, 0x21, 0x2b, 0xaf, 0x35, 0xf3, 0xc4, 0x88, 0x52, + 0x28, 0xea, 0xca, 0x8a, 0x08, 0x01, 0x6f, 0x61, 0xab, 0x10, 0xa3, 0xf0, + 0x6b, 0x3b, 0x54, 0x64, 0xf1, 0x63, 0x83, 0x38, 0x2b, 0x26, 0x18, 0x5a, + 0x67, 0xc4, 0x67, 0x38, 0x3f, 0x2c, 0x9a, 0xc9, 0x48, 0x33, 0x77, 0xb4, + 0xb2, 0xc2, 0xc7, 0x08, 0x21, 0x5e, 0xc4, 0x19, 0x59, 0xe1, 0xfa, 0x32, + 0xa4, 0x4c, 0x3e, 0xba, 0x65, 0x92, 0x98, 0x39, 0x71, 0x2f, 0x99, 0x08, + 0xf8, 
0xb3, 0x7a, 0x03, 0x53, 0xd7, 0x68, 0xb2, 0x5e, 0xb0, 0xef, 0xe0, + 0x1e, 0x7d, 0xb2, 0x23, 0x5d, 0x2b, 0xd7, 0x09, 0xa6, 0x78, 0xa4, 0x7c, + 0x08, 0xed, 0x8a, 0xf6, 0x96, 0xa0, 0x10, 0x17, 0x62, 0x8b, 0x8a, 0xa0, + 0xac, 0x22, 0x67, 0x02, 0xa8, 0x66, 0x1a, 0xb5, 0x02, 0xde, 0xa5, 0xfa, + 0x69, 0x29, 0x5f, 0x24, 0x89, 0x46, 0x68, 0xd6, 0x51, 0x2a, 0xfe, 0x88, + 0xf0, 0x40, 0xde, 0xd1, 0x12, 0x2e, 0xed, 0x13, 0x7b, 0x49, 0xf6, 0xe1, + 0x7a, 0xcf, 0x61, 0xcb, 0x70, 0x9d, 0xaa, 0x51, 0x07, 0xc2, 0x54, 0x76, + 0x89, 0x29, 0x94, 0x29, 0x8b, 0x0e, 0xf5, 0xe8, 0x81, 0xc7, 0xdb, 0x59, + 0x1e, 0x75, 0xda, 0x6a, 0x94, 0x18, 0x16, 0xae, 0xbb, 0x43, 0x87, 0x56, + 0x66, 0x8b, 0x84, 0xe9, 0xa9, 0xd0, 0xd2, 0x8f, 0x5b, 0xbf, 0x1d, 0x24, + 0x3a, 0xb7, 0x64, 0xff, 0xe9, 0x22, 0x21, 0x65, 0xaf, 0x2b, 0x45, 0x8d, + 0x28, 0xea, 0xbc, 0x07, 0x10, 0x6e, 0xfb, 0x4d, 0x6f, 0x35, 0xe5, 0xeb, + 0x5d, 0x29, 0x72, 0xe1, 0x94, 0xad, 0xed, 0x25, 0xd7, 0x39, 0x63, 0x32, + 0x37, 0x0b, 0xb2, 0xd7, 0x54, 0x1f, 0xe4, 0x0d, 0xe7, 0xb3, 0xd1, 0xa6, + 0x2a, 0xcf, 0x8e, 0x97, 0xf1, 0xa8, 0xfc, 0xb1, 0x61, 0xdc, 0xb4, 0x8f, + 0x29, 0xa2, 0x68, 0x4a, 0xe6, 0x2f, 0x8a, 0x69, 0x2c, 0xa1, 0x1d, 0xe2, + 0x9e, 0x65, 0x71, 0xb7, 0x83, 0xef, 0x63, 0xf5, 0x36, 0xdc, 0xa0, 0x94, + 0x5a, 0x45, 0x8a, 0x85, 0x5e, 0x28, 0x86, 0x21, 0xd2, 0xbf, 0x7a, 0x2f, + 0x76, 0x1c, 0x2a, 0x15, 0xb2, 0xe8, 0xaf, 0x63, 0x37, 0xbe, 0xd8, 0x0a, + 0xef, 0x54, 0xee, 0xe6, 0xd9, 0xb3, 0xdb, 0x41, 0x55, 0xba, 0xd8, 0x14, + 0x7c, 0x10, 0x61, 0x06, 0x40, 0x45, 0x69, 0x37, 0x60, 0xf7, 0x6a, 0x7a, + 0x23, 0x70, 0x30, 0x57, 0x3e, 0xe5, 0x12, 0x24, 0xbc, 0x5e, 0x82, 0x89, + 0xd8, 0x37, 0xc9, 0x33, 0xb9, 0x38, 0xa5, 0xba, 0xed, 0xdd, 0x93, 0x58, + 0x81, 0x15, 0xec, 0x15, 0x70, 0x2f, 0x30, 0xfa, 0xaf, 0xf7, 0xf5, 0xcb, + 0x41, 0x74, 0xea, 0xc0, 0x91, 0xbe, 0x53, 0x4c, 0xc2, 0x74, 0x1b, 0x5b, + 0x8c, 0x74, 0xd8, 0xc3, 0x4a, 0x12, 0xaa, 0x57, 0xd6, 0x61, 0xb1, 0xb8, + 0x81, 0x5d, 0x81, 0x37, 0x1e, 0x5b, 0x3d, 0x5a, 0xbc, 0xa6, 0xb2, 0x27, + 0xe3, 
0x01, 0x4c, 0xf0, 0xad, 0x7b, 0xdf, 0x50, 0xf9, 0xd7, 0xb7, 0xcc, + 0xa8, 0x5c, 0x3d, 0x9a, 0xb7, 0x60, 0x3e, 0x63, 0x3f, 0x6a, 0x08, 0x0b, + 0x82, 0xdc, 0x3e, 0xfa, 0x24, 0x33, 0xd3, 0x01, 0xbf, 0xef, 0xeb, 0x52, + 0x3f, 0x91, 0x61, 0xda, 0xe2, 0x26, 0x10, 0xdf, 0xe4, 0x9b, 0x77, 0x91, + 0x22, 0xc5, 0x4e, 0x9c, 0x0b, 0x32, 0xff, 0x27, 0x85, 0x85, 0x0c, 0x99, + 0x50, 0x8f, 0xad, 0x5d, 0x06, 0x18, 0x52, 0xb4, 0x64, 0x09, 0xc4, 0xa4, + 0x84, 0xd4, 0x81, 0x07, 0x0a, 0x97, 0x55, 0xf8, 0x96, 0x52, 0xb2, 0x9a, + 0xf4, 0x06, 0x2c, 0x9a, 0x3b, 0x8b, 0xaa, 0x67, 0x18, 0x3a, 0xee, 0xbc, + 0xca, 0x8f, 0x46, 0xf6, 0x4a, 0x33, 0x5b, 0x56, 0x09, 0xb2, 0x72, 0x87, + 0xdb, 0xbb, 0x57, 0x67, 0x53, 0x82, 0x77, 0x31, 0x66, 0xbb, 0xf1, 0x33, + 0x6d, 0x55, 0x82, 0xaa, 0x80, 0xd4, 0x4d, 0xb8, 0xab, 0xbd, 0x2a, 0xda, + 0x10, 0x3a, 0xc8, 0xf0, 0x14, 0x1e, 0xcb, 0x8e, 0x76, 0x6c, 0xc8, 0x74, + 0x05, 0xb3, 0x51, 0xbd, 0x63, 0x06, 0x69, 0x05, 0x2a, 0x21, 0xd6, 0x2f, + 0xe4, 0x38, 0xae, 0xf8, 0xd4, 0xe9, 0xa7, 0xe8, 0xc8, 0x5a, 0x65, 0x7d, + 0x54, 0x34, 0x33, 0x0d, 0xf6, 0x07, 0xd6, 0x8c, 0xe5, 0x72, 0x9b, 0xfb, + 0x60, 0x49, 0xd2, 0xaf, 0xb4, 0x17, 0xc4, 0x74, 0x8d, 0xe5, 0x54, 0xda, + 0x96, 0x56, 0x7d, 0x97, 0x62, 0xe8, 0xec, 0x0d, 0x2b, 0x02, 0x2e, 0x59, + 0xf8, 0xa1, 0x06, 0x6a, 0xb6, 0x3e, 0x15, 0xeb, 0x64, 0x1a, 0x48, 0x3d, + 0x53, 0x2c, 0x42, 0x3b, 0x97, 0xa1, 0x3f, 0x47, 0x8b, 0x74, 0x87, 0x8b, + 0x96, 0x63, 0x08, 0x4c, 0x99, 0x38, 0x5a, 0xb6, 0x93, 0xa8, 0xcc, 0xee, + 0x62, 0x3a, 0x00, 0x6d, 0x5c, 0xab, 0x77, 0x3c, 0x46, 0xae, 0x6e, 0xeb, + 0xf1, 0xf9, 0x63, 0xf1, 0xa2, 0x31, 0x21, 0x38, 0xc3, 0x4f, 0xe2, 0x3a, + 0x33, 0x7f, 0xe7, 0xc6, 0x69, 0xd5, 0x1c, 0x7e, 0x5b, 0x4f, 0xb1, 0x50, + 0x3b, 0xbe, 0x31, 0xa7, 0x42, 0xa3, 0x97, 0x7b, 0xe3, 0x90, 0xd0, 0x07, + 0xfd, 0x05, 0xb9, 0xf2, 0x47, 0xc4, 0xc8, 0xdd, 0x1c, 0x3c, 0xa4, 0x22, + 0x96, 0x04, 0xca, 0x28, 0x17, 0xcc, 0x5c, 0x49, 0x7e, 0xc6, 0x93, 0x98, + 0xd3, 0x8b, 0xd2, 0xf6, 0x4a, 0xb6, 0xbe, 0x8d, 0xa2, 0xdd, 0xb6, 0x7c, + 0x66, 
0x0c, 0x29, 0xcb, 0x1d, 0x98, 0xf6, 0xe4, 0xe5, 0x30, 0x4c, 0x84, + 0xbf, 0x6f, 0x71, 0x4e, 0xc2, 0x12, 0x9f, 0x35, 0xd6, 0xf8, 0xc6, 0x30, + 0xe9, 0x9e, 0x1a, 0x8a, 0x2f, 0xd1, 0x96, 0xb3, 0x3c, 0x0f, 0xf5, 0x78, + 0xa7, 0xe0, 0xbd, 0x4b, 0xe0, 0xd8, 0x3d, 0x57, 0xa5, 0x44, 0xa0, 0xd9, + 0x10, 0x79, 0xd2, 0x10, 0x50, 0xc7, 0x77, 0x73, 0x09, 0xf8, 0xb4, 0xcf, + 0x66, 0xe3, 0x0c, 0xfb, 0x96, 0xf8, 0x52, 0xb3, 0x7e, 0x44, 0xf0, 0x03, + 0x54, 0xd4, 0xa2, 0x57, 0x38, 0x8a, 0x96, 0xfc, 0x7c, 0x4c, 0x9f, 0x3a, + 0xf2, 0xa2, 0x48, 0xbb, 0x3e, 0xd1, 0x11, 0x2c, 0xab, 0xdf, 0x53, 0x96, + 0xac, 0x58, 0x33, 0xb9, 0xdd, 0xd2, 0x4f, 0x8a, 0x0a, 0x89, 0x0e, 0xd3, + 0x6f, 0x58, 0x8c, 0xa1, 0x0a, 0x0b, 0xa7, 0xd7, 0x1f, 0x0a, 0x70, 0xe3, + 0x43, 0x12, 0x56, 0xb8, 0x6c, 0xf8, 0x75, 0x4e, 0x2b, 0xb0, 0x17, 0x29, + 0xe4, 0x95, 0x85, 0xd8, 0x85, 0x95, 0x63, 0x55, 0xa8, 0x82, 0xf0, 0xe7, + 0x7d, 0xf3, 0xf1, 0x78, 0x66, 0xd1, 0x92, 0x71, 0x99, 0xad, 0x30, 0x94, + 0xe9, 0x54, 0x2c, 0xe1, 0x57, 0xf3, 0x6a, 0xe6, 0x0c, 0x5e, 0xc7, 0x58, + 0xba, 0xb7, 0x61, 0xd3, 0x74, 0x72, 0x96, 0x06, 0x0b, 0x01, 0x3d, 0xc2, + 0xa1, 0xb4, 0x38, 0x81, 0x19, 0x44, 0xbc, 0x84, 0x52, 0x22, 0xc9, 0x67, + 0x81, 0x99, 0xfb, 0x0a, 0xc2, 0xff, 0x50, 0x67, 0xbe, 0x38, 0x5e, 0x13, + 0x16, 0x60, 0x83, 0x35, 0xb9, 0x2f, 0xa9, 0x55, 0xbb, 0x30, 0x6b, 0x19, + 0xfc, 0x2a, 0x40, 0x24, 0x74, 0x20, 0x57, 0x78, 0xb9, 0x55, 0xb7, 0x70, + 0x86, 0x65, 0x43, 0x1c, 0x76, 0x2e, 0x91, 0x83, 0x5e, 0x33, 0xc2, 0xd4, + 0xcc, 0xb5, 0x1c, 0x45, 0xaf, 0xa3, 0x87, 0x95, 0x9b, 0x77, 0x50, 0x44, + 0x7e, 0xdd, 0xca, 0x3f, 0x51, 0x21, 0xae, 0xf2, 0x15, 0xa9, 0x32, 0x94, + 0xca, 0xde, 0x3b, 0x97, 0x13, 0x6b, 0xff, 0xe0, 0x79, 0x39, 0x40, 0xf0, + 0x66, 0x7d, 0x5e, 0xef, 0xec, 0x0a, 0x35, 0xd2, 0x0d, 0x09, 0x19, 0x13, + 0xf2, 0xc2, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0xdc, 0x07, 0x2e, 0x46, 0xab, 0x4d, 0x6d, 0xf7, 0x24, 0xba, 0x02, 0xe3, + 0xc5, 0xe3, 0xed, 0x64, 0xc6, 0x77, 0x5a, 0x14, 0xae, 0x38, 0x52, 0x8c, + 0x16, 
0x2c, 0x52, 0x0e, 0xf6, 0x65, 0x99, 0xcc, 0xf6, 0x9f, 0x77, 0xcc, + 0x2e, 0xaf, 0x14, 0xd1, 0xf0, 0x0f, 0xa7, 0x3e, 0x5b, 0x74, 0xff, 0xb9, + 0xd3, 0x30, 0x02, 0x5e, 0x52, 0xc8, 0x6f, 0x57, 0xef, 0x28, 0xf5, 0xfa, + 0x9e, 0x70, 0x00, 0xfc, 0x3e, 0xc3, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x04, 0x00, 0x00, 0xaa, 0x9f, 0x86, 0xb0, 0x6d, 0xa1, 0x0c, 0xfa, + 0xef, 0xb3, 0x6a, 0x50, 0xa6, 0xfe, 0xff, 0xa9, 0x61, 0x0b, 0x18, 0x72, + 0xee, 0xc6, 0xcd, 0x3a, 0x34, 0x5e, 0xa8, 0x81, 0x31, 0x54, 0x25, 0x05, + 0xc1, 0xd9, 0x66, 0x3d, 0x17, 0xbb, 0x03, 0x21, 0x07, 0x69, 0x3a, 0x37, + 0xe8, 0xd4, 0x6a, 0x68, 0xe1, 0xa3, 0x19, 0x5a, 0x8d, 0x14, 0x11, 0x09, + 0xef, 0xae, 0xfe, 0x94, 0x19, 0x8a, 0xe4, 0xb9, 0x6e, 0xe8, 0xfa, 0x12, + 0x2a, 0x5d, 0x00, 0x29, 0x27, 0x6d, 0x5a, 0xa5, 0x09, 0x34, 0x79, 0x2b, + 0xa8, 0xcc, 0x42, 0xb4, 0xde, 0xe0, 0x91, 0xb9, 0x06, 0x0c, 0x11, 0x17, + 0x25, 0x7a, 0x35, 0x57, 0x51, 0x40, 0xf3, 0xc7, 0xc6, 0x4a, 0x69, 0x98, + 0x2b, 0x2b, 0x3e, 0x5d, 0x32, 0xd8, 0x8f, 0xb0, 0x1d, 0xee, 0x77, 0xe3, + 0xaf, 0x4f, 0x71, 0x05, 0x04, 0xd2, 0xff, 0x51, 0xed, 0xa4, 0x69, 0x50, + 0x24, 0x2a, 0xe5, 0xaa, 0xbb, 0xc6, 0x7a, 0x7f, 0xb2, 0xdf, 0x1d, 0xc2, + 0x02, 0x2e, 0x52, 0xd1, 0xd9, 0x5b, 0xe7, 0x6c, 0x50, 0x31, 0x4e, 0xdf, + 0x8e, 0x3f, 0x37, 0xfc, 0xf5, 0x34, 0x0e, 0xdb, 0x4c, 0x5d, 0x7d, 0xc8, + 0xe4, 0x72, 0x40, 0xcb, 0x95, 0xa5, 0x41, 0xeb, 0x78, 0x5f, 0x64, 0x20, + 0x55, 0x19, 0xc7, 0xf9, 0x9c, 0x71, 0x40, 0x8f, 0xcc, 0x2d, 0x86, 0xc0, + 0xf4, 0x36, 0x2b, 0x0e, 0x28, 0xb4, 0xad, 0x1b, 0xde, 0x60, 0x67, 0x03, + 0x0f, 0x7c, 0x18, 0xd9, 0xc3, 0x73, 0x67, 0x0d, 0x44, 0x3d, 0xbe, 0x7c, + 0xcf, 0x96, 0x22, 0x0b, 0x0e, 0x3a, 0x0b, 0xcf, 0x04, 0x95, 0x92, 0x7d, + 0x4b, 0xa2, 0x6a, 0x0b, 0x47, 0x72, 0x73, 0xa8, 0x9b, 0x96, 0x3d, 0xc6, + 0x03, 0x34, 0xb1, 0x69, 0xc2, 0x50, 0x60, 0x89, 0x8c, 0x55, 0x8f, 0x8e, + 0x74, 0xa8, 0x9e, 0x25, 0xe4, 0x0e, 0x73, 0xef, 0x4f, 0x51, 0xbe, 0xed, + 0x5c, 0x14, 0xd3, 0xfa, 0x94, 0x58, 0x8d, 0x5c, 0xa0, 0xb1, 0xfc, 0x37, + 0x6e, 
0x9c, 0x9e, 0x61, 0xe5, 0x12, 0x13, 0xb2, 0x88, 0xc6, 0xcf, 0x60, + 0x3f, 0x0d, 0x51, 0x33, 0x22, 0xfa, 0xfb, 0x2d, 0x2b, 0x8d, 0x43, 0x9b, + 0x3d, 0x1e, 0x88, 0x24, 0x50, 0x78, 0xf7, 0x7e, 0x45, 0xb1, 0x0f, 0xa9, + 0xe6, 0x77, 0xf8, 0x78, 0xff, 0x57, 0x6a, 0x05, 0x06, 0x0c, 0x7e, 0x1e, + 0x7f, 0xe9, 0x90, 0xe8, 0x61, 0x68, 0xbc, 0x9e, 0xc4, 0xe5, 0x06, 0x04, + 0x76, 0xcc, 0x01, 0x57, 0x1a, 0x55, 0x9e, 0x45, 0x26, 0xd6, 0xd8, 0xc2, + 0x50, 0x25, 0xfc, 0x72, 0x4e, 0x18, 0xbe, 0xf2, 0x2f, 0xc0, 0x1b, 0xc8, + 0x14, 0xeb, 0x24, 0xda, 0x15, 0x0a, 0x83, 0x38, 0xc5, 0xdd, 0xc9, 0xd7, + 0x12, 0x35, 0x55, 0xdf, 0x2c, 0x23, 0xea, 0x17, 0xca, 0xbf, 0x18, 0xc9, + 0x80, 0x63, 0x4b, 0x77, 0x8b, 0x17, 0x01, 0x05, 0x1b, 0xa3, 0x0b, 0x0f, + 0xdd, 0xc6, 0xe0, 0xdf, 0xc9, 0xa6, 0x8c, 0x50, 0x95, 0x8d, 0x6c, 0x96, + 0x67, 0xff, 0x88, 0x38, 0x3b, 0x76, 0x72, 0x11, 0x35, 0xa0, 0x1c, 0xc8, + 0x96, 0x9c, 0xe5, 0x90, 0x79, 0x0e, 0x62, 0x57, 0x00, 0xd9, 0x57, 0xf8, + 0xa4, 0xc2, 0xc2, 0x0a, 0x17, 0x8e, 0xd7, 0x03, 0x6d, 0x4d, 0x14, 0xb6, + 0x96, 0x8a, 0x76, 0x67, 0x58, 0xce, 0x9c, 0xb3, 0x10, 0x49, 0x06, 0xeb, + 0x56, 0x43, 0x40, 0xcb, 0xd4, 0xd7, 0x59, 0x42, 0xa4, 0xd7, 0x21, 0x6a, + 0x51, 0x3d, 0x1c, 0x54, 0xd7, 0xd6, 0xa2, 0xcf, 0xf8, 0xf6, 0x72, 0x35, + 0x04, 0xa6, 0xe3, 0x53, 0xca, 0xc5, 0x62, 0xee, 0xa9, 0xc3, 0x6d, 0x1b, + 0xc4, 0xc5, 0xd9, 0xa7, 0x37, 0xc2, 0x04, 0x01, 0xc9, 0x4a, 0x2e, 0x26, + 0xdd, 0x12, 0x6e, 0x41, 0x64, 0xb4, 0xe8, 0xe8, 0xc7, 0xf8, 0xab, 0x8a, + 0xab, 0x1d, 0x7f, 0x2d, 0x58, 0xc2, 0xc4, 0xf0, 0x5d, 0x11, 0x35, 0x52, + 0x88, 0xbc, 0x0f, 0x44, 0x6e, 0x91, 0x1e, 0x87, 0xb4, 0xb1, 0x91, 0x52, + 0x32, 0xe4, 0x38, 0x6d, 0x5e, 0x8d, 0x30, 0xf0, 0xbc, 0xc3, 0x15, 0x80, + 0x47, 0x36, 0x35, 0xb0, 0x93, 0xf3, 0xc4, 0x82, 0xc7, 0x73, 0xc1, 0x67, + 0x0c, 0x7a, 0x31, 0x36, 0xbc, 0x73, 0x67, 0x66, 0xae, 0x48, 0x82, 0x27, + 0x6e, 0x14, 0xd0, 0xd5, 0x12, 0x10, 0xce, 0x5e, 0x37, 0xcd, 0x7e, 0xa5, + 0xcb, 0xff, 0x91, 0xf0, 0x62, 0xdb, 0x95, 0x74, 0x0c, 0x8c, 0x1e, 0x78, + 0x11, 
0x02, 0xb3, 0x02, 0x0b, 0x31, 0xe7, 0x4e, 0x8b, 0x58, 0x6a, 0xde, + 0x20, 0x93, 0x8b, 0x8e, 0x62, 0x03, 0x24, 0xc9, 0xca, 0xf8, 0x44, 0x1d, + 0x0c, 0x1b, 0xd8, 0x5d, 0xcc, 0xe2, 0x8e, 0x02, 0xc6, 0x5c, 0x06, 0x45, + 0xe6, 0x94, 0x8f, 0xa2, 0x3e, 0xf5, 0xe9, 0xf5, 0x88, 0x87, 0xb2, 0x84, + 0x1e, 0xb6, 0xb6, 0xfc, 0x9f, 0x8e, 0x79, 0xf5, 0x4b, 0x24, 0x81, 0x3e, + 0x5d, 0xf4, 0x10, 0x6e, 0xdd, 0x8c, 0x8c, 0xae, 0xc6, 0x2c, 0x26, 0xb2, + 0xfc, 0xf3, 0x99, 0xe8, 0x8c, 0x65, 0x5d, 0x6c, 0xa8, 0x1d, 0x6f, 0x1e, + 0x32, 0x0a, 0xee, 0x87, 0xf6, 0xe1, 0xdd, 0x5e, 0x7f, 0x7a, 0x90, 0x8c, + 0x3f, 0xe8, 0x47, 0x95, 0x9b, 0xc8, 0x2c, 0x49, 0xc9, 0xe4, 0x2d, 0xea, + 0x58, 0xfc, 0x29, 0x1a, 0xb7, 0xa1, 0xf9, 0xb8, 0x84, 0x41, 0xa0, 0xf1, + 0x77, 0x83, 0x56, 0x73, 0x86, 0xea, 0xf4, 0xf5, 0x2a, 0xa6, 0x6b, 0x00, + 0x64, 0x39, 0x08, 0x8f, 0xf0, 0x22, 0x1a, 0x4c, 0xf2, 0x5a, 0xd0, 0xaa, + 0x39, 0xae, 0x8a, 0xbc, 0x03, 0x99, 0xf7, 0xcc, 0x80, 0xdf, 0x2b, 0x85, + 0xbe, 0x1a, 0x97, 0x28, 0x63, 0x04, 0x72, 0x75, 0x75, 0xb4, 0x9c, 0xd3, + 0x17, 0xcc, 0x1e, 0xa1, 0xd2, 0x47, 0x18, 0x45, 0xad, 0xb4, 0x0a, 0x32, + 0x31, 0x36, 0x64, 0x48, 0x3f, 0x7b, 0x4b, 0xc0, 0xd6, 0x78, 0x46, 0xaa, + 0x90, 0x89, 0xf9, 0x36, 0x3d, 0xb4, 0xb3, 0x50, 0x51, 0xd9, 0x55, 0x6f, + 0xa9, 0xe7, 0x25, 0xaf, 0xa0, 0xca, 0x9d, 0x45, 0x83, 0xc3, 0x0b, 0x2a, + 0x0c, 0xf9, 0x3f, 0xe4, 0x08, 0xf4, 0xbd, 0x23, 0x45, 0x85, 0xcf, 0x41, + 0x93, 0xd3, 0x21, 0x5f, 0x53, 0xa2, 0x5b, 0xa9, 0xf5, 0xe9, 0x8f, 0x2a, + 0x2d, 0x53, 0x3c, 0x36, 0x17, 0xce, 0x37, 0x35, 0x3e, 0x9e, 0x6b, 0xbc, + 0xba, 0xaa, 0xa5, 0x61, 0x79, 0x98, 0x8e, 0xbd, 0x19, 0xf4, 0x5f, 0xa9, + 0xb8, 0x96, 0xa2, 0xce, 0x32, 0x00, 0xab, 0x51, 0xcb, 0xfa, 0x30, 0x3a, + 0x83, 0x92, 0x91, 0xad, 0x08, 0x61, 0x62, 0x51, 0x7f, 0x19, 0xa9, 0x2a, + 0x84, 0xf2, 0xab, 0x7e, 0x5e, 0xa7, 0x5a, 0x54, 0x7f, 0x68, 0x2a, 0x7b, + 0x4f, 0xde, 0x45, 0x1d, 0xef, 0x73, 0x5f, 0xc0, 0x40, 0x6e, 0xec, 0x6c, + 0xe9, 0xa5, 0x6b, 0x46, 0x54, 0x7c, 0x24, 0x8b, 0xa4, 0xe5, 0xb4, 0x82, + 0x31, 
0x1f, 0x3e, 0x79, 0x2e, 0x21, 0x8c, 0xf1, 0xbd, 0xad, 0x7c, 0x28, + 0xcc, 0xbd, 0x58, 0x72, 0xe9, 0x6a, 0x04, 0x56, 0x67, 0x0f, 0x62, 0x98, + 0x5a, 0x97, 0x4b, 0xe2, 0x67, 0x70, 0xbb, 0x17, 0xb1, 0x84, 0x5b, 0xd4, + 0x6e, 0xab, 0x90, 0x29, 0x20, 0x93, 0x34, 0xa8, 0x03, 0x0f, 0xed, 0x1a, + 0xf0, 0x1b, 0x92, 0x87, 0x43, 0xa5, 0x6a, 0x1c, 0xdc, 0xd7, 0x22, 0x68, + 0x83, 0x98, 0x74, 0x2a, 0x4c, 0x51, 0xef, 0x71, 0x19, 0xd5, 0x3d, 0x05, + 0x19, 0x61, 0xb2, 0x52, 0xa8, 0x6e, 0xda, 0x72, 0x51, 0x66, 0x9f, 0xf0, + 0x12, 0xf6, 0x18, 0x60, 0xcc, 0xd7, 0x2f, 0x2e, 0x83, 0x14, 0x09, 0xdb, + 0x55, 0x1c, 0xf2, 0xaf, 0xfd, 0xa4, 0x40, 0xf1, 0x4a, 0xc7, 0xff, 0xff, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x9c, 0x52, 0xff, 0x48, + 0x06, 0x61, 0x76, 0x6d, 0xd7, 0x44, 0xb1, 0x0c, 0x32, 0x62, 0x15, 0xa1, + 0xc3, 0x97, 0x03, 0xdd, 0xed, 0x20, 0x3c, 0x3a, 0x09, 0x16, 0xe5, 0x7d, + 0x8c, 0xf9, 0x7b, 0x22, 0x5e, 0x3a, 0xdd, 0xf0, 0xc6, 0xf0, 0x3a, 0xd4, + 0x94, 0x85, 0x1c, 0x60, 0x74, 0x91, 0xa3, 0xe2, 0x8a, 0xe5, 0x3e, 0xd4, + 0x95, 0x28, 0x8b, 0x1a, 0x7b, 0xbe, 0x07, 0xc0, 0xe3, 0x6b, 0xb9, 0x85, + 0x82, 0x0b, 0x24, 0xba, 0x1c, 0xfc, 0xc0, 0x0a, 0x21, 0x33, 0xad, 0x00, + 0x19, 0xce, 0xb5, 0x8f, 0x73, 0x05, 0xf1, 0xac, 0x03, 0xbe, 0x1f, 0x22, + 0xd5, 0x32, 0x5e, 0x50, 0xe3, 0xe0, 0x62, 0x26, 0xf4, 0xb0, 0x85, 0xd8, + 0xf7, 0xa7, 0xf4, 0xa7, 0xff, 0x10, 0xb8, 0xbc, 0xe0, 0x3e, 0x4d, 0xcb, + 0x37, 0x74, 0xcc, 0x85, 0xed, 0xa0, 0x34, 0x6c, 0xfa, 0x37, 0x84, 0x6a, + 0x94, 0x55, 0x3b, 0x1e, 0x14, 0xab, 0x26, 0x7b, 0x3e, 0xac, 0xc3, 0x79, + 0xcd, 0x1b, 0x00, 0x02, 0xb3, 0x01, 0xc3, 0x10, 0xdd, 0x56, 0x7d, 0x0e, + 0x69, 0x39, 0x3c, 0x17, 0xa3, 0xae, 0x9c, 0x2d, 0xc7, 0x5a, 0x0b, 0x7c, + 0xd0, 0xac, 0xa1, 0x91, 0x6a, 0x6d, 0xc0, 0x3f, 0x98, 0xf1, 0x21, 0xf5, + 0xa5, 0x7c, 0xbc, 0x70, 0x0d, 0x7b, 0x2f, 0x0d, 0x5a, 0xa5, 0x4a, 0x5a, + 0xff, 0x51, 0xbf, 0x7f, 0xb5, 0x4f, 0x2c, 0xba, 0xa9, 0x46, 0x81, 0x6b, + 0xac, 0xc6, 0x62, 0x2d, 0xd7, 0xb5, 0x04, 0x5f, 0xd4, 0x5f, 0x1f, 0x6b, + 0x11, 
0x7d, 0xe3, 0x58, 0x1f, 0xb5, 0xbf, 0x16, 0x43, 0x88, 0x05, 0xf5, + 0xa4, 0x7b, 0xb5, 0x0e, 0xf4, 0x01, 0xb6, 0x90, 0x69, 0x52, 0x0a, 0x5e, + 0x9b, 0x87, 0x51, 0x5e, 0xd5, 0xed, 0x2c, 0xcc, 0x58, 0xad, 0xe6, 0x77, + 0xa2, 0xc5, 0x7c, 0x1e, 0xc5, 0x92, 0xbe, 0xed, 0x3a, 0x9a, 0x97, 0xed, + 0x56, 0xc8, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, + 0x16, 0xe8, 0x24, 0xe3, 0x82, 0x36, 0x8e, 0x50, 0x45, 0xbe, 0xc6, 0x10, + 0x02, 0xb9, 0x6d, 0xf9, 0xed, 0x8f, 0x64, 0x35, 0x4d, 0x2c, 0x9f, 0x99, + 0xdc, 0xee, 0xfa, 0x63, 0x99, 0xc4, 0xb8, 0x3d, 0x77, 0xea, 0xda, 0xd5, + 0x95, 0x8b, 0x8e, 0x76, 0x02, 0x9c, 0x62, 0xa0, 0xad, 0xfe, 0x80, 0x61, + 0x72, 0x59, 0xd6, 0x9f, 0x16, 0x2e, 0x09, 0x71, 0xb8, 0xd7, 0x65, 0x25, + 0xc2, 0x5b, 0x40, 0x67, 0x8e, 0xd6, 0xf8, 0xdf, 0x67, 0x29, 0x19, 0xa2, + 0xa6, 0x07, 0xf3, 0xc8, 0x91, 0x7d, 0xf2, 0x50, 0x71, 0xba, 0x5c, 0x2d, + 0xa7, 0xae, 0xc4, 0xd5, 0xeb, 0xb9, 0x0d, 0x2d, 0x23, 0xe5, 0x8c, 0x65, + 0xf5, 0xf8, 0x97, 0x69, 0xde, 0x25, 0x6f, 0xea, 0x12, 0x72, 0x3e, 0xb9, + 0xa7, 0x8d, 0xcf, 0xa5, 0x66, 0xee, 0x4e, 0x2e, 0x66, 0x6b, 0xec, 0x77, + 0x7f, 0x53, 0xdc, 0x29, 0x73, 0x5e, 0xe9, 0x2f, 0x79, 0xac, 0x8d, 0x0f, + 0x44, 0x09, 0x5d, 0x25, 0x1d, 0x78, 0xb6, 0xe9, 0xd0, 0xfa, 0x8f, 0x5f, + 0x9c, 0xf0, 0xe0, 0xfc, 0x62, 0x9f, 0x52, 0x6b, 0x5b, 0x8e, 0x3f, 0xdf, + 0xb4, 0xf1, 0xdf, 0x35, 0xd0, 0x8f, 0x5a, 0xc9, 0x1f, 0x08, 0x86, 0xaa, + 0x5a, 0x9e, 0xe8, 0xb0, 0xaa, 0xd4, 0xcd, 0x2a, 0x5b, 0x4f, 0x7f, 0x39, + 0x9f, 0x7f, 0x21, 0xf2, 0xfd, 0x05, 0x96, 0x53, 0x09, 0xfd, 0x36, 0x4c, + 0xcd, 0x98, 0x74, 0xf5, 0xbd, 0xcd, 0x9e, 0x14, 0x15, 0x05, 0xb9, 0x3d, + 0x5f, 0x8a, 0x02, 0x86, 0x10, 0xd7, 0xd4, 0x01, 0x20, 0xd9, 0x8c, 0x65, + 0x7d, 0x9d, 0x39, 0x25, 0xbc, 0xce, 0x1a, 0xb1, 0x76, 0x92, 0xc3, 0x03, + 0xed, 0xa2, 0x41, 0x31, 0x0d, 0xc0, 0x40, 0x94, 0x01, 0xbc, 0x9b, 0xe9, + 0x5e, 0x3e, 0x8c, 0x49, 0xf6, 0x98, 0x0c, 0x39, 0x79, 0xdc, 0xd1, 0x1b, + 0xc5, 0xb2, 0x20, 0xb4, 0x6c, 0xb4, 0x4f, 0xce, 0xf4, 0x6c, 0x0b, 0xef, + 0x85, 
0xf2, 0x7d, 0x9a, 0x90, 0x58, 0x1b, 0x51, 0x56, 0x52, 0xac, 0x75, + 0x9f, 0x17, 0xe6, 0x48, 0xaf, 0x18, 0x4c, 0xd8, 0x67, 0xe8, 0xd2, 0x61, + 0xbc, 0xa0, 0x95, 0xc9, 0x78, 0xd8, 0xa2, 0x1d, 0x47, 0x59, 0x30, 0xcf, + 0xf3, 0x79, 0x06, 0xd4, 0x25, 0xf8, 0x9c, 0x5c, 0x28, 0xee, 0xb0, 0xd2, + 0xb6, 0xaf, 0x34, 0x0e, 0xe5, 0xe4, 0x16, 0x2e, 0x05, 0x45, 0x23, 0xc1, + 0x88, 0x90, 0x4a, 0x8f, 0xff, 0xfb, 0xe2, 0xc0, 0xb7, 0xae, 0xb5, 0x50, + 0xc9, 0x26, 0xf0, 0xa2, 0xf5, 0x21, 0x23, 0x79, 0x23, 0xb6, 0x8f, 0x57, + 0x64, 0xd1, 0x27, 0xc2, 0x07, 0x63, 0xa6, 0x54, 0x1f, 0x2f, 0xca, 0x16, + 0xb8, 0x28, 0x51, 0x2a, 0x92, 0xe0, 0x06, 0x36, 0x55, 0x00, 0x6c, 0x99, + 0x31, 0xa7, 0x56, 0xb3, 0x7b, 0x15, 0xcd, 0xc1, 0x32, 0x3a, 0xc0, 0x37, + 0x1f, 0xea, 0x29, 0xb6, 0x75, 0xdf, 0x8a, 0x17, 0x09, 0x45, 0xc2, 0x6e, + 0xe2, 0x4c, 0xa5, 0x93, 0x9b, 0x17, 0x08, 0x27, 0x75, 0x33, 0xdb, 0x1f, + 0xab, 0x37, 0xad, 0x8e, 0xaa, 0xef, 0x0b, 0x82, 0xaa, 0xa7, 0xae, 0x2c, + 0x43, 0x4d, 0x8f, 0xa0, 0x43, 0xd7, 0xa1, 0x34, 0xeb, 0xc0, 0x4e, 0xbd, + 0x64, 0xfc, 0xc8, 0x6a, 0x56, 0xa8, 0xfc, 0x9e, 0x2d, 0x5f, 0x7a, 0xa3, + 0x72, 0x06, 0x79, 0x38, 0x33, 0x05, 0xa7, 0xf0, 0x09, 0x48, 0x55, 0xfe, + 0x3f, 0xab, 0x25, 0x8e, 0x76, 0x1d, 0x12, 0x5a, 0x20, 0x68, 0xfb, 0x51, + 0x51, 0x33, 0x40, 0x37, 0x0c, 0x90, 0x98, 0x6f, 0x66, 0x3f, 0x40, 0xa2, + 0x2e, 0x3c, 0xd1, 0x22, 0x51, 0x54, 0x25, 0x7e, 0x4c, 0x5d, 0x96, 0xb2, + 0x65, 0x0f, 0xa3, 0xdf, 0x8e, 0x97, 0xfe, 0xeb, 0xe7, 0xc6, 0x22, 0x2a, + 0x47, 0x3a, 0x78, 0x1b, 0x39, 0x2e, 0xd6, 0xbc, 0x35, 0xb4, 0xf4, 0xc3, + 0xf2, 0x6a, 0x12, 0xc9, 0xe7, 0x6c, 0x9a, 0xfc, 0xed, 0xbc, 0x11, 0xc7, + 0x71, 0x09, 0x8f, 0x56, 0xc1, 0xd8, 0xb6, 0x92, 0x35, 0x97, 0x8e, 0x71, + 0xd2, 0xbb, 0xb4, 0xed, 0xf0, 0x7e, 0xff, 0x58, 0xd9, 0x95, 0x26, 0xea, + 0xa9, 0x4d, 0x38, 0x8d, 0x4e, 0x8e, 0x53, 0xae, 0x7e, 0xe6, 0xe6, 0x82, + 0x35, 0x96, 0xab, 0x0f, 0x04, 0x0f, 0xf2, 0xac, 0x1b, 0xcd, 0x07, 0x17, + 0x1b, 0x25, 0x2f, 0x92, 0xaf, 0x19, 0xa2, 0x1b, 0xa0, 0x7a, 0xc7, 0x4f, + 0xb8, 
0x1b, 0x89, 0x21, 0xb5, 0xe2, 0x24, 0xe9, 0x78, 0xae, 0x7d, 0xd7, + 0xcc, 0x8e, 0x3f, 0xa7, 0xe9, 0xbe, 0xe6, 0x79, 0x0f, 0xdf, 0x86, 0xe9, + 0xb9, 0xcd, 0x82, 0x7b, 0xf5, 0x04, 0x89, 0xa0, 0x73, 0x5d, 0xa2, 0x4e, + 0xd6, 0xa0, 0x60, 0x21, 0xe2, 0xfe, 0xd3, 0xf4, 0x19, 0x8b, 0x6a, 0x03, + 0x12, 0x9c, 0x51, 0x9a, 0x41, 0x4e, 0xf6, 0xb4, 0x6e, 0x0c, 0x43, 0xf5, + 0x00, 0x00, 0x78, 0x12, 0xdd, 0x21, 0xa8, 0xc7, 0x21, 0xa1, 0x4e, 0x44, + 0x10, 0xd0, 0xdb, 0x6f, 0x0b, 0x4c, 0xe7, 0x7a, 0x8c, 0x0c, 0xaa, 0xb6, + 0x9a, 0x7d, 0xa9, 0xff, 0x5a, 0x2e, 0x15, 0x9e, 0x6f, 0xea, 0xe1, 0x42, + 0x0c, 0x9c, 0x5a, 0x3b, 0xd5, 0xe6, 0xde, 0x23, 0x3f, 0x9c, 0x45, 0x20, + 0x67, 0x96, 0x50, 0x16, 0x80, 0x42, 0xe7, 0x67, 0x7d, 0x24, 0xdc, 0x00, + 0xaa, 0x01, 0x8a, 0xa3, 0x61, 0xfe, 0x9a, 0xce, 0xc1, 0xe5, 0x2e, 0x19, + 0x85, 0x04, 0xe6, 0x7b, 0xe8, 0x7a, 0xbc, 0x9d, 0xfe, 0x71, 0x29, 0x1d, + 0x17, 0xae, 0x6b, 0x1a, 0x64, 0xd7, 0xfe, 0x18, 0x29, 0x07, 0x9b, 0x49, + 0x43, 0xba, 0x29, 0x37, 0xa8, 0xb0, 0x26, 0x27, 0x6b, 0x7d, 0xde, 0x49, + 0x12, 0x90, 0x05, 0xe2, 0x2c, 0xd8, 0x08, 0xd0, 0x5d, 0x74, 0xa7, 0x15, + 0xbe, 0x34, 0x34, 0x6d, 0xad, 0xfb, 0xa8, 0x01, 0x4a, 0x6c, 0x98, 0xba, + 0x84, 0x38, 0xbd, 0x05, 0xe8, 0x87, 0x27, 0x91, 0x3f, 0xb8, 0xe9, 0x06, + 0x27, 0xda, 0x56, 0x07, 0xaa, 0xea, 0xf4, 0x80, 0x5c, 0x12, 0x44, 0xbe, + 0x23, 0xb3, 0x63, 0x9f, 0x5f, 0x37, 0xa7, 0x53, 0x4c, 0xfc, 0x4d, 0x87, + 0xeb, 0x91, 0xe8, 0xd7, 0x5a, 0xd6, 0xca, 0x67, 0x2d, 0x2f, 0x5a, 0x0e, + 0xc7, 0x82, 0x78, 0xa4, 0xf3, 0x56, 0x07, 0xa5, 0xab, 0x6d, 0x09, 0xd2, + 0x0d, 0x08, 0x6b, 0x6e, 0x1f, 0xc1, 0xf2, 0x91, 0x1a, 0x39, 0xfe, 0x14, + 0x56, 0x3f, 0xeb, 0x9f, 0x14, 0xc2, 0xb3, 0xb2, 0xc2, 0x8d, 0xc2, 0xee, + 0x7e, 0xf0, 0x7d, 0x92, 0xd2, 0xc3, 0x57, 0x3e, 0x2c, 0x07, 0x1b, 0x6a, + 0x9b, 0x3b, 0x79, 0x59, 0xc9, 0x22, 0x96, 0x6c, 0x3e, 0x37, 0xd3, 0x0e, + 0x5c, 0xf6, 0x8f, 0xa9, 0xaa, 0xc9, 0xa4, 0x4b, 0xaf, 0x5d, 0x1a, 0xb6, + 0xf3, 0x91, 0x32, 0x4f, 0xca, 0x72, 0xa0, 0x42, 0x01, 0x51, 0xaf, 0x19, + 0x89, 
0xc4, 0xcc, 0x9b, 0xf3, 0x52, 0xe9, 0xa6, 0xf2, 0x71, 0x6f, 0x5a, + 0x38, 0x02, 0xb8, 0x75, 0x88, 0x5f, 0x8d, 0x12, 0xc5, 0x55, 0x4f, 0xd1, + 0xba, 0xf2, 0x24, 0xdc, 0x63, 0x5f, 0x93, 0xc7, 0xf3, 0xe7, 0x59, 0xac, + 0xc3, 0xed, 0xbc, 0x02, 0xe3, 0xad, 0xb2, 0x8e, 0x2c, 0x2d, 0x47, 0xb4, + 0x34, 0x8d, 0xae, 0x44, 0xc8, 0x5f, 0x14, 0xe8, 0x8e, 0x7b, 0xc3, 0x60, + 0x53, 0x9a, 0x51, 0xea, 0x7f, 0x2f, 0xb6, 0x62, 0x61, 0xf7, 0xc0, 0x18, + 0x0f, 0x20, 0x79, 0x13, 0x5c, 0xe8, 0xca, 0x04, 0x29, 0x5f, 0x70, 0x4d, + 0x88, 0xa2, 0x43, 0x20, 0x57, 0x33, 0x04, 0x74, 0x8e, 0x7c, 0x89, 0xd4, + 0x56, 0x8f, 0x93, 0x86, 0x81, 0x6c, 0x11, 0xfc, 0x32, 0x0e, 0xb0, 0x3e, + 0xe5, 0x13, 0xbf, 0x76, 0x62, 0xcc, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x04, 0x00, 0x00, 0x0e, 0xf8, 0x8f, 0xde, 0xfd, 0xfd, 0xcf, 0xd1, + 0x6f, 0x9f, 0xf2, 0xb6, 0xb6, 0x59, 0xb2, 0x73, 0x1c, 0x3c, 0x0d, 0xb0, + 0x4d, 0xb8, 0x96, 0xc6, 0xeb, 0xe5, 0xf8, 0x0d, 0x3e, 0xd7, 0x0c, 0xbd, + 0x9c, 0xaa, 0xd5, 0x1c, 0x19, 0x9a, 0x4c, 0x8e, 0xfa, 0xac, 0x68, 0x74, + 0x16, 0x06, 0xb5, 0x49, 0xe7, 0xd5, 0x6f, 0x4f, 0xcc, 0xd9, 0x02, 0x74, + 0xd6, 0x08, 0x73, 0x7c, 0xa9, 0xfa, 0x3e, 0x50, 0x87, 0xf7, 0xfb, 0xa6, + 0x94, 0xdc, 0xb1, 0x40, 0xec, 0xa7, 0xa9, 0x39, 0xff, 0x40, 0x4a, 0x97, + 0x9b, 0xcc, 0x57, 0x66, 0x68, 0xd6, 0xa8, 0x4d, 0x13, 0x06, 0x0e, 0x03, + 0xc4, 0xdf, 0x7a, 0xe4, 0x2f, 0x0e, 0xd7, 0x54, 0xe0, 0xbd, 0x93, 0xeb, + 0x82, 0xd8, 0x05, 0x2d, 0xa2, 0xf0, 0x4e, 0xd0, 0xf9, 0x3e, 0x3e, 0x6b, + 0x3d, 0x08, 0x39, 0x4e, 0x35, 0x13, 0x7b, 0x3b, 0x39, 0x2c, 0x47, 0x2c, + 0x61, 0x9f, 0xfd, 0x59, 0x88, 0x5f, 0x65, 0x08, 0xa9, 0x66, 0xec, 0xb5, + 0x21, 0xf3, 0xe9, 0xba, 0x11, 0x63, 0x24, 0x6c, 0xf4, 0x50, 0x3a, 0xe5, + 0x0c, 0x06, 0x39, 0x69, 0x2f, 0xca, 0x0f, 0x48, 0xbe, 0x95, 0x7d, 0x13, + 0x3d, 0xa5, 0x75, 0x69, 0x85, 0xc8, 0xb3, 0x72, 0x72, 0x3c, 0x4f, 0x96, + 0xe7, 0xb7, 0xbd, 0xe7, 0x76, 0xba, 0xac, 0xc0, 0x07, 0x4d, 0xc1, 0xed, + 0xb9, 0xf0, 0x91, 0x2e, 0x36, 0xb7, 0x5b, 0x1c, 0xb7, 0xd6, 0xb3, 0x45, + 0x7d, 
0x0a, 0xf5, 0x43, 0xdd, 0x7a, 0x8b, 0x4e, 0x18, 0xf2, 0xf3, 0x19, + 0xcd, 0x4a, 0xda, 0x3c, 0x1b, 0x05, 0x27, 0x67, 0x43, 0xa9, 0x8e, 0xe7, + 0x4a, 0x95, 0xa9, 0xad, 0x6c, 0x8c, 0xb2, 0x2e, 0x12, 0xcb, 0xf3, 0xeb, + 0x65, 0x26, 0xf4, 0x3e, 0x86, 0xee, 0x7e, 0xd9, 0xba, 0xce, 0x8d, 0x15, + 0x3e, 0xa8, 0x40, 0x59, 0x1d, 0x27, 0x78, 0x75, 0xf0, 0xf9, 0x33, 0xb5, + 0x32, 0xa9, 0x66, 0xe6, 0x2e, 0x2e, 0x3d, 0xf5, 0x4a, 0xf0, 0x97, 0x2d, + 0xe7, 0x43, 0x85, 0x43, 0x61, 0x25, 0x15, 0x13, 0x9e, 0x8e, 0xf6, 0x78, + 0xe8, 0x67, 0xba, 0xc2, 0x6d, 0xda, 0x46, 0x25, 0x76, 0xd9, 0x9b, 0x69, + 0x95, 0x4b, 0x50, 0x8c, 0xb7, 0x36, 0x49, 0xbc, 0xd7, 0x39, 0x69, 0xb9, + 0xc1, 0x5f, 0x5f, 0xcc, 0x83, 0x4c, 0x16, 0xb8, 0x0c, 0x85, 0xf1, 0xa4, + 0x57, 0x6c, 0x22, 0x1f, 0x60, 0x0c, 0xff, 0xb6, 0xc9, 0xf7, 0x21, 0x2d, + 0x35, 0x78, 0x31, 0x79, 0xd0, 0x6d, 0x61, 0xec, 0x61, 0x04, 0x75, 0x5c, + 0x06, 0xc3, 0x53, 0x1b, 0xb5, 0xdc, 0x23, 0xb9, 0xd9, 0x07, 0xd1, 0xd0, + 0xb3, 0xa5, 0xab, 0xd9, 0xbe, 0xb7, 0xdc, 0xae, 0x3f, 0x3e, 0xd7, 0x2a, + 0x79, 0x3f, 0x9c, 0x27, 0x81, 0x8d, 0x61, 0xe8, 0x46, 0x8f, 0x05, 0xf4, + 0x9c, 0x30, 0x35, 0x9a, 0x2f, 0x62, 0x84, 0x7c, 0xa5, 0x95, 0x68, 0x34, + 0xe6, 0xf0, 0xb9, 0x42, 0xd4, 0x37, 0xc6, 0xd2, 0x35, 0x1f, 0x7b, 0xe0, + 0xa6, 0x92, 0xcf, 0xf7, 0x0f, 0x08, 0x10, 0x79, 0xbd, 0xa8, 0x7c, 0x4e, + 0xef, 0xf1, 0x01, 0x8d, 0x1b, 0x0c, 0x98, 0x46, 0x28, 0xdc, 0xd5, 0xa8, + 0xcf, 0x67, 0x7d, 0x87, 0x2a, 0x8f, 0xdd, 0x52, 0x43, 0x5a, 0x55, 0x80, + 0x88, 0xa6, 0xcd, 0x9c, 0x5d, 0x36, 0xae, 0xef, 0x61, 0x43, 0xec, 0xf0, + 0x7f, 0x92, 0x21, 0x1f, 0xa2, 0xa3, 0x76, 0x0e, 0x5d, 0xf3, 0xa7, 0xe7, + 0x7d, 0xb0, 0x2c, 0x94, 0x36, 0x95, 0x34, 0x4e, 0x04, 0xfb, 0x51, 0xf9, + 0xe6, 0x7e, 0x56, 0x7a, 0x59, 0xce, 0x0a, 0x45, 0x7e, 0xeb, 0xc4, 0xbc, + 0xfd, 0x20, 0xaa, 0x34, 0x6b, 0xee, 0x3b, 0x09, 0xe8, 0x00, 0x4b, 0xfc, + 0x68, 0x24, 0x43, 0xdb, 0x09, 0x58, 0xd0, 0xb6, 0xbf, 0xaf, 0x1d, 0x7f, + 0x8a, 0x4c, 0x9e, 0x51, 0x97, 0x97, 0xe1, 0x0c, 0x0d, 0xaf, 0xd1, 0x1e, + 0x62, 
0xad, 0x70, 0xa5, 0x8a, 0x24, 0x2f, 0x4a, 0xa6, 0x55, 0xb1, 0x44, + 0x09, 0x88, 0xab, 0xa5, 0x45, 0x28, 0xa0, 0x34, 0x9e, 0x14, 0x2c, 0xf9, + 0x0f, 0xb8, 0x33, 0x8f, 0xcc, 0xba, 0x50, 0x34, 0x4c, 0x96, 0x89, 0x09, + 0xb9, 0xa8, 0xfb, 0xac, 0x59, 0x73, 0xea, 0x61, 0xbc, 0x0d, 0x24, 0x3a, + 0x20, 0xc2, 0x76, 0xfc, 0x2e, 0xce, 0xfb, 0x75, 0x00, 0xca, 0x58, 0xbd, + 0xab, 0x61, 0x9b, 0x13, 0x2b, 0xa3, 0xf6, 0x15, 0x55, 0x83, 0x23, 0xc4, + 0xf3, 0x4c, 0x89, 0xc5, 0x4a, 0x18, 0x5c, 0x8d, 0x41, 0xcc, 0x06, 0x7b, + 0xe3, 0x2a, 0x1f, 0x6a, 0x57, 0xbc, 0x54, 0x61, 0x0c, 0xf2, 0xec, 0xbf, + 0xb0, 0xf0, 0x21, 0xde, 0xfc, 0xe4, 0xef, 0xce, 0x47, 0xc8, 0xdc, 0x11, + 0xc7, 0x8a, 0x12, 0x97, 0x68, 0x1d, 0x9e, 0x9a, 0xbf, 0xad, 0x62, 0x7e, + 0x4b, 0x88, 0xd7, 0x20, 0x22, 0xce, 0x5e, 0xe3, 0x87, 0x12, 0xa3, 0x05, + 0xef, 0x1f, 0x05, 0xb1, 0xbd, 0x1b, 0x80, 0x43, 0x84, 0x33, 0x8b, 0x87, + 0xa5, 0xc2, 0xe1, 0x49, 0xa8, 0x75, 0x49, 0x9b, 0x1b, 0x64, 0x8a, 0xd0, + 0x86, 0x10, 0xa8, 0x72, 0xeb, 0x2e, 0xe7, 0x3f, 0xaa, 0x6b, 0x4a, 0x22, + 0xae, 0x17, 0x8f, 0x10, 0x22, 0x03, 0x66, 0x67, 0x35, 0x40, 0x29, 0x1e, + 0xf2, 0x05, 0x36, 0xd5, 0xed, 0xe2, 0x2a, 0xcc, 0x77, 0xe2, 0x16, 0xef, + 0xa7, 0x9b, 0xe1, 0x1b, 0xba, 0xf3, 0xf5, 0x74, 0x6c, 0x2a, 0x98, 0x8a, + 0x14, 0xaf, 0x2c, 0xab, 0xfb, 0x51, 0x53, 0x75, 0x17, 0xcb, 0x5c, 0x86, + 0xb5, 0x60, 0x70, 0x29, 0x65, 0x69, 0x49, 0x42, 0x4f, 0x42, 0x6b, 0xc7, + 0xdb, 0x98, 0x7d, 0x1e, 0xf8, 0x45, 0xb2, 0x33, 0xd6, 0x34, 0x26, 0xa6, + 0x7f, 0x76, 0x31, 0x13, 0x13, 0x9d, 0xd2, 0xb0, 0x30, 0x0b, 0x0b, 0x3e, + 0x1a, 0x84, 0xb0, 0xbd, 0x81, 0x34, 0x25, 0x73, 0x99, 0x87, 0x1a, 0xc8, + 0x44, 0x34, 0x9d, 0x1a, 0x3d, 0x76, 0x44, 0x1d, 0xe2, 0x22, 0xad, 0x3d, + 0xb2, 0xa3, 0x1c, 0xd5, 0x27, 0x8c, 0xc6, 0x84, 0xdf, 0x33, 0xbe, 0xb2, + 0xa7, 0xb9, 0xc5, 0x6e, 0x48, 0xdc, 0xe9, 0xf8, 0xef, 0xfc, 0xaa, 0x1f, + 0x5e, 0x41, 0x48, 0x1e, 0xe0, 0xb9, 0xd6, 0x6e, 0x7a, 0x9c, 0xa3, 0x98, + 0x4b, 0xfa, 0x90, 0xa4, 0x58, 0x33, 0x85, 0x3b, 0x11, 0x44, 0x83, 0x4b, + 0x1e, 
0x0e, 0x5d, 0x11, 0x36, 0x15, 0xe1, 0xbf, 0x15, 0x04, 0x8e, 0x88, + 0xc6, 0x18, 0x53, 0xc3, 0x8d, 0x28, 0x86, 0x25, 0xef, 0x55, 0x7b, 0xf6, + 0x85, 0xf8, 0xed, 0x3b, 0xcf, 0x5d, 0xa6, 0xc7, 0x66, 0xb7, 0xbe, 0x14, + 0xf0, 0x62, 0x89, 0x1f, 0x32, 0x1e, 0x86, 0x2a, 0x93, 0xd5, 0xca, 0x37, + 0x03, 0x0b, 0xf8, 0x0f, 0xca, 0x50, 0x6c, 0x16, 0x2b, 0xf0, 0x77, 0xca, + 0xbb, 0x8e, 0x95, 0x11, 0xef, 0x5b, 0xbe, 0x2f, 0x62, 0x50, 0xb8, 0x3d, + 0xff, 0xfa, 0x30, 0x21, 0xb2, 0x86, 0x3f, 0x50, 0x57, 0x98, 0x79, 0x15, + 0xce, 0x3e, 0xbf, 0x49, 0x58, 0xb0, 0xb5, 0xd7, 0xbe, 0x01, 0x55, 0xee, + 0x60, 0x14, 0x9d, 0x5b, 0x57, 0x48, 0x05, 0x72, 0x6a, 0x23, 0x29, 0xeb, + 0xf3, 0x36, 0x2a, 0xc1, 0xda, 0x5e, 0x4a, 0x63, 0xc4, 0x6b, 0x04, 0xe8, + 0xe8, 0xc1, 0xb5, 0xc4, 0x2d, 0x60, 0x1f, 0xa0, 0x2b, 0x33, 0xa5, 0xb7, + 0x82, 0x59, 0x21, 0xba, 0x13, 0xda, 0x79, 0xda, 0x5a, 0xb1, 0x82, 0x5b, + 0x52, 0x7f, 0x0c, 0x70, 0x75, 0x65, 0xe0, 0x44, 0xb3, 0xca, 0xd0, 0x09, + 0x38, 0x24, 0x83, 0x8e, 0x0c, 0x4c, 0xef, 0x96, 0xe4, 0x04, 0x30, 0x46, + 0x23, 0x6a, 0x28, 0x13, 0x1d, 0x37, 0x14, 0x75, 0x6e, 0xd0, 0xff, 0xff, + 0x04, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x21, 0xa2, 0xf0, 0x7d, + 0x29, 0x8f, 0x62, 0x2e, 0xf4, 0x0e, 0x14, 0x9b, 0x60, 0x38, 0xc0, 0x95, + 0xfb, 0x3c, 0x90, 0x5a, 0xa0, 0x1f, 0x30, 0x09, 0xfc, 0x6d, 0xa9, 0xd1, + 0x7b, 0x0b, 0x7c, 0x78, 0xf9, 0xf6, 0xa8, 0x5e, 0xa6, 0x7a, 0xf6, 0x1c, + 0xab, 0x1b, 0x0e, 0xa9, 0x08, 0xfd, 0xd9, 0x97, 0x08, 0x24, 0x2b, 0xda, + 0x08, 0x8b, 0x0c, 0x07, 0x70, 0x15, 0xa8, 0x0c, 0x86, 0xfc, 0xd1, 0x84, + 0xba, 0xd0, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, + 0x35, 0x7a, 0xab, 0xaa, 0xbe, 0xd7, 0xad, 0x22, 0x99, 0x46, 0xbb, 0x78, + 0xfd, 0x47, 0x8f, 0x2a, 0x4a, 0xa6, 0x2f, 0x8d, 0x15, 0x07, 0xed, 0x26, + 0x1d, 0xb3, 0x12, 0xd3, 0x88, 0x0f, 0xf1, 0x75, 0x2a, 0x07, 0x62, 0xac, + 0xbf, 0x52, 0x4a, 0xc3, 0x12, 0xe5, 0x3c, 0xea, 0xa6, 0x1e, 0x57, 0x90, + 0x56, 0x60, 0x7d, 0xcf, 0x4b, 0x65, 0xaf, 0xee, 0x17, 0x56, 0xbe, 0xd2, + 0x38, 
0x3f, 0xd6, 0xbc, 0xef, 0xa7, 0x32, 0xb7, 0x10, 0xe9, 0xbd, 0x97, + 0x45, 0x92, 0x3c, 0xd3, 0x35, 0x2e, 0x59, 0x37, 0x65, 0x5c, 0x7f, 0xd0, + 0x99, 0x9c, 0x01, 0xe9, 0x1f, 0x65, 0xe9, 0xec, 0x0f, 0x2d, 0x46, 0xbc, + 0xd4, 0x8f, 0x51, 0x1c, 0xa0, 0xa4, 0x9b, 0x4f, 0x95, 0x54, 0xb0, 0x50, + 0x74, 0xfa, 0x0f, 0xe6, 0x55, 0x81, 0xce, 0x0f, 0xd1, 0x25, 0x56, 0xc8, + 0x2f, 0x3a, 0x65, 0xd4, 0x86, 0x4a, 0x8e, 0xff, 0x5a, 0xcc, 0x67, 0x96, + 0xcc, 0x65, 0x0d, 0x20, 0xee, 0xba, 0x6b, 0xcb, 0xde, 0x10, 0x2f, 0xbf, + 0x67, 0x6d, 0xbe, 0xef, 0x72, 0xfc, 0x25, 0x62, 0xbf, 0xbb, 0xc5, 0xe0, + 0x7b, 0x4c, 0x32, 0xc5, 0xdb, 0x9f, 0xb5, 0xe2, 0x75, 0x8a, 0xba, 0xbb, + 0x69, 0x28, 0xb6, 0x41, 0x25, 0x83, 0x67, 0x35, 0x1b, 0xd7, 0xb3, 0xd7, + 0x58, 0x54, 0x8a, 0x0b, 0x7c, 0xf3, 0x05, 0xcf, 0x2c, 0x78, 0x70, 0xc6, + 0xed, 0x7e, 0x56, 0xb6, 0x4e, 0x48, 0xaa, 0x57, 0xc4, 0xb0, 0xb2, 0xa0, + 0xca, 0x50, 0xe1, 0xc7, 0x41, 0xea, 0xac, 0x5f, 0x18, 0x13, 0xe5, 0x85, + 0x78, 0x3f, 0x05, 0xf3, 0xfd, 0x74, 0x7a, 0x42, 0x61, 0x91, 0x19, 0xc6, + 0x19, 0xe9, 0xd2, 0x78, 0x2c, 0xb1, 0xa3, 0x7f, 0x62, 0xea, 0x2a, 0x35, + 0x1c, 0x55, 0xa3, 0xf7, 0xdc, 0xec, 0x48, 0x23, 0x99, 0x8d, 0xe1, 0x4d, + 0x45, 0xad, 0x92, 0xc6, 0xf4, 0xa2, 0xe5, 0xe6, 0x58, 0xe4, 0xd5, 0x37, + 0xd0, 0x47, 0x0b, 0x64, 0x68, 0x48, 0x7e, 0xeb, 0xbe, 0x5e, 0x74, 0xd1, + 0xc4, 0xa5, 0x60, 0xd0, 0x30, 0x62, 0xbc, 0x81, 0xc4, 0x01, 0x68, 0x18, + 0xf3, 0xac, 0x9d, 0xb1, 0x4d, 0xdd, 0x8b, 0xd2, 0x54, 0x5d, 0xd1, 0x1c, + 0xee, 0x75, 0x9e, 0x99, 0x42, 0x69, 0x38, 0xcc, 0x66, 0x24, 0xd9, 0x8f, + 0x70, 0x98, 0xc3, 0x5e, 0x08, 0xf0, 0xd8, 0x2d, 0xe6, 0x52, 0x48, 0xdf, + 0xd0, 0x03, 0x04, 0x92, 0xab, 0xa1, 0xa1, 0x2f, 0x7d, 0x84, 0xb2, 0x82, + 0x51, 0x56, 0x74, 0x4a, 0x94, 0xff, 0xd2, 0xe4, 0x4e, 0x1a, 0xbd, 0x18, + 0xab, 0x33, 0x68, 0x0e, 0x4f, 0x99, 0x1d, 0x7e, 0x02, 0x3f, 0x1f, 0x50, + 0x05, 0xf8, 0x59, 0x47, 0x97, 0x98, 0x60, 0xb1, 0x30, 0xb1, 0x14, 0xac, + 0x2c, 0x0a, 0xa8, 0x97, 0x83, 0xf5, 0x5a, 0x5c, 0x87, 0xe5, 0x36, 0x26, + 0xec, 
0xb4, 0x94, 0x46, 0x9a, 0xad, 0x2b, 0x9a, 0xb7, 0xac, 0xc4, 0x1a, + 0x55, 0x53, 0xc0, 0x16, 0x91, 0x1c, 0xd6, 0xaa, 0x6b, 0xdd, 0x85, 0x6a, + 0x54, 0xec, 0x7c, 0xa1, 0xd5, 0x18, 0x00, 0x74, 0xd2, 0xf1, 0x7e, 0xad, + 0x7c, 0xa8, 0x85, 0x9b, 0xc0, 0x9f, 0x4f, 0x3b, 0xd9, 0x08, 0xc8, 0x9d, + 0x31, 0x22, 0x7a, 0x53, 0xa8, 0xbd, 0x00, 0xdf, 0xe8, 0x39, 0x52, 0xe9, + 0x14, 0x74, 0x7b, 0x53, 0xf9, 0xbd, 0x29, 0x8e, 0x5d, 0xf2, 0x35, 0x3b, + 0xe3, 0x48, 0xbf, 0xa0, 0xc4, 0x3d, 0x40, 0xb4, 0xf2, 0x7c, 0xd0, 0xe3, + 0x17, 0x11, 0x5b, 0xd6, 0x55, 0xd2, 0x54, 0xcf, 0x20, 0x8d, 0x74, 0x4a, + 0x6b, 0xe9, 0x5d, 0xfe, 0x72, 0x14, 0x6a, 0x11, 0x8b, 0x14, 0x19, 0xba, + 0x63, 0xe4, 0x6b, 0x39, 0xb4, 0x90, 0x67, 0x79, 0x56, 0x31, 0xd3, 0xb5, + 0xeb, 0x9e, 0x95, 0x4b, 0x1e, 0x04, 0x20, 0xd8, 0xbe, 0xe8, 0x1c, 0xd7, + 0x95, 0xcb, 0x57, 0x60, 0xe6, 0x11, 0x35, 0x42, 0x90, 0xfd, 0xb2, 0xe4, + 0x9b, 0x24, 0x70, 0xc0, 0xc3, 0xa9, 0x8a, 0xc9, 0x46, 0xd0, 0xea, 0xc9, + 0x93, 0x7d, 0x9f, 0x64, 0x12, 0x54, 0x09, 0xb7, 0xc2, 0x4d, 0x6e, 0xcc, + 0x60, 0x07, 0x36, 0x31, 0x64, 0x3d, 0x1e, 0xd3, 0x86, 0x47, 0x47, 0x42, + 0x76, 0xb6, 0xf0, 0xe5, 0xb4, 0xe7, 0xbe, 0x47, 0x91, 0x78, 0xbe, 0x06, + 0xf1, 0x6e, 0x58, 0xce, 0x32, 0x13, 0x26, 0x34, 0x92, 0xae, 0xb2, 0x29, + 0xd0, 0x30, 0x55, 0xfd, 0x89, 0x6a, 0xbf, 0x3e, 0xdf, 0x11, 0x39, 0xe4, + 0xfd, 0x56, 0xd7, 0x2f, 0x89, 0x96, 0x08, 0x54, 0xaa, 0xab, 0x8b, 0xfa, + 0x65, 0xe5, 0x64, 0xff, 0x24, 0x25, 0x8f, 0x7d, 0xf6, 0xb1, 0x7f, 0x2f, + 0xa6, 0xf6, 0x46, 0xab, 0x61, 0xfd, 0x47, 0xad, 0x6d, 0x38, 0x6d, 0xc1, + 0xe9, 0x4a, 0xf1, 0x85, 0x05, 0x0e, 0x69, 0x48, 0x7c, 0xa6, 0x76, 0x61, + 0xe3, 0x94, 0xf2, 0xd6, 0x7a, 0x9c, 0x79, 0xc0, 0x2a, 0x51, 0x23, 0xc6, + 0xaf, 0x29, 0x04, 0x0f, 0x47, 0xc2, 0x93, 0xd7, 0x64, 0xe5, 0x37, 0x2e, + 0x53, 0x3b, 0xb7, 0x7c, 0x9c, 0xb4, 0x63, 0x13, 0xc7, 0x56, 0x90, 0xe9, + 0x53, 0xd5, 0x86, 0x2b, 0x96, 0x41, 0x42, 0x56, 0xc5, 0x16, 0xd7, 0x9e, + 0x30, 0xce, 0xa1, 0x0d, 0x93, 0x5d, 0x11, 0x07, 0xb2, 0x95, 0xfd, 0xf6, + 0x0b, 
0x28, 0x95, 0x1a, 0x8f, 0xfa, 0xe1, 0x57, 0x7e, 0x06, 0xff, 0x18, + 0xaf, 0xe3, 0x4f, 0x3c, 0x34, 0x5b, 0xd4, 0x46, 0x1a, 0xd1, 0xd1, 0x7e, + 0x55, 0xba, 0x5d, 0x2a, 0x1f, 0x42, 0x49, 0x95, 0x75, 0x5f, 0x80, 0x60, + 0x02, 0x01, 0xdb, 0x36, 0xad, 0x68, 0x69, 0x1e, 0x0b, 0x90, 0x3f, 0xa6, + 0xb6, 0x2f, 0x66, 0xa6, 0x7d, 0x81, 0x8c, 0xa0, 0xee, 0x05, 0x95, 0xbc, + 0xb3, 0x7c, 0x18, 0xd4, 0x1b, 0x40, 0x96, 0xf5, 0x05, 0x9d, 0x27, 0x3b, + 0x78, 0xfc, 0x19, 0x18, 0xc0, 0x61, 0xa0, 0xd6, 0xf9, 0xc0, 0x3f, 0xe5, + 0x48, 0x35, 0x0f, 0x8b, 0x0d, 0xfb, 0x31, 0xb7, 0x32, 0x40, 0x1d, 0x69, + 0x12, 0x5a, 0x23, 0xf0, 0xce, 0xe9, 0x5e, 0xa6, 0x68, 0x6b, 0xe1, 0xe2, + 0x68, 0x07, 0x02, 0x0d, 0x7a, 0xc2, 0x0a, 0x40, 0x10, 0x5e, 0x94, 0xba, + 0x77, 0x1d, 0xf7, 0xac, 0xec, 0x79, 0xa9, 0xa1, 0x8a, 0xb8, 0x49, 0x32, + 0x08, 0xe0, 0x18, 0xa8, 0x3d, 0x69, 0x41, 0x5d, 0x30, 0x3b, 0xb6, 0x91, + 0x46, 0x8d, 0x81, 0x10, 0xb0, 0xc2, 0xed, 0xa0, 0x4e, 0x59, 0x48, 0xd8, + 0x64, 0x7d, 0x2d, 0x46, 0xf2, 0x8a, 0x2e, 0x5d, 0x0c, 0x4d, 0x9f, 0xfe, + 0x7b, 0x5e, 0xbf, 0x1a, 0x78, 0xdf, 0xfc, 0x0f, 0x04, 0x37, 0x72, 0x1a, + 0x09, 0xb8, 0x6e, 0x1b, 0xf1, 0x18, 0x7d, 0x83, 0x44, 0xaa, 0x9b, 0x71, + 0xe1, 0x03, 0x04, 0x83, 0xe5, 0xaa, 0xc0, 0xd4, 0xa7, 0x80, 0x10, 0x35, + 0x09, 0xae, 0xf7, 0xe1, 0x5e, 0x7c, 0x31, 0x20, 0x43, 0x82, 0xda, 0x07, + 0x39, 0xfe, 0x8f, 0x9d, 0x70, 0x3c, 0x57, 0x43, 0x01, 0x51, 0x37, 0x2e, + 0x97, 0xef, 0xcf, 0x05, 0x44, 0x75, 0x69, 0xf7, 0xdb, 0xda, 0x80, 0x78, + 0x0c, 0xcc, 0xc1, 0x49, 0xac, 0x3b, 0x7e, 0x27, 0x6a, 0xbb, 0xdf, 0x45, + 0x5b, 0x3b, 0x29, 0xf6, 0x1b, 0xa9, 0x25, 0xf9, 0x2f, 0xcf, 0x37, 0x71, + 0x33, 0xb4, 0x90, 0xd7, 0x9b, 0x87, 0x41, 0x15, 0xd1, 0xa6, 0x39, 0xa7, + 0xa9, 0xcd, 0x66, 0x29, 0x59, 0xb4, 0x53, 0x12, 0xa1, 0x20, 0xd5, 0x04, + 0xca, 0x40, 0x31, 0xfa, 0x6f, 0xbb, 0x92, 0x04, 0xf3, 0xc2, 0x10, 0x0d, + 0xc1, 0x19, 0x78, 0x8c, 0x82, 0xed, 0x92, 0x3a, 0x6b, 0xd1, 0x3d, 0xe8, + 0xac, 0x55, 0xe4, 0x8c, 0xc6, 0xd4, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, + 0x00, 
0x01, 0x00, 0x00, 0xc2, 0x1d, 0x86, 0xe4, 0xf6, 0xa1, 0xbe, 0xf5, + 0xf3, 0x36, 0x9d, 0x32, 0x80, 0x17, 0x3b, 0x1f, 0x18, 0x21, 0xed, 0xa7, + 0xf5, 0xaf, 0xf1, 0x94, 0xe2, 0xa7, 0x08, 0xd5, 0xca, 0x18, 0x45, 0xf5, + 0x68, 0x94, 0x82, 0x61, 0xf7, 0xb7, 0xb2, 0xfa, 0xd4, 0x5e, 0x32, 0xd0, + 0xf0, 0x20, 0x66, 0x83, 0xd1, 0x6b, 0x3c, 0xdf, 0x73, 0xeb, 0x73, 0x82, + 0x09, 0x9b, 0xd0, 0xc5, 0xb0, 0x9f, 0x01, 0x77, 0x85, 0xcc, 0x6e, 0x23, + 0xb7, 0x00, 0x45, 0xe0, 0xa6, 0x01, 0x29, 0x1d, 0x8b, 0xc4, 0xe0, 0xc2, + 0xe0, 0x4f, 0x3b, 0x07, 0xd5, 0xac, 0x6b, 0x88, 0xb8, 0xa4, 0xe2, 0x5c, + 0x19, 0xe9, 0x98, 0x72, 0xa5, 0x6b, 0xf5, 0xa4, 0xf7, 0x15, 0xaf, 0xfb, + 0xb4, 0x80, 0x9a, 0xe3, 0xa5, 0x35, 0x2f, 0x45, 0x81, 0xf1, 0x8b, 0x2d, + 0x26, 0x5c, 0x65, 0xa9, 0x5b, 0x6e, 0x83, 0xc3, 0x62, 0x2f, 0x84, 0xef, + 0x11, 0xa5, 0x58, 0x48, 0xe9, 0x67, 0x7e, 0xd3, 0x0b, 0x5d, 0x51, 0x80, + 0x39, 0x08, 0x8e, 0xc1, 0x0d, 0x04, 0x11, 0x5f, 0x72, 0x64, 0x1f, 0x83, + 0xf8, 0xd3, 0x09, 0x38, 0xb6, 0x7f, 0x50, 0x78, 0x27, 0x20, 0xe5, 0xbd, + 0x16, 0xbf, 0x51, 0xd8, 0x4f, 0x67, 0x60, 0xf6, 0x9e, 0xff, 0x08, 0xfe, + 0xc6, 0x96, 0xd6, 0x64, 0x94, 0x28, 0xc6, 0x9a, 0x09, 0x1a, 0x34, 0x08, + 0x31, 0x4b, 0x0b, 0x97, 0x5a, 0x18, 0x72, 0x49, 0xe9, 0x1d, 0xbb, 0x9c, + 0xed, 0x7e, 0xb5, 0xc5, 0xa7, 0xf4, 0x25, 0x7a, 0x26, 0xe9, 0x15, 0x61, + 0x85, 0x32, 0xc9, 0xb3, 0xcf, 0x95, 0xbf, 0x35, 0x10, 0x2d, 0x71, 0xfe, + 0x03, 0xd6, 0x69, 0x75, 0x8d, 0xb7, 0x16, 0xa7, 0x3d, 0x0e, 0xb7, 0x55, + 0x6d, 0xa7, 0x9f, 0x10, 0x7e, 0x7e, 0xff, 0x39, 0xee, 0x8e, 0xa7, 0x81, + 0x7d, 0x11, 0xea, 0xa9, 0xd6, 0xed, 0x54, 0xf8, 0xd2, 0xd5, 0xff, 0xff, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0xf9, 0xde, 0x41, 0xe7, + 0xa6, 0x88, 0x53, 0x76, 0x5a, 0x26, 0xc3, 0x5c, 0xf2, 0x58, 0x68, 0x9c, + 0xc7, 0x4e, 0x53, 0x18, 0x53, 0x67, 0x39, 0x23, 0x96, 0xb0, 0xef, 0x58, + 0x29, 0xe1, 0x68, 0xd8, 0xce, 0xc0, 0x41, 0xc2, 0x35, 0x5f, 0x74, 0xfa, + 0xdf, 0xc7, 0x0f, 0x80, 0x50, 0xd1, 0xf6, 0x5a, 0x3a, 0x81, 0xe0, 0xd9, + 0x9b, 
0x47, 0x96, 0xcd, 0xc5, 0x0f, 0x91, 0x12, 0x81, 0x77, 0x1e, 0xef, + 0x2e, 0xba, 0x16, 0x51, 0x70, 0x78, 0xdc, 0xa3, 0x84, 0x12, 0x7c, 0x9e, + 0x21, 0x7d, 0xa3, 0x5f, 0xce, 0xa1, 0x25, 0x84, 0x99, 0xa4, 0x2d, 0xa6, + 0x0f, 0x95, 0xef, 0xef, 0x31, 0xe6, 0xf2, 0x18, 0x08, 0x47, 0xd2, 0x5a, + 0x39, 0x01, 0x7a, 0xca, 0xd3, 0x03, 0xb1, 0xc2, 0x48, 0xf4, 0x1f, 0x6d, + 0xc2, 0x8c, 0x5c, 0xda, 0xf5, 0x10, 0xed, 0xfc, 0x2e, 0x0c, 0xb3, 0x52, + 0xaa, 0xa9, 0xed, 0xbc, 0x41, 0xcc, 0xd4, 0x4b, 0x1c, 0xd0, 0xa3, 0x1d, + 0xf4, 0xe7, 0x48, 0x34, 0x4e, 0xcf, 0x3b, 0xb3, 0x71, 0x06, 0xbe, 0x0c, + 0x35, 0xbb, 0xb4, 0x17, 0xd8, 0x8b, 0xba, 0xdd, 0x32, 0x30, 0x51, 0xb1, + 0xb1, 0xd6, 0x3a, 0xdc, 0x3b, 0x25, 0x9a, 0x57, 0xc7, 0x4d, 0xd3, 0x75, + 0x93, 0x59, 0x3e, 0x9b, 0x10, 0xcf, 0xdb, 0x38, 0x75, 0x51, 0xb2, 0x2a, + 0x48, 0x78, 0xfc, 0xaa, 0xe3, 0x91, 0xe7, 0x93, 0xe7, 0x0a, 0x07, 0x2c, + 0xf8, 0x88, 0x93, 0xde, 0x2f, 0xba, 0x7b, 0x72, 0xcd, 0x92, 0xdd, 0xb1, + 0xac, 0x1e, 0xe4, 0xe3, 0x5d, 0xa4, 0x7f, 0x86, 0xa7, 0xcb, 0xb5, 0x81, + 0x86, 0xf1, 0xf5, 0xad, 0xd6, 0x36, 0x08, 0x09, 0x9f, 0x75, 0x6f, 0x4a, + 0x5b, 0x30, 0xf8, 0xaf, 0xd2, 0xbc, 0xb5, 0xbe, 0xf2, 0xeb, 0x9b, 0xbc, + 0x11, 0xd4, 0x0c, 0x14, 0xa6, 0x6f, 0x43, 0xd3, 0xc9, 0x4e, 0xca, 0x9b, + 0x4e, 0x46, 0x60, 0x4c, 0x63, 0xcc, 0x07, 0x36, 0x8c, 0xf2, 0xd1, 0x93, + 0x7a, 0x51, 0x49, 0x15, 0xbf, 0xbf, 0x9e, 0x82, 0x21, 0x06, 0xa0, 0x39, + 0x11, 0x1d, 0x6c, 0x41, 0x72, 0xcd, 0x2a, 0x8a, 0x4a, 0xd0, 0x13, 0x6c, + 0x56, 0xf4, 0x00, 0x48, 0xaf, 0xab, 0xdf, 0xa9, 0xe9, 0xa6, 0xaa, 0x06, + 0x61, 0x79, 0xc4, 0x57, 0x42, 0xca, 0x12, 0x18, 0xcf, 0x81, 0xec, 0x79, + 0x19, 0xd2, 0xd2, 0xe3, 0x1d, 0xc6, 0x6c, 0xd0, 0xd6, 0x0a, 0xfb, 0x70, + 0x42, 0x28, 0x25, 0x23, 0xb6, 0x23, 0x15, 0x28, 0x5e, 0x9f, 0x49, 0xf2, + 0x7b, 0x69, 0x74, 0xa5, 0xb9, 0x26, 0x81, 0xfe, 0x39, 0x3e, 0x3f, 0xc8, + 0x7e, 0x9e, 0x5e, 0x8e, 0xf2, 0xdb, 0x6b, 0xfd, 0xe1, 0xc3, 0x01, 0x4a, + 0xba, 0x8f, 0x33, 0x71, 0x09, 0x80, 0x5d, 0x9c, 0x58, 0x64, 0xb7, 0x90, + 0x13, 
0x2a, 0xe9, 0x1d, 0x07, 0x2c, 0x06, 0x70, 0x43, 0x0d, 0xb6, 0x57, + 0x02, 0x3c, 0xbe, 0x3c, 0x42, 0xab, 0x77, 0x15, 0x0e, 0x98, 0xfb, 0xf2, + 0x1d, 0x14, 0xd9, 0xb8, 0xd1, 0x59, 0x2a, 0x67, 0x6f, 0xfc, 0x59, 0x39, + 0x33, 0xe0, 0x49, 0x0b, 0x4e, 0x65, 0x81, 0x9f, 0x71, 0xf2, 0xa5, 0x90, + 0x4f, 0x24, 0xc7, 0x05, 0xfb, 0x77, 0x1e, 0x14, 0xca, 0x2f, 0xfc, 0xac, + 0xec, 0xbf, 0xa2, 0x69, 0x15, 0x0a, 0x6b, 0xa9, 0xa0, 0x74, 0xee, 0xad, + 0xa9, 0x50, 0x4d, 0x4d, 0xab, 0x6e, 0xc1, 0xb3, 0xda, 0xbb, 0xbd, 0xab, + 0x00, 0x05, 0x14, 0xc1, 0xc4, 0x53, 0x7b, 0x78, 0x97, 0x68, 0x3c, 0x05, + 0xf2, 0xed, 0x87, 0xca, 0x86, 0xd1, 0xdf, 0xda, 0xb3, 0x2f, 0x17, 0x87, + 0x87, 0x2f, 0xd8, 0xe9, 0xb2, 0x96, 0xdc, 0x7f, 0x22, 0xf1, 0x2a, 0x9f, + 0xfe, 0x54, 0x55, 0xa1, 0x96, 0xab, 0x9f, 0x61, 0x74, 0xcd, 0x4d, 0x77, + 0x38, 0x02, 0x23, 0x29, 0x28, 0x5b, 0xfc, 0x86, 0x17, 0x40, 0xd4, 0x42, + 0x2a, 0x9b, 0x84, 0xf7, 0x67, 0x2b, 0x3a, 0xc1, 0x31, 0x89, 0x4b, 0x67, + 0xd1, 0x7d, 0x6b, 0x36, 0xec, 0x69, 0x6b, 0x24, 0xca, 0xd6, 0x2d, 0xbb, + 0x21, 0xc8, 0x0c, 0x53, 0x41, 0x29, 0x0b, 0xc1, 0xfe, 0xd5, 0xa3, 0x4c, + 0x66, 0x2f, 0xc7, 0xf1, 0xa8, 0xc0, 0x3d, 0x9a, 0xb9, 0x09, 0x50, 0x3f, + 0x09, 0x87, 0xa4, 0x3f, 0x7a, 0x33, 0xef, 0xf0, 0xfb, 0x77, 0x02, 0x7d, + 0x92, 0xaf, 0x73, 0xaa, 0xcc, 0x3f, 0x66, 0x56, 0xd0, 0x21, 0xd1, 0xe8, + 0x0e, 0x47, 0x03, 0x5e, 0x3b, 0xe9, 0xa2, 0xe3, 0x83, 0x0b, 0x73, 0xd3, + 0xaa, 0x94, 0x80, 0xef, 0x7c, 0xdf, 0xde, 0x86, 0xc3, 0xa9, 0x62, 0x34, + 0x76, 0xee, 0x4d, 0x15, 0x73, 0x7b, 0xd7, 0x6d, 0xd4, 0x21, 0x05, 0xd4, + 0xcf, 0xf3, 0x54, 0xdc, 0x49, 0x5f, 0x5a, 0x2a, 0x37, 0x19, 0x89, 0x61, + 0x1d, 0x95, 0x17, 0x8b, 0x09, 0x95, 0x5d, 0x9f, 0xde, 0x86, 0x03, 0x93, + 0x76, 0xec, 0x54, 0xec, 0x13, 0xc3, 0xf9, 0x38, 0x8f, 0xa9, 0x11, 0xf0, + 0x9a, 0x0e, 0x5e, 0x38, 0x69, 0xeb, 0x62, 0x41, 0x9e, 0xd0, 0x1b, 0x59, + 0x8c, 0xfd, 0x16, 0xfa, 0xd8, 0x99, 0x0d, 0x83, 0x7e, 0xba, 0x5b, 0xc6, + 0x59, 0xe1, 0xae, 0xba, 0xb9, 0xb8, 0xba, 0xa5, 0x4d, 0x20, 0x00, 0xc9, + 0x0c, 
0xe1, 0x77, 0xdf, 0xc4, 0x95, 0xca, 0x7c, 0xa5, 0xef, 0x0a, 0xed, + 0x9b, 0x31, 0x06, 0xe1, 0xc9, 0xa3, 0x88, 0x0a, 0xcc, 0x3d, 0xc8, 0xb6, + 0x01, 0xe2, 0xa9, 0x29, 0x03, 0x8a, 0x28, 0xf8, 0x0d, 0x70, 0x77, 0xb9, + 0xe1, 0x1b, 0x06, 0x19, 0x86, 0xc1, 0xd3, 0xcf, 0x6b, 0x9c, 0x09, 0x70, + 0x50, 0xed, 0xb5, 0xf6, 0x69, 0xcc, 0xac, 0x30, 0x6a, 0x1f, 0x1d, 0xe6, + 0x75, 0x33, 0xab, 0x55, 0x48, 0xfa, 0x81, 0xb8, 0x06, 0x3a, 0x78, 0xee, + 0xde, 0xef, 0xe2, 0x17, 0xc4, 0x3e, 0xe5, 0x22, 0xa7, 0xd1, 0x45, 0x5b, + 0x57, 0xb0, 0xde, 0x69, 0x30, 0xd1, 0x9a, 0xd7, 0x6b, 0x0e, 0x7a, 0x30, + 0x0d, 0xb5, 0xec, 0x60, 0xa7, 0x05, 0x87, 0x42, 0x4b, 0x92, 0x1f, 0x68, + 0x8e, 0x1a, 0x90, 0x84, 0x27, 0x2a, 0xc0, 0xd2, 0xff, 0xbc, 0x8e, 0x34, + 0x53, 0x9d, 0x04, 0x50, 0xcb, 0x79, 0xd9, 0x55, 0xd5, 0x4d, 0x3c, 0xe2, + 0xb4, 0x9b, 0x57, 0x07, 0x1f, 0xce, 0xd0, 0xa7, 0x84, 0xe1, 0xb7, 0x3a, + 0xaf, 0xc5, 0x67, 0x64, 0xbc, 0x02, 0xbe, 0xb0, 0x65, 0x7e, 0xb0, 0x4c, + 0xc2, 0x2d, 0xcd, 0xf8, 0x60, 0xcb, 0xfe, 0xd1, 0x8d, 0x14, 0x5a, 0xd3, + 0x38, 0xd4, 0x71, 0x5a, 0xca, 0xbb, 0xfe, 0x0e, 0x54, 0xf9, 0xb4, 0x25, + 0xa5, 0x71, 0x13, 0x95, 0x14, 0xdc, 0x86, 0xb8, 0x21, 0xa7, 0x2e, 0x13, + 0xc6, 0x2f, 0xce, 0xe7, 0x6c, 0xb8, 0x0d, 0xc9, 0xe4, 0xc4, 0x64, 0x12, + 0x78, 0x1c, 0x95, 0x92, 0xc2, 0xec, 0xaa, 0xd3, 0xc3, 0x3a, 0xd2, 0xe8, + 0x95, 0xf0, 0x6b, 0x03, 0x8c, 0xcf, 0x6b, 0xdb, 0x21, 0xa0, 0xcf, 0xf4, + 0x05, 0xc8, 0xe7, 0x77, 0x05, 0x55, 0x7b, 0x6b, 0xfa, 0x96, 0xf1, 0x7c, + 0x30, 0x62, 0x75, 0xbe, 0x6e, 0xea, 0xba, 0x9f, 0x40, 0x2e, 0x9a, 0x86, + 0x93, 0xcc, 0x38, 0xf7, 0xee, 0xd8, 0xbb, 0x24, 0xcd, 0x85, 0x3e, 0x85, + 0x16, 0x8c, 0x33, 0x23, 0x73, 0xe6, 0x43, 0xc4, 0x67, 0xbf, 0xef, 0x85, + 0xb1, 0x44, 0xf9, 0x55, 0x93, 0x4d, 0x0b, 0x8e, 0xc1, 0x42, 0x13, 0xc6, + 0xc8, 0x09, 0x63, 0xab, 0xb3, 0xc7, 0xc4, 0xa4, 0x8b, 0x72, 0xfb, 0xa5, + 0x99, 0xa1, 0x5d, 0x07, 0x02, 0x82, 0x56, 0x11, 0x3c, 0xc2, 0x5a, 0x55, + 0xf9, 0x3a, 0x93, 0x61, 0x89, 0x46, 0xb7, 0x6a, 0x42, 0x76, 0x1e, 0x70, + 0xde, 
0xd9, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, + 0x32, 0xc1, 0x61, 0xaa, 0xdb, 0xe9, 0xae, 0x88, 0xcb, 0xf7, 0x28, 0xdd, + 0x82, 0x62, 0x61, 0x41, 0x4e, 0xbb, 0xf9, 0xb7, 0xe8, 0x81, 0x99, 0x18, + 0xe2, 0xa7, 0xb4, 0x7c, 0xb7, 0x08, 0x44, 0x6f, 0x24, 0xb3, 0xda, 0x57, + 0x62, 0x29, 0xc7, 0xa6, 0x84, 0xb1, 0x5d, 0xc5, 0x00, 0x4c, 0x30, 0x16, + 0xf0, 0x0a, 0x74, 0x73, 0xec, 0xaf, 0xb5, 0xde, 0xb0, 0xa7, 0x75, 0x22, + 0x8f, 0x9e, 0x43, 0x01, 0x68, 0xae, 0x91, 0xeb, 0x46, 0x52, 0x3f, 0x2c, + 0x4e, 0xc5, 0xd0, 0xc8, 0x15, 0xea, 0x99, 0xc2, 0x37, 0x5b, 0x68, 0xb5, + 0xce, 0x41, 0x92, 0xbf, 0xd6, 0xdb, 0x85, 0xad, 0x08, 0xd1, 0x11, 0x93, + 0xe8, 0xd4, 0x78, 0x43, 0x3b, 0x7d, 0xcb, 0x42, 0x84, 0xf3, 0x61, 0x88, + 0x9e, 0x6a, 0x73, 0xb9, 0x78, 0x17, 0x9a, 0x9f, 0xfb, 0x97, 0xcb, 0xd6, + 0xb5, 0x3f, 0x00, 0x41, 0xb0, 0x30, 0x2f, 0x6f, 0x89, 0xdd, 0xfa, 0x13, + 0xd1, 0x07, 0xbe, 0x2f, 0xea, 0x91, 0x62, 0xaa, 0xed, 0xcb, 0xfd, 0x07, + 0x82, 0xbb, 0x3f, 0xf4, 0xa6, 0x94, 0x66, 0x71, 0x20, 0x61, 0xac, 0x84, + 0x04, 0x70, 0xf2, 0xd3, 0xdf, 0xac, 0x44, 0xfd, 0x47, 0x26, 0x81, 0x64, + 0xb3, 0xa6, 0x90, 0x2b, 0xd2, 0x2c, 0xd0, 0x77, 0x81, 0x53, 0x45, 0x78, + 0x5f, 0x30, 0x77, 0x91, 0x83, 0x13, 0x33, 0xd1, 0x91, 0xa6, 0x35, 0x21, + 0xcb, 0x26, 0x54, 0x0a, 0xf7, 0x70, 0x5e, 0xdb, 0xd8, 0x92, 0xc7, 0xdf, + 0xf9, 0x2a, 0x46, 0x91, 0x22, 0x3b, 0xe6, 0xe1, 0x91, 0xeb, 0xa6, 0x78, + 0x81, 0x57, 0xf3, 0x04, 0xdf, 0x34, 0x55, 0x74, 0x0a, 0xfe, 0xf2, 0xbd, + 0xb3, 0xeb, 0xa3, 0x8e, 0x71, 0x15, 0xa9, 0x2f, 0x53, 0xe2, 0xa1, 0x45, + 0xdf, 0xe8, 0x29, 0x40, 0xf1, 0x4b, 0x23, 0xdb, 0x8e, 0xee, 0x19, 0xa8, + 0xd4, 0x15, 0x90, 0x8c, 0x04, 0x46, 0x81, 0x49, 0x92, 0xe5, 0xe1, 0xfe, + 0x99, 0x06, 0xfc, 0x3e, 0x43, 0x58, 0x3b, 0x19, 0x7f, 0xd2, 0x13, 0x65, + 0xc2, 0x64, 0x27, 0x6d, 0x93, 0x6a, 0xcf, 0x48, 0x2a, 0x3d, 0xdd, 0x79, + 0x9f, 0x05, 0x32, 0xeb, 0xfd, 0xb4, 0xd2, 0x1d, 0x16, 0x61, 0x3d, 0x17, + 0x4c, 0xb8, 0xad, 0x63, 0x0e, 0x6b, 0x8a, 0x4a, 0x34, 0x4c, 0xb5, 0x3c, + 0x0f, 
0x05, 0x28, 0x8c, 0x8b, 0xdf, 0xf4, 0xa0, 0x49, 0xbf, 0x34, 0x6c, + 0x6a, 0x5f, 0x40, 0x95, 0x48, 0x4b, 0x93, 0x1e, 0x61, 0x6d, 0x58, 0xc3, + 0x86, 0x98, 0x70, 0x11, 0x4e, 0x44, 0x65, 0xc1, 0x0d, 0xea, 0x2f, 0xda, + 0x38, 0x16, 0xbd, 0xd4, 0x7b, 0x3e, 0x31, 0xee, 0x42, 0x4c, 0xdc, 0xe9, + 0x8b, 0x1f, 0xa9, 0xcf, 0xab, 0x60, 0xb5, 0xb1, 0xd2, 0xf2, 0x6a, 0xe9, + 0xbc, 0xcc, 0xcb, 0x60, 0x4a, 0xca, 0x70, 0x79, 0x64, 0x9d, 0x07, 0x1e, + 0xdb, 0xef, 0x34, 0xaf, 0x17, 0x93, 0x6b, 0x60, 0x73, 0x2d, 0x8c, 0x08, + 0x27, 0x1e, 0x46, 0x9f, 0xcb, 0x33, 0xdd, 0x76, 0xef, 0x17, 0x58, 0x9a, + 0x5f, 0x82, 0x78, 0x0f, 0xbf, 0xe7, 0x0f, 0x3a, 0x1e, 0xa8, 0x30, 0xbf, + 0xff, 0xc7, 0xc7, 0x82, 0x8b, 0xc3, 0x65, 0x04, 0xfd, 0x45, 0xc9, 0x88, + 0x99, 0x8e, 0x44, 0xc5, 0x23, 0x1e, 0xbf, 0xf1, 0x95, 0x70, 0x35, 0xe6, + 0x56, 0x4a, 0x53, 0xb2, 0xac, 0x0c, 0xfd, 0xf5, 0x61, 0x26, 0x5b, 0x70, + 0xd6, 0x4c, 0xfc, 0x0f, 0xcc, 0x53, 0x6e, 0x25, 0xca, 0x1d, 0x0c, 0x56, + 0xf7, 0x9c, 0x95, 0xf6, 0x3c, 0x08, 0x0c, 0x64, 0xb1, 0x1c, 0x5c, 0xe6, + 0x25, 0xa4, 0xa3, 0xb7, 0xaf, 0x8b, 0xbc, 0xe1, 0x68, 0xdf, 0x10, 0xab, + 0xbb, 0xd5, 0x30, 0x64, 0x42, 0xf6, 0xe6, 0x9a, 0xb5, 0x59, 0x12, 0x76, + 0x92, 0xac, 0x29, 0xe9, 0x45, 0xdb, 0x2e, 0x62, 0x22, 0x58, 0x24, 0x89, + 0xc8, 0x6a, 0x2a, 0xa7, 0x3f, 0x04, 0x53, 0x4e, 0x07, 0x41, 0x4e, 0x5f, + 0x95, 0x5f, 0x6e, 0x14, 0x5b, 0xa7, 0xa7, 0xd3, 0x5a, 0xa2, 0x95, 0x4a, + 0xc8, 0xe9, 0x3c, 0x5a, 0x84, 0x50, 0xbc, 0xe1, 0x9c, 0x7a, 0x16, 0xe5, + 0xc7, 0x04, 0x9d, 0x60, 0x2e, 0x7d, 0xb3, 0x77, 0x5d, 0x86, 0x2e, 0xac, + 0x57, 0x2a, 0x31, 0x26, 0x23, 0x6e, 0xcc, 0x7f, 0xb8, 0x36, 0x29, 0xa9, + 0xa8, 0xd9, 0xc6, 0x75, 0xee, 0x16, 0x23, 0x27, 0x0f, 0xe1, 0xb0, 0x3d, + 0x91, 0x3a, 0x26, 0x4a, 0x60, 0x72, 0x14, 0xf9, 0x3c, 0x66, 0x66, 0xe8, + 0x7d, 0x4a, 0x6f, 0x7e, 0x63, 0x58, 0x6a, 0x28, 0x78, 0x50, 0xef, 0x3b, + 0x9d, 0xeb, 0xb6, 0x4b, 0x5d, 0x55, 0x80, 0x84, 0x97, 0x9b, 0x74, 0x4b, + 0x5c, 0x09, 0x1d, 0xe7, 0x57, 0xfc, 0x40, 0x3f, 0xa9, 0xbd, 0xdf, 0x61, + 0x2a, 
0x89, 0x62, 0x51, 0xfc, 0x24, 0xee, 0xee, 0x97, 0x10, 0xca, 0xb6, + 0x0e, 0x8e, 0x71, 0x67, 0x2a, 0x79, 0x4f, 0xc4, 0xe6, 0x3e, 0x27, 0xc2, + 0x9b, 0x85, 0xfd, 0xde, 0xfb, 0x58, 0x75, 0xf3, 0x1c, 0x31, 0xa2, 0x56, + 0x3e, 0xdc, 0x24, 0xf4, 0x4f, 0xcb, 0x5a, 0x1a, 0x77, 0x5c, 0x28, 0xd1, + 0x5a, 0x55, 0xa9, 0x8c, 0xb5, 0xdd, 0x77, 0x93, 0x58, 0xd8, 0x2f, 0x7d, + 0x5a, 0x67, 0xa1, 0x95, 0x0a, 0xd2, 0x6a, 0x93, 0xa6, 0xf0, 0x5f, 0x7f, + 0x0a, 0x29, 0xdb, 0x1d, 0x8c, 0xa7, 0x12, 0x0a, 0xf4, 0xc9, 0xcd, 0x70, + 0xd1, 0xbd, 0x48, 0xd4, 0x9a, 0xbb, 0xbb, 0x24, 0xbf, 0x52, 0x25, 0xb9, + 0x75, 0xc2, 0x17, 0x36, 0x6f, 0x4a, 0xc0, 0x53, 0x6d, 0x38, 0xfb, 0x7a, + 0x60, 0xc8, 0x5d, 0x03, 0xc1, 0x1c, 0x0c, 0x31, 0xf0, 0x59, 0xed, 0x0a, + 0x5f, 0x84, 0xf2, 0x89, 0x6c, 0xb4, 0xd5, 0x24, 0x2d, 0x2a, 0xda, 0xbe, + 0x74, 0x1d, 0x22, 0xe2, 0xc6, 0xf0, 0x9b, 0x98, 0x5a, 0x41, 0x11, 0x4c, + 0x51, 0x97, 0x16, 0xa7, 0xc9, 0xd8, 0x53, 0x12, 0x53, 0xdd, 0x22, 0xa9, + 0xf2, 0xae, 0x52, 0x49, 0x02, 0xf9, 0x5c, 0x78, 0x00, 0xa2, 0x64, 0xff, + 0x91, 0x62, 0x20, 0x6a, 0x87, 0x6a, 0x40, 0x01, 0x85, 0x30, 0xf5, 0xdd, + 0xa7, 0x64, 0x0a, 0x85, 0x8d, 0x37, 0x99, 0xcb, 0x03, 0xc8, 0x29, 0x56, + 0x7e, 0x75, 0x4f, 0xa1, 0xc3, 0x76, 0xce, 0xdb, 0xa3, 0xb4, 0x7e, 0x91, + 0x95, 0xbe, 0x53, 0x0e, 0x20, 0xc9, 0xe7, 0x71, 0x78, 0xad, 0x3d, 0x4c, + 0xbb, 0x59, 0xb9, 0x77, 0xcf, 0x7d, 0x7b, 0xff, 0x15, 0xdb, 0x1d, 0xae, + 0x1f, 0xbe, 0x33, 0x88, 0x01, 0x04, 0x95, 0xe5, 0xe9, 0x6a, 0x1c, 0xbf, + 0xc8, 0xc3, 0x33, 0x3b, 0xd8, 0x2f, 0x75, 0x4a, 0xc3, 0x6f, 0x09, 0x88, + 0x26, 0x46, 0x90, 0x89, 0x53, 0x12, 0x27, 0xc2, 0x7d, 0x23, 0x6b, 0xc4, + 0xe3, 0x0a, 0x0f, 0xc2, 0x86, 0x6d, 0x20, 0x35, 0x82, 0x33, 0xec, 0xdd, + 0xa7, 0x6a, 0xc3, 0xa8, 0x11, 0xdc, 0x02, 0xd9, 0x05, 0x1b, 0x04, 0x75, + 0x92, 0x6c, 0x08, 0x9e, 0x38, 0x72, 0xd9, 0x7d, 0x9b, 0xbc, 0xfd, 0xca, + 0xb8, 0x06, 0x0e, 0x24, 0x89, 0x90, 0xde, 0x52, 0xe4, 0xd1, 0xcc, 0x99, + 0x87, 0x0b, 0x87, 0xbb, 0x5c, 0xa9, 0xab, 0xec, 0xb5, 0xe4, 0xdd, 0x5d, + 0xfa, 
0xb1, 0x97, 0x5f, 0x61, 0xf7, 0x58, 0xd6, 0x08, 0x02, 0xf2, 0x51, + 0x7c, 0x7a, 0xe6, 0xf1, 0xcb, 0x43, 0xd0, 0x21, 0x09, 0xb8, 0x82, 0xa9, + 0x52, 0xd9, 0xa8, 0x7f, 0x2b, 0xe1, 0x0f, 0x31, 0xbc, 0x16, 0xa2, 0xce, + 0x35, 0x55, 0x2e, 0xd6, 0xda, 0x38, 0xd9, 0xc2, 0x5e, 0xca, 0x27, 0xd9, + 0xa6, 0xd6, 0x4b, 0xa2, 0x73, 0xc4, 0xce, 0x66, 0x30, 0x60, 0xa2, 0x01, + 0xfa, 0xc1, 0xd6, 0xc8, 0xea, 0xdd, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x70, 0xe2, 0x62, 0x68, 0xff, 0x60, 0x67, 0x64, + 0x88, 0xdd, 0x81, 0x79, 0x82, 0xf5, 0x46, 0xf9, 0x7e, 0x0e, 0xa9, 0x26, + 0xf6, 0xcf, 0x5d, 0xef, 0x10, 0x11, 0xe1, 0x71, 0x72, 0x77, 0xcf, 0x02, + 0x7b, 0xf1, 0x6e, 0xc4, 0xb4, 0xfa, 0x2a, 0x12, 0xfe, 0x7e, 0x3c, 0x66, + 0xef, 0x41, 0x98, 0x3a, 0x1f, 0xa9, 0x14, 0x8f, 0x46, 0x22, 0xa0, 0xc2, + 0xee, 0x93, 0x25, 0x34, 0xf2, 0xb7, 0x6d, 0x0a, 0x36, 0xde, 0xff, 0xff, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0xd4, 0x17, 0x62, 0x25, + 0xfd, 0x5b, 0x75, 0xeb, 0xec, 0x06, 0xc9, 0x39, 0x86, 0x6d, 0xc5, 0x60, + 0x2d, 0x33, 0x3d, 0xce, 0x6a, 0x9f, 0x07, 0x3b, 0xb9, 0x70, 0x0f, 0xc7, + 0x13, 0x46, 0x35, 0x46, 0x26, 0xe4, 0xbc, 0x6e, 0x54, 0x89, 0x29, 0xd5, + 0xa4, 0x94, 0xa0, 0x3a, 0x7a, 0x61, 0xcf, 0xd1, 0x48, 0x27, 0x7a, 0x72, + 0x95, 0xde, 0x93, 0xd1, 0x19, 0x1f, 0xc9, 0xc8, 0x8f, 0x0d, 0xce, 0x34, + 0x03, 0x39, 0x0a, 0x92, 0x16, 0x09, 0xc4, 0x49, 0xf9, 0x30, 0x2e, 0x19, + 0xd1, 0x69, 0x7e, 0x78, 0x00, 0x25, 0x30, 0x6f, 0x6b, 0xe1, 0xbe, 0xad, + 0xb2, 0x05, 0xde, 0xc7, 0xc2, 0xf7, 0xd5, 0xa7, 0x4d, 0x03, 0x6f, 0x6b, + 0xcd, 0xcb, 0x42, 0xfa, 0x88, 0x16, 0xd5, 0xa6, 0x60, 0x08, 0xd4, 0xa5, + 0x5b, 0x3b, 0x7b, 0xa2, 0xca, 0xa3, 0xa2, 0x5d, 0x63, 0x7f, 0xc0, 0x37, + 0xc5, 0x7e, 0x99, 0x04, 0x5d, 0x9a, 0xb9, 0xa5, 0xac, 0xd1, 0xe2, 0x5d, + 0xb2, 0x2b, 0x7e, 0xbb, 0xb9, 0x66, 0x13, 0xa7, 0x30, 0xbf, 0x80, 0x0c, + 0x2b, 0x8d, 0x45, 0xe1, 0x8d, 0x96, 0x25, 0x27, 0x47, 0x3d, 0x21, 0x7d, + 0x1c, 0x42, 0xac, 0x31, 0x26, 0x47, 0x59, 0xb3, 0x44, 0x85, 0xf2, 0x8e, + 0x7d, 
0x01, 0x96, 0x6d, 0xb2, 0x64, 0xc3, 0xfc, 0xa7, 0x82, 0x06, 0x4a, + 0x87, 0x75, 0x9b, 0x99, 0x47, 0x7e, 0xa6, 0x4d, 0x2c, 0x36, 0xff, 0xac, + 0x2b, 0x77, 0x96, 0x52, 0x14, 0x8d, 0x07, 0x0d, 0x28, 0x9d, 0x84, 0xa2, + 0xda, 0xd6, 0x45, 0x3a, 0xd4, 0xe6, 0xb7, 0x9a, 0xf3, 0x34, 0xe3, 0xda, + 0x39, 0xdf, 0x35, 0x9c, 0xe4, 0x87, 0x55, 0xc8, 0x43, 0xd0, 0x61, 0x46, + 0x52, 0x2f, 0x75, 0x63, 0xbb, 0x98, 0x97, 0xeb, 0xfb, 0x15, 0xaf, 0x8e, + 0x96, 0xdc, 0xff, 0x0a, 0x90, 0xda, 0x09, 0x63, 0x28, 0x7b, 0x92, 0x73, + 0x0b, 0xd4, 0x2b, 0x72, 0x2a, 0x86, 0x32, 0xc3, 0xc1, 0x3e, 0xe4, 0x2c, + 0x07, 0x89, 0x53, 0xb7, 0xfe, 0x78, 0x6c, 0x95, 0xb4, 0x62, 0x4d, 0x4b, + 0xfe, 0x6c, 0xfc, 0x5e, 0x4e, 0xa7, 0x8c, 0x07, 0x4f, 0x85, 0x27, 0xe0, + 0x7b, 0xd9, 0x7a, 0xe5, 0x1d, 0xbc, 0x36, 0xda, 0x8e, 0x21, 0xff, 0xb3, + 0x60, 0x2c, 0x5e, 0x23, 0x0f, 0xde, 0x3f, 0xae, 0xa5, 0x3a, 0x50, 0xa9, + 0x99, 0x39, 0x45, 0xaf, 0xd3, 0x5f, 0x4a, 0x15, 0xad, 0x9c, 0x66, 0x7f, + 0x92, 0xe0, 0x02, 0x81, 0x3e, 0x06, 0x6a, 0x5e, 0xd0, 0x0c, 0x42, 0xe7, + 0xcf, 0xe2, 0xeb, 0xa3, 0xe0, 0xf7, 0x2d, 0x8a, 0x21, 0xdb, 0x64, 0x28, + 0x2a, 0xb3, 0x2b, 0xc4, 0xc9, 0xd5, 0x60, 0xaf, 0xfc, 0x15, 0xa1, 0x44, + 0x9c, 0x96, 0x04, 0x42, 0x1c, 0x55, 0x8c, 0xa5, 0xce, 0x80, 0xce, 0x75, + 0x64, 0xa9, 0xf6, 0xa5, 0x5a, 0x0f, 0x8a, 0x4b, 0x8b, 0x72, 0xcf, 0x3e, + 0xd7, 0xeb, 0xe1, 0xd0, 0xd3, 0x2d, 0x04, 0x6c, 0x9e, 0x02, 0x75, 0x43, + 0x5c, 0xc1, 0x57, 0x66, 0xd9, 0x14, 0x5b, 0x08, 0x10, 0x44, 0x8d, 0x8e, + 0x89, 0xd1, 0x65, 0x27, 0x2a, 0x0b, 0x99, 0x6f, 0x09, 0xa6, 0x20, 0xa5, + 0x75, 0x24, 0xe4, 0xf7, 0xf5, 0xe0, 0xed, 0x79, 0x37, 0x18, 0x13, 0x1c, + 0xd9, 0xd1, 0xf5, 0x69, 0x0c, 0xa5, 0x02, 0xdf, 0x6a, 0xfd, 0x2e, 0x35, + 0x8e, 0xd0, 0x41, 0x91, 0x61, 0x0f, 0x5c, 0xdd, 0x70, 0xbf, 0x1c, 0x49, + 0xcb, 0xe9, 0xc9, 0x33, 0xc4, 0x99, 0x1e, 0x8b, 0x75, 0x48, 0xc2, 0x58, + 0xa4, 0x70, 0x1f, 0xbb, 0xcd, 0xd3, 0x0e, 0x79, 0x25, 0xbe, 0x53, 0xfa, + 0x32, 0x32, 0xf6, 0xb9, 0xf0, 0x0a, 0x52, 0x5b, 0xe0, 0x69, 0xff, 0x43, + 0xda, 
0x98, 0x1f, 0xee, 0x54, 0x60, 0xf8, 0x24, 0x43, 0xc5, 0x37, 0x72, + 0xd1, 0xfc, 0x99, 0x9a, 0x3e, 0x24, 0xe0, 0xd9, 0xc2, 0x61, 0x47, 0xb3, + 0x26, 0x09, 0x85, 0x74, 0xa1, 0x2b, 0x4a, 0x70, 0xd0, 0x1b, 0x90, 0x03, + 0x25, 0xd9, 0x22, 0xc2, 0x16, 0x22, 0x3a, 0x62, 0x20, 0xd4, 0x13, 0xce, + 0xa2, 0xc7, 0x02, 0xfb, 0x9a, 0xbf, 0xf1, 0x1c, 0x80, 0x01, 0x97, 0x90, + 0x7f, 0x5a, 0x98, 0x70, 0x30, 0x61, 0x77, 0xe5, 0xd4, 0x3b, 0x03, 0x42, + 0x57, 0x31, 0x5e, 0xc6, 0x64, 0xe1, 0xf4, 0x64, 0x77, 0x21, 0x9b, 0x44, + 0x1c, 0xd9, 0x8c, 0x95, 0x8a, 0xf1, 0xcb, 0x82, 0xac, 0xc1, 0x26, 0x31, + 0xf2, 0x22, 0x41, 0xab, 0xbb, 0x23, 0xd3, 0x8d, 0xcc, 0x5c, 0x9d, 0x9b, + 0x1d, 0x9c, 0x4d, 0xf3, 0x62, 0xde, 0x15, 0x6a, 0x94, 0x8d, 0x24, 0xe7, + 0x52, 0x8d, 0x2a, 0xa4, 0x1d, 0x54, 0x5a, 0xda, 0xaf, 0xab, 0x05, 0x27, + 0x4b, 0xbb, 0xb4, 0xda, 0x0c, 0xb9, 0x20, 0xb3, 0xaf, 0x4a, 0xeb, 0x37, + 0xe5, 0x43, 0xe4, 0xc1, 0xf6, 0x9e, 0xf8, 0x6c, 0xd8, 0xa1, 0x0c, 0xf9, + 0xd1, 0x4b, 0x96, 0xa0, 0x6d, 0x38, 0x64, 0x41, 0xd3, 0x14, 0xfb, 0xad, + 0x89, 0xa9, 0xf7, 0x36, 0x01, 0x0f, 0xbe, 0x8e, 0xd7, 0x76, 0xc6, 0x70, + 0x22, 0x32, 0x8b, 0x08, 0xca, 0x95, 0xbf, 0xcf, 0x5e, 0xb8, 0xc0, 0x3f, + 0xd9, 0xaa, 0x84, 0xab, 0x30, 0x5b, 0xe3, 0x7a, 0x61, 0x32, 0xe5, 0x54, + 0x01, 0x5e, 0xb6, 0x1c, 0x9c, 0x78, 0x52, 0x2a, 0xa7, 0xf5, 0x29, 0xa6, + 0x0f, 0x14, 0xa5, 0x3a, 0x34, 0xd4, 0xf5, 0xc2, 0xb2, 0x8d, 0x12, 0x7b, + 0x8a, 0x64, 0x00, 0xfd, 0x02, 0x0e, 0x02, 0x26, 0x5a, 0xb9, 0xeb, 0xfd, + 0x30, 0xce, 0x51, 0xec, 0x5f, 0xbc, 0xee, 0x53, 0x21, 0xec, 0x0e, 0xee, + 0xc4, 0x28, 0x1a, 0xec, 0x2a, 0x39, 0x4e, 0xe1, 0x50, 0x11, 0x3f, 0x16, + 0xdd, 0xbf, 0xaf, 0x3e, 0xbe, 0xd4, 0xfe, 0x34, 0x1e, 0x62, 0x3f, 0x5a, + 0xea, 0x05, 0xfc, 0xd5, 0x45, 0x08, 0x47, 0xce, 0x38, 0x3f, 0x75, 0x7e, + 0x0c, 0x3a, 0x2a, 0x14, 0xa7, 0x61, 0xba, 0x3a, 0xa1, 0x41, 0xa2, 0x72, + 0x19, 0xfa, 0x33, 0x43, 0xa7, 0xf4, 0x4e, 0x5b, 0xf9, 0xb1, 0x45, 0x16, + 0x57, 0x8e, 0xb1, 0xad, 0x7d, 0x88, 0xd3, 0x93, 0xa2, 0x08, 0xf3, 0x96, + 0x4d, 
0x84, 0x63, 0x08, 0xfa, 0x9d, 0xf3, 0x04, 0x33, 0xbd, 0x7e, 0x7a, + 0xc7, 0x63, 0xc5, 0x31, 0x5a, 0x82, 0x33, 0x90, 0x56, 0x44, 0xe9, 0xd3, + 0xc4, 0xd4, 0x76, 0x29, 0x2f, 0xdb, 0xa3, 0x9d, 0xff, 0xd4, 0xd2, 0xb1, + 0xce, 0xf1, 0xcb, 0x7f, 0x10, 0x3b, 0x90, 0xa4, 0x1b, 0xa0, 0x9b, 0xa7, + 0xfa, 0x27, 0x40, 0x11, 0x35, 0xc9, 0x7f, 0x01, 0x97, 0x76, 0x9f, 0x33, + 0xc5, 0xd6, 0x8d, 0x20, 0x07, 0x73, 0x93, 0x0b, 0x24, 0x88, 0x4e, 0x73, + 0x68, 0x79, 0x92, 0x20, 0x2a, 0x71, 0xed, 0x22, 0x0b, 0xfb, 0x42, 0xb5, + 0xd9, 0xc3, 0xaa, 0xed, 0x45, 0x03, 0x64, 0xde, 0x6f, 0x25, 0x8e, 0x3b, + 0x9a, 0xef, 0xc5, 0x63, 0xc2, 0x7f, 0x34, 0xd0, 0x1b, 0x20, 0xa3, 0xab, + 0x9d, 0x54, 0x41, 0x0e, 0x7b, 0x2e, 0x96, 0x12, 0x75, 0x58, 0xdf, 0xd5, + 0xaa, 0x3c, 0xf2, 0x26, 0xc1, 0xf1, 0x18, 0x37, 0x56, 0xf2, 0xd2, 0x86, + 0x6f, 0xd4, 0x9f, 0x57, 0x2b, 0x32, 0xe9, 0x08, 0x94, 0x53, 0x40, 0xc5, + 0x4d, 0x77, 0x39, 0xc6, 0x4c, 0x63, 0x53, 0xf9, 0xbf, 0x35, 0x08, 0xc5, + 0x0d, 0xd0, 0x89, 0x82, 0xa7, 0x2d, 0x6a, 0xb4, 0x22, 0xb1, 0x10, 0x7f, + 0xcf, 0x2e, 0x21, 0x27, 0x9c, 0x12, 0xc6, 0x0e, 0xca, 0xd2, 0x32, 0xb1, + 0x6d, 0xfd, 0x59, 0x12, 0x23, 0x60, 0x46, 0x89, 0xe0, 0x75, 0x5e, 0xc9, + 0xf4, 0x3d, 0x8a, 0x89, 0xd4, 0x23, 0xc2, 0xbe, 0x30, 0x32, 0x4a, 0x95, + 0x42, 0xe2, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, + 0xa7, 0x0b, 0x48, 0xe2, 0xeb, 0xd7, 0x12, 0x42, 0x4c, 0x71, 0xfb, 0x25, + 0x17, 0x23, 0x0e, 0x01, 0xa6, 0x21, 0xb9, 0x17, 0x6e, 0xf0, 0x24, 0x66, + 0x9e, 0x9d, 0x0f, 0x71, 0xf8, 0x5b, 0x79, 0xb0, 0x1b, 0x1f, 0xe7, 0xa2, + 0xc0, 0x17, 0x16, 0x08, 0x5e, 0x24, 0x7b, 0xf9, 0x7a, 0x1e, 0x70, 0xe2, + 0x05, 0x40, 0x16, 0x56, 0xe7, 0x79, 0xf2, 0x30, 0xa3, 0xdc, 0xe3, 0x7a, + 0x7e, 0x22, 0x88, 0xc0, 0xf7, 0xc8, 0x5c, 0x93, 0x95, 0x86, 0x02, 0x6c, + 0x73, 0x76, 0xef, 0x03, 0x2d, 0xcb, 0xa5, 0x22, 0xfe, 0x05, 0xbb, 0xe6, + 0xfd, 0x19, 0x8c, 0x8b, 0x67, 0x58, 0x81, 0x81, 0x2d, 0x36, 0xd0, 0xc1, + 0x20, 0xb2, 0x87, 0x87, 0xdb, 0xe4, 0xe5, 0xd1, 0xd1, 0xd5, 0x81, 0x34, + 0x4c, 
0xd6, 0x09, 0xa2, 0x5d, 0xcc, 0x99, 0x12, 0xa5, 0x06, 0x0f, 0x06, + 0x7e, 0xbb, 0x67, 0x26, 0x69, 0x15, 0x6e, 0x5f, 0xb1, 0x8e, 0xd6, 0x34, + 0xfc, 0x4d, 0xd9, 0x03, 0xb7, 0x5a, 0xf4, 0xaa, 0x03, 0x00, 0x88, 0x6b, + 0x5a, 0xc9, 0xf2, 0xfb, 0x67, 0x72, 0xbc, 0xf7, 0xb9, 0xdc, 0x97, 0xdf, + 0x80, 0x91, 0xfa, 0x30, 0x18, 0x02, 0x89, 0xc7, 0xc9, 0x62, 0x1d, 0xc0, + 0x0b, 0xa6, 0xfe, 0x7e, 0xb9, 0xa9, 0x1f, 0x11, 0x71, 0xe1, 0xd1, 0xfe, + 0x8d, 0x90, 0x2c, 0x09, 0x82, 0x2e, 0x36, 0x79, 0xa5, 0x75, 0x54, 0xfb, + 0xd3, 0x3c, 0xb4, 0x18, 0x2f, 0x4e, 0x3f, 0x37, 0xc4, 0xf8, 0xc5, 0x59, + 0xa3, 0xfd, 0x0c, 0x62, 0x9e, 0xa8, 0x7a, 0x56, 0xc5, 0x97, 0x89, 0x35, + 0xc7, 0xb0, 0x29, 0x87, 0xbf, 0x6a, 0xdc, 0xb1, 0x2f, 0x01, 0xf4, 0x0d, + 0x7c, 0x25, 0x95, 0x39, 0x81, 0xdd, 0x1a, 0x81, 0x36, 0xc0, 0x6b, 0xbf, + 0x6b, 0x4d, 0xea, 0x23, 0xc0, 0x3e, 0x5c, 0x39, 0xe5, 0x6b, 0x59, 0xa0, + 0x50, 0x02, 0x99, 0xdf, 0x4e, 0xe3, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x04, 0x00, 0x00, 0x17, 0x88, 0xf8, 0xda, 0x3d, 0x57, 0x83, 0x63, + 0x76, 0xa0, 0x5c, 0x13, 0x1a, 0x00, 0x64, 0x30, 0x19, 0xfd, 0x2e, 0x9c, + 0x64, 0xb6, 0xda, 0x51, 0x7b, 0x55, 0xe8, 0xc4, 0x67, 0x1b, 0xda, 0xfc, + 0x4c, 0xd0, 0x27, 0x58, 0x56, 0xa1, 0x52, 0xd2, 0xb8, 0xd8, 0xd5, 0x94, + 0x69, 0xcf, 0xd0, 0xd5, 0x72, 0xeb, 0x2b, 0x05, 0xf3, 0x12, 0xa6, 0xac, + 0xa6, 0xf7, 0x90, 0x24, 0x1f, 0x22, 0x97, 0x5e, 0x8b, 0x7c, 0x2c, 0x30, + 0x61, 0x11, 0x9b, 0xdf, 0x83, 0x2b, 0x10, 0x09, 0x42, 0x77, 0x2b, 0xd9, + 0x43, 0xb3, 0x27, 0x69, 0x75, 0xf2, 0x2e, 0x72, 0xed, 0x50, 0xea, 0xbf, + 0x7f, 0x47, 0x39, 0x9c, 0xf8, 0x1e, 0xce, 0x6f, 0xdd, 0xe8, 0x40, 0xc5, + 0x14, 0x01, 0x7e, 0xbb, 0x0f, 0x43, 0x2d, 0x36, 0x70, 0x54, 0xc6, 0xbe, + 0x69, 0x24, 0xd1, 0x65, 0x49, 0x77, 0xf0, 0xd2, 0x99, 0xb4, 0x50, 0x8d, + 0x98, 0xcb, 0xbf, 0x7a, 0x7c, 0x65, 0xd3, 0x46, 0xcf, 0x90, 0x69, 0x56, + 0x15, 0xa2, 0xae, 0x11, 0x94, 0x60, 0xf9, 0x45, 0x17, 0x54, 0x6b, 0xbd, + 0xeb, 0xd8, 0x74, 0x41, 0x5c, 0xf6, 0x49, 0x0a, 0x14, 0xce, 0x43, 0x1f, + 0x67, 
0xc3, 0x6c, 0xf4, 0x01, 0xce, 0x3f, 0x85, 0xed, 0x19, 0xa1, 0xf7, + 0x1b, 0xf8, 0x46, 0x45, 0xb4, 0xe9, 0xa7, 0x1f, 0x2a, 0x65, 0x00, 0x2a, + 0xd3, 0x8b, 0x6a, 0x3b, 0xac, 0x78, 0xab, 0xf4, 0xc8, 0x62, 0x76, 0xc8, + 0x24, 0xf8, 0xf8, 0x08, 0xe0, 0x64, 0x00, 0x64, 0x74, 0x9e, 0x55, 0x2e, + 0xf8, 0xc9, 0xc8, 0x58, 0x0e, 0x1f, 0x27, 0x32, 0xfd, 0x30, 0x24, 0x68, + 0xc8, 0xa4, 0x8c, 0x1c, 0xf3, 0xa7, 0x32, 0xae, 0x84, 0x0a, 0x8a, 0x1e, + 0x11, 0xce, 0xb2, 0x02, 0xf1, 0xb3, 0x5f, 0x7d, 0x5e, 0x54, 0x8c, 0xe0, + 0xeb, 0x46, 0x6e, 0x8a, 0x5f, 0x3f, 0x71, 0x47, 0x2a, 0x8a, 0xe6, 0xf0, + 0xb0, 0x04, 0x49, 0x64, 0xb3, 0x7e, 0x16, 0x09, 0x83, 0x5f, 0x12, 0xe0, + 0x85, 0xb7, 0x36, 0xc0, 0x8a, 0xa5, 0xcd, 0xae, 0xc0, 0xb4, 0xa2, 0x62, + 0x9b, 0xfa, 0x64, 0x18, 0x16, 0x8e, 0xb6, 0x50, 0xf2, 0x9b, 0xc4, 0x7d, + 0x0c, 0x4c, 0x8b, 0x58, 0xcf, 0x9b, 0x87, 0x09, 0xb1, 0x37, 0xbb, 0xaf, + 0xa7, 0x72, 0x79, 0x81, 0x09, 0x55, 0xa1, 0x6a, 0x87, 0xb0, 0x7d, 0xc8, + 0xb0, 0xc1, 0xa4, 0xa9, 0xdf, 0xcf, 0x95, 0x77, 0x36, 0x8e, 0x2b, 0xae, + 0xeb, 0x4b, 0xf9, 0x2a, 0x83, 0x6c, 0x53, 0x3c, 0x89, 0xa6, 0x08, 0xae, + 0x00, 0x4e, 0xb8, 0xf6, 0x34, 0x7c, 0xc6, 0x76, 0x87, 0x1a, 0x02, 0xb0, + 0x89, 0xa3, 0x0f, 0x00, 0xc6, 0x7b, 0xeb, 0xf7, 0x95, 0x40, 0xc5, 0x0d, + 0x6f, 0x74, 0xd8, 0x21, 0x2f, 0x9f, 0x24, 0xac, 0x43, 0xdb, 0x3a, 0x39, + 0x6c, 0x34, 0x59, 0x62, 0x66, 0xbc, 0x28, 0x7f, 0x8c, 0x64, 0x62, 0x8c, + 0x28, 0x6c, 0xf5, 0x79, 0x24, 0xb1, 0x00, 0x9c, 0x58, 0x6b, 0x09, 0xef, + 0xb0, 0x73, 0xcd, 0x47, 0xbb, 0x52, 0xfd, 0x26, 0x6a, 0xff, 0xb9, 0xf1, + 0xd5, 0x82, 0x59, 0x01, 0xfa, 0x87, 0x14, 0x24, 0x10, 0xb0, 0xf7, 0xdf, + 0xf9, 0x3f, 0x67, 0x19, 0xbd, 0xc7, 0x85, 0xb0, 0xad, 0x47, 0xa8, 0x4c, + 0x3e, 0xb6, 0x2e, 0x8a, 0xb3, 0xcc, 0x35, 0xa0, 0x48, 0xc7, 0x90, 0x81, + 0xb7, 0x53, 0x1c, 0x38, 0x63, 0xf2, 0x2f, 0xa0, 0x71, 0x82, 0xe2, 0x56, + 0xdb, 0x68, 0xe8, 0x5f, 0xf8, 0x42, 0xf2, 0xf6, 0xb8, 0x10, 0x6b, 0x54, + 0x21, 0xa0, 0xc1, 0xfe, 0xcb, 0xce, 0x12, 0xa2, 0x49, 0x51, 0x86, 0x53, + 0x56, 
0xec, 0x33, 0xb3, 0x72, 0xce, 0xa4, 0x46, 0xe3, 0x37, 0xcb, 0xc0, + 0x95, 0xaa, 0xe2, 0xa3, 0xc5, 0xe9, 0x36, 0x40, 0xfe, 0xf7, 0xe2, 0x5a, + 0x6d, 0x58, 0x39, 0xb2, 0x41, 0x5d, 0xe2, 0x71, 0x72, 0xd0, 0xf0, 0x5c, + 0x16, 0x88, 0x95, 0x30, 0x0a, 0xfb, 0x8d, 0xda, 0x14, 0x80, 0xf4, 0x15, + 0xf2, 0xf6, 0xac, 0xf3, 0xd8, 0x8d, 0x13, 0x24, 0x2c, 0x74, 0x60, 0x6e, + 0x8c, 0xa1, 0x59, 0xcf, 0x74, 0x7c, 0x2d, 0x0b, 0xbb, 0x06, 0x5c, 0x9d, + 0xcd, 0xf3, 0x1e, 0x4a, 0xba, 0x3f, 0x9c, 0x4a, 0xc4, 0xd7, 0xf9, 0xf0, + 0xa5, 0x56, 0x7f, 0xb0, 0xa2, 0x57, 0xd0, 0xc3, 0xaa, 0xa7, 0xd0, 0x49, + 0xe2, 0x28, 0x9b, 0xc4, 0x64, 0x0c, 0xe0, 0x71, 0x9c, 0x05, 0x04, 0x95, + 0x00, 0x1f, 0x7b, 0xa9, 0xb9, 0xb3, 0x2b, 0x8f, 0x0b, 0x45, 0x1e, 0x23, + 0xaa, 0x27, 0x89, 0x4a, 0xb0, 0x7d, 0x03, 0xdf, 0xae, 0xdb, 0xcb, 0xc4, + 0xec, 0x3b, 0x02, 0xe2, 0x85, 0x3a, 0xb7, 0x25, 0xfb, 0xab, 0xca, 0xc1, + 0x33, 0x00, 0x5b, 0xd2, 0xcf, 0xb0, 0x11, 0x1d, 0x51, 0xb5, 0x5b, 0xea, + 0x94, 0xf7, 0xa0, 0x98, 0x33, 0xba, 0x58, 0xfc, 0x12, 0xea, 0xdd, 0x89, + 0xbd, 0x63, 0x03, 0xbe, 0x7e, 0x3b, 0x69, 0xc4, 0x9d, 0x57, 0x0f, 0xd6, + 0xbe, 0xea, 0x5b, 0xd0, 0x97, 0x63, 0x89, 0xb0, 0xa0, 0xc0, 0xd6, 0x39, + 0xc1, 0x69, 0x12, 0x6a, 0xfb, 0xac, 0x74, 0x7f, 0xfb, 0xf4, 0x7f, 0x38, + 0x44, 0x4c, 0x8a, 0xa2, 0x41, 0x15, 0xc0, 0x54, 0xc0, 0xed, 0x14, 0x83, + 0xef, 0xbc, 0x9c, 0xc7, 0xdd, 0x21, 0xd6, 0xf0, 0x9b, 0x7f, 0x09, 0xd5, + 0x96, 0xe5, 0xf7, 0xc5, 0xa9, 0xb3, 0x41, 0xb0, 0x9d, 0xeb, 0x49, 0x68, + 0x9d, 0x2b, 0xea, 0x47, 0x80, 0x3b, 0x54, 0xb8, 0xf4, 0x14, 0x5e, 0xd6, + 0x66, 0x89, 0x04, 0xb3, 0x00, 0xa3, 0xa8, 0x32, 0x62, 0x2e, 0xc3, 0x15, + 0xc6, 0x93, 0x7d, 0x40, 0x32, 0xb1, 0x6b, 0x60, 0xd3, 0x52, 0xdf, 0x09, + 0x8c, 0x80, 0x2b, 0x01, 0xe7, 0x97, 0x8d, 0xbb, 0x14, 0xd6, 0x10, 0x15, + 0x64, 0x00, 0x4a, 0x2c, 0x67, 0xca, 0xd0, 0xa1, 0x37, 0x33, 0x7b, 0xa1, + 0x2a, 0x5b, 0x5b, 0x78, 0xf8, 0x2f, 0xdd, 0x76, 0xab, 0x8a, 0xc3, 0xe3, + 0x37, 0x00, 0xd1, 0x29, 0xb0, 0x96, 0x1d, 0x18, 0xbe, 0x5d, 0x32, 0x7e, + 0xb7, 
0x11, 0xa9, 0x78, 0x72, 0xa2, 0x2d, 0x29, 0x1c, 0x32, 0xa4, 0xff, + 0xc7, 0xce, 0xfe, 0xaf, 0xb7, 0x17, 0x43, 0xe5, 0x2f, 0xae, 0x45, 0xd3, + 0xaf, 0x10, 0xe3, 0xd0, 0x58, 0xb6, 0xee, 0xee, 0x7a, 0xb5, 0x06, 0x70, + 0x26, 0x7e, 0x2d, 0x5b, 0xd5, 0xe1, 0x7b, 0x9a, 0x37, 0x02, 0xfc, 0x1d, + 0x08, 0x4f, 0x1a, 0xf5, 0x44, 0x63, 0xde, 0x4b, 0x14, 0x68, 0x54, 0x0b, + 0x6a, 0x22, 0x4e, 0x02, 0x65, 0xcd, 0xf4, 0x04, 0xec, 0xcc, 0x8a, 0x0b, + 0xe0, 0x59, 0xf8, 0x65, 0x25, 0x63, 0xed, 0x0f, 0xa6, 0xc5, 0x3c, 0xcb, + 0x5d, 0xc5, 0xd8, 0x9f, 0x5a, 0xd3, 0x88, 0x3d, 0xd4, 0x2c, 0xb3, 0x04, + 0xf6, 0x97, 0xc7, 0xe2, 0xfd, 0xb6, 0xf4, 0x7d, 0x0d, 0xb9, 0x75, 0x7e, + 0x9d, 0x81, 0xdc, 0xdf, 0x8e, 0x90, 0x40, 0x0c, 0x7b, 0x45, 0xfe, 0x68, + 0xfd, 0xff, 0x1c, 0xf1, 0x16, 0x09, 0x33, 0x74, 0x27, 0x7b, 0x4d, 0xd9, + 0x9b, 0x48, 0x6d, 0x84, 0xeb, 0x96, 0x8f, 0x4b, 0x82, 0x73, 0xd5, 0x69, + 0x7d, 0x14, 0x45, 0x8c, 0xb8, 0x71, 0x87, 0x70, 0x09, 0x26, 0xfc, 0x89, + 0x6f, 0x0f, 0xb6, 0xc1, 0xd6, 0xe1, 0xbf, 0xdb, 0x85, 0x8f, 0x94, 0xad, + 0x94, 0x01, 0x01, 0xbb, 0x3f, 0xc0, 0xb5, 0xff, 0xf5, 0xbb, 0x4f, 0x50, + 0x09, 0xca, 0x7d, 0x36, 0x47, 0x66, 0x9a, 0x8c, 0xee, 0x84, 0x73, 0x9a, + 0x1f, 0x49, 0x75, 0xb4, 0xab, 0x66, 0xf7, 0x3b, 0xfe, 0x81, 0x67, 0xc9, + 0xd1, 0x16, 0xde, 0x1f, 0xc2, 0x24, 0xed, 0x6a, 0x5a, 0xe7, 0xff, 0xff, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0xc5, 0xd7, 0x14, 0x84, + 0xf8, 0xcf, 0x9b, 0xf4, 0xb7, 0x6f, 0x47, 0x90, 0x47, 0x30, 0x80, 0x4b, + 0x9e, 0x32, 0x25, 0xa9, 0xf1, 0x33, 0xb5, 0xde, 0xa1, 0x68, 0xf4, 0xe2, + 0x85, 0x1f, 0x07, 0x2f, 0xcc, 0x00, 0xfc, 0xaa, 0x7c, 0xa6, 0x20, 0x61, + 0x71, 0x7a, 0x48, 0xe5, 0x2e, 0x29, 0xa3, 0xfa, 0x37, 0x9a, 0x95, 0x3f, + 0xaa, 0x68, 0x93, 0xe3, 0x2e, 0xc5, 0xa2, 0x7b, 0x94, 0x5e, 0x60, 0x5f, + 0x10, 0x85, 0xf3, 0x23, 0x2d, 0x42, 0x4c, 0x13, 0x29, 0xc8, 0x8d, 0x78, + 0x6e, 0xd6, 0x8c, 0xe6, 0xfc, 0xb6, 0x2a, 0xa6, 0x3b, 0xf9, 0xab, 0x61, + 0x7c, 0x08, 0x8a, 0x3b, 0x70, 0xbe, 0x57, 0xaa, 0xda, 0x1f, 0x33, 0x4a, + 0x70, 
0x17, 0x25, 0x0d, 0x3f, 0x60, 0x3d, 0xc8, 0x2e, 0xbd, 0x3b, 0x12, + 0x0b, 0x63, 0x5e, 0x3f, 0xf5, 0x6b, 0x1f, 0x0b, 0xd9, 0x33, 0x85, 0x23, + 0x71, 0x24, 0x9a, 0xb3, 0xdf, 0x5c, 0x1f, 0xef, 0x14, 0x33, 0xc8, 0x66, + 0x85, 0xb7, 0xf0, 0x56, 0x68, 0x1d, 0x51, 0x52, 0xaf, 0x80, 0x3c, 0xe2, + 0x59, 0x06, 0xf1, 0xd1, 0x9f, 0xb6, 0xc6, 0x80, 0x4e, 0x06, 0xea, 0x28, + 0xab, 0x17, 0x8f, 0x45, 0x7a, 0xf6, 0xb4, 0x93, 0xb7, 0x43, 0x9e, 0xc6, + 0xd4, 0x29, 0x00, 0x62, 0xab, 0x51, 0x7a, 0x72, 0xe5, 0xc1, 0xd4, 0x10, + 0xcd, 0xd6, 0x17, 0x54, 0xe4, 0x20, 0x84, 0x50, 0xe4, 0xf9, 0x00, 0x13, + 0xfd, 0xa6, 0x9f, 0xef, 0x19, 0xd4, 0x60, 0x2a, 0x42, 0x07, 0xcd, 0xd5, + 0xa1, 0x01, 0x6d, 0x07, 0x01, 0x32, 0x61, 0x3c, 0x65, 0x9a, 0x8f, 0x5d, + 0x33, 0xf3, 0xcb, 0x29, 0x0b, 0x8c, 0xe7, 0x3b, 0x83, 0x44, 0xb1, 0x3a, + 0x4f, 0x8e, 0x09, 0x15, 0x14, 0x69, 0x84, 0xa1, 0xbb, 0x15, 0xfd, 0xea, + 0xde, 0xbe, 0x5b, 0x6a, 0xc0, 0x95, 0x04, 0x46, 0x4d, 0x8a, 0xaa, 0xac, + 0xbc, 0x2f, 0xad, 0x12, 0x15, 0x8a, 0x53, 0x4c, 0x94, 0xb8, 0xca, 0x42, + 0x96, 0x3a, 0xf4, 0x7a, 0x18, 0x9d, 0x5b, 0x24, 0x9a, 0xce, 0xa8, 0x99, + 0xd4, 0x37, 0x32, 0xf6, 0xf2, 0xac, 0xaf, 0x3f, 0xf5, 0x3b, 0xfe, 0xda, + 0x13, 0x9a, 0xab, 0x4f, 0x55, 0xc0, 0x2c, 0x21, 0x2b, 0x65, 0x71, 0x1f, + 0xc5, 0x04, 0x32, 0xc9, 0x94, 0xe5, 0xfa, 0x6f, 0xd8, 0x2a, 0xbc, 0x70, + 0x85, 0x55, 0xdc, 0x62, 0xb7, 0x3a, 0x20, 0x0e, 0xe7, 0x67, 0x3c, 0xfe, + 0xcb, 0x83, 0x6a, 0x15, 0x6e, 0x4a, 0x35, 0x65, 0xea, 0xc1, 0xb9, 0x4d, + 0x35, 0xf9, 0x4b, 0xcf, 0xd8, 0xfd, 0xa5, 0xff, 0xff, 0x67, 0x70, 0x04, + 0xae, 0xa2, 0xa4, 0x12, 0x4b, 0x83, 0x4f, 0xc2, 0x96, 0xf0, 0x21, 0x2b, + 0x14, 0x21, 0x73, 0x42, 0x14, 0x99, 0x07, 0xe5, 0xa9, 0x52, 0x4c, 0xeb, + 0xbe, 0xc3, 0x11, 0x2e, 0x27, 0xda, 0x69, 0x94, 0xd5, 0xf6, 0xc6, 0x77, + 0x0a, 0x00, 0x5d, 0x9a, 0x82, 0xaa, 0x21, 0xfc, 0x86, 0x9b, 0xd0, 0xc4, + 0xc4, 0x1f, 0x53, 0x41, 0x7a, 0x92, 0xab, 0x1c, 0x12, 0xf6, 0xd5, 0x48, + 0xfb, 0x29, 0x4d, 0xb4, 0xd2, 0x12, 0xee, 0xc5, 0xea, 0x18, 0x33, 0xf1, + 0x4d, 
0x0a, 0x10, 0x43, 0xa5, 0x35, 0xb1, 0x63, 0xc4, 0xfb, 0x38, 0x1e, + 0xef, 0xac, 0x3f, 0x97, 0x41, 0xc6, 0x96, 0x3e, 0x60, 0x13, 0xc8, 0xe3, + 0xbe, 0x61, 0xe9, 0xb6, 0x26, 0x16, 0x14, 0xf8, 0x82, 0x0d, 0x6e, 0x75, + 0x2f, 0xd7, 0x9c, 0x3a, 0x4a, 0xda, 0xd8, 0x2b, 0x35, 0xd4, 0x20, 0x32, + 0xd4, 0x4f, 0x0f, 0xe4, 0xdc, 0xd5, 0x0f, 0xfe, 0xa6, 0x81, 0x28, 0xb4, + 0x24, 0x3e, 0xb7, 0x0f, 0xb0, 0xb2, 0x5b, 0x05, 0x76, 0xbb, 0x24, 0x49, + 0x6a, 0x01, 0x68, 0x3f, 0x03, 0x96, 0xbc, 0x0c, 0x77, 0x48, 0x5f, 0xe8, + 0x39, 0xf4, 0xb0, 0x84, 0x42, 0x0e, 0x6a, 0xb9, 0xab, 0xf2, 0x95, 0x97, + 0xa7, 0x5e, 0x29, 0x34, 0x9d, 0x50, 0xc0, 0x4b, 0x40, 0x72, 0xa1, 0x7c, + 0x79, 0x5e, 0x95, 0xbe, 0xd6, 0x17, 0x43, 0x0a, 0xc9, 0x27, 0x25, 0x43, + 0xd7, 0x99, 0xd5, 0x48, 0xd8, 0x98, 0xb5, 0x2b, 0x7f, 0xe3, 0xbd, 0x1d, + 0xc0, 0xd1, 0x04, 0xd5, 0xa4, 0xe1, 0x68, 0xbe, 0x96, 0xf1, 0x2e, 0x5e, + 0x37, 0x8d, 0x39, 0x4e, 0xe4, 0xcc, 0x5e, 0xd7, 0xdd, 0x59, 0x7e, 0xe8, + 0xae, 0x48, 0xb5, 0xec, 0x2c, 0xf7, 0x68, 0x96, 0x00, 0xe5, 0xec, 0x03, + 0x6f, 0x98, 0x3a, 0x9a, 0x4f, 0xd9, 0xf1, 0x2f, 0xfe, 0x76, 0xcf, 0x8f, + 0x0b, 0x3d, 0x8a, 0x14, 0x00, 0x83, 0xcb, 0xca, 0xe3, 0x34, 0x81, 0xb5, + 0x91, 0x64, 0x2b, 0x12, 0x24, 0x86, 0x9c, 0xae, 0x3c, 0x7f, 0x53, 0x22, + 0xd4, 0x94, 0x90, 0x44, 0x6b, 0x35, 0xd2, 0xce, 0x8e, 0x95, 0xe2, 0xbe, + 0x46, 0x50, 0x3f, 0x3d, 0xc3, 0xcd, 0xef, 0x47, 0x99, 0xb5, 0xf2, 0xd4, + 0x6f, 0xf4, 0xfa, 0xa2, 0xfc, 0x1e, 0xe3, 0x99, 0x49, 0xfd, 0x1a, 0x6e, + 0x0d, 0xb5, 0xf1, 0xc8, 0x05, 0x22, 0x29, 0xca, 0x03, 0xb8, 0x15, 0x3b, + 0x01, 0x8a, 0x95, 0x74, 0x48, 0x93, 0x61, 0x35, 0xde, 0xeb, 0xa9, 0xc4, + 0x56, 0xa9, 0xd7, 0xde, 0x4b, 0xe5, 0x4b, 0xa1, 0x42, 0x6a, 0x5f, 0xe3, + 0xb2, 0xc7, 0xda, 0xfb, 0xc7, 0x70, 0x64, 0xe0, 0x68, 0x19, 0xc6, 0x11, + 0x77, 0x2b, 0x5f, 0xba, 0x1d, 0x58, 0x77, 0x98, 0x2c, 0x91, 0xb4, 0xd2, + 0xea, 0x1b, 0xdc, 0xe8, 0xfa, 0x82, 0xf3, 0x6e, 0xac, 0x88, 0x15, 0x16, + 0x1a, 0x53, 0xb3, 0x01, 0x94, 0x03, 0x47, 0x20, 0xdb, 0x71, 0xcb, 0x71, + 0xe8, 
0x62, 0xad, 0x34, 0x2b, 0xa3, 0xa5, 0xe9, 0xa6, 0x82, 0x0e, 0x16, + 0x61, 0xbc, 0x29, 0x6b, 0xb1, 0x60, 0x67, 0x80, 0x9a, 0x9f, 0xc4, 0x82, + 0xf6, 0xb0, 0x7a, 0x16, 0x9c, 0x25, 0x04, 0xeb, 0xfd, 0xe0, 0x18, 0xd3, + 0xfc, 0xeb, 0xe1, 0x3c, 0x2b, 0x29, 0x7b, 0x32, 0x4e, 0xd3, 0x6d, 0xe1, + 0x27, 0xda, 0xc9, 0x14, 0x5c, 0x7f, 0xfa, 0x70, 0x41, 0x8e, 0xb4, 0xa3, + 0xde, 0x36, 0x92, 0x67, 0x97, 0xe2, 0xec, 0x85, 0x8b, 0x76, 0x08, 0x3c, + 0x32, 0x58, 0xd4, 0x7f, 0x6f, 0x91, 0x03, 0xdb, 0x19, 0x3e, 0xc4, 0x8b, + 0x3c, 0xb7, 0x75, 0x90, 0x71, 0x7a, 0x21, 0x9d, 0xa7, 0x77, 0xbf, 0xf5, + 0x92, 0x57, 0x46, 0x07, 0xa7, 0xbb, 0x0c, 0x42, 0xca, 0x4f, 0x5a, 0x27, + 0x45, 0x69, 0xfe, 0x6d, 0x78, 0x43, 0x77, 0xc4, 0xb4, 0x43, 0xff, 0x37, + 0x0d, 0xb7, 0xfa, 0xe9, 0x9e, 0x06, 0x70, 0x53, 0xfd, 0xf6, 0xa0, 0x28, + 0x84, 0x46, 0xcd, 0x61, 0xa2, 0x95, 0xc4, 0x1e, 0x6a, 0x13, 0xa1, 0x7f, + 0xaf, 0xe1, 0x73, 0x85, 0xb0, 0x53, 0x9c, 0x08, 0xb6, 0x1d, 0x4d, 0xb4, + 0x0b, 0xfb, 0x1f, 0x0c, 0x7b, 0x17, 0x06, 0x73, 0xa7, 0x22, 0x1f, 0xb0, + 0xd8, 0x45, 0x6e, 0xe5, 0xde, 0x48, 0xb7, 0x9f, 0x5a, 0xa8, 0xd1, 0xc3, + 0x04, 0xd1, 0x87, 0xec, 0x15, 0x3e, 0xd1, 0xc7, 0x57, 0x01, 0x46, 0x4b, + 0x28, 0xa8, 0x79, 0x5a, 0x7e, 0x0b, 0x56, 0x56, 0x28, 0xda, 0x35, 0xea, + 0x4c, 0x14, 0x81, 0xae, 0xc0, 0x0d, 0x12, 0xfe, 0x2d, 0xb7, 0x95, 0x4d, + 0xea, 0x78, 0xb6, 0x53, 0xcf, 0xac, 0x8a, 0xfc, 0xc9, 0x07, 0x9f, 0x93, + 0xf0, 0x11, 0x86, 0x13, 0xe9, 0xca, 0x3d, 0xce, 0xb1, 0xfd, 0x1a, 0x0a, + 0x8b, 0x11, 0x82, 0x94, 0x6a, 0xae, 0xc5, 0x80, 0x6a, 0x3b, 0xa8, 0x7c, + 0xb4, 0x53, 0x4e, 0xa9, 0x04, 0x1a, 0x4f, 0xb0, 0xb9, 0x95, 0x96, 0xa5, + 0xfd, 0xce, 0xdc, 0x57, 0x00, 0x48, 0x16, 0xe2, 0x40, 0xae, 0x04, 0xf5, + 0x83, 0x60, 0x23, 0xd9, 0x8e, 0x59, 0x56, 0x20, 0x50, 0x38, 0xc4, 0xde, + 0x88, 0x9f, 0x91, 0x06, 0xdb, 0x8f, 0x84, 0xa2, 0xaf, 0x61, 0xdd, 0x48, + 0x03, 0x4f, 0xc4, 0xb8, 0xed, 0x12, 0xd2, 0x74, 0x08, 0xb9, 0x51, 0x63, + 0xb5, 0xfe, 0x09, 0x7f, 0x7b, 0x8c, 0x5e, 0xd7, 0x27, 0xe5, 0x79, 0xe6, + 0x33, 
0x60, 0x54, 0xe1, 0x21, 0xda, 0xca, 0x8b, 0x81, 0xdf, 0xb6, 0xa7, + 0x2e, 0x9d, 0x0f, 0xfc, 0x05, 0x80, 0x67, 0xcb, 0xc5, 0xdf, 0xc7, 0x13, + 0xee, 0xb5, 0x40, 0x8e, 0xa7, 0x0c, 0xcb, 0xf2, 0x45, 0x15, 0x29, 0xb1, + 0xb8, 0x02, 0x23, 0x61, 0x38, 0xf1, 0x16, 0xa1, 0x0c, 0xa1, 0xc9, 0x40, + 0x8c, 0xd0, 0x48, 0x4b, 0xce, 0x9c, 0x1e, 0x53, 0x40, 0x44, 0xf6, 0x17, + 0x16, 0xc6, 0x5c, 0xb0, 0x2a, 0x29, 0x59, 0x87, 0x67, 0x85, 0xa7, 0x81, + 0x84, 0xe9, 0x4f, 0xe5, 0x4e, 0x13, 0x5a, 0x11, 0xa1, 0x24, 0x62, 0xe9, + 0x7a, 0xea, 0x51, 0xaa, 0x45, 0xf3, 0x1d, 0x2a, 0xaf, 0x01, 0x28, 0x35, + 0xda, 0xb4, 0xe7, 0xab, 0xc1, 0xb9, 0x3c, 0x45, 0xa2, 0x0b, 0x5d, 0x40, + 0x09, 0xac, 0x62, 0x16, 0xd3, 0x1f, 0x9f, 0xc7, 0x1a, 0x56, 0xb7, 0x27, + 0xd1, 0x1b, 0xe1, 0xb5, 0x82, 0x9e, 0xe8, 0xd3, 0x5c, 0x0f, 0xe8, 0x87, + 0x61, 0xc6, 0x20, 0xb7, 0x31, 0x3f, 0x0d, 0xb3, 0x0a, 0x5a, 0xce, 0x06, + 0xa5, 0xe9, 0xfd, 0xf3, 0x29, 0x1a, 0xcd, 0x86, 0x0e, 0x31, 0x29, 0xaa, + 0xb7, 0x32, 0xf1, 0x10, 0x4e, 0x92, 0x12, 0x00, 0xc0, 0xac, 0x50, 0x4b, + 0x52, 0x59, 0x51, 0x7c, 0xa8, 0x0c, 0xf7, 0xcb, 0x16, 0x73, 0x7b, 0x90, + 0xa8, 0x57, 0x79, 0xb4, 0x73, 0x53, 0xd7, 0xed, 0xba, 0x46, 0xc5, 0x06, + 0x53, 0x02, 0xc7, 0x58, 0x4c, 0x09, 0x0c, 0xa5, 0x01, 0x13, 0x18, 0x39, + 0x4b, 0x4e, 0xc2, 0x0d, 0xd6, 0xdf, 0xaa, 0x7e, 0x46, 0xba, 0x6e, 0xcc, + 0x25, 0x42, 0xd0, 0xb3, 0x31, 0xdc, 0xdf, 0x7d, 0xf1, 0xc3, 0x73, 0xca, + 0x7a, 0xf6, 0xcb, 0x23, 0x81, 0x8d, 0xbe, 0x0b, 0xf2, 0x79, 0x8d, 0x14, + 0xa4, 0xc8, 0x36, 0x18, 0x49, 0xc8, 0x0d, 0xd7, 0xc9, 0xdd, 0x35, 0xeb, + 0xec, 0x52, 0x56, 0xae, 0xf2, 0xd2, 0x51, 0x91, 0x39, 0xbc, 0xb0, 0x49, + 0xb7, 0xf2, 0x1b, 0x64, 0x83, 0x5a, 0xa6, 0x97, 0xc2, 0x15, 0x95, 0xdc, + 0x11, 0xd2, 0x89, 0xc0, 0x6a, 0xb1, 0x44, 0x43, 0x38, 0xb6, 0x54, 0x0f, + 0xdc, 0xcb, 0xed, 0x26, 0x27, 0xd9, 0x46, 0x56, 0x4e, 0x6a, 0x54, 0x74, + 0x0f, 0x45, 0xfc, 0xb6, 0x93, 0xab, 0x3c, 0xd1, 0x86, 0x51, 0xaf, 0xa9, + 0x4a, 0xc0, 0x9c, 0x78, 0xc1, 0xb1, 0xc7, 0xf1, 0x9c, 0xd1, 0xd0, 0x32, + 0x4e, 
0x4b, 0x02, 0x36, 0x68, 0x38, 0x88, 0x56, 0xc0, 0x2b, 0x12, 0x05, + 0x3b, 0xb9, 0xf6, 0xa2, 0x37, 0xe7, 0xbc, 0x81, 0xf9, 0x75, 0x51, 0x27, + 0x56, 0x0d, 0x55, 0xd1, 0x6a, 0xe0, 0xcf, 0x87, 0x0a, 0x44, 0xc6, 0x57, + 0xe1, 0x1b, 0xc0, 0x2c, 0xcf, 0xab, 0x77, 0xe9, 0x14, 0xf5, 0x34, 0x89, + 0xfb, 0xc9, 0xf2, 0x87, 0x5c, 0x75, 0xba, 0x51, 0x9a, 0x49, 0xe9, 0x23, + 0x23, 0xf4, 0xc9, 0xd1, 0x2f, 0x87, 0xf6, 0x75, 0x38, 0x97, 0x48, 0xb8, + 0x30, 0x46, 0x1d, 0x46, 0x65, 0x03, 0x10, 0xcf, 0xfb, 0x36, 0xf2, 0xb1, + 0xaf, 0x31, 0x02, 0x7b, 0x74, 0xfe, 0x9f, 0x8c, 0x73, 0x04, 0xfd, 0xb5, + 0xae, 0x2e, 0x27, 0x9c, 0xd8, 0x73, 0xbc, 0xc3, 0x4a, 0x76, 0x93, 0x66, + 0xf6, 0xb7, 0x90, 0xc4, 0x42, 0x3d, 0xcd, 0xb5, 0xf1, 0x75, 0xbf, 0xb7, + 0xdd, 0x8e, 0xb7, 0xcd, 0x90, 0x35, 0xf5, 0x95, 0x3d, 0xe4, 0x4e, 0xb0, + 0x7c, 0x5f, 0xad, 0xff, 0x75, 0x38, 0xc4, 0xc7, 0xed, 0xec, 0x70, 0xcc, + 0x9f, 0xf9, 0x77, 0xa1, 0x00, 0x2f, 0xf1, 0xa2, 0xc9, 0x74, 0xdc, 0x18, + 0x14, 0xd0, 0x2f, 0x86, 0x66, 0xa7, 0x5b, 0x39, 0x5c, 0xba, 0x0e, 0x77, + 0x16, 0x04, 0xc3, 0x02, 0x42, 0x3b, 0x66, 0x29, 0xee, 0x65, 0x00, 0xd4, + 0x22, 0x5a, 0x77, 0x74, 0xd4, 0xc3, 0xf3, 0x00, 0xdf, 0x6b, 0xc3, 0x15, + 0x89, 0x0e, 0xb1, 0xbc, 0xac, 0xe8, 0x44, 0x2f, 0x80, 0x34, 0x34, 0x8b, + 0x0c, 0x48, 0x45, 0xc2, 0x6a, 0xa3, 0x67, 0xd7, 0x3d, 0x36, 0xf3, 0x3f, + 0xe5, 0xf0, 0x5b, 0xe8, 0xad, 0x41, 0xd5, 0x82, 0xc1, 0x28, 0xab, 0x77, + 0xe8, 0x7f, 0xb3, 0xf6, 0xd2, 0x0c, 0xe4, 0x03, 0xcf, 0xe4, 0x72, 0xdb, + 0x7b, 0x81, 0xf4, 0xf3, 0x48, 0x74, 0xe1, 0x91, 0xb8, 0xf8, 0x4c, 0x2c, + 0x60, 0x99, 0x3e, 0x1e, 0x4f, 0xaf, 0x12, 0xab, 0x52, 0xef, 0xc7, 0x60, + 0xd2, 0xfe, 0x62, 0x55, 0xc8, 0x18, 0xad, 0x60, 0xa7, 0x5d, 0xde, 0x4d, + 0xfc, 0x6d, 0xe1, 0x10, 0x7c, 0xf9, 0xa2, 0x64, 0x00, 0x16, 0x1f, 0x44, + 0x7c, 0xe2, 0x72, 0x37, 0xd9, 0x92, 0xad, 0xfc, 0x62, 0x53, 0xbe, 0xb6, + 0xe0, 0xc8, 0xe0, 0xa2, 0xef, 0x22, 0x4b, 0x70, 0x3a, 0x4f, 0xc9, 0xed, + 0x6b, 0xbc, 0x17, 0x0a, 0xcf, 0x6a, 0x2c, 0xd3, 0xd2, 0x6b, 0x02, 0x45, + 0xfa, 
0x9e, 0xc2, 0x21, 0x28, 0xfc, 0x07, 0x68, 0xd6, 0xb8, 0x9f, 0x2a, + 0x0b, 0x7a, 0x0e, 0xbc, 0x4e, 0xee, 0x84, 0x38, 0xe4, 0x8e, 0x70, 0xc3, + 0xc4, 0xad, 0x74, 0x87, 0x2d, 0x16, 0x4f, 0xa1, 0xf8, 0x20, 0xf5, 0xde, + 0xa3, 0xc5, 0x0c, 0x3b, 0xde, 0x44, 0x48, 0x0f, 0x3c, 0xdc, 0x7e, 0x10, + 0x8b, 0x87, 0xc4, 0x3b, 0xb0, 0x95, 0xbf, 0x61, 0x1e, 0xad, 0x07, 0x52, + 0xfd, 0x0b, 0x84, 0xa9, 0x46, 0xb0, 0x32, 0xd5, 0x22, 0x80, 0x35, 0x26, + 0x41, 0xf8, 0x11, 0x72, 0xb1, 0x31, 0x6f, 0x5a, 0x75, 0xcc, 0x67, 0xe0, + 0xb2, 0x50, 0x89, 0xb2, 0x66, 0x6e, 0xee, 0xa0, 0x41, 0x8d, 0x00, 0x2a, + 0xa7, 0x9d, 0xa5, 0x11, 0x2b, 0x07, 0x95, 0x3a, 0x55, 0x8c, 0x67, 0xb1, + 0xe5, 0x2d, 0xd4, 0xd1, 0x3e, 0x29, 0xed, 0xa5, 0x59, 0x97, 0x7b, 0xdf, + 0x92, 0x10, 0x0b, 0x04, 0x89, 0x27, 0xa0, 0xa2, 0x93, 0x18, 0x7f, 0x47, + 0x84, 0x1c, 0xc6, 0xd6, 0x8f, 0x73, 0x81, 0xa0, 0xfa, 0xe5, 0x3e, 0xd8, + 0xbf, 0x56, 0x1a, 0x76, 0xf4, 0xc4, 0x0f, 0x7a, 0x29, 0x9d, 0x32, 0x5d, + 0x41, 0xe0, 0x07, 0xb9, 0xd3, 0x3f, 0x7e, 0xff, 0x90, 0x89, 0xce, 0xdc, + 0xf1, 0x1d, 0x54, 0xb6, 0x67, 0x7f, 0x4d, 0x71, 0x9a, 0x4a, 0x5f, 0x80, + 0x0d, 0x5c, 0x77, 0xd5, 0x50, 0x7c, 0x41, 0x56, 0x7e, 0x99, 0x0a, 0xeb, + 0x66, 0x1f, 0xd2, 0x55, 0xc3, 0xc6, 0x6c, 0xc5, 0xfc, 0x34, 0x40, 0x2c, + 0x05, 0x29, 0x05, 0x7c, 0xca, 0xe6, 0x8d, 0xd3, 0xb0, 0xca, 0x84, 0x27, + 0x50, 0x7c, 0x6b, 0x17, 0x1b, 0x22, 0xe4, 0x7f, 0xe6, 0x44, 0x94, 0x06, + 0x4b, 0xb3, 0xb7, 0xbb, 0x98, 0x81, 0x44, 0x0b, 0xf5, 0x66, 0xcb, 0xad, + 0xf2, 0x9a, 0xe1, 0x47, 0xf3, 0x97, 0xa9, 0xb2, 0xc2, 0xca, 0xcd, 0x98, + 0x78, 0x60, 0xdc, 0x6e, 0x87, 0x55, 0x47, 0xf3, 0xae, 0x84, 0xdd, 0x9a, + 0xe9, 0x1a, 0x63, 0x83, 0xea, 0x23, 0x09, 0x67, 0x34, 0x83, 0x00, 0x6e, + 0x5e, 0x58, 0xb8, 0x89, 0x04, 0x08, 0x0a, 0x55, 0x9e, 0x78, 0xc9, 0xff, + 0xb9, 0xb5, 0x2c, 0xdd, 0x3b, 0x0c, 0x58, 0x07, 0x8b, 0xb4, 0x6a, 0xc4, + 0x64, 0xa3, 0x5e, 0x5b, 0xfe, 0x4d, 0xd0, 0x74, 0x01, 0x1b, 0xdf, 0x10, + 0x45, 0x2b, 0xd6, 0x9e, 0xa9, 0x60, 0x1f, 0xad, 0x46, 0xa1, 0x8c, 0xf8, + 0xf6, 
0xa9, 0x8a, 0x27, 0xea, 0x51, 0x37, 0x84, 0xcf, 0xe5, 0xd7, 0x51, + 0xd6, 0x40, 0x39, 0x39, 0x5f, 0xf6, 0x96, 0x33, 0xd9, 0x86, 0x8d, 0x38, + 0xb6, 0x26, 0x04, 0x14, 0x07, 0x46, 0x3e, 0xd0, 0xc5, 0xf6, 0x0d, 0xa0, + 0x47, 0x2b, 0xc8, 0x73, 0x18, 0x6b, 0xd3, 0x0e, 0x18, 0xcc, 0x43, 0x98, + 0xd0, 0xcf, 0x1c, 0xe4, 0x4a, 0x41, 0x6a, 0x56, 0x2d, 0xf0, 0x93, 0x89, + 0x81, 0x6c, 0xce, 0x04, 0x1a, 0x23, 0x05, 0x91, 0x4f, 0x48, 0x44, 0x3a, + 0xaa, 0x03, 0xa5, 0x4a, 0xa9, 0x20, 0x2c, 0xbe, 0x6a, 0x81, 0xe6, 0xa9, + 0xf8, 0xf0, 0x2b, 0x29, 0xa1, 0xe0, 0xc4, 0xce, 0xf5, 0xda, 0x25, 0x70, + 0x49, 0xcc, 0xa0, 0x4b, 0x24, 0x49, 0x4f, 0x11, 0xc4, 0x3b, 0x22, 0x89, + 0x9a, 0xb4, 0xf4, 0xcd, 0xa3, 0xee, 0xb0, 0x76, 0x13, 0xc4, 0xbb, 0xaf, + 0x03, 0x7f, 0x27, 0xf3, 0x38, 0xbc, 0xde, 0x7c, 0x0c, 0x39, 0x14, 0xb7, + 0x14, 0xbb, 0x5c, 0xae, 0x89, 0xf8, 0xf7, 0xd6, 0x00, 0x78, 0xf4, 0xb0, + 0x52, 0x16, 0xf5, 0x54, 0xc5, 0x93, 0xf7, 0x6d, 0x0d, 0xe8, 0x58, 0xe2, + 0xa1, 0xa7, 0xdc, 0x49, 0xdb, 0xc8, 0x79, 0xbc, 0xc3, 0x97, 0x7b, 0x6c, + 0x82, 0x7b, 0xbe, 0xe9, 0x79, 0xac, 0x4a, 0xa4, 0x7c, 0x49, 0x83, 0x58, + 0x3a, 0xe4, 0xf5, 0x68, 0x5c, 0xb7, 0x7f, 0x2d, 0xfe, 0x6b, 0x96, 0xc7, + 0x8b, 0x67, 0xb5, 0xd0, 0xa1, 0x0a, 0x16, 0x62, 0x64, 0x53, 0xea, 0x29, + 0x80, 0x93, 0xf9, 0xd6, 0xa0, 0xc5, 0x1b, 0x3a, 0x1e, 0xab, 0x51, 0x88, + 0xe0, 0x9e, 0xd4, 0xf6, 0xbf, 0x70, 0x2d, 0x29, 0x2e, 0x08, 0xa9, 0x31, + 0x78, 0x0a, 0x15, 0x30, 0x9f, 0x2e, 0xc8, 0x41, 0x65, 0x8e, 0x97, 0x51, + 0x5e, 0x73, 0x46, 0x42, 0x74, 0x84, 0xfd, 0x9b, 0x4a, 0x8a, 0x68, 0x28, + 0x45, 0xd0, 0x5d, 0x65, 0x08, 0xb3, 0xf5, 0x40, 0x8a, 0x29, 0x8e, 0x70, + 0x02, 0x49, 0x6a, 0x01, 0xd6, 0x41, 0x4a, 0xf8, 0x15, 0xa3, 0x70, 0x59, + 0xe9, 0xa2, 0xe2, 0x76, 0x8c, 0x60, 0x33, 0xb3, 0xfa, 0x8b, 0xb4, 0x90, + 0x6f, 0x92, 0xc8, 0x21, 0x59, 0xc0, 0x3a, 0x30, 0x46, 0xeb, 0x49, 0xd8, + 0x85, 0x63, 0x5a, 0x23, 0x87, 0xe1, 0xa7, 0xc0, 0x1a, 0xb0, 0xc7, 0xc4, + 0x40, 0x4d, 0x11, 0x9c, 0xe3, 0xd4, 0x6b, 0xef, 0x68, 0xc8, 0x2c, 0x31, + 0xcd, 
0x3e, 0xee, 0x55, 0x10, 0x67, 0x77, 0x7b, 0x30, 0xc1, 0xd0, 0x23, + 0x6c, 0x65, 0x6f, 0xfb, 0x2e, 0x62, 0x33, 0x42, 0x63, 0xdc, 0xca, 0x86, + 0xf1, 0x0e, 0xb3, 0xb0, 0x69, 0x11, 0x65, 0xe1, 0x6e, 0x6c, 0x03, 0x49, + 0x79, 0xe8, 0xf1, 0x2e, 0x8d, 0x94, 0xc8, 0xa8, 0x98, 0x2d, 0x3f, 0xfe, + 0xbd, 0x2d, 0x75, 0x45, 0xd1, 0x7a, 0x09, 0xf8, 0x90, 0x49, 0xbd, 0x4a, + 0x3b, 0xa4, 0xa3, 0x26, 0xb8, 0x62, 0x66, 0x97, 0xd9, 0xc1, 0xca, 0x12, + 0x49, 0xe1, 0x27, 0x93, 0x4f, 0x60, 0xfa, 0xb3, 0x4f, 0x4c, 0xdb, 0x87, + 0x6c, 0x3b, 0x50, 0x47, 0xe2, 0xd8, 0x5b, 0x13, 0x99, 0xf0, 0x2b, 0xbb, + 0x32, 0x33, 0xfd, 0x7d, 0x15, 0x0f, 0x2c, 0xee, 0x85, 0x83, 0xc0, 0x53, + 0x79, 0x3e, 0x51, 0xfe, 0x7c, 0x06, 0x73, 0x49, 0x49, 0x4f, 0x5a, 0x22, + 0x36, 0x8f, 0x30, 0x8a, 0xef, 0x84, 0xd6, 0x15, 0x26, 0x48, 0xe7, 0x1e, + 0xb1, 0xaa, 0x82, 0xd0, 0xc7, 0x0b, 0x97, 0x7b, 0x6c, 0x2d, 0x49, 0x7e, + 0x6d, 0xe7, 0xa3, 0x05, 0x80, 0xd7, 0x42, 0xa9, 0xc6, 0x66, 0x98, 0x30, + 0xe3, 0x8a, 0x79, 0x86, 0x9c, 0x2b, 0xbc, 0x4a, 0xe6, 0x0d, 0xc5, 0xe5, + 0x1a, 0x92, 0xd9, 0xef, 0x63, 0x52, 0x03, 0x88, 0x36, 0xc5, 0x83, 0x65, + 0xf8, 0xf1, 0x87, 0xce, 0x43, 0xfe, 0x89, 0x58, 0x07, 0x6a, 0xad, 0x85, + 0x37, 0x0f, 0xdf, 0x9e, 0xa5, 0x62, 0xa9, 0xd2, 0x41, 0x3f, 0x7f, 0xb7, + 0xf1, 0xe2, 0x58, 0xb5, 0xda, 0xdf, 0xd1, 0xba, 0x36, 0x2c, 0xe7, 0x43, + 0x31, 0x07, 0xc5, 0xf5, 0x79, 0xc9, 0x31, 0xd7, 0x1d, 0x97, 0x57, 0x9a, + 0x8e, 0x3f, 0xac, 0x00, 0x49, 0x00, 0x2f, 0xad, 0xac, 0xe7, 0x65, 0x7c, + 0xbf, 0xec, 0x85, 0x57, 0xe6, 0xcc, 0x07, 0x34, 0x02, 0x36, 0xa8, 0x6a, + 0x9f, 0x3a, 0x9a, 0x2f, 0x34, 0x93, 0x1f, 0x7d, 0x38, 0x54, 0xe3, 0x54, + 0x54, 0xee, 0x84, 0x55, 0xe1, 0x0d, 0xc1, 0x08, 0x3e, 0x33, 0x9e, 0x2a, + 0xc3, 0x6a, 0x83, 0xc4, 0x75, 0xed, 0xbc, 0x5f, 0xd9, 0x04, 0xd7, 0x77, + 0x91, 0xb1, 0xa0, 0xf2, 0xef, 0x81, 0xb0, 0x8b, 0x53, 0x5f, 0x71, 0xec, + 0xa5, 0x0b, 0xbe, 0xf2, 0x92, 0x7e, 0x0a, 0x34, 0xeb, 0x5d, 0x65, 0xc7, + 0xa9, 0x44, 0x10, 0xfb, 0xd3, 0xef, 0xe1, 0xbc, 0x06, 0x65, 0x68, 0x22, + 0xfb, 
0x43, 0x2c, 0xcf, 0x8e, 0x6a, 0x28, 0xdb, 0x0b, 0xf4, 0xaf, 0x01, + 0x65, 0x97, 0xd6, 0xe5, 0x91, 0x20, 0x13, 0x2c, 0xb1, 0xc2, 0xd3, 0xc3, + 0x76, 0x90, 0xf8, 0xcd, 0x00, 0xde, 0x93, 0xf8, 0x4e, 0xcc, 0xdc, 0xca, + 0x9a, 0xf0, 0xbd, 0x9b, 0xd6, 0x57, 0xb1, 0x13, 0xd9, 0xe0, 0xe1, 0x9e, + 0x21, 0x74, 0xa9, 0x76, 0xc0, 0x0c, 0xad, 0x4f, 0x5d, 0xfe, 0x23, 0x32, + 0x5a, 0x10, 0x75, 0x5b, 0x05, 0xdf, 0xdc, 0x5b, 0x94, 0xcb, 0xe1, 0x9f, + 0x13, 0x51, 0xf5, 0x50, 0x36, 0x3b, 0xf2, 0x90, 0x9c, 0x9a, 0xc8, 0x10, + 0x88, 0xa9, 0xec, 0x22, 0x1e, 0x96, 0x70, 0xe8, 0x9e, 0x69, 0xc1, 0x22, + 0xd9, 0x14, 0x15, 0x2e, 0xbc, 0x03, 0x96, 0x9e, 0x1d, 0x00, 0x10, 0x16, + 0x4f, 0x56, 0xf0, 0x29, 0x47, 0x0a, 0x45, 0x34, 0x27, 0x21, 0x3b, 0x67, + 0x33, 0xf9, 0xdd, 0x29, 0x3a, 0xf2, 0xe4, 0x56, 0x34, 0x46, 0xbe, 0xd8, + 0x42, 0x29, 0x11, 0x7f, 0x30, 0xc1, 0xbe, 0xa5, 0xc8, 0x9d, 0x7b, 0x2e, + 0x4e, 0xcf, 0xba, 0x91, 0xb4, 0xbf, 0x0a, 0x04, 0x00, 0x49, 0x83, 0x6b, + 0x46, 0x5f, 0x3b, 0xfa, 0xf7, 0x40, 0x8d, 0x85, 0x47, 0x14, 0x58, 0xb3, + 0xa5, 0x66, 0x30, 0xfd, 0x4a, 0x80, 0xa4, 0x61, 0x3b, 0x7c, 0xb4, 0xcc, + 0x34, 0x8c, 0xc6, 0xb6, 0x10, 0xa9, 0x76, 0xc9, 0x11, 0xd7, 0x8a, 0x51, + 0x86, 0x17, 0x89, 0x28, 0xab, 0xd5, 0x03, 0x88, 0x74, 0x5b, 0x81, 0xbd, + 0x3a, 0x57, 0xfe, 0x66, 0x25, 0xd0, 0x92, 0x15, 0x84, 0x02, 0x0f, 0x51, + 0xa8, 0x58, 0xcf, 0x77, 0x65, 0x10, 0x61, 0xe8, 0xe6, 0xab, 0xb1, 0xba, + 0x3b, 0x08, 0xd6, 0xba, 0x5f, 0xf5, 0x74, 0xc5, 0x07, 0x60, 0xfd, 0xd3, + 0xc8, 0x52, 0x4e, 0xdb, 0xc3, 0xe3, 0x6d, 0x81, 0x20, 0x51, 0x01, 0x9a, + 0x5e, 0x32, 0x4e, 0x80, 0x5a, 0xcb, 0x83, 0xd7, 0xa4, 0xd9, 0xfb, 0xed, + 0x3d, 0x80, 0xa1, 0x83, 0x81, 0x91, 0xc0, 0x0b, 0xff, 0x67, 0xd8, 0x8b, + 0xd0, 0x12, 0x0b, 0xd4, 0x2b, 0x8e, 0x0d, 0x0f, 0xfc, 0xc7, 0xb3, 0xf1, + 0xe3, 0xf3, 0x5e, 0x0c, 0xb6, 0x6b, 0x9d, 0xdc, 0x22, 0x70, 0x31, 0x54, + 0xe8, 0x41, 0xfe, 0xa1, 0xe1, 0x4f, 0xfa, 0x81, 0xfb, 0xae, 0x72, 0x16, + 0xb8, 0x87, 0xc9, 0x31, 0x9d, 0x42, 0x47, 0x4a, 0x20, 0xae, 0x63, 0x16, + 0x0d, 
0xfa, 0xf1, 0x27, 0x19, 0x47, 0xee, 0x45, 0x84, 0x29, 0x9a, 0xb6, + 0x42, 0xef, 0xbd, 0x15, 0xa8, 0x34, 0x33, 0x38, 0x9c, 0x9d, 0xbb, 0x5c, + 0x03, 0xf3, 0xcf, 0xcf, 0x6d, 0x2e, 0xd5, 0x88, 0xf8, 0xdd, 0xfc, 0xc0, + 0x4a, 0xdb, 0x69, 0xd9, 0x62, 0x89, 0x24, 0x46, 0xee, 0xa4, 0xb9, 0x95, + 0xe6, 0xaf, 0x7d, 0x53, 0xec, 0x41, 0xae, 0x70, 0xfe, 0x4f, 0x31, 0xe3, + 0xa2, 0x59, 0x2c, 0xa1, 0x53, 0x8b, 0xb6, 0x3b, 0x39, 0xc1, 0xa4, 0xa7, + 0x9e, 0xaa, 0x00, 0x60, 0x9a, 0x5f, 0x56, 0x51, 0xf3, 0x7b, 0x28, 0x84, + 0x36, 0x1a, 0xc1, 0x2d, 0xc8, 0xed, 0xf8, 0x48, 0x48, 0x1d, 0x39, 0x4d, + 0x3d, 0xce, 0x30, 0x90, 0x29, 0x33, 0x6f, 0x9a, 0xce, 0x58, 0xe7, 0x88, + 0xac, 0x59, 0xce, 0x85, 0x5a, 0x52, 0x2b, 0x6c, 0xb7, 0xe9, 0x2e, 0xa9, + 0xd9, 0x9a, 0xea, 0x1c, 0x47, 0xb2, 0x59, 0xff, 0x73, 0x76, 0x21, 0x40, + 0xe1, 0xde, 0x32, 0xb8, 0x73, 0x3d, 0xa5, 0x44, 0x66, 0x79, 0xa1, 0xfe, + 0xaf, 0xf6, 0x8a, 0x97, 0x09, 0x5c, 0x8b, 0x64, 0x38, 0x9f, 0xe1, 0x59, + 0x38, 0x18, 0xe9, 0xc0, 0xd6, 0xa2, 0xac, 0x74, 0xa9, 0xfd, 0x4a, 0x0d, + 0xf6, 0x47, 0x00, 0x2b, 0x09, 0x46, 0x38, 0x1c, 0xa4, 0x9f, 0x63, 0x20, + 0x18, 0x75, 0x5a, 0xb8, 0xc4, 0xbc, 0xd6, 0x6b, 0xc8, 0x14, 0x72, 0x03, + 0xe4, 0x05, 0xd4, 0x4e, 0x66, 0x20, 0x42, 0xa2, 0x8f, 0x96, 0xe7, 0xaf, + 0xd3, 0xfb, 0xa8, 0x88, 0x9b, 0xe3, 0xaa, 0xcd, 0xab, 0xce, 0x8f, 0x07, + 0x6d, 0xef, 0x98, 0xce, 0xdb, 0x42, 0x5b, 0xf4, 0x61, 0x57, 0x62, 0x27, + 0x8a, 0x53, 0x5e, 0xf8, 0x3e, 0xf6, 0x7f, 0xde, 0x5e, 0x3b, 0x1b, 0x13, + 0x2e, 0x30, 0x46, 0x4b, 0x6b, 0xb7, 0xbb, 0x33, 0x31, 0xc0, 0xfa, 0x40, + 0xab, 0x68, 0x72, 0xe3, 0x92, 0x30, 0x47, 0xd6, 0x30, 0x60, 0x42, 0x5b, + 0x88, 0x8d, 0xa6, 0x56, 0xe4, 0xac, 0x33, 0x2e, 0xca, 0x05, 0x1f, 0x60, + 0xaf, 0xde, 0x7f, 0xa9, 0xda, 0x3f, 0xa8, 0x21, 0xf6, 0xfc, 0x98, 0x7d, + 0xc4, 0x1e, 0xb0, 0xa9, 0x56, 0x2d, 0x8d, 0xea, 0x03, 0x51, 0x48, 0xac, + 0xe8, 0x22, 0xc7, 0x8b, 0xef, 0x91, 0x0e, 0xcf, 0x0c, 0xe9, 0x38, 0x43, + 0x99, 0xa8, 0x98, 0x4f, 0xfa, 0xe3, 0x03, 0xa6, 0x4f, 0xd4, 0x0d, 0x98, + 0x5b, 
0x50, 0x28, 0xd7, 0xe7, 0x46, 0xd7, 0xad, 0x43, 0xb8, 0x56, 0x2a, + 0x2f, 0x7c, 0x39, 0x67, 0xf4, 0x62, 0x0e, 0xc0, 0xa8, 0x87, 0xb5, 0x81, + 0xe2, 0x13, 0x9f, 0xe4, 0xdd, 0x72, 0xf2, 0x07, 0xca, 0xac, 0x6d, 0xb2, + 0x96, 0x53, 0x5a, 0x8f, 0x66, 0x3c, 0xb4, 0xc1, 0x4f, 0x9a, 0x82, 0x55, + 0xcf, 0x0e, 0x27, 0x5f, 0xc7, 0xd2, 0x28, 0x27, 0x7f, 0x22, 0x6e, 0xa5, + 0xe7, 0x32, 0x56, 0x51, 0x18, 0xe0, 0x85, 0x6d, 0x1f, 0xfc, 0x25, 0x08, + 0x18, 0x60, 0x57, 0xfc, 0x66, 0x94, 0x2c, 0x4c, 0xbe, 0x00, 0xab, 0x9e, + 0x73, 0x9b, 0x06, 0xd3, 0xb5, 0x24, 0xa8, 0x8f, 0xb1, 0x33, 0x99, 0x4c, + 0xb4, 0x13, 0x07, 0xcd, 0x04, 0xdd, 0x77, 0xdc, 0xee, 0x96, 0x02, 0x59, + 0xe8, 0x22, 0x07, 0x16, 0x2e, 0x41, 0xc9, 0xc4, 0x59, 0x70, 0x37, 0x0f, + 0x14, 0xc9, 0xcf, 0x90, 0x57, 0xc2, 0x0d, 0xa3, 0xd7, 0x66, 0xb6, 0x7d, + 0x10, 0xd4, 0xfc, 0x18, 0x66, 0xad, 0xea, 0x5e, 0x64, 0x6c, 0x12, 0x66, + 0x3d, 0x96, 0xa5, 0xa8, 0x9c, 0x49, 0x5c, 0xd4, 0x8d, 0x1c, 0xc3, 0x38, + 0xfe, 0x53, 0xc2, 0x71, 0xd1, 0xc6, 0x41, 0xe2, 0xb9, 0x17, 0x74, 0x6e, + 0xcc, 0xf8, 0x72, 0x28, 0x38, 0x4e, 0x54, 0x9b, 0x0e, 0xa3, 0x3a, 0x43, + 0x5c, 0xd5, 0x83, 0x06, 0xbb, 0x46, 0x16, 0x6e, 0xe3, 0x8a, 0xd5, 0x1e, + 0x7f, 0x88, 0x62, 0xac, 0x35, 0x89, 0xfb, 0xbe, 0x96, 0x1d, 0x87, 0x37, + 0xb7, 0x91, 0x63, 0xae, 0x77, 0x7b, 0x66, 0x60, 0xc1, 0x3e, 0x80, 0x56, + 0xb1, 0xc8, 0x0d, 0x16, 0xde, 0x38, 0x82, 0x66, 0x99, 0x2b, 0x35, 0xd8, + 0xb4, 0x5b, 0x4b, 0x3e, 0x93, 0x96, 0x59, 0xf8, 0x96, 0x7e, 0x7b, 0x27, + 0xf4, 0x62, 0xb7, 0xda, 0x89, 0xa7, 0x34, 0x47, 0xed, 0xb3, 0x42, 0x20, + 0xeb, 0xcd, 0xf6, 0xa3, 0x9f, 0xf7, 0x48, 0x91, 0x17, 0xd2, 0x21, 0xed, + 0x5a, 0x22, 0x39, 0xc9, 0x76, 0x95, 0x36, 0xd9, 0x97, 0x0f, 0x19, 0xce, + 0xd3, 0xbc, 0x74, 0x7d, 0x53, 0x37, 0x3b, 0x4a, 0x97, 0xb7, 0xf8, 0x7e, + 0xdd, 0x4c, 0x5f, 0xae, 0x5c, 0x0b, 0xab, 0x4c, 0x34, 0xa1, 0x7e, 0x34, + 0x35, 0xf4, 0xfc, 0x92, 0xab, 0x2e, 0x6a, 0x15, 0xce, 0x84, 0xae, 0x70, + 0xae, 0x85, 0x21, 0xe6, 0x41, 0x13, 0x31, 0xe0, 0x8f, 0xab, 0x82, 0xe3, + 0x09, 
0xaf, 0xa4, 0x7c, 0xb4, 0xb9, 0xb7, 0xc0, 0x67, 0x08, 0xc9, 0x9d, + 0xcd, 0x0b, 0x3c, 0xa0, 0x0c, 0xde, 0x49, 0x2f, 0x40, 0x19, 0x95, 0x64, + 0xb9, 0x7c, 0x2a, 0x72, 0xdd, 0xa2, 0x92, 0x0a, 0x21, 0xeb, 0x8c, 0xc3, + 0x6d, 0x52, 0xe7, 0x05, 0x50, 0x01, 0x55, 0x19, 0x2f, 0xbd, 0x1b, 0x72, + 0x73, 0xfe, 0x82, 0x9f, 0xbf, 0xa0, 0xfe, 0x19, 0x7c, 0x42, 0x6d, 0x76, + 0x32, 0x47, 0x36, 0x15, 0x2e, 0xde, 0xe8, 0xe6, 0xca, 0x07, 0xa3, 0x6b, + 0x40, 0x99, 0x96, 0xcd, 0x19, 0xea, 0x7e, 0xc9, 0x87, 0x9d, 0x3d, 0xa0, + 0x82, 0x88, 0xe7, 0xe4, 0x34, 0x9f, 0xa5, 0x27, 0xdf, 0xae, 0x03, 0x37, + 0xa8, 0x35, 0x64, 0x02, 0x09, 0x09, 0x9e, 0xec, 0x38, 0x0a, 0xff, 0x79, + 0x8c, 0x9a, 0x87, 0x66, 0xcd, 0xe4, 0xf4, 0x9d, 0xa9, 0x07, 0x96, 0x36, + 0xae, 0x2e, 0x4e, 0xc5, 0xe9, 0x86, 0xb2, 0x8e, 0x71, 0x5d, 0xe8, 0xee, + 0x84, 0xf3, 0x30, 0x2a, 0x58, 0x1a, 0x80, 0xb8, 0xaa, 0xb8, 0x1d, 0xc4, + 0xae, 0x59, 0x91, 0xf3, 0x16, 0x9b, 0xa3, 0x8a, 0xa3, 0x26, 0xb2, 0x0a, + 0xe5, 0x58, 0xb7, 0x96, 0x87, 0xfb, 0x00, 0xe4, 0x50, 0x7c, 0xb1, 0x77, + 0x3a, 0x18, 0xc2, 0xe3, 0xc1, 0x12, 0xa6, 0x0d, 0x06, 0xeb, 0x80, 0x6c, + 0x5a, 0xee, 0x34, 0xcc, 0x1c, 0x87, 0x35, 0x46, 0x1d, 0x05, 0x83, 0xd8, + 0x91, 0x22, 0xaa, 0xf6, 0xad, 0x87, 0xab, 0x76, 0x18, 0x79, 0xe2, 0x09, + 0xc3, 0xa3, 0x15, 0x67, 0x3a, 0x7c, 0x0f, 0xa0, 0x4c, 0x7b, 0xfc, 0xfc, + 0xdd, 0x5c, 0xe4, 0x86, 0x58, 0x13, 0xb8, 0x97, 0xae, 0x8c, 0x75, 0xc8, + 0x02, 0x1e, 0x33, 0x45, 0xa9, 0x54, 0x09, 0x15, 0x53, 0x4f, 0x28, 0x47, + 0x4d, 0x5f, 0xd0, 0xc7, 0x09, 0xbd, 0x93, 0xb0, 0x08, 0x79, 0x05, 0xbc, + 0xbc, 0xaf, 0x2c, 0xbd, 0xbb, 0x21, 0xd1, 0x60, 0xb8, 0x81, 0x4c, 0x6c, + 0x5e, 0x45, 0x39, 0xa3, 0x31, 0x54, 0xb7, 0x82, 0xef, 0x86, 0xe4, 0x5e, + 0xca, 0xd6, 0xb8, 0x31, 0xa2, 0x4c, 0x84, 0x5b, 0xac, 0xe5, 0x29, 0xbf, + 0xbf, 0x89, 0xb4, 0x4c, 0xd3, 0x69, 0x66, 0x50, 0xeb, 0xda, 0x7d, 0x00, + 0xbb, 0x45, 0x0f, 0xe1, 0xd1, 0x30, 0x1a, 0xc6, 0x94, 0x66, 0xdc, 0x01, + 0x75, 0xce, 0xf8, 0xfc, 0xd9, 0xce, 0xcf, 0x1f, 0x9e, 0x5a, 0x55, 0xa4, + 0x3e, 
0xe6, 0x51, 0xc7, 0x74, 0x40, 0x82, 0x09, 0xea, 0xa0, 0xf5, 0xb2, + 0x70, 0x9f, 0x0e, 0xfb, 0x46, 0x8a, 0x69, 0xbf, 0x07, 0x92, 0xdc, 0x74, + 0x03, 0x70, 0xc6, 0x44, 0x81, 0x66, 0x40, 0xc7, 0xf5, 0xb8, 0xf0, 0x45, + 0x0f, 0xca, 0xd8, 0xb0, 0x9e, 0x48, 0x94, 0xff, 0x85, 0xcb, 0x7b, 0xec, + 0x67, 0x5d, 0xfe, 0xe9, 0x13, 0xd1, 0x67, 0x95, 0xd9, 0x35, 0x9e, 0x8a, + 0x53, 0x4d, 0x6b, 0x9d, 0x42, 0x53, 0xb1, 0x6b, 0x51, 0x1e, 0x35, 0x40, + 0x81, 0x92, 0x91, 0x5f, 0x1f, 0x8e, 0xbe, 0x37, 0xd3, 0x85, 0xab, 0x85, + 0x37, 0x1c, 0x0f, 0xae, 0xd9, 0xf7, 0xa2, 0x75, 0x3d, 0xd9, 0xd7, 0x2a, + 0x80, 0xb0, 0x4c, 0x14, 0x04, 0x40, 0xc5, 0xba, 0x0e, 0xbe, 0xab, 0xcc, + 0x38, 0x35, 0x62, 0x6c, 0xa5, 0xce, 0x49, 0x15, 0x2a, 0x10, 0xb5, 0x6a, + 0xd2, 0x3b, 0xd2, 0x6a, 0xad, 0x2e, 0x34, 0x46, 0x8b, 0x78, 0x57, 0x6e, + 0xc4, 0xde, 0x65, 0x68, 0x05, 0x8f, 0xd6, 0x6e, 0x34, 0xb9, 0xaa, 0x80, + 0x77, 0xff, 0x6c, 0x1a, 0x37, 0x87, 0xdd, 0x33, 0x13, 0x33, 0xa7, 0xa9, + 0x3a, 0x90, 0x32, 0x7b, 0x9b, 0x21, 0x31, 0xc8, 0xf5, 0x4c, 0xa6, 0x73, + 0x42, 0x79, 0x46, 0x14, 0x1b, 0xef, 0xf4, 0x78, 0xd9, 0x7e, 0x6f, 0x31, + 0xaa, 0x59, 0x97, 0x34, 0xe5, 0xe6, 0x67, 0xf3, 0x86, 0xf5, 0x61, 0xe7, + 0x51, 0x6d, 0xce, 0xb3, 0xdc, 0x86, 0xc7, 0x55, 0x43, 0xfa, 0x38, 0x78, + 0xb0, 0x8d, 0x03, 0x9c, 0xe4, 0x6c, 0xca, 0x73, 0x94, 0xa1, 0x0c, 0xb8, + 0x11, 0xda, 0x0c, 0x0b, 0x18, 0x1b, 0xd0, 0x99, 0xe7, 0xa9, 0x0d, 0xc3, + 0x36, 0xd7, 0x8c, 0x16, 0xad, 0x16, 0x1f, 0xb2, 0x3c, 0x07, 0x32, 0x11, + 0x6c, 0xd2, 0x8f, 0x33, 0x37, 0x5c, 0x3e, 0x4f, 0x7a, 0x76, 0xf7, 0x85, + 0xcc, 0x68, 0x1a, 0xf9, 0x26, 0x74, 0x42, 0xc9, 0xea, 0x21, 0x7e, 0x74, + 0x3c, 0x4f, 0xde, 0xfb, 0xd7, 0x83, 0x62, 0x12, 0xc7, 0x4f, 0xfc, 0x47, + 0x18, 0x9d, 0xc5, 0xf5, 0xe9, 0xd7, 0xaa, 0x76, 0x20, 0x99, 0x79, 0xae, + 0x9b, 0x7a, 0xde, 0x8b, 0x95, 0xc2, 0xa5, 0xa3, 0x6a, 0x30, 0x9b, 0x99, + 0x63, 0x34, 0x7c, 0xd1, 0x53, 0xa1, 0x6c, 0xd6, 0xed, 0x7d, 0x8c, 0xba, + 0xc8, 0x21, 0xf3, 0xe1, 0x31, 0x55, 0x3d, 0x88, 0x87, 0x04, 0xc7, 0xc9, + 0x65, 
0x0c, 0x53, 0x1e, 0xd4, 0xd9, 0xaa, 0xda, 0xc2, 0x14, 0x88, 0xf2, + 0x07, 0x2c, 0x12, 0x4d, 0x79, 0x54, 0xaa, 0xd9, 0x47, 0x95, 0xf9, 0x7e, + 0x26, 0x89, 0x4b, 0x63, 0x7e, 0x44, 0x06, 0x0e, 0xe2, 0x8d, 0x9a, 0x0a, + 0xc3, 0xee, 0x55, 0x13, 0x55, 0x04, 0xcc, 0xb5, 0x2e, 0xa0, 0x0d, 0xec, + 0x76, 0x84, 0xc1, 0x1e, 0xdd, 0xe6, 0xfa, 0x54, 0x6e, 0x38, 0x30, 0x6f, + 0xcc, 0xa4, 0x8d, 0x76, 0x1e, 0xa3, 0x8e, 0x2c, 0x5e, 0x37, 0xeb, 0x0b, + 0xf4, 0xb5, 0x80, 0xde, 0x58, 0x13, 0x5a, 0x52, 0xdc, 0x65, 0x99, 0x1a, + 0x1b, 0x75, 0x0c, 0xbd, 0x83, 0xe8, 0x90, 0x8e, 0xa9, 0xbf, 0x42, 0x22, + 0xe1, 0x3a, 0x31, 0x4e, 0x54, 0xad, 0xd4, 0x6f, 0x80, 0xb4, 0xb5, 0x82, + 0x05, 0x20, 0xd7, 0x38, 0xd7, 0xeb, 0x25, 0x33, 0xe9, 0x4b, 0xc3, 0x5e, + 0xd1, 0x11, 0xb0, 0xd9, 0x8e, 0x90, 0x48, 0x2a, 0xe3, 0xa0, 0x60, 0x16, + 0x70, 0xe3, 0xd1, 0x45, 0x11, 0x64, 0x91, 0x69, 0x87, 0x1c, 0xbb, 0x91, + 0xc4, 0x43, 0x12, 0x62, 0x99, 0x69, 0xe5, 0x96, 0x01, 0x15, 0xdb, 0xdf, + 0x05, 0x55, 0x34, 0xbb, 0xd6, 0x76, 0x89, 0xcd, 0xb5, 0x4f, 0x2e, 0xa7, + 0x6e, 0x15, 0xc9, 0xc0, 0x8e, 0xa8, 0x63, 0x79, 0x12, 0xfb, 0x7e, 0x69, + 0x8f, 0x52, 0x5e, 0xe7, 0x76, 0x16, 0x28, 0x76, 0xca, 0xcb, 0xd8, 0x0e, + 0x4a, 0x93, 0x9d, 0x16, 0x68, 0x98, 0xf8, 0xc3, 0x39, 0xb2, 0x2d, 0xea, + 0xba, 0x72, 0x16, 0x33, 0xb7, 0xec, 0x61, 0x9e, 0x94, 0x32, 0x01, 0x22, + 0xde, 0x66, 0xfd, 0x68, 0xfa, 0xcf, 0xf2, 0x52, 0x4f, 0x02, 0xe8, 0x25, + 0xd3, 0xa3, 0x5b, 0x29, 0xae, 0xe9, 0x62, 0xfa, 0xd6, 0x1a, 0x50, 0x80, + 0x95, 0x96, 0xdf, 0x00, 0xfc, 0x23, 0xf1, 0x95, 0xef, 0xbb, 0xf5, 0x23, + 0x9d, 0x6b, 0xd6, 0xed, 0xb4, 0xe2, 0x4a, 0xf6, 0xb8, 0x20, 0x83, 0x6b, + 0x45, 0x92, 0x29, 0x5a, 0x02, 0xe9, 0xf7, 0x8e, 0x5c, 0x02, 0xde, 0xb4, + 0x9a, 0xdf, 0x18, 0x10, 0x17, 0x7f, 0xd8, 0x2e, 0x17, 0xc0, 0xf0, 0x6b, + 0x3b, 0x88, 0x09, 0x58, 0xf2, 0x18, 0x22, 0x09, 0x80, 0x4a, 0xe0, 0x51, + 0x6f, 0x7a, 0x70, 0x09, 0x1f, 0xe5, 0xfa, 0xa9, 0x4d, 0x24, 0x1f, 0x18, + 0x1c, 0x74, 0xcd, 0x87, 0x04, 0xfd, 0x85, 0x33, 0x4c, 0x28, 0xbd, 0xa3, + 0x66, 
0x6c, 0x99, 0x7e, 0x50, 0x5e, 0xb5, 0x22, 0x33, 0x92, 0xd4, 0xd8, + 0x82, 0x4e, 0x38, 0xbe, 0xcb, 0x3d, 0x5f, 0x19, 0xd1, 0x0f, 0x8b, 0xa1, + 0x78, 0x08, 0x1c, 0x10, 0x0b, 0x77, 0xa7, 0x39, 0x2e, 0x91, 0x83, 0xee, + 0x1d, 0x36, 0xd8, 0x77, 0x87, 0x8a, 0x38, 0x45, 0x3c, 0xbd, 0xb9, 0x88, + 0xbb, 0x1b, 0x20, 0xd1, 0x95, 0xb9, 0x8f, 0x03, 0x46, 0xfa, 0xab, 0x70, + 0x68, 0x26, 0xd9, 0xb1, 0x25, 0x52, 0x5a, 0x77, 0x2d, 0x92, 0xc2, 0x1d, + 0xb6, 0x6e, 0xec, 0x67, 0xef, 0x34, 0xe2, 0x64, 0xb3, 0xa0, 0xae, 0x0c, + 0xd9, 0x36, 0xa1, 0xc7, 0xd8, 0xbf, 0x7a, 0x43, 0xbf, 0xc0, 0xc6, 0x90, + 0x60, 0x6a, 0x23, 0xc0, 0x6a, 0x5d, 0x62, 0x18, 0xac, 0xc1, 0x20, 0x35, + 0x17, 0xba, 0x4e, 0x54, 0xb7, 0xec, 0xd4, 0xad, 0x99, 0x94, 0xa4, 0xda, + 0x57, 0xe7, 0x46, 0xed, 0x47, 0xd1, 0xb4, 0xa2, 0x3e, 0x0f, 0x4a, 0xb6, + 0xa6, 0x68, 0x3e, 0x94, 0xb9, 0x18, 0x30, 0xe0, 0x75, 0x08, 0xe8, 0xf3, + 0x21, 0x79, 0x26, 0x68, 0x6a, 0x65, 0xb6, 0xbe, 0x03, 0x98, 0x8f, 0x04, + 0xad, 0x1e, 0xb0, 0x54, 0xd2, 0x28, 0xdd, 0x4a, 0xe9, 0xf3, 0xa0, 0x06, + 0xbf, 0x0b, 0x2a, 0xee, 0xf8, 0x03, 0x7e, 0x1d, 0x37, 0xc1, 0x32, 0xd1, + 0x41, 0xf4, 0x9b, 0xc5, 0x02, 0x10, 0x6f, 0x55, 0x5a, 0xec, 0x5b, 0xe7, + 0x61, 0x05, 0x17, 0xf0, 0xf8, 0xc6, 0x89, 0xe8, 0xad, 0x32, 0x57, 0x14, + 0xe5, 0xf8, 0xf5, 0x88, 0xd9, 0x73, 0x17, 0x10, 0xa7, 0xc3, 0xf8, 0x78, + 0x0b, 0x66, 0xab, 0x63, 0x4f, 0x96, 0x5d, 0xdf, 0x36, 0x83, 0xc4, 0x6f, + 0x20, 0xbd, 0xcb, 0x4c, 0xd2, 0xfa, 0x35, 0x87, 0xd8, 0xb6, 0xbb, 0xcc, + 0xb6, 0xd2, 0x85, 0x03, 0x6a, 0xea, 0xbb, 0x6d, 0x2f, 0xa2, 0x06, 0xc0, + 0xd6, 0x68, 0xd9, 0x7f, 0xd6, 0xa2, 0x3b, 0x08, 0x6a, 0x98, 0x26, 0x6d, + 0x9a, 0x2b, 0x68, 0x51, 0x78, 0xde, 0xa6, 0x96, 0x50, 0x7b, 0xfc, 0x03, + 0x43, 0xf8, 0x21, 0x01, 0x9d, 0xe2, 0x89, 0x65, 0x47, 0xae, 0x9c, 0x45, + 0x5e, 0xa5, 0xce, 0x97, 0xb3, 0xe6, 0xf6, 0xd4, 0x5a, 0xe8, 0x6b, 0x87, + 0xd6, 0xdf, 0xfb, 0x1f, 0xaf, 0xfb, 0xaf, 0x19, 0xa5, 0xfd, 0xba, 0xe0, + 0x22, 0x2f, 0x91, 0x97, 0xdf, 0xae, 0xe9, 0x39, 0xb1, 0xe4, 0xd3, 0x10, + 0xcb, 
0xb3, 0x03, 0xb5, 0x0b, 0xf0, 0xd9, 0x70, 0x1e, 0x9c, 0x63, 0x6f, + 0x3a, 0xcf, 0x3c, 0x1b, 0x86, 0xa3, 0xad, 0x1a, 0xe7, 0x4c, 0x09, 0xd0, + 0x80, 0xf6, 0x8b, 0x72, 0x96, 0x53, 0x7e, 0x66, 0xfb, 0x7c, 0x7c, 0x8a, + 0xb0, 0x60, 0xa6, 0x4c, 0x20, 0xc4, 0x63, 0x69, 0x6a, 0xc3, 0x53, 0xf8, + 0x9a, 0x28, 0x30, 0x9d, 0x6f, 0x0e, 0x1b, 0xb2, 0x2c, 0xe6, 0x94, 0x9f, + 0xfc, 0xc0, 0x8d, 0x71, 0xbe, 0x37, 0xa6, 0xc9, 0xbd, 0x3c, 0x4a, 0xf3, + 0xc4, 0xb3, 0x88, 0x4c, 0x45, 0x26, 0x4e, 0x2f, 0x83, 0x16, 0x70, 0xb6, + 0xc7, 0xb2, 0x36, 0xf0, 0x0c, 0x67, 0xd2, 0x0a, 0xd3, 0xd9, 0x7c, 0x35, + 0x29, 0xac, 0xd4, 0x9c, 0x6d, 0xfc, 0xec, 0x58, 0x92, 0xf0, 0xba, 0x32, + 0x00, 0xae, 0xb1, 0xeb, 0x4d, 0x8c, 0x1a, 0x20, 0xe7, 0x5c, 0xfc, 0x9a, + 0x4d, 0x51, 0x24, 0x7b, 0x52, 0xeb, 0x13, 0x3d, 0xb4, 0xab, 0xda, 0xb3, + 0x74, 0x39, 0xd2, 0xf8, 0x2d, 0xef, 0x9b, 0x0f, 0xae, 0xf5, 0x3c, 0x99, + 0x34, 0xbe, 0x15, 0x5c, 0x9f, 0x5d, 0xae, 0xf4, 0x72, 0xc2, 0xac, 0x06, + 0xbe, 0xad, 0xe4, 0x68, 0xea, 0xd5, 0xa1, 0xdc, 0xdb, 0xf4, 0x61, 0x51, + 0xf5, 0x1a, 0x62, 0x15, 0xfd, 0x00, 0x51, 0x35, 0x53, 0x6c, 0x39, 0x3e, + 0xdb, 0x60, 0x0a, 0x52, 0xc1, 0x52, 0x3c, 0xd7, 0xab, 0x73, 0xea, 0x1e, + 0x38, 0x38, 0x65, 0x35, 0x35, 0x2b, 0x28, 0x04, 0x5c, 0x82, 0xea, 0x4a, + 0x9e, 0x96, 0x72, 0xa4, 0x8e, 0x42, 0xfd, 0x55, 0xa8, 0x66, 0x7a, 0x40, + 0xc9, 0xf2, 0xc2, 0x1e, 0x5d, 0x09, 0x90, 0x32, 0x18, 0xdb, 0x11, 0x4c, + 0x6c, 0x9c, 0x27, 0x62, 0x0a, 0xe6, 0xc1, 0xdf, 0xf2, 0x6a, 0x8c, 0x26, + 0xb4, 0xfb, 0xda, 0xa9, 0x08, 0x10, 0x3a, 0xf0, 0xe1, 0x64, 0xe5, 0x03, + 0x81, 0x7d, 0x15, 0x74, 0xa1, 0x8d, 0x10, 0xc8, 0xbb, 0x6a, 0x7c, 0x60, + 0xa1, 0x09, 0x35, 0x19, 0x2d, 0x70, 0xb5, 0x36, 0xc8, 0x8b, 0x66, 0x5f, + 0xe0, 0xe7, 0xea, 0x70, 0x2f, 0x5d, 0x3f, 0xae, 0x5e, 0x25, 0x84, 0xdd, + 0x9b, 0x69, 0x44, 0x37, 0x7c, 0x6b, 0x9e, 0x81, 0x18, 0x36, 0x4b, 0xff, + 0x86, 0x44, 0x2a, 0x39, 0x66, 0x7f, 0x71, 0x43, 0xe7, 0x65, 0xfe, 0xfd, + 0x34, 0xb9, 0xd9, 0x5a, 0x00, 0xd1, 0x41, 0x43, 0xc7, 0xbc, 0x65, 0x68, + 0xb7, 
0x73, 0xff, 0x19, 0xd3, 0xed, 0x15, 0xa4, 0x67, 0xa1, 0x53, 0x0e, + 0xa6, 0xfb, 0x25, 0xce, 0x9d, 0x5b, 0x73, 0x08, 0xf3, 0x3b, 0x69, 0xe4, + 0x94, 0x9b, 0x94, 0x03, 0xb3, 0x8a, 0x2e, 0x07, 0x0c, 0xef, 0x18, 0x4c, + 0x2b, 0x1c, 0x83, 0x9f, 0x25, 0x20, 0x29, 0x72, 0x11, 0xa0, 0xaa, 0xed, + 0x0c, 0xf9, 0xce, 0x94, 0x0d, 0x7a, 0xb6, 0xb3, 0xa4, 0x57, 0xd6, 0x61, + 0xca, 0x1a, 0x0e, 0x89, 0x6d, 0x99, 0x4d, 0x06, 0xcd, 0x83, 0x7e, 0x09, + 0x14, 0x5b, 0xe7, 0x4c, 0x72, 0xa8, 0x98, 0xc8, 0x27, 0xf3, 0x70, 0x89, + 0x87, 0x11, 0xbb, 0x98, 0x82, 0x77, 0x9d, 0xaa, 0x95, 0x8c, 0xc1, 0xf8, + 0x39, 0x27, 0xd5, 0x64, 0x59, 0x6a, 0x8c, 0xbe, 0xe2, 0xe1, 0xd1, 0x6b, + 0xe3, 0xaf, 0x30, 0x6f, 0xf4, 0x9e, 0x35, 0x0b, 0x10, 0x24, 0x77, 0xd8, + 0xa4, 0x30, 0x2e, 0xf7, 0x97, 0xfd, 0xef, 0x1e, 0x9e, 0xf2, 0xbd, 0xf2, + 0x41, 0x73, 0x19, 0xe6, 0x7b, 0x7f, 0x74, 0x11, 0x91, 0x38, 0xc5, 0xac, + 0xd5, 0xb0, 0x48, 0xc4, 0xe9, 0x41, 0xd4, 0x50, 0x76, 0x13, 0xbf, 0xec, + 0xe8, 0x3a, 0xa8, 0x84, 0x42, 0x98, 0x12, 0x64, 0x95, 0x85, 0x79, 0x29, + 0xea, 0x3a, 0xf9, 0xa4, 0x5c, 0x9c, 0x35, 0x01, 0x68, 0x71, 0xb9, 0x5b, + 0xbe, 0xaa, 0x76, 0x9e, 0x63, 0x1c, 0xc1, 0x83, 0x94, 0xc6, 0x89, 0x2b, + 0x1d, 0x00, 0x43, 0x74, 0x00, 0x41, 0x93, 0x58, 0x52, 0xf9, 0x13, 0xfe, + 0x9f, 0x7a, 0xb7, 0x3d, 0x6b, 0x70, 0x4e, 0x4f, 0x8f, 0xf4, 0x9c, 0xe4, + 0x97, 0x62, 0xaf, 0x69, 0x45, 0xec, 0xf4, 0x53, 0x71, 0xdc, 0xc7, 0x8d, + 0x6f, 0xb2, 0x9d, 0xec, 0x43, 0xdd, 0xc0, 0xe5, 0xd1, 0x6c, 0x1a, 0x82, + 0x19, 0xf6, 0x18, 0xd3, 0x59, 0x0e, 0x07, 0x81, 0x5a, 0x23, 0x10, 0x8b, + 0xaa, 0x0b, 0x99, 0xc8, 0x34, 0xc2, 0xd0, 0xa9, 0x69, 0x7f, 0x54, 0xe3, + 0xc4, 0xa0, 0xe7, 0x4b, 0x31, 0x90, 0xe7, 0x3b, 0x45, 0x9b, 0x7f, 0xae, + 0xd2, 0xab, 0x22, 0xb9, 0xfc, 0x07, 0x39, 0x4b, 0x45, 0x83, 0x8d, 0x41, + 0x7a, 0x52, 0xb2, 0xae, 0x71, 0x78, 0x17, 0x63, 0xfa, 0xbe, 0x59, 0xca, + 0xf0, 0xfd, 0x68, 0xe5, 0xc4, 0x9a, 0x74, 0x3d, 0xec, 0xd4, 0x8b, 0xa1, + 0x2c, 0x31, 0x4d, 0x73, 0xfd, 0x5c, 0x1e, 0xeb, 0x5f, 0xf6, 0x42, 0x0d, + 0x79, 
0x5f, 0x64, 0x10, 0xae, 0xb2, 0xf6, 0x9e, 0xa8, 0xab, 0xa5, 0x2b, + 0x9a, 0xcf, 0x25, 0xfa, 0xa2, 0xb3, 0xdc, 0x30, 0x3d, 0x08, 0x4e, 0xbb, + 0x7b, 0x0c, 0x28, 0x34, 0x9d, 0xda, 0xc4, 0x94, 0xa4, 0xf4, 0x1e, 0x78, + 0x8b, 0xa9, 0xd3, 0xa7, 0x1c, 0x2a, 0x27, 0x14, 0xa0, 0x44, 0x1a, 0x9a, + 0x87, 0x72, 0xa5, 0x6d, 0x69, 0x46, 0xe5, 0xc1, 0x4f, 0x29, 0x87, 0xc0, + 0xa7, 0xa8, 0x96, 0xde, 0xa9, 0x63, 0x08, 0xd8, 0x4a, 0xa1, 0x25, 0x43, + 0x76, 0x41, 0xf7, 0x9f, 0x17, 0xe3, 0xe1, 0x4b, 0xc6, 0x2b, 0x79, 0xea, + 0xd5, 0xa7, 0x72, 0x16, 0x0a, 0x8c, 0xcd, 0x49, 0x70, 0x75, 0xd4, 0x59, + 0x4a, 0x19, 0x7b, 0x31, 0x02, 0x7a, 0x3a, 0x20, 0x15, 0x62, 0x7e, 0x4e, + 0x6f, 0xac, 0xd0, 0xd1, 0x29, 0xbd, 0x2d, 0xa1, 0xc6, 0x3e, 0xa6, 0x1a, + 0x26, 0x18, 0x96, 0x98, 0x12, 0x56, 0x37, 0xbf, 0xb4, 0x91, 0x57, 0xe8, + 0xda, 0x61, 0x7c, 0x2f, 0x3e, 0xd4, 0x51, 0xfe, 0xe8, 0x5b, 0x00, 0x30, + 0x08, 0xf6, 0x4e, 0x69, 0xa8, 0x1a, 0x2b, 0x82, 0x41, 0x85, 0xa9, 0xd9, + 0x3c, 0xc8, 0x02, 0x91, 0x99, 0xd4, 0xa2, 0xfd, 0x9d, 0x1b, 0x08, 0xfc, + 0x41, 0x3e, 0x10, 0x6b, 0x80, 0x74, 0x3d, 0x72, 0x61, 0x97, 0xdd, 0x96, + 0xec, 0xf4, 0xd6, 0x6d, 0x68, 0x02, 0x6e, 0xbb, 0x55, 0x9d, 0x6f, 0x11, + 0xde, 0xd1, 0xad, 0x6d, 0x42, 0x96, 0x2c, 0x42, 0x1e, 0xa9, 0x19, 0x42, + 0x22, 0x38, 0x38, 0x18, 0x3c, 0x4b, 0xc1, 0x9c, 0x0f, 0xe1, 0x34, 0x61, + 0x06, 0x77, 0x54, 0x04, 0xe0, 0x87, 0x94, 0x5c, 0xc9, 0xa1, 0x35, 0x55, + 0x3d, 0x4a, 0xf2, 0x4f, 0x05, 0x11, 0x98, 0x6f, 0x3c, 0x85, 0x84, 0xe6, + 0xf8, 0x71, 0x8a, 0xdf, 0xe9, 0x9a, 0xe3, 0x70, 0xd6, 0x36, 0xd6, 0xc8, + 0x66, 0x3e, 0xba, 0x7c, 0x0a, 0x23, 0x0a, 0xd0, 0xb6, 0x66, 0x68, 0xa8, + 0xdf, 0x37, 0x17, 0xfb, 0xdd, 0x9c, 0x8b, 0xc7, 0x8e, 0xc4, 0x4f, 0x40, + 0x08, 0x23, 0x58, 0x15, 0xa2, 0xba, 0xef, 0xdf, 0x67, 0xcd, 0x1f, 0xb6, + 0xc4, 0xea, 0xce, 0x81, 0x38, 0x58, 0x92, 0x57, 0xcf, 0x83, 0x47, 0x29, + 0x9f, 0xde, 0x9b, 0xde, 0x01, 0xfe, 0x68, 0x91, 0x67, 0x06, 0x9d, 0x31, + 0xd0, 0xb9, 0xc3, 0xbb, 0xc3, 0x6b, 0xa0, 0x04, 0x1e, 0x34, 0xd5, 0x38, + 0xd4, 
0xac, 0x70, 0xae, 0xab, 0xb2, 0xbd, 0x4b, 0xa0, 0xad, 0x2b, 0x82, + 0xaf, 0x8c, 0x90, 0x4d, 0xd3, 0xca, 0x71, 0x35, 0x75, 0x89, 0xe5, 0x42, + 0x91, 0x46, 0x8d, 0x18, 0x04, 0x7a, 0xb9, 0xaa, 0x3b, 0xe7, 0x1e, 0x8c, + 0x4e, 0xf9, 0x6e, 0x74, 0xaa, 0x2e, 0x36, 0x86, 0xfb, 0xef, 0x9c, 0xd7, + 0xba, 0x5e, 0x2e, 0x3c, 0x40, 0xce, 0x8b, 0x2b, 0x94, 0x55, 0xf2, 0xd4, + 0x7d, 0xbf, 0x8c, 0x8a, 0xa8, 0x59, 0x84, 0x6f, 0x32, 0x95, 0xc5, 0xcc, + 0xad, 0xee, 0x30, 0x23, 0x7c, 0x54, 0xea, 0x60, 0xb8, 0x88, 0x12, 0x45, + 0x03, 0xbc, 0xe3, 0x92, 0x9f, 0xa8, 0x5b, 0x07, 0x97, 0x53, 0x0d, 0xe1, + 0xe3, 0x3d, 0xdf, 0xf2, 0x2a, 0x12, 0xee, 0xdf, 0x73, 0x8d, 0x41, 0xf4, + 0xe4, 0x2c, 0xb4, 0xd4, 0x9e, 0xfe, 0xf2, 0xe6, 0xa0, 0x9e, 0x2a, 0x3a, + 0x36, 0x26, 0x7e, 0xd9, 0xe1, 0x22, 0xee, 0x0b, 0x5b, 0x48, 0xd2, 0xa9, + 0x55, 0xab, 0x50, 0x7c, 0xf6, 0xc8, 0x56, 0x31, 0xbb, 0x51, 0xe9, 0x31, + 0x4d, 0xaa, 0x13, 0x3a, 0x99, 0x9f, 0x8c, 0x59, 0x6a, 0xc9, 0xf1, 0x0a, + 0x89, 0xcc, 0x39, 0x98, 0xbd, 0xc3, 0x93, 0x97, 0x28, 0xe5, 0x73, 0x94, + 0xf2, 0x0a, 0x7a, 0x09, 0x38, 0x0b, 0xab, 0xd8, 0x49, 0x98, 0x14, 0x34, + 0x32, 0x9d, 0xef, 0x9d, 0x47, 0xdb, 0x82, 0xb9, 0x84, 0xd6, 0xd7, 0x9f, + 0xf7, 0xdf, 0x79, 0x5b, 0xe8, 0x92, 0x44, 0x31, 0x5d, 0x42, 0x80, 0x90, + 0x8d, 0x36, 0xa2, 0x39, 0x02, 0x64, 0x21, 0xa2, 0xb8, 0xfc, 0xff, 0xff, + 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xc8, 0xeb, 0xff, 0xff, + 0x10, 0x00, 0x00, 0x00, 0xd8, 0x03, 0x00, 0x00, 0xdc, 0x03, 0x00, 0x00, + 0xe0, 0x03, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0xa8, 0x03, 0x00, 0x00, + 0x50, 0x03, 0x00, 0x00, 0x04, 0x03, 0x00, 0x00, 0xac, 0x02, 0x00, 0x00, + 0x74, 0x02, 0x00, 0x00, 0x2c, 0x02, 0x00, 0x00, 0xf4, 0x01, 0x00, 0x00, + 0xac, 0x01, 0x00, 0x00, 0x74, 0x01, 0x00, 0x00, 0x2c, 0x01, 0x00, 0x00, + 0xe4, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x9e, 0xfc, 0xff, 0xff, + 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x01, 
0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x34, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xac, 0x12, 0x00, 0x00, - 0x3c, 0x12, 0x00, 0x00, 0xdc, 0x11, 0x00, 0x00, 0x90, 0x11, 0x00, 0x00, - 0x24, 0x11, 0x00, 0x00, 0xac, 0x10, 0x00, 0x00, 0x5c, 0x10, 0x00, 0x00, - 0x10, 0x10, 0x00, 0x00, 0xa8, 0x0f, 0x00, 0x00, 0x58, 0x0f, 0x00, 0x00, - 0x04, 0x0f, 0x00, 0x00, 0xb8, 0x0e, 0x00, 0x00, 0x4c, 0x0e, 0x00, 0x00, - 0xe4, 0x0d, 0x00, 0x00, 0x94, 0x0d, 0x00, 0x00, 0x48, 0x0d, 0x00, 0x00, - 0xe0, 0x0c, 0x00, 0x00, 0x90, 0x0c, 0x00, 0x00, 0x3c, 0x0c, 0x00, 0x00, - 0xf0, 0x0b, 0x00, 0x00, 0x84, 0x0b, 0x00, 0x00, 0x1c, 0x0b, 0x00, 0x00, - 0xcc, 0x0a, 0x00, 0x00, 0x80, 0x0a, 0x00, 0x00, 0x18, 0x0a, 0x00, 0x00, - 0xc8, 0x09, 0x00, 0x00, 0x74, 0x09, 0x00, 0x00, 0x28, 0x09, 0x00, 0x00, - 0xbc, 0x08, 0x00, 0x00, 0x54, 0x08, 0x00, 0x00, 0x04, 0x08, 0x00, 0x00, - 0xb8, 0x07, 0x00, 0x00, 0x50, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, - 0xac, 0x06, 0x00, 0x00, 0x60, 0x06, 0x00, 0x00, 0xf4, 0x05, 0x00, 0x00, - 0x8c, 0x05, 0x00, 0x00, 0x3c, 0x05, 0x00, 0x00, 0xe8, 0x04, 0x00, 0x00, - 0x9c, 0x04, 0x00, 0x00, 0x30, 0x04, 0x00, 0x00, 0xc8, 0x03, 0x00, 0x00, - 0x78, 0x03, 0x00, 0x00, 0x24, 0x03, 0x00, 0x00, 0xd8, 0x02, 0x00, 0x00, - 0x6c, 0x02, 0x00, 0x00, 0x04, 0x02, 0x00, 0x00, 0xb4, 0x01, 0x00, 0x00, - 0x68, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, - 0x50, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x3a, 0xee, 0xff, 0xff, - 0x10, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, - 0x2c, 0x00, 0x00, 0x00, 0x94, 0xee, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x82, 0xee, 0xff, 0xff, - 0x10, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, - 0x2c, 
0x00, 0x00, 0x00, 0xdc, 0xee, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0xd7, 0x23, 0x3a, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0xca, 0xee, 0xff, 0xff, - 0x10, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, - 0x44, 0x00, 0x00, 0x00, 0xbc, 0xee, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x80, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0x01, 0x00, 0x00, 0x00, 0x80, 0x00, 0x80, 0x37, 0x01, 0x00, 0x00, 0x00, - 0xc2, 0xff, 0x7f, 0x3f, 0x01, 0x00, 0x00, 0x00, 0xd2, 0x6f, 0x75, 0x36, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2a, 0xef, 0xff, 0xff, - 0x10, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, - 0x48, 0x00, 0x00, 0x00, 0x1c, 0xef, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, - 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x16, 0x49, 0x3d, - 0x01, 0x00, 0x00, 0x00, 0x87, 0x19, 0xb1, 0x40, 0x01, 0x00, 0x00, 0x00, - 0x58, 0x80, 0xdf, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0xfa, 0xef, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x2c, 0x00, 0x00, 0x00, - 0xec, 0xef, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x5d, 0xd1, 0xce, 0x39, 0x00, 0x00, 0x00, 0x00, + 0x33, 0x00, 0x00, 0x00, 0x5e, 0xfd, 0xff, 0xff, 0x1c, 0x00, 0x00, 0x00, + 0x00, 
0x00, 0x00, 0x09, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00, + 0x33, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, + 0x96, 0xfd, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, + 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, + 0x88, 0xfd, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x31, 0x00, 0x00, 0x00, 0xca, 0xfd, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x06, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x0a, 0x00, 0x00, 0x00, 0x78, 0xfd, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, + 0x2c, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, + 0x0e, 0xfe, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, + 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0xbc, 0xfd, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x25, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x52, 0xfe, 0xff, 0xff, + 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x18, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x25, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x96, 0xfe, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x07, 
0x00, 0x00, 0x00, 0x88, 0xfe, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, + 0x1e, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, 0xca, 0xfe, 0xff, 0xff, + 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x18, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x78, 0xfe, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x1d, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x42, 0xff, 0xff, 0xff, + 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x18, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf0, 0xfe, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x15, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x86, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x78, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, + 0x0e, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0xba, 0xff, 0xff, 0xff, + 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x18, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x68, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0d, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x09, 
0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, + 0x10, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x14, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x1c, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x07, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x42, 0xf0, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x2c, 0x00, 0x00, 0x00, - 0x34, 0xf0, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x23, 0x20, 0xb6, 0x3b, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x22, 0xf0, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x48, 0x00, 0x00, 0x00, - 0x14, 0xf0, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x80, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0xa2, 0x5a, 0x91, 0x3d, 0x01, 0x00, 0x00, 0x00, - 0x47, 0xc9, 0x90, 0x41, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0xf2, 0xff, 0xff, - 0x00, 0x00, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x07, 0x48, 0x00, 0x00, 0x00, 0x7c, 0xf0, 0xff, 0xff, - 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x81, 0xb7, 0xf1, 0x39, 0x01, 0x00, 0x00, 0x00, 0x9e, 0xb5, 0x71, 0x41, - 0x01, 0x00, 0x00, 0x00, 0x33, 0x20, 0x70, 0xc1, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x04, 0x00, 0x00, 0x5a, 0xf1, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, - 0x30, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, - 0x2c, 0x00, 0x00, 0x00, 0x4c, 0xf1, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x7a, 0x08, 0x97, 0x35, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0xa2, 0xf1, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, - 0x34, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, - 0x30, 0x00, 0x00, 0x00, 0x94, 0xf1, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, + 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x34, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, + 0x34, 0x10, 0x00, 0x00, 0xd4, 0x0f, 0x00, 0x00, 0x7c, 0x0f, 0x00, 0x00, + 0x38, 0x0f, 0x00, 0x00, 0xdc, 0x0e, 0x00, 0x00, 0x74, 0x0e, 0x00, 0x00, + 0x2c, 
0x0e, 0x00, 0x00, 0xe8, 0x0d, 0x00, 0x00, 0x90, 0x0d, 0x00, 0x00, + 0x48, 0x0d, 0x00, 0x00, 0x04, 0x0d, 0x00, 0x00, 0xc0, 0x0c, 0x00, 0x00, + 0x64, 0x0c, 0x00, 0x00, 0x0c, 0x0c, 0x00, 0x00, 0xc4, 0x0b, 0x00, 0x00, + 0x80, 0x0b, 0x00, 0x00, 0x28, 0x0b, 0x00, 0x00, 0xe0, 0x0a, 0x00, 0x00, + 0x9c, 0x0a, 0x00, 0x00, 0x58, 0x0a, 0x00, 0x00, 0xfc, 0x09, 0x00, 0x00, + 0xa4, 0x09, 0x00, 0x00, 0x5c, 0x09, 0x00, 0x00, 0x18, 0x09, 0x00, 0x00, + 0xc0, 0x08, 0x00, 0x00, 0x78, 0x08, 0x00, 0x00, 0x34, 0x08, 0x00, 0x00, + 0xf0, 0x07, 0x00, 0x00, 0x94, 0x07, 0x00, 0x00, 0x3c, 0x07, 0x00, 0x00, + 0xf4, 0x06, 0x00, 0x00, 0xb0, 0x06, 0x00, 0x00, 0x58, 0x06, 0x00, 0x00, + 0x10, 0x06, 0x00, 0x00, 0xcc, 0x05, 0x00, 0x00, 0x88, 0x05, 0x00, 0x00, + 0x2c, 0x05, 0x00, 0x00, 0xd4, 0x04, 0x00, 0x00, 0x8c, 0x04, 0x00, 0x00, + 0x48, 0x04, 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0xa8, 0x03, 0x00, 0x00, + 0x50, 0x03, 0x00, 0x00, 0x08, 0x03, 0x00, 0x00, 0xc4, 0x02, 0x00, 0x00, + 0x80, 0x02, 0x00, 0x00, 0x24, 0x02, 0x00, 0x00, 0xcc, 0x01, 0x00, 0x00, + 0x84, 0x01, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, 0xe8, 0x00, 0x00, 0x00, + 0x8c, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0xb2, 0xf0, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + 0x28, 0x00, 0x00, 0x00, 0xfc, 0xf0, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x2f, 0xf5, 0x1f, 0x3b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0xf2, 0xf1, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x2c, 0x00, 0x00, 0x00, - 0xe4, 0xf1, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0xc7, 0xea, 0x1a, 0x3c, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 
0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0xd2, 0xf1, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x48, 0x00, 0x00, 0x00, - 0xc4, 0xf1, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x80, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0xb2, 0x78, 0x3f, 0x3d, 0x01, 0x00, 0x00, 0x00, - 0x39, 0xb9, 0x3e, 0x41, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0xb0, 0xf3, 0xff, 0xff, - 0x00, 0x00, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x07, 0x48, 0x00, 0x00, 0x00, 0x2c, 0xf2, 0xff, 0xff, - 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x89, 0x25, 0xf2, 0x39, 0x01, 0x00, 0x00, 0x00, 0xde, 0xdc, 0x1d, 0x41, - 0x01, 0x00, 0x00, 0x00, 0xa5, 0x23, 0x72, 0xc1, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x04, 0x00, 0x00, 0x0a, 0xf3, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, - 0x30, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, - 0x2c, 0x00, 0x00, 0x00, 0xfc, 0xf2, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x42, 0xe0, 0x90, 0x35, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x52, 0xf3, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, - 0x34, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, - 0x30, 0x00, 0x00, 0x00, 0x44, 0xf3, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, - 0x14, 0x00, 
0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x1a, 0x2a, 0x19, 0x3b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0xa2, 0xf3, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x2c, 0x00, 0x00, 0x00, - 0x94, 0xf3, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0xe9, 0x36, 0xdd, 0x3b, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x82, 0xf3, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x48, 0x00, 0x00, 0x00, - 0x74, 0xf3, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x80, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0xdd, 0x43, 0x7e, 0x3d, 0x01, 0x00, 0x00, 0x00, - 0x99, 0x45, 0x7d, 0x41, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x60, 0xf5, 0xff, 0xff, - 0x00, 0x00, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x07, 0x48, 0x00, 0x00, 0x00, 0xdc, 0xf3, 0xff, 0xff, - 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x5c, 0xfd, 0xa9, 0x39, 0x01, 0x00, 0x00, 0x00, 0x1e, 0xaa, 0x87, 0x40, - 0x01, 0x00, 0x00, 0x00, 0x08, 0xfc, 0x29, 0xc1, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x04, 
0x00, 0x00, 0xba, 0xf4, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, - 0x30, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, - 0x2c, 0x00, 0x00, 0x00, 0xac, 0xf4, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x55, 0xf7, 0x52, 0x35, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x02, 0xf5, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, - 0x34, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, - 0x30, 0x00, 0x00, 0x00, 0xf4, 0xf4, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0xd0, 0xda, 0x1e, 0x3b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x52, 0xf5, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x2c, 0x00, 0x00, 0x00, - 0x44, 0xf5, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x8e, 0x0b, 0xa8, 0x3b, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x32, 0xf5, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x48, 0x00, 0x00, 0x00, - 0x24, 0xf5, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0xf5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x12, 0x1c, 0x6e, 0x3d, 0x01, 0x00, 0x00, 0x00, - 0xdd, 0x4a, 0x00, 0x41, 0x01, 0x00, 0x00, 0x00, 0x31, 0xc6, 0xd9, 0xc0, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x01, 0x00, 
0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0xf6, 0xff, 0xff, - 0x14, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x02, 0x2c, 0x00, 0x00, 0x00, 0xf4, 0xf5, 0xff, 0xff, - 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x80, 0x9d, 0x16, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x4a, 0xf6, 0xff, 0xff, - 0x14, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x09, 0x2c, 0x00, 0x00, 0x00, 0x3c, 0xf6, 0xff, 0xff, - 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0xa4, 0x34, 0xab, 0x3b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, - 0x2a, 0xf6, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x09, 0x48, 0x00, 0x00, 0x00, 0x1c, 0xf6, 0xff, 0xff, - 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x80, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x2e, 0x36, 0xe1, 0x3c, 0x01, 0x00, 0x00, 0x00, 0xf8, 0x54, 0xe0, 0x40, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x40, 0x00, 0x00, 0x00, 0x08, 0xf8, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, - 0x10, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, - 0x48, 0x00, 0x00, 0x00, 0x84, 0xf6, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, - 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xe1, 0xd0, 0xa2, 0x39, - 0x01, 0x00, 
0x00, 0x00, 0x9b, 0xcf, 0x22, 0x41, 0x01, 0x00, 0x00, 0x00, - 0xea, 0x23, 0x12, 0xc1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, - 0x62, 0xf7, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x2c, 0x00, 0x00, 0x00, - 0x54, 0xf7, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x99, 0xd3, 0xf7, 0x34, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, - 0xaa, 0xf7, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x30, 0x00, 0x00, 0x00, - 0x9c, 0xf7, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0xd5, 0xc2, 0x3a, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x40, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xfa, 0xf7, 0xff, 0xff, - 0x14, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x09, 0x2c, 0x00, 0x00, 0x00, 0xec, 0xf7, 0xff, 0xff, - 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x8f, 0x84, 0xa2, 0x3b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0xda, 0xf7, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x09, 0x48, 0x00, 0x00, 0x00, 0xcc, 0xf7, 0xff, 0xff, - 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf7, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x64, 0xeb, 
0x8e, 0x3d, 0x01, 0x00, 0x00, 0x00, 0x3b, 0xf3, 0x17, 0x41, - 0x01, 0x00, 0x00, 0x00, 0xb7, 0xc5, 0x04, 0xc1, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0xaa, 0xf8, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, - 0x30, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, - 0x2c, 0x00, 0x00, 0x00, 0x9c, 0xf8, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x92, 0xa8, 0x98, 0x39, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0xf2, 0xf8, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, - 0x30, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, - 0x2c, 0x00, 0x00, 0x00, 0xe4, 0xf8, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x58, 0x76, 0xb9, 0x3b, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0xd2, 0xf8, 0xff, 0xff, - 0x10, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, - 0x48, 0x00, 0x00, 0x00, 0xc4, 0xf8, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, - 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x80, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x43, 0xb8, 0x52, 0x3d, - 0x01, 0x00, 0x00, 0x00, 0x8b, 0xe5, 0x51, 0x41, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, - 0xb0, 0xfa, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x48, 0x00, 0x00, 0x00, - 0x2c, 0xf9, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x20, 0x00, 
0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0xe3, 0xa1, 0xf0, 0x39, 0x01, 0x00, 0x00, 0x00, - 0x02, 0xa0, 0x70, 0x41, 0x01, 0x00, 0x00, 0x00, 0x87, 0x08, 0x65, 0xc1, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x0a, 0xfa, 0xff, 0xff, - 0x14, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x02, 0x2c, 0x00, 0x00, 0x00, 0xfc, 0xf9, 0xff, 0xff, - 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0xcc, 0x98, 0x41, 0x35, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x52, 0xfa, 0xff, 0xff, - 0x14, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x07, 0x30, 0x00, 0x00, 0x00, 0x44, 0xfa, 0xff, 0xff, + 0x00, 0x01, 0x00, 0x38, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0xf2, 0xf0, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x07, 0x28, 0x00, 0x00, 0x00, 0x3c, 0xf1, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0xed, 0xf5, 0xcd, 0x3a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, - 0x08, 0x00, 0x00, 0x00, 0xa2, 0xfa, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, - 0x30, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, - 0x2c, 0x00, 0x00, 0x00, 0x94, 0xfa, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x9d, 0xca, 0xd4, 0x3b, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x40, 0x00, 0x00, 
0x00, 0x10, 0x00, 0x00, 0x00, 0x82, 0xfa, 0xff, 0xff, - 0x10, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, - 0x48, 0x00, 0x00, 0x00, 0x74, 0xfa, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, - 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x58, 0x58, 0xce, 0x3d, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x49, 0x41, 0x01, 0x00, 0x00, 0x00, - 0x01, 0x06, 0x52, 0xc1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x52, 0xfb, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x2c, 0x00, 0x00, 0x00, - 0x44, 0xfb, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x9b, 0x9c, 0xe1, 0x39, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x9a, 0xfb, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x2c, 0x00, 0x00, 0x00, - 0x8c, 0xfb, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0xf8, 0xb6, 0xc3, 0x3b, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x40, 0x00, 0x00, 0x00, 0x7a, 0xfb, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x48, 0x00, 0x00, 0x00, - 0x6c, 0xfb, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0a, 0xd7, 0x23, 0x3a, 0x02, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x32, 0xf1, 0xff, 0xff, + 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x40, 0x00, 0x00, 0x00, + 0x24, 0xf1, 0xff, 
0xff, 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x80, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x94, 0x8d, 0x93, 0x3d, 0x01, 0x00, 0x00, 0x00, - 0x06, 0xfa, 0x92, 0x41, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x58, 0xfd, 0xff, 0xff, - 0x00, 0x00, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x07, 0x48, 0x00, 0x00, 0x00, 0xd4, 0xfb, 0xff, 0xff, - 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x7a, 0xf6, 0x5f, 0x3a, 0x01, 0x00, 0x00, 0x00, 0xba, 0xf4, 0xdf, 0x41, - 0x01, 0x00, 0x00, 0x00, 0xf4, 0x7c, 0xcf, 0xc1, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x02, 0x00, 0x00, 0xb2, 0xfc, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, - 0x30, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, - 0x2c, 0x00, 0x00, 0x00, 0xa4, 0xfc, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x80, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x80, 0x00, 0x80, 0x37, 0x01, 0x00, 0x00, 0x00, + 0xc2, 0xff, 0x7f, 0x3f, 0x01, 0x00, 0x00, 0x00, 0xd2, 0x6f, 0x75, 0x36, + 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x8a, 0xf1, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, + 0x3c, 0x00, 0x00, 0x00, 0x7c, 0xf1, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x06, 0x16, 0x49, 0x3d, 0x01, 0x00, 0x00, 0x00, + 0x87, 0x19, 0xb1, 
0x40, 0x01, 0x00, 0x00, 0x00, 0x58, 0x80, 0xdf, 0xc0, + 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x3a, 0xf2, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x02, 0x28, 0x00, 0x00, 0x00, 0x2c, 0xf2, 0xff, 0xff, + 0x08, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x5d, 0xd1, 0xce, 0x39, 0x01, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x7a, 0xf2, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, + 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x28, 0x00, 0x00, 0x00, + 0x6c, 0xf2, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x23, 0x20, 0xb6, 0x3b, + 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x62, 0xf2, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, + 0x3c, 0x00, 0x00, 0x00, 0x54, 0xf2, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x80, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x01, 0x00, 0x00, 0x00, 0xa2, 0x5a, 0x91, 0x3d, 0x01, 0x00, 0x00, 0x00, + 0x47, 0xc9, 0x90, 0x41, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x00, 0xf4, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0x0c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x07, 0x3c, 0x00, 0x00, 0x00, 0xac, 0xf2, 0xff, 0xff, + 0x10, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x81, 0xb7, 0xf1, 0x39, + 0x01, 0x00, 0x00, 0x00, 0x9e, 0xb5, 0x71, 0x41, 0x01, 0x00, 0x00, 0x00, + 0x33, 0x20, 0x70, 0xc1, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x04, 0x00, 
0x00, 0x6a, 0xf3, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, + 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x28, 0x00, 0x00, 0x00, + 0x5c, 0xf3, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x7a, 0x08, 0x97, 0x35, + 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0xaa, 0xf3, 0xff, 0xff, + 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, + 0x24, 0x00, 0x00, 0x00, 0x9c, 0xf3, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x2f, 0xc4, 0x35, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x40, 0x00, 0x00, 0x00, 0xfa, 0xfc, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, - 0x34, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, - 0x30, 0x00, 0x00, 0x00, 0xec, 0xfc, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2f, 0xf5, 0x1f, 0x3b, + 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0xea, 0xf3, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x09, 0x28, 0x00, 0x00, 0x00, 0xdc, 0xf3, 0xff, 0xff, + 0x08, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0xc7, 0xea, 0x1a, 0x3c, 0x02, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0xd2, 0xf3, 0xff, 0xff, + 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x3c, 0x00, 0x00, 0x00, + 0xc4, 0xf3, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x80, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, + 0xb2, 0x78, 0x3f, 0x3d, 0x01, 0x00, 0x00, 0x00, 0x39, 0xb9, 0x3e, 0x41, + 0x01, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x70, 0xf5, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x01, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, + 0x3c, 0x00, 0x00, 0x00, 0x1c, 0xf4, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x89, 0x25, 0xf2, 0x39, 0x01, 0x00, 0x00, 0x00, + 0xde, 0xdc, 0x1d, 0x41, 0x01, 0x00, 0x00, 0x00, 0xa5, 0x23, 0x72, 0xc1, + 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, + 0xda, 0xf4, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x02, 0x28, 0x00, 0x00, 0x00, 0xcc, 0xf4, 0xff, 0xff, + 0x08, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x42, 0xe0, 0x90, 0x35, 0x01, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x1a, 0xf5, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x24, 0x00, 0x00, 0x00, + 0x0c, 0xf5, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x1a, 0x2a, 0x19, 0x3b, 0x02, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x5a, 0xf5, 0xff, 0xff, + 0x10, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, + 0x28, 0x00, 0x00, 0x00, 0x4c, 0xf5, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x8f, 0x3f, 0xe0, 0x3a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x4a, 0xfd, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x09, 0x2c, 0x00, 0x00, 0x00, - 0x3c, 0xfd, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0xe9, 0x36, 0xdd, 0x3b, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x42, 0xf5, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x09, 0x3c, 0x00, 0x00, 0x00, 0x34, 0xf5, 0xff, 0xff, + 0x10, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x80, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0xdd, 0x43, 0x7e, 0x3d, + 0x01, 0x00, 0x00, 0x00, 0x99, 0x45, 0x7d, 0x41, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0xe0, 0xf6, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, + 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x3c, 0x00, 0x00, 0x00, + 0x8c, 0xf5, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x5c, 0xfd, 0xa9, 0x39, 0x01, 0x00, 0x00, 0x00, 0x1e, 0xaa, 0x87, 0x40, + 0x01, 0x00, 0x00, 0x00, 0x08, 0xfc, 0x29, 0xc1, 0x02, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x4a, 0xf6, 0xff, 0xff, + 0x10, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + 0x28, 0x00, 0x00, 0x00, 0x3c, 0xf6, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x55, 0xf7, 0x52, 0x35, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x8a, 0xf6, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x07, 0x24, 0x00, 0x00, 0x00, 0x7c, 0xf6, 0xff, 0xff, + 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0xd0, 0xda, 0x1e, 0x3b, 
0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0xca, 0xf6, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, + 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x28, 0x00, 0x00, 0x00, + 0xbc, 0xf6, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x25, 0xd7, 0xa9, 0x3b, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x2a, 0xfd, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x48, 0x00, 0x00, 0x00, - 0x1c, 0xfd, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0xe3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x8e, 0x0b, 0xa8, 0x3b, + 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0xb2, 0xf6, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, + 0x3c, 0x00, 0x00, 0x00, 0xa4, 0xf6, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0xf5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x01, 0x00, 0x00, 0x00, 0x12, 0x1c, 0x6e, 0x3d, 0x01, 0x00, 0x00, 0x00, + 0xdd, 0x4a, 0x00, 0x41, 0x01, 0x00, 0x00, 0x00, 0x31, 0xc6, 0xd9, 0xc0, + 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x62, 0xf7, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x02, 0x28, 0x00, 0x00, 0x00, 0x54, 0xf7, 0xff, 0xff, + 0x08, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x80, 0x9d, 0x16, 0x39, 0x01, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0xa2, 0xf7, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x09, 0x28, 0x00, 0x00, 0x00, + 0x94, 0xf7, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xa4, 0x34, 0xab, 0x3b, + 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x8a, 0xf7, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, + 0x3c, 0x00, 0x00, 0x00, 0x7c, 0xf7, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x80, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x01, 0x00, 0x00, 0x00, 0x2e, 0x36, 0xe1, 0x3c, 0x01, 0x00, 0x00, 0x00, + 0xf8, 0x54, 0xe0, 0x40, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x28, 0xf9, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0x0c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x07, 0x3c, 0x00, 0x00, 0x00, 0xd4, 0xf7, 0xff, 0xff, + 0x10, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xe1, 0xd0, 0xa2, 0x39, + 0x01, 0x00, 0x00, 0x00, 0x9b, 0xcf, 0x22, 0x41, 0x01, 0x00, 0x00, 0x00, + 0xea, 0x23, 0x12, 0xc1, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x02, 0x00, 0x00, 0x92, 0xf8, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x28, 0x00, 0x00, 0x00, + 0x84, 0xf8, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x99, 0xd3, 0xf7, 0x34, + 0x01, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0xd2, 0xf8, 0xff, 0xff, + 0x10, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, + 0x24, 0x00, 0x00, 0x00, 0xc4, 0xf8, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0xd5, 0xc2, 0x3a, + 0x02, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x12, 0xf9, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x09, 0x28, 0x00, 0x00, 0x00, 0x04, 0xf9, 0xff, 0xff, + 0x08, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x8f, 0x84, 0xa2, 0x3b, 0x02, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0xfa, 0xf8, 0xff, 0xff, + 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x3c, 0x00, 0x00, 0x00, + 0xec, 0xf8, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0xf7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, + 0x64, 0xeb, 0x8e, 0x3d, 0x01, 0x00, 0x00, 0x00, 0x3b, 0xf3, 0x17, 0x41, + 0x01, 0x00, 0x00, 0x00, 0xb7, 0xc5, 0x04, 0xc1, 0x02, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0xaa, 0xf9, 0xff, 0xff, + 0x10, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + 0x28, 0x00, 0x00, 0x00, 0x9c, 0xf9, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x92, 0xa8, 0x98, 0x39, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0xea, 0xf9, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x09, 0x28, 0x00, 0x00, 0x00, 0xdc, 0xf9, 0xff, 0xff, + 0x08, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x58, 0x76, 0xb9, 0x3b, 0x02, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0xd2, 0xf9, 0xff, 0xff, + 0x0c, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x09, 0x3c, 0x00, 0x00, 0x00, + 0xc4, 0xf9, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x80, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, + 0x43, 0xb8, 0x52, 0x3d, 0x01, 0x00, 0x00, 0x00, 0x8b, 0xe5, 0x51, 0x41, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x70, 0xfb, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x01, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, + 0x3c, 0x00, 0x00, 0x00, 0x1c, 0xfa, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0xe3, 0xa1, 0xf0, 0x39, 0x01, 0x00, 0x00, 0x00, + 0x02, 0xa0, 0x70, 0x41, 0x01, 0x00, 0x00, 0x00, 0x87, 0x08, 0x65, 0xc1, + 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, + 0xda, 0xfa, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x02, 0x28, 0x00, 0x00, 0x00, 0xcc, 0xfa, 0xff, 0xff, + 0x08, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0xcc, 0x98, 0x41, 0x35, 0x01, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x1a, 0xfb, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x24, 0x00, 0x00, 0x00, + 0x0c, 0xfb, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0xed, 0xf5, 0xcd, 0x3a, 0x02, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x5a, 0xfb, 0xff, 0xff, + 0x10, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, + 0x28, 0x00, 0x00, 0x00, 0x4c, 0xfb, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x9d, 0xca, 0xd4, 0x3b, 0x02, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x42, 0xfb, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x09, 0x3c, 0x00, 0x00, 0x00, 0x34, 0xfb, 0xff, 0xff, + 0x10, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x58, 0x58, 0xce, 0x3d, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x49, 0x41, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x06, 0x52, 0xc1, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0xf2, 0xfb, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, + 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x28, 0x00, 0x00, 0x00, + 0xe4, 0xfb, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x9b, 0x9c, 0xe1, 0x39, + 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x32, 0xfc, 0xff, 0xff, + 0x10, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, + 0x28, 0x00, 0x00, 0x00, 0x24, 0xfc, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0xf8, 0xb6, 0xc3, 0x3b, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x1a, 0xfc, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x09, 0x3c, 0x00, 0x00, 0x00, 0x0c, 0xfc, 0xff, 0xff, + 0x10, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x80, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x94, 0x8d, 0x93, 0x3d, + 0x01, 0x00, 0x00, 0x00, 0x06, 0xfa, 0x92, 0x41, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 
0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0xb8, 0xfd, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, + 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x3c, 0x00, 0x00, 0x00, + 0x64, 0xfc, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x7a, 0xf6, 0x5f, 0x3a, 0x01, 0x00, 0x00, 0x00, 0xba, 0xf4, 0xdf, 0x41, + 0x01, 0x00, 0x00, 0x00, 0xf4, 0x7c, 0xcf, 0xc1, 0x02, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x22, 0xfd, 0xff, 0xff, + 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + 0x28, 0x00, 0x00, 0x00, 0x14, 0xfd, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x46, 0x2f, 0xc4, 0x35, 0x01, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x62, 0xfd, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x07, 0x24, 0x00, 0x00, 0x00, 0x54, 0xfd, 0xff, 0xff, + 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x8f, 0x3f, 0xe0, 0x3a, 0x02, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0xa2, 0xfd, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x28, 0x00, 0x00, 0x00, + 0x94, 0xfd, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x25, 0xd7, 0xa9, 0x3b, + 0x02, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x8a, 0xfd, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, + 0x3c, 0x00, 0x00, 0x00, 0x7c, 0xfd, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 
0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0xe3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0xc4, 0xf4, 0x39, 0x3e, 0x01, 0x00, 0x00, 0x00, 0xf4, 0x1f, 0xe3, 0x41, 0x01, 0x00, 0x00, 0x00, 0xaa, 0x55, 0x8f, 0xc1, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0xfa, 0xfd, 0xff, 0xff, - 0x14, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x02, 0x2c, 0x00, 0x00, 0x00, 0xec, 0xfd, 0xff, 0xff, - 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x8b, 0x00, 0x4b, 0x3a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x42, 0xfe, 0xff, 0xff, - 0x14, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x09, 0x2c, 0x00, 0x00, 0x00, 0x34, 0xfe, 0xff, 0xff, - 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0xd7, 0xdf, 0xc3, 0x3b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, - 0x22, 0xfe, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x09, 0x48, 0x00, 0x00, 0x00, 0x14, 0xfe, 0xff, 0xff, - 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x80, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x68, 0xa8, 0x04, 0x3e, 0x01, 0x00, 0x00, 0x00, 0xc0, 0x23, 0x04, 0x42, + 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x3a, 0xfe, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x02, 0x28, 0x00, 0x00, 0x00, 0x2c, 0xfe, 0xff, 0xff, + 0x08, 0x00, 0x00, 0x00, 0x14, 
0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x8b, 0x00, 0x4b, 0x3a, 0x01, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x7a, 0xfe, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x28, 0x00, 0x00, 0x00, + 0x6c, 0xfe, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x40, 0x00, 0x00, 0x00, 0x10, 0x00, 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, - 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x07, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x07, 0x48, 0x00, 0x00, 0x00, 0x8c, 0xfe, 0xff, 0xff, - 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xd7, 0xdf, 0xc3, 0x3b, + 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x62, 0xfe, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, + 0x3c, 0x00, 0x00, 0x00, 0x54, 0xfe, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x80, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x01, 0x00, 0x00, 0x00, 0x68, 0xa8, 0x04, 0x3e, 0x01, 0x00, 0x00, 0x00, + 0xc0, 0x23, 0x04, 0x42, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x07, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x3c, 0x00, 0x00, 0x00, + 0xbc, 0xfe, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x20, 
0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x3b, 0xda, 0x75, 0x3b, 0x01, 0x00, 0x00, 0x00, 0x4f, 0xd8, 0xf5, 0x42, - 0x01, 0x00, 0x00, 0x00, 0xa8, 0x2a, 0x61, 0xc2, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x02, 0x00, 0x00, 0x6a, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, - 0x30, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, - 0x2c, 0x00, 0x00, 0x00, 0x5c, 0xff, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xcf, 0x37, 0x69, 0x37, + 0x01, 0x00, 0x00, 0x00, 0xa8, 0x2a, 0x61, 0xc2, 0x02, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x7a, 0xff, 0xff, 0xff, + 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + 0x28, 0x00, 0x00, 0x00, 0x6c, 0xff, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x40, 0x00, 0x00, 0x00, 0xb2, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, - 0x30, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, - 0x2c, 0x00, 0x00, 0x00, 0xa4, 0xff, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0xd8, 0x72, 0x3b, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0xcf, 0x37, 0x69, 0x37, 0x01, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0xba, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x07, 0x28, 0x00, 0x00, 0x00, 0xac, 0xff, 0xff, 0xff, + 0x08, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x14, 0xd8, 
0x72, 0x3b, 0x02, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, - 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, - 0x0e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x3c, 0x00, 0x00, 0x00, - 0x0c, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, - 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xd4, 0x42, 0x16, 0x3c, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x40, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, - 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, - 0x0e, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x09, 0x54, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, - 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x80, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0xa8, 0x41, 0x5b, 0x3d, 0x01, 0x00, 0x00, 0x00, 0x66, 0x66, 0x5a, 0x41, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x60, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x00, 0x00, - 0xb4, 0x00, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, - 0x8c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, - 0x68, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, - 0x44, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, - 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 
0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x96, 0xff, 0xff, 0xff, - 0x00, 0x00, 0x00, 0x72, 0x9e, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x19, - 0xa6, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x09, 0xae, 0xff, 0xff, 0xff, - 0x00, 0x00, 0x00, 0x1b, 0xb6, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x1b, - 0xbe, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x1b, 0xc6, 0xff, 0xff, 0xff, - 0x00, 0x00, 0x00, 0x09, 0xce, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x1b, - 0xd6, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x09, 0xde, 0xff, 0xff, 0xff, - 0x00, 0x00, 0x00, 0x1b, 0xe6, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x09, - 0xfa, 0xff, 0xff, 0xff, 0x00, 0x1b, 0x06, 0x00, 0x06, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x00, 0x09, 0x06, 0x00, 0x08, 0x00, 0x07, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1b}; + 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, + 0x0e, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x09, 0x30, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0xd4, 0x42, 0x16, 0x3c, 0x02, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x10, 0x00, 0x0c, 0x00, + 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x4c, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x80, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0xa8, 0x41, 0x5b, 0x3d, 0x01, 0x00, 0x00, 0x00, + 0x66, 0x66, 0x5a, 0x41, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, + 0x0f, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, + 0xa4, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, 0x8c, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, + 0x5c, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, + 0x38, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x0c, 0x00, 0x0b, 0x00, + 0x00, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x06, 0x96, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x72, + 0x9e, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x19, 0xa6, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x09, 0xae, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x1b, + 0xb6, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x1b, 0xbe, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x1b, 0xc6, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x09, + 0xce, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x1b, 0xd6, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x09, 0xde, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x1b, + 0xe6, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x09, 0xfa, 0xff, 0xff, 0xff, + 0x00, 0x1b, 0x06, 0x00, 0x06, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x00, 0x09, 0x06, 0x00, 0x08, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x1b}; const unsigned int g_keyword_scrambled_model_data_length = 34520; From 8111489d9ca17c2982fbf2a156cafcb8a43b76d2 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Mon, 9 Nov 2020 17:24:30 -0800 Subject: [PATCH 086/220] Remove `CHECK` in `Set16`, move upwards. This is first step in removing `CHECK` from `TensorShapeBase::InitDims`: we are simply moving the `CHECK` from the static method upwards to the caller. Next step would be to transform this method to return `Status` instead of `CHECK`, after all callers are updated to accept the `Status` argument. 
PiperOrigin-RevId: 341518149 Change-Id: I37734284094e6e80c39a34e8864ba4130276645c --- tensorflow/core/framework/tensor_shape.cc | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/framework/tensor_shape.cc b/tensorflow/core/framework/tensor_shape.cc index b564ac144cc..e8a8f0f25ef 100644 --- a/tensorflow/core/framework/tensor_shape.cc +++ b/tensorflow/core/framework/tensor_shape.cc @@ -169,8 +169,6 @@ static inline bool Set16(bool partial, uint16* dst, int dim, int64 val) { dst[dim] = std::numeric_limits::max(); return true; } - } else { - CHECK_GE(val, 0); } dst[dim] = val; return false; @@ -193,6 +191,14 @@ void TensorShapeBase::InitDims(gtl::ArraySlice dim_sizes) { } } + // TODO(mihaimaruseac): Remove this CHECK as the refactoring continues + // Temporaryly moving the CHECK from Set16 here + if (!kIsPartial && !large_size) { + for (auto s : dim_sizes) { + CHECK_GE(s, 0); + } + } + if (!large_size) { // Every size fits in 16 bits; use fast-paths for dims in {1,2,3,4}. uint16* dst = as16()->dims_; From 7df2d83a63edc34bf051e329e08fe01d68538a36 Mon Sep 17 00:00:00 2001 From: Jian Li Date: Mon, 9 Nov 2020 19:37:52 -0800 Subject: [PATCH 087/220] Add risc max op register. 
PiperOrigin-RevId: 341534052 Change-Id: I58e27e65bfa3d88667b99587c6b3a9a960688b33 --- .../api_def/base_api/api_def_RiscMax.pbtxt | 11 +++++ .../core/kernels/risc/experimental/BUILD | 11 +++++ .../kernels/risc/experimental/risc_max_op.cc | 48 +++++++++++++++++++ tensorflow/core/ops/risc_ops.cc | 7 +++ tensorflow/python/ops/risc/risc_grad.py | 7 +++ tensorflow/python/ops/risc/risc_ops.py | 4 ++ 6 files changed, 88 insertions(+) create mode 100644 tensorflow/core/api_def/base_api/api_def_RiscMax.pbtxt create mode 100644 tensorflow/core/kernels/risc/experimental/risc_max_op.cc diff --git a/tensorflow/core/api_def/base_api/api_def_RiscMax.pbtxt b/tensorflow/core/api_def/base_api/api_def_RiscMax.pbtxt new file mode 100644 index 00000000000..7bcf020e844 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_RiscMax.pbtxt @@ -0,0 +1,11 @@ +op { + graph_op_name: "RiscMax" + visibility: HIDDEN + summary: "Returns max(x, y) element-wise." + description: < +class RiscMaxOp : public OpKernel { + public: + explicit RiscMaxOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + // TODO(b/171294012): Implement RiscMax op. 
+ } +}; + +#define REGISTER_CPU(T) \ + REGISTER_KERNEL_BUILDER( \ + Name("RiscMax").Device(DEVICE_CPU).TypeConstraint("T"), \ + RiscMaxOp); + +REGISTER_CPU(bfloat16); +REGISTER_CPU(Eigen::half); +REGISTER_CPU(float); +REGISTER_CPU(double); + +} // namespace experimental +} // namespace risc +} // namespace tensorflow diff --git a/tensorflow/core/ops/risc_ops.cc b/tensorflow/core/ops/risc_ops.cc index a5b1e37fa84..f60f65b7dbc 100644 --- a/tensorflow/core/ops/risc_ops.cc +++ b/tensorflow/core/ops/risc_ops.cc @@ -41,4 +41,11 @@ REGISTER_OP("RiscConv") .SetShapeFn(shape_inference::UnknownShape) .Attr("dilations: list(int) = [1, 1, 1, 1]"); +REGISTER_OP("RiscMax") + .Input("x: T") + .Input("y: T") + .Output("max: T") + .Attr("T: {bfloat16, half, float, double}") + .SetShapeFn(shape_inference::UnchangedShape); + } // namespace tensorflow diff --git a/tensorflow/python/ops/risc/risc_grad.py b/tensorflow/python/ops/risc/risc_grad.py index 5c0f76ba3a4..2d006025b29 100644 --- a/tensorflow/python/ops/risc/risc_grad.py +++ b/tensorflow/python/ops/risc/risc_grad.py @@ -35,3 +35,10 @@ def _RiscConvGrad(_, grad): # pylint: disable=unused-argument # TODO(b/171294012): Implement gradient of RISC with RISC ops. return None, None + + +@ops.RegisterGradient("RiscMax") +def _RiscMaxGrad(_, grad): + # pylint: disable=unused-argument + # TODO(b/171294012): Implement gradient of RISC with RISC ops. 
+ return None, None diff --git a/tensorflow/python/ops/risc/risc_ops.py b/tensorflow/python/ops/risc/risc_ops.py index f59e42dbf6e..be16a50b3ae 100644 --- a/tensorflow/python/ops/risc/risc_ops.py +++ b/tensorflow/python/ops/risc/risc_ops.py @@ -47,3 +47,7 @@ def risc_conv(x, data_format=data_format, dilations=dilations, name=name) + + +def risc_max(input_lhs, input_rhs, name='RISC_MAX'): + return gen_risc_ops.risc_max(input_lhs, input_rhs, name=name) From 8dac266a2dc28d19bf034e901fd9c28ed677c6a1 Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Mon, 9 Nov 2020 19:59:03 -0800 Subject: [PATCH 088/220] Remove usage of internal convert_n_to_tensor PiperOrigin-RevId: 341536375 Change-Id: If893759e6be89b605cd1edff5402670e8ba2c9bb --- tensorflow/python/keras/metrics_test.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/metrics_test.py b/tensorflow/python/keras/metrics_test.py index 1ce86e0f355..af3b9e2140b 100644 --- a/tensorflow/python/keras/metrics_test.py +++ b/tensorflow/python/keras/metrics_test.py @@ -191,7 +191,10 @@ class MeanTest(keras_parameterized.TestCase): self.assertEqual(self.evaluate(m.count), 1) # check update_state() and result() + state accumulation + tensor input - update_op = m.update_state(ops.convert_n_to_tensor([1, 5])) + update_op = m.update_state([ + ops.convert_to_tensor_v2_with_dispatch(1), + ops.convert_to_tensor_v2_with_dispatch(5) + ]) self.evaluate(update_op) self.assertAlmostEqual(self.evaluate(m.result()), 106 / 3, 2) self.assertEqual(self.evaluate(m.total), 106) # 100 + 1 + 5 @@ -1414,7 +1417,10 @@ class MeanTensorTest(test.TestCase, parameterized.TestCase): self.assertAllClose(self.evaluate(m.count), [1, 1]) # check update_state() and result() + state accumulation + tensor input - update_op = m.update_state(ops.convert_n_to_tensor([1, 5])) + update_op = m.update_state([ + ops.convert_to_tensor_v2_with_dispatch(1), + ops.convert_to_tensor_v2_with_dispatch(5) + ]) 
self.evaluate(update_op) self.assertAllClose(self.evaluate(m.result()), [50.5, 22.5]) self.assertAllClose(self.evaluate(m.total), [101, 45]) From 039cb9d993558cadb9e9a83e785517825677665d Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Mon, 9 Nov 2020 20:00:46 -0800 Subject: [PATCH 089/220] Remove usage of internal dtypes.float32_ref from Keras PiperOrigin-RevId: 341536551 Change-Id: I73af40e040e935f34f3f9eccbca78a14270145e1 --- tensorflow/python/keras/legacy_tf_layers/normalization_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/legacy_tf_layers/normalization_test.py b/tensorflow/python/keras/legacy_tf_layers/normalization_test.py index 668fab885cc..6b8d4cab4ab 100644 --- a/tensorflow/python/keras/legacy_tf_layers/normalization_test.py +++ b/tensorflow/python/keras/legacy_tf_layers/normalization_test.py @@ -301,7 +301,7 @@ class BNTest(test.TestCase): self.assertEqual(len(bn.trainable_variables), 2) self.assertEqual(len(bn.non_trainable_variables), 2) for var in bn.variables: - self.assertEqual(var.dtype, dtypes.float32_ref) + self.assertTrue(var.dtype._is_ref_dtype) # Test that updates were created and added to UPDATE_OPS. 
self.assertEqual(len(bn.updates), 2) From 0027d1b3c379fadef3c2f62c4f8e56f0b01ff8be Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Mon, 9 Nov 2020 20:02:30 -0800 Subject: [PATCH 090/220] Remove usage of internal sparse_tensor.is_sparse from keras PiperOrigin-RevId: 341536761 Change-Id: Iaf7c46746bd5245e15ddf963fa014aeccad300e1 --- tensorflow/python/keras/engine/base_layer_utils.py | 3 ++- tensorflow/python/keras/engine/base_preprocessing_layer.py | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/keras/engine/base_layer_utils.py b/tensorflow/python/keras/engine/base_layer_utils.py index 8755be24c57..399726f82ef 100644 --- a/tensorflow/python/keras/engine/base_layer_utils.py +++ b/tensorflow/python/keras/engine/base_layer_utils.py @@ -213,7 +213,8 @@ def _create_keras_history_helper(tensors, processed_ops, created_layers): for tensor in tensor_list: if getattr(tensor, '_keras_history', None) is not None: continue - if sparse_tensor.is_sparse(tensor): + if isinstance( + tensor, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)): sparse_ops.append(tensor.op) continue if tf_utils.is_ragged(tensor): diff --git a/tensorflow/python/keras/engine/base_preprocessing_layer.py b/tensorflow/python/keras/engine/base_preprocessing_layer.py index cbdf7b53e10..09fca11bd59 100644 --- a/tensorflow/python/keras/engine/base_preprocessing_layer.py +++ b/tensorflow/python/keras/engine/base_preprocessing_layer.py @@ -265,8 +265,6 @@ def convert_to_list(values, sparse_default_value=None): values = K.get_session(values).run(values) values = values.to_list() - # TODO(momernick): Add a sparse_tensor.is_sparse() method to replace this - # check. if isinstance(values, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)): if sparse_default_value is None: From 85f39a8604ddb75188f0a2547e3652a78eab6d53 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 9 Nov 2020 20:17:50 -0800 Subject: [PATCH 091/220] Update ops-related pbtxt files. PiperOrigin-RevId: 341538364 Change-Id: Ic9e420fd54b0149f59a79d4c0a8439cd43958307 --- .../ops/compat/ops_history_v2/RiscMax.pbtxt | 27 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 27 +++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 tensorflow/core/ops/compat/ops_history_v2/RiscMax.pbtxt diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscMax.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscMax.pbtxt new file mode 100644 index 00000000000..11c4517d756 --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscMax.pbtxt @@ -0,0 +1,27 @@ +op { + name: "RiscMax" + input_arg { + name: "x" + type_attr: "T" + } + input_arg { + name: "y" + type_attr: "T" + } + output_arg { + name: "max" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_BFLOAT16 + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + } + } + } +} diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 9a3ea84fc18..aa458f4b764 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -41399,6 +41399,33 @@ op { } } } +op { + name: "RiscMax" + input_arg { + name: "x" + type_attr: "T" + } + input_arg { + name: "y" + type_attr: "T" + } + output_arg { + name: "max" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_BFLOAT16 + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + } + } + } +} op { name: "RngReadAndSkip" input_arg { From 852bf4bada3ea07c1941bfbefd0140f7a575a229 Mon Sep 17 00:00:00 2001 From: William Chargin Date: Mon, 9 Nov 2020 20:45:21 -0800 Subject: [PATCH 092/220] Fix end-of-file newline in `types.proto` PiperOrigin-RevId: 341541235 Change-Id: I51ca7c4328d3820e11af34d3596f1543a753dcc6 --- tensorflow/core/framework/types.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tensorflow/core/framework/types.proto b/tensorflow/core/framework/types.proto index 61549ae08ce..01b598591e4 100644 --- a/tensorflow/core/framework/types.proto +++ b/tensorflow/core/framework/types.proto @@ -84,4 +84,4 @@ enum SpecializedType { ST_INVALID = 0; // "tensorflow::TensorList" in the variant type registry. ST_TENSOR_LIST = 1; -} \ No newline at end of file +} From f2c163919f51657ec0684bf415b3821af1afdc4f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Nov 2020 20:46:51 -0800 Subject: [PATCH 093/220] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 341541392 Change-Id: Ib3245dd6b7868bfebbeb1925c603e9d0f9f58f5e --- tensorflow/go/op/wrappers.go | 160 ++++++++++++++++++++--------------- 1 file changed, 90 insertions(+), 70 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index d65bd33f48f..676526f0d1c 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -14585,6 +14585,23 @@ func ShardedFilespec(scope *Scope, basename tf.Output, num_shards tf.Output) (fi return op.Output(0) } +// Generate a sharded filename. The filename is printf formatted as +// +// %s-%05d-of-%05d, basename, shard, num_shards. +func ShardedFilename(scope *Scope, basename tf.Output, shard tf.Output, num_shards tf.Output) (filename tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ShardedFilename", + Input: []tf.Input{ + basename, shard, num_shards, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Saves the input tensors to disk. // // The size of `tensor_names` must match the number of tensors in `data`. `data[i]` @@ -15496,59 +15513,6 @@ func MatrixTriangularSolve(scope *Scope, matrix tf.Output, rhs tf.Output, option return op.Output(0) } -// SelfAdjointEigV2Attr is an optional argument to SelfAdjointEigV2. 
-type SelfAdjointEigV2Attr func(optionalAttr) - -// SelfAdjointEigV2ComputeV sets the optional compute_v attribute to value. -// -// value: If `True` then eigenvectors will be computed and returned in `v`. -// Otherwise, only the eigenvalues will be computed. -// If not specified, defaults to true -func SelfAdjointEigV2ComputeV(value bool) SelfAdjointEigV2Attr { - return func(m optionalAttr) { - m["compute_v"] = value - } -} - -// Computes the eigen decomposition of one or more square self-adjoint matrices. -// -// Computes the eigenvalues and (optionally) eigenvectors of each inner matrix in -// `input` such that `input[..., :, :] = v[..., :, :] * diag(e[..., :])`. The eigenvalues -// are sorted in non-decreasing order. -// -// ```python -// # a is a tensor. -// # e is a tensor of eigenvalues. -// # v is a tensor of eigenvectors. -// e, v = self_adjoint_eig(a) -// e = self_adjoint_eig(a, compute_v=False) -// ``` -// -// Arguments: -// input: `Tensor` input of shape `[N, N]`. -// -// Returns: -// e: Eigenvalues. Shape is `[N]`. -// v: Eigenvectors. Shape is `[N, N]`. -func SelfAdjointEigV2(scope *Scope, input tf.Output, optional ...SelfAdjointEigV2Attr) (e tf.Output, v tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SelfAdjointEigV2", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - // Computes the Eigen Decomposition of a batch of square self-adjoint matrices. // // DEPRECATED at GraphDef version 11: Use SelfAdjointEigV2 instead. @@ -23478,23 +23442,6 @@ func Cast(scope *Scope, x tf.Output, DstT tf.DataType, optional ...CastAttr) (y return op.Output(0) } -// Generate a sharded filename. The filename is printf formatted as -// -// %s-%05d-of-%05d, basename, shard, num_shards. 
-func ShardedFilename(scope *Scope, basename tf.Output, shard tf.Output, num_shards tf.Output) (filename tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ShardedFilename", - Input: []tf.Input{ - basename, shard, num_shards, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Elementwise computes the bitwise OR of `x` and `y`. // // The result will have those bits set, that are set in `x`, `y` or both. The @@ -37373,6 +37320,26 @@ func Erfc(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } +// Returns max(x, y) element-wise. +// +// *NOTE*: `RiscMax` does not supports broadcasting. +// +// Given two input tensors, the `tf.risc_max` operation computes the maximum for every element in the tensor. +// +func RiscMax(scope *Scope, x tf.Output, y tf.Output) (max tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "RiscMax", + Input: []tf.Input{ + x, y, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // RandomUniformIntAttr is an optional argument to RandomUniformInt. type RandomUniformIntAttr func(optionalAttr) @@ -45067,6 +45034,59 @@ func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear t return scope.AddOperation(opspec) } +// SelfAdjointEigV2Attr is an optional argument to SelfAdjointEigV2. +type SelfAdjointEigV2Attr func(optionalAttr) + +// SelfAdjointEigV2ComputeV sets the optional compute_v attribute to value. +// +// value: If `True` then eigenvectors will be computed and returned in `v`. +// Otherwise, only the eigenvalues will be computed. +// If not specified, defaults to true +func SelfAdjointEigV2ComputeV(value bool) SelfAdjointEigV2Attr { + return func(m optionalAttr) { + m["compute_v"] = value + } +} + +// Computes the eigen decomposition of one or more square self-adjoint matrices. 
+// +// Computes the eigenvalues and (optionally) eigenvectors of each inner matrix in +// `input` such that `input[..., :, :] = v[..., :, :] * diag(e[..., :])`. The eigenvalues +// are sorted in non-decreasing order. +// +// ```python +// # a is a tensor. +// # e is a tensor of eigenvalues. +// # v is a tensor of eigenvectors. +// e, v = self_adjoint_eig(a) +// e = self_adjoint_eig(a, compute_v=False) +// ``` +// +// Arguments: +// input: `Tensor` input of shape `[N, N]`. +// +// Returns: +// e: Eigenvalues. Shape is `[N]`. +// v: Eigenvectors. Shape is `[N, N]`. +func SelfAdjointEigV2(scope *Scope, input tf.Output, optional ...SelfAdjointEigV2Attr) (e tf.Output, v tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "SelfAdjointEigV2", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + // Computes softmax cross entropy cost and gradients to backpropagate. // // Unlike `SoftmaxCrossEntropyWithLogits`, this operation does not accept From 312e6bacca8377d94ca4f62ca899683817728c67 Mon Sep 17 00:00:00 2001 From: Deven Desai <36858332+deven-amd@users.noreply.github.com> Date: Tue, 10 Nov 2020 00:50:23 -0800 Subject: [PATCH 094/220] PR #44471: [ROCm] Update to use ROCm 3.9 (when building TF with --config=rocm) Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/44471 PR https://github.com/tensorflow/tensorflow/pull/43636 is a pre-requisite for this PR. For the time being, this PR includes commits from it's pre-req as well. Once the pre-req PR is merged, I will rebase this PR to remove those commits. 
-------------------------------------- /cc @cheshire @chsigg @nvining-work Copybara import of the project: -- 3f0d378c14f55ac850ace17ac154e2333169329b by Deven Desai : Adding #defines for ROCm / MIOpen / HIP Runtime version numbers This PR/commit introduces the following #defines in the `rocm/rocm_config.h` file ``` #define TF_ROCM_VERSION #define TF_MIOPEN_VERSION #define TF_HIPRUNTIME_VERSION ``` These #defines should be used within TF code to add ROCm/MIOpen/HIP Runtime version specific code. Details on how we go about determining these version numbers can be found on the following wiki-page https://github.com/ROCmSoftwarePlatform/tensorflow-internal/wiki/How-to-add-ROCm-version-specific-code-changes-in-the-TensorFlow-code%3F A new script `find_rocm_config.py` is being added by this commit. This script does all the work of determining the version number information and it is pretty easy to extend it to query more information about the ROCm install. The information collected by the script is available to `rocm_configure.bzl` and hence can be used to add version specific code in `rocm_configure.bzl` as well. -- 922e0e556c4f31f7ff8da1053f014964d01c0859 by Deven Desai : Updating Dockerfile.rocm to use ROCm 3.9 -- cc0b4ae28218a83b3cc262ac83d0b2cf476939c8 by Deven Desai : Changing CI scripts to use ROCm 3.9 -- fbfdb64c3375f79674a4f56433f944e1e4fd6b6e by Deven Desai : Updating rocm_config.py to account for the new location of the rocblas version header file (in ROCm 3.8) -- 3f191faf8b8f2a0111bc386f41316079cad4aaaa by Deven Desai : Removing references to TENSORFLOW_COMPILER_IS_HIP_CLANG Now that we are way past the switch to use ROCm 3.5 and above (i.e. hip-clang), the code within `#ifdef TENSORFLOW_COMPILER_IS_HIP_CLANG` is always enabled, and the code within the corresponding `#else` blocks is dead code.
This commit removes the references to `#ifdef TENSORFLOW_COMPILER_IS_HIP_CLANG` and their corresponding `#else` blocks -- 9a4841c9bb8117e8228946be1f3752bdaea4a359 by Deven Desai : Removing -DTENSORFLOW_COMPILER_IS_HIP_CLANG from the list of compile flags -- 745e2ad6db4282f5efcfef3155d9a46d9235dbf6 by Deven Desai : Removing deadcode for the ROCm platform within the third_party/gpus dir -- c96dc03986636badce7dbd87fb85cf26dff7a43b by Deven Desai : Updating XLA code to account for the device lib files location change in ROCm 3.9 The location of the ROCm device lib files is changing in ROCm 3.9 Current (ROCm 3.8 and before) location is $ROCM_PATH/lib ``` root@ixt-rack-04:/opt/rocm-3.8.0# find . -name *.bc ./lib/oclc_isa_version_701.amdgcn.bc ./lib/ocml.amdgcn.bc ./lib/oclc_daz_opt_on.amdgcn.bc ./lib/oclc_isa_version_700.amdgcn.bc ./lib/oclc_isa_version_810.amdgcn.bc ./lib/oclc_unsafe_math_off.amdgcn.bc ./lib/oclc_wavefrontsize64_off.amdgcn.bc ./lib/oclc_isa_version_803.amdgcn.bc ./lib/oclc_isa_version_1011.amdgcn.bc ./lib/oclc_isa_version_1012.amdgcn.bc ./lib/opencl.amdgcn.bc ./lib/oclc_unsafe_math_on.amdgcn.bc ./lib/oclc_isa_version_1010.amdgcn.bc ./lib/oclc_finite_only_off.amdgcn.bc ./lib/oclc_correctly_rounded_sqrt_on.amdgcn.bc ./lib/oclc_daz_opt_off.amdgcn.bc ./lib/oclc_isa_version_802.amdgcn.bc ./lib/ockl.amdgcn.bc ./lib/oclc_isa_version_906.amdgcn.bc ./lib/oclc_isa_version_1030.amdgcn.bc ./lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc ./lib/hip.amdgcn.bc ./lib/oclc_isa_version_908.amdgcn.bc ./lib/oclc_isa_version_900.amdgcn.bc ./lib/oclc_isa_version_702.amdgcn.bc ./lib/oclc_wavefrontsize64_on.amdgcn.bc ./lib/hc.amdgcn.bc ./lib/oclc_isa_version_902.amdgcn.bc ./lib/oclc_isa_version_801.amdgcn.bc ./lib/oclc_finite_only_on.amdgcn.bc ./lib/oclc_isa_version_904.amdgcn.bc ``` New (ROCm 3.9 and above) location is $ROCM_PATH/amdgcn/bitcode ``` root@ixt-hq-99:/opt/rocm-3.9.0-3703# find -name *.bc ./amdgcn/bitcode/oclc_isa_version_700.bc ./amdgcn/bitcode/ocml.bc 
./amdgcn/bitcode/oclc_isa_version_1030.bc ./amdgcn/bitcode/oclc_isa_version_1010.bc ./amdgcn/bitcode/oclc_isa_version_904.bc ./amdgcn/bitcode/hip.bc ./amdgcn/bitcode/hc.bc ./amdgcn/bitcode/oclc_daz_opt_off.bc ./amdgcn/bitcode/oclc_wavefrontsize64_off.bc ./amdgcn/bitcode/oclc_wavefrontsize64_on.bc ./amdgcn/bitcode/oclc_isa_version_900.bc ./amdgcn/bitcode/oclc_isa_version_1012.bc ./amdgcn/bitcode/oclc_isa_version_702.bc ./amdgcn/bitcode/oclc_daz_opt_on.bc ./amdgcn/bitcode/oclc_unsafe_math_off.bc ./amdgcn/bitcode/ockl.bc ./amdgcn/bitcode/oclc_isa_version_803.bc ./amdgcn/bitcode/oclc_isa_version_908.bc ./amdgcn/bitcode/oclc_isa_version_802.bc ./amdgcn/bitcode/oclc_correctly_rounded_sqrt_off.bc ./amdgcn/bitcode/oclc_finite_only_on.bc ./amdgcn/bitcode/oclc_isa_version_701.bc ./amdgcn/bitcode/oclc_unsafe_math_on.bc ./amdgcn/bitcode/oclc_isa_version_902.bc ./amdgcn/bitcode/oclc_finite_only_off.bc ./amdgcn/bitcode/opencl.bc ./amdgcn/bitcode/oclc_isa_version_906.bc ./amdgcn/bitcode/oclc_isa_version_810.bc ./amdgcn/bitcode/oclc_isa_version_801.bc ./amdgcn/bitcode/oclc_correctly_rounded_sqrt_on.bc ./amdgcn/bitcode/oclc_isa_version_1011.bc ``` Also note the change in the filename(s) This commit updates the XLA code, that has the device lib path + filename(s) hardcoded, to account for the change in location / filename -- 6f981a91c8d8a349c88b450c2191df9c62b2b38b by Deven Desai : Adding "-fcuda-flush-denormals-to-zero" as a default hipcc option Prior to ROCm 3.8, hipcc (hipclang) flushed denormal values to zero by default. Starting with ROCm 3.8 that is no longer true, denormal values are kept as is. TF expects denormals to be flushed to zero. This is enforced on the CUDA side by explicitly passing the "-fcuda-flush-denormals-to-zero" (see tensorflow.bzl). This commit does the same for the ROCm side.
Also removing the no_rocm tag from the corresponding unit test - //tensorflow/python/kernel_tests:denormal_test_gpu -- 74810439720e0692f81ffb0cc3b97dc6ed50876d by Deven Desai : Fix for TF build failure with ROCm 3.9 (error: call to 'min' is ambiguous) When building TF with ROCm 3.9, we are running into the following compile error ``` In file included from tensorflow/core/kernels/reduction_ops_half_mean_sum.cu.cc:20: ./tensorflow/core/kernels/reduction_gpu_kernels.cu.h:430:9: error: call to 'min' is ambiguous min(blockDim.y, num_rows - blockIdx.y * blockDim.y); ^~~ /opt/rocm-3.9.0-3805/llvm/lib/clang/12.0.0/include/__clang_hip_math.h:1183:23: note: candidate function __DEVICE__ inline int min(int __arg1, int __arg2) { ^ /opt/rocm-3.9.0-3805/llvm/lib/clang/12.0.0/include/__clang_hip_math.h:1197:14: note: candidate function inline float min(float __x, float __y) { return fminf(__x, __y); } ^ /opt/rocm-3.9.0-3805/llvm/lib/clang/12.0.0/include/__clang_hip_math.h:1200:15: note: candidate function inline double min(double __x, double __y) { return fmin(__x, __y); } ^ 1 error generated when compiling for gfx803. ``` The build error seems to be because ROCm 3.9 uses llvm header files from `llvm/lib/clang/12.0.0/include` (ROCm 3.8 uses the `11.0.0` version). `12.0.0` has a new `__clang_hip_math.h` file, which is not present in `11.0.0`. This file has the `min` function overloaded for the `float` and `double` types. The first argument in the call to `min` (which leads to the error) is `blockDim.y` which has a `uint` type, and hence the compiler gets confused as to which overloaded type to resolve to. Previously (i.e. ROCm 3.8 and before) there was only one option (`int`), with ROCm 3.9 there are three (`int`, `float`, and `double`) and hence the error. The "fix" is to explicitly cast the first argument to `int` to remove the ambiguity (the second argument is already an `int` type).
COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/tensorflow/pull/44471 from ROCmSoftwarePlatform:google_upstream_rocm_switch_to_rocm39 74810439720e0692f81ffb0cc3b97dc6ed50876d PiperOrigin-RevId: 341569721 Change-Id: Ia614893881bf8db1ef8901034c35cc585a82dba8 --- .../xla/service/gpu/llvm_gpu_backend/BUILD | 30 ++++---- .../gpu/llvm_gpu_backend/gpu_backend_lib.cc | 15 ++++ tensorflow/core/kernels/conv_2d_gpu.h | 2 +- .../core/kernels/reduction_gpu_kernels.cu.h | 2 +- tensorflow/core/kernels/scan_ops_gpu.h | 2 - tensorflow/core/platform/BUILD | 4 +- .../core/platform/default/rocm_rocdl_path.cc | 6 +- .../core/platform/rocm_rocdl_path_test.cc | 8 +++ tensorflow/core/util/gpu_launch_config.h | 28 -------- tensorflow/tools/ci_build/Dockerfile.rocm | 4 +- .../tools/ci_build/linux/rocm/run_cc_core.sh | 2 +- .../ci_build/linux/rocm/run_csb_tests.sh | 2 +- .../tools/ci_build/linux/rocm/run_py3_core.sh | 2 +- .../tools/ci_build/xla/linux/rocm/run_py3.sh | 2 +- .../bin/crosstool_wrapper_driver_rocm.tpl | 42 ++--------- third_party/gpus/rocm/BUILD.tpl | 1 - third_party/gpus/rocm_configure.bzl | 69 ++----------------- 17 files changed, 65 insertions(+), 156 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/BUILD b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/BUILD index eb6291172fe..c2c9489986c 100644 --- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/BUILD +++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/BUILD @@ -1,5 +1,9 @@ load("//tensorflow/core/platform:rules_cc.bzl", "cc_library") load("//tensorflow:tensorflow.bzl", "tf_cc_test") +load( + "@local_config_rocm//rocm:build_defs.bzl", + "if_rocm_is_configured", +) package( default_visibility = [":friends"], @@ -26,21 +30,11 @@ cc_library( "utils.h", ], deps = [ - "//tensorflow/compiler/xla:status_macros", - "//tensorflow/compiler/xla:statusor", - "//tensorflow/compiler/xla:types", - "//tensorflow/compiler/xla:util", - 
"//tensorflow/compiler/xla/service:hlo_module_config", - "//tensorflow/compiler/xla/service/gpu:gpu_types", - "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - "//tensorflow/core/profiler/lib:traceme", + "@llvm-project//llvm:AMDGPUCodeGen", "@com_google_absl//absl/base", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", - "@llvm-project//llvm:AMDGPUCodeGen", "@llvm-project//llvm:Analysis", "@llvm-project//llvm:BitReader", "@llvm-project//llvm:BitWriter", @@ -54,7 +48,19 @@ cc_library( "@llvm-project//llvm:Scalar", "@llvm-project//llvm:Support", "@llvm-project//llvm:Target", - ], + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla/service:hlo_module_config", + "//tensorflow/compiler/xla/service/gpu:gpu_types", + "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core/profiler/lib:traceme", + ] + if_rocm_is_configured([ + "@local_config_rocm//rocm:rocm_headers", + ]), ) tf_cc_test( diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc index 51583117706..1b16f41ef77 100644 --- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc +++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc @@ -67,6 +67,10 @@ limitations under the License. 
#include "tensorflow/core/profiler/lib/traceme.h" #include "tensorflow/core/util/env_var.h" +#if !defined(PLATFORM_GOOGLE) && TENSORFLOW_USE_ROCM +#include "rocm/rocm_config.h" +#endif + namespace xla { namespace gpu { namespace { @@ -560,11 +564,18 @@ namespace { static std::vector GetROCDLPaths(int amdgpu_version, const string& rocdl_dir_path) { // AMDGPU version-neutral bitcodes. +#if TF_ROCM_VERSION >= 30900 + static std::vector* rocdl_filenames = new std::vector( + {"hc.bc", "opencl.bc", "ocml.bc", "ockl.bc", "oclc_finite_only_off.bc", + "oclc_daz_opt_off.bc", "oclc_correctly_rounded_sqrt_on.bc", + "oclc_unsafe_math_off.bc", "oclc_wavefrontsize64_on.bc"}); +#else static std::vector* rocdl_filenames = new std::vector( {"hc.amdgcn.bc", "opencl.amdgcn.bc", "ocml.amdgcn.bc", "ockl.amdgcn.bc", "oclc_finite_only_off.amdgcn.bc", "oclc_daz_opt_off.amdgcn.bc", "oclc_correctly_rounded_sqrt_on.amdgcn.bc", "oclc_unsafe_math_off.amdgcn.bc", "oclc_wavefrontsize64_on.amdgcn.bc"}); +#endif // Construct full path to ROCDL bitcode libraries. std::vector result; @@ -575,7 +586,11 @@ static std::vector GetROCDLPaths(int amdgpu_version, // Add AMDGPU version-specific bitcodes. result.push_back(tensorflow::io::JoinPath( rocdl_dir_path, +#if TF_ROCM_VERSION >= 30900 + absl::StrCat("oclc_isa_version_", amdgpu_version, ".bc"))); +#else absl::StrCat("oclc_isa_version_", amdgpu_version, ".amdgcn.bc"))); +#endif return result; } diff --git a/tensorflow/core/kernels/conv_2d_gpu.h b/tensorflow/core/kernels/conv_2d_gpu.h index 1ed88ca753c..67126f31e27 100644 --- a/tensorflow/core/kernels/conv_2d_gpu.h +++ b/tensorflow/core/kernels/conv_2d_gpu.h @@ -287,7 +287,7 @@ __global__ void SwapDimension1And2InTensor3UsingTiles( // One extra line in the inner dimension to avoid share memory bank conflict. // This is to mimic the following, but no constructor of T can be invoked. 
// __shared__ T shared_memory_tile[TileSizeI][TileSizeJ + 1]; -#if GOOGLE_CUDA // || TENSORFLOW_COMPILER_IS_HIP_CLANG +#if GOOGLE_CUDA __shared__ __align__( alignof(T)) char shared_mem_raw[TileSizeI * (TileSizeJ + 1) * sizeof(T)]; typedef T(*SharedMemoryTile)[TileSizeJ + 1]; diff --git a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h index fc439a08df1..790fb3af1c8 100644 --- a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h +++ b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h @@ -387,7 +387,7 @@ __global__ __launch_bounds__(1024) void ColumnReduceKernel( // - = // = const int numRowsThisBlock = - min(blockDim.y, num_rows - blockIdx.y * blockDim.y); + min(static_cast(blockDim.y), num_rows - blockIdx.y * blockDim.y); for (int row = 1; row < numRowsThisBlock; ++row) { value_type t = partial_sums[threadIdx.x * (TF_RED_WARPSIZE + 1) + row]; diff --git a/tensorflow/core/kernels/scan_ops_gpu.h b/tensorflow/core/kernels/scan_ops_gpu.h index f99f8af3190..7914b7a1103 100644 --- a/tensorflow/core/kernels/scan_ops_gpu.h +++ b/tensorflow/core/kernels/scan_ops_gpu.h @@ -248,10 +248,8 @@ void LaunchScan(const GPUDevice& d, typename TTypes::ConstTensor in, int num_blocks = dimx * dimz; int ideal_block_size = dimy / items_per_thread; -#if TENSORFLOW_COMPILER_IS_HIP_CLANG const int rocm_threads_per_warp = 64; ideal_block_size = std::max(ideal_block_size, rocm_threads_per_warp); -#endif // There seems to be a bug when the type is not float and block_size 1024. // Launch on the smallest power of 2 block size that we can. 
diff --git a/tensorflow/core/platform/BUILD b/tensorflow/core/platform/BUILD index 418882d1fde..76920c5e55b 100644 --- a/tensorflow/core/platform/BUILD +++ b/tensorflow/core/platform/BUILD @@ -1297,7 +1297,9 @@ tf_cuda_cc_test( "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", - ], + ] + if_rocm_is_configured([ + "@local_config_rocm//rocm:rocm_headers", + ]), ) # -------------------------------------------------------------------------- diff --git a/tensorflow/core/platform/default/rocm_rocdl_path.cc b/tensorflow/core/platform/default/rocm_rocdl_path.cc index 9e9261d26c8..7e43286897c 100644 --- a/tensorflow/core/platform/default/rocm_rocdl_path.cc +++ b/tensorflow/core/platform/default/rocm_rocdl_path.cc @@ -36,10 +36,10 @@ string RocmRoot() { } string RocdlRoot() { -#if TENSORFLOW_COMPILER_IS_HIP_CLANG - return tensorflow::io::JoinPath(tensorflow::RocmRoot(), "lib"); +#if TF_ROCM_VERSION >= 30900 + return tensorflow::io::JoinPath(tensorflow::RocmRoot(), "amdgcn/bitcode"); #else - return tensorflow::io::JoinPath(tensorflow::RocmRoot(), "hcc/lib"); + return tensorflow::io::JoinPath(tensorflow::RocmRoot(), "lib"); #endif } diff --git a/tensorflow/core/platform/rocm_rocdl_path_test.cc b/tensorflow/core/platform/rocm_rocdl_path_test.cc index 166e99bb509..0037b756b92 100644 --- a/tensorflow/core/platform/rocm_rocdl_path_test.cc +++ b/tensorflow/core/platform/rocm_rocdl_path_test.cc @@ -20,6 +20,10 @@ limitations under the License. 
#include "tensorflow/core/platform/path.h" #include "tensorflow/core/platform/test.h" +#if !defined(PLATFORM_GOOGLE) && TENSORFLOW_USE_ROCM +#include "rocm/rocm_config.h" +#endif + namespace tensorflow { #if TENSORFLOW_USE_ROCM @@ -27,7 +31,11 @@ TEST(RocmRocdlPathTest, ROCDLPath) { VLOG(2) << "ROCm-Device-Libs root = " << RocdlRoot(); std::vector rocdl_files; TF_EXPECT_OK(Env::Default()->GetMatchingPaths( +#if TF_ROCM_VERSION >= 30900 + io::JoinPath(RocdlRoot(), "*.bc"), &rocdl_files)); +#else io::JoinPath(RocdlRoot(), "*.amdgcn.bc"), &rocdl_files)); +#endif EXPECT_LT(0, rocdl_files.size()); } #endif diff --git a/tensorflow/core/util/gpu_launch_config.h b/tensorflow/core/util/gpu_launch_config.h index 4c2df39e1a2..0b943e917da 100644 --- a/tensorflow/core/util/gpu_launch_config.h +++ b/tensorflow/core/util/gpu_launch_config.h @@ -168,25 +168,10 @@ GpuLaunchConfig GetGpuLaunchConfig(int work_element_count, block_size_limit); CHECK_EQ(err, cudaSuccess); #elif TENSORFLOW_USE_ROCM -#if TENSORFLOW_COMPILER_IS_HIP_CLANG hipError_t err = hipOccupancyMaxPotentialBlockSize( &block_count, &thread_per_block, func, dynamic_shared_memory_size, block_size_limit); CHECK_EQ(err, hipSuccess); -#else - // Earlier versions of this HIP routine incorrectly returned void. - // TODO re-enable hipError_t error checking when HIP is fixed. 
- // ROCm interface uses unsigned int, convert after checking - uint32_t block_count_uint = 0; - uint32_t thread_per_block_uint = 0; - CHECK_GE(block_size_limit, 0); - uint32_t block_size_limit_uint = static_cast(block_size_limit); - hipOccupancyMaxPotentialBlockSize(&block_count_uint, &thread_per_block_uint, - func, dynamic_shared_memory_size, - block_size_limit_uint); - block_count = static_cast(block_count_uint); - thread_per_block = static_cast(thread_per_block_uint); -#endif #endif block_count = @@ -216,22 +201,9 @@ GpuLaunchConfig GetGpuLaunchConfigFixedBlockSize( &block_count, func, fixed_block_size, dynamic_shared_memory_size); CHECK_EQ(err, cudaSuccess); #elif TENSORFLOW_USE_ROCM -#if TENSORFLOW_COMPILER_IS_HIP_CLANG hipError_t err = hipOccupancyMaxActiveBlocksPerMultiprocessor( &block_count, func, fixed_block_size, dynamic_shared_memory_size); CHECK_EQ(err, hipSuccess); -#else - // Apply the heuristic in GetGpuLaunchConfig(int, const Eigen::GpuDevice&) - // that the kernel is quite simple and will largely be memory-limited. - const int physical_thread_count = std::min( - d.getNumGpuMultiProcessors() * d.maxGpuThreadsPerMultiProcessor(), - work_element_count); - // Assume the kernel be simple enough that it is okay to use 1024 threads - // per workgroup. 
- int thread_per_block = std::min(1024, d.maxGpuThreadsPerBlock()); - block_count = std::min(DivUp(physical_thread_count, thread_per_block), - d.getNumGpuMultiProcessors()); -#endif #endif block_count = std::min(block_count * d.getNumGpuMultiProcessors(), DivUp(work_element_count, fixed_block_size)); diff --git a/tensorflow/tools/ci_build/Dockerfile.rocm b/tensorflow/tools/ci_build/Dockerfile.rocm index a72915504be..89293c54e4a 100644 --- a/tensorflow/tools/ci_build/Dockerfile.rocm +++ b/tensorflow/tools/ci_build/Dockerfile.rocm @@ -3,10 +3,10 @@ FROM ubuntu:bionic MAINTAINER Jeff Poznanovic -ARG ROCM_DEB_REPO=http://repo.radeon.com/rocm/apt/3.7/ +ARG ROCM_DEB_REPO=http://repo.radeon.com/rocm/apt/3.9/ ARG ROCM_BUILD_NAME=xenial ARG ROCM_BUILD_NUM=main -ARG ROCM_PATH=/opt/rocm-3.7.0 +ARG ROCM_PATH=/opt/rocm-3.9.0 ENV DEBIAN_FRONTEND noninteractive ENV TF_NEED_ROCM 1 diff --git a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh index 92d21cb133b..44f60b53070 100755 --- a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh +++ b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh @@ -28,7 +28,7 @@ echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} echo "" # First positional argument (if any) specifies the ROCM_INSTALL_DIR -ROCM_INSTALL_DIR=/opt/rocm-3.7.0 +ROCM_INSTALL_DIR=/opt/rocm-3.9.0 if [[ -n $1 ]]; then ROCM_INSTALL_DIR=$1 fi diff --git a/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh b/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh index 80c0686e647..f6ed1bef84f 100755 --- a/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh +++ b/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh @@ -28,7 +28,7 @@ echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} echo "" # First positional argument (if any) specifies the ROCM_INSTALL_DIR -ROCM_INSTALL_DIR=/opt/rocm-3.7.0 +ROCM_INSTALL_DIR=/opt/rocm-3.9.0 if [[ -n $1 ]]; then 
ROCM_INSTALL_DIR=$1 fi diff --git a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh index 3a09081dd6a..586ec1520ad 100755 --- a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh +++ b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh @@ -28,7 +28,7 @@ echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} echo "" # First positional argument (if any) specifies the ROCM_INSTALL_DIR -ROCM_INSTALL_DIR=/opt/rocm-3.7.0 +ROCM_INSTALL_DIR=/opt/rocm-3.9.0 if [[ -n $1 ]]; then ROCM_INSTALL_DIR=$1 fi diff --git a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh index d623b77d533..dc9a8b50ee1 100755 --- a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh +++ b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh @@ -28,7 +28,7 @@ echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} echo "" # First positional argument (if any) specifies the ROCM_INSTALL_DIR -ROCM_INSTALL_DIR=/opt/rocm-3.7.0 +ROCM_INSTALL_DIR=/opt/rocm-3.9.0 if [[ -n $1 ]]; then ROCM_INSTALL_DIR=$1 fi diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl index d5bfe78c644..161bc7c8df4 100755 --- a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl +++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl @@ -26,12 +26,9 @@ import pipes # Template values set by rocm_configure.bzl. 
CPU_COMPILER = ('%{cpu_compiler}') -GCC_HOST_COMPILER_PATH = ('%{gcc_host_compiler_path}') HIPCC_PATH = '%{hipcc_path}' -PREFIX_DIR = os.path.dirname(GCC_HOST_COMPILER_PATH) HIPCC_ENV = '%{hipcc_env}' -HIPCC_IS_HIPCLANG = '%{hipcc_is_hipclang}'=="True" HIP_RUNTIME_PATH = '%{hip_runtime_path}' HIP_RUNTIME_LIBRARY = '%{hip_runtime_library}' ROCR_RUNTIME_PATH = '%{rocr_runtime_path}' @@ -98,27 +95,6 @@ def GetHostCompilerOptions(argv): return opts -def GetHipccOptions(argv): - """Collect the -hipcc_options values from argv. - - Args: - argv: A list of strings, possibly the argv passed to main(). - - Returns: - The string that can be passed directly to hipcc. - """ - - parser = ArgumentParser() - parser.add_argument('-hipcc_options', nargs='*', action='append') - - args, _ = parser.parse_known_args(argv) - - if args.hipcc_options: - options = _update_options(sum(args.hipcc_options, [])) - return ' '.join(['--'+a for a in options]) - return '' - - def system(cmd): """Invokes cmd with os.system(). @@ -148,7 +124,6 @@ def InvokeHipcc(argv, log=False): """ host_compiler_options = GetHostCompilerOptions(argv) - hipcc_compiler_options = GetHipccOptions(argv) opt_option = GetOptionValue(argv, 'O') m_options = GetOptionValue(argv, 'm') m_options = ''.join([' -m' + m for m in m_options if m in ['32', '64']]) @@ -193,14 +168,13 @@ def InvokeHipcc(argv, log=False): # Otherwise, we get build error. # Also we need to retain warning about uninitialised shared variable as # warning only, even when -Werror option is specified. - if HIPCC_IS_HIPCLANG: - hipccopts += ' --include=hip/hip_runtime.h ' - hipccopts += ' ' + hipcc_compiler_options + hipccopts += ' --include=hip/hip_runtime.h ' # Use -fno-gpu-rdc by default for early GPU kernel finalization # This flag would trigger GPU kernels be generated at compile time, instead # of link time. This allows the default host compiler (gcc) be used as the # linker for TensorFlow on ROCm platform. 
hipccopts += ' -fno-gpu-rdc ' + hipccopts += ' -fcuda-flush-denormals-to-zero ' hipccopts += undefines hipccopts += defines hipccopts += std_options @@ -211,22 +185,19 @@ def InvokeHipcc(argv, log=False): depfile = depfiles[0] cmd = (HIPCC_PATH + ' ' + hipccopts + host_compiler_options + - ' ' + GCC_HOST_COMPILER_PATH + ' -I .' + includes + ' ' + srcs + ' -M -o ' + depfile) + cmd = HIPCC_ENV.replace(';', ' ') + ' ' + cmd if log: Log(cmd) + if VERBOSE: print(cmd) exit_status = os.system(cmd) if exit_status != 0: return exit_status cmd = (HIPCC_PATH + ' ' + hipccopts + host_compiler_options + ' -fPIC' + - ' ' + GCC_HOST_COMPILER_PATH + ' -I .' + opt + includes + ' -c ' + srcs + out) - # TODO(zhengxq): for some reason, 'gcc' needs this help to find 'as'. - # Need to investigate and fix. - cmd = 'PATH=' + PREFIX_DIR + ':$PATH '\ - + HIPCC_ENV.replace(';', ' ') + ' '\ + cmd = HIPCC_ENV.replace(';', ' ') + ' '\ + cmd if log: Log(cmd) if VERBOSE: print(cmd) @@ -268,8 +239,7 @@ def main(): gpu_linker_flags.append('-L' + HIP_RUNTIME_PATH) gpu_linker_flags.append('-Wl,-rpath=' + HIP_RUNTIME_PATH) gpu_linker_flags.append('-l' + HIP_RUNTIME_LIBRARY) - if HIPCC_IS_HIPCLANG: - gpu_linker_flags.append("-lrt") + gpu_linker_flags.append("-lrt") if VERBOSE: print(' '.join([CPU_COMPILER] + gpu_linker_flags)) return subprocess.call([CPU_COMPILER] + gpu_linker_flags) diff --git a/third_party/gpus/rocm/BUILD.tpl b/third_party/gpus/rocm/BUILD.tpl index d2533a08de1..ecbb4b5cebc 100644 --- a/third_party/gpus/rocm/BUILD.tpl +++ b/third_party/gpus/rocm/BUILD.tpl @@ -147,7 +147,6 @@ filegroup( name = "rocm_root", srcs = [ "rocm/bin/clang-offload-bundler", - "rocm/bin/bin2c.py", ], ) diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl index 80e35681b6b..10f03bfec24 100644 --- a/third_party/gpus/rocm_configure.bzl +++ b/third_party/gpus/rocm_configure.bzl @@ -186,6 +186,7 @@ def _rocm_include_path(repository_ctx, rocm_config, bash_bin): 
inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/9.0.0/include") inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/10.0.0/include") inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/11.0.0/include") + inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/12.0.0/include") # Support hcc based off clang 10.0.0 (for ROCm 3.3) inc_dirs.append(rocm_toolkit_path + "/hcc/compiler/lib/clang/10.0.0/include/") @@ -215,7 +216,7 @@ def _amdgpu_targets(repository_ctx, rocm_toolkit_path, bash_bin): amdgpu_targets_str = ",".join(targets) amdgpu_targets = amdgpu_targets_str.split(",") for amdgpu_target in amdgpu_targets: - if amdgpu_target[:3] != "gfx" or not amdgpu_target[3:].isdigit(): + if amdgpu_target[:3] != "gfx": auto_configure_fail("Invalid AMDGPU target: %s" % amdgpu_target) return amdgpu_targets @@ -244,51 +245,6 @@ def _hipcc_env(repository_ctx): hipcc_env = (hipcc_env + " " + name + "=\"" + env_value + "\";") return hipcc_env.strip() -def _hipcc_is_hipclang(repository_ctx, rocm_config, bash_bin): - """Returns if hipcc is based on hip-clang toolchain. - - Args: - repository_ctx: The repository context. - rocm_config: The path to the hip compiler. - bash_bin: the path to the bash interpreter - - Returns: - A string "True" if hipcc is based on hip-clang toolchain. - The functions returns "False" if not (ie: based on HIP/HCC toolchain). 
- """ - - # check user-defined hip-clang environment variables - for name in ["HIP_CLANG_PATH", "HIP_VDI_HOME"]: - if get_host_environ(repository_ctx, name): - return "True" - - # grep for "HIP_COMPILER=clang" in /opt/rocm/hip/lib/.hipInfo - cmd = "grep HIP_COMPILER=clang %s/hip/lib/.hipInfo || true" % rocm_config.rocm_toolkit_path - grep_result = execute(repository_ctx, [bash_bin, "-c", cmd], empty_stdout_fine = True) - result = grep_result.stdout.strip() - if result == "HIP_COMPILER=clang": - return "True" - return "False" - -def _if_hipcc_is_hipclang(repository_ctx, rocm_config, bash_bin, if_true, if_false = []): - """ - Returns either the if_true or if_false arg based on whether hipcc - is based on the hip-clang toolchain - - Args : - repository_ctx: The repository context. - rocm_config: The path to the hip compiler. - if_true : value to return if hipcc is hip-clang based - if_false : value to return if hipcc is not hip-clang based - (optional, defaults to empty list) - - Returns : - either the if_true arg or the of_False arg - """ - if _hipcc_is_hipclang(repository_ctx, rocm_config, bash_bin) == "True": - return if_true - return if_false - def _crosstool_verbose(repository_ctx): """Returns the environment variable value CROSSTOOL_VERBOSE. 
@@ -633,13 +589,7 @@ def _create_local_rocm_repository(repository_ctx): outs = rocm_lib_outs, )) - clang_offload_bundler_path = rocm_toolkit_path + _if_hipcc_is_hipclang( - repository_ctx, - rocm_config, - bash_bin, - "/llvm/bin/", - "/hcc/bin/", - ) + "clang-offload-bundler" + clang_offload_bundler_path = rocm_toolkit_path + "/llvm/bin/clang-offload-bundler" # copy files mentioned in third_party/gpus/rocm/BUILD copy_rules.append(make_copy_files_rule( @@ -712,17 +662,7 @@ def _create_local_rocm_repository(repository_ctx): "-DTENSORFLOW_USE_ROCM=1", "-D__HIP_PLATFORM_HCC__", "-DEIGEN_USE_HIP", - ] + _if_hipcc_is_hipclang(repository_ctx, rocm_config, bash_bin, [ - # - # define "TENSORFLOW_COMPILER_IS_HIP_CLANG" when we are using clang - # based hipcc to compile/build tensorflow - # - # Note that this #define should not be used to check whether or not - # tensorflow is being built with ROCm support - # (only TENSORFLOW_USE_ROCM should be used for that purpose) - # - "-DTENSORFLOW_COMPILER_IS_HIP_CLANG=1", - ])) + ]) rocm_defines["%{host_compiler_path}"] = "clang/bin/crosstool_wrapper_driver_is_not_gcc" @@ -753,7 +693,6 @@ def _create_local_rocm_repository(repository_ctx): "%{cpu_compiler}": str(cc), "%{hipcc_path}": rocm_config.rocm_toolkit_path + "/hip/bin/hipcc", "%{hipcc_env}": _hipcc_env(repository_ctx), - "%{hipcc_is_hipclang}": _hipcc_is_hipclang(repository_ctx, rocm_config, bash_bin), "%{rocr_runtime_path}": rocm_config.rocm_toolkit_path + "/lib", "%{rocr_runtime_library}": "hsa-runtime64", "%{hip_runtime_path}": rocm_config.rocm_toolkit_path + "/hip/lib", From 2fb081e0ba60cd6913ed86abe49d3103624fc118 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Tue, 10 Nov 2020 00:54:01 -0800 Subject: [PATCH 095/220] Add missing dependency for mlir_bridge_rollout_policy. 
PiperOrigin-RevId: 341570168 Change-Id: I880e90f4f8b6d82686ccfe5deb5231215eecbaef --- tensorflow/compiler/mlir/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/mlir/BUILD b/tensorflow/compiler/mlir/BUILD index 258b08d30ad..129e8230b95 100644 --- a/tensorflow/compiler/mlir/BUILD +++ b/tensorflow/compiler/mlir/BUILD @@ -198,6 +198,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ "//tensorflow/compiler/jit:flags", + "//tensorflow/core:graph", "//tensorflow/core:protos_all_cc", "@com_google_absl//absl/types:optional", ], From 8d6304b20e4d4733e516fbe4eee3f9a4c26a78ae Mon Sep 17 00:00:00 2001 From: Renjie Liu Date: Tue, 10 Nov 2020 01:00:30 -0800 Subject: [PATCH 096/220] add tflite::tools::prefix to delegate provider, this prevents custom delegate provider not under tflite::tools namespace issue PiperOrigin-RevId: 341570936 Change-Id: I462af4be341f15d5e019743fbfb3d9917feadf13 --- tensorflow/lite/tools/delegates/delegate_provider.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/tools/delegates/delegate_provider.h b/tensorflow/lite/tools/delegates/delegate_provider.h index 80bb2843f0b..ef721f4705d 100644 --- a/tensorflow/lite/tools/delegates/delegate_provider.h +++ b/tensorflow/lite/tools/delegates/delegate_provider.h @@ -95,8 +95,8 @@ class DelegateProviderRegistrar { }; #define REGISTER_DELEGATE_PROVIDER_VNAME(T) gDelegateProvider_##T##_ -#define REGISTER_DELEGATE_PROVIDER(T) \ - static DelegateProviderRegistrar::Register \ +#define REGISTER_DELEGATE_PROVIDER(T) \ + static tflite::tools::DelegateProviderRegistrar::Register \ REGISTER_DELEGATE_PROVIDER_VNAME(T); // A global helper function to get all registered delegate providers. From 292003085bca74fe113e6c968f438f830fdc0a4b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Nov 2020 01:01:45 -0800 Subject: [PATCH 097/220] Update GraphDef version to 581. 
PiperOrigin-RevId: 341571083 Change-Id: Ie93872e7b4f6950550a1511b619efd8d4907813b --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 32637ef237c..00ab2053cbb 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 580 // Updated: 2020/11/9 +#define TF_GRAPH_DEF_VERSION 581 // Updated: 2020/11/10 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 81ac9df8b63ee47eb08393d77a119d9d96880217 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Nov 2020 01:01:46 -0800 Subject: [PATCH 098/220] compat: Update forward compatibility horizon to 2020-11-10 PiperOrigin-RevId: 341571085 Change-Id: Iaa5dca76e6b30216edc6c2195c07b1327ea052c7 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index d87dd689fd7..3083f161947 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 11, 9) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 11, 10) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 15ded23b5da7692afb8e18607538c0a264aead99 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 10 Nov 2020 01:13:28 -0800 Subject: [PATCH 099/220] Add option to delegate configuration proto/flatbuffer for fallback for any delegate. PiperOrigin-RevId: 341572298 Change-Id: I717543629d7b8063441053c04fbc3fa11571415c --- .../acceleration/configuration/configuration.proto | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tensorflow/lite/experimental/acceleration/configuration/configuration.proto b/tensorflow/lite/experimental/acceleration/configuration/configuration.proto index 15ff046cb05..290f1f0f6cd 100644 --- a/tensorflow/lite/experimental/acceleration/configuration/configuration.proto +++ b/tensorflow/lite/experimental/acceleration/configuration/configuration.proto @@ -107,8 +107,10 @@ message NNAPISettings { // case. optional int32 no_of_nnapi_instances_to_cache = 5; + // Deprecated; use the fallback_settings in TFLiteSettings. + // // Whether to automatically fall back to TFLite CPU path. - optional FallbackSettings fallback_settings = 6; + optional FallbackSettings fallback_settings = 6 [deprecated = true]; // Whether to allow use of NNAPI CPU (nnapi-reference accelerator) on Android // 10+ when an accelerator name is not specified. The NNAPI CPU typically @@ -238,6 +240,9 @@ message TFLiteSettings { // For configuring the EdgeTpuDelegate. optional EdgeTpuSettings edgetpu_settings = 8; + + // Whether to automatically fall back to TFLite CPU path. + optional FallbackSettings fallback_settings = 9; } // Whether to automatically fallback to TFLite CPU path on delegation errors. From 43884b9f6ebcd392b75e0e0735258a1be73dfec3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Nov 2020 01:45:49 -0800 Subject: [PATCH 100/220] [MLIR][KernelGen] Add MLIR-generated sign kernel. 
PiperOrigin-RevId: 341575688 Change-Id: If3b0d63247af7b564e68b76c356dbb745bdf2ed7 --- tensorflow/core/kernels/cwise_op_sign.cc | 4 ++- tensorflow/core/kernels/mlir_generated/BUILD | 2 ++ .../mlir_generated/unranked_op_gpu_sign.cc | 27 +++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 tensorflow/core/kernels/mlir_generated/unranked_op_gpu_sign.cc diff --git a/tensorflow/core/kernels/cwise_op_sign.cc b/tensorflow/core/kernels/cwise_op_sign.cc index b1501555fbc..c4ef05ef8c8 100644 --- a/tensorflow/core/kernels/cwise_op_sign.cc +++ b/tensorflow/core/kernels/cwise_op_sign.cc @@ -19,6 +19,8 @@ namespace tensorflow { REGISTER8(UnaryOp, CPU, "Sign", functor::sign, float, double, int32, int64, complex64, Eigen::half, bfloat16, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \ + !defined(MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED) REGISTER6(UnaryOp, GPU, "Sign", functor::sign, float, Eigen::half, double, int64, complex64, complex128); @@ -32,6 +34,6 @@ REGISTER_KERNEL_BUILDER(Name("Sign") .TypeConstraint("T"), UnaryOp>); #endif - +#endif } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/BUILD b/tensorflow/core/kernels/mlir_generated/BUILD index c3e700d1d2f..7bdf0aecca0 100644 --- a/tensorflow/core/kernels/mlir_generated/BUILD +++ b/tensorflow/core/kernels/mlir_generated/BUILD @@ -47,6 +47,7 @@ filegroup( "unranked_op_gpu_floor.cc", "unranked_op_gpu_log.cc", "unranked_op_gpu_rsqrt.cc", + "unranked_op_gpu_sign.cc", "unranked_op_gpu_sqrt.cc", "unranked_op_gpu_tanh.cc", "unranked_op_gpu_base.h", @@ -73,6 +74,7 @@ cc_library( ":floor_unranked_kernels", ":log_unranked_kernels", ":rsqrt_unranked_kernels", + ":sign_unranked_kernels", ":sqrt_unranked_kernels", ":tanh_unranked_kernels", "//tensorflow/compiler/mlir/tools/kernel_gen:tf_cuda_runtime_wrappers", diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_sign.cc 
b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_sign.cc new file mode 100644 index 00000000000..b5293ce1b1c --- /dev/null +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_sign.cc @@ -0,0 +1,27 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h" + +namespace tensorflow { + +REGISTER_AND_GENERATE_UNARY_KERNEL(Sign, f16, DT_HALF, Eigen::half); +REGISTER_AND_GENERATE_UNARY_KERNEL(Sign, f32, DT_FLOAT, float); +REGISTER_AND_GENERATE_UNARY_KERNEL(Sign, f64, DT_DOUBLE, double); +REGISTER_AND_GENERATE_UNARY_KERNEL(Sign, i32, DT_INT32, int32); +REGISTER_AND_GENERATE_UNARY_KERNEL(Sign, i64, DT_INT64, int64); + +} // namespace tensorflow From 3af43f7898bcfe08706d52442a3a6f67c0a55639 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 10 Nov 2020 02:37:15 -0800 Subject: [PATCH 101/220] [MLIR][KernelGen] Add MLIR-generated sin kernel PiperOrigin-RevId: 341581400 Change-Id: I5ad0e7c1b7d63cab6761888bcef8470018efdb30 --- tensorflow/core/kernels/mlir_generated/BUILD | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tensorflow/core/kernels/mlir_generated/BUILD b/tensorflow/core/kernels/mlir_generated/BUILD index 7bdf0aecca0..b5f13edc801 100644 --- a/tensorflow/core/kernels/mlir_generated/BUILD +++ b/tensorflow/core/kernels/mlir_generated/BUILD @@ -426,6 +426,19 @@ gen_kernel_library( unroll_factors = "4", ) +gen_kernel_library( + name = "sin", + generate_unranked = True, + same_shape = "0,1", + tile_size = "256", + types = [ + "f16", + "f32", + "f64", + ], + unroll_factors = "4", +) + gen_kernel_library( name = "sqrt", generate_unranked = True, From e897624fac6831d13a56cae3949c6cec0f97f386 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Nov 2020 03:05:56 -0800 Subject: [PATCH 102/220] [MLIR][KernelGen] Rename `REGISTER_AND_GENERATE*` macros to `GENERATE_AND_REGISTER*` PiperOrigin-RevId: 341584473 Change-Id: Ibafa9960718507bc6bb37bda9c70ea50294834ee --- .../core/kernels/mlir_generated/cwise_op_gpu_abs.cc | 10 +++++----- .../core/kernels/mlir_generated/cwise_op_gpu_base.h | 2 +- .../core/kernels/mlir_generated/cwise_op_gpu_tanh.cc | 6 +++--- .../core/kernels/mlir_generated/unranked_gpu_add.cc | 6 +++--- .../core/kernels/mlir_generated/unranked_op_gpu_abs.cc | 10 +++++----- .../core/kernels/mlir_generated/unranked_op_gpu_base.h | 4 ++-- .../kernels/mlir_generated/unranked_op_gpu_ceil.cc | 6 +++--- .../core/kernels/mlir_generated/unranked_op_gpu_cos.cc | 6 +++--- .../core/kernels/mlir_generated/unranked_op_gpu_exp.cc | 6 +++--- .../kernels/mlir_generated/unranked_op_gpu_floor.cc | 6 +++--- .../core/kernels/mlir_generated/unranked_op_gpu_log.cc | 6 +++--- .../kernels/mlir_generated/unranked_op_gpu_rsqrt.cc | 6 +++--- 
.../kernels/mlir_generated/unranked_op_gpu_sign.cc | 10 +++++----- .../kernels/mlir_generated/unranked_op_gpu_sqrt.cc | 6 +++--- .../kernels/mlir_generated/unranked_op_gpu_tanh.cc | 6 +++--- 15 files changed, 48 insertions(+), 48 deletions(-) diff --git a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_abs.cc b/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_abs.cc index 263c7eb0eac..948a7c00437 100644 --- a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_abs.cc +++ b/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_abs.cc @@ -32,9 +32,9 @@ namespace { GENERATE_OP_KERNEL_BASE(Abs); } // namespace -REGISTER_AND_GENERATE_UNARY_KERNEL(Abs, F16, Eigen::half); -REGISTER_AND_GENERATE_UNARY_KERNEL(Abs, F32, float); -REGISTER_AND_GENERATE_UNARY_KERNEL(Abs, F64, double); -REGISTER_AND_GENERATE_UNARY_KERNEL(Abs, I32, int32); -REGISTER_AND_GENERATE_UNARY_KERNEL(Abs, I64, int64); +GENERATE_AND_REGISTER_UNARY_KERNEL(Abs, F16, Eigen::half); +GENERATE_AND_REGISTER_UNARY_KERNEL(Abs, F32, float); +GENERATE_AND_REGISTER_UNARY_KERNEL(Abs, F64, double); +GENERATE_AND_REGISTER_UNARY_KERNEL(Abs, I32, int32); +GENERATE_AND_REGISTER_UNARY_KERNEL(Abs, I64, int64); } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.h b/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.h index d2a5d2d9720..466bbead3a5 100644 --- a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.h +++ b/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.h @@ -61,7 +61,7 @@ class MlirGeneratedUnaryOp : public OpKernel { ##Op(ctx, k##kernel_name##data_type##Kernel) {} \ }; -#define REGISTER_AND_GENERATE_UNARY_KERNEL(kernel_name, data_type, \ +#define GENERATE_AND_REGISTER_UNARY_KERNEL(kernel_name, data_type, \ native_data_type) \ namespace { \ GENERATE_OP_KERNEL_FOR(kernel_name, data_type) \ diff --git a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_tanh.cc b/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_tanh.cc index 
545071ff595..a9cc0666b0b 100644 --- a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_tanh.cc +++ b/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_tanh.cc @@ -30,7 +30,7 @@ namespace { GENERATE_OP_KERNEL_BASE(Tanh); } // namespace -REGISTER_AND_GENERATE_UNARY_KERNEL(Tanh, F16, Eigen::half) -REGISTER_AND_GENERATE_UNARY_KERNEL(Tanh, F32, float) -REGISTER_AND_GENERATE_UNARY_KERNEL(Tanh, F64, double) +GENERATE_AND_REGISTER_UNARY_KERNEL(Tanh, F16, Eigen::half) +GENERATE_AND_REGISTER_UNARY_KERNEL(Tanh, F32, float) +GENERATE_AND_REGISTER_UNARY_KERNEL(Tanh, F64, double) } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/unranked_gpu_add.cc b/tensorflow/core/kernels/mlir_generated/unranked_gpu_add.cc index bbad91a2eb5..16dc1df9aef 100644 --- a/tensorflow/core/kernels/mlir_generated/unranked_gpu_add.cc +++ b/tensorflow/core/kernels/mlir_generated/unranked_gpu_add.cc @@ -17,8 +17,8 @@ limitations under the License. namespace tensorflow { -REGISTER_AND_GENERATE_BINARY_KERNEL(AddV2, f16, DT_HALF, Eigen::half); -REGISTER_AND_GENERATE_BINARY_KERNEL(AddV2, f32, DT_FLOAT, float); -REGISTER_AND_GENERATE_BINARY_KERNEL(AddV2, f64, DT_DOUBLE, double); +GENERATE_AND_REGISTER_BINARY_KERNEL(AddV2, f16, DT_HALF, Eigen::half); +GENERATE_AND_REGISTER_BINARY_KERNEL(AddV2, f32, DT_FLOAT, float); +GENERATE_AND_REGISTER_BINARY_KERNEL(AddV2, f64, DT_DOUBLE, double); } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_abs.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_abs.cc index 3e1baa2dce0..43eb7bb8a95 100644 --- a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_abs.cc +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_abs.cc @@ -18,10 +18,10 @@ limitations under the License. 
namespace tensorflow { -REGISTER_AND_GENERATE_UNARY_KERNEL(Abs, f16, DT_HALF, Eigen::half); -REGISTER_AND_GENERATE_UNARY_KERNEL(Abs, f32, DT_FLOAT, float); -REGISTER_AND_GENERATE_UNARY_KERNEL(Abs, f64, DT_DOUBLE, double); -REGISTER_AND_GENERATE_UNARY_KERNEL(Abs, i32, DT_INT32, int32); -REGISTER_AND_GENERATE_UNARY_KERNEL(Abs, i64, DT_INT64, int64); +GENERATE_AND_REGISTER_UNARY_KERNEL(Abs, f16, DT_HALF, Eigen::half); +GENERATE_AND_REGISTER_UNARY_KERNEL(Abs, f32, DT_FLOAT, float); +GENERATE_AND_REGISTER_UNARY_KERNEL(Abs, f64, DT_DOUBLE, double); +GENERATE_AND_REGISTER_UNARY_KERNEL(Abs, i32, DT_INT32, int32); +GENERATE_AND_REGISTER_UNARY_KERNEL(Abs, i64, DT_INT64, int64); } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h index 4bcfff21ea8..f2043d8ea09 100644 --- a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h @@ -116,7 +116,7 @@ class MlirUnrankedOp : public OpKernel { // memref descriptors and calls mlir-generated unranked kernel. The outputs // are converted back to tensors using MlirTensorBuffer to take ownership of // pre-allocated memory. 
-#define REGISTER_AND_GENERATE_BINARY_KERNEL(tf_op, mlir_type, tf_data_type, \ +#define GENERATE_AND_REGISTER_BINARY_KERNEL(tf_op, mlir_type, tf_data_type, \ data_type) \ extern "C" ::UnrankedMemRefType MLIR_FUNCTION(tf_op, mlir_type)( \ tensorflow::OpKernelContext * ctx, \ @@ -143,7 +143,7 @@ class MlirUnrankedOp : public OpKernel { Name(#tf_op).Device(DEVICE_GPU).TypeConstraint("T"), \ MlirUnranked##tf_op##mlir_type##Op); -#define REGISTER_AND_GENERATE_UNARY_KERNEL(tf_op, mlir_type, tf_data_type, \ +#define GENERATE_AND_REGISTER_UNARY_KERNEL(tf_op, mlir_type, tf_data_type, \ data_type) \ extern "C" ::UnrankedMemRefType MLIR_FUNCTION(tf_op, mlir_type)( \ tensorflow::OpKernelContext * ctx, \ diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_ceil.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_ceil.cc index d1c51860355..41800d0db1a 100644 --- a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_ceil.cc +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_ceil.cc @@ -18,8 +18,8 @@ limitations under the License. namespace tensorflow { -REGISTER_AND_GENERATE_UNARY_KERNEL(Ceil, f16, DT_HALF, Eigen::half); -REGISTER_AND_GENERATE_UNARY_KERNEL(Ceil, f32, DT_FLOAT, float); -REGISTER_AND_GENERATE_UNARY_KERNEL(Ceil, f64, DT_DOUBLE, double); +GENERATE_AND_REGISTER_UNARY_KERNEL(Ceil, f16, DT_HALF, Eigen::half); +GENERATE_AND_REGISTER_UNARY_KERNEL(Ceil, f32, DT_FLOAT, float); +GENERATE_AND_REGISTER_UNARY_KERNEL(Ceil, f64, DT_DOUBLE, double); } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_cos.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_cos.cc index fdac05fd4ce..ca3832b9ece 100644 --- a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_cos.cc +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_cos.cc @@ -18,8 +18,8 @@ limitations under the License. 
namespace tensorflow { -REGISTER_AND_GENERATE_UNARY_KERNEL(Cos, f16, DT_HALF, Eigen::half); -REGISTER_AND_GENERATE_UNARY_KERNEL(Cos, f32, DT_FLOAT, float); -REGISTER_AND_GENERATE_UNARY_KERNEL(Cos, f64, DT_DOUBLE, double); +GENERATE_AND_REGISTER_UNARY_KERNEL(Cos, f16, DT_HALF, Eigen::half); +GENERATE_AND_REGISTER_UNARY_KERNEL(Cos, f32, DT_FLOAT, float); +GENERATE_AND_REGISTER_UNARY_KERNEL(Cos, f64, DT_DOUBLE, double); } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_exp.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_exp.cc index 4288bba7eea..b14b5ceb8f9 100644 --- a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_exp.cc +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_exp.cc @@ -18,8 +18,8 @@ limitations under the License. namespace tensorflow { -REGISTER_AND_GENERATE_UNARY_KERNEL(Exp, f16, DT_HALF, Eigen::half); -REGISTER_AND_GENERATE_UNARY_KERNEL(Exp, f32, DT_FLOAT, float); -REGISTER_AND_GENERATE_UNARY_KERNEL(Exp, f64, DT_DOUBLE, double); +GENERATE_AND_REGISTER_UNARY_KERNEL(Exp, f16, DT_HALF, Eigen::half); +GENERATE_AND_REGISTER_UNARY_KERNEL(Exp, f32, DT_FLOAT, float); +GENERATE_AND_REGISTER_UNARY_KERNEL(Exp, f64, DT_DOUBLE, double); } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_floor.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_floor.cc index 40f2682db4e..faf7616d343 100644 --- a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_floor.cc +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_floor.cc @@ -18,8 +18,8 @@ limitations under the License. 
namespace tensorflow { -REGISTER_AND_GENERATE_UNARY_KERNEL(Floor, f16, DT_HALF, Eigen::half); -REGISTER_AND_GENERATE_UNARY_KERNEL(Floor, f32, DT_FLOAT, float); -REGISTER_AND_GENERATE_UNARY_KERNEL(Floor, f64, DT_DOUBLE, double); +GENERATE_AND_REGISTER_UNARY_KERNEL(Floor, f16, DT_HALF, Eigen::half); +GENERATE_AND_REGISTER_UNARY_KERNEL(Floor, f32, DT_FLOAT, float); +GENERATE_AND_REGISTER_UNARY_KERNEL(Floor, f64, DT_DOUBLE, double); } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_log.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_log.cc index 8ade178691d..afd941b269a 100644 --- a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_log.cc +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_log.cc @@ -18,8 +18,8 @@ limitations under the License. namespace tensorflow { -REGISTER_AND_GENERATE_UNARY_KERNEL(Log, f16, DT_HALF, Eigen::half); -REGISTER_AND_GENERATE_UNARY_KERNEL(Log, f32, DT_FLOAT, float); -REGISTER_AND_GENERATE_UNARY_KERNEL(Log, f64, DT_DOUBLE, double); +GENERATE_AND_REGISTER_UNARY_KERNEL(Log, f16, DT_HALF, Eigen::half); +GENERATE_AND_REGISTER_UNARY_KERNEL(Log, f32, DT_FLOAT, float); +GENERATE_AND_REGISTER_UNARY_KERNEL(Log, f64, DT_DOUBLE, double); } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_rsqrt.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_rsqrt.cc index 6e8974bd64a..f89e1069b61 100644 --- a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_rsqrt.cc +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_rsqrt.cc @@ -18,8 +18,8 @@ limitations under the License. 
namespace tensorflow { -REGISTER_AND_GENERATE_UNARY_KERNEL(Rsqrt, f16, DT_HALF, Eigen::half); -REGISTER_AND_GENERATE_UNARY_KERNEL(Rsqrt, f32, DT_FLOAT, float); -REGISTER_AND_GENERATE_UNARY_KERNEL(Rsqrt, f64, DT_DOUBLE, double); +GENERATE_AND_REGISTER_UNARY_KERNEL(Rsqrt, f16, DT_HALF, Eigen::half); +GENERATE_AND_REGISTER_UNARY_KERNEL(Rsqrt, f32, DT_FLOAT, float); +GENERATE_AND_REGISTER_UNARY_KERNEL(Rsqrt, f64, DT_DOUBLE, double); } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_sign.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_sign.cc index b5293ce1b1c..a29c53a2978 100644 --- a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_sign.cc +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_sign.cc @@ -18,10 +18,10 @@ limitations under the License. namespace tensorflow { -REGISTER_AND_GENERATE_UNARY_KERNEL(Sign, f16, DT_HALF, Eigen::half); -REGISTER_AND_GENERATE_UNARY_KERNEL(Sign, f32, DT_FLOAT, float); -REGISTER_AND_GENERATE_UNARY_KERNEL(Sign, f64, DT_DOUBLE, double); -REGISTER_AND_GENERATE_UNARY_KERNEL(Sign, i32, DT_INT32, int32); -REGISTER_AND_GENERATE_UNARY_KERNEL(Sign, i64, DT_INT64, int64); +GENERATE_AND_REGISTER_UNARY_KERNEL(Sign, f16, DT_HALF, Eigen::half); +GENERATE_AND_REGISTER_UNARY_KERNEL(Sign, f32, DT_FLOAT, float); +GENERATE_AND_REGISTER_UNARY_KERNEL(Sign, f64, DT_DOUBLE, double); +GENERATE_AND_REGISTER_UNARY_KERNEL(Sign, i32, DT_INT32, int32); +GENERATE_AND_REGISTER_UNARY_KERNEL(Sign, i64, DT_INT64, int64); } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_sqrt.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_sqrt.cc index 97f9825d173..9b77735c0c1 100644 --- a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_sqrt.cc +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_sqrt.cc @@ -18,8 +18,8 @@ limitations under the License. 
namespace tensorflow { -REGISTER_AND_GENERATE_UNARY_KERNEL(Sqrt, f16, DT_HALF, Eigen::half); -REGISTER_AND_GENERATE_UNARY_KERNEL(Sqrt, f32, DT_FLOAT, float); -REGISTER_AND_GENERATE_UNARY_KERNEL(Sqrt, f64, DT_DOUBLE, double); +GENERATE_AND_REGISTER_UNARY_KERNEL(Sqrt, f16, DT_HALF, Eigen::half); +GENERATE_AND_REGISTER_UNARY_KERNEL(Sqrt, f32, DT_FLOAT, float); +GENERATE_AND_REGISTER_UNARY_KERNEL(Sqrt, f64, DT_DOUBLE, double); } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_tanh.cc b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_tanh.cc index 440ef1a2d97..5a703b95279 100644 --- a/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_tanh.cc +++ b/tensorflow/core/kernels/mlir_generated/unranked_op_gpu_tanh.cc @@ -18,8 +18,8 @@ limitations under the License. namespace tensorflow { -REGISTER_AND_GENERATE_UNARY_KERNEL(Tanh, f16, DT_HALF, Eigen::half); -REGISTER_AND_GENERATE_UNARY_KERNEL(Tanh, f32, DT_FLOAT, float); -REGISTER_AND_GENERATE_UNARY_KERNEL(Tanh, f64, DT_DOUBLE, double); +GENERATE_AND_REGISTER_UNARY_KERNEL(Tanh, f16, DT_HALF, Eigen::half); +GENERATE_AND_REGISTER_UNARY_KERNEL(Tanh, f32, DT_FLOAT, float); +GENERATE_AND_REGISTER_UNARY_KERNEL(Tanh, f64, DT_DOUBLE, double); } // namespace tensorflow From 2c05a4a796a853f92df819b61792720df0859a38 Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Tue, 10 Nov 2020 03:26:51 -0800 Subject: [PATCH 103/220] Make use of same_shape and tensorflow abi knowledge propagation passes. 
PiperOrigin-RevId: 341586499 Change-Id: Ifa6177fba122d53375b47ec69fa8a401d51582ac --- .../mlir/tools/kernel_gen/kernel_creator.cc | 59 ++++++++++++++----- .../kernel_gen/tests/tf_to_kernel/tanh.mlir | 2 +- 2 files changed, 45 insertions(+), 16 deletions(-) diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc b/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc index 5692070dac0..8850a61fc3e 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc @@ -198,32 +198,57 @@ Status LowerTFtoGPU(mlir::ModuleOp module, bool gpu_binary_only, return Status::OK(); } -Status LowerGPUToLLVM(mlir::ModuleOp module, bool gpu_binary_only, - llvm::ArrayRef same_shape, - llvm::StringRef gpu_binary_attr_name, - llvm::ArrayRef architectures, - bool generate_fatbin) { +Status AmendKernelLLVMIRWithStaticKnowledge(mlir::ModuleOp module) { + mlir::PassManager pm(module.getContext()); + applyTensorflowAndCLOptions(pm); + + pm.addNestedPass( + mlir::kernel_gen::transforms::CreatePropagateShapeKnowledgeToKernels()); + pm.addNestedPass( + mlir::kernel_gen::transforms::CreatePropagateTfAbiKnowledgeToKernels()); + + return failed(pm.run(module)) + ? InternalError("Amending LLVMIR with static knowledge failed.") + : Status::OK(); +} + +Status GenerateDeviceCode(mlir::ModuleOp module, bool gpu_binary_only, + llvm::ArrayRef same_shape, + llvm::StringRef gpu_binary_attr_name, + llvm::ArrayRef architectures, + bool generate_fatbin) { mlir::PassManager pm(module.getContext()); applyTensorflowAndCLOptions(pm); auto& kernel_pm = pm.nest(); + // TODO(herhut): Remove this. if (gpu_binary_only) { // Grab the original signature from the single function. kernel_pm.addNestedPass( mlir::kernel_gen::transforms::CreatePropagateTensorFlowABIKnowledgePass( same_shape)); } + // Remove debug information to ensure we do not create debug PTX. 
kernel_pm.addPass(mlir::createStripDebugInfoPass()); kernel_pm.addPass(mlir::kernel_gen::transforms::CreateGpuKernelToBlobPass( gpu_binary_attr_name, architectures, generate_fatbin)); - if (!gpu_binary_only) { - pm.addPass(mlir::kernel_gen::transforms::CreateTFKernelToLLVMPass()); - pm.addPass(mlir::createCanonicalizerPass()); - pm.addPass(mlir::createCSEPass()); - } - return failed(pm.run(module)) ? InternalError("Lowering to LLVM IR failed.") - : Status::OK(); + return failed(pm.run(module)) + ? InternalError("Generating device code failed.") + : Status::OK(); +} + +Status LowerHostSideToFinalForm(mlir::ModuleOp module) { + mlir::PassManager pm(module.getContext()); + applyTensorflowAndCLOptions(pm); + + pm.addPass(mlir::kernel_gen::transforms::CreateTFKernelToLLVMPass()); + pm.addPass(mlir::createCanonicalizerPass()); + pm.addPass(mlir::createCSEPass()); + + return failed(pm.run(module)) + ? InternalError("Final lowering of host side failed.") + : Status::OK(); } } // namespace @@ -249,9 +274,13 @@ StatusOr GenerateKernelForTfCode( #elif GOOGLE_CUDA TF_RETURN_IF_ERROR(xla::mlir_gpu::LowerKernelBodiesToNVVM(module.get())); #endif - TF_RETURN_IF_ERROR(LowerGPUToLLVM(module.get(), gpu_binary_only, same_shape, - kGpuBinaryAttrName, architectures, - generate_fatbin)); + TF_RETURN_IF_ERROR(AmendKernelLLVMIRWithStaticKnowledge(module.get())); + TF_RETURN_IF_ERROR(GenerateDeviceCode(module.get(), gpu_binary_only, + same_shape, kGpuBinaryAttrName, + architectures, generate_fatbin)); + if (!gpu_binary_only) { + TF_RETURN_IF_ERROR(LowerHostSideToFinalForm(module.get())); + } return module; } diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_kernel/tanh.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_kernel/tanh.mlir index 85bea1795a5..3eb736a359e 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_kernel/tanh.mlir +++ b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_kernel/tanh.mlir @@ -1,6 +1,6 @@ // RUN: tf_to_kernel 
--input=%s --output=%t --same_shape=0,1 --unroll_factors=4 --tile_sizes=256 --arch=sm_70,compute_75 -func @tanh(%arg: tensor<*xf32>) -> tensor<*xf32> { +func @tanh(%arg: tensor<*xf32>) -> tensor<*xf32> attributes {tf_entry} { %0 = "tf.Tanh"(%arg) : (tensor<*xf32>) -> tensor<*xf32> return %0 : tensor<*xf32> } From a6241f0271c29d8921ea44d8c0e7243e0373b93d Mon Sep 17 00:00:00 2001 From: Terry Heo Date: Tue, 10 Nov 2020 04:40:18 -0800 Subject: [PATCH 104/220] TFLite OpenCL: CMake build fix Added serialization_generated.h and serialization_base_generated.h The file is generated with the following command. $ flatc -c --scoped-enums -I ./ tensorflow/lite/delegates/gpu/cl/serialization.fbs $ flatc -c --scoped-enums -I ./ tensorflow/lite/delegates/gpu/common/task/serialization_base.fbs PiperOrigin-RevId: 341594596 Change-Id: I2e80a1d01d3b48560c502279443107cce608f369 --- .../gpu/cl/serialization_generated.h | 985 +++++++++ .../task/serialization_base_generated.h | 1839 +++++++++++++++++ tensorflow/lite/micro/tools/make/Makefile | 2 +- tensorflow/lite/tools/make/Makefile | 2 +- tensorflow/opensource_only.files | 4 + 5 files changed, 2830 insertions(+), 2 deletions(-) create mode 100644 tensorflow/lite/delegates/gpu/cl/serialization_generated.h create mode 100644 tensorflow/lite/delegates/gpu/common/task/serialization_base_generated.h diff --git a/tensorflow/lite/delegates/gpu/cl/serialization_generated.h b/tensorflow/lite/delegates/gpu/cl/serialization_generated.h new file mode 100644 index 00000000000..c564954e867 --- /dev/null +++ b/tensorflow/lite/delegates/gpu/cl/serialization_generated.h @@ -0,0 +1,985 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// automatically generated by the FlatBuffers compiler, do not modify + + +#ifndef FLATBUFFERS_GENERATED_SERIALIZATION_TFLITE_GPU_CL_DATA_H_ +#define FLATBUFFERS_GENERATED_SERIALIZATION_TFLITE_GPU_CL_DATA_H_ + +#include "flatbuffers/flatbuffers.h" + +#include "tensorflow/lite/delegates/gpu/common/task/serialization_base_generated.h" + +namespace tflite { +namespace gpu { +namespace cl { +namespace data { + +struct OperationDef; +struct OperationDefBuilder; + +struct CompilerOption; +struct CompilerOptionBuilder; + +struct GPUOperation; +struct GPUOperationBuilder; + +struct TensorDescWithId; +struct TensorDescWithIdBuilder; + +struct CLNode; +struct CLNodeBuilder; + +struct PairOfValueIds; +struct PairOfValueIdsBuilder; + +struct InferenceContext; +struct InferenceContextBuilder; + +enum class CalculationsPrecision : int8_t { + F32 = 0, + F32_F16 = 1, + F16 = 2, + MIN = F32, + MAX = F16 +}; + +inline const CalculationsPrecision (&EnumValuesCalculationsPrecision())[3] { + static const CalculationsPrecision values[] = { + CalculationsPrecision::F32, + CalculationsPrecision::F32_F16, + CalculationsPrecision::F16 + }; + return values; +} + +inline const char * const *EnumNamesCalculationsPrecision() { + static const char * const names[4] = { + "F32", + "F32_F16", + "F16", + nullptr + }; + return names; +} + +inline const char *EnumNameCalculationsPrecision(CalculationsPrecision e) { + if (flatbuffers::IsOutRange(e, CalculationsPrecision::F32, CalculationsPrecision::F16)) return ""; + const 
size_t index = static_cast(e); + return EnumNamesCalculationsPrecision()[index]; +} + +enum class TensorToGrid : int8_t { + CUSTOM = 0, + WB_TO_X_HD_TO_Y_S_TO_Z = 1, + WB_TO_X_HD_TO_Y_Z_IS_1 = 2, + WB_TO_X_H_TO_Y_D_TO_Z = 3, + B_TO_X_Y_IS_1_Z_IS_1 = 4, + MIN = CUSTOM, + MAX = B_TO_X_Y_IS_1_Z_IS_1 +}; + +inline const TensorToGrid (&EnumValuesTensorToGrid())[5] { + static const TensorToGrid values[] = { + TensorToGrid::CUSTOM, + TensorToGrid::WB_TO_X_HD_TO_Y_S_TO_Z, + TensorToGrid::WB_TO_X_HD_TO_Y_Z_IS_1, + TensorToGrid::WB_TO_X_H_TO_Y_D_TO_Z, + TensorToGrid::B_TO_X_Y_IS_1_Z_IS_1 + }; + return values; +} + +inline const char * const *EnumNamesTensorToGrid() { + static const char * const names[6] = { + "CUSTOM", + "WB_TO_X_HD_TO_Y_S_TO_Z", + "WB_TO_X_HD_TO_Y_Z_IS_1", + "WB_TO_X_H_TO_Y_D_TO_Z", + "B_TO_X_Y_IS_1_Z_IS_1", + nullptr + }; + return names; +} + +inline const char *EnumNameTensorToGrid(TensorToGrid e) { + if (flatbuffers::IsOutRange(e, TensorToGrid::CUSTOM, TensorToGrid::B_TO_X_Y_IS_1_Z_IS_1)) return ""; + const size_t index = static_cast(e); + return EnumNamesTensorToGrid()[index]; +} + +enum class CompilerOptions : int8_t { + ADRENO_FULL_SIMD_LINE = 0, + ADRENO_MORE_WAVES = 1, + POWERVR_FP16 = 2, + CL_OPT_DISABLE = 3, + CL_2_0 = 4, + CL_3_0 = 5, + MIN = ADRENO_FULL_SIMD_LINE, + MAX = CL_3_0 +}; + +inline const CompilerOptions (&EnumValuesCompilerOptions())[6] { + static const CompilerOptions values[] = { + CompilerOptions::ADRENO_FULL_SIMD_LINE, + CompilerOptions::ADRENO_MORE_WAVES, + CompilerOptions::POWERVR_FP16, + CompilerOptions::CL_OPT_DISABLE, + CompilerOptions::CL_2_0, + CompilerOptions::CL_3_0 + }; + return values; +} + +inline const char * const *EnumNamesCompilerOptions() { + static const char * const names[7] = { + "ADRENO_FULL_SIMD_LINE", + "ADRENO_MORE_WAVES", + "POWERVR_FP16", + "CL_OPT_DISABLE", + "CL_2_0", + "CL_3_0", + nullptr + }; + return names; +} + +inline const char *EnumNameCompilerOptions(CompilerOptions e) { + if 
(flatbuffers::IsOutRange(e, CompilerOptions::ADRENO_FULL_SIMD_LINE, CompilerOptions::CL_3_0)) return ""; + const size_t index = static_cast(e); + return EnumNamesCompilerOptions()[index]; +} + +struct OperationDef FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef OperationDefBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_PRECISION = 4, + VT_SRC_TENSORS = 6, + VT_DST_TENSORS = 8 + }; + tflite::gpu::cl::data::CalculationsPrecision precision() const { + return static_cast(GetField(VT_PRECISION, 0)); + } + const flatbuffers::Vector> *src_tensors() const { + return GetPointer> *>(VT_SRC_TENSORS); + } + const flatbuffers::Vector> *dst_tensors() const { + return GetPointer> *>(VT_DST_TENSORS); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_PRECISION) && + VerifyOffset(verifier, VT_SRC_TENSORS) && + verifier.VerifyVector(src_tensors()) && + verifier.VerifyVectorOfTables(src_tensors()) && + VerifyOffset(verifier, VT_DST_TENSORS) && + verifier.VerifyVector(dst_tensors()) && + verifier.VerifyVectorOfTables(dst_tensors()) && + verifier.EndTable(); + } +}; + +struct OperationDefBuilder { + typedef OperationDef Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_precision(tflite::gpu::cl::data::CalculationsPrecision precision) { + fbb_.AddElement(OperationDef::VT_PRECISION, static_cast(precision), 0); + } + void add_src_tensors(flatbuffers::Offset>> src_tensors) { + fbb_.AddOffset(OperationDef::VT_SRC_TENSORS, src_tensors); + } + void add_dst_tensors(flatbuffers::Offset>> dst_tensors) { + fbb_.AddOffset(OperationDef::VT_DST_TENSORS, dst_tensors); + } + explicit OperationDefBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline 
flatbuffers::Offset CreateOperationDef( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::gpu::cl::data::CalculationsPrecision precision = tflite::gpu::cl::data::CalculationsPrecision::F32, + flatbuffers::Offset>> src_tensors = 0, + flatbuffers::Offset>> dst_tensors = 0) { + OperationDefBuilder builder_(_fbb); + builder_.add_dst_tensors(dst_tensors); + builder_.add_src_tensors(src_tensors); + builder_.add_precision(precision); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateOperationDefDirect( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::gpu::cl::data::CalculationsPrecision precision = tflite::gpu::cl::data::CalculationsPrecision::F32, + const std::vector> *src_tensors = nullptr, + const std::vector> *dst_tensors = nullptr) { + auto src_tensors__ = src_tensors ? _fbb.CreateVector>(*src_tensors) : 0; + auto dst_tensors__ = dst_tensors ? _fbb.CreateVector>(*dst_tensors) : 0; + return tflite::gpu::cl::data::CreateOperationDef( + _fbb, + precision, + src_tensors__, + dst_tensors__); +} + +struct CompilerOption FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef CompilerOptionBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_OPTION = 4 + }; + tflite::gpu::cl::data::CompilerOptions option() const { + return static_cast(GetField(VT_OPTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_OPTION) && + verifier.EndTable(); + } +}; + +struct CompilerOptionBuilder { + typedef CompilerOption Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_option(tflite::gpu::cl::data::CompilerOptions option) { + fbb_.AddElement(CompilerOption::VT_OPTION, static_cast(option), 0); + } + explicit CompilerOptionBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = 
flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateCompilerOption( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::gpu::cl::data::CompilerOptions option = tflite::gpu::cl::data::CompilerOptions::ADRENO_FULL_SIMD_LINE) { + CompilerOptionBuilder builder_(_fbb); + builder_.add_option(option); + return builder_.Finish(); +} + +struct GPUOperation FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef GPUOperationBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_ARGUMENTS = 4, + VT_CODE = 6, + VT_WORK_GROUP_SIZE = 8, + VT_COMPILER_OPTIONS = 10, + VT_TENSOR_TO_GRID = 12, + VT_ELEMENTWISE = 14, + VT_LINKABLE = 16, + VT_CHECK_SRC_CHANNELS_SIZE = 18, + VT_DEFINITION = 20, + VT_GRID_DIMENSION = 22, + VT_WORK_GROUP_LAUNCH_ORDER = 24, + VT_GRID_SIZE = 26, + VT_SRC_TENSORS_NAMES = 28, + VT_DST_TENSORS_NAMES = 30, + VT_WORK_GROUPS_COUNT = 32, + VT_LINKABLE_COUNT = 34, + VT_ELEMENTWISE_CODE = 36 + }; + const tflite::gpu::data::Arguments *arguments() const { + return GetPointer(VT_ARGUMENTS); + } + const flatbuffers::String *code() const { + return GetPointer(VT_CODE); + } + const tflite::gpu::data::Int3 *work_group_size() const { + return GetPointer(VT_WORK_GROUP_SIZE); + } + const flatbuffers::Vector> *compiler_options() const { + return GetPointer> *>(VT_COMPILER_OPTIONS); + } + tflite::gpu::cl::data::TensorToGrid tensor_to_grid() const { + return static_cast(GetField(VT_TENSOR_TO_GRID, 0)); + } + bool elementwise() const { + return GetField(VT_ELEMENTWISE, 0) != 0; + } + bool linkable() const { + return GetField(VT_LINKABLE, 0) != 0; + } + bool check_src_channels_size() const { + return GetField(VT_CHECK_SRC_CHANNELS_SIZE, 0) != 0; + } + const tflite::gpu::cl::data::OperationDef *definition() const { + return GetPointer(VT_DEFINITION); + } + int32_t grid_dimension() const { + return GetField(VT_GRID_DIMENSION, 0); + } + const tflite::gpu::data::Int3 *work_group_launch_order() const { + return 
GetPointer(VT_WORK_GROUP_LAUNCH_ORDER); + } + const tflite::gpu::data::Int3 *grid_size() const { + return GetPointer(VT_GRID_SIZE); + } + const flatbuffers::Vector> *src_tensors_names() const { + return GetPointer> *>(VT_SRC_TENSORS_NAMES); + } + const flatbuffers::Vector> *dst_tensors_names() const { + return GetPointer> *>(VT_DST_TENSORS_NAMES); + } + const tflite::gpu::data::Int3 *work_groups_count() const { + return GetPointer(VT_WORK_GROUPS_COUNT); + } + int32_t linkable_count() const { + return GetField(VT_LINKABLE_COUNT, 0); + } + const flatbuffers::String *elementwise_code() const { + return GetPointer(VT_ELEMENTWISE_CODE); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_ARGUMENTS) && + verifier.VerifyTable(arguments()) && + VerifyOffset(verifier, VT_CODE) && + verifier.VerifyString(code()) && + VerifyOffset(verifier, VT_WORK_GROUP_SIZE) && + verifier.VerifyTable(work_group_size()) && + VerifyOffset(verifier, VT_COMPILER_OPTIONS) && + verifier.VerifyVector(compiler_options()) && + verifier.VerifyVectorOfTables(compiler_options()) && + VerifyField(verifier, VT_TENSOR_TO_GRID) && + VerifyField(verifier, VT_ELEMENTWISE) && + VerifyField(verifier, VT_LINKABLE) && + VerifyField(verifier, VT_CHECK_SRC_CHANNELS_SIZE) && + VerifyOffset(verifier, VT_DEFINITION) && + verifier.VerifyTable(definition()) && + VerifyField(verifier, VT_GRID_DIMENSION) && + VerifyOffset(verifier, VT_WORK_GROUP_LAUNCH_ORDER) && + verifier.VerifyTable(work_group_launch_order()) && + VerifyOffset(verifier, VT_GRID_SIZE) && + verifier.VerifyTable(grid_size()) && + VerifyOffset(verifier, VT_SRC_TENSORS_NAMES) && + verifier.VerifyVector(src_tensors_names()) && + verifier.VerifyVectorOfStrings(src_tensors_names()) && + VerifyOffset(verifier, VT_DST_TENSORS_NAMES) && + verifier.VerifyVector(dst_tensors_names()) && + verifier.VerifyVectorOfStrings(dst_tensors_names()) && + VerifyOffset(verifier, VT_WORK_GROUPS_COUNT) && + 
verifier.VerifyTable(work_groups_count()) && + VerifyField(verifier, VT_LINKABLE_COUNT) && + VerifyOffset(verifier, VT_ELEMENTWISE_CODE) && + verifier.VerifyString(elementwise_code()) && + verifier.EndTable(); + } +}; + +struct GPUOperationBuilder { + typedef GPUOperation Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_arguments(flatbuffers::Offset arguments) { + fbb_.AddOffset(GPUOperation::VT_ARGUMENTS, arguments); + } + void add_code(flatbuffers::Offset code) { + fbb_.AddOffset(GPUOperation::VT_CODE, code); + } + void add_work_group_size(flatbuffers::Offset work_group_size) { + fbb_.AddOffset(GPUOperation::VT_WORK_GROUP_SIZE, work_group_size); + } + void add_compiler_options(flatbuffers::Offset>> compiler_options) { + fbb_.AddOffset(GPUOperation::VT_COMPILER_OPTIONS, compiler_options); + } + void add_tensor_to_grid(tflite::gpu::cl::data::TensorToGrid tensor_to_grid) { + fbb_.AddElement(GPUOperation::VT_TENSOR_TO_GRID, static_cast(tensor_to_grid), 0); + } + void add_elementwise(bool elementwise) { + fbb_.AddElement(GPUOperation::VT_ELEMENTWISE, static_cast(elementwise), 0); + } + void add_linkable(bool linkable) { + fbb_.AddElement(GPUOperation::VT_LINKABLE, static_cast(linkable), 0); + } + void add_check_src_channels_size(bool check_src_channels_size) { + fbb_.AddElement(GPUOperation::VT_CHECK_SRC_CHANNELS_SIZE, static_cast(check_src_channels_size), 0); + } + void add_definition(flatbuffers::Offset definition) { + fbb_.AddOffset(GPUOperation::VT_DEFINITION, definition); + } + void add_grid_dimension(int32_t grid_dimension) { + fbb_.AddElement(GPUOperation::VT_GRID_DIMENSION, grid_dimension, 0); + } + void add_work_group_launch_order(flatbuffers::Offset work_group_launch_order) { + fbb_.AddOffset(GPUOperation::VT_WORK_GROUP_LAUNCH_ORDER, work_group_launch_order); + } + void add_grid_size(flatbuffers::Offset grid_size) { + fbb_.AddOffset(GPUOperation::VT_GRID_SIZE, grid_size); + } + void 
add_src_tensors_names(flatbuffers::Offset>> src_tensors_names) { + fbb_.AddOffset(GPUOperation::VT_SRC_TENSORS_NAMES, src_tensors_names); + } + void add_dst_tensors_names(flatbuffers::Offset>> dst_tensors_names) { + fbb_.AddOffset(GPUOperation::VT_DST_TENSORS_NAMES, dst_tensors_names); + } + void add_work_groups_count(flatbuffers::Offset work_groups_count) { + fbb_.AddOffset(GPUOperation::VT_WORK_GROUPS_COUNT, work_groups_count); + } + void add_linkable_count(int32_t linkable_count) { + fbb_.AddElement(GPUOperation::VT_LINKABLE_COUNT, linkable_count, 0); + } + void add_elementwise_code(flatbuffers::Offset elementwise_code) { + fbb_.AddOffset(GPUOperation::VT_ELEMENTWISE_CODE, elementwise_code); + } + explicit GPUOperationBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateGPUOperation( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset arguments = 0, + flatbuffers::Offset code = 0, + flatbuffers::Offset work_group_size = 0, + flatbuffers::Offset>> compiler_options = 0, + tflite::gpu::cl::data::TensorToGrid tensor_to_grid = tflite::gpu::cl::data::TensorToGrid::CUSTOM, + bool elementwise = false, + bool linkable = false, + bool check_src_channels_size = false, + flatbuffers::Offset definition = 0, + int32_t grid_dimension = 0, + flatbuffers::Offset work_group_launch_order = 0, + flatbuffers::Offset grid_size = 0, + flatbuffers::Offset>> src_tensors_names = 0, + flatbuffers::Offset>> dst_tensors_names = 0, + flatbuffers::Offset work_groups_count = 0, + int32_t linkable_count = 0, + flatbuffers::Offset elementwise_code = 0) { + GPUOperationBuilder builder_(_fbb); + builder_.add_elementwise_code(elementwise_code); + builder_.add_linkable_count(linkable_count); + builder_.add_work_groups_count(work_groups_count); + 
builder_.add_dst_tensors_names(dst_tensors_names); + builder_.add_src_tensors_names(src_tensors_names); + builder_.add_grid_size(grid_size); + builder_.add_work_group_launch_order(work_group_launch_order); + builder_.add_grid_dimension(grid_dimension); + builder_.add_definition(definition); + builder_.add_compiler_options(compiler_options); + builder_.add_work_group_size(work_group_size); + builder_.add_code(code); + builder_.add_arguments(arguments); + builder_.add_check_src_channels_size(check_src_channels_size); + builder_.add_linkable(linkable); + builder_.add_elementwise(elementwise); + builder_.add_tensor_to_grid(tensor_to_grid); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateGPUOperationDirect( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset arguments = 0, + const char *code = nullptr, + flatbuffers::Offset work_group_size = 0, + const std::vector> *compiler_options = nullptr, + tflite::gpu::cl::data::TensorToGrid tensor_to_grid = tflite::gpu::cl::data::TensorToGrid::CUSTOM, + bool elementwise = false, + bool linkable = false, + bool check_src_channels_size = false, + flatbuffers::Offset definition = 0, + int32_t grid_dimension = 0, + flatbuffers::Offset work_group_launch_order = 0, + flatbuffers::Offset grid_size = 0, + const std::vector> *src_tensors_names = nullptr, + const std::vector> *dst_tensors_names = nullptr, + flatbuffers::Offset work_groups_count = 0, + int32_t linkable_count = 0, + const char *elementwise_code = nullptr) { + auto code__ = code ? _fbb.CreateString(code) : 0; + auto compiler_options__ = compiler_options ? _fbb.CreateVector>(*compiler_options) : 0; + auto src_tensors_names__ = src_tensors_names ? _fbb.CreateVector>(*src_tensors_names) : 0; + auto dst_tensors_names__ = dst_tensors_names ? _fbb.CreateVector>(*dst_tensors_names) : 0; + auto elementwise_code__ = elementwise_code ? 
_fbb.CreateString(elementwise_code) : 0; + return tflite::gpu::cl::data::CreateGPUOperation( + _fbb, + arguments, + code__, + work_group_size, + compiler_options__, + tensor_to_grid, + elementwise, + linkable, + check_src_channels_size, + definition, + grid_dimension, + work_group_launch_order, + grid_size, + src_tensors_names__, + dst_tensors_names__, + work_groups_count, + linkable_count, + elementwise_code__); +} + +struct TensorDescWithId FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef TensorDescWithIdBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_DESC = 4, + VT_ID = 6 + }; + const tflite::gpu::data::TensorDescriptor *desc() const { + return GetPointer(VT_DESC); + } + int32_t id() const { + return GetField(VT_ID, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_DESC) && + verifier.VerifyTable(desc()) && + VerifyField(verifier, VT_ID) && + verifier.EndTable(); + } +}; + +struct TensorDescWithIdBuilder { + typedef TensorDescWithId Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_desc(flatbuffers::Offset desc) { + fbb_.AddOffset(TensorDescWithId::VT_DESC, desc); + } + void add_id(int32_t id) { + fbb_.AddElement(TensorDescWithId::VT_ID, id, 0); + } + explicit TensorDescWithIdBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateTensorDescWithId( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset desc = 0, + int32_t id = 0) { + TensorDescWithIdBuilder builder_(_fbb); + builder_.add_id(id); + builder_.add_desc(desc); + return builder_.Finish(); +} + +struct CLNode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef CLNodeBuilder Builder; + enum 
FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_GPU_OP = 4, + VT_INPUT_IDS = 6, + VT_OUTPUT_IDS = 8, + VT_NAME = 10 + }; + const tflite::gpu::cl::data::GPUOperation *gpu_op() const { + return GetPointer(VT_GPU_OP); + } + const flatbuffers::Vector *input_ids() const { + return GetPointer *>(VT_INPUT_IDS); + } + const flatbuffers::Vector *output_ids() const { + return GetPointer *>(VT_OUTPUT_IDS); + } + const flatbuffers::String *name() const { + return GetPointer(VT_NAME); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_GPU_OP) && + verifier.VerifyTable(gpu_op()) && + VerifyOffset(verifier, VT_INPUT_IDS) && + verifier.VerifyVector(input_ids()) && + VerifyOffset(verifier, VT_OUTPUT_IDS) && + verifier.VerifyVector(output_ids()) && + VerifyOffset(verifier, VT_NAME) && + verifier.VerifyString(name()) && + verifier.EndTable(); + } +}; + +struct CLNodeBuilder { + typedef CLNode Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_gpu_op(flatbuffers::Offset gpu_op) { + fbb_.AddOffset(CLNode::VT_GPU_OP, gpu_op); + } + void add_input_ids(flatbuffers::Offset> input_ids) { + fbb_.AddOffset(CLNode::VT_INPUT_IDS, input_ids); + } + void add_output_ids(flatbuffers::Offset> output_ids) { + fbb_.AddOffset(CLNode::VT_OUTPUT_IDS, output_ids); + } + void add_name(flatbuffers::Offset name) { + fbb_.AddOffset(CLNode::VT_NAME, name); + } + explicit CLNodeBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateCLNode( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset gpu_op = 0, + flatbuffers::Offset> input_ids = 0, + flatbuffers::Offset> output_ids = 0, + flatbuffers::Offset name = 0) { + CLNodeBuilder builder_(_fbb); + 
builder_.add_name(name); + builder_.add_output_ids(output_ids); + builder_.add_input_ids(input_ids); + builder_.add_gpu_op(gpu_op); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateCLNodeDirect( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset gpu_op = 0, + const std::vector *input_ids = nullptr, + const std::vector *output_ids = nullptr, + const char *name = nullptr) { + auto input_ids__ = input_ids ? _fbb.CreateVector(*input_ids) : 0; + auto output_ids__ = output_ids ? _fbb.CreateVector(*output_ids) : 0; + auto name__ = name ? _fbb.CreateString(name) : 0; + return tflite::gpu::cl::data::CreateCLNode( + _fbb, + gpu_op, + input_ids__, + output_ids__, + name__); +} + +struct PairOfValueIds FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef PairOfValueIdsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FIRST = 4, + VT_SECOND = 6 + }; + int32_t first() const { + return GetField(VT_FIRST, 0); + } + int32_t second() const { + return GetField(VT_SECOND, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FIRST) && + VerifyField(verifier, VT_SECOND) && + verifier.EndTable(); + } +}; + +struct PairOfValueIdsBuilder { + typedef PairOfValueIds Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_first(int32_t first) { + fbb_.AddElement(PairOfValueIds::VT_FIRST, first, 0); + } + void add_second(int32_t second) { + fbb_.AddElement(PairOfValueIds::VT_SECOND, second, 0); + } + explicit PairOfValueIdsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreatePairOfValueIds( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t first = 0, + int32_t second = 0) { + PairOfValueIdsBuilder 
builder_(_fbb); + builder_.add_second(second); + builder_.add_first(first); + return builder_.Finish(); +} + +struct InferenceContext FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef InferenceContextBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NEED_FLUSH = 4, + VT_FLUSH_PERIODICALLY = 6, + VT_FLUSH_PERIOD = 8, + VT_NEED_MANUAL_RELEASE = 10, + VT_PRECISION = 12, + VT_STORAGE_TYPE = 14, + VT_NODES = 16, + VT_TENSORS = 18, + VT_INPUT_IDS = 20, + VT_VARIABLE_IDS_AND_REFS = 22, + VT_OUTPUT_IDS = 24, + VT_INPUT_REFS = 26, + VT_OUTPUT_REFS = 28 + }; + bool need_flush() const { + return GetField(VT_NEED_FLUSH, 0) != 0; + } + bool flush_periodically() const { + return GetField(VT_FLUSH_PERIODICALLY, 0) != 0; + } + int32_t flush_period() const { + return GetField(VT_FLUSH_PERIOD, 0); + } + bool need_manual_release() const { + return GetField(VT_NEED_MANUAL_RELEASE, 0) != 0; + } + tflite::gpu::cl::data::CalculationsPrecision precision() const { + return static_cast(GetField(VT_PRECISION, 0)); + } + tflite::gpu::data::TensorStorageType storage_type() const { + return static_cast(GetField(VT_STORAGE_TYPE, 0)); + } + const flatbuffers::Vector> *nodes() const { + return GetPointer> *>(VT_NODES); + } + const flatbuffers::Vector> *tensors() const { + return GetPointer> *>(VT_TENSORS); + } + const flatbuffers::Vector *input_ids() const { + return GetPointer *>(VT_INPUT_IDS); + } + const flatbuffers::Vector> *variable_ids_and_refs() const { + return GetPointer> *>(VT_VARIABLE_IDS_AND_REFS); + } + const flatbuffers::Vector *output_ids() const { + return GetPointer *>(VT_OUTPUT_IDS); + } + const flatbuffers::Vector *input_refs() const { + return GetPointer *>(VT_INPUT_REFS); + } + const flatbuffers::Vector *output_refs() const { + return GetPointer *>(VT_OUTPUT_REFS); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_NEED_FLUSH) && + 
VerifyField(verifier, VT_FLUSH_PERIODICALLY) && + VerifyField(verifier, VT_FLUSH_PERIOD) && + VerifyField(verifier, VT_NEED_MANUAL_RELEASE) && + VerifyField(verifier, VT_PRECISION) && + VerifyField(verifier, VT_STORAGE_TYPE) && + VerifyOffset(verifier, VT_NODES) && + verifier.VerifyVector(nodes()) && + verifier.VerifyVectorOfTables(nodes()) && + VerifyOffset(verifier, VT_TENSORS) && + verifier.VerifyVector(tensors()) && + verifier.VerifyVectorOfTables(tensors()) && + VerifyOffset(verifier, VT_INPUT_IDS) && + verifier.VerifyVector(input_ids()) && + VerifyOffset(verifier, VT_VARIABLE_IDS_AND_REFS) && + verifier.VerifyVector(variable_ids_and_refs()) && + verifier.VerifyVectorOfTables(variable_ids_and_refs()) && + VerifyOffset(verifier, VT_OUTPUT_IDS) && + verifier.VerifyVector(output_ids()) && + VerifyOffset(verifier, VT_INPUT_REFS) && + verifier.VerifyVector(input_refs()) && + VerifyOffset(verifier, VT_OUTPUT_REFS) && + verifier.VerifyVector(output_refs()) && + verifier.EndTable(); + } +}; + +struct InferenceContextBuilder { + typedef InferenceContext Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_need_flush(bool need_flush) { + fbb_.AddElement(InferenceContext::VT_NEED_FLUSH, static_cast(need_flush), 0); + } + void add_flush_periodically(bool flush_periodically) { + fbb_.AddElement(InferenceContext::VT_FLUSH_PERIODICALLY, static_cast(flush_periodically), 0); + } + void add_flush_period(int32_t flush_period) { + fbb_.AddElement(InferenceContext::VT_FLUSH_PERIOD, flush_period, 0); + } + void add_need_manual_release(bool need_manual_release) { + fbb_.AddElement(InferenceContext::VT_NEED_MANUAL_RELEASE, static_cast(need_manual_release), 0); + } + void add_precision(tflite::gpu::cl::data::CalculationsPrecision precision) { + fbb_.AddElement(InferenceContext::VT_PRECISION, static_cast(precision), 0); + } + void add_storage_type(tflite::gpu::data::TensorStorageType storage_type) { + 
fbb_.AddElement(InferenceContext::VT_STORAGE_TYPE, static_cast(storage_type), 0); + } + void add_nodes(flatbuffers::Offset>> nodes) { + fbb_.AddOffset(InferenceContext::VT_NODES, nodes); + } + void add_tensors(flatbuffers::Offset>> tensors) { + fbb_.AddOffset(InferenceContext::VT_TENSORS, tensors); + } + void add_input_ids(flatbuffers::Offset> input_ids) { + fbb_.AddOffset(InferenceContext::VT_INPUT_IDS, input_ids); + } + void add_variable_ids_and_refs(flatbuffers::Offset>> variable_ids_and_refs) { + fbb_.AddOffset(InferenceContext::VT_VARIABLE_IDS_AND_REFS, variable_ids_and_refs); + } + void add_output_ids(flatbuffers::Offset> output_ids) { + fbb_.AddOffset(InferenceContext::VT_OUTPUT_IDS, output_ids); + } + void add_input_refs(flatbuffers::Offset> input_refs) { + fbb_.AddOffset(InferenceContext::VT_INPUT_REFS, input_refs); + } + void add_output_refs(flatbuffers::Offset> output_refs) { + fbb_.AddOffset(InferenceContext::VT_OUTPUT_REFS, output_refs); + } + explicit InferenceContextBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateInferenceContext( + flatbuffers::FlatBufferBuilder &_fbb, + bool need_flush = false, + bool flush_periodically = false, + int32_t flush_period = 0, + bool need_manual_release = false, + tflite::gpu::cl::data::CalculationsPrecision precision = tflite::gpu::cl::data::CalculationsPrecision::F32, + tflite::gpu::data::TensorStorageType storage_type = tflite::gpu::data::TensorStorageType::UNKNOWN, + flatbuffers::Offset>> nodes = 0, + flatbuffers::Offset>> tensors = 0, + flatbuffers::Offset> input_ids = 0, + flatbuffers::Offset>> variable_ids_and_refs = 0, + flatbuffers::Offset> output_ids = 0, + flatbuffers::Offset> input_refs = 0, + flatbuffers::Offset> output_refs = 0) { + InferenceContextBuilder builder_(_fbb); + 
builder_.add_output_refs(output_refs); + builder_.add_input_refs(input_refs); + builder_.add_output_ids(output_ids); + builder_.add_variable_ids_and_refs(variable_ids_and_refs); + builder_.add_input_ids(input_ids); + builder_.add_tensors(tensors); + builder_.add_nodes(nodes); + builder_.add_flush_period(flush_period); + builder_.add_storage_type(storage_type); + builder_.add_precision(precision); + builder_.add_need_manual_release(need_manual_release); + builder_.add_flush_periodically(flush_periodically); + builder_.add_need_flush(need_flush); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateInferenceContextDirect( + flatbuffers::FlatBufferBuilder &_fbb, + bool need_flush = false, + bool flush_periodically = false, + int32_t flush_period = 0, + bool need_manual_release = false, + tflite::gpu::cl::data::CalculationsPrecision precision = tflite::gpu::cl::data::CalculationsPrecision::F32, + tflite::gpu::data::TensorStorageType storage_type = tflite::gpu::data::TensorStorageType::UNKNOWN, + const std::vector> *nodes = nullptr, + const std::vector> *tensors = nullptr, + const std::vector *input_ids = nullptr, + const std::vector> *variable_ids_and_refs = nullptr, + const std::vector *output_ids = nullptr, + const std::vector *input_refs = nullptr, + const std::vector *output_refs = nullptr) { + auto nodes__ = nodes ? _fbb.CreateVector>(*nodes) : 0; + auto tensors__ = tensors ? _fbb.CreateVector>(*tensors) : 0; + auto input_ids__ = input_ids ? _fbb.CreateVector(*input_ids) : 0; + auto variable_ids_and_refs__ = variable_ids_and_refs ? _fbb.CreateVector>(*variable_ids_and_refs) : 0; + auto output_ids__ = output_ids ? _fbb.CreateVector(*output_ids) : 0; + auto input_refs__ = input_refs ? _fbb.CreateVector(*input_refs) : 0; + auto output_refs__ = output_refs ? 
_fbb.CreateVector(*output_refs) : 0; + return tflite::gpu::cl::data::CreateInferenceContext( + _fbb, + need_flush, + flush_periodically, + flush_period, + need_manual_release, + precision, + storage_type, + nodes__, + tensors__, + input_ids__, + variable_ids_and_refs__, + output_ids__, + input_refs__, + output_refs__); +} + +inline const tflite::gpu::cl::data::InferenceContext *GetInferenceContext(const void *buf) { + return flatbuffers::GetRoot(buf); +} + +inline const tflite::gpu::cl::data::InferenceContext *GetSizePrefixedInferenceContext(const void *buf) { + return flatbuffers::GetSizePrefixedRoot(buf); +} + +inline bool VerifyInferenceContextBuffer( + flatbuffers::Verifier &verifier) { + return verifier.VerifyBuffer(nullptr); +} + +inline bool VerifySizePrefixedInferenceContextBuffer( + flatbuffers::Verifier &verifier) { + return verifier.VerifySizePrefixedBuffer(nullptr); +} + +inline void FinishInferenceContextBuffer( + flatbuffers::FlatBufferBuilder &fbb, + flatbuffers::Offset root) { + fbb.Finish(root); +} + +inline void FinishSizePrefixedInferenceContextBuffer( + flatbuffers::FlatBufferBuilder &fbb, + flatbuffers::Offset root) { + fbb.FinishSizePrefixed(root); +} + +} // namespace data +} // namespace cl +} // namespace gpu +} // namespace tflite + +#endif // FLATBUFFERS_GENERATED_SERIALIZATION_TFLITE_GPU_CL_DATA_H_ diff --git a/tensorflow/lite/delegates/gpu/common/task/serialization_base_generated.h b/tensorflow/lite/delegates/gpu/common/task/serialization_base_generated.h new file mode 100644 index 00000000000..7f089f95082 --- /dev/null +++ b/tensorflow/lite/delegates/gpu/common/task/serialization_base_generated.h @@ -0,0 +1,1839 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// automatically generated by the FlatBuffers compiler, do not modify + + +#ifndef FLATBUFFERS_GENERATED_SERIALIZATIONBASE_TFLITE_GPU_DATA_H_ +#define FLATBUFFERS_GENERATED_SERIALIZATIONBASE_TFLITE_GPU_DATA_H_ + +#include "flatbuffers/flatbuffers.h" + +namespace tflite { +namespace gpu { +namespace data { + +struct Int4; +struct Int4Builder; + +struct Int3; +struct Int3Builder; + +struct Int2; +struct Int2Builder; + +struct StateVariable; +struct StateVariableBuilder; + +struct GPUObjectDescriptor; +struct GPUObjectDescriptorBuilder; + +struct IntValue; +struct IntValueBuilder; + +struct FloatValue; +struct FloatValueBuilder; + +struct HalfValue; +struct HalfValueBuilder; + +struct BufferDescriptor; +struct BufferDescriptorBuilder; + +struct Texture2DDescriptor; +struct Texture2DDescriptorBuilder; + +struct TensorLinearDescriptor; +struct TensorLinearDescriptorBuilder; + +struct BHWDC; +struct BHWDCBuilder; + +struct TensorDescriptor; +struct TensorDescriptorBuilder; + +struct BufferDescriptorMapValue; +struct BufferDescriptorMapValueBuilder; + +struct Texture2DDescriptorMapValue; +struct Texture2DDescriptorMapValueBuilder; + +struct TensorLinearDescriptorMapValue; +struct TensorLinearDescriptorMapValueBuilder; + +struct TensorDescriptorMapValue; +struct TensorDescriptorMapValueBuilder; + +struct Arguments; +struct ArgumentsBuilder; + +enum class AccessType : int8_t { + READ = 0, + WRITE = 1, + READ_WRITE = 2, + MIN = READ, + MAX = READ_WRITE +}; + +inline const AccessType 
(&EnumValuesAccessType())[3] { + static const AccessType values[] = { + AccessType::READ, + AccessType::WRITE, + AccessType::READ_WRITE + }; + return values; +} + +inline const char * const *EnumNamesAccessType() { + static const char * const names[4] = { + "READ", + "WRITE", + "READ_WRITE", + nullptr + }; + return names; +} + +inline const char *EnumNameAccessType(AccessType e) { + if (flatbuffers::IsOutRange(e, AccessType::READ, AccessType::READ_WRITE)) return ""; + const size_t index = static_cast(e); + return EnumNamesAccessType()[index]; +} + +enum class DataType : int8_t { + UNKNOWN = 0, + FLOAT32 = 1, + FLOAT16 = 2, + MIN = UNKNOWN, + MAX = FLOAT16 +}; + +inline const DataType (&EnumValuesDataType())[3] { + static const DataType values[] = { + DataType::UNKNOWN, + DataType::FLOAT32, + DataType::FLOAT16 + }; + return values; +} + +inline const char * const *EnumNamesDataType() { + static const char * const names[4] = { + "UNKNOWN", + "FLOAT32", + "FLOAT16", + nullptr + }; + return names; +} + +inline const char *EnumNameDataType(DataType e) { + if (flatbuffers::IsOutRange(e, DataType::UNKNOWN, DataType::FLOAT16)) return ""; + const size_t index = static_cast(e); + return EnumNamesDataType()[index]; +} + +enum class MemoryType : int8_t { + GLOBAL = 0, + CONSTANT = 1, + LOCAL = 2, + MIN = GLOBAL, + MAX = LOCAL +}; + +inline const MemoryType (&EnumValuesMemoryType())[3] { + static const MemoryType values[] = { + MemoryType::GLOBAL, + MemoryType::CONSTANT, + MemoryType::LOCAL + }; + return values; +} + +inline const char * const *EnumNamesMemoryType() { + static const char * const names[4] = { + "GLOBAL", + "CONSTANT", + "LOCAL", + nullptr + }; + return names; +} + +inline const char *EnumNameMemoryType(MemoryType e) { + if (flatbuffers::IsOutRange(e, MemoryType::GLOBAL, MemoryType::LOCAL)) return ""; + const size_t index = static_cast(e); + return EnumNamesMemoryType()[index]; +} + +enum class LinearStorageType : int8_t { + BUFFER = 0, + TEXTURE_2D = 1, + MIN = 
BUFFER, + MAX = TEXTURE_2D +}; + +inline const LinearStorageType (&EnumValuesLinearStorageType())[2] { + static const LinearStorageType values[] = { + LinearStorageType::BUFFER, + LinearStorageType::TEXTURE_2D + }; + return values; +} + +inline const char * const *EnumNamesLinearStorageType() { + static const char * const names[3] = { + "BUFFER", + "TEXTURE_2D", + nullptr + }; + return names; +} + +inline const char *EnumNameLinearStorageType(LinearStorageType e) { + if (flatbuffers::IsOutRange(e, LinearStorageType::BUFFER, LinearStorageType::TEXTURE_2D)) return ""; + const size_t index = static_cast(e); + return EnumNamesLinearStorageType()[index]; +} + +enum class TensorStorageType : int8_t { + UNKNOWN = 0, + BUFFER = 1, + IMAGE_BUFFER = 2, + TEXTURE_2D = 3, + TEXTURE_3D = 4, + TEXTURE_ARRAY = 5, + SINGLE_TEXTURE_2D = 6, + MIN = UNKNOWN, + MAX = SINGLE_TEXTURE_2D +}; + +inline const TensorStorageType (&EnumValuesTensorStorageType())[7] { + static const TensorStorageType values[] = { + TensorStorageType::UNKNOWN, + TensorStorageType::BUFFER, + TensorStorageType::IMAGE_BUFFER, + TensorStorageType::TEXTURE_2D, + TensorStorageType::TEXTURE_3D, + TensorStorageType::TEXTURE_ARRAY, + TensorStorageType::SINGLE_TEXTURE_2D + }; + return values; +} + +inline const char * const *EnumNamesTensorStorageType() { + static const char * const names[8] = { + "UNKNOWN", + "BUFFER", + "IMAGE_BUFFER", + "TEXTURE_2D", + "TEXTURE_3D", + "TEXTURE_ARRAY", + "SINGLE_TEXTURE_2D", + nullptr + }; + return names; +} + +inline const char *EnumNameTensorStorageType(TensorStorageType e) { + if (flatbuffers::IsOutRange(e, TensorStorageType::UNKNOWN, TensorStorageType::SINGLE_TEXTURE_2D)) return ""; + const size_t index = static_cast(e); + return EnumNamesTensorStorageType()[index]; +} + +enum class Layout : int8_t { + UNKNOWN = 0, + HWC = 1, + BHWC = 2, + HWDC = 3, + BHWDC = 4, + MIN = UNKNOWN, + MAX = BHWDC +}; + +inline const Layout (&EnumValuesLayout())[5] { + static const Layout values[] = { + 
Layout::UNKNOWN, + Layout::HWC, + Layout::BHWC, + Layout::HWDC, + Layout::BHWDC + }; + return values; +} + +inline const char * const *EnumNamesLayout() { + static const char * const names[6] = { + "UNKNOWN", + "HWC", + "BHWC", + "HWDC", + "BHWDC", + nullptr + }; + return names; +} + +inline const char *EnumNameLayout(Layout e) { + if (flatbuffers::IsOutRange(e, Layout::UNKNOWN, Layout::BHWDC)) return ""; + const size_t index = static_cast(e); + return EnumNamesLayout()[index]; +} + +struct Int4 FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef Int4Builder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_X = 4, + VT_Y = 6, + VT_Z = 8, + VT_W = 10 + }; + int32_t x() const { + return GetField(VT_X, 0); + } + int32_t y() const { + return GetField(VT_Y, 0); + } + int32_t z() const { + return GetField(VT_Z, 0); + } + int32_t w() const { + return GetField(VT_W, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_X) && + VerifyField(verifier, VT_Y) && + VerifyField(verifier, VT_Z) && + VerifyField(verifier, VT_W) && + verifier.EndTable(); + } +}; + +struct Int4Builder { + typedef Int4 Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_x(int32_t x) { + fbb_.AddElement(Int4::VT_X, x, 0); + } + void add_y(int32_t y) { + fbb_.AddElement(Int4::VT_Y, y, 0); + } + void add_z(int32_t z) { + fbb_.AddElement(Int4::VT_Z, z, 0); + } + void add_w(int32_t w) { + fbb_.AddElement(Int4::VT_W, w, 0); + } + explicit Int4Builder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateInt4( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t x = 0, + int32_t y = 0, + int32_t z = 0, + int32_t w = 0) { + Int4Builder builder_(_fbb); + 
builder_.add_w(w); + builder_.add_z(z); + builder_.add_y(y); + builder_.add_x(x); + return builder_.Finish(); +} + +struct Int3 FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef Int3Builder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_X = 4, + VT_Y = 6, + VT_Z = 8 + }; + int32_t x() const { + return GetField(VT_X, 0); + } + int32_t y() const { + return GetField(VT_Y, 0); + } + int32_t z() const { + return GetField(VT_Z, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_X) && + VerifyField(verifier, VT_Y) && + VerifyField(verifier, VT_Z) && + verifier.EndTable(); + } +}; + +struct Int3Builder { + typedef Int3 Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_x(int32_t x) { + fbb_.AddElement(Int3::VT_X, x, 0); + } + void add_y(int32_t y) { + fbb_.AddElement(Int3::VT_Y, y, 0); + } + void add_z(int32_t z) { + fbb_.AddElement(Int3::VT_Z, z, 0); + } + explicit Int3Builder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateInt3( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t x = 0, + int32_t y = 0, + int32_t z = 0) { + Int3Builder builder_(_fbb); + builder_.add_z(z); + builder_.add_y(y); + builder_.add_x(x); + return builder_.Finish(); +} + +struct Int2 FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef Int2Builder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_X = 4, + VT_Y = 6 + }; + int32_t x() const { + return GetField(VT_X, 0); + } + int32_t y() const { + return GetField(VT_Y, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_X) && + VerifyField(verifier, VT_Y) && + 
verifier.EndTable(); + } +}; + +struct Int2Builder { + typedef Int2 Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_x(int32_t x) { + fbb_.AddElement(Int2::VT_X, x, 0); + } + void add_y(int32_t y) { + fbb_.AddElement(Int2::VT_Y, y, 0); + } + explicit Int2Builder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateInt2( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t x = 0, + int32_t y = 0) { + Int2Builder builder_(_fbb); + builder_.add_y(y); + builder_.add_x(x); + return builder_.Finish(); +} + +struct StateVariable FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef StateVariableBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_KEY = 4, + VT_VALUE = 6 + }; + const flatbuffers::String *key() const { + return GetPointer(VT_KEY); + } + const flatbuffers::String *value() const { + return GetPointer(VT_VALUE); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_KEY) && + verifier.VerifyString(key()) && + VerifyOffset(verifier, VT_VALUE) && + verifier.VerifyString(value()) && + verifier.EndTable(); + } +}; + +struct StateVariableBuilder { + typedef StateVariable Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_key(flatbuffers::Offset key) { + fbb_.AddOffset(StateVariable::VT_KEY, key); + } + void add_value(flatbuffers::Offset value) { + fbb_.AddOffset(StateVariable::VT_VALUE, value); + } + explicit StateVariableBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline 
flatbuffers::Offset CreateStateVariable( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset key = 0, + flatbuffers::Offset value = 0) { + StateVariableBuilder builder_(_fbb); + builder_.add_value(value); + builder_.add_key(key); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateStateVariableDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const char *key = nullptr, + const char *value = nullptr) { + auto key__ = key ? _fbb.CreateString(key) : 0; + auto value__ = value ? _fbb.CreateString(value) : 0; + return tflite::gpu::data::CreateStateVariable( + _fbb, + key__, + value__); +} + +struct GPUObjectDescriptor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef GPUObjectDescriptorBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_STATE_VARS = 4, + VT_ACCESS_TYPE = 6 + }; + const flatbuffers::Vector> *state_vars() const { + return GetPointer> *>(VT_STATE_VARS); + } + tflite::gpu::data::AccessType access_type() const { + return static_cast(GetField(VT_ACCESS_TYPE, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_STATE_VARS) && + verifier.VerifyVector(state_vars()) && + verifier.VerifyVectorOfTables(state_vars()) && + VerifyField(verifier, VT_ACCESS_TYPE) && + verifier.EndTable(); + } +}; + +struct GPUObjectDescriptorBuilder { + typedef GPUObjectDescriptor Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_state_vars(flatbuffers::Offset>> state_vars) { + fbb_.AddOffset(GPUObjectDescriptor::VT_STATE_VARS, state_vars); + } + void add_access_type(tflite::gpu::data::AccessType access_type) { + fbb_.AddElement(GPUObjectDescriptor::VT_ACCESS_TYPE, static_cast(access_type), 0); + } + explicit GPUObjectDescriptorBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = 
fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateGPUObjectDescriptor( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset>> state_vars = 0, + tflite::gpu::data::AccessType access_type = tflite::gpu::data::AccessType::READ) { + GPUObjectDescriptorBuilder builder_(_fbb); + builder_.add_state_vars(state_vars); + builder_.add_access_type(access_type); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateGPUObjectDescriptorDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector> *state_vars = nullptr, + tflite::gpu::data::AccessType access_type = tflite::gpu::data::AccessType::READ) { + auto state_vars__ = state_vars ? _fbb.CreateVector>(*state_vars) : 0; + return tflite::gpu::data::CreateGPUObjectDescriptor( + _fbb, + state_vars__, + access_type); +} + +struct IntValue FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef IntValueBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NAME = 4, + VT_VALUE = 6, + VT_ACTIVE = 8 + }; + const flatbuffers::String *name() const { + return GetPointer(VT_NAME); + } + int32_t value() const { + return GetField(VT_VALUE, 0); + } + bool active() const { + return GetField(VT_ACTIVE, 0) != 0; + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_NAME) && + verifier.VerifyString(name()) && + VerifyField(verifier, VT_VALUE) && + VerifyField(verifier, VT_ACTIVE) && + verifier.EndTable(); + } +}; + +struct IntValueBuilder { + typedef IntValue Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_name(flatbuffers::Offset name) { + fbb_.AddOffset(IntValue::VT_NAME, name); + } + void add_value(int32_t value) { + fbb_.AddElement(IntValue::VT_VALUE, value, 0); + } + void add_active(bool active) { + fbb_.AddElement(IntValue::VT_ACTIVE, static_cast(active), 0); + } + explicit 
IntValueBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateIntValue( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset name = 0, + int32_t value = 0, + bool active = false) { + IntValueBuilder builder_(_fbb); + builder_.add_value(value); + builder_.add_name(name); + builder_.add_active(active); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateIntValueDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const char *name = nullptr, + int32_t value = 0, + bool active = false) { + auto name__ = name ? _fbb.CreateString(name) : 0; + return tflite::gpu::data::CreateIntValue( + _fbb, + name__, + value, + active); +} + +struct FloatValue FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef FloatValueBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NAME = 4, + VT_VALUE = 6, + VT_ACTIVE = 8 + }; + const flatbuffers::String *name() const { + return GetPointer(VT_NAME); + } + float value() const { + return GetField(VT_VALUE, 0.0f); + } + bool active() const { + return GetField(VT_ACTIVE, 0) != 0; + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_NAME) && + verifier.VerifyString(name()) && + VerifyField(verifier, VT_VALUE) && + VerifyField(verifier, VT_ACTIVE) && + verifier.EndTable(); + } +}; + +struct FloatValueBuilder { + typedef FloatValue Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_name(flatbuffers::Offset name) { + fbb_.AddOffset(FloatValue::VT_NAME, name); + } + void add_value(float value) { + fbb_.AddElement(FloatValue::VT_VALUE, value, 0.0f); + } + void add_active(bool active) { + fbb_.AddElement(FloatValue::VT_ACTIVE, static_cast(active), 0); + } + explicit 
FloatValueBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateFloatValue( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset name = 0, + float value = 0.0f, + bool active = false) { + FloatValueBuilder builder_(_fbb); + builder_.add_value(value); + builder_.add_name(name); + builder_.add_active(active); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateFloatValueDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const char *name = nullptr, + float value = 0.0f, + bool active = false) { + auto name__ = name ? _fbb.CreateString(name) : 0; + return tflite::gpu::data::CreateFloatValue( + _fbb, + name__, + value, + active); +} + +struct HalfValue FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef HalfValueBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NAME = 4, + VT_VALUE = 6, + VT_ACTIVE = 8 + }; + const flatbuffers::String *name() const { + return GetPointer(VT_NAME); + } + float value() const { + return GetField(VT_VALUE, 0.0f); + } + bool active() const { + return GetField(VT_ACTIVE, 0) != 0; + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_NAME) && + verifier.VerifyString(name()) && + VerifyField(verifier, VT_VALUE) && + VerifyField(verifier, VT_ACTIVE) && + verifier.EndTable(); + } +}; + +struct HalfValueBuilder { + typedef HalfValue Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_name(flatbuffers::Offset name) { + fbb_.AddOffset(HalfValue::VT_NAME, name); + } + void add_value(float value) { + fbb_.AddElement(HalfValue::VT_VALUE, value, 0.0f); + } + void add_active(bool active) { + fbb_.AddElement(HalfValue::VT_ACTIVE, static_cast(active), 0); + } + 
explicit HalfValueBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateHalfValue( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset name = 0, + float value = 0.0f, + bool active = false) { + HalfValueBuilder builder_(_fbb); + builder_.add_value(value); + builder_.add_name(name); + builder_.add_active(active); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateHalfValueDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const char *name = nullptr, + float value = 0.0f, + bool active = false) { + auto name__ = name ? _fbb.CreateString(name) : 0; + return tflite::gpu::data::CreateHalfValue( + _fbb, + name__, + value, + active); +} + +struct BufferDescriptor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef BufferDescriptorBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_BASE_OBJ = 4, + VT_ELEMENT_TYPE = 6, + VT_ELEMENT_SIZE = 8, + VT_MEMORY_TYPE = 10, + VT_ATTRIBUTES = 12, + VT_SIZE = 14, + VT_DATA = 16 + }; + const tflite::gpu::data::GPUObjectDescriptor *base_obj() const { + return GetPointer(VT_BASE_OBJ); + } + tflite::gpu::data::DataType element_type() const { + return static_cast(GetField(VT_ELEMENT_TYPE, 0)); + } + int32_t element_size() const { + return GetField(VT_ELEMENT_SIZE, 0); + } + tflite::gpu::data::MemoryType memory_type() const { + return static_cast(GetField(VT_MEMORY_TYPE, 0)); + } + const flatbuffers::Vector> *attributes() const { + return GetPointer> *>(VT_ATTRIBUTES); + } + int32_t size() const { + return GetField(VT_SIZE, 0); + } + const flatbuffers::Vector *data() const { + return GetPointer *>(VT_DATA); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_BASE_OBJ) && + 
verifier.VerifyTable(base_obj()) && + VerifyField(verifier, VT_ELEMENT_TYPE) && + VerifyField(verifier, VT_ELEMENT_SIZE) && + VerifyField(verifier, VT_MEMORY_TYPE) && + VerifyOffset(verifier, VT_ATTRIBUTES) && + verifier.VerifyVector(attributes()) && + verifier.VerifyVectorOfStrings(attributes()) && + VerifyField(verifier, VT_SIZE) && + VerifyOffset(verifier, VT_DATA) && + verifier.VerifyVector(data()) && + verifier.EndTable(); + } +}; + +struct BufferDescriptorBuilder { + typedef BufferDescriptor Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_base_obj(flatbuffers::Offset base_obj) { + fbb_.AddOffset(BufferDescriptor::VT_BASE_OBJ, base_obj); + } + void add_element_type(tflite::gpu::data::DataType element_type) { + fbb_.AddElement(BufferDescriptor::VT_ELEMENT_TYPE, static_cast(element_type), 0); + } + void add_element_size(int32_t element_size) { + fbb_.AddElement(BufferDescriptor::VT_ELEMENT_SIZE, element_size, 0); + } + void add_memory_type(tflite::gpu::data::MemoryType memory_type) { + fbb_.AddElement(BufferDescriptor::VT_MEMORY_TYPE, static_cast(memory_type), 0); + } + void add_attributes(flatbuffers::Offset>> attributes) { + fbb_.AddOffset(BufferDescriptor::VT_ATTRIBUTES, attributes); + } + void add_size(int32_t size) { + fbb_.AddElement(BufferDescriptor::VT_SIZE, size, 0); + } + void add_data(flatbuffers::Offset> data) { + fbb_.AddOffset(BufferDescriptor::VT_DATA, data); + } + explicit BufferDescriptorBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateBufferDescriptor( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset base_obj = 0, + tflite::gpu::data::DataType element_type = tflite::gpu::data::DataType::UNKNOWN, + int32_t element_size = 0, + tflite::gpu::data::MemoryType memory_type = 
tflite::gpu::data::MemoryType::GLOBAL, + flatbuffers::Offset>> attributes = 0, + int32_t size = 0, + flatbuffers::Offset> data = 0) { + BufferDescriptorBuilder builder_(_fbb); + builder_.add_data(data); + builder_.add_size(size); + builder_.add_attributes(attributes); + builder_.add_element_size(element_size); + builder_.add_base_obj(base_obj); + builder_.add_memory_type(memory_type); + builder_.add_element_type(element_type); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateBufferDescriptorDirect( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset base_obj = 0, + tflite::gpu::data::DataType element_type = tflite::gpu::data::DataType::UNKNOWN, + int32_t element_size = 0, + tflite::gpu::data::MemoryType memory_type = tflite::gpu::data::MemoryType::GLOBAL, + const std::vector> *attributes = nullptr, + int32_t size = 0, + const std::vector *data = nullptr) { + auto attributes__ = attributes ? _fbb.CreateVector>(*attributes) : 0; + auto data__ = data ? _fbb.CreateVector(*data) : 0; + return tflite::gpu::data::CreateBufferDescriptor( + _fbb, + base_obj, + element_type, + element_size, + memory_type, + attributes__, + size, + data__); +} + +struct Texture2DDescriptor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef Texture2DDescriptorBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_BASE_OBJ = 4, + VT_ELEMENT_TYPE = 6, + VT_NORMALIZED = 8, + VT_NORMALIZED_TYPE = 10, + VT_SIZE = 12, + VT_DATA = 14 + }; + const tflite::gpu::data::GPUObjectDescriptor *base_obj() const { + return GetPointer(VT_BASE_OBJ); + } + tflite::gpu::data::DataType element_type() const { + return static_cast(GetField(VT_ELEMENT_TYPE, 0)); + } + bool normalized() const { + return GetField(VT_NORMALIZED, 0) != 0; + } + tflite::gpu::data::DataType normalized_type() const { + return static_cast(GetField(VT_NORMALIZED_TYPE, 0)); + } + const tflite::gpu::data::Int2 *size() const { + return GetPointer(VT_SIZE); + } + const 
flatbuffers::Vector *data() const { + return GetPointer *>(VT_DATA); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_BASE_OBJ) && + verifier.VerifyTable(base_obj()) && + VerifyField(verifier, VT_ELEMENT_TYPE) && + VerifyField(verifier, VT_NORMALIZED) && + VerifyField(verifier, VT_NORMALIZED_TYPE) && + VerifyOffset(verifier, VT_SIZE) && + verifier.VerifyTable(size()) && + VerifyOffset(verifier, VT_DATA) && + verifier.VerifyVector(data()) && + verifier.EndTable(); + } +}; + +struct Texture2DDescriptorBuilder { + typedef Texture2DDescriptor Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_base_obj(flatbuffers::Offset base_obj) { + fbb_.AddOffset(Texture2DDescriptor::VT_BASE_OBJ, base_obj); + } + void add_element_type(tflite::gpu::data::DataType element_type) { + fbb_.AddElement(Texture2DDescriptor::VT_ELEMENT_TYPE, static_cast(element_type), 0); + } + void add_normalized(bool normalized) { + fbb_.AddElement(Texture2DDescriptor::VT_NORMALIZED, static_cast(normalized), 0); + } + void add_normalized_type(tflite::gpu::data::DataType normalized_type) { + fbb_.AddElement(Texture2DDescriptor::VT_NORMALIZED_TYPE, static_cast(normalized_type), 0); + } + void add_size(flatbuffers::Offset size) { + fbb_.AddOffset(Texture2DDescriptor::VT_SIZE, size); + } + void add_data(flatbuffers::Offset> data) { + fbb_.AddOffset(Texture2DDescriptor::VT_DATA, data); + } + explicit Texture2DDescriptorBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateTexture2DDescriptor( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset base_obj = 0, + tflite::gpu::data::DataType element_type = tflite::gpu::data::DataType::UNKNOWN, + bool normalized = false, + 
tflite::gpu::data::DataType normalized_type = tflite::gpu::data::DataType::UNKNOWN, + flatbuffers::Offset size = 0, + flatbuffers::Offset> data = 0) { + Texture2DDescriptorBuilder builder_(_fbb); + builder_.add_data(data); + builder_.add_size(size); + builder_.add_base_obj(base_obj); + builder_.add_normalized_type(normalized_type); + builder_.add_normalized(normalized); + builder_.add_element_type(element_type); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateTexture2DDescriptorDirect( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset base_obj = 0, + tflite::gpu::data::DataType element_type = tflite::gpu::data::DataType::UNKNOWN, + bool normalized = false, + tflite::gpu::data::DataType normalized_type = tflite::gpu::data::DataType::UNKNOWN, + flatbuffers::Offset size = 0, + const std::vector *data = nullptr) { + auto data__ = data ? _fbb.CreateVector(*data) : 0; + return tflite::gpu::data::CreateTexture2DDescriptor( + _fbb, + base_obj, + element_type, + normalized, + normalized_type, + size, + data__); +} + +struct TensorLinearDescriptor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef TensorLinearDescriptorBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_BASE_OBJ = 4, + VT_STORAGE_TYPE = 6, + VT_ELEMENT_TYPE = 8, + VT_MEMORY_TYPE = 10, + VT_SIZE = 12, + VT_DATA = 14 + }; + const tflite::gpu::data::GPUObjectDescriptor *base_obj() const { + return GetPointer(VT_BASE_OBJ); + } + tflite::gpu::data::LinearStorageType storage_type() const { + return static_cast(GetField(VT_STORAGE_TYPE, 0)); + } + tflite::gpu::data::DataType element_type() const { + return static_cast(GetField(VT_ELEMENT_TYPE, 0)); + } + tflite::gpu::data::MemoryType memory_type() const { + return static_cast(GetField(VT_MEMORY_TYPE, 0)); + } + int32_t size() const { + return GetField(VT_SIZE, 0); + } + const flatbuffers::Vector *data() const { + return GetPointer *>(VT_DATA); + } + bool Verify(flatbuffers::Verifier 
&verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_BASE_OBJ) && + verifier.VerifyTable(base_obj()) && + VerifyField(verifier, VT_STORAGE_TYPE) && + VerifyField(verifier, VT_ELEMENT_TYPE) && + VerifyField(verifier, VT_MEMORY_TYPE) && + VerifyField(verifier, VT_SIZE) && + VerifyOffset(verifier, VT_DATA) && + verifier.VerifyVector(data()) && + verifier.EndTable(); + } +}; + +struct TensorLinearDescriptorBuilder { + typedef TensorLinearDescriptor Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_base_obj(flatbuffers::Offset base_obj) { + fbb_.AddOffset(TensorLinearDescriptor::VT_BASE_OBJ, base_obj); + } + void add_storage_type(tflite::gpu::data::LinearStorageType storage_type) { + fbb_.AddElement(TensorLinearDescriptor::VT_STORAGE_TYPE, static_cast(storage_type), 0); + } + void add_element_type(tflite::gpu::data::DataType element_type) { + fbb_.AddElement(TensorLinearDescriptor::VT_ELEMENT_TYPE, static_cast(element_type), 0); + } + void add_memory_type(tflite::gpu::data::MemoryType memory_type) { + fbb_.AddElement(TensorLinearDescriptor::VT_MEMORY_TYPE, static_cast(memory_type), 0); + } + void add_size(int32_t size) { + fbb_.AddElement(TensorLinearDescriptor::VT_SIZE, size, 0); + } + void add_data(flatbuffers::Offset> data) { + fbb_.AddOffset(TensorLinearDescriptor::VT_DATA, data); + } + explicit TensorLinearDescriptorBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateTensorLinearDescriptor( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset base_obj = 0, + tflite::gpu::data::LinearStorageType storage_type = tflite::gpu::data::LinearStorageType::BUFFER, + tflite::gpu::data::DataType element_type = tflite::gpu::data::DataType::UNKNOWN, + tflite::gpu::data::MemoryType 
memory_type = tflite::gpu::data::MemoryType::GLOBAL, + int32_t size = 0, + flatbuffers::Offset> data = 0) { + TensorLinearDescriptorBuilder builder_(_fbb); + builder_.add_data(data); + builder_.add_size(size); + builder_.add_base_obj(base_obj); + builder_.add_memory_type(memory_type); + builder_.add_element_type(element_type); + builder_.add_storage_type(storage_type); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateTensorLinearDescriptorDirect( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset base_obj = 0, + tflite::gpu::data::LinearStorageType storage_type = tflite::gpu::data::LinearStorageType::BUFFER, + tflite::gpu::data::DataType element_type = tflite::gpu::data::DataType::UNKNOWN, + tflite::gpu::data::MemoryType memory_type = tflite::gpu::data::MemoryType::GLOBAL, + int32_t size = 0, + const std::vector *data = nullptr) { + auto data__ = data ? _fbb.CreateVector(*data) : 0; + return tflite::gpu::data::CreateTensorLinearDescriptor( + _fbb, + base_obj, + storage_type, + element_type, + memory_type, + size, + data__); +} + +struct BHWDC FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef BHWDCBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_B = 4, + VT_H = 6, + VT_W = 8, + VT_D = 10, + VT_C = 12 + }; + int32_t b() const { + return GetField(VT_B, 0); + } + int32_t h() const { + return GetField(VT_H, 0); + } + int32_t w() const { + return GetField(VT_W, 0); + } + int32_t d() const { + return GetField(VT_D, 0); + } + int32_t c() const { + return GetField(VT_C, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_B) && + VerifyField(verifier, VT_H) && + VerifyField(verifier, VT_W) && + VerifyField(verifier, VT_D) && + VerifyField(verifier, VT_C) && + verifier.EndTable(); + } +}; + +struct BHWDCBuilder { + typedef BHWDC Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void 
add_b(int32_t b) { + fbb_.AddElement(BHWDC::VT_B, b, 0); + } + void add_h(int32_t h) { + fbb_.AddElement(BHWDC::VT_H, h, 0); + } + void add_w(int32_t w) { + fbb_.AddElement(BHWDC::VT_W, w, 0); + } + void add_d(int32_t d) { + fbb_.AddElement(BHWDC::VT_D, d, 0); + } + void add_c(int32_t c) { + fbb_.AddElement(BHWDC::VT_C, c, 0); + } + explicit BHWDCBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateBHWDC( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t b = 0, + int32_t h = 0, + int32_t w = 0, + int32_t d = 0, + int32_t c = 0) { + BHWDCBuilder builder_(_fbb); + builder_.add_c(c); + builder_.add_d(d); + builder_.add_w(w); + builder_.add_h(h); + builder_.add_b(b); + return builder_.Finish(); +} + +struct TensorDescriptor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef TensorDescriptorBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_BASE_OBJ = 4, + VT_DATA_TYPE = 6, + VT_STORAGE_TYPE = 8, + VT_LAYOUT = 10, + VT_SHAPE = 12, + VT_DATA = 14 + }; + const tflite::gpu::data::GPUObjectDescriptor *base_obj() const { + return GetPointer(VT_BASE_OBJ); + } + tflite::gpu::data::DataType data_type() const { + return static_cast(GetField(VT_DATA_TYPE, 0)); + } + tflite::gpu::data::TensorStorageType storage_type() const { + return static_cast(GetField(VT_STORAGE_TYPE, 0)); + } + tflite::gpu::data::Layout layout() const { + return static_cast(GetField(VT_LAYOUT, 0)); + } + const tflite::gpu::data::BHWDC *shape() const { + return GetPointer(VT_SHAPE); + } + const flatbuffers::Vector *data() const { + return GetPointer *>(VT_DATA); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_BASE_OBJ) && + verifier.VerifyTable(base_obj()) && + 
VerifyField(verifier, VT_DATA_TYPE) && + VerifyField(verifier, VT_STORAGE_TYPE) && + VerifyField(verifier, VT_LAYOUT) && + VerifyOffset(verifier, VT_SHAPE) && + verifier.VerifyTable(shape()) && + VerifyOffset(verifier, VT_DATA) && + verifier.VerifyVector(data()) && + verifier.EndTable(); + } +}; + +struct TensorDescriptorBuilder { + typedef TensorDescriptor Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_base_obj(flatbuffers::Offset base_obj) { + fbb_.AddOffset(TensorDescriptor::VT_BASE_OBJ, base_obj); + } + void add_data_type(tflite::gpu::data::DataType data_type) { + fbb_.AddElement(TensorDescriptor::VT_DATA_TYPE, static_cast(data_type), 0); + } + void add_storage_type(tflite::gpu::data::TensorStorageType storage_type) { + fbb_.AddElement(TensorDescriptor::VT_STORAGE_TYPE, static_cast(storage_type), 0); + } + void add_layout(tflite::gpu::data::Layout layout) { + fbb_.AddElement(TensorDescriptor::VT_LAYOUT, static_cast(layout), 0); + } + void add_shape(flatbuffers::Offset shape) { + fbb_.AddOffset(TensorDescriptor::VT_SHAPE, shape); + } + void add_data(flatbuffers::Offset> data) { + fbb_.AddOffset(TensorDescriptor::VT_DATA, data); + } + explicit TensorDescriptorBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateTensorDescriptor( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset base_obj = 0, + tflite::gpu::data::DataType data_type = tflite::gpu::data::DataType::UNKNOWN, + tflite::gpu::data::TensorStorageType storage_type = tflite::gpu::data::TensorStorageType::UNKNOWN, + tflite::gpu::data::Layout layout = tflite::gpu::data::Layout::UNKNOWN, + flatbuffers::Offset shape = 0, + flatbuffers::Offset> data = 0) { + TensorDescriptorBuilder builder_(_fbb); + builder_.add_data(data); + 
builder_.add_shape(shape); + builder_.add_base_obj(base_obj); + builder_.add_layout(layout); + builder_.add_storage_type(storage_type); + builder_.add_data_type(data_type); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateTensorDescriptorDirect( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset base_obj = 0, + tflite::gpu::data::DataType data_type = tflite::gpu::data::DataType::UNKNOWN, + tflite::gpu::data::TensorStorageType storage_type = tflite::gpu::data::TensorStorageType::UNKNOWN, + tflite::gpu::data::Layout layout = tflite::gpu::data::Layout::UNKNOWN, + flatbuffers::Offset shape = 0, + const std::vector *data = nullptr) { + auto data__ = data ? _fbb.CreateVector(*data) : 0; + return tflite::gpu::data::CreateTensorDescriptor( + _fbb, + base_obj, + data_type, + storage_type, + layout, + shape, + data__); +} + +struct BufferDescriptorMapValue FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef BufferDescriptorMapValueBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_KEY = 4, + VT_VALUE = 6 + }; + const flatbuffers::String *key() const { + return GetPointer(VT_KEY); + } + const tflite::gpu::data::BufferDescriptor *value() const { + return GetPointer(VT_VALUE); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_KEY) && + verifier.VerifyString(key()) && + VerifyOffset(verifier, VT_VALUE) && + verifier.VerifyTable(value()) && + verifier.EndTable(); + } +}; + +struct BufferDescriptorMapValueBuilder { + typedef BufferDescriptorMapValue Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_key(flatbuffers::Offset key) { + fbb_.AddOffset(BufferDescriptorMapValue::VT_KEY, key); + } + void add_value(flatbuffers::Offset value) { + fbb_.AddOffset(BufferDescriptorMapValue::VT_VALUE, value); + } + explicit BufferDescriptorMapValueBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : 
fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateBufferDescriptorMapValue( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset key = 0, + flatbuffers::Offset value = 0) { + BufferDescriptorMapValueBuilder builder_(_fbb); + builder_.add_value(value); + builder_.add_key(key); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateBufferDescriptorMapValueDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const char *key = nullptr, + flatbuffers::Offset value = 0) { + auto key__ = key ? _fbb.CreateString(key) : 0; + return tflite::gpu::data::CreateBufferDescriptorMapValue( + _fbb, + key__, + value); +} + +struct Texture2DDescriptorMapValue FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef Texture2DDescriptorMapValueBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_KEY = 4, + VT_VALUE = 6 + }; + const flatbuffers::String *key() const { + return GetPointer(VT_KEY); + } + const tflite::gpu::data::Texture2DDescriptor *value() const { + return GetPointer(VT_VALUE); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_KEY) && + verifier.VerifyString(key()) && + VerifyOffset(verifier, VT_VALUE) && + verifier.VerifyTable(value()) && + verifier.EndTable(); + } +}; + +struct Texture2DDescriptorMapValueBuilder { + typedef Texture2DDescriptorMapValue Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_key(flatbuffers::Offset key) { + fbb_.AddOffset(Texture2DDescriptorMapValue::VT_KEY, key); + } + void add_value(flatbuffers::Offset value) { + fbb_.AddOffset(Texture2DDescriptorMapValue::VT_VALUE, value); + } + explicit Texture2DDescriptorMapValueBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = 
fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateTexture2DDescriptorMapValue( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset key = 0, + flatbuffers::Offset value = 0) { + Texture2DDescriptorMapValueBuilder builder_(_fbb); + builder_.add_value(value); + builder_.add_key(key); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateTexture2DDescriptorMapValueDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const char *key = nullptr, + flatbuffers::Offset value = 0) { + auto key__ = key ? _fbb.CreateString(key) : 0; + return tflite::gpu::data::CreateTexture2DDescriptorMapValue( + _fbb, + key__, + value); +} + +struct TensorLinearDescriptorMapValue FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef TensorLinearDescriptorMapValueBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_KEY = 4, + VT_VALUE = 6 + }; + const flatbuffers::String *key() const { + return GetPointer(VT_KEY); + } + const tflite::gpu::data::TensorLinearDescriptor *value() const { + return GetPointer(VT_VALUE); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_KEY) && + verifier.VerifyString(key()) && + VerifyOffset(verifier, VT_VALUE) && + verifier.VerifyTable(value()) && + verifier.EndTable(); + } +}; + +struct TensorLinearDescriptorMapValueBuilder { + typedef TensorLinearDescriptorMapValue Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_key(flatbuffers::Offset key) { + fbb_.AddOffset(TensorLinearDescriptorMapValue::VT_KEY, key); + } + void add_value(flatbuffers::Offset value) { + fbb_.AddOffset(TensorLinearDescriptorMapValue::VT_VALUE, value); + } + explicit TensorLinearDescriptorMapValueBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = 
fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateTensorLinearDescriptorMapValue( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset key = 0, + flatbuffers::Offset value = 0) { + TensorLinearDescriptorMapValueBuilder builder_(_fbb); + builder_.add_value(value); + builder_.add_key(key); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateTensorLinearDescriptorMapValueDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const char *key = nullptr, + flatbuffers::Offset value = 0) { + auto key__ = key ? _fbb.CreateString(key) : 0; + return tflite::gpu::data::CreateTensorLinearDescriptorMapValue( + _fbb, + key__, + value); +} + +struct TensorDescriptorMapValue FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef TensorDescriptorMapValueBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_KEY = 4, + VT_VALUE = 6 + }; + const flatbuffers::String *key() const { + return GetPointer(VT_KEY); + } + const tflite::gpu::data::TensorDescriptor *value() const { + return GetPointer(VT_VALUE); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_KEY) && + verifier.VerifyString(key()) && + VerifyOffset(verifier, VT_VALUE) && + verifier.VerifyTable(value()) && + verifier.EndTable(); + } +}; + +struct TensorDescriptorMapValueBuilder { + typedef TensorDescriptorMapValue Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_key(flatbuffers::Offset key) { + fbb_.AddOffset(TensorDescriptorMapValue::VT_KEY, key); + } + void add_value(flatbuffers::Offset value) { + fbb_.AddOffset(TensorDescriptorMapValue::VT_VALUE, value); + } + explicit TensorDescriptorMapValueBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + 
flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateTensorDescriptorMapValue( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset key = 0, + flatbuffers::Offset value = 0) { + TensorDescriptorMapValueBuilder builder_(_fbb); + builder_.add_value(value); + builder_.add_key(key); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateTensorDescriptorMapValueDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const char *key = nullptr, + flatbuffers::Offset value = 0) { + auto key__ = key ? _fbb.CreateString(key) : 0; + return tflite::gpu::data::CreateTensorDescriptorMapValue( + _fbb, + key__, + value); +} + +struct Arguments FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ArgumentsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_INT_VALUES = 4, + VT_FLOAT_VALUES = 6, + VT_HALF_VALUES = 8, + VT_BUFFER_REFS = 10, + VT_TEXTURE2D_REFS = 12, + VT_TENSOR_LINEAR_REFS = 14, + VT_TENSOR_REFS = 16, + VT_BUFFER_OBJECTS = 18, + VT_TEXTURE2D_OBJECTS = 20, + VT_TENSOR_LINEAR_OBJECTS = 22, + VT_TENSOR_OBJECTS = 24 + }; + const flatbuffers::Vector> *int_values() const { + return GetPointer> *>(VT_INT_VALUES); + } + const flatbuffers::Vector> *float_values() const { + return GetPointer> *>(VT_FLOAT_VALUES); + } + const flatbuffers::Vector> *half_values() const { + return GetPointer> *>(VT_HALF_VALUES); + } + const flatbuffers::Vector> *buffer_refs() const { + return GetPointer> *>(VT_BUFFER_REFS); + } + const flatbuffers::Vector> *texture2d_refs() const { + return GetPointer> *>(VT_TEXTURE2D_REFS); + } + const flatbuffers::Vector> *tensor_linear_refs() const { + return GetPointer> *>(VT_TENSOR_LINEAR_REFS); + } + const flatbuffers::Vector> *tensor_refs() const { + return GetPointer> *>(VT_TENSOR_REFS); + } + const flatbuffers::Vector> *buffer_objects() const { + return GetPointer> 
*>(VT_BUFFER_OBJECTS); + } + const flatbuffers::Vector> *texture2d_objects() const { + return GetPointer> *>(VT_TEXTURE2D_OBJECTS); + } + const flatbuffers::Vector> *tensor_linear_objects() const { + return GetPointer> *>(VT_TENSOR_LINEAR_OBJECTS); + } + const flatbuffers::Vector> *tensor_objects() const { + return GetPointer> *>(VT_TENSOR_OBJECTS); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_INT_VALUES) && + verifier.VerifyVector(int_values()) && + verifier.VerifyVectorOfTables(int_values()) && + VerifyOffset(verifier, VT_FLOAT_VALUES) && + verifier.VerifyVector(float_values()) && + verifier.VerifyVectorOfTables(float_values()) && + VerifyOffset(verifier, VT_HALF_VALUES) && + verifier.VerifyVector(half_values()) && + verifier.VerifyVectorOfTables(half_values()) && + VerifyOffset(verifier, VT_BUFFER_REFS) && + verifier.VerifyVector(buffer_refs()) && + verifier.VerifyVectorOfTables(buffer_refs()) && + VerifyOffset(verifier, VT_TEXTURE2D_REFS) && + verifier.VerifyVector(texture2d_refs()) && + verifier.VerifyVectorOfTables(texture2d_refs()) && + VerifyOffset(verifier, VT_TENSOR_LINEAR_REFS) && + verifier.VerifyVector(tensor_linear_refs()) && + verifier.VerifyVectorOfTables(tensor_linear_refs()) && + VerifyOffset(verifier, VT_TENSOR_REFS) && + verifier.VerifyVector(tensor_refs()) && + verifier.VerifyVectorOfTables(tensor_refs()) && + VerifyOffset(verifier, VT_BUFFER_OBJECTS) && + verifier.VerifyVector(buffer_objects()) && + verifier.VerifyVectorOfTables(buffer_objects()) && + VerifyOffset(verifier, VT_TEXTURE2D_OBJECTS) && + verifier.VerifyVector(texture2d_objects()) && + verifier.VerifyVectorOfTables(texture2d_objects()) && + VerifyOffset(verifier, VT_TENSOR_LINEAR_OBJECTS) && + verifier.VerifyVector(tensor_linear_objects()) && + verifier.VerifyVectorOfTables(tensor_linear_objects()) && + VerifyOffset(verifier, VT_TENSOR_OBJECTS) && + verifier.VerifyVector(tensor_objects()) && + 
verifier.VerifyVectorOfTables(tensor_objects()) && + verifier.EndTable(); + } +}; + +struct ArgumentsBuilder { + typedef Arguments Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_int_values(flatbuffers::Offset>> int_values) { + fbb_.AddOffset(Arguments::VT_INT_VALUES, int_values); + } + void add_float_values(flatbuffers::Offset>> float_values) { + fbb_.AddOffset(Arguments::VT_FLOAT_VALUES, float_values); + } + void add_half_values(flatbuffers::Offset>> half_values) { + fbb_.AddOffset(Arguments::VT_HALF_VALUES, half_values); + } + void add_buffer_refs(flatbuffers::Offset>> buffer_refs) { + fbb_.AddOffset(Arguments::VT_BUFFER_REFS, buffer_refs); + } + void add_texture2d_refs(flatbuffers::Offset>> texture2d_refs) { + fbb_.AddOffset(Arguments::VT_TEXTURE2D_REFS, texture2d_refs); + } + void add_tensor_linear_refs(flatbuffers::Offset>> tensor_linear_refs) { + fbb_.AddOffset(Arguments::VT_TENSOR_LINEAR_REFS, tensor_linear_refs); + } + void add_tensor_refs(flatbuffers::Offset>> tensor_refs) { + fbb_.AddOffset(Arguments::VT_TENSOR_REFS, tensor_refs); + } + void add_buffer_objects(flatbuffers::Offset>> buffer_objects) { + fbb_.AddOffset(Arguments::VT_BUFFER_OBJECTS, buffer_objects); + } + void add_texture2d_objects(flatbuffers::Offset>> texture2d_objects) { + fbb_.AddOffset(Arguments::VT_TEXTURE2D_OBJECTS, texture2d_objects); + } + void add_tensor_linear_objects(flatbuffers::Offset>> tensor_linear_objects) { + fbb_.AddOffset(Arguments::VT_TENSOR_LINEAR_OBJECTS, tensor_linear_objects); + } + void add_tensor_objects(flatbuffers::Offset>> tensor_objects) { + fbb_.AddOffset(Arguments::VT_TENSOR_OBJECTS, tensor_objects); + } + explicit ArgumentsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateArguments( + 
flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset>> int_values = 0, + flatbuffers::Offset>> float_values = 0, + flatbuffers::Offset>> half_values = 0, + flatbuffers::Offset>> buffer_refs = 0, + flatbuffers::Offset>> texture2d_refs = 0, + flatbuffers::Offset>> tensor_linear_refs = 0, + flatbuffers::Offset>> tensor_refs = 0, + flatbuffers::Offset>> buffer_objects = 0, + flatbuffers::Offset>> texture2d_objects = 0, + flatbuffers::Offset>> tensor_linear_objects = 0, + flatbuffers::Offset>> tensor_objects = 0) { + ArgumentsBuilder builder_(_fbb); + builder_.add_tensor_objects(tensor_objects); + builder_.add_tensor_linear_objects(tensor_linear_objects); + builder_.add_texture2d_objects(texture2d_objects); + builder_.add_buffer_objects(buffer_objects); + builder_.add_tensor_refs(tensor_refs); + builder_.add_tensor_linear_refs(tensor_linear_refs); + builder_.add_texture2d_refs(texture2d_refs); + builder_.add_buffer_refs(buffer_refs); + builder_.add_half_values(half_values); + builder_.add_float_values(float_values); + builder_.add_int_values(int_values); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateArgumentsDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector> *int_values = nullptr, + const std::vector> *float_values = nullptr, + const std::vector> *half_values = nullptr, + const std::vector> *buffer_refs = nullptr, + const std::vector> *texture2d_refs = nullptr, + const std::vector> *tensor_linear_refs = nullptr, + const std::vector> *tensor_refs = nullptr, + const std::vector> *buffer_objects = nullptr, + const std::vector> *texture2d_objects = nullptr, + const std::vector> *tensor_linear_objects = nullptr, + const std::vector> *tensor_objects = nullptr) { + auto int_values__ = int_values ? _fbb.CreateVector>(*int_values) : 0; + auto float_values__ = float_values ? _fbb.CreateVector>(*float_values) : 0; + auto half_values__ = half_values ? _fbb.CreateVector>(*half_values) : 0; + auto buffer_refs__ = buffer_refs ? 
_fbb.CreateVector>(*buffer_refs) : 0; + auto texture2d_refs__ = texture2d_refs ? _fbb.CreateVector>(*texture2d_refs) : 0; + auto tensor_linear_refs__ = tensor_linear_refs ? _fbb.CreateVector>(*tensor_linear_refs) : 0; + auto tensor_refs__ = tensor_refs ? _fbb.CreateVector>(*tensor_refs) : 0; + auto buffer_objects__ = buffer_objects ? _fbb.CreateVector>(*buffer_objects) : 0; + auto texture2d_objects__ = texture2d_objects ? _fbb.CreateVector>(*texture2d_objects) : 0; + auto tensor_linear_objects__ = tensor_linear_objects ? _fbb.CreateVector>(*tensor_linear_objects) : 0; + auto tensor_objects__ = tensor_objects ? _fbb.CreateVector>(*tensor_objects) : 0; + return tflite::gpu::data::CreateArguments( + _fbb, + int_values__, + float_values__, + half_values__, + buffer_refs__, + texture2d_refs__, + tensor_linear_refs__, + tensor_refs__, + buffer_objects__, + texture2d_objects__, + tensor_linear_objects__, + tensor_objects__); +} + +} // namespace data +} // namespace gpu +} // namespace tflite + +#endif // FLATBUFFERS_GENERATED_SERIALIZATIONBASE_TFLITE_GPU_DATA_H_ diff --git a/tensorflow/lite/micro/tools/make/Makefile b/tensorflow/lite/micro/tools/make/Makefile index 044d4669d54..0571d7d077b 100644 --- a/tensorflow/lite/micro/tools/make/Makefile +++ b/tensorflow/lite/micro/tools/make/Makefile @@ -485,7 +485,7 @@ microlite: $(MICROLITE_LIB_PATH) # Hack for generating schema file bypassing flatbuffer parsing tensorflow/lite/schema/schema_generated.h: - @cp -u tensorflow/lite/schema/schema_generated.h.OPENSOURCE tensorflow/lite/schema/schema_generated.h + @cp -u tensorflow/lite/schema/schema_generated.h.oss tensorflow/lite/schema/schema_generated.h # Gathers together all the objects we've compiled into a single '.a' archive. 
$(MICROLITE_LIB_PATH): tensorflow/lite/schema/schema_generated.h $(MICROLITE_LIB_OBJS) diff --git a/tensorflow/lite/tools/make/Makefile b/tensorflow/lite/tools/make/Makefile index 9d90e9526be..38d33eb708c 100644 --- a/tensorflow/lite/tools/make/Makefile +++ b/tensorflow/lite/tools/make/Makefile @@ -346,7 +346,7 @@ micro: $(LIB_PATH) # Hack for generating schema file bypassing flatbuffer parsing tensorflow/lite/schema/schema_generated.h: - @cp -u tensorflow/lite/schema/schema_generated.h.OPENSOURCE tensorflow/lite/schema/schema_generated.h + @cp -u tensorflow/lite/schema/schema_generated.h.oss tensorflow/lite/schema/schema_generated.h # Gathers together all the objects we've compiled into a single '.a' archive. $(LIB_PATH): tensorflow/lite/schema/schema_generated.h $(LIB_OBJS) diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files index 010c9357aa3..ccbdfbffc7c 100644 --- a/tensorflow/opensource_only.files +++ b/tensorflow/opensource_only.files @@ -5,7 +5,11 @@ tensorflow/compat_template.__init__.py tensorflow/compat_template_v1.__init__.py tensorflow/compiler/mlir/glob_lit_test.bzl tensorflow/go/op/wrappers.go +tensorflow/lite/delegates/gpu/cl/compiled_program_cache_generated.h +tensorflow/lite/delegates/gpu/cl/serialization_generated.h +tensorflow/lite/delegates/gpu/common/task/serialization_base_generated.h tensorflow/lite/micro/build_def.bzl +tensorflow/lite/schema/schema_generated.h tensorflow/python/autograph/core/config.py tensorflow/python/eager/benchmarks_test_base.py tensorflow/python/framework/tfrt_utils.py From c4ca44f0cdb47a7034ccb248a3fe2a6714c94230 Mon Sep 17 00:00:00 2001 From: Cloud Han Date: Tue, 10 Nov 2020 21:08:39 +0800 Subject: [PATCH 105/220] fix tpu_executor_dlsym_initializer windows build error --- tensorflow/core/tpu/BUILD | 5 ++- .../tpu_executor_dlsym_initializer_windows.cc | 33 +++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 
tensorflow/core/tpu/tpu_executor_dlsym_initializer_windows.cc diff --git a/tensorflow/core/tpu/BUILD b/tensorflow/core/tpu/BUILD index f6e058a7efe..6887c388485 100644 --- a/tensorflow/core/tpu/BUILD +++ b/tensorflow/core/tpu/BUILD @@ -162,7 +162,10 @@ cc_library( # not link in both this and "tpu_api_dlsym_initializer". cc_library( name = "tpu_executor_dlsym_initializer", - srcs = ["tpu_executor_dlsym_initializer.cc"], + srcs = if_windows( + ["tpu_executor_dlsym_initializer_windows.cc"], + otherwise = ["tpu_executor_dlsym_initializer.cc"], + ), visibility = ["//visibility:public"], deps = [ ":tpu_api_dlsym_set_fn", diff --git a/tensorflow/core/tpu/tpu_executor_dlsym_initializer_windows.cc b/tensorflow/core/tpu/tpu_executor_dlsym_initializer_windows.cc new file mode 100644 index 00000000000..0e6d96c55de --- /dev/null +++ b/tensorflow/core/tpu/tpu_executor_dlsym_initializer_windows.cc @@ -0,0 +1,33 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// TODO(skye): this is largely a copy of tpu_api_dlsym_initializer.cc. Figure +// out how to deduplicate these files a little. 
+ + +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow/core/tpu/tpu_api_dlsym_set_fn.h" + +namespace tensorflow { +namespace tpu { + +Status InitializeTpuLibrary(void* library_handle) { + return errors::Unimplemented( + "Loading TPU library is not supported on Windows."); +} + +} // namespace tpu +} // namespace tensorflow From 5f282a30db6da354a6b15392c6b5d10f9ed86f63 Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Tue, 10 Nov 2020 05:41:31 -0800 Subject: [PATCH 106/220] Also propagate information about strides due to TensorFlow ABI. In principle, this could also be done by propagating constants at the LLVM dialect level (and through structures) but we cannot do this in mlir yet. So manually propagate information, instead. PiperOrigin-RevId: 341601153 Change-Id: If45cf3c9ce1d68bedbdd272c6843071374d3baee --- .../kernel_gen/tests/tf_abi_knowledge.mlir | 6 ++++-- .../tensorflow_abi_knowledge_propagation.cc | 18 ++++++++++++++---- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_abi_knowledge.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_abi_knowledge.mlir index d9f6f036b2e..eb7e50c17b6 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_abi_knowledge.mlir +++ b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_abi_knowledge.mlir @@ -68,6 +68,7 @@ module attributes {gpu.container_module} { // SHAPE-SAME: %[[ARG0:.*]]: !llvm.ptr, %[[ARG1:.*]]: !llvm.ptr, %[[ARG2:.*]]: !llvm.i64, %[[ARG3:.*]]: !llvm.i64, %[[ARG4:.*]]: !llvm.i64, %[[ARG5:.*]]: !llvm.ptr, %[[ARG6:.*]]: !llvm.ptr, %[[ARG7:.*]]: !llvm.i64, %[[ARG8:.*]]: !llvm.i64, %[[ARG9:.*]]: !llvm.i64 llvm.func @abs_kernel(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.i64, %arg3: !llvm.i64, %arg4: !llvm.i64, %arg5: !llvm.ptr, %arg6: !llvm.ptr, %arg7: !llvm.i64, %arg8: !llvm.i64, %arg9: !llvm.i64) attributes {gpu.kernel} { // ABI: %[[ZERO:.*]] = llvm.mlir.constant(0 
: index) + // ABI: %[[ONE:.*]] = llvm.mlir.constant(1 : index) // CHECK: llvm.mlir.undef %0 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> // ABI-NEXT: llvm.insertvalue %[[ARG1]] @@ -80,7 +81,8 @@ module attributes {gpu.container_module} { %3 = llvm.insertvalue %arg2, %2[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> // CHECK-NEXT: llvm.insertvalue %[[ARG3]] %4 = llvm.insertvalue %arg3, %3[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - // CHECK-NEXT: llvm.insertvalue %[[ARG4]] + // ABI-NEXT: llvm.insertvalue %[[ONE]] + // SHAPE-NEXT: llvm.insertvalue %[[ARG4]] %5 = llvm.insertvalue %arg4, %4[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> // CHECK-NEXT: llvm.mlir.undef %6 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> @@ -95,7 +97,7 @@ module attributes {gpu.container_module} { // ABI-NEXT: llvm.insertvalue %[[ARG8]] // SHAPE-NEXT: llvm.insertvalue %[[ARG3]] %10 = llvm.insertvalue %arg8, %9[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - // ABI-NEXT: llvm.insertvalue %[[ARG9]] + // ABI-NEXT: llvm.insertvalue %[[ONE]] // SHAPE-NEXT: llvm.insertvalue %[[ARG4]] %11 = llvm.insertvalue %arg9, %10[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> %12 = nvvm.read.ptx.sreg.ctaid.x : !llvm.i32 diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tensorflow_abi_knowledge_propagation.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tensorflow_abi_knowledge_propagation.cc index 4328bb22743..b1e23309775 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tensorflow_abi_knowledge_propagation.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tensorflow_abi_knowledge_propagation.cc @@ -85,6 +85,8 @@ struct PropagateTfAbiKnowledgeToKernelsPass int kernel_p = 0; OpBuilder b = OpBuilder::atBlockBegin(&kernel.body().front()); Value zero; + Value one; + auto 
loc = kernel.getLoc(); for (auto operand : launch.operands()) { auto memref = operand.getType().dyn_cast(); if (!memref) { @@ -94,17 +96,25 @@ struct PropagateTfAbiKnowledgeToKernelsPass } if (allocated_by_runtime.contains(operand)) { // This was allocated by the tf runtime, so it is aligned, has no - // offset and the two pointers in the descriptor coincide. Rewrite - // the kernel accordingly. + // offset, an inner stride of 1 and the two pointers in the descriptor + // coincide. Rewrite the kernel accordingly. Value alloc_ptr = kernel.getArgument(kernel_p); Value align_ptr = kernel.getArgument(kernel_p + 1); alloc_ptr.replaceAllUsesWith(align_ptr); Value offset = kernel.getArgument(kernel_p + 2); if (!zero) { - zero = b.create(kernel.getLoc(), offset.getType(), + zero = b.create(loc, offset.getType(), b.getIndexAttr(0)); } offset.replaceAllUsesWith(zero); + // The stride is the last argument belonging to this memref. + Value inner_stride = + kernel.getArgument(kernel_p + 2 + memref.getRank() * 2); + if (!one) { + one = b.create(loc, offset.getType(), + b.getIndexAttr(1)); + } + inner_stride.replaceAllUsesWith(one); kernel.setArgAttr( kernel_p + 1, kLLVMAlignAttrName, b.getIndexAttr( @@ -134,7 +144,7 @@ struct PropagateTfAbiKnowledgeToKernelsPass Value candidate = worklist.pop_back_val(); for (auto user : candidate.getUsers()) { if (auto reshape = dyn_cast(user)) { - // Reshape propagates alignment and offset. + // Reshape propagates alignment, offset and innermost stride. // TODO(herhut): This should be a trait. if (allocated_by_runtime.insert(reshape.result()).second) { worklist.push_back(reshape.result()); From 48cb8c133d35de09a1eaf0f0fb50dae1bc013fb0 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Tue, 10 Nov 2020 06:48:14 -0800 Subject: [PATCH 107/220] Do not generate ranked sin GPU kernel. It seems this doesn't work with ROCM. 
PiperOrigin-RevId: 341608987 Change-Id: I75bf5f40a4a49d9b313d34c3dd81607f6fc622f5 --- tensorflow/core/kernels/mlir_generated/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/kernels/mlir_generated/BUILD b/tensorflow/core/kernels/mlir_generated/BUILD index b5f13edc801..fc414fe78b8 100644 --- a/tensorflow/core/kernels/mlir_generated/BUILD +++ b/tensorflow/core/kernels/mlir_generated/BUILD @@ -428,6 +428,7 @@ gen_kernel_library( gen_kernel_library( name = "sin", + generate_ranked = False, generate_unranked = True, same_shape = "0,1", tile_size = "256", From 7ff73bb4b2b287b84ee1a73db72ac4cc5d84ff9e Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Tue, 10 Nov 2020 09:37:24 -0800 Subject: [PATCH 108/220] Create a new ROCM RBE Docker image with ROCM 3.9. Update the hash accordingly. PiperOrigin-RevId: 341636759 Change-Id: Id49d6889c2686fe038bcd67faacabb8e5a478928 --- .../Dockerfile.rbe.rocm-ubuntu18.04-manylinux2010-multipython | 4 ++-- third_party/toolchains/preconfig/generate/containers.bzl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu18.04-manylinux2010-multipython b/tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu18.04-manylinux2010-multipython index 1f27ab1d502..5a59371cf41 100644 --- a/tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu18.04-manylinux2010-multipython +++ b/tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu18.04-manylinux2010-multipython @@ -16,8 +16,8 @@ ARG DEBIAN_FRONTEND=noninteractive # Install ROCm packages RUN apt-get update && apt-get install -y --no-install-recommends \ curl libnuma-dev gnupg sudo libelf1 build-essential \ - && curl -sL http://repo.radeon.com/rocm/apt/debian/rocm.gpg.key | apt-key add - \ - && printf "deb [arch=amd64] http://repo.radeon.com/rocm/apt/debian/ xenial main" | tee /etc/apt/sources.list.d/rocm.list \ + && curl -sL http://repo.radeon.com/rocm/apt/3.9/rocm.gpg.key | apt-key add - \ + && printf "deb 
[arch=amd64] http://repo.radeon.com/rocm/apt/3.9/ xenial main" | tee /etc/apt/sources.list.d/rocm.list \ && apt-get update && apt-get install -y --no-install-recommends \ rocm-dev rocm-libs hipcub rocm-utils rocm-cmake \ rocfft miopen-hip miopengemm rocblas hipblas rocrand rccl \ diff --git a/third_party/toolchains/preconfig/generate/containers.bzl b/third_party/toolchains/preconfig/generate/containers.bzl index 8efa702bf1c..31a85f430c3 100644 --- a/third_party/toolchains/preconfig/generate/containers.bzl +++ b/third_party/toolchains/preconfig/generate/containers.bzl @@ -12,6 +12,6 @@ container_digests = { "cuda10.1-cudnn7-ubuntu16.04-manylinux2010-multipython": "sha256:3f890a951c81a201d60d0161a56ce628a90323be0c7f795550caa37f6f41a85c", "cuda10.1-cudnn7-ubuntu18.04-manylinux2010-multipython": "sha256:bd7666d1ef49b2b2e2a64981f1c9234deeccdb0d5198b30ff4289c3dfcffedbf", "cuda11.0-cudnn8-ubuntu18.04-manylinux2010-multipython": "sha256:f436545b7e14b014393b42975923dcd01f408496b1399abb5a35608f888ca140", - "rocm-ubuntu18.04-manylinux2010-multipython": "sha256:8c6ba5a831c23906716cc9e9c201081f2b5632e3bf3cbc0207da0ddbef18d525", + "rocm-ubuntu18.04-manylinux2010-multipython": "sha256:bf6fd9711e82ae2c43229ef97f972db6d35975fd5774da3fb2e64bf952c426d0", "windows-1803": "sha256:f109576c7c0c8a1783ff22b666e8923b52dbbe7933f69a1c7a7275202c304a12", } From 7d2981e88c94582276e3054338f7ff8f05b620f8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Nov 2020 09:42:44 -0800 Subject: [PATCH 109/220] Refactor mhlo->tf legalization for mhlo::ReduceOp * Improve reduction function matching to be more strict against invalid reduction functions. * Share more code between the 3 reduce op rewrite patterns. * Move the reduction function matching into a new function for future reuse by the mhlo::ReduceWindowOp legaliser. 
PiperOrigin-RevId: 341637842 Change-Id: I9546edd8c6be6a5d54e676bd040d84dc024c2125 --- .../tensorflow/transforms/legalize_hlo.cc | 189 ++++++++---------- 1 file changed, 87 insertions(+), 102 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo.cc b/tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo.cc index 94fb123026c..4e9f9871964 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo.cc @@ -282,7 +282,7 @@ void Append(llvm::SmallVectorImpl &values, Range &&range) { // Appends all elements in `range` to `values`. template void Append(llvm::SmallVectorImpl &values, Range &&range, - RangeTs &&... ranges) { + RangeTs &&...ranges) { values.insert(values.end(), range.begin(), range.end()); Append(values, ranges...); } @@ -295,13 +295,13 @@ size_t Size(Range &&range) { // Returns the total number of elements in a variadic number of `ranges`. template -size_t Size(Range &&range, RangeTs &&... ranges) { +size_t Size(Range &&range, RangeTs &&...ranges) { return range.size() + Size(std::forward(ranges)...); } // Concats all elements in `ranges` and returns a small vector as a result. template -llvm::SmallVector Concat(RangeTs &&... ranges) { +llvm::SmallVector Concat(RangeTs &&...ranges) { llvm::SmallVector results; results.reserve(Size(std::forward(ranges)...)); Append(results, std::forward(ranges)...); @@ -472,29 +472,34 @@ Value ConvertDotGeneralOp(PatternRewriter &rewriter, Operation *old_op) { return reshaped.getResult(); } -// This function tries to match that the "mhlo::ReduceOp" only has one -// input, one init_value and one result. Also "mhlo::ReduceOp" has two ops -// in the region, and the last one is return op. 
-LogicalResult MatchReduceOpInput(mhlo::ReduceOp reduce_op) { - if (reduce_op.operands().size() != 1 || reduce_op.init_values().size() != 1 || - reduce_op.getResults().size() != 1) +// Checks if the specified region is a binary reduction function that takes 2 +// inputs, passes them to an instance of the specified reduction op and then +// returns the result. +template +LogicalResult MatchBinaryReduceFunction(mlir::Region &function) { + Block &body = function.front(); + if (body.getNumArguments() != 2) return failure(); + if (body.getOperations().size() != 2) return failure(); + + ReductionOp reduce_op = dyn_cast(body.front()); + if (!reduce_op) return failure(); + if (reduce_op.lhs() != body.getArgument(0) || + reduce_op.rhs() != body.getArgument(1)) return failure(); - if (!reduce_op.operands()[0].getType().isa()) - return failure(); - if (!reduce_op.getType(0).isa()) return failure(); - - auto block = &reduce_op.body().front(); - if (block->getOperations().size() != 2 || isa(block->back())) + mhlo::ReturnOp return_op = dyn_cast(body.back()); + if (!return_op) return failure(); + if (return_op.getNumOperands() != 1 || + return_op.results().front() != reduce_op) return failure(); return success(); } -// TODO(jingpu): This "mhlo::ReduceOp" can corresponds to many TF ops -// with different ops in reduce_op.body. Now we only match to "tf.Max", "tf.Min" -// and "tf.Sum". -class ConvertReduceOpToTfSum : public OpConversionPattern { +// Converts an mhlo.reduce op with the specified BinaryOp as the reduction +// operation into the specified TfOp. 
+template +class ConvertReduceOpToTfOp : public OpConversionPattern { public: using OpConversionPattern::OpConversionPattern; @@ -503,116 +508,96 @@ class ConvertReduceOpToTfSum : public OpConversionPattern { ConversionPatternRewriter &rewriter) const final { if (failed(MatchReduceOpInput(reduce_op))) return failure(); - Operation *first_op = &reduce_op.body().front().front(); - if (!llvm::isa(first_op)) return failure(); + if (failed(MatchBinaryReduceFunction(reduce_op.body()))) + return failure(); // In `MatchReduceOpInput` function, we already match that the // "mhlo::ReduceOp" only has one input, one init_value and one result. + if (failed(MatchInitValue(reduce_op.init_values()[0]))) return failure(); + auto input = reduce_op.operands()[0]; + // Get reduction dimension. DenseIntElementsAttr dimension = reduce_op.dimensions(); SmallVector reduce_dims; for (const int64_t &dim : dimension.getValues()) { reduce_dims.emplace_back(dim); } - - // Check initial value is zero. - DenseFPElementsAttr init_value; - if (!matchPattern(reduce_op.init_values()[0], m_Constant(&init_value)) || - !init_value.isSplat() || !init_value.getSplatValue().isZero()) - return failure(); - auto dim_type = RankedTensorType::get( {static_cast(reduce_dims.size())}, rewriter.getI64Type()); auto reduction_indices = rewriter.create( reduce_op.getLoc(), dim_type, rewriter.getI64TensorAttr(reduce_dims)); - rewriter.replaceOpWithNewOp( - reduce_op, reduce_op.getType(0), input, reduction_indices, - /*keep_dim=*/rewriter.getBoolAttr(false)); + + rewriter.replaceOpWithNewOp(reduce_op, reduce_op.getType(0), input, + reduction_indices, + /*keep_dim=*/rewriter.getBoolAttr(false)); return success(); - }; + } + + private: + // Checks that the init value matches with the init value expected for the + // target TfOp. + virtual LogicalResult MatchInitValue(Value init_value) const = 0; + + // This function tries to match that the "mhlo::ReduceOp" only has one + // input, one init_value and one result. 
+ LogicalResult MatchReduceOpInput(mhlo::ReduceOp reduce_op) const { + if (reduce_op.operands().size() != 1 || + reduce_op.init_values().size() != 1 || + reduce_op.getResults().size() != 1) + return failure(); + + if (!reduce_op.operands()[0].getType().isa()) + return failure(); + if (!reduce_op.getType(0).isa()) return failure(); + return success(); + } }; -class ConvertReduceOpToTfMax : public OpConversionPattern { +class ConvertReduceOpToTfSum + : public ConvertReduceOpToTfOp { public: - using OpConversionPattern::OpConversionPattern; + using ConvertReduceOpToTfOp::ConvertReduceOpToTfOp; - LogicalResult matchAndRewrite( - mhlo::ReduceOp reduce_op, ArrayRef args, - ConversionPatternRewriter &rewriter) const final { - if (failed(MatchReduceOpInput(reduce_op))) return failure(); - - Operation *first_op = &reduce_op.body().front().front(); - if (!llvm::isa(first_op)) return failure(); - - // In `MatchReduceOpInput` function, we already match that the - // "mhlo::ReduceOp" only has one input, one init_value and one result. - auto input = reduce_op.operands()[0]; - // Get reduction dimension. - DenseIntElementsAttr dimension = reduce_op.dimensions(); - SmallVector reduce_dims; - for (const int64_t &dim : dimension.getValues()) { - reduce_dims.emplace_back(dim); - } - - // Check initial value is float.minimum. 
- DenseFPElementsAttr init_value; - if (!matchPattern(reduce_op.init_values()[0], m_Constant(&init_value)) || - !init_value.isSplat() || - !init_value.getSplatValue().isInfinity() || - !init_value.getSplatValue().isNegative()) + LogicalResult MatchInitValue(Value init_value) const override { + DenseFPElementsAttr init_attr; + if (!matchPattern(init_value, m_Constant(&init_attr)) || + !init_attr.isSplat() || !init_attr.getSplatValue().isZero()) return failure(); - - auto dim_type = RankedTensorType::get( - {static_cast(reduce_dims.size())}, rewriter.getI64Type()); - auto reduction_indices = rewriter.create( - reduce_op.getLoc(), dim_type, rewriter.getI64TensorAttr(reduce_dims)); - rewriter.replaceOpWithNewOp( - reduce_op, reduce_op.getType(0), input, reduction_indices, - /*keep_dim=*/rewriter.getBoolAttr(false)); return success(); - }; + } }; -class ConvertReduceOpToTfMin : public OpConversionPattern { +class ConvertReduceOpToTfMax + : public ConvertReduceOpToTfOp { public: - using OpConversionPattern::OpConversionPattern; + using ConvertReduceOpToTfOp::ConvertReduceOpToTfOp; - LogicalResult matchAndRewrite( - mhlo::ReduceOp reduce_op, ArrayRef args, - ConversionPatternRewriter &rewriter) const final { - if (failed(MatchReduceOpInput(reduce_op))) return failure(); - - Operation *first_op = &reduce_op.body().front().front(); - if (!llvm::isa(first_op)) return failure(); - - // In `MatchReduceOpInput` function, we already match that the - // "mhlo::ReduceOp" only has one input, one init_value and one result. - Value input = reduce_op.operands()[0]; - // Get reduction dimension. - DenseIntElementsAttr dimension = reduce_op.dimensions(); - SmallVector reduce_dims; - for (const int64_t &dim : dimension.getValues()) { - reduce_dims.emplace_back(dim); - } - - // Check initial value is +INF. 
- DenseFPElementsAttr init_value; - if (!matchPattern(reduce_op.init_values()[0], m_Constant(&init_value)) || - !init_value.isSplat() || - !init_value.getSplatValue().isInfinity() || - init_value.getSplatValue().isNegative()) + LogicalResult MatchInitValue(Value init_value) const override { + DenseFPElementsAttr init_attr; + if (!matchPattern(init_value, m_Constant(&init_attr)) || + !init_attr.isSplat() || + !init_attr.getSplatValue().isInfinity() || + !init_attr.getSplatValue().isNegative()) return failure(); - - auto dim_type = RankedTensorType::get( - {static_cast(reduce_dims.size())}, rewriter.getI64Type()); - auto reduction_indices = rewriter.create( - reduce_op.getLoc(), dim_type, rewriter.getI64TensorAttr(reduce_dims)); - rewriter.replaceOpWithNewOp( - reduce_op, reduce_op.getType(0), input, reduction_indices, - /*keep_dim=*/rewriter.getBoolAttr(false)); return success(); - }; + } +}; + +class ConvertReduceOpToTfMin + : public ConvertReduceOpToTfOp { + public: + using ConvertReduceOpToTfOp::ConvertReduceOpToTfOp; + + LogicalResult MatchInitValue(Value init_value) const override { + DenseFPElementsAttr init_attr; + if (!matchPattern(init_value, m_Constant(&init_attr)) || + !init_attr.isSplat() || + !init_attr.getSplatValue().isInfinity() || + init_attr.getSplatValue().isNegative()) + return failure(); + return success(); + } }; class ConvertIotaOpToTfRange : public OpConversionPattern { From ad95899595cba5d6bc3e4936340b62ed7b7cea58 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Tue, 10 Nov 2020 10:04:38 -0800 Subject: [PATCH 110/220] Allow autograph to be applied in internal helper utility. That avoids downstream calls from being confused about whether autograph is enabled in their context. This adds a small overhead to the building of model metrics (~200ms). This overhead should not be noticeable outside of tests which create very large numbers of models. 
PiperOrigin-RevId: 341642593 Change-Id: I7d1e13d70d5df072b5215c69f9480f18480b92b5 --- .../python/keras/saving/saving_utils.py | 3 +- .../python/keras/saving/saving_utils_test.py | 71 +++++++++++++++++++ 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/saving/saving_utils.py b/tensorflow/python/keras/saving/saving_utils.py index d16f69bd89d..e459d174fa9 100644 --- a/tensorflow/python/keras/saving/saving_utils.py +++ b/tensorflow/python/keras/saving/saving_utils.py @@ -122,8 +122,7 @@ def trace_model_call(model, input_signature=None): if input_signature is None: raise_model_input_error(model) - # TODO(mdan): Should the model's call be autographed by default? - @def_function.function(input_signature=input_signature, autograph=False) + @def_function.function(input_signature=input_signature) def _wrapped_model(*args): """A concrete tf.function that wraps the model's call function.""" # When given a single input, Keras models will call the model on the tensor diff --git a/tensorflow/python/keras/saving/saving_utils_test.py b/tensorflow/python/keras/saving/saving_utils_test.py index 85f421a8507..f0a04d86707 100644 --- a/tensorflow/python/keras/saving/saving_utils_test.py +++ b/tensorflow/python/keras/saving/saving_utils_test.py @@ -45,6 +45,7 @@ from tensorflow.python.keras.optimizer_v2 import gradient_descent from tensorflow.python.keras.saving import saving_utils from tensorflow.python.ops import array_ops from tensorflow.python.platform import test +from tensorflow.python.saved_model import load as load_lib from tensorflow.python.saved_model import loader from tensorflow.python.saved_model import save as save_lib from tensorflow.python.saved_model import signature_constants @@ -268,10 +269,80 @@ def _import_and_infer(save_dir, inputs): return session.run(output_dict, feed_dict=feed_dict) +class AutographedMetric(keras.metrics.Metric): + + def build(self, input_shape): + pass + + def update_state(self, values): + if 
constant_op.constant(False): + x = 1 + else: + x = 2 + return x + + def reset_states(self): + pass + + def result(self): + return constant_op.constant(0) + + def GetMean(self): + return constant_op.constant(0) + + def GetCount(self): + return constant_op.constant(0) + + +class BasicAutographedMetricLayer(keras.layers.Layer): + + def build(self, input_shape): + self._metric = AutographedMetric() + + def call(self, inp): + self._metric.update_state(inp) + # TODO(b/172853147): Test control flow here. + return inp + + +class BasicAutographedMetricModel(keras.models.Model): + + def __init__(self): + super(BasicAutographedMetricModel, self).__init__(name='test_model') + self._layer = BasicAutographedMetricLayer() + + def call(self, inputs, **kwargs): + return self._layer(inputs) + + @keras_parameterized.run_with_all_model_types @keras_parameterized.run_all_keras_modes(always_skip_v1=True) class ModelSaveTest(keras_parameterized.TestCase): + def test_model_save_preserves_autograph(self): + model = BasicAutographedMetricModel() + inputs = array_ops.ones((8, 5)) + model._set_inputs(inputs) + + save_dir = os.path.join(self.get_temp_dir(), 'saved_model') + save_lib.save(model, save_dir) + + if model.output_names: + output_name = model.output_names[0] + input_name = model.input_names[0] + else: + output_name = 'output_1' + input_name = 'input_1' + + self.assertAllClose({output_name: model.predict_on_batch(inputs)}, + _import_and_infer(save_dir, + {input_name: np.ones((8, 5))})) + + # Test v2 loading. + # TODO(mdan): tests using _import_and_infer should uniformly do this. + self.assertAllClose(model.predict_on_batch(inputs), + load_lib.load(save_dir)(inputs)) + def test_model_save(self): input_dim = 5 model = testing_utils.get_small_mlp(10, 3, input_dim) From 86acd2c0aa0e172ce159899c1af661d010eaa944 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Tue, 10 Nov 2020 10:21:12 -0800 Subject: [PATCH 111/220] Enable xla detailed logging by default. 
PiperOrigin-RevId: 341646294 Change-Id: Ia488f3d003746df9b69badc45d6ebdd9693163ee --- tensorflow/compiler/xla/debug_options_flags.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/debug_options_flags.cc b/tensorflow/compiler/xla/debug_options_flags.cc index 41860deb2e6..757fe9dbe7e 100644 --- a/tensorflow/compiler/xla/debug_options_flags.cc +++ b/tensorflow/compiler/xla/debug_options_flags.cc @@ -74,7 +74,7 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() { opts.set_xla_cpu_enable_xprof_traceme(false); opts.set_xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found(false); opts.set_xla_multiheap_size_constraint_per_heap(-1); - + opts.set_xla_detailed_logging(true); return opts; } From 5ca77e649bb0174c4ec06491291989c7d5d4a040 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Nov 2020 10:29:55 -0800 Subject: [PATCH 112/220] Explicitly define the default constructor for the mobile implementation of PercentileSampler. PiperOrigin-RevId: 341648278 Change-Id: I737ece652fdf18a4632f6c351a24d0f671aae285 --- tensorflow/core/lib/monitoring/mobile_percentile_sampler.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/core/lib/monitoring/mobile_percentile_sampler.h b/tensorflow/core/lib/monitoring/mobile_percentile_sampler.h index 2c792f0e0cb..914a3841ab2 100644 --- a/tensorflow/core/lib/monitoring/mobile_percentile_sampler.h +++ b/tensorflow/core/lib/monitoring/mobile_percentile_sampler.h @@ -51,6 +51,8 @@ class PercentileSampler { private: PercentileSamplerCell default_cell_; + PercentileSampler() = default; + TF_DISALLOW_COPY_AND_ASSIGN(PercentileSampler); }; From 38ec8b71e807554fd59b158364a9220e680375ce Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Nov 2020 10:39:07 -0800 Subject: [PATCH 113/220] [TF:TRT] Baseline of TensorRT testing samples. Run models with or without TensorRT and analyze the timing and numerics. We assume that the models are stored in the SavedModel format. 
We invoke the TF-TRT converter to convert the model with static TensorRT engines to run the models with TensorRT. PiperOrigin-RevId: 341650235 Change-Id: I3d8169dab366216427ec74059e9c61d523a83bc5 --- .../compiler/tensorrt/model_tests/BUILD | 48 +++ .../tensorrt/model_tests/model_handler.py | 323 ++++++++++++++++++ .../tensorrt/model_tests/run_models.py | 79 +++++ .../model_tests/sample_model/saved_model.pb | Bin 0 -> 12490 bytes 4 files changed, 450 insertions(+) create mode 100644 tensorflow/python/compiler/tensorrt/model_tests/BUILD create mode 100644 tensorflow/python/compiler/tensorrt/model_tests/model_handler.py create mode 100644 tensorflow/python/compiler/tensorrt/model_tests/run_models.py create mode 100644 tensorflow/python/compiler/tensorrt/model_tests/sample_model/saved_model.pb diff --git a/tensorflow/python/compiler/tensorrt/model_tests/BUILD b/tensorflow/python/compiler/tensorrt/model_tests/BUILD new file mode 100644 index 00000000000..d00a0dd42f2 --- /dev/null +++ b/tensorflow/python/compiler/tensorrt/model_tests/BUILD @@ -0,0 +1,48 @@ +# Description: +# Run sample models with TensorRT through TF-TRT bridge. Test TensorRT +# numerics and latency. 
+ +package( + default_visibility = ["//visibility:public"], + licenses = ["notice"], # Apache 2.0 +) + +exports_files(glob([ + "models/*", +])) + +py_library( + name = "model_handler", + srcs = ["model_handler.py"], + srcs_version = "PY3", + deps = [ + "//tensorflow/core:protos_all_py", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework", + "//tensorflow/python:framework_ops", + "//tensorflow/python:random_ops", + "//tensorflow/python:session", + "//tensorflow/python/compiler/tensorrt:trt_convert_py", + "//tensorflow/python/saved_model:loader", + "//tensorflow/python/saved_model:signature_constants", + "//tensorflow/python/saved_model:tag_constants", + "//third_party/py/numpy", + "@absl_py//absl/logging", + ], +) + +py_binary( + name = "run_models", + srcs = ["run_models.py"], + data = ["sample_model/saved_model.pb"], + python_version = "PY3", + srcs_version = "PY3", + deps = [ + ":model_handler", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_ops", + "//tensorflow/python/compiler/tensorrt:trt_convert_py", + "@absl_py//absl:app", + "@absl_py//absl/logging", + ], +) diff --git a/tensorflow/python/compiler/tensorrt/model_tests/model_handler.py b/tensorflow/python/compiler/tensorrt/model_tests/model_handler.py new file mode 100644 index 00000000000..c23337ff3f5 --- /dev/null +++ b/tensorflow/python/compiler/tensorrt/model_tests/model_handler.py @@ -0,0 +1,323 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Loads, converts, and runs sample models.""" + +import abc +import collections +import functools +import tempfile +import time +from typing import List, Mapping, Optional, Sequence, Union + +from absl import logging +import numpy as np + +from tensorflow.core.framework import graph_pb2 +from tensorflow.core.framework import tensor_shape_pb2 +from tensorflow.core.protobuf import config_pb2 +from tensorflow.core.protobuf import meta_graph_pb2 +from tensorflow.python.client import session +from tensorflow.python.compiler.tensorrt import trt_convert as trt +from tensorflow.python.framework import convert_to_constants +from tensorflow.python.framework import dtypes as tf_dtypes +from tensorflow.python.framework import importer +from tensorflow.python.framework import ops as framework_ops +from tensorflow.python.ops import random_ops +from tensorflow.python.saved_model import loader as saved_model_loader +from tensorflow.python.saved_model import signature_constants +from tensorflow.python.saved_model import tag_constants + +DEFAULT_SAVED_MODEL_TAGS = (tag_constants.SERVING,) +DEFAULT_SAVED_MODEL_SIGNATURE_KEY = ( + signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY) + + +# pylint: disable=bad-whitespace +### Helper Functions +def _get_concrete_tensor_shape( + tensor_shape: tensor_shape_pb2.TensorShapeProto, + batch_size: Optional[int] = None) -> Sequence[int]: + """Gets a concrete tensor shape without dynamic dimensions.""" + if tensor_shape.unknown_rank: + raise ValueError("Cannot generates random tensors for unknown rank!") + shape = [dim.size for dim in tensor_shape.dim] + if not shape: + raise ValueError("The tensor cannot have a rank of 0!") + if shape[0] < 0: + if batch_size is None or batch_size <= 0: + raise ValueError("Must provide a valid batch size " + "as the tensor 
has a dynamic batch size!") + shape[0] = batch_size + if any(filter(lambda x: x < 0, shape)): + raise ValueError("Cannot have dynamic dimensions except for batch size!") + return shape + + +def _generate_random_tensor_v1(tensor_info: meta_graph_pb2.TensorInfo, + batch_size: Optional[int] = None) -> np.ndarray: + """Generates a random tensor based on the data type and tensor shape.""" + dtype = tf_dtypes.as_dtype(tensor_info.dtype) + shape = _get_concrete_tensor_shape(tensor_info.tensor_shape, batch_size) + with session.Session(): + return random_ops.random_uniform( + shape=shape, dtype=dtype, name=tensor_info.name.split(":")[0]).eval() + + +# Models are repeatedly loaded for different TensorRT conversion settings. +# Using cache can reduce I/O. +@functools.lru_cache() +def load_meta_graph( + saved_model_dir: str, saved_model_tags: str, + saved_model_signature_key: str) -> meta_graph_pb2.MetaGraphDef: + """Loads a `tf.MetaGraphDef` in TF1.""" + with session.Session() as sess: + meta_graph = saved_model_loader.load( + sess=sess, + export_dir=saved_model_dir, + tags=saved_model_tags, + ) + output_node_names = [ + tensor.name.split(":")[0] for tensor in + meta_graph.signature_def[saved_model_signature_key].outputs.values() + ] + graph_def = ( + convert_to_constants.convert_variables_to_constants_from_session_graph( + sess, meta_graph.graph_def, output_node_names)) + meta_graph.graph_def.CopyFrom(graph_def) + return meta_graph + + +### Test Classes +class TestResult( + collections.namedtuple("TestResult", + ["outputs", "latency", "trt_convert_params"])): + + def __new__(cls, + outputs: Mapping[str, np.ndarray], + latency: List[float], + trt_convert_params: trt.TrtConversionParams = None): + return super(TestResult, cls).__new__(cls, outputs, latency, + trt_convert_params) + + +class _ModelHandlerBase(metaclass=abc.ABCMeta): + """Base class for running a model.""" + + def __init__( + self, + *, + saved_model_dir: str, + saved_model_tags: Sequence[str] = 
DEFAULT_SAVED_MODEL_TAGS, + saved_model_signature_key: str = DEFAULT_SAVED_MODEL_SIGNATURE_KEY): + self._saved_model_dir = saved_model_dir + self._saved_model_tags = saved_model_tags + self._saved_model_signature_key = saved_model_signature_key + + def __str__(self) -> str: + return "Directory: {}; Tags: {}; Signature: {}".format( + self._saved_model_dir, + self._saved_model_tags, + self._saved_model_signature_key, + ) + + def __repr__(self) -> str: + return "{}({})".format(self.__class__.__name__, str(self)) + + @property + def input_tensort_names(self) -> Sequence[str]: + """Names of input tensors.""" + + @property + def output_tensor_names(self) -> Sequence[str]: + """Names of output tensors.""" + + @abc.abstractmethod + def generate_random_inputs( + self, + batch_size: Optional[int] = None + ) -> Mapping[str, Union[np.ndarray, framework_ops.Tensor]]: + """Generates mapping from names to input tensors.""" + + @abc.abstractmethod + def run(self, + inputs=None, + warmup_iterations: int = 10, + benchmark_iterations: int = 100, + allow_to_use_gpu: bool = False) -> TestResult: + """Runs the model with provided or randomly generated input tensors. + + Args: + inputs: Mapping from names to input tensors. If `None`, randomly generated + inputs will be used instead. + warmup_iterations: Number of inferences to warm up the runtime. + benchmark_iterations: Number of inferences to measure the latency. + allow_to_use_gpu: Whether it is allowed to use GPU or not. + + Returns: + `TestResult` summarizing timing and numerics information. 
+ """ + + +class ModelHandlerV1(_ModelHandlerBase): + """Runs a model in TF1.""" + + @property + def meta_graph(self) -> meta_graph_pb2.MetaGraphDef: + return load_meta_graph( + saved_model_dir=self._saved_model_dir, + saved_model_tags=self._saved_model_tags, + saved_model_signature_key=self._saved_model_signature_key) + + @property + def input_tensor_info(self) -> Mapping[str, meta_graph_pb2.TensorInfo]: + return self.meta_graph.signature_def[self._saved_model_signature_key].inputs + + @property + def output_tensor_info(self) -> Mapping[str, meta_graph_pb2.TensorInfo]: + return self.meta_graph.signature_def[ + self._saved_model_signature_key].outputs + + @property + def input_tensort_names(self) -> Sequence[str]: + return [info.name for info in self.input_tensor_info.values()] + + @property + def output_tensor_names(self) -> Sequence[str]: + return [info.name for info in self.output_tensor_info.values()] + + def generate_random_inputs(self, + batch_size: Optional[int] = None + ) -> Mapping[str, np.ndarray]: + return { + tensor_info.name: _generate_random_tensor_v1(tensor_info, batch_size) + for tensor_info in self.input_tensor_info.values() + } + + def run(self, + inputs: Optional[Mapping[str, np.ndarray]] = None, + warmup_iterations=10, + benchmark_iterations=100, + allow_to_use_gpu=False) -> TestResult: + inputs = inputs or self.generate_random_inputs() + config_proto = None + if not allow_to_use_gpu: + config_proto = config_pb2.ConfigProto(device_count={"CPU": 1, "GPU": 0}) + with session.Session(config=config_proto) as sess: + importer.import_graph_def(self.meta_graph.graph_def) + try: + for _ in range(warmup_iterations): + sess.run(fetches=self.output_tensor_names, feed_dict=inputs) + latency = [] + for _ in range(benchmark_iterations): + before = time.time() + outputs = sess.run(fetches=self.output_tensor_names, feed_dict=inputs) + latency.append(time.time() - before) + except Exception as exc: + raise RuntimeError("Failed to run model inference!" 
+ "Model information: {}".format(str(self))) from exc + outputs = dict(zip(self.output_tensor_names, outputs)) + return TestResult(latency=latency, outputs=outputs if inputs else None) + + +class _TrtModelHandlerBase(_ModelHandlerBase): + """Base class for converting and running a model.""" + + def __init__( + self, + *, + trt_convert_params: trt.TrtConversionParams, + saved_model_dir: str, + saved_model_tags: Sequence[str] = DEFAULT_SAVED_MODEL_TAGS, + saved_model_signature_key: str = DEFAULT_SAVED_MODEL_SIGNATURE_KEY): + super(_TrtModelHandlerBase, self).__init__( + saved_model_dir=saved_model_dir, + saved_model_tags=saved_model_tags, + saved_model_signature_key=saved_model_signature_key) + + self._converter = self._create_converter(trt_convert_params) + logging.info("Converting to TensorRT!") + self._check_conversion(self._converter.convert()) + + self._trt_convert_params = trt_convert_params + self._conversion_is_saved = False + + @abc.abstractmethod + def _create_converter(self, trt_convert_params: trt.TrtConversionParams): + """Creates a converter for the corresponding TF version.""" + + @abc.abstractmethod + def _check_conversion(self, conversion_output): + """Checks if conversion output has any TensorRT engines.""" + + def _check_contains_trt_engine(self, graph_def: graph_pb2.GraphDef): + if "TRTEngineOp" not in [node.op for node in graph_def.node]: + raise RuntimeError("Failed to convert to TensorRT! 
" + "Model Information: {}".format(str(self))) + + def __str__(self) -> str: + base = super(_TrtModelHandlerBase, self).__str__() + return "{}, TrtConversionParams: {}".format(base, + str(self._trt_convert_params)) + + @property + def trt_convert_params(self) -> trt.TrtConversionParams: + return self._trt_convert_params + + def save(self, + output_saved_model_dir: Optional[str] = None, + overwrite=True) -> None: + if self._conversion_is_saved and not overwrite: + return + output_saved_model_dir = output_saved_model_dir or tempfile.mkdtemp() + logging.info("Saving TensorRT model to %s!", output_saved_model_dir) + self._converter.save(output_saved_model_dir) + self._saved_model_dir = output_saved_model_dir + self._conversion_is_saved = True + + +class TrtModelHandlerV1(_TrtModelHandlerBase, ModelHandlerV1): + """Converts a TF1 model with TensorRT and runs the converted model.""" + + def _create_converter(self, trt_convert_params: trt.TrtConversionParams): + conversion_nodes_denylist = self.output_tensor_names + return trt.TrtGraphConverter( + input_saved_model_dir=self._saved_model_dir, + input_saved_model_tags=self._saved_model_tags, + input_saved_model_signature_key=self._saved_model_signature_key, + nodes_denylist=conversion_nodes_denylist, + max_batch_size=trt_convert_params.max_batch_size, + max_workspace_size_bytes=trt_convert_params.max_workspace_size_bytes, + precision_mode=trt_convert_params.precision_mode, + minimum_segment_size=trt_convert_params.minimum_segment_size, + is_dynamic_op=trt_convert_params.is_dynamic_op, + maximum_cached_engines=trt_convert_params.maximum_cached_engines, + use_calibration=trt_convert_params.use_calibration, + ) + + _check_conversion = _TrtModelHandlerBase._check_contains_trt_engine + + def run(self, + inputs: Optional[Mapping[str, np.ndarray]] = None, + warmup_iterations=10, + benchmark_iterations=100) -> TestResult: + self.save(overwrite=False) + logging.info("Running with TensorRT!") + test_result = ModelHandlerV1.run( + 
self, + inputs, + warmup_iterations, + benchmark_iterations, + allow_to_use_gpu=True) + return test_result._replace(trt_convert_params=self._trt_convert_params) diff --git a/tensorflow/python/compiler/tensorrt/model_tests/run_models.py b/tensorflow/python/compiler/tensorrt/model_tests/run_models.py new file mode 100644 index 00000000000..0e930130403 --- /dev/null +++ b/tensorflow/python/compiler/tensorrt/model_tests/run_models.py @@ -0,0 +1,79 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Runs sample models with TensorRT and analyzes numerics and timing information.""" + +import os + +from absl import app +from absl import logging + +from tensorflow.python.compiler.tensorrt import trt_convert as trt +from tensorflow.python.compiler.tensorrt.model_tests import model_handler +from tensorflow.python.framework import ops as framework_ops +from tensorflow.python.platform import test as platform_test + +DEFAUL_TRT_CONVERT_PARAMS = trt.DEFAULT_TRT_CONVERSION_PARAMS + + +def _get_mean_latency(result: model_handler.TestResult): + return (sum(result.latency) / len(result.latency)) * 1000.0 + + +def run_all_tests(): + """Runs all sample models with TensorRT FP32/FP16 and reports latency.""" + # The model_configs contains (saved_model_dir, batch_size) for each model + model_configs = ((platform_test.test_src_dir_path( + "python/compiler/tensorrt/model_tests/sample_model"), 128),) + model_handler_cls = model_handler.ModelHandlerV1 + trt_model_handeler_cls = model_handler.TrtModelHandlerV1 + default_trt_convert_params = DEFAUL_TRT_CONVERT_PARAMS._replace( + is_dynamic_op=False) + for saved_model_dir, batch_size in model_configs: + base_model = model_handler_cls(saved_model_dir=saved_model_dir) + random_inputs = base_model.generate_random_inputs(batch_size) + base_model_result = base_model.run(random_inputs) + trt_fp32_model_result = trt_model_handeler_cls( + saved_model_dir=saved_model_dir, + trt_convert_params=default_trt_convert_params._replace( + precision_mode=trt.TrtPrecisionMode.FP32, + max_batch_size=batch_size)).run(random_inputs) + trt_fp16_model_result = trt_model_handeler_cls( + saved_model_dir=saved_model_dir, + trt_convert_params=default_trt_convert_params._replace( + precision_mode=trt.TrtPrecisionMode.FP16, + max_batch_size=batch_size)).run(random_inputs) + + logging.info("Base model latency: %f ms", + _get_mean_latency(base_model_result)) + logging.info("TensorRT 
FP32 model latency: %f ms", + _get_mean_latency(trt_fp32_model_result)) + logging.info("TensorRT FP16 model latency: %f ms", + _get_mean_latency(trt_fp16_model_result)) + + +def main(argv): + if len(argv) > 1: + raise app.UsageError("Too many command-line arguments.") + + os.environ["TF_TRT_ALLOW_ENGINE_NATIVE_SEGMENT_EXECUTION"] = "False" + + logging.info("Running in TF1 mode. Eager execution is disabled.") + framework_ops.disable_eager_execution() + + run_all_tests() + + +if __name__ == "__main__": + app.run(main) diff --git a/tensorflow/python/compiler/tensorrt/model_tests/sample_model/saved_model.pb b/tensorflow/python/compiler/tensorrt/model_tests/sample_model/saved_model.pb new file mode 100644 index 0000000000000000000000000000000000000000..72cb4e94b763afa97802f5728ae844438bf852a9 GIT binary patch literal 12490 zcmd;J6gry7wT@M23M*F~7rRqtVzFaNiVznUYguAWX{r)qh!7_iOHyWHu@Ym56gL-J zerZWTX^9eJh>`*qV~7w-No7H*l_;06ksu=zGYcyl2PZcVFQ1^0u$Z`#E*E!7Vo73r zT7FS(Vu=u2aY<2TUb+;U5R0EjxU&@>7Y~Fb#Ny}d5zb}7#p;}&S6m{+54VOj1!TSw zKh!xwY$d6A#rZ`_yj&0&i2Yo%nYh@T^YhA#TtF_*%!9a4h?|QoEitr;8AWYeayLr?Y2>k`k9- zYDGa#W^!gpJX9O9T~bV33?zn{k^&cJN@h-CNoIZ?icuU~Y*H+YjEsz2dR!cyDXDoS znI)CT!Gju`obV9ga^~U=$Vp62&B)J5NiD*R4b;F_65(Pk&PXgs6@pMwELKbsj9ji< zEJ3L`r9!-19BHYEC8b5F;1ndl#hsX3l350EFDMzwz%8~CJS3sUgPm3^B+DgjB`L(s z@gD{lg_t;GpqN>U8dXkU>?G!g=>=<7jH5s&86sPrxxX<<_JMjua+8yE5ThpdpTZSv-z-~-};5# z+PxJCGJDUQn7wzxeOViR$CZ2ktO>NOOX;;$U<5`?0pnC#rDU>pEeFF5A0E3S!KKA?(aQQUhlJgv)*9uk-d}l zew)^5+vB{=wr$^>y{A@O-CO2bz4w64Uz;oKO?#XtRqXxJymRkbwh4Qirrq8fD!qEI zglxv1O}>Y01+@QL|EoD?o6yv4JEy1IHeuTQJrP$T_9!@?u$j@@W8+hGY>$uqRa@Th zjJ-O~{_ZK5qG5Yu53`Mphi&t%sV* zUYV5hd*_y|-K)3zp6w2f>9!YUc3I!(cx}VucX4mj!C8Bsu(sKLm~&{)nR%(UGZ;ko zDs*t|z7*7Lt8o31jle`B+Z$Kw_B@bEvAr`r*Y?Yb2-^a#H`aT^8uvDNSMJT7oW0lJ zNVRQ~E%)B)*}L}YnW^n7 zjpG?xEw}x)8IyO~nlu#KPSSs3yP>eq=7!87n~d8$Hrxtndw(U(v|X@fqRstXIeXn^ zZMB`fz;EvcS+_m=`wI4My?J%7fQE-{$hr#K3GJ=6PHRrvDwcQdU6!b~x9zOS-Yd(S 
z_CD*mVEZK`#P-=%mc50syY?2W?X!vL*}OL=n#EQiAm8R)Vy`XprU!cqmNoAcFetWd`-ugdzZ@~BGHVu{{wja*Eve{9)!e+sz8e5V2nR_}oHroEPp1C(H zZG)}BnQU7g<2|;w_Jr&eNcv<`H+$3Gq9yfvl??m$Zsktd8*#1O*5|s)-VE=>HY+<< z?e!>JXEQZ@_ud2kb8XgLzOa|$AFplEE5p67WM}L>pgP+sV$E{fHNTeF_OYMZTQ+6J z-nMwRy?ZWk?=6{gdCz6;D|;XHEV31t_hXL~v(Vm41)O^oW~bWhSoy|AW4VEi!=4#? zlEn-5_Bb@}`QZ3)&m^ZwwgpcQ*+i^cVQa$IwP%;E-d>M)Ww!qwcG=Fc;Ip04$ZfNn zxpr^g zdj-#)i&a%l}!Z4YMYMe?KZX=YxXXXEVl`8QQY$(eur(5#<{)g=g04D zJIAzF?4Rr2DZ3U z3+&xwH+wJ3^Jd#x^`m2U5XcyM&j>|@*aetsFd_gBLWTlPCU_A=C3?%_E#e=mb&nTU z*cy~=+~X18WIN&HJllQ8j@kNz80`JA{g+J)bE+-ZWUjqp9j9&8VsiIx2!CW9p>@>u zhyKF7-`G~~eGp-6YqHvM@2v@3dmXmUu`P8<-}^ze(B_50{XH+vPO^Q#pt|R0^2WU< zyz6Y`e$BDHU^H{LK-LD^h_-3A0juBJFxW)zu|9rgufd%=d*Uv%?Coi)+Iwa?hwU)| zW?@bSp*D|dTsP5ig)nZfL0TiI8(cY@DkTMzjT8~MdY_8uwBw#~@q zu$^#WkF82&?_U4(n|t3h>Dzwdda_sKaJa1q?{3?W;MaS;{5RXnuq%4^w<8B_ALNMJ zJ}{WPTg7two&^im+q_{sZTrQm&US&ycH6$z8G9Ww>J~+Hr&-RpUqeIzV zA6c!vJD$nfCPe4$Zr~`i`KT4LSMFiEZTP3PdzVSR_qFZpYG58Mv7HCf|fTafn5#%rVd-U&*pZP&=0+GFy( z+4e(rn=M1O;oeQ#7uYsDs%bjtPv-^JY~i#OSNtT?-8kH^Nn7EC;Q z*BIQ}6Z2<@ZISt8TZ1KOwmPo$wvQ57_r8!3x4psbGhr4BFek5NC5BPtJy+X{jv(&s{4e<70a_DEIAFIOn>j;nDIvZ?5;*CTO(Vrgh)k z>%z`%d(7HvZ_JIkdl@)lc4e-Zu~#K>m+i;d*Y`4<f^Q&zoy&D zUEaAjVvUbYOyizCEoS>{nLe-Id;U?|-c7US?UlLdWc%*>#J$VjN$p*-vfg%0n47Ib z$txSK)ed`w3-<1P5&z%DLB(TF&IdPJn`;+rOt2sHhPFDZD)zSAWZ2vAnZZV4iNqcivm<+y*BspY!te5)72Qkr7|7hV_Hl}}z1KW* zuTR|7Jt8qid#ffn?{&DCu)D%tW=~Y>9oqu|J8TMS&e*c(p4t22@G=_-x7B;&yruUx zOgg&v-XyEN3jObQ_pF?@=fc63J$0K;>|K%dz*Zsk*PaDCEqDKVa%1m~ZUx&90$R2? 
za|LX#U0WCqBnh&M6B9StC73Ny+&0lwa@8h#gdmo;O+IxYq zWB1FEnV}%PMwjulVk>d*2-4x9vWEV6Vmdg|-Qge($z6 zTfFz$gFUt_TNG_w!W{Qz%dXga@BS{^eG^XYwaEEwqfn*0$8T?st)kh1y&BEMwjQk4 z_Zr-KVk2`-a4*Z$roC204{g_27}&1(cVX|PS-1CYT5;4?LX+KAX8Ky&ccI5^84UTX z#coZpweg+4X99=#-b;R~Y(38C+y2{cw6|!+T-yoXt86OzWo>T+EZfVr>A+r-$eFeg zGq2g|)ZE>3;N=CI7Z3045h!_KbEMQ&PeYrnkH}NoCG+BKC(QG;J(81Y>ywsa zE4OQct%CX=>j(EU_q6$Kv{hK+vira}R+|@lcG~oKde~|_6x_q$b#r$>$qJh}94WR1 z#(aCO+$*qs@R(<>T*zEollA-eUP=A5r|nsft&?t>EyE&9tE=smdl~$Dc7^I3+N;1C zYFqL~YOj|35!(r?r`j4A`s}$=?r!^`B4JOJ zR!lD4b4kC&cE`+uIN&YAdmOu8l&Z#@?_uXKXL5jD_GYZ=xBYN)?jFs~ zOMB(=8*Lk+O?EA~ld@Z2{Q+B@%4@daH*f8|F>B9WzZ;wPR-6&HWw_~K<+}39UX9CH zwo7&%+0#=PzjsE~FB^}q`L+=pw`}exMeQwM-Dt}pCb7pwY_sj2)#vuC*z99twoz1#XwQSh&fOgs=j=78I=)xp<}{lN&-(Y; ze4n>hxckE1mW-~w6ZV+c{$1&}H<^Fy-aBD8_DXbYu&G+OcCSXfi_Kf6w!J=#F1819 zUhL-lb$9QnpA&2g0#x=qTiUR9+POP>k904yEtp`pS3z8KZ~M-bdo$0q?S1gD#k%0D z_g)*uBepud2DTM1oNO3uxb`G?tM2w;n6xM1%>O+R?0S10*6`V^(7m;%t59L@67GAp zKXPa6<(hkY?VMxY(-{Y*;5jiYkNfN+TLx&8*SHBKiYdJylZdVlIeRR7VfZl zae1pvo9GkU2OJGHI$i9x2mHR;On4e>n=<#(-W#tQZH)qr_i}u;-|JxNw70IU%l3i8 zmOUIr!nO=?&3hkc+}q95cxZ2$f4}XHGiz=3oVaKE^zr7s7j8A~mO034Ymw+@)A3E! 
zc8i&kZNL%<+Y@)CY;1nH*)p|GvUPB{WtHF{x_2SpuDw>%O!k`P9kgB3ExxzsXYXFO zQ}b-MUEX55VDIWZ3Y~ZNK6{kES4?+;t;?Q+d$uHc+0K)=wpU~CgFPZ5y0*{e?XsOO z{dBKM%mdrX+Kqd6l$P5vthul!#5~USiHO-=o&249chsiZG9)_fu5(^u`{0Vn9)rey z+ec59*zWkLY9n{+oUMTNhTRQc^Y@%6Kenf7!CG6ll*Ya0e)slzeE4g#$l!vl#+ws1 zJ3J5DT+3QzJM}=q-WS@1mDE5Enc2Bgfe^$DA|=i2vgwsq0GX3M2DZ||iz z?Y#=pr}o;c*WSBFV$R;AkSTjNv~}5>XrE}SAkuGB^WxN=WS;4JEyP&%+8YM$-EsYk z&4=a8duL@IvAr;VgYBs+H*I%JYOx7o>Dp_MbIEqq#4Wa2KScMw+xF1*#c!#-O9YB- z_xxOIYwLe&Z^8=~>wjk&_AWcL+P31Z$XoD9?(Nmk6SQUhb#L#685{QgSopw} zAwF+UOYI@s6=H{Mcf8nUqxMB_?~JGMdsvpc?3H-vyjSCo-rgs+yKFaDI_>cl%h{`7 z?P(jDuy*ei=AC;RJa$+Iywl#Rz?8dp+r-+v2|p+A$vQu4Z;y|!E$i9)dw;CHY-2R% z#9kGT*|s5nJoir8bH;YY zCfPVd+Sn-P&e*GVBWbUD^P;^~DQj&f=q|Qx*cNBQ^NhpR;vSFf_RMp8HFOnhr_A5J zr+C+~y((9)*>=n_*~=oo$#!dU-rlNH&U>rxb?+534%;hWE3$XbZ6VuFF1&jebjI2o zke<0W#^j;x4rv+NjJ7GZ4yB#8@o!J;4G6hvt5M*-*JknQJuU+EwhiC5?L8uqV!Ola zkIfOIoV}A0yZ3IhKe?B|Gh|oFdj;Dhj+J|3jArb;pmT0-O{ewmfMO7_!){zt7`eEiIzhwQuu(fHj8QudE;cI`%#pfK*g&0+ zlNSf05Lkth0|SMCE00S6$yMmVg6v{Mpvb`kMTAR8NB}udKwi_r<0r0PjPT)0=*T&A z1YHVwBvlGaY$5C?a+p;@h=~J)1++xbCIleE|5~ERL)h?fYb|kXgXK(I3|c~5@G*CG z=oq_}Fk2@*VF4jwm` zLUSMawsL(@;o^sQHr|lvp=!_&H3)-7p*c{8s+Xw1a=Ym5!kif6XlF2(FUVJ)#wA%em7n$v3A8I|X4o*i$t4e(*z^J>R^+YJ{y zZ0V)sH&bRX*)CA(d(I&8TrmfIdXe|vAEsE8edKG)u04R$+) zAiurcb!YY#%+=bLkh#P9#)+jiU4I*GZwPDdn|zVQuAx79uffWTdv$gR+A<^*S#S1H zuoEktZM(QraNo3hM{Ez=Zniz!!e}?k;h3$I(h=K$^%HGh$erJFNq?v9r%{)j_QkEMduHrEut%rRc<-0D4z|pX#rDmZ|76dDHR-kz#a(+MBpUZLw=(bR zTEEhEb{^Zl!ekLUA6J*XJeOH)zr{}58*D3KcY9{v-UOG4){L{2?GmM=?G8+9-up=O zku6V(;of!q;`@?958BS~Y_d6eMaeEtQ)^$tulak#Lhjo3to5;-^0nSp>|yO*2F?H0 z!V&EIZajRqha*yWU-S`GJ0HFZ+kny;dzs~x?XIQ#w%t5M+Rn^syKS4m&%L+Im)rKt zO4u9y`tx2R;oG*y6t>xF?YeEtz2ef|o{Qys#V+pIyKjf!z72D`_PWXZvEA)CjNZ3uN>)U&1=Wg3?EAsdL4qm!9D*vPdV6>doFJJ9!fuft}!eI}cY zZPy$Ww@u5I-se>I#8#uI#YQas(O!f3_Inb9wDzq~TV<1V|DbKPyRhA*>rZX7+4}e1 z*gnf9XJer4tB@(S8>B+^G~8XX_e&k0?S_=)dz&TL_f7r4ZWomOz;>a4u-y#jn>LkZ z(spg9{@5nI6|#HK>$+E2MQGpa7n}DcI33%~TdrkyY1K(vhwjyTj_EA5{pKRKPs`<@ 
zEl;@k-s**lcDp|d*v09~?mK3A&Gu5{TU)VD^K3=BOl`l_y6knhwsG%_{Kh>RPV@Ij zDRtRe-<`f!tjA{WG>y`|4LuL{2r$pFWi#p7TgLdwwmYO_uS2@S?zX;fwr3`L+Nx>q z-)hYxA)TX0`= z--cV)_c|Pyv}Z$~#h&a;Zo6O^{(T2_HtjWHT5o$mQE8upPM@uqT+ZH}Yzf;P?27wt zt8U%f=*D1oV9INo1p%|I4Su%TZrI?k$HX<-*6rN4y$sz#yCP;!u{pr|-6mnn#J$OC zl6F@)#O)ZOHTFuEz1X|qO46Pe$6nfm8E&!VIUH&m@$Zt&6ay~XWqRWK81gsnol+la z+we_ikH}{;+t-Va?|txl*WR@954Ja&-tD=@aC&cs!djc#ZlCsEelELDS&+q!AtJ>( z#9CzU1P85s47&1rHk9t!d)bp|AE%VCT|+0^z6+J}Y;(4-?p={JVb7Pd?0c&>$nM+l zUCPE`wae}qFZbFoY~HaaWB#-~8xrmJ9GJY*ra)rv-ebT1?|pRTzU_)R=lAU1Ew%61 zsztOd~&#nArwo~*t_r{j<+qJQ;vklsH!nWw?Mq9Q@rF{ysBW=UZrtEF=I|GGJes99w>((U>6}A&fTlStVmD#sz=0@9F?U!ud9$~XaneDR<%38uxhV?=}KF^H$3|$ zpJB8+wrh&*3gKCMUi8e_W7e|Q_KGiu?a2~0I}uekTZUElEmvJ`vkja1elJ_Z@x5Z{ zjQhm;=j>hZ>AW>t%ly4Fq#xKc{8!y~pl7koI!?iT)zd%hO)L7aSNrG0z1-(C>^|A; zvVGz9$R^5qqHRO@(Y+1_v-Ua^3+%pN$Y--5Zi;QghTVI_L@wAK_;zUT0j{ZgCw!6F z7x2H}A+czayYbZqJ3E@irS+#P__=J7cpUp?*)%mh-j@`pWx! zg5>tT{(5rnq@5RRUy0wb{g!vu_JZsBy(PD+Z5RB>*qfGdeebo+y|!X+RQ7$`Bx5%_ z{O{g1i<9?$dwR#VY5hIhhKTe%J;rvn1@?WmI(enG#b2fNMX%`C>-KiK?TprwHqk=u zd;jSu?z0wQ*yrZ(es4kc&%JE3=Ij+X*t)li<+Sa+54`&f!X)-g<7u?bD15MILf@;s z+SLp8cF(xHmr3W0t@Vy8dwuqX+Maoyxwq$8jIGWcCEM=Gb$c7O&9%DqZL+QH98tS{ ztEBfa{X1*x@cQFk?rxra8-iqQ7JTX2yCOtpA47-Su7zs-#6ptj6EAR zO764k<=baHan;^qtkZ0@?Ecug9lT^)_(k0A*v)CS4NY=;HvKqZ%Qd0LHix0oHetgB zYll}K_s(YN*qgxe$$Ccr^F0|`9@|JW@a{W#g56H&=386m83Oy(1Q^@uR0(LubnshzJ^!LyB@5uwspw-vbSu`WZP-sv-T#ejm9A1HFVoL zZQO1f;j(7W~4ckLCq?`_La z$YPWEk<%_puFdwvp+z2pT+m9~o{d|Gl?pWD8+f#41+rE9H zY4>gWC0p%yp?z!KE7*$el(0+IIR!u$YZLR%8?TVwt?QZpcwY~dN-mXDs z(;l8Im%ZM43-?}5l()NZ$9K<0BLTb5bNTm$J*nL*{b}Fcgb=&EyM*@IPC9lhx#HP)*HSk@x_d(M8y?qB5 z_Z>ChvTKN1V>8M9fNjs2SlcxnCAJEEakgeh-r5H2tF$>_z_gD+W$E6w)SY{`wH?~a z-8XZuu`2IA!}lt74J_*W60WA~&HC`h_Gu%Zo%XTIdmqeszE`7wXJ5jC*SmGhm29Wk z$?tnCAi3{!gu*`ErLuMoyp#6kl!@+jSjek?TzhszAsIw>N1)%c$5Wysgw zyD@;lZoyF{+Yoz>y$&lnY!znI>{Xt}ZI>|r#crl~*KE%w%Gy~LaoUAg*xO2%O7COf zO50<-fzz&G*LTz#`Fo1+A^N*M6S0O>>#PS5Q#JZkzUNTL#Gr 
zn+UJ3dlHhoZ4*)o_jXR^u`7#n-77ZbyRAUUyuA!Nr1xxbm9~|-Jl!_I)X8>(l-HhR zv!>Xtb>g=xn|^<?F1CP~8dJ1#9bV9bO3T+wqEPUqG~q zO|=V?-4+SOy%M?iY*J6~*_r4o@6||{x+g7t@m_}6kv7uTXY5V*{KlF`BHH%X=Fhe| z)oyzeSpV$(@Qrm}!k1IK8@Bt}-adX~ufrX_ec#s1vo)BXW^*m=xb5K;()$dbNZS=` zdbamiqw>B}K09p>XdkzEv3L5O*WF9@IxN)N+fevzPjS7lo!b6WwhsiIZ6(+y?rGy* zzn3jtecuAb7rPlg`P*hW_t6EC>;+Ggc_$*cM8elZQym!Ou zrF)$>DA+xdd1Slro~WI~!i9UPo!ITRBvjeXn9jeCVVU{fgz&kxWqW7sU7+S|8}QiB zCT+$m+l=?fG?O720!MbE-gzW|1LYoGaEjG8BmfBvlSF~%0*}P}xM|nGg zo?;uhO!a+-w=wSvTf5HIGP&?a<<9BTwx<1>qF`u*z_>i+FI&$mY zh!Yd{EC_jPt?(#l?}Be`wmN4SZO!Te_DVP%+|#zUWv}NxO}pT7CcEf{BYSyxRQ7JQ z5U`u8$7Oe6OPozY|DW9wv6pN%FJQKN7Wv3FlSj<%^zQb(4(HeHt#jnwccXKf&6e{# zdk@r2vRSbGnRWL?VY`I*yVgyH_id{}KiCFz8|^XkKWTefN62p5p`N|#bd~oxObXij zp=R6O5}731wuJe6Z!P$0`}#unJvSVdAkn8 zaaw0F?Y6BFYp~t6bDOPCXO*qOZ9&`LLd|<~?ilT5FqydL&G&fQ4F-Pk+xlbRjdoBMk+ z)}OY?mi)4JwSaj-V%?V!ux-z+Yb$N9?fYxXc1B?z)9IJCXPmihjn`=IbJ+A` zZ`}6w~u(x4Rq0IwM!yziIFC))-~1)E(* zx@|9U-?B}Sct^FWed3WosJZ->ry#tYyI!VUWYH!_L|w0 z?RB{3xYyzj$G(V%b8Q@cr&+T-6W`|$&a+RTym#-0Z%j64epK&0SIxffWst00!vD~{ zaeV^&!V(|v^*Nupw^?4*PTJzv-iQbC`)r)zzP4VmD{RBg{M)-B z&d}yo)Ja>P)`Yzb1szt`nm^c@aAepX?q}ckZPS^(%Xr%NE?d9JcEbK;wi1Vy+Z?l- zYR**huzk?o2TOKeKi*loMBIQMyOmbP2^md!4z_m%A{ zu|C^vsXY5i*2~%YwDaxda2472VX6GSxN|D|+HNn{JEf=G)?u&OJ}067wgFB0HXj7u z?A3W)ymyL6knOW)5BECn7Pm{>scOfd?77$M&^p`SUtaHZ*z#siL-F-J+Fw}g4sh$( zW+iQ~U6AnET4CnXy%$QQZN5lK?@ch>ySE{(%BHLGkL}qd3U&-rcr1e&-`FbaO4~`u z-nNmtcH8!};mo}k+PL;vO}uOyz4`p!M;AZZIwU4rGjZ;)J+R=KO+#zO-hwro_6o`~ z*(n*lwcYYZ%eE{0k!`@enR~A>>+dz-^xAWHfyO?rHbyQHv|%UoQBCw=Da0U`5`1J+ zf=f(D1ZiXwK8A&7z!x+Q1;PSCOdK3sWw3$Hctd^AuqfdXU$g-)Q`mTyj*}L2#Fs-! 
zfzW_2R|#yW8)`4Q@1aB8P+5fEaSwIFl;R%O<>30qh&0*<8N(EUkL*I?LmxEED1|k0 z5UwC{Y*$Q(i35Z=2F=*+L`K+1dc2{&6LdX0JCq@XNOF*{femm&gBBb|R1XBMM5H7L z3Vb2R7%y~e8R1UyZRUzaTI7u}of2;dj&t&j7h+Ii(3-=@#SdDJotc*&pOTuESejEJ z)WOJQ%Eby^aWAC9#f!9V+{!?JQ405xYawIUB7Y%GE-u*mbnKcy2@Hfeq@*D0|I>2v R%k`m7(Jv@UP036y0RV6JO=|!E literal 0 HcmV?d00001 From 4e82719ae3b7296447e7bb0e857adca23cc4ed31 Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Tue, 10 Nov 2020 10:51:13 -0800 Subject: [PATCH 114/220] Switch tf.While back to using an explicit 'output_shapes' attribute. tf.While allows for different input and output shapes (via dynamic shapes) using its 'output_shapes' attribute. Swapping out the derived attribute for an explicit attribute allows retaining information from the Graph import. Custom builders are added which set `output_shapes` from result types. PiperOrigin-RevId: 341652717 Change-Id: I83a8b6a2fd8f5972a405324bae752ca8280c53d6 --- .../compiler/mlir/tensorflow/ir/tf_ops.td | 13 ++++++- .../compiler/mlir/tensorflow/ir/tf_ops_n_z.cc | 37 ++++++++++++++++--- .../mlir/tensorflow/tests/canonicalize.mlir | 18 --------- .../mlir2graphdef/functional-while-ops.mlir | 4 +- 4 files changed, 44 insertions(+), 28 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td index 8f96f849b8f..61a55c3534d 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td @@ -684,6 +684,7 @@ body: A function that takes a list of tensors and returns another FlatSymbolRefAttr:$cond, FlatSymbolRefAttr:$body, + DefaultValuedAttr:$output_shapes, DefaultValuedAttr:$parallel_iterations, // Used to map StatelessWhile and While op defined in TensorFlow to a common @@ -696,12 +697,20 @@ body: A function that takes a list of tensors and returns another ); TF_DerivedOperandTypeListAttr T = TF_DerivedOperandTypeListAttr<0>; - TF_DerivedResultShapeListAttr output_shapes = TF_DerivedResultShapeListAttr<0>; let 
verifier = [{ return Verify(*this); }]; - let hasCanonicalizer = 1; + + let builders = [ + OpBuilderDAG<(ins "TypeRange":$output, "ValueRange":$input, + "FlatSymbolRefAttr":$cond, "FlatSymbolRefAttr":$body, + "IntegerAttr":$parallel_iterations, + "BoolAttr":$is_stateless)>, + OpBuilderDAG<(ins "TypeRange":$output, "ValueRange":$input, + "StringRef":$cond, "StringRef":$body, + "uint64_t":$parallel_iterations, "bool":$is_stateless)> + ]; let extraClassDeclaration = [{ // Get the condition function. diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc index 4e2fdbd0014..ca54b817b70 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc @@ -2590,12 +2590,37 @@ static LogicalResult Verify(WhileOp op) { return success(); } -//===----------------------------------------------------------------------===// -// WhileOp canonicalization. -//===----------------------------------------------------------------------===// -void WhileOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert>(context); +namespace { + +ArrayAttr GetShapeArrayAttrFromTypes(mlir::MLIRContext *context, + TypeRange types) { + SmallVector shapes; + shapes.reserve(types.size()); + for (Type type : types) + shapes.push_back(ShapeAttr::get(context, type.cast())); + return ArrayAttr::get(shapes, context); +} + +} // namespace + +void WhileOp::build(OpBuilder &builder, OperationState &result, + TypeRange output, ValueRange input, FlatSymbolRefAttr cond, + FlatSymbolRefAttr body, IntegerAttr parallel_iterations, + BoolAttr is_stateless) { + ArrayAttr output_shapes = + GetShapeArrayAttrFromTypes(builder.getContext(), output); + build(builder, result, output, input, cond, body, output_shapes, + parallel_iterations, is_stateless); +} + +void WhileOp::build(OpBuilder &builder, OperationState &result, + TypeRange output, 
ValueRange input, StringRef cond, + StringRef body, uint64_t parallel_iterations, + bool is_stateless) { + ArrayAttr output_shapes = + GetShapeArrayAttrFromTypes(builder.getContext(), output); + build(builder, result, output, input, cond, body, output_shapes, + parallel_iterations, is_stateless); } //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir index e77dd365abf..7cb4aefd28c 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir @@ -1259,24 +1259,6 @@ func @testIfDropOutputShapes(tensor, tensor<2xf32>) -> tensor<2xf32> { return %1 : tensor<2xf32> } -// Check that output_shapes attribute is removed for tf.Whileß -func @testWhileCond(tensor<*xf32>) -> (tensor) -func @testWhileBody(tensor<*xf32>) -> (tensor<*xf32>) -// CHECK-LABEL: func @testWhileDropOutputShapes -func @testWhileDropOutputShapes(tensor<*xf32>) -> (tensor<*xf32>) { -^bb0(%arg0: tensor<*xf32>): - // CHECK: "tf.While" - // CHECK-NOT: output_shapes - %1 = "tf.While"(%arg0) { - cond = @testWhileCond, - body = @testWhileBody, - is_stateless = false, - output_shapes = [#tf.shape<>] - } : (tensor<*xf32>) -> (tensor<*xf32>) - - return %1 : tensor<*xf32> -} - // CHECK-LABEL: testNMSV3ToNMSV4 func @testNMSV3ToNMSV4(%arg0: tensor<3x4xf32>, %arg1: tensor<3xf32>, %arg2: tensor, %arg3: tensor) -> tensor<2xi32> { %max_size = constant dense<2> : tensor diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-while-ops.mlir b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-while-ops.mlir index c7a4630d985..4bb324d0d85 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-while-ops.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-while-ops.mlir @@ -2,8 +2,8 @@ func 
@main(%arg0: tensor, %arg1: tensor<5xf32>) -> (tensor<5xf32>, tensor<5xf32>) { %0:2 = tf_executor.graph { - %outputs_2:2, %control_3 = tf_executor.island wraps "tf.While"(%arg0, %arg1) {body = @body, cond = @cond, is_stateless = false} : (tensor, tensor<5xf32>) -> (tensor, tensor<5xf32>) loc("StatefulWhile") - %outputs_4:2, %control_5 = tf_executor.island wraps "tf.While"(%arg0, %arg1) {body = @body, cond = @cond, is_stateless = true} : (tensor, tensor<5xf32>) -> (tensor, tensor<5xf32>) loc("StatelessWhile") + %outputs_2:2, %control_3 = tf_executor.island wraps "tf.While"(%arg0, %arg1) {body = @body, cond = @cond, is_stateless = false, output_shapes = [#tf.shape<>, #tf.shape<5>]} : (tensor, tensor<5xf32>) -> (tensor, tensor<5xf32>) loc("StatefulWhile") + %outputs_4:2, %control_5 = tf_executor.island wraps "tf.While"(%arg0, %arg1) {body = @body, cond = @cond, is_stateless = true, output_shapes = [#tf.shape<>, #tf.shape<5>]} : (tensor, tensor<5xf32>) -> (tensor, tensor<5xf32>) loc("StatelessWhile") tf_executor.fetch %outputs_2#1, %outputs_4#1 : tensor<5xf32>, tensor<5xf32> } return %0#0, %0#1 : tensor<5xf32>, tensor<5xf32> From 4d52dc52845e88e0188382a86008f83599d1c7eb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Nov 2020 10:55:33 -0800 Subject: [PATCH 115/220] Update `tf.saved_model.load` docstring. 
PiperOrigin-RevId: 341653654 Change-Id: I3346b8b8cdd222cd177ab9a94b60b6b7946df9fe --- tensorflow/python/saved_model/load.py | 86 ++++++++++++++------------- 1 file changed, 45 insertions(+), 41 deletions(-) diff --git a/tensorflow/python/saved_model/load.py b/tensorflow/python/saved_model/load.py index 381fe95bff0..03a10487130 100644 --- a/tensorflow/python/saved_model/load.py +++ b/tensorflow/python/saved_model/load.py @@ -771,45 +771,47 @@ def load(export_dir, tags=None, options=None): Signatures associated with the SavedModel are available as functions: - ```python - imported = tf.saved_model.load(path) - f = imported.signatures["serving_default"] - print(f(x=tf.constant([[1.]]))) - ``` + >>> class Adder(tf.Module): + ... @tf.function( + ... input_signature=[tf.TensorSpec(shape=None, dtype=tf.float32)]) + ... def add(self, x): + ... return x + x + >>> model = Adder() + >>> model.add(tf.constant(1.)) + 2.0 + >>> tf.saved_model.save(model, "/tmp/adder") + >>> imported = tf.saved_model.load("/tmp/adder") + >>> f = imported.signatures["serving_default"] + >>> f(x=tf.constant(1.)) + {'output_0': } - Objects exported with `tf.saved_model.save` additionally have trackable - objects and functions assigned to attributes: + Any trackable attributes on the exported object will be restored on load: - ```python - exported = tf.train.Checkpoint(v=tf.Variable(3.)) - exported.f = tf.function( - lambda x: exported.v * x, - input_signature=[tf.TensorSpec(shape=None, dtype=tf.float32)]) - tf.saved_model.save(exported, path) - imported = tf.saved_model.load(path) - assert 3. == imported.v.numpy() - assert 6. == imported.f(x=tf.constant(2.)).numpy() - ``` + >>> exported = tf.train.Checkpoint(v=tf.Variable(3.)) + >>> exported.multiply = tf.function( + ... lambda x: exported.v * x, + ... 
input_signature=[tf.TensorSpec(shape=None, dtype=tf.float32)]) + >>> tf.saved_model.save(exported, "/tmp/exported") + >>> imported = tf.saved_model.load("/tmp/exported") + >>> imported.v.numpy() + 3.0 + >>> imported.multiply(x=tf.constant(2.)).numpy() + 6.0 _Loading Keras models_ - Keras models are trackable, so they can be saved to SavedModel. The object - returned by `tf.saved_model.load` is not a Keras object (i.e. doesn't have - `.fit`, `.predict`, etc. methods). A few attributes and functions are still - available: `.variables`, `.trainable_variables` and `.__call__`. + Keras models are trackable, so they can be saved and loaded via SavedModel. + The object returned by `tf.saved_model.load` is not a Keras object, however + (i.e. it doesn't have `.fit`, `.predict`, etc. methods). A few attributes and + functions are still available: `.variables`, `.trainable_variables` and + `.__call__`. - ```python - model = tf.keras.Model(...) - tf.saved_model.save(model, path) - imported = tf.saved_model.load(path) - outputs = imported(inputs) - ``` - - Use `tf.keras.models.load_model` to restore the Keras model. + To restore a full Keras model along with all its attributes and functions, + use `tf.keras.models.load_model` instead. _Importing SavedModels from TensorFlow 1.x_ - SavedModels from `tf.estimator.Estimator` or 1.x SavedModel APIs have a flat + SavedModels from `tf.estimator.Estimator` and 1.x SavedModel APIs have a flat graph instead of `tf.function` objects. These SavedModels will be loaded with the following attributes: @@ -830,14 +832,16 @@ def load(export_dir, tags=None, options=None): * `.restore(save_path)`: A function that restores variables from a checkpoint saved from `tf.compat.v1.Saver`. 
- _Consuming SavedModels asynchronously_ + _Making sure a SavedModel is ready to be loaded_ - When consuming SavedModels asynchronously (the producer is a separate - process), the SavedModel directory will appear before all files have been - written, and `tf.saved_model.load` will fail if pointed at an incomplete - SavedModel. Rather than checking for the directory, check for - "saved_model_dir/saved_model.pb". This file is written atomically as the last - `tf.saved_model.save` file operation. + When exporting a SavedModel, TensorFlow first creates `export_dir` and then + writes a number of additional files. Calling `tf.saved_model.load` on a + directory in a partially-written state will fail. + + If you would like to make sure a SavedModel is fully written and ready for + loading, check for the presence of `"saved_model_dir/saved_model.pb"` rather + than `export_dir`. This file is written atomically as the last step in + saving. Args: export_dir: The SavedModel directory to load from. @@ -848,10 +852,10 @@ def load(export_dir, tags=None, options=None): loading. Returns: - A trackable object with a `signatures` attribute mapping from signature - keys to functions. If the SavedModel was exported by `tf.saved_model.load`, - it also points to trackable objects, functions, debug info which it has been - saved. + A trackable object with a `signatures` attribute mapping signature keys to + functions. If the SavedModel was exported by `tf.saved_model.save`, it will + also have attributes pointing to any trackable objects attached to the + originally exported object. Raises: ValueError: If `tags` don't match a MetaGraph in the SavedModel. From c9d79fa32f063753740ef0ff1d7be6b4b49e4b48 Mon Sep 17 00:00:00 2001 From: Advait Jain Date: Mon, 9 Nov 2020 23:39:24 -0800 Subject: [PATCH 116/220] Fix and clean up how we run the bluepill renode tests. 
* With #44457, we were not checking for a specific string on the UART, so even if a test failed, the test_bluepill_binary.sh script would still report everything as passing. The current change checks for "~~~ALL TESTS PASSED~~~" on the UART. * Only run binaries with _test suffix as part of the test suite. * Use the Robot Framework variables mechanism for passing parameters from the command line, instead of using environment variables. * Added some pointers into the Renode and Robot Framework documentation. * We will write more in-depth documentation in renode.md once we settle on how we are going to use renode (currently in flux). Tested that all bluepill tests pass. ``` make -f tensorflow/lite/micro/tools/make/Makefile clean make -f tensorflow/lite/micro/tools/make/Makefile TARGET=bluepill build -j8 tensorflow/lite/micro/testing/test_bluepill_binary.sh tensorflow/lite/micro/tools/make/gen/bluepill_cortex-m3/bin/ ``` Output: ``` ... +++++ Finished test 'bluepill.Run All Bluepill Tests' in 25.37 seconds with status OK ... Tests finished successfully :) PASS ``` Manually changed a test to fail and confirmed that the failure was properly reported.
--- tensorflow/lite/micro/docs/renode.md | 29 +++++++++- tensorflow/lite/micro/testing/bluepill.robot | 44 ++++++--------- .../micro/testing/test_bluepill_binary.sh | 54 ++++++++++--------- 3 files changed, 74 insertions(+), 53 deletions(-) diff --git a/tensorflow/lite/micro/docs/renode.md b/tensorflow/lite/micro/docs/renode.md index daf3159ff6c..6e411bdbcf0 100644 --- a/tensorflow/lite/micro/docs/renode.md +++ b/tensorflow/lite/micro/docs/renode.md @@ -11,8 +11,9 @@ https://github.com/ekalinin/github-markdown-toc#auto-insert-and-update-toc * [Running Unit Tests](#running-unit-tests) * [Under the hood of the Testing Infrastructure](#under-the-hood-of-the-testing-infrastructure) * [Running a non-test Binary with Renode](#running-a-non-test-binary-with-renode) + * [Useful External Links for Renode and Robot Documentation](#useful-external-links-for-renode-and-robot-documentation) - + @@ -76,3 +77,29 @@ failing. It may be useful to run binaries on Renode that are not tests, independent of the robot framework. We will be adding some documentation for that in this section. + +# Useful External Links for Renode and Robot Documentation + + * [Testing with Renode](https://renode.readthedocs.io/en/latest/introduction/testing.html?highlight=robot#running-the-robot-test-script) + + * [Robot Testing Framework on Github](https://github.com/robotframework/robotframework). For someone new to + the Robot Framework, the documentation can be a bit hard to navigate, so + here are some links that are relevant to the use of the Robot Framework with + Renode for TFLM: + + * [Creating Test Data](http://robotframework.org/robotframework/latest/RobotFrameworkUserGuide.html#creating-test-data) + section of the user guide. + + * Renode-specific additions to the Robot test description format are in the + [RobotFrameworkEngine directory](https://github.com/renode/renode/tree/master/src/Renode/RobotFrameworkEngine). 
For example, + + * [Start Emulation](https://github.com/renode/renode/blob/master/src/Renode/RobotFrameworkEngine/RenodeKeywords.cs#L41-L42) + * [Wait For Line On Uart](https://github.com/renode/renode/blob/master/src/Renode/RobotFrameworkEngine/UartKeywords.cs#L62-L63) + is where `Wait For Line On Uart` is defined. + + * Some documentation for all the [Standard Libraries](http://robotframework.org/robotframework/#standard-libraries) + that define commands such as: + + * [Remove File](http://robotframework.org/robotframework/latest/libraries/OperatingSystem.html#Remove%20File) + * [List Files In Directory](https://robotframework.org/robotframework/latest/libraries/OperatingSystem.html#List%20Files%20In%20Directory) + diff --git a/tensorflow/lite/micro/testing/bluepill.robot b/tensorflow/lite/micro/testing/bluepill.robot index c216aa66438..0a31f085d38 100644 --- a/tensorflow/lite/micro/testing/bluepill.robot +++ b/tensorflow/lite/micro/testing/bluepill.robot @@ -8,51 +8,39 @@ Resource ${RENODEKEYWORDS} *** Variables *** ${CREATE_SNAPSHOT_ON_FAIL} False ${UART} sysbus.cpu.uartSemihosting +${RESC} undefined_RESC +${RENODE_LOG} /tmp/renode.log +${UART_LINE_ON_SUCCESS} ~~~ALL TESTS PASSED~~~ +${DIR_WITH_TESTS} undefined_DIR_WTH_TESTS *** Keywords *** Prepare Tests - [Documentation] Make environment variables avaiable in whole test suite and list files in ${BIN_DIR} + [Documentation] List all binaries with _test suffix and make available from test cases Setup - ${SCRIPT} = Get Environment Variable SCRIPT - ${LOGFILE} = Get Environment Variable LOGFILE - ${EXPECTED} = Get Environment Variable EXPECTED - Set Suite Variable ${SCRIPT} - Set Suite Variable ${EXPECTED} - Set Suite Variable ${LOGFILE} - List All Test Binaries + @{tests} = List Files In Directory ${DIR_WITH_TESTS} pattern=*_test absolute=True + Set Suite Variable @{tests} Teardown With Custom Message [Documentation] Replace robot fail message with shorter one to avoid duplicated UART output in log Set Test 
Message ${file} - FAILED Test Teardown -List All Test Binaries - [Documentation] List all files in ${BIN_DIR} and make it available from test cases - Setup - ${BIN_DIR} = Get Environment Variable BIN_DIR - @{binaries} = List Files In Directory ${BIN_DIR} absolute=True - Set Suite Variable @{binaries} - Test Binary - Remove File ${LOGFILE} - Execute Command $logfile = @${LOGFILE} - Execute Script ${SCRIPT} - + Remove File ${RENODE_LOG} + Execute Command $logfile = @${RENODE_LOG} + Execute Script ${RESC} Create Terminal Tester ${UART} timeout=2 Start Emulation - - Wait For Line On Uart ${EXPECTED} + Wait For Line On Uart ${UART_LINE_ON_SUCCESS} *** Test Cases *** -Should Run All Bluepill Tests +Run All Bluepill Tests [Documentation] Runs Bluepill tests and waits for a specific string on the semihosting UART - [Tags] bluepill uart tensorflow arm - FOR ${BIN} IN @{binaries} - Execute Command $bin = @${BIN} - ${_} ${file} = Split Path ${BIN} + FOR ${TEST} IN @{tests} + Execute Command Clear + Execute Command $bin = @${TEST} + ${_} ${file} = Split Path ${TEST} Set Test Variable ${file} Test Binary - Execute Command Clear - Log \t${file} - PASSED console=True END diff --git a/tensorflow/lite/micro/testing/test_bluepill_binary.sh b/tensorflow/lite/micro/testing/test_bluepill_binary.sh index 4a11b7a320f..45cdbaa9a12 100755 --- a/tensorflow/lite/micro/testing/test_bluepill_binary.sh +++ b/tensorflow/lite/micro/testing/test_bluepill_binary.sh @@ -13,24 +13,16 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# -# Tests a 'bluepill' STM32F103 ELF by parsing the log output of Renode emulation. -# -# First argument is the ELF location. -# Second argument is a regular expression that's required to be in the output logs -# for the test to pass. 
-# -# This script must be run from the top-level folder of the tensorflow github -# repository as it mounts `pwd` to the renode docker image (via docker run -v) -# and paths in the docker run command assume the entire tensorflow repo is mounted. -declare -r ROOT_DIR=`pwd` -declare -r TEST_TMPDIR=/tmp/test_bluepill_binary/ -declare -r MICRO_LOG_PATH=${TEST_TMPDIR} -declare -r MICRO_LOG_FILENAME=${MICRO_LOG_PATH}logs.txt -mkdir -p ${MICRO_LOG_PATH} +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +TFLM_ROOT_DIR=${SCRIPT_DIR}/.. + +# The renode script for the board being emulated. +RESC_PATH=${TFLM_ROOT_DIR}/testing/bluepill.resc + +# Renode's entrypoint for using the Robot Framework. +RENODE_TEST_SCRIPT=${TFLM_ROOT_DIR}/tools/make/downloads/renode/test.sh -declare -r RENODE_TEST_SCRIPT=${ROOT_DIR}/tensorflow/lite/micro/tools/make/downloads/renode/test.sh if [ ! -f "${RENODE_TEST_SCRIPT}" ]; then echo "The renode test script: ${RENODE_TEST_SCRIPT} does not exist. Please " \ "make sure that you have correctly installed Renode for TFLM. See " \ @@ -48,13 +40,26 @@ fi exit_code=0 -if ! BIN_DIR=${ROOT_DIR}/$1 \ - SCRIPT=${ROOT_DIR}/tensorflow/lite/micro/testing/bluepill.resc \ - LOGFILE=$MICRO_LOG_FILENAME \ - EXPECTED="$2" \ - ${RENODE_TEST_SCRIPT} \ - ${ROOT_DIR}/tensorflow/lite/micro/testing/bluepill.robot \ - -r $TEST_TMPDIR +# The logs from this script will go in the RESULTS_DIRECTORY. These include: +# 1. RENODE_LOG: Output log from the renode process. +# 2. html and xml files generated by the Robot Framework. +# +# Note that with the current approach (in bluepill.robot), multiple test +# binaries are run in a loop and RENODE_LOG only has logs from the last test +# binary since it is deleted prior to running each test binary. 
+RESULTS_DIRECTORY=/tmp/renode_bluepill_logs +mkdir -p ${RESULTS_DIRECTORY} +RENODE_LOG=${RESULTS_DIRECTORY}/renode_log.txt + +ROBOT_COMMAND="${RENODE_TEST_SCRIPT} ${TFLM_ROOT_DIR}/testing/bluepill.robot \ + -r ${RESULTS_DIRECTORY} \ + --variable RESC:${RESC_PATH} \ + --variable RENODE_LOG:${RENODE_LOG} \ + --variable DIR_WITH_TESTS:${1}" + +echo "${ROBOT_COMMAND}" + +if ! ${ROBOT_COMMAND} then exit_code=1 fi @@ -65,6 +70,7 @@ then else echo "UART LOGS:" # Extract output from renode log - cat ${MICRO_LOG_FILENAME} |grep 'uartSemihosting' |sed 's/^.*from start] *//g' + cat ${RENODE_LOG} |grep 'uartSemihosting' |sed 's/^.*from start] *//g' fi + exit $exit_code From 85c22590fb18e17ae5688de1da9cb390df358d1a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Nov 2020 11:17:25 -0800 Subject: [PATCH 117/220] [TFLite/MLIR] Adds constant folders for integer casts. PiperOrigin-RevId: 341658771 Change-Id: Ia35978215c05e68c66e85e1d1ff8c1531e6099d8 --- tensorflow/compiler/mlir/lite/ir/tfl_ops.cc | 37 +++++++++++++ tensorflow/compiler/mlir/lite/ir/tfl_ops.td | 2 + .../compiler/mlir/lite/tests/const-fold.mlir | 52 +++++++++++++++++++ .../tests/end2end/unroll_batch_matmul.pbtxt | 16 +++--- 4 files changed, 99 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc index 7b99e5f5230..215812a6d1d 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc @@ -1972,6 +1972,43 @@ OpFoldResult ConstOp::fold(ArrayRef operands) { return value(); } +//===----------------------------------------------------------------------===// +// CastOp +//===----------------------------------------------------------------------===// + +OpFoldResult CastOp::fold(ArrayRef operands) { + assert(operands.size() == 1); + // For now, only supports cast between integer types. 
+ auto elements_attr = operands[0].dyn_cast_or_null(); + if (!elements_attr) { + return nullptr; + } + + auto result_element_type = + getType().cast().getElementType().dyn_cast(); + auto operand_element_type = input() + .getType() + .cast() + .getElementType() + .dyn_cast(); + // Returns nullptr if either result/operand element type is not integer. + if (!result_element_type || !operand_element_type) { + return nullptr; + } + + const bool is_input_unsigned = operand_element_type.isUnsigned(); + const int output_bitwidth = result_element_type.getWidth(); + // The integer cast op is the same as C integer cast. Depends on the operand + // type's signedness, we will determine whether or not sign extension is + // needed. + auto cast = [&](APInt value) { + return is_input_unsigned ? value.zextOrTrunc(output_bitwidth) + : value.sextOrTrunc(output_bitwidth); + }; + + return elements_attr.mapValues(result_element_type, cast); +} + //===----------------------------------------------------------------------===// // SelectV2Op //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td index 84a46c3be30..5f1d9eadfe2 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td @@ -3443,6 +3443,8 @@ def TFL_CastOp : TFL_Op<"cast", [ // TFLite's cast op does not utilize CastOptions, instead derives types // from the TfLiteTensors. 
let hasOptions = 0; + + let hasFolder = 1; } def TFL_MirrorPadOp: TFL_Op<"mirror_pad", [ diff --git a/tensorflow/compiler/mlir/lite/tests/const-fold.mlir b/tensorflow/compiler/mlir/lite/tests/const-fold.mlir index 69009ae594b..27a7068cda6 100644 --- a/tensorflow/compiler/mlir/lite/tests/const-fold.mlir +++ b/tensorflow/compiler/mlir/lite/tests/const-fold.mlir @@ -587,3 +587,55 @@ func @rsqrt_bf16() -> tensor { // CHECK: %[[CST:.*]] = constant dense<5.000000e-01> : tensor // CHECK: return %[[CST]] } + +// CHECK-LABEL: @cast_i64_to_i32 +func @cast_i64_to_i32() -> tensor<5xi32> { + %cst = constant dense<[-1, 0, 1, 2147483647, 2147483648]> : tensor<5xi64> + %0 = "tfl.cast"(%cst) : (tensor<5xi64>) -> tensor<5xi32> + return %0 : tensor<5xi32> + +// CHECK: %[[CST:.*]] = constant dense<[-1, 0, 1, 2147483647, -2147483648]> : tensor<5xi32> +// CHECK: return %[[CST]] +} + +// CHECK-LABEL: @cast_i32_to_ui8 +func @cast_i32_to_ui8() -> tensor<6xui8> { + %cst = constant dense<[0, -1, 256, 127, -128, -129]> : tensor<6xi32> + %0 = "tfl.cast"(%cst) : (tensor<6xi32>) -> tensor<6xui8> + return %0 : tensor<6xui8> + +// CHECK: %[[CST:.*]] = constant dense<[0, 255, 0, 127, 128, 127]> : tensor<6xui8> +// CHECK: return %[[CST]] +} + +// CHECK-LABEL: @cast_ui8_to_i8 +func @cast_ui8_to_i8() -> tensor<4xi8> { + %cst = constant dense<[0, 255, 127, 128]> : tensor<4xui8> + %0 = "tfl.cast"(%cst) : (tensor<4xui8>) -> tensor<4xi8> + return %0 : tensor<4xi8> + +// CHECK: %[[CST:.*]] = constant dense<[0, -1, 127, -128]> : tensor<4xi8> +// CHECK: return %[[CST]] +} + +// CHECK-LABEL: @cast_i8_to_i32 +func @cast_i8_to_i32() -> tensor<4xi32> { + %cst = constant dense<[0, 128, -1, -128]> : tensor<4xi8> + %0 = "tfl.cast"(%cst) : (tensor<4xi8>) -> tensor<4xi32> + return %0 : tensor<4xi32> + +// CHECK: %[[CST:.*]] = constant dense<[0, -128, -1, -128]> : tensor<4xi32> +// CHECK: return %[[CST]] +} + +// CHECK-LABEL: @cast_ui8_to_i32 +func @cast_ui8_to_i32() -> tensor<4xi32> { + %cst = constant dense<[0, 128, 
129, 255]> : tensor<4xui8> + %0 = "tfl.cast"(%cst) : (tensor<4xui8>) -> tensor<4xi32> + return %0 : tensor<4xi32> + +// CHECK: %[[CST:.*]] = constant dense<[0, 128, 129, 255]> : tensor<4xi32> +// CHECK: return %[[CST]] +} + + diff --git a/tensorflow/compiler/mlir/lite/tests/end2end/unroll_batch_matmul.pbtxt b/tensorflow/compiler/mlir/lite/tests/end2end/unroll_batch_matmul.pbtxt index 096033e37cb..95b970c9f57 100644 --- a/tensorflow/compiler/mlir/lite/tests/end2end/unroll_batch_matmul.pbtxt +++ b/tensorflow/compiler/mlir/lite/tests/end2end/unroll_batch_matmul.pbtxt @@ -78,14 +78,14 @@ versions { } # CHECK: func @main(%[[VAL_0:.*]]: tensor<2x5x3xf32>, %[[VAL_1:.*]]: tensor<3x7xf32>) -> tensor<2x5x7xf32> attributes {tf.entry_function = {control_outputs = "", inputs = "Placeholder,Placeholder_1", outputs = "MatMul"}} { -# CHECK: %[[VAL_2:.*]] = constant dense<[1, 0]> : tensor<2xi32> -# CHECK: %[[VAL_3:.*]] = constant dense<[5, 3]> : tensor<2xi32> -# CHECK: %[[VAL_4:.*]] = constant dense<[3, 7]> : tensor<2xi32> -# CHECK: %[[VAL_5:.*]] = constant unit -# CHECK: %[[VAL_6:.*]] = constant dense<[1, 0, 0]> : tensor<3xi32> -# CHECK: %[[VAL_7:.*]] = constant dense<[1, 5, 3]> : tensor<3xi32> -# CHECK: %[[VAL_8:.*]] = constant dense<0> : tensor<3xi32> -# CHECK: %[[VAL_9:.*]] = constant dense<[1, 3, 7]> : tensor<3xi32> +# CHECK-DAG: %[[VAL_2:.*]] = constant dense<[1, 0]> : tensor<2xi32> +# CHECK-DAG: %[[VAL_3:.*]] = constant dense<[5, 3]> : tensor<2xi32> +# CHECK-DAG: %[[VAL_4:.*]] = constant dense<[3, 7]> : tensor<2xi32> +# CHECK-DAG: %[[VAL_5:.*]] = constant unit +# CHECK-DAG: %[[VAL_6:.*]] = constant dense<[1, 0, 0]> : tensor<3xi32> +# CHECK-DAG: %[[VAL_7:.*]] = constant dense<[1, 5, 3]> : tensor<3xi32> +# CHECK-DAG: %[[VAL_8:.*]] = constant dense<0> : tensor<3xi32> +# CHECK-DAG: %[[VAL_9:.*]] = constant dense<[1, 3, 7]> : tensor<3xi32> # CHECK: %[[VAL_10:.*]] = "tfl.slice"(%[[VAL_0]], %[[VAL_8]], %[[VAL_7]]) : (tensor<2x5x3xf32>, tensor<3xi32>, tensor<3xi32>) -> 
tensor<1x5x3xf32> # CHECK: %[[VAL_11:.*]] = "tfl.reshape"(%[[VAL_10]], %[[VAL_3]]) : (tensor<1x5x3xf32>, tensor<2xi32>) -> tensor<5x3xf32> # CHECK: %[[VAL_12:.*]] = "tfl.slice"(%[[VAL_0]], %[[VAL_6]], %[[VAL_7]]) : (tensor<2x5x3xf32>, tensor<3xi32>, tensor<3xi32>) -> tensor<1x5x3xf32> From 1368ca69f00a7a56a09f1a2c8cc161b0f05b55b8 Mon Sep 17 00:00:00 2001 From: Tres Popp Date: Tue, 10 Nov 2020 11:24:57 -0800 Subject: [PATCH 118/220] Add kernel_gen support for AddV2 of int64 PiperOrigin-RevId: 341660429 Change-Id: Ic1b14ed92c6a6b33fa1f924700c282e8c1688de6 --- tensorflow/core/kernels/cwise_op_add_2.cc | 9 ++++ tensorflow/core/kernels/mlir_generated/BUILD | 9 ++-- .../kernels/mlir_generated/gpu_add_test.cc | 53 +++++++++++-------- .../mlir_generated/unranked_gpu_add.cc | 1 + 4 files changed, 43 insertions(+), 29 deletions(-) diff --git a/tensorflow/core/kernels/cwise_op_add_2.cc b/tensorflow/core/kernels/cwise_op_add_2.cc index 98cdbbd9823..e30055286b1 100644 --- a/tensorflow/core/kernels/cwise_op_add_2.cc +++ b/tensorflow/core/kernels/cwise_op_add_2.cc @@ -31,8 +31,17 @@ REGISTER6(BinaryOp, CPU, "AddV2", functor::add, int8, int16, uint32, complex64, #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER4(BinaryOp, GPU, "Add", functor::add, uint8, int64, complex64, complex128); + +#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \ + !defined(MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED) REGISTER5(BinaryOp, GPU, "AddV2", functor::add, uint8, uint32, int64, complex64, complex128); +#else +// There is an MLIR generated kernel for int64 +REGISTER4(BinaryOp, GPU, "AddV2", functor::add, uint8, uint32, complex64, + complex128); +#endif + #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM #endif // !defined(__ANDROID_TYPES_SLIM__) diff --git a/tensorflow/core/kernels/mlir_generated/BUILD b/tensorflow/core/kernels/mlir_generated/BUILD index fc414fe78b8..7322e7912a5 100644 --- a/tensorflow/core/kernels/mlir_generated/BUILD +++ b/tensorflow/core/kernels/mlir_generated/BUILD @@ -94,12 +94,8 
@@ cc_library( cc_library( name = "kernel_deps", deps = if_mlir_unranked_kernels_enabled( - [ - ":unranked_kernel_deps", - ], - [ - ":ranked_kernel_deps", - ], + [":unranked_kernel_deps"], + [":ranked_kernel_deps"], ), ) @@ -476,6 +472,7 @@ gen_kernel_library( "f16", "f32", "f64", + "i64", ], unroll_factors = "4", ) diff --git a/tensorflow/core/kernels/mlir_generated/gpu_add_test.cc b/tensorflow/core/kernels/mlir_generated/gpu_add_test.cc index 9bf68e3872e..79a2019bc7f 100644 --- a/tensorflow/core/kernels/mlir_generated/gpu_add_test.cc +++ b/tensorflow/core/kernels/mlir_generated/gpu_add_test.cc @@ -39,7 +39,7 @@ class GpuAddTest : public OpsTestBase { SetDevice(tensorflow::DEVICE_GPU, std::move(device_gpu)); } - template + template void RunAddOp(std::vector input1, TensorShape shape1, std::vector input2, TensorShape shape2, std::vector output, TensorShape output_shape) { @@ -59,8 +59,8 @@ class GpuAddTest : public OpsTestBase { test::ExpectEqual(expected_tensor, *GetOutput(0)); } - template - void RunBroadcastingAddOp() { + template + void TestBroadcastingAddOp() { auto input1 = { static_cast(10), static_cast(20), @@ -73,38 +73,45 @@ class GpuAddTest : public OpsTestBase { static_cast(21), static_cast(22), static_cast(23), }; auto expected_shape = TensorShape({2, 3}); - RunAddOp(input1, shape1, input2, shape2, expected, expected_shape); + RunAddOp(input1, shape1, input2, shape2, expected, + expected_shape); } - template + template void RunAddOp() { - auto input1 = {static_cast(-std::numeric_limits::infinity()), - static_cast(-0.1), - static_cast(-0.0), - static_cast(0.0), - static_cast(0.1), - static_cast(std::numeric_limits::infinity())}; - auto input2 = {static_cast(-std::numeric_limits::infinity()), - static_cast(-0.1), - static_cast(-0.0), - static_cast(0.0), - static_cast(0.1), - static_cast(std::numeric_limits::infinity())}; + auto input1 = { + static_cast(-std::numeric_limits::infinity()), + static_cast(-0.1), + static_cast(-0.0), + static_cast(0.0), + 
static_cast(0.1), + static_cast(std::numeric_limits::infinity())}; + auto input2 = { + static_cast(-std::numeric_limits::infinity()), + static_cast(-0.1), + static_cast(-0.0), + static_cast(0.0), + static_cast(0.1), + static_cast(std::numeric_limits::infinity())}; std::vector expected; for (const T& inp : input2) { - expected.push_back( - static_cast(static_cast(inp) + static_cast(inp))); + expected.push_back(static_cast(static_cast(inp) + + static_cast(inp))); } - RunAddOp(input1, {2, 3}, input2, {2, 3}, expected, {2, 3}); + RunAddOp(input1, {2, 3}, input2, {2, 3}, expected, {2, 3}); } }; TEST_F(GpuAddTest, AddFloat) { RunAddOp(); } TEST_F(GpuAddTest, AddDouble) { RunAddOp(); } TEST_F(GpuAddTest, AddHalf) { RunAddOp(); } -TEST_F(GpuAddTest, BCastAddFloat) { RunBroadcastingAddOp(); } -TEST_F(GpuAddTest, BCastAddDouble) { RunBroadcastingAddOp(); } -TEST_F(GpuAddTest, BCastAddHalf) { RunBroadcastingAddOp(); } +TEST_F(GpuAddTest, BCastAddFloat) { TestBroadcastingAddOp(); } +TEST_F(GpuAddTest, BCastAddDouble) { TestBroadcastingAddOp(); } +TEST_F(GpuAddTest, BCastAddHalf) { + TestBroadcastingAddOp(); +} + +TEST_F(GpuAddTest, BCastAddInt64) { TestBroadcastingAddOp(); } // TEST_F(GpuAddTest, AddV2Half) { RunAddOp(); } } // namespace diff --git a/tensorflow/core/kernels/mlir_generated/unranked_gpu_add.cc b/tensorflow/core/kernels/mlir_generated/unranked_gpu_add.cc index 16dc1df9aef..decfd990c68 100644 --- a/tensorflow/core/kernels/mlir_generated/unranked_gpu_add.cc +++ b/tensorflow/core/kernels/mlir_generated/unranked_gpu_add.cc @@ -20,5 +20,6 @@ namespace tensorflow { GENERATE_AND_REGISTER_BINARY_KERNEL(AddV2, f16, DT_HALF, Eigen::half); GENERATE_AND_REGISTER_BINARY_KERNEL(AddV2, f32, DT_FLOAT, float); GENERATE_AND_REGISTER_BINARY_KERNEL(AddV2, f64, DT_DOUBLE, double); +GENERATE_AND_REGISTER_BINARY_KERNEL(AddV2, i64, DT_INT64, int64); } // namespace tensorflow From 9779f869699b15b12a32f5f8b1b224bcdfa34566 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Tue, 10 Nov 2020 
11:31:36 -0800 Subject: [PATCH 119/220] Parallel device lib: accept output shape hints to avoid blocking PiperOrigin-RevId: 341661900 Change-Id: Iffc7e56595866d1880f5700ea0afac61a2aae264 --- tensorflow/c/eager/parallel_device/BUILD | 1 + .../parallel_device/parallel_device_lib.cc | 62 ++++++++++++++----- .../parallel_device/parallel_device_lib.h | 21 ++++++- .../parallel_device_lib_test.cc | 36 +++++++++++ 4 files changed, 100 insertions(+), 20 deletions(-) diff --git a/tensorflow/c/eager/parallel_device/BUILD b/tensorflow/c/eager/parallel_device/BUILD index 473ab503834..a18e47c9743 100644 --- a/tensorflow/c/eager/parallel_device/BUILD +++ b/tensorflow/c/eager/parallel_device/BUILD @@ -76,6 +76,7 @@ cc_library( "//tensorflow/c:c_api", "//tensorflow/c/eager:c_api", "//tensorflow/c/eager:c_api_experimental", + "//tensorflow/core:framework", "//tensorflow/core:lib", "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", diff --git a/tensorflow/c/eager/parallel_device/parallel_device_lib.cc b/tensorflow/c/eager/parallel_device/parallel_device_lib.cc index 095f33ff303..a468169356d 100644 --- a/tensorflow/c/eager/parallel_device/parallel_device_lib.cc +++ b/tensorflow/c/eager/parallel_device/parallel_device_lib.cc @@ -328,6 +328,17 @@ ParallelDevice::Execute(TFE_Context* context, const char* operation_name, const TFE_OpAttrs* attributes, int expected_max_outputs, TF_Status* status) const { + std::vector expected_output_shapes(expected_max_outputs); + return Execute(context, inputs, operation_name, attributes, + expected_output_shapes, status); +} + +absl::optional>> +ParallelDevice::Execute( + TFE_Context* context, const std::vector& inputs, + const char* operation_name, const TFE_OpAttrs* attributes, + const std::vector& expected_output_shapes, + TF_Status* status) const { absl::optional>> result; // Compute per-device per-output tensors std::vector> per_device_output_tensors; @@ -344,7 +355,7 @@ ParallelDevice::Execute(TFE_Context* context, } 
device_thread->StartExecute(context, operation_name, std::move(device_inputs), attributes, - expected_max_outputs); + expected_output_shapes.size()); } StatusPtr first_bad_status(nullptr); for (int device_index = 0; device_index < underlying_devices_.size(); @@ -386,8 +397,15 @@ ParallelDevice::Execute(TFE_Context* context, for (int j = 0; j < underlying_devices_.size(); ++j) { components.push_back(std::move(per_device_output_tensors[j][i])); } - per_device_outputs.push_back(ParallelTensor::FromTensorHandles( - *this, std::move(components), status)); + if (expected_output_shapes[i].IsFullyDefined()) { + per_device_outputs.push_back(ParallelTensor::FromTensorHandles( + *this, std::move(components), + absl::Span(expected_output_shapes[i].dim_sizes()), + status)); + } else { + per_device_outputs.push_back(ParallelTensor::FromTensorHandles( + *this, std::move(components), status)); + } if (TF_GetCode(status) != TF_OK) return result; } result.emplace(std::move(per_device_outputs)); @@ -396,9 +414,27 @@ ParallelDevice::Execute(TFE_Context* context, std::unique_ptr ParallelTensor::FromTensorHandles( const ParallelDevice& parallel_device, - std::vector components, TF_Status* status) { + std::vector components, absl::Span shape, + TF_Status* status) { TF_DataType dtype = TFE_TensorHandleDataType(components[0].get()); - std::vector shape( + // Verify that the TensorHandle's shape and dtype match all of the component + // shapes and dtypes. 
+ for (TensorHandlePtr& component : components) { + if (TFE_TensorHandleDataType(component.get()) != dtype) { + TF_SetStatus(status, TF_INTERNAL, + "Components of a ParallelTensor must all have " + "the same dtype"); + return nullptr; + } + } + return std::unique_ptr( + new ParallelTensor(parallel_device, std::move(components), shape, dtype)); +} + +std::unique_ptr ParallelTensor::FromTensorHandles( + const ParallelDevice& parallel_device, + std::vector components, TF_Status* status) { + std::vector shape( TFE_TensorHandleNumDims(components[0].get(), status)); if (TF_GetCode(status) != TF_OK) return nullptr; for (int i = 0; i < shape.size(); ++i) { @@ -406,11 +442,10 @@ std::unique_ptr ParallelTensor::FromTensorHandles( if (TF_GetCode(status) != TF_OK) return nullptr; } - // Verify that the TensorHandle's shape and dtype match all of the component - // shapes and dtypes. + // Verify that the TensorHandle's shape matches all of the component shapes. for (TensorHandlePtr& component : components) { for (int i = 0; i < shape.size(); ++i) { - int64_t tensor_dim = TFE_TensorHandleDim(component.get(), i, status); + int64 tensor_dim = TFE_TensorHandleDim(component.get(), i, status); if (TF_GetCode(status) != TF_OK) return nullptr; if (tensor_dim != shape[i]) { // TODO(allenl): Allow shapes to differ. 
@@ -419,17 +454,10 @@ std::unique_ptr ParallelTensor::FromTensorHandles( "the same shape"); return nullptr; } - if (TFE_TensorHandleDataType(component.get()) != dtype) { - TF_SetStatus(status, TF_INTERNAL, - "Components of a ParallelTensor must all have " - "the same dtype"); - return nullptr; - } } } - - return std::unique_ptr(new ParallelTensor( - parallel_device, std::move(components), std::move(shape), dtype)); + return FromTensorHandles(parallel_device, std::move(components), + absl::Span(shape), status); } } // namespace parallel_device diff --git a/tensorflow/c/eager/parallel_device/parallel_device_lib.h b/tensorflow/c/eager/parallel_device/parallel_device_lib.h index 1bb9ce0f663..f2513e09ccc 100644 --- a/tensorflow/c/eager/parallel_device/parallel_device_lib.h +++ b/tensorflow/c/eager/parallel_device/parallel_device_lib.h @@ -26,6 +26,7 @@ limitations under the License. #include "tensorflow/c/c_api.h" #include "tensorflow/c/eager/c_api.h" #include "tensorflow/c/eager/c_api_experimental.h" +#include "tensorflow/core/framework/tensor_shape.h" namespace tensorflow { namespace parallel_device { @@ -93,6 +94,15 @@ class ParallelDevice { const char* operation_name, const TFE_OpAttrs* attributes, int expected_max_outputs, TF_Status* status) const; + // Accepts inferred shapes for outputs, which if fully defined will avoid + // querying the shapes of the underlying TensorHandles. This allows async + // computation to continue without blocking. + absl::optional>> Execute( + TFE_Context* context, const std::vector& inputs, + const char* operation_name, const TFE_OpAttrs* attributes, + const std::vector& expected_output_shapes, + TF_Status* status) const; + private: // A sequence of device names, indicating which devices replicated operations // are forwarded to. @@ -117,10 +127,15 @@ class ParallelDevice { class ParallelTensor { public: // Construct a ParallelTensor from TensorHandles placed on the component - // devices of a ParallelDevice. 
+ // devices of a ParallelDevice. Inspects `components` to determine a shape. static std::unique_ptr FromTensorHandles( const ParallelDevice& parallel_device, std::vector components, TF_Status* status); + // Uses the provided shape without additional checks, which avoids blocking. + static std::unique_ptr FromTensorHandles( + const ParallelDevice& parallel_device, + std::vector components, absl::Span shape, + TF_Status* status); size_t num_tensors() const { return tensors_.size(); } TFE_TensorHandle* tensor(size_t index) const { return tensors_[index].get(); } @@ -132,10 +147,10 @@ class ParallelTensor { private: ParallelTensor(const ParallelDevice& device, std::vector tensors, - std::vector shape, const TF_DataType dtype) + absl::Span shape, const TF_DataType dtype) : device_(device), tensors_(std::move(tensors)), - shape_(std::move(shape)), + shape_(shape.begin(), shape.end()), dtype_(dtype) {} const ParallelDevice& device_; diff --git a/tensorflow/c/eager/parallel_device/parallel_device_lib_test.cc b/tensorflow/c/eager/parallel_device/parallel_device_lib_test.cc index 35befe959cb..0bfd0285fdf 100644 --- a/tensorflow/c/eager/parallel_device/parallel_device_lib_test.cc +++ b/tensorflow/c/eager/parallel_device/parallel_device_lib_test.cc @@ -80,5 +80,41 @@ TEST(PARALLEL_DEVICE_LIB, TestOpWithError) { ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); } +TEST(PARALLEL_DEVICE_LIB, TestExplicitOutputShape) { + std::unique_ptr status( + TF_NewStatus(), TF_DeleteStatus); + std::unique_ptr opts( + TFE_NewContextOptions(), TFE_DeleteContextOptions); + std::unique_ptr config( + TF_CreateConfig( + /*xla*/ false, + /* gpu_memory_allow_growth */ true, /* num_cpu_devices */ + 2), + TF_DeleteBuffer); + TFE_ContextOptionsSetConfig(opts.get(), config->data, config->length, + status.get()); + std::unique_ptr context( + TFE_NewContext(opts.get(), status.get()), TFE_DeleteContext); + ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); 
+ + std::vector devices{ + "/job:localhost/replica:0/task:0/device:CPU:0", + "/job:localhost/replica:0/task:0/device:CPU:1"}; + ParallelDevice parallel_device(std::move(devices)); + std::unique_ptr handle_op( + TFE_NewOp(context.get(), "VarHandleOp", status.get()), TFE_DeleteOp); + ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); + TFE_OpSetAttrType(handle_op.get(), "dtype", TF_FLOAT); + TFE_OpSetAttrShape(handle_op.get(), "shape", /*dims=*/nullptr, /*num_dims=*/0, + status.get()); + ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); + auto outputs = parallel_device.Execute( + context.get(), std::vector(), "VarHandleOp", + TFE_OpGetAttrs(handle_op.get()), {PartialTensorShape({})}, status.get()); + ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); + const std::vector>& handles = *outputs; + EXPECT_EQ(0, handles[0]->shape().size()); +} + } // namespace parallel_device } // namespace tensorflow From fa595eb8fa99e58dff7e800f660f200ffffd0f4a Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Tue, 10 Nov 2020 11:38:31 -0800 Subject: [PATCH 120/220] [TF] Add TensorRT version to tf.sysconfig.get_build_info. Add is_tensorrt_build and tensorrt_version to build_info. 
PiperOrigin-RevId: 341663396 Change-Id: I239d7d52c388767932716abd501061d31324ff10 --- tensorflow/opensource_only.files | 1 + tensorflow/python/platform/build_info_test.py | 8 ++++-- tensorflow/tensorflow.bzl | 1 + tensorflow/tools/build_info/BUILD | 1 + tensorflow/tools/build_info/gen_build_info.py | 9 +++++++ third_party/tensorrt/BUILD.tpl | 5 ++++ .../tensorrt/tensorrt/tensorrt_config.py.tpl | 17 ++++++++++++ third_party/tensorrt/tensorrt_configure.bzl | 27 +++++++++++++++++++ 8 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 third_party/tensorrt/tensorrt/tensorrt_config.py.tpl diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files index ccbdfbffc7c..fe8a0aecf26 100644 --- a/tensorflow/opensource_only.files +++ b/tensorflow/opensource_only.files @@ -201,6 +201,7 @@ tensorflow/third_party/tensorrt/BUILD.tpl tensorflow/third_party/tensorrt/LICENSE tensorflow/third_party/tensorrt/build_defs.bzl.tpl tensorflow/third_party/tensorrt/tensorrt/include/tensorrt_config.h.tpl +tensorflow/third_party/tensorrt/tensorrt/tensorrt_config.py.tpl tensorflow/third_party/tensorrt/tensorrt_configure.bzl tensorflow/third_party/termcolor.BUILD tensorflow/third_party/tf_toolchains.BUILD diff --git a/tensorflow/python/platform/build_info_test.py b/tensorflow/python/platform/build_info_test.py index 5d4b3cfa251..11ae3a9c9aa 100644 --- a/tensorflow/python/platform/build_info_test.py +++ b/tensorflow/python/platform/build_info_test.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.compiler.tf2tensorrt._pywrap_py_utils import is_tensorrt_enabled from tensorflow.python.platform import build_info from tensorflow.python.platform import test @@ -29,12 +30,15 @@ class BuildInfoTest(test.TestCase): test.is_built_with_rocm()) self.assertEqual(build_info.build_info['is_cuda_build'], test.is_built_with_cuda()) + 
self.assertEqual(build_info.build_info['is_tensorrt_build'], + is_tensorrt_enabled()) def testDeterministicOrder(self): # The dict may contain other keys depending on the platform, but the ones # it always contains should be in order. - self.assertContainsSubsequence(build_info.build_info.keys(), - ('is_cuda_build', 'is_rocm_build')) + self.assertContainsSubsequence( + build_info.build_info.keys(), + ('is_cuda_build', 'is_rocm_build', 'is_tensorrt_build')) if __name__ == '__main__': diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index bb66a4c831f..c9419753437 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -2457,6 +2457,7 @@ def tf_py_build_info_genrule(name, out): " --key_value" + " is_rocm_build=" + if_rocm("True", "False") + " is_cuda_build=" + if_cuda("True", "False") + + " is_tensorrt_build=" + if_tensorrt("True", "False") + if_windows(_dict_to_kv({ "msvcp_dll_names": "msvcp140.dll,msvcp140_1.dll", }), "") + if_windows_cuda(_dict_to_kv({ diff --git a/tensorflow/tools/build_info/BUILD b/tensorflow/tools/build_info/BUILD index 938ab967235..04e819571e0 100644 --- a/tensorflow/tools/build_info/BUILD +++ b/tensorflow/tools/build_info/BUILD @@ -15,6 +15,7 @@ py_binary( tags = ["no-remote-exec"], deps = [ "@local_config_cuda//cuda:cuda_config_py", + "@local_config_tensorrt//:tensorrt_config_py", "@six_archive//:six", ], ) diff --git a/tensorflow/tools/build_info/gen_build_info.py b/tensorflow/tools/build_info/gen_build_info.py index 8f84ac4584c..456a52cd7f5 100755 --- a/tensorflow/tools/build_info/gen_build_info.py +++ b/tensorflow/tools/build_info/gen_build_info.py @@ -28,6 +28,12 @@ try: except ImportError: cuda_config = None +# tensorrt.tensorrt is only valid in OSS +try: + from tensorrt.tensorrt import tensorrt_config # pylint: disable=g-import-not-at-top +except ImportError: + tensorrt_config = None + def write_build_info(filename, key_value_list): """Writes a Python that describes the build. 
@@ -43,6 +49,9 @@ def write_build_info(filename, key_value_list): if cuda_config: build_info.update(cuda_config.config) + if tensorrt_config: + build_info.update(tensorrt_config.config) + for arg in key_value_list: key, value = six.ensure_str(arg).split("=") if value.lower() == "true": diff --git a/third_party/tensorrt/BUILD.tpl b/third_party/tensorrt/BUILD.tpl index dfa06ced2ed..ed12d09afaf 100644 --- a/third_party/tensorrt/BUILD.tpl +++ b/third_party/tensorrt/BUILD.tpl @@ -40,4 +40,9 @@ bzl_library( ], ) +py_library( + name = "tensorrt_config_py", + srcs = ["tensorrt/tensorrt_config.py"] +) + %{copy_rules} diff --git a/third_party/tensorrt/tensorrt/tensorrt_config.py.tpl b/third_party/tensorrt/tensorrt/tensorrt_config.py.tpl new file mode 100644 index 00000000000..709eaaa2719 --- /dev/null +++ b/third_party/tensorrt/tensorrt/tensorrt_config.py.tpl @@ -0,0 +1,17 @@ +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +config = %{tensorrt_config} diff --git a/third_party/tensorrt/tensorrt_configure.bzl b/third_party/tensorrt/tensorrt_configure.bzl index 9c980a92cf8..f385630e2b1 100644 --- a/third_party/tensorrt/tensorrt_configure.bzl +++ b/third_party/tensorrt/tensorrt_configure.bzl @@ -79,6 +79,14 @@ def _create_dummy_repository(repository_ctx): {}, ) + # Set up tensorrt_config.py, which is used by gen_build_info to provide + # build environment info to the API + _tpl( + repository_ctx, + "tensorrt/tensorrt_config.py", + _py_tmpl_dict({}), + ) + def enable_tensorrt(repository_ctx): """Returns whether to build with TensorRT support.""" return int(get_host_environ(repository_ctx, _TF_NEED_TENSORRT, False)) @@ -93,6 +101,7 @@ def _create_local_tensorrt_repository(repository_ctx): "build_defs.bzl": _tpl_path(repository_ctx, "build_defs.bzl"), "BUILD": _tpl_path(repository_ctx, "BUILD"), "tensorrt/include/tensorrt_config.h": _tpl_path(repository_ctx, "tensorrt/include/tensorrt_config.h"), + "tensorrt/tensorrt_config.py": _tpl_path(repository_ctx, "tensorrt/tensorrt_config.py"), } config = find_cuda_config(repository_ctx, find_cuda_config_path, ["tensorrt"]) @@ -148,6 +157,19 @@ def _create_local_tensorrt_repository(repository_ctx): {"%{tensorrt_version}": trt_version}, ) + # Set up tensorrt_config.py, which is used by gen_build_info to provide + # build environment info to the API + repository_ctx.template( + "tensorrt/tensorrt_config.py", + tpl_paths["tensorrt/tensorrt_config.py"], + _py_tmpl_dict({ + "tensorrt_version": trt_version, + }), + ) + +def _py_tmpl_dict(d): + return {"%{tensorrt_config}": str(d)} + def _tensorrt_configure_impl(repository_ctx): """Implementation of the tensorrt_configure repository rule.""" @@ -165,6 +187,11 @@ def _tensorrt_configure_impl(repository_ctx): config_repo_label(remote_config_repo, ":tensorrt/include/tensorrt_config.h"), {}, ) + repository_ctx.template( + 
"tensorrt/tensorrt_config.py", + config_repo_label(remote_config_repo, ":tensorrt/tensorrt_config.py"), + {}, + ) repository_ctx.template( "LICENSE", config_repo_label(remote_config_repo, ":LICENSE"), From f4307fa6f576763591afec4026b1125051b0916a Mon Sep 17 00:00:00 2001 From: Ken Franko Date: Tue, 10 Nov 2020 12:05:51 -0800 Subject: [PATCH 121/220] Don't outside compile tf.Assert Op even if it contains string operands. The tf.Assert op is removed during legalization and shouldn't be outside compiled for performance reasons. PiperOrigin-RevId: 341669459 Change-Id: I956662b63aafaef05a269d4786d5504825d20dca --- .../mark_ops_for_outside_compilation.mlir | 22 +++++++++++++++++-- .../mark_ops_for_outside_compilation.cc | 10 +++++---- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mark_ops_for_outside_compilation.mlir b/tensorflow/compiler/mlir/tensorflow/tests/mark_ops_for_outside_compilation.mlir index c8a6d5489c3..ae5f98da85f 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/mark_ops_for_outside_compilation.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/mark_ops_for_outside_compilation.mlir @@ -1,7 +1,7 @@ // RUN: tf-opt %s -tf-mark-ops-for-outside-compilation | FILECHECK_OPTS="" FileCheck %s -// CHECK-LABEL: func @unsupported_op_no_soft_placement -func @unsupported_op_no_soft_placement() -> tensor { +// CHECK-LABEL: func @unsupported_op_missing_soft_placement_attribute +func @unsupported_op_missing_soft_placement_attribute() -> tensor { %0 = "tf_device.cluster"() ( { // CHECK: "tf.UnsupportedOp" // CHECK-NOT: _xla_outside_compilation @@ -28,6 +28,24 @@ func @unsupported_op_soft_placement_false() -> tensor { return %0 : tensor } +// CHECK-LABEL: func @assert_op_string_operand +func @assert_op_string_operand(%arg0: tensor) -> tensor { + %0 = "tf_device.cluster"() ( { + // CHECK: "tf.Assert" + // CHECK-NOT: _xla_outside_compilation + // CHECK: "tf.UnsupportedOp" + // CHECK-SAME: 
_xla_outside_compilation + // CHECK: "tf.Identity" + // CHECK-NOT: _xla_outside_compilation + %t = constant dense : tensor + "tf.Assert"(%t, %arg0) {summarize = 3} : (tensor, tensor) -> () + %1 = "tf.UnsupportedOp"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Identity"(%1) : (tensor) -> tensor + tf_device.return %2 : tensor + }) {allow_soft_placement = true, num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor + return %0 : tensor +} + // CHECK-LABEL: func @unsupported_op func @unsupported_op() -> tensor { %0 = "tf_device.cluster"() ( { diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc index ac844b925ce..b5607d63af9 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc @@ -164,10 +164,12 @@ bool IsSupportedOp(Operation& op, const Dialect* tf_dialect) { if (op.getDialect() != tf_dialect) return true; - else - return !HasStringOperand(op) && !HasStringResult(op) && - (MatchesPattern(op, supported_ops) || - mhlo::IsOpAllowedTf2XlaFallback(&op)); + // Assert has a legalization that later removes it so we don't want to outside + // compile it ever for performance reasons. + if (llvm::isa(op)) return true; + return !HasStringOperand(op) && !HasStringResult(op) && + (MatchesPattern(op, supported_ops) || + mhlo::IsOpAllowedTf2XlaFallback(&op)); } // Checks all regions of `op` for captured string operands. From 1086e0653e009a1a0d685157befad31f2b760c01 Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Tue, 10 Nov 2020 12:10:37 -0800 Subject: [PATCH 122/220] Remove outdated MWMS tests We already have similar coverage via all the correctness tests. 
PiperOrigin-RevId: 341670424 Change-Id: I719fefc0248a77aae87f6d30ee69173ccfb8d563 --- .../collective_all_reduce_strategy_test.py | 280 +----------------- 1 file changed, 2 insertions(+), 278 deletions(-) diff --git a/tensorflow/python/keras/distribute/collective_all_reduce_strategy_test.py b/tensorflow/python/keras/distribute/collective_all_reduce_strategy_test.py index 1c78d934959..13eb3eb095e 100644 --- a/tensorflow/python/keras/distribute/collective_all_reduce_strategy_test.py +++ b/tensorflow/python/keras/distribute/collective_all_reduce_strategy_test.py @@ -19,294 +19,19 @@ from __future__ import division from __future__ import print_function from absl.testing import parameterized -import numpy as np -from tensorflow.core.protobuf import config_pb2 from tensorflow.python.compat import v2_compat from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.distribute import collective_all_reduce_strategy as mwms_lib from tensorflow.python.distribute import combinations as ds_combinations from tensorflow.python.distribute import multi_process_runner -from tensorflow.python.distribute import multi_worker_test_base from tensorflow.python.distribute import strategy_combinations -from tensorflow.python.distribute import strategy_test_lib -from tensorflow.python.distribute.cluster_resolver import SimpleClusterResolver -from tensorflow.python.framework import config as tf_config +from tensorflow.python.eager import test from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops from tensorflow.python.framework import test_combinations as combinations -from tensorflow.python.framework import test_util from tensorflow.python.keras import layers -from tensorflow.python.keras import testing_utils -from tensorflow.python.keras.engine import sequential from tensorflow.python.keras.engine import training -from tensorflow.python.keras.mixed_precision import policy -from 
tensorflow.python.keras.mixed_precision import test_util as mp_test_util from tensorflow.python.keras.optimizer_v2 import gradient_descent as gradient_descent_keras from tensorflow.python.ops import array_ops -from tensorflow.python.ops import nn -from tensorflow.python.ops import random_ops -from tensorflow.python.ops import variables -from tensorflow.python.ops.losses import losses -from tensorflow.python.platform import test -from tensorflow.python.training import adam -from tensorflow.python.training import gradient_descent -from tensorflow.python.training import training_util -from tensorflow.python.training.experimental import loss_scale as loss_scale_module -from tensorflow.python.training.experimental import loss_scale_optimizer -from tensorflow.python.training.server_lib import ClusterSpec - - -def create_test_objects(cluster_spec=None, - task_type=None, - task_id=None, - num_gpus=None): - sess_config = config_pb2.ConfigProto() - if num_gpus is None: - num_gpus = len(tf_config.list_logical_devices('GPU')) - - if cluster_spec and task_type and task_id is not None: - cluster_resolver = SimpleClusterResolver( - cluster_spec=ClusterSpec(cluster_spec), - task_type=task_type, - task_id=task_id, - num_accelerators={'GPU': num_gpus}) - target = 'grpc://' + cluster_spec[task_type][task_id] - else: - cluster_resolver = SimpleClusterResolver( - ClusterSpec({}), num_accelerators={'GPU': num_gpus}) - target = '' - - strategy = mwms_lib.CollectiveAllReduceStrategy( - cluster_resolver=cluster_resolver) - sess_config = strategy.update_config_proto(sess_config) - - return strategy, target, sess_config - - -class CollectiveAllReduceStrategyTestBase( - multi_worker_test_base.MultiWorkerTestBase): - - collective_key_base = 0 - - def setUp(self): - # We use a different key_base for each test so that collective keys won't be - # reused. 
- mwms_lib.CollectiveAllReduceStrategy._collective_key_base += 100000 - super(CollectiveAllReduceStrategyTestBase, self).setUp() - - def _get_test_object(self, task_type, task_id, num_gpus=0): - strategy, target, session_config = create_test_objects( - cluster_spec=self._cluster_spec, - task_type=task_type, - task_id=task_id, - num_gpus=num_gpus) - return strategy, target, session_config - - def _test_complex_model(self, task_type, task_id, num_gpus): - d, master_target, config = self._get_test_object(task_type, task_id, - num_gpus) - - def model_fn(): - """Mnist model with synthetic input.""" - data_format = 'channels_last' - input_shape = [28, 28, 1] - l = layers - max_pool = l.MaxPooling2D((2, 2), (2, 2), - padding='same', - data_format=data_format) - model = sequential.Sequential([ - l.Reshape(target_shape=input_shape, input_shape=(28 * 28,)), - l.Conv2D( - 32, - 5, - padding='same', - data_format=data_format, - activation=nn.relu), max_pool, - l.Conv2D( - 64, - 5, - padding='same', - data_format=data_format, - activation=nn.relu), max_pool, - l.Flatten(), - l.Dense(1024, activation=nn.relu), - l.Dropout(0.4), - l.Dense(10) - ]) - image = random_ops.random_uniform([2, 28, 28]) - label = random_ops.random_uniform([2, 1], maxval=10, dtype=dtypes.int32) - logits = model(image, training=True) - # TODO(yuefengz): make loss a callable for eager mode. 
- loss = losses.sparse_softmax_cross_entropy(labels=label, logits=logits) - optimizer = adam.AdamOptimizer(learning_rate=1e-4) - train_op = optimizer.minimize(loss, - training_util.get_or_create_global_step()) - return train_op - - with ops.Graph().as_default(), \ - self.cached_session(config=config, - target=master_target) as sess: - with d.scope(): - train_op = d.extended.call_for_each_replica(model_fn) - train_op = d.group(d.experimental_local_results(train_op)) - - sess.run(variables.global_variables_initializer()) - sess.run(train_op) - - def _test_mixed_precision(self, task_type, task_id, num_gpus): - """Tests mixed precision works with the CollectiveAllReduceStrategy. - - This tests: - 1. Variables are in float32, by running with a small enough learning rate - that if the variables are float16, their values wouldn't change when - gradients are applied. - 2. The loss scale is doubled if there are no NaNs. - 3. The loss scale is halved if the first worker has a NaN, even if the - other works do not have NaNs. - - Args: - task_type: A string, such as "worker", indicating the type of the replica. - task_id: Zero-indexed ID of the task. - num_gpus: The number of GPUs to use. - """ - d, master_target, config = self._get_test_object(task_type, task_id, - num_gpus) - # Should be set to mixed_float16 by caller. - self.assertEqual(policy.global_policy().name, 'mixed_float16') - - with ops.Graph().as_default(), \ - self.cached_session(config=config, - target=master_target) as sess: - # The loss on the first worker is multiplied by this value. Allows - # testing the first worker having NaN loss and gradients while keeping the - # other workers' losses and gradients finite. 
- loss_multiplier_for_first_worker = variables.Variable( - 1., dtype='float16', trainable=False) - with d.scope(): - model = sequential.Sequential([ - mp_test_util.MultiplyLayer(assert_type=dtypes.float16, - input_shape=(1,)), - ]) - loss_scale = loss_scale_module.DynamicLossScale(2 ** 10, - increment_period=1) - def model_fn(): - """Simple model to test mixed precision.""" - x = np.ones((1, 1)) - loss = model(x, training=True) - - if ((task_type == 'worker' and task_id == 0) or - task_type is task_id is None): - loss *= loss_multiplier_for_first_worker - # Learning rate is small enough that if applied to a float16 variable, - # the variable will not change. So this tests the learning rate is not - # applied to a float16 value, but instead the float32 variable. - optimizer = gradient_descent.GradientDescentOptimizer(2 ** -14) - optimizer = loss_scale_optimizer.MixedPrecisionLossScaleOptimizer( - optimizer, loss_scale) - train_op = optimizer.minimize( - loss, training_util.get_or_create_global_step()) - return train_op - - train_op = d.extended.call_for_each_replica(model_fn) - train_op = d.group(d.experimental_local_results(train_op)) - - sess.run(variables.global_variables_initializer()) - sess.run(train_op) - - (var,) = model.trainable_weights - # Variable starts at 1. Each worker's gradient is 2 ** -14, the learning - # rate, and each worker's gradient will be subtracted from the variable. - expected = 1 - d.num_replicas_in_sync * 2 ** -14 - self.assertEqual(sess.run(var), expected) - # Loss scale should double, as are gradients are finite. - self.assertEqual(sess.run(loss_scale()), 2 ** 11) - - # Set the first worker to have NaN loss and gradients. 
- sess.run(loss_multiplier_for_first_worker.assign(float('NaN'))) - sess.run(train_op) - # Variable should not change, since first worker had NaN - self.assertEqual(sess.run(var), expected) - # Loss scale should halve due to NaN - self.assertEqual(sess.run(loss_scale()), 2 ** 10) - - -class DistributedCollectiveAllReduceStrategyTest( - CollectiveAllReduceStrategyTestBase, - strategy_test_lib.DistributionTestBase, - parameterized.TestCase): - - @classmethod - def setUpClass(cls): - """Create a local cluster with 3 workers.""" - cls._cluster_spec = multi_worker_test_base.create_in_process_cluster( - num_workers=3, num_ps=0) - - @ds_combinations.generate( - combinations.combine(mode=['graph'], required_gpus=[0, 1, 2])) - def testComplexModel(self, required_gpus): - self._run_between_graph_clients( - self._test_complex_model, self._cluster_spec, num_gpus=required_gpus) - - @ds_combinations.generate( - combinations.combine(mode=['graph'], required_gpus=[0, 1, 2])) - @testing_utils.enable_v2_dtype_behavior - def testMixedPrecision(self, required_gpus): - if test_util.is_xla_enabled(): - self.skipTest('Test gets NaNs with XLA') - with policy.policy_scope('mixed_float16'): - self._run_between_graph_clients( - self._test_mixed_precision, - self._cluster_spec, - num_gpus=required_gpus) - - -class DistributedCollectiveAllReduceStrategyTestWithChief( - CollectiveAllReduceStrategyTestBase, parameterized.TestCase): - - @classmethod - def setUpClass(cls): - """Create a local cluster with 3 workers and 1 chief.""" - cls._cluster_spec = multi_worker_test_base.create_in_process_cluster( - num_workers=3, num_ps=0, has_chief=True) - - @ds_combinations.generate( - combinations.combine(mode=['graph'], required_gpus=[0, 1, 2])) - def testComplexModel(self, required_gpus): - self._run_between_graph_clients( - self._test_complex_model, self._cluster_spec, num_gpus=required_gpus) - - @ds_combinations.generate( - combinations.combine(mode=['graph'], required_gpus=[0, 1, 2])) - 
@testing_utils.enable_v2_dtype_behavior - def testMixedPrecision(self, required_gpus): - if test_util.is_xla_enabled(): - return # Test gets NaNs with XLA - with policy.policy_scope('mixed_float16'): - self._run_between_graph_clients( - self._test_mixed_precision, - self._cluster_spec, - num_gpus=required_gpus) - - -class LocalCollectiveAllReduceStrategy( - CollectiveAllReduceStrategyTestBase, - strategy_test_lib.DistributionTestBase, - strategy_test_lib.TwoDeviceDistributionTestBase, - parameterized.TestCase): - - @ds_combinations.generate( - combinations.combine(mode=['graph'], required_gpus=[2, 4])) - def testComplexModel(self, required_gpus): - self._test_complex_model(None, None, required_gpus) - - @ds_combinations.generate( - combinations.combine(mode=['graph'], required_gpus=[2, 4])) - @testing_utils.enable_v2_dtype_behavior - def testMixedPrecision(self, required_gpus): - with policy.policy_scope('mixed_float16'): - self._test_mixed_precision(None, None, required_gpus) @ds_combinations.generate( @@ -316,8 +41,7 @@ class LocalCollectiveAllReduceStrategy( strategy_combinations.multi_worker_mirrored_2x1_gpu, ], mode=['eager'])) -class DistributedCollectiveAllReduceStrategyEagerTest(test.TestCase, - parameterized.TestCase): +class MultiWorkerMirroredStrategyTest(test.TestCase, parameterized.TestCase): def testFitWithoutStepsPerEpochPartialBatch(self, strategy): From 052ed437e1864ef594b32504df023e9cf5769a03 Mon Sep 17 00:00:00 2001 From: Lucy Fox Date: Tue, 10 Nov 2020 12:22:08 -0800 Subject: [PATCH 123/220] Add SCCP pass to MLIR bridge pipeline. This enables constant folding across function boundaries. 
PiperOrigin-RevId: 341672786 Change-Id: I328a33e7b46d9cf24d7030dfb094cafe2d7908b9 --- tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc index 8850f5330f9..a94d20b1d2a 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc @@ -294,6 +294,9 @@ void CreateConvertMlirToXlaHloPipeline( // with a tuple argument which break the assumption of resource lifting // inside PromoteResourcesToArgs. pm.addPass(mlir::mhlo::createLegalizeTFControlFlowPass()); + // The SCCP pass performs constant propagation across the IR, which, for + // example, propagates constant arguments into callee functions. + pm.addPass(mlir::createSCCPPass()); pm.addNestedPass(mlir::mhlo::createLegalizeTFPass( /*allow_partial_conversion=*/true, /*legalize_chlo=*/true, From 04fda3e0427349f94bfae4b05bf19f2335d7fa08 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Tue, 10 Nov 2020 12:52:30 -0800 Subject: [PATCH 124/220] DeviceInfo renamed into GpuInfo. 
PiperOrigin-RevId: 341678576 Change-Id: I0779af664434dc322fa9aa38f8edc3974d983e40 --- tensorflow/lite/delegates/gpu/metal/api.cc | 47 +++++++++---------- tensorflow/lite/delegates/gpu/metal/api.h | 2 +- .../lite/delegates/gpu/metal/device_info.cc | 42 +++++++---------- .../lite/delegates/gpu/metal/device_info.h | 25 ++++++---- .../lite/delegates/gpu/metal/kernels/conv.cc | 29 ++++++------ .../lite/delegates/gpu/metal/kernels/conv.h | 4 +- .../delegates/gpu/metal/kernels/conv_test.mm | 6 +-- .../gpu/metal/kernels/fully_connected.cc | 20 ++++---- .../gpu/metal/kernels/fully_connected.h | 2 +- .../delegates/gpu/metal/kernels/softmax.cc | 8 ++-- .../delegates/gpu/metal/kernels/softmax.h | 2 +- .../delegates/gpu/metal/kernels/test_util.mm | 4 +- .../gpu/metal/kernels/transpose_conv.cc | 24 +++++----- .../gpu/metal/kernels/transpose_conv.h | 8 ++-- .../lite/delegates/gpu/metal_delegate.mm | 6 +-- 15 files changed, 112 insertions(+), 117 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/metal/api.cc b/tensorflow/lite/delegates/gpu/metal/api.cc index f8fed600ed9..acb9ec79e17 100644 --- a/tensorflow/lite/delegates/gpu/metal/api.cc +++ b/tensorflow/lite/delegates/gpu/metal/api.cc @@ -69,13 +69,13 @@ std::vector SelectDepthWiseConv( std::vector SelectConvolutionTransposed( int id, ValueId input_id, ValueId output_id, - const ConvolutionTransposedAttributes& attr, const DeviceInfo& device_info, + const ConvolutionTransposedAttributes& attr, const GpuInfo& gpu_info, const metal::RuntimeOptions& options) { if (CheckConvolutionTransposed4x4Support(attr)) { - return ConvolutionTransposed4x4(id, input_id, output_id, attr, device_info, + return ConvolutionTransposed4x4(id, input_id, output_id, attr, gpu_info, options); } else { - return ConvolutionTransposed(id, input_id, output_id, attr, device_info, + return ConvolutionTransposed(id, input_id, output_id, attr, gpu_info, options); } } @@ -116,12 +116,13 @@ std::vector SelectReshape( } } -std::vector SelectSoftmax( - const 
GraphFloat32& graph, int id, ValueId input_id, ValueId output_id, - const DeviceInfo& device_info) { +std::vector SelectSoftmax(const GraphFloat32& graph, + int id, ValueId input_id, + ValueId output_id, + const GpuInfo& gpu_info) { const auto src_shape = graph.FindInputs(id)[0]->tensor.shape; if (src_shape.w == 1 && src_shape.h == 1) { - return Softmax1x1(id, input_id, output_id, device_info, src_shape.c); + return Softmax1x1(id, input_id, output_id, gpu_info, src_shape.c); } else { return Softmax(id, input_id, output_id, src_shape.c); } @@ -135,9 +136,9 @@ std::vector SelectSpaceToDepth( std::vector SelectWinograd4x4To36( int id, ValueId input_id, ValueId output_id, - const Winograd4x4To36Attributes& attr, const DeviceInfo& device_info, + const Winograd4x4To36Attributes& attr, const GpuInfo& gpu_info, const metal::RuntimeOptions& options) { - if (device_info.IsAppleGPU()) { + if (gpu_info.IsApple()) { return Winograd4x4To36(id, input_id, output_id, attr); } else { return Winograd4x4To36TileX6(id, input_id, output_id, attr, options); @@ -146,9 +147,9 @@ std::vector SelectWinograd4x4To36( std::vector SelectWinograd36To4x4( int id, ValueId input_id, ValueId output_id, - const Winograd36To4x4Attributes& attr, const DeviceInfo& device_info, + const Winograd36To4x4Attributes& attr, const GpuInfo& gpu_info, const metal::RuntimeOptions& options) { - if (device_info.IsAppleGPU()) { + if (gpu_info.IsApple()) { return Winograd36To4x4(id, input_id, output_id, options, attr); } else { return Winograd36To4x4Tile4x1(id, input_id, output_id, options, attr); @@ -176,7 +177,7 @@ bool IsSuitableForWinograd4x4To6x6(const Convolution2DAttributes& attr, absl::Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node, const std::vector& inputs, const std::vector& outputs, - const DeviceInfo& device_info, + const GpuInfo& gpu_info, const RuntimeOptions& options, int* last_node_id, int* last_value_id, std::vector* tasks) { @@ -236,13 +237,12 @@ absl::Status 
RegisterPrimaryOps(const GraphFloat32& graph, const Node* node, (*last_node_id) += 1; int value_id = *last_value_id + 1; *tasks = SelectWinograd4x4To36(*last_node_id, inputs[0], value_id, - wino_up_attr, device_info, options); + wino_up_attr, gpu_info, options); BHWC conv_shape{dst_shape.b, 36, tiles_x * tiles_y, dst_shape.c}; (*last_node_id) += 1; - auto t1 = - ConvolutionWino4x4To6x6(*last_node_id, value_id, value_id + 1, - conv_shape, attr, device_info, options); + auto t1 = ConvolutionWino4x4To6x6(*last_node_id, value_id, value_id + 1, + conv_shape, attr, gpu_info, options); tasks->insert(tasks->end(), t1.begin(), t1.end()); Winograd36To4x4Attributes wino_down_attr; @@ -250,12 +250,12 @@ absl::Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node, wino_down_attr.biases = attr.bias; (*last_node_id) += 1; auto t2 = SelectWinograd36To4x4(*last_node_id, value_id + 1, outputs[0], - wino_down_attr, device_info, options); + wino_down_attr, gpu_info, options); tasks->insert(tasks->end(), t2.begin(), t2.end()); (*last_value_id) += 2; } else { *tasks = ConvolutionGeneric(node_id, inputs[0], outputs[0], dst_shape, - attr, device_info, options); + attr, gpu_info, options); } break; } @@ -264,7 +264,7 @@ absl::Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node, node_id, inputs[0], outputs[0], absl::any_cast( node->operation.attributes), - device_info, options); + gpu_info, options); break; case OperationType::DEPTHWISE_CONVOLUTION: if (graph.FindInputs(node->id).size() != 1) { @@ -282,7 +282,7 @@ absl::Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node, *tasks = FullyConnected( node_id, inputs[0], outputs[0], absl::any_cast(node->operation.attributes), - device_info, options); + gpu_info, options); break; case OperationType::MAX_UNPOOLING_2D: *tasks = MaxUnpooling( @@ -360,8 +360,7 @@ absl::Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node, return absl::UnimplementedError( "Softmax supports only 
CHANNELS dimension"); } - *tasks = - SelectSoftmax(graph, node_id, inputs[0], outputs[0], device_info); + *tasks = SelectSoftmax(graph, node_id, inputs[0], outputs[0], gpu_info); break; } case OperationType::SPACE_TO_DEPTH: @@ -437,7 +436,7 @@ absl::Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node, } // namespace -absl::Status Compile(const GraphFloat32& graph, const DeviceInfo& device_info, +absl::Status Compile(const GraphFloat32& graph, const GpuInfo& gpu_info, const RuntimeOptions& options, CompiledModel* compiled_model) { int last_node_id = 0; @@ -462,7 +461,7 @@ absl::Status Compile(const GraphFloat32& graph, const DeviceInfo& device_info, RegisterCustomOps(graph, node, inputs, outputs, options, &tasks); if (!custom_status.ok()) { auto primary_status = - RegisterPrimaryOps(graph, node, inputs, outputs, device_info, options, + RegisterPrimaryOps(graph, node, inputs, outputs, gpu_info, options, &last_node_id, &last_value_id, &tasks); if (!primary_status.ok()) { return absl::UnimplementedError( diff --git a/tensorflow/lite/delegates/gpu/metal/api.h b/tensorflow/lite/delegates/gpu/metal/api.h index 407434c8257..09eb6511935 100644 --- a/tensorflow/lite/delegates/gpu/metal/api.h +++ b/tensorflow/lite/delegates/gpu/metal/api.h @@ -27,7 +27,7 @@ namespace gpu { namespace metal { // Builds CompiledModel out of GraphFloat32 graph using provided RuntimeOptions. 
-absl::Status Compile(const GraphFloat32& graph, const DeviceInfo& device_info, +absl::Status Compile(const GraphFloat32& graph, const GpuInfo& gpu_info, const RuntimeOptions& options, CompiledModel* compiled_model); diff --git a/tensorflow/lite/delegates/gpu/metal/device_info.cc b/tensorflow/lite/delegates/gpu/metal/device_info.cc index ef3dfff5fc9..250ca9b7ca2 100644 --- a/tensorflow/lite/delegates/gpu/metal/device_info.cc +++ b/tensorflow/lite/delegates/gpu/metal/device_info.cc @@ -22,18 +22,18 @@ namespace tflite { namespace gpu { namespace metal { namespace { -Vendor GetVendorFromString(const std::string& device_name) { - const std::map kMapping = { - {"Apple", Vendor::kApple}, - {"Intel", Vendor::kIntel}, - {"AMD", Vendor::kAMD}, +GpuVendor GetVendorFromString(const std::string& device_name) { + const std::map kMapping = { + {"Apple", GpuVendor::kApple}, + {"Intel", GpuVendor::kIntel}, + {"AMD", GpuVendor::kAMD}, }; - for (auto v : kMapping) { + for (const auto& v : kMapping) { if (device_name.find(v.first) != std::string::npos) { return v.second; } } - return Vendor::kUnknown; + return GpuVendor::kUnknown; } } // namespace @@ -108,43 +108,37 @@ int AppleGPUInfo::GetComputeUnitsCount() const { } } -DeviceInfo::DeviceInfo(const std::string& device_name) +GpuInfo::GpuInfo(const std::string& device_name) : vendor(GetVendorFromString(device_name)) { - if (vendor == Vendor::kApple) { + if (vendor == GpuVendor::kApple) { apple_info = AppleGPUInfo(device_name); } } -bool DeviceInfo::IsIntelGPU() const { - return vendor == Vendor::kIntel; -} +bool GpuInfo::IsIntel() const { return vendor == GpuVendor::kIntel; } -bool DeviceInfo::IsAppleGPU() const { - return vendor == Vendor::kApple; -} +bool GpuInfo::IsApple() const { return vendor == GpuVendor::kApple; } -bool DeviceInfo::IsAMDGPU() const { - return vendor == Vendor::kAMD; -} +bool GpuInfo::IsAMD() const { return vendor == GpuVendor::kAMD; } -bool DeviceInfo::IsRoundToNearestSupported() const { - if (vendor == 
Vendor::kApple) { +bool GpuInfo::IsRoundToNearestSupported() const { + if (vendor == GpuVendor::kApple) { return apple_info.IsRoundToNearestSupported(); } else { return true; } } -bool DeviceInfo::IsWaveSizeEqualTo32() const { - if (vendor == Vendor::kApple) { +bool GpuInfo::IsWaveSizeEqualTo32() const { + if (vendor == GpuVendor::kApple) { return apple_info.IsWaveSizeEqualTo32(); } else { return false; } } -int DeviceInfo::GetComputeUnitsCount() const { - if (vendor == Vendor::kApple) { +int GpuInfo::GetComputeUnitsCount() const { + if (vendor == GpuVendor::kApple) { return apple_info.GetComputeUnitsCount(); } else { return 1; diff --git a/tensorflow/lite/delegates/gpu/metal/device_info.h b/tensorflow/lite/delegates/gpu/metal/device_info.h index 6211d248e35..f77d6950167 100644 --- a/tensorflow/lite/delegates/gpu/metal/device_info.h +++ b/tensorflow/lite/delegates/gpu/metal/device_info.h @@ -22,11 +22,16 @@ namespace tflite { namespace gpu { namespace metal { -enum class Vendor { - kUnknown, +// The VendorID returned by the GPU driver. 
+enum class GpuVendor { kApple, - kIntel, + kQualcomm, + kMali, + kPowerVR, + kNvidia, kAMD, + kIntel, + kUnknown }; enum class AppleGPU { @@ -64,17 +69,17 @@ struct AppleGPUInfo { int GetComputeUnitsCount() const; }; -struct DeviceInfo { - DeviceInfo() = default; - explicit DeviceInfo(const std::string& device_name); +struct GpuInfo { + GpuInfo() = default; + explicit GpuInfo(const std::string& device_name); - Vendor vendor; + GpuVendor vendor = GpuVendor::kUnknown; AppleGPUInfo apple_info; - bool IsIntelGPU() const; - bool IsAppleGPU() const; - bool IsAMDGPU() const; + bool IsIntel() const; + bool IsApple() const; + bool IsAMD() const; // floating point rounding mode bool IsRoundToNearestSupported() const; diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/conv.cc b/tensorflow/lite/delegates/gpu/metal/kernels/conv.cc index 401089f9fa1..136609ce3da 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/conv.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/conv.cc @@ -980,19 +980,18 @@ ConvParams GetConvParamsForAMD(const Convolution2DAttributes& attr, return params; } -ConvParams GetConvParams(const DeviceInfo& device_info, +ConvParams GetConvParams(const GpuInfo& gpu_info, const Convolution2DAttributes& attr, const RuntimeOptions& options, const BHWC& dst_shape) { - if (device_info.IsAppleGPU()) { - if (device_info.apple_info.IsLocalMemoryPreferredOverGlobal()) { - return GetConvParamsForA7A8(device_info.apple_info, attr, dst_shape); + if (gpu_info.IsApple()) { + if (gpu_info.apple_info.IsLocalMemoryPreferredOverGlobal()) { + return GetConvParamsForA7A8(gpu_info.apple_info, attr, dst_shape); } else { - return GetConvParamsForA9AndHigher(device_info.apple_info, attr, - dst_shape); + return GetConvParamsForA9AndHigher(gpu_info.apple_info, attr, dst_shape); } - } else if (device_info.IsIntelGPU()) { + } else if (gpu_info.IsIntel()) { return GetConvParamsForIntel(attr, options, dst_shape); - } else if (device_info.IsAMDGPU()) { + } else if 
(gpu_info.IsAMD()) { return GetConvParamsForAMD(attr, options, dst_shape); } else { ConvParams params; @@ -1048,9 +1047,9 @@ std::pair GetDispatchSizes(const ConvParams& params, std::vector ConvolutionGeneric( int id, ValueId input_id, ValueId output_id, const BHWC& dst_shape, - const Convolution2DAttributes& attr, const DeviceInfo& device_info, + const Convolution2DAttributes& attr, const GpuInfo& gpu_info, const metal::RuntimeOptions& options) { - ConvParams params = GetConvParams(device_info, attr, options, dst_shape); + ConvParams params = GetConvParams(gpu_info, attr, options, dst_shape); auto desc = std::make_shared(); desc->id = id; @@ -1103,7 +1102,7 @@ std::vector ConvolutionGeneric( std::vector ConvolutionWino4x4To6x6( int id, ValueId input_id, ValueId output_id, const BHWC& dst_shape, - const Convolution2DAttributes& attr, const DeviceInfo& device_info, + const Convolution2DAttributes& attr, const GpuInfo& gpu_info, const RuntimeOptions& options) { const int dst_slices = DivideRoundUp(attr.weights.shape.o, 4); ConvParams params; @@ -1116,9 +1115,9 @@ std::vector ConvolutionWino4x4To6x6( params.different_weights_for_height = true; params.x_kernel_is_1 = true; params.y_kernel_is_1 = true; - if (device_info.IsAppleGPU()) { + if (gpu_info.IsApple()) { params.weight_layout = WeightsInnerBlockLayout::O4I4; - if (device_info.apple_info.IsLocalMemoryPreferredOverGlobal()) { + if (gpu_info.apple_info.IsLocalMemoryPreferredOverGlobal()) { params.weights_upload_type = WeightsUploadType::LOCAL_MEM_BY_THREADS; params.work_group_size = int3(32, 1, 1); params.block_size = int3(4, 1, 4); @@ -1127,12 +1126,12 @@ std::vector ConvolutionWino4x4To6x6( params.work_group_size = int3(8, 4, 1); params.block_size = int3(4, 1, 4); } - } else if (device_info.IsIntelGPU()) { + } else if (gpu_info.IsIntel()) { params.weight_layout = WeightsInnerBlockLayout::I4O4; params.weights_upload_type = WeightsUploadType::PRIVATE_MEM_SIMD8_BROADCAST; params.work_group_size = int3(16, 1, 1); 
params.block_size = int3(1, 1, 4); - } else if (device_info.IsAMDGPU()) { + } else if (gpu_info.IsAMD()) { params.weight_layout = WeightsInnerBlockLayout::I4O4; params.weights_upload_type = WeightsUploadType::GLOBAL_MEM; params.work_group_size = int3(32, 1, 1); diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/conv.h b/tensorflow/lite/delegates/gpu/metal/kernels/conv.h index 1e45323e609..41875478309 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/conv.h +++ b/tensorflow/lite/delegates/gpu/metal/kernels/conv.h @@ -30,12 +30,12 @@ namespace metal { std::vector ConvolutionGeneric( int id, ValueId input_id, ValueId output_id, const BHWC& dst_shape, - const Convolution2DAttributes& attr, const DeviceInfo& device_info, + const Convolution2DAttributes& attr, const GpuInfo& gpu_info, const RuntimeOptions& options); std::vector ConvolutionWino4x4To6x6( int id, ValueId input_id, ValueId output_id, const BHWC& dst_shape, - const Convolution2DAttributes& attr, const DeviceInfo& device_info, + const Convolution2DAttributes& attr, const GpuInfo& gpu_info, const RuntimeOptions& options); } // namespace metal diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/conv_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/conv_test.mm index fc9e0157ac0..7842412f1f9 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/conv_test.mm +++ b/tensorflow/lite/delegates/gpu/metal/kernels/conv_test.mm @@ -297,8 +297,8 @@ using ::tflite::gpu::metal::SingleOpModel; outputs_v0[1].data.resize(dst_shape.DimensionsProduct()); std::string device_name = std::string([[device name] UTF8String]); - tflite::gpu::metal::DeviceInfo device_info(device_name); - auto tasks_v0 = ConvolutionGeneric(0, 0, 1, dst_shape, attr, device_info, options); + tflite::gpu::metal::GpuInfo gpu_info(device_name); + auto tasks_v0 = ConvolutionGeneric(0, 0, 1, dst_shape, attr, gpu_info, options); auto status = RunGraph(tasks_v0, device, inputs_v0, &outputs_v0); XCTAssertTrue(status.ok(), @"%s", 
status.error_message().c_str()); @@ -313,7 +313,7 @@ using ::tflite::gpu::metal::SingleOpModel; wino_up_attr.padding = attr.padding; auto tasks_v1 = tflite::gpu::metal::Winograd4x4To36(0, 0, 2, wino_up_attr); - auto tasks_v2 = ConvolutionWino4x4To6x6(1, 2, 3, conv_shape, attr, device_info, options); + auto tasks_v2 = ConvolutionWino4x4To6x6(1, 2, 3, conv_shape, attr, gpu_info, options); tflite::gpu::metal::Winograd36To4x4Attributes wino_down_attr; wino_down_attr.output_shape = dst_shape; diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.cc b/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.cc index 00754b09dd4..79aee9493ce 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.cc @@ -38,12 +38,11 @@ namespace gpu { namespace metal { namespace { -std::string GetFullyConnectedCode(const DeviceInfo& device_info, - int src_channels, int dst_channels) { - bool shared_memory = - device_info.IsAppleGPU() && - device_info.apple_info.IsLocalMemoryPreferredOverGlobal(); - const std::string barrier = device_info.IsWaveSizeEqualTo32() +std::string GetFullyConnectedCode(const GpuInfo& gpu_info, int src_channels, + int dst_channels) { + bool shared_memory = gpu_info.IsApple() && + gpu_info.apple_info.IsLocalMemoryPreferredOverGlobal(); + const std::string barrier = gpu_info.IsWaveSizeEqualTo32() ? 
"SIMDGROUP_BARRIER" : "threadgroup_barrier"; const int src_depth = DivideRoundUp(src_channels, 4); @@ -118,12 +117,12 @@ std::string GetFullyConnectedCode(const DeviceInfo& device_info, std::vector FullyConnected( int id, ValueId input_id, ValueId output_id, - const FullyConnectedAttributes& attr, const DeviceInfo& device_info, + const FullyConnectedAttributes& attr, const GpuInfo& gpu_info, const RuntimeOptions& options) { auto desc = std::make_shared(); desc->id = id; desc->is_linkable = false; - desc->shader_source = GetFullyConnectedCode(device_info, attr.weights.shape.i, + desc->shader_source = GetFullyConnectedCode(gpu_info, attr.weights.shape.i, attr.weights.shape.o); desc->args.AddInt("dst_channels", attr.weights.shape.o); @@ -141,9 +140,8 @@ std::vector FullyConnected( return CalculateOutputShape(buffers.find(input_id)->second, attr); }}; - bool shared_memory = - device_info.IsAppleGPU() && - device_info.apple_info.IsLocalMemoryPreferredOverGlobal(); + bool shared_memory = gpu_info.IsApple() && + gpu_info.apple_info.IsLocalMemoryPreferredOverGlobal(); const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); const int src_depth_aligned = AlignByN(src_depth, shared_memory ? 
32 : 4); const int dst_channels_aligned = AlignByN(attr.weights.shape.o, 8); diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.h b/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.h index 87e31a7dc33..769ffa36550 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.h +++ b/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.h @@ -34,7 +34,7 @@ namespace metal { // will be inefficient std::vector FullyConnected( int id, ValueId input_id, ValueId output_id, - const FullyConnectedAttributes& attr, const DeviceInfo& device_info, + const FullyConnectedAttributes& attr, const GpuInfo& gpu_info, const RuntimeOptions& options); } // namespace metal diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/softmax.cc b/tensorflow/lite/delegates/gpu/metal/kernels/softmax.cc index 1fda7cfafa1..3dc5bca0007 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/softmax.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/softmax.cc @@ -32,8 +32,8 @@ namespace tflite { namespace gpu { namespace metal { namespace { -std::string GetSoftmax1x1Code(const DeviceInfo& device_info) { - const std::string barrier = device_info.IsWaveSizeEqualTo32() +std::string GetSoftmax1x1Code(const GpuInfo& gpu_info) { + const std::string barrier = gpu_info.IsWaveSizeEqualTo32() ? 
"SIMDGROUP_BARRIER" : "threadgroup_barrier"; std::string code = R"( @@ -179,12 +179,12 @@ std::vector Softmax(int id, ValueId input_id, std::vector Softmax1x1(int id, ValueId input_id, ValueId output_id, - const DeviceInfo& device_info, + const GpuInfo& gpu_info, int channels_count) { auto desc = std::make_shared(); desc->id = id; desc->is_linkable = false; - desc->shader_source = GetSoftmax1x1Code(device_info); + desc->shader_source = GetSoftmax1x1Code(gpu_info); desc->input_buffers = { {input_id, "device FLT4* const src_buffer"}, diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/softmax.h b/tensorflow/lite/delegates/gpu/metal/kernels/softmax.h index 4a5ccdeba49..81f45d909ef 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/softmax.h +++ b/tensorflow/lite/delegates/gpu/metal/kernels/softmax.h @@ -36,7 +36,7 @@ std::vector Softmax(int id, ValueId input_id, // We have this case in MobilenetV1/V2. std::vector Softmax1x1(int id, ValueId input_id, ValueId output_id, - const DeviceInfo& device_info, + const GpuInfo& gpu_info, int channels_count); } // namespace metal diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/test_util.mm b/tensorflow/lite/delegates/gpu/metal/kernels/test_util.mm index 4458e81e71c..d7c0507ac1d 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/test_util.mm +++ b/tensorflow/lite/delegates/gpu/metal/kernels/test_util.mm @@ -80,12 +80,12 @@ absl::Status SingleOpModel::Invoke() { id device = MTLCreateSystemDefaultDevice(); std::string device_name = std::string([[device name] UTF8String]); - DeviceInfo device_info(device_name); + GpuInfo gpu_info(device_name); RuntimeOptions options; options.storage_precision = RuntimeOptions::Precision::FP32; options.accumulator_precision = RuntimeOptions::Precision::FP32; CompiledModel compiled_model; - RETURN_IF_ERROR(Compile(graph_, device_info, options, &compiled_model)); + RETURN_IF_ERROR(Compile(graph_, gpu_info, options, &compiled_model)); CompiledModel optimized_model; 
RETURN_IF_ERROR(ValidateOptimizeModel(input_ids, output_ids, compiled_model, &optimized_model)); diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.cc b/tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.cc index 3bea0c5d8e2..fcf06c4ae14 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.cc @@ -276,15 +276,15 @@ std::string GetDeconvolutionShared(const ConvolutionTransposedAttributes& attr, } std::string GetDeconvolution4x4(const int2& block_size, - const DeviceInfo& device_info) { + const GpuInfo& gpu_info) { bool use_local_mem = false; - if (device_info.IsAppleGPU() && device_info.apple_info.IsBionic()) { + if (gpu_info.IsApple() && gpu_info.apple_info.IsBionic()) { use_local_mem = true; } - if (device_info.IsIntelGPU()) { + if (gpu_info.IsIntel()) { use_local_mem = true; } - const std::string barrier = device_info.IsWaveSizeEqualTo32() + const std::string barrier = gpu_info.IsWaveSizeEqualTo32() ? 
"SIMDGROUP_BARRIER" : "threadgroup_barrier"; std::string c = R"( @@ -454,8 +454,8 @@ std::string GetDeconvolution4x4(const int2& block_size, std::vector ConvolutionTransposed( int id, ValueId input_id, ValueId output_id, - const ConvolutionTransposedAttributes& params, - const DeviceInfo& device_info, const RuntimeOptions& options) { + const ConvolutionTransposedAttributes& params, const GpuInfo& gpu_info, + const RuntimeOptions& options) { auto desc = std::make_shared(); desc->id = id; desc->is_linkable = false; @@ -468,7 +468,7 @@ std::vector ConvolutionTransposed( const int shared_size = sizeof(float) * 4 * src_depth * src_local_size_x * src_local_size_y; if (shared_size < 1000 * 16 && - device_info.apple_info.IsLocalMemoryPreferredOverGlobal()) { + gpu_info.apple_info.IsLocalMemoryPreferredOverGlobal()) { desc->shader_source = GetDeconvolutionShared(params, kThreadGroupWidth, kThreadGroupHeight); } else { @@ -554,8 +554,8 @@ std::vector ConvolutionTransposed( std::vector ConvolutionTransposed4x4( int id, ValueId input_id, ValueId output_id, - const ConvolutionTransposedAttributes& params, - const DeviceInfo& device_info, const RuntimeOptions& options) { + const ConvolutionTransposedAttributes& params, const GpuInfo& gpu_info, + const RuntimeOptions& options) { const int src_depth = DivideRoundUp(params.weights.shape.i, 4); const int dst_depth = DivideRoundUp(params.weights.shape.o, 4); const int kernel_x = 4; @@ -609,8 +609,8 @@ std::vector ConvolutionTransposed4x4( desc->is_linkable = false; bool recommended_2x = false; - if (device_info.IsAppleGPU()) { - if (device_info.apple_info.IsBionic() && + if (gpu_info.IsApple()) { + if (gpu_info.apple_info.IsBionic() && options.storage_precision == RuntimeOptions::Precision::FP16) { recommended_2x = true; } @@ -621,7 +621,7 @@ std::vector ConvolutionTransposed4x4( } const int2 block_size(recommended_2x ? 
2 : 1, 1); - desc->shader_source = GetDeconvolution4x4(block_size, device_info); + desc->shader_source = GetDeconvolution4x4(block_size, gpu_info); desc->input_buffers = { {input_id, "device FLT4* const src_buffer"}, diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.h b/tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.h index 8e798c74eb4..5a4410d9ba3 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.h +++ b/tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.h @@ -30,13 +30,13 @@ namespace metal { std::vector ConvolutionTransposed( int id, ValueId input_id, ValueId output_id, - const ConvolutionTransposedAttributes& params, - const DeviceInfo& device_info, const RuntimeOptions& options); + const ConvolutionTransposedAttributes& params, const GpuInfo& gpu_info, + const RuntimeOptions& options); std::vector ConvolutionTransposed4x4( int id, ValueId input_id, ValueId output_id, - const ConvolutionTransposedAttributes& params, - const DeviceInfo& device_info, const RuntimeOptions& options); + const ConvolutionTransposedAttributes& params, const GpuInfo& gpu_info, + const RuntimeOptions& options); bool CheckConvolutionTransposed4x4Support( const ConvolutionTransposedAttributes& attr); diff --git a/tensorflow/lite/delegates/gpu/metal_delegate.mm b/tensorflow/lite/delegates/gpu/metal_delegate.mm index 933a95d3a10..b4a8b91499f 100644 --- a/tensorflow/lite/delegates/gpu/metal_delegate.mm +++ b/tensorflow/lite/delegates/gpu/metal_delegate.mm @@ -338,13 +338,13 @@ class Delegate { } std::string device_name = std::string([[metal_device_ name] UTF8String]); - DeviceInfo device_info(device_name); + GpuInfo gpu_info(device_name); size_t storage_type_size; RuntimeOptions runtime_options; if (options_.allow_precision_loss) { storage_type_size = sizeof(HalfBits); runtime_options.storage_precision = RuntimeOptions::Precision::FP16; - if (device_info.IsRoundToNearestSupported()) { + if (gpu_info.IsRoundToNearestSupported()) 
{ runtime_options.accumulator_precision = RuntimeOptions::Precision::FP16; } else { runtime_options.accumulator_precision = RuntimeOptions::Precision::FP32; @@ -437,7 +437,7 @@ class Delegate { // TODO(impjdi): Merge these. CompiledModel compiled_model; - RETURN_IF_ERROR(Compile(graph, device_info, runtime_options, &compiled_model)); + RETURN_IF_ERROR(Compile(graph, gpu_info, runtime_options, &compiled_model)); CompiledModel optimized_model; RETURN_IF_ERROR(ValidateOptimizeModel(input_ids, output_ids, compiled_model, &optimized_model)); From bc041671ef5e50cfa378b7bdf6bcf21136cd5cea Mon Sep 17 00:00:00 2001 From: Hye Soo Yang Date: Tue, 10 Nov 2020 12:53:56 -0800 Subject: [PATCH 125/220] Fix for tf.image NMS regression. PiperOrigin-RevId: 341678843 Change-Id: I5ce82a2e6e1a17505cebbccbb639ac2c87d2aacf --- tensorflow/core/kernels/image/non_max_suppression_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/image/non_max_suppression_op.cc b/tensorflow/core/kernels/image/non_max_suppression_op.cc index 4175d5d56dd..1c4166058fb 100644 --- a/tensorflow/core/kernels/image/non_max_suppression_op.cc +++ b/tensorflow/core/kernels/image/non_max_suppression_op.cc @@ -228,7 +228,7 @@ void DoNonMaxSuppressionOp(OpKernelContext* context, const Tensor& scores, next_candidate.score *= suppress_weight(similarity); // First decide whether to perform hard suppression - if (similarity >= static_cast(similarity_threshold)) { + if (similarity > static_cast(similarity_threshold)) { should_hard_suppress = true; break; } From bccd445da9e75a9f196e52a8a9d1b388598851a4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Nov 2020 13:03:22 -0800 Subject: [PATCH 126/220] Avoid risk of including conflicting implementations of monitoring classes. The classes in tensorflow/core/lib/monitoring/{counter,gauge,percentile_sampler,sampler}.h have two implementations. 
This is achieved by using a #if directive to provide the implementation directly in each header file, or alternately, a no-op implementation on mobile platforms from one of the header files mobile_{counter,gauge,percentile_sampler,sampler}.h. I believe the intent is that the mobile*.h be used only via the main header files, and never be included directly, but nothing was preventing this. If someone had included one of the mobile*.h files directly, and the resulting object file were linked with one that used the primary header files on a non-mobile platform, it could have caused problems. There would have been no error at compile or link time, yet the classes would be defined in two different ways, leading to an unchecked ODR violation and undefined results. For example, the linker potentially could pick an arbitrary version of each routine in the class. This change tries to avoid the potential problem in two ways: - by restricting the visibility of the mobile_*.h variants (for bazel builds, at least); and - by causing the mobile_*.h files to use #error if they appear not to have been used on a mobile platform, or not included from their respective primary header files. Also, include {mobile_,}percentile_sampler.h in the mobile_srcs_only_runtime build rule. 
PiperOrigin-RevId: 341680724 Change-Id: I1de71dd209f2769e162c0c5522cf0cf5006ef5ff --- tensorflow/core/BUILD | 4 ---- tensorflow/core/lib/monitoring/BUILD | 14 ++++++++++---- tensorflow/core/lib/monitoring/counter.h | 3 +++ tensorflow/core/lib/monitoring/gauge.h | 3 +++ tensorflow/core/lib/monitoring/mobile_counter.h | 8 ++++++++ tensorflow/core/lib/monitoring/mobile_gauge.h | 8 ++++++++ .../lib/monitoring/mobile_percentile_sampler.h | 11 +++++++++++ tensorflow/core/lib/monitoring/mobile_sampler.h | 8 ++++++++ .../core/lib/monitoring/percentile_sampler.h | 4 ++++ tensorflow/core/lib/monitoring/sampler.h | 3 +++ 10 files changed, 58 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 35bdbce34aa..7c3c5c01472 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1343,10 +1343,6 @@ cc_library( "//tensorflow/core/lib/monitoring:counter", "//tensorflow/core/lib/monitoring:gauge", "//tensorflow/core/lib/monitoring:metric_def", - "//tensorflow/core/lib/monitoring:mobile_counter", - "//tensorflow/core/lib/monitoring:mobile_gauge", - "//tensorflow/core/lib/monitoring:mobile_percentile_sampler", - "//tensorflow/core/lib/monitoring:mobile_sampler", "//tensorflow/core/lib/monitoring:percentile_sampler", "//tensorflow/core/lib/monitoring:sampler", "//tensorflow/core/lib/monitoring:timed", diff --git a/tensorflow/core/lib/monitoring/BUILD b/tensorflow/core/lib/monitoring/BUILD index fc1ab3fb59b..f4f88b4eca7 100644 --- a/tensorflow/core/lib/monitoring/BUILD +++ b/tensorflow/core/lib/monitoring/BUILD @@ -113,7 +113,8 @@ cc_library( cc_library( name = "mobile_counter", - hdrs = ["mobile_counter.h"], + textual_hdrs = ["mobile_counter.h"], + visibility = ["//visibility:private"], deps = [ "//tensorflow/core/lib/core:status", "//tensorflow/core/platform:macros", @@ -123,7 +124,8 @@ cc_library( cc_library( name = "mobile_gauge", - hdrs = ["mobile_gauge.h"], + textual_hdrs = ["mobile_gauge.h"], + visibility = 
["//visibility:private"], deps = [ "//tensorflow/core/lib/core:status", "//tensorflow/core/platform:macros", @@ -133,7 +135,8 @@ cc_library( cc_library( name = "mobile_sampler", - hdrs = ["mobile_sampler.h"], + textual_hdrs = ["mobile_sampler.h"], + visibility = ["//visibility:private"], deps = [ ":metric_def", "//tensorflow/core/framework:summary_proto_cc", @@ -163,7 +166,8 @@ cc_library( cc_library( name = "mobile_percentile_sampler", - hdrs = ["mobile_percentile_sampler.h"], + textual_hdrs = ["mobile_percentile_sampler.h"], + visibility = ["//visibility:private"], deps = [ ":collection_registry", ":metric_def", @@ -200,7 +204,9 @@ filegroup( "metric_def.h", "mobile_counter.h", "mobile_gauge.h", + "mobile_percentile_sampler.h", "mobile_sampler.h", + "percentile_sampler.h", "sampler.h", "timed.h", "types.h", diff --git a/tensorflow/core/lib/monitoring/counter.h b/tensorflow/core/lib/monitoring/counter.h index 19cc7c29221..084c35b1c72 100644 --- a/tensorflow/core/lib/monitoring/counter.h +++ b/tensorflow/core/lib/monitoring/counter.h @@ -24,7 +24,10 @@ limitations under the License. // We replace this implementation with a null implementation for mobile // platforms. #ifdef IS_MOBILE_PLATFORM +#define TENSORFLOW_INCLUDED_FROM_COUNTER_H // prevent accidental use of + // mobile_counter.h #include "tensorflow/core/lib/monitoring/mobile_counter.h" +#undef TENSORFLOW_INCLUDED_FROM_COUNTER_H #else #include diff --git a/tensorflow/core/lib/monitoring/gauge.h b/tensorflow/core/lib/monitoring/gauge.h index 0aa47ad1f88..221ab73f8e3 100644 --- a/tensorflow/core/lib/monitoring/gauge.h +++ b/tensorflow/core/lib/monitoring/gauge.h @@ -24,7 +24,10 @@ limitations under the License. // We replace this implementation with a null implementation for mobile // platforms. 
#ifdef IS_MOBILE_PLATFORM +#define TENSORFLOW_INCLUDED_FROM_GAUGE_H // prevent accidental use of + // mobile_gauge.h #include "tensorflow/core/lib/monitoring/mobile_gauge.h" +#undef TENSORFLOW_INCLUDED_FROM_GAUGE_H #else #include diff --git a/tensorflow/core/lib/monitoring/mobile_counter.h b/tensorflow/core/lib/monitoring/mobile_counter.h index db46072a3ee..a7f5db6b94a 100644 --- a/tensorflow/core/lib/monitoring/mobile_counter.h +++ b/tensorflow/core/lib/monitoring/mobile_counter.h @@ -18,6 +18,14 @@ limitations under the License. #ifndef TENSORFLOW_CORE_LIB_MONITORING_MOBILE_COUNTER_H_ #define TENSORFLOW_CORE_LIB_MONITORING_MOBILE_COUNTER_H_ +#if !defined(IS_MOBILE_PLATFORM) || !defined(TENSORFLOW_INCLUDED_FROM_COUNTER_H) +// If this header file were included directly, and something else included its +// non-mobile counterpart, there could be an unchecked ODR violation on the +// classes below. +#error do not include mobile_counter.h directly; use counter.h instead +#endif // !defined(IS_MOBILE_PLATFORM) || + // !defined(TENSORFLOW_INCLUDED_FROM_COUNTER_H) + #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/core/lib/monitoring/mobile_gauge.h b/tensorflow/core/lib/monitoring/mobile_gauge.h index 0f75b54f676..264e6c927a5 100644 --- a/tensorflow/core/lib/monitoring/mobile_gauge.h +++ b/tensorflow/core/lib/monitoring/mobile_gauge.h @@ -18,6 +18,14 @@ limitations under the License. #ifndef TENSORFLOW_CORE_LIB_MONITORING_MOBILE_GAUGE_H_ #define TENSORFLOW_CORE_LIB_MONITORING_MOBILE_GAUGE_H_ +#if !defined(IS_MOBILE_PLATFORM) || !defined(TENSORFLOW_INCLUDED_FROM_GAUGE_H) +// If this header file were included directly, and something else included its +// non-mobile counterpart, there could be an unchecked ODR violation on the +// classes below. 
+#error do not include mobile_gauge.h directly; use gauge.h instead +#endif // !defined(IS_MOBILE_PLATFORM) || + // !defined(TENSORFLOW_INCLUDED_FROM_GAUGE_H) + #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/core/lib/monitoring/mobile_percentile_sampler.h b/tensorflow/core/lib/monitoring/mobile_percentile_sampler.h index 914a3841ab2..e1211d8fbfa 100644 --- a/tensorflow/core/lib/monitoring/mobile_percentile_sampler.h +++ b/tensorflow/core/lib/monitoring/mobile_percentile_sampler.h @@ -13,9 +13,20 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +// Null implementation of the PercentileSampler metric for mobile platforms. + #ifndef TENSORFLOW_CORE_LIB_MONITORING_MOBILE_PERCENTILE_SAMPLER_H_ #define TENSORFLOW_CORE_LIB_MONITORING_MOBILE_PERCENTILE_SAMPLER_H_ +#if !defined(IS_MOBILE_PLATFORM) || \ + !defined(TENSORFLOW_INCLUDED_FROM_PERCENTILE_SAMPLER_H) +// If this header file were included directly, and something else included its +// non-mobile counterpart, there could be an unchecked ODR violation on the +// classes below. +#error do not include mobile_percentile_sampler.h directly; use percetile_sampler.h instead +#endif // !defined(IS_MOBILE_PLATFORM) || + // !defined(TENSORFLOW_INCLUDED_FROM_PERCENTILE_SAMPLER_H) + #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/monitoring/collection_registry.h" #include "tensorflow/core/lib/monitoring/metric_def.h" diff --git a/tensorflow/core/lib/monitoring/mobile_sampler.h b/tensorflow/core/lib/monitoring/mobile_sampler.h index 5233f0ff472..71851c30511 100644 --- a/tensorflow/core/lib/monitoring/mobile_sampler.h +++ b/tensorflow/core/lib/monitoring/mobile_sampler.h @@ -18,6 +18,14 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_LIB_MONITORING_MOBILE_SAMPLER_H_ #define TENSORFLOW_CORE_LIB_MONITORING_MOBILE_SAMPLER_H_ +#if !defined(IS_MOBILE_PLATFORM) || !defined(TENSORFLOW_INCLUDED_FROM_SAMPLER_H) +// If this header file were included directly, and something else included its +// non-mobile counterpart, there could be an unchecked ODR violation on the +// classes below. +#error do not include mobile_sampler.h directly; use sampler.h to include it instead +#endif // !defined(IS_MOBILE_PLATFORM) || + // !defined(TENSORFLOW_INCLUDED_FROM_SAMPLER_H) + #include #include "tensorflow/core/framework/summary.pb.h" diff --git a/tensorflow/core/lib/monitoring/percentile_sampler.h b/tensorflow/core/lib/monitoring/percentile_sampler.h index ddedf497557..31bc6837c19 100644 --- a/tensorflow/core/lib/monitoring/percentile_sampler.h +++ b/tensorflow/core/lib/monitoring/percentile_sampler.h @@ -24,7 +24,11 @@ limitations under the License. // We replace this implementation with a null implementation for mobile // platforms. #ifdef IS_MOBILE_PLATFORM +#define TENSORFLOW_INCLUDED_FROM_PERCENTILE_SAMPLER_H // prevent accidental use + // of +// mobile_percentile_sampler.h #include "tensorflow/core/lib/monitoring/mobile_percentile_sampler.h" +#undef TENSORFLOW_INCLUDED_FROM_PERCENTILE_SAMPLER_H #else #include diff --git a/tensorflow/core/lib/monitoring/sampler.h b/tensorflow/core/lib/monitoring/sampler.h index 2deaf54d2ad..0c3399757fe 100644 --- a/tensorflow/core/lib/monitoring/sampler.h +++ b/tensorflow/core/lib/monitoring/sampler.h @@ -24,7 +24,10 @@ limitations under the License. // We replace this implementation with a null implementation for mobile // platforms. #ifdef IS_MOBILE_PLATFORM +#define TENSORFLOW_INCLUDED_FROM_SAMPLER_H // prevent accidental use of + // mobile_sampler.h #include "tensorflow/core/lib/monitoring/mobile_sampler.h" +#undef TENSORFLOW_INCLUDED_FROM_SAMPLER_H #else #include From 916f71f7f6c5c734be9af30c756f148ad8c2c02a Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 10 Nov 2020 13:50:18 -0800 Subject: [PATCH 127/220] [TF:TRT] Use ModelConfig to simplify APIs in model testing. PiperOrigin-RevId: 341689979 Change-Id: I079946d57267553569773111e046d98540eb35c8 --- .../tensorrt/model_tests/model_handler.py | 82 ++++++++++--------- .../tensorrt/model_tests/run_models.py | 29 +++---- 2 files changed, 59 insertions(+), 52 deletions(-) diff --git a/tensorflow/python/compiler/tensorrt/model_tests/model_handler.py b/tensorflow/python/compiler/tensorrt/model_tests/model_handler.py index c23337ff3f5..7db998e54f7 100644 --- a/tensorflow/python/compiler/tensorrt/model_tests/model_handler.py +++ b/tensorflow/python/compiler/tensorrt/model_tests/model_handler.py @@ -39,10 +39,6 @@ from tensorflow.python.saved_model import loader as saved_model_loader from tensorflow.python.saved_model import signature_constants from tensorflow.python.saved_model import tag_constants -DEFAULT_SAVED_MODEL_TAGS = (tag_constants.SERVING,) -DEFAULT_SAVED_MODEL_SIGNATURE_KEY = ( - signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY) - # pylint: disable=bad-whitespace ### Helper Functions @@ -112,29 +108,40 @@ class TestResult( trt_convert_params) +class ModelConfig( + collections.namedtuple("ModelConfig", [ + "saved_model_dir", "saved_model_tags", "saved_model_signature_key", + "default_batch_size" + ])): + """Configurations for test models.""" + + def __new__(cls, + saved_model_dir: str, + saved_model_tags: Sequence[str] = (tag_constants.SERVING,), + saved_model_signature_key: str = ( + signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY), + default_batch_size: int = 1): + return super(ModelConfig, + cls).__new__(cls, saved_model_dir, saved_model_tags, + saved_model_signature_key, default_batch_size) + + class _ModelHandlerBase(metaclass=abc.ABCMeta): """Base class for running a model.""" - def __init__( - self, - *, - saved_model_dir: str, - saved_model_tags: Sequence[str] = DEFAULT_SAVED_MODEL_TAGS, - saved_model_signature_key: 
str = DEFAULT_SAVED_MODEL_SIGNATURE_KEY): - self._saved_model_dir = saved_model_dir - self._saved_model_tags = saved_model_tags - self._saved_model_signature_key = saved_model_signature_key + def __init__(self, model_config: ModelConfig): + self._model_config = model_config def __str__(self) -> str: - return "Directory: {}; Tags: {}; Signature: {}".format( - self._saved_model_dir, - self._saved_model_tags, - self._saved_model_signature_key, - ) + return str(self._model_config) def __repr__(self) -> str: return "{}({})".format(self.__class__.__name__, str(self)) + @property + def model_config(self) -> ModelConfig: + return self._model_config + @property def input_tensort_names(self) -> Sequence[str]: """Names of input tensors.""" @@ -176,18 +183,19 @@ class ModelHandlerV1(_ModelHandlerBase): @property def meta_graph(self) -> meta_graph_pb2.MetaGraphDef: return load_meta_graph( - saved_model_dir=self._saved_model_dir, - saved_model_tags=self._saved_model_tags, - saved_model_signature_key=self._saved_model_signature_key) + saved_model_dir=self.model_config.saved_model_dir, + saved_model_tags=self.model_config.saved_model_tags, + saved_model_signature_key=self.model_config.saved_model_signature_key) @property def input_tensor_info(self) -> Mapping[str, meta_graph_pb2.TensorInfo]: - return self.meta_graph.signature_def[self._saved_model_signature_key].inputs + return self.meta_graph.signature_def[ + self.model_config.saved_model_signature_key].inputs @property def output_tensor_info(self) -> Mapping[str, meta_graph_pb2.TensorInfo]: return self.meta_graph.signature_def[ - self._saved_model_signature_key].outputs + self.model_config.saved_model_signature_key].outputs @property def input_tensort_names(self) -> Sequence[str]: @@ -200,6 +208,7 @@ class ModelHandlerV1(_ModelHandlerBase): def generate_random_inputs(self, batch_size: Optional[int] = None ) -> Mapping[str, np.ndarray]: + batch_size = batch_size or self.model_config.default_batch_size return { tensor_info.name: 
_generate_random_tensor_v1(tensor_info, batch_size) for tensor_info in self.input_tensor_info.values() @@ -225,7 +234,7 @@ class ModelHandlerV1(_ModelHandlerBase): outputs = sess.run(fetches=self.output_tensor_names, feed_dict=inputs) latency.append(time.time() - before) except Exception as exc: - raise RuntimeError("Failed to run model inference!" + raise RuntimeError("Failed to run model inference! " "Model information: {}".format(str(self))) from exc outputs = dict(zip(self.output_tensor_names, outputs)) return TestResult(latency=latency, outputs=outputs if inputs else None) @@ -236,21 +245,15 @@ class _TrtModelHandlerBase(_ModelHandlerBase): def __init__( self, - *, + model_config: ModelConfig, trt_convert_params: trt.TrtConversionParams, - saved_model_dir: str, - saved_model_tags: Sequence[str] = DEFAULT_SAVED_MODEL_TAGS, - saved_model_signature_key: str = DEFAULT_SAVED_MODEL_SIGNATURE_KEY): - super(_TrtModelHandlerBase, self).__init__( - saved_model_dir=saved_model_dir, - saved_model_tags=saved_model_tags, - saved_model_signature_key=saved_model_signature_key) + ): + super(_TrtModelHandlerBase, self).__init__(model_config) + self._trt_convert_params = trt_convert_params self._converter = self._create_converter(trt_convert_params) logging.info("Converting to TensorRT!") self._check_conversion(self._converter.convert()) - - self._trt_convert_params = trt_convert_params self._conversion_is_saved = False @abc.abstractmethod @@ -278,12 +281,14 @@ class _TrtModelHandlerBase(_ModelHandlerBase): def save(self, output_saved_model_dir: Optional[str] = None, overwrite=True) -> None: + """Saves a TensorRT converted model.""" if self._conversion_is_saved and not overwrite: return output_saved_model_dir = output_saved_model_dir or tempfile.mkdtemp() logging.info("Saving TensorRT model to %s!", output_saved_model_dir) self._converter.save(output_saved_model_dir) - self._saved_model_dir = output_saved_model_dir + self._model_config = self.model_config._replace( + 
saved_model_dir=output_saved_model_dir) self._conversion_is_saved = True @@ -293,9 +298,10 @@ class TrtModelHandlerV1(_TrtModelHandlerBase, ModelHandlerV1): def _create_converter(self, trt_convert_params: trt.TrtConversionParams): conversion_nodes_denylist = self.output_tensor_names return trt.TrtGraphConverter( - input_saved_model_dir=self._saved_model_dir, - input_saved_model_tags=self._saved_model_tags, - input_saved_model_signature_key=self._saved_model_signature_key, + input_saved_model_dir=self.model_config.saved_model_dir, + input_saved_model_tags=self.model_config.saved_model_tags, + input_saved_model_signature_key=( + self.model_config.saved_model_signature_key), nodes_denylist=conversion_nodes_denylist, max_batch_size=trt_convert_params.max_batch_size, max_workspace_size_bytes=trt_convert_params.max_workspace_size_bytes, diff --git a/tensorflow/python/compiler/tensorrt/model_tests/run_models.py b/tensorflow/python/compiler/tensorrt/model_tests/run_models.py index 0e930130403..a43821d2d6f 100644 --- a/tensorflow/python/compiler/tensorrt/model_tests/run_models.py +++ b/tensorflow/python/compiler/tensorrt/model_tests/run_models.py @@ -33,27 +33,28 @@ def _get_mean_latency(result: model_handler.TestResult): def run_all_tests(): """Runs all sample model with TensorRT FP32/FP16 and reports latency.""" - # The model_configs contains (saved_model_dir, batch_size) for each model - model_configs = ((platform_test.test_src_dir_path( - "python/compiler/tensorrt/model_tests/sample_model"), 128),) + model_configs = (model_handler.ModelConfig( + saved_model_dir=platform_test.test_src_dir_path( + "python/compiler/tensorrt/model_tests/sample_model"), + default_batch_size=128),) model_handler_cls = model_handler.ModelHandlerV1 trt_model_handeler_cls = model_handler.TrtModelHandlerV1 default_trt_convert_params = DEFAUL_TRT_CONVERT_PARAMS._replace( is_dynamic_op=False) - for saved_model_dir, batch_size in model_configs: - base_model = 
model_handler_cls(saved_model_dir=saved_model_dir) - random_inputs = base_model.generate_random_inputs(batch_size) + for model_config in model_configs: + trt_convert_params = default_trt_convert_params._replace( + max_batch_size=model_config.default_batch_size) + base_model = model_handler_cls(model_config) + random_inputs = base_model.generate_random_inputs() base_model_result = base_model.run(random_inputs) trt_fp32_model_result = trt_model_handeler_cls( - saved_model_dir=saved_model_dir, - trt_convert_params=default_trt_convert_params._replace( - precision_mode=trt.TrtPrecisionMode.FP32, - max_batch_size=batch_size)).run(random_inputs) + model_config=model_config, + trt_convert_params=trt_convert_params._replace( + precision_mode=trt.TrtPrecisionMode.FP32)).run(random_inputs) trt_fp16_model_result = trt_model_handeler_cls( - saved_model_dir=saved_model_dir, - trt_convert_params=default_trt_convert_params._replace( - precision_mode=trt.TrtPrecisionMode.FP16, - max_batch_size=batch_size)).run(random_inputs) + model_config=model_config, + trt_convert_params=trt_convert_params._replace( + precision_mode=trt.TrtPrecisionMode.FP16)).run(random_inputs) logging.info("Base model latency: %f ms", _get_mean_latency(base_model_result)) From 83a51f49fa6fce64035d4960f74fdf366c9dee3c Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Tue, 10 Nov 2020 14:00:34 -0800 Subject: [PATCH 128/220] DeviceInfo renamed into GpuInfo. 
PiperOrigin-RevId: 341692138 Change-Id: I129db695d3cabaa423059822ecaf75111e98c878 --- .../lite/delegates/gpu/cl/cl_arguments.cc | 14 +- .../lite/delegates/gpu/cl/cl_arguments.h | 4 +- .../delegates/gpu/cl/cl_arguments_test.cc | 9 +- .../lite/delegates/gpu/cl/cl_command_queue.cc | 8 +- .../lite/delegates/gpu/cl/cl_command_queue.h | 2 +- .../lite/delegates/gpu/cl/cl_context.cc | 2 +- tensorflow/lite/delegates/gpu/cl/cl_device.cc | 6 +- tensorflow/lite/delegates/gpu/cl/cl_device.h | 4 +- .../lite/delegates/gpu/cl/device_info.cc | 28 ++-- .../lite/delegates/gpu/cl/device_info.h | 4 +- .../lite/delegates/gpu/cl/environment.cc | 4 +- .../lite/delegates/gpu/cl/environment.h | 4 +- .../delegates/gpu/cl/inference_context.cc | 18 +-- .../lite/delegates/gpu/cl/inference_context.h | 5 +- .../lite/delegates/gpu/cl/kernels/BUILD | 1 + .../lite/delegates/gpu/cl/kernels/cl_test.cc | 1 + .../lite/delegates/gpu/cl/kernels/concat_z.cc | 7 +- .../lite/delegates/gpu/cl/kernels/concat_z.h | 2 +- .../gpu/cl/kernels/conv_buffer_1x1.cc | 47 +++---- .../gpu/cl/kernels/conv_buffer_1x1.h | 18 +-- .../gpu/cl/kernels/conv_buffer_1x1_test.cc | 4 +- .../gpu/cl/kernels/conv_constants.cc | 15 +- .../delegates/gpu/cl/kernels/conv_constants.h | 4 +- .../gpu/cl/kernels/conv_constants_test.cc | 4 +- .../delegates/gpu/cl/kernels/conv_powervr.cc | 133 +++++++++--------- .../delegates/gpu/cl/kernels/conv_powervr.h | 54 +++---- .../gpu/cl/kernels/conv_powervr_test.cc | 8 +- .../gpu/cl/kernels/convolution_transposed.cc | 41 +++--- .../gpu/cl/kernels/convolution_transposed.h | 16 +-- .../cl/kernels/convolution_transposed_3x3.cc | 18 +-- .../cl/kernels/convolution_transposed_3x3.h | 8 +- .../convolution_transposed_3x3_test.cc | 2 +- .../convolution_transposed_3x3_thin.cc | 2 +- .../kernels/convolution_transposed_3x3_thin.h | 4 +- .../convolution_transposed_3x3_thin_test.cc | 4 +- .../cl/kernels/convolution_transposed_4x4.cc | 14 +- .../cl/kernels/convolution_transposed_4x4.h | 8 +- 
.../convolution_transposed_4x4_test.cc | 2 +- .../cl/kernels/convolution_transposed_test.cc | 4 +- .../cl/kernels/convolution_transposed_thin.cc | 8 +- .../cl/kernels/convolution_transposed_thin.h | 6 +- .../convolution_transposed_thin_test.cc | 4 +- .../gpu/cl/kernels/depthwise_conv.cc | 14 +- .../delegates/gpu/cl/kernels/depthwise_conv.h | 6 +- .../gpu/cl/kernels/depthwise_conv_3x3.cc | 19 ++- .../gpu/cl/kernels/depthwise_conv_3x3.h | 8 +- .../gpu/cl/kernels/depthwise_conv_3x3_test.cc | 8 +- .../gpu/cl/kernels/depthwise_conv_test.cc | 6 +- .../delegates/gpu/cl/kernels/elementwise.cc | 23 ++- .../delegates/gpu/cl/kernels/elementwise.h | 2 +- .../gpu/cl/kernels/elementwise_test.cc | 43 +++--- .../gpu/cl/kernels/fully_connected.cc | 30 ++-- .../gpu/cl/kernels/fully_connected.h | 10 +- .../gpu/cl/kernels/fully_connected_test.cc | 6 +- .../delegates/gpu/cl/kernels/gpu_operation.cc | 15 +- .../delegates/gpu/cl/kernels/gpu_operation.h | 8 +- .../lite/delegates/gpu/cl/kernels/lstm.cc | 10 +- .../lite/delegates/gpu/cl/kernels/lstm.h | 2 +- .../lite/delegates/gpu/cl/kernels/mean.cc | 14 +- .../lite/delegates/gpu/cl/kernels/mean.h | 6 +- .../cl/kernels/mean_stddev_normalization.cc | 20 +-- .../cl/kernels/mean_stddev_normalization.h | 6 +- .../lite/delegates/gpu/cl/kernels/prelu.cc | 4 +- .../lite/delegates/gpu/cl/kernels/prelu.h | 2 +- .../delegates/gpu/cl/kernels/prelu_test.cc | 6 +- .../delegates/gpu/cl/kernels/reduce_test.cc | 8 +- .../delegates/gpu/cl/kernels/softmax1x1.h | 2 +- .../gpu/cl/kernels/special/fc_fc_add.cc | 33 +++-- .../gpu/cl/kernels/special/fc_fc_add.h | 11 +- .../gpu/cl/kernels/tuning_parameters.h | 2 +- .../lite/delegates/gpu/cl/kernels/util.cc | 8 +- .../lite/delegates/gpu/cl/kernels/util.h | 2 +- .../lite/delegates/gpu/cl/kernels/winograd.cc | 26 ++-- .../lite/delegates/gpu/cl/kernels/winograd.h | 17 ++- .../delegates/gpu/cl/kernels/winograd_test.cc | 6 +- .../gpu/cl/kernels/work_group_picking.cc | 50 +++---- .../gpu/cl/kernels/work_group_picking.h | 
12 +- .../gpu/cl/selectors/convolution_selector.cc | 98 ++++++------- .../gpu/cl/selectors/convolution_selector.h | 11 +- .../convolution_transposed_selector.cc | 42 +++--- .../convolution_transposed_selector.h | 2 +- .../cl/selectors/default/default_selector.cc | 5 +- .../gpu/cl/selectors/default_selector.h | 5 +- .../cl/selectors/dw_convolution_selector.cc | 36 ++--- .../cl/selectors/dw_convolution_selector.h | 2 +- .../cl/selectors/fully_connected_selector.cc | 44 +++--- .../cl/selectors/fully_connected_selector.h | 2 +- .../gpu/cl/selectors/operation_selector.cc | 56 ++++---- .../gpu/cl/selectors/operation_selector.h | 2 +- .../gpu/cl/selectors/simple_selectors.cc | 30 ++-- .../gpu/cl/selectors/simple_selectors.h | 19 ++- .../gpu/cl/selectors/special_selector.cc | 14 +- .../gpu/cl/selectors/special_selector.h | 2 +- .../delegates/gpu/cl/storage_type_util.cc | 52 +++---- .../lite/delegates/gpu/cl/storage_type_util.h | 6 +- 95 files changed, 693 insertions(+), 731 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/cl_arguments.cc b/tensorflow/lite/delegates/gpu/cl/cl_arguments.cc index c7d665fc996..b242e88a5e9 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_arguments.cc +++ b/tensorflow/lite/delegates/gpu/cl/cl_arguments.cc @@ -136,12 +136,12 @@ std::string GetImageModifier(AccessType access) { } } -std::string GetDefaultSamplers(const DeviceInfo& device_info) { +std::string GetDefaultSamplers(const GpuInfo& gpu_info) { std::string result; result += "__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | " "CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n"; - if (device_info.IsAdreno() && device_info.adreno_info.IsAdreno3xx()) { + if (gpu_info.IsAdreno() && gpu_info.adreno_info.IsAdreno3xx()) { // Unfortunately, CLK_ADDRESS_CLAMP is very slow on Adreno3xx and // we can observe huge register overhead when compared to other modes. 
@@ -209,7 +209,7 @@ absl::Status CreateCLObject(GPUObjectDescriptor* desc, CLContext* context, constexpr char CLArguments::kArgsPrefix[]; absl::Status CLArguments::Init( - const DeviceInfo& device_info, + const GpuInfo& gpu_info, const std::map& linkables, CLContext* context, Arguments* args, std::string* code) { RETURN_IF_ERROR(AllocateObjects(*args, context)); @@ -217,22 +217,22 @@ absl::Status CLArguments::Init( RETURN_IF_ERROR(ResolveSelectorsPass(*args, linkables, code)); object_refs_ = std::move(args->object_refs_); args->GetActiveArguments(kArgsPrefix, *code); - const bool use_f32_for_halfs = device_info.IsPowerVR(); + const bool use_f32_for_halfs = gpu_info.IsPowerVR(); CopyArguments(*args, use_f32_for_halfs); RETURN_IF_ERROR(SetObjectsResources(*args)); RenameArgumentsInCode(code); ResolveArgsPass(code); *code = absl::Substitute(*code, GetListOfArgs()); - *code = GetDefaultSamplers(device_info) + *code; + *code = GetDefaultSamplers(gpu_info) + *code; return absl::OkStatus(); } -absl::Status CLArguments::Init(const DeviceInfo& device_info, Arguments* args, +absl::Status CLArguments::Init(const GpuInfo& gpu_info, Arguments* args, CLContext* context) { RETURN_IF_ERROR(AllocateObjects(*args, context)); RETURN_IF_ERROR(AddObjectArgs(args)); object_refs_ = std::move(args->object_refs_); - const bool use_f32_for_halfs = device_info.IsPowerVR(); + const bool use_f32_for_halfs = gpu_info.IsPowerVR(); CopyArguments(*args, use_f32_for_halfs); RETURN_IF_ERROR(SetObjectsResources(*args)); return absl::OkStatus(); diff --git a/tensorflow/lite/delegates/gpu/cl/cl_arguments.h b/tensorflow/lite/delegates/gpu/cl/cl_arguments.h index eaafde862fd..673b24f63e2 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_arguments.h +++ b/tensorflow/lite/delegates/gpu/cl/cl_arguments.h @@ -34,10 +34,10 @@ class CLArguments : public ArgumentsBinder { public: CLArguments() = default; - absl::Status Init(const DeviceInfo& device_info, + absl::Status Init(const GpuInfo& gpu_info, const 
std::map& linkables, CLContext* context, Arguments* args, std::string* code); - absl::Status Init(const DeviceInfo& device_info, Arguments* args, + absl::Status Init(const GpuInfo& gpu_info, Arguments* args, CLContext* context); // Temporary, will be resolved later diff --git a/tensorflow/lite/delegates/gpu/cl/cl_arguments_test.cc b/tensorflow/lite/delegates/gpu/cl/cl_arguments_test.cc index f682f3d1e69..ddca3d4dc3a 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_arguments_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/cl_arguments_test.cc @@ -44,8 +44,8 @@ __kernel void main_function($0) { })"; CLArguments cl_args; - DeviceInfo device_info; - ASSERT_OK(cl_args.Init(device_info, {}, nullptr, &args, &sample_code)); + GpuInfo gpu_info; + ASSERT_OK(cl_args.Init(gpu_info, {}, nullptr, &args, &sample_code)); EXPECT_TRUE(absl::StrContains(sample_code, "value = weights_buffer[id];")); EXPECT_TRUE( absl::StrContains(sample_code, "__global float4* weights_buffer")); @@ -66,9 +66,8 @@ TEST(CLArgumentsTest, TestNoSelector) { } )"; CLArguments cl_args; - DeviceInfo device_info; - EXPECT_FALSE( - cl_args.Init(device_info, {}, nullptr, &args, &sample_code).ok()); + GpuInfo gpu_info; + EXPECT_FALSE(cl_args.Init(gpu_info, {}, nullptr, &args, &sample_code).ok()); } } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/cl_command_queue.cc b/tensorflow/lite/delegates/gpu/cl/cl_command_queue.cc index a7173881841..83b9f152459 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_command_queue.cc +++ b/tensorflow/lite/delegates/gpu/cl/cl_command_queue.cc @@ -216,19 +216,19 @@ ProfilingInfo ProfilingCommandQueue::GetProfilingInfo() const { } absl::Status ProfilingCommandQueue::GetBestWorkGroupIndex( - const CLKernel& kernel, const DeviceInfo& device_info, + const CLKernel& kernel, const GpuInfo& gpu_info, const std::vector& work_groups_count, const std::vector& work_group_sizes, int* index) { // Some Adreno 3xx can have wrong numbers for some events const bool 
possible_bug_with_events = - device_info.IsAdreno() && device_info.adreno_info.IsAdreno3xx(); + gpu_info.IsAdreno() && gpu_info.adreno_info.IsAdreno3xx(); events_.resize(work_group_sizes.size()); for (int i = 0; i < work_group_sizes.size(); ++i) { RETURN_IF_ERROR(CLCommandQueue::Dispatch(kernel, work_groups_count[i], work_group_sizes[i], &events_[i])); // reducing the speed of memory leak on Mali for some kernels - if (device_info.IsMali() && i % 8 == 7) { + if (gpu_info.IsMali() && i % 8 == 7) { events_[i - 7].Wait(); } if (possible_bug_with_events) { @@ -240,7 +240,7 @@ absl::Status ProfilingCommandQueue::GetBestWorkGroupIndex( RETURN_IF_ERROR(WaitForCompletion()); // To release memory of some kernel pool on Mali. - if (device_info.IsMali()) { + if (gpu_info.IsMali()) { RETURN_IF_ERROR(kernel.ReInit()); } diff --git a/tensorflow/lite/delegates/gpu/cl/cl_command_queue.h b/tensorflow/lite/delegates/gpu/cl/cl_command_queue.h index 519b87640e7..1b02e551dd8 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_command_queue.h +++ b/tensorflow/lite/delegates/gpu/cl/cl_command_queue.h @@ -116,7 +116,7 @@ class ProfilingCommandQueue : public CLCommandQueue { // will write index for fastest work_group among work_group_sizes absl::Status GetBestWorkGroupIndex(const CLKernel& kernel, - const DeviceInfo& device_info, + const GpuInfo& gpu_info, const std::vector& work_groups_count, const std::vector& work_group_sizes, int* index); diff --git a/tensorflow/lite/delegates/gpu/cl/cl_context.cc b/tensorflow/lite/delegates/gpu/cl/cl_context.cc index 9a8f404c46e..32a5e43d799 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_context.cc +++ b/tensorflow/lite/delegates/gpu/cl/cl_context.cc @@ -50,7 +50,7 @@ bool IsEqualToImageFormat(cl_image_format image_format, DataType data_type, image_format.image_channel_order == ToChannelOrder(num_channels); } -void AddSupportedImageFormats(cl_context context, DeviceInfo* info) { +void AddSupportedImageFormats(cl_context context, GpuInfo* info) { auto 
supported_formats = GetSupportedImage2DFormats(context, CL_MEM_READ_WRITE); for (auto format : supported_formats) { diff --git a/tensorflow/lite/delegates/gpu/cl/cl_device.cc b/tensorflow/lite/delegates/gpu/cl/cl_device.cc index 7ae25bd51b0..26206529235 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_device.cc +++ b/tensorflow/lite/delegates/gpu/cl/cl_device.cc @@ -156,8 +156,8 @@ bool IsGPUVersionInRange(int gpu_version, int min_version, int max_version) { } } // namespace -DeviceInfo DeviceInfoFromDeviceID(cl_device_id id) { - DeviceInfo info; +GpuInfo GpuInfoFromDeviceID(cl_device_id id) { + GpuInfo info; const auto device_name = GetDeviceInfo(id, CL_DEVICE_NAME); const auto vendor_name = GetDeviceInfo(id, CL_DEVICE_VENDOR); const auto opencl_c_version = @@ -267,7 +267,7 @@ DeviceInfo DeviceInfoFromDeviceID(cl_device_id id) { } CLDevice::CLDevice(cl_device_id id, cl_platform_id platform_id) - : info_(DeviceInfoFromDeviceID(id)), id_(id), platform_id_(platform_id) {} + : info_(GpuInfoFromDeviceID(id)), id_(id), platform_id_(platform_id) {} CLDevice::CLDevice(const CLDevice& device) : info_(device.info_), id_(device.id_), platform_id_(device.platform_id_) {} diff --git a/tensorflow/lite/delegates/gpu/cl/cl_device.h b/tensorflow/lite/delegates/gpu/cl/cl_device.h index c6595997857..3614e2211f1 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_device.h +++ b/tensorflow/lite/delegates/gpu/cl/cl_device.h @@ -67,10 +67,10 @@ class CLDevice { // To track bug on some Adreno. b/131099086 void DisableOneLayerTextureArray(); - const DeviceInfo& GetInfo() const { return info_; } + const GpuInfo& GetInfo() const { return info_; } // We update device info during context creation, so as supported texture // formats can be requested from context only. 
- mutable DeviceInfo info_; + mutable GpuInfo info_; private: cl_device_id id_ = nullptr; diff --git a/tensorflow/lite/delegates/gpu/cl/device_info.cc b/tensorflow/lite/delegates/gpu/cl/device_info.cc index 89630653f58..77843421128 100644 --- a/tensorflow/lite/delegates/gpu/cl/device_info.cc +++ b/tensorflow/lite/delegates/gpu/cl/device_info.cc @@ -296,15 +296,15 @@ bool MaliInfo::IsValhall() const { gpu_version == MaliGPU::G68 || gpu_version == MaliGPU::G78; } -bool DeviceInfo::SupportsTextureArray() const { +bool GpuInfo::SupportsTextureArray() const { return cl_version >= OpenCLVersion::CL_1_2; } -bool DeviceInfo::SupportsImageBuffer() const { +bool GpuInfo::SupportsImageBuffer() const { return cl_version >= OpenCLVersion::CL_1_2; } -bool DeviceInfo::SupportsImage3D() const { +bool GpuInfo::SupportsImage3D() const { if (IsMali() && mali_info.IsMidgard()) { // On Mali T880 read_imageh doesn't compile with image3d_t return false; @@ -312,7 +312,7 @@ bool DeviceInfo::SupportsImage3D() const { return supports_image3d_writes; } -bool DeviceInfo::SupportsFloatImage2D(DataType data_type, int channels) const { +bool GpuInfo::SupportsFloatImage2D(DataType data_type, int channels) const { if (channels == 1) { return data_type == DataType::FLOAT32 ? 
supports_r_f32_tex2d : supports_r_f16_tex2d; @@ -330,7 +330,7 @@ bool DeviceInfo::SupportsFloatImage2D(DataType data_type, int channels) const { } } -bool DeviceInfo::SupportsExtension(const std::string& extension) const { +bool GpuInfo::SupportsExtension(const std::string& extension) const { for (const auto& ext : extensions) { if (ext == extension) { return true; @@ -339,13 +339,13 @@ bool DeviceInfo::SupportsExtension(const std::string& extension) const { return false; } -bool DeviceInfo::IsCL20OrHigher() const { +bool GpuInfo::IsCL20OrHigher() const { return cl_version != OpenCLVersion::CL_1_0 && cl_version != OpenCLVersion::CL_1_1 && cl_version != OpenCLVersion::CL_1_2; } -bool DeviceInfo::SupportsSubGroupWithSize(int sub_group_size) const { +bool GpuInfo::SupportsSubGroupWithSize(int sub_group_size) const { for (auto subgroup_size : supported_subgroup_sizes) { if (sub_group_size == subgroup_size) { return true; @@ -354,19 +354,19 @@ bool DeviceInfo::SupportsSubGroupWithSize(int sub_group_size) const { return false; } -bool DeviceInfo::IsAdreno() const { return gpu_vendor == GpuVendor::kQualcomm; } +bool GpuInfo::IsAdreno() const { return gpu_vendor == GpuVendor::kQualcomm; } -bool DeviceInfo::IsApple() const { return gpu_vendor == GpuVendor::kApple; } +bool GpuInfo::IsApple() const { return gpu_vendor == GpuVendor::kApple; } -bool DeviceInfo::IsMali() const { return gpu_vendor == GpuVendor::kMali; } +bool GpuInfo::IsMali() const { return gpu_vendor == GpuVendor::kMali; } -bool DeviceInfo::IsPowerVR() const { return gpu_vendor == GpuVendor::kPowerVR; } +bool GpuInfo::IsPowerVR() const { return gpu_vendor == GpuVendor::kPowerVR; } -bool DeviceInfo::IsNvidia() const { return gpu_vendor == GpuVendor::kNvidia; } +bool GpuInfo::IsNvidia() const { return gpu_vendor == GpuVendor::kNvidia; } -bool DeviceInfo::IsAMD() const { return gpu_vendor == GpuVendor::kAMD; } +bool GpuInfo::IsAMD() const { return gpu_vendor == GpuVendor::kAMD; } -bool DeviceInfo::IsIntel() const 
{ return gpu_vendor == GpuVendor::kIntel; } +bool GpuInfo::IsIntel() const { return gpu_vendor == GpuVendor::kIntel; } } // namespace cl } // namespace gpu diff --git a/tensorflow/lite/delegates/gpu/cl/device_info.h b/tensorflow/lite/delegates/gpu/cl/device_info.h index 81913194b71..8bf55c71c81 100644 --- a/tensorflow/lite/delegates/gpu/cl/device_info.h +++ b/tensorflow/lite/delegates/gpu/cl/device_info.h @@ -176,8 +176,8 @@ struct MaliInfo { bool IsValhall() const; }; -struct DeviceInfo { - DeviceInfo() = default; +struct GpuInfo { + GpuInfo() = default; bool IsAdreno() const; bool IsApple() const; diff --git a/tensorflow/lite/delegates/gpu/cl/environment.cc b/tensorflow/lite/delegates/gpu/cl/environment.cc index 932232b78fb..9b2fef288fe 100644 --- a/tensorflow/lite/delegates/gpu/cl/environment.cc +++ b/tensorflow/lite/delegates/gpu/cl/environment.cc @@ -173,7 +173,7 @@ bool Environment::IsSupported(TensorStorageType storage_type) const { return false; } -TensorStorageType GetFastestStorageType(const DeviceInfo& gpu_info) { +TensorStorageType GetFastestStorageType(const GpuInfo& gpu_info) { if (gpu_info.IsAdreno()) { if (gpu_info.adreno_info.IsAdreno6xxOrHigher()) { return TensorStorageType::TEXTURE_ARRAY; @@ -203,7 +203,7 @@ TensorStorageType GetFastestStorageType(const DeviceInfo& gpu_info) { } TensorStorageType GetStorageTypeWithMinimalMemoryConsumption( - const DeviceInfo& gpu_info) { + const GpuInfo& gpu_info) { if (gpu_info.IsAdreno()) { if (gpu_info.adreno_info.IsAdreno3xx() || gpu_info.adreno_info.IsAdreno4xx()) { diff --git a/tensorflow/lite/delegates/gpu/cl/environment.h b/tensorflow/lite/delegates/gpu/cl/environment.h index 86884e0ad05..8917351841f 100644 --- a/tensorflow/lite/delegates/gpu/cl/environment.h +++ b/tensorflow/lite/delegates/gpu/cl/environment.h @@ -75,9 +75,9 @@ class Environment { ProgramCache program_cache_; }; -TensorStorageType GetFastestStorageType(const DeviceInfo& gpu_info); +TensorStorageType GetFastestStorageType(const GpuInfo& 
gpu_info); TensorStorageType GetStorageTypeWithMinimalMemoryConsumption( - const DeviceInfo& gpu_info); + const GpuInfo& gpu_info); absl::Status CreateEnvironment(Environment* result); diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.cc b/tensorflow/lite/delegates/gpu/cl/inference_context.cc index 3d7e2afbc9d..0b16ff247c8 100644 --- a/tensorflow/lite/delegates/gpu/cl/inference_context.cc +++ b/tensorflow/lite/delegates/gpu/cl/inference_context.cc @@ -160,7 +160,7 @@ absl::Status InferenceContext::InitFromGraph( creation_context.queue = env->queue(); creation_context.cache = env->program_cache(); - ReserveGraphTensors(create_info, creation_context.GetDeviceInfo(), graph); + ReserveGraphTensors(create_info, creation_context.GetGpuInfo(), graph); precision_ = create_info.precision; storage_type_ = create_info.storage_type; if (env->device().IsMali()) { @@ -174,7 +174,7 @@ absl::Status InferenceContext::InitFromGraph( need_flush_ = true; } CopyInAndOutIds(graph); - RETURN_IF_ERROR(ConvertOperations(creation_context.GetDeviceInfo(), graph, + RETURN_IF_ERROR(ConvertOperations(creation_context.GetGpuInfo(), graph, create_info.hints)); RETURN_IF_ERROR(Merge()); RETURN_IF_ERROR(AllocateMemory(creation_context.context)); @@ -284,7 +284,7 @@ void InferenceContext::CopyInAndOutIds(const GraphFloat32& graph) { } void InferenceContext::ReserveGraphTensors( - const CreateInferenceInfo& create_info, const DeviceInfo& device_info, + const CreateInferenceInfo& create_info, const GpuInfo& gpu_info, const GraphFloat32& graph) { ValueId max_id = 0; auto tensors = graph.values(); @@ -296,14 +296,14 @@ void InferenceContext::ReserveGraphTensors( if (graph.IsGraphInput(t->id) || graph.IsGraphOutput(t->id)) { if (shape.c < 4 && CanCreateTensorWithShape( - device_info, shape, + gpu_info, shape, TensorDescriptor{data_type, TensorStorageType::SINGLE_TEXTURE_2D, layout})) { storage_type = TensorStorageType::SINGLE_TEXTURE_2D; } } - storage_type = 
SelectBestStorageType(device_info, shape, storage_type, - data_type, layout); + storage_type = + SelectBestStorageType(gpu_info, shape, storage_type, data_type, layout); tensor_reserver_.Add( t->id, {shape, TensorDescriptor{data_type, storage_type, layout}}); max_id = std::max(max_id, t->id); @@ -311,7 +311,7 @@ void InferenceContext::ReserveGraphTensors( tensor_reserver_.SetNext(max_id + 1); } -absl::Status InferenceContext::ConvertOperations(const DeviceInfo& device_info, +absl::Status InferenceContext::ConvertOperations(const GpuInfo& gpu_info, const GraphFloat32& graph, ModelHints hints) { std::map tensor_descriptors; @@ -335,7 +335,7 @@ absl::Status InferenceContext::ConvertOperations(const DeviceInfo& device_info, std::string op_name = node.operation.type + " " + std::to_string(node.id); GPUOperationsSubgraph gpu_subgraph; if (hints.Check(ModelHints::kAllowSpecialKernels) && - GPUSubgraphFromGraph(device_info, precision_, graph, node.id, + GPUSubgraphFromGraph(gpu_info, precision_, graph, node.id, tensor_descriptors, &consumed_nodes, &gpu_subgraph, &op_name) .ok()) { @@ -375,7 +375,7 @@ absl::Status InferenceContext::ConvertOperations(const DeviceInfo& device_info, op_def.dst_tensors.push_back( tensor_reserver_.Get(outputs[j]->id).descriptor); } - RETURN_IF_ERROR(GPUOperationFromNode(device_info, op_def, hints, inputs, + RETURN_IF_ERROR(GPUOperationFromNode(gpu_info, op_def, hints, inputs, outputs, node, &gpu_subgraph)); } absl::flat_hash_map mapping_to_global_ids; diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.h b/tensorflow/lite/delegates/gpu/cl/inference_context.h index b427462614b..cfbea54f478 100644 --- a/tensorflow/lite/delegates/gpu/cl/inference_context.h +++ b/tensorflow/lite/delegates/gpu/cl/inference_context.h @@ -111,12 +111,11 @@ class InferenceContext { InferenceContext* inference); void CopyInAndOutIds(const GraphFloat32& graph); - absl::Status ConvertOperations(const DeviceInfo& device_info, + absl::Status 
ConvertOperations(const GpuInfo& gpu_info, const GraphFloat32& graph, ModelHints hints); void CreateLinks(); void ReserveGraphTensors(const CreateInferenceInfo& create_info, - const DeviceInfo& device_info, - const GraphFloat32& graph); + const GpuInfo& gpu_info, const GraphFloat32& graph); absl::Status Merge(); absl::Status AllocateMemory(CLContext* context); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD index 7c9d2755962..98b182c2afe 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD @@ -48,6 +48,7 @@ cc_library( ":gpu_operation", "//tensorflow/lite/delegates/gpu/cl:environment", "//tensorflow/lite/delegates/gpu/cl:opencl_wrapper", + "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:shape", "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:tensor", diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/cl_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/cl_test.cc index efe97f9931b..e640b22b495 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/cl_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/cl_test.cc @@ -15,6 +15,7 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/cl/kernels/cl_test.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor.h" #include "tensorflow/lite/delegates/gpu/common/status.h" namespace tflite { diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.cc b/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.cc index 2c027c91a81..5be1581efb9 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.cc @@ -124,7 +124,7 @@ std::string GetConcatKernelCode(const OperationDef& op_def, GPUOperation CreateConcatZ(const OperationDef& definition, const std::vector& channels, - const DeviceInfo& device_info) { + const GpuInfo& gpu_info) { GPUOperation op(definition); for (int i = 0; i < definition.src_tensors.size(); ++i) { const std::string name = "src_tensor_" + std::to_string(i); @@ -140,14 +140,13 @@ GPUOperation CreateConcatZ(const OperationDef& definition, } op.AddDstTensor("dst_tensor", dst_desc); op.code_ = GetConcatKernelCode(definition, channels); - if (device_info.IsPowerVR() && + if (gpu_info.IsPowerVR() && definition.precision == CalculationsPrecision::F32 && !IsAllChannelsX4(channels)) { // BUG, some PowerVRs (GE8320) produce incorrect result without it op.compiler_options_.push_back(CompilerOptions::CL_OPT_DISABLE); } - if (device_info.IsAMD() && - definition.precision != CalculationsPrecision::F32 && + if (gpu_info.IsAMD() && definition.precision != CalculationsPrecision::F32 && definition.src_tensors[0].storage_type != TensorStorageType::BUFFER && !IsAllChannelsX4(channels)) { // BUG, some AMD gpus crash without it diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.h b/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.h index b209d8f3cd2..16341af4187 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.h @@ -31,7 +31,7 @@ namespace cl { GPUOperation CreateConcatZ(const OperationDef& definition, const std::vector& 
channels, - const DeviceInfo& device_info); + const GpuInfo& gpu_info); } // namespace cl } // namespace gpu diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc index 51836eeafeb..ed15df0bea7 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc @@ -79,19 +79,19 @@ std::string GetComputationPart(const int3& block_size, int element_size, return c; } -ConvBuffer1x1::ConvParams GetBestParams(const DeviceInfo& device_info, +ConvBuffer1x1::ConvParams GetBestParams(const GpuInfo& gpu_info, const OperationDef& definition, const BHWC& shape, int src_depth, int dst_depth) { ConvBuffer1x1::ConvParams conv_params; conv_params.element_size = 4; conv_params.block_size = int3(1, 1, 1); - if (!device_info.IsMali()) { + if (!gpu_info.IsMali()) { return conv_params; } bool can_use_flt8 = (shape.w * shape.b) % 2 == 0 && definition.precision != CalculationsPrecision::F32; - bool is_midgard = device_info.IsMali() && device_info.mali_info.IsMidgard(); + bool is_midgard = gpu_info.IsMali() && gpu_info.mali_info.IsMidgard(); if (is_midgard) { if (can_use_flt8) { conv_params.element_size = 8; @@ -103,8 +103,8 @@ ConvBuffer1x1::ConvParams GetBestParams(const DeviceInfo& device_info, } int task_size = shape.w * shape.b * shape.h * dst_depth; - int block_size = GetRecommendedBlockSizeForConv( - device_info, definition.precision, task_size); + int block_size = + GetRecommendedBlockSizeForConv(gpu_info, definition.precision, task_size); if (!can_use_flt8 && block_size > 4) { block_size = 4; @@ -132,15 +132,14 @@ ConvBuffer1x1::ConvParams GetBestParams(const DeviceInfo& device_info, return conv_params; } -ConvBuffer1x1::ConvParams GetBestParams(const DeviceInfo& device_info, +ConvBuffer1x1::ConvParams GetBestParams(const GpuInfo& gpu_info, const OperationDef& definition, int src_depth, int dst_depth) { ConvBuffer1x1::ConvParams 
conv_params; conv_params.element_size = 4; conv_params.block_size = int3(1, 1, 1); - if (device_info.IsMali() && - definition.precision == CalculationsPrecision::F16 && - device_info.compute_units_count <= 4) { + if (gpu_info.IsMali() && definition.precision == CalculationsPrecision::F16 && + gpu_info.compute_units_count <= 4) { conv_params.block_size.x *= 2; } return conv_params; @@ -315,9 +314,9 @@ int3 ConvBuffer1x1::GetGridSize() const { } void ConvBuffer1x1::GetPossibleKernelWorkGroups( - TuningType tuning_type, const DeviceInfo& device_info, + TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, std::vector* work_groups) const { - GetPossibleWorkGroupsConv(tuning_type, device_info, kernel_info, grid_size_, + GetPossibleWorkGroupsConv(tuning_type, gpu_info, kernel_info, grid_size_, work_groups); } @@ -344,7 +343,7 @@ bool IsConvBuffer1x1Supported(const OperationDef& definition, attr.padding.appended.w == 0 && attr.padding.appended.h == 0; } -ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo& device_info, +ConvBuffer1x1 CreateConvBuffer1x1(const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC* shape) { @@ -353,16 +352,16 @@ ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo& device_info, ConvBuffer1x1::ConvParams conv_params; if (shape) { conv_params = - GetBestParams(device_info, definition, *shape, src_depth, dst_depth); + GetBestParams(gpu_info, definition, *shape, src_depth, dst_depth); } else { - conv_params = GetBestParams(device_info, definition, src_depth, dst_depth); + conv_params = GetBestParams(gpu_info, definition, src_depth, dst_depth); } ConvBuffer1x1 result(definition, conv_params); result.UploadData(attr.weights, attr.bias); return result; } -ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo& device_info, +ConvBuffer1x1 CreateConvBuffer1x1(const GpuInfo& gpu_info, const OperationDef& definition, const FullyConnectedAttributes& attr, const BHWC* shape) { @@ -371,9 
+370,9 @@ ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo& device_info, ConvBuffer1x1::ConvParams conv_params; if (shape) { conv_params = - GetBestParams(device_info, definition, *shape, src_depth, dst_depth); + GetBestParams(gpu_info, definition, *shape, src_depth, dst_depth); } else { - conv_params = GetBestParams(device_info, definition, src_depth, dst_depth); + conv_params = GetBestParams(gpu_info, definition, src_depth, dst_depth); } conv_params.block_size.x *= conv_params.block_size.y; conv_params.block_size.y = 1; @@ -383,16 +382,16 @@ ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo& device_info, } ConvBuffer1x1 CreateConvBuffer1x1Wino4x4To6x6( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC* shape) { const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); ConvBuffer1x1::ConvParams conv_params; if (shape) { conv_params = - GetBestParams(device_info, definition, *shape, src_depth, dst_depth); + GetBestParams(gpu_info, definition, *shape, src_depth, dst_depth); } else { - conv_params = GetBestParams(device_info, definition, src_depth, dst_depth); + conv_params = GetBestParams(gpu_info, definition, src_depth, dst_depth); } conv_params.block_size.x *= conv_params.block_size.y; conv_params.block_size.y = 1; @@ -403,17 +402,17 @@ ConvBuffer1x1 CreateConvBuffer1x1Wino4x4To6x6( } ConvBuffer1x1 CreateConvBuffer1x1DynamicWeights( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC& weights_shape, const BHWC* dst_shape) { const int dst_depth = DivideRoundUp(weights_shape.b, 4); const int src_depth = DivideRoundUp(weights_shape.c, 4); ConvBuffer1x1::ConvParams conv_params; if (dst_shape) { - conv_params = GetBestParams(device_info, definition, *dst_shape, 
src_depth, - dst_depth); + conv_params = + GetBestParams(gpu_info, definition, *dst_shape, src_depth, dst_depth); } else { - conv_params = GetBestParams(device_info, definition, src_depth, dst_depth); + conv_params = GetBestParams(gpu_info, definition, src_depth, dst_depth); } ConvBuffer1x1 result(definition, conv_params); result.UploadBiases(attr.bias); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h index 9f9369df254..d93e9a0460c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h @@ -47,7 +47,7 @@ class ConvBuffer1x1 : public GPUOperation { ConvBuffer1x1& operator=(const ConvBuffer1x1&) = delete; void GetPossibleKernelWorkGroups( - TuningType tuning_type, const DeviceInfo& device_info, + TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, std::vector* work_groups) const override; int3 GetGridSize() const override; @@ -71,19 +71,19 @@ class ConvBuffer1x1 : public GPUOperation { private: ConvBuffer1x1(const OperationDef& definition, const ConvParams& conv_params); - friend ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo& device_info, + friend ConvBuffer1x1 CreateConvBuffer1x1(const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC* shape); - friend ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo& device_info, + friend ConvBuffer1x1 CreateConvBuffer1x1(const GpuInfo& gpu_info, const OperationDef& definition, const FullyConnectedAttributes& attr, const BHWC* shape); friend ConvBuffer1x1 CreateConvBuffer1x1Wino4x4To6x6( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC* shape); friend ConvBuffer1x1 CreateConvBuffer1x1DynamicWeights( - const DeviceInfo& device_info, const OperationDef& definition, + 
const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC& weights_shape, const BHWC* dst_shape); @@ -177,23 +177,23 @@ bool IsConvBuffer1x1Supported(const OperationDef& definition, const BHWC& weights_shape, const Convolution2DAttributes& attr); -ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo& device_info, +ConvBuffer1x1 CreateConvBuffer1x1(const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC* shape = nullptr); -ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo& device_info, +ConvBuffer1x1 CreateConvBuffer1x1(const GpuInfo& gpu_info, const OperationDef& definition, const FullyConnectedAttributes& attr, const BHWC* shape = nullptr); ConvBuffer1x1 CreateConvBuffer1x1DynamicWeights( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC& weights_shape, const BHWC* dst_shape = nullptr); ConvBuffer1x1 CreateConvBuffer1x1Wino4x4To6x6( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC* shape = nullptr); } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1_test.cc index d43329c91d9..5d4bdf1dbdf 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1_test.cc @@ -57,7 +57,7 @@ TEST_F(OpenCLOperationTest, ConvBuffer1x1SimpleWeights) { {data_type, TensorStorageType::BUFFER, Layout::HWC}); TensorFloat32 dst_tensor; ConvBuffer1x1 operation = CreateConvBuffer1x1( - creation_context_.GetDeviceInfo(), op_def, attr, &src_tensor.shape); + creation_context_.GetGpuInfo(), op_def, attr, &src_tensor.shape); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, 
&operation, BHWC(1, 2, 1, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -92,7 +92,7 @@ TEST_F(OpenCLOperationTest, ConvBuffer1x1) { {data_type, TensorStorageType::BUFFER, Layout::HWC}); TensorFloat32 dst_tensor; ConvBuffer1x1 operation = CreateConvBuffer1x1( - creation_context_.GetDeviceInfo(), op_def, attr, &src_tensor.shape); + creation_context_.GetGpuInfo(), op_def, attr, &src_tensor.shape); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 1, 4), &dst_tensor)); EXPECT_THAT(dst_tensor.data, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc index 8c9394187be..cb3479de4c8 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc @@ -36,7 +36,7 @@ int GetAdrenoOptimalMaxConstantSize(const AdrenoInfo& adreno_info) { } } -int GetOptimalMaxConstantSize(const DeviceInfo& info) { +int GetOptimalMaxConstantSize(const GpuInfo& info) { if (!info.IsAdreno()) { // In general we do not expect that this kernel will be used with non Adreno // so as it tuned for __constant memory that have big profit on Adreno @@ -237,11 +237,10 @@ bool IsDotConvBetter(int src_channels, int dst_channels) { } // namespace -bool IsConvConstantsSupported(const DeviceInfo& device_info, +bool IsConvConstantsSupported(const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr) { - if (device_info.IsAMD() && - definition.precision != CalculationsPrecision::F32 && + if (gpu_info.IsAMD() && definition.precision != CalculationsPrecision::F32 && definition.src_tensors[0].storage_type != TensorStorageType::BUFFER) { // BUG, some AMD gpus crashe without it return false; @@ -259,12 +258,12 @@ bool IsConvConstantsSupported(const DeviceInfo& device_info, ? 
sizeof(float) : sizeof(half); const int filters_buffer_size = filters_count * float_size; - const int kConstantMaxSize = GetOptimalMaxConstantSize(device_info); + const int kConstantMaxSize = GetOptimalMaxConstantSize(gpu_info); const int flt4_registers = DivideRoundUp(w_shape.o, 4); return filters_buffer_size <= kConstantMaxSize && flt4_registers <= 8; } -GPUOperation CreateConvConstants(const DeviceInfo& device_info, +GPUOperation CreateConvConstants(const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr) { const bool use_dot_conv = @@ -286,11 +285,11 @@ GPUOperation CreateConvConstants(const DeviceInfo& device_info, op.code_ = GenerateConvolutionConstantCode( definition, attr.weights.shape, stride_correction, use_dot_conv, &op); if (definition.precision == CalculationsPrecision::F16 && - device_info.IsAdreno() && device_info.adreno_info.IsAdreno3xx()) { + gpu_info.IsAdreno() && gpu_info.adreno_info.IsAdreno3xx()) { op.compiler_options_.push_back(CompilerOptions::ADRENO_FULL_SIMD_LINE); } if (definition.precision != CalculationsPrecision::F32 && - device_info.IsPowerVR()) { + gpu_info.IsPowerVR()) { // BUG, some PowerVRs (GE8320) produce incorrect result without it op.compiler_options_.push_back(CompilerOptions::CL_OPT_DISABLE); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h index e80bcbdd14a..5a7cd248999 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h @@ -152,11 +152,11 @@ void UploadWeightsForConvConstants(const tflite::gpu::Tensor& weights, absl::make_unique(std::move(desc))); } -bool IsConvConstantsSupported(const DeviceInfo& device_info, +bool IsConvConstantsSupported(const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr); -GPUOperation CreateConvConstants(const DeviceInfo& device_info, +GPUOperation 
CreateConvConstants(const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants_test.cc index 17821e14e0a..0a38cee3738 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants_test.cc @@ -56,7 +56,7 @@ TEST_F(OpenCLOperationTest, ConvConstantsSimpleWeights) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; GPUOperation operation = - CreateConvConstants(creation_context_.GetDeviceInfo(), op_def, attr); + CreateConvConstants(creation_context_.GetGpuInfo(), op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 2, 1), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -91,7 +91,7 @@ TEST_F(OpenCLOperationTest, ConvConstants) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; GPUOperation operation = - CreateConvConstants(creation_context_.GetDeviceInfo(), op_def, attr); + CreateConvConstants(creation_context_.GetGpuInfo(), op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 2, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc index 580a0507563..66d9906fda2 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc @@ -150,35 +150,35 @@ std::string GenerateBlockCoords(const int4& block_size, ConvPowerVR::ConvPowerVR(const OperationDef& definition, const Convolution2DAttributes& attr, - const DeviceInfo& device_info, const BHWC* dst_shape) + const GpuInfo& gpu_info, const BHWC* dst_shape) : GPUOperation(definition), stride_(attr.strides.w, attr.strides.h, 
1, 1), padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, 0, 0), kernel_size_(attr.weights.shape.w, attr.weights.shape.h, 1, 1), dilation_(attr.dilations.w, attr.dilations.h, 1, 1), - conv_params_(GuessBestParams(device_info, definition, attr, dst_shape)) {} + conv_params_(GuessBestParams(gpu_info, definition, attr, dst_shape)) {} ConvPowerVR::ConvPowerVR(const OperationDef& definition, const Convolution2DAttributes& attr, - const BHWC& weights_shape, - const DeviceInfo& device_info, const BHWC* dst_shape) + const BHWC& weights_shape, const GpuInfo& gpu_info, + const BHWC* dst_shape) : GPUOperation(definition), stride_(attr.strides.w, attr.strides.h, 1, 1), padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, 0, 0), kernel_size_(weights_shape.w, weights_shape.h, 1, 1), dilation_(attr.dilations.w, attr.dilations.h, 1, 1), - conv_params_(GuessBestParams(device_info, definition, attr, weights_shape, + conv_params_(GuessBestParams(gpu_info, definition, attr, weights_shape, dst_shape)) {} ConvPowerVR::ConvPowerVR(const OperationDef& definition, const FullyConnectedAttributes& attr, - const DeviceInfo& device_info, const BHWC* dst_shape) + const GpuInfo& gpu_info, const BHWC* dst_shape) : GPUOperation(definition), stride_(1, 1, 1, 1), padding_(0, 0, 0, 0), kernel_size_(1, 1, 1, 1), dilation_(1, 1, 1, 1), - conv_params_(GuessBestParams(device_info, definition, attr, dst_shape)) {} + conv_params_(GuessBestParams(gpu_info, definition, attr, dst_shape)) {} ConvPowerVR::ConvPowerVR(const OperationDef& definition) : GPUOperation(definition), @@ -197,7 +197,7 @@ ConvPowerVR::ConvPowerVR(ConvPowerVR&& operation) ConvPowerVR::ConvPowerVR(const OperationDef& definition, const Convolution3DAttributes& attr, - const DeviceInfo& device_info, const BHWDC* dst_shape) + const GpuInfo& gpu_info, const BHWDC* dst_shape) : GPUOperation(definition), stride_(attr.strides.w, attr.strides.h, attr.strides.d, 1), padding_(-attr.padding.prepended.w, 
-attr.padding.prepended.h, @@ -205,7 +205,7 @@ ConvPowerVR::ConvPowerVR(const OperationDef& definition, kernel_size_(attr.weights.shape.w, attr.weights.shape.h, attr.weights.shape.d, 1), dilation_(attr.dilations.w, attr.dilations.h, attr.dilations.d, 1), - conv_params_(GuessBestParams(device_info, definition, attr, dst_shape)) {} + conv_params_(GuessBestParams(gpu_info, definition, attr, dst_shape)) {} ConvPowerVR& ConvPowerVR::operator=(ConvPowerVR&& operation) { if (this != &operation) { @@ -219,19 +219,18 @@ ConvPowerVR& ConvPowerVR::operator=(ConvPowerVR&& operation) { return *this; } -void ConvPowerVR::GenerateCode(const DeviceInfo& device_info) { +void ConvPowerVR::GenerateCode(const GpuInfo& gpu_info) { if (conv_params_.linear_spatial) { grid_dimension_ = 2; } const bool stride_correction = definition_.IsBatchSupported() && stride_.x != 1; - code_ = - GenerateConv(device_info, definition_, stride_correction, conv_params_); + code_ = GenerateConv(gpu_info, definition_, stride_correction, conv_params_); if (definition_.precision == CalculationsPrecision::F16 && - device_info.IsPowerVR()) { + gpu_info.IsPowerVR()) { compiler_options_.push_back(CompilerOptions::POWERVR_FP16); } - if (conv_params_.IsPrivateMemBroadcast() && device_info.IsCL20OrHigher()) { + if (conv_params_.IsPrivateMemBroadcast() && gpu_info.IsCL20OrHigher()) { compiler_options_.push_back(CompilerOptions::CL_2_0); } bool kernel_is_trivial = @@ -239,7 +238,7 @@ void ConvPowerVR::GenerateCode(const DeviceInfo& device_info) { if (definition_.src_tensors[0].HasAxis(Axis::DEPTH)) { kernel_is_trivial = kernel_is_trivial & conv_params_.z_kernel_is_1; } - if (device_info.IsAdreno() && device_info.adreno_info.IsAdreno3xx() && + if (gpu_info.IsAdreno() && gpu_info.adreno_info.IsAdreno3xx() && definition_.precision == CalculationsPrecision::F16 && kernel_is_trivial) { compiler_options_.push_back(CompilerOptions::ADRENO_FULL_SIMD_LINE); @@ -306,7 +305,7 @@ int3 ConvPowerVR::GetGridSize() const { } void 
ConvPowerVR::GetPossibleKernelWorkGroups( - TuningType tuning_type, const DeviceInfo& device_info, + TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, std::vector* work_groups) const { if (conv_params_.weights_upload_type == WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP || @@ -316,11 +315,11 @@ void ConvPowerVR::GetPossibleKernelWorkGroups( work_groups->push_back(work_group_size_); return; } - GetPossibleWorkGroupsConv(tuning_type, device_info, kernel_info, grid_size_, + GetPossibleWorkGroupsConv(tuning_type, gpu_info, kernel_info, grid_size_, work_groups); } -std::string ConvPowerVR::GenerateConv(const DeviceInfo& device_info, +std::string ConvPowerVR::GenerateConv(const GpuInfo& gpu_info, const OperationDef& op_def, bool stride_correction, const ConvParams& conv_params) { @@ -446,9 +445,9 @@ std::string ConvPowerVR::GenerateConv(const DeviceInfo& device_info, std::string c = GetCommonDefines(op_def.precision); if (use_simd_broadcast) { - if (device_info.cl_version == OpenCLVersion::CL_2_0) { + if (gpu_info.cl_version == OpenCLVersion::CL_2_0) { c += "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"; - } else if (device_info.SupportsExtension("cl_intel_subgroups")) { + } else if (gpu_info.SupportsExtension("cl_intel_subgroups")) { c += "#pragma OPENCL EXTENSION cl_intel_subgroups : enable\n"; } } @@ -459,7 +458,7 @@ std::string ConvPowerVR::GenerateConv(const DeviceInfo& device_info, std::to_string(work_group_size_.y) + ", " + std::to_string(work_group_size_.z) + ")))\n"; } - if (use_simd_broadcast && device_info.IsIntel()) { + if (use_simd_broadcast && gpu_info.IsIntel()) { c += "__attribute__((intel_reqd_sub_group_size(" + std::to_string(simd_size) + ")))\n"; } @@ -714,7 +713,7 @@ std::string ConvPowerVR::GenerateConv(const DeviceInfo& device_info, } } }; - const bool conditional_read = device_info.IsMali(); + const bool conditional_read = gpu_info.IsMali(); auto read_src = [&]() { const std::string cl_type = 
ToCLDataType(conv_params.weights_data_type); for (int z = 0; z < block_size.z; ++z) { @@ -1012,8 +1011,8 @@ std::string ConvPowerVR::GenerateConv(const DeviceInfo& device_info, } ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( - const DeviceInfo& device_info, const OperationDef& definition, - int src_depth, int dst_depth, bool x_kernel_is_1, bool y_kernel_is_1, + const GpuInfo& gpu_info, const OperationDef& definition, int src_depth, + int dst_depth, bool x_kernel_is_1, bool y_kernel_is_1, bool different_weights_for_height, const BHWC* dst_shape) { ConvParams conv_params; conv_params.linear_spatial = false; @@ -1022,7 +1021,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( conv_params.x_kernel_is_1 = x_kernel_is_1; conv_params.y_kernel_is_1 = y_kernel_is_1; conv_params.different_weights_for_height = different_weights_for_height; - if (device_info.IsNvidia()) { + if (gpu_info.IsNvidia()) { if (different_weights_for_height) { work_group_size_ = int3(32, 1, 1); work_group_launch_order_ = int3(2, 0, 1); @@ -1046,7 +1045,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( if (dst_shape) { int task_size = dst_shape->w * dst_shape->b * dst_shape->h * dst_depth; float task_size_per_cu = - static_cast(task_size) / device_info.compute_units_count; + static_cast(task_size) / gpu_info.compute_units_count; int block_size = conv_params.block_size.x * conv_params.block_size.y * conv_params.block_size.w; float threads_per_cu = task_size_per_cu / block_size; @@ -1067,7 +1066,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( if (src_depth % 4 == 0 && conv_params.block_size.w <= 2) { conv_params.src_depth_loop_size = 4; } - } else if (device_info.IsPowerVR()) { + } else if (gpu_info.IsPowerVR()) { if (different_weights_for_height) { work_group_size_ = int3(32, 1, 1); work_group_launch_order_ = int3(2, 0, 1); @@ -1115,7 +1114,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( } conv_params.block_size.x = 2; } - } else if (device_info.IsAMD()) { + 
} else if (gpu_info.IsAMD()) { if (different_weights_for_height) { work_group_size_ = int3(32, 1, 1); work_group_launch_order_ = int3(2, 0, 1); @@ -1144,12 +1143,12 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( if (src_depth % 2 == 0 && src_depth >= 16) { conv_params.src_depth_loop_size = 2; } - } else if (device_info.IsMali()) { + } else if (gpu_info.IsMali()) { int block_size = 2; if (dst_shape) { int task_size = dst_shape->w * dst_shape->b * dst_shape->h * dst_depth; block_size = GetRecommendedBlockSizeForConv( - device_info, definition.precision, task_size); + gpu_info, definition.precision, task_size); } if (!x_kernel_is_1 || !y_kernel_is_1) { block_size = std::min(block_size, 4); @@ -1172,7 +1171,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( conv_params.block_size = int4(1, 1, 1, 1); } conv_params.src_depth_loop_size = 1; - MaliInfo mali_info = device_info.mali_info; + MaliInfo mali_info = gpu_info.mali_info; if (src_depth % 2 == 0 && block_size <= 2 && !mali_info.IsMidgard()) { conv_params.src_depth_loop_size = 2; } @@ -1184,9 +1183,9 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( work_group_launch_order_ = int3(0, 1, 2); conv_params.fixed_work_group_size = false; conv_params.weights_upload_type = WeightsUploadType::GLOBAL_MEM; - } else if (device_info.IsAdreno()) { + } else if (gpu_info.IsAdreno()) { conv_params.block_size = int4(2, 2, 1, 2); - if (device_info.adreno_info.IsAdreno3xx()) { + if (gpu_info.adreno_info.IsAdreno3xx()) { if (definition.precision == CalculationsPrecision::F16) { conv_params.block_size = int4(2, 2, 1, 2); } else if (definition.precision == CalculationsPrecision::F32_F16) { @@ -1205,7 +1204,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( } else { conv_params.weights_upload_type = WeightsUploadType::TEXTURES_MEM_X4; } - } else if (device_info.IsIntel()) { + } else if (gpu_info.IsIntel()) { if (different_weights_for_height) { work_group_size_ = int3(16, 1, 1); work_group_launch_order_ = 
int3(0, 1, 2); @@ -1220,12 +1219,12 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( conv_params.src_depth_loop_size = 1; int sub_group_size = 16; const bool supports_subgroups = - device_info.SupportsExtension("cl_khr_subgroups") || - device_info.SupportsExtension("cl_intel_subgroups"); + gpu_info.SupportsExtension("cl_khr_subgroups") || + gpu_info.SupportsExtension("cl_intel_subgroups"); if (definition.precision != CalculationsPrecision::F32_F16 && supports_subgroups && - device_info.SupportsExtension("cl_intel_required_subgroup_size") && - device_info.SupportsSubGroupWithSize(sub_group_size)) { + gpu_info.SupportsExtension("cl_intel_required_subgroup_size") && + gpu_info.SupportsSubGroupWithSize(sub_group_size)) { conv_params.weights_upload_type = WeightsUploadType::PRIVATE_MEM_SIMD_BROADCAST; conv_params.simd_size = sub_group_size; @@ -1271,7 +1270,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( } ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC* dst_shape) { const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); @@ -1283,12 +1282,12 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( attr.dilations.h == 1 && attr.padding.prepended.h == 0 && attr.padding.appended.h == 0; - return GuessBestParams(device_info, definition, src_depth, dst_depth, + return GuessBestParams(gpu_info, definition, src_depth, dst_depth, x_kernel_is_1, y_kernel_is_1, false, dst_shape); } ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const Convolution3DAttributes& attr, const BHWDC* dst_shape) { const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); const int src_depth = 
DivideRoundUp(attr.weights.shape.i, 4); @@ -1312,10 +1311,10 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( shape.h = dst_shape->h * dst_shape->d; shape.w = dst_shape->w; shape.c = dst_shape->c; - result = GuessBestParams(device_info, definition, src_depth, dst_depth, + result = GuessBestParams(gpu_info, definition, src_depth, dst_depth, x_kernel_is_1, y_kernel_is_1, false, &shape); } else { - result = GuessBestParams(device_info, definition, src_depth, dst_depth, + result = GuessBestParams(gpu_info, definition, src_depth, dst_depth, x_kernel_is_1, y_kernel_is_1, false, nullptr); } result.z_kernel_is_1 = z_kernel_is_1; @@ -1323,7 +1322,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( } ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC& weights_shape, const BHWC* dst_shape) { const int dst_depth = DivideRoundUp(weights_shape.b, 4); @@ -1334,18 +1333,17 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( const bool y_kernel_is_1 = weights_shape.h == 1 && attr.strides.h == 1 && attr.dilations.h == 1 && attr.padding.prepended.h == 0 && attr.padding.appended.h == 0; - return GuessBestParams(device_info, definition, src_depth, dst_depth, + return GuessBestParams(gpu_info, definition, src_depth, dst_depth, x_kernel_is_1, y_kernel_is_1, false, dst_shape); } ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const FullyConnectedAttributes& attr, const BHWC* dst_shape) { const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); - ConvPowerVR::ConvParams params = - GuessBestParams(device_info, definition, src_depth, dst_depth, true, true, - false, dst_shape); + ConvPowerVR::ConvParams 
params = GuessBestParams( + gpu_info, definition, src_depth, dst_depth, true, true, false, dst_shape); work_group_size_.x *= work_group_size_.y; work_group_size_.y = 1; params.block_size.x *= params.block_size.y; @@ -1354,67 +1352,66 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( } ConvPowerVR::ConvParams ConvPowerVR::GuessBestParamsWinograd( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC* dst_shape) { const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); - ConvPowerVR::ConvParams params = - GuessBestParams(device_info, definition, src_depth, dst_depth, true, true, - true, dst_shape); + ConvPowerVR::ConvParams params = GuessBestParams( + gpu_info, definition, src_depth, dst_depth, true, true, true, dst_shape); params.block_size.x *= params.block_size.y; params.block_size.y = 1; return params; } -ConvPowerVR CreateConvPowerVR(const DeviceInfo& device_info, +ConvPowerVR CreateConvPowerVR(const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC* dst_shape) { - ConvPowerVR result(definition, attr, device_info, dst_shape); - result.GenerateCode(device_info); + ConvPowerVR result(definition, attr, gpu_info, dst_shape); + result.GenerateCode(gpu_info); result.UploadData(attr.weights, attr.bias); return result; } -ConvPowerVR CreateConvPowerVR(const DeviceInfo& device_info, +ConvPowerVR CreateConvPowerVR(const GpuInfo& gpu_info, const OperationDef& definition, const FullyConnectedAttributes& attr, const BHWC* dst_shape) { - ConvPowerVR result(definition, attr, device_info, dst_shape); - result.GenerateCode(device_info); + ConvPowerVR result(definition, attr, gpu_info, dst_shape); + result.GenerateCode(gpu_info); result.UploadData(attr.weights, attr.bias); return result; } -ConvPowerVR CreateConvPowerVRDynamicWeights(const 
DeviceInfo& device_info, +ConvPowerVR CreateConvPowerVRDynamicWeights(const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC& weights_shape, const BHWC* dst_shape) { - ConvPowerVR result(definition, attr, weights_shape, device_info, dst_shape); - result.GenerateCode(device_info); + ConvPowerVR result(definition, attr, weights_shape, gpu_info, dst_shape); + result.GenerateCode(gpu_info); result.UploadBias(attr.bias); return result; } -ConvPowerVR CreateConvPowerVRWino4x4To6x6(const DeviceInfo& device_info, +ConvPowerVR CreateConvPowerVRWino4x4To6x6(const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC* dst_shape) { ConvPowerVR result(definition); result.conv_params_ = - result.GuessBestParamsWinograd(device_info, definition, attr, dst_shape); - result.GenerateCode(device_info); + result.GuessBestParamsWinograd(gpu_info, definition, attr, dst_shape); + result.GenerateCode(gpu_info); result.UploadDataForWinograd4x4To6x6(attr.weights); return result; } -ConvPowerVR CreateConvPowerVR3D(const DeviceInfo& device_info, +ConvPowerVR CreateConvPowerVR3D(const GpuInfo& gpu_info, const OperationDef& definition, const Convolution3DAttributes& attr, const BHWDC* dst_shape) { - ConvPowerVR result(definition, attr, device_info, dst_shape); - result.GenerateCode(device_info); + ConvPowerVR result(definition, attr, gpu_info, dst_shape); + result.GenerateCode(gpu_info); result.UploadWeights(attr.weights); result.UploadBias(attr.bias); return result; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h index 30e412cd923..85289f63339 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h @@ -44,7 +44,7 @@ class ConvPowerVR : public GPUOperation { public: ConvPowerVR() = default; void GetPossibleKernelWorkGroups( - TuningType tuning_type, 
const DeviceInfo& device_info, + TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, std::vector* work_groups) const override; absl::Status BindArguments(ArgumentsBinder* args) override; @@ -106,20 +106,20 @@ class ConvPowerVR : public GPUOperation { }; ConvPowerVR(const OperationDef& definition, - const Convolution2DAttributes& attr, - const DeviceInfo& device_info, const BHWC* dst_shape = nullptr); + const Convolution2DAttributes& attr, const GpuInfo& gpu_info, + const BHWC* dst_shape = nullptr); ConvPowerVR(const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC& weights_shape, - const DeviceInfo& device_info, const BHWC* dst_shape = nullptr); + const GpuInfo& gpu_info, const BHWC* dst_shape = nullptr); ConvPowerVR(const OperationDef& definition, - const FullyConnectedAttributes& attr, - const DeviceInfo& device_info, const BHWC* dst_shape = nullptr); + const FullyConnectedAttributes& attr, const GpuInfo& gpu_info, + const BHWC* dst_shape = nullptr); explicit ConvPowerVR(const OperationDef& definition); ConvPowerVR(const OperationDef& definition, - const Convolution3DAttributes& attr, - const DeviceInfo& device_info, const BHWDC* dst_shape = nullptr); + const Convolution3DAttributes& attr, const GpuInfo& gpu_info, + const BHWDC* dst_shape = nullptr); - void GenerateCode(const DeviceInfo& device_info); + void GenerateCode(const GpuInfo& gpu_info); template void UploadData(const tflite::gpu::Tensor& weights, @@ -137,60 +137,60 @@ class ConvPowerVR : public GPUOperation { template void UploadBias(const tflite::gpu::Tensor& bias); - friend ConvPowerVR CreateConvPowerVR(const DeviceInfo& device_info, + friend ConvPowerVR CreateConvPowerVR(const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC* dst_shape); - friend ConvPowerVR CreateConvPowerVR(const DeviceInfo& device_info, + friend ConvPowerVR CreateConvPowerVR(const GpuInfo& gpu_info, const OperationDef& 
definition, const FullyConnectedAttributes& attr, const BHWC* dst_shape); friend ConvPowerVR CreateConvPowerVRDynamicWeights( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC& weights_shape, const BHWC* dst_shape); friend ConvPowerVR CreateConvPowerVRWino4x4To6x6( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC* dst_shape); - friend ConvPowerVR CreateConvPowerVR3D(const DeviceInfo& device_info, + friend ConvPowerVR CreateConvPowerVR3D(const GpuInfo& gpu_info, const OperationDef& definition, const Convolution3DAttributes& attr, const BHWDC* dst_shape); - ConvParams GuessBestParams(const DeviceInfo& device_info, + ConvParams GuessBestParams(const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC* dst_shape = nullptr); - ConvParams GuessBestParams(const DeviceInfo& device_info, + ConvParams GuessBestParams(const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC& weights_shape, const BHWC* dst_shape = nullptr); - ConvParams GuessBestParams(const DeviceInfo& device_info, + ConvParams GuessBestParams(const GpuInfo& gpu_info, const OperationDef& definition, const FullyConnectedAttributes& attr, const BHWC* dst_shape = nullptr); - ConvParams GuessBestParamsWinograd(const DeviceInfo& device_info, + ConvParams GuessBestParamsWinograd(const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC* dst_shape = nullptr); - ConvParams GuessBestParams(const DeviceInfo& device_info, + ConvParams GuessBestParams(const GpuInfo& gpu_info, const OperationDef& definition, const Convolution3DAttributes& attr, const BHWDC* dst_shape = nullptr); - ConvParams GuessBestParams(const DeviceInfo& device_info, + 
ConvParams GuessBestParams(const GpuInfo& gpu_info, const OperationDef& definition, int src_depth, int dst_depth, bool x_kernel_is_1, bool y_kernel_is_1, bool different_weights_for_height, const BHWC* dst_shape = nullptr); - std::string GenerateConv(const DeviceInfo& device_info, - const OperationDef& op_def, bool stride_correction, + std::string GenerateConv(const GpuInfo& gpu_info, const OperationDef& op_def, + bool stride_correction, const ConvParams& conv_params); int4 stride_; @@ -372,28 +372,28 @@ void ConvPowerVR::UploadWeights(const tflite::gpu::Tensor& weights) { } } -ConvPowerVR CreateConvPowerVR(const DeviceInfo& device_info, +ConvPowerVR CreateConvPowerVR(const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC* dst_shape = nullptr); -ConvPowerVR CreateConvPowerVR(const DeviceInfo& device_info, +ConvPowerVR CreateConvPowerVR(const GpuInfo& gpu_info, const OperationDef& definition, const FullyConnectedAttributes& attr, const BHWC* dst_shape = nullptr); -ConvPowerVR CreateConvPowerVRDynamicWeights(const DeviceInfo& device_info, +ConvPowerVR CreateConvPowerVRDynamicWeights(const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC& weights_shape, const BHWC* dst_shape = nullptr); -ConvPowerVR CreateConvPowerVRWino4x4To6x6(const DeviceInfo& device_info, +ConvPowerVR CreateConvPowerVRWino4x4To6x6(const GpuInfo& gpu_info, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC* dst_shape = nullptr); -ConvPowerVR CreateConvPowerVR3D(const DeviceInfo& device_info, +ConvPowerVR CreateConvPowerVR3D(const GpuInfo& gpu_info, const OperationDef& definition, const Convolution3DAttributes& attr, const BHWDC* dst_shape = nullptr); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr_test.cc index 4fc5f57578f..b639306531c 100644 --- 
a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr_test.cc @@ -56,7 +56,7 @@ TEST_F(OpenCLOperationTest, ConvPowerVR1x1SimpleWeights) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; ConvPowerVR operation = - CreateConvPowerVR(creation_context_.GetDeviceInfo(), op_def, attr); + CreateConvPowerVR(creation_context_.GetGpuInfo(), op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 2, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -91,7 +91,7 @@ TEST_F(OpenCLOperationTest, ConvPowerVR1x1) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; ConvPowerVR operation = - CreateConvPowerVR(creation_context_.GetDeviceInfo(), op_def, attr); + CreateConvPowerVR(creation_context_.GetGpuInfo(), op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 2, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -126,7 +126,7 @@ TEST_F(OpenCLOperationTest, ConvPowerVRSimpleWeights) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; ConvPowerVR operation = - CreateConvPowerVR(creation_context_.GetDeviceInfo(), op_def, attr); + CreateConvPowerVR(creation_context_.GetGpuInfo(), op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 2, 1), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -161,7 +161,7 @@ TEST_F(OpenCLOperationTest, ConvPowerVR) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; ConvPowerVR operation = - CreateConvPowerVR(creation_context_.GetDeviceInfo(), op_def, attr); + CreateConvPowerVR(creation_context_.GetGpuInfo(), op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 2, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, diff --git 
a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc index ef32abb7c87..53ffa1f577f 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc @@ -31,14 +31,14 @@ namespace cl { ConvolutionTransposed::ConvolutionTransposed( const OperationDef& definition, const ConvolutionTransposedAttributes& attr, - const DeviceInfo& device_info) + const GpuInfo& gpu_info) : GPUOperation(definition), stride_(attr.stride.w, attr.stride.h, 1, 1), block_size_(2, 2, 1, 2) { - const bool weights_are_buffer = device_info.IsMali(); + const bool weights_are_buffer = gpu_info.IsMali(); const bool is_f16 = definition.precision == CalculationsPrecision::F16; - if (device_info.IsMali()) { - if (device_info.mali_info.IsMidgard()) { + if (gpu_info.IsMali()) { + if (gpu_info.mali_info.IsMidgard()) { block_size_ = is_f16 ? int4(2, 1, 1, 2) : int4(2, 1, 1, 1); } else { block_size_ = is_f16 ? 
int4(2, 2, 1, 2) : int4(2, 2, 1, 1); @@ -46,7 +46,7 @@ ConvolutionTransposed::ConvolutionTransposed( } const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); if (dst_depth == 1 || dst_depth == 3) { - if (!device_info.IsMali()) { + if (!gpu_info.IsMali()) { block_size_.y *= block_size_.w; } block_size_.w = 1; @@ -58,22 +58,21 @@ ConvolutionTransposed::ConvolutionTransposed( args_.AddInt("padding_y", attr.padding.prepended.h); args_.AddInt("kernel_size_x", attr.weights.shape.w); args_.AddInt("kernel_size_y", attr.weights.shape.h); - code_ = GenerateConvolutionTransposedCode(definition_, device_info, + code_ = GenerateConvolutionTransposedCode(definition_, gpu_info, weights_are_buffer, block_size_); UploadWeights(attr.weights, weights_are_buffer); } ConvolutionTransposed::ConvolutionTransposed( const OperationDef& definition, - const ConvolutionTransposed3DAttributes& attr, - const DeviceInfo& device_info) + const ConvolutionTransposed3DAttributes& attr, const GpuInfo& gpu_info) : GPUOperation(definition), stride_(attr.stride.w, attr.stride.h, attr.stride.d, 1), block_size_(2, 2, 1, 2) { - const bool weights_are_buffer = device_info.IsMali(); + const bool weights_are_buffer = gpu_info.IsMali(); const bool is_f16 = definition.precision == CalculationsPrecision::F16; - if (device_info.IsMali()) { - if (device_info.mali_info.IsMidgard()) { + if (gpu_info.IsMali()) { + if (gpu_info.mali_info.IsMidgard()) { block_size_ = is_f16 ? int4(2, 1, 1, 2) : int4(2, 1, 1, 1); } else { block_size_ = is_f16 ? 
int4(2, 2, 1, 2) : int4(2, 2, 1, 1); @@ -81,7 +80,7 @@ ConvolutionTransposed::ConvolutionTransposed( } const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); if (dst_depth == 1 || dst_depth == 3) { - if (!device_info.IsMali()) { + if (!gpu_info.IsMali()) { block_size_.y *= block_size_.w; } block_size_.w = 1; @@ -97,7 +96,7 @@ ConvolutionTransposed::ConvolutionTransposed( args_.AddInt("kernel_size_y", attr.weights.shape.h); args_.AddInt("kernel_size_z", attr.weights.shape.d); args_.AddInt("grid_size_y"); - code_ = GenerateConvolutionTransposedCode(definition_, device_info, + code_ = GenerateConvolutionTransposedCode(definition_, gpu_info, weights_are_buffer, block_size_); UploadWeights(attr.weights, weights_are_buffer); } @@ -118,7 +117,7 @@ ConvolutionTransposed& ConvolutionTransposed::operator=( } std::string ConvolutionTransposed::GenerateConvolutionTransposedCode( - const OperationDef& op_def, const DeviceInfo& device_info, + const OperationDef& op_def, const GpuInfo& gpu_info, bool weights_are_buffer, const int4& block_size) { auto src_desc = op_def.src_tensors[0]; src_desc.SetAddressMode(AddressMode::kZero); @@ -398,7 +397,7 @@ std::string ConvolutionTransposed::GenerateConvolutionTransposedCode( c += " int x_c = kernel_index * args.src_tensor.Slices();\n"; } c += " for (int s = 0; s < args.src_tensor.Slices(); ++s) {\n"; - const bool conditional_read = device_info.IsMali(); + const bool conditional_read = gpu_info.IsMali(); for (int z = 0; z < block_size.z; ++z) { const std::string zind = std::to_string(z); for (int y = 0; y < block_size.y; ++y) { @@ -536,16 +535,16 @@ int3 ConvolutionTransposed::GetGridSize() const { } void ConvolutionTransposed::GetPossibleKernelWorkGroups( - TuningType tuning_type, const DeviceInfo& device_info, + TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, std::vector* work_groups) const { - GetPossibleWorkGroupsConv(tuning_type, device_info, kernel_info, grid_size_, + 
GetPossibleWorkGroupsConv(tuning_type, gpu_info, kernel_info, grid_size_, work_groups); } ConvolutionTransposed CreateConvolutionTransposed( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const ConvolutionTransposedAttributes& attr) { - ConvolutionTransposed result(definition, attr, device_info); + ConvolutionTransposed result(definition, attr, gpu_info); TensorLinearDescriptor desc; desc.storage_type = @@ -558,9 +557,9 @@ ConvolutionTransposed CreateConvolutionTransposed( } ConvolutionTransposed CreateConvolutionTransposed3D( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const ConvolutionTransposed3DAttributes& attr) { - ConvolutionTransposed result(definition, attr, device_info); + ConvolutionTransposed result(definition, attr, gpu_info); TensorLinearDescriptor desc; desc.storage_type = diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h index 5aa86f33e5a..05dee07ba21 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h @@ -41,7 +41,7 @@ class ConvolutionTransposed : public GPUOperation { public: ConvolutionTransposed() = default; void GetPossibleKernelWorkGroups( - TuningType tuning_type, const DeviceInfo& device_info, + TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, std::vector* work_groups) const override; absl::Status BindArguments(ArgumentsBinder* args) override; @@ -55,17 +55,17 @@ class ConvolutionTransposed : public GPUOperation { private: friend ConvolutionTransposed CreateConvolutionTransposed( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const ConvolutionTransposedAttributes& attr); friend 
ConvolutionTransposed CreateConvolutionTransposed3D( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const ConvolutionTransposed3DAttributes& attr); ConvolutionTransposed(const OperationDef& definition, const ConvolutionTransposedAttributes& attr, - const DeviceInfo& device_info); + const GpuInfo& gpu_info); ConvolutionTransposed(const OperationDef& definition, const ConvolutionTransposed3DAttributes& attr, - const DeviceInfo& device_info); + const GpuInfo& gpu_info); template void UploadWeights(const tflite::gpu::Tensor& weights, @@ -76,7 +76,7 @@ class ConvolutionTransposed : public GPUOperation { bool weights_are_buffer); std::string GenerateConvolutionTransposedCode(const OperationDef& op_def, - const DeviceInfo& device_info, + const GpuInfo& gpu_info, bool weights_are_buffer, const int4& block_size); int4 stride_; @@ -206,11 +206,11 @@ void ConvolutionTransposed::UploadWeights( } ConvolutionTransposed CreateConvolutionTransposed( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const ConvolutionTransposedAttributes& attr); ConvolutionTransposed CreateConvolutionTransposed3D( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const ConvolutionTransposed3DAttributes& attr); } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc index edd3d1e0de2..ed5a40d57ed 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc @@ -26,15 +26,15 @@ namespace tflite { namespace gpu { namespace cl { ConvolutionTransposed3x3::ConvolutionTransposed3x3( - const OperationDef& definition, const DeviceInfo& device_info, int2 padding) + const 
OperationDef& definition, const GpuInfo& gpu_info, int2 padding) : GPUOperation(definition), padding_(padding) { work_group_size_ = int3(8, 4, 1); work_group_launch_order_ = int3(2, 0, 1); - if (device_info.IsPowerVR()) { + if (gpu_info.IsPowerVR()) { weights_upload_type_ = WeightsUploadType::LOCAL_MEM_ASYNC; - } else if (device_info.IsNvidia() || device_info.IsIntel()) { + } else if (gpu_info.IsNvidia() || gpu_info.IsIntel()) { weights_upload_type_ = WeightsUploadType::LOCAL_MEM_BY_THREADS; - } else if (device_info.IsAMD()) { + } else if (gpu_info.IsAMD()) { weights_upload_type_ = WeightsUploadType::CONSTANT_MEM; } else { weights_upload_type_ = WeightsUploadType::GLOBAL_MEM; @@ -42,7 +42,7 @@ ConvolutionTransposed3x3::ConvolutionTransposed3x3( code_ = GenerateConvolutionTransposedCode(definition_, weights_upload_type_, padding_, work_group_launch_order_); if (definition_.precision == CalculationsPrecision::F16 && - device_info.IsPowerVR()) { + gpu_info.IsPowerVR()) { compiler_options_.push_back(CompilerOptions::POWERVR_FP16); } } @@ -332,14 +332,14 @@ absl::Status ConvolutionTransposed3x3::BindArguments(ArgumentsBinder* args) { } void ConvolutionTransposed3x3::GetPossibleKernelWorkGroups( - TuningType tuning_type, const DeviceInfo& device_info, + TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, std::vector* work_groups) const { if (weights_upload_type_ == WeightsUploadType::LOCAL_MEM_ASYNC || weights_upload_type_ == WeightsUploadType::LOCAL_MEM_BY_THREADS) { work_groups->push_back(work_group_size_); return; } - GetPossibleWorkGroupsConv(tuning_type, device_info, kernel_info, grid_size_, + GetPossibleWorkGroupsConv(tuning_type, gpu_info, kernel_info, grid_size_, work_groups); } @@ -358,10 +358,10 @@ bool IsConvolutionTransposed3x3Supported( } ConvolutionTransposed3x3 CreateConvolutionTransposed3x3( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const 
ConvolutionTransposedAttributes& attr) { const int2 padding = int2(attr.padding.prepended.w, attr.padding.prepended.h); - ConvolutionTransposed3x3 result(definition, device_info, padding); + ConvolutionTransposed3x3 result(definition, gpu_info, padding); result.UploadWeights(attr.weights); TensorLinearDescriptor desc; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h index 074fc23b0e7..89abf70498a 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h @@ -38,7 +38,7 @@ class ConvolutionTransposed3x3 : public GPUOperation { public: ConvolutionTransposed3x3() = default; void GetPossibleKernelWorkGroups( - TuningType tuning_type, const DeviceInfo& device_info, + TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, std::vector* work_groups) const override; absl::Status BindArguments(ArgumentsBinder* args) override; @@ -59,9 +59,9 @@ class ConvolutionTransposed3x3 : public GPUOperation { private: ConvolutionTransposed3x3(const OperationDef& definition, - const DeviceInfo& device_info, int2 padding); + const GpuInfo& gpu_info, int2 padding); friend ConvolutionTransposed3x3 CreateConvolutionTransposed3x3( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const ConvolutionTransposedAttributes& attr); template void UploadWeights(const tflite::gpu::Tensor& weights); @@ -174,7 +174,7 @@ bool IsConvolutionTransposed3x3Supported( const ConvolutionTransposedAttributes& attr); ConvolutionTransposed3x3 CreateConvolutionTransposed3x3( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const ConvolutionTransposedAttributes& attr); } // namespace cl diff --git 
a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_test.cc index 8fbf6b05b43..fae3f24e589 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_test.cc @@ -55,7 +55,7 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposed3x3) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; ConvolutionTransposed3x3 operation = CreateConvolutionTransposed3x3( - creation_context_.GetDeviceInfo(), op_def, attr); + creation_context_.GetGpuInfo(), op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 4, 4, 1), &dst_tensor)); EXPECT_THAT(dst_tensor.data, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc index fda84236734..4cabbbee376 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc @@ -198,7 +198,7 @@ bool IsConvolutionTransposed3x3ThinSupported( } ConvolutionTransposed3x3Thin CreateConvolutionTransposed3x3Thin( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const ConvolutionTransposedAttributes& attr) { ConvolutionTransposed3x3Thin result(definition, attr); result.UploadData(attr.weights, attr.bias); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h index 5905f6f6404..8ff50f95f31 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h @@ -49,7 +49,7 @@ class 
ConvolutionTransposed3x3Thin : public GPUOperation { private: friend ConvolutionTransposed3x3Thin CreateConvolutionTransposed3x3Thin( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const ConvolutionTransposedAttributes& attr); explicit ConvolutionTransposed3x3Thin( const OperationDef& definition, @@ -160,7 +160,7 @@ bool IsConvolutionTransposed3x3ThinSupported( const ConvolutionTransposedAttributes& attr); ConvolutionTransposed3x3Thin CreateConvolutionTransposed3x3Thin( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const ConvolutionTransposedAttributes& attr); } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin_test.cc index 83df267a884..dae8b811a01 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin_test.cc @@ -55,7 +55,7 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposed3x3ThinSimpleWeights) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; ConvolutionTransposed3x3Thin operation = - CreateConvolutionTransposed3x3Thin(creation_context_.GetDeviceInfo(), + CreateConvolutionTransposed3x3Thin(creation_context_.GetGpuInfo(), op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 4, 4, 1), &dst_tensor)); @@ -91,7 +91,7 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposed3x3Thin) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; ConvolutionTransposed3x3Thin operation = - CreateConvolutionTransposed3x3Thin(creation_context_.GetDeviceInfo(), + CreateConvolutionTransposed3x3Thin(creation_context_.GetGpuInfo(), op_def, attr); 
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 4, 4, 1), &dst_tensor)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc index 2f186eadf9f..54671d973f4 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc @@ -26,16 +26,16 @@ namespace tflite { namespace gpu { namespace cl { ConvolutionTransposed4x4::ConvolutionTransposed4x4( - const OperationDef& definition, const DeviceInfo& device_info, + const OperationDef& definition, const GpuInfo& gpu_info, const ConvolutionTransposedAttributes& attr) : GPUOperation(definition) { work_group_size_ = int3(8, 4, 1); WeightsUploadType weights_upload_type = WeightsUploadType::GLOBAL_MEM; - if (device_info.IsPowerVR()) { + if (gpu_info.IsPowerVR()) { weights_upload_type = WeightsUploadType::LOCAL_MEM_ASYNC; - } else if (device_info.IsNvidia() || device_info.IsIntel()) { + } else if (gpu_info.IsNvidia() || gpu_info.IsIntel()) { weights_upload_type = WeightsUploadType::LOCAL_MEM_BY_THREADS; - } else if (device_info.IsAMD()) { + } else if (gpu_info.IsAMD()) { weights_upload_type = WeightsUploadType::CONSTANT_MEM; } else { weights_upload_type = WeightsUploadType::GLOBAL_MEM; @@ -44,7 +44,7 @@ ConvolutionTransposed4x4::ConvolutionTransposed4x4( code_ = GenerateConvolutionTransposedCode(definition_, weights_upload_type); UploadWeights(attr.weights, weights_upload_type); if (definition_.precision == CalculationsPrecision::F16 && - device_info.IsPowerVR()) { + gpu_info.IsPowerVR()) { compiler_options_.push_back(CompilerOptions::POWERVR_FP16); } } @@ -332,9 +332,9 @@ bool IsConvolutionTransposed4x4Supported( } ConvolutionTransposed4x4 CreateConvolutionTransposed4x4( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const 
ConvolutionTransposedAttributes& attr) { - ConvolutionTransposed4x4 result(definition, device_info, attr); + ConvolutionTransposed4x4 result(definition, gpu_info, attr); TensorLinearDescriptor desc; desc.storage_type = LinearStorageType::TEXTURE_2D; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h index 17d63233864..febbc575c33 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h @@ -38,7 +38,7 @@ class ConvolutionTransposed4x4 : public GPUOperation { public: ConvolutionTransposed4x4() = default; void GetPossibleKernelWorkGroups( - TuningType tuning_type, const DeviceInfo& device_info, + TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, std::vector* work_groups) const override { work_groups->push_back(work_group_size_); @@ -61,10 +61,10 @@ class ConvolutionTransposed4x4 : public GPUOperation { private: ConvolutionTransposed4x4(const OperationDef& definition, - const DeviceInfo& device_info, + const GpuInfo& gpu_info, const ConvolutionTransposedAttributes& attr); friend ConvolutionTransposed4x4 CreateConvolutionTransposed4x4( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const ConvolutionTransposedAttributes& attr); template void UploadWeights(const tflite::gpu::Tensor& weights, @@ -161,7 +161,7 @@ bool IsConvolutionTransposed4x4Supported( const ConvolutionTransposedAttributes& attr); ConvolutionTransposed4x4 CreateConvolutionTransposed4x4( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const ConvolutionTransposedAttributes& attr); } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4_test.cc 
b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4_test.cc index a65479d72b8..c54dfec060b 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4_test.cc @@ -56,7 +56,7 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposed4x4) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; ConvolutionTransposed4x4 operation = CreateConvolutionTransposed4x4( - creation_context_.GetDeviceInfo(), op_def, attr); + creation_context_.GetGpuInfo(), op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 4, 4, 1), &dst_tensor)); EXPECT_THAT(dst_tensor.data, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_test.cc index 1da989d111d..e2571ea8e15 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_test.cc @@ -56,7 +56,7 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposedSimpleWeights) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; ConvolutionTransposed operation = CreateConvolutionTransposed( - creation_context_.GetDeviceInfo(), op_def, attr); + creation_context_.GetGpuInfo(), op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 4, 4, 2), &dst_tensor)); EXPECT_THAT( @@ -94,7 +94,7 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposed) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; ConvolutionTransposed operation = CreateConvolutionTransposed( - creation_context_.GetDeviceInfo(), op_def, attr); + creation_context_.GetGpuInfo(), op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 4, 4, 1), &dst_tensor)); 
EXPECT_THAT( diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc index 46066fe5a26..47aa6a70175 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc @@ -28,13 +28,13 @@ namespace cl { ConvolutionTransposedThin::ConvolutionTransposedThin( const OperationDef& definition, const ConvolutionTransposedAttributes& attr, - const DeviceInfo& device_info) + const GpuInfo& gpu_info) : GPUOperation(definition) { code_ = GenerateConvolutionTransposedCode( definition_, DivideRoundUp(attr.weights.shape.i, 4), attr.weights.shape.o, int2(attr.weights.shape.w, attr.weights.shape.h)); if (definition_.precision == CalculationsPrecision::F16 && - device_info.IsAdreno() && device_info.adreno_info.IsAdreno3xx()) { + gpu_info.IsAdreno() && gpu_info.adreno_info.IsAdreno3xx()) { compiler_options_.push_back(CompilerOptions::ADRENO_FULL_SIMD_LINE); } } @@ -166,9 +166,9 @@ bool IsConvolutionTransposedThinSupported( } ConvolutionTransposedThin CreateConvolutionTransposedThin( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const ConvolutionTransposedAttributes& attr) { - ConvolutionTransposedThin result(definition, attr, device_info); + ConvolutionTransposedThin result(definition, attr, gpu_info); result.UploadData(attr.weights, attr.bias); return result; } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.h index 7599ad23fde..8b57ac03e5f 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.h @@ -48,11 +48,11 @@ class ConvolutionTransposedThin : public GPUOperation { private: friend 
ConvolutionTransposedThin CreateConvolutionTransposedThin( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const ConvolutionTransposedAttributes& attr); ConvolutionTransposedThin(const OperationDef& definition, const ConvolutionTransposedAttributes& attr, - const DeviceInfo& device_info); + const GpuInfo& gpu_info); template void UploadData(const tflite::gpu::Tensor& weights, const tflite::gpu::Tensor& biases); @@ -141,7 +141,7 @@ bool IsConvolutionTransposedThinSupported( const ConvolutionTransposedAttributes& attr); ConvolutionTransposedThin CreateConvolutionTransposedThin( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const ConvolutionTransposedAttributes& attr); } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin_test.cc index 16968008e24..01de62ec8d0 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin_test.cc @@ -56,7 +56,7 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposedThinSimpleWeights) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; ConvolutionTransposedThin operation = CreateConvolutionTransposedThin( - creation_context_.GetDeviceInfo(), op_def, attr); + creation_context_.GetGpuInfo(), op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 4, 4, 2), &dst_tensor)); EXPECT_THAT( @@ -94,7 +94,7 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposedThin) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; ConvolutionTransposedThin operation = CreateConvolutionTransposedThin( - creation_context_.GetDeviceInfo(), op_def, attr); + 
creation_context_.GetGpuInfo(), op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 4, 4, 1), &dst_tensor)); EXPECT_THAT( diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc index 3f625b20bc3..538183231b0 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc @@ -235,9 +235,9 @@ std::string GenerateDepthwiseConvolutionCode( } // namespace GPUOperation CreateDepthwiseConvolution2D( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const DepthwiseConvolution2DAttributes& attr) { - bool weights_are_buffer = device_info.IsMali(); + bool weights_are_buffer = gpu_info.IsMali(); GPUOperation op(definition); op.args_.AddInt("kernel_size_x", attr.weights.shape.w); op.args_.AddInt("stride_x", attr.strides.w); @@ -270,7 +270,7 @@ GPUOperation CreateDepthwiseConvolution2D( } GPUOperation CreateDepthwiseConvolution2DDynamicWeights( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const DepthwiseConvolution2DAttributes& attr) { GPUOperation op(definition); op.args_.AddInt("stride_x", attr.strides.w); @@ -286,8 +286,8 @@ GPUOperation CreateDepthwiseConvolution2DDynamicWeights( op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ; TensorLinearDescriptor desc; - desc.storage_type = device_info.IsMali() ? LinearStorageType::BUFFER - : LinearStorageType::TEXTURE_2D; + desc.storage_type = gpu_info.IsMali() ? 
LinearStorageType::BUFFER + : LinearStorageType::TEXTURE_2D; desc.element_type = definition.GetDataType(); desc.UploadLinearData(attr.bias); op.args_.AddObject( @@ -296,9 +296,9 @@ GPUOperation CreateDepthwiseConvolution2DDynamicWeights( } GPUOperation CreateDepthwiseConvolution3D( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const DepthwiseConvolution3DAttributes& attr) { - bool weights_are_buffer = device_info.IsMali(); + bool weights_are_buffer = gpu_info.IsMali(); GPUOperation op(definition); op.args_.AddInt("kernel_size_x", attr.weights.shape.w); op.args_.AddInt("stride_x", attr.strides.w); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h index 3bb034849bc..5c708622ff9 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h @@ -183,15 +183,15 @@ void UploadWeightsForDWConv3D(const tflite::gpu::Tensor& weights, } GPUOperation CreateDepthwiseConvolution2D( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const DepthwiseConvolution2DAttributes& attr); GPUOperation CreateDepthwiseConvolution2DDynamicWeights( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const DepthwiseConvolution2DAttributes& attr); GPUOperation CreateDepthwiseConvolution3D( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const DepthwiseConvolution3DAttributes& attr); } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc index 520c8405bb4..7b532f29dd9 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc +++ 
b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc @@ -29,15 +29,14 @@ namespace cl { DepthwiseConv3x3::DepthwiseConv3x3(const OperationDef& definition, bool weights_are_buffer, bool local_mem_uploads, - const DeviceInfo& device_info) - : GPUOperation(definition), - local_mem_uploads_(local_mem_uploads) { + const GpuInfo& gpu_info) + : GPUOperation(definition), local_mem_uploads_(local_mem_uploads) { work_group_size_ = int3(8, 4, 1); code_ = GenerateDepthwiseConvCode(definition_, weights_are_buffer, local_mem_uploads_); if (definition_.precision == CalculationsPrecision::F16 && - device_info.IsPowerVR()) { + gpu_info.IsPowerVR()) { compiler_options_.push_back(CompilerOptions::POWERVR_FP16); } } @@ -293,12 +292,12 @@ int3 DepthwiseConv3x3::GetGridSize() const { } void DepthwiseConv3x3::GetPossibleKernelWorkGroups( - TuningType tuning_type, const DeviceInfo& device_info, + TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, std::vector* work_groups) const { if (local_mem_uploads_) { work_groups->push_back(work_group_size_); } else { - GetPossibleWorkGroups(tuning_type, device_info, kernel_info, grid_size_, + GetPossibleWorkGroups(tuning_type, gpu_info, kernel_info, grid_size_, work_groups); } } @@ -313,12 +312,12 @@ bool IsDepthwiseConv3x3Supported(const DepthwiseConvolution2DAttributes& attr) { } DepthwiseConv3x3 CreateDepthwiseConv3x3( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const DepthwiseConvolution2DAttributes& attr) { - bool weights_are_buffer = device_info.IsPowerVR() || device_info.IsMali(); - bool local_mem_uploads = weights_are_buffer && device_info.IsPowerVR(); + bool weights_are_buffer = gpu_info.IsPowerVR() || gpu_info.IsMali(); + bool local_mem_uploads = weights_are_buffer && gpu_info.IsPowerVR(); DepthwiseConv3x3 result(definition, weights_are_buffer, local_mem_uploads, - device_info); + gpu_info); 
result.UploadWeightsAndBiases(attr.weights, attr.bias, weights_are_buffer); return result; } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h index bbe759fe5d4..b4f706a779c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h @@ -39,7 +39,7 @@ class DepthwiseConv3x3 : public GPUOperation { public: DepthwiseConv3x3() = default; void GetPossibleKernelWorkGroups( - TuningType tuning_type, const DeviceInfo& device_info, + TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, std::vector* work_groups) const override; int3 GetGridSize() const override; @@ -53,14 +53,14 @@ class DepthwiseConv3x3 : public GPUOperation { private: explicit DepthwiseConv3x3(const OperationDef& definition, bool weights_are_buffer, bool local_mem_uploads, - const DeviceInfo& device_info); + const GpuInfo& gpu_info); template void UploadWeightsAndBiases(const tflite::gpu::Tensor& weights, const tflite::gpu::Tensor& biases, bool weights_are_buffer); friend DepthwiseConv3x3 CreateDepthwiseConv3x3( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const DepthwiseConvolution2DAttributes& attr); template @@ -151,7 +151,7 @@ void DepthwiseConv3x3::RearrangeWeightsAndBiasesData( bool IsDepthwiseConv3x3Supported(const DepthwiseConvolution2DAttributes& attr); DepthwiseConv3x3 CreateDepthwiseConv3x3( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const DepthwiseConvolution2DAttributes& attr); } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3_test.cc index 24f9e5c1f08..330c03a869e 100644 --- 
a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3_test.cc @@ -56,8 +56,8 @@ TEST_F(OpenCLOperationTest, DepthwiseConv3x3SimpleWeights) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - DepthwiseConv3x3 operation = CreateDepthwiseConv3x3( - creation_context_.GetDeviceInfo(), op_def, attr); + DepthwiseConv3x3 operation = + CreateDepthwiseConv3x3(creation_context_.GetGpuInfo(), op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 2, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -92,8 +92,8 @@ TEST_F(OpenCLOperationTest, DepthwiseConv3x3) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - DepthwiseConv3x3 operation = CreateDepthwiseConv3x3( - creation_context_.GetDeviceInfo(), op_def, attr); + DepthwiseConv3x3 operation = + CreateDepthwiseConv3x3(creation_context_.GetGpuInfo(), op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 2, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_test.cc index eb43c0c30e3..a3cbb621bf0 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_test.cc @@ -56,7 +56,7 @@ TEST_F(OpenCLOperationTest, DepthwiseConvSimpleWeights) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; GPUOperation operation = CreateDepthwiseConvolution2D( - creation_context_.GetDeviceInfo(), op_def, attr); + creation_context_.GetGpuInfo(), op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, 
&operation, BHWC(1, 2, 2, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -91,7 +91,7 @@ TEST_F(OpenCLOperationTest, DepthwiseConvNoMultiplier) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; GPUOperation operation = CreateDepthwiseConvolution2D( - creation_context_.GetDeviceInfo(), op_def, attr); + creation_context_.GetGpuInfo(), op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 2, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -127,7 +127,7 @@ TEST_F(OpenCLOperationTest, DepthwiseConvMultiplier2) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; GPUOperation operation = CreateDepthwiseConvolution2D( - creation_context_.GetDeviceInfo(), op_def, attr); + creation_context_.GetGpuInfo(), op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 2, 4), &dst_tensor)); EXPECT_THAT( diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc index f50045131c2..c3b41c6e786 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc @@ -197,14 +197,14 @@ GPUOperation CreateElementwiseOneRuntimeOneScalar( // Creates simple two input(first input is runtime tensor and second input is // constant linear tensor) operation, for example sub, div and etc. 
GPUOperation CreateElementwiseTwoInput( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const OperationType& op_type, const tflite::gpu::Tensor& constant_tensor, bool swap_inputs) { const BHWC shape = BHWC(1, 1, 1, constant_tensor.shape.v); - TensorStorageType storage_type = SelectBestStorageType( - device_info, shape, definition.GetPrimaryStorageType(), - definition.GetDataType(), Layout::HWC); + TensorStorageType storage_type = + SelectBestStorageType(gpu_info, shape, definition.GetPrimaryStorageType(), + definition.GetDataType(), Layout::HWC); TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC}; desc.UploadData(constant_tensor); @@ -228,15 +228,15 @@ GPUOperation CreateElementwiseTwoInput( // Creates simple two input(first input is runtime tensor and second input is // constant HWC tensor) operation, for example sub, div and etc. GPUOperation CreateElementwiseTwoInput( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const OperationType& op_type, const tflite::gpu::Tensor& constant_tensor, bool swap_inputs) { const BHWC shape = BHWC(1, constant_tensor.shape.h, constant_tensor.shape.w, constant_tensor.shape.c); - TensorStorageType storage_type = SelectBestStorageType( - device_info, shape, definition.GetPrimaryStorageType(), - definition.GetDataType(), Layout::HWC); + TensorStorageType storage_type = + SelectBestStorageType(gpu_info, shape, definition.GetPrimaryStorageType(), + definition.GetDataType(), Layout::HWC); TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC}; desc.UploadData(constant_tensor); @@ -270,7 +270,7 @@ GPUOperation CreateElementwiseOneInput(const OperationDef& definition, return op; } -GPUOperation CreateElementwise(const DeviceInfo& device_info, +GPUOperation CreateElementwise(const GpuInfo& gpu_info, const OperationDef& definition, const 
OperationType& op_type, const ElementwiseAttributes& attr) { @@ -284,12 +284,11 @@ GPUOperation CreateElementwise(const DeviceInfo& device_info, return CreateElementwiseOneRuntimeOneScalar(definition, op_type, *scalar, attr.runtime_tensor_is_second); } else if (linear_tensor) { - return CreateElementwiseTwoInput(device_info, definition, op_type, + return CreateElementwiseTwoInput(gpu_info, definition, op_type, *linear_tensor, attr.runtime_tensor_is_second); } else if (hwc_tensor) { - return CreateElementwiseTwoInput(device_info, definition, op_type, - *hwc_tensor, + return CreateElementwiseTwoInput(gpu_info, definition, op_type, *hwc_tensor, attr.runtime_tensor_is_second); } else { return GPUOperation(definition); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.h b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.h index c16899071d6..572b731d908 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.h @@ -33,7 +33,7 @@ GPUOperation CreateElementwiseOneInput(const OperationDef& definition, // Creates simple two input(first input is runtime tensor and second input is // constant or linear/hwc tensor) operation, for example sub, div and etc. 
-GPUOperation CreateElementwise(const DeviceInfo& device_info, +GPUOperation CreateElementwise(const GpuInfo& gpu_info, const OperationDef& definition, const OperationType& op_type, const ElementwiseAttributes& attr); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc index b48f66ce600..4163ad136db 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc @@ -570,9 +570,8 @@ TEST_F(OpenCLOperationTest, MaximumWithScalar) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - GPUOperation operation = - CreateElementwise(creation_context_.GetDeviceInfo(), op_def, - OperationType::MAXIMUM, attr); + GPUOperation operation = CreateElementwise( + creation_context_.GetGpuInfo(), op_def, OperationType::MAXIMUM, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor_0, creation_context_, &operation, BHWC(1, 4, 1, 1), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -601,9 +600,8 @@ TEST_F(OpenCLOperationTest, MaximumWithConstantLinearTensor) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - GPUOperation operation = - CreateElementwise(creation_context_.GetDeviceInfo(), op_def, - OperationType::MAXIMUM, attr); + GPUOperation operation = CreateElementwise( + creation_context_.GetGpuInfo(), op_def, OperationType::MAXIMUM, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor_0, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -632,9 +630,8 @@ TEST_F(OpenCLOperationTest, MaximumWithConstantHWCTensor) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - GPUOperation 
operation = - CreateElementwise(creation_context_.GetDeviceInfo(), op_def, - OperationType::MAXIMUM, attr); + GPUOperation operation = CreateElementwise( + creation_context_.GetGpuInfo(), op_def, OperationType::MAXIMUM, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor_0, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -662,9 +659,8 @@ TEST_F(OpenCLOperationTest, MaximumWithConstantHWCTensorBroadcastChannels) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - GPUOperation operation = - CreateElementwise(creation_context_.GetDeviceInfo(), op_def, - OperationType::MAXIMUM, attr); + GPUOperation operation = CreateElementwise( + creation_context_.GetGpuInfo(), op_def, OperationType::MAXIMUM, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor_0, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -718,9 +714,8 @@ TEST_F(OpenCLOperationTest, MinimumWithScalar) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - GPUOperation operation = - CreateElementwise(creation_context_.GetDeviceInfo(), op_def, - OperationType::MINIMUM, attr); + GPUOperation operation = CreateElementwise( + creation_context_.GetGpuInfo(), op_def, OperationType::MINIMUM, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor_0, creation_context_, &operation, BHWC(1, 4, 1, 1), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -832,7 +827,7 @@ TEST_F(OpenCLOperationTest, SubWithScalarAtFirstPosition) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; GPUOperation operation = CreateElementwise( - creation_context_.GetDeviceInfo(), op_def, OperationType::SUB, attr); + creation_context_.GetGpuInfo(), op_def, OperationType::SUB, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor_0, 
creation_context_, &operation, BHWC(1, 4, 1, 1), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -887,7 +882,7 @@ TEST_F(OpenCLOperationTest, LessEqual) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; GPUOperation operation = - CreateElementwise(creation_context_.GetDeviceInfo(), op_def, + CreateElementwise(creation_context_.GetGpuInfo(), op_def, OperationType::LESS_EQUAL, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor_0, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); @@ -914,9 +909,8 @@ TEST_F(OpenCLOperationTest, Greater) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - GPUOperation operation = - CreateElementwise(creation_context_.GetDeviceInfo(), op_def, - OperationType::GREATER, attr); + GPUOperation operation = CreateElementwise( + creation_context_.GetGpuInfo(), op_def, OperationType::GREATER, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor_0, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -943,7 +937,7 @@ TEST_F(OpenCLOperationTest, GreaterEqual) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; GPUOperation operation = - CreateElementwise(creation_context_.GetDeviceInfo(), op_def, + CreateElementwise(creation_context_.GetGpuInfo(), op_def, OperationType::GREATER_EQUAL, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor_0, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); @@ -970,9 +964,8 @@ TEST_F(OpenCLOperationTest, Equal) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - GPUOperation operation = - CreateElementwise(creation_context_.GetDeviceInfo(), op_def, - OperationType::EQUAL, attr); + GPUOperation operation = CreateElementwise( + creation_context_.GetGpuInfo(), op_def, 
OperationType::EQUAL, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor_0, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -999,7 +992,7 @@ TEST_F(OpenCLOperationTest, NotEqual) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; GPUOperation operation = - CreateElementwise(creation_context_.GetDeviceInfo(), op_def, + CreateElementwise(creation_context_.GetGpuInfo(), op_def, OperationType::NOT_EQUAL, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor_0, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc index 7739f62b977..b5caef81b43 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc @@ -32,32 +32,32 @@ namespace tflite { namespace gpu { namespace cl { namespace { -bool UseBufferForWeights(const DeviceInfo& device_info) { - return device_info.IsAdreno() || device_info.IsAMD() || device_info.IsMali(); +bool UseBufferForWeights(const GpuInfo& gpu_info) { + return gpu_info.IsAdreno() || gpu_info.IsAMD() || gpu_info.IsMali(); } } // namespace FullyConnected::FullyConnected(const OperationDef& definition, - const DeviceInfo& device_info) + const GpuInfo& gpu_info) : GPUOperation(definition) { - if (device_info.IsAdreno()) { - if (device_info.adreno_info.IsAdreno3xx()) { + if (gpu_info.IsAdreno()) { + if (gpu_info.adreno_info.IsAdreno3xx()) { work_group_size_ = int3(16, 4, 1); - } else if (device_info.adreno_info.IsAdreno4xx()) { + } else if (gpu_info.adreno_info.IsAdreno4xx()) { work_group_size_ = int3(32, 4, 1); } else { work_group_size_ = int3(32, 4, 1); } - } else if (device_info.IsIntel()) { + } else if (gpu_info.IsIntel()) { work_group_size_ = int3(8, 4, 1); - } else if (device_info.IsNvidia()) { + } else if (gpu_info.IsNvidia()) { work_group_size_ = 
int3(8, 4, 1); - } else if (device_info.IsPowerVR()) { + } else if (gpu_info.IsPowerVR()) { work_group_size_ = int3(8, 4, 1); } else { work_group_size_ = int3(16, 4, 1); } - code_ = GetFullyConnectedKernelCode(definition_, device_info); + code_ = GetFullyConnectedKernelCode(definition_, gpu_info); } FullyConnected::FullyConnected(FullyConnected&& kernel) @@ -77,11 +77,11 @@ FullyConnected& FullyConnected::operator=(FullyConnected&& kernel) { // optimized shaders std::string FullyConnected::GetFullyConnectedKernelCode( - const OperationDef& op_def, const DeviceInfo& device_info) { + const OperationDef& op_def, const GpuInfo& gpu_info) { AddSrcTensor("src_tensor", op_def.src_tensors[0]); AddDstTensor("dst_tensor", op_def.dst_tensors[0]); - const bool weights_are_buffer = UseBufferForWeights(device_info); + const bool weights_are_buffer = UseBufferForWeights(gpu_info); std::string c = GetCommonDefines(op_def.precision); switch (op_def.precision) { @@ -150,11 +150,11 @@ int3 FullyConnected::GetGridSize() const { return int3(dst_[0]->Slices(), 1, 1); } -FullyConnected CreateFullyConnected(const DeviceInfo& device_info, +FullyConnected CreateFullyConnected(const GpuInfo& gpu_info, const OperationDef& definition, const FullyConnectedAttributes& attr) { - FullyConnected result(definition, device_info); - result.UploadWeights(attr.weights, UseBufferForWeights(device_info)); + FullyConnected result(definition, gpu_info); + result.UploadWeights(attr.weights, UseBufferForWeights(gpu_info)); TensorLinearDescriptor desc; desc.storage_type = LinearStorageType::TEXTURE_2D; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h index b2d1552bf63..1fdeb1f6db7 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h @@ -121,7 +121,7 @@ class FullyConnected : public GPUOperation { public: FullyConnected() = default; void 
GetPossibleKernelWorkGroups( - TuningType tuning_type, const DeviceInfo& device_info, + TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, std::vector* work_groups) const override { work_groups->push_back(work_group_size_); @@ -135,9 +135,9 @@ class FullyConnected : public GPUOperation { FullyConnected& operator=(const FullyConnected&) = delete; private: - FullyConnected(const OperationDef& definition, const DeviceInfo& device_info); + FullyConnected(const OperationDef& definition, const GpuInfo& gpu_info); friend FullyConnected CreateFullyConnected( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const FullyConnectedAttributes& attr); template @@ -145,7 +145,7 @@ class FullyConnected : public GPUOperation { bool weights_are_buffer); std::string GetFullyConnectedKernelCode(const OperationDef& op_def, - const DeviceInfo& device_info); + const GpuInfo& gpu_info); }; template @@ -195,7 +195,7 @@ void FullyConnected::UploadWeights(const tflite::gpu::Tensor& weights, } } -FullyConnected CreateFullyConnected(const DeviceInfo& device_info, +FullyConnected CreateFullyConnected(const GpuInfo& gpu_info, const OperationDef& definition, const FullyConnectedAttributes& attr); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected_test.cc index 23e69a4631c..1d991f1a058 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected_test.cc @@ -58,7 +58,7 @@ TEST_F(OpenCLOperationTest, FullyConnected) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; FullyConnected operation = - CreateFullyConnected(creation_context_.GetDeviceInfo(), op_def, attr); + CreateFullyConnected(creation_context_.GetGpuInfo(), op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, 
&operation, BHWC(1, 1, 1, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), {14.5f, 37.5f})) @@ -102,7 +102,7 @@ TEST_F(OpenCLOperationTest, FullyConnectedLarge) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; FullyConnected operation = - CreateFullyConnected(creation_context_.GetDeviceInfo(), op_def, attr); + CreateFullyConnected(creation_context_.GetGpuInfo(), op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 1, 1, 12), &dst_tensor)); EXPECT_THAT( @@ -151,7 +151,7 @@ TEST_F(OpenCLOperationTest, FullyConnectedExtraLarge) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; FullyConnected operation = - CreateFullyConnected(creation_context_.GetDeviceInfo(), op_def, attr); + CreateFullyConnected(creation_context_.GetGpuInfo(), op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 1, 1, kOutputSize), &dst_tensor)); EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), expected)) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc index eb53840f841..a47a725a4a2 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc @@ -236,7 +236,7 @@ absl::Status GPUOperation::UpdateParams() { return absl::OkStatus(); } -absl::Status GPUOperation::AssembleCode(const DeviceInfo& device_info, +absl::Status GPUOperation::AssembleCode(const GpuInfo& gpu_info, CLContext* context) { if (elementwise_) { auto src_desc = @@ -258,14 +258,13 @@ absl::Status GPUOperation::AssembleCode(const DeviceInfo& device_info, elementwise_code_ = "{\n" + code_ + "\n}\n" + elementwise_code_; code_ = GetElementWiseCode(definition_, check_src_channels_size_); } - return cl_args_.Init(device_info, - {{dst_tensors_names_[0], elementwise_code_}}, context, - 
&args_, &code_); + return cl_args_.Init(gpu_info, {{dst_tensors_names_[0], elementwise_code_}}, + context, &args_, &code_); } absl::Status GPUOperation::Compile(const CreationContext& creation_context) { RETURN_IF_ERROR( - AssembleCode(creation_context.GetDeviceInfo(), creation_context.context)); + AssembleCode(creation_context.GetGpuInfo(), creation_context.context)); RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel( code_, "main_function", compiler_options_, *creation_context.context, *creation_context.device, &kernel_)); @@ -274,7 +273,7 @@ absl::Status GPUOperation::Compile(const CreationContext& creation_context) { absl::Status GPUOperation::CompileDeserialized( const CreationContext& creation_context) { - RETURN_IF_ERROR(cl_args_.Init(creation_context.GetDeviceInfo(), &args_, + RETURN_IF_ERROR(cl_args_.Init(creation_context.GetGpuInfo(), &args_, creation_context.context)); return creation_context.cache->GetOrCreateCLKernel( code_, "main_function", compiler_options_, *creation_context.context, @@ -282,9 +281,9 @@ absl::Status GPUOperation::CompileDeserialized( } void GPUOperation::GetPossibleKernelWorkGroups( - TuningType tuning_type, const DeviceInfo& device_info, + TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, std::vector* work_groups) const { - GetPossibleWorkGroups(tuning_type, device_info, kernel_info, grid_size_, + GetPossibleWorkGroups(tuning_type, gpu_info, kernel_info, grid_size_, work_groups); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h index 49188469c92..3793e2523f4 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h @@ -74,7 +74,7 @@ struct CreationContext { CLCommandQueue* queue; ProgramCache* cache; - const DeviceInfo& GetDeviceInfo() const { return device->info_; } + const GpuInfo& GetGpuInfo() const { return device->info_; } }; struct 
OperationDef { @@ -126,18 +126,18 @@ class GPUOperation { } virtual void GetPossibleKernelWorkGroups( - TuningType tuning_type, const DeviceInfo& device_info, + TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, std::vector* work_groups) const; absl::Status Tune(const TuningParameters& params); - absl::Status AssembleCode(const DeviceInfo& device_info, CLContext* context); + absl::Status AssembleCode(const GpuInfo& gpu_info, CLContext* context); absl::Status Compile(const CreationContext& creation_context); absl::Status CompileDeserialized(const CreationContext& creation_context); - virtual absl::Status PostCompileCheck(const DeviceInfo& device_info, + virtual absl::Status PostCompileCheck(const GpuInfo& gpu_info, const KernelInfo& kernel_info) { return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc b/tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc index edae706c63f..11d2e209428 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc @@ -24,8 +24,7 @@ namespace tflite { namespace gpu { namespace cl { namespace { -std::string GetLSTMCode(const OperationDef& op_def, - const DeviceInfo& device_info) { +std::string GetLSTMCode(const OperationDef& op_def, const GpuInfo& gpu_info) { std::string c = GetCommonDefines(op_def.precision); c += "__kernel void main_function(\n"; c += "$0) {\n"; @@ -39,8 +38,7 @@ std::string GetLSTMCode(const OperationDef& op_def, c += " FLT4 r1 = args.intermediate.Read(0, 0, Z + state_stride, B);\n"; c += " FLT4 r2 = args.intermediate.Read(0, 0, Z + state_stride * 2, B);\n"; c += " FLT4 r3 = args.intermediate.Read(0, 0, Z + state_stride * 3, B);\n"; - if (op_def.precision != CalculationsPrecision::F32 && - device_info.IsAdreno()) { + if (op_def.precision != CalculationsPrecision::F32 && gpu_info.IsAdreno()) { c += " FLT4 input_gate;\n"; c += " FLT4 new_input;\n"; c += " FLT4 forget_gate;\n"; @@ -88,13 +86,13 @@ 
std::string GetLSTMCode(const OperationDef& op_def, } // namespace GPUOperation CreateLSTM(const OperationDef& definition, - const DeviceInfo& device_info) { + const GpuInfo& gpu_info) { GPUOperation op(definition); op.AddSrcTensor("intermediate", definition.src_tensors[0]); op.AddSrcTensor("prev_state", definition.src_tensors[1]); op.AddDstTensor("new_state", definition.dst_tensors[0]); op.AddDstTensor("activation", definition.dst_tensors[1]); - op.code_ = GetLSTMCode(definition, device_info); + op.code_ = GetLSTMCode(definition, gpu_info); op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ; return op; } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/lstm.h b/tensorflow/lite/delegates/gpu/cl/kernels/lstm.h index 5d827d46bc3..fa0aa270158 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/lstm.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/lstm.h @@ -26,7 +26,7 @@ namespace gpu { namespace cl { GPUOperation CreateLSTM(const OperationDef& definition, - const DeviceInfo& device_info); + const GpuInfo& gpu_info); } // namespace cl } // namespace gpu diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean.cc b/tensorflow/lite/delegates/gpu/cl/kernels/mean.cc index 6d11a7dd726..731a7c9874b 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean.cc @@ -26,19 +26,19 @@ namespace tflite { namespace gpu { namespace cl { -Mean::Mean(const OperationDef& definition, const DeviceInfo& device_info) +Mean::Mean(const OperationDef& definition, const GpuInfo& gpu_info) : GPUOperation(definition) { // for workgroup size: // must be: (x * y) % 4 = 0; // must be: z = 1; work_group_size_ = int3(16, 16, 1); - if (device_info.IsAdreno()) { - if (device_info.adreno_info.IsAdreno3xx()) { + if (gpu_info.IsAdreno()) { + if (gpu_info.adreno_info.IsAdreno3xx()) { work_group_size_ = int3(16, 8, 1); } } - if (device_info.IsMali()) { - const MaliInfo& mali_info = device_info.mali_info; + if (gpu_info.IsMali()) { + const 
MaliInfo& mali_info = gpu_info.mali_info; if (mali_info.IsMaliT6xx() || mali_info.IsMaliT7xx() || mali_info.IsMaliT8xx()) { work_group_size_ = int3(8, 4, 1); @@ -135,8 +135,8 @@ int3 Mean::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -Mean CreateMean(const OperationDef& definition, const DeviceInfo& device_info) { - return Mean(definition, device_info); +Mean CreateMean(const OperationDef& definition, const GpuInfo& gpu_info) { + return Mean(definition, gpu_info); } } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean.h b/tensorflow/lite/delegates/gpu/cl/kernels/mean.h index 0d233302e12..69fe994264c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean.h @@ -28,10 +28,10 @@ namespace cl { class Mean : public GPUOperation { public: Mean() = default; - Mean(const OperationDef& definition, const DeviceInfo& device_info); + Mean(const OperationDef& definition, const GpuInfo& gpu_info); void GetPossibleKernelWorkGroups( - TuningType tuning_type, const DeviceInfo& device_info, + TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, std::vector* work_groups) const override { work_groups->push_back(work_group_size_); @@ -50,7 +50,7 @@ class Mean : public GPUOperation { const int3& work_group_size); }; -Mean CreateMean(const OperationDef& definition, const DeviceInfo& device_info); +Mean CreateMean(const OperationDef& definition, const GpuInfo& gpu_info); } // namespace cl } // namespace gpu diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc index a14b9e99560..e701d60a998 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc @@ -86,7 +86,7 @@ float4 filter_outside_tensor(float4 x, int num_channels, int slice) { } // namespace 
MeanStdDevNormalization::MeanStdDevNormalization(const OperationDef& definition, - const DeviceInfo& device_info, + const GpuInfo& gpu_info, const int tensor_slices) : GPUOperation(definition) { // The kernel code does not inherently need a fixed size, but in order to not @@ -95,15 +95,15 @@ MeanStdDevNormalization::MeanStdDevNormalization(const OperationDef& definition, // For now, fix workgroup size to the biggest supported by the device, but not // larger than the number of tensor slices. int desired_work_group_size = - std::min(tensor_slices, device_info.max_work_group_size_x); - if (device_info.IsMali()) { + std::min(tensor_slices, gpu_info.max_work_group_size_x); + if (gpu_info.IsMali()) { // Don't use more than 64 work items per work group on ARM Mali. They // implement local memory using the global memory, larger workgroups have // severe performance penalty. desired_work_group_size = 64; } - if (device_info.IsAdreno()) { - AdrenoInfo info = device_info.adreno_info; + if (gpu_info.IsAdreno()) { + AdrenoInfo info = gpu_info.adreno_info; if (info.IsAdreno3xx()) { if (info.adreno_gpu == AdrenoGpu::kAdreno320 || info.adreno_gpu == AdrenoGpu::kAdreno330) { @@ -126,7 +126,7 @@ MeanStdDevNormalization::MeanStdDevNormalization(const OperationDef& definition, } } } - if (device_info.IsPowerVR()) { + if (gpu_info.IsPowerVR()) { desired_work_group_size = 64; } while (desired_work_group_size >= tensor_slices * 2) { @@ -136,9 +136,9 @@ MeanStdDevNormalization::MeanStdDevNormalization(const OperationDef& definition, work_group_size_.y = 1; // Required work_group_size_.z = 1; // Required code_ = GetNormalizationCode(); - if (device_info.cl_version >= OpenCLVersion::CL_3_0) { + if (gpu_info.cl_version >= OpenCLVersion::CL_3_0) { compiler_options_.push_back(CompilerOptions::CL_3_0); - } else if (device_info.cl_version >= OpenCLVersion::CL_2_0) { + } else if (gpu_info.cl_version >= OpenCLVersion::CL_2_0) { compiler_options_.push_back(CompilerOptions::CL_2_0); } } @@ -205,9 
+205,9 @@ int3 MeanStdDevNormalization::GetGridSize() const { } MeanStdDevNormalization CreateMeanStdDevNormalization( - const OperationDef& definition, const DeviceInfo& device_info, + const OperationDef& definition, const GpuInfo& gpu_info, const int tensor_slices) { - return MeanStdDevNormalization(definition, device_info, tensor_slices); + return MeanStdDevNormalization(definition, gpu_info, tensor_slices); } } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h index 3312d23122f..6a4a1848394 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h @@ -30,11 +30,11 @@ namespace cl { class MeanStdDevNormalization : public GPUOperation { public: explicit MeanStdDevNormalization(const OperationDef& definition, - const DeviceInfo& device_info, + const GpuInfo& gpu_info, const int tensor_slices); void GetPossibleKernelWorkGroups( - TuningType tuning_type, const DeviceInfo& device_info, + TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, std::vector* work_groups) const override { work_groups->push_back(work_group_size_); @@ -53,7 +53,7 @@ class MeanStdDevNormalization : public GPUOperation { }; MeanStdDevNormalization CreateMeanStdDevNormalization( - const OperationDef& definition, const DeviceInfo& device_info, + const OperationDef& definition, const GpuInfo& gpu_info, const int tensor_slices); } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/prelu.cc b/tensorflow/lite/delegates/gpu/cl/kernels/prelu.cc index bcda1f6a628..088dc5b027f 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/prelu.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/prelu.cc @@ -25,7 +25,7 @@ namespace tflite { namespace gpu { namespace cl { -GPUOperation CreatePReLU(const DeviceInfo& device_info, +GPUOperation 
CreatePReLU(const GpuInfo& gpu_info, const OperationDef& definition, const PReLUAttributes& attr) { GPUOperation result(definition); @@ -51,7 +51,7 @@ GPUOperation CreatePReLU(const DeviceInfo& device_info, const BHWC shape = BHWC(1, alpha_hwc->shape.h, alpha_hwc->shape.w, alpha_hwc->shape.c); TensorStorageType storage_type = SelectBestStorageType( - device_info, shape, definition.GetPrimaryStorageType(), + gpu_info, shape, definition.GetPrimaryStorageType(), definition.GetDataType(), Layout::HWC); TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC}; desc.UploadData(*alpha_hwc); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/prelu.h b/tensorflow/lite/delegates/gpu/cl/kernels/prelu.h index 5d2a41bc6de..1e98d043eed 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/prelu.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/prelu.h @@ -31,7 +31,7 @@ namespace tflite { namespace gpu { namespace cl { -GPUOperation CreatePReLU(const DeviceInfo& device_info, +GPUOperation CreatePReLU(const GpuInfo& gpu_info, const OperationDef& definition, const PReLUAttributes& attr); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/prelu_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/prelu_test.cc index ef4b8c17324..ef23ed380ff 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/prelu_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/prelu_test.cc @@ -53,7 +53,7 @@ TEST_F(OpenCLOperationTest, PReLUAlpha) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; GPUOperation operation = - CreatePReLU(creation_context_.GetDeviceInfo(), op_def, attr); + CreatePReLU(creation_context_.GetGpuInfo(), op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -84,7 +84,7 @@ TEST_F(OpenCLOperationTest, PReLUAlphaClip) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; GPUOperation 
operation = - CreatePReLU(creation_context_.GetDeviceInfo(), op_def, attr); + CreatePReLU(creation_context_.GetGpuInfo(), op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -115,7 +115,7 @@ TEST_F(OpenCLOperationTest, PReLUHWCAlpha) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; GPUOperation operation = - CreatePReLU(creation_context_.GetDeviceInfo(), op_def, attr); + CreatePReLU(creation_context_.GetGpuInfo(), op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/reduce_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/reduce_test.cc index 7f100410d3c..8bcbf7bd1df 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/reduce_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/reduce_test.cc @@ -38,7 +38,7 @@ TEST_F(OpenCLOperationTest, ReduceSumChannels) { src_tensor.shape = BHWC(1, 2, 1, 5); src_tensor.data = {1.1, 2.1, 0.7, 0.3, 1.2, 3.1, 4.1, 0.0, 1.0, 4.4}; ReduceAttributes attr; - attr.axis = Axis::CHANNELS; + attr.dims = {Axis::CHANNELS}; for (auto storage : env_.GetSupportedStorages()) { for (auto precision : env_.GetSupportedPrecisions()) { @@ -63,7 +63,7 @@ TEST_F(OpenCLOperationTest, ReduceProductChannels) { src_tensor.shape = BHWC(1, 2, 1, 2); src_tensor.data = {1.1, 2.0, 3.1, 4.0}; ReduceAttributes attr; - attr.axis = Axis::CHANNELS; + attr.dims = {Axis::CHANNELS}; for (auto storage : env_.GetSupportedStorages()) { for (auto precision : env_.GetSupportedPrecisions()) { @@ -89,7 +89,7 @@ TEST_F(OpenCLOperationTest, ReduceMaxChannels) { src_tensor.data = {1.1, 2.0, -0.3, -100.0, 32.6, 1.1, -3.1, -4.0, -5.0, -7.0, -2.0, -100.0}; ReduceAttributes attr; - attr.axis = Axis::CHANNELS; + attr.dims = {Axis::CHANNELS}; for (auto storage : env_.GetSupportedStorages()) 
{ for (auto precision : env_.GetSupportedPrecisions()) { @@ -115,7 +115,7 @@ TEST_F(OpenCLOperationTest, ReduceMinChannels) { src_tensor.data = {1.1, 2.0, -0.3, -100.0, 32.6, 1.1, -3.1, -4.0, -5.0, -7.0, -2.0, 100.0}; ReduceAttributes attr; - attr.axis = Axis::CHANNELS; + attr.dims = {Axis::CHANNELS}; for (auto storage : env_.GetSupportedStorages()) { for (auto precision : env_.GetSupportedPrecisions()) { diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h index 030e53728f3..2a50dee2d63 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h @@ -29,7 +29,7 @@ class Softmax1x1 : public GPUOperation { Softmax1x1() = default; explicit Softmax1x1(const OperationDef& definition); void GetPossibleKernelWorkGroups( - TuningType tuning_type, const DeviceInfo& device_info, + TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, std::vector* work_groups) const override { work_groups->push_back(work_group_size_); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/special/fc_fc_add.cc b/tensorflow/lite/delegates/gpu/cl/kernels/special/fc_fc_add.cc index be5197f01e9..f1c3ddb7045 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/special/fc_fc_add.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/special/fc_fc_add.cc @@ -32,31 +32,31 @@ namespace tflite { namespace gpu { namespace cl { namespace { -bool UseBufferForWeights(const DeviceInfo& device_info) { - return device_info.IsAdreno() || device_info.IsAMD() || device_info.IsMali(); +bool UseBufferForWeights(const GpuInfo& gpu_info) { + return gpu_info.IsAdreno() || gpu_info.IsAMD() || gpu_info.IsMali(); } } // namespace -FCFCAdd::FCFCAdd(const OperationDef& definition, const DeviceInfo& device_info) +FCFCAdd::FCFCAdd(const OperationDef& definition, const GpuInfo& gpu_info) : GPUOperation(definition) { - if (device_info.IsAdreno()) { - if 
(device_info.adreno_info.IsAdreno3xx()) { + if (gpu_info.IsAdreno()) { + if (gpu_info.adreno_info.IsAdreno3xx()) { work_group_size_ = int3(16, 4, 1); - } else if (device_info.adreno_info.IsAdreno4xx()) { + } else if (gpu_info.adreno_info.IsAdreno4xx()) { work_group_size_ = int3(32, 4, 1); } else { work_group_size_ = int3(32, 4, 1); } - } else if (device_info.IsIntel()) { + } else if (gpu_info.IsIntel()) { work_group_size_ = int3(8, 4, 1); - } else if (device_info.IsNvidia()) { + } else if (gpu_info.IsNvidia()) { work_group_size_ = int3(8, 4, 1); - } else if (device_info.IsPowerVR()) { + } else if (gpu_info.IsPowerVR()) { work_group_size_ = int3(8, 4, 1); } else { work_group_size_ = int3(16, 4, 1); } - code_ = GetFCFCAddKernelCode(definition_, device_info); + code_ = GetFCFCAddKernelCode(definition_, gpu_info); } FCFCAdd::FCFCAdd(FCFCAdd&& kernel) : GPUOperation(std::move(kernel)) {} @@ -75,12 +75,12 @@ FCFCAdd& FCFCAdd::operator=(FCFCAdd&& kernel) { // optimized shaders std::string FCFCAdd::GetFCFCAddKernelCode(const OperationDef& op_def, - const DeviceInfo& device_info) { + const GpuInfo& gpu_info) { AddSrcTensor("src_tensor_0", op_def.src_tensors[0]); AddSrcTensor("src_tensor_1", op_def.src_tensors[1]); AddDstTensor("dst_tensor", op_def.dst_tensors[0]); - const bool weights_are_buffer = UseBufferForWeights(device_info); + const bool weights_are_buffer = UseBufferForWeights(gpu_info); std::string c = GetCommonDefines(op_def.precision); switch (op_def.precision) { @@ -172,15 +172,14 @@ std::string FCFCAdd::GetFCFCAddKernelCode(const OperationDef& op_def, int3 FCFCAdd::GetGridSize() const { return int3(dst_[0]->Slices(), 1, 1); } -FCFCAdd CreateFCFCAdd(const DeviceInfo& device_info, - const OperationDef& definition, +FCFCAdd CreateFCFCAdd(const GpuInfo& gpu_info, const OperationDef& definition, const FullyConnectedAttributes& attr0, const FullyConnectedAttributes& attr1) { - FCFCAdd result(definition, device_info); + FCFCAdd result(definition, gpu_info); 
result.UploadWeights(attr0.weights, "weights0", - UseBufferForWeights(device_info)); + UseBufferForWeights(gpu_info)); result.UploadWeights(attr1.weights, "weights1", - UseBufferForWeights(device_info)); + UseBufferForWeights(gpu_info)); TensorLinearDescriptor desc0; desc0.storage_type = LinearStorageType::TEXTURE_2D; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/special/fc_fc_add.h b/tensorflow/lite/delegates/gpu/cl/kernels/special/fc_fc_add.h index 7efa6d79735..65b392d03e3 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/special/fc_fc_add.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/special/fc_fc_add.h @@ -97,7 +97,7 @@ class FCFCAdd : public GPUOperation { public: FCFCAdd() = default; void GetPossibleKernelWorkGroups( - TuningType tuning_type, const DeviceInfo& device_info, + TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, std::vector* work_groups) const override { work_groups->push_back(work_group_size_); @@ -111,8 +111,8 @@ class FCFCAdd : public GPUOperation { FCFCAdd& operator=(const FCFCAdd&) = delete; private: - FCFCAdd(const OperationDef& definition, const DeviceInfo& device_info); - friend FCFCAdd CreateFCFCAdd(const DeviceInfo& device_info, + FCFCAdd(const OperationDef& definition, const GpuInfo& gpu_info); + friend FCFCAdd CreateFCFCAdd(const GpuInfo& gpu_info, const OperationDef& definition, const FullyConnectedAttributes& attr0, const FullyConnectedAttributes& attr1); @@ -122,7 +122,7 @@ class FCFCAdd : public GPUOperation { const std::string& name, bool weights_are_buffer); std::string GetFCFCAddKernelCode(const OperationDef& op_def, - const DeviceInfo& device_info); + const GpuInfo& gpu_info); }; template @@ -175,8 +175,7 @@ void FCFCAdd::UploadWeights(const tflite::gpu::Tensor& weights, } } -FCFCAdd CreateFCFCAdd(const DeviceInfo& device_info, - const OperationDef& definition, +FCFCAdd CreateFCFCAdd(const GpuInfo& gpu_info, const OperationDef& definition, const FullyConnectedAttributes& attr0, const 
FullyConnectedAttributes& attr1); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h b/tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h index c57ccade4b2..b7221e53ad4 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h @@ -27,7 +27,7 @@ enum class TuningType { EXHAUSTIVE, FAST }; struct TuningParameters { ProfilingCommandQueue* queue; - const DeviceInfo* info; + const GpuInfo* info; TuningType tuning_type = TuningType::EXHAUSTIVE; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/util.cc b/tensorflow/lite/delegates/gpu/cl/kernels/util.cc index aca06f60ae0..2530c73571b 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/util.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/util.cc @@ -114,19 +114,19 @@ int3 GetFirstSuitableWorkGroup(const std::vector& wgs, int max_wg_size) { return {1, 1, 1}; } -int GetRecommendedBlockSizeForConv(const DeviceInfo& device_info, +int GetRecommendedBlockSizeForConv(const GpuInfo& gpu_info, CalculationsPrecision precision, int task_size) { const float task_size_per_cu = - task_size / static_cast(device_info.compute_units_count); + task_size / static_cast(gpu_info.compute_units_count); int block_size = 1; float threshold_1 = FLT_MAX; float threshold_2 = FLT_MAX; float threshold_4 = FLT_MAX; - if (!device_info.IsMali()) { + if (!gpu_info.IsMali()) { return 1; } - MaliInfo mali_info = device_info.mali_info; + MaliInfo mali_info = gpu_info.mali_info; switch (precision) { case CalculationsPrecision::F16: if (mali_info.IsBifrostGen1()) { diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/util.h b/tensorflow/lite/delegates/gpu/cl/kernels/util.h index 8656da1b687..519f1f117b2 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/util.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/util.h @@ -210,7 +210,7 @@ float4 GetMaskForLastPlane(int channels); int3 GetFirstSuitableWorkGroup(const std::vector& wgs, int 
max_wg_size); // task_size as amount of FLT4 processed elements. -int GetRecommendedBlockSizeForConv(const DeviceInfo& device, +int GetRecommendedBlockSizeForConv(const GpuInfo& gpu_info, CalculationsPrecision precision, int task_size); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc index 71bbb93fa91..6af8e429435 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc @@ -33,15 +33,15 @@ namespace cl { Winograd4x4To36::Winograd4x4To36(const OperationDef& definition, const Padding2D& padding, - const DeviceInfo& device_info) + const GpuInfo& gpu_info) : GPUOperation(definition), padding_(padding) { work_group_size_ = int3(32, 1, 1); code_ = GetWinograd4x4To36Code(definition_); - if (device_info.IsAdreno()) { + if (gpu_info.IsAdreno()) { compiler_options_.push_back(CompilerOptions::ADRENO_MORE_WAVES); } if (definition_.precision == CalculationsPrecision::F16 && - device_info.IsPowerVR()) { + gpu_info.IsPowerVR()) { compiler_options_.push_back(CompilerOptions::POWERVR_FP16); } } @@ -282,11 +282,11 @@ int3 Winograd4x4To36::GetGridSize() const { } void Winograd4x4To36::GetPossibleKernelWorkGroups( - TuningType tuning_type, const DeviceInfo& device_info, + TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, std::vector* work_groups) const { switch (tuning_type) { case TuningType::EXHAUSTIVE: - GetPossibleWorkGroups(tuning_type, device_info, kernel_info, grid_size_, + GetPossibleWorkGroups(tuning_type, gpu_info, kernel_info, grid_size_, work_groups); return; case TuningType::FAST: @@ -296,20 +296,20 @@ void Winograd4x4To36::GetPossibleKernelWorkGroups( } } -Winograd4x4To36 CreateWinograd4x4To36(const DeviceInfo& device_info, +Winograd4x4To36 CreateWinograd4x4To36(const GpuInfo& gpu_info, const OperationDef& definition, const Padding2D& padding) { - Winograd4x4To36 result(definition, padding, 
device_info); + Winograd4x4To36 result(definition, padding, gpu_info); result.UploadBt(); return result; } Winograd36To4x4::Winograd36To4x4(const OperationDef& definition, - const DeviceInfo& device_info) + const GpuInfo& gpu_info) : GPUOperation(definition) { work_group_size_ = int3(32, 1, 1); if (definition_.precision == CalculationsPrecision::F16 && - device_info.IsPowerVR()) { + gpu_info.IsPowerVR()) { compiler_options_.push_back(CompilerOptions::POWERVR_FP16); } code_ = GetWinograd36To4x4Code(definition_); @@ -478,11 +478,11 @@ int3 Winograd36To4x4::GetGridSize() const { } void Winograd36To4x4::GetPossibleKernelWorkGroups( - TuningType tuning_type, const DeviceInfo& device_info, + TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, std::vector* work_groups) const { switch (tuning_type) { case TuningType::EXHAUSTIVE: - GetPossibleWorkGroups(tuning_type, device_info, kernel_info, grid_size_, + GetPossibleWorkGroups(tuning_type, gpu_info, kernel_info, grid_size_, work_groups); return; case TuningType::FAST: @@ -493,9 +493,9 @@ void Winograd36To4x4::GetPossibleKernelWorkGroups( } Winograd36To4x4 CreateWinograd36To4x4( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const tflite::gpu::Tensor& biases) { - Winograd36To4x4 result(definition, device_info); + Winograd36To4x4 result(definition, gpu_info); TensorLinearDescriptor desc; desc.storage_type = LinearStorageType::TEXTURE_2D; desc.element_type = definition.GetDataType(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h index 56134d20bd9..11430c99c0b 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h @@ -34,11 +34,11 @@ class Winograd4x4To36 : public GPUOperation { public: Winograd4x4To36() = default; Winograd4x4To36(const OperationDef& definition, const Padding2D& 
padding, - const DeviceInfo& device_info); + const GpuInfo& gpu_info); absl::Status BindArguments(ArgumentsBinder* args) override; int3 GetGridSize() const override; void GetPossibleKernelWorkGroups( - TuningType tuning_type, const DeviceInfo& device_info, + TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, std::vector* work_groups) const override; @@ -49,7 +49,7 @@ class Winograd4x4To36 : public GPUOperation { Winograd4x4To36& operator=(const Winograd4x4To36&) = delete; private: - friend Winograd4x4To36 CreateWinograd4x4To36(const DeviceInfo& device_info, + friend Winograd4x4To36 CreateWinograd4x4To36(const GpuInfo& gpu_info, const OperationDef& definition, const Padding2D& padding); @@ -63,19 +63,18 @@ class Winograd4x4To36 : public GPUOperation { Padding2D padding_; }; -Winograd4x4To36 CreateWinograd4x4To36(const DeviceInfo& device_info, +Winograd4x4To36 CreateWinograd4x4To36(const GpuInfo& gpu_info, const OperationDef& definition, const Padding2D& padding); class Winograd36To4x4 : public GPUOperation { public: Winograd36To4x4() = default; - Winograd36To4x4(const OperationDef& definition, - const DeviceInfo& device_info); + Winograd36To4x4(const OperationDef& definition, const GpuInfo& gpu_info); absl::Status BindArguments(ArgumentsBinder* args) override; int3 GetGridSize() const override; void GetPossibleKernelWorkGroups( - TuningType tuning_type, const DeviceInfo& device_info, + TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, std::vector* work_groups) const override; @@ -87,7 +86,7 @@ class Winograd36To4x4 : public GPUOperation { private: friend Winograd36To4x4 CreateWinograd36To4x4( - const DeviceInfo& device_info, const OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const tflite::gpu::Tensor& biases); void UploadAt(); @@ -99,7 +98,7 @@ class Winograd36To4x4 : public GPUOperation { }; Winograd36To4x4 CreateWinograd36To4x4( - const DeviceInfo& device_info, const 
OperationDef& definition, + const GpuInfo& gpu_info, const OperationDef& definition, const tflite::gpu::Tensor& biases); } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/winograd_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/winograd_test.cc index bbc89ffdc6b..c7a7f1e691c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/winograd_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/winograd_test.cc @@ -92,7 +92,7 @@ TEST_F(OpenCLOperationTest, Winograd4x4To36) { padding.prepended = HW(1, 1); padding.appended = HW(1, 1); Winograd4x4To36 wino_up = CreateWinograd4x4To36( - creation_context_.GetDeviceInfo(), op_def, padding); + creation_context_.GetGpuInfo(), op_def, padding); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &wino_up, BHWC(1, 36, 1, 1), &dst_tensor)); EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), dst_ref.data)); @@ -159,8 +159,8 @@ TEST_F(OpenCLOperationTest, Winograd36To4x4) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - Winograd36To4x4 wino_down = CreateWinograd36To4x4( - creation_context_.GetDeviceInfo(), op_def, biases); + Winograd36To4x4 wino_down = + CreateWinograd36To4x4(creation_context_.GetGpuInfo(), op_def, biases); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &wino_down, BHWC(1, 4, 4, 1), &dst_tensor)); EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), dst_ref.data)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc index 817e4613a36..d30675f798f 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc @@ -35,7 +35,7 @@ std::vector Get2DWorkgroupsEqualTo128() { std::vector GenerateWorkGroupSizesXYMultipleOf( int multiplier, int3 grid, const KernelInfo& kernel_info, - const 
DeviceInfo& device_info, WorkGroupSizeAlignment z_alignment) { + const GpuInfo& gpu_info, WorkGroupSizeAlignment z_alignment) { std::vector work_groups; work_groups.reserve(32); @@ -52,9 +52,9 @@ std::vector GenerateWorkGroupSizesXYMultipleOf( if (work_group_size_xy * z > kernel_info.max_work_group_size) { continue; } - if (x <= device_info.max_work_group_size_x && - y <= device_info.max_work_group_size_y && - z <= device_info.max_work_group_size_z) { + if (x <= gpu_info.max_work_group_size_x && + y <= gpu_info.max_work_group_size_y && + z <= gpu_info.max_work_group_size_z) { work_groups.push_back({x, y, z}); } } @@ -65,7 +65,7 @@ std::vector GenerateWorkGroupSizesXYMultipleOf( std::vector GenerateWorkGroupSizesXMultipleOf( int multiplier, int3 grid, const KernelInfo& kernel_info, - const DeviceInfo& device_info, WorkGroupSizeAlignment z_alignment) { + const GpuInfo& gpu_info, WorkGroupSizeAlignment z_alignment) { std::vector work_groups; work_groups.reserve(32); @@ -78,9 +78,9 @@ std::vector GenerateWorkGroupSizesXMultipleOf( x += multiplier) { for (auto y : possible_y_sizes) { for (auto z : possible_z_sizes) { - if (x <= device_info.max_work_group_size_x && - y <= device_info.max_work_group_size_y && - z <= device_info.max_work_group_size_z && + if (x <= gpu_info.max_work_group_size_x && + y <= gpu_info.max_work_group_size_y && + z <= gpu_info.max_work_group_size_z && x * y * z <= kernel_info.max_work_group_size) { work_groups.push_back({x, y, z}); } @@ -90,13 +90,13 @@ std::vector GenerateWorkGroupSizesXMultipleOf( return work_groups; } -void GetWorkGroupsAlignedToGrid(const DeviceInfo& device_info, +void GetWorkGroupsAlignedToGrid(const GpuInfo& gpu_info, const KernelInfo& kernel_info, const int3& grid, std::vector* work_groups) { int3 max_wg_size; - max_wg_size.x = device_info.max_work_group_size_x; - max_wg_size.y = device_info.max_work_group_size_y; - max_wg_size.z = device_info.max_work_group_size_z; + max_wg_size.x = gpu_info.max_work_group_size_x; + 
max_wg_size.y = gpu_info.max_work_group_size_y; + max_wg_size.z = gpu_info.max_work_group_size_z; GenerateWorkGroupSizesAlignedToGrid( grid, max_wg_size, kernel_info.max_work_group_size, work_groups); } @@ -214,24 +214,22 @@ int3 GetWorkGroupConv(const int3& grid, int max_size, int max_z_size) { return int3(wg_x, wg_y, wg_z); } -void GetPossibleWorkGroupsXYMultipleOf(int multiplier, - const DeviceInfo& device_info, +void GetPossibleWorkGroupsXYMultipleOf(int multiplier, const GpuInfo& gpu_info, const KernelInfo& kernel_info, const int3& grid, WorkGroupSizeAlignment z_alignment, std::vector* work_groups) { *work_groups = GenerateWorkGroupSizesXYMultipleOf( - multiplier, grid, kernel_info, device_info, z_alignment); + multiplier, grid, kernel_info, gpu_info, z_alignment); } -void GetPossibleWorkGroupsXMultipleOf(int multiplier, - const DeviceInfo& device_info, +void GetPossibleWorkGroupsXMultipleOf(int multiplier, const GpuInfo& gpu_info, const KernelInfo& kernel_info, const int3& grid, WorkGroupSizeAlignment z_alignment, std::vector* work_groups) { *work_groups = GenerateWorkGroupSizesXMultipleOf( - multiplier, grid, kernel_info, device_info, z_alignment); + multiplier, grid, kernel_info, gpu_info, z_alignment); } bool XY128RequiresMoreWorkGroupsThenXY128Linear(int width, int height) { @@ -250,8 +248,7 @@ bool XY128RequiresMoreWorkGroupsThenXY128Linear(int width, int height) { return !have_equal_work_groups; } -void GetPossibleWorkGroups(TuningType tuning_type, - const DeviceInfo& device_info, +void GetPossibleWorkGroups(TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, const int3& grid, std::vector* work_groups) { switch (tuning_type) { @@ -260,7 +257,7 @@ void GetPossibleWorkGroups(TuningType tuning_type, GetWorkGroup(grid, kernel_info.max_work_group_size)); return; case TuningType::EXHAUSTIVE: { - GetWorkGroupsAlignedToGrid(device_info, kernel_info, grid, work_groups); + GetWorkGroupsAlignedToGrid(gpu_info, kernel_info, grid, 
work_groups); return; } default: @@ -269,23 +266,22 @@ void GetPossibleWorkGroups(TuningType tuning_type, } } -void GetPossibleWorkGroupsConv(TuningType tuning_type, - const DeviceInfo& device_info, +void GetPossibleWorkGroupsConv(TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, const int3& grid, std::vector* work_groups) { switch (tuning_type) { case TuningType::FAST: { int max_z_size = 16; - if (device_info.IsAdreno()) { - max_z_size = device_info.adreno_info.IsAdreno3xx() ? 16 : 64; + if (gpu_info.IsAdreno()) { + max_z_size = gpu_info.adreno_info.IsAdreno3xx() ? 16 : 64; } - max_z_size = std::min(max_z_size, device_info.max_work_group_size_z); + max_z_size = std::min(max_z_size, gpu_info.max_work_group_size_z); work_groups->push_back( GetWorkGroupConv(grid, kernel_info.max_work_group_size, max_z_size)); return; } case TuningType::EXHAUSTIVE: { - GetWorkGroupsAlignedToGrid(device_info, kernel_info, grid, work_groups); + GetWorkGroupsAlignedToGrid(gpu_info, kernel_info, grid, work_groups); return; } default: diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h index ea58ff25bc2..90afe44729c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h @@ -29,15 +29,13 @@ namespace gpu { namespace cl { // multiplier can be power of two only -void GetPossibleWorkGroupsXYMultipleOf(int multiplier, - const DeviceInfo& device_info, +void GetPossibleWorkGroupsXYMultipleOf(int multiplier, const GpuInfo& gpu_info, const KernelInfo& kernel_info, const int3& grid, WorkGroupSizeAlignment z_alignment, std::vector* work_groups); -void GetPossibleWorkGroupsXMultipleOf(int multiplier, - const DeviceInfo& device_info, +void GetPossibleWorkGroupsXMultipleOf(int multiplier, const GpuInfo& gpu_info, const KernelInfo& kernel_info, const int3& grid, WorkGroupSizeAlignment z_alignment, @@ 
-50,13 +48,11 @@ int3 GetWorkGroupXY128Conv(const int3& grid); bool XY128RequiresMoreWorkGroupsThenXY128Linear(int width, int height); -void GetPossibleWorkGroups(TuningType tuning_type, - const DeviceInfo& device_info, +void GetPossibleWorkGroups(TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, const int3& grid, std::vector* work_groups); -void GetPossibleWorkGroupsConv(TuningType tuning_type, - const DeviceInfo& device_info, +void GetPossibleWorkGroupsConv(TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, const int3& grid, std::vector* work_groups); diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc index dfeaaaff32e..b6b0131aeb9 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc @@ -31,98 +31,98 @@ namespace { std::unique_ptr SelectConvolutionAdreno( const Convolution2DAttributes& attr, const BHWC& dst_shape, - const DeviceInfo& device_info, const OperationDef& op_def, + const GpuInfo& gpu_info, const OperationDef& op_def, ModelHints hints) { - if (IsConvConstantsSupported(device_info, op_def, attr)) { - GPUOperation conv = CreateConvConstants(device_info, op_def, attr); + if (IsConvConstantsSupported(gpu_info, op_def, attr)) { + GPUOperation conv = CreateConvConstants(gpu_info, op_def, attr); return absl::make_unique(std::move(conv)); } else { - ConvPowerVR conv = CreateConvPowerVR(device_info, op_def, attr, &dst_shape); + ConvPowerVR conv = CreateConvPowerVR(gpu_info, op_def, attr, &dst_shape); return absl::make_unique(std::move(conv)); } } std::unique_ptr SelectConvolutionWinogradAdreno( const Convolution2DAttributes& attr, const BHWC& dst_shape, - const DeviceInfo& device_info, const OperationDef& op_def, + const GpuInfo& gpu_info, const OperationDef& op_def, ModelHints hints) { ConvPowerVR conv = - 
CreateConvPowerVRWino4x4To6x6(device_info, op_def, attr, &dst_shape); + CreateConvPowerVRWino4x4To6x6(gpu_info, op_def, attr, &dst_shape); return absl::make_unique(std::move(conv)); } std::unique_ptr SelectConvolutionDynamicWeightsAdreno( const Convolution2DAttributes& attr, const BHWC& weights_shape, - const BHWC& dst_shape, const DeviceInfo& device_info, + const BHWC& dst_shape, const GpuInfo& gpu_info, const OperationDef& op_def, ModelHints hints, ConvWeightsDescription* weights_desc) { ConvPowerVR conv = CreateConvPowerVRDynamicWeights( - device_info, op_def, attr, weights_shape, &dst_shape); + gpu_info, op_def, attr, weights_shape, &dst_shape); *weights_desc = conv.GetConvWeightsDescription(); return absl::make_unique(std::move(conv)); } std::unique_ptr SelectConvolutionNVidia( const Convolution2DAttributes& attr, const BHWC& dst_shape, - const DeviceInfo& device_info, const OperationDef& op_def) { - if (IsConvConstantsSupported(device_info, op_def, attr)) { - GPUOperation conv = CreateConvConstants(device_info, op_def, attr); + const GpuInfo& gpu_info, const OperationDef& op_def) { + if (IsConvConstantsSupported(gpu_info, op_def, attr)) { + GPUOperation conv = CreateConvConstants(gpu_info, op_def, attr); return absl::make_unique(std::move(conv)); } else { - ConvPowerVR conv = CreateConvPowerVR(device_info, op_def, attr, &dst_shape); + ConvPowerVR conv = CreateConvPowerVR(gpu_info, op_def, attr, &dst_shape); return absl::make_unique(std::move(conv)); } } std::unique_ptr SelectConvolutionPowerVR( - const Convolution2DAttributes& attr, const DeviceInfo& device_info, + const Convolution2DAttributes& attr, const GpuInfo& gpu_info, const OperationDef& op_def) { - ConvPowerVR conv = CreateConvPowerVR(device_info, op_def, attr); + ConvPowerVR conv = CreateConvPowerVR(gpu_info, op_def, attr); return absl::make_unique(std::move(conv)); } std::unique_ptr SelectConvolutionMali( const Convolution2DAttributes& attr, const BHWC& dst_shape, - const DeviceInfo& device_info, 
const OperationDef& op_def) { + const GpuInfo& gpu_info, const OperationDef& op_def) { if (op_def.src_tensors[0].storage_type == TensorStorageType::BUFFER && IsConvBuffer1x1Supported(op_def, attr)) { ConvBuffer1x1 conv = - CreateConvBuffer1x1(device_info, op_def, attr, &dst_shape); + CreateConvBuffer1x1(gpu_info, op_def, attr, &dst_shape); return absl::make_unique(std::move(conv)); } else { - ConvPowerVR conv = CreateConvPowerVR(device_info, op_def, attr, &dst_shape); + ConvPowerVR conv = CreateConvPowerVR(gpu_info, op_def, attr, &dst_shape); return absl::make_unique(std::move(conv)); } } std::unique_ptr SelectConvolutionWinogradMali( const Convolution2DAttributes& attr, const BHWC& dst_shape, - const DeviceInfo& device_info, const OperationDef& op_def) { + const GpuInfo& gpu_info, const OperationDef& op_def) { if (op_def.src_tensors[0].storage_type == TensorStorageType::BUFFER) { ConvBuffer1x1 conv = - CreateConvBuffer1x1Wino4x4To6x6(device_info, op_def, attr, &dst_shape); + CreateConvBuffer1x1Wino4x4To6x6(gpu_info, op_def, attr, &dst_shape); return absl::make_unique(std::move(conv)); } else { ConvPowerVR conv = - CreateConvPowerVRWino4x4To6x6(device_info, op_def, attr, &dst_shape); + CreateConvPowerVRWino4x4To6x6(gpu_info, op_def, attr, &dst_shape); return absl::make_unique(std::move(conv)); } } std::unique_ptr SelectConvolutionDynamicWeightsMali( const Convolution2DAttributes& attr, const BHWC& weights_shape, - const BHWC& dst_shape, const DeviceInfo& device_info, + const BHWC& dst_shape, const GpuInfo& gpu_info, const OperationDef& op_def, ModelHints hints, ConvWeightsDescription* weights_desc) { if (op_def.src_tensors[0].storage_type == TensorStorageType::BUFFER && IsConvBuffer1x1Supported(op_def, weights_shape, attr)) { ConvBuffer1x1 conv = CreateConvBuffer1x1DynamicWeights( - device_info, op_def, attr, weights_shape, &dst_shape); + gpu_info, op_def, attr, weights_shape, &dst_shape); *weights_desc = conv.GetConvWeightsDescription(); return 
absl::make_unique(std::move(conv)); } else { ConvPowerVR conv = CreateConvPowerVRDynamicWeights( - device_info, op_def, attr, weights_shape, &dst_shape); + gpu_info, op_def, attr, weights_shape, &dst_shape); *weights_desc = conv.GetConvWeightsDescription(); return absl::make_unique(std::move(conv)); } @@ -132,58 +132,58 @@ std::unique_ptr SelectConvolutionDynamicWeightsMali( std::unique_ptr SelectConvolution( const Convolution2DAttributes& attr, const BHWC& dst_shape, - const DeviceInfo& device_info, const OperationDef& op_def, + const GpuInfo& gpu_info, const OperationDef& op_def, ModelHints hints) { - if (device_info.IsAdreno()) { - return SelectConvolutionAdreno(attr, dst_shape, device_info, op_def, hints); - } else if (device_info.IsPowerVR() || device_info.IsAMD() || - device_info.IsIntel()) { - return SelectConvolutionPowerVR(attr, device_info, op_def); - } else if (device_info.IsNvidia()) { - return SelectConvolutionNVidia(attr, dst_shape, device_info, op_def); - } else if (device_info.IsMali()) { - return SelectConvolutionMali(attr, dst_shape, device_info, op_def); + if (gpu_info.IsAdreno()) { + return SelectConvolutionAdreno(attr, dst_shape, gpu_info, op_def, hints); + } else if (gpu_info.IsPowerVR() || gpu_info.IsAMD() || + gpu_info.IsIntel()) { + return SelectConvolutionPowerVR(attr, gpu_info, op_def); + } else if (gpu_info.IsNvidia()) { + return SelectConvolutionNVidia(attr, dst_shape, gpu_info, op_def); + } else if (gpu_info.IsMali()) { + return SelectConvolutionMali(attr, dst_shape, gpu_info, op_def); } else { - return SelectConvolutionAdreno(attr, dst_shape, device_info, op_def, hints); + return SelectConvolutionAdreno(attr, dst_shape, gpu_info, op_def, hints); } } std::unique_ptr SelectConvolutionForWinograd( const Convolution2DAttributes& attr, const BHWC& dst_shape, - const DeviceInfo& device_info, const OperationDef& op_def, + const GpuInfo& gpu_info, const OperationDef& op_def, ModelHints hints) { - if (device_info.IsAdreno()) { - return 
SelectConvolutionWinogradAdreno(attr, dst_shape, device_info, op_def, + if (gpu_info.IsAdreno()) { + return SelectConvolutionWinogradAdreno(attr, dst_shape, gpu_info, op_def, hints); - } else if (device_info.IsPowerVR() || device_info.IsAMD() || - device_info.IsNvidia() || device_info.IsIntel()) { + } else if (gpu_info.IsPowerVR() || gpu_info.IsAMD() || + gpu_info.IsNvidia() || gpu_info.IsIntel()) { ConvPowerVR conv = - CreateConvPowerVRWino4x4To6x6(device_info, op_def, attr, &dst_shape); + CreateConvPowerVRWino4x4To6x6(gpu_info, op_def, attr, &dst_shape); return absl::make_unique(std::move(conv)); - } else if (device_info.IsMali()) { - return SelectConvolutionWinogradMali(attr, dst_shape, device_info, op_def); + } else if (gpu_info.IsMali()) { + return SelectConvolutionWinogradMali(attr, dst_shape, gpu_info, op_def); } else { - return SelectConvolutionWinogradAdreno(attr, dst_shape, device_info, op_def, + return SelectConvolutionWinogradAdreno(attr, dst_shape, gpu_info, op_def, hints); } } std::unique_ptr SelectConvolutionWithDynamicWeights( const Convolution2DAttributes& attr, const BHWC& weights_shape, - const BHWC& dst_shape, const DeviceInfo& device_info, + const BHWC& dst_shape, const GpuInfo& gpu_info, const OperationDef& op_def, ModelHints hints, ConvWeightsDescription* weights_desc) { - if (device_info.IsAdreno()) { + if (gpu_info.IsAdreno()) { return SelectConvolutionDynamicWeightsAdreno(attr, weights_shape, dst_shape, - device_info, op_def, hints, + gpu_info, op_def, hints, weights_desc); - } else if (device_info.IsMali()) { + } else if (gpu_info.IsMali()) { return SelectConvolutionDynamicWeightsMali(attr, weights_shape, dst_shape, - device_info, op_def, hints, + gpu_info, op_def, hints, weights_desc); } else { ConvPowerVR conv = CreateConvPowerVRDynamicWeights( - device_info, op_def, attr, weights_shape, &dst_shape); + gpu_info, op_def, attr, weights_shape, &dst_shape); *weights_desc = conv.GetConvWeightsDescription(); return 
absl::make_unique(std::move(conv)); } diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.h b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.h index f2bacab304c..4c4c57870d1 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.h +++ b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.h @@ -31,19 +31,16 @@ namespace cl { std::unique_ptr SelectConvolution( const Convolution2DAttributes& attr, const BHWC& dst_shape, - const DeviceInfo& device_info, const OperationDef& op_def, - ModelHints hints); + const GpuInfo& gpu_info, const OperationDef& op_def, ModelHints hints); std::unique_ptr SelectConvolutionForWinograd( const Convolution2DAttributes& attr, const BHWC& dst_shape, - const DeviceInfo& device_info, const OperationDef& op_def, - ModelHints hints); + const GpuInfo& gpu_info, const OperationDef& op_def, ModelHints hints); std::unique_ptr SelectConvolutionWithDynamicWeights( const Convolution2DAttributes& attr, const BHWC& weights_shape, - const BHWC& dst_shape, const DeviceInfo& device_info, - const OperationDef& op_def, ModelHints hints, - ConvWeightsDescription* weights_desc); + const BHWC& dst_shape, const GpuInfo& gpu_info, const OperationDef& op_def, + ModelHints hints, ConvWeightsDescription* weights_desc); std::unique_ptr SelectConverterToConvWeights( const ConvWeightsDescription& weights_desc, const OperationDef& op_def, diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.cc index 2248d5a71e1..bf1f6b51372 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.cc @@ -29,70 +29,70 @@ namespace cl { namespace { std::unique_ptr SelectConvolutionTransposedAdreno( - const ConvolutionTransposedAttributes& attr, const DeviceInfo& device_info, + const 
ConvolutionTransposedAttributes& attr, const GpuInfo& gpu_info, const OperationDef& op_def) { if (IsConvolutionTransposedThinSupported(attr)) { ConvolutionTransposedThin conv = - CreateConvolutionTransposedThin(device_info, op_def, attr); + CreateConvolutionTransposedThin(gpu_info, op_def, attr); return absl::make_unique(std::move(conv)); } else if (IsConvolutionTransposed3x3ThinSupported(attr)) { ConvolutionTransposed3x3Thin conv = - CreateConvolutionTransposed3x3Thin(device_info, op_def, attr); + CreateConvolutionTransposed3x3Thin(gpu_info, op_def, attr); return absl::make_unique(std::move(conv)); } else { ConvolutionTransposed conv = - CreateConvolutionTransposed(device_info, op_def, attr); + CreateConvolutionTransposed(gpu_info, op_def, attr); return absl::make_unique(std::move(conv)); } } std::unique_ptr SelectConvolutionTransposedPowerVR( - const ConvolutionTransposedAttributes& attr, const DeviceInfo& device_info, + const ConvolutionTransposedAttributes& attr, const GpuInfo& gpu_info, const OperationDef& op_def) { if (IsConvolutionTransposedThinSupported(attr)) { ConvolutionTransposedThin conv = - CreateConvolutionTransposedThin(device_info, op_def, attr); + CreateConvolutionTransposedThin(gpu_info, op_def, attr); return absl::make_unique(std::move(conv)); } else if (IsConvolutionTransposed3x3ThinSupported(attr)) { ConvolutionTransposed3x3Thin conv = - CreateConvolutionTransposed3x3Thin(device_info, op_def, attr); + CreateConvolutionTransposed3x3Thin(gpu_info, op_def, attr); return absl::make_unique(std::move(conv)); } else if (IsConvolutionTransposed3x3Supported(op_def, attr)) { ConvolutionTransposed3x3 conv = - CreateConvolutionTransposed3x3(device_info, op_def, attr); + CreateConvolutionTransposed3x3(gpu_info, op_def, attr); return absl::make_unique(std::move(conv)); } else if (IsConvolutionTransposed4x4Supported(op_def, attr)) { ConvolutionTransposed4x4 conv = - CreateConvolutionTransposed4x4(device_info, op_def, attr); + 
CreateConvolutionTransposed4x4(gpu_info, op_def, attr); return absl::make_unique(std::move(conv)); } else { ConvolutionTransposed conv = - CreateConvolutionTransposed(device_info, op_def, attr); + CreateConvolutionTransposed(gpu_info, op_def, attr); return absl::make_unique(std::move(conv)); } } std::unique_ptr SelectConvolutionTransposedMali( - const ConvolutionTransposedAttributes& attr, const DeviceInfo& device_info, + const ConvolutionTransposedAttributes& attr, const GpuInfo& gpu_info, const OperationDef& op_def) { ConvolutionTransposed conv = - CreateConvolutionTransposed(device_info, op_def, attr); + CreateConvolutionTransposed(gpu_info, op_def, attr); return absl::make_unique(std::move(conv)); } } // namespace std::unique_ptr SelectConvolutionTransposed( - const ConvolutionTransposedAttributes& attr, const DeviceInfo& device_info, + const ConvolutionTransposedAttributes& attr, const GpuInfo& gpu_info, const OperationDef& op_def) { - if (device_info.IsAdreno()) { - return SelectConvolutionTransposedAdreno(attr, device_info, op_def); - } else if (device_info.IsPowerVR() || device_info.IsAMD() || - device_info.IsNvidia() || device_info.IsIntel()) { - return SelectConvolutionTransposedPowerVR(attr, device_info, op_def); - } else if (device_info.IsMali()) { - return SelectConvolutionTransposedMali(attr, device_info, op_def); + if (gpu_info.IsAdreno()) { + return SelectConvolutionTransposedAdreno(attr, gpu_info, op_def); + } else if (gpu_info.IsPowerVR() || gpu_info.IsAMD() || + gpu_info.IsNvidia() || gpu_info.IsIntel()) { + return SelectConvolutionTransposedPowerVR(attr, gpu_info, op_def); + } else if (gpu_info.IsMali()) { + return SelectConvolutionTransposedMali(attr, gpu_info, op_def); } else { - return SelectConvolutionTransposedAdreno(attr, device_info, op_def); + return SelectConvolutionTransposedAdreno(attr, gpu_info, op_def); } } diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.h 
b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.h index fd241766eba..f6b21b3ed6d 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.h +++ b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.h @@ -27,7 +27,7 @@ namespace gpu { namespace cl { std::unique_ptr SelectConvolutionTransposed( - const ConvolutionTransposedAttributes& attr, const DeviceInfo& device_info, + const ConvolutionTransposedAttributes& attr, const GpuInfo& gpu_info, const OperationDef& op_def); } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/default/default_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/default/default_selector.cc index 071ecca942b..bd3f074b70a 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/default/default_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/default/default_selector.cc @@ -27,9 +27,8 @@ namespace tflite { namespace gpu { namespace cl { -absl::Status SelectDefault(const DeviceInfo& device_info, - const OperationDef& op_def, ModelHints hints, - const std::vector& inputs, +absl::Status SelectDefault(const GpuInfo& gpu_info, const OperationDef& op_def, + ModelHints hints, const std::vector& inputs, const std::vector& outputs, const Node& node, GPUOperationsSubgraph* gpu_subgraph) { return absl::UnimplementedError( diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/default_selector.h b/tensorflow/lite/delegates/gpu/cl/selectors/default_selector.h index 137d8231d4d..d11606db4b0 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/default_selector.h +++ b/tensorflow/lite/delegates/gpu/cl/selectors/default_selector.h @@ -30,9 +30,8 @@ namespace tflite { namespace gpu { namespace cl { -absl::Status SelectDefault(const DeviceInfo& device_info, - const OperationDef& op_def, ModelHints hints, - const std::vector& inputs, +absl::Status SelectDefault(const GpuInfo& gpu_info, const OperationDef& op_def, + ModelHints hints, const 
std::vector& inputs, const std::vector& outputs, const Node& node, GPUOperationsSubgraph* gpu_subgraph); diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.cc index 1d4676ae2c7..bbaca58a7a3 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.cc @@ -26,58 +26,58 @@ namespace cl { namespace { std::unique_ptr SelectDWConvolutionAdreno( - const DepthwiseConvolution2DAttributes& attr, const DeviceInfo& device_info, + const DepthwiseConvolution2DAttributes& attr, const GpuInfo& gpu_info, const OperationDef& op_def) { if (IsDepthwiseConv3x3Supported(attr)) { return absl::make_unique( - CreateDepthwiseConv3x3(device_info, op_def, attr)); + CreateDepthwiseConv3x3(gpu_info, op_def, attr)); } else { return absl::make_unique( - CreateDepthwiseConvolution2D(device_info, op_def, attr)); + CreateDepthwiseConvolution2D(gpu_info, op_def, attr)); } } std::unique_ptr SelectDWConvolutionPowerVR( - const DepthwiseConvolution2DAttributes& attr, const DeviceInfo& device_info, + const DepthwiseConvolution2DAttributes& attr, const GpuInfo& gpu_info, const OperationDef& op_def) { if (IsDepthwiseConv3x3Supported(attr)) { return absl::make_unique( - CreateDepthwiseConv3x3(device_info, op_def, attr)); + CreateDepthwiseConv3x3(gpu_info, op_def, attr)); } else { return absl::make_unique( - CreateDepthwiseConvolution2D(device_info, op_def, attr)); + CreateDepthwiseConvolution2D(gpu_info, op_def, attr)); } } std::unique_ptr SelectDWConvolutionMali( - const DepthwiseConvolution2DAttributes& attr, const DeviceInfo& device_info, + const DepthwiseConvolution2DAttributes& attr, const GpuInfo& gpu_info, const OperationDef& op_def) { const auto storage_type = op_def.src_tensors[0].storage_type; bool buffer_type = storage_type == TensorStorageType::BUFFER || storage_type == 
TensorStorageType::IMAGE_BUFFER; - const MaliInfo mali_info = device_info.mali_info; + const MaliInfo mali_info = gpu_info.mali_info; if (IsDepthwiseConv3x3Supported(attr) && !mali_info.IsMidgard() && !buffer_type && op_def.precision != CalculationsPrecision::F32) { return absl::make_unique( - CreateDepthwiseConv3x3(device_info, op_def, attr)); + CreateDepthwiseConv3x3(gpu_info, op_def, attr)); } else { return absl::make_unique( - CreateDepthwiseConvolution2D(device_info, op_def, attr)); + CreateDepthwiseConvolution2D(gpu_info, op_def, attr)); } } } // namespace std::unique_ptr SelectDWConvolution( - const DepthwiseConvolution2DAttributes& attr, const DeviceInfo& device_info, + const DepthwiseConvolution2DAttributes& attr, const GpuInfo& gpu_info, const OperationDef& op_def) { - if (device_info.IsAdreno()) { - return SelectDWConvolutionAdreno(attr, device_info, op_def); - } else if (device_info.IsPowerVR()) { - return SelectDWConvolutionPowerVR(attr, device_info, op_def); - } else if (device_info.IsMali()) { - return SelectDWConvolutionMali(attr, device_info, op_def); + if (gpu_info.IsAdreno()) { + return SelectDWConvolutionAdreno(attr, gpu_info, op_def); + } else if (gpu_info.IsPowerVR()) { + return SelectDWConvolutionPowerVR(attr, gpu_info, op_def); + } else if (gpu_info.IsMali()) { + return SelectDWConvolutionMali(attr, gpu_info, op_def); } else { - return SelectDWConvolutionAdreno(attr, device_info, op_def); + return SelectDWConvolutionAdreno(attr, gpu_info, op_def); } } diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.h b/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.h index 2147b9773e2..647bee97f3d 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.h +++ b/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.h @@ -27,7 +27,7 @@ namespace gpu { namespace cl { std::unique_ptr SelectDWConvolution( - const DepthwiseConvolution2DAttributes& attr, const DeviceInfo& 
device_info, + const DepthwiseConvolution2DAttributes& attr, const GpuInfo& gpu_info, const OperationDef& op_def); } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.cc index 6c6ee044cdd..92569a0d8ce 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.cc @@ -27,74 +27,74 @@ namespace gpu { namespace cl { std::unique_ptr SelectFullyConnectedGeneric( - const FullyConnectedAttributes& attr, const DeviceInfo& device_info, + const FullyConnectedAttributes& attr, const GpuInfo& gpu_info, const OperationDef& op_def, int batch_size) { if (op_def.IsBatchSupported()) { BHWC dst_shape = BHWC(batch_size, 1, 1, attr.weights.shape.o); - ConvPowerVR conv = CreateConvPowerVR(device_info, op_def, attr, &dst_shape); + ConvPowerVR conv = CreateConvPowerVR(gpu_info, op_def, attr, &dst_shape); return absl::make_unique(std::move(conv)); } else { - FullyConnected fc = CreateFullyConnected(device_info, op_def, attr); + FullyConnected fc = CreateFullyConnected(gpu_info, op_def, attr); return absl::make_unique(std::move(fc)); } } std::unique_ptr SelectFullyConnectedAdreno( - const FullyConnectedAttributes& attr, const DeviceInfo& device_info, + const FullyConnectedAttributes& attr, const GpuInfo& gpu_info, const OperationDef& op_def, int batch_size) { if (op_def.IsBatchSupported()) { BHWC dst_shape = BHWC(batch_size, 1, 1, attr.weights.shape.o); - ConvPowerVR conv = CreateConvPowerVR(device_info, op_def, attr, &dst_shape); + ConvPowerVR conv = CreateConvPowerVR(gpu_info, op_def, attr, &dst_shape); return absl::make_unique(std::move(conv)); } else { - FullyConnected fc = CreateFullyConnected(device_info, op_def, attr); + FullyConnected fc = CreateFullyConnected(gpu_info, op_def, attr); return absl::make_unique(std::move(fc)); } } std::unique_ptr 
SelectFullyConnectedPowerVR( - const FullyConnectedAttributes& attr, const DeviceInfo& device_info, + const FullyConnectedAttributes& attr, const GpuInfo& gpu_info, const OperationDef& op_def, int batch_size) { if (op_def.IsBatchSupported()) { - ConvPowerVR conv = CreateConvPowerVR(device_info, op_def, attr); + ConvPowerVR conv = CreateConvPowerVR(gpu_info, op_def, attr); return absl::make_unique(std::move(conv)); } else { - FullyConnected fc = CreateFullyConnected(device_info, op_def, attr); + FullyConnected fc = CreateFullyConnected(gpu_info, op_def, attr); return absl::make_unique(std::move(fc)); } } std::unique_ptr SelectFullyConnectedMali( - const FullyConnectedAttributes& attr, const DeviceInfo& device_info, + const FullyConnectedAttributes& attr, const GpuInfo& gpu_info, const OperationDef& op_def, int batch_size) { if (op_def.IsBatchSupported()) { if (op_def.src_tensors[0].storage_type == TensorStorageType::BUFFER) { - ConvBuffer1x1 conv = CreateConvBuffer1x1(device_info, op_def, attr); + ConvBuffer1x1 conv = CreateConvBuffer1x1(gpu_info, op_def, attr); return absl::make_unique(std::move(conv)); } else { BHWC dst_shape = BHWC(batch_size, 1, 1, attr.weights.shape.o); ConvPowerVR conv = - CreateConvPowerVR(device_info, op_def, attr, &dst_shape); + CreateConvPowerVR(gpu_info, op_def, attr, &dst_shape); return absl::make_unique(std::move(conv)); } } else { - FullyConnected fc = CreateFullyConnected(device_info, op_def, attr); + FullyConnected fc = CreateFullyConnected(gpu_info, op_def, attr); return absl::make_unique(std::move(fc)); } } std::unique_ptr SelectFullyConnected( - const FullyConnectedAttributes& attr, const DeviceInfo& device_info, + const FullyConnectedAttributes& attr, const GpuInfo& gpu_info, const OperationDef& op_def, int batch_size) { - if (device_info.IsAdreno()) { - return SelectFullyConnectedAdreno(attr, device_info, op_def, batch_size); - } else if (device_info.IsPowerVR() || device_info.IsAMD() || - device_info.IsNvidia() || 
device_info.IsIntel()) { - return SelectFullyConnectedPowerVR(attr, device_info, op_def, batch_size); - } else if (device_info.IsMali()) { - return SelectFullyConnectedMali(attr, device_info, op_def, batch_size); + if (gpu_info.IsAdreno()) { + return SelectFullyConnectedAdreno(attr, gpu_info, op_def, batch_size); + } else if (gpu_info.IsPowerVR() || gpu_info.IsAMD() || + gpu_info.IsNvidia() || gpu_info.IsIntel()) { + return SelectFullyConnectedPowerVR(attr, gpu_info, op_def, batch_size); + } else if (gpu_info.IsMali()) { + return SelectFullyConnectedMali(attr, gpu_info, op_def, batch_size); } else { - return SelectFullyConnectedGeneric(attr, device_info, op_def, batch_size); + return SelectFullyConnectedGeneric(attr, gpu_info, op_def, batch_size); } } diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.h b/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.h index 197c243c5d5..5a2639f26f3 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.h +++ b/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.h @@ -27,7 +27,7 @@ namespace gpu { namespace cl { std::unique_ptr SelectFullyConnected( - const FullyConnectedAttributes& attr, const DeviceInfo& device_info, + const FullyConnectedAttributes& attr, const GpuInfo& gpu_info, const OperationDef& op_def, int batch_size); } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc index ed63288b8da..4b65c242235 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc @@ -41,7 +41,7 @@ namespace gpu { namespace cl { namespace { bool IsSuitableForWinograd4x4To6x6(const Convolution2DAttributes& attr, - const DeviceInfo& device_info, + const GpuInfo& gpu_info, const BHWC& dst_shape) { const int tiles_x = DivideRoundUp(dst_shape.w, 4); const int 
tiles_y = DivideRoundUp(dst_shape.h, 4); @@ -51,22 +51,22 @@ bool IsSuitableForWinograd4x4To6x6(const Convolution2DAttributes& attr, attr.weights.shape.w == 3 && attr.weights.shape.h == 3 && attr.dilations == HW(1, 1) && attr.strides == HW(1, 1); // Mali among other devices has smaller SIMD line size - const int min_depth = device_info.IsMali() ? 16 : 32; - const int min_hw = device_info.IsMali() ? 32 : 128; + const int min_depth = gpu_info.IsMali() ? 16 : 32; + const int min_hw = gpu_info.IsMali() ? 32 : 128; const bool recommended_channels = dst_depth % 4 == 0 && src_depth >= min_depth && dst_depth >= min_depth; const bool recommended_hw = tiles_x * tiles_y >= min_hw; return suitable_attributes && recommended_channels && recommended_hw; } -absl::Status WinogradFromNode(const DeviceInfo& device_info, +absl::Status WinogradFromNode(const GpuInfo& gpu_info, const std::vector& inputs, const std::vector& outputs, const OperationDef& op_def, ModelHints hints, const BHWC& input_shape, const BHWC& output_shape, const Convolution2DAttributes& attr, GPUOperationsSubgraph* gpu_subgraph) { - if (!IsSuitableForWinograd4x4To6x6(attr, device_info, output_shape)) { + if (!IsSuitableForWinograd4x4To6x6(attr, gpu_info, output_shape)) { return absl::UnimplementedError("No implementation for this case."); } @@ -76,13 +76,13 @@ absl::Status WinogradFromNode(const DeviceInfo& device_info, const BHWC shape_1{input_shape.b, 36, tiles_x * tiles_y, output_shape.c}; TensorDescriptor td_0; td_0.storage_type = SelectBestStorageType( - device_info, shape_0, op_def.src_tensors[0].storage_type, + gpu_info, shape_0, op_def.src_tensors[0].storage_type, op_def.src_tensors[0].data_type, op_def.src_tensors[0].layout); td_0.data_type = op_def.src_tensors[0].data_type; td_0.layout = op_def.src_tensors[0].layout; TensorDescriptor td_1; td_1.storage_type = SelectBestStorageType( - device_info, shape_1, op_def.src_tensors[0].storage_type, + gpu_info, shape_1, op_def.src_tensors[0].storage_type, 
op_def.src_tensors[0].data_type, op_def.src_tensors[0].layout); td_1.data_type = op_def.src_tensors[0].data_type; td_1.layout = op_def.src_tensors[0].layout; @@ -96,7 +96,7 @@ absl::Status WinogradFromNode(const DeviceInfo& device_info, winograd_up_def.dst_tensors.push_back(td_0); auto& winograd_up = gpu_subgraph->operations[0]; winograd_up.operation = - SelectWinograd4x4To36(device_info, attr.padding, winograd_up_def); + SelectWinograd4x4To36(gpu_info, attr.padding, winograd_up_def); winograd_up.input_ids = {static_cast(inputs[0]->id)}; winograd_up.output_ids = {-1}; @@ -107,7 +107,7 @@ absl::Status WinogradFromNode(const DeviceInfo& device_info, auto& conv = gpu_subgraph->operations[1]; conv.input_ids = {-1}; conv.output_ids = {-2}; - conv.operation = SelectConvolutionForWinograd(attr, input_shape, device_info, + conv.operation = SelectConvolutionForWinograd(attr, input_shape, gpu_info, conv_def, hints); OperationDef winograd_down_def; @@ -123,13 +123,13 @@ absl::Status WinogradFromNode(const DeviceInfo& device_info, bias_copy.data.resize(attr.weights.shape.o); } winograd_down.operation = - SelectWinograd36To4x4(device_info, winograd_down_def, bias_copy); + SelectWinograd36To4x4(gpu_info, winograd_down_def, bias_copy); return absl::OkStatus(); } } // namespace -absl::Status GPUOperationFromNode(const DeviceInfo& device_info, +absl::Status GPUOperationFromNode(const GpuInfo& gpu_info, const OperationDef& op_def, ModelHints hints, const std::vector& inputs, const std::vector& outputs, @@ -159,7 +159,7 @@ absl::Status GPUOperationFromNode(const DeviceInfo& device_info, auto attr = absl::any_cast(node.operation.attributes); GPUOperation operation = - CreateElementwise(device_info, op_def, op_type, attr); + CreateElementwise(gpu_info, op_def, op_type, attr); *gpu_op = absl::make_unique(std::move(operation)); return absl::OkStatus(); } @@ -191,7 +191,7 @@ absl::Status GPUOperationFromNode(const DeviceInfo& device_info, op_def.src_tensors[1].storage_type, Layout::BHWC}; 
transposed_desc.storage_type = SelectBestStorageType( - device_info, weights_shape, transposed_desc.storage_type, + gpu_info, weights_shape, transposed_desc.storage_type, transposed_desc.data_type, transposed_desc.layout); TensorDescriptor weights_desc = {op_def.src_tensors[1].data_type, TensorStorageType::BUFFER, Layout::BHWC}; @@ -206,7 +206,7 @@ absl::Status GPUOperationFromNode(const DeviceInfo& device_info, conv_def.src_tensors[1] = weights_desc; ConvWeightsDescription conv_weights_desc; conv_op.operation = SelectConvolutionWithDynamicWeights( - attr, weights_shape, dst_shape, device_info, conv_def, hints, + attr, weights_shape, dst_shape, gpu_info, conv_def, hints, &conv_weights_desc); int aligned_output = @@ -246,7 +246,7 @@ absl::Status GPUOperationFromNode(const DeviceInfo& device_info, for (int i = 0; i < inputs.size(); ++i) { channels[i] = inputs[i]->tensor.shape.c; } - return SelectConcat(attr, channels, op_def, device_info, gpu_op); + return SelectConcat(attr, channels, op_def, gpu_info, gpu_op); } case OperationType::CONVOLUTION_2D: { auto attr = @@ -254,14 +254,14 @@ absl::Status GPUOperationFromNode(const DeviceInfo& device_info, auto input_shape = inputs[0]->tensor.shape; auto output_shape = outputs[0]->tensor.shape; if (inputs.size() == 1) { - if (WinogradFromNode(device_info, inputs, outputs, op_def, hints, + if (WinogradFromNode(gpu_info, inputs, outputs, op_def, hints, input_shape, output_shape, attr, gpu_subgraph) .ok()) { return absl::OkStatus(); } else { gpu_op = InitSingleOpSubgraph(inputs, outputs, gpu_subgraph); *gpu_op = - SelectConvolution(attr, output_shape, device_info, op_def, hints); + SelectConvolution(attr, output_shape, gpu_info, op_def, hints); return absl::OkStatus(); } } else { @@ -283,7 +283,7 @@ absl::Status GPUOperationFromNode(const DeviceInfo& device_info, conv_def.src_tensors[1] = weights_desc; ConvWeightsDescription conv_weights_desc; conv_op.operation = SelectConvolutionWithDynamicWeights( - attr, weights_shape, 
output_shape, device_info, conv_def, hints, + attr, weights_shape, output_shape, gpu_info, conv_def, hints, &conv_weights_desc); int aligned_output = @@ -309,33 +309,33 @@ absl::Status GPUOperationFromNode(const DeviceInfo& device_info, case OperationType::CONVOLUTION_TRANSPOSED: { auto attr = absl::any_cast( node.operation.attributes); - *gpu_op = SelectConvolutionTransposed(attr, device_info, op_def); + *gpu_op = SelectConvolutionTransposed(attr, gpu_info, op_def); return absl::OkStatus(); } case OperationType::DEPTHWISE_CONVOLUTION: { auto attr = absl::any_cast( node.operation.attributes); if (inputs.size() == 1) { - *gpu_op = SelectDWConvolution(attr, device_info, op_def); + *gpu_op = SelectDWConvolution(attr, gpu_info, op_def); } else { if (inputs[1]->tensor.shape.b != 1) { return absl::UnimplementedError( "No support of depthwise runtime weights with channel multiplier " "!= 1"); } - *gpu_op = SelectDWConvolutionDynamicWeights(attr, device_info, op_def); + *gpu_op = SelectDWConvolutionDynamicWeights(attr, gpu_info, op_def); } return absl::OkStatus(); } case OperationType::FULLY_CONNECTED: { auto attr = absl::any_cast(node.operation.attributes); - *gpu_op = SelectFullyConnected(attr, device_info, op_def, + *gpu_op = SelectFullyConnected(attr, gpu_info, op_def, inputs[0]->tensor.shape.b); return absl::OkStatus(); } case OperationType::LSTM: { - *gpu_op = SelectLSTM(op_def, device_info); + *gpu_op = SelectLSTM(op_def, gpu_info); return absl::OkStatus(); } case OperationType::MAX_UNPOOLING_2D: { @@ -346,11 +346,11 @@ absl::Status GPUOperationFromNode(const DeviceInfo& device_info, } case OperationType::MEAN: { auto attr = absl::any_cast(node.operation.attributes); - return SelectMean(attr, op_def, device_info, gpu_op); + return SelectMean(attr, op_def, gpu_info, gpu_op); } case OperationType::MEAN_STDDEV_NORMALIZATION: { MeanStdDevNormalization operation = CreateMeanStdDevNormalization( - op_def, device_info, (inputs[0]->tensor.shape.c + 3) / 4); + op_def, 
gpu_info, (inputs[0]->tensor.shape.c + 3) / 4); *gpu_op = absl::make_unique(std::move(operation)); return absl::OkStatus(); @@ -368,7 +368,7 @@ absl::Status GPUOperationFromNode(const DeviceInfo& device_info, } case OperationType::PRELU: { auto attr = absl::any_cast(node.operation.attributes); - *gpu_op = SelectPReLU(attr, device_info, op_def); + *gpu_op = SelectPReLU(attr, gpu_info, op_def); return absl::OkStatus(); } case OperationType::QUANTIZE_AND_DEQUANTIZE: { @@ -453,7 +453,7 @@ absl::Status GPUOperationFromNode(const DeviceInfo& device_info, auto attr = absl::any_cast(node.operation.attributes); GPUOperation operation = - CreateElementwise(device_info, op_def, op_type, attr); + CreateElementwise(gpu_info, op_def, op_type, attr); *gpu_op = absl::make_unique(std::move(operation)); return absl::OkStatus(); } @@ -474,7 +474,7 @@ absl::Status GPUOperationFromNode(const DeviceInfo& device_info, return absl::OkStatus(); } default: - return SelectDefault(device_info, op_def, hints, inputs, outputs, node, + return SelectDefault(gpu_info, op_def, hints, inputs, outputs, node, gpu_subgraph); } } diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.h b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.h index b837a2a138f..49fe034ac81 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.h +++ b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.h @@ -29,7 +29,7 @@ namespace tflite { namespace gpu { namespace cl { -absl::Status GPUOperationFromNode(const DeviceInfo& device_info, +absl::Status GPUOperationFromNode(const GpuInfo& gpu_info, const OperationDef& op_def, ModelHints hints, const std::vector& inputs, const std::vector& outputs, diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.cc b/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.cc index 713892f9902..fc56529837b 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.cc +++ 
b/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.cc @@ -47,8 +47,8 @@ namespace gpu { namespace cl { std::unique_ptr SelectLSTM(const OperationDef& op_def, - const DeviceInfo& device_info) { - return absl::make_unique(CreateLSTM(op_def, device_info)); + const GpuInfo& gpu_info) { + return absl::make_unique(CreateLSTM(op_def, gpu_info)); } std::unique_ptr SelectReLU(const ReLUAttributes& attr, @@ -57,10 +57,9 @@ std::unique_ptr SelectReLU(const ReLUAttributes& attr, } std::unique_ptr SelectPReLU(const PReLUAttributes& attr, - const DeviceInfo& device_info, + const GpuInfo& gpu_info, const OperationDef& op_def) { - return absl::make_unique( - CreatePReLU(device_info, op_def, attr)); + return absl::make_unique(CreatePReLU(gpu_info, op_def, attr)); } std::unique_ptr SelectPooling(const Pooling2DAttributes& attr, @@ -89,12 +88,11 @@ absl::Status SelectResize(const Resize2DAttributes& attr, absl::Status SelectConcat(const ConcatAttributes& attr, const std::vector& channels, - const OperationDef& op_def, - const DeviceInfo& device_info, + const OperationDef& op_def, const GpuInfo& gpu_info, std::unique_ptr* ptr) { switch (attr.axis) { case Axis::CHANNELS: { - GPUOperation operation = CreateConcatZ(op_def, channels, device_info); + GPUOperation operation = CreateConcatZ(op_def, channels, gpu_info); *ptr = absl::make_unique(std::move(operation)); return absl::OkStatus(); } @@ -112,10 +110,10 @@ absl::Status SelectConcat(const ConcatAttributes& attr, } std::unique_ptr SelectDWConvolutionDynamicWeights( - const DepthwiseConvolution2DAttributes& attr, const DeviceInfo& device_info, + const DepthwiseConvolution2DAttributes& attr, const GpuInfo& gpu_info, const OperationDef& op_def) { return absl::make_unique( - CreateDepthwiseConvolution2DDynamicWeights(device_info, op_def, attr)); + CreateDepthwiseConvolution2DDynamicWeights(gpu_info, op_def, attr)); } void SelectReshape(int src_channels, int dst_channels, @@ -150,12 +148,12 @@ void SelectStridedSlice(const 
SliceAttributes& attr, const OperationDef& op_def, } absl::Status SelectMean(const MeanAttributes& attr, const OperationDef& op_def, - const DeviceInfo& device_info, + const GpuInfo& gpu_info, std::unique_ptr* ptr) { if (attr.dims != std::set({Axis::HEIGHT, Axis::WIDTH})) { return absl::UnimplementedError("Mean operation supports only HW plane"); } - Mean operation = CreateMean(op_def, device_info); + Mean operation = CreateMean(op_def, gpu_info); *ptr = absl::make_unique(std::move(operation)); return absl::OkStatus(); } @@ -179,17 +177,17 @@ void SelectTranspose(const TransposeAttributes& attr, } std::unique_ptr SelectWinograd4x4To36( - const DeviceInfo& device_info, const Padding2D& padding, + const GpuInfo& gpu_info, const Padding2D& padding, const OperationDef& op_def) { return absl::make_unique( - CreateWinograd4x4To36(device_info, op_def, padding)); + CreateWinograd4x4To36(gpu_info, op_def, padding)); } std::unique_ptr SelectWinograd36To4x4( - const DeviceInfo& device_info, const OperationDef& op_def, + const GpuInfo& gpu_info, const OperationDef& op_def, const tflite::gpu::Tensor& biases) { return absl::make_unique( - CreateWinograd36To4x4(device_info, op_def, biases)); + CreateWinograd36To4x4(gpu_info, op_def, biases)); } std::unique_ptr SelectQuantizeAndDequantize( diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h b/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h index 084298442e3..78617eba614 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h +++ b/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h @@ -29,13 +29,13 @@ namespace gpu { namespace cl { std::unique_ptr SelectLSTM(const OperationDef& op_def, - const DeviceInfo& device_info); + const GpuInfo& gpu_info); std::unique_ptr SelectReLU(const ReLUAttributes& attr, const OperationDef& op_def); std::unique_ptr SelectPReLU(const PReLUAttributes& attr, - const DeviceInfo& device_info, + const GpuInfo& gpu_info, const OperationDef& 
op_def); std::unique_ptr SelectPooling(const Pooling2DAttributes& attr, @@ -53,12 +53,11 @@ absl::Status SelectResize(const Resize2DAttributes& attr, absl::Status SelectConcat(const ConcatAttributes& attr, const std::vector& channels, - const OperationDef& op_def, - const DeviceInfo& device_info, + const OperationDef& op_def, const GpuInfo& gpu_info, std::unique_ptr* ptr); std::unique_ptr SelectDWConvolutionDynamicWeights( - const DepthwiseConvolution2DAttributes& attr, const DeviceInfo& device_info, + const DepthwiseConvolution2DAttributes& attr, const GpuInfo& gpu_info, const OperationDef& op_def); void SelectReshape(int src_channels, int dst_channels, @@ -72,7 +71,7 @@ void SelectStridedSlice(const SliceAttributes& attr, const OperationDef& op_def, std::unique_ptr* ptr); absl::Status SelectMean(const MeanAttributes& attr, const OperationDef& op_def, - const DeviceInfo& device_info, + const GpuInfo& gpu_info, std::unique_ptr* ptr); void SelectSoftmax(const BHWC& shape, const OperationDef& op_def, @@ -86,12 +85,12 @@ void SelectTranspose(const TransposeAttributes& attr, const OperationDef& op_def, std::unique_ptr* ptr); -std::unique_ptr SelectWinograd4x4To36( - const DeviceInfo& device_info, const Padding2D& padding, - const OperationDef& op_def); +std::unique_ptr SelectWinograd4x4To36(const GpuInfo& gpu_info, + const Padding2D& padding, + const OperationDef& op_def); std::unique_ptr SelectWinograd36To4x4( - const DeviceInfo& device_info, const OperationDef& op_def, + const GpuInfo& gpu_info, const OperationDef& op_def, const tflite::gpu::Tensor& biases); std::unique_ptr SelectQuantizeAndDequantize( diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/special_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/special_selector.cc index 885064f0691..2294691319c 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/special_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/special_selector.cc @@ -89,7 +89,7 @@ absl::Status 
TryDepthwiseConvPlus1x1Conv( // fully connected + fully connected + add absl::Status TryFCFCAdd( - const DeviceInfo& device_info, CalculationsPrecision precision, + const GpuInfo& gpu_info, CalculationsPrecision precision, const GraphFloat32& graph, NodeId first_node_id, const std::map& tensor_descriptors, std::set* consumed_nodes, GPUOperationsSubgraph* gpu_subgraph) { @@ -160,7 +160,7 @@ absl::Status TryFCFCAdd( } std::unique_ptr* gpu_op = InitSingleOpSubgraph(fc0_inputs, add_outputs, gpu_subgraph); - FCFCAdd fc = CreateFCFCAdd(device_info, op_def, fc0_attr, fc1_attr); + FCFCAdd fc = CreateFCFCAdd(gpu_info, op_def, fc0_attr, fc1_attr); *gpu_op = absl::make_unique(std::move(fc)); consumed_nodes->insert(fc0_node->id); consumed_nodes->insert(fc1_node->id); @@ -170,12 +170,12 @@ absl::Status TryFCFCAdd( } // namespace absl::Status GPUSubgraphFromGraph( - const DeviceInfo& device_info, CalculationsPrecision precision, + const GpuInfo& gpu_info, CalculationsPrecision precision, const GraphFloat32& graph, NodeId first_node_id, const std::map& tensor_descriptors, std::set* consumed_nodes, GPUOperationsSubgraph* gpu_subgraph, std::string* name) { - if ((device_info.IsAdreno() || device_info.IsNvidia()) && + if ((gpu_info.IsAdreno() || gpu_info.IsNvidia()) && TryDepthwiseConvPlus1x1Conv(precision, graph, first_node_id, tensor_descriptors, consumed_nodes, gpu_subgraph) @@ -183,9 +183,9 @@ absl::Status GPUSubgraphFromGraph( *name = "depthwise_conv_plus_1x1_conv"; return absl::OkStatus(); } - if ((device_info.IsIntel() || device_info.IsNvidia()) && - TryFCFCAdd(device_info, precision, graph, first_node_id, - tensor_descriptors, consumed_nodes, gpu_subgraph) + if ((gpu_info.IsIntel() || gpu_info.IsNvidia()) && + TryFCFCAdd(gpu_info, precision, graph, first_node_id, tensor_descriptors, + consumed_nodes, gpu_subgraph) .ok()) { *name = "fully_connected_x2_and_add"; return absl::OkStatus(); diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/special_selector.h 
b/tensorflow/lite/delegates/gpu/cl/selectors/special_selector.h index 726af0c142f..09ba31f87b0 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/special_selector.h +++ b/tensorflow/lite/delegates/gpu/cl/selectors/special_selector.h @@ -31,7 +31,7 @@ namespace gpu { namespace cl { absl::Status GPUSubgraphFromGraph( - const DeviceInfo& device_info, CalculationsPrecision precision, + const GpuInfo& gpu_info, CalculationsPrecision precision, const GraphFloat32& graph, NodeId first_node_id, const std::map& tensor_descriptors, std::set* consumed_nodes, GPUOperationsSubgraph* gpu_subgraph, diff --git a/tensorflow/lite/delegates/gpu/cl/storage_type_util.cc b/tensorflow/lite/delegates/gpu/cl/storage_type_util.cc index 433e96d2d6a..d579368926e 100644 --- a/tensorflow/lite/delegates/gpu/cl/storage_type_util.cc +++ b/tensorflow/lite/delegates/gpu/cl/storage_type_util.cc @@ -24,7 +24,7 @@ namespace tflite { namespace gpu { namespace cl { -bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWDC& shape, +bool CanCreateTensorWithShape(const GpuInfo& gpu_info, const BHWDC& shape, const TensorDescriptor& descriptor) { const int slices = DivideRoundUp(shape.c, 4); switch (descriptor.storage_type) { @@ -33,61 +33,61 @@ bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWDC& shape, 4 * (descriptor.data_type == DataType::FLOAT32 ? 
4 : 2); const int buffer_size = shape.b * shape.w * shape.h * shape.d * slices * flt4_size; - return buffer_size <= device_info.buffer_max_size; + return buffer_size <= gpu_info.buffer_max_size; } case TensorStorageType::IMAGE_BUFFER: return shape.b * shape.w * shape.h * shape.d * slices <= - device_info.image_buffer_max_size; + gpu_info.image_buffer_max_size; case TensorStorageType::TEXTURE_3D: - if (device_info.cl_version < OpenCLVersion::CL_1_2 && slices == 1) { + if (gpu_info.cl_version < OpenCLVersion::CL_1_2 && slices == 1) { // clCreateImage3D (that used in CL 1.0/1.1) can not create image with // depth = 1 by specification; return false; } - return shape.w * shape.b <= device_info.image3d_max_width && - shape.h <= device_info.image3d_max_height && - slices * shape.d <= device_info.image3d_max_depth; + return shape.w * shape.b <= gpu_info.image3d_max_width && + shape.h <= gpu_info.image3d_max_height && + slices * shape.d <= gpu_info.image3d_max_depth; case TensorStorageType::TEXTURE_ARRAY: // Bug on some Adreno. 
b/131099086 - if (slices == 1 && device_info.IsAdreno() && - !device_info.adreno_info.support_one_layer_texture_array) { + if (slices == 1 && gpu_info.IsAdreno() && + !gpu_info.adreno_info.support_one_layer_texture_array) { return false; } - return shape.w * shape.b <= device_info.image2d_max_width && - shape.h <= device_info.image2d_max_height && - slices * shape.d <= device_info.image_array_max_layers; + return shape.w * shape.b <= gpu_info.image2d_max_width && + shape.h <= gpu_info.image2d_max_height && + slices * shape.d <= gpu_info.image_array_max_layers; case TensorStorageType::TEXTURE_2D: - return shape.w * shape.b * shape.d <= device_info.image2d_max_width && - shape.h * slices <= device_info.image2d_max_height; + return shape.w * shape.b * shape.d <= gpu_info.image2d_max_width && + shape.h * slices <= gpu_info.image2d_max_height; case TensorStorageType::SINGLE_TEXTURE_2D: return shape.c <= 4 && - device_info.SupportsFloatImage2D(descriptor.data_type, shape.c) && - shape.w * shape.b * shape.d <= device_info.image2d_max_width && - shape.h <= device_info.image2d_max_height; + gpu_info.SupportsFloatImage2D(descriptor.data_type, shape.c) && + shape.w * shape.b * shape.d <= gpu_info.image2d_max_width && + shape.h <= gpu_info.image2d_max_height; default: return false; } } -bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWC& shape, +bool CanCreateTensorWithShape(const GpuInfo& gpu_info, const BHWC& shape, const TensorDescriptor& descriptor) { const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c); - return CanCreateTensorWithShape(device_info, shape5D, descriptor); + return CanCreateTensorWithShape(gpu_info, shape5D, descriptor); } -TensorStorageType SelectBestStorageType(const DeviceInfo& device_info, +TensorStorageType SelectBestStorageType(const GpuInfo& gpu_info, const BHWC& shape, const TensorStorageType& desired, const DataType& data_type, const Layout& layout) { - if (CanCreateTensorWithShape(device_info, shape, + if 
(CanCreateTensorWithShape(gpu_info, shape, TensorDescriptor{data_type, desired, layout})) { return desired; } auto GetBestTypeAfterTextureArray = [&]() { - if (device_info.SupportsImageBuffer() && + if (gpu_info.SupportsImageBuffer() && CanCreateTensorWithShape( - device_info, shape, + gpu_info, shape, TensorDescriptor{data_type, TensorStorageType::IMAGE_BUFFER, layout})) { return TensorStorageType::IMAGE_BUFFER; @@ -96,9 +96,9 @@ TensorStorageType SelectBestStorageType(const DeviceInfo& device_info, } }; auto GetBestTypeAfterTexture2D = [&]() { - if (device_info.SupportsTextureArray() && + if (gpu_info.SupportsTextureArray() && CanCreateTensorWithShape( - device_info, shape, + gpu_info, shape, TensorDescriptor{data_type, TensorStorageType::TEXTURE_ARRAY, layout})) { return TensorStorageType::TEXTURE_ARRAY; @@ -108,7 +108,7 @@ TensorStorageType SelectBestStorageType(const DeviceInfo& device_info, }; auto GetBestTypeAfterTexture3D = [&]() { if (CanCreateTensorWithShape( - device_info, shape, + gpu_info, shape, TensorDescriptor{data_type, TensorStorageType::TEXTURE_2D, layout})) { return TensorStorageType::TEXTURE_2D; diff --git a/tensorflow/lite/delegates/gpu/cl/storage_type_util.h b/tensorflow/lite/delegates/gpu/cl/storage_type_util.h index 8fb66e51804..f30219156b4 100644 --- a/tensorflow/lite/delegates/gpu/cl/storage_type_util.h +++ b/tensorflow/lite/delegates/gpu/cl/storage_type_util.h @@ -25,13 +25,13 @@ namespace tflite { namespace gpu { namespace cl { -bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWDC& shape, +bool CanCreateTensorWithShape(const GpuInfo& gpu_info, const BHWDC& shape, const TensorDescriptor& descriptor); -bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWC& shape, +bool CanCreateTensorWithShape(const GpuInfo& gpu_info, const BHWC& shape, const TensorDescriptor& descriptor); -TensorStorageType SelectBestStorageType(const DeviceInfo& device_info, +TensorStorageType SelectBestStorageType(const GpuInfo& 
gpu_info, const BHWC& shape, const TensorStorageType& desired, const DataType& data_type, From d6a4bc49de4e85e78e520a331b9dc0b9a1b2a167 Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Tue, 10 Nov 2020 14:16:50 -0800 Subject: [PATCH 129/220] [XLA/GPU] Cleanup various code: * Instead of doing operands -> slices -> kernel in individual emitters, move them into BuildKernelThunkForMlir as an implementation detail. * Instead of calling ProcessFusionForConversion in emitters, move it to GetOrCreateSubComputationFromRegion as an implementation detail. * Create MlirEmitterContext to capture common bookkeeping. PiperOrigin-RevId: 341695796 Change-Id: I191957d2c8d51a69a71c711e5ffe816e2a0f04e8 --- .../xla/service/gpu/ir_emitter_unnested.cc | 371 ++++++++++-------- .../xla/service/gpu/ir_emitter_unnested.h | 23 +- 2 files changed, 228 insertions(+), 166 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index aac2d36ab79..10a712d7883 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -215,21 +215,6 @@ static bool WritesMlirBuffer(mlir::Operation* op, mlir::Value operand) { }); } -StatusOr> GetMlirBufferSlices( - mlir::Operation* op, mlir::ValueRange operands, - absl::Span allocations) { - std::vector slices; - for (mlir::Value operand : operands) { - slices.emplace_back(); - auto& slice = slices.back(); - TF_ASSIGN_OR_RETURN(slice.buffer_slice, - GetAllocationSliceForMlir(operand, allocations)); - slice.written = WritesMlirBuffer(op, operand); - slice.shape = TypeToShape(operand.getType()); - } - return slices; -} - bool BinarySearchDenseElementsAttr(::mlir::DenseIntElementsAttr elements, int64 v) { ::mlir::APInt value(sizeof(int64) * 8, v, /*isSigned=*/true); @@ -832,8 +817,6 @@ Status IrEmitterUnnested::HandleTriangularSolve(HloInstruction* hlo) { // This function won't be needed once 
ElementalIrEmitter migrates to take MHLO // instead. static Status ProcessFusionForConversion(mlir::Region* region, - std::vector* operands, - std::vector* outputs, std::vector* operand_shapes) { std::vector loads; std::vector stores; @@ -853,7 +836,6 @@ static Status ProcessFusionForConversion(mlir::Region* region, for (auto load : loads) { auto arg = region->addArgument(load.getType()); load.replaceAllUsesWith(arg); - operands->push_back(load.memref()); Shape shape = TypeToShape(load.getType()); auto attr = mlir::GetLayoutFromMlirHlo(load); if (attr) { @@ -866,14 +848,13 @@ static Status ProcessFusionForConversion(mlir::Region* region, *shape.mutable_layout() = LayoutUtil::MakeDescendingLayout(load.getType().getShape().size()); } - operand_shapes->push_back(shape); + operand_shapes->push_back(std::move(shape)); load.erase(); } std::vector returned_values; for (auto store : stores) { returned_values.push_back(store.tensor()); - outputs->push_back(store.memref()); store.erase(); } @@ -884,43 +865,6 @@ static Status ProcessFusionForConversion(mlir::Region* region, return Status::OK(); } -// Similar to the general GetMlirBufferSlices, but it's specific to fusion, -// since fusion doesn't have any ODS operands and memory side-effect -// annotations. 
-static StatusOr> CreateFusionSlices( - absl::Span fusion_operands, - absl::Span fusion_outputs, - absl::Span operand_shapes, const Shape& output_shape, - const BufferAssignment& buffer_assignment) { - absl::Span allocations( - buffer_assignment.Allocations()); - - std::vector slices; - for (int i = 0; i < fusion_operands.size(); i++) { - mlir::Value operand = fusion_operands[i]; - MlirBufferSlice slice; - TF_ASSIGN_OR_RETURN(slice.buffer_slice, - GetAllocationSliceForMlir(operand, allocations)); - slice.shape = operand_shapes.at(i); - slices.push_back(slice); - } - for (int i = 0; i < fusion_outputs.size(); i++) { - mlir::Value output = fusion_outputs[i]; - MlirBufferSlice slice; - TF_ASSIGN_OR_RETURN(slice.buffer_slice, - GetAllocationSliceForMlir(output, allocations)); - slice.written = true; - if (output_shape.IsTuple()) { - slice.shape = output_shape.tuple_shapes(i); - } else { - slice.shape = output_shape; - } - slices.push_back(slice); - } - - return slices; -} - StatusOr IrEmitterUnnested::GetMlirEmitterInput( HloInstruction* hlo) { MlirEmitterInput input; @@ -945,51 +889,41 @@ Status IrEmitterUnnested::EmitLoopFusionFromMlir(MlirEmitterInput input, const Shape& output_shape, int unroll_factor) { auto fusion = mlir::cast(input.op); - std::string name = mlir::GetNameFromLoc(fusion.getLoc()); - - std::vector fusion_operands; - std::vector fusion_outputs; - std::vector operand_shapes; - TF_RETURN_IF_ERROR(ProcessFusionForConversion( - &fusion.region(), &fusion_operands, &fusion_outputs, &operand_shapes)); - TF_ASSIGN_OR_RETURN( - std::vector slices, - CreateFusionSlices(fusion_operands, fusion_outputs, operand_shapes, - output_shape, - ir_emitter_context_->buffer_assignment())); - if (input.extra_slice) { - slices.push_back(*input.extra_slice); - } + MlirEmitterContext context; + context.SetOperation(fusion); std::vector ir_arrays; Thunk* kernel_thunk; { - std::unique_ptr kernel_thunk_ptr = - BuildKernelThunkForMlir(name, input.thunk_info, slices, &ir_arrays); 
+ TF_ASSIGN_OR_RETURN(std::unique_ptr kernel_thunk_ptr, + BuildKernelThunkForMlir(fusion, input.thunk_info, + input.extra_slice, &ir_arrays)); kernel_thunk = kernel_thunk_ptr.get(); thunk_sequence_.emplace_back(std::move(kernel_thunk_ptr)); } - TF_ASSIGN_OR_RETURN(const HloComputation* fused_computation, - GetOrCreateSubComputationFromRegion(&fusion.region())); - - CHECK_EQ(fusion_operands.size(), fused_computation->num_parameters()); - for (int i = 0; i < fused_computation->num_parameters(); i++) { - *fused_computation->parameter_instruction(i) - ->mutable_shape() - ->mutable_layout() = slices[i].shape.layout(); + auto operand_arrays = + absl::MakeSpan(ir_arrays).subspan(0, context.operand_shapes.size()); + auto output_element_arrays = absl::MakeSpan(ir_arrays).subspan( + context.operand_shapes.size(), context.output_shapes.size()); + const llvm_ir::IrArray* tuple_output_array = nullptr; + if (ir_arrays.size() == + context.operand_shapes.size() + context.output_shapes.size() + 1) { + tuple_output_array = &ir_arrays[context.operand_shapes.size() + + context.output_shapes.size()]; } + TF_ASSIGN_OR_RETURN(const HloComputation* fused_computation, + GetOrCreateSubComputationFromRegion(&fusion.region(), + /*is_fusion=*/true)); + GpuElementalIrEmitter elemental_emitter(hlo_module_config_, module_, &b_, GetNestedComputer()); FusedIrEmitter fused_emitter(&elemental_emitter); - for (int i = 0; i < fusion_operands.size(); i++) { - auto operand_ir_arrays = - absl::MakeSpan(ir_arrays).subspan(0, fusion_operands.size()); - + for (int i = 0; i < context.operand_shapes.size(); i++) { auto* builder = &b_; - auto ir_array = operand_ir_arrays[i]; + auto ir_array = operand_arrays[i]; fused_emitter.BindGenerator( fused_computation->parameter_instruction(i), [builder, ir_array](llvm_ir::IrArray::Index index) { @@ -1000,34 +934,32 @@ Status IrEmitterUnnested::EmitLoopFusionFromMlir(MlirEmitterInput input, auto element_generator, 
fused_emitter.GetGenerator(fused_computation->root_instruction())); - Shape element_shape = TypeToShape(fusion_outputs[0].getType()); + Shape element_shape = context.output_shapes[0]; LaunchDimensions launch_dimensions = CalculateLaunchDimensions( element_shape, ir_emitter_context_->gpu_device_info(), unroll_factor); UpdateLaunchDimensions(launch_dimensions, kernel_thunk, ir_emitter_context_->llvm_module()); - auto output_arrays = absl::MakeSpan(ir_arrays).subspan(fusion_operands.size(), - fusion_outputs.size()); llvm::Type* index_type = GetIndexTypeForKernelFromMlir( fusion, launch_dimensions.launch_bound(), &b_); - if (fusion_outputs.size() > 1) { + if (context.output_shapes.size() > 1) { // Emit the tuple pointers in one thread. We could do this at any point in // the kernel, but we do it at the beginning in the hopes of reducing // register pressure, since we touch threadIdx.x and blockIdx.x at the // beginning of the kernel *anyway*. KernelSupportLibrary{&b_}.If("emit_mof_tuple", IsBlock0Thread0(&b_), [&] { - llvm_ir::EmitTuple(ir_arrays.back(), output_arrays, &b_); + llvm_ir::EmitTuple(*tuple_output_array, output_element_arrays, &b_); }); // For multioutput fusion, we need to emit each operand and the root. 
- TF_RETURN_IF_ERROR(ParallelLoopEmitter(element_generator, output_arrays, - launch_dimensions, &b_, - unroll_factor) - .EmitLoop(name, index_type)); + TF_RETURN_IF_ERROR( + ParallelLoopEmitter(element_generator, output_element_arrays, + launch_dimensions, &b_, unroll_factor) + .EmitLoop(context.name, index_type)); } else { - TF_RETURN_IF_ERROR(ParallelLoopEmitter(element_generator, output_arrays[0], - launch_dimensions, &b_, - unroll_factor) - .EmitLoop(name, index_type)); + TF_RETURN_IF_ERROR( + ParallelLoopEmitter(element_generator, output_element_arrays[0], + launch_dimensions, &b_, unroll_factor) + .EmitLoop(context.name, index_type)); } b_.SetInsertPoint(b_.GetInsertBlock()->getTerminator()); @@ -1312,27 +1244,28 @@ Status IrEmitterUnnested::HandleSelectAndScatter( Status IrEmitterUnnested::EmitSelectAndScatterFromMlir( MlirEmitterInput mlir_input, std::unique_ptr&& initializer_thunk) { + auto select_and_scatter_op = + ::mlir::cast<::mlir::lmhlo::SelectAndScatterOp>(mlir_input.op); + + std::string name = mlir::GetNameFromLoc(select_and_scatter_op.getLoc()); + std::vector> thunks; thunks.push_back(std::move(initializer_thunk)); absl::Span allocations( ir_emitter_context_->buffer_assignment().Allocations()); - auto select_and_scatter_op = - ::mlir::cast<::mlir::lmhlo::SelectAndScatterOp>(mlir_input.op); - - // Init value is not needed in IR emission. - llvm::SmallVector<::mlir::Value, 4> operands{select_and_scatter_op.operand(), - select_and_scatter_op.source(), - select_and_scatter_op.out()}; - TF_ASSIGN_OR_RETURN( - std::vector operand_slices, - GetMlirBufferSlices(select_and_scatter_op, operands, allocations)); - - std::string name = mlir::GetNameFromLoc(select_and_scatter_op.getLoc()); std::vector ir_arrays; - thunks.push_back(BuildKernelThunkForMlir(name, Thunk::ThunkInfo(), - operand_slices, &ir_arrays)); + thunks.emplace_back(); + // Init value is not needed in IR emission. 
+ TF_ASSIGN_OR_RETURN( + thunks.back(), + BuildKernelThunkForMlir( + select_and_scatter_op, + {select_and_scatter_op.operand(), select_and_scatter_op.source(), + select_and_scatter_op.out()}, + Thunk::ThunkInfo(), mlir_input.extra_slice, &ir_arrays)); + CHECK_EQ(ir_arrays.size(), 3); const IrArray& operand_array = ir_arrays[0]; const IrArray& source_array = ir_arrays[1]; @@ -1474,7 +1407,8 @@ Status IrEmitterUnnested::EmitSelectAndScatterFromMlir( TF_ASSIGN_OR_RETURN( const HloComputation* select_computation, - GetOrCreateSubComputationFromRegion(&select_and_scatter_op.select())); + GetOrCreateSubComputationFromRegion(&select_and_scatter_op.select(), + /*is_fusion=*/false)); TF_RETURN_IF_ERROR(EmitCallToNestedComputation( *select_computation, {selected_value_address, operand_address}, @@ -1518,7 +1452,8 @@ Status IrEmitterUnnested::EmitSelectAndScatterFromMlir( TF_ASSIGN_OR_RETURN( const HloComputation* scatter_computation, - GetOrCreateSubComputationFromRegion(&select_and_scatter_op.scatter())); + GetOrCreateSubComputationFromRegion(&select_and_scatter_op.scatter(), + /*is_fusion=*/false)); return EmitAtomicOperationForNestedComputation( *scatter_computation, output_value_address, source_value_address); @@ -1616,18 +1551,17 @@ Status IrEmitterUnnested::EmitScatterFromMlir(MlirEmitterInput mlir_input) { ShapeUtil::ByteSizeOf(TypeToShape(scatter_op.output().getType())))); } - // Create MLIR buffer slice info for all operands except the first one - // (`operand`). The code generated for scatter below assumes that the input - // operand is already copied into the output, so does not use it in codegen. - TF_ASSIGN_OR_RETURN( - std::vector operand_slices, - GetMlirBufferSlices(scatter_op, scatter_op.getOperands().drop_front(), - allocations)); - - std::string name = mlir::GetNameFromLoc(scatter_op.getLoc()); + // Create kernel thunk for all operands except the first one (`operand`). 
The + // code generated for scatter below assumes that the input operand is already + // copied into the output, so does not use it in codegen. std::vector ir_arrays; - thunks.push_back(BuildKernelThunkForMlir(name, mlir_input.thunk_info, - operand_slices, &ir_arrays)); + thunks.emplace_back(); + TF_ASSIGN_OR_RETURN( + thunks.back(), + BuildKernelThunkForMlir(scatter_op, scatter_op.getOperands().drop_front(), + mlir_input.thunk_info, mlir_input.extra_slice, + &ir_arrays)); + CHECK_EQ(ir_arrays.size(), 3); const IrArray& scatter_indices = ir_arrays[0]; const IrArray& updates = ir_arrays[1]; @@ -1674,7 +1608,8 @@ Status IrEmitterUnnested::EmitScatter( TF_ASSIGN_OR_RETURN( const HloComputation* update_computation, - GetOrCreateSubComputationFromRegion(&scatter.update_computation())); + GetOrCreateSubComputationFromRegion(&scatter.update_computation(), + /*is_fusion=*/false)); ScatterDescriptor desc; desc.name = mlir::GetNameFromLoc(scatter.getLoc()); @@ -1833,9 +1768,15 @@ Status IrEmitterUnnested::HandleSelect(HloInstruction* select) { // This transformation should be migrated off. See b/171334474. 
StatusOr -IrEmitterUnnested::GetOrCreateSubComputationFromRegion(mlir::Region* region) { +IrEmitterUnnested::GetOrCreateSubComputationFromRegion(mlir::Region* region, + bool is_fusion) { std::unique_ptr& module = scratch_nested_computations_[region]; if (module == nullptr) { + std::vector operand_shapes; + if (is_fusion) { + TF_RETURN_IF_ERROR(ProcessFusionForConversion(region, &operand_shapes)); + } + xla::XlaComputation xla_computation; mlir::MlirToHloConversionOptions options; options.propagate_layouts = true; @@ -1863,6 +1804,15 @@ IrEmitterUnnested::GetOrCreateSubComputationFromRegion(mlir::Region* region) { } } } + if (is_fusion) { + HloComputation* fused_computation = module->entry_computation(); + CHECK_EQ(operand_shapes.size(), fused_computation->num_parameters()); + for (int i = 0; i < fused_computation->num_parameters(); i++) { + *fused_computation->parameter_instruction(i) + ->mutable_shape() + ->mutable_layout() = operand_shapes[i].layout(); + } + } } return module->entry_computation(); } @@ -1888,28 +1838,20 @@ Status IrEmitterUnnested::EmitSortFromMlir(MlirEmitterInput mlir_input) { absl::Span allocations( ir_emitter_context_->buffer_assignment().Allocations()); auto sort_op = mlir::cast(mlir_input.op); - std::string name = mlir::GetNameFromLoc(sort_op.getLoc()); - TF_ASSIGN_OR_RETURN( - std::vector operands, - GetMlirBufferSlices(sort_op, sort_op.operands(), allocations)); - TF_ASSIGN_OR_RETURN( - std::vector outputs, - GetMlirBufferSlices(sort_op, sort_op.output(), allocations)); - if (mlir_input.extra_slice) { - outputs.push_back(*mlir_input.extra_slice); - } + MlirEmitterContext context; + context.SetOperation(sort_op); std::vector> thunks; - Shape keys_shape = operands[0].shape; + const Shape& keys_shape = context.operand_shapes[0]; int64 dimension_to_sort = sort_op.dimension(); - for (int64 i = 0; i < operands.size(); ++i) { + for (int64 i = 0; i < context.operand_shapes.size(); ++i) { // We assume that the layout of all involved operands and 
outputs is the // same. + TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual(keys_shape, + context.operand_shapes[i])); TF_RET_CHECK( - LayoutUtil::LayoutsInShapesEqual(keys_shape, operands[i].shape)); - TF_RET_CHECK( - LayoutUtil::LayoutsInShapesEqual(keys_shape, outputs[i].shape)); + LayoutUtil::LayoutsInShapesEqual(keys_shape, context.output_shapes[i])); // If possible, we share buffers. If that is not possible, we need to copy // the values, because the emitter does the sorting in-place. @@ -1922,18 +1864,18 @@ Status IrEmitterUnnested::EmitSortFromMlir(MlirEmitterInput mlir_input) { if (destination_buffer != source_address) { // TODO(b/26783907): Figure out why we never seem to share buffers for // key/value sort. - VLOG(2) << name << " requires initial D2D copy for operand " << i; + VLOG(2) << context.name << " requires initial D2D copy for operand " << i; thunks.push_back(absl::make_unique( Thunk::ThunkInfo(), /*source_address=*/source_address, /*destination_buffer=*/destination_buffer, - /*mem_size=*/ShapeUtil::ByteSizeOf(operands[i].shape))); + /*mem_size=*/ShapeUtil::ByteSizeOf(context.operand_shapes[i]))); } } uint64 dimension_to_sort_bound = keys_shape.dimensions(dimension_to_sort); int64 num_stages = tensorflow::Log2Ceiling(dimension_to_sort_bound); - VLOG(2) << name << " requires " << num_stages << " stages."; + VLOG(2) << context.name << " requires " << num_stages << " stages."; CHECK_GE(1ULL << num_stages, dimension_to_sort_bound); CHECK_LT(1ULL << (num_stages - 1), dimension_to_sort_bound); @@ -1997,10 +1939,10 @@ Status IrEmitterUnnested::EmitSortFromMlir(MlirEmitterInput mlir_input) { // we have not enough threads, or not enough shared memory. Also it does not // give a speedup if the tile size is < 128. 
int64 total_shared_memory_needed = 0; - for (int64 i = 0; i < operands.size(); ++i) { + for (int64 i = 0; i < context.operand_shapes.size(); ++i) { total_shared_memory_needed += - kTileSize * - ShapeUtil::ByteSizeOfPrimitiveType(operands[i].shape.element_type()); + kTileSize * ShapeUtil::ByteSizeOfPrimitiveType( + context.operand_shapes[i].element_type()); } bool no_tiling = kTileSize < 128 || @@ -2013,7 +1955,7 @@ Status IrEmitterUnnested::EmitSortFromMlir(MlirEmitterInput mlir_input) { "kTileSize=%d < 128, " "kThreadsPerBlock=%d > threads_per_block_limit=%d, " "total_shared_memory_needed=%d > shared_memory_per_block=%d", - name, (no_tiling ? "won't" : "will"), kTileSize, kThreadsPerBlock, + context.name, (no_tiling ? "won't" : "will"), kTileSize, kThreadsPerBlock, ir_emitter_context_->gpu_device_info().threads_per_block_limit, total_shared_memory_needed, ir_emitter_context_->gpu_device_info().shared_memory_per_block); @@ -2021,32 +1963,35 @@ Status IrEmitterUnnested::EmitSortFromMlir(MlirEmitterInput mlir_input) { uint64 num_blocks = CeilOfRatio(num_iterations, kThreadsPerBlock); LaunchDimensions tiled_launch_dimensions(num_blocks, kThreadsPerBlock); VLOG(2) << absl::StreamFormat("%s launch dims: %d blocks, %d threads/block", - name, num_blocks, kThreadsPerBlock); + context.name, num_blocks, kThreadsPerBlock); std::vector ir_arrays; auto emit_kernel = [&](absl::Span xor_masks) { VLOG(2) << absl::StreamFormat( - "%s uses kernel for xor masks [%s]", name, + "%s uses kernel for xor masks [%s]", context.name, absl::StrJoin(xor_masks, ", ", [](std::string* out, int64 xor_mask) { absl::StrAppendFormat(out, "0x%x", xor_mask); })); - thunks.push_back( - BuildKernelThunkForMlir(name, Thunk::ThunkInfo(), outputs, &ir_arrays)); + thunks.emplace_back(); + TF_ASSIGN_OR_RETURN( + thunks.back(), + BuildKernelThunkForMlir(sort_op, sort_op.output(), Thunk::ThunkInfo(), + mlir_input.extra_slice, &ir_arrays)); LaunchDimensions launch_dimensions = xor_masks.size() > 1 ? 
tiled_launch_dimensions : standard_launch_dimensions; UpdateLaunchDimensions(launch_dimensions, thunks.back().get(), ir_emitter_context_->llvm_module()); std::vector values_arrays; - values_arrays.reserve(operands.size()); - for (int64 i = 0; i < operands.size(); ++i) { + values_arrays.reserve(context.operand_shapes.size()); + for (int64 i = 0; i < context.operand_shapes.size(); ++i) { values_arrays.push_back(ir_arrays[i]); } - TF_ASSIGN_OR_RETURN( - const HloComputation* comparator, - GetOrCreateSubComputationFromRegion(&sort_op.comparator())); + TF_ASSIGN_OR_RETURN(const HloComputation* comparator, + GetOrCreateSubComputationFromRegion( + &sort_op.comparator(), /*is_fusion=*/false)); return llvm_ir::EmitSortInPlace( - dimension_to_sort, values_arrays, IrName(name), xor_masks, &b_, + dimension_to_sort, values_arrays, IrName(context.name), xor_masks, &b_, launch_dimensions, xor_masks.size() > 1 ? num_iterations_in_sort_dim : standard_num_iterations_in_sort_dim, @@ -2079,11 +2024,12 @@ Status IrEmitterUnnested::EmitSortFromMlir(MlirEmitterInput mlir_input) { TF_RETURN_IF_ERROR(emit_kernel(xor_masks)); } VLOG(2) << absl::StreamFormat( - "%s requires %d thunks (including any D2D copies)", name, thunks.size()); + "%s requires %d thunks (including any D2D copies)", context.name, + thunks.size()); AddThunkToThunkSequence(absl::make_unique( mlir_input.thunk_info, std::move(thunks))); - if (operands.size() > 1) { + if (context.operand_shapes.size() > 1) { // Emit the tuple as part of the last stage of sorting. // We are currently in the block sorted.in_bounds.after. 
b_.SetInsertPoint(b_.GetInsertBlock()->getTerminator()); @@ -2482,7 +2428,7 @@ std::unique_ptr IrEmitterUnnested::BuildKernelThunk( }); } -std::unique_ptr IrEmitterUnnested::BuildKernelThunkForMlir( +std::unique_ptr IrEmitterUnnested::BuildKernelThunkForMlirImpl( absl::string_view name, Thunk::ThunkInfo thunk_info, absl::Span slices, std::vector* ir_arrays) { @@ -2512,6 +2458,80 @@ std::unique_ptr IrEmitterUnnested::BuildKernelThunkForMlir( }); } +static void GetFusionOperandsAndOutputs(mlir::lmhlo::FusionOp fusion, + std::vector* operands, + std::vector* outputs) { + fusion.region().walk([&](mlir::TensorLoadOp load) { + CHECK(load.memref().getParentRegion() != &fusion.region()); + operands->push_back(load.memref()); + }); + fusion.region().walk([&](mlir::TensorStoreOp store) { + CHECK(store.memref().getParentRegion() != &fusion.region()); + outputs->push_back(store.memref()); + }); +} + +StatusOr> +IrEmitterUnnested::BuildKernelThunkForMlir( + mlir::Operation* op, mlir::ValueRange operands, Thunk::ThunkInfo thunk_info, + absl::optional extra_slice, + std::vector* ir_arrays) { + absl::Span allocations( + ir_emitter_context_->buffer_assignment().Allocations()); + std::vector slices; + for (mlir::Value operand : operands) { + slices.emplace_back(); + auto& slice = slices.back(); + TF_ASSIGN_OR_RETURN(slice.buffer_slice, + GetAllocationSliceForMlir(operand, allocations)); + slice.written = WritesMlirBuffer(op, operand); + slice.shape = TypeToShape(operand.getType()); + } + if (extra_slice) { + slices.push_back(*extra_slice); + } + std::string name = mlir::GetNameFromLoc(op->getLoc()); + return BuildKernelThunkForMlirImpl(name, thunk_info, slices, ir_arrays); +} + +StatusOr> +IrEmitterUnnested::BuildKernelThunkForMlir( + mlir::Operation* op, Thunk::ThunkInfo thunk_info, + absl::optional extra_slice, + std::vector* ir_arrays) { + if (auto fusion = mlir::dyn_cast(op)) { + absl::Span allocations( + ir_emitter_context_->buffer_assignment().Allocations()); + std::vector 
operands, outputs; + GetFusionOperandsAndOutputs(fusion, &operands, &outputs); + + std::vector slices; + for (auto operand : operands) { + slices.emplace_back(); + auto& slice = slices.back(); + TF_ASSIGN_OR_RETURN(slice.buffer_slice, + GetAllocationSliceForMlir(operand, allocations)); + slice.written = false; + slice.shape = TypeToShape(operand.getType()); + } + for (auto output : outputs) { + slices.emplace_back(); + auto& slice = slices.back(); + TF_ASSIGN_OR_RETURN(slice.buffer_slice, + GetAllocationSliceForMlir(output, allocations)); + slice.written = true; + slice.shape = TypeToShape(output.getType()); + } + std::string name = mlir::GetNameFromLoc(op->getLoc()); + if (extra_slice) { + slices.push_back(*extra_slice); + } + return BuildKernelThunkForMlirImpl(name, thunk_info, slices, ir_arrays); + } + return BuildKernelThunkForMlir(op, op->getOperands(), thunk_info, extra_slice, + ir_arrays); +} + StatusOr> IrEmitterUnnested::BuildInitializerThunk( HloInstruction* hlo, const ShapeIndex& index) { bool fused = HloOpcode::kFusion == hlo->opcode(); @@ -4626,5 +4646,28 @@ Thunk::ThunkInfo IrEmitterUnnested::GetThunkInfo( return info; } +void MlirEmitterContext::SetOperation(mlir::Operation* op) { + this->name = mlir::GetNameFromLoc(op->getLoc()); + + std::vector operands, outputs; + if (auto fusion = mlir::dyn_cast(op)) { + GetFusionOperandsAndOutputs(fusion, &operands, &outputs); + } else { + for (auto buffer : op->getOperands()) { + if (WritesMlirBuffer(op, buffer)) { + outputs.push_back(buffer); + } else { + operands.push_back(buffer); + } + } + } + for (auto operand : operands) { + operand_shapes.push_back(TypeToShape(operand.getType())); + } + for (auto output : outputs) { + output_shapes.push_back(TypeToShape(output.getType())); + } +} + } // namespace gpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h index 00d78c15779..de35ac2f4fd 100644 --- 
a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h @@ -72,6 +72,15 @@ struct MlirEmitterInput { absl::optional extra_slice; }; +// Convenience struct that contains useful data structures in MLIR emitter. +struct MlirEmitterContext { + void SetOperation(mlir::Operation* op); + + std::string name; + std::vector operand_shapes; + std::vector output_shapes; +}; + // Emits LLVM IR for an "unnested computation". // // An unnested computation is an HloComputation which you run by executing one @@ -593,11 +602,21 @@ class IrEmitterUnnested : public IrEmitter, std::unique_ptr BuildKernelThunk( const HloInstruction* inst, bool implements_whole_instruction); - std::unique_ptr BuildKernelThunkForMlir( + std::unique_ptr BuildKernelThunkForMlirImpl( absl::string_view name, Thunk::ThunkInfo thunk_info, absl::Span slices, std::vector* ir_arrays); + StatusOr> BuildKernelThunkForMlir( + mlir::Operation* op, mlir::ValueRange operands, + Thunk::ThunkInfo thunk_info, absl::optional extra_slice, + std::vector* ir_arrays); + + StatusOr> BuildKernelThunkForMlir( + mlir::Operation* op, Thunk::ThunkInfo thunk_info, + absl::optional extra_slice, + std::vector* ir_arrays); + // Returns a thunk that, given a reduce or select-and-scatter op, // initializes its memory to the appropriate initial value. StatusOr> BuildInitializerThunk( @@ -647,7 +666,7 @@ class IrEmitterUnnested : public IrEmitter, absl::optional block_id_filter = absl::nullopt); StatusOr GetOrCreateSubComputationFromRegion( - mlir::Region* region); + mlir::Region* region, bool is_fusion); StatusOr GetMlirEmitterInput(HloInstruction* hlo); From 8175ff32abc2b5f6051bbef063d580cb72881307 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Tue, 10 Nov 2020 14:19:51 -0800 Subject: [PATCH 130/220] Added check for runtime inputs in TransposeConv. 
PiperOrigin-RevId: 341696451 Change-Id: I876fa503a48eed57b9551551623178209153037e --- .../lite/delegates/gpu/common/model_builder.cc | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tensorflow/lite/delegates/gpu/common/model_builder.cc b/tensorflow/lite/delegates/gpu/common/model_builder.cc index 5371ddc9268..f223a5c0128 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder.cc +++ b/tensorflow/lite/delegates/gpu/common/model_builder.cc @@ -2159,6 +2159,19 @@ class TransposeConvBuiltinOperationParser : public TFLiteOperationParser { const TfLiteNode* tflite_node, const TfLiteRegistration* registration) final { RETURN_IF_ERROR(CheckMaxSupportedOpVersion(registration, 3)); + const int runtime_inputs = + GetNumberOfRuntimeInputsForNode(context, tflite_node); + if (runtime_inputs != 1) { + return absl::InternalError( + absl::StrCat("Expected 1 runtime input tensor, but node has ", + runtime_inputs, " runtime inputs.")); + } + const int runtime_outputs = NumOutputs(tflite_node); + if (runtime_outputs != 1) { + return absl::InternalError( + absl::StrCat("Expected 1 runtime output tensor, but node has ", + runtime_outputs, " runtime outputs.")); + } RETURN_IF_ERROR(CheckTensorIsAvailable(context, tflite_node, 1)); const TfLiteTransposeConvParams* tf_options; RETURN_IF_ERROR(RetrieveBuiltinData(tflite_node, &tf_options)); From 9f04e7773ffd907b67323c4f301c18625d4e3f70 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Tue, 10 Nov 2020 14:20:56 -0800 Subject: [PATCH 131/220] Added transformation of global average pooling to mean. 
PiperOrigin-RevId: 341696703 Change-Id: Iee80d46d4781952850510fd09e3775eb4024f226 --- tensorflow/lite/delegates/gpu/cl/BUILD | 1 + .../delegates/gpu/cl/inference_context.cc | 6 ++ .../gpu/common/transformations/BUILD | 53 ++++++++++--- .../global_pooling_to_reduce_op.cc | 78 +++++++++++++++++++ .../global_pooling_to_reduce_op.h | 33 ++++++++ .../global_pooling_to_reduce_op_test.cc | 72 +++++++++++++++++ 6 files changed, 232 insertions(+), 11 deletions(-) create mode 100644 tensorflow/lite/delegates/gpu/common/transformations/global_pooling_to_reduce_op.cc create mode 100644 tensorflow/lite/delegates/gpu/common/transformations/global_pooling_to_reduce_op.h create mode 100644 tensorflow/lite/delegates/gpu/common/transformations/global_pooling_to_reduce_op_test.cc diff --git a/tensorflow/lite/delegates/gpu/cl/BUILD b/tensorflow/lite/delegates/gpu/cl/BUILD index 96a28e3d484..a8fcbf1570c 100644 --- a/tensorflow/lite/delegates/gpu/cl/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/BUILD @@ -408,6 +408,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/common/task:tensor_linear_desc", "//tensorflow/lite/delegates/gpu/common/task:texture2d_desc", "//tensorflow/lite/delegates/gpu/common/transformations:add_bias", + "//tensorflow/lite/delegates/gpu/common/transformations:global_pooling_to_reduce_op", "//tensorflow/lite/delegates/gpu/common/transformations:merge_padding_with", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.cc b/tensorflow/lite/delegates/gpu/cl/inference_context.cc index 0b16ff247c8..cb26dc24426 100644 --- a/tensorflow/lite/delegates/gpu/cl/inference_context.cc +++ b/tensorflow/lite/delegates/gpu/cl/inference_context.cc @@ -41,6 +41,7 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/common/shape.h" #include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h" #include "tensorflow/lite/delegates/gpu/common/transformations/add_bias.h" +#include "tensorflow/lite/delegates/gpu/common/transformations/global_pooling_to_reduce_op.h" #include "tensorflow/lite/delegates/gpu/common/transformations/merge_padding_with.h" #include "tensorflow/lite/delegates/gpu/common/types.h" #include "tensorflow/lite/delegates/gpu/common/util.h" @@ -727,6 +728,7 @@ absl::Status InferenceContext::GetOutputTensor(ValueId id, absl::Status RunGraphTransforms(GraphFloat32* graph) { auto merge_padding_transform = NewMergePaddingWithAdd(); auto add_bias_transform = NewAddBias(); + auto pooling_to_reduce_op = NewGlobalPoolingToReduceOp(); ModelTransformer transformer(graph, /*reporter=*/nullptr); if (!transformer.Apply("add_bias", add_bias_transform.get())) { return absl::InternalError("Invalid add_bias transform"); @@ -734,6 +736,10 @@ absl::Status RunGraphTransforms(GraphFloat32* graph) { if (!transformer.Apply("merge_padding", merge_padding_transform.get())) { return absl::InternalError("Invalid merge_padding transform"); } + if (!transformer.Apply("global pooling to mean", + pooling_to_reduce_op.get())) { + return absl::InternalError("Invalid global pooling to mean transform"); + } return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/common/transformations/BUILD b/tensorflow/lite/delegates/gpu/common/transformations/BUILD index 6cb358bcc93..b9e332c5476 100644 --- a/tensorflow/lite/delegates/gpu/common/transformations/BUILD +++ b/tensorflow/lite/delegates/gpu/common/transformations/BUILD @@ -120,19 +120,34 @@ cc_test( ) cc_library( - name = "model_transformations", - srcs = ["model_transformations.cc"], - hdrs = ["model_transformations.h"], + name = "global_pooling_to_reduce_op", + srcs = ["global_pooling_to_reduce_op.cc"], + hdrs = ["global_pooling_to_reduce_op.h"], deps = [ - ":add_quant_adjustments", - 
":fuse_add_to_conv", - ":fuse_mul_to_conv", - ":make_fully_connected", - ":make_padding", - ":merge_padding_with", - ":remove_noop", + "//tensorflow/lite/delegates/gpu/common:model", "//tensorflow/lite/delegates/gpu/common:model_transformer", - ] + tf_platform_alias("custom_transformations", "//tensorflow/lite/delegates/gpu/common/"), + "//tensorflow/lite/delegates/gpu/common:operations", + "//tensorflow/lite/delegates/gpu/common:shape", + "//tensorflow/lite/delegates/gpu/common:tensor", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/types:any", + ], +) + +cc_test( + name = "global_pooling_to_reduce_op_test", + srcs = ["global_pooling_to_reduce_op_test.cc"], + deps = [ + ":global_pooling_to_reduce_op", + "//tensorflow/lite/delegates/gpu/common:model", + "//tensorflow/lite/delegates/gpu/common:model_transformer", + "//tensorflow/lite/delegates/gpu/common:operations", + "//tensorflow/lite/delegates/gpu/common:shape", + "//tensorflow/lite/delegates/gpu/common:tensor", + "@com_google_absl//absl/status", + "@com_google_absl//absl/types:any", + "@com_google_googletest//:gtest_main", + ], ) cc_library( @@ -240,6 +255,22 @@ cc_test( ], ) +cc_library( + name = "model_transformations", + srcs = ["model_transformations.cc"], + hdrs = ["model_transformations.h"], + deps = [ + ":add_quant_adjustments", + ":fuse_add_to_conv", + ":fuse_mul_to_conv", + ":make_fully_connected", + ":make_padding", + ":merge_padding_with", + ":remove_noop", + "//tensorflow/lite/delegates/gpu/common:model_transformer", + ] + tf_platform_alias("custom_transformations", "//tensorflow/lite/delegates/gpu/common/"), +) + cc_library( name = "remove_noop", srcs = ["remove_noop.cc"], diff --git a/tensorflow/lite/delegates/gpu/common/transformations/global_pooling_to_reduce_op.cc b/tensorflow/lite/delegates/gpu/common/transformations/global_pooling_to_reduce_op.cc new file mode 100644 index 00000000000..377fe752001 --- /dev/null +++ 
b/tensorflow/lite/delegates/gpu/common/transformations/global_pooling_to_reduce_op.cc @@ -0,0 +1,78 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/delegates/gpu/common/transformations/global_pooling_to_reduce_op.h" + +#include +#include +#include + +#include "absl/memory/memory.h" +#include "absl/types/any.h" +#include "tensorflow/lite/delegates/gpu/common/model.h" +#include "tensorflow/lite/delegates/gpu/common/model_transformer.h" +#include "tensorflow/lite/delegates/gpu/common/operations.h" +#include "tensorflow/lite/delegates/gpu/common/shape.h" +#include "tensorflow/lite/delegates/gpu/common/tensor.h" + +namespace tflite { +namespace gpu { +namespace { + +bool IsGlobalPooling(const Pooling2DAttributes& attr, const BHWC& src_shape) { + return attr.strides.w == src_shape.w && attr.strides.h == src_shape.h && + attr.kernel.w == src_shape.w && attr.kernel.h == src_shape.h && + attr.padding.appended.w == 0 && attr.padding.appended.h == 0 && + attr.padding.prepended.w == 0 && attr.padding.prepended.h == 0; +} + +bool IsGlobalAveragePooling(const Pooling2DAttributes& attr, + const BHWC& src_shape) { + return attr.type == tflite::gpu::PoolingType::AVERAGE && + attr.output_indices == false && IsGlobalPooling(attr, src_shape); +} + +class GlobalPoolingToReduceOp : public NodeTransformation { + public: + 
TransformResult ApplyToNode(Node* node, GraphFloat32* graph) final { + if (node->operation.type != ToString(OperationType::POOLING_2D)) { + return {TransformStatus::SKIPPED, ""}; + } + + auto inputs = graph->FindInputs(node->id); + const auto& pool_attr = + absl::any_cast(node->operation.attributes); + if (!IsGlobalAveragePooling(pool_attr, inputs[0]->tensor.shape)) { + return {TransformStatus::SKIPPED, ""}; + } + + MeanAttributes mean_attr; + mean_attr.dims = {Axis::WIDTH, Axis::HEIGHT}; + + node->operation.attributes = mean_attr; + node->operation.type = ToString(OperationType::MEAN); + return {TransformStatus::APPLIED, + "Replaced global average pooling with mean."}; + } +}; + +} // namespace + +std::unique_ptr NewGlobalPoolingToReduceOp() { + return absl::make_unique(); +} + +} // namespace gpu +} // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/common/transformations/global_pooling_to_reduce_op.h b/tensorflow/lite/delegates/gpu/common/transformations/global_pooling_to_reduce_op.h new file mode 100644 index 00000000000..d2eba5d9fe9 --- /dev/null +++ b/tensorflow/lite/delegates/gpu/common/transformations/global_pooling_to_reduce_op.h @@ -0,0 +1,33 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TRANSFORMATIONS_GLOBAL_POOLING_TO_REDUCE_OP_H_ +#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TRANSFORMATIONS_GLOBAL_POOLING_TO_REDUCE_OP_H_ + +#include + +#include "tensorflow/lite/delegates/gpu/common/model_transformer.h" + +namespace tflite { +namespace gpu { + +// Turns global pooling to reduce operation +// currently can convert average pooling into mean. +std::unique_ptr NewGlobalPoolingToReduceOp(); + +} // namespace gpu +} // namespace tflite + +#endif // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TRANSFORMATIONS_GLOBAL_POOLING_TO_REDUCE_OP_H_ diff --git a/tensorflow/lite/delegates/gpu/common/transformations/global_pooling_to_reduce_op_test.cc b/tensorflow/lite/delegates/gpu/common/transformations/global_pooling_to_reduce_op_test.cc new file mode 100644 index 00000000000..4751c84ed98 --- /dev/null +++ b/tensorflow/lite/delegates/gpu/common/transformations/global_pooling_to_reduce_op_test.cc @@ -0,0 +1,72 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/lite/delegates/gpu/common/transformations/global_pooling_to_reduce_op.h" + +#include +#include +#include + +#include +#include "absl/status/status.h" +#include "absl/types/any.h" +#include "tensorflow/lite/delegates/gpu/common/model.h" +#include "tensorflow/lite/delegates/gpu/common/model_transformer.h" +#include "tensorflow/lite/delegates/gpu/common/operations.h" +#include "tensorflow/lite/delegates/gpu/common/shape.h" +#include "tensorflow/lite/delegates/gpu/common/tensor.h" + +namespace tflite { +namespace gpu { +namespace { + +TEST(MakeMeanFromGlobalAveragePooling, Smoke) { + GraphFloat32 graph; + auto input = graph.NewValue(); + input->tensor.shape = BHWC(1, 4, 4, 8); + + Pooling2DAttributes attr; + attr.padding.prepended = tflite::gpu::HW(0, 0); + attr.padding.appended = tflite::gpu::HW(0, 0); + attr.strides = tflite::gpu::HW(4, 4); + attr.kernel = tflite::gpu::HW(4, 4); + attr.type = tflite::gpu::PoolingType::AVERAGE; + attr.output_indices = false; + + auto pool_node = graph.NewNode(); + pool_node->operation.type = ToString(OperationType::POOLING_2D); + pool_node->operation.attributes = attr; + + ASSERT_TRUE(graph.AddConsumer(pool_node->id, input->id).ok()); + + Value* output = nullptr; + ASSERT_TRUE(AddOutput(&graph, pool_node, &output).ok()); + output->tensor.shape = BHWC(1, 1, 1, 8); + + ASSERT_EQ(1, graph.nodes().size()); + ASSERT_EQ(2, graph.values().size()); + + auto transformation = NewGlobalPoolingToReduceOp(); + ModelTransformer transformer(&graph, nullptr); + transformer.Apply("global_average_pooling_to_mean", transformation.get()); + + ASSERT_EQ(1, graph.nodes().size()); + ASSERT_EQ(2, graph.values().size()); + ASSERT_EQ(ToString(OperationType::MEAN), graph.nodes()[0]->operation.type); +} + +} // namespace +} // namespace gpu +} // namespace tflite From ce4ab82f5ec6ba50ce9800cda5388c95c8f86e0f Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: 
Tue, 10 Nov 2020 14:21:41 -0800 Subject: [PATCH 132/220] Extended gpu/common/gpu_info to support AppleInfo. Using gpu/common/gpu_info for Metal backend. PiperOrigin-RevId: 341696861 Change-Id: I0c71c0691c76af4f7282d9aa886b41d61a3ccf96 --- tensorflow/lite/delegates/gpu/BUILD | 2 +- .../lite/delegates/gpu/common/gpu_info.cc | 124 +++++++++++++-- .../lite/delegates/gpu/common/gpu_info.h | 49 +++++- tensorflow/lite/delegates/gpu/metal/BUILD | 10 +- tensorflow/lite/delegates/gpu/metal/api.cc | 2 +- tensorflow/lite/delegates/gpu/metal/api.h | 2 +- .../lite/delegates/gpu/metal/common_test.mm | 2 +- .../lite/delegates/gpu/metal/device_info.cc | 150 ------------------ .../lite/delegates/gpu/metal/device_info.h | 97 ----------- .../lite/delegates/gpu/metal/kernels/BUILD | 14 +- .../lite/delegates/gpu/metal/kernels/conv.cc | 10 +- .../lite/delegates/gpu/metal/kernels/conv.h | 2 +- .../delegates/gpu/metal/kernels/conv_test.mm | 3 +- .../gpu/metal/kernels/fully_connected.cc | 2 +- .../gpu/metal/kernels/fully_connected.h | 2 +- .../delegates/gpu/metal/kernels/softmax.cc | 2 +- .../delegates/gpu/metal/kernels/softmax.h | 2 +- .../delegates/gpu/metal/kernels/test_util.mm | 5 +- .../gpu/metal/kernels/transpose_conv.cc | 2 +- .../gpu/metal/kernels/transpose_conv.h | 2 +- .../lite/delegates/gpu/metal_delegate.mm | 5 +- 21 files changed, 187 insertions(+), 302 deletions(-) delete mode 100644 tensorflow/lite/delegates/gpu/metal/device_info.cc delete mode 100644 tensorflow/lite/delegates/gpu/metal/device_info.h diff --git a/tensorflow/lite/delegates/gpu/BUILD b/tensorflow/lite/delegates/gpu/BUILD index 3dfab18867f..069230ebcf6 100644 --- a/tensorflow/lite/delegates/gpu/BUILD +++ b/tensorflow/lite/delegates/gpu/BUILD @@ -88,6 +88,7 @@ objc_library( "//tensorflow/lite:minimal_logging", "//tensorflow/lite/c:common", "//tensorflow/lite/delegates/gpu/common:convert", + "//tensorflow/lite/delegates/gpu/common:gpu_info", "//tensorflow/lite/delegates/gpu/common:model", 
"//tensorflow/lite/delegates/gpu/common:model_builder", "//tensorflow/lite/delegates/gpu/common:model_transformer", @@ -99,7 +100,6 @@ objc_library( "//tensorflow/lite/delegates/gpu/metal:api", "//tensorflow/lite/delegates/gpu/metal:buffer_convert", "//tensorflow/lite/delegates/gpu/metal:compiled_model", - "//tensorflow/lite/delegates/gpu/metal:device_info", "//tensorflow/lite/delegates/gpu/metal:inference_context", "@com_google_absl//absl/types:span", ], diff --git a/tensorflow/lite/delegates/gpu/common/gpu_info.cc b/tensorflow/lite/delegates/gpu/common/gpu_info.cc index 40a4e4b3c9e..5a816fda188 100644 --- a/tensorflow/lite/delegates/gpu/common/gpu_info.cc +++ b/tensorflow/lite/delegates/gpu/common/gpu_info.cc @@ -24,26 +24,28 @@ namespace tflite { namespace gpu { namespace { -GpuVendor GetGpuVendor(const std::string& renderer) { - if (renderer.find("mali") != renderer.npos) { - return GpuVendor::kMali; - } - if (renderer.find("adreno") != renderer.npos) { - return GpuVendor::kQualcomm; - } - if (renderer.find("powervr") != renderer.npos) { - return GpuVendor::kPowerVR; - } - if (renderer.find("intel") != renderer.npos) { - return GpuVendor::kIntel; - } - if (renderer.find("nvidia") != renderer.npos) { - return GpuVendor::kNvidia; +GpuVendor GetGpuVendor(const std::string& gpu_description) { + const std::map kMapping = { + {"adreno", GpuVendor::kQualcomm}, + {"apple", GpuVendor::kApple}, + {"qualcomm", GpuVendor::kQualcomm}, + {"mali", GpuVendor::kMali}, + {"powervr", GpuVendor::kPowerVR}, + {"advanced micro devices", GpuVendor::kAMD}, + {"intel", GpuVendor::kIntel}, + {"nvidia", GpuVendor::kNvidia}, + {"amd", GpuVendor::kAMD}, + {"power", GpuVendor::kPowerVR}, + }; + for (const auto& v : kMapping) { + if (gpu_description.find(v.first) != std::string::npos) { + return v.second; + } } return GpuVendor::kUnknown; } -AdrenoGpu GetAdrenoGpuVersion(const std::string& device_name) { +AdrenoGpu GetAdrenoGpuVersion(const std::string& gpu_description) { const std::map 
kMapping = { // Adreno 6xx series {"685", AdrenoGpu::kAdreno685}, @@ -93,7 +95,7 @@ AdrenoGpu GetAdrenoGpuVersion(const std::string& device_name) { }; for (const auto& v : kMapping) { - if (device_name.find(v.first) != std::string::npos) { + if (gpu_description.find(v.first) != std::string::npos) { return v.second; } } @@ -212,6 +214,70 @@ int AdrenoInfo::GetWaveSize(bool full_wave) const { } } +AppleInfo::AppleInfo(const std::string& gpu_description) { + const std::map kMapping = { + {"apple a7 gpu", AppleGpu::kA7}, {"apple a8 gpu", AppleGpu::kA8}, + {"apple a8x gpu", AppleGpu::kA8X}, {"apple a9 gpu", AppleGpu::kA9}, + {"apple a9x gpu", AppleGpu::kA9X}, {"apple a10 gpu", AppleGpu::kA10}, + {"apple a10x gpu", AppleGpu::kA10X}, {"apple a11 gpu", AppleGpu::kA11}, + {"apple a12 gpu", AppleGpu::kA12}, {"apple a12x gpu", AppleGpu::kA12X}, + {"apple a12z gpu", AppleGpu::kA12Z}, {"apple a13 gpu", AppleGpu::kA13}, + {"apple a14 gpu", AppleGpu::kA14}, + }; + auto it = kMapping.find(gpu_description); + if (it != kMapping.end()) { + gpu_type = it->second; + } else { + gpu_type = AppleGpu::kUnknown; + } +} + +bool AppleInfo::IsLocalMemoryPreferredOverGlobal() const { + return gpu_type == AppleGpu::kA7 || gpu_type == AppleGpu::kA8 || + gpu_type == AppleGpu::kA8X; +} + +bool AppleInfo::IsBionic() const { + return gpu_type == AppleGpu::kA11 || gpu_type == AppleGpu::kA12 || + gpu_type == AppleGpu::kA12X || gpu_type == AppleGpu::kA12Z || + gpu_type == AppleGpu::kA13 || gpu_type == AppleGpu::kA14; +} + +bool AppleInfo::IsRoundToNearestSupported() const { return IsBionic(); } + +int AppleInfo::GetComputeUnitsCount() const { + switch (gpu_type) { + case AppleGpu::kA7: + return 4; + case AppleGpu::kA8: + return 4; + case AppleGpu::kA8X: + return 8; + case AppleGpu::kA9: + return 6; + case AppleGpu::kA9X: + return 12; + case AppleGpu::kA10: + return 6; + case AppleGpu::kA10X: + return 12; + case AppleGpu::kA11: + return 3; + case AppleGpu::kA12: + return 4; + case AppleGpu::kA12X: + 
return 7; + case AppleGpu::kA12Z: + return 8; + case AppleGpu::kA13: + return 4; + case AppleGpu::kA14: + return 4; + case AppleGpu::kUnknown: + return 1; + } +} + void GetGpuInfoFromDeviceDescription(const std::string& gpu_description, GpuInfo* gpu_info) { std::string lowered = gpu_description; @@ -219,6 +285,9 @@ void GetGpuInfoFromDeviceDescription(const std::string& gpu_description, gpu_info->vendor = GetGpuVendor(lowered); if (gpu_info->IsAdreno()) { gpu_info->adreno_info = AdrenoInfo(lowered); + } else if (gpu_info->IsApple()) { + gpu_info->apple_info = AppleInfo(lowered); + gpu_info->supported_subgroup_sizes = {32}; } } @@ -236,5 +305,26 @@ bool GpuInfo::IsAMD() const { return vendor == GpuVendor::kAMD; } bool GpuInfo::IsIntel() const { return vendor == GpuVendor::kIntel; } +bool GpuInfo::IsRoundToNearestSupported() const { + if (IsApple()) { + return apple_info.IsRoundToNearestSupported(); + } else { + return true; + } +} + +bool GpuInfo::IsWaveSizeEqualTo32() const { + return supported_subgroup_sizes.size() == 1 && + supported_subgroup_sizes[0] == 32; +} + +int GpuInfo::GetComputeUnitsCount() const { + if (IsApple()) { + return apple_info.GetComputeUnitsCount(); + } else { + return 1; + } +} + } // namespace gpu } // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/common/gpu_info.h b/tensorflow/lite/delegates/gpu/common/gpu_info.h index 053021bfe2a..ec328234328 100644 --- a/tensorflow/lite/delegates/gpu/common/gpu_info.h +++ b/tensorflow/lite/delegates/gpu/common/gpu_info.h @@ -116,6 +116,38 @@ struct AdrenoInfo { bool support_one_layer_texture_array = true; }; +enum class AppleGpu { + kUnknown, + kA7, + kA8, + kA8X, + kA9, + kA9X, + kA10, + kA10X, + kA11, + kA12, + kA12X, + kA12Z, + kA13, + kA14, +}; + +struct AppleInfo { + AppleInfo() = default; + explicit AppleInfo(const std::string& gpu_description); + AppleGpu gpu_type; + + bool IsLocalMemoryPreferredOverGlobal() const; + + bool IsBionic() const; + + // floating point rounding mode + bool 
IsRoundToNearestSupported() const; + + int GetComputeUnitsCount() const; +}; + struct GpuInfo { bool IsAdreno() const; bool IsApple() const; @@ -125,6 +157,14 @@ struct GpuInfo { bool IsAMD() const; bool IsIntel() const; + // floating point rounding mode + bool IsRoundToNearestSupported() const; + + // returns true if device have fixed wave size equal to 32 + bool IsWaveSizeEqualTo32() const; + + int GetComputeUnitsCount() const; + GpuVendor vendor = GpuVendor::kUnknown; std::string renderer_name; @@ -141,7 +181,10 @@ struct GpuInfo { int max_image_units = 0; int max_array_texture_layers = 0; + std::vector supported_subgroup_sizes; + AdrenoInfo adreno_info; + AppleInfo apple_info; }; inline bool IsOpenGl31OrAbove(const GpuInfo& gpu_info) { @@ -149,8 +192,10 @@ inline bool IsOpenGl31OrAbove(const GpuInfo& gpu_info) { gpu_info.major_version > 3; } -// Currently it initializes vendor and AdrenoInfo if -// vendor is kQualcomm +// Currently it initializes: +// vendor +// AdrenoInfo if vendor is kQualcomm +// AppleInfo if vendor is kApple void GetGpuInfoFromDeviceDescription(const std::string& gpu_description, GpuInfo* gpu_info); diff --git a/tensorflow/lite/delegates/gpu/metal/BUILD b/tensorflow/lite/delegates/gpu/metal/BUILD index 8d00eeee03f..81b8434b46d 100644 --- a/tensorflow/lite/delegates/gpu/metal/BUILD +++ b/tensorflow/lite/delegates/gpu/metal/BUILD @@ -26,8 +26,8 @@ cc_library( deps = [ ":compiled_model", ":compute_task_descriptor", - ":device_info", ":runtime_options", + "//tensorflow/lite/delegates/gpu/common:gpu_info", "//tensorflow/lite/delegates/gpu/common:model", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:shape", @@ -188,12 +188,6 @@ objc_library( ], ) -cc_library( - name = "device_info", - srcs = ["device_info.cc"], - hdrs = ["device_info.h"], -) - objc_library( name = "gpu_object", hdrs = ["gpu_object.h"], @@ -309,10 +303,10 @@ objc_library( ], sdk_frameworks = ["XCTest"], deps = [ + 
"//tensorflow/lite/delegates/gpu/common:gpu_info", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/metal:buffer", "//tensorflow/lite/delegates/gpu/metal:common", - "//tensorflow/lite/delegates/gpu/metal:device_info", "//tensorflow/lite/delegates/gpu/metal:inference_context", "//tensorflow/lite/delegates/gpu/metal:runtime_options", "//tensorflow/lite/delegates/gpu/metal/kernels:test_util", diff --git a/tensorflow/lite/delegates/gpu/metal/api.cc b/tensorflow/lite/delegates/gpu/metal/api.cc index acb9ec79e17..561b0828cd3 100644 --- a/tensorflow/lite/delegates/gpu/metal/api.cc +++ b/tensorflow/lite/delegates/gpu/metal/api.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include "absl/strings/substitute.h" +#include "tensorflow/lite/delegates/gpu/common/gpu_info.h" #include "tensorflow/lite/delegates/gpu/common/model.h" #include "tensorflow/lite/delegates/gpu/common/operations.h" #include "tensorflow/lite/delegates/gpu/common/shape.h" @@ -25,7 +26,6 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/common/util.h" #include "tensorflow/lite/delegates/gpu/metal/compiled_model.h" #include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h" -#include "tensorflow/lite/delegates/gpu/metal/device_info.h" #include "tensorflow/lite/delegates/gpu/metal/kernels/add.h" #include "tensorflow/lite/delegates/gpu/metal/kernels/concat.h" #include "tensorflow/lite/delegates/gpu/metal/kernels/conv.h" diff --git a/tensorflow/lite/delegates/gpu/metal/api.h b/tensorflow/lite/delegates/gpu/metal/api.h index 09eb6511935..f7cdfa4245a 100644 --- a/tensorflow/lite/delegates/gpu/metal/api.h +++ b/tensorflow/lite/delegates/gpu/metal/api.h @@ -16,10 +16,10 @@ limitations under the License. 
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_METAL_API_H_ #define TENSORFLOW_LITE_DELEGATES_GPU_METAL_API_H_ +#include "tensorflow/lite/delegates/gpu/common/gpu_info.h" #include "tensorflow/lite/delegates/gpu/common/model.h" #include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/metal/compiled_model.h" -#include "tensorflow/lite/delegates/gpu/metal/device_info.h" #include "tensorflow/lite/delegates/gpu/metal/runtime_options.h" namespace tflite { diff --git a/tensorflow/lite/delegates/gpu/metal/common_test.mm b/tensorflow/lite/delegates/gpu/metal/common_test.mm index 9fecc59e235..3e2db5494b9 100644 --- a/tensorflow/lite/delegates/gpu/metal/common_test.mm +++ b/tensorflow/lite/delegates/gpu/metal/common_test.mm @@ -22,7 +22,7 @@ limitations under the License. #include #include "tensorflow/lite/delegates/gpu/common/status.h" -#include "tensorflow/lite/delegates/gpu/metal/device_info.h" +#include "tensorflow/lite/delegates/gpu/common/gpu_info.h" using ::tflite::gpu::metal::GetBestSupportedMetalDevice; using ::tflite::gpu::metal::CreateComputeProgram; diff --git a/tensorflow/lite/delegates/gpu/metal/device_info.cc b/tensorflow/lite/delegates/gpu/metal/device_info.cc deleted file mode 100644 index 250ca9b7ca2..00000000000 --- a/tensorflow/lite/delegates/gpu/metal/device_info.cc +++ /dev/null @@ -1,150 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/delegates/gpu/metal/device_info.h" - -#include -#include - -namespace tflite { -namespace gpu { -namespace metal { -namespace { -GpuVendor GetVendorFromString(const std::string& device_name) { - const std::map kMapping = { - {"Apple", GpuVendor::kApple}, - {"Intel", GpuVendor::kIntel}, - {"AMD", GpuVendor::kAMD}, - }; - for (const auto& v : kMapping) { - if (device_name.find(v.first) != std::string::npos) { - return v.second; - } - } - return GpuVendor::kUnknown; -} -} // namespace - -AppleGPUInfo::AppleGPUInfo(const std::string& device_name) { - const std::map kMapping = { - {"Apple A7 GPU", AppleGPU::kA7}, {"Apple A8 GPU", AppleGPU::kA8}, - {"Apple A8X GPU", AppleGPU::kA8X}, {"Apple A9 GPU", AppleGPU::kA9}, - {"Apple A9X GPU", AppleGPU::kA9X}, {"Apple A10 GPU", AppleGPU::kA10}, - {"Apple A10X GPU", AppleGPU::kA10X}, {"Apple A11 GPU", AppleGPU::kA11}, - {"Apple A12 GPU", AppleGPU::kA12}, {"Apple A12X GPU", AppleGPU::kA12X}, - {"Apple A12Z GPU", AppleGPU::kA12Z}, {"Apple A13 GPU", AppleGPU::kA13}, - {"Apple A14 GPU", AppleGPU::kA14}, - }; - auto it = kMapping.find(device_name); - if (it != kMapping.end()) { - gpu_type = it->second; - } else { - gpu_type = AppleGPU::kUnknown; - } -} - -bool AppleGPUInfo::IsLocalMemoryPreferredOverGlobal() const { - return gpu_type == AppleGPU::kA7 || - gpu_type == AppleGPU::kA8 || - gpu_type == AppleGPU::kA8X; -} - -bool AppleGPUInfo::IsBionic() const { - return gpu_type == AppleGPU::kA11 || gpu_type == AppleGPU::kA12 || - gpu_type == AppleGPU::kA12X || gpu_type == AppleGPU::kA12Z || - gpu_type == AppleGPU::kA13 || gpu_type == AppleGPU::kA14; -} - -bool AppleGPUInfo::IsRoundToNearestSupported() const { - return IsBionic(); -} - -bool AppleGPUInfo::IsWaveSizeEqualTo32() const { - return true; -} - -int AppleGPUInfo::GetComputeUnitsCount() const { - switch (gpu_type) { - case AppleGPU::kA7: - return 4; - case AppleGPU::kA8: - 
return 4; - case AppleGPU::kA8X: - return 8; - case AppleGPU::kA9: - return 6; - case AppleGPU::kA9X: - return 12; - case AppleGPU::kA10: - return 6; - case AppleGPU::kA10X: - return 12; - case AppleGPU::kA11: - return 3; - case AppleGPU::kA12: - return 4; - case AppleGPU::kA12X: - return 7; - case AppleGPU::kA12Z: - return 8; - case AppleGPU::kA13: - return 4; - case AppleGPU::kA14: - return 4; - case AppleGPU::kUnknown: - return 1; - } -} - -GpuInfo::GpuInfo(const std::string& device_name) - : vendor(GetVendorFromString(device_name)) { - if (vendor == GpuVendor::kApple) { - apple_info = AppleGPUInfo(device_name); - } -} - -bool GpuInfo::IsIntel() const { return vendor == GpuVendor::kIntel; } - -bool GpuInfo::IsApple() const { return vendor == GpuVendor::kApple; } - -bool GpuInfo::IsAMD() const { return vendor == GpuVendor::kAMD; } - -bool GpuInfo::IsRoundToNearestSupported() const { - if (vendor == GpuVendor::kApple) { - return apple_info.IsRoundToNearestSupported(); - } else { - return true; - } -} - -bool GpuInfo::IsWaveSizeEqualTo32() const { - if (vendor == GpuVendor::kApple) { - return apple_info.IsWaveSizeEqualTo32(); - } else { - return false; - } -} - -int GpuInfo::GetComputeUnitsCount() const { - if (vendor == GpuVendor::kApple) { - return apple_info.GetComputeUnitsCount(); - } else { - return 1; - } -} - -} // namespace metal -} // namespace gpu -} // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/metal/device_info.h b/tensorflow/lite/delegates/gpu/metal/device_info.h deleted file mode 100644 index f77d6950167..00000000000 --- a/tensorflow/lite/delegates/gpu/metal/device_info.h +++ /dev/null @@ -1,97 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_DELEGATES_GPU_METAL_DEVICE_INFO_H_ -#define TENSORFLOW_LITE_DELEGATES_GPU_METAL_DEVICE_INFO_H_ - -#include - -namespace tflite { -namespace gpu { -namespace metal { - -// The VendorID returned by the GPU driver. -enum class GpuVendor { - kApple, - kQualcomm, - kMali, - kPowerVR, - kNvidia, - kAMD, - kIntel, - kUnknown -}; - -enum class AppleGPU { - kUnknown, - kA7, - kA8, - kA8X, - kA9, - kA9X, - kA10, - kA10X, - kA11, - kA12, - kA12X, - kA12Z, - kA13, - kA14, -}; - -struct AppleGPUInfo { - AppleGPUInfo() = default; - explicit AppleGPUInfo(const std::string& device_name); - AppleGPU gpu_type; - - bool IsLocalMemoryPreferredOverGlobal() const; - - bool IsBionic() const; - - // floating point rounding mode - bool IsRoundToNearestSupported() const; - - // returns true if device have fixed wave size equal to 32 - bool IsWaveSizeEqualTo32() const; - - int GetComputeUnitsCount() const; -}; - -struct GpuInfo { - GpuInfo() = default; - explicit GpuInfo(const std::string& device_name); - - GpuVendor vendor = GpuVendor::kUnknown; - - AppleGPUInfo apple_info; - - bool IsIntel() const; - bool IsApple() const; - bool IsAMD() const; - - // floating point rounding mode - bool IsRoundToNearestSupported() const; - - // returns true if device have fixed wave size equal to 32 - bool IsWaveSizeEqualTo32() const; - - int GetComputeUnitsCount() const; -}; - -} // namespace metal -} // namespace gpu -} // namespace tflite - -#endif // TENSORFLOW_LITE_DELEGATES_GPU_METAL_DEVICE_INFO_H_ 
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD index 4033784fa34..ae35555e2d3 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD @@ -123,6 +123,7 @@ cc_library( hdrs = ["conv.h"], deps = [ "//tensorflow/lite/delegates/gpu/common:data_type", + "//tensorflow/lite/delegates/gpu/common:gpu_info", "//tensorflow/lite/delegates/gpu/common:model", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:shape", @@ -130,7 +131,6 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/common:winograd_util", "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", - "//tensorflow/lite/delegates/gpu/metal:device_info", "//tensorflow/lite/delegates/gpu/metal:runtime_options", "@com_google_absl//absl/strings", ], @@ -228,11 +228,11 @@ cc_library( hdrs = ["elementwise.h"], deps = [ "//tensorflow/lite/delegates/gpu/common:convert", + "//tensorflow/lite/delegates/gpu/common:gpu_info", "//tensorflow/lite/delegates/gpu/common:model", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", - "//tensorflow/lite/delegates/gpu/metal:device_info", "@com_google_absl//absl/strings", ], ) @@ -265,13 +265,13 @@ cc_library( srcs = ["fully_connected.cc"], hdrs = ["fully_connected.h"], deps = [ + "//tensorflow/lite/delegates/gpu/common:gpu_info", "//tensorflow/lite/delegates/gpu/common:model", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/common/task:buffer_desc", "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", - "//tensorflow/lite/delegates/gpu/metal:device_info", "//tensorflow/lite/delegates/gpu/metal:runtime_options", 
"@com_google_absl//absl/strings", ], @@ -691,13 +691,13 @@ cc_library( srcs = ["softmax.cc"], hdrs = ["softmax.h"], deps = [ + "//tensorflow/lite/delegates/gpu/common:gpu_info", "//tensorflow/lite/delegates/gpu/common:model", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:shape", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", - "//tensorflow/lite/delegates/gpu/metal:device_info", "//tensorflow/lite/delegates/gpu/metal:runtime_options", ], ) @@ -766,12 +766,12 @@ cc_library( srcs = ["transpose_conv.cc"], hdrs = ["transpose_conv.h"], deps = [ + "//tensorflow/lite/delegates/gpu/common:gpu_info", "//tensorflow/lite/delegates/gpu/common:model", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:shape", "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", - "//tensorflow/lite/delegates/gpu/metal:device_info", "//tensorflow/lite/delegates/gpu/metal:runtime_options", "@com_google_absl//absl/strings", ], @@ -821,6 +821,7 @@ objc_library( ], deps = [ "//tensorflow/lite/delegates/gpu/common:convert", + "//tensorflow/lite/delegates/gpu/common:gpu_info", "//tensorflow/lite/delegates/gpu/common:model", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:shape", @@ -831,7 +832,6 @@ objc_library( "//tensorflow/lite/delegates/gpu/metal:api", "//tensorflow/lite/delegates/gpu/metal:common", "//tensorflow/lite/delegates/gpu/metal:compiled_model", - "//tensorflow/lite/delegates/gpu/metal:device_info", "//tensorflow/lite/delegates/gpu/metal:inference_context", "//tensorflow/lite/delegates/gpu/metal:runtime_options", "@FP16", @@ -907,11 +907,11 @@ objc_library( sdk_frameworks = ["XCTest"], deps = [ ":test_util", + "//tensorflow/lite/delegates/gpu/common:gpu_info", 
"//tensorflow/lite/delegates/gpu/common:shape", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/metal:common", - "//tensorflow/lite/delegates/gpu/metal:device_info", "//tensorflow/lite/delegates/gpu/metal:inference_context", "//tensorflow/lite/delegates/gpu/metal:runtime_options", ], diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/conv.cc b/tensorflow/lite/delegates/gpu/metal/kernels/conv.cc index 136609ce3da..967004ec548 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/conv.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/conv.cc @@ -25,6 +25,7 @@ limitations under the License. #include "absl/strings/substitute.h" #include "tensorflow/lite/delegates/gpu/common/data_type.h" +#include "tensorflow/lite/delegates/gpu/common/gpu_info.h" #include "tensorflow/lite/delegates/gpu/common/model.h" #include "tensorflow/lite/delegates/gpu/common/operations.h" #include "tensorflow/lite/delegates/gpu/common/shape.h" @@ -32,7 +33,6 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/common/util.h" #include "tensorflow/lite/delegates/gpu/common/winograd_util.h" #include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h" -#include "tensorflow/lite/delegates/gpu/metal/device_info.h" #include "tensorflow/lite/delegates/gpu/metal/runtime_options.h" namespace tflite { @@ -729,7 +729,7 @@ bool IsKernelYIs1(const Convolution2DAttributes& attr) { attr.padding.appended.h == 0; } -int GetMaximumPossibleWavesCount(const AppleGPUInfo& apple_info, +int GetMaximumPossibleWavesCount(const AppleInfo& apple_info, const BHWC& dst_shape) { if (apple_info.IsLocalMemoryPreferredOverGlobal()) { return GetGroupsCountForLinearWH(dst_shape, {32, 1, 1}, {1, 1, 1}); @@ -738,7 +738,7 @@ int GetMaximumPossibleWavesCount(const AppleGPUInfo& apple_info, } } -int GetRecommendedBlockSize(const AppleGPUInfo& apple_info, +int GetRecommendedBlockSize(const AppleInfo& apple_info, const BHWC& dst_shape) { const int max_waves = GetMaximumPossibleWavesCount(apple_info, dst_shape); const int cu_count = apple_info.GetComputeUnitsCount(); @@ -753,7 +753,7 @@ int GetRecommendedBlockSize(const AppleGPUInfo& apple_info, } } -ConvParams GetConvParamsForA7A8(const AppleGPUInfo& apple_info, +ConvParams GetConvParamsForA7A8(const AppleInfo& apple_info, const Convolution2DAttributes& attr, const BHWC& dst_shape) { const int dst_slices = DivideRoundUp(dst_shape.c, 4); @@ -830,7 +830,7 @@ ConvParams GetConvParamsForA7A8(const AppleGPUInfo& apple_info, return params; } -ConvParams GetConvParamsForA9AndHigher(const AppleGPUInfo& apple_info, +ConvParams GetConvParamsForA9AndHigher(const AppleInfo& apple_info, const Convolution2DAttributes& attr, const BHWC& dst_shape) { const int dst_slices = DivideRoundUp(dst_shape.c, 4); diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/conv.h b/tensorflow/lite/delegates/gpu/metal/kernels/conv.h index 41875478309..b2f63716203 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/conv.h +++ 
b/tensorflow/lite/delegates/gpu/metal/kernels/conv.h @@ -18,10 +18,10 @@ limitations under the License. #include +#include "tensorflow/lite/delegates/gpu/common/gpu_info.h" #include "tensorflow/lite/delegates/gpu/common/model.h" #include "tensorflow/lite/delegates/gpu/common/operations.h" #include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h" -#include "tensorflow/lite/delegates/gpu/metal/device_info.h" #include "tensorflow/lite/delegates/gpu/metal/runtime_options.h" namespace tflite { diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/conv_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/conv_test.mm index 7842412f1f9..90fe1918600 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/conv_test.mm +++ b/tensorflow/lite/delegates/gpu/metal/kernels/conv_test.mm @@ -297,7 +297,8 @@ using ::tflite::gpu::metal::SingleOpModel; outputs_v0[1].data.resize(dst_shape.DimensionsProduct()); std::string device_name = std::string([[device name] UTF8String]); - tflite::gpu::metal::GpuInfo gpu_info(device_name); + tflite::gpu::GpuInfo gpu_info; + tflite::gpu::GetGpuInfoFromDeviceDescription(device_name, &gpu_info); auto tasks_v0 = ConvolutionGeneric(0, 0, 1, dst_shape, attr, gpu_info, options); auto status = RunGraph(tasks_v0, device, inputs_v0, &outputs_v0); diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.cc b/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.cc index 79aee9493ce..2be17c0c13d 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.cc @@ -24,13 +24,13 @@ limitations under the License. 
#include #include "absl/strings/substitute.h" +#include "tensorflow/lite/delegates/gpu/common/gpu_info.h" #include "tensorflow/lite/delegates/gpu/common/model.h" #include "tensorflow/lite/delegates/gpu/common/operations.h" #include "tensorflow/lite/delegates/gpu/common/task/buffer_desc.h" #include "tensorflow/lite/delegates/gpu/common/types.h" #include "tensorflow/lite/delegates/gpu/common/util.h" #include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h" -#include "tensorflow/lite/delegates/gpu/metal/device_info.h" #include "tensorflow/lite/delegates/gpu/metal/runtime_options.h" namespace tflite { diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.h b/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.h index 769ffa36550..9f07b8aba0b 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.h +++ b/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.h @@ -18,10 +18,10 @@ limitations under the License. #include +#include "tensorflow/lite/delegates/gpu/common/gpu_info.h" #include "tensorflow/lite/delegates/gpu/common/model.h" #include "tensorflow/lite/delegates/gpu/common/operations.h" #include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h" -#include "tensorflow/lite/delegates/gpu/metal/device_info.h" #include "tensorflow/lite/delegates/gpu/metal/runtime_options.h" namespace tflite { diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/softmax.cc b/tensorflow/lite/delegates/gpu/metal/kernels/softmax.cc index 3dc5bca0007..7b03fd1f63b 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/softmax.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/softmax.cc @@ -20,12 +20,12 @@ limitations under the License. 
#include #include +#include "tensorflow/lite/delegates/gpu/common/gpu_info.h" #include "tensorflow/lite/delegates/gpu/common/model.h" #include "tensorflow/lite/delegates/gpu/common/shape.h" #include "tensorflow/lite/delegates/gpu/common/types.h" #include "tensorflow/lite/delegates/gpu/common/util.h" #include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h" -#include "tensorflow/lite/delegates/gpu/metal/device_info.h" #include "tensorflow/lite/delegates/gpu/metal/runtime_options.h" namespace tflite { diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/softmax.h b/tensorflow/lite/delegates/gpu/metal/kernels/softmax.h index 81f45d909ef..f27c23fa903 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/softmax.h +++ b/tensorflow/lite/delegates/gpu/metal/kernels/softmax.h @@ -18,10 +18,10 @@ limitations under the License. #include +#include "tensorflow/lite/delegates/gpu/common/gpu_info.h" #include "tensorflow/lite/delegates/gpu/common/model.h" #include "tensorflow/lite/delegates/gpu/common/operations.h" #include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h" -#include "tensorflow/lite/delegates/gpu/metal/device_info.h" #include "tensorflow/lite/delegates/gpu/metal/runtime_options.h" namespace tflite { diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/test_util.mm b/tensorflow/lite/delegates/gpu/metal/kernels/test_util.mm index d7c0507ac1d..0b843406d1a 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/test_util.mm +++ b/tensorflow/lite/delegates/gpu/metal/kernels/test_util.mm @@ -34,7 +34,7 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h" #include "tensorflow/lite/delegates/gpu/metal/inference_context.h" #include "tensorflow/lite/delegates/gpu/metal/runtime_options.h" -#include "tensorflow/lite/delegates/gpu/metal/device_info.h" +#include "tensorflow/lite/delegates/gpu/common/gpu_info.h" namespace tflite { namespace gpu { @@ -80,7 +80,8 @@ absl::Status SingleOpModel::Invoke() { id device = MTLCreateSystemDefaultDevice(); std::string device_name = std::string([[device name] UTF8String]); - GpuInfo gpu_info(device_name); + GpuInfo gpu_info; + GetGpuInfoFromDeviceDescription(device_name, &gpu_info); RuntimeOptions options; options.storage_precision = RuntimeOptions::Precision::FP32; options.accumulator_precision = RuntimeOptions::Precision::FP32; diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.cc b/tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.cc index fcf06c4ae14..66fbc3fb420 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.cc @@ -22,12 +22,12 @@ limitations under the License. 
#include #include "absl/strings/substitute.h" +#include "tensorflow/lite/delegates/gpu/common/gpu_info.h" #include "tensorflow/lite/delegates/gpu/common/model.h" #include "tensorflow/lite/delegates/gpu/common/operations.h" #include "tensorflow/lite/delegates/gpu/common/shape.h" #include "tensorflow/lite/delegates/gpu/common/util.h" #include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h" -#include "tensorflow/lite/delegates/gpu/metal/device_info.h" #include "tensorflow/lite/delegates/gpu/metal/runtime_options.h" namespace tflite { diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.h b/tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.h index 5a4410d9ba3..56b9c3f3f03 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.h +++ b/tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.h @@ -18,10 +18,10 @@ limitations under the License. #include +#include "tensorflow/lite/delegates/gpu/common/gpu_info.h" #include "tensorflow/lite/delegates/gpu/common/model.h" #include "tensorflow/lite/delegates/gpu/common/operations.h" #include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h" -#include "tensorflow/lite/delegates/gpu/metal/device_info.h" #include "tensorflow/lite/delegates/gpu/metal/runtime_options.h" namespace tflite { diff --git a/tensorflow/lite/delegates/gpu/metal_delegate.mm b/tensorflow/lite/delegates/gpu/metal_delegate.mm index b4a8b91499f..0f9bbb39bcf 100644 --- a/tensorflow/lite/delegates/gpu/metal_delegate.mm +++ b/tensorflow/lite/delegates/gpu/metal_delegate.mm @@ -43,7 +43,7 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/metal/buffer_convert.h" #include "tensorflow/lite/delegates/gpu/metal/common.h" #include "tensorflow/lite/delegates/gpu/metal/compiled_model.h" -#include "tensorflow/lite/delegates/gpu/metal/device_info.h" +#include "tensorflow/lite/delegates/gpu/common/gpu_info.h" #include "tensorflow/lite/delegates/gpu/metal/inference_context.h" #include "tensorflow/lite/delegates/gpu/metal/runtime_options.h" #include "tensorflow/lite/kernels/kernel_util.h" @@ -338,7 +338,8 @@ class Delegate { } std::string device_name = std::string([[metal_device_ name] UTF8String]); - GpuInfo gpu_info(device_name); + GpuInfo gpu_info; + GetGpuInfoFromDeviceDescription(device_name, &gpu_info); size_t storage_type_size; RuntimeOptions runtime_options; if (options_.allow_precision_loss) { From 9c52e4ba25616a62810b0beb725369308b2450e1 Mon Sep 17 00:00:00 2001 From: Phoenix Meadowlark Date: Tue, 10 Nov 2020 14:26:19 -0800 Subject: [PATCH 133/220] Add bypass for reduction no-ops. PiperOrigin-RevId: 341697715 Change-Id: Id6287bbcf914def36346972153b661abd4a145b6 --- .../mlir/tensorflow/ir/tf_generated_ops.td | 2 ++ .../compiler/mlir/tensorflow/ir/tf_ops_n_z.cc | 15 +++++++++++++++ .../mlir/tensorflow/tests/canonicalize.mlir | 7 +++++++ 3 files changed, 24 insertions(+) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index 4d1af95c395..d1c4df35251 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -12652,6 +12652,8 @@ retained with length 1. 
OpBuilderDAG<(ins "Value":$input, "Value":$reduction_indices, "BoolAttr":$keep_dims)> ]; + + let hasFolder = 1; } def TF_SymbolicGradientOp : TF_Op<"SymbolicGradient", [NoSideEffect]> { diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc index ca54b817b70..5ead88b2903 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc @@ -1539,6 +1539,21 @@ void SumOp::build(OpBuilder &builder, OperationState &result, Value input, build(builder, result, out_ty, input, reduction_indices, keep_dims); } +// TODO: Templatize this fold for all reduction ops. +OpFoldResult SumOp::fold(ArrayRef operands) { + auto input_ty = input().getType().template dyn_cast(); + if (!input_ty) return {}; + auto result_ty = getType().template dyn_cast(); + if (!result_ty) return {}; + + // Bypass this op if the result has the same shape and type. This can happen + // if the input tensor has size 0 or size 1. 
+ if (!keep_dims() && input_ty == result_ty) { + return input(); + } + return {}; +} + //===----------------------------------------------------------------------===// // StridedSliceOp //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir index 7cb4aefd28c..e43141ce106 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir @@ -1273,3 +1273,10 @@ func @testFusedBatchNormToBatchNormV3(%arg0: tensor<8x8x8x8xf32>, %arg1: tensor< %0:5 = "tf.FusedBatchNorm"(%arg0, %arg1, %arg2, %arg3, %arg4): (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32> ) return %0#0 : tensor<8x8x8x8xf32> } + +// CHECK-LABEL: func @testSumFoldBypass +func @testSumFoldBypass(%arg0: tensor<4x?xf16>, %arg1: tensor<*xi64>) -> tensor<4x?xf16> { + // CHECK: return %arg0 + %0 = "tf.Sum"(%arg0, %arg1) { keep_dims = false }: (tensor<4x?xf16>, tensor<*xi64>) -> tensor<4x?xf16> + return %0 : tensor<4x?xf16> +} From 8478ef526666e7ffea28671b7fdb4dae4cf209ef Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Nov 2020 15:12:35 -0800 Subject: [PATCH 134/220] [TF:TRT] Adds TF2 behavior into model testing. With TF2, we invoke a different TF-TRT converter (TrtGraphConverterV2) to convert the model and use dynamic TensorRT engines to run the models. 
PiperOrigin-RevId: 341707227 Change-Id: Ibbc04448e39a0fc585525df38b9241791a216c6d --- .../compiler/tensorrt/model_tests/BUILD | 1 + .../tensorrt/model_tests/model_handler.py | 106 +++++++++++++++++- .../tensorrt/model_tests/run_models.py | 27 ++++- 3 files changed, 126 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/compiler/tensorrt/model_tests/BUILD b/tensorflow/python/compiler/tensorrt/model_tests/BUILD index d00a0dd42f2..b82ea805a41 100644 --- a/tensorflow/python/compiler/tensorrt/model_tests/BUILD +++ b/tensorflow/python/compiler/tensorrt/model_tests/BUILD @@ -43,6 +43,7 @@ py_binary( "//tensorflow/python:framework_ops", "//tensorflow/python/compiler/tensorrt:trt_convert_py", "@absl_py//absl:app", + "@absl_py//absl/flags", "@absl_py//absl/logging", ], ) diff --git a/tensorflow/python/compiler/tensorrt/model_tests/model_handler.py b/tensorflow/python/compiler/tensorrt/model_tests/model_handler.py index 7db998e54f7..4d2f0eaf015 100644 --- a/tensorflow/python/compiler/tensorrt/model_tests/model_handler.py +++ b/tensorflow/python/compiler/tensorrt/model_tests/model_handler.py @@ -35,6 +35,7 @@ from tensorflow.python.framework import dtypes as tf_dtypes from tensorflow.python.framework import importer from tensorflow.python.framework import ops as framework_ops from tensorflow.python.ops import random_ops +from tensorflow.python.saved_model import load as saved_model_load from tensorflow.python.saved_model import loader as saved_model_loader from tensorflow.python.saved_model import signature_constants from tensorflow.python.saved_model import tag_constants @@ -71,6 +72,15 @@ def _generate_random_tensor_v1(tensor_info: meta_graph_pb2.TensorInfo, shape=shape, dtype=dtype, name=tensor_info.name.split(":")[0]).eval() +def _generate_random_tensor_v2( + tensor: framework_ops.Tensor, + batch_size: Optional[int] = None) -> framework_ops.Tensor: + """Generates a random tensor based on the data type and tensor shape.""" + shape = 
_get_concrete_tensor_shape(tensor.shape.as_proto(), batch_size) + return random_ops.random_uniform( + shape=shape, dtype=tensor.dtype, name=tensor.name) + + # Models are repeatedly loaded for different TensorRT conversion settings. # Using cache can reduce I/O. @functools.lru_cache() @@ -95,6 +105,16 @@ def load_meta_graph( return meta_graph +@functools.lru_cache() +def load_graph_func(saved_model_dir: str, saved_model_tags: str, + saved_model_signature_key: str): + """Loads a graph function in TF2.""" + imported = saved_model_load.load( + export_dir=saved_model_dir, tags=saved_model_tags) + graph_func = imported.signatures[saved_model_signature_key] + return convert_to_constants.convert_variables_to_constants_v2(graph_func) + + ### Test Classes class TestResult( collections.namedtuple("TestResult", @@ -166,8 +186,9 @@ class _ModelHandlerBase(metaclass=abc.ABCMeta): """Runs the model with provided or randomly generated input tensors. Args: - inputs: Mapping from names to input tensors. If `None`, ramdomly generated - inputs will be used instead. + inputs: Mapping from names to input ndarrays in TF1, or a sequence of + tensors in TF2. If `None`, ramdomly generated inputs will be used + instead. warmup_iterations: Number of inferences to warm up the runtime. benchmark_iterations: Number of inferences to measure the latency. allow_to_use_gpu: Whether it is allowed to use GPU or not. 
@@ -240,6 +261,57 @@ class ModelHandlerV1(_ModelHandlerBase): return TestResult(latency=latency, outputs=outputs if inputs else None) +class ModelHandlerV2(_ModelHandlerBase): + """Runs a model in TF2.""" + + @property + def graph_func(self): + graph_func = load_graph_func( + saved_model_dir=self.model_config.saved_model_dir, + saved_model_tags=self.model_config.saved_model_tags, + saved_model_signature_key=self.model_config.saved_model_signature_key) + return convert_to_constants.convert_variables_to_constants_v2(graph_func) + + @property + def input_tensor_names(self): + return [tensor.name for tensor in self.graph_func.inputs] + + @property + def output_tensor_names(self): + return [tensor.name for tensor in self.graph_func.outputs] + + def generate_random_inputs(self, + batch_size: Optional[int] = None + ) -> Sequence[framework_ops.Tensor]: + batch_size = batch_size or self.model_config.default_batch_size + return [ + _generate_random_tensor_v2(tensor, batch_size) + for tensor in self.graph_func.inputs + ] + + def run(self, + inputs: Optional[Sequence[framework_ops.Tensor]] = None, + warmup_iterations=10, + benchmark_iterations=100, + allow_to_use_gpu=False) -> TestResult: + inputs = inputs or self.generate_random_inputs() + try: + device = "/device:gpu:0" if allow_to_use_gpu else "/device:cpu:0" + with framework_ops.device(device): + for _ in range(warmup_iterations): + self.graph_func(*inputs) + latency = [] + for _ in range(benchmark_iterations): + before = time.time() + outputs = self.graph_func(*inputs) + latency.append(time.time() - before) + except Exception as exc: + raise RuntimeError("Failed to run model inference! 
" + "Model information: {}".format(str(self))) from exc + outputs = dict(zip(self.output_tensor_names, outputs)) + return TestResult(latency=latency, outputs=outputs if inputs else None) + + class _TrtModelHandlerBase(_ModelHandlerBase): """Base class for converting and running a model.""" @@ -327,3 +399,33 @@ class TrtModelHandlerV1(_TrtModelHandlerBase, ModelHandlerV1): benchmark_iterations, allow_to_use_gpu=True) return test_result._replace(trt_convert_params=self._trt_convert_params) + + +class TrtModelHandlerV2(_TrtModelHandlerBase, ModelHandlerV2): + """Converts a TF2 model with TensorRT and runs the converted model.""" + + def _create_converter(self, trt_convert_params: trt.TrtConversionParams): + return trt.TrtGraphConverterV2( + input_saved_model_dir=self.model_config.saved_model_dir, + input_saved_model_tags=self.model_config.saved_model_tags, + input_saved_model_signature_key=( + self.model_config.saved_model_signature_key), + conversion_params=trt_convert_params) + + def _check_conversion(self, graph_func): + graph_def = graph_func.graph.as_graph_def() + self._check_contains_trt_engine(graph_def) + + def run(self, + inputs: Optional[Sequence[framework_ops.Tensor]] = None, + warmup_iterations=10, + benchmark_iterations=100) -> TestResult: + self.save(overwrite=False) + logging.info("Running with TensorRT!") + test_result = ModelHandlerV2.run( + self, + inputs, + warmup_iterations, + benchmark_iterations, + allow_to_use_gpu=True) + return test_result._replace(trt_convert_params=self._trt_convert_params) diff --git a/tensorflow/python/compiler/tensorrt/model_tests/run_models.py b/tensorflow/python/compiler/tensorrt/model_tests/run_models.py index a43821d2d6f..8da8dbc3778 100644 --- a/tensorflow/python/compiler/tensorrt/model_tests/run_models.py +++ b/tensorflow/python/compiler/tensorrt/model_tests/run_models.py @@ -17,6 +17,7 @@ import os from absl import app +from absl import flags from absl import logging from tensorflow.python.compiler.tensorrt import 
trt_convert as trt @@ -24,6 +25,10 @@ from tensorflow.python.compiler.tensorrt.model_tests import model_handler from tensorflow.python.framework import ops as framework_ops from tensorflow.python.platform import test as platform_test +FLAGS = flags.FLAGS +flags.DEFINE_boolean("use_tf2", True, + "Whether to test with TF2 behavior or not (TF1).") + DEFAUL_TRT_CONVERT_PARAMS = trt.DEFAULT_TRT_CONVERSION_PARAMS @@ -37,10 +42,16 @@ def run_all_tests(): saved_model_dir=platform_test.test_src_dir_path( "python/compiler/tensorrt/model_tests/sample_model"), default_batch_size=128),) - model_handler_cls = model_handler.ModelHandlerV1 - trt_model_handeler_cls = model_handler.TrtModelHandlerV1 - default_trt_convert_params = DEFAUL_TRT_CONVERT_PARAMS._replace( - is_dynamic_op=False) + if FLAGS.use_tf2: + model_handler_cls = model_handler.ModelHandlerV2 + trt_model_handeler_cls = model_handler.TrtModelHandlerV2 + default_trt_convert_params = DEFAUL_TRT_CONVERT_PARAMS._replace( + is_dynamic_op=True) + else: + model_handler_cls = model_handler.ModelHandlerV1 + trt_model_handeler_cls = model_handler.TrtModelHandlerV1 + default_trt_convert_params = DEFAUL_TRT_CONVERT_PARAMS._replace( + is_dynamic_op=False) for model_config in model_configs: trt_convert_params = default_trt_convert_params._replace( max_batch_size=model_config.default_batch_size) @@ -70,8 +81,12 @@ def main(argv): os.environ["TF_TRT_ALLOW_ENGINE_NATIVE_SEGMENT_EXECUTION"] = "False" - logging.info("Running in TF1 mode. Eager execution is disabled.") - framework_ops.disable_eager_execution() + if FLAGS.use_tf2: + logging.info("Running in TF2 mode. Eager execution is enabled.") + framework_ops.enable_eager_execution() + else: + logging.info("Running in TF1 mode. 
Eager execution is disabled.") + framework_ops.disable_eager_execution() run_all_tests() From b2ff807cf53a3c386414baf89945f9742f98532f Mon Sep 17 00:00:00 2001 From: Feng Liu Date: Tue, 10 Nov 2020 15:45:59 -0800 Subject: [PATCH 135/220] Added a flag to the quantization config to disable enforced fixed output range Set the flag to true if the user only infers the output ranges by the quantization emulation ops PiperOrigin-RevId: 341713693 Change-Id: I555aaef5fa23e27de8a0376848e32518ce01570e --- .../mlir/lite/quantization/quantization_config.h | 6 ++++++ .../mlir/lite/transforms/prepare_quantize.cc | 12 +++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/quantization/quantization_config.h b/tensorflow/compiler/mlir/lite/quantization/quantization_config.h index 0e766ec52b6..7343853fea0 100644 --- a/tensorflow/compiler/mlir/lite/quantization/quantization_config.h +++ b/tensorflow/compiler/mlir/lite/quantization/quantization_config.h @@ -52,6 +52,12 @@ struct QuantizationSpecs { // weight FakeQuant). bool disable_per_channel = false; + // When set to true, the fixed output ranges of the activation ops (tanh, + // sigmoid, etc.) are not enforced. Then, to quantize these ops, quantization + // emulation ops should be specified after the ops in the input graph. This + // flag should be set to false for post-training quantization. + bool disable_enforced_fixed_output_range = false; + // The node type when the model is exported. Currently this is limited to // DT_FLOAT, DT_HALF, DT_QINT8, and DT_QUINT8. When DT_HALF is used, the // `weight_quantization` flag needs to set to true. 
When DT_QUINT8 is used, diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_quantize.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_quantize.cc index 33affdaab45..dacc81c69ee 100644 --- a/tensorflow/compiler/mlir/lite/transforms/prepare_quantize.cc +++ b/tensorflow/compiler/mlir/lite/transforms/prepare_quantize.cc @@ -326,7 +326,14 @@ void PrepareQuantizePass::runOnFunction() { OwningRewritePatternList patterns; bool is_signed = quant_specs_.IsSignedInferenceType(); int bit_width = quant_specs_.GetQuantizationTypeWidth(); - bool enforce_fixed_output_range = ContainsQuantizeOps(func); + bool quantization_aware_training_mode = ContainsQuantizeOps(func); + // Enforce fixed output range for post-training quantization and + // when the model has quantization emulation ops, unless it was disabled + // explicitly by the flag. + bool enforced_output_range = + (quant_specs_.post_training_quantization || + quantization_aware_training_mode) && + !quant_specs_.disable_enforced_fixed_output_range; if (is_signed) { patterns.insert>(ctx); // Convert quant stats to int8 quantization parameters. @@ -345,8 +352,7 @@ void PrepareQuantizePass::runOnFunction() { // values (tensors). ApplyQuantizationParamsPropagation( func, is_signed, disable_per_channel || quant_specs_.disable_per_channel, - GetOpQuantSpec, - enforce_fixed_output_range || quant_specs_.post_training_quantization); + GetOpQuantSpec, enforced_output_range); ConvertMlirQuantOpsToTFLQuantOps(func); } From e07f05f816b0e25d0fcd37c7b439d2a50701a9b6 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Tue, 10 Nov 2020 16:40:43 -0800 Subject: [PATCH 136/220] Create BUILD files and corresponding targets for `tensorflow/core/api_def/python_api/BUILD`. 
PiperOrigin-RevId: 341724236 Change-Id: I65a4a6abebec4b98f2dc76062213c9b62e55b83f --- tensorflow/core/api_def/BUILD | 4 ++-- tensorflow/core/api_def/python_api/BUILD | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 tensorflow/core/api_def/python_api/BUILD diff --git a/tensorflow/core/api_def/BUILD b/tensorflow/core/api_def/BUILD index f9e2adaec6b..b1ced361927 100644 --- a/tensorflow/core/api_def/BUILD +++ b/tensorflow/core/api_def/BUILD @@ -33,9 +33,9 @@ filegroup( visibility = ["//tensorflow:internal"], ) -filegroup( +alias( name = "python_api_def", - srcs = glob(["python_api/*"]), + actual = "//tensorflow/core/api_def/python_api:python_api_def", visibility = ["//tensorflow:internal"], ) diff --git a/tensorflow/core/api_def/python_api/BUILD b/tensorflow/core/api_def/python_api/BUILD new file mode 100644 index 00000000000..f5d41a3006a --- /dev/null +++ b/tensorflow/core/api_def/python_api/BUILD @@ -0,0 +1,15 @@ +# Description: +# Provides ApiDef access and ApiDef validation for TensorFlow python. 
+ +load("//tensorflow:tensorflow.bzl", "filegroup") + +package( + default_visibility = ["//tensorflow:internal"], + licenses = ["notice"], # Apache 2.0 +) + +filegroup( + name = "python_api_def", + srcs = glob(["*"]), + visibility = ["//tensorflow:internal"], +) From 982a5a15b379246dc7fbaaeb6c16f146f5dcb5a4 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Tue, 10 Nov 2020 16:44:04 -0800 Subject: [PATCH 137/220] [XLA:SPMD] Fix wrong offsets of windowed dot (reduce-scatter case) PiperOrigin-RevId: 341724756 Change-Id: Ied736cea8260e29dcebbbb1d79194e06ee324713 --- .../compiler/xla/service/spmd/dot_handler.cc | 71 +++++++------------ .../xla/service/spmd/spmd_partitioner_test.cc | 4 +- 2 files changed, 26 insertions(+), 49 deletions(-) diff --git a/tensorflow/compiler/xla/service/spmd/dot_handler.cc b/tensorflow/compiler/xla/service/spmd/dot_handler.cc index f765ee5ecc2..a346d8778d6 100644 --- a/tensorflow/compiler/xla/service/spmd/dot_handler.cc +++ b/tensorflow/compiler/xla/service/spmd/dot_handler.cc @@ -289,6 +289,12 @@ StatusOr PartitionBaseCase( to_mask.PadWithValue(b->AddInstruction(HloInstruction::CreateConstant( LiteralUtil::Zero(output_base_shape.element_type())))); } + if (operands_sharded_at_contracting_dims) { + auto zero = b->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(output_base_shape.element_type()))); + lhs = lhs.PadWithValue(zero); + rhs = rhs.PadWithValue(zero); + } auto result_buffer = CreateZero(padded_result_buffer_shape, b); auto iteration = b->AddInstruction( HloInstruction::CreateConstant(LiteralUtil::CreateR0(0))); @@ -333,57 +339,28 @@ StatusOr PartitionBaseCase( if (windowed_at_contracting_dims || windowed_at_batch_dims || operands_sharded_at_contracting_dims) { // Slice the matching operand according to the partitioned dimensions on - // the windowed operand. + // the windowed operand or the output. auto slice_operand = matching_operand == 0 ? 
l : r; - HloInstruction* slice; + // We do this by treating the matching operand as replicated, and + // resharding it to match the windowed operand or the output. + slice_operand->set_sharding(HloSharding::Replicate()); + auto state = lhs.state(); + state.b = &body_b; + state.partition_id = data_partition_id; + const HloSharding* slice_sharding; if (operands_sharded_at_contracting_dims) { - CHECK_NE(output_sharding_dim, -1); - int64 output_sharding_dim_size = - o->shape().dimensions(output_sharding_dim); - int64 slice_dim = matching_operand == 0 - ? output_to_lhs_indices[output_sharding_dim] - : output_to_rhs_indices[output_sharding_dim]; - auto slice_shape = slice_operand->shape(); - slice_shape.set_dimensions(slice_dim, output_sharding_dim_size); - std::vector slice_offsets(slice_shape.rank()); - for (int64 i = 0; i < slice_offsets.size(); ++i) { - if (i != slice_dim) { - slice_offsets[i] = - body_b.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR0(0))); - } else { - auto stride = body_b.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR0(output_sharding_dim_size))); - slice_offsets[i] = - body_b.AddInstruction(HloInstruction::CreateBinary( - data_partition_id->shape(), HloOpcode::kMultiply, - data_partition_id, stride)); - } - } - auto padded_shape = slice_operand->shape(); - padded_shape.set_dimensions( - slice_dim, - o->shape().dimensions(output_sharding_dim) * num_partitions); - auto padded_slice_operand = - PadToShape(slice_operand, padded_shape, &body_b); - slice = body_b.AddInstruction(HloInstruction::CreateDynamicSlice( - slice_shape, padded_slice_operand, slice_offsets, - slice_shape.dimensions())); + slice_sharding = windowing_operand == 0 + ? 
&*output_sharding_transposed_to_match_rhs + : &*output_sharding_transposed_to_match_lhs; } else { - // For windowed operand that partitioned along contracting dimensions, - // we do this by treating the matching operand as replicated, and - // resharding it to match the windowed operand. - slice_operand->set_sharding(HloSharding::Replicate()); - auto state = lhs.state(); - state.b = &body_b; - state.partition_id = data_partition_id; - slice = PartitionedHlo(slice_operand, slice_operand->shape(), state) - .Reshard(windowing_operand == 0 - ? *lhs_sharding_transposed_to_match_rhs - : *rhs_sharding_transposed_to_match_lhs) - .hlo(); - slice_operand->clear_sharding(); + slice_sharding = windowing_operand == 0 + ? &*lhs_sharding_transposed_to_match_rhs + : &*rhs_sharding_transposed_to_match_lhs; } + auto slice = PartitionedHlo(slice_operand, slice_operand->shape(), state) + .Reshard(*slice_sharding) + .hlo(); + slice_operand->clear_sharding(); if (matching_operand == 0) { dot_lhs = slice; } else { diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc index 91a0c44b51a..e4bd272e361 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc @@ -3818,7 +3818,7 @@ ENTRY entry { auto ds = AllOf(op::DynamicSlice( op::Pad(op::GetTupleElement(op::Parameter(0)), op::Constant()), - op::Constant(), op::Multiply(), op::Constant(), op::Constant()), + op::Constant(), op::Reshape(), op::Constant(), op::Constant()), op::Shape("f32[320,7,16,128]")); auto partial_output = AllOf(op::Add(op::GetTupleElement(op::Parameter(0)), @@ -3909,7 +3909,7 @@ ENTRY entry { auto ds = AllOf(op::DynamicSlice( op::Pad(op::GetTupleElement(op::Parameter(0)), op::Constant()), - op::Constant(), op::Multiply(), op::Constant()), + op::Constant(), op::Reshape(), op::Constant()), op::Shape("f32[4096,17,128]")); auto partial_output = 
AllOf(op::Add(op::GetTupleElement(op::Parameter(0)), From a80daecdf0f7385e0cbd9bf160eb44dbfccd2983 Mon Sep 17 00:00:00 2001 From: Hye Soo Yang Date: Tue, 10 Nov 2020 16:56:13 -0800 Subject: [PATCH 138/220] Enable NonMaxSuppression test. PiperOrigin-RevId: 341726615 Change-Id: Id1c4082f363b6eb0154db18b1b5570d4b5354f69 --- tensorflow/python/ops/image_ops_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index 3b1cf9e9a15..ab42159d315 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -4817,7 +4817,7 @@ class FormatTest(test_util.TensorFlowTestCase): class NonMaxSuppressionTest(test_util.TensorFlowTestCase): - def NonMaxSuppressionTest(self): + def testNonMaxSuppression(self): boxes_np = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]] scores_np = [0.9, 0.75, 0.6, 0.95, 0.5, 0.3] From 5d8b439eb41fb2373c5b5c5f33c51313cfd9e57f Mon Sep 17 00:00:00 2001 From: Monica Song Date: Tue, 10 Nov 2020 17:42:50 -0800 Subject: [PATCH 139/220] Update `tf.saved_model.save` documentation. 
PiperOrigin-RevId: 341733577 Change-Id: I8d77d749839ee86f4124069da3ba8bcd87ba2ff1 --- tensorflow/python/saved_model/save.py | 175 +++++++++++++------------- 1 file changed, 88 insertions(+), 87 deletions(-) diff --git a/tensorflow/python/saved_model/save.py b/tensorflow/python/saved_model/save.py index 060e03bed4d..3725576ecb0 100644 --- a/tensorflow/python/saved_model/save.py +++ b/tensorflow/python/saved_model/save.py @@ -867,38 +867,69 @@ def _export_debug_info(exported_graph, export_dir): v1=["saved_model.save", "saved_model.experimental.save"]) def save(obj, export_dir, signatures=None, options=None): # pylint: disable=line-too-long - """Exports the Trackable object `obj` to [SavedModel format](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md). + """Exports a [tf.Module](https://www.tensorflow.org/api_docs/python/tf/Module) (and subclasses) `obj` to [SavedModel format](https://www.tensorflow.org/guide/saved_model#the_savedmodel_format_on_disk). + + The `obj` must inherit from the [`Trackable` class](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/training/tracking/base.py#L591). Example usage: - ```python - class Adder(tf.Module): + >>> class Adder(tf.Module): + ... @tf.function(input_signature=[tf.TensorSpec(shape=[], dtype=tf.float32)]) + ... def add(self, x): + ... return x + x - @tf.function(input_signature=[tf.TensorSpec(shape=None, dtype=tf.float32)]) - def add(self, x): - return x + x + 1. + >>> model = Adder() + >>> tf.saved_model.save(model, '/tmp/adder') - to_export = Adder() - tf.saved_model.save(to_export, '/tmp/adder') - ``` + The resulting SavedModel is then servable with an input named "x", a scalar + with dtype float32. - The resulting SavedModel is then servable with an input named "x", its value - having any shape and dtype float32. 
+ _Signatures_ - The optional `signatures` argument controls which methods in `obj` will be + Signatures define the input and output types for a computation. The optional + save `signatures` argument controls which methods in `obj` will be available to programs which consume `SavedModel`s, for example, serving APIs. Python functions may be decorated with `@tf.function(input_signature=...)` and passed as signatures directly, or lazily with a call to `get_concrete_function` on the method decorated with `@tf.function`. + Example: + + >>> class Adder(tf.Module): + ... @tf.function + ... def add(self, x): + ... return x + x + + >>> model = Adder() + >>> tf.saved_model.save( + ... model, '/tmp/adder',signatures=model.add.get_concrete_function( + ... tf.TensorSpec([], tf.float32))) + + If a `@tf.function` does not have an input signature and + `get_concrete_function` is not called on that method, the function will not + be directly callable in the restored SavedModel. + + Example: + + >>> class Adder(tf.Module): + ... @tf.function + ... def add(self, x): + ... return x + x + + >>> model = Adder() + >>> tf.saved_model.save(model, '/tmp/adder') + >>> restored = tf.saved_model.load('/tmp/adder') + >>> restored.add(1.) + Traceback (most recent call last): + ... + ValueError: Found zero restored functions for caller function. + If the `signatures` argument is omitted, `obj` will be searched for - `@tf.function`-decorated methods. If exactly one `@tf.function` is found, that - method will be used as the default signature for the SavedModel. This behavior - is expected to change in the future, when a corresponding - `tf.saved_model.load` symbol is added. At that point signatures will be - completely optional, and any `@tf.function` attached to `obj` or its - dependencies will be exported for use with `load`. + `@tf.function`-decorated methods. If exactly one traced `@tf.function` is + found, that method will be used as the default signature for the SavedModel. 
+ Else, any `@tf.function` attached to `obj` or its dependencies will be + exported for use with `tf.saved_model.load`. When invoking a signature in an exported SavedModel, `Tensor` arguments are identified by name. These names will come from the Python function's argument @@ -915,49 +946,46 @@ def save(obj, export_dir, signatures=None, options=None): `.signatures` attribute. This is a reserved attribute: `tf.saved_model.save` on an object with a custom `.signatures` attribute will raise an exception. - Since `tf.keras.Model` objects are also Trackable, this function can be - used to export Keras models. For example, exporting with a signature - specified: + _Using `tf.saved_model.save` with Keras models_ - ```python - class Model(tf.keras.Model): + While Keras has its own [saving and loading API](https://www.tensorflow.org/guide/keras/save_and_serialize), + this function can be used to export Keras models. For example, exporting with + a signature specified: - @tf.function(input_signature=[tf.TensorSpec(shape=[None], dtype=tf.string)]) - def serve(self, serialized): - ... + >>> class Adder(tf.keras.Model): + ... @tf.function(input_signature=[tf.TensorSpec(shape=[], dtype=tf.string)]) + ... def concat(self, x): + ... return x + x - m = Model() - tf.saved_model.save(m, '/tmp/saved_model/') - ``` + >>> model = Adder() + >>> tf.saved_model.save(model, '/tmp/adder') Exporting from a function without a fixed signature: - ```python - class Model(tf.keras.Model): + >>> class Adder(tf.keras.Model): + ... @tf.function + ... def concat(self, x): + ... return x + x - @tf.function - def call(self, x): - ... - - m = Model() - tf.saved_model.save( - m, '/tmp/saved_model/', - signatures=m.call.get_concrete_function( - tf.TensorSpec(shape=[None, 3], dtype=tf.float32, name="inp"))) - ``` + >>> model = Adder() + >>> tf.saved_model.save( + ... model, '/tmp/adder', + ... signatures=model.concat.get_concrete_function( + ... 
tf.TensorSpec(shape=[], dtype=tf.string, name="string_input"))) `tf.keras.Model` instances constructed from inputs and outputs already have a signature and so do not require a `@tf.function` decorator or a `signatures` argument. If neither are specified, the model's forward pass is exported. - ```python - x = input_layer.Input((4,), name="x") - y = core.Dense(5, name="out")(x) - model = training.Model(x, y) - tf.saved_model.save(model, '/tmp/saved_model/') - # The exported SavedModel takes "x" with shape [None, 4] and returns "out" - # with shape [None, 5] - ``` + >>> x = tf.keras.layers.Input((4,), name="x") + >>> y = tf.keras.layers.Dense(5, name="out")(x) + >>> model = tf.keras.Model(x, y) + >>> tf.saved_model.save(model, '/tmp/saved_model/') + + The exported SavedModel takes "x" with shape [None, 4] and returns "out" + with shape [None, 5] + + _Variables and Checkpoints_ Variables must be tracked by assigning them to an attribute of a tracked object or to an attribute of `obj` directly. TensorFlow objects (e.g. layers @@ -965,21 +993,19 @@ def save(obj, export_dir, signatures=None, options=None): automatically. This is the same tracking scheme that `tf.train.Checkpoint` uses, and an exported `Checkpoint` object may be restored as a training checkpoint by pointing `tf.train.Checkpoint.restore` to the SavedModel's - "variables/" subdirectory. Currently, variables are the only stateful objects - supported by `tf.saved_model.save`, but others (e.g. tables) will be supported - in the future. + "variables/" subdirectory. `tf.function` does not hard-code device annotations from outside the function body, instead of using the calling context's device. This means for example that exporting a model that runs on a GPU and serving it on a CPU will - generally work, with some exceptions. `tf.device` annotations inside the body - of the function will be hard-coded in the exported model; this type of - annotation is discouraged. Device-specific operations, e.g. 
with "cuDNN" in - the name or with device-specific layouts, may cause issues. Currently a - `DistributionStrategy` is another exception: active distribution strategies - will cause device placements to be hard-coded in a function. Exporting a - single-device computation and importing under a `DistributionStrategy` is - not currently supported, but may be in the future. + generally work, with some exceptions: + + * `tf.device` annotations inside the body of the function will be hard-coded + in the exported model; this type of annotation is discouraged. + * Device-specific operations, e.g. with "cuDNN" in the name or with + device-specific layouts, may cause issues. + * For `ConcreteFunctions`, active distribution strategies will cause device + placements to be hard-coded in the function. SavedModels exported with `tf.saved_model.save` [strip default-valued attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes) @@ -989,34 +1015,8 @@ def save(obj, export_dir, signatures=None, options=None): handled automatically, such as when the exported model contains operations which the consumer does not have definitions for. - A single tf.function can generate many ConcreteFunctions. If a downstream tool - wants to refer to all concrete functions generated by a single tf.function you - can use the `function_aliases` argument to store a map from the alias name to - all concrete function names. - E.g. - ```python - class MyModel: - @tf.function - def func(): - ... - - @tf.function - def serve(): - ... - func() - - model = MyModel() - signatures = { - 'serving_default': model.serve.get_concrete_function(), - } - options = tf.saved_model.SaveOptions(function_aliases={ - 'my_func': func, - }) - tf.saved_model.save(model, export_dir, signatures, options) - ``` - Args: - obj: A trackable object to export. + obj: A trackable object (e.g. tf.Module or tf.train.Checkpoint) to export. 
export_dir: A directory in which to write the SavedModel. signatures: Optional, one of three types: * a `tf.function` with an input signature specified, which will use the @@ -1042,6 +1042,7 @@ def save(obj, export_dir, signatures=None, options=None): May not be called from within a function body. @end_compatibility """ + # pylint: enable=line-too-long save_and_return_nodes(obj, export_dir, signatures, options, raise_metadata_warning=True) From 359f5e22c77a342dd5ebd8ca06aacad619158f7f Mon Sep 17 00:00:00 2001 From: Meghna Natraj Date: Tue, 10 Nov 2020 17:53:16 -0800 Subject: [PATCH 140/220] The optimizations flag must be enabled for integer quantization. PiperOrigin-RevId: 341735045 Change-Id: Idff43825eb3c4d1526536133ade200c4a8366a7d --- tensorflow/lite/python/lite.py | 12 +++++++++--- tensorflow/lite/python/lite_test.py | 1 + tensorflow/lite/python/lite_v2_test.py | 1 + 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py index 9d10168870a..5bbd0df9980 100644 --- a/tensorflow/lite/python/lite.py +++ b/tensorflow/lite/python/lite.py @@ -188,9 +188,12 @@ class QuantizationMode(object): self._validate_int8_required() + # TODO(b/162537905): Refactor the following quantization functions - + # re-organize and refactor for better readability. 
def post_training_int8_no_float(self): """Post training int8 quantize, disallow float fallback.""" - return (self._is_int8_target_required() and + return (self._any_optimization_enabled() and + self._is_int8_target_required() and not self._is_int16x8_target_required() and not self._is_allow_float() and self._representative_dataset is not None) @@ -223,14 +226,17 @@ class QuantizationMode(object): def post_training_int16x8_no_float(self): """Post training int16x8 quantize, disallow float fallback.""" - return (not self._is_int8_target_required() and + return (self._any_optimization_enabled() and + not self._is_int8_target_required() and self._is_int16x8_target_required() and not self._is_allow_float() and self._representative_dataset is not None) def post_training_int16x8_allow_float(self): """Post training int16x8 quantize, allow float fallback.""" - return self._is_int16x8_target_required() and self._is_allow_float() + return (self._any_optimization_enabled() and + self._is_int16x8_target_required() and + self._is_allow_float()) def post_training_dynamic_range_int8(self): """Post training int8 const, on-the-fly int8 quantize of dynamic tensors.""" diff --git a/tensorflow/lite/python/lite_test.py b/tensorflow/lite/python/lite_test.py index fcfe1d92eb6..6f1bd19a1e7 100644 --- a/tensorflow/lite/python/lite_test.py +++ b/tensorflow/lite/python/lite_test.py @@ -935,6 +935,7 @@ class FromSessionTest(TestModels, parameterized.TestCase): quantized_converter = lite.TFLiteConverter.from_session( sess, [inp], [output]) quantized_converter.experimental_new_converter = enable_mlir_converter + quantized_converter.optimizations = [lite.Optimize.DEFAULT] quantized_converter.target_spec.supported_ops = supported_ops quantized_converter.representative_dataset = calibration_gen quantized_tflite_model = quantized_converter.convert() diff --git a/tensorflow/lite/python/lite_v2_test.py b/tensorflow/lite/python/lite_v2_test.py index 1867bb50d1a..3d4cc806bfd 100644 --- 
a/tensorflow/lite/python/lite_v2_test.py +++ b/tensorflow/lite/python/lite_v2_test.py @@ -313,6 +313,7 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): converter = lite.TFLiteConverterV2.from_concrete_functions([func]) # TODO(b/156309549): We should add INT16 to the builtin types. + converter.optimizations = [lite.Optimize.DEFAULT] converter.target_spec.supported_ops = [ lite.OpsSet.TFLITE_BUILTINS_INT8 ] From 404e6a19dbb3814e55c0d4b99bf38e83dedd819f Mon Sep 17 00:00:00 2001 From: Hyeonjong Ryu Date: Tue, 10 Nov 2020 17:53:16 -0800 Subject: [PATCH 141/220] Neon acceleration applied on kernel module. PiperOrigin-RevId: 341735046 Change-Id: I9f85b54f330078575e818d0354742bf22c81d05a --- .../internal/optimized/neon_tensor_utils.cc | 60 +++++++++++++++++++ .../internal/optimized/neon_tensor_utils.h | 4 +- .../optimized/neon_tensor_utils_impl.h | 4 ++ .../internal/optimized/sse_tensor_utils.h | 4 +- 4 files changed, 68 insertions(+), 4 deletions(-) diff --git a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc index 5e61cea036b..07ecdd1208b 100644 --- a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc +++ b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc @@ -2568,6 +2568,66 @@ void NeonReductionSumVector(const int8_t* input_vector, int32_t* output_vector, } } +void NeonVectorBatchVectorCwiseProductAccumulate( + const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch, + int32_t multiplier, int shift, int16_t* result) { + int32x4_t min_value_vector = vdupq_n_s32(-32768); + int32x4_t max_value_vector = vdupq_n_s32(32767); + + for (int b = 0; b < n_batch; b++) { + int v = 0; + for (; v <= v_size - 16; v += 16) { + int32x4x4_t prod; + prod.val[0] = vmull_s16(vld1_s16(vector + v), vld1_s16(batch_vector)); + prod.val[1] = + vmull_s16(vld1_s16(vector + v + 4), vld1_s16(batch_vector + 4)); + prod.val[2] = + vmull_s16(vld1_s16(vector + v 
+ 8), vld1_s16(batch_vector + 8)); + prod.val[3] = + vmull_s16(vld1_s16(vector + v + 12), vld1_s16(batch_vector + 12)); + batch_vector += 16; + + prod = MultiplyByQuantizedMultiplier4Rows(prod, multiplier, shift); + + int16x4x4_t results; + results.val[0] = vld1_s16(result); + results.val[1] = vld1_s16(result + 4); + results.val[2] = vld1_s16(result + 8); + results.val[3] = vld1_s16(result + 12); + + prod.val[0] = vaddq_s32(prod.val[0], vmovl_s16(results.val[0])); + prod.val[1] = vaddq_s32(prod.val[1], vmovl_s16(results.val[1])); + prod.val[2] = vaddq_s32(prod.val[2], vmovl_s16(results.val[2])); + prod.val[3] = vaddq_s32(prod.val[3], vmovl_s16(results.val[3])); + + prod.val[0] = vmaxq_s32(prod.val[0], min_value_vector); + prod.val[1] = vmaxq_s32(prod.val[1], min_value_vector); + prod.val[2] = vmaxq_s32(prod.val[2], min_value_vector); + prod.val[3] = vmaxq_s32(prod.val[3], min_value_vector); + + prod.val[0] = vminq_s32(prod.val[0], max_value_vector); + prod.val[1] = vminq_s32(prod.val[1], max_value_vector); + prod.val[2] = vminq_s32(prod.val[2], max_value_vector); + prod.val[3] = vminq_s32(prod.val[3], max_value_vector); + + vst1_s16(result, vmovn_s32(prod.val[0])); + vst1_s16(result + 4, vmovn_s32(prod.val[1])); + vst1_s16(result + 8, vmovn_s32(prod.val[2])); + vst1_s16(result + 12, vmovn_s32(prod.val[3])); + + result += 16; + } + + for (; v < v_size; v++) { + int32_t prod = vector[v] * *batch_vector++; + prod = MultiplyByQuantizedMultiplier(prod, multiplier, shift); + int32_t output = prod + *result; + output = std::max(std::min(32767, output), -32768); + *result++ = output; + } + } +} + } // namespace tensor_utils } // namespace tflite diff --git a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.h b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.h index 62884620324..bfa5eb3075d 100644 --- a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.h +++ b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.h @@ -222,8 +222,8 
@@ void VectorBatchVectorCwiseProductAccumulate(const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch, int32_t multiplier, int shift, int16_t* result) { - PortableVectorBatchVectorCwiseProductAccumulate( - vector, v_size, batch_vector, n_batch, multiplier, shift, result); + NEON_OR_PORTABLE(VectorBatchVectorCwiseProductAccumulate, vector, v_size, + batch_vector, n_batch, multiplier, shift, result); } float VectorVectorDotProduct(const float* vector1, const float* vector2, diff --git a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils_impl.h b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils_impl.h index 44bc83a0669..075cd8c20c7 100644 --- a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils_impl.h +++ b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils_impl.h @@ -171,6 +171,10 @@ void NeonReductionSumVector(const float* input_vector, float* output_vector, void NeonReductionSumVector(const int8_t* input_vector, int32_t* output_vector, int output_size, int reduction_size); +void NeonVectorBatchVectorCwiseProductAccumulate( + const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch, + int32_t multiplier, int shift, int16_t* result); + #endif // USE_NEON } // namespace tensor_utils diff --git a/tensorflow/lite/kernels/internal/optimized/sse_tensor_utils.h b/tensorflow/lite/kernels/internal/optimized/sse_tensor_utils.h index e416579308b..77b4fab0e42 100644 --- a/tensorflow/lite/kernels/internal/optimized/sse_tensor_utils.h +++ b/tensorflow/lite/kernels/internal/optimized/sse_tensor_utils.h @@ -232,8 +232,8 @@ void VectorBatchVectorCwiseProductAccumulate(const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch, int32_t multiplier, int shift, int16_t* result) { - PortableVectorBatchVectorCwiseProductAccumulate( - vector, v_size, batch_vector, n_batch, multiplier, shift, result); + NEON_OR_PORTABLE(VectorBatchVectorCwiseProductAccumulate, vector, v_size, + batch_vector, 
n_batch, multiplier, shift, result); } float VectorVectorDotProduct(const float* vector1, const float* vector2, From e941985dc443e2ad67a607e096d62b1bc7b3d65a Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Tue, 10 Nov 2020 17:57:32 -0800 Subject: [PATCH 142/220] Remove a layer of indirection in `strong_hash`. PiperOrigin-RevId: 341735651 Change-Id: I485054a827852526f8faa46398c5afe45b39d8f4 --- tensorflow/core/platform/BUILD | 5 +-- tensorflow/core/platform/default/BUILD | 11 ------- .../core/platform/default/build_config.bzl | 1 - .../core/platform/default/strong_hash.h | 32 ------------------- tensorflow/core/platform/strong_hash.h | 14 ++++---- 5 files changed, 10 insertions(+), 53 deletions(-) delete mode 100644 tensorflow/core/platform/default/strong_hash.h diff --git a/tensorflow/core/platform/BUILD b/tensorflow/core/platform/BUILD index 76920c5e55b..7c5e6b98288 100644 --- a/tensorflow/core/platform/BUILD +++ b/tensorflow/core/platform/BUILD @@ -702,11 +702,12 @@ cc_library( cc_library( name = "strong_hash", - hdrs = ["strong_hash.h"], + textual_hdrs = ["strong_hash.h"], deps = [ ":platform", ":types", - ] + tf_platform_deps("strong_hash"), + "@highwayhash//:sip_hash", + ], ) cc_library( diff --git a/tensorflow/core/platform/default/BUILD b/tensorflow/core/platform/default/BUILD index 69091eb909a..ceea382d57b 100644 --- a/tensorflow/core/platform/default/BUILD +++ b/tensorflow/core/platform/default/BUILD @@ -385,17 +385,6 @@ cc_library( alwayslink = 1, ) -cc_library( - name = "strong_hash", - tags = [ - "manual", - "no_oss", - "nobuilder", - ], - textual_hdrs = ["strong_hash.h"], - deps = ["@highwayhash//:sip_hash"], -) - cc_library( name = "subprocess", srcs = ["subprocess.cc"], diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index 2471ed64464..aab107b7f3f 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -569,7 +569,6 
@@ def tf_additional_lib_hdrs(): "//tensorflow/core/platform/default:mutex_data.h", "//tensorflow/core/platform/default:notification.h", "//tensorflow/core/platform/default:stacktrace.h", - "//tensorflow/core/platform/default:strong_hash.h", "//tensorflow/core/platform/default:test_benchmark.h", "//tensorflow/core/platform/default:tracing_impl.h", "//tensorflow/core/platform/default:unbounded_work_queue.h", diff --git a/tensorflow/core/platform/default/strong_hash.h b/tensorflow/core/platform/default/strong_hash.h deleted file mode 100644 index e7c8047235c..00000000000 --- a/tensorflow/core/platform/default/strong_hash.h +++ /dev/null @@ -1,32 +0,0 @@ -/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef TENSORFLOW_CORE_PLATFORM_DEFAULT_STRONG_HASH_H_ -#define TENSORFLOW_CORE_PLATFORM_DEFAULT_STRONG_HASH_H_ - -#include "highwayhash/sip_hash.h" // from @highwayhash -#include "highwayhash/state_helpers.h" // from @highwayhash - -namespace tensorflow { - -inline uint64 StrongKeyedHash(const tensorflow::uint64 (&key)[2], - const string& s) { - return highwayhash::StringHasher()( - {key[0], key[1]}, s); -} - -} // namespace tensorflow - -#endif // TENSORFLOW_CORE_PLATFORM_DEFAULT_STRONG_HASH_H_ diff --git a/tensorflow/core/platform/strong_hash.h b/tensorflow/core/platform/strong_hash.h index 987df5da59d..c442103c8d1 100644 --- a/tensorflow/core/platform/strong_hash.h +++ b/tensorflow/core/platform/strong_hash.h @@ -16,6 +16,8 @@ limitations under the License. #ifndef TENSORFLOW_CORE_PLATFORM_STRONG_HASH_H_ #define TENSORFLOW_CORE_PLATFORM_STRONG_HASH_H_ +#include "highwayhash/sip_hash.h" // from @highwayhash +#include "highwayhash/state_helpers.h" // from @highwayhash #include "tensorflow/core/platform/platform.h" #include "tensorflow/core/platform/types.h" @@ -32,14 +34,12 @@ namespace tensorflow { // string input = "input string"; // uint64 hash_value = StrongKeyedHash(key, input); // -uint64 StrongKeyedHash(const tensorflow::uint64 (&)[2], const string&); +inline uint64 StrongKeyedHash(const tensorflow::uint64 (&key)[2], + const string& s) { + return highwayhash::StringHasher()( + {key[0], key[1]}, s); +} } // namespace tensorflow -#if defined(PLATFORM_GOOGLE) -#include "tensorflow/core/platform/google/strong_hash.h" -#else -#include "tensorflow/core/platform/default/strong_hash.h" -#endif - #endif // TENSORFLOW_CORE_PLATFORM_STRONG_HASH_H_ From faf44b5391e0e9925efa66f3fc7521955962c091 Mon Sep 17 00:00:00 2001 From: Rick Chao Date: Tue, 10 Nov 2020 18:31:02 -0800 Subject: [PATCH 143/220] PSv2: Check that there is no more than one chief, and at least one ps/worker. 
Combine the validation logic with multi_worker_util. PiperOrigin-RevId: 341740027 Change-Id: I7e3125f8eaefb12c96f37b7fa3a54afbfc1e4334 --- .../python/distribute/multi_worker_util.py | 51 ++++++++++++------- .../parameter_server_strategy_v2.py | 28 +++++----- .../parameter_server_strategy_v2_test.py | 42 ++++++++++++++- 3 files changed, 87 insertions(+), 34 deletions(-) diff --git a/tensorflow/python/distribute/multi_worker_util.py b/tensorflow/python/distribute/multi_worker_util.py index 4d89b2fab08..943605fac20 100644 --- a/tensorflow/python/distribute/multi_worker_util.py +++ b/tensorflow/python/distribute/multi_worker_util.py @@ -46,13 +46,21 @@ def normalize_cluster_spec(cluster_spec): return cluster_spec -# TODO(yuefengz): add more validations. -def _validate_cluster_spec(cluster_spec, task_type, task_id): +def task_count(cluster_spec, task_type): + try: + return cluster_spec.num_tasks(task_type) + except ValueError: + return 0 + + +def _validate_cluster_spec(cluster_spec, + task_type, + task_id): """Validates `cluster_spec`. It checks: - 0) None of `cluster_spec`, `task_type`, and `task_id` is `None`. - 1) task type is one of "chief", "worker" or "evaluator". + 1) task type is one of "chief", "worker", "ps", "evaluator", or not provided + (None). 2) whether there is such a task type as `task_type` in the `cluster_spec`. The only exception is `evaluator`. In other words, it is still a valid configuration when `task_type` is `evaluator` but it doesn't appear in @@ -65,31 +73,38 @@ def _validate_cluster_spec(cluster_spec, task_type, task_id): Args: cluster_spec: a dict, `ClusterDef` or `ClusterSpec` object to be validated. task_type: string indicating the type of the task. - task_id: task_id: the id of the `task_type` in this cluster. - Throws: + task_id: the id of the `task_type` in this cluster. + + Raises: ValueError: if `cluster_spec` fails any check. 
""" - if cluster_spec is None or task_type is None or task_id is None: - raise ValueError( - "None of `cluster_spec`, `task_type`, and `task_id` should be `None`.") + allowed_task_types = ("chief", "worker", "evaluator", "ps", None) - cluster_spec = normalize_cluster_spec(cluster_spec).as_dict() - if task_type not in ("chief", "worker", "evaluator", "ps"): - raise ValueError( - "Unrecognized task_type: %r, valid task types are: \"chief\", " - "\"worker\", \"evaluator\" and \"ps\"." % task_type) + cluster_spec = normalize_cluster_spec(cluster_spec) - if task_type and task_type not in cluster_spec and task_type != "evaluator": + if any([job not in allowed_task_types for job in cluster_spec.jobs]): + raise ValueError("Disallowed task type found in cluster spec. Allowed " + "types are {} and the cluster spec is {}.".format( + allowed_task_types, cluster_spec)) + + if task_type not in allowed_task_types: + raise ValueError( + "Unrecognized task_type: {}, valid task types are: {}".format( + task_type, allowed_task_types)) + + if (task_type and task_type not in cluster_spec.jobs and + task_type != "evaluator"): raise ValueError("`task_type` %r not found in cluster_spec." % task_type) - if len(cluster_spec.get("chief", [])) > 1: + if task_count(cluster_spec, "chief") > 1: raise ValueError("There must be at most one 'chief' job.") - if len(cluster_spec.get("evaluator", [])) > 1: + if task_count(cluster_spec, "evaluator") > 1: raise ValueError("There must be at most one 'evaluator' job.") # The `evaluator` job is allowed to be missing in `cluster_spec`. - if task_type in cluster_spec and task_id >= len(cluster_spec[task_type]): + if task_type in cluster_spec.jobs and task_id >= task_count( + cluster_spec, task_type): raise ValueError( "The `task_id` %d exceeds the maximum id of %s." 
% (task_id, task_type)) diff --git a/tensorflow/python/distribute/parameter_server_strategy_v2.py b/tensorflow/python/distribute/parameter_server_strategy_v2.py index 452f89a9425..9cff2d789e6 100644 --- a/tensorflow/python/distribute/parameter_server_strategy_v2.py +++ b/tensorflow/python/distribute/parameter_server_strategy_v2.py @@ -26,6 +26,7 @@ import os from tensorflow.python.distribute import distribute_lib from tensorflow.python.distribute import distribute_utils +from tensorflow.python.distribute import multi_worker_util from tensorflow.python.distribute import parameter_server_strategy from tensorflow.python.distribute import sharded_variable from tensorflow.python.eager import remote @@ -486,22 +487,19 @@ class ParameterServerStrategyV2(distribute_lib.Strategy): if self.extended._num_gpus_per_worker > 1: # pylint: disable=protected-access raise NotImplementedError("Multi-gpu is not supported yet.") + cluster_spec = cluster_resolver.cluster_spec() + # The following checks if the task types are allowed (chief, ps, worker). - disallowed_task_type_error_str = ( - "Disallowed task type found in " - "`tf.distribute.cluster_resolver.ClusterResolver` provided to " - "`tf.distribute.experimental.ParameterServerStrategy`. 
Allowed types " - "are {},".format(ALLOWED_TASK_TYPES)) - if any([ - job not in ALLOWED_TASK_TYPES - for job in cluster_resolver.cluster_spec().jobs - ]): - raise ValueError("{} and the cluster spec is {}.".format( - disallowed_task_type_error_str, cluster_resolver.cluster_spec())) - if (cluster_resolver.task_type and - cluster_resolver.task_type not in ALLOWED_TASK_TYPES): - raise ValueError("{} and current task type is {}.".format( - disallowed_task_type_error_str, cluster_resolver.task_type)) + multi_worker_util._validate_cluster_spec( # pylint: disable=protected-access + cluster_spec, + cluster_resolver.task_type, + cluster_resolver.task_id) + + if multi_worker_util.task_count(cluster_spec, "ps") < 1: + raise ValueError("There must be at least one ps.") + + if multi_worker_util.task_count(cluster_spec, "worker") < 1: + raise ValueError("There must be at least one worker.") class ParameterServerStrategyV2Extended( diff --git a/tensorflow/python/distribute/parameter_server_strategy_v2_test.py b/tensorflow/python/distribute/parameter_server_strategy_v2_test.py index 4e2ad3e70fe..7e682d07c08 100644 --- a/tensorflow/python/distribute/parameter_server_strategy_v2_test.py +++ b/tensorflow/python/distribute/parameter_server_strategy_v2_test.py @@ -412,7 +412,47 @@ class ClusterTypeNameTest(test.TestCase): ] cluster_resolver = SimpleClusterResolver( ClusterSpec(cluster_def), rpc_layer="grpc", task_type="foobar") - with self.assertRaisesRegexp(ValueError, "Disallowed task type found in"): + with self.assertRaisesRegexp(ValueError, "Unrecognized task_type: foobar"): + parameter_server_strategy_v2.ParameterServerStrategyV2(cluster_resolver) + + def testMoreThanOneChief(self): + cluster_def = multi_worker_test_base._create_cluster( + num_workers=1, num_ps=1) + chief_ports = [multi_worker_test_base.pick_unused_port() for _ in range(3)] + cluster_def["chief"] = ["localhost:%s" % port for port in chief_ports] + cluster_resolver = SimpleClusterResolver( + 
ClusterSpec(cluster_def), + rpc_layer="grpc", + task_type="chief", + task_id=1) + with self.assertRaisesRegexp(ValueError, + "There must be at most one 'chief' job."): + parameter_server_strategy_v2.ParameterServerStrategyV2(cluster_resolver) + + def testLessThanOneWorker(self): + cluster_def = multi_worker_test_base._create_cluster( + num_workers=0, num_ps=1) + cluster_def["chief"] = [ + "localhost:%d" % multi_worker_test_base.pick_unused_port() + ] + cluster_resolver = SimpleClusterResolver( + ClusterSpec(cluster_def), rpc_layer="grpc", task_type="ps", task_id=0) + with self.assertRaisesRegexp(ValueError, + "There must be at least one worker."): + parameter_server_strategy_v2.ParameterServerStrategyV2(cluster_resolver) + + def testLessThanOnePs(self): + cluster_def = multi_worker_test_base._create_cluster( + num_workers=1, num_ps=0) + cluster_def["chief"] = [ + "localhost:%d" % multi_worker_test_base.pick_unused_port() + ] + cluster_resolver = SimpleClusterResolver( + ClusterSpec(cluster_def), + rpc_layer="grpc", + task_type="worker", + task_id=0) + with self.assertRaisesRegexp(ValueError, "There must be at least one ps."): parameter_server_strategy_v2.ParameterServerStrategyV2(cluster_resolver) From 9e5339f2a9171a9636d4751768b27332d7cbc6e0 Mon Sep 17 00:00:00 2001 From: Roman Dzhabarov Date: Tue, 10 Nov 2020 19:19:46 -0800 Subject: [PATCH 144/220] Support of shadow runs for MLIR TF bridge. When MLIR bridge is only enabled by graph analysis, MLIR passes are executed in shadow mode and must not affect the original TF graph. Let TF graph to MLIR conversion, MLIR passes and MLIR to TF graph conversion run, but do not return failures in shadow mode, just capture stats in those cases. 
PiperOrigin-RevId: 341745436 Change-Id: I7a23c122955bf408f3757989b646a78bfa17a0e9 --- tensorflow/compiler/mlir/BUILD | 18 ++- .../mlir/mlir_graph_optimization_pass.cc | 70 +++++++++- .../mlir/mlir_graph_optimization_pass.h | 17 ++- .../mlir/mlir_graph_optimization_pass_test.cc | 121 ++++++++++++++++++ tensorflow/core/platform/test.h | 1 + 5 files changed, 219 insertions(+), 8 deletions(-) create mode 100644 tensorflow/compiler/mlir/mlir_graph_optimization_pass_test.cc diff --git a/tensorflow/compiler/mlir/BUILD b/tensorflow/compiler/mlir/BUILD index 129e8230b95..26cb27eef9b 100644 --- a/tensorflow/compiler/mlir/BUILD +++ b/tensorflow/compiler/mlir/BUILD @@ -3,7 +3,11 @@ load("//tensorflow:tensorflow.bzl", "filegroup") load("//tensorflow/core/platform:rules_cc.bzl", "cc_library") -load("//tensorflow:tensorflow.bzl", "tf_cc_binary") +load( + "//tensorflow:tensorflow.bzl", + "tf_cc_binary", + "tf_cc_test", +) package( default_visibility = [ @@ -126,12 +130,14 @@ cc_library( srcs = ["mlir_graph_optimization_pass.cc"], hdrs = ["mlir_graph_optimization_pass.h"], deps = [ + "//tensorflow/compiler/mlir:mlir_bridge_rollout_policy", "//tensorflow/compiler/mlir/tensorflow", "//tensorflow/compiler/mlir/tensorflow:convert_graphdef", "//tensorflow/compiler/mlir/tensorflow:device_util", "//tensorflow/compiler/mlir/tensorflow:dump_mlir_util", "//tensorflow/compiler/mlir/tensorflow:mlir_roundtrip_flags", "//tensorflow/core:core_cpu", + "//tensorflow/core:lib", "@com_google_absl//absl/container:flat_hash_set", "@llvm-project//llvm:Support", "@llvm-project//mlir:IR", @@ -204,6 +210,16 @@ cc_library( ], ) +tf_cc_test( + name = "mlir_graph_optimization_pass_test", + srcs = ["mlir_graph_optimization_pass_test.cc"], + deps = [ + ":mlir_graph_optimization_pass", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + filegroup( name = "litfiles", srcs = glob(["runlit*py"]), diff --git a/tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc 
b/tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc index ff611bac943..753504ddc49 100644 --- a/tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc +++ b/tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/compiler/mlir/mlir_graph_optimization_pass.h" +#include #include #include "absl/container/flat_hash_set.h" @@ -32,10 +33,19 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/utils/device_util.h" #include "tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.h" #include "tensorflow/core/common_runtime/graph_constructor.h" +#include "tensorflow/core/lib/monitoring/counter.h" +#include "tensorflow/core/platform/status.h" #include "tensorflow/core/public/session_options.h" namespace tensorflow { +auto* shadow_run_success = monitoring::Counter<0>::New( + "/tensorflow/mlir/shadow_run_success", "Success count of MLIR shadow runs"); + +auto* shadow_run_failure = monitoring::Counter<2>::New( + "/tensorflow/mlir/shadow_run_failure", "Failure count of MLIR shadow runs", + "kind", "name"); + static inline absl::string_view StringRefToView(llvm::StringRef ref) { return {ref.data(), ref.size()}; } @@ -123,6 +133,17 @@ Status MlirFunctionOptimizationPass::Run( << "(registered " << registry_->passes().size() << " passes)"; + // For scenarios when the new bridge is enabled by analysis we need to make + // sure that MLIR transformations are executed in a shadow mode. + // In this case, no changes should be done to the original `graph` + // and no failures propagated to the user. 
+ bool enabled_by_analysis = + mlir_rollout_policy_(**graph, config_proto) == + MlirBridgeRolloutPolicy::kEnabledAfterGraphAnalysis; + if (enabled_by_analysis) { + LOG_FIRST_N(INFO, 1) << "Shadow run of MLIR enabled after graph analysis"; + } + GraphDebugInfo debug_info; mlir::MLIRContext context; RegisterDialects(context.getDialectRegistry()); @@ -130,10 +151,21 @@ Status MlirFunctionOptimizationPass::Run( import_config.graph_as_function = true; import_config.control_outputs = *control_ret_node_names; import_config.upgrade_legacy = true; - TF_ASSIGN_OR_RETURN(auto module_ref, - ConvertGraphToMlir(**graph, debug_info, *flib_def, - import_config, &context)); + auto module_ref_status = ConvertGraphToMlir(**graph, debug_info, *flib_def, + import_config, &context); + if (!module_ref_status.ok()) { + if (enabled_by_analysis) { + shadow_run_failure->GetCell("graph_to_mlir", "")->IncrementBy(1); + + // Do not fail, let the old bridge to run on the original `graph`. + return Status::OK(); + } + + return module_ref_status.status(); + } + + auto module_ref = std::move(module_ref_status.ValueOrDie()); AddDevicesToOp(*module_ref, &device_set); for (auto& pass_registration : registry_->passes()) { @@ -144,8 +176,17 @@ Status MlirFunctionOptimizationPass::Run( DumpModule(*module_ref, llvm::formatv("mlir_{0}_before_", name)); } - TF_RETURN_IF_ERROR( - pass_registration.pass->Run(config_proto, *module_ref, **graph)); + auto pass_status = + pass_registration.pass->Run(config_proto, *module_ref, **graph); + if (!pass_status.ok()) { + if (enabled_by_analysis) { + shadow_run_failure->GetCell("pass", name.str())->IncrementBy(1); + // Do not fail, let the old bridge to run on the original `graph`. 
+ return Status::OK(); + } + + return pass_status; + } if (VLOG_IS_ON(1)) { DumpModule(*module_ref, llvm::formatv("mlir_{0}_after_", name)); @@ -154,6 +195,25 @@ Status MlirFunctionOptimizationPass::Run( GraphExportConfig export_config; absl::flat_hash_set control_ret_nodes; + + // In case MLIR is enabled by analysis, verify that MLIR could be converted + // back to TF graph. Original `graph` must stay the same. + if (enabled_by_analysis) { + auto empty_graph = std::make_unique(OpRegistry::Global()); + FunctionLibraryDefinition empty_flib = empty_graph->flib_def(); + + auto mlir_to_graph_status = + ConvertMlirToGraph(*module_ref, export_config, &empty_graph, + &empty_flib, &control_ret_nodes); + if (mlir_to_graph_status.ok()) { + shadow_run_success->GetCell()->IncrementBy(1); + } else { + shadow_run_failure->GetCell("mlir_to_graph", "")->IncrementBy(1); + } + + return Status::OK(); + } + TF_RETURN_WITH_CONTEXT_IF_ERROR( ConvertMlirToGraph(*module_ref, export_config, graph, flib_def, &control_ret_nodes), diff --git a/tensorflow/compiler/mlir/mlir_graph_optimization_pass.h b/tensorflow/compiler/mlir/mlir_graph_optimization_pass.h index 3130805633b..2f02a3cbbb0 100644 --- a/tensorflow/compiler/mlir/mlir_graph_optimization_pass.h +++ b/tensorflow/compiler/mlir/mlir_graph_optimization_pass.h @@ -16,6 +16,9 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_MLIR_MLIR_GRAPH_OPTIMIZATION_PASS_H_ #define TENSORFLOW_COMPILER_MLIR_MLIR_GRAPH_OPTIMIZATION_PASS_H_ +#include + +#include "tensorflow/compiler/mlir/mlir_bridge_rollout_policy.h" #include "mlir/IR/Module.h" // from @llvm-project #include "tensorflow/core/common_runtime/function_optimization_registry.h" #include "tensorflow/core/common_runtime/optimization_registry.h" @@ -60,10 +63,14 @@ class MlirOptimizationPassRegistry { // Returns the global registry of MLIR optimization passes. static MlirOptimizationPassRegistry& Global(); + // Register optimization `pass` with the given `priority`. 
void Add(int priority, std::unique_ptr pass) { passes_.insert({priority, std::move(pass)}); } + // Free the memory allocated for all passes. + void ClearPasses() { passes_.clear(); } + const Passes& passes() const { return passes_; } private: @@ -76,8 +83,11 @@ class MlirFunctionOptimizationPass : public FunctionOptimizationPass { public: explicit MlirFunctionOptimizationPass( const MlirOptimizationPassRegistry* registry = - &MlirOptimizationPassRegistry::Global()) - : registry_(registry) {} + &MlirOptimizationPassRegistry::Global(), + std::function)> + mlir_rollout_policy = GetMlirBridgeRolloutPolicy) + : registry_(registry), mlir_rollout_policy_(mlir_rollout_policy) {} Status Run(const DeviceSet& device_set, const ConfigProto& config_proto, std::unique_ptr* graph, FunctionLibraryDefinition* flib_def, @@ -86,6 +96,9 @@ class MlirFunctionOptimizationPass : public FunctionOptimizationPass { private: const MlirOptimizationPassRegistry* registry_; + std::function)> + mlir_rollout_policy_; }; // -------------------------------------------------------------------------- // diff --git a/tensorflow/compiler/mlir/mlir_graph_optimization_pass_test.cc b/tensorflow/compiler/mlir/mlir_graph_optimization_pass_test.cc new file mode 100644 index 00000000000..b2b8451536c --- /dev/null +++ b/tensorflow/compiler/mlir/mlir_graph_optimization_pass_test.cc @@ -0,0 +1,121 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/mlir/mlir_graph_optimization_pass.h" + +#include + +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { + +using ::testing::_; +using ::testing::NiceMock; +using ::testing::Return; +using ::testing::Test; + +class MockMlirOptimizationPass : public MlirOptimizationPass { + public: + MOCK_METHOD(llvm::StringRef, name, (), (const, override)); + MOCK_METHOD(bool, IsEnabled, + (const ConfigProto& config_proto, const Graph& graph), + (const override)); + MOCK_METHOD(Status, Run, + (const ConfigProto& config_proto, mlir::ModuleOp module, + const Graph& graph), + (override)); +}; + +class MlirGraphOptimizationPassTest : public Test { + public: + void Init(MlirBridgeRolloutPolicy rollout_policy, Status pass_run_result) { + graph_ = std::make_unique(OpRegistry::Global()); + + function_optimization_pass_ = MlirFunctionOptimizationPass( + &MlirOptimizationPassRegistry::Global(), + [rollout_policy](const Graph& graph, absl::optional) { + return rollout_policy; + }); + + auto optimization_pass = + std::make_unique>(); + + EXPECT_CALL(*optimization_pass, IsEnabled(_, _)) + .WillRepeatedly(Return(true)); + EXPECT_CALL(*optimization_pass, Run(_, _, _)) + .WillOnce(Return(pass_run_result)); + MlirOptimizationPassRegistry::Global().Add(0, std::move(optimization_pass)); + + flib_.reset(new FunctionLibraryDefinition(graph_->flib_def())); + } + + void TearDown() override { + MlirOptimizationPassRegistry::Global().ClearPasses(); + } + + ConfigProto config_proto_; + MlirFunctionOptimizationPass function_optimization_pass_; + DeviceSet device_set_; + std::unique_ptr graph_; + std::unique_ptr flib_; + std::vector control_ret_node_names_; + bool control_rets_updated_{false}; +}; + +TEST_F(MlirGraphOptimizationPassTest, OptimizationPassFailsNoShadow) { + Init(MlirBridgeRolloutPolicy::kEnabledByUser, + Status(error::Code::ABORTED, "aborted")); + + GraphDef 
original_graph_def; + graph_->ToGraphDef(&original_graph_def); + + EXPECT_EQ(function_optimization_pass_.Run( + device_set_, config_proto_, &graph_, flib_.get(), + &control_ret_node_names_, &control_rets_updated_), + Status(error::Code::ABORTED, "aborted")); + +// Proto matchers might be unavailable. +#if defined(PLATFORM_GOOGLE) + GraphDef resulted_graph_def; + graph_->ToGraphDef(&resulted_graph_def); + EXPECT_THAT(resulted_graph_def, + ::testing::proto::IgnoringRepeatedFieldOrdering( + ::testing::EquivToProto(original_graph_def))); +#endif +} + +TEST_F(MlirGraphOptimizationPassTest, OptimizationPassFailsShadow) { + Init(MlirBridgeRolloutPolicy::kEnabledAfterGraphAnalysis, + Status(error::Code::ABORTED, "aborted")); + + GraphDef original_graph_def; + graph_->ToGraphDef(&original_graph_def); + + EXPECT_EQ(function_optimization_pass_.Run( + device_set_, config_proto_, &graph_, flib_.get(), + &control_ret_node_names_, &control_rets_updated_), + Status::OK()); + +// Proto matchers might be unavailable. +#if defined(PLATFORM_GOOGLE) + GraphDef resulted_graph_def; + graph_->ToGraphDef(&resulted_graph_def); + EXPECT_THAT(resulted_graph_def, + ::testing::proto::IgnoringRepeatedFieldOrdering( + ::testing::EquivToProto(original_graph_def))); +#endif +} + +} // namespace tensorflow diff --git a/tensorflow/core/platform/test.h b/tensorflow/core/platform/test.h index 29fceb2d896..b29c0a68595 100644 --- a/tensorflow/core/platform/test.h +++ b/tensorflow/core/platform/test.h @@ -45,6 +45,7 @@ limitations under the License. #include #include #include +#include #endif namespace tensorflow { From 24d1fba948edd2c466b85b91836f055f5553404e Mon Sep 17 00:00:00 2001 From: Yuefeng Zhou Date: Tue, 10 Nov 2020 19:44:41 -0800 Subject: [PATCH 145/220] Add dataset_fn to fault_tolerance_test to increase test coverage. 
PiperOrigin-RevId: 341748168 Change-Id: I13c0da9a5e6f4aea69f1306da243e4f1963f9f9f --- .../coordinator/fault_tolerance_test.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/distribute/coordinator/fault_tolerance_test.py b/tensorflow/python/distribute/coordinator/fault_tolerance_test.py index cc075d09c3d..96ac19aff94 100644 --- a/tensorflow/python/distribute/coordinator/fault_tolerance_test.py +++ b/tensorflow/python/distribute/coordinator/fault_tolerance_test.py @@ -24,6 +24,7 @@ import threading import time from tensorflow.python.compat import v2_compat +from tensorflow.python.data.ops import dataset_ops from tensorflow.python.distribute import multi_process_runner from tensorflow.python.distribute import multi_worker_test_base from tensorflow.python.distribute import parameter_server_strategy_v2 @@ -35,6 +36,7 @@ from tensorflow.python.eager import test from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops @@ -61,19 +63,26 @@ class Model(object): dtype=dtypes.float32) self.iterations = variables.Variable(initial_value=0, dtype=dtypes.int32) + def dataset_fn(): + data = random_ops.random_uniform((1000, 1000)) + dataset = dataset_ops.DatasetV2.from_tensors([data]).repeat() + return dataset + + self.iterator = iter( + self.cluster_coord.create_per_worker_dataset(dataset_fn)) + @def_function.function - def train_fn(self): - # train_fn roughly took 0.5s to execute on Intel Xeon Gold 6154 (3.00GHZ) - # w/o any compilation optimization (two worker setup). 
+ def train_fn(self, iterator): for _ in math_ops.range(5): - x = math_ops.matmul(random_ops.random_uniform((1000, 1000)), self.w) + x = math_ops.matmul(array_ops.squeeze(next(iterator)), self.w) + x = math_ops.matmul(random_ops.random_uniform((1000, 1000)), x) self.w.assign_add(x) self.iterations.assign_add(1) def schedule_training_functions(self, num_steps): with self.strategy.scope(): for _ in range(num_steps): - self.cluster_coord.schedule(self.train_fn) + self.cluster_coord.schedule(self.train_fn, args=(self.iterator,)) def join_training_functions(self): self.cluster_coord.join() From 696102807d1db5e7119fdf21ab2f392eed69525c Mon Sep 17 00:00:00 2001 From: Renjie Liu Date: Tue, 10 Nov 2020 20:00:26 -0800 Subject: [PATCH 146/220] Expose the TfLiteDelegateFlags params for simple delegate creation. This allows more flexibility for example users want to implement a delegate takes in custom op. PiperOrigin-RevId: 341749773 Change-Id: I89ece3af61750fded53c810d3aa63ba0aafb9740 --- tensorflow/lite/delegates/utils/simple_delegate.cc | 4 ++-- tensorflow/lite/delegates/utils/simple_delegate.h | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tensorflow/lite/delegates/utils/simple_delegate.cc b/tensorflow/lite/delegates/utils/simple_delegate.cc index 5eb2e319b94..9746e6d8dfb 100644 --- a/tensorflow/lite/delegates/utils/simple_delegate.cc +++ b/tensorflow/lite/delegates/utils/simple_delegate.cc @@ -113,13 +113,13 @@ TfLiteStatus DelegatePrepare(TfLiteContext* context, } // namespace TfLiteDelegate* TfLiteDelegateFactory::CreateSimpleDelegate( - std::unique_ptr simple_delegate) { + std::unique_ptr simple_delegate, int64_t flag) { if (simple_delegate == nullptr) { return nullptr; } auto delegate = new TfLiteDelegate(); delegate->Prepare = &DelegatePrepare; - delegate->flags = kTfLiteDelegateFlagsNone; + delegate->flags = flag; delegate->CopyFromBufferHandle = nullptr; delegate->CopyToBufferHandle = nullptr; delegate->FreeBufferHandle = nullptr; diff 
--git a/tensorflow/lite/delegates/utils/simple_delegate.h b/tensorflow/lite/delegates/utils/simple_delegate.h index 338633d92e0..58b9ddb791a 100644 --- a/tensorflow/lite/delegates/utils/simple_delegate.h +++ b/tensorflow/lite/delegates/utils/simple_delegate.h @@ -114,8 +114,12 @@ class TfLiteDelegateFactory { public: // Creates TfLiteDelegate from the provided SimpleDelegateInterface. // The returned TfLiteDelegate should be deleted using DeleteSimpleDelegate. + // A simple usage of the flags bit mask: + // CreateSimpleDelegate(..., kTfLiteDelegateFlagsAllowDynamicTensors | + // kTfLiteDelegateFlagsRequirePropagatedShapes) static TfLiteDelegate* CreateSimpleDelegate( - std::unique_ptr simple_delegate); + std::unique_ptr simple_delegate, + int64_t flags = kTfLiteDelegateFlagsNone); // Deletes 'delegate' the passed pointer must be the one returned // from CreateSimpleDelegate. From bc819d9cf3b21fd3cc1119f176f287378c6bec89 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Nov 2020 21:07:36 -0800 Subject: [PATCH 147/220] Make Hlo runner interface so we can add alternative implementations. 
PiperOrigin-RevId: 341756589 Change-Id: I267aaa795f38d9ac1f4b4ebd9bba5efc20d2b7ef --- tensorflow/compiler/xla/service/BUILD | 25 +++ tensorflow/compiler/xla/service/hlo_runner.cc | 75 +-------- tensorflow/compiler/xla/service/hlo_runner.h | 84 ++--------- .../xla/service/hlo_runner_interface.cc | 90 +++++++++++ .../xla/service/hlo_runner_interface.h | 142 ++++++++++++++++++ .../compiler/xla/tests/hlo_test_base.cc | 6 +- 6 files changed, 276 insertions(+), 146 deletions(-) create mode 100644 tensorflow/compiler/xla/service/hlo_runner_interface.cc create mode 100644 tensorflow/compiler/xla/service/hlo_runner_interface.h diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index f1fa2ce3a52..edbc0078869 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -4459,6 +4459,30 @@ tf_cc_test( ], ) +cc_library( + name = "hlo_runner_interface", + srcs = ["hlo_runner_interface.cc"], + hdrs = ["hlo_runner_interface.h"], + deps = [ + ":compiler", + ":computation_placer", + ":hlo", + ":hlo_module_group", + ":hlo_parser", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto_cc", + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/types:span", + ], +) + cc_library( name = "hlo_runner", srcs = ["hlo_runner.cc"], @@ -4471,6 +4495,7 @@ cc_library( ":hlo", ":hlo_module_group", ":hlo_parser", + ":hlo_runner_interface", ":transfer_manager", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc index 0d71c6d49ed..86ff41ba273 100644 --- 
a/tensorflow/compiler/xla/service/hlo_runner.cc +++ b/tensorflow/compiler/xla/service/hlo_runner.cc @@ -34,58 +34,6 @@ limitations under the License. namespace xla { -/*static*/ StatusOr> -HloRunner::CreateModuleFromString(const absl::string_view hlo_string, - const DebugOptions& debug_options) { - HloModuleConfig config; - config.set_debug_options(debug_options); - return ParseAndReturnUnverifiedModule(hlo_string, config); -} - -namespace { - -// Creates an HloModule from the given proto. -StatusOr> HloProtoToModule( - const HloProto& proto, const DebugOptions& debug_options) { - TF_ASSIGN_OR_RETURN(HloModuleConfig config, - HloModule::CreateModuleConfigFromProto(proto.hlo_module(), - debug_options)); - TF_ASSIGN_OR_RETURN(auto module, - HloModule::CreateFromProto(proto.hlo_module(), config)); - return std::move(module); -} - -} // namespace - -/*static*/ StatusOr> -HloRunner::ReadModuleFromBinaryProtoFile(const std::string& filename, - const DebugOptions& debug_options) { - HloProto proto; - TF_RETURN_IF_ERROR(tensorflow::ReadBinaryProto(tensorflow::Env::Default(), - filename, &proto)); - return HloProtoToModule(proto, debug_options); -} - -/*static*/ StatusOr> -HloRunner::ReadModuleFromTextProtoFile(const std::string& filename, - const DebugOptions& debug_options) { - HloProto proto; - TF_RETURN_IF_ERROR( - tensorflow::ReadTextProto(tensorflow::Env::Default(), filename, &proto)); - return HloProtoToModule(proto, debug_options); -} - -/*static*/ StatusOr> -HloRunner::ReadModuleFromHloTextFile(const std::string& filename, - const DebugOptions& debug_options) { - string hlo_string; - TF_RETURN_IF_ERROR(tensorflow::ReadFileToString(tensorflow::Env::Default(), - filename, &hlo_string)); - HloModuleConfig config; - config.set_debug_options(debug_options); - return ParseAndReturnUnverifiedModule(hlo_string, config); -} - HloRunner::HloRunner(se::Platform* platform, int intra_op_parallelism_threads) { BackendOptions backend_options; 
backend_options.set_platform(platform); @@ -155,26 +103,9 @@ StatusOr HloRunner::Execute(std::unique_ptr module, return TransferLiteralFromDevice(result.Result()); } -StatusOr HloRunner::Execute(std::unique_ptr module, - absl::Span arguments, - bool run_hlo_passes, - ExecutionProfile* profile) { - // Construct a vector of plain pointers for the arguments. - std::vector argument_pointers; - argument_pointers.reserve(arguments.size()); - for (const auto& argument : arguments) { - argument_pointers.push_back(&argument); - } - return Execute( - /*module=*/std::move(module), - /*arguments=*/argument_pointers, - /*run_hlo_passes=*/run_hlo_passes, - /*profile=*/profile); -} - -StatusOr HloRunner::Execute(std::unique_ptr executable, - absl::Span arguments, - ExecutionProfile* profile) { +StatusOr HloRunner::ExecuteWithExecutable( + std::unique_ptr executable, absl::Span arguments, + ExecutionProfile* profile) { TF_ASSIGN_OR_RETURN(std::vector argument_buffers, TransferLiteralsToDevice(arguments)); TF_ASSIGN_OR_RETURN(ExecutionOutput result, diff --git a/tensorflow/compiler/xla/service/hlo_runner.h b/tensorflow/compiler/xla/service/hlo_runner.h index 733bb8bff54..721a50232cc 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.h +++ b/tensorflow/compiler/xla/service/hlo_runner.h @@ -29,6 +29,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/executable.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_runner_interface.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" @@ -42,48 +43,8 @@ namespace xla { // certain backend directly without using the client interface. HloModule can be // explicitly built, or loaded from a serialization file (e.g., hlo proto // file), or parsed from a hlo textual IR string. 
-class HloRunner { +class HloRunner : public HloRunnerInterface { public: - // The options used to configure a ExecuteReplicated() call. - struct ReplicatedExecuteOptions { - // The number of devices the HLO module should be replicated onto. - int64 num_replicas = 1; - - // The arguments to be fed to each replica. Since this is used for a - // replicated execution, all the arguments are the same for all replicas. - std::vector arguments; - - // If the HLO module being run has an infeed instruction, this will be the - // data which will be fed to it, for as many as infeed_steps steps. - const Literal* infeed = nullptr; - - // The number of times the infeed literal should be fed to the HLO module. - // For a clean exit, this should match the iterations-per-loop parameter - // used when generating the HLO module proto (that is usually the main - // while boundary counter). A value higher then iterations-per-loop would - // lead to infeed threads feeding to a gone computation, while a lower - // value would trigger a stuck ExecuteReplicated() call (the computation - // will be trying to infeed data which will never come). - int64 infeed_steps = -1; - - // The shape of the outfeed operation. If empty, the HLO module does not - // generate any outfeed. - Shape outfeed_shape; - - // A pointer to a vector where the outfeed values will be stored. If - // nullptr, the values will be read and discarded. - std::vector* outfeed_values = nullptr; - - // Whether the HLO passes should be run on the input module. Usually - // saved modules are coming from after the HLO pass pipeline, so triggering - // another run will likely cause errors. - bool run_hlo_passes = false; - - // If true, executes on multiple threads using se::Stream::ExecuteOnStream. - // Otherwise, executes using xla::Executable::ExecuteOnStreams. - bool use_threads = false; - }; - // intra_op_parallelism_threads: For the CPU backend only. 
It is the thread // pool size for parallel execution of an individual operator. The default // value of -1 will result in initializing the thread pool with the number of @@ -92,24 +53,7 @@ class HloRunner { explicit HloRunner(se::Platform* platform, int intra_op_parallelism_threads = -1); - ~HloRunner(); - - // Converts an HloModule from the given hlo textual IR string (in - // HloModule::ToString format). - static StatusOr> CreateModuleFromString( - const absl::string_view hlo_string, const DebugOptions& debug_options); - - // Reads the proto file in xla.HloProto format, creates and returns the - // HloModule. - static StatusOr> ReadModuleFromBinaryProtoFile( - const std::string& filename, const DebugOptions& debug_options); - static StatusOr> ReadModuleFromTextProtoFile( - const std::string& filename, const DebugOptions& debug_options); - - // Reads the hlo text dump file in HloModule::ToString format, creates and - // returns the HloModule. - static StatusOr> ReadModuleFromHloTextFile( - const std::string& filename, const DebugOptions& debug_options); + ~HloRunner() override; // Transfers data between the host and device. StatusOr TransferLiteralToDevice(const Literal& literal); @@ -124,19 +68,17 @@ class HloRunner { // // If run_hlo_passes is false, the module will be executed without Hlo // optimization. 
+ + using HloRunnerInterface::Execute; + StatusOr Execute(std::unique_ptr module, absl::Span arguments, - bool run_hlo_passes = true, - ExecutionProfile* profile = nullptr); + bool run_hlo_passes, + ExecutionProfile* profile) override; - StatusOr Execute(std::unique_ptr module, - absl::Span arguments, - bool run_hlo_passes = true, - ExecutionProfile* profile = nullptr); - - StatusOr Execute(std::unique_ptr executable, - absl::Span arguments, - ExecutionProfile* profile = nullptr); + StatusOr ExecuteWithExecutable( + std::unique_ptr executable, + absl::Span arguments, ExecutionProfile* profile = nullptr); // As Execute(), but accepts and returns device buffers instead of host // buffers. @@ -159,13 +101,13 @@ class HloRunner { // value. StatusOr> ExecuteReplicated( std::unique_ptr module, - const ReplicatedExecuteOptions& options); + const ReplicatedExecuteOptions& options) override; // Same as above, but with specified device assignment. StatusOr> ExecuteReplicated( std::unique_ptr module, const ReplicatedExecuteOptions& options, - DeviceAssignment* device_assignment); + DeviceAssignment* device_assignment) override; // Same as above, but with a reusable Executable. This may update the profile // information in *executable. diff --git a/tensorflow/compiler/xla/service/hlo_runner_interface.cc b/tensorflow/compiler/xla/service/hlo_runner_interface.cc new file mode 100644 index 00000000000..7359f1f08b0 --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_runner_interface.cc @@ -0,0 +1,90 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_runner_interface.h" + +#include "tensorflow/compiler/xla/service/hlo_parser.h" + +namespace xla { + +/*static*/ StatusOr> +HloRunnerInterface::CreateModuleFromString(const absl::string_view hlo_string, + const DebugOptions& debug_options) { + HloModuleConfig config; + config.set_debug_options(debug_options); + return ParseAndReturnUnverifiedModule(hlo_string, config); +} + +namespace { + +// Creates an HloModule from the given proto. +StatusOr> HloProtoToModule( + const HloProto& proto, const DebugOptions& debug_options) { + TF_ASSIGN_OR_RETURN(HloModuleConfig config, + HloModule::CreateModuleConfigFromProto(proto.hlo_module(), + debug_options)); + TF_ASSIGN_OR_RETURN(auto module, + HloModule::CreateFromProto(proto.hlo_module(), config)); + return std::move(module); +} + +} // namespace + +/*static*/ StatusOr> +HloRunnerInterface::ReadModuleFromBinaryProtoFile( + const std::string& filename, const DebugOptions& debug_options) { + HloProto proto; + TF_RETURN_IF_ERROR(tensorflow::ReadBinaryProto(tensorflow::Env::Default(), + filename, &proto)); + return HloProtoToModule(proto, debug_options); +} + +/*static*/ StatusOr> +HloRunnerInterface::ReadModuleFromTextProtoFile( + const std::string& filename, const DebugOptions& debug_options) { + HloProto proto; + TF_RETURN_IF_ERROR( + tensorflow::ReadTextProto(tensorflow::Env::Default(), filename, &proto)); + return HloProtoToModule(proto, debug_options); +} + +/*static*/ StatusOr> 
+HloRunnerInterface::ReadModuleFromHloTextFile( + const std::string& filename, const DebugOptions& debug_options) { + string hlo_string; + TF_RETURN_IF_ERROR(tensorflow::ReadFileToString(tensorflow::Env::Default(), + filename, &hlo_string)); + HloModuleConfig config; + config.set_debug_options(debug_options); + return ParseAndReturnUnverifiedModule(hlo_string, config); +} + +StatusOr HloRunnerInterface::Execute( + std::unique_ptr module, absl::Span arguments, + bool run_hlo_passes, ExecutionProfile* profile) { + // Construct a vector of plain pointers for the arguments. + std::vector argument_pointers; + argument_pointers.reserve(arguments.size()); + for (const auto& argument : arguments) { + argument_pointers.push_back(&argument); + } + return Execute( + /*module=*/std::move(module), + /*arguments=*/argument_pointers, + /*run_hlo_passes=*/run_hlo_passes, + /*profile=*/profile); +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_runner_interface.h b/tensorflow/compiler/xla/service/hlo_runner_interface.h new file mode 100644 index 00000000000..bee8349ac71 --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_runner_interface.h @@ -0,0 +1,142 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_RUNNER_INTERFACE_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_RUNNER_INTERFACE_H_ + +#include +#include +#include +#include +#include + +#include "absl/types/span.h" +#include "tensorflow/compiler/xla/service/computation_placer.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" + +namespace xla { + +// A base class for running an HloModule. This executes the given HloModule on a +// certain backend directly without using the client interface. HloModule can be +// explicitly built, or loaded from a serialization file (e.g., hlo proto +// file), or parsed from a hlo textual IR string. +class HloRunnerInterface { + public: + // The options used to configure an ExecuteReplicated() call. + struct ReplicatedExecuteOptions { + // The number of devices the HLO module should be replicated onto. + int64 num_replicas = 1; + + // The arguments to be fed to each replica. Since this is used for a + // replicated execution, all the arguments are the same for all replicas. + std::vector arguments; + + // If the HLO module being run has an infeed instruction, this will be the + // data which will be fed to it, for as many as infeed_steps steps. + const Literal* infeed = nullptr; + + // The number of times the infeed literal should be fed to the HLO module. + // For a clean exit, this should match the iterations-per-loop parameter + // used when generating the HLO module proto (that is usually the main + // while boundary counter). 
A value higher than iterations-per-loop would + // lead to infeed threads feeding to a gone computation, while a lower + // value would trigger a stuck ExecuteReplicated() call (the computation + // will be trying to infeed data which will never come). + int64 infeed_steps = -1; + + // The shape of the outfeed operation. If empty, the HLO module does not + // generate any outfeed. + Shape outfeed_shape; + + // A pointer to a vector where the outfeed values will be stored. If + // nullptr, the values will be read and discarded. + std::vector* outfeed_values = nullptr; + + // Whether the HLO passes should be run on the input module. Usually + // saved modules are coming from after the HLO pass pipeline, so triggering + // another run will likely cause errors. + bool run_hlo_passes = false; + + // If true, executes on multiple threads using se::Stream::ExecuteOnStream. + // Otherwise, executes using xla::Executable::ExecuteOnStreams. + bool use_threads = false; + }; + + HloRunnerInterface() = default; + + virtual ~HloRunnerInterface() = default; + + // Converts an HloModule from the given hlo textual IR string (in + // HloModule::ToString format). + static StatusOr> CreateModuleFromString( + const absl::string_view hlo_string, const DebugOptions& debug_options); + + // Reads the proto file in xla.HloProto format, creates and returns the + // HloModule. + static StatusOr> ReadModuleFromBinaryProtoFile( + const std::string& filename, const DebugOptions& debug_options); + static StatusOr> ReadModuleFromTextProtoFile( + const std::string& filename, const DebugOptions& debug_options); + + // Reads the hlo text dump file in HloModule::ToString format, creates and + // returns the HloModule. + static StatusOr> ReadModuleFromHloTextFile( + const std::string& filename, const DebugOptions& debug_options); + + // Executes the given module with given literals as input and returns the + // result as a Literal.
+ // + // If run_hlo_passes is false, the module will be executed without Hlo + // optimization + StatusOr Execute(std::unique_ptr module, + absl::Span arguments, + bool run_hlo_passes = true) { + return Execute(std::move(module), arguments, run_hlo_passes, nullptr); + } + + StatusOr Execute(std::unique_ptr module, + absl::Span arguments, + bool run_hlo_passes = true, + ExecutionProfile* profile = nullptr); + + virtual StatusOr Execute(std::unique_ptr module, + absl::Span arguments, + bool run_hlo_passes, + ExecutionProfile* profile) = 0; + + // Executes a given HLO module into a set of replicas, and returns a map + // with the replica number as key, and the corresponding returned literal as + // value. + // TODO(b/172931928): change to non-virtual function. + virtual StatusOr> ExecuteReplicated( + std::unique_ptr module, + const ReplicatedExecuteOptions& options) = 0; + + // Same as above, but with specified device assignment. + virtual StatusOr> ExecuteReplicated( + std::unique_ptr module, + const ReplicatedExecuteOptions& options, + DeviceAssignment* device_assignment) = 0; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_RUNNER_INTERFACE_H_ diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.cc b/tensorflow/compiler/xla/tests/hlo_test_base.cc index 6c062deb363..c9d08cef857 100644 --- a/tensorflow/compiler/xla/tests/hlo_test_base.cc +++ b/tensorflow/compiler/xla/tests/hlo_test_base.cc @@ -507,9 +507,9 @@ StatusOr<::testing::AssertionResult> HloTestBase::RunAndCompareInternal( absl::optional canonical_output; for (int i = 0; i < n; ++i) { - StatusOr output = - test_runner_.Execute(std::move(executables[i]), fake_arguments[i], - /*profile=*/&((*profiles)[i])); + StatusOr output = test_runner_.ExecuteWithExecutable( + std::move(executables[i]), fake_arguments[i], + /*profile=*/&((*profiles)[i])); if (!output.ok()) { return ::testing::AssertionFailure() << output.status().error_message(); } From 6ee6f6111144578c94362efbc56f4162c5a1ee03 
Mon Sep 17 00:00:00 2001 From: Taehee Jeong Date: Tue, 10 Nov 2020 21:18:16 -0800 Subject: [PATCH 148/220] Quantize recurrent cell input of UnidirectionalSequenceLSTM with 16 bits, before all StaticsOp are converted to 8 bit Quantize/Dequantize pairs * Added relevant test for UnidirectionalSequenceLSTM PiperOrigin-RevId: 341757646 Change-Id: I3bb710be8a9d9d17c58d848c9135ec25e37d0fe5 --- .../lite/tests/prepare-quantize-signed.mlir | 34 +++++++++++++ .../mlir/lite/transforms/prepare_quantize.cc | 51 +++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/tensorflow/compiler/mlir/lite/tests/prepare-quantize-signed.mlir b/tensorflow/compiler/mlir/lite/tests/prepare-quantize-signed.mlir index 505faf51fc7..12a158ad77c 100644 --- a/tensorflow/compiler/mlir/lite/tests/prepare-quantize-signed.mlir +++ b/tensorflow/compiler/mlir/lite/tests/prepare-quantize-signed.mlir @@ -166,3 +166,37 @@ func @QuantizeTransposeConv(%arg0: tensor<32x4x4x128xf32>, %arg1: tensor<4xi32>) // PerTensor: %[[DEQUANTIZE:.*]] = "tfl.dequantize"(%[[QUANTIZE]]) : (tensor<1x32x42x128x!quant.uniform:f32, 1.000000e+00>>) -> tensor<1x32x42x128xf32> // PerTensor: "tfl.transpose_conv"(%arg1, %arg0, %[[DEQUANTIZE]] } + +// CHECK-LABEL: QuantizeLstmCellInput +func @QuantizeLstmCellInput(%arg0: tensor<1x28x28xf32>) -> tensor<1x28x20xf32> { + %cst_1 = constant dense<1.0> : tensor<1x20xf32> + %cst_2 = constant unit + %cst_3 = constant dense<1.0> : tensor<20x20xf32> + %cst_7 = constant dense<1.0> : tensor<20xf32> + %cst_11 = constant dense<1.0> : tensor<20x28xf32> + %cell_input = constant dense<0.0> : tensor<1x20xf32> + %cell_stats = "quant.stats"(%cell_input) {layerStats = dense<[-2.73090601, 7.94872093]> : tensor<2xf32>} : (tensor<1x20xf32>) -> tensor<1x20xf32> + %0 = "tfl.unidirectional_sequence_lstm"(%arg0, + %cst_11, %cst_11, %cst_11, %cst_11, + %cst_3, %cst_3, %cst_3, %cst_3, + %cst_2, %cst_2, %cst_2, + %cst_7, %cst_7, %cst_7, %cst_7, + %cst_2, %cst_2, + %cst_1, %cell_stats, + %cst_2, %cst_2, 
%cst_2, %cst_2) {cell_clip = 1.000000e+01 : f32, fused_activation_function = "TANH", proj_clip = 0.000000e+00 : f32, time_major = false} + : ( tensor<1x28x28xf32>, + tensor<20x28xf32>, tensor<20x28xf32>, tensor<20x28xf32>, tensor<20x28xf32>, + tensor<20x20xf32>, tensor<20x20xf32>, tensor<20x20xf32>, tensor<20x20xf32>, + none, none, none, + tensor<20xf32>, tensor<20xf32>, tensor<20xf32>, tensor<20xf32>, + none, none, + tensor<1x20xf32>, tensor<1x20xf32>, + none, none, none, none) -> tensor<1x28x20xf32> + return %0 : tensor<1x28x20xf32> +// CHECK: %[[none:.*]] = constant unit +// CHECK: %[[cell_input:.*]] = constant dense<0.000000e+00> : tensor<1x20xf32> +// CHECK: %[[q:.*]] = "tfl.quantize"(%[[cell_input]]) {qtype = tensor<1x20x!quant.uniform>} : (tensor<1x20xf32>) -> tensor<1x20x!quant.uniform> +// CHECK: %[[dq:.*]] = "tfl.dequantize"(%[[q]]) : (tensor<1x20x!quant.uniform>) -> tensor<1x20xf32> +// Checks if input 19 is correctly passed from a dequantize op. +// CHECK: %[[lstm:.*]] = "tfl.unidirectional_sequence_lstm"(%arg0, {{(%[^%,]+, )+}}%[[dq]], %[[none]], %[[none]], %[[none]], %[[none]]) +} diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_quantize.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_quantize.cc index dacc81c69ee..a2af9b6e9d7 100644 --- a/tensorflow/compiler/mlir/lite/transforms/prepare_quantize.cc +++ b/tensorflow/compiler/mlir/lite/transforms/prepare_quantize.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ // This transformation pass applies quantization propagation on TFLite dialect. +#include #include #include @@ -21,10 +22,13 @@ limitations under the License. 
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "mlir/Dialect/Quant/QuantOps.h" // from @llvm-project #include "mlir/IR/Function.h" // from @llvm-project #include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/IR/PatternMatch.h" // from @llvm-project #include "mlir/IR/Value.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Transforms/GreedyPatternRewriteDriver.h" // from @llvm-project @@ -305,6 +309,52 @@ bool PrepareQuantizePass::ContainsQuantizeOps(FuncOp func) { using PrepareQuantStats = quant::ConvertStatsToQDQs; +// Calculates the minimum power of two that is not less than the value. +double power_of_two_bound(double value) { + return std::pow(2, std::ceil(std::log2(value))); +} + +// Quantize recurrent input of LSTM with 16 bits. +template +struct ConvertLstmStatsToQDQs : public OpRewritePattern { + public: + explicit ConvertLstmStatsToQDQs(MLIRContext* context) + : OpRewritePattern(context, /*benefit=*/2) {} + LogicalResult matchAndRewrite(SourceOp op, + PatternRewriter& rewriter) const override { + quant::StatisticsOp stats_op = llvm::dyn_cast_or_null( + op.input_cell_state().getDefiningOp()); + // Recurrent input is used within an LSTM, and thus should have one use. + if (!stats_op || !stats_op.getResult().hasOneUse()) { + return failure(); + } + auto stats = stats_op.layerStats().dyn_cast(); + if (!stats) { + return failure(); + } + + double max = std::max( + std::abs(FloatAttr::getValueAsDouble(stats.getValue({0}))), + std::abs(FloatAttr::getValueAsDouble(stats.getValue({1})))); + double bound = power_of_two_bound(max); + Type expressed = stats_op.getType().cast().getElementType(); + // maximum value is adjusted to get a scale of power_of_two(max)/32768.
+ quant::QuantizedType quant_type = quant::fakeQuantAttrsToType( + stats_op.getLoc(), 16, -bound, bound * 32767.0 / 32768.0, + /*narrow_range*/ false, expressed, /*is_signed*/ true); + + rewriter.setInsertionPointAfter(stats_op); + Type result_type = quant_type.castFromExpressedType(stats_op.getType()); + auto q = rewriter.create(stats_op.getLoc(), result_type, stats_op.arg()); + rewriter.replaceOpWithNewOp(stats_op, stats_op.getType(), q); + return success(); + } +}; + +using PrepareLstmQuantStats = + ConvertLstmStatsToQDQs; + void PrepareQuantizePass::runOnFunction() { FuncOp func = getFunction(); MLIRContext* ctx = func.getContext(); @@ -344,6 +394,7 @@ void PrepareQuantizePass::runOnFunction() { // Currently, only activation stats are imported, so narrow_range = false. patterns.insert(bit_width, false, false, ctx); } + patterns.insert(ctx); applyPatternsAndFoldGreedily(func, std::move(patterns)); SanityCheckAndAdjustment(func); From 8460a2fee6941eb3a65bed9f5aa3dbd2f80cdb16 Mon Sep 17 00:00:00 2001 From: Taehee Jeong Date: Tue, 10 Nov 2020 21:24:57 -0800 Subject: [PATCH 149/220] Remove excessive LSTMs created from quantization pass Quantization pass creates ops multiple times, but they are eventually pruned out. For stateful ops such as LSTM, we need special pass to prune them when they have not used outputs. 
PiperOrigin-RevId: 341758270 Change-Id: I630e254de896da9e556f4dfb97661447f13772fd --- .../mlir/lite/tests/post-quantize.mlir | 29 +++++++++++++++++++ .../mlir/lite/transforms/post_quantize.cc | 25 ++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/tensorflow/compiler/mlir/lite/tests/post-quantize.mlir b/tensorflow/compiler/mlir/lite/tests/post-quantize.mlir index 51a4f1d3b27..2867108fd47 100644 --- a/tensorflow/compiler/mlir/lite/tests/post-quantize.mlir +++ b/tensorflow/compiler/mlir/lite/tests/post-quantize.mlir @@ -77,3 +77,32 @@ func @HandleReturnedDequantizeWithAnotherUse(%arg0: tensor<128x16xf32>) -> (tens // CHECK-NEXT: return %[[softmax]], %[[argmax]] : tensor<128x16xf32>, tensor<128xi32> return %2, %3 : tensor<128x16xf32>, tensor<128xi32> } + +// CHECK-LABEL: PruneUnusedLstm +func @PruneUnusedLstm(%arg0: tensor<1x28x28xf32>) -> (tensor<1x28x28xf32>) { + %input = "tfl.quantize"(%arg0) {qtype = tensor<1x28x28x!quant.uniform>} : (tensor<1x28x28xf32>) -> tensor<1x28x28x!quant.uniform> + %cst_1 = "tfl.pseudo_qconst"() {qtype = tensor<1x20x!quant.uniform>, value = dense<1> : tensor<1x20xi8>} : () -> tensor<1x20x!quant.uniform> + %cst_2 = constant unit + %cst_3 = "tfl.pseudo_qconst"() {qtype = tensor<20x20x!quant.uniform>, value = dense<1> : tensor<20x20xi8>} : () -> tensor<20x20x!quant.uniform> + %cst_7 = "tfl.pseudo_qconst"() {qtype = tensor<20x!quant.uniform>, value = dense<1> : tensor<20xi8>} : () -> tensor<20x!quant.uniform> + %cst_11 = "tfl.pseudo_qconst"() {qtype = tensor<20x28x!quant.uniform>, value = dense<1> : tensor<20x28xi8>} : () -> tensor<20x28x!quant.uniform> + %cell_input = "tfl.pseudo_qconst"() {qtype = tensor<1x20x!quant.uniform>, value = dense<1> : tensor<1x20xi6>} : () -> tensor<1x20x!quant.uniform> + %0 = "tfl.unidirectional_sequence_lstm"(%input, + %cst_11, %cst_11, %cst_11, %cst_11, + %cst_3, %cst_3, %cst_3, %cst_3, + %cst_2, %cst_2, %cst_2, + %cst_7, %cst_7, %cst_7, %cst_7, + %cst_2, %cst_2, + %cst_1, %cell_input, + %cst_2, 
%cst_2, %cst_2, %cst_2) {cell_clip = 1.000000e+01 : f32, fused_activation_function = "TANH", proj_clip = 0.000000e+00 : f32, time_major = false} + : ( tensor<1x28x28x!quant.uniform>, + tensor<20x28x!quant.uniform>, tensor<20x28x!quant.uniform>, tensor<20x28x!quant.uniform>, tensor<20x28x!quant.uniform>, + tensor<20x20x!quant.uniform>, tensor<20x20x!quant.uniform>, tensor<20x20x!quant.uniform>, tensor<20x20x!quant.uniform>, + none, none, none, + tensor<20x!quant.uniform>, tensor<20x!quant.uniform>, tensor<20x!quant.uniform>, tensor<20x!quant.uniform>, + none, none, + tensor<1x20x!quant.uniform>, tensor<1x20x!quant.uniform>, + none, none, none, none) -> tensor<1x28x20x!quant.uniform> + return %arg0 : tensor<1x28x28xf32> +// CHECK-NEXT: return %arg0 +} diff --git a/tensorflow/compiler/mlir/lite/transforms/post_quantize.cc b/tensorflow/compiler/mlir/lite/transforms/post_quantize.cc index 5afbfe18320..424bd85a0f2 100644 --- a/tensorflow/compiler/mlir/lite/transforms/post_quantize.cc +++ b/tensorflow/compiler/mlir/lite/transforms/post_quantize.cc @@ -139,6 +139,30 @@ struct RemoveVolatileOps : public OpRewritePattern { } }; +// Removes LSTMs that have dangling output. +// LSTMs are not removed automatically because they are stateful ops.
+template +struct PruneUnusedLstm : public OpRewritePattern { + public: + explicit PruneUnusedLstm(MLIRContext* context) + : OpRewritePattern(context) {} + + LogicalResult matchAndRewrite(LstmOpTy lstm_op, + PatternRewriter& rewriter) const override { + Operation* op = lstm_op.getOperation(); + if (op->isKnownTerminator()) { + return failure(); + } + for (auto result : op->getOpResults()) { + if (!result.use_empty()) { + return failure(); + } + } + rewriter.eraseOp(op); + return success(); + } +}; + #include "tensorflow/compiler/mlir/lite/transforms/generated_post_quantize.inc" void PostQuantizePass::runOnFunction() { @@ -147,6 +171,7 @@ void PostQuantizePass::runOnFunction() { auto* ctx = func.getContext(); TFL::populateWithGenerated(ctx, patterns); patterns.insert>(ctx); + patterns.insert>(ctx); applyPatternsAndFoldGreedily(func, std::move(patterns)); if (!emit_quant_adaptor_ops_) { From eafd0fd1feef7feae1cac6714c64ee5fde4b1879 Mon Sep 17 00:00:00 2001 From: Yi Situ Date: Tue, 10 Nov 2020 21:39:42 -0800 Subject: [PATCH 150/220] Refactor private structs to anonymous namespace. 
PiperOrigin-RevId: 341760274 Change-Id: Ie9121226730d9caaeb6963e8fb8f5bab4e071b60 --- .../profiler/internal/gpu/cupti_collector.cc | 713 +++++++++--------- 1 file changed, 358 insertions(+), 355 deletions(-) diff --git a/tensorflow/core/profiler/internal/gpu/cupti_collector.cc b/tensorflow/core/profiler/internal/gpu/cupti_collector.cc index bfdf5dad52a..f3b132c4040 100644 --- a/tensorflow/core/profiler/internal/gpu/cupti_collector.cc +++ b/tensorflow/core/profiler/internal/gpu/cupti_collector.cc @@ -59,121 +59,370 @@ bool IsHostEvent(const CuptiTracerEvent& event, int64* line_id) { } } -void CreateXEvent(const CuptiTracerEvent& event, XPlaneBuilder* plane, - uint64 start_gpu_ns, uint64 end_gpu_ns, XLineBuilder* line) { - if (event.start_time_ns < start_gpu_ns || event.end_time_ns > end_gpu_ns || - event.start_time_ns > event.end_time_ns) { - VLOG(2) << "events have abnormal timestamps:" << event.name - << " start time(ns): " << event.start_time_ns - << " end time(ns): " << event.end_time_ns; - return; - } - std::string kernel_name = port::MaybeAbiDemangle(event.name.c_str()); - if (kernel_name.empty()) { - kernel_name = GetTraceEventTypeName(event.type); - } - XEventMetadata* event_metadata = - plane->GetOrCreateEventMetadata(std::move(kernel_name)); - XEventBuilder xevent = line->AddEvent(*event_metadata); - xevent.SetTimestampNs(event.start_time_ns); - xevent.SetEndTimestampNs(event.end_time_ns); - if (event.source == CuptiTracerEventSource::DriverCallback) { - xevent.AddStatValue( - *plane->GetOrCreateStatMetadata(GetStatTypeStr(StatType::kDeviceId)), - event.device_id); - } - if (event.correlation_id != CuptiTracerEvent::kInvalidCorrelationId) { - xevent.AddStatValue(*plane->GetOrCreateStatMetadata( - GetStatTypeStr(StatType::kCorrelationId)), - event.correlation_id); - } - if (!event.annotation.empty()) { - xevent.AddStatValue(*plane->GetOrCreateStatMetadata( - GetStatTypeStr(StatType::kKernelAnnotation)), - *plane->GetOrCreateStatMetadata(event.annotation)); 
- } - if (event.context_id != CuptiTracerEvent::kInvalidContextId) { - xevent.AddStatValue( - *plane->GetOrCreateStatMetadata(GetStatTypeStr(StatType::kContextId)), - absl::StrCat("$$", static_cast(event.context_id))); - } - if (event.type == CuptiTracerEventType::Kernel) { - std::string kernel_details = absl::StrCat( - "regs:", event.kernel_info.registers_per_thread, - " shm:", event.kernel_info.static_shared_memory_usage, - " grid:", event.kernel_info.grid_x, ",", event.kernel_info.grid_y, ",", - event.kernel_info.grid_z, " block:", event.kernel_info.block_x, ",", - event.kernel_info.block_y, ",", event.kernel_info.block_z); - xevent.AddStatValue(*plane->GetOrCreateStatMetadata( - GetStatTypeStr(StatType::kKernelDetails)), - *plane->GetOrCreateStatMetadata(kernel_details)); - } else if (event.type == CuptiTracerEventType::MemcpyH2D || - event.type == CuptiTracerEventType::MemcpyD2H || - event.type == CuptiTracerEventType::MemcpyD2D || - event.type == CuptiTracerEventType::MemcpyP2P || - event.type == CuptiTracerEventType::MemcpyOther) { - const auto& memcpy_info = event.memcpy_info; - std::string memcpy_details = absl::StrCat("size:", memcpy_info.num_bytes, - " dest:", memcpy_info.destination, - " async:", memcpy_info.async); - xevent.AddStatValue( - *plane->GetOrCreateStatMetadata( - GetStatTypeStr(StatType::kMemcpyDetails)), - *plane->GetOrCreateStatMetadata(std::move(memcpy_details))); - } else if (event.type == CuptiTracerEventType::MemoryAlloc) { - std::string memalloc_details = - absl::StrCat("num_bytes:", event.memalloc_info.num_bytes); - xevent.AddStatValue( - *plane->GetOrCreateStatMetadata( - GetStatTypeStr(StatType::kMemallocDetails)), - *plane->GetOrCreateStatMetadata(std::move(memalloc_details))); - } +// Per-correlation-id launch data: the thread id and start time (ns) of the +// driver callback event. The time parameter is 64-bit so the nanosecond +// timestamp is not truncated before it is stored in enqueue_time_ns. +struct CorrelationInfo { + CorrelationInfo(uint32 t, uint64 e) : thread_id(t), enqueue_time_ns(e) {} + uint32 thread_id; + uint64 enqueue_time_ns; +}; - std::vector annotation_stack = - ParseAnnotationStack(event.annotation); - // If multiple metadata
have the same key name, show the values from the top - // of the stack (innermost annotation). Concatenate the values from "hlo_op". - absl::flat_hash_set key_set; - std::vector hlo_op_names; - for (auto annotation = annotation_stack.rbegin(); - annotation != annotation_stack.rend(); ++annotation) { - for (const Annotation::Metadata& metadata : annotation->metadata) { - if (metadata.key == "tf_op") { - continue; // ignored, obtained from HLO proto via DebugInfoMap - } else if (key_set.insert(metadata.key).second) { - xevent.ParseAndAddStatValue( - *plane->GetOrCreateStatMetadata(metadata.key), metadata.value); +struct PerDeviceCollector { + void CreateXEvent(const CuptiTracerEvent& event, XPlaneBuilder* plane, + uint64 start_gpu_ns, uint64 end_gpu_ns, + XLineBuilder* line) { + if (event.start_time_ns < start_gpu_ns || event.end_time_ns > end_gpu_ns || + event.start_time_ns > event.end_time_ns) { + VLOG(2) << "events have abnormal timestamps:" << event.name + << " start time(ns): " << event.start_time_ns + << " end time(ns): " << event.end_time_ns; + return; + } + std::string kernel_name = port::MaybeAbiDemangle(event.name.c_str()); + if (kernel_name.empty()) { + kernel_name = GetTraceEventTypeName(event.type); + } + XEventMetadata* event_metadata = + plane->GetOrCreateEventMetadata(std::move(kernel_name)); + XEventBuilder xevent = line->AddEvent(*event_metadata); + xevent.SetTimestampNs(event.start_time_ns); + xevent.SetEndTimestampNs(event.end_time_ns); + if (event.source == CuptiTracerEventSource::DriverCallback) { + xevent.AddStatValue( + *plane->GetOrCreateStatMetadata(GetStatTypeStr(StatType::kDeviceId)), + event.device_id); + } + if (event.correlation_id != CuptiTracerEvent::kInvalidCorrelationId) { + xevent.AddStatValue(*plane->GetOrCreateStatMetadata( + GetStatTypeStr(StatType::kCorrelationId)), + event.correlation_id); + } + if (!event.annotation.empty()) { + xevent.AddStatValue(*plane->GetOrCreateStatMetadata( + 
GetStatTypeStr(StatType::kKernelAnnotation)), + *plane->GetOrCreateStatMetadata(event.annotation)); + } + if (event.context_id != CuptiTracerEvent::kInvalidContextId) { + xevent.AddStatValue( + *plane->GetOrCreateStatMetadata(GetStatTypeStr(StatType::kContextId)), + absl::StrCat("$$", static_cast(event.context_id))); + } + if (event.type == CuptiTracerEventType::Kernel) { + std::string kernel_details = absl::StrCat( + "regs:", event.kernel_info.registers_per_thread, + " shm:", event.kernel_info.static_shared_memory_usage, + " grid:", event.kernel_info.grid_x, ",", event.kernel_info.grid_y, + ",", event.kernel_info.grid_z, " block:", event.kernel_info.block_x, + ",", event.kernel_info.block_y, ",", event.kernel_info.block_z); + xevent.AddStatValue(*plane->GetOrCreateStatMetadata( + GetStatTypeStr(StatType::kKernelDetails)), + *plane->GetOrCreateStatMetadata(kernel_details)); + } else if (event.type == CuptiTracerEventType::MemcpyH2D || + event.type == CuptiTracerEventType::MemcpyD2H || + event.type == CuptiTracerEventType::MemcpyD2D || + event.type == CuptiTracerEventType::MemcpyP2P || + event.type == CuptiTracerEventType::MemcpyOther) { + const auto& memcpy_info = event.memcpy_info; + std::string memcpy_details = absl::StrCat( + "size:", memcpy_info.num_bytes, " dest:", memcpy_info.destination, + " async:", memcpy_info.async); + xevent.AddStatValue( + *plane->GetOrCreateStatMetadata( + GetStatTypeStr(StatType::kMemcpyDetails)), + *plane->GetOrCreateStatMetadata(std::move(memcpy_details))); + } else if (event.type == CuptiTracerEventType::MemoryAlloc) { + std::string memalloc_details = + absl::StrCat("num_bytes:", event.memalloc_info.num_bytes); + xevent.AddStatValue( + *plane->GetOrCreateStatMetadata( + GetStatTypeStr(StatType::kMemallocDetails)), + *plane->GetOrCreateStatMetadata(std::move(memalloc_details))); + } + + std::vector annotation_stack = + ParseAnnotationStack(event.annotation); + // If multiple metadata have the same key name, show the values from the 
top + // of the stack (innermost annotation). Concatenate the values from + // "hlo_op". + absl::flat_hash_set key_set; + std::vector hlo_op_names; + for (auto annotation = annotation_stack.rbegin(); + annotation != annotation_stack.rend(); ++annotation) { + for (const Annotation::Metadata& metadata : annotation->metadata) { + if (metadata.key == "tf_op") { + continue; // ignored, obtained from HLO proto via DebugInfoMap + } else if (key_set.insert(metadata.key).second) { + xevent.ParseAndAddStatValue( + *plane->GetOrCreateStatMetadata(metadata.key), metadata.value); + } } } + if (!annotation_stack.empty()) { + xevent.AddStatValue( + *plane->GetOrCreateStatMetadata(GetStatTypeStr(StatType::kTfOp)), + *plane->GetOrCreateStatMetadata(annotation_stack.begin()->name)); + } } - if (!annotation_stack.empty()) { - xevent.AddStatValue( - *plane->GetOrCreateStatMetadata(GetStatTypeStr(StatType::kTfOp)), - *plane->GetOrCreateStatMetadata(annotation_stack.begin()->name)); - } -} -absl::optional GetDeviceAttribute(CUdevice device, - CUdevice_attribute attrib) { - int ret_val; - CUresult err = cuDeviceGetAttribute(&ret_val, attrib, device); - if (err != CUDA_SUCCESS) return absl::nullopt; - return ret_val; -} - -std::string GetDeviceXLineName( - int64 stream_id, absl::flat_hash_set& event_types) { - std::string line_name = absl::StrCat("Stream #", stream_id); - event_types.erase(CuptiTracerEventType::Unsupported); - if (event_types.empty()) return line_name; - if (event_types.count(CuptiTracerEventType::Overhead)) - return "CUPTI overhead"; - std::vector type_names; - for (const auto event_type : event_types) { - type_names.emplace_back(GetTraceEventTypeName(event_type)); + absl::optional GetDeviceAttribute(CUdevice device, + CUdevice_attribute attrib) { + int ret_val; + CUresult err = cuDeviceGetAttribute(&ret_val, attrib, device); + if (err != CUDA_SUCCESS) return absl::nullopt; + return ret_val; } - return absl::StrCat(line_name, "(", absl::StrJoin(type_names, ","), ")"); -} 
+ + std::string GetDeviceXLineName( + int64 stream_id, absl::flat_hash_set& event_types) { + std::string line_name = absl::StrCat("Stream #", stream_id); + event_types.erase(CuptiTracerEventType::Unsupported); + if (event_types.empty()) return line_name; + if (event_types.count(CuptiTracerEventType::Overhead)) + return "CUPTI overhead"; + std::vector type_names; + for (const auto event_type : event_types) { + type_names.emplace_back(GetTraceEventTypeName(event_type)); + } + return absl::StrCat(line_name, "(", absl::StrJoin(type_names, ","), ")"); + } + + void AddEvent(CuptiTracerEvent&& event) { + mutex_lock l(m); + if (event.source == CuptiTracerEventSource::DriverCallback) { + // Cupti api callback events were used to populate launch times etc. + if (event.correlation_id != CuptiTracerEvent::kInvalidCorrelationId) { + correlation_info.insert( + {event.correlation_id, + CorrelationInfo(event.thread_id, event.start_time_ns)}); + } + events.emplace_back(std::move(event)); + } else { + // Cupti activity events measure device times etc. 
+ events.emplace_back(std::move(event)); + } + } + + void Flush(int32 device_ordinal, uint64 start_walltime_ns, + uint64 start_gpu_ns, StepStats* step_stats) { + mutex_lock l(m); + absl::flat_hash_map, + DeviceStepStats*> + stream_dev_stats_map; + DeviceStepStats* unknown_stream_dev_stats = nullptr; + DeviceStepStats* all_streams_dev_stats = nullptr; + DeviceStepStats* memcpy_dev_stats = nullptr; + DeviceStepStats* sync_dev_stats = nullptr; + for (const CuptiTracerEvent& event : events) { + NodeExecStats* ns = new NodeExecStats; + ns->set_all_start_micros( + (start_walltime_ns + (event.start_time_ns - start_gpu_ns)) / 1000); + ns->set_op_start_rel_micros(0); + auto elapsed_ns = event.end_time_ns - event.start_time_ns; + ns->set_op_end_rel_micros(elapsed_ns / 1000); + ns->set_all_end_rel_micros(elapsed_ns / 1000); + + if (event.source == CuptiTracerEventSource::DriverCallback) { + // Legacy code ignore all other launch events except + // cuStreamSynchronize. + if (event.name == "cuStreamSynchronize") { + ns->set_node_name(event.name); + ns->set_timeline_label(absl::StrCat("ThreadId ", event.thread_id)); + ns->set_thread_id(event.thread_id); + if (sync_dev_stats == nullptr) { + sync_dev_stats = step_stats->add_dev_stats(); + sync_dev_stats->set_device( + absl::StrCat("/device:GPU:", device_ordinal, "/sync")); + } + sync_dev_stats->add_node_stats()->Swap(ns); + } + } else { // CuptiTracerEventSource::Activity + // Get launch information if available. + if (event.correlation_id != CuptiTracerEvent::kInvalidCorrelationId) { + auto it = correlation_info.find(event.correlation_id); + if (it != correlation_info.end()) { + ns->set_scheduled_micros(it->second.enqueue_time_ns / 1000); + ns->set_thread_id(it->second.thread_id); + } + } + + auto annotation_stack = ParseAnnotationStack(event.annotation); + std::string kernel_name = port::MaybeAbiDemangle(event.name.c_str()); + std::string activity_name = + !annotation_stack.empty() + ? 
std::string(annotation_stack.back().name) + : kernel_name; + ns->set_node_name(activity_name); + switch (event.type) { + case CuptiTracerEventType::Kernel: { + ns->set_timeline_label(absl::StrCat( + kernel_name, " regs:", event.kernel_info.registers_per_thread, + " shm:", event.kernel_info.static_shared_memory_usage, + " grid: ", event.kernel_info.grid_x, ",", + event.kernel_info.grid_y, ",", event.kernel_info.grid_z, + " block:", event.kernel_info.block_x, ",", + event.kernel_info.block_y, ",", event.kernel_info.block_z, "@@", + event.annotation)); + DeviceStepStats*& stream_dev_stats = + stream_dev_stats_map[std::make_pair(event.stream_id, + event.type)]; + if (stream_dev_stats == nullptr) { + stream_dev_stats = step_stats->add_dev_stats(); + stream_dev_stats->set_device(absl::StrCat( + "/device:GPU:", device_ordinal, "/stream:", event.stream_id)); + } + *stream_dev_stats->add_node_stats() = *ns; + if (all_streams_dev_stats == nullptr) { + all_streams_dev_stats = step_stats->add_dev_stats(); + all_streams_dev_stats->set_device( + absl::StrCat("/device:GPU:", device_ordinal, "/stream:all")); + } + all_streams_dev_stats->add_node_stats()->Swap(ns); + break; + } + case CuptiTracerEventType::MemcpyH2D: + case CuptiTracerEventType::MemcpyD2H: + case CuptiTracerEventType::MemcpyD2D: + case CuptiTracerEventType::MemcpyP2P: { + std::string details = absl::StrCat( + activity_name, " bytes:", event.memcpy_info.num_bytes); + if (event.memcpy_info.async) { + absl::StrAppend(&details, " aync"); + } + if (event.memcpy_info.destination != event.device_id) { + absl::StrAppend(&details, + " to device:", event.memcpy_info.destination); + } + ns->set_timeline_label(std::move(details)); + DeviceStepStats*& stream_dev_stats = + stream_dev_stats_map[std::make_pair(event.stream_id, + event.type)]; + if (stream_dev_stats == nullptr) { + stream_dev_stats = step_stats->add_dev_stats(); + stream_dev_stats->set_device(absl::StrCat( + "/device:GPU:", device_ordinal, "/stream:", 
event.stream_id, + "<", GetTraceEventTypeName(event.type), ">")); + } + *stream_dev_stats->add_node_stats() = *ns; + if (memcpy_dev_stats == nullptr) { + memcpy_dev_stats = step_stats->add_dev_stats(); + memcpy_dev_stats->set_device( + absl::StrCat("/device:GPU:", device_ordinal, "/memcpy")); + } + memcpy_dev_stats->add_node_stats()->Swap(ns); + break; + } + default: + ns->set_timeline_label(activity_name); + if (unknown_stream_dev_stats == nullptr) { + unknown_stream_dev_stats = step_stats->add_dev_stats(); + unknown_stream_dev_stats->set_device( + absl::StrCat("/device:GPU:", device_ordinal, "/stream:")); + } + unknown_stream_dev_stats->add_node_stats()->Swap(ns); + break; + } + } + } + events.clear(); + } + + size_t Flush(uint64 start_gpu_ns, uint64 end_gpu_ns, + XPlaneBuilder* device_plane, XPlaneBuilder* host_plane) { + mutex_lock l(m); + // Tracking event types per line. + absl::flat_hash_map> + events_types_per_line; + for (auto& event : events) { + int64 line_id = CuptiTracerEvent::kInvalidThreadId; + bool is_host_event = IsHostEvent(event, &line_id); + if (line_id == CuptiTracerEvent::kInvalidThreadId || + line_id == CuptiTracerEvent::kInvalidStreamId) + continue; + auto* plane = is_host_event ? 
host_plane : device_plane; + XLineBuilder line = plane->GetOrCreateLine(line_id); + line.SetTimestampNs(start_gpu_ns); + CreateXEvent(event, plane, start_gpu_ns, end_gpu_ns, &line); + events_types_per_line[line_id].emplace(event.type); + } + device_plane->ForEachLine([&](XLineBuilder line) { + line.SetName( + GetDeviceXLineName(line.Id(), events_types_per_line[line.Id()])); + }); + host_plane->ForEachLine([&](XLineBuilder line) { + line.SetName(absl::StrCat("Host Threads/", line.Id())); + }); + size_t num_events = events.size(); + events.clear(); + return num_events; + } + + void GetDeviceCapabilities(int32 device_ordinal, + XPlaneBuilder* device_plane) { + CUdevice device; + if (cuDeviceGet(&device, device_ordinal) != CUDA_SUCCESS) return; + + auto clock_rate_in_khz = + GetDeviceAttribute(device, CU_DEVICE_ATTRIBUTE_CLOCK_RATE); + if (clock_rate_in_khz) { + device_plane->AddStatValue( + *device_plane->GetOrCreateStatMetadata( + GetStatTypeStr(StatType::kDevCapClockRateKHz)), + *clock_rate_in_khz); + } + + auto core_count = + GetDeviceAttribute(device, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT); + if (core_count) { + device_plane->AddStatValue( + *device_plane->GetOrCreateStatMetadata( + GetStatTypeStr(StatType::kDevCapCoreCount)), + *core_count); + } + + auto mem_clock_khz = + GetDeviceAttribute(device, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE); + auto mem_bus_width_bits = + GetDeviceAttribute(device, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH); + if (mem_clock_khz && mem_bus_width_bits) { + // Times 2 because HBM is DDR memory; it gets two data bits per each + // data lane. 
+ auto memory_bandwidth = + uint64{2} * (*mem_clock_khz) * 1000 * (*mem_bus_width_bits) / 8; + device_plane->AddStatValue( + *device_plane->GetOrCreateStatMetadata( + GetStatTypeStr(StatType::kDevCapMemoryBandwidth)), + memory_bandwidth); + } + + size_t total_memory = 0; + if (cuDeviceTotalMem(&total_memory, device) == CUDA_SUCCESS) { + device_plane->AddStatValue( + *device_plane->GetOrCreateStatMetadata( + GetStatTypeStr(StatType::kDevCapMemorySize)), + static_cast(total_memory)); + } + + auto compute_capability_major = GetDeviceAttribute( + device, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR); + if (compute_capability_major) { + device_plane->AddStatValue( + *device_plane->GetOrCreateStatMetadata( + GetStatTypeStr(StatType::kDevCapComputeCapMajor)), + *compute_capability_major); + } + auto compute_capability_minor = GetDeviceAttribute( + device, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR); + if (compute_capability_minor) { + device_plane->AddStatValue( + *device_plane->GetOrCreateStatMetadata( + GetStatTypeStr(StatType::kDevCapComputeCapMinor)), + *compute_capability_minor); + } + } + + mutex m; + std::vector events TF_GUARDED_BY(m); + absl::flat_hash_map correlation_info + TF_GUARDED_BY(m); +}; } // namespace @@ -310,252 +559,6 @@ class CuptiTraceCollectorImpl : public CuptiTraceCollector { [&](XLineBuilder line) { line.SetTimestampNs(start_walltime_ns); }); } - struct CorrelationInfo { - CorrelationInfo(uint32 t, uint32 e) : thread_id(t), enqueue_time_ns(e) {} - uint32 thread_id; - uint64 enqueue_time_ns; - }; - struct PerDeviceCollector { - void AddEvent(CuptiTracerEvent&& event) { - mutex_lock l(m); - if (event.source == CuptiTracerEventSource::DriverCallback) { - // Cupti api callback events were used to populate launch times etc. 
- if (event.correlation_id != CuptiTracerEvent::kInvalidCorrelationId) { - correlation_info.insert( - {event.correlation_id, - CorrelationInfo(event.thread_id, event.start_time_ns)}); - } - events.emplace_back(std::move(event)); - } else { - // Cupti activity events measure device times etc. - events.emplace_back(std::move(event)); - } - } - - void Flush(int32 device_ordinal, uint64 start_walltime_ns, - uint64 start_gpu_ns, StepStats* step_stats) { - mutex_lock l(m); - absl::flat_hash_map, - DeviceStepStats*> - stream_dev_stats_map; - DeviceStepStats* unknown_stream_dev_stats = nullptr; - DeviceStepStats* all_streams_dev_stats = nullptr; - DeviceStepStats* memcpy_dev_stats = nullptr; - DeviceStepStats* sync_dev_stats = nullptr; - for (const CuptiTracerEvent& event : events) { - NodeExecStats* ns = new NodeExecStats; - ns->set_all_start_micros( - (start_walltime_ns + (event.start_time_ns - start_gpu_ns)) / 1000); - ns->set_op_start_rel_micros(0); - auto elapsed_ns = event.end_time_ns - event.start_time_ns; - ns->set_op_end_rel_micros(elapsed_ns / 1000); - ns->set_all_end_rel_micros(elapsed_ns / 1000); - - if (event.source == CuptiTracerEventSource::DriverCallback) { - // Legacy code ignore all other launch events except - // cuStreamSynchronize. - if (event.name == "cuStreamSynchronize") { - ns->set_node_name(event.name); - ns->set_timeline_label(absl::StrCat("ThreadId ", event.thread_id)); - ns->set_thread_id(event.thread_id); - if (sync_dev_stats == nullptr) { - sync_dev_stats = step_stats->add_dev_stats(); - sync_dev_stats->set_device( - absl::StrCat("/device:GPU:", device_ordinal, "/sync")); - } - sync_dev_stats->add_node_stats()->Swap(ns); - } - } else { // CuptiTracerEventSource::Activity - // Get launch information if available. 
- if (event.correlation_id != CuptiTracerEvent::kInvalidCorrelationId) { - auto it = correlation_info.find(event.correlation_id); - if (it != correlation_info.end()) { - ns->set_scheduled_micros(it->second.enqueue_time_ns / 1000); - ns->set_thread_id(it->second.thread_id); - } - } - - auto annotation_stack = ParseAnnotationStack(event.annotation); - std::string kernel_name = port::MaybeAbiDemangle(event.name.c_str()); - std::string activity_name = - !annotation_stack.empty() - ? std::string(annotation_stack.back().name) - : kernel_name; - ns->set_node_name(activity_name); - switch (event.type) { - case CuptiTracerEventType::Kernel: { - ns->set_timeline_label(absl::StrCat( - kernel_name, " regs:", event.kernel_info.registers_per_thread, - " shm:", event.kernel_info.static_shared_memory_usage, - " grid: ", event.kernel_info.grid_x, ",", - event.kernel_info.grid_y, ",", event.kernel_info.grid_z, - " block:", event.kernel_info.block_x, ",", - event.kernel_info.block_y, ",", event.kernel_info.block_z, - "@@", event.annotation)); - DeviceStepStats*& stream_dev_stats = - stream_dev_stats_map[std::make_pair(event.stream_id, - event.type)]; - if (stream_dev_stats == nullptr) { - stream_dev_stats = step_stats->add_dev_stats(); - stream_dev_stats->set_device( - absl::StrCat("/device:GPU:", device_ordinal, - "/stream:", event.stream_id)); - } - *stream_dev_stats->add_node_stats() = *ns; - if (all_streams_dev_stats == nullptr) { - all_streams_dev_stats = step_stats->add_dev_stats(); - all_streams_dev_stats->set_device(absl::StrCat( - "/device:GPU:", device_ordinal, "/stream:all")); - } - all_streams_dev_stats->add_node_stats()->Swap(ns); - break; - } - case CuptiTracerEventType::MemcpyH2D: - case CuptiTracerEventType::MemcpyD2H: - case CuptiTracerEventType::MemcpyD2D: - case CuptiTracerEventType::MemcpyP2P: { - std::string details = absl::StrCat( - activity_name, " bytes:", event.memcpy_info.num_bytes); - if (event.memcpy_info.async) { - absl::StrAppend(&details, " aync"); - } 
- if (event.memcpy_info.destination != event.device_id) { - absl::StrAppend(&details, - " to device:", event.memcpy_info.destination); - } - ns->set_timeline_label(std::move(details)); - DeviceStepStats*& stream_dev_stats = - stream_dev_stats_map[std::make_pair(event.stream_id, - event.type)]; - if (stream_dev_stats == nullptr) { - stream_dev_stats = step_stats->add_dev_stats(); - stream_dev_stats->set_device(absl::StrCat( - "/device:GPU:", device_ordinal, "/stream:", event.stream_id, - "<", GetTraceEventTypeName(event.type), ">")); - } - *stream_dev_stats->add_node_stats() = *ns; - if (memcpy_dev_stats == nullptr) { - memcpy_dev_stats = step_stats->add_dev_stats(); - memcpy_dev_stats->set_device( - absl::StrCat("/device:GPU:", device_ordinal, "/memcpy")); - } - memcpy_dev_stats->add_node_stats()->Swap(ns); - break; - } - default: - ns->set_timeline_label(activity_name); - if (unknown_stream_dev_stats == nullptr) { - unknown_stream_dev_stats = step_stats->add_dev_stats(); - unknown_stream_dev_stats->set_device( - absl::StrCat("/device:GPU:", device_ordinal, "/stream:")); - } - unknown_stream_dev_stats->add_node_stats()->Swap(ns); - break; - } - } - } - events.clear(); - } - - size_t Flush(uint64 start_gpu_ns, uint64 end_gpu_ns, - XPlaneBuilder* device_plane, XPlaneBuilder* host_plane) { - mutex_lock l(m); - // Tracking event types per line. - absl::flat_hash_map> - events_types_per_line; - for (auto& event : events) { - int64 line_id = CuptiTracerEvent::kInvalidThreadId; - bool is_host_event = IsHostEvent(event, &line_id); - if (line_id == CuptiTracerEvent::kInvalidThreadId || - line_id == CuptiTracerEvent::kInvalidStreamId) - continue; - auto* plane = is_host_event ? 
host_plane : device_plane; - XLineBuilder line = plane->GetOrCreateLine(line_id); - line.SetTimestampNs(start_gpu_ns); - CreateXEvent(event, plane, start_gpu_ns, end_gpu_ns, &line); - events_types_per_line[line_id].emplace(event.type); - } - device_plane->ForEachLine([&](XLineBuilder line) { - line.SetName( - GetDeviceXLineName(line.Id(), events_types_per_line[line.Id()])); - }); - host_plane->ForEachLine([&](XLineBuilder line) { - line.SetName(absl::StrCat("Host Threads/", line.Id())); - }); - size_t num_events = events.size(); - events.clear(); - return num_events; - } - - void GetDeviceCapabilities(int32 device_ordinal, - XPlaneBuilder* device_plane) { - CUdevice device; - if (cuDeviceGet(&device, device_ordinal) != CUDA_SUCCESS) return; - - auto clock_rate_in_khz = - GetDeviceAttribute(device, CU_DEVICE_ATTRIBUTE_CLOCK_RATE); - if (clock_rate_in_khz) { - device_plane->AddStatValue( - *device_plane->GetOrCreateStatMetadata( - GetStatTypeStr(StatType::kDevCapClockRateKHz)), - *clock_rate_in_khz); - } - - auto core_count = - GetDeviceAttribute(device, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT); - if (core_count) { - device_plane->AddStatValue( - *device_plane->GetOrCreateStatMetadata( - GetStatTypeStr(StatType::kDevCapCoreCount)), - *core_count); - } - - auto mem_clock_khz = - GetDeviceAttribute(device, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE); - auto mem_bus_width_bits = GetDeviceAttribute( - device, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH); - if (mem_clock_khz && mem_bus_width_bits) { - // Times 2 because HBM is DDR memory; it gets two data bits per each - // data lane. 
- auto memory_bandwidth = - uint64{2} * (*mem_clock_khz) * 1000 * (*mem_bus_width_bits) / 8; - device_plane->AddStatValue( - *device_plane->GetOrCreateStatMetadata( - GetStatTypeStr(StatType::kDevCapMemoryBandwidth)), - memory_bandwidth); - } - - size_t total_memory = 0; - if (cuDeviceTotalMem(&total_memory, device) == CUDA_SUCCESS) { - device_plane->AddStatValue( - *device_plane->GetOrCreateStatMetadata( - GetStatTypeStr(StatType::kDevCapMemorySize)), - static_cast(total_memory)); - } - - auto compute_capability_major = GetDeviceAttribute( - device, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR); - if (compute_capability_major) { - device_plane->AddStatValue( - *device_plane->GetOrCreateStatMetadata( - GetStatTypeStr(StatType::kDevCapComputeCapMajor)), - *compute_capability_major); - } - auto compute_capability_minor = GetDeviceAttribute( - device, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR); - if (compute_capability_minor) { - device_plane->AddStatValue( - *device_plane->GetOrCreateStatMetadata( - GetStatTypeStr(StatType::kDevCapComputeCapMinor)), - *compute_capability_minor); - } - } - - mutex m; - std::vector events TF_GUARDED_BY(m); - absl::flat_hash_map correlation_info - TF_GUARDED_BY(m); - }; absl::FixedArray per_device_collector_; TF_DISALLOW_COPY_AND_ASSIGN(CuptiTraceCollectorImpl); From ee9115e96ee708344cbaeb80c46a3fd6481b532b Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Tue, 10 Nov 2020 21:43:38 -0800 Subject: [PATCH 151/220] Internal code cleanup PiperOrigin-RevId: 341760834 Change-Id: I6436cf8eba026fe0b631f79f96fa8ad476f849bd --- tensorflow/lite/kernels/squeeze_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/kernels/squeeze_test.cc b/tensorflow/lite/kernels/squeeze_test.cc index 9aac56cf2ef..5adec247339 100644 --- a/tensorflow/lite/kernels/squeeze_test.cc +++ b/tensorflow/lite/kernels/squeeze_test.cc @@ -149,7 +149,7 @@ TEST(SqueezeOpTest, SqueezeNegativeAxisString) { EXPECT_THAT(m.GetStringOutput(), 
ElementsAreArray({"a", "b"})); } -TYPED_TEST(SqueezeOpTest, SqueezeAllDimsString) { +TEST(SqueezeOpTest, SqueezeAllDimsString) { std::initializer_list data = {"a"}; SqueezeOpModel m( {GetTensorType(), {1, 1, 1, 1, 1, 1, 1}}, From 489074cda453f73bb2d168a6a7fd09d753081274 Mon Sep 17 00:00:00 2001 From: Ruoxin Sang Date: Tue, 10 Nov 2020 22:57:19 -0800 Subject: [PATCH 152/220] Preserving infinite cardinality information for `tf.data.experimental.sample_from_datasets` transformation. PiperOrigin-RevId: 341768470 Change-Id: If6f379472834640e35146116987a9348efb662f6 --- .../experimental/directed_interleave_dataset_op.cc | 12 ++++++++++++ .../kernel_tests/directed_interleave_dataset_test.py | 7 +++++++ 2 files changed, 19 insertions(+) diff --git a/tensorflow/core/kernels/data/experimental/directed_interleave_dataset_op.cc b/tensorflow/core/kernels/data/experimental/directed_interleave_dataset_op.cc index 6e52f74a336..bfa39a71bd9 100644 --- a/tensorflow/core/kernels/data/experimental/directed_interleave_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/directed_interleave_dataset_op.cc @@ -82,6 +82,18 @@ class DirectedInterleaveDatasetOp::Dataset : public DatasetBase { return name_utils::DatasetDebugString(kDatasetType); } + int64 Cardinality() const override { + // As long as one of input dataset has infinite cardinality, the output + // cardinality is infinite. 
+ for (const auto& input : data_inputs_) { + int64 n = input->Cardinality(); + if (n == kInfiniteCardinality) { + return n; + } + } + return kUnknownCardinality; + } + Status CheckExternalState() const override { for (const auto& input : data_inputs_) { TF_RETURN_IF_ERROR(input->CheckExternalState()); diff --git a/tensorflow/python/data/experimental/kernel_tests/directed_interleave_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/directed_interleave_dataset_test.py index f6ccc5163a4..1d3c95d4cc0 100644 --- a/tensorflow/python/data/experimental/kernel_tests/directed_interleave_dataset_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/directed_interleave_dataset_test.py @@ -98,6 +98,13 @@ class DirectedInterleaveDatasetTest(test_base.DatasetTestBase, freqs = self._testSampleFromDatasetsHelper(probs_ds, classes, num_samples) self.assertLess(self._chi2(probs, freqs / num_samples), 1e-2) + @combinations.generate(test_base.default_test_combinations()) + def testSampleFromDatasetsCardinality(self): + ds1 = dataset_ops.Dataset.from_tensors([1.0]).repeat() + ds2 = dataset_ops.Dataset.from_tensors([2.0]).repeat() + ds = interleave_ops.sample_from_datasets([ds1, ds2]) + self.assertEqual(self.evaluate(ds.cardinality()), dataset_ops.INFINITE) + @combinations.generate(test_base.default_test_combinations()) def testSelectFromDatasets(self): words = [b"foo", b"bar", b"baz"] From fdad6c7ae428036601ca20db4fac16bab3e07969 Mon Sep 17 00:00:00 2001 From: Yuefeng Zhou Date: Wed, 11 Nov 2020 00:27:05 -0800 Subject: [PATCH 153/220] Set inter_ops threads of workers in a in-process cluster to at least 4. Enable parameter_server_training_test on OSS. 
PiperOrigin-RevId: 341777401 Change-Id: I40b509ea849f0c52ba42e06475fcbc113b07c7e4 --- tensorflow/python/distribute/multi_worker_test_base.py | 5 +++++ tensorflow/python/keras/distribute/BUILD | 3 --- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/distribute/multi_worker_test_base.py b/tensorflow/python/distribute/multi_worker_test_base.py index 013e178d543..4feb2337b86 100644 --- a/tensorflow/python/distribute/multi_worker_test_base.py +++ b/tensorflow/python/distribute/multi_worker_test_base.py @@ -21,6 +21,7 @@ from __future__ import print_function import contextlib import copy import json +import multiprocessing import os import subprocess import sys @@ -163,6 +164,10 @@ def create_in_process_cluster(num_workers, worker_config = config_pb2.ConfigProto() worker_config.gpu_options.per_process_gpu_memory_fraction = gpu_mem_frac + # The cluster may hang if workers don't have enough inter_op threads. + if multiprocessing.cpu_count() < 4: + worker_config.inter_op_parallelism_threads = 4 + # Enable collective ops which has no impact on non-collective ops. # TODO(yuefengz, tucker): removing this after we move the initialization of # collective mgr to the session level. diff --git a/tensorflow/python/keras/distribute/BUILD b/tensorflow/python/keras/distribute/BUILD index 95644a25538..d0236226aae 100644 --- a/tensorflow/python/keras/distribute/BUILD +++ b/tensorflow/python/keras/distribute/BUILD @@ -866,9 +866,6 @@ tf_py_test( srcs = ["parameter_server_training_test.py"], python_version = "PY3", shard_count = 1, - tags = [ - "no_oss", # TODO(b/162119374): enable it in OSS. - ], deps = [ "//tensorflow/python:constant_op", "//tensorflow/python:dtypes", From bec7b3dae423fde74bf57bce037acd075c9f43d3 Mon Sep 17 00:00:00 2001 From: Ruoxin Sang Date: Wed, 11 Nov 2020 00:29:04 -0800 Subject: [PATCH 154/220] Make sure TPUPartitionedInput shape inference doesn't crash if input handle shapes and types are not available. 
PiperOrigin-RevId: 341777572 Change-Id: Iad741580d81a51de3d92861f8c999047bd4b163d --- .../core/tpu/ops/tpu_partitioned_input_op.cc | 53 ++++++++++--------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/tensorflow/core/tpu/ops/tpu_partitioned_input_op.cc b/tensorflow/core/tpu/ops/tpu_partitioned_input_op.cc index f6ccf279956..26d602c964a 100644 --- a/tensorflow/core/tpu/ops/tpu_partitioned_input_op.cc +++ b/tensorflow/core/tpu/ops/tpu_partitioned_input_op.cc @@ -61,35 +61,40 @@ REGISTER_OP("TPUPartitionedInput") // If this is a resource, unify the resource shapes. if (dtype == DT_RESOURCE) { ShapeHandle previous_shape_handle; + const std::vector* shapes_and_types = + nullptr; for (int i = c->num_inputs() - 1; i >= 0; --i) { - ShapeHandle shape_handle = - c->input_handle_shapes_and_types(i)->at(0).shape; - if (!c->FullyDefined(shape_handle)) { - return errors::InvalidArgument("Inputs must have static shape,", - "input[", i, - "] has unknown dimension."); - } - if (i != c->num_inputs() - 1) { - ShapeHandle tmp; - if (!c->Merge(shape_handle, previous_shape_handle, &tmp).ok()) { - return errors::InvalidArgument( - "Inputs must have the same shape."); + shapes_and_types = c->input_handle_shapes_and_types(i); + if (shapes_and_types) { + ShapeHandle shape_handle = shapes_and_types->at(0).shape; + if (!c->FullyDefined(shape_handle)) { + return errors::InvalidArgument("Inputs must have static shape,", + "input[", i, + "] has unknown dimension."); + } + if (i != c->num_inputs() - 1) { + ShapeHandle tmp; + if (!c->Merge(shape_handle, previous_shape_handle, &tmp).ok()) { + return errors::InvalidArgument( + "Inputs must have the same shape."); + } + } else { + previous_shape_handle = shape_handle; } - } else { - previous_shape_handle = shape_handle; } } - if (partition_dim == -1) { - c->set_output_handle_shapes_and_types( - 0, *c->input_handle_shapes_and_types(0)); - } else { - ShapeHandle newoutput0 = - _UpdatePartitionDim(c, previous_shape_handle, 
partition_dim); + if (shapes_and_types) { + if (partition_dim == -1) { + c->set_output_handle_shapes_and_types(0, *shapes_and_types); + } else { + ShapeHandle newoutput0 = + _UpdatePartitionDim(c, previous_shape_handle, partition_dim); - std::vector output_shapes_and_types; - output_shapes_and_types.push_back(shape_inference::ShapeAndType( - newoutput0, c->input_handle_shapes_and_types(0)->at(0).dtype)); - c->set_output_handle_shapes_and_types(0, output_shapes_and_types); + std::vector output_shapes_and_types; + output_shapes_and_types.push_back(shape_inference::ShapeAndType( + newoutput0, shapes_and_types->at(0).dtype)); + c->set_output_handle_shapes_and_types(0, output_shapes_and_types); + } } } From 14478ff4e50b583930ac3bf13fc1855e0595de62 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Wed, 11 Nov 2020 00:36:38 -0800 Subject: [PATCH 155/220] [XLA:SPMD] Propagate sharding backwards through slice. PiperOrigin-RevId: 341778376 Change-Id: I5ef13e20c40eb4d6bb28cec753c3ea55180adf74 --- tensorflow/compiler/xla/service/sharding_propagation.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/compiler/xla/service/sharding_propagation.cc b/tensorflow/compiler/xla/service/sharding_propagation.cc index 6524973a08e..94d97e0de19 100644 --- a/tensorflow/compiler/xla/service/sharding_propagation.cc +++ b/tensorflow/compiler/xla/service/sharding_propagation.cc @@ -1290,6 +1290,9 @@ absl::optional GetShardingFromUser( return hlo_sharding_util::ReshapeSharding( user.shape(), instruction.shape(), user.sharding()); } + case HloOpcode::kSlice: { + return user.sharding(); + } case HloOpcode::kTranspose: { // Calculate the dimension numbers for reversing the current transpose // and then use TransposeSharding to convert the output sharding to an From ff8c7a7ee18913fd379f538a43f2b6df3c19f3c8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 11 Nov 2020 01:01:32 -0800 Subject: [PATCH 156/220] Update GraphDef version to 582. 
PiperOrigin-RevId: 341780989 Change-Id: Ie984d9d97edf715e08bacb54e09cc346ebb7ff77 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 00ab2053cbb..80025fce20b 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 581 // Updated: 2020/11/10 +#define TF_GRAPH_DEF_VERSION 582 // Updated: 2020/11/11 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 1b39a191caa651a7718e7059103bb43760b8fd9f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 11 Nov 2020 01:01:32 -0800 Subject: [PATCH 157/220] compat: Update forward compatibility horizon to 2020-11-11 PiperOrigin-RevId: 341780990 Change-Id: I8f73928827771aca20b83676ec8f0ae18a4c749c --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 3083f161947..19ae61be3c0 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 11, 10) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 11, 11) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From df0a5c381f9c12a585093a44f0e8f9fe66419a23 Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Wed, 11 Nov 2020 01:51:25 -0800 Subject: [PATCH 158/220] Retire the use of same_shape annotations in kernel generator. PiperOrigin-RevId: 341786415 Change-Id: I200b4de553ddc85811c99eba14654eba4397c120 --- .../mlir/tools/kernel_gen/kernel_creator.cc | 23 +-- .../mlir/tools/kernel_gen/kernel_creator.h | 1 - .../tests/tf_to_gpu_binary/abs.mlir | 4 +- .../tests/tf_to_gpu_binary/ceil.mlir | 4 +- .../tests/tf_to_gpu_binary/tanh.mlir | 4 +- .../kernel_gen/tests/tf_to_kernel/tanh.mlir | 2 +- .../mlir/tools/kernel_gen/tf_to_gpu_binary.cc | 18 +-- .../mlir/tools/kernel_gen/tf_to_kernel.cc | 13 +- .../mlir/tools/kernel_gen/transforms/BUILD | 1 - .../transforms/gpu_kernel_to_blob_pass.cc | 2 + .../mlir/tools/kernel_gen/transforms/passes.h | 7 - .../tools/kernel_gen/transforms/passes.td | 10 -- .../propagate_tf_abi_knowledge_pass.cc | 137 ------------------ tensorflow/core/kernels/mlir_generated/BUILD | 19 --- .../kernels/mlir_generated/build_defs.bzl | 13 +- 15 files changed, 27 insertions(+), 231 deletions(-) delete mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/transforms/propagate_tf_abi_knowledge_pass.cc diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc b/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc index 8850a61fc3e..528b1cd68ee 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc @@ -157,11 +157,6 @@ Status LowerTFtoGPU(mlir::ModuleOp module, bool gpu_binary_only, // Apply the mapping. pm.addNestedPass<::mlir::FuncOp>(mlir::createParallelLoopToGpuPass()); - // Embed TF Framework ops. 
- if (!gpu_binary_only) { - pm.addPass(mlir::kernel_gen::tf_framework::CreateEmbedTFFrameworkPass()); - } - // Some basic cleanup. pm.addNestedPass<::mlir::FuncOp>(::mlir::createCanonicalizerPass()); pm.addNestedPass<::mlir::FuncOp>(::mlir::createCSEPass()); @@ -182,7 +177,8 @@ Status LowerTFtoGPU(mlir::ModuleOp module, bool gpu_binary_only, xla::mlir_gpu::createRewriteKernelSignaturePass()); } pm.addPass(::mlir::createLowerAffinePass()); - + // Map allocs, asserts, etc. to the tensorflow framework. + pm.addPass(mlir::kernel_gen::tf_framework::CreateEmbedTFFrameworkPass()); // Constraints are removed as late as possible and before lowering to CFG. pm.addNestedPass<::mlir::FuncOp>(::mlir::createConvertShapeConstraintsPass()); if (embed_memref_prints) { @@ -212,8 +208,7 @@ Status AmendKernelLLVMIRWithStaticKnowledge(mlir::ModuleOp module) { : Status::OK(); } -Status GenerateDeviceCode(mlir::ModuleOp module, bool gpu_binary_only, - llvm::ArrayRef same_shape, +Status GenerateDeviceCode(mlir::ModuleOp module, llvm::StringRef gpu_binary_attr_name, llvm::ArrayRef architectures, bool generate_fatbin) { @@ -221,13 +216,6 @@ Status GenerateDeviceCode(mlir::ModuleOp module, bool gpu_binary_only, applyTensorflowAndCLOptions(pm); auto& kernel_pm = pm.nest(); - // TODO(herhut): Remove this. - if (gpu_binary_only) { - // Grab the original signature from the single function. - kernel_pm.addNestedPass( - mlir::kernel_gen::transforms::CreatePropagateTensorFlowABIKnowledgePass( - same_shape)); - } // Remove debug information to ensure we do not create debug PTX. 
kernel_pm.addPass(mlir::createStripDebugInfoPass()); kernel_pm.addPass(mlir::kernel_gen::transforms::CreateGpuKernelToBlobPass( @@ -256,7 +244,7 @@ Status LowerHostSideToFinalForm(mlir::ModuleOp module) { StatusOr GenerateKernelForTfCode( mlir::MLIRContext& context, llvm::StringRef tf_code, bool gpu_binary_only, llvm::ArrayRef architectures, - llvm::ArrayRef tile_sizes, llvm::ArrayRef same_shape, + llvm::ArrayRef tile_sizes, llvm::ArrayRef unroll_factors, bool embed_memref_prints, bool generate_fatbin) { mlir::RegisterAllTensorFlowDialects(context.getDialectRegistry()); @@ -275,8 +263,7 @@ StatusOr GenerateKernelForTfCode( TF_RETURN_IF_ERROR(xla::mlir_gpu::LowerKernelBodiesToNVVM(module.get())); #endif TF_RETURN_IF_ERROR(AmendKernelLLVMIRWithStaticKnowledge(module.get())); - TF_RETURN_IF_ERROR(GenerateDeviceCode(module.get(), gpu_binary_only, - same_shape, kGpuBinaryAttrName, + TF_RETURN_IF_ERROR(GenerateDeviceCode(module.get(), kGpuBinaryAttrName, architectures, generate_fatbin)); if (!gpu_binary_only) { TF_RETURN_IF_ERROR(LowerHostSideToFinalForm(module.get())); diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.h b/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.h index 80538c1a4a6..3caf31e39ad 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.h +++ b/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.h @@ -40,7 +40,6 @@ xla::StatusOr GenerateKernelForTfCode( mlir::MLIRContext& context, llvm::StringRef tf_code, bool gpu_binary_only, llvm::ArrayRef architectures = {"sm_75"}, llvm::ArrayRef tile_sizes = {16, 64}, - llvm::ArrayRef same_shape = {}, llvm::ArrayRef unroll_factors = {}, bool embed_memref_prints = false, bool generate_fatbin = true); diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/abs.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/abs.mlir index edb023e5fe7..51773093564 100644 --- 
a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/abs.mlir +++ b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/abs.mlir @@ -1,5 +1,5 @@ -// RUN: tf_to_gpu_binary --input=%s --output=%t --same_shape=0,1 --unroll_factors=4 --tile_sizes=256 --arch=sm_70 -func @abs(%arg0: tensor) -> tensor { +// RUN: tf_to_gpu_binary --input=%s --output=%t --unroll_factors=4 --tile_sizes=256 --arch=sm_70 +func @abs(%arg0: tensor) -> tensor attributes {tf_entry} { %0 = "tf.Abs"(%arg0) { } : (tensor) -> tensor return %0 : tensor diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/ceil.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/ceil.mlir index 25b79c47f4e..bb505809abe 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/ceil.mlir +++ b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/ceil.mlir @@ -1,5 +1,5 @@ -// RUN: tf_to_gpu_binary --input=%s --output=%t --same_shape=0,1 --unroll_factors=4 --tile_sizes=256 --arch=sm_70 -func @ceil(%arg0: tensor) -> tensor { +// RUN: tf_to_gpu_binary --input=%s --output=%t --unroll_factors=4 --tile_sizes=256 --arch=sm_70 +func @ceil(%arg0: tensor) -> tensor attributes {tf_entry} { %0 = "tf.Ceil"(%arg0) { } : (tensor) -> tensor return %0 : tensor diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/tanh.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/tanh.mlir index 69632f498a9..fa88fc76c90 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/tanh.mlir +++ b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/tanh.mlir @@ -1,5 +1,5 @@ -// RUN: tf_to_gpu_binary --input=%s --output=%t --same_shape=0,1 --unroll_factors=4 --tile_sizes=256 --arch=sm_70 -func @tanh(%arg0: tensor) -> tensor { +// RUN: tf_to_gpu_binary --input=%s --output=%t --unroll_factors=4 --tile_sizes=256 --arch=sm_70 +func @tanh(%arg0: tensor) -> tensor attributes {tf_entry} 
{ %0 = "tf.Tanh"(%arg0) : (tensor) -> tensor return %0 : tensor } diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_kernel/tanh.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_kernel/tanh.mlir index 3eb736a359e..dc8b5a75dac 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_kernel/tanh.mlir +++ b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_kernel/tanh.mlir @@ -1,4 +1,4 @@ -// RUN: tf_to_kernel --input=%s --output=%t --same_shape=0,1 --unroll_factors=4 --tile_sizes=256 --arch=sm_70,compute_75 +// RUN: tf_to_kernel --input=%s --output=%t --unroll_factors=4 --tile_sizes=256 --arch=sm_70,compute_75 func @tanh(%arg: tensor<*xf32>) -> tensor<*xf32> attributes {tf_entry} { %0 = "tf.Tanh"(%arg) : (tensor<*xf32>) -> tensor<*xf32> diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_gpu_binary.cc b/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_gpu_binary.cc index 17875863029..6f1de7dc1bc 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_gpu_binary.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_gpu_binary.cc @@ -37,7 +37,6 @@ namespace { xla::Status Run(llvm::StringRef input_file, llvm::StringRef output_file, std::string architecture, llvm::ArrayRef tile_sizes, - llvm::ArrayRef same_shape, llvm::ArrayRef unroll_factors) { // Read TF code. std::string tf_code; @@ -47,10 +46,10 @@ xla::Status Run(llvm::StringRef input_file, llvm::StringRef output_file, mlir::MLIRContext context; TF_ASSIGN_OR_RETURN( mlir::OwningModuleRef module, - GenerateKernelForTfCode( - context, tf_code, /*gpu_binary_only=*/true, architecture, tile_sizes, - same_shape, unroll_factors, - /*embed_memref_prints=*/false, /*generate_fatbin=*/false)); + GenerateKernelForTfCode(context, tf_code, /*gpu_binary_only=*/true, + architecture, tile_sizes, unroll_factors, + /*embed_memref_prints=*/false, + /*generate_fatbin=*/false)); // Extract gpu_binary. 
TF_ASSIGN_OR_RETURN(std::string gpu_binary, ExtractGpuBinary(*module)); @@ -82,18 +81,13 @@ int main(int argc, char** argv) { "unroll_factors", llvm::cl::desc("factors to unroll by, separated by commas"), llvm::cl::ZeroOrMore, llvm::cl::CommaSeparated); - llvm::cl::list same_shape( - "same_shape", - llvm::cl::desc("arguments with same shape, separated by commas"), - llvm::cl::ZeroOrMore, llvm::cl::CommaSeparated); tensorflow::InitMlir y(&argc, &argv); mlir::registerPassManagerCLOptions(); llvm::cl::ParseCommandLineOptions(argc, argv, "TF op GPU kernel generator\n"); - auto status = - tensorflow::kernel_gen::Run(input_file, output_file, architecture, - tile_sizes, same_shape, unroll_factors); + auto status = tensorflow::kernel_gen::Run( + input_file, output_file, architecture, tile_sizes, unroll_factors); if (!status.ok()) { LOG(ERROR) << status; return 1; diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_kernel.cc b/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_kernel.cc index e805830ec16..f804115e39b 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_kernel.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_kernel.cc @@ -97,7 +97,6 @@ xla::StatusOr EmitToBinary(mlir::ModuleOp module) { xla::Status Run(llvm::StringRef input_file, llvm::StringRef output_file, llvm::ArrayRef architectures, llvm::ArrayRef tile_sizes, - llvm::ArrayRef same_shape, llvm::ArrayRef unroll_factors, bool embed_memref_prints) { // Read TF code. @@ -109,8 +108,8 @@ xla::Status Run(llvm::StringRef input_file, llvm::StringRef output_file, TF_ASSIGN_OR_RETURN( mlir::OwningModuleRef module, GenerateKernelForTfCode(context, tf_code, /*gpu_binary_only=*/false, - architectures, tile_sizes, same_shape, - unroll_factors, embed_memref_prints)); + architectures, tile_sizes, unroll_factors, + embed_memref_prints)); // Get binary. 
TF_ASSIGN_OR_RETURN(std::string binary, EmitToBinary(*module)); @@ -145,10 +144,6 @@ int main(int argc, char** argv) { "unroll_factors", llvm::cl::desc("factors to unroll by, separated by commas"), llvm::cl::ZeroOrMore, llvm::cl::CommaSeparated); - llvm::cl::list same_shape( - "same_shape", - llvm::cl::desc("arguments with same shape, separated by commas"), - llvm::cl::ZeroOrMore, llvm::cl::CommaSeparated); tensorflow::InitMlir y(&argc, &argv); llvm::InitializeNativeTarget(); @@ -157,8 +152,8 @@ int main(int argc, char** argv) { llvm::cl::ParseCommandLineOptions(argc, argv, "TF op GPU kernel generator\n"); auto status = tensorflow::kernel_gen::Run( - input_file, output_file, architectures, tile_sizes, same_shape, - unroll_factors, embed_memref_prints); + input_file, output_file, architectures, tile_sizes, unroll_factors, + embed_memref_prints); if (!status.ok()) { LOG(ERROR) << status; return 1; diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD index a3391e08ac0..2c04ace99dc 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD @@ -77,7 +77,6 @@ cc_library( "gpu_kernel_to_blob_pass.cc", "materialize_broadcasts_pass.cc", "parallel_loops_to_sequential.cc", - "propagate_tf_abi_knowledge_pass.cc", "same_shape_propagation.cc", "shape_to_descriptors_pass.cc", "tensorflow_abi_knowledge_propagation.cc", diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/gpu_kernel_to_blob_pass.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/gpu_kernel_to_blob_pass.cc index 46bf13b7d20..5c58bb87680 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/gpu_kernel_to_blob_pass.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/gpu_kernel_to_blob_pass.cc @@ -68,6 +68,8 @@ class GpuKernelToBlobPass mlir::StringAttr::get(blob_string, &getContext())); return; } + // Forward the error by 
attaching the message to the gpu module. + gpu_module.emitError(blob_or.status().error_message()); return signalPassFailure(); } diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h index b5b30e37122..2acb74e4ceb 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h @@ -57,13 +57,6 @@ std::unique_ptr CreateMaterializeBroadcastsPass(); // Pass to convert scf::ParallelOp to scf::ForOp. std::unique_ptr CreateParallelLoopsToSequential(); -// Pass to propagate TF ABI knowledge, e.g. offsets, alignment. -// This is very limited and will be removed soon. -// TODO(herhut): Remove this. -std::unique_ptr> -CreatePropagateTensorFlowABIKnowledgePass( - llvm::ArrayRef same_shape = {}); - // Pass to annotate GPU Module with its PTX. std::unique_ptr> CreateGpuKernelToBlobPass( mlir::StringRef blob_annotation = "", diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td index 2c0f81b7bce..733cf917b33 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td @@ -71,16 +71,6 @@ def ParallelLoopsToSequential : FunctionPass<"parallel-loops-to-sequential"> { let constructor = "transforms::CreateParallelLoopsToSequential()"; } -def PropagateTensorFlowABIKnowledgePass - : Pass<"propagate-tf-abi-knowledge", "LLVM::LLVMFuncOp"> { - let summary = "Pass to propagate TF ABI knowledge, e.g. 
offsets, alignment"; - let options = [ - ListOption<"same_shape_", "same-shape", "uint32_t", - "List of same shape args">, - ]; - let constructor = "transforms::CreatePropagateTensorFlowABIKnowledgePass()"; -} - def PropagateTfAbiKnowledgeToKernels : FunctionPass<"propagate-tf-abi-knowledge-to-kernels"> { let summary = "Pass to propagate tensorflow ABI knowledge to kernels"; diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/propagate_tf_abi_knowledge_pass.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/propagate_tf_abi_knowledge_pass.cc deleted file mode 100644 index 3b568f5f25f..00000000000 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/propagate_tf_abi_knowledge_pass.cc +++ /dev/null @@ -1,137 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "mlir/IR/Function.h" // from @llvm-project -#include "mlir/IR/OperationSupport.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h" -#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h" - -namespace mlir { -namespace kernel_gen { -namespace transforms { -namespace { - -#define GEN_PASS_CLASSES -#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/kernel_gen_passes.h.inc" - -struct PropagateTensorFlowABIKnowledgePass - : public PropagateTensorFlowABIKnowledgePassBase< - PropagateTensorFlowABIKnowledgePass> { - explicit PropagateTensorFlowABIKnowledgePass( - llvm::ArrayRef same_shape) { - same_shape_ = same_shape; - } - - void runOnOperation() override { - // We know due to tensorflow ABI that the offset is always 0 and that the - // innermost stride is always 1. To make this visible to the compiler, - // we insert constants into the code and replace usages accordingly. - // We do not change the signature so that we keep a somewhat stable ABI - // that is easy to undertand by tools. - // We also know that tensorflow aligns all allocated pointers by 16, so - // we pass this on. Furthermore, we know that arguments never alias. More - // precicely, they may only alias (due to reuse) if the kernel does not - // read from a position it previously has written to. We express this with - // the noalias attribute. - mlir::LLVM::LLVMFuncOp func = getOperation(); - - // This only works if the function is local and we can rewrite it. 
- if (func.isExternal()) return; - - auto function_list = - func.getParentOfType().getOps(); - if (function_list.empty()) { - func.emitError() << "No possible kernel function found"; - return signalPassFailure(); - } - auto func_iterator = function_list.begin(); - if (std::next(func_iterator) != function_list.end()) { - func.emitError() << "More than one possible kernel function detected"; - return signalPassFailure(); - } - // Note that this dereference is necessary to prevent a - // stack-use-after-return error. - auto func_type = (*func_iterator).getType(); - - mlir::OpBuilder b(func.getBody()); - // Steal the LLVM representation of the index type from the third argument. - auto index_type = func.getArgument(3).getType(); - mlir::Value one = b.create( - func.getLoc(), index_type, b.getIntegerAttr(b.getIndexType(), 1)); - mlir::Value zero = b.create( - func.getLoc(), index_type, b.getIntegerAttr(b.getIndexType(), 0)); - uint32_t arg_pos = 0; - std::vector positions; - // Collect the agument and return types of the surrounding function. - auto arg_types = llvm::to_vector<4>(llvm::concat( - func_type.getInputs(), func_type.getResults())); - for (mlir::Type arg_type : arg_types) { - if (!arg_type.isa()) { - func.emitError() << "argument of surrounding func is not ranked memref"; - return signalPassFailure(); - } - positions.push_back(arg_pos); - // Set alignment and aliasing on the pointers. - func.setArgAttr(arg_pos + 1, "llvm.noalias", b.getBoolAttr(true)); - func.setArgAttr(arg_pos + 1, "llvm.align", b.getIndexAttr(16)); - // Replace the offset with zero. Offset is argument number 3. - func.getArgument(arg_pos + 2).replaceAllUsesWith(zero); - // Forward over base_ptr, aligned_ptr, offset, size and stride arguments. - arg_pos += 3 + arg_type.cast().getRank() * 2; - // Replace the last stride with constant 1. - func.getArgument(arg_pos - 1).replaceAllUsesWith(one); - } - - // If we have knowledge that some arguments have the same shape, we - // can use that here. 
Simply replace usages of the shape parameters within - // the function body to a single shape parameter. - if (same_shape_.empty()) { - return; - } - auto first = same_shape_.front(); - auto first_offset = positions.at(first); - auto first_type = arg_types[first].cast(); - uint32_t rank = first_type.getRank(); - for (int i = 1, e = same_shape_.size(); i < e; ++i) { - uint32_t same = same_shape_[i]; - uint32_t same_offset = positions.at(same); - auto same_type = arg_types[same].cast(); - if (same_type.getRank() != rank) { - func.emitOpError() << "same shape constraints on arguments with " - "non-matching shapes: #" - << first << " and #" << same; - return signalPassFailure(); - } - - for (uint32_t i = 0; i < 2 * rank; ++i) { - // Replace uses for second arg data with first arg. - auto same_arg = func.getArgument(same_offset + 3 + i); - auto first_arg = func.getArgument(first_offset + 3 + i); - same_arg.replaceAllUsesWith(first_arg); - } - } - } -}; - -} // namespace - -std::unique_ptr> -CreatePropagateTensorFlowABIKnowledgePass(llvm::ArrayRef same_shape) { - return std::make_unique(same_shape); -} - -} // namespace transforms -} // namespace kernel_gen -} // namespace mlir diff --git a/tensorflow/core/kernels/mlir_generated/BUILD b/tensorflow/core/kernels/mlir_generated/BUILD index 7322e7912a5..a75347392d6 100644 --- a/tensorflow/core/kernels/mlir_generated/BUILD +++ b/tensorflow/core/kernels/mlir_generated/BUILD @@ -212,7 +212,6 @@ tf_cuda_cc_test( # TODO(b/160731748): Re-enable when it works again. # gen_kernel_library( # name = "bias_add", -# same_shape = "0,2", # tile_size = "16x16", # types = [ # "f16", @@ -224,7 +223,6 @@ tf_cuda_cc_test( # TODO(b/160190568): Re-enable when it works again. 
# gen_kernel_library( # name = "relu", -# same_shape = "0,1", # tile_size = "256", # types = [ # "f16", @@ -236,7 +234,6 @@ tf_cuda_cc_test( gen_kernel_library( name = "abs", generate_unranked = True, - same_shape = "0,1", tile_size = "256", types = [ "f16", @@ -251,7 +248,6 @@ gen_kernel_library( gen_kernel_library( name = "ceil", generate_unranked = True, - same_shape = "0,1", tile_size = "256", types = [ "f16", @@ -263,7 +259,6 @@ gen_kernel_library( gen_kernel_library( name = "conj", - same_shape = "0,1", tile_size = "256", types = [ "c64", @@ -275,7 +270,6 @@ gen_kernel_library( gen_kernel_library( name = "cos", generate_unranked = True, - same_shape = "0,1", tile_size = "256", types = [ "f16", @@ -288,7 +282,6 @@ gen_kernel_library( gen_kernel_library( name = "exp", generate_unranked = True, - same_shape = "0,1", tile_size = "256", types = [ "f16", @@ -301,7 +294,6 @@ gen_kernel_library( gen_kernel_library( name = "floor", generate_unranked = True, - same_shape = "0,1", tile_size = "256", types = [ "f16", @@ -324,7 +316,6 @@ gen_kernel_library( gen_kernel_library( name = "invert", generate_unranked = True, - same_shape = "0,1", tile_size = "256", types = [ "i8", @@ -338,7 +329,6 @@ gen_kernel_library( gen_kernel_library( name = "isfinite", generate_unranked = True, - same_shape = "0,1", tile_size = "256", types = [ "f16", @@ -351,7 +341,6 @@ gen_kernel_library( gen_kernel_library( name = "log", generate_unranked = True, - same_shape = "0,1", tile_size = "256", types = [ "f16", @@ -364,7 +353,6 @@ gen_kernel_library( gen_kernel_library( name = "logicalnot", generate_unranked = True, - same_shape = "0,1", tile_size = "256", types = ["i1"], unroll_factors = "4", @@ -373,7 +361,6 @@ gen_kernel_library( gen_kernel_library( name = "neg", generate_unranked = True, - same_shape = "0,1", tile_size = "256", types = [ "f16", @@ -396,7 +383,6 @@ gen_kernel_library( gen_kernel_library( name = "rsqrt", generate_unranked = True, - same_shape = "0,1", tile_size = "256", 
types = [ "f16", @@ -409,7 +395,6 @@ gen_kernel_library( gen_kernel_library( name = "sign", generate_unranked = True, - same_shape = "0,1", tile_size = "256", types = [ # TODO(b/162577610): Add bf16, c64 and c128. @@ -426,7 +411,6 @@ gen_kernel_library( name = "sin", generate_ranked = False, generate_unranked = True, - same_shape = "0,1", tile_size = "256", types = [ "f16", @@ -439,7 +423,6 @@ gen_kernel_library( gen_kernel_library( name = "sqrt", generate_unranked = True, - same_shape = "0,1", tile_size = "256", types = [ "f16", @@ -452,7 +435,6 @@ gen_kernel_library( gen_kernel_library( name = "tanh", generate_unranked = True, - same_shape = "0,1", tile_size = "256", types = [ "f16", @@ -466,7 +448,6 @@ gen_kernel_library( name = "addv2", generate_ranked = False, generate_unranked = True, - same_shape = "0,1", tile_size = "256", types = [ "f16", diff --git a/tensorflow/core/kernels/mlir_generated/build_defs.bzl b/tensorflow/core/kernels/mlir_generated/build_defs.bzl index 5b4daac8820..6240f3145a5 100644 --- a/tensorflow/core/kernels/mlir_generated/build_defs.bzl +++ b/tensorflow/core/kernels/mlir_generated/build_defs.bzl @@ -37,8 +37,6 @@ def _gen_kernel_gpu_bin_impl(ctx): name = ctx.attr.name tile_sizes = ctx.attr.tile_size.replace("x", ",") cmd_args = [] - if ctx.attr.same_shape: - cmd_args.append("--same_shape=%s" % ctx.attr.same_shape) if ctx.attr.unroll_factors: cmd_args.append("--unroll_factors=%s" % ctx.attr.unroll_factors) @@ -70,7 +68,6 @@ _gen_kernel_gpu_bin_rule = rule( attrs = { "mlir_op": attr.label(mandatory = True, allow_single_file = True), "tile_size": attr.string(mandatory = True), - "same_shape": attr.string(), "unroll_factors": attr.string(), "gpu_archs": attr.string_list(mandatory = True), "extra_args": attr.string_list(), @@ -181,13 +178,12 @@ _gen_kernel_image_hdr_rule = rule( }, ) -def _gen_kernel_image_hdr(name, mlir_op, gpu_archs, tile_size, same_shape = None, unroll_factors = None, extra_args = []): +def _gen_kernel_image_hdr(name, 
mlir_op, gpu_archs, tile_size, unroll_factors = None, extra_args = []): """Generates a C header with fatbin data from a Tensorflow op.""" _gen_kernel_gpu_bin_rule( name = name + "_cubin", mlir_op = mlir_op, tile_size = tile_size, - same_shape = same_shape, unroll_factors = unroll_factors, gpu_archs = gpu_archs, extra_args = extra_args, @@ -240,7 +236,7 @@ def _gen_mlir_op(name, type, unranked): unranked = unranked, ) -def gen_ranked_kernel_library(name, types, tile_size, tags = [], same_shape = None, unroll_factors = None, extra_args = []): +def gen_ranked_kernel_library(name, types, tile_size, tags = [], unroll_factors = None, extra_args = []): """ Generate a library with kernels for a specific tensorflow op. Args: @@ -249,7 +245,6 @@ def gen_ranked_kernel_library(name, types, tile_size, tags = [], same_shape = No tile_size: The tiling specification, e.g. "16x16". unroll_factors: The unrolling specification, e.g. "4,4" tags: The tags which should be added to the library. - same_shape: The information about which shapes are the same, e.g. "0,1". extra_args: Extra arguments to pass to the generator tool. 
""" @@ -265,7 +260,6 @@ def gen_ranked_kernel_library(name, types, tile_size, tags = [], same_shape = No mlir_op = "{name}_{type}.mlir".format(name = name, type = type), gpu_archs = rocm_gpu_architectures() if rocm_is_configured() else cuda_gpu_architectures(), tile_size = tile_size, - same_shape = same_shape, unroll_factors = unroll_factors, extra_args = extra_args, ) @@ -367,14 +361,13 @@ def gen_unranked_kernel_library(name, types, tile_size, tags = [], unroll_factor tags = tags, ) -def gen_kernel_library(name, types, tile_size, tags = [], same_shape = None, unroll_factors = None, extra_args = [], generate_ranked = True, generate_unranked = False): +def gen_kernel_library(name, types, tile_size, tags = [], unroll_factors = None, extra_args = [], generate_ranked = True, generate_unranked = False): if (generate_ranked): gen_ranked_kernel_library( name = name, types = types, tile_size = tile_size, tags = tags, - same_shape = same_shape, unroll_factors = unroll_factors, extra_args = extra_args, ) From 04b9cd834bd6fd9dc12ee3c16e2d4b9cb6e98387 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 11 Nov 2020 02:14:13 -0800 Subject: [PATCH 159/220] [MLIR][KernelGen] Generate multiple kernels with the same attributes simultaneously PiperOrigin-RevId: 341789268 Change-Id: I5cf9edf74336cdcbd095da52cbf5e83d86ba5590 --- tensorflow/core/kernels/mlir_generated/BUILD | 146 ++++--------------- 1 file changed, 26 insertions(+), 120 deletions(-) diff --git a/tensorflow/core/kernels/mlir_generated/BUILD b/tensorflow/core/kernels/mlir_generated/BUILD index a75347392d6..f36fc92535f 100644 --- a/tensorflow/core/kernels/mlir_generated/BUILD +++ b/tensorflow/core/kernels/mlir_generated/BUILD @@ -245,18 +245,6 @@ gen_kernel_library( unroll_factors = "4", ) -gen_kernel_library( - name = "ceil", - generate_unranked = True, - tile_size = "256", - types = [ - "f16", - "f32", - "f64", - ], - unroll_factors = "4", -) - gen_kernel_library( name = "conj", tile_size = "256", @@ -267,42 +255,6 @@ gen_kernel_library( unroll_factors = "4", ) -gen_kernel_library( - name = "cos", - generate_unranked = True, - tile_size = "256", - types = [ - "f16", - "f32", - "f64", - ], - unroll_factors = "4", -) - -gen_kernel_library( - name = "exp", - generate_unranked = True, - tile_size = "256", - types = [ - "f16", - "f32", - "f64", - ], - unroll_factors = "4", -) - -gen_kernel_library( - name = "floor", - generate_unranked = True, - tile_size = "256", - types = [ - "f16", - "f32", - "f64", - ], - unroll_factors = "4", -) - gen_kernel_library( name = "imag", tile_size = "256", @@ -326,30 +278,6 @@ gen_kernel_library( unroll_factors = "4", ) -gen_kernel_library( - name = "isfinite", - generate_unranked = True, - tile_size = "256", - types = [ - "f16", - "f32", - "f64", - ], - unroll_factors = "4", -) - -gen_kernel_library( - name = "log", - generate_unranked = True, - tile_size = "256", - types = [ - "f16", - "f32", - "f64", - ], - unroll_factors = "4", -) - gen_kernel_library( name = "logicalnot", generate_unranked = True, @@ -358,18 +286,6 @@ gen_kernel_library( 
unroll_factors = "4", ) -gen_kernel_library( - name = "neg", - generate_unranked = True, - tile_size = "256", - types = [ - "f16", - "f32", - "f64", - ], - unroll_factors = "4", -) - gen_kernel_library( name = "real", tile_size = "256", @@ -380,18 +296,6 @@ gen_kernel_library( unroll_factors = "4", ) -gen_kernel_library( - name = "rsqrt", - generate_unranked = True, - tile_size = "256", - types = [ - "f16", - "f32", - "f64", - ], - unroll_factors = "4", -) - gen_kernel_library( name = "sign", generate_unranked = True, @@ -420,30 +324,6 @@ gen_kernel_library( unroll_factors = "4", ) -gen_kernel_library( - name = "sqrt", - generate_unranked = True, - tile_size = "256", - types = [ - "f16", - "f32", - "f64", - ], - unroll_factors = "4", -) - -gen_kernel_library( - name = "tanh", - generate_unranked = True, - tile_size = "256", - types = [ - "f16", - "f32", - "f64", - ], - unroll_factors = "4", -) - gen_kernel_library( name = "addv2", generate_ranked = False, @@ -457,3 +337,29 @@ gen_kernel_library( ], unroll_factors = "4", ) + +[ + gen_kernel_library( + name = name, + generate_unranked = True, + tile_size = "256", + types = [ + "f16", + "f32", + "f64", + ], + unroll_factors = "4", + ) + for name in [ + "ceil", + "cos", + "exp", + "floor", + "isfinite", + "log", + "neg", + "rsqrt", + "sqrt", + "tanh", + ] +] From e7a139356239c05dcbade7d118e7d22b1a81873e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 11 Nov 2020 02:42:52 -0800 Subject: [PATCH 160/220] [MLIR][KernelGen] Fix buffer reuse analysis Fix the case where an argument buffer has an alias outside the block in which the new buffer is allocated and used. Also, add assertions to clarify expected properties. 
PiperOrigin-RevId: 341792380 Change-Id: I046c16974db8309062fb2174a5131568cdd64fc0 --- .../tools/kernel_gen/tests/buffer_reuse.mlir | 22 +++++++++++++++++++ .../transforms/buffer_reuse_pass.cc | 20 +++++++++-------- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/buffer_reuse.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/buffer_reuse.mlir index 33e2074d870..7c519057580 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/tests/buffer_reuse.mlir +++ b/tensorflow/compiler/mlir/tools/kernel_gen/tests/buffer_reuse.mlir @@ -328,3 +328,25 @@ func @abs_unranked_i64(%arg : memref<*xi64>, : (memref, memref) -> memref<*xi64> return %result : memref<*xi64> } + +// CHECK-LABEL: @old_buffer_alias_outside_block +func @old_buffer_alias_outside_block(%arg: memref<3xf32>) + attributes {llvm.emit_c_interface, tf_entry} { + %c0 = constant 0 : index + %c1 = constant 1 : index + %true = constant true + + // Alias outside of the block with the new buffer allocation. + %alias = memref_cast %arg : memref<3xf32> to memref<3xf32> + + scf.if %true { + + // Allocation and use of new buffer. 
+ // CHECK: reuse_input_candidates = [0 : index] + %mem = alloc() : memref<3xf32> + %use = load %mem[%c0] : memref<3xf32> + + } else { + } + return +} diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/buffer_reuse_pass.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/buffer_reuse_pass.cc index fd986cf573e..69e1f26919e 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/buffer_reuse_pass.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/buffer_reuse_pass.cc @@ -171,23 +171,24 @@ class BufferReuseAnalysis { Liveness liveness(f); BufferSizeAnalysis size_equivalences(f); f.walk([&](Block *block) { - find_reuse_candiates(*block, aliases, liveness.getLiveness(block), + find_reuse_candiates(block, aliases, liveness.getLiveness(block), size_equivalences, f.getArguments()); }); } - void find_reuse_candiates(Block &block, BufferAliasAnalysis &aliases, + void find_reuse_candiates(Block *block, BufferAliasAnalysis &aliases, const LivenessBlockInfo *liveness, BufferSizeAnalysis &size_equivalences, ArrayRef arguments) { - for (Operation &op : block) { + for (Operation &op : *block) { auto alloc_op = dyn_cast(op); if (!alloc_op) continue; // Find first use of the newly allocated buffer within this block. Value new_buffer = alloc_op.getResult(); - Operation *first_reuse = find_first_use_in_block( - new_buffer, alloc_op.getOperation()->getBlock()); + Operation *first_reuse = find_first_use_in_block(new_buffer, block); + assert((first_reuse == nullptr || first_reuse->getBlock() == block) && + "Expected first use in same block if found."); // Find reuse candidates for the regarded allocation. SmallVector local_reuse_candidates; @@ -216,9 +217,10 @@ class BufferReuseAnalysis { // i) its last use is after the point of reuse, or // ii) its last use is also its first reuse but the operation // does not allow for local reuse. 
- Operation *last_use = liveness->getEndOperation( - old_buffer_alias, - liveness->getStartOperation(old_buffer_alias)); + Operation *last_use = + liveness->getEndOperation(old_buffer_alias, &block->front()); + assert(last_use != nullptr && last_use->getBlock() == block && + "Expected last use in same block."); if (first_reuse->isBeforeInBlock(last_use)) { livetimes_compatible = false; break; @@ -272,7 +274,7 @@ class BufferReuseAnalysis { op->getOperands().end() && llvm::find(op->getOperands(), new_buffer) != op->getOperands().end() && - "expect `old/new_buffer` to be operand of `op`"); + "Expect `old/new_buffer` to be operand of `op`."); // If `linalg.generic` indexing maps are the same for input and output // buffer then the last use of the input buffer happens before its first From 58d8f3f1623c315e0351108fd2319c71716d19e5 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Wed, 11 Nov 2020 07:01:31 -0800 Subject: [PATCH 161/220] [NFC] Clean up some comments in dynamic dimension inference. PiperOrigin-RevId: 341822139 Change-Id: I55b1091e0d4c05712a618ea4de93a0eaabad174d --- .../xla/service/dynamic_dimension_inference.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc b/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc index f079c0bc10a..8b220e1833b 100644 --- a/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc +++ b/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc @@ -1373,12 +1373,12 @@ Status DynamicDimensionInferenceVisitor::HandleScatter(HloInstruction* hlo) { } Status DynamicDimensionInferenceVisitor::HandleWhile(HloInstruction* hlo) { - // If the output of the conditional contains dynamic dimension. We send - // dynamic dimension size out by adding additional root element. 
A mapping - // from the root instruction's dynamic dimension index (represented by a shape - // index as output index and a int64 dimension number) to output index - // (represented by an int64) is tracked for the conditional instruction (all - // branches should have the same mapping). + // If the output of the kWhile contains dynamic dimension, we send + // dynamic dimension size into the while body by adding additional root/body + // element. A mapping from the root instruction's dynamic dimension index + // (represented by a shape index as output index and an int64 dimension + // number) to output index (represented by an int64) is tracked for the + // conditional instruction. ShapeTree> dynamic_output_mapping( hlo->shape()); std::vector operands_to_add; From b9bb7ce80bfb09fffac2db27b5f91d2f1f4a8e57 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 11 Nov 2020 07:25:14 -0800 Subject: [PATCH 162/220] [MLIR][KernelGen] Fix size analysis for `index` type PiperOrigin-RevId: 341825455 Change-Id: I5616d6c3174983c4471d13ccb06e64f6ebd8dc75 --- .../tools/kernel_gen/tests/buffer_reuse.mlir | 12 +++++++++++- .../kernel_gen/transforms/buffer_reuse_pass.cc | 18 +++++++++++++++--- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/buffer_reuse.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/buffer_reuse.mlir index 7c519057580..b297c72af5d 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/tests/buffer_reuse.mlir +++ b/tensorflow/compiler/mlir/tools/kernel_gen/tests/buffer_reuse.mlir @@ -342,7 +342,8 @@ func @old_buffer_alias_outside_block(%arg: memref<3xf32>) scf.if %true { // Allocation and use of new buffer. 
- // CHECK: reuse_input_candidates = [0 : index] + // CHECK: alloc + // CHECK-SAME: reuse_input_candidates = [0 : index] %mem = alloc() : memref<3xf32> %use = load %mem[%c0] : memref<3xf32> @@ -350,3 +351,12 @@ func @old_buffer_alias_outside_block(%arg: memref<3xf32>) } return } + +// CHECK-LABEL: @index_element_type +func @index_element_type(%arg : memref<2x3xindex>) -> memref<2x3xindex> + attributes {tf_entry} { + // CHECK: alloc + // CHECK-SAME: reuse_input_candidates = [0 : index] + %result = alloc() : memref<2x3xindex> + return %result : memref<2x3xindex> +} diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/buffer_reuse_pass.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/buffer_reuse_pass.cc index 69e1f26919e..a184b80ea44 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/buffer_reuse_pass.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/buffer_reuse_pass.cc @@ -63,7 +63,8 @@ class BufferSizeAnalysis { void build(FuncOp &f) { auto buffers = find_buffer_values(f); - // Memrefs with statically known same shape must be of the same size. + // Memrefs with statically known same shape and same symbol-free affine maps + // must be of the same size. 
int n = buffers.size(); for (int i = 0; i < n; ++i) { for (int j = i + 1; j < n; ++j) { @@ -72,8 +73,10 @@ class BufferSizeAnalysis { auto a_ty = a.getType().dyn_cast(); auto b_ty = b.getType().dyn_cast(); if (a_ty && b_ty && a_ty.hasStaticShape() && b_ty.hasStaticShape() && - a_ty.getSizeInBits() == b_ty.getSizeInBits() && - a_ty.getAffineMaps() == b_ty.getAffineMaps()) { + a_ty.getNumElements() == b_ty.getNumElements() && + a_ty.getElementType() == b_ty.getElementType() && + affine_maps_symbol_free_and_equal(a_ty.getAffineMaps(), + b_ty.getAffineMaps())) { ecs_.unionSets(a, b); } } @@ -105,6 +108,15 @@ class BufferSizeAnalysis { }); } + bool affine_maps_symbol_free_and_equal(ArrayRef as, + ArrayRef bs) { + auto is_symbol_free = [](AffineMap map) { + return map.getNumSymbols() == 0; + }; + return llvm::all_of(as, is_symbol_free) && + llvm::all_of(bs, is_symbol_free) && as == bs; + } + llvm::SmallVector find_buffer_values(FuncOp f) { llvm::SmallVector buffers; f.walk([&](Operation *op) { From a839007bfdcc3f91583fd26f48fc9f3c77c81ffe Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 11 Nov 2020 08:11:42 -0800 Subject: [PATCH 163/220] [MLIR][KernelGen] Add buffer reuse analysis pass to kernel creator PiperOrigin-RevId: 341832926 Change-Id: Ib69e74c99c7748c53d97b08394d17cd9a93b40f8 --- tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc b/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc index 528b1cd68ee..0207f3c8188 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc @@ -152,6 +152,11 @@ Status LowerTFtoGPU(mlir::ModuleOp module, bool gpu_binary_only, // Some basic cleanup. 
pm.addNestedPass<::mlir::FuncOp>(::mlir::createCanonicalizerPass()); pm.addNestedPass<::mlir::FuncOp>(::mlir::createCSEPass()); + if (!gpu_binary_only) { + // Find candidates for buffer reuse. + pm.addNestedPass( + mlir::kernel_gen::transforms::CreateBufferReusePass()); + } // Greedily map the remaining loop to GPU hardware dimensions. pm.addNestedPass<::mlir::FuncOp>(xla::mlir_gpu::createMapParallelLoopsPass()); // Apply the mapping. From e0d555fd3270570b8e7e143eba6017f7ff9ba987 Mon Sep 17 00:00:00 2001 From: Marissa Ikonomidis Date: Wed, 11 Nov 2020 09:28:24 -0800 Subject: [PATCH 164/220] Update metrics to better support tensors that are not fully defined If a tensor is not fully defined, broadcast_weights calls DenseToDenseSetOperation which isn't supported by TPUs. The DenseToDenseSetOperation is called during an assert before the weights are broadcasted to check if the values and the weights have valid dimensions for broadcasting. TPUs do not support asserts so executing the assert when on a TPU just takes time without having any beneficial effect. Instead, on TPUs / GPUs, skip the assert. PiperOrigin-RevId: 341845786 Change-Id: Ibcacc025df5a35fe1a3ad4357547623832bc7f55 --- .../custom_training_loop_metrics_test.py | 25 +++++++++++++++++++ .../python/ops/weights_broadcast_ops.py | 7 ++++++ 2 files changed, 32 insertions(+) diff --git a/tensorflow/python/keras/distribute/custom_training_loop_metrics_test.py b/tensorflow/python/keras/distribute/custom_training_loop_metrics_test.py index 08a1b7a0e1a..00212416f86 100644 --- a/tensorflow/python/keras/distribute/custom_training_loop_metrics_test.py +++ b/tensorflow/python/keras/distribute/custom_training_loop_metrics_test.py @@ -100,6 +100,31 @@ class KerasMetricsTest(test.TestCase, parameterized.TestCase): # of 10 resulting in mean of 4.5. 
self.assertEqual(metric.result().numpy(), 4.5) + @ds_combinations.generate( + combinations.combine( + distribution=strategy_combinations.all_strategies, mode=["eager"])) + def test_update_keras_metrics_dynamic_shape(self, distribution): + with distribution.scope(): + metric = metrics.Mean("test_metric", dtype=np.float32) + + dataset = dataset_ops.Dataset.range(10).batch(2, drop_remainder=False) + + @def_function.function + def train_fn(dataset): + weights = constant_op.constant([0.1, 0.1]) + + def step_fn(i): + metric.update_state(i, weights) + + for i in dataset: + distribution.run(step_fn, args=(i,)) + + train_fn(dataset) + + # This should be the mean of integers 0-9 which has a sum of 45 and a count + # of 10 resulting in mean of 4.5. + self.assertEqual(metric.result().numpy(), 4.5) + if __name__ == "__main__": multi_process_runner.test_main() diff --git a/tensorflow/python/ops/weights_broadcast_ops.py b/tensorflow/python/ops/weights_broadcast_ops.py index bfaa7e08fe9..01dc7d784e1 100644 --- a/tensorflow/python/ops/weights_broadcast_ops.py +++ b/tensorflow/python/ops/weights_broadcast_ops.py @@ -166,6 +166,13 @@ def broadcast_weights(weights, values): weights_shape.is_compatible_with(values_shape)): return weights + # Skip the assert_broadcastable on TPU/GPU because asserts are not + # supported so it only causes unnecessary ops. Also skip it because it uses + # a DenseToDenseSetOperation op that is incompatible with the TPU/GPU when + # the shape(s) are dynamic. + if control_flow_ops.get_enclosing_xla_context() is not None: + return math_ops.multiply( + weights, array_ops.ones_like(values), name=scope) with ops.control_dependencies((assert_broadcastable(weights, values),)): return math_ops.multiply( weights, array_ops.ones_like(values), name=scope) From 6ff564605f8cc510444cdbcb4579d9a43d3e480f Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Wed, 11 Nov 2020 09:29:48 -0800 Subject: [PATCH 165/220] [MLIR:XLA] Fix gpu_ops.mlir test to do an actual FileCheck. 
- Also fix some of the incorrect checks. PiperOrigin-RevId: 341846075 Change-Id: Id70d43e4da9485b6f944d1b84fdd3c4fd9127f62 --- .../mlir/xla/tests/hlo_to_lhlo_with_xla/gpu_ops.mlir | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/gpu_ops.mlir b/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/gpu_ops.mlir index 83c156554cd..214b45d0180 100644 --- a/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/gpu_ops.mlir +++ b/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/gpu_ops.mlir @@ -1,14 +1,13 @@ -// RUN: xla-opt -split-input-file "-xla-hlo-to-lhlo-with-xla=platform=CUDA" %s -//// | FILECHECK_OPTS="" FileCheck --enable-var-scope %s +// RUN: xla-opt -split-input-file "-xla-hlo-to-lhlo-with-xla=platform=CUDA" %s | FileCheck %s // CHECK-LABEL: func @main // CHECK-SAME: %[[ARG0:.*]]: memref<3x3xi32> // CHECK-SAME: %[[ARG1:.*]]: memref<2xi32> // CHECK-SAME: %[[ARG2:.*]]: memref<2x3xi32> // CHECK-SAME: %[[ARG3:.*]]: memref<36xi8> {lmhlo.alloc = 0 -// CHECK: %[[VIEW0:.*]] = std.view %[[ARG3]]{{.*}} : memref<36xi8> to memref3x3xi32> +// CHECK: %[[VIEW0:.*]] = std.view %[[ARG3]]{{.*}} : memref<36xi8> to memref<3x3xi32> // CHECK: "lmhlo.copy"(%[[ARG0]], %[[VIEW0]]) -// CHECK: %[[VIEW1:.*]] = std.view %[[ARG3]]{{.*}} : memref<100xi8> to memref<5x5xf32> +// CHECK: %[[VIEW1:.*]] = std.view %[[ARG3]]{{.*}} : memref<36xi8> to memref<3x3xi32> // CHECK: "lmhlo.scatter"(%[[VIEW0]], %[[ARG1]], %[[ARG2]], %[[VIEW1]]) // CHECK: mhlo.add // CHECK: indices_are_sorted = false @@ -17,6 +16,7 @@ // CHECK: scatter_dims_to_operand_dims = dense<0> : tensor<1xi64> // CHECK: update_window_dims = dense<1> : tensor<1xi64> // CHECK: unique_indices = false +// CHECK: (memref<3x3xi32>, memref<2xi32>, memref<2x3xi32>, memref<3x3xi32>) -> () func @main(%operand:tensor<3x3xi32>, %indices: tensor<2xi32>, %updates: tensor<2x3xi32>) -> tensor<3x3xi32> { %result = "mhlo.scatter"(%operand, %indices, %updates) ( 
{ ^bb0(%x: tensor, %y : tensor): From cc39a32e8671e4a85f7aae6ded0bb091d4e648e4 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Wed, 11 Nov 2020 10:00:22 -0800 Subject: [PATCH 166/220] Extract some duplicated code into a helper function. - Extract code to create result memref's into a ConvertResults function. - Also fix a bug when using reifyReturnTypes: use correct index for result_shape instead of always using the first element. PiperOrigin-RevId: 341852227 Change-Id: I2a9bc77645e9ee6b94b8da438fa2ca137b8ab444 --- .../mhlo/transforms/hlo_legalize_to_lhlo.cc | 71 ++++++++----------- 1 file changed, 28 insertions(+), 43 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/hlo_legalize_to_lhlo.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/hlo_legalize_to_lhlo.cc index 6710d371cfd..24f12d41b39 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/hlo_legalize_to_lhlo.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/hlo_legalize_to_lhlo.cc @@ -87,6 +87,32 @@ Value InsertAlloc(Location loc, OpResult result, return alloc; } +/// Converts the results of the operation `op` to memref types and append them +/// to the `results` vector. 
+LogicalResult ConvertResults(Operation* op, SmallVectorImpl& results, + ConversionPatternRewriter& rewriter) { + for (auto result : llvm::enumerate(op->getResults())) { + RankedTensorType resultType = + result.value().getType().dyn_cast(); + if (!resultType) return failure(); + + if (resultType.hasStaticShape()) { + results.push_back(InsertAlloc(op->getLoc(), result.value(), &rewriter)); + continue; + } + auto shape_type_op = dyn_cast(op); + if (!shape_type_op) return failure(); + + SmallVector results_shape; + auto status = shape_type_op.reifyReturnTypeShapes(rewriter, results_shape); + if (failed(status)) return failure(); + results.push_back( + InsertDynamicAllocAndDealloc(op->getLoc(), result.value(), + results_shape[result.index()], &rewriter)); + } + return success(); +} + template class HloToLhloOpConverter : public BaseOpConversion { public: @@ -95,29 +121,8 @@ class HloToLhloOpConverter : public BaseOpConversion { HloOpTy hloOp, ArrayRef operands, ConversionPatternRewriter& rewriter) const final { Operation* op = hloOp.getOperation(); - const auto& original_results = op->getResults(); SmallVector buffer_args(operands.begin(), operands.end()); - for (auto result : llvm::enumerate(original_results)) { - RankedTensorType resultType = - result.value().getType().dyn_cast(); - if (!resultType) { - return failure(); - } - if (resultType.hasStaticShape()) { - buffer_args.push_back( - InsertAlloc(op->getLoc(), result.value(), &rewriter)); - } else { - auto shape_type_op = dyn_cast(op); - if (!shape_type_op) return failure(); - - SmallVector results_shape; - auto status = - shape_type_op.reifyReturnTypeShapes(rewriter, results_shape); - if (failed(status)) return failure(); - buffer_args.push_back(InsertDynamicAllocAndDealloc( - op->getLoc(), result.value(), results_shape.front(), &rewriter)); - } - } + if (failed(ConvertResults(op, buffer_args, rewriter))) return failure(); rewriter.create>(op->getLoc(), llvm::None, buffer_args, op->getAttrs()); 
rewriter.replaceOp( @@ -139,28 +144,8 @@ class HloToLhloOpConverter : public BaseOpConversion { mhlo::DotOp hloOp, ArrayRef operands, ConversionPatternRewriter& rewriter) const final { Operation* op = hloOp.getOperation(); - const auto& original_results = op->getResults(); SmallVector buffer_args(operands.begin(), operands.end()); - for (auto result : llvm::enumerate(original_results)) { - RankedTensorType resultType = - result.value().getType().dyn_cast(); - if (!resultType) { - return failure(); - } - if (resultType.hasStaticShape()) { - buffer_args.push_back( - InsertAlloc(op->getLoc(), result.value(), &rewriter)); - } else { - SmallVector results_shape; - auto shape_type_op = dyn_cast(op); - if (!shape_type_op) return failure(); - if (failed( - shape_type_op.reifyReturnTypeShapes(rewriter, results_shape))) - return failure(); - buffer_args.push_back(InsertDynamicAllocAndDealloc( - op->getLoc(), result.value(), results_shape.front(), &rewriter)); - } - } + if (failed(ConvertResults(op, buffer_args, rewriter))) return failure(); // TODO(silvasean): Move this helper to MLIR core. auto make_elements_attr = [&rewriter](ArrayRef integers) { From 82f67034209111fb94cb7086199cb5c18ff98948 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Wed, 11 Nov 2020 10:15:10 -0800 Subject: [PATCH 167/220] Port changes from 2.4 branch w.r.t project renaming. 
PiperOrigin-RevId: 341855619 Change-Id: I513906ce05e672c779ee6ad5603d33fcaac3b6a6 --- .../ci_build/nightly_release/macos/cpu_py35.sh | 2 +- .../ci_build/nightly_release/macos/cpu_py36.sh | 2 +- .../ci_build/nightly_release/macos/cpu_py37.sh | 2 +- .../ci_build/nightly_release/macos/cpu_py38.sh | 2 +- .../windows/upload_nightly_pip.sh | 2 +- .../tools/ci_build/rel/ubuntu/gpu_py36_pip.sh | 2 +- .../tools/ci_build/rel/ubuntu/gpu_py37_pip.sh | 2 +- .../tools/ci_build/rel/ubuntu/gpu_py38_pip.sh | 2 +- .../tools/ci_build/rel/windows/gpu_py36.bat | 3 +-- .../tools/ci_build/rel/windows/gpu_py37.bat | 3 +-- .../tools/ci_build/rel/windows/gpu_py38.bat | 3 +-- tensorflow/tools/ci_build/release/common.sh | 16 ++++++++++------ .../windows/cpu_py36_full/release_pip_rename.sh | 2 +- .../windows/cpu_py37_full/release_pip_rename.sh | 2 +- .../windows/cpu_py38_full/release_pip_rename.sh | 2 +- .../windows/gpu_py36_full/release_pip_rename.sh | 2 +- .../windows/gpu_py37_full/release_pip_rename.sh | 2 +- .../windows/gpu_py38_full/release_pip_rename.sh | 2 +- 18 files changed, 27 insertions(+), 26 deletions(-) diff --git a/tensorflow/tools/ci_build/nightly_release/macos/cpu_py35.sh b/tensorflow/tools/ci_build/nightly_release/macos/cpu_py35.sh index 7da3b0ea9be..fed8a7972e5 100644 --- a/tensorflow/tools/ci_build/nightly_release/macos/cpu_py35.sh +++ b/tensorflow/tools/ci_build/nightly_release/macos/cpu_py35.sh @@ -46,7 +46,7 @@ mkdir pip_pkg # Copy and rename to tf_nightly for f in $(ls pip_pkg/tf_nightly_cpu-*dev*macosx*.whl); do - copy_to_new_project_name "${f}" tf_nightly + copy_to_new_project_name "${f}" tf_nightly python done # Upload the built packages to pypi. 
diff --git a/tensorflow/tools/ci_build/nightly_release/macos/cpu_py36.sh b/tensorflow/tools/ci_build/nightly_release/macos/cpu_py36.sh index 33e1491dd86..db07014668f 100644 --- a/tensorflow/tools/ci_build/nightly_release/macos/cpu_py36.sh +++ b/tensorflow/tools/ci_build/nightly_release/macos/cpu_py36.sh @@ -44,7 +44,7 @@ mkdir pip_pkg # Copy and rename to tf_nightly for f in $(ls pip_pkg/tf_nightly_cpu-*dev*macosx*.whl); do - copy_to_new_project_name "${f}" tf_nightly + copy_to_new_project_name "${f}" tf_nightly python done # Upload the built packages to pypi. diff --git a/tensorflow/tools/ci_build/nightly_release/macos/cpu_py37.sh b/tensorflow/tools/ci_build/nightly_release/macos/cpu_py37.sh index 631aea318bd..71b6ec4a47b 100644 --- a/tensorflow/tools/ci_build/nightly_release/macos/cpu_py37.sh +++ b/tensorflow/tools/ci_build/nightly_release/macos/cpu_py37.sh @@ -44,7 +44,7 @@ mkdir pip_pkg # Copy and rename to tf_nightly for f in $(ls pip_pkg/tf_nightly_cpu-*dev*macosx*.whl); do - copy_to_new_project_name "${f}" tf_nightly + copy_to_new_project_name "${f}" tf_nightly python done # Upload the built packages to pypi. diff --git a/tensorflow/tools/ci_build/nightly_release/macos/cpu_py38.sh b/tensorflow/tools/ci_build/nightly_release/macos/cpu_py38.sh index 5ffef89188c..32349a45c3e 100644 --- a/tensorflow/tools/ci_build/nightly_release/macos/cpu_py38.sh +++ b/tensorflow/tools/ci_build/nightly_release/macos/cpu_py38.sh @@ -44,7 +44,7 @@ mkdir pip_pkg # Copy and rename to tf_nightly for f in $(ls pip_pkg/tf_nightly_cpu-*dev*macosx*.whl); do - copy_to_new_project_name "${f}" tf_nightly + copy_to_new_project_name "${f}" tf_nightly python done # Upload the built packages to pypi. 
diff --git a/tensorflow/tools/ci_build/nightly_release/windows/upload_nightly_pip.sh b/tensorflow/tools/ci_build/nightly_release/windows/upload_nightly_pip.sh index 31d21c46816..43cf43180c1 100644 --- a/tensorflow/tools/ci_build/nightly_release/windows/upload_nightly_pip.sh +++ b/tensorflow/tools/ci_build/nightly_release/windows/upload_nightly_pip.sh @@ -22,7 +22,7 @@ sudo pip install --upgrade twine # Copy and rename to tf_nightly for f in $(ls "${KOKORO_GFILE_DIR}"/tf_nightly_gpu*dev*cp3*-cp3*-win_amd64.whl); do - copy_to_new_project_name "${f}" tf_nightly + copy_to_new_project_name "${f}" tf_nightly /c/Python36/python done # Upload the built packages to pypi. diff --git a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_pip.sh index fac34ce0244..1432b1e01bf 100644 --- a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_pip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_pip.sh @@ -46,7 +46,7 @@ export TF_TEST_FLAGS="--test_tag_filters=${TF_TEST_FILTER_TAGS} --build_tag_filt export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... " export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" #export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. -export TF_PROJECT_NAME="tensorflow_gpu" +export TF_PROJECT_NAME="tensorflow" # single pip package! export TF_PIP_TEST_ROOT="pip_test" # To build both tensorflow and tensorflow-gpu pip packages diff --git a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_pip.sh index d842cfde829..dd11a262294 100644 --- a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_pip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_pip.sh @@ -46,7 +46,7 @@ export TF_TEST_FLAGS="--test_tag_filters=${TF_TEST_FILTER_TAGS} --build_tag_filt export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... 
" export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" #export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. -export TF_PROJECT_NAME="tensorflow_gpu" +export TF_PROJECT_NAME="tensorflow" # single pip package! export TF_PIP_TEST_ROOT="pip_test" # To build both tensorflow and tensorflow-gpu pip packages diff --git a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_pip.sh index 59312a20b42..6064933e9c7 100644 --- a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_pip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_pip.sh @@ -46,7 +46,7 @@ export TF_TEST_FLAGS="--test_tag_filters=${TF_TEST_FILTER_TAGS} --build_tag_filt export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... " export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" #export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. -export TF_PROJECT_NAME="tensorflow_gpu" +export TF_PROJECT_NAME="tensorflow" # single pip package! 
export TF_PIP_TEST_ROOT="pip_test" # To build both tensorflow and tensorflow-gpu pip packages diff --git a/tensorflow/tools/ci_build/rel/windows/gpu_py36.bat b/tensorflow/tools/ci_build/rel/windows/gpu_py36.bat index 6737261ce69..3d16ff1e5a6 100644 --- a/tensorflow/tools/ci_build/rel/windows/gpu_py36.bat +++ b/tensorflow/tools/ci_build/rel/windows/gpu_py36.bat @@ -21,6 +21,5 @@ if "%IS_NIGHTLY%" == "1" ( call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --extra_test_flags "--test_env=TF2_BEHAVIOR=1" ) else ( call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" - for %%a in ("%~dp0\.") do set "PARENT_DIR=%%~nxa" - bash -l tensorflow\tools\ci_build\release\windows\%PARENT_DIR%\release_pip_rename.sh + bash -l tensorflow\tools\ci_build\release\windows\gpu_py36_full\release_pip_rename.sh ) diff --git a/tensorflow/tools/ci_build/rel/windows/gpu_py37.bat b/tensorflow/tools/ci_build/rel/windows/gpu_py37.bat index 7ecfd83927f..2b7a3e72750 100644 --- a/tensorflow/tools/ci_build/rel/windows/gpu_py37.bat +++ b/tensorflow/tools/ci_build/rel/windows/gpu_py37.bat @@ -21,6 +21,5 @@ if "%IS_NIGHTLY%" == "1" ( call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --extra_test_flags "--test_env=TF2_BEHAVIOR=1" ) else ( call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" - for %%a in ("%~dp0\.") do set "PARENT_DIR=%%~nxa" - bash -l tensorflow\tools\ci_build\release\windows\%PARENT_DIR%\release_pip_rename.sh + bash -l tensorflow\tools\ci_build\release\windows\gpu_py37_full\release_pip_rename.sh ) diff --git a/tensorflow/tools/ci_build/rel/windows/gpu_py38.bat b/tensorflow/tools/ci_build/rel/windows/gpu_py38.bat index 8d152b10a2e..15f7495b9c1 100644 --- a/tensorflow/tools/ci_build/rel/windows/gpu_py38.bat +++ b/tensorflow/tools/ci_build/rel/windows/gpu_py38.bat @@ -21,6 +21,5 @@ if "%IS_NIGHTLY%" 
== "1" ( call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --extra_test_flags "--test_env=TF2_BEHAVIOR=1" ) else ( call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" - for %%a in ("%~dp0\.") do set "PARENT_DIR=%%~nxa" - bash -l tensorflow\tools\ci_build\release\windows\%PARENT_DIR%\release_pip_rename.sh + bash -l tensorflow\tools\ci_build\release\windows\gpu_py38_full\release_pip_rename.sh ) diff --git a/tensorflow/tools/ci_build/release/common.sh b/tensorflow/tools/ci_build/release/common.sh index 7e837596350..78789319c6c 100644 --- a/tensorflow/tools/ci_build/release/common.sh +++ b/tensorflow/tools/ci_build/release/common.sh @@ -223,6 +223,7 @@ function maybe_skip_v1 { function copy_to_new_project_name { WHL_PATH="$1" NEW_PROJECT_NAME="$2" + PYTHON_CMD="$3" ORIGINAL_WHL_NAME=$(basename "${WHL_PATH}") ORIGINAL_WHL_DIR=$(realpath "$(dirname "${WHL_PATH}")") @@ -231,13 +232,14 @@ function copy_to_new_project_name { NEW_WHL_NAME="${NEW_PROJECT_NAME}-${FULL_TAG}" VERSION="$(echo "${FULL_TAG}" | cut -d '-' -f 1)" - TMP_DIR="$(mktemp -d)" - wheel unpack "${WHL_PATH}" -d "${TMP_DIR}" - TMP_UNPACKED_DIR="$(ls -d "${TMP_DIR}"/* | head -n 1)" - pushd "${TMP_UNPACKED_DIR}" - ORIGINAL_WHL_DIR_PREFIX="${ORIGINAL_PROJECT_NAME}-${VERSION}" NEW_WHL_DIR_PREFIX="${NEW_PROJECT_NAME}-${VERSION}" + + TMP_DIR="$(mktemp -d)" + ${PYTHON_CMD} -m wheel unpack "${WHL_PATH}" + mv "${ORIGINAL_WHL_DIR_PREFIX}" "${TMP_DIR}" + pushd "${TMP_DIR}/${ORIGINAL_WHL_DIR_PREFIX}" + mv "${ORIGINAL_WHL_DIR_PREFIX}.dist-info" "${NEW_WHL_DIR_PREFIX}.dist-info" if [[ -d "${ORIGINAL_WHL_DIR_PREFIX}.data" ]]; then mv "${ORIGINAL_WHL_DIR_PREFIX}.data" "${NEW_WHL_DIR_PREFIX}.data" @@ -247,7 +249,9 @@ function copy_to_new_project_name { NEW_PROJECT_NAME_DASH="${NEW_PROJECT_NAME//_/-}" sed -i.bak "s/${ORIGINAL_PROJECT_NAME_DASH}/${NEW_PROJECT_NAME_DASH}/g" "${NEW_WHL_DIR_PREFIX}.dist-info/METADATA" - wheel pack 
"${TMP_UNPACKED_DIR}" -d "${ORIGINAL_WHL_DIR}" + ${PYTHON_CMD} -m wheel pack . + mv *.whl "${ORIGINAL_WHL_DIR}" + popd rm -rf "${TMP_DIR}" } diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py36_full/release_pip_rename.sh b/tensorflow/tools/ci_build/release/windows/cpu_py36_full/release_pip_rename.sh index 43982623109..03b6f0f4d91 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py36_full/release_pip_rename.sh +++ b/tensorflow/tools/ci_build/release/windows/cpu_py36_full/release_pip_rename.sh @@ -20,6 +20,6 @@ source tensorflow/tools/ci_build/release/common.sh # Rename to tensorflow_cpu for f in $(ls py_test_dir/tensorflow-*cp3*-cp3*m-win_amd64.whl); do - copy_to_new_project_name "${f}" tensorflow_cpu + copy_to_new_project_name "${f}" tensorflow_cpu /c/Python36/python rm "${f}" done diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py37_full/release_pip_rename.sh b/tensorflow/tools/ci_build/release/windows/cpu_py37_full/release_pip_rename.sh index 43982623109..26ba1192cbe 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py37_full/release_pip_rename.sh +++ b/tensorflow/tools/ci_build/release/windows/cpu_py37_full/release_pip_rename.sh @@ -20,6 +20,6 @@ source tensorflow/tools/ci_build/release/common.sh # Rename to tensorflow_cpu for f in $(ls py_test_dir/tensorflow-*cp3*-cp3*m-win_amd64.whl); do - copy_to_new_project_name "${f}" tensorflow_cpu + copy_to_new_project_name "${f}" tensorflow_cpu /c/Python37/python rm "${f}" done diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py38_full/release_pip_rename.sh b/tensorflow/tools/ci_build/release/windows/cpu_py38_full/release_pip_rename.sh index 43982623109..b3a0e7aae5f 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py38_full/release_pip_rename.sh +++ b/tensorflow/tools/ci_build/release/windows/cpu_py38_full/release_pip_rename.sh @@ -20,6 +20,6 @@ source tensorflow/tools/ci_build/release/common.sh # Rename to tensorflow_cpu for f in $(ls 
py_test_dir/tensorflow-*cp3*-cp3*m-win_amd64.whl); do - copy_to_new_project_name "${f}" tensorflow_cpu + copy_to_new_project_name "${f}" tensorflow_cpu /c/Python38/python rm "${f}" done diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py36_full/release_pip_rename.sh b/tensorflow/tools/ci_build/release/windows/gpu_py36_full/release_pip_rename.sh index 039f9516d86..26a06331fbc 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py36_full/release_pip_rename.sh +++ b/tensorflow/tools/ci_build/release/windows/gpu_py36_full/release_pip_rename.sh @@ -20,5 +20,5 @@ source tensorflow/tools/ci_build/release/common.sh # Copy and rename to tensorflow for f in $(ls py_test_dir/tensorflow-*cp3*-cp3*m-win_amd64.whl); do - copy_to_new_project_name "${f}" tensorflow_gpu + copy_to_new_project_name "${f}" tensorflow_gpu /c/Python36/python done diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py37_full/release_pip_rename.sh b/tensorflow/tools/ci_build/release/windows/gpu_py37_full/release_pip_rename.sh index 039f9516d86..2ef8119d38f 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py37_full/release_pip_rename.sh +++ b/tensorflow/tools/ci_build/release/windows/gpu_py37_full/release_pip_rename.sh @@ -20,5 +20,5 @@ source tensorflow/tools/ci_build/release/common.sh # Copy and rename to tensorflow for f in $(ls py_test_dir/tensorflow-*cp3*-cp3*m-win_amd64.whl); do - copy_to_new_project_name "${f}" tensorflow_gpu + copy_to_new_project_name "${f}" tensorflow_gpu /c/Python37/python done diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py38_full/release_pip_rename.sh b/tensorflow/tools/ci_build/release/windows/gpu_py38_full/release_pip_rename.sh index 11744ea734d..f16ab338f2d 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py38_full/release_pip_rename.sh +++ b/tensorflow/tools/ci_build/release/windows/gpu_py38_full/release_pip_rename.sh @@ -20,5 +20,5 @@ source tensorflow/tools/ci_build/release/common.sh # Copy and rename to tensorflow 
for f in $(ls py_test_dir/tensorflow-*cp3*-cp3*-win_amd64.whl); do - copy_to_new_project_name "${f}" tensorflow_gpu + copy_to_new_project_name "${f}" tensorflow_gpu /c/Python38/python done From 2de245e6eeb292083d8c43f85fac74957d8c24b4 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Wed, 11 Nov 2020 10:18:24 -0800 Subject: [PATCH 168/220] [XLA:SPMD] Make sharding merging more general for non-default device order PiperOrigin-RevId: 341856298 Change-Id: Icc5ca7c5209df3f2fd59d8fdbe6da241e2e38b59 --- .../xla/service/sharding_propagation.cc | 21 +++++++++++++++---- .../xla/service/sharding_propagation_test.cc | 19 +++++++++++++++++ 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/sharding_propagation.cc b/tensorflow/compiler/xla/service/sharding_propagation.cc index 94d97e0de19..b67d671f377 100644 --- a/tensorflow/compiler/xla/service/sharding_propagation.cc +++ b/tensorflow/compiler/xla/service/sharding_propagation.cc @@ -206,13 +206,26 @@ bool MergeSharding(const HloSharding& old, HloSharding* to_merge, int64 old_group_id = get_group_index(old_index, old); int64 new_group_id = get_group_index(new_index, *to_merge); if (old_group_members[old_group_id].empty() || - new_group_members[new_group_id].empty() || - *old_group_members[old_group_id].begin() != - *new_group_members[new_group_id].begin()) { + new_group_members[new_group_id].empty()) { compatible = false; return; } - *device = *old_group_members[old_group_id].begin(); + + int64 smallest_old = *old_group_members[old_group_id].begin(); + int64 smallest_new = *new_group_members[new_group_id].begin(); + if (smallest_old < smallest_new) { + if (old_group_members[old_group_id].count(smallest_new) == 0) { + compatible = false; + return; + } + *device = smallest_new; + } else { + if (new_group_members[new_group_id].count(smallest_old) == 0) { + compatible = false; + return; + } + *device = smallest_old; + } old_group_members[old_group_id].erase(*device); 
new_group_members[new_group_id].erase(*device); }); diff --git a/tensorflow/compiler/xla/service/sharding_propagation_test.cc b/tensorflow/compiler/xla/service/sharding_propagation_test.cc index 8c4d8fc24ff..ec83f99db32 100644 --- a/tensorflow/compiler/xla/service/sharding_propagation_test.cc +++ b/tensorflow/compiler/xla/service/sharding_propagation_test.cc @@ -1222,6 +1222,25 @@ ENTRY %conv { "{devices=[2,2,1,2]0,1,2,3,4,5,6,7 last_tile_dim_replicate}")); } +TEST_F(ShardingPropagationTest, DotMergeOperands3) { + const char* const hlo_string = R"( +HloModule module +ENTRY %conv { + %p0 = f32[256,512] parameter(0), sharding={devices=[2,4]0,1,2,3,4,5,6,7} + %p1 = f32[128,512] parameter(1), sharding={devices=[4,2]0,4,2,6,3,7,1,5} + %dot = f32[256,128] dot(%p0, %p1), + lhs_contracting_dims={1}, rhs_contracting_dims={1} + ROOT %copy = f32[256,128] copy(%dot) +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + TF_ASSERT_OK_AND_ASSIGN( + bool changed, ShardingPropagation(/*is_spmd=*/true).Run(module.get())); + EXPECT_TRUE(changed); + EXPECT_THAT(FindInstruction(module.get(), "dot"), + op::Sharding("{devices=[2,4]0,2,3,1,4,6,7,5}")); +} + TEST_F(ShardingPropagationTest, BackwardDotFromContracting) { const char* const hlo_string = R"( HloModule module From d75a0910ec1853c4dbb962270a6c238a35cff32d Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Wed, 11 Nov 2020 10:41:08 -0800 Subject: [PATCH 169/220] [XLA:GPU] Add test for SliceToDynamic IR emission. - Also fixed IsBlock0Thread0 to split out nested CreateICmpEQ calls out to prevent compiler dependent change in order of the instructions generated. 
PiperOrigin-RevId: 341860846 Change-Id: Ic65ee73d3b916e925e9745abcfb21186909feed4 --- .../xla/service/gpu/ir_emission_utils.cc | 15 ++-- .../service/gpu/tests/slice_to_dynamic.hlo | 88 +++++++++++++++++++ 2 files changed, 96 insertions(+), 7 deletions(-) create mode 100644 tensorflow/compiler/xla/service/gpu/tests/slice_to_dynamic.hlo diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc index 7743d19497d..53474dcdc66 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc @@ -486,13 +486,14 @@ string CudnnConvKindToString(CudnnConvKind kind) { } llvm::Value* IsBlock0Thread0(llvm::IRBuilder<>* b) { - return b->CreateAnd( - b->CreateICmpEQ( - b->getInt32(0), - EmitCallToTargetIntrinsic(TargetIntrinsicID::kThreadIdx, {}, {}, b)), - b->CreateICmpEQ( - b->getInt32(0), - EmitCallToTargetIntrinsic(TargetIntrinsicID::kBlockIdx, {}, {}, b))); + llvm::Value* is_thread0 = b->CreateICmpEQ( + b->getInt32(0), + EmitCallToTargetIntrinsic(TargetIntrinsicID::kThreadIdx, {}, {}, b)); + + llvm::Value* is_block0 = b->CreateICmpEQ( + b->getInt32(0), + EmitCallToTargetIntrinsic(TargetIntrinsicID::kBlockIdx, {}, {}, b)); + return b->CreateAnd(is_thread0, is_block0); } bool AreFusedReductionOutputsConsistent( diff --git a/tensorflow/compiler/xla/service/gpu/tests/slice_to_dynamic.hlo b/tensorflow/compiler/xla/service/gpu/tests/slice_to_dynamic.hlo new file mode 100644 index 00000000000..1f4f2602094 --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/tests/slice_to_dynamic.hlo @@ -0,0 +1,88 @@ +// RUN: hlo_to_llvm_ir %s | FileCheck %s + +// NOTE: Assertions have been autogenerated by utils/generate-test-checks.py + +// CHECK-LABEL: entry: +// CHECK: %[[VAL_0:.*]] = getelementptr inbounds i8, i8* %[[VAL_1:.*]], i64 0 +// CHECK: %[[VAL_2:.*]] = bitcast i8* %[[VAL_0]] to [2 x [2 x [2 x i32]]]* +// CHECK: %[[VAL_3:.*]] = getelementptr 
inbounds i8, i8* %[[VAL_4:.*]], i64 0 +// CHECK: %[[VAL_5:.*]] = bitcast i8* %[[VAL_3]] to [2 x [2 x [2 x i32]]]* +// CHECK: %[[VAL_6:.*]] = bitcast [2 x [2 x [2 x i32]]]* %[[VAL_2]] to i8* +// CHECK: %[[VAL_7:.*]] = load i32, i32* bitcast ([4 x i8]* @buffer_for_static to i32*), align 4 +// CHECK: %[[VAL_8:.*]] = load i32, i32* bitcast ([4 x i8]* @buffer_for_dynamic to i32*), align 4 +// CHECK: %[[VAL_9:.*]] = load i32, i32* bitcast ([4 x i8]* @buffer_for_static to i32*), align 4 +// CHECK: %[[VAL_10:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK: %[[VAL_11:.*]] = icmp eq i32 0, %[[VAL_10]] +// CHECK: %[[VAL_12:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() +// CHECK: %[[VAL_13:.*]] = icmp eq i32 0, %[[VAL_12]] +// CHECK: %[[VAL_14:.*]] = and i1 %[[VAL_11]], %[[VAL_13]] +// CHECK: br i1 %[[VAL_14]], label %[[VAL_15:.*]], label %[[VAL_16:.*]] +// CHECK: is_thred_0-after: ; preds = %[[VAL_15]], %[[VAL_17:.*]] +// CHECK: %[[VAL_18:.*]] = mul i32 1, %[[VAL_7]] +// CHECK: %[[VAL_19:.*]] = mul i32 %[[VAL_18]], %[[VAL_8]] +// CHECK: %[[VAL_20:.*]] = mul i32 %[[VAL_19]], %[[VAL_9]] +// CHECK: %[[VAL_21:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !2 +// CHECK: %[[VAL_22:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !3 +// CHECK: %[[VAL_23:.*]] = mul nuw nsw i32 %[[VAL_21]], 8 +// CHECK: %[[VAL_24:.*]] = add nuw nsw i32 %[[VAL_23]], %[[VAL_22]] +// CHECK: %[[VAL_25:.*]] = icmp ult i32 %[[VAL_24]], 8 +// CHECK: call void @llvm.assume(i1 %[[VAL_25]]) +// CHECK: %[[VAL_26:.*]] = udiv i32 %[[VAL_24]], 1 +// CHECK: %[[VAL_27:.*]] = urem i32 %[[VAL_26]], 2 +// CHECK: %[[VAL_28:.*]] = udiv i32 %[[VAL_24]], 2 +// CHECK: %[[VAL_29:.*]] = urem i32 %[[VAL_28]], 2 +// CHECK: %[[VAL_30:.*]] = udiv i32 %[[VAL_24]], 4 +// CHECK: %[[VAL_31:.*]] = icmp ult i32 %[[VAL_24]], 8 +// CHECK: br i1 %[[VAL_31]], label %[[VAL_32:.*]], label %[[VAL_33:.*]] +// CHECK: custom-call.in_bounds-after: ; preds = %[[VAL_34:.*]], %[[VAL_16]] +// CHECK: ret void +// 
CHECK: is_thred_0-true: ; preds = %[[VAL_17]] +// CHECK: %[[VAL_35:.*]] = getelementptr inbounds i8, i8* %[[VAL_6]], i32 32 +// CHECK: %[[VAL_36:.*]] = bitcast i8* %[[VAL_35]] to i32* +// CHECK: store i32 %[[VAL_7]], i32* %[[VAL_36]], align 4 +// CHECK: %[[VAL_37:.*]] = getelementptr inbounds i8, i8* %[[VAL_6]], i32 36 +// CHECK: %[[VAL_38:.*]] = bitcast i8* %[[VAL_37]] to i32* +// CHECK: store i32 %[[VAL_8]], i32* %[[VAL_38]], align 4 +// CHECK: %[[VAL_39:.*]] = getelementptr inbounds i8, i8* %[[VAL_6]], i32 40 +// CHECK: %[[VAL_40:.*]] = bitcast i8* %[[VAL_39]] to i32* +// CHECK: store i32 %[[VAL_9]], i32* %[[VAL_40]], align 4 +// CHECK: br label %[[VAL_16]] +// CHECK: custom-call.in_bounds-true: ; preds = %[[VAL_16]] +// CHECK: %[[VAL_41:.*]] = mul nuw nsw i32 %[[VAL_27]], 1 +// CHECK: %[[VAL_42:.*]] = add nuw nsw i32 0, %[[VAL_41]] +// CHECK: %[[VAL_43:.*]] = mul nuw nsw i32 %[[VAL_30]], 2 +// CHECK: %[[VAL_44:.*]] = add nuw nsw i32 %[[VAL_42]], %[[VAL_43]] +// CHECK: %[[VAL_45:.*]] = mul nuw nsw i32 %[[VAL_29]], 4 +// CHECK: %[[VAL_46:.*]] = add nuw nsw i32 %[[VAL_44]], %[[VAL_45]] +// CHECK: %[[VAL_47:.*]] = icmp ult i32 %[[VAL_46]], %[[VAL_20]] +// CHECK: br i1 %[[VAL_47]], label %[[VAL_48:.*]], label %[[VAL_34]] +// CHECK: custom-call.in_dyn_bounds-after: ; preds = %[[VAL_48]], %[[VAL_32]] +// CHECK: br label %[[VAL_33]] +// CHECK: custom-call.in_dyn_bounds-true: ; preds = %[[VAL_32]] +// CHECK: %[[VAL_49:.*]] = udiv i32 %[[VAL_46]], 1 +// CHECK: %[[VAL_50:.*]] = urem i32 %[[VAL_49]], %[[VAL_9]] +// CHECK: %[[VAL_51:.*]] = mul i32 1, %[[VAL_9]] +// CHECK: %[[VAL_52:.*]] = udiv i32 %[[VAL_46]], %[[VAL_51]] +// CHECK: %[[VAL_53:.*]] = urem i32 %[[VAL_52]], %[[VAL_7]] +// CHECK: %[[VAL_54:.*]] = mul i32 %[[VAL_51]], %[[VAL_7]] +// CHECK: %[[VAL_55:.*]] = udiv i32 %[[VAL_46]], %[[VAL_54]] +// CHECK: %[[VAL_56:.*]] = getelementptr inbounds [2 x [2 x [2 x i32]]], [2 x [2 x [2 x i32]]]* %[[VAL_5]], i32 0, i32 %[[VAL_55]], i32 %[[VAL_53]], i32 %[[VAL_50]] +// 
CHECK: %[[VAL_57:.*]] = load i32, i32* %[[VAL_56]], align 4, !invariant.load !4 +// CHECK: %[[VAL_58:.*]] = bitcast [2 x [2 x [2 x i32]]]* %[[VAL_2]] to i32* +// CHECK: %[[VAL_59:.*]] = getelementptr inbounds i32, i32* %[[VAL_58]], i32 %[[VAL_24]] +// CHECK: store i32 %[[VAL_57]], i32* %[[VAL_59]], align 4 +// CHECK: br label %[[VAL_34]] + +HloModule SliceToDynamic + +ENTRY main { + %param = s32[2,2,2]{2,0,1} parameter(0) + %static = s32[] constant(2) + %dynamic = s32[] constant(1) + ROOT %custom-call = s32[2,<=2, 2]{2,0,1} custom-call(s32[2,2,2]{2,0,1} %param, + s32[] %static, + s32[] %dynamic, + s32[] %static), + custom_call_target="SliceToDynamic", + backend_config="" +} From 094af07edadefbc81e329d86f476b5d68ef6158d Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Wed, 11 Nov 2020 10:55:55 -0800 Subject: [PATCH 170/220] Disable flaky keras_dnn_correctness_test PiperOrigin-RevId: 341864339 Change-Id: I59e35f11b1ad84eac4f59b26f3620762651d650d --- tensorflow/python/keras/distribute/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/keras/distribute/BUILD b/tensorflow/python/keras/distribute/BUILD index d0236226aae..9f340a4b7d6 100644 --- a/tensorflow/python/keras/distribute/BUILD +++ b/tensorflow/python/keras/distribute/BUILD @@ -436,6 +436,7 @@ distribute_py_test( shard_count = 19, tags = [ "multi_and_single_gpu", + "no_oss", # TODO(b/173021094) "no_rocm", # times out on ROCm "no_windows_gpu", "nogpu", # TODO(b/170905292) From 28835e41034507e754eab6da32ebe81638957670 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 11 Nov 2020 11:09:17 -0800 Subject: [PATCH 171/220] Integrate LLVM at llvm/llvm-project@1cbf8e89b54d Updates LLVM usage to match [1cbf8e89b54d](https://github.com/llvm/llvm-project/commit/1cbf8e89b54d) PiperOrigin-RevId: 341867974 Change-Id: Iaf70623266ce86b87a31aa6bfbe65f2659acb5ea --- .../mlir/lite/tests/end2end/if_op.pbtxt | 8 +- .../unranked_function_output.mlir | 4 +- .../compiler/mlir/lite/tests/fuse-tftext.mlir | 48 ++--- .../mlir/lite/tests/tfl_while_outline.mlir | 20 +- .../tensorflow/tests/cluster_outlining.mlir | 9 +- .../executor_tpuv1_outline_tpu_island.mlir | 4 +- .../tensorflow/tests/functionalize-if.mlir | 6 +- .../tests/graphdef2mlir/arg-control-dep.pbtxt | 2 +- .../graphdef2mlir/empty-value-attr.pbtxt | 2 +- .../force_shared_name_for_resource_ops.pbtxt | 2 +- .../graphdef2mlir/function-func-attr.pbtxt | 4 +- .../graphdef2mlir/graph-as-function.pbtxt | 2 +- .../graph-custom-operation.pbtxt | 2 +- .../graphdef2mlir/graph-device-retval.pbtxt | 2 +- .../tests/graphdef2mlir/graph-func-attr.pbtxt | 4 +- .../graph-function-input-shapes.pbtxt | 2 +- .../graph-function-name-bug.pbtxt | 4 +- .../graph-function-resource-args.pbtxt | 2 +- .../graphdef2mlir/graph-gradient-def.pbtxt | 2 +- .../graph-input-func-arg-name-collision.pbtxt | 4 +- .../tests/graphdef2mlir/graph-library.pbtxt | 4 +- .../graphdef2mlir/graph-uint8-return.pbtxt | 2 +- .../graphdef2mlir/stateful-attribute.pbtxt | 2 +- .../tests/guarantee-all-funcs-one-use.mlir | 12 +- .../mlir/tensorflow/tests/isolate-placer.mlir | 2 +- .../tests/promote_resources_to_args.mlir | 2 +- .../region-control-flow-to-functional.mlir | 68 +++---- .../tensorflow/tests/resource_inlining.mlir | 4 +- .../tensorflow/tests/resource_op_lifting.mlir | 6 +- .../tests/stack_ops_decomposition.mlir | 10 +- .../tests/tensor_array_ops_decomposition.mlir | 14 +- .../tests/tensor_list_ops_decomposition.mlir | 12 +- .../tests/tf_saved_model/call_to_exported.py | 2 +- .../control_flow_duplicate_v1.py 
| 4 +- ...timize_global_tensors_interprocedural.mlir | 28 +-- .../mlir/tensorflow/tests/tpu_rewrite.mlir | 14 +- .../tests/tpu_space_to_depth_pass.mlir | 12 +- .../mlir/tools/kernel_gen/transforms/BUILD | 1 + .../transforms/tf_kernel_to_llvm_pass.cc | 10 +- .../xla/tests/legalize-tf-communication.mlir | 34 ++-- .../mlir/xla/tests/translate/import.hlotxt | 177 +++++++++--------- tensorflow/python/compiler/mlir/mlir_test.py | 2 +- tensorflow/workspace.bzl | 4 +- third_party/mlir/BUILD | 108 +++++++++++ third_party/mlir/test.BUILD | 14 ++ 45 files changed, 403 insertions(+), 278 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/tests/end2end/if_op.pbtxt b/tensorflow/compiler/mlir/lite/tests/end2end/if_op.pbtxt index 97edfdf9c45..622a536e0af 100644 --- a/tensorflow/compiler/mlir/lite/tests/end2end/if_op.pbtxt +++ b/tensorflow/compiler/mlir/lite/tests/end2end/if_op.pbtxt @@ -411,11 +411,11 @@ versions { # CHECK-NEXT: constant dense<[5.000000e+00, 6.000000e+00, 7.000000e+00, 8.000000e+00]> # CHECK: "tf.If"{{.+}}else_branch = @cond_false_10{{.+}}is_stateless = true{{.+}}then_branch = @cond_true_10 # CHECK: "tf.If"{{.+}}else_branch = @cond_false0{{.+}}is_stateless = false{{.+}}then_branch = @cond_true0 -# CHECK: func @cond_false_10 +# CHECK: func private @cond_false_10 # CHECK-NEXT: tfl.div -# CHECK: func @cond_true_10 +# CHECK: func private @cond_true_10 # CHECK-NEXT: tfl.sub -# CHECK: func @cond_false0 +# CHECK: func private @cond_false0 # CHECK-NEXT: tfl.mul -# CHECK: func @cond_true0 +# CHECK: func private @cond_true0 # CHECK-NEXT: tfl.add diff --git a/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/unranked_function_output.mlir b/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/unranked_function_output.mlir index 5b471d69b8e..8a97a83064f 100644 --- a/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/unranked_function_output.mlir +++ b/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/unranked_function_output.mlir @@ -5,8 +5,8 @@ // CHECK: func 
@main(%arg0: tensor<1xf32>) -> tensor<*xf32> // CHECK: %0 = "tf.While"(%arg0) {body = @body, cond = @cond, is_stateless = false} : (tensor<1xf32>) -> tensor<*xf32> // CHECK: return %0 : tensor<*xf32> -// CHECK: func @cond(%arg0: tensor<*xf32>) -> tensor<*xf32> -// CHECK: func @body(%arg0: tensor<*xf32>) -> tensor<*xf32> +// CHECK: func private @cond(%arg0: tensor<*xf32>) -> tensor<*xf32> +// CHECK: func private @body(%arg0: tensor<*xf32>) -> tensor<*xf32> func @main(%arg0: tensor<1xf32>) -> tensor<*xf32> { %0 = "tf.While"(%arg0) {cond = @cond, body = @body, is_stateless = false} : (tensor<1xf32>) -> tensor<*xf32> diff --git a/tensorflow/compiler/mlir/lite/tests/fuse-tftext.mlir b/tensorflow/compiler/mlir/lite/tests/fuse-tftext.mlir index d56c2cc221a..6779242b616 100644 --- a/tensorflow/compiler/mlir/lite/tests/fuse-tftext.mlir +++ b/tensorflow/compiler/mlir/lite/tests/fuse-tftext.mlir @@ -1,6 +1,6 @@ // RUN: tf-opt -tfl-prepare-composite-funcs-tf -tfl-fuse-tftext=true %s | FileCheck %s -func @whitespace_tokenizer_rank1(%arg0: tensor<1x!tf.string> {tf._user_specified_name = "input"}) -> (tensor, tensor) attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<1>], tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf.signature.is_stateful} { +func private @whitespace_tokenizer_rank1(%arg0: tensor<1x!tf.string> {tf._user_specified_name = "input"}) -> (tensor, tensor) attributes {tf._input_shapes = [#tf.shape<1>], tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf.signature.is_stateful} { %0 = "tf.Const"() {value = dense<[0, 1]> : tensor<2xi64>} : () -> tensor<2xi64> %1 = "tf.Const"() {value = dense<[]> : tensor<0xi64>} : () -> tensor<0xi64> %2 = "tf.Const"() {value = dense : tensor} : () -> tensor @@ -1026,11 +1026,11 @@ func @WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_true_23810(%arg0: t return %1 : tensor } -// CHECK: func @whitespace_tokenizer_rank1(%arg0: tensor<1x!tf.string> {tf._user_specified_name = "input"}) 
-> (tensor, tensor) attributes {sym_visibility = "private", tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf._input_shapes = [#tf.shape<1>], tf.signature.is_stateful} { +// CHECK: func private @whitespace_tokenizer_rank1(%arg0: tensor<1x!tf.string> {tf._user_specified_name = "input"}) -> (tensor, tensor) attributes {tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf._input_shapes = [#tf.shape<1>], tf.signature.is_stateful} { // CHECK: %0:2 = "tfl.custom"(%arg0) {custom_code = "tftext:WhitespaceTokenizer", custom_option = opaque<"tfl", "0x"> : tensor<0xi8>} : (tensor<1x!tf.string>) -> (tensor, tensor) // CHECK: return %0#0, %0#1 : tensor, tensor -func @whitespace_tokenizer_rank2(%arg0: tensor {tf._user_specified_name = "input"}) -> (tensor, tensor, tensor) attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape], tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf.signature.is_stateful} { +func private @whitespace_tokenizer_rank2(%arg0: tensor {tf._user_specified_name = "input"}) -> (tensor, tensor, tensor) attributes {tf._input_shapes = [#tf.shape], tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf.signature.is_stateful} { %0 = "tf.Const"() {value = dense<[]> : tensor<0xi64>} : () -> tensor<0xi64> %1 = "tf.Const"() {value = dense : tensor} : () -> tensor %2 = "tf.Const"() {value = dense<-1> : tensor} : () -> tensor @@ -2160,11 +2160,11 @@ func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_As -// CHECK: func @whitespace_tokenizer_rank2(%arg0: tensor {tf._user_specified_name = "input"}) -> (tensor, tensor, tensor) attributes {sym_visibility = "private", tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf._input_shapes = [#tf.shape], tf.signature.is_stateful} { +// CHECK: func private @whitespace_tokenizer_rank2(%arg0: tensor {tf._user_specified_name = "input"}) -> (tensor, tensor, tensor) attributes {tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", 
{}>, tf._input_shapes = [#tf.shape], tf.signature.is_stateful} { // CHECK: %0:3 = "tfl.custom"(%arg0) {custom_code = "tftext:WhitespaceTokenizer", custom_option = opaque<"tfl", "0x"> : tensor<0xi8>} : (tensor) -> (tensor, tensor, tensor) // CHECK: return %0#0, %0#1, %0#2 : tensor, tensor, tensor -func @whitespace_tokenizer_rank0(%arg0: tensor {tf._user_specified_name = "input"}) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>], tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf.signature.is_stateful} { +func private @whitespace_tokenizer_rank0(%arg0: tensor {tf._user_specified_name = "input"}) -> tensor attributes {tf._input_shapes = [#tf.shape<>], tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf.signature.is_stateful} { %0 = "tf.Const"() {value = dense<[0, 1]> : tensor<2xi64>} : () -> tensor<2xi64> %1 = "tf.Const"() {value = dense<[]> : tensor<0xi64>} : () -> tensor<0xi64> %2 = "tf.Const"() {value = dense : tensor} : () -> tensor @@ -3190,7 +3190,7 @@ func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_ return %1 : tensor } -// CHECK: func @whitespace_tokenizer_rank0(%arg0: tensor {tf._user_specified_name = "input"}) -> tensor attributes {sym_visibility = "private", tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf._input_shapes = [#tf.shape<>], tf.signature.is_stateful} { +// CHECK: func private @whitespace_tokenizer_rank0(%arg0: tensor {tf._user_specified_name = "input"}) -> tensor attributes {tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf._input_shapes = [#tf.shape<>], tf.signature.is_stateful} { // CHECK: %0 = "tfl.custom"(%arg0) {custom_code = "tftext:WhitespaceTokenizer", custom_option = opaque<"tfl", "0x"> : tensor<0xi8>} : (tensor) -> tensor // CHECK: return %0 : tensor @@ -3213,7 +3213,7 @@ func @ngrams(%arg0: tensor {tf._user_specified_name = "input"}) -> // CHECK: return %0 : tensor // CHECK: } -func @ngrams_ragged_rank_2(%arg0: 
tensor {tf._user_specified_name = "values"}, %arg1: tensor<3xi64> {tf._user_specified_name = "args_0"}, %arg2: tensor {tf._user_specified_name = "args_1"}) -> (tensor, tensor<3xi64>, tensor) attributes {sym_visibility = "private", tf._implements = #tf.func<@"tftext:Ngrams", {axis = -1 : i64, reduction_type = "STRING_JOIN", string_separator = "", width = 2 : i64}>, tf._input_shapes = [#tf.shape, #tf.shape<3>, #tf.shape], tf.signature.is_stateful} { +func private @ngrams_ragged_rank_2(%arg0: tensor {tf._user_specified_name = "values"}, %arg1: tensor<3xi64> {tf._user_specified_name = "args_0"}, %arg2: tensor {tf._user_specified_name = "args_1"}) -> (tensor, tensor<3xi64>, tensor) attributes {tf._implements = #tf.func<@"tftext:Ngrams", {axis = -1 : i64, reduction_type = "STRING_JOIN", string_separator = "", width = 2 : i64}>, tf._input_shapes = [#tf.shape, #tf.shape<3>, #tf.shape], tf.signature.is_stateful} { %0 = "tf.Const"() {value = dense<-1> : tensor} : () -> tensor %1 = "tf.Const"() {value = dense<-1> : tensor} : () -> tensor %2 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor @@ -3330,12 +3330,12 @@ func @ngrams_ragged_rank_2(%arg0: tensor {tf._user_specified_name %71 = "tf.Identity"(%70) {device = ""} : (tensor<3xi64>) -> tensor<3xi64> return %68, %71, %64 : tensor, tensor<3xi64>, tensor } -func @RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_27770(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { +func private @RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_27770(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor return %1 : 
tensor } -func @RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_27780(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { +func private @RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_27780(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor %2 = "tf.Const"() {value = dense<"x (RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor @@ -3345,12 +3345,12 @@ func @RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_as %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor return %5 : tensor } -func @RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_28130(%arg0: tensor, %arg1: tensor) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape]} { +func private @RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_28130(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor return %1 : tensor } -func 
@RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_28140(%arg0: tensor, %arg1: tensor) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { +func private @RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_28140(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor %2 = "tf.Const"() {value = dense<"x (RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor @@ -3359,12 +3359,12 @@ func @RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_as %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor return %4 : tensor } -func @RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_28500(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { +func private @RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_28500(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor return %1 : tensor } -func @RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_28510(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {sym_visibility = "private", tf._input_shapes = 
[#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { +func private @RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_28510(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor %2 = "tf.Const"() {value = dense<"x (RaggedFromNestedRowSplits/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor @@ -3374,12 +3374,12 @@ func @RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_Assert %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor return %5 : tensor } -func @RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_28900(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { +func private @RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_28900(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor return %1 : tensor } -func @RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_28910(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { +func private 
@RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_28910(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor %2 = "tf.Const"() {value = dense<"x (RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor @@ -3389,12 +3389,12 @@ func @RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_ %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor return %5 : tensor } -func @RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_29260(%arg0: tensor, %arg1: tensor<2xi64>) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape<2>]} { +func private @RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_29260(%arg0: tensor, %arg1: tensor<2xi64>) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<2>]} { %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor return %1 : tensor } -func @RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_29270(%arg0: tensor, %arg1: tensor<2xi64>) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape<2>], tf.signature.is_stateful} { +func private 
@RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_29270(%arg0: tensor, %arg1: tensor<2xi64>) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<2>], tf.signature.is_stateful} { %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor %2 = "tf.Const"() {value = dense<"x (RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor @@ -3403,12 +3403,12 @@ func @RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_ %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor return %4 : tensor } -func @RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_true_29650(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { +func private @RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_true_29650(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor return %1 : tensor } -func @RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_false_29660(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { +func private @RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_false_29660(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, 
#tf.shape<>], tf.signature.is_stateful} { %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor %2 = "tf.Const"() {value = dense<"x (RaggedFromNestedRowSplits/RaggedFromRowSplits_1/strided_slice:0) = "> : tensor} : () -> tensor @@ -3418,12 +3418,12 @@ func @RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_Asse %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor return %5 : tensor } -func @NGrams_SlidingWindow_RaggedConcat_assert_equal_2_Assert_AssertGuard_true_30330(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape]} { +func private @NGrams_SlidingWindow_RaggedConcat_assert_equal_2_Assert_AssertGuard_true_30330(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape]} { %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor return %1 : tensor } -func @NGrams_SlidingWindow_RaggedConcat_assert_equal_2_Assert_AssertGuard_false_30340(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape], tf.signature.is_stateful} { +func private @NGrams_SlidingWindow_RaggedConcat_assert_equal_2_Assert_AssertGuard_false_30340(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape], tf.signature.is_stateful} { %0 = "tf.Const"() {value = dense<"Inputs must have identical ragged splits"> : tensor} : () -> tensor %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor %2 = "tf.Const"() {value = dense<"x (NGrams/SlidingWindow/RaggedGetItem/RaggedRange:0) = "> 
: tensor} : () -> tensor @@ -3433,12 +3433,12 @@ func @NGrams_SlidingWindow_RaggedConcat_assert_equal_2_Assert_AssertGuard_false_ %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor return %5 : tensor } -// CHECK: func @ngrams_ragged_rank_2(%arg0: tensor {tf._user_specified_name = "values"}, %arg1: tensor<3xi64> {tf._user_specified_name = "args_0"}, %arg2: tensor {tf._user_specified_name = "args_1"}) -> (tensor, tensor<3xi64>, tensor) attributes {sym_visibility = "private", tf._implements = #tf.func<@"tftext:Ngrams", {axis = -1 : i64, reduction_type = "STRING_JOIN", string_separator = "", width = 2 : i64}>, tf._input_shapes = [#tf.shape, #tf.shape<3>, #tf.shape], tf.signature.is_stateful} { +// CHECK: func private @ngrams_ragged_rank_2(%arg0: tensor {tf._user_specified_name = "values"}, %arg1: tensor<3xi64> {tf._user_specified_name = "args_0"}, %arg2: tensor {tf._user_specified_name = "args_1"}) -> (tensor, tensor<3xi64>, tensor) attributes {tf._implements = #tf.func<@"tftext:Ngrams", {axis = -1 : i64, reduction_type = "STRING_JOIN", string_separator = "", width = 2 : i64}>, tf._input_shapes = [#tf.shape, #tf.shape<3>, #tf.shape], tf.signature.is_stateful} { // CHECK: %0:3 = "tfl.custom"(%arg0, %arg1, %arg2) {custom_code = "tftext:Ngrams", custom_option = opaque<"tfl", "0x776964746800737472696E675F736570617261746F720000006178697300726564756374696F6E5F74797065000B535452494E475F4A4F494E0004221E373E040104FF152C0204141404082401"> : tensor<77xi8>} : (tensor, tensor<3xi64>, tensor) -> (tensor, tensor<3xi64>, tensor) // CHECK: return %0#0, %0#1, %0#2 : tensor, tensor<3xi64>, tensor -func @sgnn_projection(%arg0: tensor {tf._user_specified_name = "values"}, %arg1: tensor {tf._user_specified_name = "row_splits"}) -> tensor attributes {sym_visibility = "private", tf._implements = #tf.func<@"tftext:custom:SgnnProjection", {buckets = 2147483647 : i64, hash_seed = [1902835825, -1475704015, 473120514, 1254202069, 1558833093, 1756181982, 1906603252, -1034142694, 542842690, 
535515822]}>, tf._input_shapes = [#tf.shape, #tf.shape], tf.signature.is_stateful} { +func private @sgnn_projection(%arg0: tensor {tf._user_specified_name = "values"}, %arg1: tensor {tf._user_specified_name = "row_splits"}) -> tensor attributes {tf._implements = #tf.func<@"tftext:custom:SgnnProjection", {buckets = 2147483647 : i64, hash_seed = [1902835825, -1475704015, 473120514, 1254202069, 1558833093, 1756181982, 1906603252, -1034142694, 542842690, 535515822]}>, tf._input_shapes = [#tf.shape, #tf.shape], tf.signature.is_stateful} { %0 = "tf.Const"() {value = dense<[[1902835825], [-1475704015], [473120514], [1254202069], [1558833093], [1756181982], [1906603252], [-1034142694], [542842690], [535515822]]> : tensor<10x1xi64>} : () -> tensor<10x1xi64> %1 = "tf.StringToHashBucketFast"(%arg0) {device = "", num_buckets = 2147483647 : i64} : (tensor) -> tensor %2 = "tf.Sgnn"(%1, %0) {device = ""} : (tensor, tensor<10x1xi64>) -> tensor<10x?xf64> @@ -3448,6 +3448,6 @@ func @sgnn_projection(%arg0: tensor {tf._user_specified_name = "va } -// CHECK: func @sgnn_projection(%arg0: tensor {tf._user_specified_name = "values"}, %arg1: tensor {tf._user_specified_name = "row_splits"}) -> tensor attributes {sym_visibility = "private", tf._implements = #tf.func<@"tftext:custom:SgnnProjection", {buckets = 2147483647 : i64, hash_seed = [1902835825, -1475704015, 473120514, 1254202069, 1558833093, 1756181982, 1906603252, -1034142694, 542842690, 535515822]}>, tf._input_shapes = [#tf.shape, #tf.shape], tf.signature.is_stateful} { +// CHECK: func private @sgnn_projection(%arg0: tensor {tf._user_specified_name = "values"}, %arg1: tensor {tf._user_specified_name = "row_splits"}) -> tensor attributes {tf._implements = #tf.func<@"tftext:custom:SgnnProjection", {buckets = 2147483647 : i64, hash_seed = [1902835825, -1475704015, 473120514, 1254202069, 1558833093, 1756181982, 1906603252, -1034142694, 542842690, 535515822]}>, tf._input_shapes = [#tf.shape, #tf.shape], tf.signature.is_stateful} { // 
CHECK: %0 = "tfl.custom"(%arg0, %arg1) {custom_code = "tftext:custom:SgnnProjection", custom_option = opaque<"tfl", "0x686173685F736565640000000A00000071F86A71318B0AA8023F331CD59AC14AC5E7E95CDE35AD68F474A4711A3C5CC2421F5B20AE52EB1F6275636B6574730002094200030000000100000002000000FFFFFF7F44000000062E0A2601"> : tensor<93xi8>} : (tensor, tensor) -> tensor // CHECK: return %0 : tensor diff --git a/tensorflow/compiler/mlir/lite/tests/tfl_while_outline.mlir b/tensorflow/compiler/mlir/lite/tests/tfl_while_outline.mlir index 17eb661e8f9..525571b7c6c 100644 --- a/tensorflow/compiler/mlir/lite/tests/tfl_while_outline.mlir +++ b/tensorflow/compiler/mlir/lite/tests/tfl_while_outline.mlir @@ -30,9 +30,9 @@ func @while() -> tensor<1xf32> }) : (tensor, tensor<1xf32>) -> (tensor, tensor<1xf32>) loc("WhileOp") return %0#1 : tensor<1xf32> } -// CHECK-LABEL: func @WhileOp_cond( +// CHECK-LABEL: func private @WhileOp_cond( // CHECK: tfl.greater -// CHECK-LABEL: func @WhileOp_body( +// CHECK-LABEL: func private @WhileOp_body( // CHECK: tfl.sub // CHECK: tfl.add @@ -63,21 +63,21 @@ func @while2(%cst : tensor) -> tensor<1xf32> attributes {tf.entry_function return %0#1 : tensor<1xf32> } -func @WhileOp_cond(%arg0: tensor<*xi32>, %arg1: tensor<*xf32>, %arg2: tensor) -> tensor attributes {sym_visibility = "private"} { +func private @WhileOp_cond(%arg0: tensor<*xi32>, %arg1: tensor<*xf32>, %arg2: tensor) -> tensor { %cst = constant dense<0> : tensor %0 = "tfl.greater"(%arg0, %cst) : (tensor<*xi32>, tensor) -> tensor return %0 : tensor } -func @WhileOp_body(%arg0: tensor<*xi32>, %arg1: tensor<*xf32>, %arg2: tensor) -> (tensor<*xi32>, tensor<*xf32>, tensor) attributes {sym_visibility = "private"} { +func private @WhileOp_body(%arg0: tensor<*xi32>, %arg1: tensor<*xf32>, %arg2: tensor) -> (tensor<*xi32>, tensor<*xf32>, tensor) { %0 = "tfl.sub"(%arg0, %arg2) {fused_activation_function = "NONE"} : (tensor<*xi32>, tensor) -> tensor<*xi32> %1 = tfl.add %arg1, %arg1 {fused_activation_function = "NONE"} 
: tensor<*xf32> return %0, %1, %arg2 : tensor<*xi32>, tensor<*xf32>, tensor } -// CHECK-LABEL: func @WhileOp_cond( +// CHECK-LABEL: func private @WhileOp_cond( // CHECK: tfl.greater -// CHECK-LABEL: func @WhileOp_body( +// CHECK-LABEL: func private @WhileOp_body( // CHECK: tfl.sub // CHECK: tfl.add @@ -152,14 +152,14 @@ func @rnn(%arg0: tensor<4x4x3xf32> {tf.device = "/device:CPU:0"}) -> tensor<4x?x // CHECK: tfl.yield // CHECK-SAME: (tensor, tensor, tensor<*xf32>, tensor<4x2xf32>, tensor<4x2xf32>, tensor<*xf32>, tensor<4x4x3xf32>) -> () -// CHECK-LABEL: func @tfl.while_cond( -// CHECK-SAME: [[VAL_35:%.*]]: tensor, [[VAL_36:%.*]]: tensor, [[VAL_37:%.*]]: tensor<*xf32>, [[VAL_38:%.*]]: tensor<4x2xf32>, [[VAL_39:%.*]]: tensor<4x2xf32>, [[VAL_40:%.*]]: tensor<*xf32>, [[VAL_41:%.*]]: tensor<4x4x3xf32>) -> tensor attributes {sym_visibility = "private"} { +// CHECK-LABEL: func private @tfl.while_cond( +// CHECK-SAME: [[VAL_35:%.*]]: tensor, [[VAL_36:%.*]]: tensor, [[VAL_37:%.*]]: tensor<*xf32>, [[VAL_38:%.*]]: tensor<4x2xf32>, [[VAL_39:%.*]]: tensor<4x2xf32>, [[VAL_40:%.*]]: tensor<*xf32>, [[VAL_41:%.*]]: tensor<4x4x3xf32>) -> tensor { // CHECK: return // CHECK-SAME: tensor // CHECK: } -// CHECK-LABEL: func @tfl.while_body( -// CHECK-SAME: [[VAL_46:%.*]]: tensor, [[VAL_47:%.*]]: tensor, [[VAL_48:%.*]]: tensor<*xf32>, [[VAL_49:%.*]]: tensor<4x2xf32>, [[VAL_50:%.*]]: tensor<4x2xf32>, [[VAL_51:%.*]]: tensor<*xf32>, [[VAL_52:%.*]]: tensor<4x4x3xf32>) -> (tensor, tensor, tensor<*xf32>, tensor<4x2xf32>, tensor<4x2xf32>, tensor<*xf32>, tensor<4x4x3xf32>) attributes {sym_visibility = "private"} { +// CHECK-LABEL: func private @tfl.while_body( +// CHECK-SAME: [[VAL_46:%.*]]: tensor, [[VAL_47:%.*]]: tensor, [[VAL_48:%.*]]: tensor<*xf32>, [[VAL_49:%.*]]: tensor<4x2xf32>, [[VAL_50:%.*]]: tensor<4x2xf32>, [[VAL_51:%.*]]: tensor<*xf32>, [[VAL_52:%.*]]: tensor<4x4x3xf32>) -> (tensor, tensor, tensor<*xf32>, tensor<4x2xf32>, tensor<4x2xf32>, tensor<*xf32>, tensor<4x4x3xf32>) { // CHECK: 
[[VAL_91:%.*]] = "tfl.cast" // CHECK: return // CHECK-SAME: [[VAL_91]], [[VAL_52]] : tensor, tensor, tensor<*xf32>, tensor<4x2xf32>, tensor<4x2xf32>, tensor<*xf32>, tensor<4x4x3xf32> diff --git a/tensorflow/compiler/mlir/tensorflow/tests/cluster_outlining.mlir b/tensorflow/compiler/mlir/tensorflow/tests/cluster_outlining.mlir index 132482cab24..c224d56ce65 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/cluster_outlining.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/cluster_outlining.mlir @@ -24,9 +24,8 @@ func @single_cluster(%arg0: tensor) -> tensor { return %0 : tensor } -// CHECK: func @[[CLUSTER]] +// CHECK: func private @[[CLUSTER]] // CHECK-SAME: (%[[CLUSTER_ARG_0:[a-z0-9]*]]: tensor) -> tensor -// CHECK-SAME: sym_visibility = "private" // CHECK: %[[B_OUTPUT:[0-9]*]] = "tf.B"(%[[CLUSTER_ARG_0]]) // CHECK: return %[[B_OUTPUT]] @@ -67,12 +66,12 @@ func @multiple_clusters(%arg0: tensor) -> tensor { return %0 : tensor } -// CHECK: func @[[CLUSTER_0]] +// CHECK: func private @[[CLUSTER_0]] // CHECK-SAME: (%[[CLUSTER_0_ARG_0:[a-z0-9]*]]: tensor) -> tensor // CHECK: %[[B_OUTPUT:[0-9]*]] = "tf.B"(%[[CLUSTER_0_ARG_0]]) // CHECK: return %[[B_OUTPUT]] -// CHECK: func @[[CLUSTER_1]] +// CHECK: func private @[[CLUSTER_1]] // CHECK-SAME: (%[[CLUSTER_1_ARG_0:[a-z0-9]*]]: tensor, %[[CLUSTER_1_ARG_1:[a-z0-9]*]]: tensor) -> tensor // CHECK: %[[E_OUTPUT:[0-9]*]] = "tf.E"(%[[CLUSTER_1_ARG_0]]) // CHECK: %[[F_OUTPUT:[0-9]*]] = "tf.F"(%[[CLUSTER_1_ARG_1]], %[[E_OUTPUT]]) @@ -98,7 +97,7 @@ func @cluster_operands(%arg0: tensor) -> tensor { return %0 : tensor } -// CHECK: func @[[CLUSTER]] +// CHECK: func private @[[CLUSTER]] // CHECK-SAME: () -> tensor // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A"() // CHECK: return %[[A_OUTPUT]] diff --git a/tensorflow/compiler/mlir/tensorflow/tests/executor_tpuv1_outline_island/executor_tpuv1_outline_tpu_island.mlir b/tensorflow/compiler/mlir/tensorflow/tests/executor_tpuv1_outline_island/executor_tpuv1_outline_tpu_island.mlir index 
6bc4756f471..4d781a928cd 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/executor_tpuv1_outline_island/executor_tpuv1_outline_tpu_island.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/executor_tpuv1_outline_island/executor_tpuv1_outline_tpu_island.mlir @@ -47,11 +47,11 @@ func @func2(%arg0 : tensor) -> tensor { // CHECK: module // CHECK-SAME: @_tpu_v1_compat_outlined -// CHECK-LABEL: func @_tpu_v1_compat_outlined_func0(%arg0: tensor) -> tensor +// CHECK-LABEL: func nested @_tpu_v1_compat_outlined_func0(%arg0: tensor) -> tensor // CHECK-NEXT: tf.TPUReplicateMetadata // CHECK-NEXT: tf.opA -// CHECK-LABEL: func @_tpu_v1_compat_outlined_func1(%arg0: tensor, %arg1: tensor) -> (tensor, tensor) +// CHECK-LABEL: func nested @_tpu_v1_compat_outlined_func1(%arg0: tensor, %arg1: tensor) -> (tensor, tensor) // CHECK-NEXT: tf.TPUReplicateMetadata // CHECK-NEXT: tf.opA // CHECK-NEXT: tf.opA diff --git a/tensorflow/compiler/mlir/tensorflow/tests/functionalize-if.mlir b/tensorflow/compiler/mlir/tensorflow/tests/functionalize-if.mlir index f7bf404e937..8ff6a2d4f66 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/functionalize-if.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/functionalize-if.mlir @@ -27,14 +27,14 @@ func @foo() { // In the newly cloned function, check that we have a _tf.If operation and capture the then and else branch. 
-// CHECK: func @[[FUNCTIONALIZE_FUNC]] +// CHECK: func private @[[FUNCTIONALIZE_FUNC]] // CHECK: "tf.If" // CHECK-SAME: else_branch = @[[ELSE_FUNC:[A-Za-z0-9_]*]] // CHECK-SAME: then_branch = @[[THEN_FUNC:[A-Za-z0-9_]*]] // We expect the _tf.Add in the else func and the _tf.Mul in the then func -// CHECK: func @[[ELSE_FUNC]] +// CHECK: func private @[[ELSE_FUNC]] // CHECK: "tf.Add" -// CHECK: func @[[THEN_FUNC]] +// CHECK: func private @[[THEN_FUNC]] // CHECK: "tf.Mul" diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/arg-control-dep.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/arg-control-dep.pbtxt index 5578b45716b..fd2b968d634 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/arg-control-dep.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/arg-control-dep.pbtxt @@ -40,7 +40,7 @@ library { } } # Drop the control dependency on arg for the node "test" - # CHECK-LABEL: func @foo + # CHECK-LABEL: func private @foo # CHECK: tf_executor.island wraps "tf.Const"() node_def { name: "test" diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/empty-value-attr.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/empty-value-attr.pbtxt index 6c385bd219f..58f8cd0fca9 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/empty-value-attr.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/empty-value-attr.pbtxt @@ -80,6 +80,6 @@ versions { # CHECK-SAME: f = @[[FUNCTION:[a-zA-Z0-9_]*]] # Verify that callee has the unit attribute tf._input_shapes. 
-# CHECK: func @[[FUNCTION]] +# CHECK: func private @[[FUNCTION]] # CHECK: attributes # CHECK-SAME: tf._input_shapes{{[,}]}} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/force_shared_name_for_resource_ops.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/force_shared_name_for_resource_ops.pbtxt index 05302ed430c..f88c69a81d1 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/force_shared_name_for_resource_ops.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/force_shared_name_for_resource_ops.pbtxt @@ -90,6 +90,6 @@ library { # CHECK: tf.HashTableV2 # CHECK-SAME: shared_name = "hash_table_node" -# CHECK: func @create_resource +# CHECK: func private @create_resource # CHECK: tf.HashTableV2 # CHECK-SAME: shared_name = "hash_table_node@create_resource" diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/function-func-attr.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/function-func-attr.pbtxt index 6bd17892c43..3b8ac26bd92 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/function-func-attr.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/function-func-attr.pbtxt @@ -49,5 +49,5 @@ library { } } -# CHECK-DAG: func @custom_relu{{[0-9]*}}(){{.+}}tf._implements = #tf.func<@tensorflow.relu, {}>} -# CHECK-DAG: func @custom_embedding_matmul{{[0-9]*}}(){{.+}}tf._implements = #tf.func<@tensorflow.embedding_matmul, {key1 = 2 : i64, key2 = false}>} +# CHECK-DAG: func private @custom_relu{{[0-9]*}}(){{.+}}tf._implements = #tf.func<@tensorflow.relu, {}>} +# CHECK-DAG: func private @custom_embedding_matmul{{[0-9]*}}(){{.+}}tf._implements = #tf.func<@tensorflow.embedding_matmul, {key1 = 2 : i64, key2 = false}>} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-as-function.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-as-function.pbtxt index 9ccf06e9048..2fccbfd7086 100644 --- 
a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-as-function.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-as-function.pbtxt @@ -13,7 +13,7 @@ # CHECK: %[[ISLAND_2:.*]], %[[ISLAND_2_control:.*]] = tf_executor.island wraps "tf.StatefulPartitionedCall" # CHECK-SAME: f = @[[FUNC:[a-z0-9]*]] # CHECK: tf_executor.fetch %[[ISLAND_1]], %[[ISLAND_2]] : tensor<*xf32>, tensor<*xf32> -# CHECK: func @[[FUNC]](%arg0: tensor<*xf32> {tf._user_specified_name = "inputs"}, %arg1: tensor<*x!tf.resource>) -> tensor<*xf32> +# CHECK: func private @[[FUNC]](%arg0: tensor<*xf32> {tf._user_specified_name = "inputs"}, %arg1: tensor<*x!tf.resource>) -> tensor<*xf32> node { name: "args_0" diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-custom-operation.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-custom-operation.pbtxt index 304429c8783..c2f4d7aab5c 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-custom-operation.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-custom-operation.pbtxt @@ -55,4 +55,4 @@ versions { # site (a numerical suffix may be appended). 
# CHECK: "tf.LegacyCall"(%outputs) {_disable_call_shape_inference = false, device = "", f = @foo0} -# CHECK: func @foo0 +# CHECK: func private @foo0 diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-device-retval.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-device-retval.pbtxt index 327260e2860..ed3e184ba90 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-device-retval.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-device-retval.pbtxt @@ -74,7 +74,7 @@ library { } # The attribute "experimental_ints_on_device" and the return type INT32 # ensure that kDeviceRetOp is used instead of kRetOp - # CHECK-LABEL: func @foo + # CHECK-LABEL: func private @foo # CHECK: tf.experimental_ints_on_device = true # CHECK: return %{{.*}} tensor<{{.*}}i32> attr { diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-func-attr.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-func-attr.pbtxt index eb909834357..60307ddf3ae 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-func-attr.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-func-attr.pbtxt @@ -5,8 +5,8 @@ # Verify that the NameAttrList is properly turned into reference to functions on import # CHECK: tf.Case # CHECK-SAME: branches = [@[[FOO:[a-z0-9]+]], @[[BAR:[a-z0-9]+]]] -# CHECK-DAG: func @[[FOO]]() -# CHECK-DAG: func @[[BAR]]() +# CHECK-DAG: func private @[[FOO]]() +# CHECK-DAG: func private @[[BAR]]() node { name: "predicate" diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-input-shapes.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-input-shapes.pbtxt index 9d47292f806..6a7b30663b8 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-input-shapes.pbtxt +++ 
b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-input-shapes.pbtxt @@ -3,7 +3,7 @@ # Verify that the _input_shapes attribute of the FunctionDef is respected. # This also checks that the output type is correctly inferred based on # that. -#CHECK: func @identity_function0(%arg0: tensor) -> tensor +#CHECK: func private @identity_function0(%arg0: tensor) -> tensor node { name: "Placeholder" diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-name-bug.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-name-bug.pbtxt index 326e7b1ecd4..4b937a17af8 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-name-bug.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-name-bug.pbtxt @@ -124,5 +124,5 @@ versions { # CHECK: "tf.LegacyCall"() {_disable_call_shape_inference = false, device = "", f = @foo110} # CHECK: "tf.LegacyCall"() {_disable_call_shape_inference = false, device = "", f = @foo111} -# CHECK-LABEL: func @foo110() attributes {sym_visibility = "private"} -# CHECK-LABEL: func @foo111() attributes {sym_visibility = "private"} +# CHECK-LABEL: func private @foo110() +# CHECK-LABEL: func private @foo111() diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-resource-args.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-resource-args.pbtxt index 7cb7ac7e008..66847dc63e9 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-resource-args.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-resource-args.pbtxt @@ -91,7 +91,7 @@ library { # CHECK-SAME: {_disable_call_shape_inference = true, device = "", f = @test_func_name0} # CHECK: tf_executor.fetch # CHECK: return -# CHECK: func @test_func_name0 +# CHECK: func private @test_func_name0 # CHECK-SAME: tf._resource_arg_unique_id = 0 # CHECK-SAME: 
tf._resource_arg_unique_id = 0 # CHECK: tf_executor.graph diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-gradient-def.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-gradient-def.pbtxt index e7f7a59a343..b3b3d6dc917 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-gradient-def.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-gradient-def.pbtxt @@ -4,7 +4,7 @@ # links the function and its gradient. In MLIR a TF ops gradient function is # added to its list of function attributes. -# CHECK: func @foo0( +# CHECK: func private @foo0( # CHECK: tf.gradient = @foo_grad node { diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-input-func-arg-name-collision.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-input-func-arg-name-collision.pbtxt index bf210e51288..1e574b85d5b 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-input-func-arg-name-collision.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-input-func-arg-name-collision.pbtxt @@ -4,8 +4,8 @@ # functions with arg name that are the same as the graph input name # CHECK: func @main(%arg0: tensor<{{.*}}i32>) -> tensor<{{.*}}i32> -# CHECK: func @while_body -# CHECK: func @while_cond +# CHECK: func private @while_body +# CHECK: func private @while_cond node { name: "input" diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-library.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-library.pbtxt index 53e951473d0..eb593188888 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-library.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-library.pbtxt @@ -57,7 +57,7 @@ versions { # CHECK: "tf.LegacyCall"() {_disable_call_shape_inference = true, device = "", f = @foo0} # CHECK: "tf.LegacyCall"() {_disable_call_shape_inference = false, device = "", f 
= @bar0} -# CHECK-LABEL: func @foo0() attributes {sym_visibility = "private"} +# CHECK-LABEL: func private @foo0() # CHECK: "tf.LegacyCall"() {_disable_call_shape_inference = false, device = "", f = @bar0} -# CHECK-LABEL: func @bar0() attributes {sym_visibility = "private"} +# CHECK-LABEL: func private @bar0() diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-uint8-return.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-uint8-return.pbtxt index e732d8156a0..7c6353ae2a3 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-uint8-return.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-uint8-return.pbtxt @@ -106,5 +106,5 @@ versions { # CHECK: func @main # CHECK: "tf.PartitionedCall"() # CHECK-SAME: f = @[[FUNCTION:[A-Za-z0-9_]*]] -# CHECK: func @[[FUNCTION]]() -> tensor<*xui8> +# CHECK: func private @[[FUNCTION]]() -> tensor<*xui8> # CHECK: return {{.*}} : tensor<*xui8> diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/stateful-attribute.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/stateful-attribute.pbtxt index 7a395d2d345..66e53cde3c1 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/stateful-attribute.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/stateful-attribute.pbtxt @@ -86,6 +86,6 @@ versions { # CHECK-SAME: f = @[[FUNCTION_FOO:[a-zA-Z0-9_]*]] # Find callee and verify it has the stateful attribute set. 
-# CHECK: func @[[FUNCTION_FOO]] +# CHECK: func private @[[FUNCTION_FOO]] # CHECK-SAME: attributes # CHECK-SAME: tf.signature.is_stateful diff --git a/tensorflow/compiler/mlir/tensorflow/tests/guarantee-all-funcs-one-use.mlir b/tensorflow/compiler/mlir/tensorflow/tests/guarantee-all-funcs-one-use.mlir index d8903846158..8b2c36c7249 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/guarantee-all-funcs-one-use.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/guarantee-all-funcs-one-use.mlir @@ -12,7 +12,7 @@ func @f() { } // CHECK: func @g() -// CHECK: func @[[NEWG]]() attributes {sym_visibility = "private"} +// CHECK: func private @[[NEWG]]() func @g() { return } @@ -22,12 +22,12 @@ func @g() { // CHECK-LABEL: func @f // 2 copies of @g // CHECK-DAG: func @g{{.*}} -// CHECK-DAG: func @g{{.*}} +// CHECK-DAG: func private @g{{.*}} // 4 copies of @h // CHECK-DAG: func @h{{.*}} -// CHECK-DAG: func @h{{.*}} -// CHECK-DAG: func @h{{.*}} -// CHECK-DAG: func @h{{.*}} +// CHECK-DAG: func private @h{{.*}} +// CHECK-DAG: func private @h{{.*}} +// CHECK-DAG: func private @h{{.*}} func @f() { call @g() : () -> () call @g() : () -> () @@ -47,7 +47,7 @@ func @h() { // ----- // Handle error case of infinite recursion. 
// expected-error @+1 {{reached cloning limit}} -func @f() attributes {sym_visibility = "private"} { +func private @f() { call @f() : () -> () call @f() : () -> () return diff --git a/tensorflow/compiler/mlir/tensorflow/tests/isolate-placer.mlir b/tensorflow/compiler/mlir/tensorflow/tests/isolate-placer.mlir index 1f4f03466f1..d0800a36004 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/isolate-placer.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/isolate-placer.mlir @@ -33,4 +33,4 @@ func @foo(%arg0: tensor) -> tensor { // CHECK: "tf.Identity"([[CALL_RESULT_REG]]) // Match the function name -// CHECK: func @[[FUNCTION]] +// CHECK: func private @[[FUNCTION]] diff --git a/tensorflow/compiler/mlir/tensorflow/tests/promote_resources_to_args.mlir b/tensorflow/compiler/mlir/tensorflow/tests/promote_resources_to_args.mlir index 0813ee8db90..b18b03b6866 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/promote_resources_to_args.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/promote_resources_to_args.mlir @@ -299,7 +299,7 @@ func @main(%arg0: tensor) -> tensor<2xf32> { %2 = "tf.PartitionedCall"(%0) {config = "", config_proto = "", executor_type = "", f = @callee} : (tensor>>) -> tensor<2xf32> return %2 : tensor<2xf32> } -func @callee(%arg0: tensor>>) -> tensor<2xf32> attributes {sym_visibility = "private"} { +func private @callee(%arg0: tensor>>) -> tensor<2xf32> { %0 = "tf.ReadVariableOp"(%arg0) : (tensor>>) -> tensor<2xf32> return %0 : tensor<2xf32> } diff --git a/tensorflow/compiler/mlir/tensorflow/tests/region-control-flow-to-functional.mlir b/tensorflow/compiler/mlir/tensorflow/tests/region-control-flow-to-functional.mlir index 3e8935b699e..7059401da24 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/region-control-flow-to-functional.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/region-control-flow-to-functional.mlir @@ -1,9 +1,9 @@ // RUN: tf-opt %s -tf-region-control-flow-to-functional -split-input-file | FileCheck %s // Simple 
IfRegion -// CHECK: func @tf.IfRegion_else(%arg0: tensor<*xf32>) -> tensor<*xf32> +// CHECK: func private @tf.IfRegion_else(%arg0: tensor<*xf32>) -> tensor<*xf32> // CHECK-NEXT: "tf.Neg" -// CHECK: func @tf.IfRegion_then(%arg0: tensor<*xf32>) -> tensor<*xf32> +// CHECK: func private @tf.IfRegion_then(%arg0: tensor<*xf32>) -> tensor<*xf32> // CHECK-NEXT: "tf.Abs" func @testSimple(%arg0: tensor, %arg1: tensor<*xf32>) -> tensor<*xf32> { // CHECK: "tf.If" @@ -24,9 +24,9 @@ func @testSimple(%arg0: tensor, %arg1: tensor<*xf32>) -> tensor<*xf32> { // ----- // Use if condition inside the regions -// CHECK: func @tf.IfRegion_else(%arg0: tensor, %arg1: tensor<2xf32>, %arg2: tensor<2xf32>, %arg3: tensor<2xf32>) -> tensor<2xf32> +// CHECK: func private @tf.IfRegion_else(%arg0: tensor, %arg1: tensor<2xf32>, %arg2: tensor<2xf32>, %arg3: tensor<2xf32>) -> tensor<2xf32> // CHECK-NEXT: "tf.Select"(%arg0, %arg2, %arg3) -// CHECK: func @tf.IfRegion_then(%arg0: tensor, %arg1: tensor<2xf32>, %arg2: tensor<2xf32>, %arg3: tensor<2xf32>) -> tensor<2xf32> +// CHECK: func private @tf.IfRegion_then(%arg0: tensor, %arg1: tensor<2xf32>, %arg2: tensor<2xf32>, %arg3: tensor<2xf32>) -> tensor<2xf32> // CHECK-NEXT: "tf.Select"(%arg0, %arg1, %arg2) func @testIfCondition(%arg0: tensor, %arg1: tensor<2xf32>) -> tensor<2xf32> { %0 = "tf.Add"(%arg1, %arg1) : (tensor<2xf32>, tensor<2xf32>) -> tensor<2xf32> @@ -48,9 +48,9 @@ func @testIfCondition(%arg0: tensor, %arg1: tensor<2xf32>) -> tensor<2xf32> // Constant sinking for IfRegion -// CHECK: func @tf.IfRegion_else() -> tensor<2xf32> +// CHECK: func private @tf.IfRegion_else() -> tensor<2xf32> // CHECK-NEXT: constant dense<1.0 -// CHECK: func @tf.IfRegion_then() -> tensor<2xf32> +// CHECK: func private @tf.IfRegion_then() -> tensor<2xf32> // CHECK-NEXT: constant dense<0.0 func @testIfConstant(%arg0: tensor) -> tensor<2xf32> { %cst_zero = constant dense<0.0> : tensor<2xf32> @@ -67,18 +67,18 @@ func @testIfConstant(%arg0: tensor) -> tensor<2xf32> { // 
----- // Nested IfRegions -// CHECK: func @tf.IfRegion1_else +// CHECK: func private @tf.IfRegion1_else // CHECK-NEXT: "tf.Acos" // CHECK-NEXT: "tf.Abs" -// CHECK: func @tf.IfRegion1_then +// CHECK: func private @tf.IfRegion1_then // CHECK-NEXT: "tf.LogicalNot" // CHECK-NEXT: "tf.Asin" // CHECK-NEXT: "tf.If"({{.+}}) {else_branch = @tf.IfRegion_else, {{.+}} then_branch = @tf.IfRegion_then} -// CHECK: func @tf.IfRegion_else +// CHECK: func private @tf.IfRegion_else // CHECK-NEXT: "tf.Neg" -// CHECK: func @tf.IfRegion_then +// CHECK: func private @tf.IfRegion_then // CHECK-NEXT: "tf.Abs" func @testNested(%arg0: tensor, %arg1: tensor<*xf32>) -> tensor<*xf32> { @@ -169,10 +169,10 @@ func @testIf2Result(%arg0: tensor, %arg1: tensor<2xf32>) -> tensor<2xf32> { // ----- // No inputs, some outputs for IfRegion -// CHECK: func @tf.IfRegion_else() -> tensor<2xf32> +// CHECK: func private @tf.IfRegion_else() -> tensor<2xf32> // CHECK-NEXT: constant dense<1.000000e+00> // CHECK-NEXT: "tf.Neg" -// CHECK: func @tf.IfRegion_then() -> tensor<2xf32> +// CHECK: func private @tf.IfRegion_then() -> tensor<2xf32> // CHECK-NEXT: constant dense<0.000000e+00> // CHECK-NEXT: "tf.Abs" func @testSimple(%arg0: tensor) -> tensor<2xf32> { @@ -193,9 +193,9 @@ func @testSimple(%arg0: tensor) -> tensor<2xf32> { // No outputs, some inputs for IfRegion // -// CHECK: func @tf.IfRegion_else(%arg0: tensor<*xf32>) +// CHECK: func private @tf.IfRegion_else(%arg0: tensor<*xf32>) // CHECK-NEXT: "tf.Neg" -// CHECK: func @tf.IfRegion_then(%arg0: tensor<*xf32>) +// CHECK: func private @tf.IfRegion_then(%arg0: tensor<*xf32>) // CHECK-NEXT: "tf.Abs" func @printer(tensor<*xf32>) -> () func @testNoOutputs(%arg0: tensor, %arg1: tensor<*xf32>) -> () { @@ -214,9 +214,9 @@ func @testNoOutputs(%arg0: tensor, %arg1: tensor<*xf32>) -> () { // ----- // Check ToBool folding for IfRegion -// CHECK: func @tf.IfRegion_else(%arg0: tensor<*xf32>) -> tensor<*xf32> +// CHECK: func private @tf.IfRegion_else(%arg0: tensor<*xf32>) -> 
tensor<*xf32> // CHECK-NEXT: "tf.Neg" -// CHECK: func @tf.IfRegion_then(%arg0: tensor<*xf32>) -> tensor<*xf32> +// CHECK: func private @tf.IfRegion_then(%arg0: tensor<*xf32>) -> tensor<*xf32> // CHECK-NEXT: "tf.Abs" // CHECK-LABEL: @testToBoolFold func @testToBoolFold(%arg0: tensor, %arg1: tensor<*xf32>) -> tensor<*xf32> { @@ -237,11 +237,11 @@ func @testToBoolFold(%arg0: tensor, %arg1: tensor<*xf32>) -> tensor<*xf32> // ----- // Simple WhileRegion -// CHECK: func @tf.WhileRegion_body{{.+}}{sym_visibility = "private"} +// CHECK: func private @tf.WhileRegion_body{{.+}} // CHECK: "tf.Add" // CHECK: constant dense<1> // CHECK: "tf.Sub" -// CHECK:func @tf.WhileRegion_cond{{.+}}{sym_visibility = "private"} +// CHECK:func private @tf.WhileRegion_cond{{.+}} // CHECK: constant dense<0> // CHECK: "tf.NotEqual" // CHECK-LABEL: testValidWhileRegion @@ -275,11 +275,11 @@ func @testValidWhileRegion(%arg0 : tensor<*xf32>, %arg1 : tensor) -> tensor // ----- // WhileRegion with type mismatch -// CHECK: func @tf.WhileRegion_body{{.+}}{sym_visibility = "private"} +// CHECK: func private @tf.WhileRegion_body{{.+}} // CHECK: "tf.Add" // CHECK: constant dense<1> // CHECK: "tf.Sub" -// CHECK:func @tf.WhileRegion_cond{{.+}}{sym_visibility = "private"} +// CHECK:func private @tf.WhileRegion_cond{{.+}} // CHECK: constant dense<0> // CHECK: "tf.NotEqual" // CHECK-LABEL: testWhileRegionTypeMismatch @@ -309,11 +309,11 @@ func @testWhileRegionTypeMismatch(%arg0 : tensor<*xf32>, %arg1 : tensor) -> // ----- // WhileRegion with constant sinking -// CHECK: func @tf.WhileRegion_body{{.+}}{sym_visibility = "private"} +// CHECK: func private @tf.WhileRegion_body{{.+}} // CHECK: constant dense<1> // CHECK: "tf.Add" // CHECK: "tf.Sub" -// CHECK:func @tf.WhileRegion_cond{{.+}}{sym_visibility = "private"} +// CHECK:func private @tf.WhileRegion_cond{{.+}} // CHECK: constant dense<0> // CHECK: "tf.NotEqual" // CHECK-LABEL: testWhileRegionConstantSink @@ -342,12 +342,12 @@ func 
@testWhileRegionConstantSink(%arg0 : tensor<*xf32>, %arg1 : tensor) -> // ----- // WhileRegion with implicitly captured extern value in cond -// CHECK: func @tf.WhileRegion_body(%arg0: tensor<*xf32>, %arg1: tensor, %arg2: tensor) +// CHECK: func private @tf.WhileRegion_body(%arg0: tensor<*xf32>, %arg1: tensor, %arg2: tensor) // CHECK: "tf.Add" // CHECK: constant dense<1> // CHECK: "tf.Sub" // CHECK: return %{{.+}}, %{{.+}}, %arg2 : tensor<*xf32>, tensor, tensor -// CHECK: func @tf.WhileRegion_cond(%arg0: tensor<*xf32>, %arg1: tensor, %arg2: tensor) +// CHECK: func private @tf.WhileRegion_cond(%arg0: tensor<*xf32>, %arg1: tensor, %arg2: tensor) // CHECK: "tf.NotEqual"(%arg1, %arg2) // CHECK-LABEL: testWhileRegionExternInCond func @testWhileRegionExternInCond(%arg0 : tensor<*xf32>, %arg1 : tensor, %arg2 : tensor) -> tensor<*xf32> { @@ -376,12 +376,12 @@ func @testWhileRegionExternInCond(%arg0 : tensor<*xf32>, %arg1 : tensor, %a // ----- // WhileRegion with implicitly captured extern value in body -// CHECK: func @tf.WhileRegion_body(%arg0: tensor<*xf32>, %arg1: tensor, %arg2: tensor) +// CHECK: func private @tf.WhileRegion_body(%arg0: tensor<*xf32>, %arg1: tensor, %arg2: tensor) // CHECK: %0 = "tf.Add"(%arg0, %arg0) // CHECK: %1 = "tf.Sub"(%arg1, %arg2) // CHECK: return %0, %1, %arg2 -// CHECK: func @tf.WhileRegion_cond(%arg0: tensor<*xf32>, %arg1: tensor, %arg2: tensor) +// CHECK: func private @tf.WhileRegion_cond(%arg0: tensor<*xf32>, %arg1: tensor, %arg2: tensor) // CHECK: constant dense<0> // CHECK: "tf.NotEqual" @@ -412,9 +412,9 @@ func @testWhileRegionExternInBody(%arg0 : tensor<*xf32>, %arg1 : tensor, %a // ----- // WhileRegion with implicitly captured extern value in cond and body -// CHECK: func @tf.WhileRegion_body(%arg0: tensor<*xf32>, %arg1: tensor, %arg2: tensor, %arg3: tensor) +// CHECK: func private @tf.WhileRegion_body(%arg0: tensor<*xf32>, %arg1: tensor, %arg2: tensor, %arg3: tensor) // CHECK: return %{{.+}}, %{{.+}}, %arg2, %arg3 -// CHECK: func 
@tf.WhileRegion_cond(%arg0: tensor<*xf32>, %arg1: tensor, %arg2: tensor, %arg3: tensor) +// CHECK: func private @tf.WhileRegion_cond(%arg0: tensor<*xf32>, %arg1: tensor, %arg2: tensor, %arg3: tensor) // CHECK-LABEL: testWhileRegionExternInBodyAndCond func @testWhileRegionExternInBodyAndCond(%arg0 : tensor<*xf32>, %arg1 : tensor, %arg2 : tensor) -> tensor<*xf32> { %cst = constant dense<4> : tensor @@ -443,9 +443,9 @@ func @testWhileRegionExternInBodyAndCond(%arg0 : tensor<*xf32>, %arg1 : tensor, %arg1: tensor, %arg2: tensor) +// CHECK: func private @tf.WhileRegion_body(%arg0: tensor<*xf32>, %arg1: tensor, %arg2: tensor) // CHECK: return %{{.+}}, %{{.+}}, %arg2 -// CHECK: func @tf.WhileRegion_cond(%arg0: tensor<*xf32>, %arg1: tensor, %arg2: tensor) +// CHECK: func private @tf.WhileRegion_cond(%arg0: tensor<*xf32>, %arg1: tensor, %arg2: tensor) // CHECK-LABEL: testWhileRegionSameExternInBodyAndCond func @testWhileRegionSameExternInBodyAndCond(%arg0 : tensor<*xf32>, %arg1 : tensor, %arg2 : tensor) -> tensor<*xf32> { %cst = constant dense<4> : tensor @@ -559,9 +559,9 @@ func @testWhileRegionTrivialMultipleCasts(%arg0 : tensor<*xf32>, %arg1 : tensor< // ----- // Almost trivially transformable with extern values -// CHECK: func @tf.WhileRegion_body +// CHECK: func private @tf.WhileRegion_body // CHECK: call @while_body -// CHECK: @tf.WhileRegion_cond +// CHECK: func private @tf.WhileRegion_cond // CHECK: call @while_cond // CHECK-LABEL: testWhileRegionExtern func @while_cond(%arg0 : tensor<*xf32>, %arg1 : tensor) -> tensor @@ -589,9 +589,9 @@ func @testWhileRegionExtern(%arg0 : tensor<*xf32>, %arg1 : tensor) -> tenso // ----- // Almost trivially transformable, mismatching block arguments -// CHECK: func @tf.WhileRegion_body +// CHECK: func private @tf.WhileRegion_body // CHECK: call @while_body -// CHECK: @tf.WhileRegion_cond +// CHECK: func private @tf.WhileRegion_cond // CHECK: call @while_cond // CHECK-LABEL: testWhileRegionBlockArgMismatch func @while_cond(%arg0 : 
tensor, %arg1 : tensor<*xf32>) -> tensor diff --git a/tensorflow/compiler/mlir/tensorflow/tests/resource_inlining.mlir b/tensorflow/compiler/mlir/tensorflow/tests/resource_inlining.mlir index 788c6e2f5a1..af8e7206ea5 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/resource_inlining.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/resource_inlining.mlir @@ -17,8 +17,8 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 12 : i32, p return %1 : tensor } - // CHECK-NOT: func @callee - func @callee(%arg0: tensor) -> tensor<*xf32> attributes {sym_visibility = "private", tf.signature.is_stateful} { + // CHECK-NOT: func private @callee + func private @callee(%arg0: tensor) -> tensor<*xf32> attributes {tf.signature.is_stateful} { %0 = "tf.ReadVariableOp"(%arg0) {device = ""} : (tensor) -> tensor<*xf32> return %0 : tensor<*xf32> } diff --git a/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir b/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir index 6cda668ab0f..5560d369db6 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir @@ -644,7 +644,7 @@ func @callee(%arg0: tensor, %arg1: tensor<*x!tf.resource>>, %ar %2 = "tf.AddV2"(%1, %arg2) : (tensor, tensor) -> tensor return %2 : tensor } -// CHECK: func @callee_resource_lifted(%[[A0:.*]]: tensor, %[[A1:.*]]: tensor, %[[A2:.*]]: tensor) -> tensor +// CHECK: func private @callee_resource_lifted(%[[A0:.*]]: tensor, %[[A1:.*]]: tensor, %[[A2:.*]]: tensor) -> tensor // CHECK-NEXT: %[[ADD0:.*]] = "tf.AddV2"(%[[A1]], %[[A0]]) // CHECK-NEXT: %[[ADD1:.*]] = "tf.AddV2"(%[[ADD0]], %[[A2]]) // CHECK-NEXT: return %[[ADD1]] @@ -691,7 +691,7 @@ func @callee(%arg0: tensor<*x!tf.resource>>, %arg1: tensor<*x!tf.res "tf.AssignVariableOp"(%arg0, %1) {dtype = i32} : (tensor<*x!tf.resource>>, tensor) -> () return %arg0 : tensor<*x!tf.resource>> } -// CHECK: func 
@callee_resource_lifted(%[[A0:.*]]: tensor, %[[A1:.*]]: tensor, %[[A2:.*]]: tensor) -> tensor +// CHECK: func private @callee_resource_lifted(%[[A0:.*]]: tensor, %[[A1:.*]]: tensor, %[[A2:.*]]: tensor) -> tensor // CHECK-NEXT: %[[ADD:.*]] = "tf.AddV2"(%[[A1]], %[[A2]]) // CHECK-NEXT: return %[[ADD]] @@ -743,7 +743,7 @@ func @callee(%arg0: tensor<*x!tf.resource>>) -> tensor { return %1 : tensor } -// CHECK: func @callee_resource_lifted(%[[A0:.*]]: tensor) -> tensor +// CHECK: func private @callee_resource_lifted(%[[A0:.*]]: tensor) -> tensor // CHECK-NEXT: return %[[A0]] // ----- diff --git a/tensorflow/compiler/mlir/tensorflow/tests/stack_ops_decomposition.mlir b/tensorflow/compiler/mlir/tensorflow/tests/stack_ops_decomposition.mlir index 17329050f3e..2cc79637fc5 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/stack_ops_decomposition.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/stack_ops_decomposition.mlir @@ -287,14 +287,14 @@ func @main(%arg0: tensor) -> () { } // CHECK: func @callee(%[[AARG0:.*]]: tensor, %[[AARG1:.*]]: tensor) -> tensor -func @callee(%arg0: tensor, %arg1: tensor) -> tensor attributes {sym_visibility = "public"} { +func @callee(%arg0: tensor, %arg1: tensor) -> tensor { %elem = "tf._SomeOp"(%arg1) : (tensor) -> tensor // CHECK: tf.StackPushV2" %push = "tf.StackPushV2"(%arg0, %elem) {swap_memory = false} : (tensor, tensor) -> tensor return %arg0 : tensor } -// CHECK: func @callee_stack_decomposed(%[[ARG0:.*]]: tensor>>, %[[ARG1:.*]]: tensor, %[[ARG2:.*]]: tensor>>) +// CHECK: func private @callee_stack_decomposed(%[[ARG0:.*]]: tensor>>, %[[ARG1:.*]]: tensor, %[[ARG2:.*]]: tensor>>) // CHECK-NOT: "tf.StackPushV2" // CHECK: %[[UPDATE:.*]] = "tf.XlaDynamicUpdateSlice" // CHECK: "tf.AssignVariableOp"(%[[TARG0:.*]], %[[UPDATE]]) @@ -326,8 +326,8 @@ func @main(%arg0: tensor) -> () { return } -// CHECK: func @callee(%[[ARG0:.*]]: tensor>>, %[[ARG1:.*]]: tensor, %[[ARG2:.*]]: tensor>>) -func @callee(%arg0: tensor, %arg1: tensor) -> tensor 
attributes {sym_visibility = "private"} { +// CHECK: func private @callee(%[[ARG0:.*]]: tensor>>, %[[ARG1:.*]]: tensor, %[[ARG2:.*]]: tensor>>) +func private @callee(%arg0: tensor, %arg1: tensor) -> tensor { %elem = "tf._SomeOp"(%arg1) : (tensor) -> tensor // CHECK-NOT: "tf.StackPushV2" // CHECK: %[[UPDATE:.*]] = "tf.XlaDynamicUpdateSlice" @@ -348,7 +348,7 @@ func @main() -> () { return } // CHECK: func @callee() -func @callee() -> () attributes {sym_visibility = "public"} { +func @callee() -> () { %max_size = "tf.Const"() {value = dense<10> : tensor} : () -> tensor // CHECK-NOT: tf.Stack %stack = "tf.StackV2"(%max_size) {elem_type = f32, stack_name = "s"} : (tensor) -> tensor diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tensor_array_ops_decomposition.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tensor_array_ops_decomposition.mlir index 8200cedaea9..4d60e46e206 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tensor_array_ops_decomposition.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tensor_array_ops_decomposition.mlir @@ -432,7 +432,7 @@ func @main() -> () { } // CHECK-LABEL: func @callee // CHECK-SAME: (%[[OCARG0:.*]]: tensor) -> tensor -func @callee(%arg0: tensor) -> tensor attributes {sym_visibility = "public"} { +func @callee(%arg0: tensor) -> tensor { %const1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor %elem = "tf._SomeOp"() : () -> tensor<3xf32> %flow = "tf.Const"() {value = dense<1.0> : tensor} : () -> tensor @@ -442,7 +442,7 @@ func @callee(%arg0: tensor) -> tensor attributes {sy %gwrite2 = "tf.TensorArrayWriteV3"(%grad2#0, %const1, %elem, %grad2#1) : (tensor, tensor, tensor<3xf32>, tensor) -> tensor return %arg0 : tensor } -// CHECK: func @callee_tensorarray_decomposed(%[[CARG0:.*]]: tensor>>, %[[CARG1:.*]]: tensor>>, %[[CARG2:.*]]: tensor>>) +// CHECK: func private @callee_tensorarray_decomposed(%[[CARG0:.*]]: tensor>>, %[[CARG1:.*]]: tensor>>, %[[CARG2:.*]]: tensor>>) // CHECK: %[[READ1:.*]] = 
"tf.ReadVariableOp"(%[[CARG1]]) : (tensor>>) -> tensor<5x3xf32> // CHECK: %[[UPDATE1:.*]] = "tf.XlaDynamicUpdateSlice"(%[[READ1]], // CHECK: "tf.AssignVariableOp"(%[[CARG1]], %[[UPDATE1]]) @@ -480,8 +480,8 @@ func @main() -> () { %read = "tf.TensorArrayReadV3"(%call2, %index, %ta#1) : (tensor, tensor, tensor) -> tensor<3xf32> return } -// CHECK: func @callee(%[[CARG0:.*]]: tensor>>, %[[CARG1:.*]]: tensor>>, %[[CARG2:.*]]: tensor>>) -func @callee(%arg0: tensor) -> tensor attributes {sym_visibility = "private"} { +// CHECK: func private @callee(%[[CARG0:.*]]: tensor>>, %[[CARG1:.*]]: tensor>>, %[[CARG2:.*]]: tensor>>) +func private @callee(%arg0: tensor) -> tensor { // CHECK: %[[READ1:.*]] = "tf.ReadVariableOp"(%[[CARG1]]) : (tensor>>) -> tensor<5x3xf32> // CHECK: %[[UPDATE1:.*]] = "tf.XlaDynamicUpdateSlice"(%[[READ1]], // CHECK: "tf.AssignVariableOp"(%[[CARG1]], %[[UPDATE1]]) @@ -508,8 +508,8 @@ func @main() -> () { %call = "tf.PartitionedCall"() {f = @callee, config = "", config_proto = "", executor_type = ""} : () -> tensor return } -// CHECK: func @callee() -> tensor -func @callee() -> tensor attributes {sym_visibility = "public"} { +// CHECK: func private @callee() -> tensor +func @callee() -> tensor { %size = "tf.Const"() {value = dense<5> : tensor} : () -> tensor // CHECK: "tf.MlirLocalVarOp"() : () -> tensor>> // CHECK: "tf.AssignVariableOp" @@ -567,7 +567,7 @@ func @main() -> () { return } -// CHECK-LABEL: func @callee +// CHECK-LABEL: func private @callee // CHECK-SAME: %[[VAR:.*]]: tensor>>, %[[GVAR:.*]]: tensor>> func @callee(%arg0: tensor) -> tensor attributes {sym_visibility = "private"} { %index = "tf.Const"() {value = dense<1> : tensor} : () -> tensor diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tensor_list_ops_decomposition.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tensor_list_ops_decomposition.mlir index 09a2dcb6713..fa3615680f2 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tensor_list_ops_decomposition.mlir +++ 
b/tensorflow/compiler/mlir/tensorflow/tests/tensor_list_ops_decomposition.mlir @@ -472,14 +472,14 @@ func @main(%arg0: tensor) -> () { } // CHECK: func @callee(%[[AARG0:.*]]: tensor>>, %[[AARG1:.*]]: tensor) -> tensor>> -func @callee(%arg0: tensor>>, %arg1: tensor) -> tensor>> attributes {sym_visibility = "public"} { +func @callee(%arg0: tensor>>, %arg1: tensor) -> tensor>> { %elem = "tf._SomeOp"(%arg1) : (tensor) -> tensor // CHECK: "tf.TensorListPushBack" %push = "tf.TensorListPushBack"(%arg0, %elem) : (tensor>>, tensor) -> tensor>> return %push : tensor>> } -// CHECK: func @callee_tensorlist_decomposed(%[[ARG0:.*]]: tensor<10xf32>, %[[ARG1:.*]]: tensor, %[[ARG2:.*]]: tensor<1xi32>) -> (tensor<10xf32>, tensor<1xi32>) +// CHECK: func private @callee_tensorlist_decomposed(%[[ARG0:.*]]: tensor<10xf32>, %[[ARG1:.*]]: tensor, %[[ARG2:.*]]: tensor<1xi32>) -> (tensor<10xf32>, tensor<1xi32>) // CHECK-NOT: "tf.TensorListPushBack" // CHECK: %[[UPDATE:.*]] = "tf.XlaDynamicUpdateSlice" // CHECK: %[[CONST1:.*]] = "tf.Const"() {value = dense<1> : tensor<1xi32>} : () -> tensor<1xi32> @@ -514,7 +514,7 @@ func @main(%arg0: tensor) -> () { return } -// CHECK: func @callee(%[[ARG0:.*]]: tensor<10xf32>, %[[ARG1:.*]]: tensor, %[[ARG2:.*]]: tensor<1xi32>) -> (tensor<10xf32>, tensor<1xi32>) +// CHECK: func private @callee(%[[ARG0:.*]]: tensor<10xf32>, %[[ARG1:.*]]: tensor, %[[ARG2:.*]]: tensor<1xi32>) -> (tensor<10xf32>, tensor<1xi32>) func @callee(%arg0: tensor>>, %arg1: tensor) -> tensor>> attributes {sym_visibility = "private"} { %elem = "tf._SomeOp"(%arg1) : (tensor) -> tensor @@ -533,12 +533,12 @@ func @callee(%arg0: tensor>>, %arg1: tensor) -> tens // Tests PartitionedCall op with no signature change on callee. 
// CHECK-LABEL: func @main -func @main() -> () { +func @main() { "tf.PartitionedCall"() {f = @callee, config = "", config_proto = "", executor_type = ""} : () -> () return } -// CHECK: func @callee() -func @callee() -> () attributes {sym_visibility = "public"} { +// CHECK: func private @callee() +func @callee() { %elem_shape = "tf.Const"() {value = dense<> : tensor<0xi32>} : () -> tensor<0xi32> %max_size = "tf.Const"() {value = dense<10> : tensor} : () -> tensor // CHECK-NOT: tf.EmptyTensorList diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/call_to_exported.py b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/call_to_exported.py index 694942f4b00..cb8e32df249 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/call_to_exported.py +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/call_to_exported.py @@ -62,7 +62,7 @@ class TestModule(tf.Module): # CHECK-SAME: attributes{{.*}}tf_saved_model.exported_names = ["caller"] # CHECK: "tf.StatefulPartitionedCall"{{.*}}f = @[[CALLEE_INTERNAL]] # - # CHECK: func @[[CALLEE_INTERNAL]] + # CHECK: func private @[[CALLEE_INTERNAL]] # CHECK-NOT: tf_saved_model.exported_names @tf.function(input_signature=[tf.TensorSpec([], tf.float32)]) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/control_flow_duplicate_v1.py b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/control_flow_duplicate_v1.py index 78fde0dca01..ab786ac8300 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/control_flow_duplicate_v1.py +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/control_flow_duplicate_v1.py @@ -35,8 +35,8 @@ from tensorflow.compiler.mlir.tensorflow.tests.tf_saved_model import common_v1 # CHECK-SAME: else_branch = @[[else]] # CHECK-SAME: then_branch = @[[then]] -# CHECK: func @[[else]]( -# CHECK: func @[[then]]( +# CHECK: func private @[[else]]( +# CHECK: func private @[[then]]( def Test(): diff --git 
a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_optimize_global_tensors_interprocedural.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_optimize_global_tensors_interprocedural.mlir index 14a0006cd3b..636bd608f1c 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_optimize_global_tensors_interprocedural.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_optimize_global_tensors_interprocedural.mlir @@ -111,14 +111,14 @@ module attributes {tf_saved_model.semantics} { return %val : tensor } - // CHECK: func @f_callee(%arg0: tensor<*x!tf.resource>) -> tensor - func @f_callee(%arg0: tensor<*x!tf.resource>) -> tensor attributes {sym_visibility = "private"} { + // CHECK: func private @f_callee(%arg0: tensor<*x!tf.resource>) -> tensor + func private @f_callee(%arg0: tensor<*x!tf.resource>) -> tensor { %val = "tf.PartitionedCall"(%arg0) {config = "", config_proto = "", executor_type = "", f = @f_callee_callee} : (tensor<*x!tf.resource>) -> (tensor) return %val : tensor } - // CHECK: func @f_callee_callee(%arg0: tensor<*x!tf.resource>) -> tensor - func @f_callee_callee(%arg0: tensor<*x!tf.resource>) -> tensor attributes {sym_visibility = "private"} { + // CHECK: func private @f_callee_callee(%arg0: tensor<*x!tf.resource>) -> tensor + func private @f_callee_callee(%arg0: tensor<*x!tf.resource>) -> tensor { %c0 = "tf.Const"() { value = dense<1.0> : tensor } : () -> tensor "tf.AssignVariableOp"(%arg0, %c0) : (tensor<*x!tf.resource>, tensor) -> () return %c0 : tensor @@ -145,14 +145,14 @@ module attributes {tf_saved_model.semantics} { return %val : tensor } - // CHECK: func @f_callee(%arg0: tensor<*x!tf.resource>) -> tensor - func @f_callee(%arg0: tensor<*x!tf.resource>) -> tensor attributes {sym_visibility = "private"} { + // CHECK: func private @f_callee(%arg0: tensor<*x!tf.resource>) -> tensor + func private @f_callee(%arg0: tensor<*x!tf.resource>) -> tensor { %val = "tf.PartitionedCall"(%arg0) {config = "", 
config_proto = "", executor_type = "", f = @f_callee_callee} : (tensor<*x!tf.resource>) -> (tensor) return %val : tensor } - // CHECK: func @f_callee_callee(%arg0: tensor<*x!tf.resource>) -> tensor - func @f_callee_callee(%arg0: tensor<*x!tf.resource>) -> tensor attributes {sym_visibility = "private"} { + // CHECK: func private @f_callee_callee(%arg0: tensor<*x!tf.resource>) -> tensor + func private @f_callee_callee(%arg0: tensor<*x!tf.resource>) -> tensor { %c0 = "tf.Const"() { value = dense<1.0> : tensor } : () -> tensor "tf.AssignVariableOp"(%arg0, %c0) : (tensor<*x!tf.resource>, tensor) -> () return %c0 : tensor @@ -178,14 +178,14 @@ module attributes {tf_saved_model.semantics} { } - // CHECK: func @f(%arg0: tensor<*x!tf.resource>) -> tensor - func @f(%arg0: tensor<*x!tf.resource>) -> tensor attributes {sym_visibility = "private"} { + // CHECK: func private @f(%arg0: tensor<*x!tf.resource>) -> tensor + func private @f(%arg0: tensor<*x!tf.resource>) -> tensor { %val = "tf.PartitionedCall"(%arg0) {config = "", config_proto = "", executor_type = "", f = @g} : (tensor<*x!tf.resource>) -> (tensor) return %val : tensor } - // CHECK: func @g(%arg0: tensor<*x!tf.resource>) -> tensor - func @g(%arg0: tensor<*x!tf.resource>) -> tensor attributes {sym_visibility = "private"} { + // CHECK: func private @g(%arg0: tensor<*x!tf.resource>) -> tensor + func private @g(%arg0: tensor<*x!tf.resource>) -> tensor { %val = "tf.PartitionedCall"(%arg0) {config = "", config_proto = "", executor_type = "", f = @f} : (tensor<*x!tf.resource>) -> (tensor) return %val : tensor } @@ -211,8 +211,8 @@ module attributes {tf_saved_model.semantics} { } - // CHECK: func @f(%arg0: tensor<*x!tf.resource>) -> tensor - func @f(%arg0: tensor<*x!tf.resource>) -> tensor attributes {sym_visibility = "private"} { + // CHECK: func private @f(%arg0: tensor<*x!tf.resource>) -> tensor + func private @f(%arg0: tensor<*x!tf.resource>) -> tensor { %c0 = "tf.Const"() { value = dense<1.0> : tensor } : () -> tensor 
"tf.AssignAddVariableOp"(%arg0, %c0) : (tensor<*x!tf.resource>, tensor) -> () return %c0 : tensor diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir index ef7b52cd978..cbeb2b389d8 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir @@ -859,7 +859,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-SAME: mlir_module // CHECK-SAME: func @main // CHECK-SAME: tf.B - // CHECK-SAME: func @nested_func + // CHECK-SAME: func private @nested_func // CHECK-SAME: tf.D // CHECK-NOT: func = @tpu0_func // CHECK: device = "/job:worker/replica:0/task:0/device:CPU:0" @@ -908,7 +908,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-SAME: mlir_module // CHECK-SAME: func @main // CHECK-SAME: tf.B - // CHECK-SAME: func @referenced_func + // CHECK-SAME: func private @referenced_func // CHECK-SAME: tf.D // CHECK-NOT: func = @tpu0_func // CHECK: "tf_device.launch" @@ -1007,7 +1007,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-SAME: func @main // CHECK-SAME: tf.B // CHECK-COUNT-2: call @referenced_func - // CHECK-COUNT-1: func @referenced_func + // CHECK-COUNT-1: func private @referenced_func // CHECK-SAME: tf.D // CHECK-NOT: func = @tpu0_func // CHECK: "tf_device.launch" @@ -1161,13 +1161,13 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-SAME: mlir_module // CHECK-SAME: func @main // CHECK-SAME: tf.B - // CHECK-SAME: func @referenced_func3 + // CHECK-SAME: func private @referenced_func3 // CHECK-SAME: tf.I - // CHECK-SAME: func @referenced_func2 + // CHECK-SAME: func private @referenced_func2 // CHECK-SAME: tf.H - // CHECK-SAME: func @referenced_func1 + // CHECK-SAME: func private @referenced_func1 // CHECK-SAME: tf.G - // CHECK-SAME: func 
@referenced_func0 + // CHECK-SAME: func private @referenced_func0 // CHECK-SAME: tf.F // CHECK: "tf_device.launch" // CHECK-NEXT: "tf.TPUCompileSucceededAssert"(%[[COMPILE_OUTPUT]]#0) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_space_to_depth_pass.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_space_to_depth_pass.mlir index ceecb3e72d9..3252cbca305 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_space_to_depth_pass.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_space_to_depth_pass.mlir @@ -44,9 +44,9 @@ module attributes {tf.devices = {"/job:localhost/replica:0/task:0/device:CPU:0" %10 = "tf.Identity"(%9) {device = ""} : (tensor) -> tensor return %10 : tensor } - // CHECK-LABEL: func @_func - // CHECK-SAME: [[FUNCINPUT0:.*]]: tensor<2x112x112x12xf32> {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, [[FUNCINPUT1:%.*]]: tensor<7x7x3x64xf32> {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, [[FUNCINPUT2:%.*]]: tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, [[VAL_59:%.*]]: tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}) -> (tensor<7x7x3x64xf32> {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}) attributes {sym_visibility = "private"} { - func @_func(%arg0: tensor<2x224x224x3xf32> {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg1: tensor<7x7x3x64xf32> {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg2: tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg3: tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}) -> (tensor<7x7x3x64xf32> {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}) attributes {sym_visibility = "private"} { + // CHECK-LABEL: func private @_func + // CHECK-SAME: [[FUNCINPUT0:.*]]: tensor<2x112x112x12xf32> {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, [[FUNCINPUT1:%.*]]: tensor<7x7x3x64xf32> {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, [[FUNCINPUT2:%.*]]: tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, 
[[VAL_59:%.*]]: tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}) -> (tensor<7x7x3x64xf32> {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}) { + func private @_func(%arg0: tensor<2x224x224x3xf32> {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg1: tensor<7x7x3x64xf32> {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg2: tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg3: tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}) -> (tensor<7x7x3x64xf32> {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}) { %0 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor %1 = "tf.Const"() {value = dense<0> : tensor<1x1xi32>} : () -> tensor<1x1xi32> %2 = "tf.Const"() {value = dense<[7, 7, 3, 64]> : tensor<4xi32>} : () -> tensor<4xi32> @@ -112,9 +112,9 @@ module attributes {tf.devices = {"/job:localhost/replica:0/task:0/device:COMPOSI } return } - // CHECK-LABEL: func @_func - // CHECK-SAME: [[FUNCINPUT00:.*]]: tensor<2x112x112x12xf32> {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg1: tensor<2x1xf32> {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg2: tensor<7x7x3x64xf32> {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg3: tensor<64x1001xf32> {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg4: tensor<1001xf32> {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg5: tensor {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg6: tensor {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg7: tensor {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg8: tensor {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}) -> (tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, tensor {mhlo.sharding = 
"\08\01\1A\01\01\22\01\00"}, tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}) attributes {sym_visibility = "private"} { - func @_func(%arg0: tensor<2x224x224x3xf32> {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg1: tensor<2x1xf32> {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg2: tensor<7x7x3x64xf32> {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg3: tensor<64x1001xf32> {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg4: tensor<1001xf32> {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg5: tensor {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg6: tensor {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg7: tensor {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg8: tensor {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}) -> (tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}) attributes {sym_visibility = "private"} { + // CHECK-LABEL: func private @_func + // CHECK-SAME: [[FUNCINPUT00:.*]]: tensor<2x112x112x12xf32> {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg1: tensor<2x1xf32> {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg2: tensor<7x7x3x64xf32> {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg3: tensor<64x1001xf32> {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg4: tensor<1001xf32> {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg5: tensor {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg6: tensor {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg7: tensor 
{mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg8: tensor {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}) -> (tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}) { + func private @_func(%arg0: tensor<2x224x224x3xf32> {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg1: tensor<2x1xf32> {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg2: tensor<7x7x3x64xf32> {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg3: tensor<64x1001xf32> {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg4: tensor<1001xf32> {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg5: tensor {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg6: tensor {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg7: tensor {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, %arg8: tensor {mhlo.is_same_data_across_replicas, mhlo.sharding = "\08\01\1A\01\01\22\01\00"}) -> (tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}, tensor {mhlo.sharding = "\08\01\1A\01\01\22\01\00"}) { %0 = "tf.Const"() {value = dense<2.000000e+00> : tensor} : () -> tensor %1 = "tf.Const"() {value = dense<1.000000e+00> : tensor} : () -> tensor %2 = "tf.Const"() {value = dense<-1> : tensor} : () -> tensor diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD index 2c04ace99dc..12c3f46d72f 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD @@ -118,6 
+118,7 @@ cc_library( "@llvm-project//mlir:SCFToStandard", "@llvm-project//mlir:ShapeTransforms", "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:StandardOpsTransforms", "@llvm-project//mlir:AllPassesAndDialects", "@llvm-project//mlir:Support", "@llvm-project//mlir:Transforms", diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_kernel_to_llvm_pass.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_kernel_to_llvm_pass.cc index ccb040599c9..c1b95bf7f2e 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_kernel_to_llvm_pass.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_kernel_to_llvm_pass.cc @@ -19,6 +19,7 @@ limitations under the License. #include "mlir/Dialect/GPU/GPUDialect.h" // from @llvm-project #include "mlir/Dialect/LLVMIR/LLVMDialect.h" // from @llvm-project #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/Dialect/StandardOps/Transforms/Passes.h" // from @llvm-project #include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h" #include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h" #include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewriters.h" @@ -41,20 +42,23 @@ class TFKernelToLLVMPass : public TFKernelToLLVMPassBase { ModuleOp m = getOperation(); // Populate type conversions. - LLVMTypeConverter type_converter(m.getContext()); + MLIRContext* ctx = m.getContext(); + LLVMTypeConverter type_converter(ctx); type_converter.addConversion([&](tf_framework::OpKernelContextType type) { - return LLVM::LLVMType::getInt8PtrTy(m.getContext()); + return LLVM::LLVMType::getInt8PtrTy(ctx); }); // Populate patterns. 
OwningRewritePatternList patterns; + + populateStdExpandOpsPatterns(ctx, patterns); populateStdToLLVMConversionPatterns(type_converter, patterns); tf_framework::PopulateTFFrameworkToLLVMConversionPatterns(&type_converter, &patterns); populateGpuToLLVMConversionPatterns(type_converter, patterns, "gpu.binary"); // Set target. - ConversionTarget target(getContext()); + ConversionTarget target(*ctx); target.addLegalDialect(); target.addIllegalDialect(); diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf-communication.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf-communication.mlir index 876a1bf03e7..5fbad1d0e8b 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf-communication.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf-communication.mlir @@ -266,8 +266,8 @@ func @main(%arg0: tensor) -> tensor { return %0 : tensor } -// CHECK: func @callee([[CALLEE_ARG0:%.*]]: tensor, [[CALLEE_ARG1:%.*]]: !mhlo.token) -> (tensor, !mhlo.token) -func @callee(%arg0: tensor) -> tensor attributes {sym_visibility = "private"} { +// CHECK: func private @callee([[CALLEE_ARG0:%.*]]: tensor, [[CALLEE_ARG1:%.*]]: !mhlo.token) -> (tensor, !mhlo.token) +func private @callee(%arg0: tensor) -> tensor { // CHECK-NOT: "mhlo.create_token" // CHECK: [[SEND_ARG0_TOKEN:%.*]] = "mhlo.send"([[CALLEE_ARG0]], [[CALLEE_ARG1]]) @@ -319,7 +319,7 @@ func @callee(%arg0: tensor) -> tensor { return %0 : tensor } -// CHECK: func [[CALLEE_CLONE]]([[CALLEE_CLONE_ARG0:%.*]]: tensor, [[CALLEE_CLONE_ARG1:%.*]]: !mhlo.token) -> (tensor, !mhlo.token) +// CHECK: func private [[CALLEE_CLONE]]([[CALLEE_CLONE_ARG0:%.*]]: tensor, [[CALLEE_CLONE_ARG1:%.*]]: !mhlo.token) -> (tensor, !mhlo.token) // CHECK-NOT: "mhlo.create_token" // CHECK: [[CLONE_SEND_ARG0_TOKEN:%.*]] = "mhlo.send"([[CALLEE_CLONE_ARG0]], [[CALLEE_CLONE_ARG1]]) @@ -352,8 +352,8 @@ func @main(%arg0: tensor) { return } -// CHECK: func @callee([[CALLEE_ARG0:%.*]]: !mhlo.token) -> !mhlo.token -func @callee() attributes 
{sym_visibility = "private"} { +// CHECK: func private @callee([[CALLEE_ARG0:%.*]]: !mhlo.token) -> !mhlo.token +func private @callee() { // CHECK-NOT: "mhlo.create_token" // CHECK: [[ZERO:%.*]] = mhlo.constant dense<0> @@ -370,8 +370,8 @@ func @callee() attributes {sym_visibility = "private"} { // Test only the top level function generates a token. -// CHECK: func @callee0() -func @callee0() attributes {sym_visibility = "private"} { +// CHECK: func private @callee0() +func private @callee0() { // CHECK: [[INIT_TOKEN:%.*]] = "mhlo.create_token" // CHECK: call @callee1([[INIT_TOKEN]]) @@ -379,8 +379,8 @@ func @callee0() attributes {sym_visibility = "private"} { return } -// CHECK: func @callee1([[CALLEE1_ARG0:%.*]]: !mhlo.token) -> !mhlo.token -func @callee1() attributes {sym_visibility = "private"} { +// CHECK: func private @callee1([[CALLEE1_ARG0:%.*]]: !mhlo.token) -> !mhlo.token +func private @callee1() { // CHECK-NOT: "mhlo.create_token" // CHECK: [[CALL_2:%.*]] = call @callee2([[CALLEE1_ARG0]]) @@ -390,8 +390,8 @@ func @callee1() attributes {sym_visibility = "private"} { return } -// CHECK: func @callee2([[CALLEE2_ARG0:%.*]]: !mhlo.token) -> !mhlo.token -func @callee2() attributes {sym_visibility = "private"} { +// CHECK: func private @callee2([[CALLEE2_ARG0:%.*]]: !mhlo.token) -> !mhlo.token +func private @callee2() { // CHECK-NOT: "mhlo.create_token" // CHECK: [[RECV_TUPLE:%.*]] = "mhlo.recv"([[CALLEE2_ARG0]]) @@ -430,8 +430,8 @@ func @callee4() { return } -// CHECK: func @callee5([[CALLEE5_ARG0:%.*]]: !mhlo.token) -> !mhlo.token -func @callee5() attributes {sym_visibility = "private"} { +// CHECK: func private @callee5([[CALLEE5_ARG0:%.*]]: !mhlo.token) -> !mhlo.token +func private @callee5() { // CHECK-NOT: "mhlo.create_token" // CHECK: [[RECV_TUPLE:%.*]] = "mhlo.recv"([[CALLEE5_ARG0]]) @@ -445,7 +445,7 @@ func @callee5() attributes {sym_visibility = "private"} { return } -// CHECK: func @callee4{{.+}}([[CALLEE4_ARG0:%.*]]: !mhlo.token) -> !mhlo.token 
attributes {sym_visibility = "private"} +// CHECK: func private @callee4{{.+}}([[CALLEE4_ARG0:%.*]]: !mhlo.token) -> !mhlo.token // CHECK-NOT: "mhlo.create_token" // CHECK: [[CALL_5:%.*]] = call @callee5([[CALLEE4_ARG0]]) // CHECK: return [[CALL_5]] @@ -784,9 +784,9 @@ func @if_function_call(%arg0: tensor, %arg1: tensor) -> tensor { return %0 : tensor } -// CHECK-LABEL: func @callee +// CHECK-LABEL: func private @callee // CHECK-SAME: ([[CALLEE_ARG0:%.*]]: tensor, [[CALLEE_ARG1:%.*]]: !mhlo.token) -> !mhlo.token -func @callee(%arg0: tensor) attributes {sym_visibility = "private"} { +func private @callee(%arg0: tensor) { // CHECK: [[SEND_TOKEN:%.*]] = "mhlo.send" "tf.XlaSendToHost"(%arg0) {key = "send_key"} : (tensor) -> () @@ -1068,7 +1068,7 @@ func @unsupported_ancestor(%arg0: tensor, %arg1: tensor) { return } -func @callee() attributes {sym_visibility = "private"} { +func private @callee() { "tf._XlaHostComputeMlir"() {recv_key = "host_compute_channel_recv", send_key = "host_compute_channel_send", tpu_core = 0 : i64} : () -> () return } diff --git a/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt b/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt index 55e845e0d85..a9744f0884e 100644 --- a/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt +++ b/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt @@ -1,4 +1,4 @@ -// RUN: tf-mlir-translate -hlo-text-to-mlir-hlo %s -o - | FILECHECK_OPTS="" FileCheck %s -DPRIVATE="attributes {sym_visibility = \"private\"}" +// RUN: tf-mlir-translate -hlo-text-to-mlir-hlo %s -o - | FILECHECK_OPTS="" FileCheck %s HloModule main @@ -7,8 +7,7 @@ ENTRY %dummy_main (Arg_0.1: f32[]) -> f32[] { ROOT %Arg_0.1 = f32[] parameter(0) } -// CHECK-LABEL: func @test_simple -// CHECK-SAME: [[PRIVATE]] +// CHECK-LABEL: func private @test_simple %test_simple (Arg_0.1: f32[4], Arg_1.2: f32[4]) -> f32[] { %Arg_0.1 = f32[4]{0} parameter(0) %Arg_1.2 = f32[4]{0} parameter(1) @@ -21,8 +20,8 @@ ENTRY %dummy_main (Arg_0.1: 
f32[]) -> f32[] { ROOT %dot.4 = f32[] dot(f32[4]{0} %add.42, f32[4]{0} %Arg_1.2), lhs_contracting_dims={0}, rhs_contracting_dims={0} } -// CHECK-LABEL: func @test_after_all -// CHECK-SAME: ([[VAL_0:%.*]]: !mhlo.token, [[VAL_1:%.*]]: !mhlo.token) -> !mhlo.token [[PRIVATE]] +// CHECK-LABEL: func private @test_after_all +// CHECK-SAME: ([[VAL_0:%.*]]: !mhlo.token, [[VAL_1:%.*]]: !mhlo.token) -> !mhlo.token %test_after_all (token0: token[], token1: token[] ) -> token[] { token0 = token[] parameter(0) token1 = token[] parameter(1) @@ -37,7 +36,7 @@ add { ROOT add = f32[] add(lhs, rhs) } -// CHECK-LABEL: func @test_all_reduce +// CHECK-LABEL: func private @test_all_reduce // CHECK-SAME: ([[INPUT:%.*]]: tensor<8xf32>) %test_all_reduce { input = f32[8] parameter(0) @@ -52,7 +51,7 @@ add { } -// CHECK-LABEL: func @test_and +// CHECK-LABEL: func private @test_and %test_and (Arg_0.1: pred[4], Arg_1.2: pred[4]) -> pred[4] { %Arg_0.1 = pred[4] parameter(0) %Arg_1.2 = pred[4] parameter(1) @@ -61,7 +60,7 @@ add { ROOT %and.3 = pred[4] and(pred[4] %Arg_0.1, pred[4] %Arg_1.2) } -// CHECK-LABEL: func @test_atan2 +// CHECK-LABEL: func private @test_atan2 // CHECK-SAME: ([[VAL_0:%.*]]: tensor<4xi32>, [[VAL_1:%.*]]: tensor<4xi32>) -> tensor<4xi32> %test_atan2 (Arg_0.1: s32[4], Arg_1.2: s32[4]) -> s32[4] { %Arg_0.1 = s32[4] parameter(0) @@ -71,7 +70,7 @@ add { ROOT %atan2 = s32[4] atan2(s32[4] %Arg_0.1, s32[4] %Arg_1.2) } -// CHECK-LABEL: func @test_broadcast_in_dim +// CHECK-LABEL: func private @test_broadcast_in_dim %test_broadcast_in_dim { %Arg_0.1 = f32[1, 2] parameter(0) @@ -82,7 +81,7 @@ add { ROOT broadcast.4 = f32[3,1,2] broadcast(%Arg_0.1), dimensions={1, 2} } -// CHECK-LABEL: func @test_batch_norm_grad +// CHECK-LABEL: func private @test_batch_norm_grad %test_batch_norm_grad (input: f32[2,2,2,2], scale: f32[2], mean: f32[2], variance: f32[2], grad_output: f32[2,2,2,2]) -> (f32[2,2,2,2], f32[2], f32[2]) { %input = f32[2,2,2,2] parameter(0) %scale = f32[2] parameter(1) @@ -96,20 
+95,20 @@ add { ROOT %batch-norm-grad = (f32[2,2,2,2], f32[2], f32[2]) batch-norm-grad(f32[2,2,2,2] %input, f32[2] %scale, f32[2] %mean, f32[2] %variance, f32[2,2,2,2] %grad_output), epsilon=0.001, feature_index=1 } -// CHECK-LABEL: func @call(%arg0: tensor) -> tensor +// CHECK-LABEL: func private @call(%arg0: tensor) -> tensor %call (arg_1: s64[]) -> s64[] { %arg_1 = s64[] parameter(0), metadata={op_name="HLO_Args"} ROOT %compare.2 = s64[] add(%arg_1, %arg_1), metadata={op_type="Less" op_name="Less"} } -// CHECK-LABEL: func @test_call +// CHECK-LABEL: func private @test_call %test_call (arg0.1: s64[]) -> s64[] { %arg0.1 = s64[] parameter(0), metadata={op_name="HLO_Args"} // CHECK-NEXT: call @call(%arg0) : (tensor) -> tensor ROOT %call.2 = s64[] call(%arg0.1), to_apply=%call } -// CHECK-LABEL: func @test_cholesky +// CHECK-LABEL: func private @test_cholesky // CHECK-SAME: ([[ARG:%.*]]: tensor<1x291x291xf32>) -> tensor<1x291x291xf32> %test_cholesky (a: f32[1,291,291]) -> f32[1,291,291] { %a = f32[1,291,291] parameter(0) @@ -118,7 +117,7 @@ add { } -// CHECK-LABEL: func @test_clamp( +// CHECK-LABEL: func private @test_clamp( %test_clamp (Arg_0.1: f32[], Arg_1.2: f32[4], Arg_1.3: f32[]) -> f32[4] { %Arg_0.1 = f32[] parameter(0) %Arg_1.2 = f32[4] parameter(1) @@ -128,7 +127,7 @@ add { ROOT %clamp.3 = f32[4] clamp(f32[] %Arg_0.1, f32[4] %Arg_1.2, f32[] %Arg_2.3) } -// CHECK-LABEL: func @test_collective_permute +// CHECK-LABEL: func private @test_collective_permute // CHECK-SAME: ([[ARG:%.*]]: tensor<128x32xf32>) -> tensor<128x32xf32> %test_collective_permute (input: f32[128,32]) -> f32[128,32] { %input = f32[128,32]{1,0} parameter(0) @@ -137,7 +136,7 @@ add { } -// CHECK-LABEL: func @test_compare(%arg0: tensor<3xf32>, %arg1: tensor<3xf32>, %arg2: tensor<3xf32>) -> tensor<3xi1> +// CHECK-LABEL: func private @test_compare(%arg0: tensor<3xf32>, %arg1: tensor<3xf32>, %arg2: tensor<3xf32>) -> tensor<3xi1> %test_compare (Arg_0.1: f32[3], Arg_1.2: f32[3], Arg_2.3: f32[3]) -> 
pred[3] { %Arg_0.1 = f32[3] parameter(0) %Arg_1.2 = f32[3] parameter(1) @@ -154,7 +153,7 @@ add { ROOT %compare.6 = pred[3] compare(Arg_0.1, Arg_2.3), direction=GT } -// CHECK-LABEL: func @test_complex +// CHECK-LABEL: func private @test_complex %test_complex (Arg_0.1: f32[4], Arg_1.2: f32[4]) -> c64[4] { %Arg_0.1 = f32[4] parameter(0) %Arg_1.2 = f32[4] parameter(1) @@ -163,7 +162,7 @@ add { ROOT %complex.3 = c64[4] complex(f32[4] %Arg_0.1, f32[4] %Arg_1.2) } -// CHECK-LABEL: func @test_concat(%arg0: tensor<4x1xf32>, %arg1: tensor<4x2xf32>) -> tensor<4x3xf32> +// CHECK-LABEL: func private @test_concat(%arg0: tensor<4x1xf32>, %arg1: tensor<4x2xf32>) -> tensor<4x3xf32> %test_concat (Arg_0.1: f32[4, 1], Arg_1.2: f32[4, 2]) -> f32[4, 3] { %Arg_0.1 = f32[4, 1] parameter(0) %Arg_1.2 = f32[4, 2] parameter(1) @@ -172,7 +171,7 @@ add { ROOT %concatenate.3 = f32[4, 3] concatenate(f32[4, 1] %Arg_0.1, f32[4, 2] %Arg_1.2), dimensions={1} } -// CHECK-LABEL: func @test_constant +// CHECK-LABEL: func private @test_constant %test_constant { // Scalar/0D tensor constant @@ -202,8 +201,8 @@ add { // TODO(b/129422361) Potentially update when copy, reshape, and conv have actual // implementations with attributes, etc. 
-// CHECK-LABEL: func @test_conv( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<256x32x32x6xf32>) -> tuple> attributes {sym_visibility = "private"} { +// CHECK-LABEL: func private @test_conv( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<256x32x32x6xf32>) -> tuple> { %test_conv { %arg0.1 = f32[256,32,32,6]{3,2,1,0} parameter(0), metadata={op_name="HLO_Args"} @@ -250,7 +249,7 @@ add { } // Test for padding attribute shape in convolution -// CHECK-LABEL: func @test_convolve1D_padding +// CHECK-LABEL: func private @test_convolve1D_padding %test_convolve1D_padding (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,5,1] { %input = f32[1,2,1] parameter(0) %filter = f32[1,1,1] parameter(1) @@ -259,7 +258,7 @@ add { ROOT %convolution = f32[1,5,1] convolution(f32[1,2,1] %input, f32[1,1,1] %filter), feature_group_count=1, dim_labels=b0f_0io->b0f, window={pad=1_2 size=1} } -// CHECK-LABEL: func @test_convert(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf64> +// CHECK-LABEL: func private @test_convert(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf64> %test_convert (Arg_0.1: f32[4], Arg_1.2: f32[4]) -> f64[4] { %Arg_0.1 = f32[4] parameter(0) %Arg_1.2 = f32[4] parameter(1) @@ -274,7 +273,7 @@ add { ROOT %add.5 = f64[4] add(f64[4] %convert.3, f64[4] %convert.4) } -// CHECK-LABEL: func @test_cosine(%arg0: tensor<1x16x16x3xf32>) -> tensor<1x16x16x3xf32> +// CHECK-LABEL: func private @test_cosine(%arg0: tensor<1x16x16x3xf32>) -> tensor<1x16x16x3xf32> %test_cosine (arg0.1: f32[1,16,16,3]) -> f32[1,16,16,3] { %arg0.1 = f32[1,16,16,3]{3,2,1,0} parameter(0), metadata={op_name="HLO_Args"} @@ -282,7 +281,7 @@ add { ROOT %cosine.3 = f32[1,16,16,3]{3,2,1,0} cosine(f32[1,16,16,3]{3,2,1,0} %arg0.1) } -// CHECK-LABEL: func @test_custom_call +// CHECK-LABEL: func private @test_custom_call // CHECK-SAME: [[ARG_0:%.*]]: tensor<2x3xf32>, [[ARG_1:%.*]]: tensor<5x5xf32>) -> tensor<1x2x3xf32> %test_custom_call (arg1: f32[2,3], arg2: f32[5,5]) -> f32[1,2,3] { %arg1 = f32[2,3] parameter(0) @@ 
-291,7 +290,7 @@ add { ROOT %custom-call = f32[1,2,3]{0,2,1} custom-call(f32[2,3] %arg1, f32[5,5] %arg2), custom_call_target="foo", backend_config="bar", custom_call_has_side_effect=true } -// CHECK-LABEL: func @test_div(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> +// CHECK-LABEL: func private @test_div(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> %test_div (Arg_0.1: f32[4], Arg_1.2: f32[4]) -> f32[4] { %Arg_0.1 = f32[4] parameter(0) %Arg_1.2 = f32[4] parameter(1) @@ -300,7 +299,7 @@ add { ROOT %divide.3 = f32[4] divide(f32[4] %Arg_0.1, f32[4] %Arg_1.2) } -// CHECK-LABEL: func @test_dot(%arg0: tensor<1x4xf32>, %arg1: tensor<4x1xf32>) -> tensor +// CHECK-LABEL: func private @test_dot(%arg0: tensor<1x4xf32>, %arg1: tensor<4x1xf32>) -> tensor %test_dot (Arg_0.1: f32[1, 4], Arg_1.2: f32[4, 1]) -> f32[] { %Arg_0.1 = f32[1, 4] parameter(0) %Arg_1.2 = f32[4, 1] parameter(1) @@ -340,7 +339,7 @@ add { ROOT %dot.6 = f32[] dot(Arg_0.1, Arg_1.2), lhs_contracting_dims={0}, rhs_contracting_dims={1} } -// CHECK-LABEL: func @test_dynamic_slice +// CHECK-LABEL: func private @test_dynamic_slice // CHECK-SAME: [[OPERAND:%.*]]: tensor<2x2x258xi32>, [[START_IDX_1:%.*]]: tensor, [[START_IDX_2:%.*]]: tensor, [[START_IDX_3:%.*]]: tensor %test_dynamic_slice (operand: s32[2,2,258], start_indices: s32[3]) -> s32[1,1,32] { %operand = s32[2,2,258] parameter(0) @@ -352,7 +351,7 @@ add { ROOT %dynamic-slice = s32[1,1,32] dynamic-slice(s32[2,2,258] %operand, s32[] %start_idx_1, s32[] %start_idx_2, s32[] %start_idx_3), dynamic_slice_sizes={1,1,32} } -// CHECK-LABEL: func @test_dynamic_update_slice_1(%arg0: tensor<4x4xf32>, %arg1: tensor<1x4xf32>, %arg2: tensor, %arg3: tensor) -> tensor<4x4xf32> +// CHECK-LABEL: func private @test_dynamic_update_slice_1(%arg0: tensor<4x4xf32>, %arg1: tensor<1x4xf32>, %arg2: tensor, %arg3: tensor) -> tensor<4x4xf32> %test_dynamic_update_slice_1 (Arg_0.1: f32[4, 4], Arg_1.2: f32[1, 4], Arg_2.3: f32[], Arg_3.4: f32[]) -> f32[4, 4] { 
%Arg_0.1 = f32[4, 4] parameter(0) %Arg_1.2 = f32[1, 4] parameter(1) @@ -363,7 +362,7 @@ add { ROOT %dynamic-update-slice.5 = f32[4, 4] dynamic-update-slice(%Arg_0.1, %Arg_1.2, %Arg_2.3, %Arg_3.4) } -// CHECK-LABEL: func @test_dynamic_update_slice_2(%arg0: tensor<4xf32>, %arg1: tensor<2xf32>, %arg2: tensor) -> tensor<4xf32> +// CHECK-LABEL: func private @test_dynamic_update_slice_2(%arg0: tensor<4xf32>, %arg1: tensor<2xf32>, %arg2: tensor) -> tensor<4xf32> %test_dynamic_update_slice_2 (Arg_0.1: f32[4], Arg_1.2: f32[2], Arg_2.3: f32[]) -> f32[4] { %Arg_0.1 = f32[4] parameter(0) %Arg_1.2 = f32[2] parameter(1) @@ -373,7 +372,7 @@ add { ROOT %dynamic-update-slice.5 = f32[4] dynamic-update-slice(%Arg_0.1, %Arg_1.2, %Arg_2.3) } -// CHECK-LABEL: func @test_exponential(%arg0: tensor<16xf32>) -> tensor<16xf32> +// CHECK-LABEL: func private @test_exponential(%arg0: tensor<16xf32>) -> tensor<16xf32> %test_exponential (arg0.1: f32[16]) -> f32[16] { %arg0.1 = f32[16] parameter(0) @@ -381,7 +380,7 @@ add { ROOT %exp.2 = f32[16] exponential(f32[16] %arg0.1) } -// CHECK-LABEL: func @test_expm1(%arg0: tensor<16xf32>) -> tensor<16xf32> +// CHECK-LABEL: func private @test_expm1(%arg0: tensor<16xf32>) -> tensor<16xf32> %test_expm1 (arg0.1: f32[16]) -> f32[16] { %arg0.1 = f32[16] parameter(0) @@ -389,14 +388,14 @@ add { ROOT %expm1.2 = f32[16] exponential-minus-one(f32[16] %arg0.1) } -// CHECK-LABEL: func @test_fft(%arg0: tensor<3x9xf32>) -> tensor<3x5xcomplex> +// CHECK-LABEL: func private @test_fft(%arg0: tensor<3x9xf32>) -> tensor<3x5xcomplex> %test_fft { %arg0.1 = f32[3,9]{1,0} parameter(0), parameter_replication={false}, metadata={op_name="XLA_Args"} // CHECK: "mhlo.fft"(%arg0) {fft_length = dense<9> : tensor<1xi64>, fft_type = "RFFT" ROOT %fft.2 = c64[3,5]{1,0} fft(%arg0.1), fft_type=RFFT, fft_length={9}, metadata={op_type="RFFT" op_name="rfft"} } -// CHECK-LABEL: func @test_floor( +// CHECK-LABEL: func private @test_floor( // CHECK-SAME: [[A0:%.+]]: tensor<16xf32>) -> 
tensor<16xf32> %test_floor (arg0.1: f32[16]) -> f32[16] { %arg0.1 = f32[16] parameter(0) @@ -405,7 +404,7 @@ add { ROOT %floor.2 = f32[16] floor(f32[16] %arg0.1) } -// CHECK-LABEL: func @test_gather( +// CHECK-LABEL: func private @test_gather( // CHECK-SAME: [[ARG0:%.+]]: tensor<200x100x300xf32>, [[ARG1:%.+]]: tensor<10x2xi32>) -> tensor<10x300xf32> %test_gather (arg.0: f32[200,100,300], arg.1: s32[10,2]) -> f32[10,300] { %arg.0 = f32[200,100,300] parameter(0) @@ -427,7 +426,7 @@ add { slice_sizes={1,1,300} } -// CHECK-LABEL: func @test_get_dimension_size +// CHECK-LABEL: func private @test_get_dimension_size // CHECK-SAME: ([[ARG:%.*]]: tensor<4x2xf32>) %test_get_dimension_size (Arg_0.1: f32[4,2]) -> s32[] { %Arg_0.1 = f32[4,2] parameter(0) @@ -435,7 +434,7 @@ add { ROOT %get-dimension-size.2 = s32[] get-dimension-size(f32[4,2] %Arg_0.1), dimensions={1} } -// CHECK-LABEL: func @test_imag +// CHECK-LABEL: func private @test_imag %test_imag (Arg_0.1: c64[4]) -> f32[4] { %Arg_0.1 = c64[4] parameter(0) @@ -443,7 +442,7 @@ add { ROOT %imag.3 = f32[4] imag(c64[4] %Arg_0.1) } -// CHECK-LABEL: func @test_infeed +// CHECK-LABEL: func private @test_infeed // CHECK-SAME: ([[TOKEN:%.*]]: !mhlo.token) -> tuple, !mhlo.token> %test_infeed (token0: token[]) -> (s32[3], token[]) { %token0 = token[] parameter(0) @@ -453,19 +452,19 @@ add { } -// CHECK-LABEL: func @test_iota_1() -> tensor<4xf32> +// CHECK-LABEL: func private @test_iota_1() -> tensor<4xf32> %test_iota_1 () -> f32[4] { // CHECK-NEXT: "mhlo.iota"() {iota_dimension = 0 : i64} : () -> tensor<4xf32> ROOT %iota.0 = f32[4] iota(), iota_dimension=0 } -// CHECK-LABEL: func @test_iota_2() -> tensor<4x5xf32> +// CHECK-LABEL: func private @test_iota_2() -> tensor<4x5xf32> %test_iota_2 () -> f32[4, 5] { // CHECK-NEXT: "mhlo.iota"() {iota_dimension = 1 : i64} : () -> tensor<4x5xf32> ROOT %iota.0 = f32[4, 5] iota(), iota_dimension=1 } -// CHECK-LABEL: func @test_log(%arg0: tensor<16xf32>) -> tensor<16xf32> +// CHECK-LABEL: func 
private @test_log(%arg0: tensor<16xf32>) -> tensor<16xf32> %test_log (arg0.1: f32[16]) -> f32[16] { %arg0.1 = f32[16] parameter(0) @@ -473,7 +472,7 @@ add { ROOT %log.2 = f32[16] log(f32[16] %arg0.1) } -// CHECK-LABEL: func @test_log1p(%arg0: tensor<16xf32>) -> tensor<16xf32> +// CHECK-LABEL: func private @test_log1p(%arg0: tensor<16xf32>) -> tensor<16xf32> %test_log1p (arg0.1: f32[16]) -> f32[16] { %arg0.1 = f32[16] parameter(0) @@ -488,7 +487,7 @@ add { ROOT add = f32[] add(lhs, rhs) } -// CHECK-LABEL: func @test_map +// CHECK-LABEL: func private @test_map // CHECK-SAME: [[ARG_0:%.*]]: tensor<4xf32>, [[ARG_1:%.*]]: tensor<4xf32>) -> tensor<4xf32> %test_map { param0 = f32[4]{0} parameter(0) @@ -503,7 +502,7 @@ add { -// CHECK-LABEL: func @test_maximum(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> +// CHECK-LABEL: func private @test_maximum(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> %test_maximum (Arg_0.1: f32[4], Arg_1.2: f32[4]) -> f32[4] { %Arg_0.1 = f32[4] parameter(0) %Arg_1.2 = f32[4] parameter(1) @@ -512,7 +511,7 @@ add { ROOT %maximum.3 = f32[4] maximum(f32[4] %Arg_0.1, f32[4] %Arg_1.2) } -// CHECK-LABEL: func @test_minimum(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> +// CHECK-LABEL: func private @test_minimum(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> %test_minimum (Arg_0.1: f32[4], Arg_1.2: f32[4]) -> f32[4] { %Arg_0.1 = f32[4] parameter(0) %Arg_1.2 = f32[4] parameter(1) @@ -521,7 +520,7 @@ add { ROOT %minimum.3 = f32[4] minimum(f32[4] %Arg_0.1, f32[4] %Arg_1.2) } -// CHECK-LABEL: func @test_multiply(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> +// CHECK-LABEL: func private @test_multiply(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> %test_multiply (Arg_0.1: f32[4], Arg_1.2: f32[4]) -> f32[4] { %Arg_0.1 = f32[4] parameter(0) %Arg_1.2 = f32[4] parameter(1) @@ -530,7 +529,7 @@ add { ROOT %multiply.3 = f32[4] multiply(f32[4] %Arg_0.1, f32[4] %Arg_1.2) } -// 
CHECK-LABEL: func @test_negate(%arg0: tensor<16xf32>) -> tensor<16xf32> +// CHECK-LABEL: func private @test_negate(%arg0: tensor<16xf32>) -> tensor<16xf32> %test_negate (arg0.1: f32[16]) -> f32[16] { %arg0.1 = f32[16] parameter(0) @@ -538,7 +537,7 @@ add { ROOT %negate.2 = f32[16] negate(f32[16] %arg0.1) } -// CHECK-LABEL: func @test_not(%arg0: tensor<16xi1>) -> tensor<16xi1> +// CHECK-LABEL: func private @test_not(%arg0: tensor<16xi1>) -> tensor<16xi1> %test_not (arg0.1: pred[16]) -> pred[16] { %arg0.1 = pred[16] parameter(0) @@ -546,7 +545,7 @@ add { ROOT %not.2 = pred[16] not(pred[16] %arg0.1) } -// CHECK-LABEL: func @test_or +// CHECK-LABEL: func private @test_or %test_or (Arg_0.1: pred[4], Arg_1.2: pred[4]) -> pred[4] { %Arg_0.1 = pred[4] parameter(0) %Arg_1.2 = pred[4] parameter(1) @@ -555,7 +554,7 @@ add { ROOT %or.3 = pred[4] or(pred[4] %Arg_0.1, pred[4] %Arg_1.2) } -// CHECK-LABEL: func @test_outfeed +// CHECK-LABEL: func private @test_outfeed // CHECK-SAME: ([[DATA:%.*]]: tensor<3xi32>, [[TOKEN:%.*]]: !mhlo.token) -> !mhlo.token %test_outfeed (Arg_0.1: s32[3], Arg_1.2: token[]) -> token[] { %Arg_0.1 = s32[3] parameter(0) @@ -565,7 +564,7 @@ add { ROOT %outfeed.3 = token[] outfeed(s32[3] %Arg_0.1, token[] %Arg_1.2), outfeed_config="foobar" } -// CHECK-LABEL: func @test_pad(%arg0: tensor<4xf32>, %arg1: tensor) -> tensor<4xf32> +// CHECK-LABEL: func private @test_pad(%arg0: tensor<4xf32>, %arg1: tensor) -> tensor<4xf32> %test_pad (Arg_0.1: f32[4], Arg_1.2: f32[]) -> f32[4] { %Arg_0.1 = f32[4] parameter(0) %Arg_1.2 = f32[] parameter(1) @@ -574,7 +573,7 @@ add { ROOT %pad.3 = f32[4] pad(%Arg_0.1, %Arg_1.2), padding=0_0_0 } -// CHECK-LABEL: func @test_pad_edge(%arg0: tensor<4x4x4xf32>, %arg1: tensor) -> tensor<7x11x15xf32> +// CHECK-LABEL: func private @test_pad_edge(%arg0: tensor<4x4x4xf32>, %arg1: tensor) -> tensor<7x11x15xf32> %test_pad_edge (Arg_0.1: f32[4, 4, 4], Arg_1.2: f32[]) -> f32[7, 11, 15] { %Arg_0.1 = f32[4, 4, 4] parameter(0) %Arg_1.2 = f32[] 
parameter(1) @@ -583,7 +582,7 @@ add { ROOT %pad.3 = f32[7, 11, 15] pad(%Arg_0.1, %Arg_1.2), padding=1_2x3_4x5_6 } -// CHECK-LABEL: func @test_pad_interior(%arg0: tensor<4xf32>, %arg1: tensor) -> tensor<10xf32> +// CHECK-LABEL: func private @test_pad_interior(%arg0: tensor<4xf32>, %arg1: tensor) -> tensor<10xf32> %test_pad_interior (Arg_0.1: f32[4], Arg_1.2: f32[]) -> f32[10] { %Arg_0.1 = f32[4] parameter(0) %Arg_1.2 = f32[] parameter(1) @@ -592,7 +591,7 @@ add { ROOT %pad.3 = f32[10] pad(%Arg_0.1, %Arg_1.2), padding=0_0_2 } -// CHECK-LABEL: func @test_popcnt(%arg0: tensor<16xi32>) -> tensor<16xi32> +// CHECK-LABEL: func private @test_popcnt(%arg0: tensor<16xi32>) -> tensor<16xi32> %test_popcnt (arg0.1: s32[16]) -> s32[16] { %arg0.1 = s32[16] parameter(0) @@ -600,7 +599,7 @@ add { ROOT %popcnt.2 = s32[16] popcnt(s32[16] %arg0.1) } -// CHECK-LABEL: func @test_pow(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> +// CHECK-LABEL: func private @test_pow(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> %test_pow (Arg_0.1: f32[4], Arg_1.2: f32[4]) -> f32[4] { %Arg_0.1 = f32[4] parameter(0) %Arg_1.2 = f32[4] parameter(1) @@ -609,7 +608,7 @@ add { ROOT %power.3 = f32[4] power(f32[4] %Arg_0.1, f32[4] %Arg_1.2) } -// CHECK-LABEL: func @test_rng_normal +// CHECK-LABEL: func private @test_rng_normal // CHECK-SAME: ([[ARG0:%.*]]: tensor, [[ARG1:%.*]]: tensor) -> tensor<2x3x5xf32> %test_rng_normal (Arg_0.1: f32[], Arg_1.2: f32[]) -> f32[2,3,5] { %Arg_0.1 = f32[] parameter(0) @@ -619,7 +618,7 @@ add { ROOT %rng.4 = f32[2,3,5] rng(f32[] %Arg_0.1, f32[] %Arg_1.2), distribution=rng_normal } -// CHECK-LABEL: func @test_rng_uniform +// CHECK-LABEL: func private @test_rng_uniform // CHECK-SAME: ([[ARG0:%.*]]: tensor, [[ARG1:%.*]]: tensor) -> tensor<2x3x5xf32> %test_rng_uniform (Arg_0.1: f32[], Arg_1.2: f32[]) -> f32[2,3,5] { %Arg_0.1 = f32[] parameter(0) @@ -629,7 +628,7 @@ add { ROOT %rng.4 = f32[2,3,5] rng(f32[] %Arg_0.1, f32[] %Arg_1.2), 
distribution=rng_uniform } -// CHECK-LABEL: func @test_real +// CHECK-LABEL: func private @test_real %test_real (Arg_0.1: c64[4]) -> f32[4] { %Arg_0.1 = c64[4] parameter(0) @@ -660,7 +659,7 @@ add { ROOT %add.3 = f32[] add(f32[] %Arg_0.1, f32[] %Arg_1.2) } -// CHECK-LABEL: func @test_reduce +// CHECK-LABEL: func private @test_reduce // CHECK-SAME: ([[ARG0:%.*]]: tensor<4x4xf32>, [[ARG1:%.*]]: tensor<4xf32>, [[ARG2:%.*]]: tensor) -> tuple, tensor>, tensor> %test_reduce (Arg_0.1: f32[4, 4], Arg_1.2: f32[4], Arg_2.3: f32[]) -> ((f32[], f32[]), f32[]) { %Arg_0.1 = f32[4, 4] parameter(0) @@ -694,7 +693,7 @@ add { ROOT %tuple.6 = ((f32[], f32[]), f32[]) tuple(%reduce.1, %sub.5) } -// CHECK-LABEL: func @test_reduce_window +// CHECK-LABEL: func private @test_reduce_window // CHECK-SAME: ([[ARG0:%.*]]: tensor<2x17x31x7xf32>, [[ARG1:%.*]]: tensor) %test_reduce_window (Arg_0.1: f32[2,17,31,7], Arg_1.2: f32[]) -> f32[2,5,8,7] { %Arg_0.1 = f32[2,17,31,7] parameter(0) @@ -712,7 +711,7 @@ add { ROOT %reduce-window.1 = f32[2,5,8,7] reduce-window(f32[2,17,31,7] %Arg_0.1, f32[] %Arg_1.2), window={size=1x2x2x1 stride=1x4x4x1 pad=0_0x2_0x0_2x0_0 rhs_dilate=1x2x2x1}, to_apply=%reduce_helper.3 } -// CHECK-LABEL: func @test_remainder +// CHECK-LABEL: func private @test_remainder // CHECK-SAME: ([[VAL_0:%.*]]: tensor<4xf32>, [[VAL_1:%.*]]: tensor<4xf32>) %test_remainder (Arg_0.1: f32[4], Arg_1.2: f32[4]) -> f32[4] { %Arg_0.1 = f32[4] parameter(0) @@ -721,7 +720,7 @@ add { ROOT %remainder.3 = f32[4] remainder(f32[4] %Arg_0.1, f32[4] %Arg_1.2) } -// CHECK-LABEL: func @test_reverse_1d(%arg0: tensor<4xf32>) -> tensor<4xf32> +// CHECK-LABEL: func private @test_reverse_1d(%arg0: tensor<4xf32>) -> tensor<4xf32> %test_reverse_1d (Arg_0.1: f32[4]) -> f32[4] { %Arg_0.1 = f32[4] parameter(0) @@ -729,7 +728,7 @@ add { ROOT reverse.2 = f32[4] reverse(%Arg_0.1), dimensions={0} } -// CHECK-LABEL: func @test_reverse_2d(%arg0: tensor<4x4xf32>) -> tensor<4x4xf32 +// CHECK-LABEL: func private 
@test_reverse_2d(%arg0: tensor<4x4xf32>) -> tensor<4x4xf32 %test_reverse_2d (Arg_0.1: f32[4, 4]) -> f32[4, 4] { %Arg_0.1 = f32[4, 4] parameter(0) @@ -737,7 +736,7 @@ add { ROOT reverse.2 = f32[4, 4] reverse(%Arg_0.1), dimensions={0, 1} } -// CHECK-LABEL: func @test_rsqrt( +// CHECK-LABEL: func private @test_rsqrt( // CHECK-SAME: [[ARG0:%.+]]: tensor<16xf32>) -> tensor<16xf32> %test_rsqrt (arg0.1: f32[16]) -> f32[16] { %arg0.1 = f32[16] parameter(0) @@ -746,7 +745,7 @@ add { ROOT %rsqrt.2 = f32[16] rsqrt(f32[16] %arg0.1) } -// CHECK-LABEL: func @test_scalar(%arg0: tensor) -> tensor +// CHECK-LABEL: func private @test_scalar(%arg0: tensor) -> tensor %test_scalar (Arg_0.1: f32[]) -> f32[] { // CHECK-NEXT: return %arg0 : tensor ROOT %Arg_0.1 = f32[] parameter(0) @@ -766,7 +765,7 @@ add { ROOT %scatter = f32[200,100,300] scatter(f32[200,100,300] %input_tensor, s64[10,2] %scatter_indices, f32[10,300] %updates), update_window_dims={1}, inserted_window_dims={0,1}, scatter_dims_to_operand_dims={0,1}, index_vector_dim=1, to_apply=%update_computation } -// CHECK-LABEL: func @test_scatter +// CHECK-LABEL: func private @test_scatter // CHECK-SAME: [[ARG_0:%.*]]: tensor<200x100x300xf32>, [[ARG_1:%.*]]: tensor<10x2xi64>, [[ARG_2:%.*]]: tensor<10x300xf32>) -> tensor<200x100x300xf32> // CHECK: "mhlo.scatter"([[ARG_0]], [[ARG_1]], [[ARG_2]]) ( { // CHECK: ^bb0([[LHS:%.*]]: tensor, [[RHS:%.*]]: tensor): @@ -783,7 +782,7 @@ add { // CHECK-SAME: unique_indices = false -// CHECK-LABEL: func @test_select(%arg0: tensor<2x3xi1>, %arg1: tensor<2x3xi32>, %arg2: tensor<2x3xi32>) -> tensor<2x3xi32> +// CHECK-LABEL: func private @test_select(%arg0: tensor<2x3xi1>, %arg1: tensor<2x3xi32>, %arg2: tensor<2x3xi32>) -> tensor<2x3xi32> %test_select { %Arg_0.1 = pred[2,3] parameter(0) %Arg_1.2 = s32[2,3] parameter(1) @@ -806,7 +805,7 @@ add { ROOT %add = f32[] add(f32[] %lhs, f32[] %rhs) } -// CHECK-LABEL: func @test_select_and_scatter +// CHECK-LABEL: func private @test_select_and_scatter // 
CHECK-SAME: [[INPUT:%.*]]: tensor<4x5xf32>, [[SOURCE:%.*]]: tensor<2x2xf32>, [[INIT_VAL:%.*]]: tensor %test_select_and_scatter { %input = f32[4,5] parameter(0) @@ -831,7 +830,7 @@ add { // CHECK: return [[RESULT:%.*]] : tensor<4x5xf32> -// CHECK-LABEL: func @test_set_dimension_size +// CHECK-LABEL: func private @test_set_dimension_size // CHECK-SAME: ([[ARG:%.*]]: tensor<4x4xf32>, [[SIZE:%.*]]: tensor) %test_set_dimension_size (Arg_0.1: f32[4,4], Arg_1.2: s32[]) -> f32[4,<=4] { %Arg_0.1 = f32[4,4] parameter(0) @@ -840,7 +839,7 @@ add { ROOT %set-dimension-size.2 = f32[4,<=4] set-dimension-size(f32[4,4] %Arg_0.1, s32[] %Arg_1.2), dimensions={1} } -// CHECK-LABEL: func @test_sine(%arg0: tensor<1x16x16x3xf32>) -> tensor<1x16x16x3xf32> +// CHECK-LABEL: func private @test_sine(%arg0: tensor<1x16x16x3xf32>) -> tensor<1x16x16x3xf32> %test_sine (arg0.1: f32[1,16,16,3]) -> f32[1,16,16,3] { %arg0.1 = f32[1,16,16,3]{3,2,1,0} parameter(0), metadata={op_name="HLO_Args"} @@ -859,7 +858,7 @@ add { x = f32[1024]{0} parameter(0) ROOT sorted = f32[1024]{0} sort(x), dimensions={0}, is_stable=true, to_apply=compare } -// CHECK-LABEL: func @test_sort +// CHECK-LABEL: func private @test_sort // CHECK-SAME: [[ARG:%.*]]: tensor<1024xf32>) -> tensor<1024xf32> // CHECK: "mhlo.sort"([[ARG]]) ( { // CHECK: ^bb0([[ARG0:%.*]]: tensor, [[ARG1:%.*]]: tensor): @@ -867,7 +866,7 @@ add { // CHECK: "mhlo.return"([[CMP]]) : (tensor) -> () // CHECK: }) {dimension = 0 : i64, is_stable = true} : (tensor<1024xf32>) -> tensor<1024xf32> -// CHECK-LABEL: func @test_subtract +// CHECK-LABEL: func private @test_subtract %test_subtract (Arg_0.1: f32[4], Arg_1.2: f32[4]) -> f32[4] { %Arg_0.1 = f32[4] parameter(0) %Arg_1.2 = f32[4] parameter(1) @@ -876,7 +875,7 @@ add { ROOT %subtract.3 = f32[4] subtract(f32[4] %Arg_0.1, f32[4] %Arg_1.2) } -// CHECK-LABEL: func @test_tanh(%arg0: tensor<1x16x16x3xf32>) -> tensor<1x16x16x3xf32> +// CHECK-LABEL: func private @test_tanh(%arg0: tensor<1x16x16x3xf32>) -> 
tensor<1x16x16x3xf32> %test_tanh (arg0.1: f32[1,16,16,3]) -> f32[1,16,16,3] { %arg0.1 = f32[1,16,16,3]{3,2,1,0} parameter(0), metadata={op_name="HLO_Args"} @@ -884,7 +883,7 @@ add { ROOT %tanh.3 = f32[1,16,16,3]{3,2,1,0} tanh(f32[1,16,16,3]{3,2,1,0} %arg0.1), metadata={op_type="Tanh" op_name="embedded_inference/tanh_model/Tanh"} } -// CHECK-LABEL: func @test_transpose(%arg0: tensor<1x2x3x4xi32>) -> tensor<2x1x4x3xi32> +// CHECK-LABEL: func private @test_transpose(%arg0: tensor<1x2x3x4xi32>) -> tensor<2x1x4x3xi32> %test_transpose { %Arg_0.1 = s32[1,2,3,4] parameter(0) @@ -892,7 +891,7 @@ add { ROOT %transpose.2 = s32[2,1,4,3] transpose(s32[1,2,3,4] %Arg_0.1), dimensions={1,0,3,2} } -// CHECK-LABEL: func @test_triangular_solve +// CHECK-LABEL: func private @test_triangular_solve // CHECK-SAME: ([[ARG_A:%.*]]: tensor<4x4xf32>, [[ARG_B:%.*]]: tensor<4x3xf32>) -> tensor<4x3xf32> %test_triangular_solve (Arg_0.1: f32[4,4], Arg_1.2: f32[4,3]) -> f32[4,3] { %Arg_0.1 = f32[4,4] parameter(0) @@ -905,7 +904,7 @@ add { ROOT %triangular-solve.3 = f32[4,3] triangular-solve(f32[4,4] %Arg_0.1, f32[4,3] %Arg_1.2), left_side=true, lower=true, transpose_a=NO_TRANSPOSE, unit_diagonal=true } -// CHECK-LABEL: func @test_tuple(%arg0: tensor<1xi32>, %arg1: tensor<1x2xf32>) -> tuple, tensor<1x2xf32>> +// CHECK-LABEL: func private @test_tuple(%arg0: tensor<1xi32>, %arg1: tensor<1x2xf32>) -> tuple, tensor<1x2xf32>> %test_tuple(Arg_0.1: s32[1], Arg_1.2: f32[1, 2]) -> (s32[1], f32[1,2]) { %Arg_0.1 = s32[1] parameter(0) %Arg_1.2 = f32[1, 2] parameter(1) @@ -918,19 +917,19 @@ add { } // Test while op -// CHECK-LABEL: func @cond +// CHECK-LABEL: func private @cond %cond (arg_1: s64[]) -> pred[] { %arg_1 = s64[] parameter(0), metadata={op_name="HLO_Args"} ROOT %compare.2 = pred[] compare(%arg_1, %arg_1), direction=LT, metadata={op_type="Less" op_name="Less"} } -// CHECK-LABEL: func @loop +// CHECK-LABEL: func private @loop %loop (arg_1: s64[]) -> s64[] { %arg_1 = s64[] parameter(0), 
metadata={op_name="HLO_Args"} ROOT %compare.2 = s64[] add(%arg_1, %arg_1), metadata={op_type="Less" op_name="Less"} } -// CHECK-LABEL: func @test_while(%arg0: tensor) -> tensor +// CHECK-LABEL: func private @test_while(%arg0: tensor) -> tensor %test_while (arg0.1: s64[]) -> s64[] { %arg0.1 = s64[] parameter(0), metadata={op_name="HLO_Args"} // CHECK-NEXT: "mhlo.while"(%arg0) ( { @@ -945,7 +944,7 @@ add { ROOT %while.2 = s64[] while(%arg0.1), body=%loop, condition=%cond } -// CHECK-LABEL: func @test_xor +// CHECK-LABEL: func private @test_xor // CHECK-SAME: ([[VAL_0:%.*]]: tensor<4xi1>, [[VAL_1:%.*]]: tensor<4xi1>) -> tensor<4xi1> %test_xor (Arg_0.1: pred[4], Arg_1.2: pred[4]) -> pred[4] { %Arg_0.1 = pred[4] parameter(0) @@ -955,7 +954,7 @@ add { ROOT %xor.3 = pred[4] xor(pred[4] %Arg_0.1, pred[4] %Arg_1.2) } -// CHECK-LABEL: func @test_shiftleft +// CHECK-LABEL: func private @test_shiftleft // CHECK-SAME: ([[VAL_0:%.*]]: tensor<4xi32>, [[VAL_1:%.*]]: tensor<4xi32>) -> tensor<4xi32> %test_shiftleft (Arg_0.1: s32[4], Arg_1.2: s32[4]) -> s32[4] { %Arg_0.1 = s32[4] parameter(0) @@ -965,7 +964,7 @@ add { ROOT %shiftleft = s32[4] shift-left(s32[4] %Arg_0.1, s32[4] %Arg_1.2) } -// CHECK-LABEL: func @test_shiftright_arithmetic +// CHECK-LABEL: func private @test_shiftright_arithmetic // CHECK-SAME: ([[VAL_0:%.*]]: tensor<4xi32>, [[VAL_1:%.*]]: tensor<4xi32>) -> tensor<4xi32> %test_shiftright_arithmetic (Arg_0.1: s32[4], Arg_1.2: s32[4]) -> s32[4] { %Arg_0.1 = s32[4] parameter(0) @@ -975,7 +974,7 @@ add { ROOT %shiftright.arithmetic = s32[4] shift-right-arithmetic(s32[4] %Arg_0.1, s32[4] %Arg_1.2) } -// CHECK-LABEL: func @test_shiftright_logical +// CHECK-LABEL: func private @test_shiftright_logical // CHECK-SAME: ([[VAL_0:%.*]]: tensor<4xi32>, [[VAL_1:%.*]]: tensor<4xi32>) -> tensor<4xi32> %test_shiftright_logical (Arg_0.1: s32[4], Arg_1.2: s32[4]) -> s32[4] { %Arg_0.1 = s32[4] parameter(0) @@ -985,7 +984,7 @@ add { ROOT %shiftright.logical = s32[4] 
shift-right-logical(s32[4] %Arg_0.1, s32[4] %Arg_1.2) } -// CHECK-LABEL: func @complex_type +// CHECK-LABEL: func private @complex_type // CHECK-SAME: (%[[ARG0:.*]]: tensor<2xcomplex>, %[[ARG1:.*]]: tensor<2xcomplex>) -> tuple, tensor<2xf64>> %complex_type (Arg_0.1: c64[2], Arg_1.2: c128[2]) -> (f32[2], f64[2]) { %Arg_0.1 = c64[2] parameter(0) @@ -998,7 +997,7 @@ add { ROOT %tuple.5 = (f32[2], f64[2]) tuple(f32[2] %abs.3, f64[2] %abs.4) } -// CHECK-LABEL: func @unsigned_int +// CHECK-LABEL: func private @unsigned_int // CHECK-SAME: (%[[ARG0:.*]]: tensor<4xui16>) %unsigned_int(Arg_0.1: u16[4]) -> u16[4] { %Arg_0.1 = u16[4] parameter(0) @@ -1007,7 +1006,7 @@ add { ROOT %not.2 = u16[4] not(u16[4] %Arg_0.1) } -// CHECK-LABEL: func @rngbitgen +// CHECK-LABEL: func private @rngbitgen // CHECK-SAME: (%[[ARG0:.*]]: tensor<3xui64>) %rngbitgen (Arg_0.1: u64[3]) -> (u64[3], u32[2,2]) { %Arg_0.1 = u64[3] parameter(0) @@ -1015,7 +1014,7 @@ add { ROOT %rng-bit-generator.2 = (u64[3], u32[2,2]) rng-bit-generator(u64[3] %Arg_0.1), algorithm=rng_philox } -// CHECK-LABEL: func @cbrt +// CHECK-LABEL: func private @cbrt // CHECK-SAME: (%[[ARG0:.*]]: tensor<3x4xf32>) %cbrt (Arg_0.1: f32[3,4]) -> f32[3,4] { %Arg_0.1 = f32[3,4] parameter(0) @@ -1023,7 +1022,7 @@ add { ROOT %cbrt = f32[3,4] cbrt(f32[3,4] %Arg_0.1) } -// CHECK-LABEL: func @bitcast +// CHECK-LABEL: func private @bitcast // CHECK-SAME: (%[[ARG0:.*]]: tensor<3x4xf32>) -> tensor<3x4x1xf32> %bitcast (Arg_0.1: f32[3,4]) -> f32[3,4,1] { %Arg_0.1 = f32[3,4] parameter(0) @@ -1031,7 +1030,7 @@ add { ROOT %bitcast = f32[3,4,1] bitcast(f32[3,4] %Arg_0.1) } -// CHECK-LABEL: func @reduce_precision +// CHECK-LABEL: func private @reduce_precision // CHECK-SAME: (%[[ARG0:.*]]: tensor<3x4xf32>) %reduce_precision (Arg_0.1: f32[3,4]) -> f32[3,4] { %Arg_0.1 = f32[3,4] parameter(0) diff --git a/tensorflow/python/compiler/mlir/mlir_test.py b/tensorflow/python/compiler/mlir/mlir_test.py index 9cb0063dc64..1602143ddb7 100644 --- 
a/tensorflow/python/compiler/mlir/mlir_test.py +++ b/tensorflow/python/compiler/mlir/mlir_test.py @@ -68,7 +68,7 @@ class MLIRConcreteFunctionImportTest(test.TestCase): tensor_spec.TensorSpec(None, dtypes.float32)) mlir_module = mlir.convert_function(concrete_function) self.assertRegex(mlir_module, r'func @.*caller.*\(') - self.assertRegex(mlir_module, r'func @.*callee.*\(') + self.assertRegex(mlir_module, r'func private @.*callee.*\(') def testImportWithControlRet(self): diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 8f1c14d947b..9532efb0016 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -686,8 +686,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "f147f59cd377a6be68e5ca5c343eb11df8e7ee6f" - LLVM_SHA256 = "22cb626398e60d5bcb75ce61f59ae9df56ffedc75c40525214ff890e3e27e3d2" + LLVM_COMMIT = "1cbf8e89b54de939420d53d7a528bec6fbaf0a55" + LLVM_SHA256 = "8ec5f5a1330f69ec7b4a0365109a7b6b543df7ca98c02b1c5e13c7de4e58f662" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), diff --git a/third_party/mlir/BUILD b/third_party/mlir/BUILD index ade4f1887b7..fb43cfd3636 100644 --- a/third_party/mlir/BUILD +++ b/third_party/mlir/BUILD @@ -3295,6 +3295,7 @@ cc_library( ":StandardOpsTransformsPassIncGen", ":StandardToLLVM", ":StandardToSPIRVTransforms", + ":TosaDialect", ":Transforms", ":TransformsPassIncGen", ":VectorOps", @@ -3337,6 +3338,7 @@ cc_binary( "@llvm-project//mlir/test:TestPass", "@llvm-project//mlir/test:TestReducer", "@llvm-project//mlir/test:TestSPIRV", + "@llvm-project//mlir/test:TestTosaDialect", "@llvm-project//mlir/test:TestTransforms", "@llvm-project//mlir/test:TestTypeDialect", ], @@ -4234,6 +4236,112 @@ cc_library( ], ) +gentbl( + name = 
"TosaDialectIncGen", + strip_include_prefix = "include", + tbl_outs = [ + ( + "-gen-op-decls", + "include/mlir/Dialect/Tosa/IR/TosaOps.h.inc", + ), + ( + "-gen-op-defs", + "include/mlir/Dialect/Tosa/IR/TosaOps.cpp.inc", + ), + ( + "-gen-struct-attr-decls", + "include/mlir/Dialect/Tosa/IR/TosaStructs.h.inc", + ), + ( + "-gen-struct-attr-defs", + "include/mlir/Dialect/Tosa/IR/TosaStructs.cpp.inc", + ), + ( + "-gen-dialect-decls", + "include/mlir/Dialect/Tosa/IR/TosaOpsDialect.h.inc", + ), + ( + "-gen-op-doc", + "g3doc/Dialects/Tosa/TosaOps.md", + ), + ], + tblgen = ":mlir-tblgen", + td_file = "include/mlir/Dialect/Tosa/IR/TosaOps.td", + td_srcs = [ + ":OpBaseTdFiles", + "include/mlir/Dialect/Tosa/IR/TosaOpBase.td", + "include/mlir/Dialect/Tosa/IR/TosaInterfaces.td", + "include/mlir/Dialect/Tosa/IR/TosaTypesBase.td", + "include/mlir/Interfaces/SideEffectInterfaces.td", + "include/mlir/Interfaces/LoopLikeInterface.td", + ], +) + +gentbl( + name = "TosaInterfacesIncGen", + strip_include_prefix = "include", + tbl_outs = [ + ( + "-gen-op-interface-decls", + "include/mlir/Dialect/Tosa/IR/TosaInterfaces.h.inc", + ), + ( + "-gen-op-interface-defs", + "include/mlir/Dialect/Tosa/IR/TosaInterfaces.cpp.inc", + ), + ], + tblgen = ":mlir-tblgen", + td_file = "include/mlir/Dialect/Tosa/IR/TosaInterfaces.td", + td_srcs = [ + ":OpBaseTdFiles", + ], +) + +gentbl( + name = "TosaPassIncGen", + strip_include_prefix = "include", + tbl_outs = [ + ( + "-gen-pass-decls -name TosaOpt", + "include/mlir/Dialect/Tosa/Transforms/Passes.h.inc", + ), + ], + tblgen = ":mlir-tblgen", + td_file = "include/mlir/Dialect/Tosa/Transforms/Passes.td", + td_srcs = [ + ":PassBaseTdFiles", + ], +) + +cc_library( + name = "TosaDialect", + srcs = glob([ + "lib/Dialect/Tosa/IR/*.cpp", + "lib/Dialect/Tosa/IR/*.h", + "lib/Dialect/Tosa/Utils/*.cpp", + "lib/Dialect/Tosa/Transforms/*.cpp", + ]), + hdrs = glob([ + "include/mlir/Dialect/Tosa/IR/*.h", + "include/mlir/Dialect/Tosa/Utils/*.h", + 
"include/mlir/Dialect/Tosa/Transforms/*.h", + ]), + includes = ["include"], + deps = [ + ":Dialect", + ":IR", + ":LoopLikeInterface", + ":Pass", + ":QuantOps", + ":SideEffectInterfaces", + ":StandardOps", + ":TosaDialectIncGen", + ":TosaInterfacesIncGen", + ":TosaPassIncGen", + ":TransformUtils", + ], +) + # To reference all tablegen files here when checking for updates to them. filegroup( name = "TdFiles", diff --git a/third_party/mlir/test.BUILD b/third_party/mlir/test.BUILD index a2fa6e25267..8cedf347a79 100644 --- a/third_party/mlir/test.BUILD +++ b/third_party/mlir/test.BUILD @@ -286,3 +286,17 @@ cc_library( "@llvm-project//mlir:LLVMDialect", ], ) + +cc_library( + name = "TestTosaDialect", + srcs = glob([ + "lib/Dialect/Tosa/*.cpp", + ]), + deps = [ + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:TosaDialect", + "@llvm-project//mlir:Transforms", + ], +) From 07b9eccccfc13e687a1de4ad08cd49c8954236a5 Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Wed, 11 Nov 2020 11:09:51 -0800 Subject: [PATCH 172/220] Order NCCL all-reduce with ordering token Auto control dep will chain operations with the same resource input. We'll do the same thing for all-gather after some refactoring is done. 
PiperOrigin-RevId: 341868107 Change-Id: I5570a28c2e1c638980e3509088c0525e957c463b --- tensorflow/python/distribute/BUILD | 2 + .../python/distribute/cross_device_ops.py | 14 ++- .../distribute/cross_device_ops_test.py | 108 +++++++++++++++++- .../python/distribute/cross_device_utils.py | 23 +++- tensorflow/python/distribute/test_util.py | 84 ++++++++++++++ .../python/distribute/test_util_test.py | 33 ++++++ 6 files changed, 255 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index dac29b1c15e..f5b74545ad9 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -106,6 +106,7 @@ py_library( "//tensorflow/python:math_ops", "//tensorflow/python:nccl_ops", "//tensorflow/python:platform", + "//tensorflow/python:resource_variable_ops", "//tensorflow/python/eager:backprop", "//tensorflow/python/eager:context", ], @@ -1079,6 +1080,7 @@ cuda_py_test( ":multi_process_runner", ":multi_worker_test_base", ":reduce_util", + ":test_util", ":values", "//tensorflow/python:array_ops", "//tensorflow/python:collective_ops", diff --git a/tensorflow/python/distribute/cross_device_ops.py b/tensorflow/python/distribute/cross_device_ops.py index c5aca728827..3c424b301a8 100644 --- a/tensorflow/python/distribute/cross_device_ops.py +++ b/tensorflow/python/distribute/cross_device_ops.py @@ -990,6 +990,11 @@ class CollectiveAllReduce(CrossDeviceOps): all workers and then put results on the right destinations. """ + # Whether to only use NCCL for batched all-reduce when NCCL is requested. This + # is because of the lack of mechanism to order NCCL operations + # deterministically. + _limited_nccl = True + def __init__(self, devices, group_size, collective_keys=None): """Initializes the object. @@ -1121,8 +1126,8 @@ class CollectiveAllReduce(CrossDeviceOps): # all-reduce, which is the gradients. 
# TODO(b/132575814): switch to NCCL for all collectives when communication # is NCCL if and only if we can order collectives deterministically. - # is NCCL. - if (options.implementation == CommunicationImplementation.NCCL and + if (self._limited_nccl and + options.implementation == CommunicationImplementation.NCCL and batch_size == 1): implementation = CommunicationImplementation.AUTO.value @@ -1182,8 +1187,9 @@ class CollectiveAllReduce(CrossDeviceOps): # For now, we use NCCL only when batch_size > 1. # TODO(b/132575814): switch to NCCL for all collectives when implementation # is NCCL. - if options.implementation == CommunicationImplementation.NCCL and len( - per_replica_values) == 1: + if (self._limited_nccl and + options.implementation == CommunicationImplementation.NCCL and + len(per_replica_values) == 1): implementation = CommunicationImplementation.AUTO.value gathered_values = [] diff --git a/tensorflow/python/distribute/cross_device_ops_test.py b/tensorflow/python/distribute/cross_device_ops_test.py index 191394f69af..a5818c37aa5 100644 --- a/tensorflow/python/distribute/cross_device_ops_test.py +++ b/tensorflow/python/distribute/cross_device_ops_test.py @@ -32,9 +32,11 @@ from tensorflow.python.distribute import collective_util from tensorflow.python.distribute import combinations from tensorflow.python.distribute import cross_device_ops as cross_device_ops_lib from tensorflow.python.distribute import cross_device_utils +from tensorflow.python.distribute import device_util from tensorflow.python.distribute import multi_process_runner from tensorflow.python.distribute import multi_worker_test_base from tensorflow.python.distribute import reduce_util +from tensorflow.python.distribute import test_util from tensorflow.python.distribute import values as value_lib from tensorflow.python.eager import context from tensorflow.python.eager import def_function @@ -70,7 +72,12 @@ def make_per_replica_value(value, devices): """ values = [] for device_idx, device in 
enumerate(devices): - v = value(device_idx) if callable(value) else value + if callable(value): + v = value(device_idx) + elif isinstance(value, list): + v = value[device_idx] + else: + v = value if isinstance(v, IndexedSlicesValue): with ops.device(device): values.append( @@ -99,6 +106,11 @@ def enable_collective_ops(): task_index=cluster_resolver.task_id, protocol=cluster_resolver.rpc_layer) context.context().enable_collective_ops(server_def) + # Recover default flag values. + cross_device_ops_lib.CollectiveAllReduce._limited_nccl = True + cross_device_utils.CollectiveReplicaLauncher._use_scoped_allocator = True + cross_device_utils.CollectiveReplicaLauncher._use_collective_v2 = False + cross_device_utils.CollectiveReplicaLauncher._use_ordering_token = False class MultiProcessPoolRunner(): @@ -858,9 +870,101 @@ class CollectiveOpsTest(test.TestCase, parameterized.TestCase): get_global_mpr(num_processes).run(replica_fn) + @combinations.generate(combinations.combine(num_processes=1, required_gpus=2)) + def testNcclOrdering(self, num_processes, required_gpus): + + def replica_fn(): + cross_device_ops_lib.CollectiveAllReduce._limited_nccl = False + cross_device_utils.CollectiveReplicaLauncher._use_collective_v2 = True + cross_device_utils.CollectiveReplicaLauncher._use_ordering_token = True + collective, devices, _ = self.make_collective( + num_processes, required_gpus) + options = collective_util.Options( + implementation=CommunicationImplementation.NCCL) + + v_dense = make_per_replica_value([1.0, 1.0], devices) + v_sparse = make_per_replica_value([ + IndexedSlicesValue([[4., 6.], [5., 6.]], [1, 3], [5, 2]), + IndexedSlicesValue([[4., 6.], [5., 6.]], [1, 3], [5, 2]), + ], devices) + + @def_function.function + def nested_dense(): + collective.reduce(reduce_util.ReduceOp.SUM, v_dense, v_dense, options) + + @def_function.function + def nested_sparse(): + collective.reduce(reduce_util.ReduceOp.SUM, v_sparse, v_sparse, options) + + # All collectives, function calls, if 
clause and while loops should be + # chained by control dependencies, so that the execution order is + # deterministic. + @def_function.function + def f(): + # pylint: disable=pointless-statement + collective.reduce(reduce_util.ReduceOp.SUM, v_sparse, v_sparse, options) + # reducing dense value. + collective.reduce(reduce_util.ReduceOp.SUM, v_dense, v_dense, options) + # reducing sparse value. + collective.reduce(reduce_util.ReduceOp.SUM, v_sparse, v_sparse, options) + # reduce dense value in nested tf.function. + nested_dense() + # reduce sparse value in nested tf.function. + nested_sparse() + # reduce dense value in tf.cond. + if array_ops.identity(1.0) > array_ops.identity(2.0): + collective.reduce(reduce_util.ReduceOp.SUM, v_dense, v_dense, options) + else: + v_dense + # reduce sparse value in tf.cond. + if array_ops.identity(1.0) > array_ops.identity(2.0): + v_sparse + else: + collective.reduce(reduce_util.ReduceOp.SUM, v_sparse, v_sparse, + options) + # reduce dense value in tf.while_loop. + i = array_ops.identity(1) + while i < 3: + collective.reduce(reduce_util.ReduceOp.SUM, v_dense, v_dense, options) + i += 1 + # reduce sparse value in tf.while_loop. + i = array_ops.identity(1) + while i < 3: + collective.reduce(reduce_util.ReduceOp.SUM, v_sparse, v_sparse, + options) + i += 1 + # reducing dense and sparse value again. 
+ collective.reduce(reduce_util.ReduceOp.SUM, v_dense, v_dense, options) + collective.reduce(reduce_util.ReduceOp.SUM, v_sparse, v_sparse, options) + # pylint: enable=pointless-statement + + graph = f.get_concrete_function().graph + should_be_ordered = set([ + "CollectiveReduce", "CollectiveGather", "If", "While", + "StatefulPartitionedCall" + ]) + nodes_by_device = {} + for op in graph.get_operations(): + if op.type in should_be_ordered: + if op.device not in nodes_by_device: + nodes_by_device[op.device] = [] + nodes_by_device[op.device].append(op) + order = test_util.topological_sort_operations(graph.get_operations()) + for device in devices: + device = device_util.canonicalize(device) + # Those function ops don't have device annotations, but they contain + # collectives for both devices so we always include them. + operations = nodes_by_device[device] + nodes_by_device[""] + # Verify that we get all types of nodes we want. + self.assertEqual(set(op.type for op in operations), should_be_ordered) + test_util.assert_sequential_execution(order, operations) + + get_global_mpr(num_processes).run(replica_fn) + if __name__ == "__main__": # Set default inter op thread pool size to one to ensure we don't exhaust the # thread pool with the additional executors to run collectives in eager. os.environ["TF_NUM_INTEROP_THREADS"] = "1" - multi_process_runner.test_main() + # TODO(b/172304955): figure out why logical devices don't work.
+ test_util.main(config_logical_devices=False) diff --git a/tensorflow/python/distribute/cross_device_utils.py b/tensorflow/python/distribute/cross_device_utils.py index 96866fb1ca4..d90c3b73717 100644 --- a/tensorflow/python/distribute/cross_device_utils.py +++ b/tensorflow/python/distribute/cross_device_utils.py @@ -32,6 +32,7 @@ from tensorflow.python.ops import collective_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nccl_ops +from tensorflow.python.ops import resource_variable_ops from tensorflow.python.platform import tf_logging as logging INSTANCE_KEY_START_NUMBER = 100 @@ -258,6 +259,7 @@ class CollectiveReplicaLauncher(object): _use_scoped_allocator = True _use_collective_v2 = False + _use_ordering_token = False def __init__(self, group_key, @@ -272,6 +274,12 @@ class CollectiveReplicaLauncher(object): self._collective_keys = collective_keys self._device = device self._executor = executor + if (self._use_ordering_token and self._use_collective_v2 and + ops.executing_eagerly_outside_functions()): + with ops.init_scope(), ops.device(device): + self._ordering_token = resource_variable_ops.ResourceVariable(0.) 
+ else: + self._ordering_token = None def _executor_scope(self): if context.executing_eagerly() and not self._executor: @@ -281,7 +289,7 @@ class CollectiveReplicaLauncher(object): return ops.NullContextmanager() def _control_input(self, control_input): - if control_input is not None: + if control_input is not None and self._ordering_token is None: return ops.control_dependencies([control_input]) return ops.NullContextmanager() @@ -323,6 +331,11 @@ class CollectiveReplicaLauncher(object): return self._collective_keys.get_instance_key(self._group_key, self._device) + def _get_ordering_token(self, communication_hint): + if self._ordering_token is not None and communication_hint == 'NCCL': + return self._ordering_token.handle + return None + def all_reduce(self, input_tensor, control_input=None, @@ -345,6 +358,7 @@ class CollectiveReplicaLauncher(object): The reduced tensor. """ instance_key = self._next_instance_key() + ordering_token = self._get_ordering_token(communication_hint) with self._executor_scope(), \ ops.device(self._device), \ self._control_input(control_input): @@ -355,7 +369,8 @@ class CollectiveReplicaLauncher(object): self._group_key, instance_key, communication_hint=communication_hint, - timeout=timeout) + timeout=timeout, + ordering_token=ordering_token) else: return collective_ops.all_reduce( input_tensor, @@ -381,6 +396,7 @@ class CollectiveReplicaLauncher(object): The reduced tensor. 
""" instance_key = self._next_instance_key() + ordering_token = self._get_ordering_token(communication_hint) with self._executor_scope(), ops.device(self._device): if self._should_use_collective_v2(): return collective_ops.all_gather_v2( @@ -389,7 +405,8 @@ class CollectiveReplicaLauncher(object): self._group_key, instance_key, communication_hint=communication_hint, - timeout=timeout) + timeout=timeout, + ordering_token=ordering_token) else: return collective_ops.all_gather( input_tensor, diff --git a/tensorflow/python/distribute/test_util.py b/tensorflow/python/distribute/test_util.py index 2cc278c647e..45085ba6203 100644 --- a/tensorflow/python/distribute/test_util.py +++ b/tensorflow/python/distribute/test_util.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import functools +import itertools from absl import app @@ -106,3 +107,86 @@ def main(enable_v2_behavior=True, config_logical_devices=True): v2_compat.disable_v2_behavior() # TODO(b/131360402): configure default logical devices. multi_process_runner.test_main() + + +def _op_dependencies(op): + """Returns the data and control dependencies of a tf.Operation combined.""" + deps = [] + for node in itertools.chain(op.inputs, op.control_inputs): + if isinstance(node, ops.Tensor): + node = node.op + assert isinstance(node, ops.Operation) + deps.append(node) + return deps + + +def topological_sort_operations(operations): + """Topological sorts a list of operations. + + This does a topological sort of the operations in a graph. The edges include + both data dependencies and control dependencies. Note that the edge goes from + an operation to its dependencies. + + Args: + operations: a list of tf.Operation in the same graph. + + Returns: + A map from a tf.Operation to its topological order. 
+ """ + in_degrees = {} + for op in operations: + if op not in in_degrees: + in_degrees[op] = 0 + for next_op in _op_dependencies(op): + in_degrees[next_op] = in_degrees.get(next_op, 0) + 1 + nexts = [] + for op, in_degree in in_degrees.items(): + if in_degree == 0: + nexts.append(op) + order = {} + next_order = 0 + while nexts: + op, nexts = nexts[0], nexts[1:] + order[op] = next_order + next_order += 1 + for next_op in _op_dependencies(op): + in_degrees[next_op] -= 1 + if in_degrees[next_op] == 0: + nexts.append(next_op) + assert len(order) == len(operations) + return order + + +def _exists_dependency(start, end): + """Returns whether there exists a dependency chain from start to end.""" + nexts = [start] + while nexts: + op, nexts = nexts[0], nexts[1:] + for next_op in _op_dependencies(op): + if next_op == end: + return True + nexts.append(next_op) + return False + + +def assert_sequential_execution(order, operations): + """Asserts there's a deterministic execution order between the operations. + + Args: + order: a map from a tf.Operation to its topological order. + operations: a list of operations that should be executed sequentially. It + can be given in any order. + """ + # Topological ordering guarantees that, if there's a dependency from N_a to + # N_b, then order[N_a] < order[N_b]. If a path of dependencies exists + # among the operations, it always goes from an operation with a smaller + # topological order to one with a larger topological order. Therefore, we only + # need to sort the operations by their topological orders, and verify that + # there's a path of dependency between adjacent pairs. + operations = sorted(operations, key=lambda op: order[op]) + for i in range(len(operations) - 1): + if not _exists_dependency(operations[i], operations[i + 1]): + print(operations[i].graph.as_graph_def()) + raise AssertionError( + "No dependency between {} and {}.
Graph is dumped to stdout.".format( + operations[i].name, operations[i + 1].name)) diff --git a/tensorflow/python/distribute/test_util_test.py b/tensorflow/python/distribute/test_util_test.py index 165f97be6e2..756e08dbb42 100644 --- a/tensorflow/python/distribute/test_util_test.py +++ b/tensorflow/python/distribute/test_util_test.py @@ -28,6 +28,7 @@ from tensorflow.python.eager import def_function from tensorflow.python.eager import test from tensorflow.python.framework import config from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops @@ -82,5 +83,37 @@ class LogicalDevicesTest(test.TestCase): self.assertLen(config.get_logical_device_configuration(cpu_device), 3) +class AssertSequentailExecutionTest(test.TestCase): + + def test1(self): + + @def_function.function + def f(): + a = array_ops.identity(1., name='a') + b = a + 1 + c = array_ops.identity(2., name='c') + d = array_ops.identity(a + c, name='d') + with ops.control_dependencies([b]): + e = array_ops.identity(3., name='e') + f = array_ops.identity(c + e, name='f') + return d, f + + graph = f.get_concrete_function().graph + order = test_util.topological_sort_operations(graph.get_operations()) + a = graph.get_operation_by_name('a') + c = graph.get_operation_by_name('c') + d = graph.get_operation_by_name('d') + e = graph.get_operation_by_name('e') + f = graph.get_operation_by_name('f') + test_util.assert_sequential_execution(order, [a, d]) + test_util.assert_sequential_execution(order, [e, a, f]) + with self.assertRaises(AssertionError): + test_util.assert_sequential_execution(order, [a, c]) + with self.assertRaises(AssertionError): + test_util.assert_sequential_execution(order, [f, a, c]) + with self.assertRaises(AssertionError): + test_util.assert_sequential_execution(order, [d, e, a, c]) + + if __name__ == '__main__': test_util.main() From 2e409c1640021b6b4fea21d2189f7305fb8ec913 Mon Sep 17 00:00:00 2001 From: Rahul Joshi 
Date: Wed, 11 Nov 2020 11:36:54 -0800 Subject: [PATCH 173/220] [XLA:GPU] Migrate SliceToDynamic IR emission to use MLIR. - Add support for converting HLO CustomCall -> LHLO CustomCall. - Handle dynamic XLA shapes in GetOrCreateArrayView by converting them to static shapes. PiperOrigin-RevId: 341874453 Change-Id: I7efe9bf8de31e8ff26f13e1f8a0c57d14102e7fa --- .../hlo_text_to_lhlo_no_opt.hlotxt | 20 ++++++ .../xla/transforms/mhlo_to_lhlo_with_xla.cc | 27 ++++++- .../xla/transforms/mhlo_to_lhlo_with_xla.h | 2 + .../xla/service/gpu/ir_emitter_unnested.cc | 71 ++++++++++++------- .../xla/service/gpu/ir_emitter_unnested.h | 1 + .../service/gpu/tests/slice_to_dynamic.hlo | 6 +- 6 files changed, 96 insertions(+), 31 deletions(-) diff --git a/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/hlo_text_to_lhlo_no_opt.hlotxt b/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/hlo_text_to_lhlo_no_opt.hlotxt index be8b2e13daf..6bbdde7fb90 100644 --- a/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/hlo_text_to_lhlo_no_opt.hlotxt +++ b/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/hlo_text_to_lhlo_no_opt.hlotxt @@ -81,3 +81,23 @@ ENTRY main () -> f32[6] { %init = f32[] parameter(2) ROOT %select-and-scatter.12 = f32[6]{0} select-and-scatter(f32[6]{0} %operand, f32[2]{0} %source, f32[] %init), window={size=3 stride=3}, select=%ge_F32, scatter=%add_F32 } + +// ----- + +HloModule SliceToDynamic + +// CHECK-LABEL: func @main +// CHECK: "lmhlo.custom_call" +// CHECK: backend_config = "", call_target_name = "SliceToDynamic" +// CHECK: (memref<2x2x2xi32>, memref, memref, memref, memref<2x2x2xi32>) -> () +ENTRY main { + %param = s32[2,2,2] parameter(0) + %static = s32[] parameter(1) + %dynamic = s32[] parameter(2) + ROOT %custom-call = s32[2,<=2, 2] custom-call(s32[2,2,2] %param, + s32[] %static, + s32[] %dynamic, + s32[] %static), + custom_call_target="SliceToDynamic", + backend_config="" +} \ No newline at end of file diff --git 
a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc index 1e54cfb6ecf..5dd9605f8ec 100644 --- a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc +++ b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc @@ -264,6 +264,8 @@ StatusOr LhloDialectEmitter::EmitOp(HloInstruction* instr) { return EmitScatterOp(instr); case HloOpcode::kSelectAndScatter: return EmitSelectAndScatterOp(instr); + case HloOpcode::kCustomCall: + return EmitCustomCallOp(instr); default: llvm::errs() << instr->ToString(); return tensorflow::errors::Internal( @@ -475,11 +477,29 @@ StatusOr LhloDialectEmitter::EmitSelectAndScatterOp( return select_and_scatter; } +StatusOr LhloDialectEmitter::EmitCustomCallOp( + HloInstruction* instr) { + TF_ASSIGN_OR_RETURN(auto custom_call, + CreateOpWithoutAttrs(instr)); + auto* custom_call_instr = ::xla::Cast<::xla::HloCustomCallInstruction>(instr); + custom_call.call_target_nameAttr( + builder_.getStringAttr(custom_call_instr->custom_call_target())); + custom_call.backend_configAttr( + builder_.getStringAttr(custom_call_instr->opaque())); + return custom_call; +} + StatusOr LhloDialectEmitter::GetOrCreateArrayView( const ::xla::HloInstruction* instr, const ::xla::Shape& current_shape, const ::xla::ShapeIndex& shape_index) { + // If the shape happens to have dynamic dimensions, create the memref using + // the underlying static shape. + // TODO(jurahul): Revisit this when we can model memrefs with dynamic shape + // but static bounds in MLIR. 
+ const Shape static_shape = xla::ShapeUtil::MakeStaticShape(current_shape); + TF_ASSIGN_OR_RETURN(Type out_type, ::xla::ConvertShapeToType( - current_shape, builder_)); + static_shape, builder_)); TF_ASSIGN_OR_RETURN(BufferAllocation::Slice slice, assignment_.GetUniqueSlice(instr, shape_index)); Value alloc = allocations_[slice.allocation()]; @@ -490,7 +510,8 @@ StatusOr LhloDialectEmitter::GetOrCreateArrayView( auto out_memref_type = out_type.dyn_cast(); if (!out_memref_type) return tensorflow::errors::Internal( - "Expected memref type when creating a view for leaf type of a tuple."); + "Expected memref type when creating a view for leaf type of a " + "tuple."); // Cache generated ViewOp and StaticMemRefCastOp by (instruction, // shape_index). @@ -504,7 +525,7 @@ StatusOr LhloDialectEmitter::GetOrCreateArrayView( xla::Shape physical_shape = xla::ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( - current_shape); + static_shape); TF_ASSIGN_OR_RETURN( Type physical_out_type, ::xla::ConvertShapeToType(physical_shape, builder_)); diff --git a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.h b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.h index 339c918d1bd..dc15455fe26 100644 --- a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.h +++ b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.h @@ -55,6 +55,8 @@ class LhloDialectEmitter : public ::xla::DfsHloVisitorWithDefault { ::xla::StatusOr EmitScatterOp(::xla::HloInstruction* instr); ::xla::StatusOr EmitSelectAndScatterOp( ::xla::HloInstruction* instr); + ::xla::StatusOr EmitCustomCallOp( + ::xla::HloInstruction* instr); template ::xla::StatusOr CreateOpWithoutAttrs(::xla::HloInstruction* instr); diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index 10a712d7883..dcbb887670d 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ 
b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -676,31 +676,54 @@ Status IrEmitterUnnested::HandlePadToStatic(HloInstruction* pad_to_static) { // Output = {static array, dynamic_dim0, dynamic_dim1} Status IrEmitterUnnested::HandleSliceToDynamic( HloInstruction* slice_to_dynamic) { - int unroll_factor = 1; - string ir_name = IrName(slice_to_dynamic); - auto kernel_thunk = BuildKernelThunk(slice_to_dynamic, - /*implements_whole_instruction=*/true); + TF_ASSIGN_OR_RETURN(auto input, GetMlirEmitterInput(slice_to_dynamic)); + return EmitSliceToDynamicFromMlir(input); +} - std::vector dynamic_dims; - const Shape& input_shape = slice_to_dynamic->operand(0)->shape(); - const Shape& data_shape = slice_to_dynamic->shape(); - int32 raw_data_size = ShapeUtil::ByteSizeOf( - ShapeUtil::MakeStaticShape(slice_to_dynamic->shape())); - // pseudo code for sliceToDynamic on a 2d array - // int* source_array = input[0]; - // int* dest_array = output[0]; - llvm::Value* dest_buffer = GetBasePointer(*slice_to_dynamic); - llvm::Value* raw_buffer = - b_.CreateBitCast(dest_buffer, b_.getInt8Ty()->getPointerTo()); - llvm_ir::IrArray data_array = - GetIrArray(*slice_to_dynamic, *slice_to_dynamic); +Status IrEmitterUnnested::EmitSliceToDynamicFromMlir( + MlirEmitterInput mlir_input) { + // TODO(jurahul): Create an op to represent SliceToDynamic. 
+ auto slice_to_dynamic = + ::mlir::cast<::mlir::lmhlo::CustomCallOp>(mlir_input.op); + int unroll_factor = 1; + std::string ir_name = mlir::GetNameFromLoc(slice_to_dynamic.getLoc()); + absl::Span allocations( + ir_emitter_context_->buffer_assignment().Allocations()); + + std::vector ir_arrays; + TF_ASSIGN_OR_RETURN( + auto kernel_thunk, + BuildKernelThunkForMlir(slice_to_dynamic, mlir_input.thunk_info, + mlir_input.extra_slice, &ir_arrays)); + + const Shape& input_shape = + TypeToShape(slice_to_dynamic.args().front().getType()); + const Shape& data_shape = TypeToShape(slice_to_dynamic.output().getType()); + + // TODO(jurahul): data_shape here is the static shape of the output (which has + // a dynamic shape in XLA). Currently, we are mapping that to a static shaped + // memref. When we change that to a more appropriate representation in MLIR, + // fix this code to correctly deduce the static shape backing the dynamically + // shaped memref. // calculate the location where metadata needs to be inserted // int* dyn_dim0_size = dest_array + meta_data_offset; // int* dyn_dim1_size = dest_array + meta_data_offset + sizeof(int); - for (int64 i = 1; i < slice_to_dynamic->operand_count(); ++i) { + int32 raw_data_size = ShapeUtil::ByteSizeOf(data_shape); + + // pseudo code for sliceToDynamic on a 2d array + // int* source_array = input[0]; + // int* dest_array = output[0]; + const llvm_ir::IrArray data_array = ir_arrays.back(); + llvm::Value* dest_buffer = data_array.GetBasePointer(); + llvm::Value* raw_buffer = + b_.CreateBitCast(dest_buffer, b_.getInt8Ty()->getPointerTo()); + + // Load dynamic dimensions from memory. 
+ std::vector dynamic_dims; + for (int64 i = 1; i < slice_to_dynamic.args().size(); ++i) { // const int64 dim_index = i - 1; - llvm::Value* source_buffer = GetBasePointer(*slice_to_dynamic->operand(i)); + llvm::Value* source_buffer = ir_arrays[i].GetBasePointer(); llvm::LoadInst* dyn_dim_size = b_.CreateLoad(source_buffer, "dyn_dim_size"); dynamic_dims.push_back(dyn_dim_size); } @@ -713,7 +736,7 @@ Status IrEmitterUnnested::HandleSliceToDynamic( // *dyn_dim1_size = *output[2]; // } KernelSupportLibrary{&b_}.If("is_thred_0", IsBlock0Thread0(&b_), [&] { - for (int64 i = 1; i < slice_to_dynamic->operand_count(); ++i) { + for (int64 i = 1; i < slice_to_dynamic.args().size(); ++i) { const int64 dim_index = i - 1; llvm::Value* metadata = b_.CreateConstInBoundsGEP1_32( b_.getInt8Ty(), raw_buffer, @@ -759,9 +782,8 @@ Status IrEmitterUnnested::HandleSliceToDynamic( data_array.EmitWriteArrayElement( array_index, - GetIrArray(*slice_to_dynamic->operand(0), *slice_to_dynamic) - .EmitReadArrayElement(dyn_index, &b_, /*name=*/"", - /*use_linear_index=*/false), + ir_arrays[0].EmitReadArrayElement(dyn_index, &b_, /*name=*/"", + /*use_linear_index=*/false), &b_); return Status::OK(); }; @@ -774,11 +796,10 @@ Status IrEmitterUnnested::HandleSliceToDynamic( TF_RETURN_IF_ERROR( ParallelLoopEmitter(body_generator, data_shape, launch_dimensions, &b_, unroll_factor) - .EmitLoop(ir_name, GetIndexTypeForKernel( + .EmitLoop(ir_name, GetIndexTypeForKernelFromMlir( slice_to_dynamic, launch_dimensions.launch_bound(), &b_))); thunk_sequence_.emplace_back(std::move(kernel_thunk)); - return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h index de35ac2f4fd..3c67323875b 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h @@ -306,6 +306,7 @@ class IrEmitterUnnested : public IrEmitter, // } // ``` Status 
HandleSliceToDynamic(HloInstruction* slice_to_dynamic); + Status EmitSliceToDynamicFromMlir(MlirEmitterInput mlir_input); // A convenient helper for calling BufferAssignment::GetUniqueSlice. StatusOr MaybeGetAllocationSlice( diff --git a/tensorflow/compiler/xla/service/gpu/tests/slice_to_dynamic.hlo b/tensorflow/compiler/xla/service/gpu/tests/slice_to_dynamic.hlo index 1f4f2602094..9b663d4c234 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/slice_to_dynamic.hlo +++ b/tensorflow/compiler/xla/service/gpu/tests/slice_to_dynamic.hlo @@ -7,7 +7,7 @@ // CHECK: %[[VAL_2:.*]] = bitcast i8* %[[VAL_0]] to [2 x [2 x [2 x i32]]]* // CHECK: %[[VAL_3:.*]] = getelementptr inbounds i8, i8* %[[VAL_4:.*]], i64 0 // CHECK: %[[VAL_5:.*]] = bitcast i8* %[[VAL_3]] to [2 x [2 x [2 x i32]]]* -// CHECK: %[[VAL_6:.*]] = bitcast [2 x [2 x [2 x i32]]]* %[[VAL_2]] to i8* +// CHECK: %[[VAL_6:.*]] = bitcast [2 x [2 x [2 x i32]]]* %[[VAL_5]] to i8* // CHECK: %[[VAL_7:.*]] = load i32, i32* bitcast ([4 x i8]* @buffer_for_static to i32*), align 4 // CHECK: %[[VAL_8:.*]] = load i32, i32* bitcast ([4 x i8]* @buffer_for_dynamic to i32*), align 4 // CHECK: %[[VAL_9:.*]] = load i32, i32* bitcast ([4 x i8]* @buffer_for_static to i32*), align 4 @@ -66,9 +66,9 @@ // CHECK: %[[VAL_53:.*]] = urem i32 %[[VAL_52]], %[[VAL_7]] // CHECK: %[[VAL_54:.*]] = mul i32 %[[VAL_51]], %[[VAL_7]] // CHECK: %[[VAL_55:.*]] = udiv i32 %[[VAL_46]], %[[VAL_54]] -// CHECK: %[[VAL_56:.*]] = getelementptr inbounds [2 x [2 x [2 x i32]]], [2 x [2 x [2 x i32]]]* %[[VAL_5]], i32 0, i32 %[[VAL_55]], i32 %[[VAL_53]], i32 %[[VAL_50]] +// CHECK: %[[VAL_56:.*]] = getelementptr inbounds [2 x [2 x [2 x i32]]], [2 x [2 x [2 x i32]]]* %[[VAL_2]], i32 0, i32 %[[VAL_55]], i32 %[[VAL_53]], i32 %[[VAL_50]] // CHECK: %[[VAL_57:.*]] = load i32, i32* %[[VAL_56]], align 4, !invariant.load !4 -// CHECK: %[[VAL_58:.*]] = bitcast [2 x [2 x [2 x i32]]]* %[[VAL_2]] to i32* +// CHECK: %[[VAL_58:.*]] = bitcast [2 x [2 x [2 x i32]]]* %[[VAL_5]] to i32* 
// CHECK: %[[VAL_59:.*]] = getelementptr inbounds i32, i32* %[[VAL_58]], i32 %[[VAL_24]] // CHECK: store i32 %[[VAL_57]], i32* %[[VAL_59]], align 4 // CHECK: br label %[[VAL_34]] From 06767d314524da091044f8ec1d5b81837e40fd30 Mon Sep 17 00:00:00 2001 From: Prakalp Srivastava Date: Wed, 11 Nov 2020 11:54:52 -0800 Subject: [PATCH 174/220] Add aliasing info to serialized module for better memory management on device. Set `tf.aliasing_output` attribute for serialized module inputs and outputs that are read and written to the same resource. This is passed as `input_output_alias` config to the compiled HLO module and helps it share buffer between inputs and outputs. `tpu-merge-variabels-with-execute` pass already does this analysis and merges resource reads and writes with TPUExecute op. In this change, we update the serialized module with this alias information as well. PiperOrigin-RevId: 341878702 Change-Id: I010e6ffec427321b763f75f9e5df9cfe800da2ad --- .../tpu-merge-variables-with-execute.mlir | 76 ++++++++++++------ .../tpu_merge_variables_with_execute.cc | 79 ++++++++++++++++++- .../tensorflow/utils/compile_mlir_util.cc | 37 ++++----- .../mlir/tensorflow/utils/compile_mlir_util.h | 1 + .../tensorflow/utils/tf_xla_mlir_translate.cc | 3 +- 5 files changed, 152 insertions(+), 44 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu-merge-variables-with-execute.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu-merge-variables-with-execute.mlir index 7c55018499d..228379f9da3 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu-merge-variables-with-execute.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu-merge-variables-with-execute.mlir @@ -6,12 +6,10 @@ // CHECK-SAME: %[[ARG_0:.*]]: tensor<*x!tf.resource>> // CHECK-SAME: %[[ARG_1:.*]]: tensor<*x!tf.resource>> // CHECK-SAME: %[[ARG_2:.*]]: tensor<*x!tf.resource>> -// CHECK-SAME: %[[ARG_3:.*]]: tensor func @merge_same_device_variables( %arg0: tensor<*x!tf.resource>> {tf.device = 
"/job:localhost/replica:0/task:0/device:TPU:0"}, %arg1: tensor<*x!tf.resource>> {tf.device = "/job:localhost/replica:0/task:0/device:TPU:0"}, - %arg2: tensor<*x!tf.resource>> {tf.device = "/job:localhost/replica:0/task:0/device:CPU:0"}, - %arg3: tensor) { + %arg2: tensor<*x!tf.resource>> {tf.device = "/job:localhost/replica:0/task:0/device:CPU:0"}) { // CHECK-NEXT: %[[ID_0:.*]] = "tf.IdentityN"(%[[ARG_0]]) %id0 = "tf.IdentityN"(%arg0) {device = "/job:localhost/replica:0/task:0/device:TPU:0"} : (tensor<*x!tf.resource>>) -> tensor<*x!tf.resource>> @@ -19,15 +17,27 @@ func @merge_same_device_variables( %read0 = "tf.ReadVariableOp"(%id0) : (tensor<*x!tf.resource>>) -> tensor<32xf32> %read1 = "tf.ReadVariableOp"(%arg1) : (tensor<*x!tf.resource>>) -> tensor<64xf32> %read2 = "tf.ReadVariableOp"(%arg2) : (tensor<*x!tf.resource>>) -> tensor<16xf32> - // CHECK-NEXT: %[[EXE:.*]] = "tf_device.launch" - // CHECK-NEXT: "tf.TPUExecuteAndUpdateVariables"(%[[ID_0]], %[[ARG_1]], %[[READ_2]], %[[ARG_3]]) + // CHECK: %[[COMPILE:.*]]:2 = "tf_device.launch" + %compile:2 = "tf_device.launch"() ( { + // CHECK: tf._TPUCompileMlir + // CHECK-SAME: mlir_module + // CHECK-SAME: func @main(%arg0: tensor<32xf32> {tf.aliasing_output = 0 : i64}, + // CHECK-SAME: %arg1: tensor<64xf32>, %arg2: tensor<16xf32>) + %0:2 = "tf._TPUCompileMlir"() { + metadata = "", + mlir_module = "module attributes {tf.versions = {producer = 888 : i32}} {\0A func @main(%arg0: tensor<32xf32>, %arg1: tensor<64xf32>, %arg2: tensor<16xf32>) -> (tensor<32xf32>, tensor<16xf32>) {\0A %0:2 = \22tf.A\22(%arg0, %arg1, %arg2) : (tensor<32xf32>, tensor<64xf32>, tensor<16xf32>) -> (tensor<32xf32>, tensor<16xf32>)\0A return %0#0, %0#1 : tensor<32xf32>, tensor<16xf32>\0A }\0A}" + } : () -> (tensor, tensor<2x!tf.string>) + tf_device.return %0#0, %0#1 : tensor, tensor<2x!tf.string> + }) {device = "/job:worker/replica:0/task:0/device:CPU:0"} : () -> (tensor, tensor<2x!tf.string>) + // CHECK: %[[EXE:.*]] = "tf_device.launch" + // 
CHECK-NEXT: "tf.TPUExecuteAndUpdateVariables"(%[[ID_0]], %[[ARG_1]], %[[READ_2]], %[[COMPILE]]#1) // CHECK-SAME: device_var_reads_indices = [0, 1], // CHECK-SAME: device_var_updates_indices = [0, -1] %execute:2 = "tf_device.launch"() ( { - %0:2 = "tf.TPUExecute"(%read0, %read1, %read2, %arg3) { + %0:2 = "tf.TPUExecute"(%read0, %read1, %read2, %compile#1) { Targs = [tensor<32xf32>, tensor<64xf32>, tensor<16xf32>], Tresults = [tensor<32xf32>, tensor<16xf32>]} - : (tensor<32xf32>, tensor<64xf32>, tensor<16xf32>, tensor) -> (tensor<32xf32>, tensor<16xf32>) + : (tensor<32xf32>, tensor<64xf32>, tensor<16xf32>, tensor<2x!tf.string>) -> (tensor<32xf32>, tensor<16xf32>) tf_device.return %0#0, %0#1 : tensor<32xf32>, tensor<16xf32> }) {device = "/job:localhost/replica:0/task:0/device:TPU:0"} : () -> (tensor<32xf32>, tensor<16xf32>) // CHECK-NEXT: tf_device.return @@ -44,26 +54,35 @@ func @merge_same_device_variables( // Tests that the pass do not check devices for replicated region. // CHECK-LABEL: func @merge_replicated_variables -// CHECK-SAME: %[[ARG_0:.*]]: tensor<*x!tf.resource>>, %[[ARG_1:.*]]: tensor, -// CHECK-SAME: %[[ARG_2:.*]]: tensor<*x!tf.resource>>, -// CHECK-SAME: %[[ARG_3:.*]]: tensor<*x!tf.resource>> +// CHECK-SAME: %[[ARG_0:.*]]: tensor<*x!tf.resource>>, %[[ARG_1:.*]]: tensor<*x!tf.resource>>, +// CHECK-SAME: %[[ARG_2:.*]]: tensor<*x!tf.resource>> func @merge_replicated_variables( %arg0: tensor<*x!tf.resource>>, - %arg1: tensor, - %arg2: tensor<*x!tf.resource>>, - %arg3: tensor<*x!tf.resource>>) { + %arg1: tensor<*x!tf.resource>>, + %arg2: tensor<*x!tf.resource>>) { // CHECK-NEXT: %[[READ_0:.*]] = "tf.ReadVariableOp"(%[[ARG_0]]) %read0 = "tf.ReadVariableOp"(%arg0) : (tensor<*x!tf.resource>>) -> tensor<32xf32> - // CHECK-NEXT: tf_device.replicate([%[[ARG_2]], %[[ARG_3]]] as %[[R_ARG:.*]]: tensor<*x!tf.resource>>) - tf_device.replicate([%arg2, %arg3] as %r: tensor<*x!tf.resource>>) {n = 2 : i32} { + // CHECK: %[[COMPILE:.*]]:2 = "tf_device.launch" + %compile:2 
= "tf_device.launch"() ( { + // CHECK: tf._TPUCompileMlir + // CHECK-SAME: mlir_module + // CHECK-SAME: func @main(%arg0: tensor<32xf32>, %arg1: tensor<32xf32> {tf.aliasing_output = 0 : i64}) + %0:2 = "tf._TPUCompileMlir"() { + metadata = "", + mlir_module = "module attributes {tf.versions = {producer = 888 : i32}} {\0A func @main(%arg0: tensor<32xf32>, %arg1: tensor<32xf32>) -> (tensor<32xf32>) {\0A %0 = \22tf.A\22(%arg0, %arg1) : (tensor<32xf32>, tensor<32xf32>) -> (tensor<32xf32>)\0A return %0 : tensor<32xf32>\0A }\0A}" + } : () -> (tensor, tensor<2x!tf.string>) + tf_device.return %0#0, %0#1 : tensor, tensor<2x!tf.string> + }) {device = "/job:worker/replica:0/task:0/device:CPU:0"} : () -> (tensor, tensor<2x!tf.string>) + // CHECK: tf_device.replicate([%[[ARG_1]], %[[ARG_2]]] as %[[R_ARG:.*]]: tensor<*x!tf.resource>>) + tf_device.replicate([%arg1, %arg2] as %r: tensor<*x!tf.resource>>) {n = 2 : i32} { // CHECK-NEXT: "tf_device.launch" - // CHECK-NEXT: "tf.TPUExecuteAndUpdateVariables"(%[[READ_0]], %[[R_ARG]], %[[ARG_1]]) + // CHECK-NEXT: "tf.TPUExecuteAndUpdateVariables"(%[[READ_0]], %[[R_ARG]], %[[COMPILE]]#1) // CHECK-SAME: device_var_reads_indices = [1], // CHECK-SAME: device_var_updates_indices = [0] %read1 = "tf.ReadVariableOp"(%r) : (tensor<*x!tf.resource>>) -> tensor<32xf32> %execute = "tf_device.launch"() ( { - %0 = "tf.TPUExecute"(%read0, %read1, %arg1) - : (tensor<32xf32>, tensor<32xf32>, tensor) -> tensor<32xf32> + %0 = "tf.TPUExecute"(%read0, %read1, %compile#1) + : (tensor<32xf32>, tensor<32xf32>, tensor<2x!tf.string>) -> tensor<32xf32> tf_device.return %0 : tensor<32xf32> }) {device = ""} : () -> tensor<32xf32> // CHECK-NEXT: tf_device.return @@ -86,7 +105,6 @@ func @merge_replicated_variables( // CHECK-SAME: %[[ARG_0:.*]]: tensor<*x!tf.resource>> // CHECK-SAME: %[[ARG_1:.*]]: tensor<*x!tf.resource>> // CHECK-SAME: %[[ARG_2:.*]]: tensor<32xf32> -// CHECK-SAME: %[[ARG_3:.*]]: tensor // CHECK-SAME: %[[ARG_4:.*]]: tensor<*x!tf.resource>> // CHECK-SAME: 
%[[ARG_5:.*]]: tensor<*x!tf.resource>> // CHECK-SAME: %[[ARG_6:.*]]: tensor<2xf32> @@ -94,7 +112,6 @@ func @interferencing_accesses( %arg0: tensor<*x!tf.resource>> {tf.device = "/job:localhost/replica:0/task:0/device:TPU:0"}, %arg1: tensor<*x!tf.resource>> {tf.device = "/job:localhost/replica:0/task:0/device:TPU:0"}, %arg2: tensor<32xf32>, - %arg3: tensor, %arg4: tensor<*x!tf.resource>> {tf.device = "/job:localhost/replica:0/task:0/device:TPU:0"}, %arg5: tensor<*x!tf.resource>> {tf.device = "/job:localhost/replica:0/task:0/device:TPU:0"}, %arg6: tensor<2xf32>) -> (tensor<8xf32>) { @@ -108,15 +125,26 @@ func @interferencing_accesses( "tf.AssignVariableOp"(%arg5, %arg6) : (tensor<*x!tf.resource>>, tensor<2xf32>) -> () %read1 = "tf.ReadVariableOp"(%arg1) : (tensor<*x!tf.resource>>) -> tensor<64xf32> %read2 = "tf.ReadVariableOp"(%arg4) : (tensor<*x!tf.resource>>) -> tensor<8xf32> - // CHECK-NEXT: %[[EXE:.*]]:2 = "tf_device.launch" - // CHECK-NEXT: "tf.TPUExecuteAndUpdateVariables"(%[[READ_0]], %[[ARG_1]], %[[ARG_4]], %[[READ_5]], %[[ARG_3]]) + // CHECK: %[[COMPILE:.*]]:2 = "tf_device.launch" + %compile:2 = "tf_device.launch"() ( { + // CHECK: tf._TPUCompileMlir + // CHECK-SAME: mlir_module + // CHECK-SAME: func @main(%arg0: tensor<32xf32>, %arg1: tensor<32xf32> {tf.aliasing_output = 1 : i64}) + %0:2 = "tf._TPUCompileMlir"() { + metadata = "", + mlir_module = "module attributes {tf.versions = {producer = 888 : i32}} {\0A func @main(%arg0: tensor<32xf32>, %arg1: tensor<32xf32>) -> (tensor<32xf32>) {\0A %0 = \22tf.A\22(%arg0, %arg1) : (tensor<32xf32>, tensor<32xf32>) -> (tensor<32xf32>)\0A return %0 : tensor<32xf32>\0A }\0A}" + } : () -> (tensor, tensor<2x!tf.string>) + tf_device.return %0#0, %0#1 : tensor, tensor<2x!tf.string> + }) {device = "/job:worker/replica:0/task:0/device:CPU:0"} : () -> (tensor, tensor<2x!tf.string>) + // CHECK: %[[EXE:.*]]:2 = "tf_device.launch" + // CHECK-NEXT: "tf.TPUExecuteAndUpdateVariables"(%[[READ_0]], %[[ARG_1]], %[[ARG_4]], %[[READ_5]], 
%[[COMPILE]]#1) // CHECK-SAME: device_var_reads_indices = [1, 2], // CHECK-SAME: device_var_updates_indices = [1, -1] %execute:3 = "tf_device.launch"() ( { - %0:3 = "tf.TPUExecute"(%read0, %read1, %read2, %read5, %arg3) { + %0:3 = "tf.TPUExecute"(%read0, %read1, %read2, %read5, %compile#1) { Targs = [tensor<32xf32>, tensor<64xf32>, tensor<8xf32>, tensor<2xf32>], Tresults = [tensor<32xf32>, tensor<64xf32>, tensor<8xf32>]} - : (tensor<32xf32>, tensor<64xf32>, tensor<8xf32>, tensor<2xf32>, tensor) + : (tensor<32xf32>, tensor<64xf32>, tensor<8xf32>, tensor<2xf32>, tensor<2x!tf.string>) -> (tensor<32xf32>, tensor<64xf32>, tensor<8xf32>) tf_device.return %0#0, %0#1, %0#2 : tensor<32xf32>, tensor<64xf32>, tensor<8xf32> }) {device = "/job:localhost/replica:0/task:0/device:TPU:0"} : () -> (tensor<32xf32>, tensor<64xf32>, tensor<8xf32>) diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_merge_variables_with_execute.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_merge_variables_with_execute.cc index 52c9287b619..4c947d72be3 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_merge_variables_with_execute.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_merge_variables_with_execute.cc @@ -43,13 +43,14 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" - +#include "tensorflow/compiler/mlir/tensorflow/utils/serialize_mlir_module_utils.h" #define DEBUG_TYPE "tf-tpu-merge-variables-with-execute" namespace mlir { namespace TFTPU { namespace { +constexpr char kAliasingAttr[] = "tf.aliasing_output"; constexpr char kDeviceAttr[] = "device"; constexpr char kFuncDeviceAttr[] = "tf.device"; @@ -418,6 +419,69 @@ void ReplaceExecute(tf_device::LaunchOp execute_launch, execute_launch.erase(); } +// Returns TPUCompileMlir op that generates the program executed by the +// TPUExecute op. 
+TF::_TPUCompileMlirOp GetTPUCompileOp(tf_device::LaunchOp execute_launch) { + auto execute = + llvm::dyn_cast(execute_launch.GetBody().front()); + if (!execute) return {}; + auto compile_launch = llvm::dyn_cast_or_null( + execute.getOperand(execute.getNumOperands() - 1).getDefiningOp()); + if (!compile_launch) return {}; + return llvm::dyn_cast( + compile_launch.GetBody().front()); +} + +// Updates the serialized module associated with the TPUExecute op to reflect +// the aliasing information for better management of device memory. +LogicalResult UpdateSerializedModule(tf_device::LaunchOp execute_launch, + VariableAccessesForTPUExecute& infos) { + TF::_TPUCompileMlirOp compile = GetTPUCompileOp(execute_launch); + + // Skip adding alias information in case of model parallelism i.e., + // TPUCompileMlir op generates multiple programs. + if (!compile || compile.program().size() > 1) return failure(); + + // Parse the serialized module + mlir::OwningModuleRef module_ref; + tensorflow::Status status = tensorflow::DeserializeMlirModule( + compile.mlir_module().str(), compile.getContext(), &module_ref); + if (!status.ok()) { + LLVM_DEBUG(llvm::dbgs() << "Error in parsing serialized module: " + << status.error_message() << "\n"); + + return failure(); + } + + // Add aliasing information to main function arguments. + FuncOp main_func = module_ref->lookupSymbol("main"); + if (!main_func) return failure(); + + OpBuilder builder(main_func.getContext()); + for (auto resource : infos.resources_read) { + auto& info = infos.per_resource_info[resource]; + if (info.execute_input_index < 0 || info.execute_output_index < 0) continue; + auto aliasing_attr = main_func.getArgAttrOfType( + info.execute_input_index, kAliasingAttr); + + // Set only if aliasing attribute does not exist. 
+ if (!aliasing_attr) { + main_func.setArgAttr( + info.execute_input_index, kAliasingAttr, + builder.getI64IntegerAttr(info.execute_output_index)); + continue; + } + // If aliasing attribute already exists, it must match the new value. + assert(aliasing_attr.getInt() == info.execute_output_index); + } + + // Serialize the updated module back into the TPUCompileMlir op. + auto module_string = tensorflow::SerializeMlirModule(module_ref.get()); + compile.mlir_moduleAttr( + mlir::StringAttr::get(module_string, module_ref->getContext())); + return success(); +} + // Merges the variable accesses into one TPUExecute op. void MergeForOneTPUExecute(tf_device::LaunchOp execute_launch, bool check_device, bool check_same_region, @@ -427,6 +491,19 @@ void MergeForOneTPUExecute(tf_device::LaunchOp execute_launch, if (infos.per_resource_info.empty()) { return; } + + // Update the serialized module with aliasing information for better memory + // management on device. + // TODO(b/172608422): Benchmark the cost of deserialization/serialization of + // the attached module. We can avoid it by serializing it at the end of the + // bridge pipeline. + if (failed(UpdateSerializedModule(execute_launch, infos))) { + LLVM_DEBUG( + llvm::dbgs() + << "Unable to update the serialized module with aliasing information " + "which can lead to poor memory management on device.\n"); + } + // Start creating the new TPUExecuteAndUpdateVariables op. builder->setInsertionPoint(execute_launch); // Output types. Skip the original outputs for fused assigns. diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc index a94d20b1d2a..1fb99309495 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc @@ -133,7 +133,7 @@ Status GetXlaInputShapes( // output based on static shapes in MLIR module. 
If an output is a resource // write, `resource_updates` is populated insead of `outputs` for that output. Status GetOutputInfo( - mlir::ModuleOp module, + mlir::ModuleOp module, bool use_resource_updates_for_aliases, const XlaHelpers::ShapeRepresentationFn shape_representation_fn, xla::Shape* xla_output_shape, std::vector* outputs, std::vector* resource_updates) { @@ -152,11 +152,11 @@ Status GetOutputInfo( std::vector shapes; shapes.reserve(func_type.getNumResults()); - llvm::SmallDenseMap resource_arg_to_write; + llvm::SmallDenseMap output_to_input_alias; for (unsigned i = 0; i < main_func.getNumArguments(); ++i) if (auto aliasing_output = main_func.getArgAttrOfType( i, "tf.aliasing_output")) - resource_arg_to_write.insert({aliasing_output.getInt(), i}); + output_to_input_alias[aliasing_output.getInt()] = i; for (auto type_and_idx : llvm::enumerate(func_type.getResults())) { TF_ASSIGN_OR_RETURN( @@ -166,8 +166,8 @@ Status GetOutputInfo( auto tensor_type = type_and_idx.value().dyn_cast(); shapes.push_back(shape); - auto it = resource_arg_to_write.find(type_and_idx.index()); - if (it != resource_arg_to_write.end()) { + auto it = output_to_input_alias.find(type_and_idx.index()); + if (it != output_to_input_alias.end() && use_resource_updates_for_aliases) { // Add resource write. resource_updates->emplace_back(); XlaResourceUpdate& resource_update = resource_updates->back(); @@ -177,7 +177,6 @@ Status GetOutputInfo( TF_RETURN_IF_ERROR(XLAShapeToTensorShape(shape, &resource_update.shape)); continue; } - // Construct OutputDescription for result. outputs->emplace_back(); XlaOutputDescription& out_desc = outputs->back(); @@ -185,11 +184,10 @@ Status GetOutputInfo( // TODO(ycao): Support constant output. out_desc.is_constant = false; TF_RETURN_IF_ERROR(XLAShapeToTensorShape(shape, &out_desc.shape)); - // Input_index is only meaningful for resource output. Since MLIR-based - // TF-Compiler bridge doesn't support resource output yet. 
Setting it to - // meaningless value -1. - // TODO(ycao): Support resource-type output. - out_desc.input_index = -1; + // Input_index is only meaningful for resource output. Setting it to + // meaningless value -1 for non resource outputs. + out_desc.input_index = + it != output_to_input_alias.end() ? it->getSecond() : -1; // MLIR-based TF-Compiler bridge doesn't support tensorlist output yet. // TODO(ycao): Support tensorlist-type output. out_desc.is_tensor_list = false; @@ -368,6 +366,7 @@ Status ConvertMLIRToXlaComputation( Status CompileMlirToXlaHlo( mlir::ModuleOp module_op, llvm::ArrayRef arg_shapes, llvm::StringRef device_type, bool use_tuple_args, bool use_return_tuple, + bool use_resource_updates_for_aliases, XlaHelpers::ShapeRepresentationFn shape_representation_fn, XlaCompilationResult* compilation_result, llvm::MutableArrayRef> @@ -402,8 +401,9 @@ Status CompileMlirToXlaHlo( // Compute all output descriptions and resource writes TF_RETURN_IF_ERROR(GetOutputInfo( - module_op, shape_representation_fn, &compilation_result->xla_output_shape, - &compilation_result->outputs, &compilation_result->resource_updates)); + module_op, use_resource_updates_for_aliases, shape_representation_fn, + &compilation_result->xla_output_shape, &compilation_result->outputs, + &compilation_result->resource_updates)); if (VLOG_IS_ON(1)) tensorflow::DumpMlirOpToFile("mlir_compile_after", module_op); @@ -428,10 +428,10 @@ Status CompileSerializedMlirToXlaHlo( tensor_or_resource_shapes.reserve(arg_shapes.size()); for (const auto& arg_shape : arg_shapes) tensor_or_resource_shapes.push_back({arg_shape}); - return CompileMlirToXlaHlo(mlir_module.get(), tensor_or_resource_shapes, - device_type, use_tuple_args, - /*use_return_tuple=*/true, shape_representation_fn, - compilation_result, custom_legalization_passes); + return CompileMlirToXlaHlo( + mlir_module.get(), tensor_or_resource_shapes, device_type, use_tuple_args, + /*use_return_tuple=*/true, 
/*use_resource_updates_for_aliases=*/false, + shape_representation_fn, compilation_result, custom_legalization_passes); } // Rewrites the given module with specified args. For each of the constant args, @@ -531,7 +531,8 @@ Status CompileGraphToXlaHlo( auto status = CompileMlirToXlaHlo( module_op, arg_shapes, device_type, use_tuple_args, use_return_tuple, - shape_representation_fn, compilation_result, custom_legalization_passes); + /*use_resource_updates_for_aliases=*/true, shape_representation_fn, + compilation_result, custom_legalization_passes); compilation_result->input_mapping = remaining_params; return status; } diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h index 40230de406b..c933ff97fbb 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h @@ -86,6 +86,7 @@ struct TensorOrResourceShape { Status CompileMlirToXlaHlo( mlir::ModuleOp module_op, llvm::ArrayRef arg_shapes, llvm::StringRef device_type, bool use_tuple_args, bool use_return_tuple, + bool use_resource_updates_for_aliases, XlaHelpers::ShapeRepresentationFn shape_representation_fn, XlaCompilationResult* compilation_result, llvm::MutableArrayRef> diff --git a/tensorflow/compiler/mlir/tensorflow/utils/tf_xla_mlir_translate.cc b/tensorflow/compiler/mlir/tensorflow/utils/tf_xla_mlir_translate.cc index bcc3fe62f99..d0cbe1abc9d 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/tf_xla_mlir_translate.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/tf_xla_mlir_translate.cc @@ -236,7 +236,8 @@ static mlir::LogicalResult MlirTfToHloTextTranslateFunction( XlaCompilationResult compilation_result; auto compilation_status = CompileMlirToXlaHlo( module_op, arg_shapes, /*device_type=*/"XLA_CPU_JIT", emit_use_tuple_arg, - emit_return_tuple, IdentityShapeRepresentationFn(), &compilation_result, + emit_return_tuple, 
/*use_resource_updates_for_aliases=*/true, + IdentityShapeRepresentationFn(), &compilation_result, /*custom_legalization_passes=*/{}); if (!compilation_status.ok()) { LOG(ERROR) << "TF/XLA compilation failed: " From 06e83cc2c27830e872161acee77855927d62cfac Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 11 Nov 2020 12:00:45 -0800 Subject: [PATCH 175/220] [TFLite/MLIR] Converts splat constant to scalar operand when consumed by broadcastable ops. PiperOrigin-RevId: 341879953 Change-Id: If5bba84538131199c96f51149c5216cffee22187 --- .../compiler/mlir/lite/tests/optimize.mlir | 66 +++++--- .../compiler/mlir/lite/transforms/optimize.cc | 142 ++++++++++++++++++ 2 files changed, 190 insertions(+), 18 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/tests/optimize.mlir b/tensorflow/compiler/mlir/lite/tests/optimize.mlir index 1d9621084ee..e7516658b07 100644 --- a/tensorflow/compiler/mlir/lite/tests/optimize.mlir +++ b/tensorflow/compiler/mlir/lite/tests/optimize.mlir @@ -407,16 +407,16 @@ func @fuseMulIntoDepthwiseConv2d(%arg0: tensor<1x112x112x2xf32>) -> tensor<1x112 } // CHECK-LABEL: @notFuseMulIntoDepthwiseConv2d -func @notFuseMulIntoDepthwiseConv2d(%arg0: tensor<1x112x112x2xf32>) -> tensor<1x112x112x2xf32> { +func @notFuseMulIntoDepthwiseConv2d(%arg0: tensor<1x4x4x2xf32>) -> tensor<1x4x4x2xf32> { %cst0 = constant dense<[[[[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], [[7.0, 8.0], [9.0, 10.0], [11.0, 12.0]], [[13.0, 14.0], [15.0, 16.0], [17.0, 18.0]]]]> : tensor<1x3x3x2xf32> %cst1 = constant dense<2.0> : tensor<2xf32> - %cst2 = constant dense<3.0> : tensor<112x2xf32> + %cst2 = constant dense<[[3.1, 3.2], [3.1, 3.2], [3.1, 3.2], [3.1, 3.2]]> : tensor<4x2xf32> - %0 = "tfl.depthwise_conv_2d"(%arg0, %cst0, %cst1) {depth_multiplier = 1 : i32, dilation_h_factor = 1 : i32, dilation_w_factor = 1 : i32, fused_activation_function = "NONE", padding = "SAME", stride_h = 1 : i32, stride_w = 1 : i32} : (tensor<1x112x112x2xf32>, tensor<1x3x3x2xf32>, tensor<2xf32>) -> 
tensor<1x112x112x2xf32> + %0 = "tfl.depthwise_conv_2d"(%arg0, %cst0, %cst1) {depth_multiplier = 1 : i32, dilation_h_factor = 1 : i32, dilation_w_factor = 1 : i32, fused_activation_function = "NONE", padding = "SAME", stride_h = 1 : i32, stride_w = 1 : i32} : (tensor<1x4x4x2xf32>, tensor<1x3x3x2xf32>, tensor<2xf32>) -> tensor<1x4x4x2xf32> // We cannot fuse this tfl.mul into the preceding conv op because %cst2 is not broadcast-compatible to %cst0. - %1 = "tfl.mul"(%0, %cst2) {fused_activation_function = "RELU6"} : (tensor<1x112x112x2xf32>, tensor<112x2xf32>) -> tensor<1x112x112x2xf32> + %1 = "tfl.mul"(%0, %cst2) {fused_activation_function = "RELU6"} : (tensor<1x4x4x2xf32>, tensor<4x2xf32>) -> tensor<1x4x4x2xf32> - return %1 : tensor<1x112x112x2xf32> + return %1 : tensor<1x4x4x2xf32> // CHECK: %0 = "tfl.depthwise_conv_2d"(%arg0, %cst, %cst_0) // CHECK: %1 = "tfl.mul"(%0, %cst_1) @@ -484,17 +484,17 @@ func @FuseFullyConnectedAddWithScalarRhs(%arg0: tensor<40x37xf32>, %arg1: tensor } // CHECK-LABEL: @FuseFullyConnectedAddWithUnfusableRhs -func @FuseFullyConnectedAddWithUnfusableRhs(%arg0: tensor<40x37xf32>, %arg1: tensor<40x37xf32>) -> tensor<40x40xf32> { +func @FuseFullyConnectedAddWithUnfusableRhs(%arg0: tensor<4x37xf32>, %arg1: tensor<4x37xf32>) -> tensor<4x4xf32> { %cst = constant unit - %cst2 = constant dense<2.0> : tensor<40x40xf32> + %cst2 = constant dense<[[2.0, 2.1, 2.2, 2.3], [2.0, 2.1, 2.2, 2.3], [2.0, 2.1, 2.2, 2.3], [2.0, 2.1, 2.2, 2.3]]> : tensor<4x4xf32> - %0 = "tfl.fully_connected" (%arg0, %arg1, %cst) {fused_activation_function = "NONE", keep_num_dims = false, weights_format = "DEFAULT"} : (tensor<40x37xf32>, tensor<40x37xf32>, none) -> (tensor<40x40xf32>) - %1 = "tfl.add"(%0, %cst2) {fused_activation_function = "NONE"} : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %0 = "tfl.fully_connected" (%arg0, %arg1, %cst) {fused_activation_function = "NONE", keep_num_dims = false, weights_format = "DEFAULT"} : (tensor<4x37xf32>, 
tensor<4x37xf32>, none) -> (tensor<4x4xf32>) + %1 = "tfl.add"(%0, %cst2) {fused_activation_function = "NONE"} : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32> - return %1 : tensor<40x40xf32> + return %1 : tensor<4x4xf32> // CHECK: %[[unit:.*]] = constant unit - // CHECK: %[[filter:.*]] = constant dense<2.000000e+00> : tensor<40x40xf32> + // CHECK: %[[filter:.*]] = constant dense<{{.*}}> : tensor<4x4xf32> // CHECK: %[[fc_result:.*]] = "tfl.fully_connected"(%arg0, %arg1, %[[unit]]) // CHECK: %[[add_result:.*]] = tfl.add %[[fc_result]], %[[filter]] // CHECK: return %[[add_result]] @@ -851,17 +851,17 @@ func @fuseDivIntoConv2d_Scalar(%arg0: tensor<1x112x112x2xf32>) -> tensor<1x112x1 } // CHECK-LABEL: @fuseMulIntoConv2d_Scalar -func @fuseMulIntoConv2d_Scalar(%arg0: tensor<1x112x112x2xf32>) -> tensor<1x112x112x2xf32> { +func @fuseMulIntoConv2d_Scalar(%arg0: tensor<1x112x112x2xf32>) -> tensor<1x112x112x1xf32> { %cst0 = constant dense<[[[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]]]> : tensor<1x2x2x2xf32> - %cst1 = constant dense<1.0> : tensor<2xf32> + %cst1 = constant dense<1.0> : tensor<1xf32> %cst2 = constant dense<2.0> : tensor - %0 = "tfl.conv_2d"(%arg0, %cst0, %cst1) {dilation_h_factor = 2 : i32, dilation_w_factor = 3 : i32, fused_activation_function = "NONE", padding = "SAME", stride_h = 4 : i32, stride_w = 5 : i32} : (tensor<1x112x112x2xf32>, tensor<1x2x2x2xf32>, tensor<2xf32>) -> tensor<1x112x112x2xf32> - %1 = "tfl.mul"(%0, %cst2) {fused_activation_function = "NONE"} : (tensor<1x112x112x2xf32>, tensor) -> tensor<1x112x112x2xf32> + %0 = "tfl.conv_2d"(%arg0, %cst0, %cst1) {dilation_h_factor = 2 : i32, dilation_w_factor = 3 : i32, fused_activation_function = "NONE", padding = "SAME", stride_h = 4 : i32, stride_w = 5 : i32} : (tensor<1x112x112x2xf32>, tensor<1x2x2x2xf32>, tensor<1xf32>) -> tensor<1x112x112x1xf32> + %1 = "tfl.mul"(%0, %cst2) {fused_activation_function = "NONE"} : (tensor<1x112x112x1xf32>, tensor) -> tensor<1x112x112x1xf32> - return %1 : 
tensor<1x112x112x2xf32> + return %1 : tensor<1x112x112x1xf32> // CHECK: %[[CST1:.*]] = constant dense<{{\[\[\[\[}}2.000000e+00, 4.000000e+00], [6.000000e+00, 8.000000e+00]], {{\[\[}}1.000000e+01, 1.200000e+01], [1.400000e+01, 1.600000e+01]]]]> : tensor<1x2x2x2xf32> - // CHECK: %[[CST2:.*]] = constant dense<2.000000e+00> : tensor<2xf32> - // CHECK: %[[RES:[0-9].*]] = "tfl.conv_2d"(%arg0, %[[CST1]], %[[CST2]]) {dilation_h_factor = 2 : i32, dilation_w_factor = 3 : i32, fused_activation_function = "NONE", padding = "SAME", stride_h = 4 : i32, stride_w = 5 : i32} : (tensor<1x112x112x2xf32>, tensor<1x2x2x2xf32>, tensor<2xf32>) -> tensor<1x112x112x2xf32> + // CHECK: %[[CST2:.*]] = constant dense<2.000000e+00> : tensor<1xf32> + // CHECK: %[[RES:[0-9].*]] = "tfl.conv_2d"(%arg0, %[[CST1]], %[[CST2]]) {dilation_h_factor = 2 : i32, dilation_w_factor = 3 : i32, fused_activation_function = "NONE", padding = "SAME", stride_h = 4 : i32, stride_w = 5 : i32} : (tensor<1x112x112x2xf32>, tensor<1x2x2x2xf32>, tensor<1xf32>) -> tensor<1x112x112x1xf32> // CHECK: return %[[RES]] } @@ -1397,3 +1397,33 @@ func @fuseExpanded1DMulIntoConv2d(%arg0: tensor<1x8x8x207xf32>) -> tensor<1x8x8x // CHECK: "tfl.conv_2d"(%arg0, %[[CST_0]], %[[CST_1]]) } + +// CHECK-LABEL: @FuseFullyConnectedAddWithSplat2D +func @FuseFullyConnectedAddWithSplat2D(%arg0: tensor<40x37xf32>, %arg1: tensor<40x37xf32>) -> tensor<40x40xf32> { + %cst = constant unit + %cst2 = constant dense<2.0> : tensor<40x40xf32> + + %0 = "tfl.fully_connected" (%arg0, %arg1, %cst) {fused_activation_function = "NONE", keep_num_dims = false, weights_format = "DEFAULT"} : (tensor<40x37xf32>, tensor<40x37xf32>, none) -> (tensor<40x40xf32>) + %1 = "tfl.add"(%0, %cst2) {fused_activation_function = "NONE"} : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + + return %1 : tensor<40x40xf32> + + // CHECK: %[[BIAS:.*]] = constant dense<2.000000e+00> : tensor<40xf32> + // CHECK: %[[FC_RESULT:.*]] = "tfl.fully_connected"(%arg0, %arg1, 
%[[BIAS]]) + // CHECK: return %[[FC_RESULT]] +} + +// CHECK-LABEL: @fuseMulIntoConv2d_Splat2D +func @fuseMulIntoConv2d_Splat2D(%arg0: tensor<1x112x112x2xf32>) -> tensor<1x112x112x2xf32> { + %cst0 = constant dense<[[[[1.0, 2.0]]], [[[3.0, 4.0]]]]> : tensor<2x1x1x2xf32> + %cst1 = constant dense<1.0> : tensor<2xf32> + %cst2 = constant dense<2.0> : tensor<1x112x112x2xf32> + %0 = "tfl.conv_2d"(%arg0, %cst0, %cst1) {dilation_h_factor = 2 : i32, dilation_w_factor = 3 : i32, fused_activation_function = "NONE", padding = "SAME", stride_h = 4 : i32, stride_w = 5 : i32} : (tensor<1x112x112x2xf32>, tensor<2x1x1x2xf32>, tensor<2xf32>) -> tensor<1x112x112x2xf32> + %1 = "tfl.mul"(%0, %cst2) {fused_activation_function = "NONE"} : (tensor<1x112x112x2xf32>, tensor<1x112x112x2xf32>) -> tensor<1x112x112x2xf32> + + return %1 : tensor<1x112x112x2xf32> + // CHECK: %[[CST1:.*]] = constant dense<{{\[\[\[\[}}2.000000e+00, 4.000000e+00]]], {{\[\[\[}}6.000000e+00, 8.000000e+00]]]]> : tensor<2x1x1x2xf32> + // CHECK: %[[CST2:.*]] = constant dense<2.000000e+00> : tensor<2xf32> + // CHECK: %[[RES:[0-9].*]] = "tfl.conv_2d"(%arg0, %[[CST1]], %[[CST2]]) {dilation_h_factor = 2 : i32, dilation_w_factor = 3 : i32, fused_activation_function = "NONE", padding = "SAME", stride_h = 4 : i32, stride_w = 5 : i32} : (tensor<1x112x112x2xf32>, tensor<2x1x1x2xf32>, tensor<2xf32>) -> tensor<1x112x112x2xf32> + // CHECK: return %[[RES]] +} diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize.cc b/tensorflow/compiler/mlir/lite/transforms/optimize.cc index 9acade8199f..89c07fa5d66 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize.cc +++ b/tensorflow/compiler/mlir/lite/transforms/optimize.cc @@ -27,11 +27,14 @@ limitations under the License. 
#include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/raw_ostream.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Matchers.h" // from @llvm-project @@ -729,6 +732,143 @@ struct FuseBinaryOpToFollowingAffineOp : public OpRewritePattern { } }; +// If the operand to a broadcastable op is a splat constant, try to replace it +// with a 0-d constant, e.g. before this optimization, +// %cst = constant dense<1.0> : tensor<16x16x4xf32> +// %0 = "tfl.conv_2d"... +// %1 = "tfl.add"(%0, %cst) : (tensor<16x16x4xf32>, tensor<16x16x4xf32>) +// After this optimization: +// %cst = constant dense<1.0> : tensor +// %0 = "tfl.conv_2d"... +// %1 = "tfl.add"(%0, %cst) : (tensor<16x16x4xf32>, tensor) +// This pattern can enable more fusing opportunities when the binary op is +// following conv ops. 
+template +struct ScalarizeSplatConstantForBroadcastableOps + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(BinaryOpType binary_op, + PatternRewriter &rewriter) const override { + DenseElementsAttr splat_elements_attr; + if (!IsScalarizableSplatConstant(binary_op.rhs(), &splat_elements_attr)) { + return failure(); + } + + constexpr int kSplatOperandIndex = 1; + auto result_type = + binary_op.getResult().getType().template cast(); + mlir::Value non_splat_operand = + binary_op.getOperand(1 - kSplatOperandIndex); + auto non_splat_operand_type = + non_splat_operand.getType().cast(); + // If the other operand's shape does not equal to the result shape, then we + // cannot scalarize the splat constant because the result shape relies on + // the splat constant op's shape for broadcasting. + if (!non_splat_operand_type.hasStaticShape() || + non_splat_operand_type.getShape() != result_type.getShape()) { + return failure(); + } + + // If non-splat operand is not fusable affine ops, then no need to apply + // this transformation. + if (!CanFuseAffineOp(non_splat_operand.getDefiningOp(), binary_op)) { + return failure(); + } + + // Creates a new scalar constant op using the splat value. + mlir::Value splat_operand = binary_op.getOperand(kSplatOperandIndex); + auto scalar_elements_attr = DenseElementsAttr::get( + RankedTensorType::get({}, + splat_elements_attr.getType().getElementType()), + splat_elements_attr.getSplatValue()); + + auto scalar_constant_op = rewriter.create( + splat_operand.getLoc(), scalar_elements_attr.getType(), + scalar_elements_attr); + + binary_op.setOperand(kSplatOperandIndex, scalar_constant_op); + return success(); + } + + private: + // Returns true if this value is a splat constant op which can be scalarized. + // Also returns the elements attr if this value is indeed a splat constant. 
+ bool IsScalarizableSplatConstant(mlir::Value value, + DenseElementsAttr *elements_attr) const { + if (!matchPattern(value, m_Constant(elements_attr))) { + return false; + } + auto element_type = value.getType().cast().getElementType(); + // Ignore per-axis quantized constants because after converting to scalar, + // we will lose per-axis qantization parameter. + if (element_type.isa()) { + return false; + } + if (IsScalar(value)) { + return false; + } + return elements_attr->isSplat(); + } + + // If this type is a scalar shaped type. + bool IsScalar(mlir::Value value) const { + auto type = value.getType().dyn_cast(); + if (!type) { + return false; + } + if (!type.hasStaticShape()) { + return false; + } + return type.getNumElements() == 1; + } + + // Returns true if we can fuse an affine op with consuming binary op. + bool CanFuseAffineOp(Operation *affine_op, Operation *binary_op) const { + if (!isa_and_nonnull(affine_op)) { + return false; + } + DenseElementsAttr value; + // Check that bias are constants if not none. + Value bias = affine_op->getOperand(2); + if (!bias.getType().isa() && + !matchPattern(bias, m_Constant(&value))) { + return false; + } + // If the binary op is mul/div, also check that filter is constant. + if (isa(binary_op) && + !matchPattern(affine_op->getOperand(1), m_Constant(&value))) { + return false; + } + + // We can only fuse F32/BF16. 
+ auto is_fusable_type = [](Type t) { + Type element_type = t; + if (auto shaped_type = t.dyn_cast()) { + element_type = shaped_type.getElementType(); + } + return element_type.isBF16() || element_type.isF32(); + }; + for (Type t : binary_op->getOperandTypes()) { + if (!is_fusable_type(t)) { + return false; + } + } + + return true; + } +}; + +using ScalarizeSplatConstantForSub = + ScalarizeSplatConstantForBroadcastableOps; +using ScalarizeSplatConstantForAdd = + ScalarizeSplatConstantForBroadcastableOps; +using ScalarizeSplatConstantForMul = + ScalarizeSplatConstantForBroadcastableOps; +using ScalarizeSplatConstantForDiv = + ScalarizeSplatConstantForBroadcastableOps; + struct ConvertTrivialTransposeOpToReshapeOp : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -818,6 +958,8 @@ void Optimize::runOnFunction() { OwningRewritePatternList phase_2_patterns; TFL::populateWithGenerated(ctx, phase_2_patterns); phase_2_patterns.insert< + ScalarizeSplatConstantForAdd, ScalarizeSplatConstantForSub, + ScalarizeSplatConstantForMul, ScalarizeSplatConstantForDiv, FuseFullyConnectedAndAdd, FuseFullyConnectedAndReluX, FuseFullyConnectedAndReluX, FuseFullyConnectedAndReluX, From ddb6af7b307fb9829cbdaee9093ad6a9a4f2ec2b Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Wed, 11 Nov 2020 12:08:31 -0800 Subject: [PATCH 176/220] Update resource op lifting cleanup to remove redundant tf.Cast ops of resources. These ops will break analysis of tf.While/tf.WhileRegion in regards to resource handles being direct inputs and outputs. 
PiperOrigin-RevId: 341882124 Change-Id: I4ed4d02f7a892be4f5d0e35cc56f91bc6e61c21c --- .../tensorflow/tests/resource_op_lifting.mlir | 37 +++++++++++++++++++ .../transforms/resource_op_lifting_cleanup.cc | 15 ++++++-- 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir b/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir index 5560d369db6..5bf3a3c6721 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir @@ -1249,3 +1249,40 @@ func @callee(%arg0: !tf_res) -> tensor { // CHECK-NEXT: return [[TRUE]] : return %0 : tensor } + +// ----- + +// Tests passthrough tf.Cast ops are removed. + +!tf_res = type tensor<*x!tf.resource>> + +// CHECK-LABEL: func @tpu_computation +func @tpu_computation(%arg0: !tf_res) { + "tf_device.cluster"() ( { + %0 = "tf.While"(%arg0) {body = @while_body, cond = @while_cond, is_stateless = false} : (!tf_res) -> !tf_res + %1 = "tf.WhileRegion"(%arg0) ( { + ^cond(%carg0: !tf_res): + %2 = "tf.Const"() {value = dense : tensor} : () -> tensor + "tf.Yield"(%2) : (tensor) -> () + }, { + ^body(%barg0: !tf_res): + // CHECK-NOT: tf.Cast + %2 = "tf.Cast"(%barg0) : (!tf_res) -> !tf_res + "tf.Yield"(%2) : (!tf_res) -> () + }) {is_stateless = false} : (!tf_res) -> !tf_res + tf_device.return + }) {} : () -> () + return +} + +func @while_cond(%arg0: !tf_res) -> tensor { + %0 = "tf.Const"() {value = dense : tensor} : () -> tensor + return %0 : tensor +} + +// CHECK-LABEL: func @while_body +func @while_body(%arg0: !tf_res) -> !tf_res { + // CHECK-NOT: tf.Cast + %0 = "tf.Cast"(%arg0) : (!tf_res) -> !tf_res + return %0 : !tf_res +} diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting_cleanup.cc b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting_cleanup.cc index b635096cc9b..283ad08a52c 100644 --- 
a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting_cleanup.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting_cleanup.cc @@ -31,11 +31,18 @@ bool IsResource(Value value) { return getElementTypeOrSelf(value.getType()).isa(); } -// Removes identity nodes in the block. The device computation does not need +// Checks if a cast op is casting a resource -> resource. +bool IsCastOfResource(Operation &op) { + auto cast = dyn_cast(op); + if (!cast) return false; + return IsResource(cast.x()) && cast.x().getType() == cast.y().getType(); +} + +// Removes passthrough ops in the block. The device computation does not need // such nodes to carry information. -void RemoveIdentity(Block &block) { +void RemovePassthroughOp(Block &block) { for (auto &op : llvm::make_early_inc_range(block)) { - if (isa(&op)) { + if (isa(op) || IsCastOfResource(op)) { op.replaceAllUsesWith(op.getOperands()); op.erase(); } @@ -397,7 +404,7 @@ LogicalResult CleanupAndCanonicalize(Operation *parent_op) { // Cleanup code in attached regions. 
for (Region ®ion : op->getRegions()) { if (!llvm::hasSingleElement(region)) return WalkResult::interrupt(); - RemoveIdentity(region.front()); + RemovePassthroughOp(region.front()); RemoveDeadLocalVariables(region.front()); } From ce6f0682d2e441dd648c202fac09662ebcd323bb Mon Sep 17 00:00:00 2001 From: Peng Wang Date: Wed, 11 Nov 2020 12:16:47 -0800 Subject: [PATCH 177/220] Some internal changes PiperOrigin-RevId: 341884008 Change-Id: I585bf735d032503679f45f71f9820784e339b012 --- tensorflow/python/distribute/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index f5b74545ad9..ea0dafac69a 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -1071,6 +1071,7 @@ cuda_py_test( tags = [ "multi_and_single_gpu", "no_cuda_asan", # times out + "notsan", # b/173031470 ], deps = [ ":collective_util", From 4c4fe4b8e29044d681c423807bcf38dab56e2a75 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 11 Nov 2020 12:21:56 -0800 Subject: [PATCH 178/220] [TF:TRT] Adds ModelHandlerManager and SamplerRunner to organize model testing. 
PiperOrigin-RevId: 341885127 Change-Id: I9179ee8e90beb13b2f9c6862cf31367a4ec2f29f --- .../tensorrt/model_tests/model_handler.py | 93 ++++++++++++++- .../tensorrt/model_tests/run_models.py | 107 +++++++++++------- 2 files changed, 161 insertions(+), 39 deletions(-) diff --git a/tensorflow/python/compiler/tensorrt/model_tests/model_handler.py b/tensorflow/python/compiler/tensorrt/model_tests/model_handler.py index 4d2f0eaf015..072f5247503 100644 --- a/tensorflow/python/compiler/tensorrt/model_tests/model_handler.py +++ b/tensorflow/python/compiler/tensorrt/model_tests/model_handler.py @@ -19,7 +19,7 @@ import collections import functools import tempfile import time -from typing import List, Mapping, Optional, Sequence, Union +from typing import Callable, Iterable, List, Mapping, Optional, Sequence, Union from absl import logging import numpy as np @@ -146,6 +146,14 @@ class ModelConfig( saved_model_signature_key, default_batch_size) +class TestResultCollection( + collections.namedtuple("TestResultCollection", ["results", "config"])): + + def __new__(cls, config: ModelConfig, + results: Sequence[TestResult] = tuple()): + return super(TestResultCollection, cls).__new__(cls, config, results) + + class _ModelHandlerBase(metaclass=abc.ABCMeta): """Base class for running a model.""" @@ -429,3 +437,86 @@ class TrtModelHandlerV2(_TrtModelHandlerBase, ModelHandlerV2): benchmark_iterations, allow_to_use_gpu=True) return test_result._replace(trt_convert_params=self._trt_convert_params) + + +class _ModelHandlerManagerBase(metaclass=abc.ABCMeta): + """Manages a series of ModelHandlers for aggregrated testing/benchmarking.""" + + def __init__( + self, model_config: ModelConfig, + default_trt_convert_params: trt.TrtConversionParams, + trt_convert_params_updater: Callable[[trt.TrtConversionParams], + Iterable[trt.TrtConversionParams]]): + self._ori_model = self.model_handler_cls(model_config) + self._trt_models = [] + for trt_convert_params in trt_convert_params_updater( + 
default_trt_convert_params): + trt_model = self.trt_model_handler_cls( + model_config, trt_convert_params=trt_convert_params) + self._trt_models.append(trt_model) + + self._result_collection = TestResultCollection( + results=[], config=model_config) + + def __str__(self) -> str: + return "Input Model: {}".format(str(self._ori_model)) + + def __repr__(self) -> str: + return "{}({})".format(self.__class__.__name__, str(self)) + + @property + @classmethod + @abc.abstractmethod + def model_handler_cls(cls): + """The modle handler class. ModelHandleV1/ModelHandlerV2.""" + + @property + @classmethod + @abc.abstractmethod + def trt_model_handler_cls(cls): + """The TensorRTmodle handler class. TrtModelHandleV1/TrtModelHandlerV2.""" + + @property + def model_config(self): + return self._ori_model.model_config + + def generate_random_inputs(self, batch_size: Optional[int] = None): + return self._ori_model.generate_random_inputs(batch_size) + + def run(self, + inputs=None, + warmup_iterations: int = 10, + benchmark_iterations: int = 100) -> TestResultCollection: + """Runs model inference with provided or randomly generated input tensors. + + Args: + inputs: Mapping from names to input ndarrays in TF1. Or a sequence of + tensors in TF2. If `None`, ramdomly generated input tensors will be used + instead. + warmup_iterations: Number of inferences to warm up the runtime. + benchmark_iterations: Number of inferences to measure the latency. + + Returns: + `TestResultCollection` summarizing timing and numerics information for + different TensorRT conversion settings. 
+ """ + inputs = inputs or self.generate_random_inputs() + results = [ + model.run(inputs, warmup_iterations, benchmark_iterations) + for model in [self._ori_model] + self._trt_models + ] + return self._result_collection._replace(results=results) + + +class ModelHandlerManagerV1(_ModelHandlerManagerBase): + """Manages a series of ModelHandlers for aggregrated testing/benchmarking in TF1.""" + + model_handler_cls = ModelHandlerV1 + trt_model_handler_cls = TrtModelHandlerV1 + + +class ModelHandlerManagerV2(_ModelHandlerManagerBase): + """Manages a series of ModelHandlers for aggregrated testing/benchmarking in TF2.""" + + model_handler_cls = ModelHandlerV2 + trt_model_handler_cls = TrtModelHandlerV2 diff --git a/tensorflow/python/compiler/tensorrt/model_tests/run_models.py b/tensorflow/python/compiler/tensorrt/model_tests/run_models.py index 8da8dbc3778..e43430995d3 100644 --- a/tensorflow/python/compiler/tensorrt/model_tests/run_models.py +++ b/tensorflow/python/compiler/tensorrt/model_tests/run_models.py @@ -15,6 +15,7 @@ """Runs sample models with TensorRT and analyzes numerics and timing information.""" import os +from typing import Callable, Iterable from absl import app from absl import flags @@ -26,9 +27,18 @@ from tensorflow.python.framework import ops as framework_ops from tensorflow.python.platform import test as platform_test FLAGS = flags.FLAGS + +flags.DEFINE_string( + "saved_model_dir", + platform_test.test_src_dir_path( + "python/compiler/tensorrt/model_tests/sample_model"), + "The directory to the testing SavedModel.") + +flags.DEFINE_integer("batch_size", 128, + "The batch size used to run the testing model with.") + flags.DEFINE_boolean("use_tf2", True, "Whether to test with TF2 behavior or not (TF1).") - DEFAUL_TRT_CONVERT_PARAMS = trt.DEFAULT_TRT_CONVERSION_PARAMS @@ -36,43 +46,61 @@ def _get_mean_latency(result: model_handler.TestResult): return (sum(result.latency) / len(result.latency)) * 1000.0 -def run_all_tests(): - """Runs all sample model 
with TensorRT FP32/FP16 and reports latency.""" - model_configs = (model_handler.ModelConfig( - saved_model_dir=platform_test.test_src_dir_path( - "python/compiler/tensorrt/model_tests/sample_model"), - default_batch_size=128),) - if FLAGS.use_tf2: - model_handler_cls = model_handler.ModelHandlerV2 - trt_model_handeler_cls = model_handler.TrtModelHandlerV2 - default_trt_convert_params = DEFAUL_TRT_CONVERT_PARAMS._replace( - is_dynamic_op=True) - else: - model_handler_cls = model_handler.ModelHandlerV1 - trt_model_handeler_cls = model_handler.TrtModelHandlerV1 - default_trt_convert_params = DEFAUL_TRT_CONVERT_PARAMS._replace( - is_dynamic_op=False) - for model_config in model_configs: - trt_convert_params = default_trt_convert_params._replace( - max_batch_size=model_config.default_batch_size) - base_model = model_handler_cls(model_config) - random_inputs = base_model.generate_random_inputs() - base_model_result = base_model.run(random_inputs) - trt_fp32_model_result = trt_model_handeler_cls( - model_config=model_config, - trt_convert_params=trt_convert_params._replace( - precision_mode=trt.TrtPrecisionMode.FP32)).run(random_inputs) - trt_fp16_model_result = trt_model_handeler_cls( - model_config=model_config, - trt_convert_params=trt_convert_params._replace( - precision_mode=trt.TrtPrecisionMode.FP16)).run(random_inputs) +class SampleRunner(object): + """The driver to run all sample models in all specified configurations.""" - logging.info("Base model latency: %f ms", - _get_mean_latency(base_model_result)) - logging.info("TensorRT FP32 model latency: %f ms", - _get_mean_latency(trt_fp32_model_result)) - logging.info("TensorRT FP16 model latency: %f ms", - _get_mean_latency(trt_fp16_model_result)) + def __init__(self, saved_model_dir: str, batch_size: int, use_tf2=True): + # The model_configs contains (saved_model_dir, saved_model_signature_key, + # batch_size) for each model + self._configs = (model_handler.ModelConfig( + saved_model_dir=saved_model_dir, 
default_batch_size=batch_size),) + self._model_handler_manager_cls = ( + model_handler.ModelHandlerManagerV2 + if use_tf2 else model_handler.ModelHandlerManagerV1) + self._default_trt_convert_params = ( + DEFAUL_TRT_CONVERT_PARAMS._replace(is_dynamic_op=True) + if use_tf2 else DEFAUL_TRT_CONVERT_PARAMS._replace(is_dynamic_op=False)) + + def _run_impl( + self, + default_trt_converter_params: trt.TrtConversionParams, + trt_converter_params_updater: Callable[[trt.TrtConversionParams], + Iterable[trt.TrtConversionParams]], + ): + """Runs all sample models based on a key varying parameter.""" + for model_config in self._configs: + trt_convert_params = default_trt_converter_params._replace( + max_batch_size=model_config.default_batch_size) + # Load, compile and runs the models. + manager = self._model_handler_manager_cls( + model_config=model_config, + default_trt_convert_params=trt_convert_params, + trt_convert_params_updater=trt_converter_params_updater) + inputs = manager.generate_random_inputs() + result_collection = manager.run(inputs) + + logging.info("Model information: %s ms", repr(manager)) + for result in result_collection.results: + logging.info("TensorRT parameters: %s ms", result.trt_convert_params or + "Not a TensorRT Model") + logging.info("Mean latency: %f ms", _get_mean_latency(result)) + + def run_trt_precision_tests(self) -> None: + """Runs tests for all TensorRT precisions.""" + + def trt_converter_params_updater(params: trt.TrtConversionParams): + for precision_mode in [ + trt.TrtPrecisionMode.FP32, trt.TrtPrecisionMode.FP16 + ]: + yield params._replace(precision_mode=precision_mode) + + self._run_impl( + default_trt_converter_params=self._default_trt_convert_params, + trt_converter_params_updater=trt_converter_params_updater) + + def run_all_tests(self) -> None: + """Runs all tests available.""" + self.run_trt_precision_tests() def main(argv): @@ -88,7 +116,10 @@ def main(argv): logging.info("Running in TF1 mode. 
Eager execution is disabled.") framework_ops.disable_eager_execution() - run_all_tests() + SampleRunner( + saved_model_dir=FLAGS.saved_model_dir, + batch_size=FLAGS.batch_size, + use_tf2=FLAGS.use_tf2).run_all_tests() if __name__ == "__main__": From bb8e14a7334219b71110426d59bc6e750ab349c8 Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Wed, 11 Nov 2020 12:22:55 -0800 Subject: [PATCH 179/220] Switch to V2 collectives There're still some performance issues, which don't seem to be blocking: 1) we don't have scoped allocator for v2 collective ops. The non scoped allocator concat/split adds about 4ms step time to bert pretrain 2) instance params is effectively leaked, but the effect is hidden by a far more serious leak from ScopedAllocatorContainer. 3) The overhead capture_call_time_value is a function of the number of collectives, and can cause serious performance issues if there're >100 collectives, which is uncommon. PiperOrigin-RevId: 341885335 Change-Id: If8f773847b18fd5ae37a00e3a4c2563ec913b907 --- tensorflow/python/distribute/cross_device_ops_test.py | 4 ++-- tensorflow/python/distribute/cross_device_utils.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/distribute/cross_device_ops_test.py b/tensorflow/python/distribute/cross_device_ops_test.py index a5818c37aa5..983e1db5f3d 100644 --- a/tensorflow/python/distribute/cross_device_ops_test.py +++ b/tensorflow/python/distribute/cross_device_ops_test.py @@ -108,8 +108,8 @@ def enable_collective_ops(): context.context().enable_collective_ops(server_def) # Recover default flag values. 
cross_device_ops_lib.CollectiveAllReduce._limited_nccl = True - cross_device_utils.CollectiveReplicaLauncher._use_scoped_allocator = True - cross_device_utils.CollectiveReplicaLauncher._use_collective_v2 = False + cross_device_utils.CollectiveReplicaLauncher._use_scoped_allocator = False + cross_device_utils.CollectiveReplicaLauncher._use_collective_v2 = True cross_device_utils.CollectiveReplicaLauncher._use_ordering_token = False diff --git a/tensorflow/python/distribute/cross_device_utils.py b/tensorflow/python/distribute/cross_device_utils.py index d90c3b73717..f8090c5a5a8 100644 --- a/tensorflow/python/distribute/cross_device_utils.py +++ b/tensorflow/python/distribute/cross_device_utils.py @@ -257,8 +257,8 @@ class CollectiveKeys(object): class CollectiveReplicaLauncher(object): """Launch collectives on one replica.""" - _use_scoped_allocator = True - _use_collective_v2 = False + _use_scoped_allocator = False + _use_collective_v2 = True _use_ordering_token = False def __init__(self, From 82c66bed39f6b0db10ad5445f110e6f9377353b5 Mon Sep 17 00:00:00 2001 From: Peng Wang Date: Wed, 11 Nov 2020 12:29:47 -0800 Subject: [PATCH 180/220] Some internal change PiperOrigin-RevId: 341886781 Change-Id: Ib703b85e3b9bc0546edcf3b9c758b76745c8bf99 --- tensorflow/core/kernels/mlir_generated/BUILD | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/mlir_generated/BUILD b/tensorflow/core/kernels/mlir_generated/BUILD index f36fc92535f..c84a2c15ffb 100644 --- a/tensorflow/core/kernels/mlir_generated/BUILD +++ b/tensorflow/core/kernels/mlir_generated/BUILD @@ -195,7 +195,9 @@ tf_cuda_cc_test( name = "gpu_add_test", size = "small", srcs = if_mlir_generated_gpu_kernels_enabled(["gpu_add_test.cc"]), - tags = tf_cuda_tests_tags(), + tags = tf_cuda_tests_tags() + [ + "no_cuda_asan", # b/173033461 + ], deps = [ "//tensorflow/core:framework", "//tensorflow/core:framework_internal", From d6f39a9926b60983bdd0f49553967197ee58249e Mon Sep 17 00:00:00 
2001 From: Jaesung Chung Date: Wed, 11 Nov 2020 12:32:19 -0800 Subject: [PATCH 181/220] Support session initializer via op addition in TFLite (Part 2) This change is for adding MLIR pass to insert the session initializer op. The tf saved model dialect has a special operation, tf_saved_model.session_initializer, which indicates that there is a session initializer function in a graph. The session initializer function takes care of graph resource initializations, for example, variable initialization and hash table initialization. In this change, a new pass in MLIR, InsertCallOnceOpFromSessionInitializerPass will insert an TFL CallOnce op to invoke the graph once in a life cycle to set up the above initializations by converting the tf_saved_model.session_initializer op and inserting the new op at the main graph to invoke the given session initializer function. The newly introduced CallOnce op will invoke the initialization subgraph once and always located at the beginning of the main graph. Once its execution is completed, no more invocations will run to ensure that the necessary behavior is implemented. 
PiperOrigin-RevId: 341887319 Change-Id: I72412841fc12afb5c6c97cad002a19c8de3253b3 --- RELEASE.md | 2 + tensorflow/compiler/mlir/lite/BUILD | 1 + .../compiler/mlir/lite/flatbuffer_export.cc | 27 ++++++ tensorflow/compiler/mlir/lite/ir/tfl_ops.td | 15 ++++ .../mlir/lite/tests/insert_call_once_op.mlir | 40 +++++++++ .../compiler/mlir/lite/tf_tfl_passes.cc | 5 ++ .../lite/transforms/insert_call_once_op.cc | 78 +++++++++++++++++ .../compiler/mlir/lite/transforms/passes.h | 4 + tensorflow/lite/python/BUILD | 1 + tensorflow/lite/python/lite_v2_test.py | 83 +++++++++++++++++++ 10 files changed, 256 insertions(+) create mode 100644 tensorflow/compiler/mlir/lite/tests/insert_call_once_op.mlir create mode 100644 tensorflow/compiler/mlir/lite/transforms/insert_call_once_op.cc diff --git a/RELEASE.md b/RELEASE.md index 755c2eb3c10..dab8fe1c405 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -47,6 +47,8 @@ directly. * 16 bits quantization * Added int16x8 support for ABS, REDUCE_MAX and REDUCE_MIN operators. + * Added support for saved model's session initializer through + `TFLiteConverter.from_saved_model`. 
* TF Core: * Corrected higher-order gradients of control flow constructs (`tf.cond`, diff --git a/tensorflow/compiler/mlir/lite/BUILD b/tensorflow/compiler/mlir/lite/BUILD index 76fde446b15..052795a4264 100644 --- a/tensorflow/compiler/mlir/lite/BUILD +++ b/tensorflow/compiler/mlir/lite/BUILD @@ -390,6 +390,7 @@ cc_library( "transforms/generated_legalize_tf.inc", "transforms/generated_lower_static_tensor_list.inc", "transforms/generated_prepare_tf.inc", + "transforms/insert_call_once_op.cc", "transforms/legalize_tf.cc", "transforms/legalize_tf_while.cc", "transforms/lower_static_tensor_list.cc", diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_export.cc b/tensorflow/compiler/mlir/lite/flatbuffer_export.cc index 13091af45e3..8f8adceefbc 100644 --- a/tensorflow/compiler/mlir/lite/flatbuffer_export.cc +++ b/tensorflow/compiler/mlir/lite/flatbuffer_export.cc @@ -453,6 +453,11 @@ class Translator { mlir::TFL::WhileOp op, const std::vector& operands, const std::vector& results); + // Build call once operator. + BufferOffset BuildCallOnceOperator( + mlir::TFL::CallOnceOp op, const std::vector& operands, + const std::vector& results); + // Builds custom operators. // Templated on a) data type of custom_option to be stored into flatbuffer, // and b) TFL custom op type. 
@@ -787,6 +792,22 @@ BufferOffset Translator::BuildIfOperator( builtin_options); } +BufferOffset Translator::BuildCallOnceOperator( + mlir::TFL::CallOnceOp op, const std::vector& operands, + const std::vector& results) { + auto opcode_index = + GetOpcodeIndex("call_once", tflite::BuiltinOperator_CALL_ONCE); + int init_subgraph_index = + subgraph_index_map_.at(op.session_init_function().str()); + auto builtin_options = + tflite::CreateCallOnceOptions(builder_, init_subgraph_index).Union(); + auto inputs = builder_.CreateVector(operands); + auto outputs = builder_.CreateVector(results); + return tflite::CreateOperator(builder_, opcode_index, inputs, outputs, + tflite::BuiltinOptions_CallOnceOptions, + builtin_options); +} + BufferOffset Translator::BuildWhileOperator( mlir::TF::WhileOp op, const std::vector& operands, const std::vector& results) { @@ -1026,6 +1047,12 @@ Optional> Translator::BuildOperator( return llvm::None; } + if (*builtin_code == tflite::BuiltinOperator_CALL_ONCE) { + if (auto initOp = dyn_cast(inst)) { + return BuildCallOnceOperator(initOp, operands, results); + } + } + std::string op_name = inst->getName().getStringRef().str(); uint32_t opcode_index = GetOpcodeIndex(op_name, *builtin_code); diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td index 5f1d9eadfe2..ae2e424ec81 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td @@ -4360,6 +4360,21 @@ def TFL_WhileOp : Op { + let summary = "Invokes an initialization function"; + + let description = [{ +This operation invokes the given initialization function for the session +initializer in tf saved model dialect. 
+ }]; + + let arguments = (ins + StrAttr:$session_init_function + ); + + let results = (outs); +} + def TFL_CustomOp : Op { let summary = "Custom op"; diff --git a/tensorflow/compiler/mlir/lite/tests/insert_call_once_op.mlir b/tensorflow/compiler/mlir/lite/tests/insert_call_once_op.mlir new file mode 100644 index 00000000000..b9e3d8d1e88 --- /dev/null +++ b/tensorflow/compiler/mlir/lite/tests/insert_call_once_op.mlir @@ -0,0 +1,40 @@ +// RUN: tf-opt -split-input-file -tfl-insert-call-once-op %s | FileCheck %s + +// Tests that new call_once op is added when there is a session initializer. + +module attributes {tf_saved_model.semantics} { + "tf_saved_model.session_initializer"() {initializers = [@init_all_tables]} : () -> () + + func @init_all_tables() + attributes {tf_saved_model.exported_names = ["__tf_saved_model_session_initializer"]} { + %cst = constant dense<[1, 2, 3, 4]> : tensor<4xi64> + %cst_0 = constant dense<["a", "b", "c", "d"]> : tensor<4x!tf.string> + %0 = "tf.HashTableV2"() {container = "", device = "", key_dtype = i64, shared_name = "hash_table_dba2ccaa-f1b1-46d6-b276-98008f69da71", use_node_name_sharing = false, value_dtype = !tf.string} : () -> tensor + "tf.LookupTableImportV2"(%0, %cst, %cst_0) {device = ""} : (tensor, tensor<4xi64>, tensor<4x!tf.string>) -> () + return + // CHECK-LABEL: @init_all_tables + } + + func @serving_default(%arg0: tensor {tf_saved_model.index_path = ["x"]}) -> (tensor<*x!tf.string> {tf_saved_model.index_path = ["r"]}) + attributes {tf.entry_function = {control_outputs = "", inputs = "input:0", outputs = "hash_table_Lookup/LookupTableFindV2:0"}, tf_saved_model.exported_names = ["serving_default"]} { + %cst = constant dense<"f"> : tensor + %0 = "tf.HashTableV2"() {container = "", device = "", key_dtype = i64, shared_name = "hash_table_dba2ccaa-f1b1-46d6-b276-98008f69da71", use_node_name_sharing = false, value_dtype = !tf.string} : () -> tensor + %1 = "tf.LookupTableFindV2"(%0, %arg0, %cst) {device = ""} : (tensor, tensor, 
tensor) -> tensor<*x!tf.string> + return %1 : tensor<*x!tf.string> + // CHECK-LABEL: @serving_default + // CHECK: "tfl.call_once"() {session_init_function = "init_all_tables"} : () -> () + } +} + +// ----- + +// Tests that no call_once op is added. + +module attributes {tf_saved_model.semantics} { + func @no_call_once(%arg0: tensor {tf_saved_model.index_path = ["x"]}) -> (tensor {tf_saved_model.index_path = ["r"]}) + attributes {tf.entry_function = {control_outputs = "", inputs = "input:0", outputs = "output:0"}, tf_saved_model.exported_names = ["serving_default"]} { + return %arg0 : tensor + // CHECK-LABEL: no_call_once + // CHECK-NOT: "tfl.call_once" + } +} diff --git a/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc b/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc index 07e989c349f..c6b5f6b5e3e 100644 --- a/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc +++ b/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc @@ -234,6 +234,11 @@ void AddTFToTFLConversionPasses(const mlir::TFL::PassConfig& pass_config, // tf.variable to model this. pass_manager->addNestedPass( mlir::TFL::CreateSplitMergedOperandsPass()); + + // Add CallOnceOp when there is a session initializer function in tf saved + // model dialect. + pass_manager->addPass( + mlir::TFL::CreateInsertCallOnceOpFromSessionInitializerPass()); } } diff --git a/tensorflow/compiler/mlir/lite/transforms/insert_call_once_op.cc b/tensorflow/compiler/mlir/lite/transforms/insert_call_once_op.cc new file mode 100644 index 00000000000..6f413989ecf --- /dev/null +++ b/tensorflow/compiler/mlir/lite/transforms/insert_call_once_op.cc @@ -0,0 +1,78 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mlir/IR/OperationSupport.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.h" + +namespace mlir { +namespace TFL { +namespace { + +// This pass inserts a TFL::CallOnce op when tf_saved_model's session +// initializer is given. +class InsertCallOnceOpFromSessionInitializerPass + : public mlir::PassWrapper> { + private: + void runOnOperation() override; +}; + +void InsertCallOnceOpFromSessionInitializerPass::runOnOperation() { + ModuleOp module = getOperation(); + tf_saved_model::SessionInitializerOp session_init_op = + tf_saved_model::GetSessionInitializerOp(module); + + if (!session_init_op) return; + + SymbolTable symbol_table(module); + + for (auto sym_ref : session_init_op.initializers()) { + FuncOp init_func_op = symbol_table.lookup( + sym_ref.cast().getValue()); + + if (!init_func_op) { + module.emitError("no session initializer function found"); + return signalPassFailure(); + } + + for (auto func : module.getOps()) { + auto dict_attr = + func.getAttrOfType("tf.entry_function"); + if (!dict_attr) continue; + + OpBuilder builder(func.getContext()); + builder.setInsertionPointToStart(&func.getBlocks().front()); + builder.create(func.getLoc(), init_func_op.getName()); + } + } +} + +} // namespace + +// Inserts a TFL::CallOnce op when tf_saved_model's session initializer is +// given. 
+std::unique_ptr> +CreateInsertCallOnceOpFromSessionInitializerPass() { + return std::make_unique(); +} + +static PassRegistration pass( + "tfl-insert-call-once-op", + "Insert CallOnce op when tf_saved_model's session initializer is given"); + +} // namespace TFL +} // namespace mlir diff --git a/tensorflow/compiler/mlir/lite/transforms/passes.h b/tensorflow/compiler/mlir/lite/transforms/passes.h index f2c3a6df1f6..58e7c929b73 100644 --- a/tensorflow/compiler/mlir/lite/transforms/passes.h +++ b/tensorflow/compiler/mlir/lite/transforms/passes.h @@ -94,6 +94,10 @@ std::unique_ptr> CreateRuntimeVerifyPass(); // Creates raise custom ops pass, which legalize custom ops to TFL::CustomOp std::unique_ptr> CreateRaiseCustomOpsPass(); +// Inserts an TFL::CallOnce op when the tf_saved_model's session initialzer is +// given. +std::unique_ptr> +CreateInsertCallOnceOpFromSessionInitializerPass(); } // namespace TFL } // namespace mlir diff --git a/tensorflow/lite/python/BUILD b/tensorflow/lite/python/BUILD index c5cf2328341..c61e7e4934f 100644 --- a/tensorflow/lite/python/BUILD +++ b/tensorflow/lite/python/BUILD @@ -189,6 +189,7 @@ py_test( ":lite", ":lite_v2_test_util", "//tensorflow:tensorflow_py", + "//tensorflow/lite/kernels/hashtable:hashtable_op_kernels", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_test_lib", "@six_archive//:six", diff --git a/tensorflow/lite/python/lite_v2_test.py b/tensorflow/lite/python/lite_v2_test.py index 3d4cc806bfd..8beffb2507b 100644 --- a/tensorflow/lite/python/lite_v2_test.py +++ b/tensorflow/lite/python/lite_v2_test.py @@ -31,6 +31,7 @@ from tensorflow.lite.python import lite from tensorflow.lite.python import lite_v2_test_util from tensorflow.lite.python.convert import mlir_quantize from tensorflow.lite.python.interpreter import Interpreter +from tensorflow.lite.python.interpreter import InterpreterWithCustomOps from tensorflow.lite.toco import types_pb2 as _types_pb2 from tensorflow.python.framework import dtypes 
from tensorflow.python.framework import ops @@ -740,6 +741,88 @@ class FromSavedModelTest(lite_v2_test_util.ModelTest): tflite_model = converter.convert() self.assertTrue(tflite_model) + def _createV1ModelWithHashTableInitializer(self): + # Create a v1 saved model with hash table initializers. + tf.compat.v1.disable_eager_execution() + saved_model_dir = os.path.join(self.get_temp_dir(), + 'savedmodel_with_hashtable') + + table_initializer = tf.lookup.KeyValueTensorInitializer( + keys=['a', 'b', 'c', 'd'], + values=[1, 2, 3, 4], + key_dtype=tf.string, + value_dtype=tf.int64) + table = tf.lookup.StaticHashTable( + table_initializer, default_value=tf.constant(-1, dtype=tf.int64)) + + x = tf.compat.v1.placeholder(tf.string, shape=(), name='input') + y = table.lookup(x) + + tensor_info_x = tf.compat.v1.saved_model.utils.build_tensor_info(x) + tensor_info_y = tf.compat.v1.saved_model.utils.build_tensor_info(y) + + signature_def_map, init_op, assets_collection = { + 'serving_default': + (tf.compat.v1.saved_model.signature_def_utils.build_signature_def( + inputs={'x': tensor_info_x}, + outputs={'y': tensor_info_y}, + method_name='some_function')) + }, tf.compat.v1.tables_initializer(), None + + sess = tf.compat.v1.Session() + sess.run(tf.compat.v1.initializers.global_variables()) + + builder = tf.compat.v1.saved_model.builder.SavedModelBuilder( + saved_model_dir) + builder.add_meta_graph_and_variables( + sess, [tf.compat.v1.saved_model.tag_constants.SERVING], + signature_def_map, + main_op=init_op, + assets_collection=assets_collection, + strip_default_attrs=True) + builder.save() + + # Restore TF v2 behavior. + tf.compat.v1.reset_default_graph() + tf.compat.v1.enable_eager_execution() + return saved_model_dir + + @test_util.run_v2_only + def testModelWithHashTableInitializer(self): + """Test a model with saved_model's session initializer for hash tables.""" + saved_model_dir = self._createV1ModelWithHashTableInitializer() + + # Convert model and ensure model is not None. 
+ converter = lite.TFLiteConverterV2.from_saved_model(saved_model_dir) + converter.allow_custom_ops = True + tflite_model = converter.convert() + + # Check values from converted model. + interpreter = InterpreterWithCustomOps( + model_content=tflite_model, custom_op_registerers=['AddHashtableOps']) + input_details = interpreter.get_input_details() + output_details = interpreter.get_output_details() + + input_data = np.array(['a', 'b', 'c', 'z'], dtype=np.string_) + interpreter.resize_tensor_input( + input_details[0]['index'], [4], strict=False) + interpreter.allocate_tensors() + + interpreter.set_tensor(input_details[0]['index'], input_data) + + # Invoke multiple times to ensure the initializer graph runs only once. + interpreter.invoke() + actual_value = interpreter.get_tensor(output_details[0]['index']) + self.assertEqual([1, 2, 3, -1], list(actual_value)) + + interpreter.invoke() + actual_value = interpreter.get_tensor(output_details[0]['index']) + self.assertEqual([1, 2, 3, -1], list(actual_value)) + + interpreter.invoke() + actual_value = interpreter.get_tensor(output_details[0]['index']) + self.assertEqual([1, 2, 3, -1], list(actual_value)) + @test_util.run_v2_only def testConstModel(self): """Test a basic model with functions to make sure functions are inlined.""" From 721358ced1aeb1032d52a73c5f2dd048a77f4312 Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Wed, 11 Nov 2020 12:34:17 -0800 Subject: [PATCH 182/220] Fix CollectiveOpsTest.testNcclOrdering : ) PiperOrigin-RevId: 341887752 Change-Id: Id995cf5edb3b081e1126a207177b4494582acdee --- tensorflow/python/distribute/cross_device_ops_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/distribute/cross_device_ops_test.py b/tensorflow/python/distribute/cross_device_ops_test.py index 983e1db5f3d..652fde52096 100644 --- a/tensorflow/python/distribute/cross_device_ops_test.py +++ b/tensorflow/python/distribute/cross_device_ops_test.py @@ -940,7 +940,7 @@ class 
CollectiveOpsTest(test.TestCase, parameterized.TestCase): graph = f.get_concrete_function().graph should_be_ordered = set([ - "CollectiveReduce", "CollectiveGather", "If", "While", + "CollectiveReduceV2", "CollectiveGatherV2", "If", "While", "StatefulPartitionedCall" ]) nodes_by_device = {} From 303958a21168a5d4d3fbeb55bbbf182723ef3bb1 Mon Sep 17 00:00:00 2001 From: Jeremy Lau Date: Wed, 11 Nov 2020 12:34:25 -0800 Subject: [PATCH 183/220] Internal testing change. PiperOrigin-RevId: 341887791 Change-Id: I8f0ed64c2a4686190c6bce3d6cca9c81a834cf74 --- tensorflow/python/keras/distribute/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/keras/distribute/BUILD b/tensorflow/python/keras/distribute/BUILD index 9f340a4b7d6..f8b260bfb5b 100644 --- a/tensorflow/python/keras/distribute/BUILD +++ b/tensorflow/python/keras/distribute/BUILD @@ -442,6 +442,7 @@ distribute_py_test( "nogpu", # TODO(b/170905292) "notsan", ], + xla_enable_strict_auto_jit = False, # b/172956754 deps = [ ":keras_correctness_test_lib", ], From 659aca25dabfabdf66f51182aee8f94eae8f78cc Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Wed, 11 Nov 2020 12:37:55 -0800 Subject: [PATCH 184/220] Fix constructor of CommunicationOptions Fix #44768 PiperOrigin-RevId: 341888489 Change-Id: I625f2e5412a217a95e3306fc6345f378b932071b --- tensorflow/python/distribute/collective_util.py | 6 +++++- tensorflow/python/distribute/collective_util_test.py | 5 ++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/distribute/collective_util.py b/tensorflow/python/distribute/collective_util.py index 0d4554480b5..4fef896a326 100644 --- a/tensorflow/python/distribute/collective_util.py +++ b/tensorflow/python/distribute/collective_util.py @@ -81,7 +81,11 @@ class _OptionsExported(object): """ def __new__(cls, *args, **kwargs): - return Options.__new__(Options, *args, **kwargs) + # We expose a dummy class so that we can separate internal and public APIs. 
+ # Note that __init__ won't be called on the returned object if it's a + # different class [1]. + # [1] https://docs.python.org/3/reference/datamodel.html#object.__new__ + return Options(*args, **kwargs) def __init__(self, bytes_per_pack=0, diff --git a/tensorflow/python/distribute/collective_util_test.py b/tensorflow/python/distribute/collective_util_test.py index e75d520979b..984442901fb 100644 --- a/tensorflow/python/distribute/collective_util_test.py +++ b/tensorflow/python/distribute/collective_util_test.py @@ -25,8 +25,11 @@ from tensorflow.python.eager import test class OptionsTest(test.TestCase): def testCreateOptionsViaExportedAPI(self): - options = collective_util._OptionsExported() + options = collective_util._OptionsExported(bytes_per_pack=1) self.assertIsInstance(options, collective_util.Options) + self.assertEqual(options.bytes_per_pack, 1) + with self.assertRaises(ValueError): + collective_util._OptionsExported(bytes_per_pack=-1) def testCreateOptionsViaHints(self): with self.assertLogs() as cm: From 18de5404c17d720340a852257adbd5a6edb92036 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Wed, 11 Nov 2020 12:40:15 -0800 Subject: [PATCH 185/220] Use worklist for function type updates Switch to using a global worklist for functions to enable updating callers of functions. This enables propagating result types and so avoids the restriction on refining return types. Functions are added back into the worklist if they reference a FuncOp that was refined. Move the helper functions (and their callers) to the ShapeInference class to enable easier enqueuing/querying of these. Also cache the function call mapping during processing. 
PiperOrigin-RevId: 341889049 Change-Id: Ia4c6df696ca9d076e0598c63bb5d359c78c5dea6 --- .../tensorflow/tests/shape_inference.mlir | 8 +- .../tensorflow/transforms/shape_inference.cc | 467 +++++++++++------- 2 files changed, 283 insertions(+), 192 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir b/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir index f353bea9777..2fdf018d42d 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir @@ -439,16 +439,16 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr return %arg0 : tensor<2xi32> } - // Test not updating call site if a std.call is used. + // Test iteratively updating call site if a std.call is used. // CHECK-LABEL: func @call_partitioned_call2( - // CHECK-SAME: -> tensor<*xi32> + // CHECK-SAME: -> tensor<1xi32> func @call_partitioned_call2() -> tensor<*xi32> { - // CHECK: () -> tensor<*xi32> + // CHECK: () -> tensor<1xi32> %0 = call @partitioned_called_func2() : () -> tensor<*xi32> return %0 : tensor<*xi32> } // CHECK-LABEL: func @partitioned_called_func2( - // CHECK-SAME: -> tensor<*xi32> + // CHECK-SAME: -> tensor<1xi32> func @partitioned_called_func2() -> (tensor<*xi32>) { %0 = "tf.Const"() {value = dense<-1> : tensor<1xi32>} : () -> tensor<1xi32> %1 = tensor_cast %0 : tensor<1xi32> to tensor<*xi32> diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc index 90df0061c8d..cdd01399248 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include #include +#include #include "llvm/ADT/Hashing.h" #include "llvm/ADT/None.h" @@ -60,6 +61,10 @@ limitations under the License. 
#define DEBUG_TYPE "tf-shape-inference" +#define DCOMMENT(MSG) LLVM_DEBUG(llvm::dbgs() << MSG << "\n") +#define DCOMMENT_OP(OP, MSG) \ + LLVM_DEBUG(OP->print(llvm::dbgs() << MSG << " "); llvm::dbgs() << "\n") + using ::tensorflow::int64; using tensorflow::shape_inference::DimensionHandle; using tensorflow::shape_inference::InferenceContext; @@ -111,153 +116,6 @@ bool NeedsCastBack(OpOperand& use, Dialect* tf_dialect) { !IsSupportedNonTFOp(use.getOwner()); } -// Updates the result of an operation to a new inferred type. Also inserts -// tf.Cast operation for uses that are incompatible with the new type. -void UpdateTypeAndInsertIncompatibleUseCasts(Dialect* tf_dialect, Type new_type, - Value result) { - if (isa_and_nonnull(result.getDefiningOp())) { - result.setType(new_type); - return; - } - - // A tf.Cast operation is lazily created on the first use requires a cast. - TF::CastOp cast_op; - auto get_cast_op = [&]() { - if (!cast_op) { - Operation* op = result.getDefiningOp(); - OpBuilder b(op); - b.setInsertionPointAfter(op); - cast_op = b.create(op->getLoc(), result.getType(), result, - /*truncate=*/b.getBoolAttr(false)); - } - return Value(cast_op); - }; - // First insert cast back for uses that need a cast and then - // update the type. - for (OpOperand& use : make_early_inc_range(result.getUses())) { - if (NeedsCastBack(use, tf_dialect)) use.set(get_cast_op()); - } - - result.setType(new_type); -} - -// Refines the type of `result` of `op` using the type `potential_refined_type`. -// Return true if the type was changed. -bool RefineResultType(Operation* op, Value result, - Type potential_refined_type) { - if (!CanRefineTypeWith(result.getType(), potential_refined_type)) - return false; - - UpdateTypeAndInsertIncompatibleUseCasts(op->getDialect(), - potential_refined_type, result); - return true; -} - -// Infers the shape from a (Stateful)PartionedCall operation by looking up the -// called function and propagating the return type. 
-bool InferShapeForCall(CallOpInterface call_op) { - FuncOp func = dyn_cast(call_op.resolveCallable()); - if (!func) return false; - - LLVM_DEBUG(llvm::dbgs() << "Infer shape for call " << func.getName()); - Operation* op = call_op.getOperation(); - bool changed = false; - // Map each of the results of the call to the returned type of the - // function. - for (auto result : zip(op->getResults(), func.getType().getResults())) { - changed = RefineResultType(op, std::get<0>(result), std::get<1>(result)) || - changed; - } - LLVM_DEBUG(llvm::dbgs() << " changed ? " << changed << "\n"); - - return changed; -} - -bool InferShapeForCast(CastOp op, Dialect* tf_dialect) { - Value result = op.getResult(); - if (!CanBeRefined(result.getType())) return false; - - Type operand_type = op.getOperand().getType(); - auto ranked_op_type = operand_type.dyn_cast(); - if (!ranked_op_type) return false; - auto ranked_res_type = result.getType().dyn_cast(); - if (ranked_res_type && - ranked_op_type.getShape() == ranked_res_type.getShape()) - return false; - - // Avoid inserting a cast where no users types could be refined (e.g., where - // there would need to be a cast inserted for every user again). - if (llvm::all_of(result.getUses(), [tf_dialect](OpOperand& use) { - return NeedsCastBack(use, tf_dialect); - })) - return false; - - auto new_type = RankedTensorType::get( - ranked_op_type.getShape(), - result.getType().cast().getElementType()); - - UpdateTypeAndInsertIncompatibleUseCasts(tf_dialect, new_type, op.getResult()); - return true; -} - -// Infer the shape IfOp outputs based on the shapes of the then and else -// function result types. -bool InferShapeForIf(IfOp op) { - bool changed = false; - auto then_results = op.then_function().getType().getResults(); - auto else_results = op.else_function().getType().getResults(); - for (auto it : llvm::zip(op.getResults(), then_results, else_results)) { - // If then and else types do not match, skip refinement for that result. 
- if (std::get<1>(it) != std::get<2>(it)) continue; - changed = RefineResultType(op, std::get<0>(it), std::get<1>(it)) || changed; - } - return changed; -} - -// Infer the shape IfRegion outputs based on the shapes of the then and else -// yields. -bool InferShapeForIfRegion(IfRegionOp op) { - bool changed = false; - - Operation* then_yield = op.then_branch().front().getTerminator(); - Operation* else_yield = op.else_branch().front().getTerminator(); - for (auto result : zip(op.getResults(), then_yield->getOperandTypes(), - else_yield->getOperandTypes())) { - // If then and else types do not match, skip refinement for that result. - if (std::get<1>(result) != std::get<2>(result)) continue; - changed = RefineResultType(op, std::get<0>(result), std::get<1>(result)) || - changed; - } - return changed; -} - -bool RefineWithInferTypeOpInterface(InferTypeOpInterface infer_ti, - Dialect* tf_dialect) { - Operation* op = infer_ti.getOperation(); - SmallVector inferred; - LogicalResult res = infer_ti.inferReturnTypes( - op->getContext(), op->getLoc(), op->getOperands(), - op->getAttrDictionary(), op->getRegions(), inferred); - if (failed(res)) { - op->emitOpError("failed to refine type as inference failed"); - return false; - } - - if (inferred == op->getResultTypes()) return false; - - // Map each of the results of the call to the returned type of the - // function. - bool changed = false; - for (auto result : zip(op->getResults(), inferred)) { - if (std::get<0>(result).getType() == std::get<1>(result)) continue; - - UpdateTypeAndInsertIncompatibleUseCasts( - op->getDialect(), std::get<1>(result), std::get<0>(result)); - changed = true; - } - return changed; -} - } // namespace // Combination of value producer and port of value produced (e.g., @@ -494,13 +352,82 @@ class ShapeInference { // Infers shape for function return type and returns whether changed. void InferShapeForFunctionReturnType(FuncOp func); + // Enqueues function for processing. 
+ void enqueue(FuncOp fn) { + LLVM_DEBUG(llvm::dbgs() + << "enqueue " << fn.getName() << " (" + << (queue_set_.count(fn) ? "already inserted" : "newly inserted") + << ")\n"); + if (queue_set_.insert(fn).second) queue_.push(fn); + } + + // Enqueues callers on functions. + void EnqueueCallers(FuncOp fn); + + // Returns the function at the front of the queue. + FuncOp front() { return queue_.front(); } + + // Returns whether work queue is empty. + bool EmptyQueue() const { return queue_.empty(); } + + // Returns function from the front of the work queue. + FuncOp pop_front() { + FuncOp ret = queue_.front(); + queue_.pop(); + queue_set_.erase(ret); + return ret; + } + + // Returns the current size of the queue. + std::queue::size_type QueueSize() const { return queue_.size(); } + Dialect* const tf_dialect_; private: + // Updates the result of an operation to a new inferred type. Also inserts + // tf.Cast operation for uses that are incompatible with the new type. + void UpdateTypeAndInsertIncompatibleUseCasts(Type new_type, Value result); + + // Refines the type of `result` of `op` using the type + // `potential_refined_type`. Return true if the type was changed. + bool RefineResultType(Operation* op, Value result, + Type potential_refined_type); + + // Infers the shape from a (Stateful)PartionedCall operation by looking up the + // called function and propagating the return type. + bool InferShapeForCall(CallOpInterface call_op); + + bool InferShapeForCast(CastOp op); + + // Infers the shape IfOp outputs based on the shapes of the then and else + // function result types. + bool InferShapeForIf(IfOp op); + + // Infers the shape IfRegion outputs based on the shapes of the then and else + // yields. + bool InferShapeForIfRegion(IfRegionOp op); + + bool RefineWithInferTypeOpInterface(InferTypeOpInterface infer_ti); + + // Returns all the callers of a function. 
+ // Note: Usage of the return value of this function may not be interleaved + // with insertions to the callers map. This could occur if GetCallers is + // called with two separate functions, the 2nd one incurs a resize and then + // both first and 2nd stored callers are used. + ArrayRef GetCallers(FuncOp fn); + // Mapping between ValuePort (which corresponds to an OpResult or smaller, // e.g., first element of OpResult produced) to an Attribute if the ValuePort // corresponds to a constant value. ValuePortResultMap results_; + + // Map from a function to the callers of that function. + llvm::DenseMap> callers_of_func_; + + // Queue of functions being processed. + llvm::DenseSet queue_set_; + std::queue queue_; + int64_t graph_version_; // TODO(b/154065712): Remove propagate_caller_callee_constants once using @@ -514,6 +441,166 @@ ShapeInference::ShapeInference(int64_t graph_version, MLIRContext* context, graph_version_(graph_version), propagate_caller_callee_constants_(propagate_caller_callee_constants) {} +ArrayRef ShapeInference::GetCallers(FuncOp fn) { + auto pair = callers_of_func_.try_emplace(fn); + if (pair.second) { + ModuleOp module = fn.getParentOfType(); + auto uses = mlir::SymbolTable::getSymbolUses(fn.getOperation(), module); + if (uses) { + pair.first->second.reserve(pair.first->second.size()); + for (auto use : *uses) { + pair.first->second.push_back(use.getUser()->getParentOfType()); + } + } + } + return pair.first->second; +} + +void ShapeInference::EnqueueCallers(FuncOp fn) { + for (auto user : GetCallers(fn)) enqueue(user); +} + +void ShapeInference::UpdateTypeAndInsertIncompatibleUseCasts(Type new_type, + Value result) { + // A tf.Cast operation is lazily created on the first use requires a cast. 
+ TF::CastOp cast_op; + auto get_cast_op = [&]() { + if (!cast_op) { + Operation* op = result.getDefiningOp(); + OpBuilder b(op); + b.setInsertionPointAfter(op); + cast_op = b.create(op->getLoc(), result.getType(), result, + /*truncate=*/b.getBoolAttr(false)); + } + return Value(cast_op); + }; + // First insert cast back for uses that need a cast and then + // update the type. + bool enqueue_callers = false; + for (OpOperand& use : make_early_inc_range(result.getUses())) { + if (isa(use.getOwner())) + enqueue_callers = true; + else if (NeedsCastBack(use, tf_dialect_)) + use.set(get_cast_op()); + } + + result.setType(new_type); + if (enqueue_callers) + EnqueueCallers(result.getDefiningOp()->getParentOfType()); +} + +bool ShapeInference::RefineResultType(Operation* op, Value result, + Type potential_refined_type) { + if (!CanRefineTypeWith(result.getType(), potential_refined_type)) + return false; + + UpdateTypeAndInsertIncompatibleUseCasts(potential_refined_type, result); + return true; +} + +// Infers the shape from a (Stateful)PartionedCall operation by looking up the +// called function and propagating the return type. +bool ShapeInference::InferShapeForCall(CallOpInterface call_op) { + FuncOp func = dyn_cast(call_op.resolveCallable()); + if (!func) return false; + + LLVM_DEBUG(llvm::dbgs() << "Infer shape for call " << func.getName()); + Operation* op = call_op.getOperation(); + bool changed = false; + // Map each of the results of the call to the returned type of the + // function. + for (auto result : zip(op->getResults(), func.getType().getResults())) { + changed = RefineResultType(op, std::get<0>(result), std::get<1>(result)) || + changed; + } + LLVM_DEBUG(llvm::dbgs() << " changed ? 
" << changed << "\n"); + + return changed; +} + +bool ShapeInference::InferShapeForCast(CastOp op) { + DCOMMENT_OP(op.getOperation(), "Infering shape for "); + Value result = op.getResult(); + if (!CanBeRefined(result.getType())) return false; + + Type operand_type = op.getOperand().getType(); + auto ranked_op_type = operand_type.dyn_cast(); + if (!ranked_op_type) return false; + auto ranked_res_type = result.getType().dyn_cast(); + if (ranked_res_type && + ranked_op_type.getShape() == ranked_res_type.getShape()) + return false; + + // Avoid inserting a cast where no users types could be refined (e.g., where + // there would need to be a cast inserted for every user again). + if (llvm::all_of(result.getUses(), [this](OpOperand& use) { + return NeedsCastBack(use, tf_dialect_); + })) + return false; + + auto new_type = RankedTensorType::get( + ranked_op_type.getShape(), + result.getType().cast().getElementType()); + + UpdateTypeAndInsertIncompatibleUseCasts(new_type, op.getResult()); + return true; +} + +bool ShapeInference::InferShapeForIf(IfOp op) { + DCOMMENT_OP(op.getOperation(), "InferShapeForIf"); + bool changed = false; + auto then_results = op.then_function().getType().getResults(); + auto else_results = op.else_function().getType().getResults(); + for (auto it : llvm::zip(op.getResults(), then_results, else_results)) { + // If then and else types do not match, skip refinement for that result. + if (std::get<1>(it) != std::get<2>(it)) continue; + changed = RefineResultType(op, std::get<0>(it), std::get<1>(it)) || changed; + } + return changed; +} + +bool ShapeInference::InferShapeForIfRegion(IfRegionOp op) { + bool changed = false; + + Operation* then_yield = op.then_branch().front().getTerminator(); + Operation* else_yield = op.else_branch().front().getTerminator(); + for (auto result : zip(op.getResults(), then_yield->getOperandTypes(), + else_yield->getOperandTypes())) { + // If then and else types do not match, skip refinement for that result. 
+ if (std::get<1>(result) != std::get<2>(result)) continue; + changed = RefineResultType(op, std::get<0>(result), std::get<1>(result)) || + changed; + } + return changed; +} + +bool ShapeInference::RefineWithInferTypeOpInterface( + InferTypeOpInterface infer_ti) { + Operation* op = infer_ti.getOperation(); + SmallVector inferred; + LogicalResult res = infer_ti.inferReturnTypes( + op->getContext(), op->getLoc(), op->getOperands(), + op->getAttrDictionary(), op->getRegions(), inferred); + if (failed(res)) { + op->emitOpError("failed to refine type as inference failed"); + return false; + } + + if (inferred == op->getResultTypes()) return false; + + // Map each of the results of the call to the returned type of the + // function. + bool changed = false; + for (auto result : zip(op->getResults(), inferred)) { + if (std::get<0>(result).getType() == std::get<1>(result)) continue; + + UpdateTypeAndInsertIncompatibleUseCasts(std::get<1>(result), + std::get<0>(result)); + changed = true; + } + return changed; +} + ShapeHandle ShapeInference::ComputeOutputAsShape(OpResult result, InferenceContext* ic) { LLVM_DEBUG(result.print(llvm::dbgs() << "\nEvaluate partially ")); @@ -599,13 +686,14 @@ bool ShapeInference::RefineTypeForPassThroughOperands(Operation* op, .isa()) continue; - UpdateTypeAndInsertIncompatibleUseCasts(tf_dialect_, operand_type, result); + UpdateTypeAndInsertIncompatibleUseCasts(operand_type, result); changed = true; } return changed; } bool ShapeInference::RefineShapeForPassThroughOps(Operation* op) { + DCOMMENT_OP(op, "Pass through op"); auto is_allowed_dtype = [](Type t) { // Skip if element type is not in standard or TF dialect. 
// TODO(jpienaar): The tf.Cast op, which is uniformly inserted at the @@ -631,7 +719,7 @@ bool ShapeInference::RefineShapeForPassThroughOps(Operation* op) { auto new_type = RankedTensorType::get(operand_type.getShape(), result_type.getElementType()); - UpdateTypeAndInsertIncompatibleUseCasts(tf_dialect_, new_type, result); + UpdateTypeAndInsertIncompatibleUseCasts(new_type, result); changed = true; } return changed; @@ -666,6 +754,7 @@ bool ShapeInference::InferShapeForNonTFDialectOperation(Operation* op) { if (op->hasTrait()) { return RefineShapeForPassThroughOps(op); } + if (auto call = dyn_cast(op)) return InferShapeForCall(call); return false; } @@ -698,8 +787,7 @@ bool ShapeInference::InferShapeForSingleOperation(Operation* op) { // tf.Cast are only inferred if they have at least one user in the TF dialect // or feeding into the function return. This is necessary to avoid inserting // casts which cannot be refined. - if (auto cast_op = dyn_cast(op)) - return InferShapeForCast(cast_op, tf_dialect_); + if (auto cast_op = dyn_cast(op)) return InferShapeForCast(cast_op); // Handle IfOp here by inferring the shape from the else/then function // results. Since `output_shapes` is a derived attribute, avoid going down the @@ -755,14 +843,11 @@ bool ShapeInference::InferShapeForSingleOperation(Operation* op) { inferred_type = UnrankedTensorType::get(inferred.getElementType()); if (op_result.getType() == inferred_type) continue; - UpdateTypeAndInsertIncompatibleUseCasts(tf_dialect_, inferred_type, - op_result); + UpdateTypeAndInsertIncompatibleUseCasts(inferred_type, op_result); changed = true; } - if (changed) - LLVM_DEBUG(llvm::dbgs() - << "Modified after shape inference: '" << *op << "'\n"); + if (changed) DCOMMENT_OP(op, "Modified after shape inference:"); return changed; } @@ -774,13 +859,13 @@ LogicalResult ShapeInference::PropagateShapeToFunctions( // early exit and attempt to propagate shapes for all provided functions to // have a best-effort propagation. 
for (FuncOp func : functions) { - auto func_uses = SymbolTable::getSymbolUses(func, &module.getBodyRegion()); - if (!llvm::hasSingleElement(func_uses.getValue())) { - int num_uses = std::distance(func_uses->begin(), func_uses->end()); + DCOMMENT("Propating shape to" << func.getName()); + auto func_uses = GetCallers(func); + if (!llvm::hasSingleElement(func_uses)) { func.emitWarning( formatv("expected control flow function @{0} to have exactly 1 use, " "found {1}.", - func.getName(), num_uses)); + func.getName(), func_uses.size())); all_succeeded = false; continue; } @@ -804,6 +889,7 @@ LogicalResult ShapeInference::PropagateShapeToFunctions( LogicalResult ShapeInference::PropagateShapeToRegions( Operation::operand_type_range input_types, ArrayRef regions, int64_t max_iteration) { + DCOMMENT("\tPropagating shapes to regions"); bool all_succeeded = true; // If shape propagation fails for one region, return failure, but do not // early exit and attempt to propagate shapes for all provided regions to @@ -827,8 +913,8 @@ LogicalResult ShapeInference::PropagateShapeToRegions( void ShapeInference::PropagateConstantToCallee(CallOpInterface call_op, FuncOp func, ModuleOp module) { - auto func_uses = SymbolTable::getSymbolUses(func, &module.getBodyRegion()); - if (!llvm::hasSingleElement(func_uses.getValue())) return; + auto func_uses = GetCallers(func); + if (!llvm::hasSingleElement(func_uses)) return; OpBuilder builder(&func.front().front()); Operation* op = call_op.getOperation(); @@ -884,6 +970,7 @@ LogicalResult ShapeInference::PropagateShapeIntoAttachedFunctions( Operation* op, int64_t max_iteration) { ModuleOp module = op->getParentOfType(); if (auto if_op = dyn_cast(op)) { + DCOMMENT("Propagating shapes into If"); return PropagateShapeToFunctions( module, drop_begin(if_op.getOperandTypes(), 1), {if_op.then_function(), if_op.else_function()}, max_iteration); @@ -977,7 +1064,7 @@ LogicalResult ShapeInference::TryToFold(Operation* op) { if (ElementsAttr eattr = 
attr.dyn_cast_or_null()) { if (std::get<0>(result).getType() == eattr.getType()) continue; - UpdateTypeAndInsertIncompatibleUseCasts(tf_dialect_, eattr.getType(), + UpdateTypeAndInsertIncompatibleUseCasts(eattr.getType(), std::get<0>(result)); } } @@ -1005,27 +1092,9 @@ void ShapeInference::InferShapeForFunctionReturnType(FuncOp func) { // Find the return type. auto return_op = return_ops.front(); - // Avoid refining result type if not used by TF dialect op. This can be - // relaxed once we move to a work queue, but at the moment this can result - // in invalid modules (in particular when a std.call is used but we've - // already processed the function where the call is made from before this). - auto uses = mlir::SymbolTable::getSymbolUses( - func.getOperation(), func.getParentOfType()); - if (!uses) { - LLVM_DEBUG(llvm::dbgs() << "Skipping refing return type of function " - "given unknown use\n"); - return; - } - for (auto use : *uses) { - if (use.getUser()->getDialect() != tf_dialect_) { - LLVM_DEBUG(llvm::dbgs() << "Skipping refing return type of function " - "given non-TF dialect use\n"); - return; - } - } - // Manually fold tf.Cast that precedes the return instruction and only differs // in shape refinement level. + bool changed = false; for (OpOperand& arg_op : return_op.getOperation()->getOpOperands()) { Operation* arg_defining_op = arg_op.get().getDefiningOp(); if (auto cast_op = dyn_cast_or_null(arg_defining_op)) { @@ -1042,7 +1111,7 @@ void ShapeInference::InferShapeForFunctionReturnType(FuncOp func) { // Shape inference should not change the element type. 
if (HasCompatibleElementTypes(input.getType(), result.getType())) { - arg_op.set(cast_op.x()); + arg_op.set(input); } else { OpBuilder b(return_op.getOperation()); auto type = RankedTensorType::get( @@ -1054,12 +1123,15 @@ void ShapeInference::InferShapeForFunctionReturnType(FuncOp func) { arg_op.set(new_cast_op); } if (cast_op.y().use_empty()) cast_op.erase(); + changed = true; } } - // Update function type. + DCOMMENT("Updating function type"); func.setType(FunctionType::get( func.getArgumentTypes(), return_op.getOperandTypes(), func.getContext())); + + if (changed) EnqueueCallers(func); } LogicalResult ShapeInference::InferShapeUntilFixPoint(Region* region, @@ -1075,12 +1147,15 @@ LogicalResult ShapeInference::InferShapeUntilFixPoint(Region* region, LLVM_DEBUG(llvm::dbgs() << "Shape inference, iteration " << iteration << "\n"); region->walk([&](Operation* op) { + DCOMMENT_OP(op, "Inferring for"); if (auto infer_ti = dyn_cast(op)) { - changed |= RefineWithInferTypeOpInterface(infer_ti, tf_dialect_); + DCOMMENT("\tRefinining with type op interface"); + changed |= RefineWithInferTypeOpInterface(infer_ti); return; } if (op->getDialect() != tf_dialect_) { + DCOMMENT("\tInfer non-TF dialect"); changed |= InferShapeForNonTFDialectOperation(op); return; } @@ -1107,7 +1182,7 @@ LogicalResult ShapeInference::InferShapeUntilFixPoint(Region* region, if (changed) { return region->getParentOp()->emitWarning() - << "Shape inference did not reach stable state after " + << "shape inference did not reach stable state after " << max_iteration << " iterations"; } return success(); @@ -1184,9 +1259,25 @@ LogicalResult InferModuleShape(ModuleOp module) { return success(); } int64_t producer = producer_or.ValueOrDie(); - for (auto func : module.getOps()) { - auto res = InferShapeForFunction(func, /*arg_shapes=*/{}, producer); + ShapeInference context(producer, module.getContext(), + /*propagate_caller_callee_constants=*/true); + if (auto main = module.lookupSymbol("main")) + 
context.enqueue(main); + for (auto func : module.getOps()) context.enqueue(func); + // Arbitrarily upper bound the maximum number of functions that get processed + // just to avoid pathological cases. + auto max_iteration = context.QueueSize() * 4; + while (!context.EmptyQueue()) { + FuncOp func = context.front(); + auto res = InferShapeForFunction(context, func); if (failed(res)) return res; + context.pop_front(); + + if ((--max_iteration) == 0) { + return emitWarning(UnknownLoc::get(module.getContext())) + << "shape inference did not reach stable state after " + << max_iteration << " iterations"; + } } return success(); } From 39aa67d16e02936673bd0f576482a9299f3d5c3b Mon Sep 17 00:00:00 2001 From: Peng Wang Date: Wed, 11 Nov 2020 12:40:18 -0800 Subject: [PATCH 186/220] Some internal change PiperOrigin-RevId: 341889062 Change-Id: I53d892b2b3810b1c0377493c98205616007ef769 --- tensorflow/python/keras/mixed_precision/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/keras/mixed_precision/BUILD b/tensorflow/python/keras/mixed_precision/BUILD index ccf55c8b166..24c973571df 100644 --- a/tensorflow/python/keras/mixed_precision/BUILD +++ b/tensorflow/python/keras/mixed_precision/BUILD @@ -266,6 +266,7 @@ cuda_py_test( python_version = "PY3", shard_count = 10, tags = [ + "no_cuda_asan", # b/173035482: times out "no_pip", "no_windows", # b/139083295: bfloat16 tests fail on Windows ], From f473a447dd34c616e0a3a03fb46135f9e0c60930 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Wed, 11 Nov 2020 12:40:35 -0800 Subject: [PATCH 187/220] Update sidecar_evaluator to use tf.logging instead of absl logging. 
PiperOrigin-RevId: 341889117 Change-Id: Ibc28eda3b95712c91cceaca2dc17dc48b8b8e5a2 --- tensorflow/python/keras/distribute/BUILD | 1 - tensorflow/python/keras/distribute/sidecar_evaluator.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/python/keras/distribute/BUILD b/tensorflow/python/keras/distribute/BUILD index f8b260bfb5b..66e2d8a65c8 100644 --- a/tensorflow/python/keras/distribute/BUILD +++ b/tensorflow/python/keras/distribute/BUILD @@ -924,7 +924,6 @@ py_library( "//tensorflow/python:util", "//tensorflow/python:variables", "//tensorflow/python/training/tracking:util", - "@absl_py//absl/logging", ], ) diff --git a/tensorflow/python/keras/distribute/sidecar_evaluator.py b/tensorflow/python/keras/distribute/sidecar_evaluator.py index 1bd2b7e2f48..add90ccd021 100644 --- a/tensorflow/python/keras/distribute/sidecar_evaluator.py +++ b/tensorflow/python/keras/distribute/sidecar_evaluator.py @@ -19,12 +19,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from absl import logging # pylint: disable=g-direct-tensorflow-import from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl from tensorflow.python.ops import summary_ops_v2 from tensorflow.python.ops import variables +from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import checkpoint_utils from tensorflow.python.training.tracking import util as tracking_util From 6a83a28990bd4da0fdf9986e22578c8445a348e3 Mon Sep 17 00:00:00 2001 From: Yuefeng Zhou Date: Wed, 11 Nov 2020 12:41:15 -0800 Subject: [PATCH 188/220] Link to the bug where we change the inter_op pool size in in-process cluster. 
PiperOrigin-RevId: 341889256 Change-Id: I8cdeadb775cf20c73d50fb6b0a47a50b5a78b330 --- tensorflow/python/distribute/multi_worker_test_base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/distribute/multi_worker_test_base.py b/tensorflow/python/distribute/multi_worker_test_base.py index 4feb2337b86..e07e19f621b 100644 --- a/tensorflow/python/distribute/multi_worker_test_base.py +++ b/tensorflow/python/distribute/multi_worker_test_base.py @@ -164,7 +164,8 @@ def create_in_process_cluster(num_workers, worker_config = config_pb2.ConfigProto() worker_config.gpu_options.per_process_gpu_memory_fraction = gpu_mem_frac - # The cluster may hang if workers don't have enough inter_op threads. + # The cluster may hang if workers don't have enough inter_op threads. See + # b/172296720 for more details. if multiprocessing.cpu_count() < 4: worker_config.inter_op_parallelism_threads = 4 From c77657c395c6428464ef20c608bf67534d369af2 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Wed, 11 Nov 2020 12:43:46 -0800 Subject: [PATCH 189/220] [XLA:GPU] Add unit test to test PadToStatic IR emission PiperOrigin-RevId: 341889831 Change-Id: I9e63d568c64fad69570ecceaa889ea88ad45c49a --- .../xla/service/gpu/tests/pad_to_static.hlo | 91 +++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 tensorflow/compiler/xla/service/gpu/tests/pad_to_static.hlo diff --git a/tensorflow/compiler/xla/service/gpu/tests/pad_to_static.hlo b/tensorflow/compiler/xla/service/gpu/tests/pad_to_static.hlo new file mode 100644 index 00000000000..e833658636d --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/tests/pad_to_static.hlo @@ -0,0 +1,91 @@ +// RUN: hlo_to_llvm_ir %s | FileCheck %s + +// NOTE: Assertions have been autogenerated by utils/generate-test-checks.py + +// CHECK-LABEL: entry: +// CHECK: %[[VAL_0:.*]] = getelementptr inbounds i8, i8* %[[VAL_1:.*]], i64 0 +// CHECK: %[[VAL_2:.*]] = bitcast i8* %[[VAL_0]] to [4 x i8*]* +// CHECK: %[[VAL_3:.*]] = 
getelementptr inbounds i8, i8* %[[VAL_4:.*]], i64 0 +// CHECK: %[[VAL_5:.*]] = bitcast i8* %[[VAL_3]] to [2 x [2 x [2 x i32]]]* +// CHECK: %[[VAL_6:.*]] = getelementptr inbounds i8, i8* %[[VAL_7:.*]], i64 0 +// CHECK: %[[VAL_8:.*]] = bitcast i8* %[[VAL_6]] to i32* +// CHECK: %[[VAL_9:.*]] = getelementptr inbounds i8, i8* %[[VAL_10:.*]], i64 0 +// CHECK: %[[VAL_11:.*]] = bitcast i8* %[[VAL_9]] to i32* +// CHECK: %[[VAL_12:.*]] = getelementptr inbounds i8, i8* %[[VAL_13:.*]], i64 0 +// CHECK: %[[VAL_14:.*]] = bitcast i8* %[[VAL_12]] to i32* +// CHECK: %[[VAL_15:.*]] = getelementptr inbounds i8, i8* %[[VAL_16:.*]], i64 0 +// CHECK: %[[VAL_17:.*]] = bitcast i8* %[[VAL_15]] to [2 x [2 x [2 x i32]]]* +// CHECK: %[[VAL_18:.*]] = bitcast [2 x [2 x [2 x i32]]]* %[[VAL_17]] to i8* +// CHECK: %[[VAL_19:.*]] = getelementptr inbounds i8, i8* %[[VAL_18]], i32 32 +// CHECK: %[[VAL_20:.*]] = bitcast i8* %[[VAL_19]] to i32* +// CHECK: %[[VAL_21:.*]] = load i32, i32* %[[VAL_20]], align 4 +// CHECK: %[[VAL_22:.*]] = getelementptr inbounds i8, i8* %[[VAL_18]], i32 36 +// CHECK: %[[VAL_23:.*]] = bitcast i8* %[[VAL_22]] to i32* +// CHECK: %[[VAL_24:.*]] = load i32, i32* %[[VAL_23]], align 4 +// CHECK: %[[VAL_25:.*]] = getelementptr inbounds i8, i8* %[[VAL_18]], i32 40 +// CHECK: %[[VAL_26:.*]] = bitcast i8* %[[VAL_25]] to i32* +// CHECK: %[[VAL_27:.*]] = load i32, i32* %[[VAL_26]], align 4 +// CHECK: %[[VAL_28:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK: %[[VAL_29:.*]] = icmp eq i32 0, %[[VAL_28]] +// CHECK: %[[VAL_30:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() +// CHECK: %[[VAL_31:.*]] = icmp eq i32 0, %[[VAL_30]] +// CHECK: %[[VAL_32:.*]] = and i1 %[[VAL_29]], %[[VAL_31]] +// CHECK: br i1 %[[VAL_32]], label %[[VAL_33:.*]], label %[[VAL_34:.*]] +// CHECK: is_thred_0-after: ; preds = %[[VAL_33]], %[[VAL_35:.*]] +// CHECK: %[[VAL_36:.*]] = mul i32 1, %[[VAL_21]] +// CHECK: %[[VAL_37:.*]] = mul i32 %[[VAL_36]], %[[VAL_24]] +// CHECK: %[[VAL_38:.*]] = mul i32 
%[[VAL_37]], %[[VAL_27]] +// CHECK: %[[VAL_39:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !2 +// CHECK: %[[VAL_40:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !3 +// CHECK: %[[VAL_41:.*]] = mul nuw nsw i32 %[[VAL_39]], 8 +// CHECK: %[[VAL_42:.*]] = add nuw nsw i32 %[[VAL_41]], %[[VAL_40]] +// CHECK: %[[VAL_43:.*]] = icmp ult i32 %[[VAL_42]], 8 +// CHECK: call void @llvm.assume(i1 %[[VAL_43]]) +// CHECK: %[[VAL_44:.*]] = udiv i32 %[[VAL_42]], 1 +// CHECK: %[[VAL_45:.*]] = urem i32 %[[VAL_44]], 2 +// CHECK: %[[VAL_46:.*]] = udiv i32 %[[VAL_42]], 2 +// CHECK: %[[VAL_47:.*]] = urem i32 %[[VAL_46]], 2 +// CHECK: %[[VAL_48:.*]] = udiv i32 %[[VAL_42]], 4 +// CHECK: %[[VAL_49:.*]] = icmp ult i32 %[[VAL_42]], 8 +// CHECK: br i1 %[[VAL_49]], label %[[VAL_50:.*]], label %[[VAL_51:.*]] +// CHECK: custom-call.2.in_bounds-after: ; preds = %[[VAL_52:.*]], %[[VAL_34]] +// CHECK: ret void +// CHECK: is_thred_0-true: ; preds = %[[VAL_35]] +// CHECK: store i32 %[[VAL_21]], i32* %[[VAL_8]], align 4 +// CHECK: store i32 %[[VAL_24]], i32* %[[VAL_11]], align 4 +// CHECK: store i32 %[[VAL_27]], i32* %[[VAL_14]], align 4 +// CHECK: br label %[[VAL_34]] +// CHECK: custom-call.2.in_bounds-true: ; preds = %[[VAL_34]] +// CHECK: %[[VAL_53:.*]] = mul nuw nsw i32 %[[VAL_45]], 1 +// CHECK: %[[VAL_54:.*]] = add nuw nsw i32 0, %[[VAL_53]] +// CHECK: %[[VAL_55:.*]] = mul nuw nsw i32 %[[VAL_47]], 2 +// CHECK: %[[VAL_56:.*]] = add nuw nsw i32 %[[VAL_54]], %[[VAL_55]] +// CHECK: %[[VAL_57:.*]] = mul nuw nsw i32 %[[VAL_48]], 4 +// CHECK: %[[VAL_58:.*]] = add nuw nsw i32 %[[VAL_56]], %[[VAL_57]] +// CHECK: %[[VAL_59:.*]] = icmp ult i32 %[[VAL_58]], %[[VAL_38]] +// CHECK: br i1 %[[VAL_59]], label %[[VAL_60:.*]], label %[[VAL_52]] +// CHECK: custom-call.2.in_dyn_bounds-after: ; preds = %[[VAL_60]], %[[VAL_50]] +// CHECK: br label %[[VAL_51]] +// CHECK: custom-call.2.in_dyn_bounds-true: ; preds = %[[VAL_50]] +// CHECK: %[[VAL_61:.*]] = udiv i32 %[[VAL_58]], 1 +// CHECK: 
%[[VAL_62:.*]] = urem i32 %[[VAL_61]], %[[VAL_27]] +// CHECK: %[[VAL_63:.*]] = mul i32 1, %[[VAL_27]] +// CHECK: %[[VAL_64:.*]] = udiv i32 %[[VAL_58]], %[[VAL_63]] +// CHECK: %[[VAL_65:.*]] = urem i32 %[[VAL_64]], %[[VAL_24]] +// CHECK: %[[VAL_66:.*]] = mul i32 %[[VAL_63]], %[[VAL_24]] +// CHECK: %[[VAL_67:.*]] = udiv i32 %[[VAL_58]], %[[VAL_66]] +// CHECK: %[[VAL_68:.*]] = bitcast [2 x [2 x [2 x i32]]]* %[[VAL_17]] to i32* +// CHECK: %[[VAL_69:.*]] = getelementptr inbounds i32, i32* %[[VAL_68]], i32 %[[VAL_42]] +// CHECK: %[[VAL_70:.*]] = load i32, i32* %[[VAL_69]], align 4, !invariant.load !4 +// CHECK: %[[VAL_71:.*]] = getelementptr inbounds [2 x [2 x [2 x i32]]], [2 x [2 x [2 x i32]]]* %[[VAL_5]], i32 0, i32 %[[VAL_67]], i32 %[[VAL_65]], i32 %[[VAL_62]] +// CHECK: store i32 %[[VAL_70]], i32* %[[VAL_71]], align 4 +// CHECK: br label %[[VAL_52]] + +HloModule PadToStatic + +ENTRY main { + %param = s32[2,<=2,2] parameter(0) + ROOT %custom-call.2 = (s32[2,2,2], s32[], s32[], s32[]) + custom-call(s32[2,<=2,2] %param), + custom_call_target="PadToStatic" +} From 2889cec62cae7e565309c8e4fe360fcebcc733e8 Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Wed, 11 Nov 2020 12:48:45 -0800 Subject: [PATCH 190/220] Add `output_shapes` attribute to tf.WhileRegion. This will match the representation for tf.While in regards to output shapes. Shape inference for tf.While/tf.WhileRegion should be handled differently when `output_shapes` attribute is not empty, as tf.While/tf.WhileRegion supports dynamic shapes. Custom builders added for tf.While are removed as both functional and region based ops have explicit `output_shapes` attributes. 
PiperOrigin-RevId: 341890776 Change-Id: I92bcbec86b997ad466b771f4de17ad9fde904842 --- .../compiler/mlir/tensorflow/ir/tf_ops.td | 15 ++------- .../compiler/mlir/tensorflow/ir/tf_ops_n_z.cc | 33 ------------------- .../functional_control_flow_to_regions.cc | 3 +- .../region_control_flow_to_functional.cc | 3 +- .../tpu_extract_outside_compilation.cc | 3 +- 5 files changed, 9 insertions(+), 48 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td index 61a55c3534d..89273354d45 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td @@ -702,16 +702,6 @@ body: A function that takes a list of tensors and returns another return Verify(*this); }]; - let builders = [ - OpBuilderDAG<(ins "TypeRange":$output, "ValueRange":$input, - "FlatSymbolRefAttr":$cond, "FlatSymbolRefAttr":$body, - "IntegerAttr":$parallel_iterations, - "BoolAttr":$is_stateless)>, - OpBuilderDAG<(ins "TypeRange":$output, "ValueRange":$input, - "StringRef":$cond, "StringRef":$body, - "uint64_t":$parallel_iterations, "bool":$is_stateless)> - ]; - let extraClassDeclaration = [{ // Get the condition function. FuncOp cond_function() { @@ -764,8 +754,9 @@ def TF_WhileRegionOp : TF_Op<"WhileRegion", // Used to map StatelessWhile and While op defined in TensorFlow to a common // op. 
- DefaultValuedAttr:$is_stateless, - DefaultValuedAttr:$parallel_iterations + DefaultValuedAttr:$output_shapes, + DefaultValuedAttr:$parallel_iterations, + DefaultValuedAttr:$is_stateless ); let results = (outs Variadic:$output); diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc index 5ead88b2903..fdff883f7a5 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc @@ -2605,39 +2605,6 @@ static LogicalResult Verify(WhileOp op) { return success(); } -namespace { - -ArrayAttr GetShapeArrayAttrFromTypes(mlir::MLIRContext *context, - TypeRange types) { - SmallVector shapes; - shapes.reserve(types.size()); - for (Type type : types) - shapes.push_back(ShapeAttr::get(context, type.cast())); - return ArrayAttr::get(shapes, context); -} - -} // namespace - -void WhileOp::build(OpBuilder &builder, OperationState &result, - TypeRange output, ValueRange input, FlatSymbolRefAttr cond, - FlatSymbolRefAttr body, IntegerAttr parallel_iterations, - BoolAttr is_stateless) { - ArrayAttr output_shapes = - GetShapeArrayAttrFromTypes(builder.getContext(), output); - build(builder, result, output, input, cond, body, output_shapes, - parallel_iterations, is_stateless); -} - -void WhileOp::build(OpBuilder &builder, OperationState &result, - TypeRange output, ValueRange input, StringRef cond, - StringRef body, uint64_t parallel_iterations, - bool is_stateless) { - ArrayAttr output_shapes = - GetShapeArrayAttrFromTypes(builder.getContext(), output); - build(builder, result, output, input, cond, body, output_shapes, - parallel_iterations, is_stateless); -} - //===----------------------------------------------------------------------===// // WhileRegionOp //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_regions.cc 
b/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_regions.cc index 87733bbbf3f..a92d3f367cf 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_regions.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_regions.cc @@ -112,7 +112,8 @@ LogicalResult ConvertIfOp(IfOp if_op) { LogicalResult ConvertWhileOp(WhileOp while_op) { auto while_region = OpBuilder(while_op).create( while_op.getLoc(), while_op.getResultTypes(), while_op.input(), - while_op.is_stateless(), while_op.parallel_iterations()); + while_op.output_shapes(), while_op.parallel_iterations(), + while_op.is_stateless()); CopyDeviceAndUnderscoredAttributes(while_op, while_region); YieldOp cond_yield = diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/region_control_flow_to_functional.cc b/tensorflow/compiler/mlir/tensorflow/transforms/region_control_flow_to_functional.cc index 9a6f8696285..66e736db869 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/region_control_flow_to_functional.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/region_control_flow_to_functional.cc @@ -398,7 +398,8 @@ LogicalResult RegionControlFlowToFunctional::ConvertWhileOp( OpBuilder builder(while_region); auto while_op = builder.create( while_region.getLoc(), new_result_types, new_inputs, cond_name, body_name, - while_region.parallel_iterations(), while_region.is_stateless()); + while_region.output_shapes(), while_region.parallel_iterations(), + while_region.is_stateless()); CopyDeviceAndUnderscoredAttributes(while_region, while_op); // Redirect old results to new results. 
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc index f1dc3f21087..7953dfe1832 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc @@ -255,7 +255,8 @@ TF::WhileRegionOp CloneEmptyWhile(bool is_stateless, OpBuilder& builder) { auto host_side_while = builder.create( loc, /*output=*/ArrayRef{}, /*input=*/ArrayRef{}, - is_stateless, parallel_iterations); + /*output_shapes=*/builder.getArrayAttr({}), parallel_iterations, + is_stateless); // Create empty else branch region. auto& body = host_side_while.body(); From c11d0dea22f1bc19af52a36709e80b9244dde629 Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Wed, 11 Nov 2020 12:56:28 -0800 Subject: [PATCH 191/220] Internal change: Remove tensor_shape.as_dimension from Keras. To facilitate splitting Keras without relying on private apis. 
PiperOrigin-RevId: 341892302 Change-Id: I0713e2ea8409263e0e0e68fe7cfaf7c5074c7871 --- tensorflow/python/keras/engine/training_utils.py | 2 +- tensorflow/python/keras/engine/training_v1.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py index 4180c0b7e1d..d75b6a125bf 100644 --- a/tensorflow/python/keras/engine/training_utils.py +++ b/tensorflow/python/keras/engine/training_utils.py @@ -214,7 +214,7 @@ def get_static_batch_size(layer): """ batch_input_shape, _ = get_input_shape_and_dtype(layer) if batch_input_shape is not None: - return tensor_shape.as_dimension(batch_input_shape[0]).value + return tensor_shape.Dimension(batch_input_shape[0]).value return None diff --git a/tensorflow/python/keras/engine/training_v1.py b/tensorflow/python/keras/engine/training_v1.py index dbf1703136b..6617c2dae09 100644 --- a/tensorflow/python/keras/engine/training_v1.py +++ b/tensorflow/python/keras/engine/training_v1.py @@ -1750,7 +1750,7 @@ class Model(training_lib.Model): # Check Dataset/Iterator batch size is consistent with InputLayer. if isinstance(x, (dataset_ops.DatasetV2, iterator_ops.Iterator, iterator_ops.IteratorBase)): - ds_batch_size = tensor_shape.as_dimension( + ds_batch_size = tensor_shape.Dimension( nest.flatten(dataset_ops.get_legacy_output_shapes(x))[0][0]).value if ds_batch_size is not None: if ds_batch_size % num_splits_for_ds != 0: From 8f56cdca6371a2addcda48c24a7ffd4a05ba46f9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 11 Nov 2020 13:48:27 -0800 Subject: [PATCH 192/220] Fixes an issue where aar_with_jni fails when building with --config=android_java8_libs. 
PiperOrigin-RevId: 341902988 Change-Id: I930407c178f6c6d7ae8eeaff87f8f1468e1ddc6a --- tensorflow/lite/java/aar_with_jni.bzl | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/lite/java/aar_with_jni.bzl b/tensorflow/lite/java/aar_with_jni.bzl index 34706c19c54..bedba12bac6 100644 --- a/tensorflow/lite/java/aar_with_jni.bzl +++ b/tensorflow/lite/java/aar_with_jni.bzl @@ -43,6 +43,7 @@ EOF manifest = name + "_generated_AndroidManifest.xml", custom_package = "dummy.package.for.so", deps = [android_library], + multidex = "native", # In some platforms we don't have an Android SDK/NDK and this target # can't be built. We need to prevent the build system from trying to # use the target in that case. From 4cd80bd6418663455bf78e1c09316830dd947f65 Mon Sep 17 00:00:00 2001 From: Jay Shi Date: Wed, 11 Nov 2020 13:49:30 -0800 Subject: [PATCH 193/220] [tf.data] Turn off the experiment `enable_gradient_descent` currently. PiperOrigin-RevId: 341903198 Change-Id: Ic015cc29b9d54a7270bef86b9c0e96035b5b1b14 --- tensorflow/core/kernels/data/optimize_dataset_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/data/optimize_dataset_op.cc b/tensorflow/core/kernels/data/optimize_dataset_op.cc index b3df18a53c7..73a4d9e2afe 100644 --- a/tensorflow/core/kernels/data/optimize_dataset_op.cc +++ b/tensorflow/core/kernels/data/optimize_dataset_op.cc @@ -84,7 +84,7 @@ void OptimizeDatasetOp::MakeDataset(OpKernelContext* ctx, DatasetBase* input, // of the Borg jobs, the experiments will be randomly turned on. // clang-format off absl::flat_hash_map live_experiments = { - {"enable_gradient_descent", 100}, + {"enable_gradient_descent", 0}, {"map_parallelization", 20} }; // clang-format on From aab9c69b692904df384f0e2a1b4f31cfc21fee42 Mon Sep 17 00:00:00 2001 From: Roman Dzhabarov Date: Wed, 11 Nov 2020 14:04:15 -0800 Subject: [PATCH 194/220] Fix Windows build. 
PiperOrigin-RevId: 341906486 Change-Id: Ibf124e862777f03b3229382b2151a2b226dfab90 --- tensorflow/compiler/mlir/mlir_graph_optimization_pass_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/mlir/mlir_graph_optimization_pass_test.cc b/tensorflow/compiler/mlir/mlir_graph_optimization_pass_test.cc index b2b8451536c..a7372437fdf 100644 --- a/tensorflow/compiler/mlir/mlir_graph_optimization_pass_test.cc +++ b/tensorflow/compiler/mlir/mlir_graph_optimization_pass_test.cc @@ -31,7 +31,7 @@ class MockMlirOptimizationPass : public MlirOptimizationPass { MOCK_METHOD(llvm::StringRef, name, (), (const, override)); MOCK_METHOD(bool, IsEnabled, (const ConfigProto& config_proto, const Graph& graph), - (const override)); + (const, override)); MOCK_METHOD(Status, Run, (const ConfigProto& config_proto, mlir::ModuleOp module, const Graph& graph), From d0dbdb763ac3d058ea9cb43a62c1df25615c3818 Mon Sep 17 00:00:00 2001 From: Chuanhao Zhuge Date: Wed, 11 Nov 2020 14:08:52 -0800 Subject: [PATCH 195/220] Support py_func op in TFRT. The py_func op is executed in TFRT via runtime fallback. Updated the PyFuncOp to not use low level tensorflow:: APIs, in favor of newly introduced abstract C APIs. PiperOrigin-RevId: 341907440 Change-Id: I982f237689aa973e3fd2eda4a828a10790297c40 --- .../c/eager/immediate_execution_context.h | 21 ++++++++- .../core/common_runtime/eager/context.h | 9 ++++ tensorflow/core/common_runtime/eager/core.cc | 18 ++++++++ tensorflow/python/lib/core/py_func.cc | 43 +++++++++++-------- 4 files changed, 70 insertions(+), 21 deletions(-) diff --git a/tensorflow/c/eager/immediate_execution_context.h b/tensorflow/c/eager/immediate_execution_context.h index 27fa17127b8..b0fb9ca51d3 100644 --- a/tensorflow/c/eager/immediate_execution_context.h +++ b/tensorflow/c/eager/immediate_execution_context.h @@ -26,6 +26,7 @@ limitations under the License. 
#include "tensorflow/c/tensor_interface.h" #include "tensorflow/core/framework/function.pb.h" #include "tensorflow/core/framework/numeric_types.h" +#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/platform/status.h" #include "tensorflow/core/platform/tstring.h" @@ -138,8 +139,8 @@ class ImmediateExecutionContext : public AbstractContext { } //===--------------------------------------------------------------------===// - // Following are legacy features in TF Eager Runtime. - // TODO(tf-runtime): Figure out a way to deprecate following features after + // Following are features in current TF Eager Runtime. + // TODO(tfrt-devs): Figure out a way to deprecate following features after // migrated to TFRT. //===--------------------------------------------------------------------===// // Clear pending nodes in thread executors and kernel caches. @@ -157,6 +158,22 @@ class ImmediateExecutionContext : public AbstractContext { // Update the Eager Executor for current thread. virtual void SetExecutorForThread(EagerExecutor* executor) = 0; + //===--------------------------------------------------------------------===// + // Following are helper functions to assist integrating TFRT with current + // TF eager runtime. + // TODO(b/172877902): These helper functions are currently used to support + // PyFuncOp on TFRT, and might be useful for ops that directly use low + // level TF APIs. Remove/replace the following functions when TFRT native + // ops are implemented. + //===--------------------------------------------------------------------===// + // Create an abstract tensor handle from tensorflow::Tensor. + virtual ImmediateExecutionTensorHandle* CreateLocalHandleFromTFTensor( + tensorflow::Tensor& t, const char* d_name) = 0; + + // Convert a TFRT TensorHandle to tensorflow::TensorHandle. 
+ virtual ImmediateExecutionTensorHandle* TFTensorHandleFromInterface( + ImmediateExecutionTensorHandle* handle) = 0; + protected: explicit ImmediateExecutionContext(AbstractContextKind kind) : AbstractContext(kind) {} diff --git a/tensorflow/core/common_runtime/eager/context.h b/tensorflow/core/common_runtime/eager/context.h index 62093dcc1d0..03f1a851bfb 100644 --- a/tensorflow/core/common_runtime/eager/context.h +++ b/tensorflow/core/common_runtime/eager/context.h @@ -43,6 +43,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/example/example.pb.h" #include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/util/device_name_utils.h" #if !defined(IS_MOBILE_PLATFORM) @@ -144,11 +145,19 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { ImmediateExecutionTensorHandle* CreateLocalHandle( AbstractTensorInterface* t) override; + // Create an abstract tensor handle from tensorflow::Tensor. + ImmediateExecutionTensorHandle* CreateLocalHandleFromTFTensor( + tensorflow::Tensor& t, const char* d_name) override; ImmediateExecutionTensorHandle* CopyTensorHandleToDevice( ImmediateExecutionTensorHandle* handle, const char* device_name, Status* status) override; ImmediateExecutionOperation* CreateOperation() override; + // Convert a TFRT TensorHandle to tensorflow::TensorHandle. In this case, + // just forward the input TensorHandle. 
+ ImmediateExecutionTensorHandle* TFTensorHandleFromInterface( + ImmediateExecutionTensorHandle* handle) override; + Status RegisterFunction(AbstractFunction* f) override; bool UsesTFRT() override; diff --git a/tensorflow/core/common_runtime/eager/core.cc b/tensorflow/core/common_runtime/eager/core.cc index d1e1218a370..47864c7d7ec 100644 --- a/tensorflow/core/common_runtime/eager/core.cc +++ b/tensorflow/core/common_runtime/eager/core.cc @@ -171,6 +171,24 @@ ImmediateExecutionTensorHandle* EagerContext::CreateLocalHandle( /*op_device=*/nullptr, this); } +ImmediateExecutionTensorHandle* EagerContext::CreateLocalHandleFromTFTensor( + tensorflow::Tensor& t, const char* d_name) { + // If device name is not specified, create the TensorHandle on host cpu. + if (d_name == nullptr) + return TensorHandle::CreateLocalHandle(std::move(t), /*d=*/HostCPU(), + /*op_device=*/nullptr, this); + Device* d = nullptr; + auto status = FindDeviceFromName(d_name, &d); + if (!status.ok()) return nullptr; + return TensorHandle::CreateLocalHandle(std::move(t), /*d=*/d, + /*op_device=*/nullptr, this); +} + +ImmediateExecutionTensorHandle* EagerContext::TFTensorHandleFromInterface( + ImmediateExecutionTensorHandle* handle) { + return handle; +} + // TODO(b/152902651): We have to keep this function here since EagerOperation // depends on EagerContext. Thus, the context build target can't depend on // EagerOperation. diff --git a/tensorflow/python/lib/core/py_func.cc b/tensorflow/python/lib/core/py_func.cc index a3c83bb5d59..52bc6ee8233 100644 --- a/tensorflow/python/lib/core/py_func.cc +++ b/tensorflow/python/lib/core/py_func.cc @@ -85,19 +85,25 @@ bool IsCPUDevice(const Device* d) { // Givens the 'call', prepares the token and inputs as a python tuple // that is appropriate for calling the trampoline. 
-Status MakeArgTuple(const PyCall* call, EagerContext* ctx, PyObject** tuple) { +Status MakeArgTuple(const PyCall* call, TFE_Context* ctx, PyObject** tuple) { int64 n = call->ins.size(); PyObject* lst = PyList_New(n); CHECK(lst); // TFE_TensorHandle assumes that CPU is identified by nullptr. - Device* device = IsCPUDevice(call->device) ? nullptr : call->device; + // + // Set device name to be empty if the device is CPU. + const char* device_name = nullptr; + + if (call->device != nullptr && !IsCPUDevice(call->device)) + device_name = call->device->name().c_str(); + for (int64 i = 0; i < n; ++i) { PyObject* arg = nullptr; if (call->eager) { Tensor t = call->ins[i]; - arg = EagerTensorFromHandle( - tensorflow::wrap(TensorHandle::CreateLocalHandle( - std::move(t), ctx->CanonicalDevice(device), nullptr, ctx))); + arg = EagerTensorFromHandle(tensorflow::wrap( + tensorflow::unwrap(ctx)->CreateLocalHandleFromTFTensor(t, + device_name))); if (arg == nullptr) { Py_DECREF(lst); return errors::Internal("Unable to procure EagerTensor from Tensor."); @@ -112,8 +118,6 @@ Status MakeArgTuple(const PyCall* call, EagerContext* ctx, PyObject** tuple) { } PyList_SetItem(lst, i, arg); } - const char* device_name = - device == nullptr ? nullptr : device->attributes().name().c_str(); *tuple = Py_BuildValue("(ssN)", call->token.c_str(), device_name, lst); CHECK(*tuple); return Status::OK(); @@ -144,10 +148,13 @@ bool IsSingleNone(PyObject* obj) { // it isn't already there. This is left as a future exercise. The required // device-copying logic is implemented in Python at the moment. 
tensorflow::Status ExtractTensorFromEagerTensor(const PyObject* eager_tensor, + TFE_Context* ctx, const Device* expected_device, const Tensor** output_tensor) { - tensorflow::TensorHandle* handle = tensorflow::TensorHandleFromInterface( - tensorflow::unwrap(EagerTensor_Handle(eager_tensor))); + tensorflow::TensorHandle* handle = down_cast( + tensorflow::unwrap(ctx)->TFTensorHandleFromInterface( + tensorflow::unwrap(EagerTensor_Handle(eager_tensor)))); + if (VariantDeviceIsCustom(handle->device())) { return errors::Unimplemented( "Custom devices are currently not supported with PyFuncs."); @@ -197,11 +204,10 @@ Status DoCallPyFunc(PyCall* call, bool* out_log_on_error) { TFE_Context* ctx = reinterpret_cast(PyCapsule_GetPointer( PyObject_GetAttrString(trampoline, "_ctx"), nullptr)); CHECK_NE(ctx, nullptr); - EagerContext* context = ContextFromInterface(tensorflow::unwrap(ctx)); - TF_RETURN_IF_ERROR(MakeArgTuple(call, context, &args)); + TF_RETURN_IF_ERROR(MakeArgTuple(call, ctx, &args)); new_executor.reset(new EagerExecutor(call->eager_async)); - old_executor = &context->Executor(); - context->SetExecutorForThread(new_executor.get()); + old_executor = &(tensorflow::unwrap(ctx)->Executor()); + tensorflow::unwrap(ctx)->SetExecutorForThread(new_executor.get()); } else { TF_RETURN_IF_ERROR(MakeArgTuple(call, nullptr, &args)); } @@ -234,12 +240,11 @@ Status DoCallPyFunc(PyCall* call, bool* out_log_on_error) { } } + TFE_Context* ctx = reinterpret_cast(PyCapsule_GetPointer( + PyObject_GetAttrString(trampoline, "_ctx"), /*name=*/nullptr)); if (new_executor != nullptr) { - TFE_Context* ctx = reinterpret_cast(PyCapsule_GetPointer( - PyObject_GetAttrString(trampoline, "_ctx"), nullptr)); - EagerContext* context = ContextFromInterface(tensorflow::unwrap(ctx)); s.Update(new_executor->WaitForAllPendingNodes()); - context->SetExecutorForThread(old_executor); + tensorflow::unwrap(ctx)->SetExecutorForThread(old_executor); } TF_RETURN_IF_ERROR(s); @@ -256,7 +261,7 @@ Status 
DoCallPyFunc(PyCall* call, bool* out_log_on_error) { const PyObject* item = PyList_GetItem(result, i); if (EagerTensor_CheckExact(item)) { const Tensor* tensor = nullptr; - s = ExtractTensorFromEagerTensor(item, call->device, &tensor); + s = ExtractTensorFromEagerTensor(item, ctx, call->device, &tensor); if (s.ok()) t = *tensor; } else { s = errors::FailedPrecondition( @@ -277,7 +282,7 @@ Status DoCallPyFunc(PyCall* call, bool* out_log_on_error) { DCHECK(call->eager); if (result != Py_None) { const Tensor* t = nullptr; - s = ExtractTensorFromEagerTensor(result, call->device, &t); + s = ExtractTensorFromEagerTensor(result, ctx, call->device, &t); if (s.ok()) call->out.push_back(*t); } } else if (PyArray_Check(result)) { From cc4ca559efdb760be5932c9012a29ed3e4eb8f0f Mon Sep 17 00:00:00 2001 From: Yuefeng Zhou Date: Wed, 11 Nov 2020 14:43:02 -0800 Subject: [PATCH 196/220] Update the tutorial to simplify the datasets creation. PiperOrigin-RevId: 341914190 Change-Id: I3a8e7cd62d068fae9924f47d8a5115a9d73109f5 --- .../parameter_server_training_test.py | 73 +++++++++---------- 1 file changed, 35 insertions(+), 38 deletions(-) diff --git a/tensorflow/python/keras/distribute/parameter_server_training_test.py b/tensorflow/python/keras/distribute/parameter_server_training_test.py index 503dd68eb71..712eab6dcc5 100644 --- a/tensorflow/python/keras/distribute/parameter_server_training_test.py +++ b/tensorflow/python/keras/distribute/parameter_server_training_test.py @@ -66,6 +66,7 @@ def make_coordinator(num_workers, num_ps): parameter_server_strategy_v2.ParameterServerStrategyV2(cluster_resolver)) +# TODO(yuefengz): move this to keras/integration_tests. 
class KPLTest(test.TestCase, parameterized.TestCase): @classmethod @@ -98,7 +99,7 @@ class KPLTest(test.TestCase, parameterized.TestCase): feature_ps = keras.Model({"features": raw_feature_input}, feature_id_input) raw_label_input = keras.layers.Input( - shape=(), dtype=dtypes.string, name="label") + shape=(1,), dtype=dtypes.string, name="label") label_id_input = label_lookup_layer(raw_label_input) label_ps = keras.Model({"label": raw_label_input}, label_id_input) @@ -123,29 +124,22 @@ class KPLTest(test.TestCase, parameterized.TestCase): def feature_and_label_gen(): while True: features = random.sample(FEATURE_VOCAB, 3) - label = "yes" if "avenger" in features else "no" + label = ["yes"] if "avenger" in features else ["no"] yield {"features": features, "label": label} - # The dataset will be created on the coordinator? + # The dataset will be created on the coordinator. raw_dataset = dataset_ops.Dataset.from_generator( feature_and_label_gen, - output_types={ - "features": dtypes.string, - "label": dtypes.string - }).shuffle(200).batch(32) - preproc_dataset = raw_dataset.map( - lambda x: { # pylint: disable=g-long-lambda - "features": feature_ps(x["features"]), - "label": label_ps(x["label"]) - }) - train_dataset = preproc_dataset.map(lambda x: ( # pylint: disable=g-long-lambda - { - "features": x["features"] - }, [x["label"]])) - return train_dataset + output_signature={ + "features": tensor_spec.TensorSpec([3], dtypes.string), + "label": tensor_spec.TensorSpec([1], dtypes.string) + }).shuffle(100).batch(32) - distributed_dataset = self.coordinator.create_per_worker_dataset( - dataset_fn) + train_dataset = raw_dataset.map(lambda x: ( # pylint: disable=g-long-lambda + { + "features": feature_ps(x["features"]) + }, label_ps(x["label"]))) + return train_dataset # Create the model. The input needs to be compatible with KPLs. 
model_input = keras.layers.Input( @@ -161,33 +155,36 @@ class KPLTest(test.TestCase, parameterized.TestCase): emb_output) model = keras.Model({"features": model_input}, dense_output) - optimizer = rmsprop.RMSprop(learning_rate=0.01) + optimizer = rmsprop.RMSprop(learning_rate=0.1) accuracy = keras.metrics.Accuracy() - @def_function.function - def worker_fn(iterator): + @def_function.function + def worker_fn(iterator): - def replica_fn(iterator): - batch_data, labels = next(iterator) - with backprop.GradientTape() as tape: - pred = model(batch_data, training=True) - loss = nn.compute_average_loss( - keras.losses.BinaryCrossentropy( - reduction=loss_reduction.ReductionV2.NONE)(labels, pred)) - gradients = tape.gradient(loss, model.trainable_variables) + def replica_fn(iterator): + batch_data, labels = next(iterator) + with backprop.GradientTape() as tape: + pred = model(batch_data, training=True) + loss = nn.compute_average_loss( + keras.losses.BinaryCrossentropy( + reduction=loss_reduction.ReductionV2.NONE)(labels, pred)) + gradients = tape.gradient(loss, model.trainable_variables) - optimizer.apply_gradients(zip(gradients, model.trainable_variables)) + optimizer.apply_gradients(zip(gradients, model.trainable_variables)) - actual_pred = math_ops.cast(math_ops.greater(pred, 0.5), dtypes.int64) - accuracy.update_state(labels, actual_pred) + actual_pred = math_ops.cast(math_ops.greater(pred, 0.5), dtypes.int64) + accuracy.update_state(labels, actual_pred) - self.coordinator._strategy.run(replica_fn, args=(iterator,)) + self.coordinator._strategy.run(replica_fn, args=(iterator,)) + distributed_dataset = self.coordinator.create_per_worker_dataset(dataset_fn) distributed_iterator = iter(distributed_dataset) - for _ in range(10): - self.coordinator.schedule(worker_fn, args=(distributed_iterator,)) - self.coordinator.join() - self.assertGreater(accuracy.result().numpy(), 0.0) + for _ in range(4): + accuracy.reset_states() + for _ in range(7): + 
self.coordinator.schedule(worker_fn, args=(distributed_iterator,)) + self.coordinator.join() + self.assertGreater(accuracy.result().numpy(), 0.5) # Create a saved model. model.feature_ps = feature_ps From 5980a3e436abe7c4dddb4cc68916726114e35815 Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Wed, 11 Nov 2020 14:48:28 -0800 Subject: [PATCH 197/220] TF Internal API: tf_export ops.get_name_scope symbol as tf.__internal__.get_name_scope Note: this is needed because get_default_graph().get_name_scope does not work when running eagerly. To facilitate splitting Keras without relying on private apis. PiperOrigin-RevId: 341915155 Change-Id: Ic2a3fc0a650ef0807bb40e474e7cc9dc457dcd79 --- tensorflow/python/framework/ops.py | 2 ++ tensorflow/tools/api/golden/v2/tensorflow.__internal__.pbtxt | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index ded284dcbc8..e76558b2f76 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -6030,6 +6030,8 @@ def has_default_graph(): return len(_default_graph_stack.stack) >= 1 +# Exported due to b/171079555 +@tf_export("__internal__.get_name_scope", v1=[]) def get_name_scope(): """Returns the current name scope in the default_graph. 
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.__internal__.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.__internal__.pbtxt index 2c967520b30..f8e51e8918b 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.__internal__.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.__internal__.pbtxt @@ -60,4 +60,8 @@ tf_module { name: "get_enclosing_xla_context" argspec: "args=[], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_name_scope" + argspec: "args=[], varargs=None, keywords=None, defaults=None" + } } From c4bfcaf61cfeb637c3d5aaf64f251bcef69330ea Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Wed, 11 Nov 2020 14:52:11 -0800 Subject: [PATCH 198/220] Fix single pip package renaming bug There is a bug in the single pip package renaming code: we do a replacement over the entire contents of `METADATA` and this causes the `tensorflow_estimator` dependency to be replaced with `tensorflow_gpu_estimator` (on 1.15 it was `tensorflow_cpu_estimator`). These packages don't exist by themselves, Estimator has no CPU/GPU split. Previously this required a manual alteration of the Estimator package to fake it being the CPU/GPU version and a manual upload for that, but we should move away from this manual step as it always causes issues with new releases. See for example #44775 (there are a few more similar issues, both internally and externally, but this is the most recent one). We should build each pip package instead of doing the renaming. We do that on Linux/Mac already but Windows builds take too long so rather than rebuilding we just fake the new package via this renaming function. Future work in this area is needed to get rid of the renaming function, eventually removing it completely from both TF and TF ecosystem packages. 
PiperOrigin-RevId: 341915841 Change-Id: I2bd4c3621e581ccf31e7bdd52958937b93971b90 --- tensorflow/tools/ci_build/release/common.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/release/common.sh b/tensorflow/tools/ci_build/release/common.sh index 78789319c6c..8fe8e160d1e 100644 --- a/tensorflow/tools/ci_build/release/common.sh +++ b/tensorflow/tools/ci_build/release/common.sh @@ -247,7 +247,14 @@ function copy_to_new_project_name { ORIGINAL_PROJECT_NAME_DASH="${ORIGINAL_PROJECT_NAME//_/-}" NEW_PROJECT_NAME_DASH="${NEW_PROJECT_NAME//_/-}" - sed -i.bak "s/${ORIGINAL_PROJECT_NAME_DASH}/${NEW_PROJECT_NAME_DASH}/g" "${NEW_WHL_DIR_PREFIX}.dist-info/METADATA" + + # We need to change the name in the METADATA file, but we need to ensure that + # all other occurences of the name stay the same, otherwise things such as + # URLs and depedencies might be broken (for example, replacing without care + # might transform a `tensorflow_estimator` dependency into + # `tensorflow_gpu_estimator`, which of course does not exist -- except by + # manual upload of a manually altered `tensorflow_estimator` package) + sed -i.bak "s/Name: ${ORIGINAL_PROJECT_NAME_DASH}/Name: ${NEW_PROJECT_NAME_DASH}/g" "${NEW_WHL_DIR_PREFIX}.dist-info/METADATA" ${PYTHON_CMD} -m wheel pack . mv *.whl "${ORIGINAL_WHL_DIR}" From 94472e8efdc5d41ce4d0657b45e0cc8efe979d91 Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Wed, 11 Nov 2020 14:54:26 -0800 Subject: [PATCH 199/220] Some Internal Cleanup. 
PiperOrigin-RevId: 341916236 Change-Id: I8dd706585ee1aa13ca5d8a8dc89444a0da72115d --- tensorflow/python/kernel_tests/signal/BUILD | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/python/kernel_tests/signal/BUILD b/tensorflow/python/kernel_tests/signal/BUILD index bd893184570..fef34a6b704 100644 --- a/tensorflow/python/kernel_tests/signal/BUILD +++ b/tensorflow/python/kernel_tests/signal/BUILD @@ -25,6 +25,9 @@ cuda_py_tests( name = "dct_ops_test", srcs = ["dct_ops_test.py"], python_version = "PY3", + tags = [ + "no_cuda_asan", # b/173048748: times out + ], deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", From 16045f4f2c335851265b358e8e7b60890287c9c2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 11 Nov 2020 15:20:17 -0800 Subject: [PATCH 200/220] [XLA:GPU] Remove obsolete and misleading comment. PiperOrigin-RevId: 341920836 Change-Id: Iaa605f1e49a42d248b6ddc17218656dadc354252 --- tensorflow/compiler/xla/service/gpu/gemm_thunk.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc b/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc index ea4f3951a3d..32e695b6b20 100644 --- a/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc @@ -206,10 +206,6 @@ Status RunGemm(const GpuGemmConfig &gemm_config, CHECK_LT(shape->layout().minor_to_major(col_dim), 2); } - // BLAS gemm reduces rows of LHS and columns of RHS. The Dot operator between - // matrices reduces dimension 1 of LHS and dimension 0 of RHS regardless of - // their layout. Therefore, we should treat dimension 0 as row and dimension 1 - // as column when mapping a matrix Dot to BLAS gemm. 
int64 output_num_rows = output_shape.dimensions(row_dim); int64 output_num_cols = output_shape.dimensions(col_dim); From a2bd10cc6c2b3bd166ed40ccf990f1b2b2a959ae Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Wed, 11 Nov 2020 15:24:00 -0800 Subject: [PATCH 201/220] Move test_helpers.h/.cc to the testing directory. PiperOrigin-RevId: 341921518 Change-Id: I483822bd774c9ef5a48d5b8418b9fae09e26b087 --- tensorflow/lite/micro/tools/make/Makefile | 10 ++++++++++ tensorflow/lite/micro/tools/make/helper_functions.inc | 1 + 2 files changed, 11 insertions(+) diff --git a/tensorflow/lite/micro/tools/make/Makefile b/tensorflow/lite/micro/tools/make/Makefile index 337ef9bfefc..2a2d9e3e2b7 100644 --- a/tensorflow/lite/micro/tools/make/Makefile +++ b/tensorflow/lite/micro/tools/make/Makefile @@ -249,6 +249,9 @@ MICROLITE_CC_KERNEL_SRCS := $(filter-out $(MICROLITE_TEST_SRCS), $(MICROLITE_CC_ MICROLITE_TEST_HDRS := \ $(wildcard tensorflow/lite/micro/testing/*.h) +MICROLITE_TEST_HELPERS_SRC := \ +tensorflow/lite/micro/test_helpers.cc + MICROLITE_CC_BASE_SRCS := \ $(wildcard tensorflow/lite/micro/*.cc) \ $(wildcard tensorflow/lite/micro/benchmarks/*model_data.cc) \ @@ -265,6 +268,10 @@ tensorflow/lite/schema/schema_utils.cc MICROLITE_CC_SRCS := $(filter-out $(MICROLITE_TEST_SRCS), $(MICROLITE_CC_BASE_SRCS)) MICROLITE_CC_SRCS := $(filter-out $(MICROLITE_BENCHMARK_SRCS), $(MICROLITE_CC_SRCS)) +MICROLITE_CC_SRCS := $(filter-out $(MICROLITE_TEST_HELPERS_SRC), $(MICROLITE_CC_SRCS)) + +MICROLITE_TEST_HELPERS_HDRS := \ +tensorflow/lite/micro/test_helpers.h MICROLITE_CC_HDRS := \ $(wildcard tensorflow/lite/micro/*.h) \ @@ -340,6 +347,8 @@ tensorflow/lite/schema/schema_generated.h \ tensorflow/lite/schema/schema_utils.h \ tensorflow/lite/version.h +MICROLITE_CC_HDRS := $(filter-out $(MICROLITE_TEST_HELPERS_HDRS), $(MICROLITE_CC_HDRS)) + # TODO(b/165940489): Figure out how to avoid including fixed point # platform-specific headers. 
THIRD_PARTY_CC_HDRS := \ @@ -436,6 +445,7 @@ ALL_TAGS += $(TARGET_ARCH) ALL_SRCS := \ $(MICROLITE_CC_SRCS) \ + $(MICROLITE_TEST_HELPERS_SRC) \ $(MICROLITE_TEST_SRCS) # Where compiled objects are stored. diff --git a/tensorflow/lite/micro/tools/make/helper_functions.inc b/tensorflow/lite/micro/tools/make/helper_functions.inc index de22fed43ed..b457bf23189 100644 --- a/tensorflow/lite/micro/tools/make/helper_functions.inc +++ b/tensorflow/lite/micro/tools/make/helper_functions.inc @@ -465,6 +465,7 @@ endif $(1)_LOCAL_SRCS := $(2) $(1)_LOCAL_SRCS := $$(call specialize,$$($(1)_LOCAL_SRCS)) +$(1)_LOCAL_SRCS += $(MICROLITE_TEST_HELPERS_SRC) ALL_SRCS += $$($(1)_LOCAL_SRCS) $(1)_LOCAL_HDRS := $(3) $(1)_LOCAL_OBJS := $$(addprefix $$(OBJDIR), \ From 0131d1a7d052ff5104c8c4ab22944b95ece130ed Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Wed, 11 Nov 2020 15:50:48 -0800 Subject: [PATCH 202/220] Add absl::Cord support to open source TensorFlow PiperOrigin-RevId: 341926653 Change-Id: Id6174cf149526cd07670bebb2be6c91dbbf11a50 --- tensorflow/core/BUILD | 1 + tensorflow/core/platform/BUILD | 1 + tensorflow/core/platform/default/BUILD | 1 + tensorflow/core/platform/default/cord.h | 2 +- 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 7c3c5c01472..9b4b1f4f44f 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -343,6 +343,7 @@ cc_library( ":lib_internal", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:cord", "@com_google_absl//absl/types:optional", ], ) diff --git a/tensorflow/core/platform/BUILD b/tensorflow/core/platform/BUILD index 7c5e6b98288..50f4437c9f0 100644 --- a/tensorflow/core/platform/BUILD +++ b/tensorflow/core/platform/BUILD @@ -181,6 +181,7 @@ cc_library( compatible_with = get_compatible_with_portable(), deps = [ ":platform", + "@com_google_absl//absl/strings:cord", ] + tf_platform_deps("cord"), ) diff --git 
a/tensorflow/core/platform/default/BUILD b/tensorflow/core/platform/default/BUILD index ceea382d57b..0b0d0ee41c0 100644 --- a/tensorflow/core/platform/default/BUILD +++ b/tensorflow/core/platform/default/BUILD @@ -41,6 +41,7 @@ cc_library( "no_oss", "nobuilder", ], + deps = ["@com_google_absl//absl/strings:cord"], ) cc_library( diff --git a/tensorflow/core/platform/default/cord.h b/tensorflow/core/platform/default/cord.h index 5823374d1a0..f6e0391f254 100644 --- a/tensorflow/core/platform/default/cord.h +++ b/tensorflow/core/platform/default/cord.h @@ -16,6 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_CORE_PLATFORM_DEFAULT_CORD_H_ #define TENSORFLOW_CORE_PLATFORM_DEFAULT_CORD_H_ -// TODO(ebrevdo): Fill this in. +#include "absl/strings/cord.h" #endif // TENSORFLOW_CORE_PLATFORM_DEFAULT_CORD_H_ From 7ac4c1f85e3013e1e18197c251a879c9f90f9e15 Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Wed, 11 Nov 2020 15:51:09 -0800 Subject: [PATCH 203/220] [XLA/GPU] Remove TupleSelect implementation. It is not used by major XLA/GPU users, and it adds a lot of implementation burden. 
PiperOrigin-RevId: 341926708 Change-Id: I8291f11969b15f8439d2f390bb4e840e5cd70c80 --- .../xla/service/gpu/custom_call_test.cc | 41 ------------------- .../compiler/xla/service/gpu/ir_emitter.cc | 14 ++----- .../xla/service/gpu/ir_emitter_unnested.cc | 6 --- .../xla/service/gpu/ir_emitter_unnested.h | 1 - .../xla/tests/local_client_execute_test.cc | 2 +- tensorflow/compiler/xla/tests/tuple_test.cc | 20 ++++----- 6 files changed, 14 insertions(+), 70 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/custom_call_test.cc b/tensorflow/compiler/xla/service/gpu/custom_call_test.cc index dd2761bcc28..e28cb662116 100644 --- a/tensorflow/compiler/xla/service/gpu/custom_call_test.cc +++ b/tensorflow/compiler/xla/service/gpu/custom_call_test.cc @@ -143,46 +143,5 @@ TEST_F(CustomCallTest, SubBuffers) { EXPECT_THAT(result.data({1, 1}), ::testing::Each(2)); EXPECT_THAT(result.data({2}), ::testing::Each(3)); } - -void Callback_TupleSelect(CUstream stream, void** buffers, - const char* /*opaque*/, size_t /*opaque_len*/) { - // Set the two output leaf buffers equal to the two input leaf buffers. - cudaMemcpyAsync(buffers[2], buffers[0], 10 * sizeof(float), - cudaMemcpyDeviceToDevice, stream); - cudaMemcpyAsync(buffers[3], buffers[1], 10 * sizeof(float), - cudaMemcpyDeviceToDevice, stream); -} -XLA_REGISTER_CUSTOM_CALL_TARGET(Callback_TupleSelect, "CUDA"); -// Tuple-shaped select is a case where XLA can't know all buffer assignments -// statically ahead of time and has to walk the on-device tuple sub-buffers. 
-TEST_F(CustomCallTest, TupleSelect) { - XlaBuilder b(TestName()); - auto tuple_shape = ShapeUtil::MakeTupleShape({ - ShapeUtil::MakeShape(F32, {10}), - ShapeUtil::MakeShape(F32, {10}), - }); - auto p0 = AddParam(LiteralUtil::CreateR0(false), &b); - auto p1 = - AddParam(LiteralUtil::MakeTupleOwned( - LiteralUtil::CreateR1(std::vector(10, 1.0f)), - LiteralUtil::CreateR1(std::vector(10, 2.0f))), - &b); - auto p2 = - AddParam(LiteralUtil::MakeTupleOwned( - LiteralUtil::CreateR1(std::vector(10, 10.0f)), - LiteralUtil::CreateR1(std::vector(10, 20.0f))), - &b); - auto cc = CustomCall(&b, "Callback_TupleSelect", - /*operands=*/{Select(p0, p1, p2)}, tuple_shape, - /*opaque=*/""); - - // Do a tuple-select on the custom-call result to ensure that the custom-call - // sets its output tuple index buffers. - Select(p0, p1, cc); - TF_ASSERT_OK_AND_ASSIGN(auto result, ComputeAndTransfer(&b, {})); - EXPECT_THAT(result.data({0}), ::testing::Each(10)); - EXPECT_THAT(result.data({1}), ::testing::Each(20)); -} - } // anonymous namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc index 33033df246d..17fd1820acd 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc @@ -531,17 +531,9 @@ Status IrEmitter::HandleSelect(HloInstruction* select) { } Status IrEmitter::HandleTupleSelect(HloInstruction* tuple_select) { - auto pred = tuple_select->operand(0); - auto on_true = tuple_select->operand(1); - auto on_false = tuple_select->operand(2); - TF_RET_CHECK(pred->shape().element_type() == PRED); - TF_RET_CHECK(ShapeUtil::IsScalar(pred->shape())); - TF_RET_CHECK(tuple_select->shape().IsTuple()); - llvm_ir::EmitTupleSelect(GetIrArray(*tuple_select, *tuple_select), - GetIrArray(*pred, *tuple_select), - GetBasePointer(*on_true), GetBasePointer(*on_false), - &b_); - return Status::OK(); + return InternalError( + "Dynamic selection of tuples is not 
supported. Please file a bug against " + "XLA/GPU if you need it"); } namespace { diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index dcbb887670d..350cc0124f2 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -2061,12 +2061,6 @@ Status IrEmitterUnnested::EmitSortFromMlir(MlirEmitterInput mlir_input) { return Status::OK(); } -Status IrEmitterUnnested::HandleTupleSelect(HloInstruction* tuple_select) { - AddThunkToThunkSequence( - BuildKernelThunk(tuple_select, /*implements_whole_instruction=*/true)); - return IrEmitter::HandleTupleSelect(tuple_select); -} - Status IrEmitterUnnested::HandleReplicaId(HloInstruction* hlo) { AddThunkToThunkSequence(absl::make_unique( GetThunkInfo(hlo), GetAllocationSlice(*hlo))); diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h index 3c67323875b..17904151820 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h @@ -186,7 +186,6 @@ class IrEmitterUnnested : public IrEmitter, Status HandleSort(HloInstruction* sort) override; Status EmitSortFromMlir(MlirEmitterInput mlir_input); Status HandleTriangularSolve(HloInstruction* hlo) override; - Status HandleTupleSelect(HloInstruction* tuple_select) override; Status HandleAllReduce(HloInstruction* crs) override; Status HandleAfterAll(HloInstruction* after_all) override; Status HandleReplicaId(HloInstruction* hlo) override; diff --git a/tensorflow/compiler/xla/tests/local_client_execute_test.cc b/tensorflow/compiler/xla/tests/local_client_execute_test.cc index cc277e603ca..fd83492a1ae 100644 --- a/tensorflow/compiler/xla/tests/local_client_execute_test.cc +++ b/tensorflow/compiler/xla/tests/local_client_execute_test.cc @@ -732,7 +732,7 @@ 
XLA_TEST_F(LocalClientExecuteTest, RunOnUninitializedStream) { ContainsRegex("stream is uninitialized or in an error state")); } -XLA_TEST_F(LocalClientExecuteTest, SelectBetweenTuples) { +XLA_TEST_F(LocalClientExecuteTest, DISABLED_ON_GPU(SelectBetweenTuples)) { XlaBuilder builder(TestName()); std::initializer_list vec1 = {1.f, 2.f, 3.f}; diff --git a/tensorflow/compiler/xla/tests/tuple_test.cc b/tensorflow/compiler/xla/tests/tuple_test.cc index b6ad44497e6..d1e9a9c7aa2 100644 --- a/tensorflow/compiler/xla/tests/tuple_test.cc +++ b/tensorflow/compiler/xla/tests/tuple_test.cc @@ -202,7 +202,7 @@ XLA_TEST_F(TupleTest, TupleGTEToTuple) { ComputeAndCompareTuple(&builder, expected, {}, error_spec_); } -XLA_TEST_F(TupleTest, SelectBetweenPredTuples) { +XLA_TEST_F(TupleTest, DISABLED_ON_GPU(SelectBetweenPredTuples)) { XlaBuilder b(TestName()); XlaOp v1, v2; @@ -275,7 +275,7 @@ XLA_TEST_F(TupleTest, TupleGTEToTupleToGTEAdd) { ComputeAndCompareR2(&builder, expected, {}, error_spec_); } -XLA_TEST_F(TupleTest, SelectBetweenTuplesOnFalse) { +XLA_TEST_F(TupleTest, DISABLED_ON_GPU(SelectBetweenTuplesOnFalse)) { // Tests a selection between tuples with "false" path taken. XlaBuilder builder(TestName()); @@ -292,7 +292,7 @@ XLA_TEST_F(TupleTest, SelectBetweenTuplesOnFalse) { ComputeAndCompareTuple(&builder, expected, {}, error_spec_); } -XLA_TEST_F(TupleTest, TuplesInAMap) { +XLA_TEST_F(TupleTest, DISABLED_ON_GPU(TuplesInAMap)) { XlaComputation tuple_computation; { // tuple_computation(x) = 100 * min(x, x^2) + max(x, x^2) using tuples. @@ -319,7 +319,7 @@ XLA_TEST_F(TupleTest, TuplesInAMap) { ComputeAndCompareR1(&b, {-99.0f, 101.0f, 214.41f}, {}, error_spec_); } -XLA_TEST_F(TupleTest, SelectBetweenTuplesOnTrue) { +XLA_TEST_F(TupleTest, DISABLED_ON_GPU(SelectBetweenTuplesOnTrue)) { // Tests a selection between tuples with "true" path taken. 
XlaBuilder builder(TestName()); @@ -336,7 +336,7 @@ XLA_TEST_F(TupleTest, SelectBetweenTuplesOnTrue) { ComputeAndCompareTuple(&builder, expected, {}, error_spec_); } -XLA_TEST_F(TupleTest, SelectBetweenTuplesElementResult) { +XLA_TEST_F(TupleTest, DISABLED_ON_GPU(SelectBetweenTuplesElementResult)) { // Tests a selection between tuples but the final result is an element of the // tuple, not the whole tuple. XlaBuilder builder(TestName()); @@ -355,7 +355,7 @@ XLA_TEST_F(TupleTest, SelectBetweenTuplesElementResult) { } // Cascaded selects between tuple types. -XLA_TEST_F(TupleTest, SelectBetweenTuplesCascaded) { +XLA_TEST_F(TupleTest, DISABLED_ON_GPU(SelectBetweenTuplesCascaded)) { // // vec1 vec2 vec2 vec1 // | | | | @@ -392,7 +392,7 @@ XLA_TEST_F(TupleTest, SelectBetweenTuplesCascaded) { ComputeAndCompareR1(&builder, {3.f, 6.f, 9.f}, {}, error_spec_); } -XLA_TEST_F(TupleTest, SelectBetweenTuplesReuseConstants) { +XLA_TEST_F(TupleTest, DISABLED_ON_GPU(SelectBetweenTuplesReuseConstants)) { // Similar to SelectBetweenTuples, but the constants are shared between the // input tuples. XlaBuilder builder(TestName()); @@ -535,8 +535,8 @@ XLA_TEST_F(TupleHloTest, BitcastAfterGTE) { } // Disabled on interpreter due to lack of outfeed. -XLA_TEST_F(TupleHloTest, - DISABLED_ON_INTERPRETER(NonAmbiguousTopLevelAllocation)) { +XLA_TEST_F(TupleHloTest, DISABLED_ON_GPU(DISABLED_ON_INTERPRETER( + NonAmbiguousTopLevelAllocation))) { const char* testcase = R"( HloModule tuple @@ -577,7 +577,7 @@ XLA_TEST_F(TupleHloTest, EXPECT_TRUE(LiteralTestUtil::Equal(expected, literal)); } -XLA_TEST_F(TupleHloTest, TupleSelectOfSort) { +XLA_TEST_F(TupleHloTest, DISABLED_ON_GPU(TupleSelectOfSort)) { const char* testcase = R"( HloModule sort From f2846d35f44ae7a5cd0070381a5cff94f5ddb6fc Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Wed, 11 Nov 2020 16:20:51 -0800 Subject: [PATCH 204/220] Create BUILD files and corresponding targets for `tensorflow/core/lib/gif/BUILD`. 
PiperOrigin-RevId: 341932524 Change-Id: Ida6afc4314d0ff90aaab78feb229dc471235d11b --- tensorflow/core/BUILD | 45 ++---------------- tensorflow/core/lib/core/BUILD | 1 + tensorflow/core/lib/gif/BUILD | 66 ++++++++++++++++++++++++++ tensorflow/core/lib/gtl/BUILD | 5 +- tensorflow/core/platform/BUILD | 7 ++- tensorflow/core/platform/default/BUILD | 1 + 6 files changed, 81 insertions(+), 44 deletions(-) create mode 100644 tensorflow/core/lib/gif/BUILD diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 9b4b1f4f44f..14465980750 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1433,25 +1433,9 @@ cc_library( alwayslink = 1, ) -cc_library( +alias( name = "gif_internal", - srcs = [ - "lib/gif/gif_io.cc", - "//tensorflow/core/platform:gif_hdrs", - ], - hdrs = ["lib/gif/gif_io.h"], - copts = tf_copts(), - linkopts = select({ - "//tensorflow:freebsd": [], - "//tensorflow:windows": [], - "//conditions:default": ["-ldl"], - }), - deps = [ - ":lib", - ":lib_internal", - "//tensorflow/core/platform:gif", - "@com_google_absl//absl/strings", - ], + actual = "//tensorflow/core/lib/gif:gif_internal", ) alias( @@ -1486,30 +1470,9 @@ alias( actual = "//tensorflow/core/lib/jpeg:portable_jpeg_internal", ) -cc_library( +alias( name = "portable_gif_internal", - srcs = if_mobile([ - "lib/gif/gif_io.cc", - "//tensorflow/core/platform:gif_hdrs", - ]), - hdrs = [ - "lib/gif/gif_io.h", - "//tensorflow/core/lib/core:legacy_lib_core_stringpiece_header", - "//tensorflow/core/lib/gtl:legacy_android_gif_internal_headers", - "//tensorflow/core/platform:gif_internal_hdrs", - "//tensorflow/core/platform/default:integral_types.h", - "//tensorflow/core/platform/default:logging.h", - ], - copts = tf_copts(), - linkopts = if_android(["-ldl"]), - deps = [ - "//tensorflow/core/platform:dynamic_annotations", - "//tensorflow/core/platform:gif", - "//tensorflow/core/platform:logging", - "//tensorflow/core/platform:stringpiece", - "@com_google_absl//absl/base:core_headers", - 
"@com_google_absl//absl/strings", - ], + actual = "//tensorflow/core/lib/gif:portable_gif_internal", ) alias( diff --git a/tensorflow/core/lib/core/BUILD b/tensorflow/core/lib/core/BUILD index 3621dfde4f3..7d2e4377863 100644 --- a/tensorflow/core/lib/core/BUILD +++ b/tensorflow/core/lib/core/BUILD @@ -246,6 +246,7 @@ filegroup( ], visibility = [ "//tensorflow/core:__pkg__", + "//tensorflow/core/lib/gif:__pkg__", "//tensorflow/core/lib/jpeg:__pkg__", ], ) diff --git a/tensorflow/core/lib/gif/BUILD b/tensorflow/core/lib/gif/BUILD new file mode 100644 index 00000000000..49ada18e31f --- /dev/null +++ b/tensorflow/core/lib/gif/BUILD @@ -0,0 +1,66 @@ +# Description: +# gif io package. + +load( + "//tensorflow:tensorflow.bzl", + "if_android", + "if_mobile", + "tf_copts", +) +load( + "//tensorflow/core/platform:rules_cc.bzl", + "cc_library", +) + +package( + default_visibility = ["//tensorflow/core:__pkg__"], + features = ["-parse_headers"], + licenses = ["notice"], # Apache 2.0 +) + +cc_library( + name = "gif_internal", + srcs = [ + "gif_io.cc", + "//tensorflow/core/platform:gif_hdrs", + ], + hdrs = ["gif_io.h"], + copts = tf_copts(), + linkopts = select({ + "//tensorflow:freebsd": [], + "//tensorflow:windows": [], + "//conditions:default": ["-ldl"], + }), + deps = [ + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core/platform:gif", + "@com_google_absl//absl/strings", + ], +) + +cc_library( + name = "portable_gif_internal", + srcs = if_mobile([ + "gif_io.cc", + "//tensorflow/core/platform:gif_hdrs", + ]), + hdrs = [ + "gif_io.h", + "//tensorflow/core/lib/core:legacy_lib_core_stringpiece_header", + "//tensorflow/core/lib/gtl:legacy_android_gif_internal_headers", + "//tensorflow/core/platform:gif_internal_hdrs", + "//tensorflow/core/platform/default:integral_types.h", + "//tensorflow/core/platform/default:logging.h", + ], + copts = tf_copts(), + linkopts = if_android(["-ldl"]), + deps = [ + "//tensorflow/core/platform:dynamic_annotations", + 
"//tensorflow/core/platform:gif", + "//tensorflow/core/platform:logging", + "//tensorflow/core/platform:stringpiece", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings", + ], +) diff --git a/tensorflow/core/lib/gtl/BUILD b/tensorflow/core/lib/gtl/BUILD index 650d6f8ddf5..f6f679004c1 100644 --- a/tensorflow/core/lib/gtl/BUILD +++ b/tensorflow/core/lib/gtl/BUILD @@ -194,7 +194,10 @@ filegroup( srcs = [ "cleanup.h", ], - visibility = ["//tensorflow/core:__pkg__"], + visibility = [ + "//tensorflow/core:__pkg__", + "//tensorflow/core/lib/gif:__pkg__", + ], ) # Export source files needed for mobile builds, which do not use granular targets. diff --git a/tensorflow/core/platform/BUILD b/tensorflow/core/platform/BUILD index 50f4437c9f0..cf6c818d9b7 100644 --- a/tensorflow/core/platform/BUILD +++ b/tensorflow/core/platform/BUILD @@ -1495,7 +1495,7 @@ filegroup( srcs = [ "gif.h", ], - visibility = ["//tensorflow/core:__pkg__"], + visibility = ["//tensorflow/core/lib/gif:__pkg__"], ) filegroup( @@ -1551,7 +1551,10 @@ filegroup( "tstring.h", "types.h", ], - visibility = ["//tensorflow/core:__pkg__"], + visibility = [ + "//tensorflow/core:__pkg__", + "//tensorflow/core/lib/gif:__pkg__", + ], ) # Export source files needed for mobile builds, which do not use granular targets. 
diff --git a/tensorflow/core/platform/default/BUILD b/tensorflow/core/platform/default/BUILD index 0b0d0ee41c0..74370988d07 100644 --- a/tensorflow/core/platform/default/BUILD +++ b/tensorflow/core/platform/default/BUILD @@ -570,6 +570,7 @@ package_group( name = "core_and_platform_packages", packages = [ "//tensorflow/core", + "//tensorflow/core/lib/gif", "//tensorflow/core/lib/jpeg", "//tensorflow/core/platform", ], From 9f4c0a49ba1412622d56056bbc1aee1d9dc347ed Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Wed, 11 Nov 2020 16:47:15 -0800 Subject: [PATCH 205/220] [tpu1vm] Update a couple of C APIs to forward compatible APIs PiperOrigin-RevId: 341937267 Change-Id: Icd47140a3431764e1d986e2a60d32559ec84fc9b --- tensorflow/core/tpu/tpu_execute.cc | 16 +++++-- tensorflow/core/tpu/tpu_ops_c_api.h | 44 +++++++++++++++---- .../stream_executor/tpu/tpu_executable.cc | 17 +++++-- 3 files changed, 63 insertions(+), 14 deletions(-) diff --git a/tensorflow/core/tpu/tpu_execute.cc b/tensorflow/core/tpu/tpu_execute.cc index 71455936d60..5c9f73c5a3e 100644 --- a/tensorflow/core/tpu/tpu_execute.cc +++ b/tensorflow/core/tpu/tpu_execute.cc @@ -240,10 +240,20 @@ xla::Status UpdateDynamicInputs( ApiConverter::ToC(runtime_shape, &c_runtime_shape); ApiConverter::ToC(compile_time_shape, &c_compile_time_shape); StatusHelper status; + + TpuExecute_RuntimeInputToPaddedData_Params params; + params.struct_size = + TpuExecute_RuntimeInputToPaddedData_Params_SIZE; + params.runtime_input_ptr = raw_input_runtime->data(); + params.runtime_input_size = raw_input_runtime->size(); + params.padded_data_ptr = padded_data->data(); + params.padded_data_size = padded_data->size(); + params.runtime_shape = &c_runtime_shape; + params.compile_time_shape = &c_compile_time_shape; + params.status = status.c_status; + tensorflow::tpu::OpsApiFn()->TpuExecute_RuntimeInputToPaddedDataFn( - raw_input_runtime->data(), raw_input_runtime->size(), - padded_data->data(), padded_data->size(), &c_runtime_shape, - 
&c_compile_time_shape, status.c_status); + &params); ApiConverter::Free(&c_runtime_shape); ApiConverter::Free(&c_compile_time_shape); return status.status(); diff --git a/tensorflow/core/tpu/tpu_ops_c_api.h b/tensorflow/core/tpu/tpu_ops_c_api.h index bbe954433ff..1168f4db21d 100644 --- a/tensorflow/core/tpu/tpu_ops_c_api.h +++ b/tensorflow/core/tpu/tpu_ops_c_api.h @@ -104,12 +104,26 @@ TFTPU_CAPI_EXPORT void TpuMeshState_Free(XLA_TpuMeshState* mesh_state); TFTPU_CAPI_EXPORT void* TpuMeshState_MeshCommonState( XLA_TpuMeshState* mesh_state); +typedef struct TpuExecutable_LoadProgramAndEnqueueToStream_Params { + int32_t struct_size; + void* priv; + const XLA_TpuProgram* program; + SE_DeviceMemoryBase* arguments; + size_t arguments_len; + SE_DeviceMemoryBase* result; + SE_DeviceMemoryBase* cross_program_prefetch_addr; + int32_t rng_seed; + XLA_DeviceAssignment* device_assignment; + SE_Stream* stream; + + TF_Status* status; // out +} TpuExecutable_LoadProgramAndEnqueueToStream_Params; + +#define TpuExecutable_LoadProgramAndEnqueueToStream_Params_SIZE \ + (sizeof(struct TpuExecutable_LoadProgramAndEnqueueToStream_Params)) + TFTPU_CAPI_EXPORT void TpuExecutable_LoadProgramAndEnqueueToStream( - const XLA_TpuProgram* program, SE_DeviceMemoryBase* arguments, - size_t arguments_len, SE_DeviceMemoryBase* result, - SE_DeviceMemoryBase* cross_program_prefetch_addr, int32_t rng_seed, - XLA_DeviceAssignment* device_assignment, SE_Stream* stream, - TF_Status* status); + TpuExecutable_LoadProgramAndEnqueueToStream_Params* params); TFTPU_CAPI_EXPORT void HardwareLayout_HostShapeToDeviceShape( XLA_Shape* host_shape, XLA_Shape* device_shape); @@ -117,10 +131,24 @@ TFTPU_CAPI_EXPORT int64_t HardwareLayout_ShapeSize(XLA_Shape* shape); TFTPU_CAPI_EXPORT int64_t HardwareLayout_ShapeSizeCompact(XLA_Shape* shape); TFTPU_CAPI_EXPORT int64_t HardwareLayout_ShapeSizeCompactRaw(XLA_Shape* shape); +typedef struct TpuExecute_RuntimeInputToPaddedData_Params { + int32_t struct_size; + void* priv; + 
uint32_t* runtime_input_ptr; + size_t runtime_input_size; + int8_t* padded_data_ptr; + size_t padded_data_size; + XLA_Shape* runtime_shape; + XLA_Shape* compile_time_shape; + + TF_Status* status; // out +} TpuExecute_RuntimeInputToPaddedData_Params; + +#define TpuExecute_RuntimeInputToPaddedData_Params_SIZE \ + (sizeof(struct TpuExecute_RuntimeInputToPaddedData_Params)) + TFTPU_CAPI_EXPORT void TpuExecute_RuntimeInputToPaddedData( - uint32_t* runtime_input_ptr, size_t runtime_input_size, - int8_t* padded_data_ptr, size_t padded_data_size, XLA_Shape* runtime_shape, - XLA_Shape* compile_time_shape, TF_Status* status); + TpuExecute_RuntimeInputToPaddedData_Params* params); TFTPU_CAPI_EXPORT void ConfigureDistributedTpuOp_DoWork( const size_t num_cores_per_host_size, const int32_t* num_cores_per_host, diff --git a/tensorflow/stream_executor/tpu/tpu_executable.cc b/tensorflow/stream_executor/tpu/tpu_executable.cc index 9a092046a38..54c17754cfa 100644 --- a/tensorflow/stream_executor/tpu/tpu_executable.cc +++ b/tensorflow/stream_executor/tpu/tpu_executable.cc @@ -77,10 +77,21 @@ Status TpuExecutable::LoadProgramAndEnqueueToStream( run_options.run_options().stream()->implementation()); StatusHelper status; + TpuExecutable_LoadProgramAndEnqueueToStream_Params params; + params.struct_size = TpuExecutable_LoadProgramAndEnqueueToStream_Params_SIZE; + params.program = core_program_; + params.arguments = arguments_bases; + params.arguments_len = arguments.size(); + params.result = &result_base; + params.cross_program_prefetch_addr = + cross_program_prefetch_addr.has_value() ? &prefetch_base : nullptr; + params.rng_seed = rng_seed; + params.device_assignment = &c_dev_assign; + params.stream = stream; + params.status = status.c_status; + tensorflow::tpu::OpsApiFn()->TpuExecutable_LoadProgramAndEnqueueToStreamFn( - core_program_, arguments_bases, arguments.size(), &result_base, - (cross_program_prefetch_addr.has_value() ? 
&prefetch_base : nullptr), - rng_seed, &c_dev_assign, stream, status.c_status); + &params); if (dev_assign != nullptr) { stream_executor::tpu::SerializedProto_Free(dev_assign_serialized); From 6aab1b8d4491fae035aeec06016d3442f165bd4d Mon Sep 17 00:00:00 2001 From: Richard Uhler Date: Wed, 11 Nov 2020 16:47:46 -0800 Subject: [PATCH 206/220] Set up a distribute_py_test for the tfr mnist example. So that we can demonstrate running the tfr mnist example on TPU. PiperOrigin-RevId: 341937348 Change-Id: I366798b82513e11bd9d13d02c908c9820cdedbf9 --- .../compiler/mlir/tfr/examples/mnist/BUILD | 28 ++- .../mlir/tfr/examples/mnist/mnist_train.py | 168 +++++++++--------- .../tfr/examples/mnist/mnist_train_test.py | 37 ++++ 3 files changed, 144 insertions(+), 89 deletions(-) create mode 100644 tensorflow/compiler/mlir/tfr/examples/mnist/mnist_train_test.py diff --git a/tensorflow/compiler/mlir/tfr/examples/mnist/BUILD b/tensorflow/compiler/mlir/tfr/examples/mnist/BUILD index eeaee926c87..ced100d9408 100644 --- a/tensorflow/compiler/mlir/tfr/examples/mnist/BUILD +++ b/tensorflow/compiler/mlir/tfr/examples/mnist/BUILD @@ -1,6 +1,6 @@ -load("//tensorflow:tensorflow.bzl", "py_binary") load("//tensorflow:tensorflow.bzl", "tf_py_test") load("//tensorflow/compiler/mlir/tfr:build_defs.bzl", "gen_op_libraries") +load("//tensorflow/core/platform/default:distribute.bzl", "distribute_py_test") package( default_visibility = [ @@ -45,16 +45,36 @@ tf_py_test( ], ) -py_binary( +py_library( name = "mnist_train", srcs = ["mnist_train.py"], data = [":mnist_ops_mlir"], - python_version = "PY3", deps = [ ":mnist_ops", ":mnist_ops_py", "//tensorflow:tensorflow_py", - "@absl_py//absl:app", + "//tensorflow/python:framework", "@absl_py//absl/flags", ], ) + +distribute_py_test( + name = "mnist_train_test", + size = "medium", + srcs = ["mnist_train_test.py"], + data = [":mnist_ops_mlir"], + disable_mlir_bridge = False, + python_version = "PY3", + tags = [ + "no_oss", + ], + deps = [ + ":mnist_train", + 
"//tensorflow/python:client_testlib", + "//tensorflow/python:extra_py_tests_deps", + "//tensorflow/python/distribute:combinations", + "//tensorflow/python/distribute:strategy_combinations", + "//tensorflow/python/distribute:test_util", + "@absl_py//absl/testing:parameterized", + ], +) diff --git a/tensorflow/compiler/mlir/tfr/examples/mnist/mnist_train.py b/tensorflow/compiler/mlir/tfr/examples/mnist/mnist_train.py index a4adcf86d5b..7b8db2236da 100644 --- a/tensorflow/compiler/mlir/tfr/examples/mnist/mnist_train.py +++ b/tensorflow/compiler/mlir/tfr/examples/mnist/mnist_train.py @@ -18,7 +18,6 @@ from __future__ import division from __future__ import print_function import os -from absl import app from absl import flags import tensorflow as tf @@ -51,40 +50,41 @@ flatten_size = num_features // 16 * n_hidden_2 seed = 66478 -weights = { - 'f1': - tf.Variable( - tf.random.truncated_normal([5, 5, num_channels, n_hidden_1], - stddev=0.1, - seed=seed)), - 'f2': - tf.Variable( - tf.random.truncated_normal([5, 5, n_hidden_1, n_hidden_2], - stddev=0.1, - seed=seed)), - 'f3': - tf.Variable( - tf.random.truncated_normal([n_hidden_3, flatten_size], - stddev=0.1, - seed=seed)), - 'f4': - tf.Variable( - tf.random.truncated_normal([num_classes, n_hidden_3], - stddev=0.1, - seed=seed)), -} - -biases = { - 'b1': tf.Variable(tf.zeros([n_hidden_1])), - 'b2': tf.Variable(tf.zeros([n_hidden_2])), - 'b3': tf.Variable(tf.zeros([n_hidden_3])), - 'b4': tf.Variable(tf.zeros([num_classes])), -} - class FloatModel(tf.Module): """Float inference for mnist model.""" + def __init__(self): + self.weights = { + 'f1': + tf.Variable( + tf.random.truncated_normal([5, 5, num_channels, n_hidden_1], + stddev=0.1, + seed=seed)), + 'f2': + tf.Variable( + tf.random.truncated_normal([5, 5, n_hidden_1, n_hidden_2], + stddev=0.1, + seed=seed)), + 'f3': + tf.Variable( + tf.random.truncated_normal([n_hidden_3, flatten_size], + stddev=0.1, + seed=seed)), + 'f4': + tf.Variable( + 
tf.random.truncated_normal([num_classes, n_hidden_3], + stddev=0.1, + seed=seed)), + } + + self.biases = { + 'b1': tf.Variable(tf.zeros([n_hidden_1])), + 'b2': tf.Variable(tf.zeros([n_hidden_2])), + 'b3': tf.Variable(tf.zeros([n_hidden_3])), + 'b4': tf.Variable(tf.zeros([num_classes])), + } + @tf.function def __call__(self, data): """The Model definition.""" @@ -95,8 +95,8 @@ class FloatModel(tf.Module): # NOTE: The data/x/input is always specified in floating point precision. # output shape: [-1, 28, 28, 32] - conv1 = gen_mnist_ops.new_conv2d(x, weights['f1'], biases['b1'], 1, 1, 1, 1, - 'SAME', 'RELU') + conv1 = gen_mnist_ops.new_conv2d(x, self.weights['f1'], self.biases['b1'], + 1, 1, 1, 1, 'SAME', 'RELU') # Max pooling. The kernel size spec {ksize} also follows the layout of # the data. Here we have a pooling window of 2, and a stride of 2. @@ -104,8 +104,9 @@ class FloatModel(tf.Module): max_pool1 = gen_mnist_ops.new_max_pool(conv1, 2, 2, 2, 2, 'SAME') # output shape: [-1, 14, 14, 64] - conv2 = gen_mnist_ops.new_conv2d(max_pool1, weights['f2'], biases['b2'], 1, - 1, 1, 1, 'SAME', 'RELU') + conv2 = gen_mnist_ops.new_conv2d(max_pool1, self.weights['f2'], + self.biases['b2'], 1, 1, 1, 1, 'SAME', + 'RELU') # output shape: [-1, 7, 7, 64] max_pool2 = gen_mnist_ops.new_max_pool(conv2, 2, 2, 2, 2, 'SAME') @@ -116,64 +117,61 @@ class FloatModel(tf.Module): reshape = tf.reshape(max_pool2, [-1, flatten_size]) # output shape: [-1, 1024] - fc1 = gen_mnist_ops.new_fully_connected(reshape, weights['f3'], - biases['b3'], 'RELU') + fc1 = gen_mnist_ops.new_fully_connected(reshape, self.weights['f3'], + self.biases['b3'], 'RELU') # output shape: [-1, 10] - return gen_mnist_ops.new_fully_connected(fc1, weights['f4'], biases['b4']) + return gen_mnist_ops.new_fully_connected(fc1, self.weights['f4'], + self.biases['b4']) -def grad(model, inputs, labels, trainable_variables): - with tf.GradientTape() as tape: - logits = model(inputs) - loss_value = tf.reduce_mean( - 
tf.nn.softmax_cross_entropy_with_logits(labels, logits)) - grads = tape.gradient(loss_value, trainable_variables) - correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1)) - accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) - return accuracy, loss_value, grads - - -def training_step(model, inputs, labels, optimizer, step): - trainable_variables = list(weights.values()) + list(biases.values()) - accuracy, loss_value, grads = grad(model, inputs, labels, trainable_variables) - if step % display_step == 0: - print('Step %d:' % step) - print(' Loss = %f' % loss_value) - print(' Batch accuracy: %f' % accuracy) - optimizer.apply_gradients(zip(grads, trainable_variables)) - - -def get_next_batch(iter_): - features = next(iter_) - images, labels = features['image'], features['label'] - return (mnist_preprocess(images), tf.one_hot(labels, num_classes)) - - -def mnist_preprocess(x): - x_float = tf.cast(x, tf.float32) - return x_float / 255.0 - - -def train(model, dataset, optimizer): - iter_ = iter(dataset) - for step in range(flags.FLAGS.train_steps): - inputs, labels = get_next_batch(iter_) - training_step(model, inputs, labels, optimizer, step) - - -def main(_): +def main(strategy): + """Trains an MNIST model using the given tf.distribute.Strategy.""" # TODO(fengliuai): put this in some automatically generated code. os.environ[ 'TF_MLIR_TFR_LIB_DIR'] = 'tensorflow/compiler/mlir/tfr/examples/mnist' - # Create an mnist float model with the specified float state. - model = FloatModel() - optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate) ds_train = tfds.load('mnist', split='train', shuffle_files=True) - ds_train = ds_train.shuffle(1024).batch(batch_size).prefetch(64) + ds_train = ds_train.shuffle(1024).repeat().batch(batch_size).prefetch(64) + ds_train = strategy.experimental_distribute_dataset(ds_train) - train(model, ds_train, optimizer) + with strategy.scope(): + # Create an mnist float model with the specified float state. 
+ model = FloatModel() + optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate) + def train_step(features): + inputs = tf.image.convert_image_dtype( + features['image'], dtype=tf.float32, saturate=False) + labels = tf.one_hot(features['label'], num_classes) -if __name__ == '__main__': - app.run(main) + with tf.GradientTape() as tape: + logits = model(inputs) + loss_value = tf.reduce_mean( + tf.nn.softmax_cross_entropy_with_logits(labels, logits)) + + grads = tape.gradient(loss_value, model.trainable_variables) + correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1)) + accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) + optimizer.apply_gradients(zip(grads, model.trainable_variables)) + return accuracy, loss_value + + @tf.function + def distributed_train_step(dist_inputs): + per_replica_accuracy, per_replica_losses = strategy.run( + train_step, args=(dist_inputs,)) + accuracy = strategy.reduce( + tf.distribute.ReduceOp.MEAN, per_replica_accuracy, axis=None) + loss_value = strategy.reduce( + tf.distribute.ReduceOp.MEAN, per_replica_losses, axis=None) + return accuracy, loss_value + + iterator = iter(ds_train) + accuracy = 0.0 + for step in range(flags.FLAGS.train_steps): + accuracy, loss_value = distributed_train_step(next(iterator)) + if step % display_step == 0: + tf.print('Step %d:' % step) + tf.print(' Loss = %f' % loss_value) + tf.print(' Batch accuracy = %f' % accuracy) + + return accuracy diff --git a/tensorflow/compiler/mlir/tfr/examples/mnist/mnist_train_test.py b/tensorflow/compiler/mlir/tfr/examples/mnist/mnist_train_test.py new file mode 100644 index 00000000000..04afecd0872 --- /dev/null +++ b/tensorflow/compiler/mlir/tfr/examples/mnist/mnist_train_test.py @@ -0,0 +1,37 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Test for tfr mnist training example.""" + +from absl.testing import parameterized + +from tensorflow.compiler.mlir.tfr.examples.mnist import mnist_train +from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import strategy_combinations +from tensorflow.python.distribute import test_util +from tensorflow.python.platform import test + + +class MnistTrainTest(test.TestCase, parameterized.TestCase): + + # TODO(b/172367622) Switch to strategy_combinations.all_strategies after + # issues with TPU strategies are resolved. + @combinations.generate( + combinations.combine(strategy=strategy_combinations.strategies_minus_tpu)) + def testMnistTrain(self, strategy): + accuracy = mnist_train.main(strategy) + self.assertGreater(accuracy, 0.75, 'accuracy sanity check') + + +if __name__ == '__main__': + test_util.main() From 47f87c040292b2038cd807cd60109d44dd6b5b4d Mon Sep 17 00:00:00 2001 From: Robert Suderman Date: Wed, 11 Nov 2020 17:20:20 -0800 Subject: [PATCH 207/220] Lowering tf.image.resize_nearest_neighbour to XLA using a GatherV2 A nearest neighbour resize can be done using a single GatherV2 with the correct indices for lookup. Included the computation for these shapes. 
PiperOrigin-RevId: 341942832 Change-Id: I0f93778bc2b816c442b7256e74b0d7e6bf2c9e1f --- .../mlir/tensorflow/tests/lower_tf.mlir | 114 ++++++++ .../mlir/tensorflow/transforms/lower_tf.cc | 259 +++++++++++++++++- 2 files changed, 368 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir b/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir index 2de6c3c16d3..3523187f49a 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir @@ -768,3 +768,117 @@ func @lgamma(%arg0: tensor<4xf32>) -> tensor<4xf32> { %0 = "tf.Lgamma"(%arg0) : (tensor<4xf32>) -> tensor<4xf32> return %0 : tensor<4xf32> } + +// CHECK-LABEL: func @imag_resize_nearest +func @imag_resize_nearest(%arg0: tensor<1x7x7x1xi32>) -> tensor<1x3x3x1xi32> { + %shape = "tf.Const"() {device = "", value = dense<3> : tensor<2xi32>} : () -> tensor<2xi32> + + // CHECK: [[VAL0:%.+]] = "tf.Const"() {value = dense<1> : tensor} + // CHECK: [[VAL1:%.+]] = "tf.Const"() {value = dense<[1, 3, 3, 1]> + // CHECK: [[VAL2:%.+]] = "tf.Const"() {value = dense<[1, 49, 1]> + // CHECK: [[VAL3:%.+]] = "tf.Const"() {value = dense<[0, 2, 4, 14, 16, 18, 28, 30, 32]> : tensor<9xi32>} + // CHECK: [[VAL4:%.+]] = "tf.Reshape"(%arg0, [[VAL2]]) + // CHECK: [[VAL5:%.+]] = "tf.GatherV2"([[VAL4]], [[VAL3]], [[VAL0]]) {batch_dims = 0 : i64} + // CHECK: [[VAL6:%.+]] = "tf.Reshape"([[VAL5]], [[VAL1]]) + // CHECK: return [[VAL6]] + %resize = "tf.ResizeNearestNeighbor"(%arg0, %shape) {align_corners = false, device = "", half_pixel_centers = false} : (tensor<1x7x7x1xi32>, tensor<2xi32>) -> tensor<1x3x3x1xi32> + return %resize: tensor<1x3x3x1xi32> +} + +// CHECK-LABEL: func @imag_resize_nearest_dyn_img +func @imag_resize_nearest_dyn_img(%arg0: tensor<1x?x?x1xi32>) -> tensor<1x3x3x1xi32> { + %shape = "tf.Const"() {device = "", value = dense<3> : tensor<2xi32>} : () -> tensor<2xi32> + + // CHECK: [[VAL0:%.+]] = "tf.Const"() {value = dense<1> 
: tensor} + // CHECK: [[VAL1:%.+]] = "tf.Const"() {value = dense<[3, 1]> : tensor<2xi32>} + // CHECK: [[VAL2:%.+]] = "tf.Const"() {value = dense<9> : tensor<1xi32>} + // CHECK: [[VAL3:%.+]] = "tf.Const"() {value = dense<3> : tensor<1xi32>} + // CHECK: [[VAL4:%.+]] = "tf.Const"() {value = dense<[1, 3]> : tensor<2xi32>} + // CHECK: [[VAL5:%.+]] = "tf.Const"() {value = dense<[0.000000e+00, 1.000000e+00, 2.000000e+00]> + // CHECK: [[VAL6:%.+]] = "tf.Const"() {value = dense<3.000000e+00> : tensor} + // CHECK: [[VAL7:%.+]] = "tf.Const"() {value = dense<0> : tensor} + // CHECK: [[VAL8:%.+]] = "tf.Shape"(%arg0) + // CHECK: [[VAL9:%.+]] = "tf.Cast"([[VAL8]]) + // CHECK: [[VAL10:%.+]]:4 = "tf.Unpack"([[VAL9]]) {axis = 0 : i64} + // CHECK: [[VAL11:%.+]] = "tf.Mul"([[VAL10]]#1, [[VAL10]]#2) + // CHECK: [[VAL12:%.+]] = "tf.ExpandDims"([[VAL10]]#0, [[VAL7]]) + // CHECK: [[VAL13:%.+]] = "tf.ExpandDims"([[VAL10]]#3, [[VAL7]]) + // CHECK: [[VAL14:%.+]] = "tf.ConcatV2"([[VAL12]], [[VAL3]], [[VAL3]], [[VAL13]], [[VAL7]]) + // CHECK: [[VAL15:%.+]] = "tf.Cast"([[VAL10]]#1) + // CHECK: [[VAL16:%.+]] = "tf.Div"([[VAL15]], [[VAL6]]) + // CHECK: [[VAL17:%.+]] = "tf.Mul"([[VAL16]], [[VAL5]]) + // CHECK: [[VAL18:%.+]] = "tf.Cast"([[VAL17]]) + // CHECK: [[VAL19:%.+]] = "tf.Reshape"([[VAL18]], [[VAL1]]) + // CHECK: [[VAL20:%.+]] = "tf.Mul"([[VAL19]], [[VAL10]]#2) + // CHECK: [[VAL21:%.+]] = "tf.Cast"([[VAL10]]#2) + // CHECK: [[VAL22:%.+]] = "tf.Div"([[VAL21]], [[VAL6]]) + // CHECK: [[VAL23:%.+]] = "tf.Mul"([[VAL22]], [[VAL5]]) + // CHECK: [[VAL24:%.+]] = "tf.Cast"([[VAL23]]) + // CHECK: [[VAL25:%.+]] = "tf.Reshape"([[VAL24]], [[VAL4]]) + // CHECK: [[VAL26:%.+]] = "tf.AddV2"([[VAL20]], [[VAL25]]) + // CHECK: [[VAL27:%.+]] = "tf.Reshape"([[VAL26]], [[VAL2]]) + // CHECK: [[VAL28:%.+]] = "tf.ExpandDims"([[VAL10]]#0, [[VAL7]]) + // CHECK: [[VAL29:%.+]] = "tf.ExpandDims"([[VAL11]], [[VAL7]]) + // CHECK: [[VAL30:%.+]] = "tf.ExpandDims"([[VAL10]]#3, [[VAL7]]) + // CHECK: [[VAL31:%.+]] = 
"tf.ConcatV2"([[VAL28]], [[VAL29]], [[VAL30]], [[VAL7]]) + // CHECK: [[VAL32:%.+]] = "tf.Reshape"(%arg0, [[VAL31]]) + // CHECK: [[VAL33:%.+]] = "tf.GatherV2"([[VAL32]], [[VAL27]], [[VAL0]]) {batch_dims = 0 : i64} + // CHECK: [[VAL34:%.+]] = "tf.Reshape"([[VAL33]], [[VAL14]]) + // CHECK: return [[VAL34]] + %resize = "tf.ResizeNearestNeighbor"(%arg0, %shape) {align_corners = false, device = "", half_pixel_centers = false} : (tensor<1x?x?x1xi32>, tensor<2xi32>) -> tensor<1x3x3x1xi32> + return %resize: tensor<1x3x3x1xi32> +} + +// CHECK-LABEL: func @imag_resize_nearest_full_dyn +func @imag_resize_nearest_full_dyn(%arg0: tensor<1x?x?x1xi32>, %arg1: tensor<2xi32>) -> tensor<1x?x?x1xi32> { + + // CHECK: [[VAL0:%.+]] = "tf.Const"() {value = dense<1> : tensor} + // CHECK: [[VAL1:%.+]] = "tf.Const"() {value = dense<0.000000e+00> : tensor} + // CHECK: [[VAL2:%.+]] = "tf.Const"() {value = dense<1.000000e+00> : tensor} + // CHECK: [[VAL3:%.+]] = "tf.Const"() {value = dense<1> : tensor<1xi32>} + // CHECK: [[VAL4:%.+]] = "tf.Const"() {value = dense<1> : tensor<1xi64>} + // CHECK: [[VAL5:%.+]] = "tf.Const"() {value = dense<0> : tensor} + // CHECK: [[VAL6:%.+]] = "tf.Shape"(%arg0) + // CHECK: [[VAL7:%.+]] = "tf.Cast"([[VAL6]]) + // CHECK: [[VAL8:%.+]]:4 = "tf.Unpack"([[VAL7]]) {axis = 0 : i64} + // CHECK: [[VAL9:%.+]] = "tf.Mul"([[VAL8]]#1, [[VAL8]]#2) + // CHECK: [[VAL10:%.+]]:2 = "tf.Unpack"(%arg1) {axis = 0 : i64} + // CHECK: [[VAL11:%.+]] = "tf.Mul"([[VAL10]]#0, [[VAL10]]#1) + // CHECK: [[VAL12:%.+]] = "tf.ExpandDims"([[VAL8]]#0, [[VAL5]]) + // CHECK: [[VAL13:%.+]] = "tf.ExpandDims"([[VAL10]]#0, [[VAL5]]) + // CHECK: [[VAL14:%.+]] = "tf.ExpandDims"([[VAL10]]#1, [[VAL5]]) + // CHECK: [[VAL15:%.+]] = "tf.ExpandDims"([[VAL8]]#3, [[VAL5]]) + // CHECK: [[VAL16:%.+]] = "tf.ConcatV2"([[VAL12]], [[VAL13]], [[VAL14]], [[VAL15]], [[VAL5]]) + // CHECK: [[VAL17:%.+]] = "tf.Cast"([[VAL8]]#1) + // CHECK: [[VAL18:%.+]] = "tf.Cast"([[VAL10]]#0) + // CHECK: [[VAL19:%.+]] = "tf.Div"([[VAL17]], 
[[VAL18]]) + // CHECK: [[VAL20:%.+]] = "tf.Range"([[VAL1]], [[VAL18]], [[VAL2]]) + // CHECK: [[VAL21:%.+]] = "tf.Mul"([[VAL20]], [[VAL19]]) + // CHECK: [[VAL22:%.+]] = "tf.Cast"([[VAL21]]) + // CHECK: [[VAL23:%.+]] = "tf.ExpandDims"([[VAL10]]#0, [[VAL5]]) + // CHECK: [[VAL24:%.+]] = "tf.ConcatV2"([[VAL23]], [[VAL3]], [[VAL5]]) + // CHECK: [[VAL25:%.+]] = "tf.Reshape"([[VAL22]], [[VAL24]]) + // CHECK: [[VAL26:%.+]] = "tf.Mul"([[VAL25]], [[VAL8]]#2) + // CHECK: [[VAL27:%.+]] = "tf.Cast"([[VAL8]]#2) + // CHECK: [[VAL28:%.+]] = "tf.Cast"([[VAL10]]#1) + // CHECK: [[VAL29:%.+]] = "tf.Div"([[VAL27]], [[VAL28]]) + // CHECK: [[VAL30:%.+]] = "tf.Range"([[VAL1]], [[VAL28]], [[VAL2]]) + // CHECK: [[VAL31:%.+]] = "tf.Mul"([[VAL30]], [[VAL29]]) + // CHECK: [[VAL32:%.+]] = "tf.Cast"([[VAL31]]) + // CHECK: [[VAL33:%.+]] = "tf.ExpandDims"([[VAL10]]#1, [[VAL5]]) + // CHECK: [[VAL34:%.+]] = "tf.ConcatV2"([[VAL3]], [[VAL33]], [[VAL5]]) + // CHECK: [[VAL35:%.+]] = "tf.Reshape"([[VAL32]], [[VAL34]]) + // CHECK: [[VAL36:%.+]] = "tf.AddV2"([[VAL26]], [[VAL35]]) + // CHECK: [[VAL37:%.+]] = "tf.Reshape"([[VAL11]], [[VAL4]]) + // CHECK: [[VAL38:%.+]] = "tf.Reshape"([[VAL36]], [[VAL37]]) + // CHECK: [[VAL39:%.+]] = "tf.ExpandDims"([[VAL8]]#0, [[VAL5]]) + // CHECK: [[VAL40:%.+]] = "tf.ExpandDims"([[VAL9]], [[VAL5]]) + // CHECK: [[VAL41:%.+]] = "tf.ExpandDims"([[VAL8]]#3, [[VAL5]]) + // CHECK: [[VAL42:%.+]] = "tf.ConcatV2"([[VAL39]], [[VAL40]], [[VAL41]], [[VAL5]]) + // CHECK: [[VAL43:%.+]] = "tf.Reshape"(%arg0, [[VAL42]]) + // CHECK: [[VAL44:%.+]] = "tf.GatherV2"([[VAL43]], [[VAL38]], [[VAL0]]) {batch_dims = 0 : i64} + // CHECK: [[VAL45:%.+]] = "tf.Reshape"([[VAL44]], [[VAL16]]) + // CHECK: return [[VAL45]] + %resize = "tf.ResizeNearestNeighbor"(%arg0, %arg1) {align_corners = false, device = "", half_pixel_centers = false} : (tensor<1x?x?x1xi32>, tensor<2xi32>) -> tensor<1x?x?x1xi32> + return %resize: tensor<1x?x?x1xi32> +} diff --git 
a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc index 53a73ce89e2..38b63ff1ff0 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc @@ -22,10 +22,13 @@ limitations under the License. #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Diagnostics.h" // from @llvm-project #include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/Matchers.h" // from @llvm-project #include "mlir/IR/PatternMatch.h" // from @llvm-project #include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "mlir/IR/TypeRange.h" // from @llvm-project #include "mlir/IR/TypeUtilities.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" @@ -1255,15 +1258,261 @@ class Lower_UnaryOpsComposition } }; +// Lowers ResizeNearestNeighbor to an indices computations with a gather along +// the combined spatial dimensions. Generating the indices along the +// width/height index could be used to gather along each of W and H dimension +// of the input image array. To reduce to a single gather, these indices are +// combined, so a single gather can be performed along the combined spatial +// dimensions. +// +// Images must take the shape [b, h, w, c] and size is a rank-1 length-2 tensor +// containing the height and width values for the output tensor. This lowering +// should work with a dynamic images array. +// +// For example, a scaling with image shape [1, 3, 3, 1] to [2, 2] and unaligned +// corners would generate a [0, 1] lookup along both the x and y direction. 
+// Then when combined to form the 1-D spatial index the values would be +// [0, 1, 3, 4] which would gather along the reshape image tensor of shape +// [1, 9, 1], reshaped to the final [1, 3, 3, 1]. +class LowerResizeNearestNeighbor : public RewritePattern { + public: + explicit LowerResizeNearestNeighbor(MLIRContext *context) + : RewritePattern(ResizeNearestNeighborOp::getOperationName(), + { + BroadcastToOp::getOperationName(), + ConstOp::getOperationName(), + DivOp::getOperationName(), + PackOp::getOperationName(), + RangeOp::getOperationName(), + ReshapeOp::getOperationName(), + ShapeOp::getOperationName(), + SplitOp::getOperationName(), + TransposeOp::getOperationName(), + }, + 1, context) {} + + LogicalResult matchAndRewrite(Operation *src_op, + PatternRewriter &rewriter) const override { + auto op = cast(src_op); + auto loc = op.getLoc(); + auto result_ty = op.getType().cast(); + + auto input = op.images(); + auto input_ty = input.getType().cast(); + auto input_element_ty = input_ty.getElementType(); + auto out_size = op.size(); + auto out_size_ty = out_size.getType().cast(); + auto out_size_element_ty = out_size_ty.getElementType(); + + // Input should be rank 4. + if (!input_ty.hasRank() || input_ty.getRank() != 4) { + return failure(); + } + + // Check that out_size is rank-1, length-2. Otherwise the size is not legal. + if (!out_size_ty.hasRank() || out_size_ty.getRank() != 1 || + out_size_ty.getShape()[0] != 2) { + return failure(); + } + + // Extract the output width / height dim size. 
+ int out_height_constant = -1; + int out_width_constant = -1; + DenseIntElementsAttr out_size_cst; + if (matchPattern(out_size, m_Constant(&out_size_cst))) { + llvm::SmallVector cst_size; + for (auto val : out_size_cst.getIntValues()) { + cst_size.push_back(val.getSExtValue()); + } + + out_height_constant = cst_size[0]; + out_width_constant = cst_size[1]; + + if (out_height_constant < 0 || out_width_constant < 0) return failure(); + } + + int out_spatial_cst = out_height_constant < 0 || out_width_constant < 0 + ? -1 + : out_height_constant * out_width_constant; + + // Input rank should be 4. Might be able to drop this requirement entirely + // as its an input requirement. + if (!input_ty.hasRank() || input_ty.getRank() != 4) { + return failure(); + } + + int batch_cst = input_ty.getShape()[0]; + int channels_cst = input_ty.getShape()[3]; + + int in_y_cst = input_ty.getShape()[1]; + int in_x_cst = input_ty.getShape()[2]; + int in_spatial_cst = + in_y_cst < 0 || in_x_cst < 0 ? -1 : in_y_cst * in_x_cst; + + // TODO(suderman): Add support for these optional parameters. + if (op.align_corners() == true || op.half_pixel_centers() == true) { + return failure(); + } + + auto one = + rewriter.create(loc, GetScalarOfType(out_size_element_ty, 1)); + + // Extract the image shape. + Value input_shape = rewriter.create( + loc, RankedTensorType::get({4}, rewriter.getI64Type()), input); + input_shape = rewriter.create( + loc, RankedTensorType::get({4}, out_size_element_ty), input_shape); + + auto scalar_dim_ty = RankedTensorType::get({}, out_size_element_ty); + auto split_image_shape = rewriter.create( + loc, + TypeRange({scalar_dim_ty, scalar_dim_ty, scalar_dim_ty, scalar_dim_ty}), + input_shape); + + // Extract the separate components from the input shape. 
+ auto batch = split_image_shape.getResult(0); + auto in_y = split_image_shape.getResult(1); + auto in_x = split_image_shape.getResult(2); + auto channels = split_image_shape.getResult(3); + + auto in_count = rewriter.create( + loc, RankedTensorType::get({}, out_size_element_ty), in_y, in_x); + + // Unpack and separate the out width/height. + auto split_out_size = rewriter.create( + loc, TypeRange({scalar_dim_ty, scalar_dim_ty}), out_size); + + auto out_y = split_out_size.getResult(0); + auto out_x = split_out_size.getResult(1); + + auto out_count = rewriter.create( + loc, RankedTensorType::get({}, out_size_element_ty), out_y, out_x); + + // Generate what the final output shape will look like. + auto out_shape = rewriter.create( + loc, RankedTensorType::get({4}, out_size_element_ty), + ValueRange({batch, out_y, out_x, channels})); + + // Compute the indices along the vertical dimension. + auto in_y_f32 = rewriter.create( + loc, RankedTensorType::get({}, rewriter.getF32Type()), in_y); + auto out_w_f32 = rewriter.create( + loc, RankedTensorType::get({}, rewriter.getF32Type()), out_y); + + Value y_scale = rewriter.create( + loc, RankedTensorType::get({}, rewriter.getF32Type()), in_y_f32, + out_w_f32); + + Value zero_f32 = rewriter.create( + loc, GetScalarOfFloatType(rewriter.getF32Type(), 0.0)); + Value one_f32 = rewriter.create( + loc, GetScalarOfFloatType(rewriter.getF32Type(), 1.0)); + + Value y_range = rewriter.create( + loc, + RankedTensorType::get({out_height_constant}, rewriter.getF32Type()), + zero_f32, out_w_f32, one_f32); + + y_range = rewriter.create( + loc, + RankedTensorType::get({out_height_constant}, rewriter.getF32Type()), + y_range, y_scale); + + y_range = rewriter.create( + loc, RankedTensorType::get({out_height_constant}, out_size_element_ty), + y_range); + + y_range = rewriter.create( + loc, + RankedTensorType::get({out_height_constant, 1}, out_size_element_ty), + y_range, + rewriter.create(loc, + RankedTensorType::get({2}, out_size_element_ty), + 
ValueRange({out_y, one}))); + + Value y_indices = rewriter.create( + loc, + RankedTensorType::get({out_height_constant, 1}, out_size_element_ty), + y_range, in_x); + + // Compute the indices for the nearest neighbour lookup across the width + // dim. + auto in_x_f32 = rewriter.create( + loc, RankedTensorType::get({}, rewriter.getF32Type()), in_x); + auto out_h_f32 = rewriter.create( + loc, RankedTensorType::get({}, rewriter.getF32Type()), out_x); + + Value x_scale = rewriter.create( + loc, RankedTensorType::get({}, rewriter.getF32Type()), in_x_f32, + out_h_f32); + + Value x_range = rewriter.create( + loc, RankedTensorType::get({out_width_constant}, rewriter.getF32Type()), + zero_f32, out_h_f32, one_f32); + + x_range = rewriter.create( + loc, RankedTensorType::get({out_width_constant}, rewriter.getF32Type()), + x_range, x_scale); + + x_range = rewriter.create( + loc, RankedTensorType::get({out_width_constant}, out_size_element_ty), + x_range); + + Value x_indices = rewriter.create( + loc, + RankedTensorType::get({1, out_width_constant}, out_size_element_ty), + x_range, + rewriter.create(loc, + RankedTensorType::get({2}, out_size_element_ty), + ValueRange({one, out_x}))); + + // Generate the combined index array, reshape to be 1-D. + Value indices = rewriter.create( + loc, + RankedTensorType::get({out_height_constant, out_width_constant}, + out_size_element_ty), + y_indices, x_indices); + + indices = rewriter.create( + loc, RankedTensorType::get({out_spatial_cst}, out_size_element_ty), + indices, + rewriter.create( + loc, RankedTensorType::get({1}, out_size_element_ty), out_count, + rewriter.create(loc, rewriter.getI64TensorAttr({1})))); + + // Group the spatial indices and gather along that combined index. 
+ Value input_collapsed_spatial = rewriter.create( + loc, + RankedTensorType::get({batch_cst, in_spatial_cst, channels_cst}, + input_element_ty), + input, + rewriter.create(loc, + RankedTensorType::get({3}, out_size_element_ty), + ValueRange({batch, in_count, channels}))); + + Value gathered_values = rewriter.create( + loc, + RankedTensorType::get({batch_cst, out_spatial_cst, channels_cst}, + input_element_ty), + input_collapsed_spatial, indices, /*axis=*/one); + + gathered_values = + rewriter.create(loc, result_ty, gathered_values, out_shape); + + rewriter.replaceOp(op, gathered_values); + return success(); + } +}; + } // namespace void PopulateLoweringTFPatterns(MLIRContext *context, OwningRewritePatternList *patterns) { - patterns - ->insert(context); + patterns->insert(context); populateWithGenerated(context, *patterns); } From 8f936eb15cc3b798dbe535ca1f4f0eff2b6b79bd Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Wed, 11 Nov 2020 17:31:19 -0800 Subject: [PATCH 208/220] Support dynamic value inference on iota instructions. We consider all iota output values are static. 
PiperOrigin-RevId: 341944607 Change-Id: Ie4c3b6dea7d168c41a10a0046eb280a5293adc60 --- tensorflow/compiler/xla/client/xla_builder.cc | 1 + .../compiler/xla/tests/dynamism_inference_test.cc | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index 6594044f205..02fb3b78834 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -3401,6 +3401,7 @@ StatusOr XlaBuilder::BuildDynamicInferenceGraph(XlaOp root_op) { break; } case HloOpcode::kConstant: + case HloOpcode::kIota: SetInstructionAsConstant(new_instr, id, new_shape, false); break; case HloOpcode::kCustomCall: diff --git a/tensorflow/compiler/xla/tests/dynamism_inference_test.cc b/tensorflow/compiler/xla/tests/dynamism_inference_test.cc index a7e032448e0..0036889abef 100644 --- a/tensorflow/compiler/xla/tests/dynamism_inference_test.cc +++ b/tensorflow/compiler/xla/tests/dynamism_inference_test.cc @@ -104,6 +104,19 @@ TEST_F(DynamismInferenceTest, ScalarInt32Literal) { } } +TEST_F(DynamismInferenceTest, Iota) { + // The output of iota is considered static. + for (ClientType client_type : client_types) { + Client* client = ClientOrDie(platform_, client_type); + XlaBuilder b(TestName()); + auto computation = Iota(&b, S32, 2); + // Iota is not dynamic.
+ EXPECT_FALSE(ComputeDynamismLiteral(client, computation, &b) + .ValueOrDie() + .Get({0})); + } +} + TEST_F(DynamismInferenceTest, TupleSimple) { for (ClientType client_type : client_types) { Client* client = ClientOrDie(platform_, client_type); From 3e03af4c7f532ed903da7809f85e344b7c2ef86f Mon Sep 17 00:00:00 2001 From: Jose Baiocchi Date: Wed, 11 Nov 2020 17:34:08 -0800 Subject: [PATCH 209/220] Make TraceMe movable PiperOrigin-RevId: 341945130 Change-Id: I77efd120a3daf6277a65702a9de7679329745764 --- tensorflow/core/profiler/lib/traceme.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tensorflow/core/profiler/lib/traceme.h b/tensorflow/core/profiler/lib/traceme.h index 976fcfc82dd..98c7edc046d 100644 --- a/tensorflow/core/profiler/lib/traceme.h +++ b/tensorflow/core/profiler/lib/traceme.h @@ -143,6 +143,19 @@ class TraceMe { #endif } + // Movable. + TraceMe(TraceMe&& other) { *this = std::move(other); } + TraceMe& operator=(TraceMe&& other) { +#if !defined(IS_MOBILE_PLATFORM) + if (TF_PREDICT_FALSE(other.start_time_ != kUntracedActivity)) { + new (&no_init_.name) std::string(std::move(other.no_init_.name)); + other.no_init_.name.~string(); + start_time_ = std::exchange(other.start_time_, kUntracedActivity); + } +#endif + return *this; + } + ~TraceMe() { Stop(); } // Stop tracing the activity. Called by the destructor, but exposed to allow From 5a0ed634afbdc95a9524b5344e8a7b6c6621c3b7 Mon Sep 17 00:00:00 2001 From: Ruoxin Sang Date: Wed, 11 Nov 2020 17:34:10 -0800 Subject: [PATCH 210/220] Always enable get_next_as_optional unless the dataset is finite. 
PiperOrigin-RevId: 341945136 Change-Id: I79fdec366be2119b6a28063f193e6cecb7a5f9e2 --- tensorflow/python/distribute/input_lib.py | 3 +- .../python/distribute/input_lib_test.py | 30 +++++++++---------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/tensorflow/python/distribute/input_lib.py b/tensorflow/python/distribute/input_lib.py index 390d2612753..ba5590e8d10 100644 --- a/tensorflow/python/distribute/input_lib.py +++ b/tensorflow/python/distribute/input_lib.py @@ -2147,7 +2147,8 @@ def _enable_get_next_as_optional(strategy, dataset): # dataset is created in eager mode, as we need to evaluate the dataset # cardinality. with ops.device(dataset._variant_tensor.device): # pylint: disable=protected-access - return dataset.cardinality().numpy() != cardinality.INFINITE + if dataset.cardinality().numpy() == cardinality.INFINITE: + return False return not _is_statically_shaped( dataset.element_spec) or strategy.extended._in_multi_worker_mode() # pylint: disable=protected-access diff --git a/tensorflow/python/distribute/input_lib_test.py b/tensorflow/python/distribute/input_lib_test.py index 442dabfd02e..8a85f96d4b1 100644 --- a/tensorflow/python/distribute/input_lib_test.py +++ b/tensorflow/python/distribute/input_lib_test.py @@ -1118,21 +1118,21 @@ class DistributedIteratorTensorTypeTest(DistributedIteratorTestBase, except (StopIteration, errors.OutOfRangeError): return sums - expected_for_sum = 200. - if (not drop_remainder or input_type == "input_fn"): - expected_for_sum = 310. while_sums = sum_while_loop( iter(dataset), defun(lambda state, iterator: _reduce(state, next(iterator)))) - self.assertAllEqual(nest.flatten(while_sums), [expected_for_sum] * 3) - + self.assertAllEqual( + nest.flatten(while_sums), + # When there's no partial batch, the sum is smaller. + [200. if drop_remainder else 310.] 
* 3) + for_sums = defun(sum_for_loop)(dataset) # For loops always call get next as optional inside tf functions, so we # expect 310 here when using an input function (as there are 5 batches of # size 4 round robined over 2 replicas. expected_for_sum = 200. - if (not drop_remainder or input_type == "input_fn"): + if (not drop_remainder or ( + defun_type == "tf_function" and input_type == "input_fn")): expected_for_sum = 310. - for_sums = defun(sum_for_loop)(dataset) self.assertAllEqual(nest.flatten(for_sums), [expected_for_sum] * 3) @combinations.generate( @@ -1146,12 +1146,12 @@ class DistributedIteratorTensorTypeTest(DistributedIteratorTestBase, ], input_type=["dataset", "input_fn"], drop_remainder=[False, True], - repeat=[False, True], tensor_type=["sparse", "ragged"], - enable_get_next_as_optional=[True, False])) - def testRaggedSparseGetNextAsOptional(self, distribution, input_type, - drop_remainder, repeat, tensor_type, - enable_get_next_as_optional): + enable_get_next_as_optional=[True, False] + )) + def testRaggedSparseGetNextAsOptional( + self, distribution, input_type, drop_remainder, tensor_type, + enable_get_next_as_optional): """Test with `RaggedTensor`s and `SparseTensor`s.""" if not tf2.enabled(): self.skipTest("Only V2 is supported.") @@ -1172,8 +1172,6 @@ class DistributedIteratorTensorTypeTest(DistributedIteratorTestBase, ragged_tensor.to_sparse()), }) dataset = dataset.shard(ctx.num_input_pipelines, ctx.input_pipeline_id) - if repeat: - dataset = dataset.repeat() return dataset.batch(batch_size, drop_remainder=drop_remainder) if input_type == "dataset": @@ -1183,8 +1181,8 @@ class DistributedIteratorTensorTypeTest(DistributedIteratorTestBase, ds = distribution.distribute_datasets_from_function(dataset_fn) iterator = iter(ds) - self.assertEqual(iterator._enable_get_next_as_optional, (not repeat) and - enable_get_next_as_optional) + self.assertEqual(iterator._enable_get_next_as_optional, + (not drop_remainder) and enable_get_next_as_optional) 
@combinations.generate( combinations.combine( From a802acb1efe4bb9d912babb8175f3cefd7bf7ec8 Mon Sep 17 00:00:00 2001 From: Peng Wang Date: Wed, 11 Nov 2020 17:56:06 -0800 Subject: [PATCH 211/220] Add absl::Cord support to open source TensorFlow PiperOrigin-RevId: 341948827 Change-Id: Ib9f2d360f99ff24eb02f0e82613eebf7b3c268bd --- tensorflow/core/BUILD | 1 - tensorflow/core/platform/BUILD | 1 - tensorflow/core/platform/default/BUILD | 1 - tensorflow/core/platform/default/cord.h | 2 +- 4 files changed, 1 insertion(+), 4 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 14465980750..9e06a07dd3a 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -343,7 +343,6 @@ cc_library( ":lib_internal", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/strings", - "@com_google_absl//absl/strings:cord", "@com_google_absl//absl/types:optional", ], ) diff --git a/tensorflow/core/platform/BUILD b/tensorflow/core/platform/BUILD index cf6c818d9b7..e5adc12d9fb 100644 --- a/tensorflow/core/platform/BUILD +++ b/tensorflow/core/platform/BUILD @@ -181,7 +181,6 @@ cc_library( compatible_with = get_compatible_with_portable(), deps = [ ":platform", - "@com_google_absl//absl/strings:cord", ] + tf_platform_deps("cord"), ) diff --git a/tensorflow/core/platform/default/BUILD b/tensorflow/core/platform/default/BUILD index 74370988d07..65de6121c6f 100644 --- a/tensorflow/core/platform/default/BUILD +++ b/tensorflow/core/platform/default/BUILD @@ -41,7 +41,6 @@ cc_library( "no_oss", "nobuilder", ], - deps = ["@com_google_absl//absl/strings:cord"], ) cc_library( diff --git a/tensorflow/core/platform/default/cord.h b/tensorflow/core/platform/default/cord.h index f6e0391f254..5823374d1a0 100644 --- a/tensorflow/core/platform/default/cord.h +++ b/tensorflow/core/platform/default/cord.h @@ -16,6 +16,6 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_PLATFORM_DEFAULT_CORD_H_ #define TENSORFLOW_CORE_PLATFORM_DEFAULT_CORD_H_ -#include "absl/strings/cord.h" +// TODO(ebrevdo): Fill this in. #endif // TENSORFLOW_CORE_PLATFORM_DEFAULT_CORD_H_ From 68134a60241cb3b778f2b27699f98eca87bd940a Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Wed, 11 Nov 2020 18:58:32 -0800 Subject: [PATCH 212/220] Support string input in TFLite StridedSlice kernel PiperOrigin-RevId: 341957475 Change-Id: I96c79ba6a95b09861fe90120f3b6431f3d8e3a53 --- tensorflow/compiler/mlir/lite/ir/tfl_ops.td | 4 +- .../compiler/mlir/lite/tests/legalize-tf.mlir | 7 +++ tensorflow/compiler/mlir/lite/tests/ops.mlir | 6 +++ .../internal/reference/strided_slice.h | 31 +++++++++-- tensorflow/lite/kernels/register.cc | 2 +- tensorflow/lite/kernels/strided_slice.cc | 14 +++-- tensorflow/lite/kernels/strided_slice_test.cc | 53 +++++++++++++++++++ .../lite/testing/op_tests/strided_slice.py | 14 +++++ .../lite/tools/versioning/op_version.cc | 3 ++ .../lite/tools/versioning/runtime_version.cc | 1 + 10 files changed, 123 insertions(+), 12 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td index ae2e424ec81..a4f67c5afe9 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td @@ -3405,7 +3405,7 @@ def TFL_StridedSliceOp: TFL_Op<"strided_slice", [ }]; let arguments = (ins - TFL_TensorOf<[F32, I32, I64, I8, UI8, QI8, QUI8, I1, I16, QI16, TFL_Quint8]>:$input, + TFL_TensorOf<[F32, I32, I64, I8, UI8, QI8, QUI8, I1, I16, QI16, TFL_Quint8, TFL_Str]>:$input, TFL_I32Tensor:$begin, TFL_I32Tensor:$end, TFL_I32Tensor:$strides, @@ -3418,7 +3418,7 @@ def TFL_StridedSliceOp: TFL_Op<"strided_slice", [ ); let results = (outs - TFL_TensorOf<[F32, I32, I64, I8, UI8, QI8, QUI8, I1, I16, QI16, TFL_Quint8]>:$output + TFL_TensorOf<[F32, I32, I64, I8, UI8, QI8, QUI8, I1, I16, QI16, TFL_Quint8, TFL_Str]>:$output ); let hasOptions = 1; diff --git 
a/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir index 5e36f4af802..dd8bbdb8372 100644 --- a/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir @@ -1122,6 +1122,13 @@ func @strided_slice_with_constant_attributes(%arg0: tensor<10x10x10xf32>, %arg1: // CHECK-NEXT: "tfl.strided_slice"(%arg0, [[BEGIN]], [[END]], [[STRIDES]]) {begin_mask = 6 : i32, ellipsis_mask = 0 : i32, end_mask = 6 : i32, new_axis_mask = 0 : i32, shrink_axis_mask = 1 : i32} : (tensor<10x10x10xf32>, tensor<3xi32>, tensor<3xi32>, tensor<3xi32>) -> tensor<10x10xf32> } +func @strided_slice_with_string(%arg0: tensor<12x2x2x5x!tf.string>, %arg1: tensor<1xi32>, %arg2: tensor<1xi32>, %arg3: tensor<1xi32>) -> tensor<1x2x2x5x!tf.string> { + %0 = "tf.StridedSlice"(%arg0, %arg1, %arg2, %arg3) {begin_mask = 0 : i64, ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<12x2x2x5x!tf.string>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1x2x2x5x!tf.string> + return %0 : tensor<1x2x2x5x!tf.string> + // CHECK-LABEL: strided_slice_with_string + // CHECK: "tfl.strided_slice"(%arg0, %arg1, %arg2, %arg3) {begin_mask = 0 : i32, ellipsis_mask = 0 : i32, end_mask = 0 : i32, new_axis_mask = 0 : i32, shrink_axis_mask = 0 : i32} : (tensor<12x2x2x5x!tf.string>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1x2x2x5x!tf.string> +} + func @slice1Tensor(%arg0: tensor<2x3x5xf32>, %arg1: tensor<3xi32>, %arg2: tensor<3xi32>) -> tensor { %0 = "tf.Slice"(%arg0, %arg1, %arg2) : (tensor<2x3x5xf32>, tensor<3xi32>, tensor<3xi32>) -> tensor return %0 : tensor diff --git a/tensorflow/compiler/mlir/lite/tests/ops.mlir b/tensorflow/compiler/mlir/lite/tests/ops.mlir index 3a98f6db0c4..a3aea7bd593 100644 --- a/tensorflow/compiler/mlir/lite/tests/ops.mlir +++ b/tensorflow/compiler/mlir/lite/tests/ops.mlir @@ -1458,6 +1458,12 @@ func 
@testStridedSliceTFType(%arg0: tensor<12x2x2x5xui8>, %arg1: tensor<1xi32>, return %0 : tensor<1x2x2x5x!tf.quint8> } +// CHECK-LABEL: testStridedSliceWithString +func @testStridedSliceWithString(%arg0: tensor<12x2x2x5x!tf.string>, %arg1: tensor<1xi32>, %arg2: tensor<1xi32>, %arg3: tensor<1xi32>) -> tensor<1x2x2x5x!tf.string> { + %0 = "tfl.strided_slice"(%arg0, %arg1, %arg2, %arg3) {begin_mask = 0 : i32, ellipsis_mask = 0 : i32, end_mask = 0 : i32, new_axis_mask = 0 : i32, shrink_axis_mask = 0 : i32} : (tensor<12x2x2x5x!tf.string>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1x2x2x5x!tf.string> + return %0 : tensor<1x2x2x5x!tf.string> +} + // ----- func @testStridedSliceWithInvalidOutputType(%arg0: tensor<12x2x2x5xf32>, %arg1: tensor<1xi32>, %arg2: tensor<1xi32>, %arg3: tensor<1xi32>) -> tensor<1x2x2x5xi32> { diff --git a/tensorflow/lite/kernels/internal/reference/strided_slice.h b/tensorflow/lite/kernels/internal/reference/strided_slice.h index 8b6f0c13da1..24aa798d9c9 100644 --- a/tensorflow/lite/kernels/internal/reference/strided_slice.h +++ b/tensorflow/lite/kernels/internal/reference/strided_slice.h @@ -17,18 +17,19 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/common.h" #include "tensorflow/lite/kernels/internal/compatibility.h" +#include "tensorflow/lite/kernels/internal/portable_tensor.h" #include "tensorflow/lite/kernels/internal/strided_slice_logic.h" #include "tensorflow/lite/kernels/internal/types.h" namespace tflite { namespace reference_ops { + template inline void StridedSlice(const tflite::StridedSliceParams& op_params, const RuntimeShape& unextended_input_shape, - const T* input_data, const RuntimeShape& unextended_output_shape, - T* output_data) { + SequentialTensorWriter* writer) { using strided_slice::LoopCondition; using strided_slice::StartForAxis; using strided_slice::StopForAxis; @@ -57,7 +58,6 @@ inline void StridedSlice(const tflite::StridedSliceParams& op_params, const int start_4 = StartForAxis(params_copy, input_shape, 4); const int stop_4 = StopForAxis(params_copy, input_shape, 4, start_4); - T* out_ptr = output_data; for (int offset_0 = start_0 * input_shape.Dims(1), end_0 = stop_0 * input_shape.Dims(1), step_0 = params_copy.strides[0] * input_shape.Dims(1); @@ -81,13 +81,36 @@ inline void StridedSlice(const tflite::StridedSliceParams& op_params, for (int offset_4 = offset_3 + start_4, end_4 = offset_3 + stop_4; !LoopCondition(offset_4, end_4, params_copy.strides[4]); offset_4 += params_copy.strides[4]) { - *out_ptr++ = input_data[offset_4]; + writer->Write(offset_4); } } } } } } + +template +inline void StridedSlice(const tflite::StridedSliceParams& op_params, + const RuntimeShape& unextended_input_shape, + const T* input_data, + const RuntimeShape& unextended_output_shape, + T* output_data) { + SequentialTensorWriter writer(input_data, output_data); + StridedSlice(op_params, unextended_input_shape, unextended_output_shape, + &writer); +} + +template +inline void StridedSlice(const tflite::StridedSliceParams& op_params, + const RuntimeShape& unextended_input_shape, + const TfLiteTensor* input, + const RuntimeShape& unextended_output_shape, + 
TfLiteTensor* output) { + SequentialTensorWriter writer(input, output); + StridedSlice(op_params, unextended_input_shape, unextended_output_shape, + &writer); +} + } // namespace reference_ops } // namespace tflite diff --git a/tensorflow/lite/kernels/register.cc b/tensorflow/lite/kernels/register.cc index cd0c297a545..9aa14e579d4 100644 --- a/tensorflow/lite/kernels/register.cc +++ b/tensorflow/lite/kernels/register.cc @@ -157,7 +157,7 @@ BuiltinOpResolver::BuiltinOpResolver() { /* max_version = */ 2); AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE(), /* min_version = */ 1, - /* max_version = */ 4); + /* max_version = */ 5); AddBuiltin(BuiltinOperator_EXP, Register_EXP()); AddBuiltin(BuiltinOperator_TOPK_V2, Register_TOPK_V2(), /* min_version = */ 1, diff --git a/tensorflow/lite/kernels/strided_slice.cc b/tensorflow/lite/kernels/strided_slice.cc index d10e99c1997..3f2fd580a0b 100644 --- a/tensorflow/lite/kernels/strided_slice.cc +++ b/tensorflow/lite/kernels/strided_slice.cc @@ -190,11 +190,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } StridedSliceParams op_params = BuildStridedSliceParams(&op_context); -#define TF_LITE_STRIDED_SLICE(kernel_type, data_type) \ - kernel_type::StridedSlice(op_params, GetTensorShape(op_context.input), \ - GetTensorData(op_context.input), \ - GetTensorShape(op_context.output), \ - GetTensorData(op_context.output)) +#define TF_LITE_STRIDED_SLICE(kernel_type, data_type) \ + kernel_type::StridedSlice( \ + op_params, GetTensorShape(op_context.input), op_context.input, \ + GetTensorShape(op_context.output), op_context.output) switch (op_context.input->type) { case kTfLiteFloat32: @@ -232,6 +231,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TF_LITE_STRIDED_SLICE(reference_ops, bool); } break; + case kTfLiteString: + if (kernel_type == kReference) { + TF_LITE_STRIDED_SLICE(reference_ops, string); + } + break; default: TF_LITE_KERNEL_LOG(context, "Type %s is currently not 
supported " diff --git a/tensorflow/lite/kernels/strided_slice_test.cc b/tensorflow/lite/kernels/strided_slice_test.cc index d66cf884474..98521b889f9 100644 --- a/tensorflow/lite/kernels/strided_slice_test.cc +++ b/tensorflow/lite/kernels/strided_slice_test.cc @@ -55,6 +55,9 @@ class StridedSliceOpModel : public SingleOpModel { void SetInput(const std::vector data) { PopulateTensor(input_, data); } + void SetStringInput(std::initializer_list data) { + PopulateStringTensor(input_, data); + } void SetBegin(std::initializer_list data) { PopulateTensor(begin_, data); } @@ -68,6 +71,9 @@ class StridedSliceOpModel : public SingleOpModel { std::vector GetOutput() { return ExtractVector(output_); } + std::vector GetStringOutput() { + return ExtractVector(output_); + } std::vector GetOutputShape() { return GetTensorShape(output_); } private: @@ -692,5 +698,52 @@ TYPED_TEST(StridedSliceOpTest, In3D_Backward) { EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({0, 1, 2})); } +TEST(StridedSliceOpTest, In1D_String_NegativeBegin) { + StridedSliceOpModel m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); + m.SetStringInput({"a", "b", "c", "d"}); + m.SetBegin({-3}); + m.SetEnd({3}); + m.SetStrides({1}); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2})); + EXPECT_THAT(m.GetStringOutput(), ElementsAreArray({"b", "c"})); +} + +TEST(StridedSliceOpTest, In3D_String_BackwardSmallBegin) { + StridedSliceOpModel m({1, 1, 2}, {1}, {1}, {1}, 0, 1, 0, 0, 0); + m.SetStringInput({"a", "b"}); + m.SetBegin({1}); + m.SetEnd({0}); + m.SetStrides({1}); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({0, 1, 2})); +} + +TEST(StridedSliceOpTest, In3D_String_SmallBeginWithhrinkAxis1) { + StridedSliceOpModel m({2, 3, 2}, {1}, {1}, {1}, 0, 0, 0, 0, 1); + m.SetStringInput( + {"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"}); + m.SetBegin({0}); + m.SetEnd({1}); + m.SetStrides({1}); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 2})); + 
EXPECT_THAT(m.GetStringOutput(), + ElementsAreArray({"1", "2", "3", "4", "5", "6"})); +} + +TEST(StridedSliceOpTest, In5D_String_IdentityShrinkAxis1) { + StridedSliceOpModel m({2, 2, 2, 1, 2}, {5}, {5}, {5}, 0, 0, 0, 0, + 1); + m.SetStringInput({"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", + "12", "13", "14", "15", "16"}); + m.SetBegin({0, 0, 0, 0, 0}); + m.SetEnd({2, 1, 2, 1, 2}); + m.SetStrides({1, 1, 1, 1, 1}); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2, 1, 2})); + EXPECT_THAT(m.GetStringOutput(), ElementsAreArray({"1", "2", "3", "4"})); +} + } // namespace } // namespace tflite diff --git a/tensorflow/lite/testing/op_tests/strided_slice.py b/tensorflow/lite/testing/op_tests/strided_slice.py index 3a04354c202..8668e139f34 100644 --- a/tensorflow/lite/testing/op_tests/strided_slice.py +++ b/tensorflow/lite/testing/op_tests/strided_slice.py @@ -230,6 +230,20 @@ def make_strided_slice_tests(options): "shrink_axis_mask": [0], "constant_indices": [True, False], "fully_quantize": [False], + }, + # String input. 
+ { + "dtype": [tf.string], + "index_type": [tf.int32], + "input_shape": [[12, 2, 2, 5]], + "begin": [[0, 0, 0, 0]], + "end": [[8, 2, 2, 3]], + "strides": [[2, 1, 3, 1]], + "begin_mask": [8], + "end_mask": [3], + "shrink_axis_mask": [None, -1], + "constant_indices": [True, False], + "fully_quantize": [False], } ] _make_strided_slice_tests(options, test_parameters, expected_tf_failures=2) diff --git a/tensorflow/lite/tools/versioning/op_version.cc b/tensorflow/lite/tools/versioning/op_version.cc index 6b9ff9c1dcf..1f84c261cdb 100644 --- a/tensorflow/lite/tools/versioning/op_version.cc +++ b/tensorflow/lite/tools/versioning/op_version.cc @@ -387,6 +387,9 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) { return 1; case BuiltinOperator_STRIDED_SLICE: + if (op_sig.input_types.at(0) == TensorType_STRING) { + return 5; + } if (op_sig.options.single_input_op.num_dims > 4) { return 4; } diff --git a/tensorflow/lite/tools/versioning/runtime_version.cc b/tensorflow/lite/tools/versioning/runtime_version.cc index 2e71882f469..fa0b01fc939 100644 --- a/tensorflow/lite/tools/versioning/runtime_version.cc +++ b/tensorflow/lite/tools/versioning/runtime_version.cc @@ -218,6 +218,7 @@ std::string FindMinimumRuntimeVersionForOp(tflite::BuiltinOperator op_code, {{BuiltinOperator_STRIDED_SLICE, 2}, "1.14.0"}, {{BuiltinOperator_STRIDED_SLICE, 3}, "2.1.0"}, {{BuiltinOperator_STRIDED_SLICE, 4}, "2.2.0"}, + {{BuiltinOperator_STRIDED_SLICE, 5}, kPendingReleaseVersion}, {{BuiltinOperator_TOPK_V2, 1}, "1.7.0"}, {{BuiltinOperator_TOPK_V2, 2}, "1.14.0"}, {{BuiltinOperator_ARG_MAX, 1}, "1.9.0"}, From b6a59d53d359c027844fd27f7e4494a1667c0994 Mon Sep 17 00:00:00 2001 From: Chuan He Date: Wed, 11 Nov 2020 19:30:43 -0800 Subject: [PATCH 213/220] treat the stateful variable as normal values when importing flatbuffer. 
PiperOrigin-RevId: 341960782 Change-Id: I5a31b5db041ea1ccc38bd99008cb011399936146 --- .../compiler/mlir/lite/flatbuffer_import.cc | 77 +++++++++++++------ .../mlir/lite/tests/flatbuffer2mlir/lstm.mlir | 6 +- 2 files changed, 59 insertions(+), 24 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_import.cc b/tensorflow/compiler/mlir/lite/flatbuffer_import.cc index 986ee590457..1bcd8fb2744 100644 --- a/tensorflow/compiler/mlir/lite/flatbuffer_import.cc +++ b/tensorflow/compiler/mlir/lite/flatbuffer_import.cc @@ -448,13 +448,54 @@ StatusOr BuildExternalConstOp(const tflite::TensorT& tensor, return op.getOperation(); } +// Gets a constant splat for the given value of type. Requires value to be of +// type static shaped RankedTensorType. `unique_index` is used to get the unique +// value for the attribute. +static mlir::ElementsAttr GetSplat(RankedTensorType type, int unique_index, + OpBuilder builder) { + mlir::Type element_ty = getElementTypeOrSelf(type); + + if (element_ty.isSignlessInteger()) + return DenseElementsAttr::get( + type, builder.getIntegerAttr(element_ty, unique_index)); + + if (element_ty.isa()) + return DenseElementsAttr::get( + type, builder.getFloatAttr(element_ty, unique_index)); + + if (auto qtype = element_ty.dyn_cast()) { + mlir::RankedTensorType new_type = + RankedTensorType::get(type.getShape(), qtype.getStorageType()); + return DenseElementsAttr::get( + new_type, builder.getIntegerAttr(qtype.getStorageType(), unique_index)); + } + llvm_unreachable("unhandled element type"); +} + +// TODO(b/172664358): Creates a new op instead of reusing constant op. +// Creates a constant op to represent stateful variable. The function static +// variable `stateful_variable_idx` is used as a unique value for each constant +// to avoid CSEed. `tensor` is the data structure of flatbuffer. `shaped_type` +// is the ShapedType for the const op. 
+Operation* BuildVariableOp(const tflite::TensorT& tensor, + mlir::RankedTensorType shaped_type, + OpBuilder builder, Location loc) { + static int stateful_variable_idx = 0; + mlir::ElementsAttr value = + GetSplat(shaped_type, stateful_variable_idx++, builder); + if (IsQuantized(tensor)) { + auto op = builder.create( + loc, mlir::TypeAttr::get(shaped_type), value); + return op.getOperation(); + } + auto op = builder.create(loc, value); + return op.getOperation(); +} + StatusOr BuildConstOp(const tflite::TensorT& tensor, const std::vector& buffer, - OpBuilder builder, Location loc) { - if (buffer.empty()) { - return errors::InvalidArgument("Constant's buffer may not be empty"); - } - + bool is_variable, OpBuilder builder, + Location loc) { TF_ASSIGN_OR_RETURN(auto type, GetTensorType(tensor, builder, /*shapeless_are_scalars=*/true, /*is_constant=*/true)); @@ -466,7 +507,9 @@ StatusOr BuildConstOp(const tflite::TensorT& tensor, auto elem_type = shaped_type.getElementType(); mlir::ElementsAttr value; - if (auto float_type = elem_type.dyn_cast()) { + if (is_variable) { + return BuildVariableOp(tensor, shaped_type, builder, loc); + } else if (auto float_type = elem_type.dyn_cast()) { TF_ASSIGN_OR_RETURN(value, ConvertFloatBuffer(shaped_type, float_type, buffer)); } else if (elem_type.isa()) { @@ -846,19 +889,8 @@ StatusOr ConvertSubgraph( GetTensorIndices(subgraph, ordered_input_arrays)); } - // Add state variables to inputs. 
- absl::flat_hash_set input_index_set(func_inputs.begin(), - func_inputs.end()); - for (int i = 0, end = subgraph.tensors.size(); i < end; i++) { - auto& tensor = *subgraph.tensors.at(i); - if (tensor.is_variable && !input_index_set.contains(i)) { - func_inputs.emplace_back(i); - input_index_set.insert(i); - } - } - - for (auto input_or_variable : func_inputs) { - auto& tensor = *subgraph.tensors.at(input_or_variable); + for (int input : func_inputs) { + auto& tensor = *subgraph.tensors.at(input); // TODO(b/138222071) Graph inputs must have static shape per the exporter, // but we cannot differentiate scalars from unranked tensors. // Here we reverse the default assumption that shape = [] means unranked. @@ -889,7 +921,8 @@ StatusOr ConvertSubgraph( } for (auto output : func_outputs) { - const bool is_func_input = input_index_set.contains(output); + const bool is_func_input = std::find(func_inputs.begin(), func_inputs.end(), + output) != func_inputs.end(); bool is_constant = !is_op_output[output] && !is_func_input; // There are 2 cases tensor is scalar when it doesn't have a shape in // flatbuffer: @@ -991,7 +1024,7 @@ StatusOr ConvertSubgraph( ? BuildExternalConstOp(const_tensor, const_tensor.buffer, op_builder, const_loc) : BuildConstOp(const_tensor, buffers[const_tensor.buffer]->data, - op_builder, const_loc); + const_tensor.is_variable, op_builder, const_loc); if (!op_or_err.ok()) { return emitError(const_loc, op_or_err.status().ToString()), op_or_err.status(); @@ -1051,7 +1084,7 @@ StatusOr ConvertSubgraph( ? 
BuildExternalConstOp(const_tensor, const_tensor.buffer, op_builder, const_loc) : BuildConstOp(const_tensor, buffers[const_tensor.buffer]->data, - op_builder, const_loc); + const_tensor.is_variable, op_builder, const_loc); if (!op_or_err.ok()) { return emitError(const_loc, op_or_err.status().ToString()), op_or_err.status(); diff --git a/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/lstm.mlir b/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/lstm.mlir index e50d2cf526b..c56b14b31ba 100644 --- a/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/lstm.mlir +++ b/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/lstm.mlir @@ -8,9 +8,11 @@ func @main(%arg0: tensor<1x4xf32>, %arg1: tensor<4x4xf32>, %arg2: tensor<4x4xf32 return %24 : tensor<1x4xf32> // CHECK-LABEL: main // seperate lines since there is no region for this op. third_party/tensorflow/compiler/mlir/lite/ir/tfl_ops.td: 3252 -// CHECK: %[[RES0:.*]] = "tfl.lstm"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg22, %arg23, %arg18, %arg19, %arg20, %arg21) ( { +// CHECK: %[[RES0:.*]] = "tfl.pseudo_const"() {value = dense<{{.*}}> : tensor<1x4xf32>} : () -> tensor<1x4xf32> +// CHECK: %[[RES1:.*]] = "tfl.pseudo_const"() {value = dense<{{.*}}> : tensor<1x4xf32>} : () -> tensor<1x4xf32> +// CHECK: %[[RES2:.*]] = "tfl.lstm"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %[[RES0]], %[[RES1]], %arg18, %arg19, %arg20, %arg21) ( { // CHECK: }) {cell_clip = 0.000000e+00 : f32, fused_activation_function = "NONE", kernel_type = "FULL", proj_clip = 0.000000e+00 : f32} : (tensor<1x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>, tensor<4xf32>, tensor<4xf32>, tensor<4xf32>, tensor<1x4xf32>, tensor<4xf32>, tensor<4xf32>, tensor<4xf32>, tensor<4x4xf32>, 
tensor<4xf32>, tensor<1x4xf32>, tensor<1x4xf32>, tensor<4xf32>, tensor<4xf32>, tensor<4xf32>, tensor<4xf32>) -> tensor<1x4xf32> -// CHECK: return %[[RES0]] +// CHECK: return %[[RES2]] } From 41485207f39f788907044679e0fbd60cb0ea77d0 Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Wed, 11 Nov 2020 19:44:32 -0800 Subject: [PATCH 214/220] Add annotations for producing and consuming batch in adaptive shared batch scheduler. PiperOrigin-RevId: 341962301 Change-Id: I14f6536c8ca399cd8ff6bafa42d8b8d535a8bd7c --- tensorflow/core/kernels/batching_util/BUILD | 1 + .../adaptive_shared_batch_scheduler.h | 50 +++++++++++++++++-- .../core/profiler/lib/connected_traceme.h | 1 + 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/kernels/batching_util/BUILD b/tensorflow/core/kernels/batching_util/BUILD index 8f233957032..5bbfbddf0d4 100644 --- a/tensorflow/core/kernels/batching_util/BUILD +++ b/tensorflow/core/kernels/batching_util/BUILD @@ -123,6 +123,7 @@ cc_library( ":batch_scheduler", ":periodic_function_dynamic", "//tensorflow/core:lib", + "//tensorflow/core/profiler/lib:connected_traceme", ], ) diff --git a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h index 3e587038005..b6bb90fa5a1 100644 --- a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h +++ b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_ADAPTIVE_SHARED_BATCH_SCHEDULER_H_ #include +#include #include #include #include @@ -34,6 +35,7 @@ limitations under the License. 
#include "tensorflow/core/platform/thread_annotations.h" #include "tensorflow/core/platform/threadpool_interface.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/lib/connected_traceme.h" namespace tensorflow { namespace serving { @@ -277,6 +279,10 @@ class ASBSQueue : public BatchScheduler { // Number of size 1 tasks which could currently be scheduled without failing. size_t SchedulingCapacityLocked() const TF_EXCLUSIVE_LOCKS_REQUIRED(mu_); + // Returns uint64 one greater than was returned by the previous call. + // Context id is reused after std::numeric_limits::max is exhausted. + static uint64 NewTraceMeContextIdForBatch(); + std::shared_ptr> scheduler_; const QueueOptions options_; // Owned by scheduler_. @@ -292,10 +298,11 @@ template class ASBSBatch : public Batch { public: ASBSBatch(ASBSQueue* queue, int64 creation_time_micros, - int64 batch_timeout_micros) + int64 batch_timeout_micros, uint64 traceme_context_id) : queue_(queue), creation_time_micros_(creation_time_micros), - schedulable_time_micros_(creation_time_micros + batch_timeout_micros) {} + schedulable_time_micros_(creation_time_micros + batch_timeout_micros), + traceme_context_id_(traceme_context_id) {} ~ASBSBatch() override {} @@ -305,10 +312,13 @@ class ASBSBatch : public Batch { int64 schedulable_time_micros() const { return schedulable_time_micros_; } + uint64 traceme_context_id() const { return traceme_context_id_; } + private: ASBSQueue* queue_; const int64 creation_time_micros_; const int64 schedulable_time_micros_; + const uint64 traceme_context_id_; TF_DISALLOW_COPY_AND_ASSIGN(ASBSBatch); }; } // namespace internal @@ -505,6 +515,13 @@ void AdaptiveSharedBatchScheduler::CallbackWrapper( const internal::ASBSBatch* batch, AdaptiveSharedBatchScheduler::BatchProcessor callback, bool is_express) { + profiler::TraceMeConsumer trace_me( + [&] { + return profiler::TraceMeEncode( + "ProcessBatch", {{"batch_size_before_padding", batch->size()}}); + }, + 
profiler::ContextType::kAdaptiveSharedBatchScheduler, + batch->traceme_context_id()); int64 start_time = batch->creation_time_micros(); callback(std::unique_ptr>( const_cast*>(batch))); @@ -599,6 +616,7 @@ Status ASBSQueue::Schedule(std::unique_ptr* task) { if (size > SchedulingCapacityLocked()) { return errors::Unavailable("The batch scheduling queue is full"); } + int remaining_batch_size = current_batch_ == nullptr ? options_.max_batch_size @@ -626,11 +644,26 @@ Status ASBSQueue::Schedule(std::unique_ptr* task) { } if (!current_batch_) { num_enqueued_batches_++; - current_batch_ = - new ASBSBatch(this, scheduler_->GetEnv()->NowMicros(), - options_.batch_timeout_micros); + // batch.traceme_context_id connects TraceMeProducer and + // TraceMeConsumer. + // When multiple calls to "ASBS::Schedule" accumulate to one batch, they + // are processed in the same batch and should share traceme_context_id. + current_batch_ = new ASBSBatch( + this, scheduler_->GetEnv()->NowMicros(), + options_.batch_timeout_micros, NewTraceMeContextIdForBatch()); new_batches.push_back(current_batch_); } + + // Annotate each task (corresponds to one call of schedule) with a + // TraceMeProducer. + profiler::TraceMeProducer trace_me( + [task_size = task->size()] { + return profiler::TraceMeEncode( + "ASBSQueue::Schedule", + {{"batching_input_task_size", task_size}}); + }, + profiler::ContextType::kAdaptiveSharedBatchScheduler, + this->current_batch_->traceme_context_id()); current_batch_->AddTask(std::move(task)); num_enqueued_tasks_++; // If current_batch_ is now full, allow it to be processed immediately. 
@@ -683,6 +716,13 @@ size_t ASBSQueue::SchedulingCapacityLocked() const { options_.max_enqueued_batches - num_enqueued_batches_; return spare_batches * options_.max_batch_size + current_batch_capacity; } + +template +// static +uint64 ASBSQueue::NewTraceMeContextIdForBatch() { + static std::atomic traceme_context_id(0); + return traceme_context_id.fetch_add(1, std::memory_order_relaxed); +} } // namespace internal } // namespace serving } // namespace tensorflow diff --git a/tensorflow/core/profiler/lib/connected_traceme.h b/tensorflow/core/profiler/lib/connected_traceme.h index b55c4407fe6..fa137d8913d 100644 --- a/tensorflow/core/profiler/lib/connected_traceme.h +++ b/tensorflow/core/profiler/lib/connected_traceme.h @@ -30,6 +30,7 @@ enum class ContextType : int { kTfExecutor, kSharedBatchScheduler, kPjRt, + kAdaptiveSharedBatchScheduler, }; /* From 07baead493562b244b2731e37a40415c96cb588d Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Wed, 11 Nov 2020 20:08:18 -0800 Subject: [PATCH 215/220] Expose eager_mode() to tf.__internal__ API. PiperOrigin-RevId: 341964986 Change-Id: I00a1d6f8ec600a792fc49a8a2ac3155fb9d8ce67 --- tensorflow/python/eager/context.py | 2 ++ .../api/golden/v2/tensorflow.__internal__.eager_context.pbtxt | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index fc6a744a673..e02e616a60a 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -2026,6 +2026,8 @@ def graph_mode(): return context()._mode(GRAPH_MODE) # pylint: disable=protected-access +# Used by b/167638505 for keras backend API and Lambda layer. 
+@tf_export("__internal__.eager_context.eager_mode", v1=[]) def eager_mode(): """Context-manager to enable eager execution for the current thread.""" return context()._mode(EAGER_MODE) # pylint: disable=protected-access diff --git a/tensorflow/tools/api/golden/v2/tensorflow.__internal__.eager_context.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.__internal__.eager_context.pbtxt index 70c4d74f936..8645bf71a0c 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.__internal__.eager_context.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.__internal__.eager_context.pbtxt @@ -1,5 +1,9 @@ path: "tensorflow.__internal__.eager_context" tf_module { + member_method { + name: "eager_mode" + argspec: "args=[], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_config" argspec: "args=[], varargs=None, keywords=None, defaults=None" From 8ecaaaf6efe577e4469ddf6b720b1d319d137a4e Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Wed, 11 Nov 2020 20:46:30 -0800 Subject: [PATCH 216/220] Remove unused deps of keras from docstring test. PiperOrigin-RevId: 341968734 Change-Id: I46b25629338c62d4d03eebc7e4e3c10b35a6be52 --- tensorflow/python/keras/preprocessing/BUILD | 2 -- tensorflow/tools/docs/BUILD | 1 - 2 files changed, 3 deletions(-) diff --git a/tensorflow/python/keras/preprocessing/BUILD b/tensorflow/python/keras/preprocessing/BUILD index 665a4bbcce7..c74c248b5cc 100644 --- a/tensorflow/python/keras/preprocessing/BUILD +++ b/tensorflow/python/keras/preprocessing/BUILD @@ -5,8 +5,6 @@ load("//tensorflow:tensorflow.bzl", "tf_py_test") package( default_visibility = [ - # TODO(scottzhu): Remove non-keras deps from TF. 
- "//tensorflow/tools/docs:__pkg__", "//tensorflow/python/keras:__subpackages__", ], licenses = ["notice"], # Apache 2.0 diff --git a/tensorflow/tools/docs/BUILD b/tensorflow/tools/docs/BUILD index 6adc0a73610..72374a8b51c 100644 --- a/tensorflow/tools/docs/BUILD +++ b/tensorflow/tools/docs/BUILD @@ -78,7 +78,6 @@ tpu_py_test( deps = [ ":tf_doctest_lib", "//tensorflow:tensorflow_py", - "//tensorflow/python/keras/preprocessing", "//third_party/py/numpy", ], ) From 4fee5e9fa5a6efe1180f9c3e108083ecf9fae93a Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Wed, 11 Nov 2020 21:08:39 -0800 Subject: [PATCH 217/220] Add dummy rule for keras_preprocessing so that it can be swapped by copybara in OSS. PiperOrigin-RevId: 341971051 Change-Id: I1ae5bc36262499ea99716710eeeff194d5b427e8 --- tensorflow/python/keras/BUILD | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index 2aceafa2760..7031d54951e 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -666,6 +666,14 @@ py_library( visibility = ["//visibility:public"], ) +py_library( + name = "expect_keras_preprocessing_installed", + # This is a dummy rule used as a keras_preprocessing dependency in open-source. + # We expect keras_preprocessing to already be installed on the system, e.g. via + # `pip install keras_preprocessing` + visibility = ["//visibility:public"], +) + py_library( name = "expect_numpy_installed", # This is a dummy rule used as a numpy dependency in open-source. 
From 4c7aeb0a0c3ffd4ec547efb37719c62a09137031 Mon Sep 17 00:00:00 2001 From: Jaesung Chung Date: Wed, 11 Nov 2020 22:32:56 -0800 Subject: [PATCH 218/220] Fix crashing on unknown rank tensors at creating protobuf for converter backend PiperOrigin-RevId: 341981639 Change-Id: I50b229d622beefca1abcfe6db6d9e77f0591902d --- tensorflow/lite/python/convert.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tensorflow/lite/python/convert.py b/tensorflow/lite/python/convert.py index 68f49d50498..2e9b1a6de12 100644 --- a/tensorflow/lite/python/convert.py +++ b/tensorflow/lite/python/convert.py @@ -492,15 +492,16 @@ def build_toco_convert_protos(input_tensors, else: shape = input_shapes[idx] - # Create shapes with -1 for unknown dimensions. - dims = [] - for dim in shape: - if (dim is None or - (isinstance(dim, tensor_shape.Dimension) and dim.value is None)): - dims.append(-1) - else: - dims.append(int(dim)) - input_array.shape.dims.extend(dims) + if shape.rank is not None: + # Create shapes with -1 for unknown dimensions. + dims = [] + for dim in shape: + if (dim is None or + (isinstance(dim, tensor_shape.Dimension) and dim.value is None)): + dims.append(-1) + else: + dims.append(int(dim)) + input_array.shape.dims.extend(dims) for output_tensor in output_tensors: if saved_model_dir: From 6887e936ced820e57f01ace19f364e27c49916bc Mon Sep 17 00:00:00 2001 From: Yanhui Liang Date: Wed, 11 Nov 2020 23:00:44 -0800 Subject: [PATCH 219/220] Expose the symbol `context.is_tfrt_enabled` as tf.__internal__ API. 
PiperOrigin-RevId: 341984239 Change-Id: I83e2f7d1bfdf8791d1ee61619c61f056205cb63c --- tensorflow/python/eager/context.py | 3 +++ tensorflow/tools/api/golden/v2/tensorflow.__internal__.pbtxt | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index e02e616a60a..56541584c0c 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -78,6 +78,9 @@ _python_eager_context_create_counter = monitoring.Counter( # Re-exporting through context. is_tfrt_enabled = tfrt_utils.enabled +# Expose it as internally public APIs for Keras use cases in b/171080602. +tf_export("__internal__.is_tfrt_enabled", v1=[])(is_tfrt_enabled) + class _EagerTensorCache(object): """Simple cache which evicts items based on length in a FIFO manner.""" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.__internal__.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.__internal__.pbtxt index f8e51e8918b..22eccb3a750 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.__internal__.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.__internal__.pbtxt @@ -64,4 +64,8 @@ tf_module { name: "get_name_scope" argspec: "args=[], varargs=None, keywords=None, defaults=None" } + member_method { + name: "is_tfrt_enabled" + argspec: "args=[], varargs=None, keywords=None, defaults=None" + } } From f481df18f7a6da94229b7ff84b28720795925aa4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 12 Nov 2020 00:06:28 -0800 Subject: [PATCH 220/220] Minor description fix. 
PiperOrigin-RevId: 341991277 Change-Id: Ib5b5bf897aa00c6238774bfbe0a7c24cf225ffb4 --- tensorflow/python/distribute/coordinator/cluster_coordinator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/distribute/coordinator/cluster_coordinator.py b/tensorflow/python/distribute/coordinator/cluster_coordinator.py index 3db84a1060b..ca330b88ed0 100644 --- a/tensorflow/python/distribute/coordinator/cluster_coordinator.py +++ b/tensorflow/python/distribute/coordinator/cluster_coordinator.py @@ -1001,7 +1001,7 @@ class ClusterCoordinator(object): This method is non-blocking in that it queues the `fn` which will be executed later and returns a `tf.distribute.experimental.coordinator.RemoteValue` object immediately. - `fetch` can be called on the it to wait for the function execution to finish + `fetch` can be called on it to wait for the function execution to finish and retrieve its output from a remote worker. On the other hand, call `tf.distribute.experimental.coordinator.ClusterCoordinator.join` to wait for all scheduled functions to finish.