From 157209a692281127c4303c3634e3f34a2470cf52 Mon Sep 17 00:00:00 2001 From: Nat Jeffries Date: Tue, 7 Apr 2020 12:10:32 -0700 Subject: [PATCH] Add check to ensure SVDF input size is a multiple of two. This is required for optimized kernels for some platforms with alignment constraints. PiperOrigin-RevId: 305312483 Change-Id: Idab2dd7fe7dbc3abbd5e24c3e016f4e40ee3c00c --- tensorflow/lite/micro/kernels/svdf_test.cc | 179 +++++++++++++++--- .../lite/micro/kernels/xtensa_hifi/svdf.cc | 6 + .../micro/kernels/xtensa_hifimini/svdf.cc | 6 + 3 files changed, 162 insertions(+), 29 deletions(-) diff --git a/tensorflow/lite/micro/kernels/svdf_test.cc b/tensorflow/lite/micro/kernels/svdf_test.cc index cb0270822f2..179cafd152c 100644 --- a/tensorflow/lite/micro/kernels/svdf_test.cc +++ b/tensorflow/lite/micro/kernels/svdf_test.cc @@ -25,7 +25,8 @@ namespace tflite { namespace testing { namespace { -static float svdf_input[] = { +// naming as follows: svdf_<tensor name>_<input size>x<batch size>x<batch count> +static float svdf_input_3x2x10[] = { 0.12609188, -0.46347019, -0.89598465, 0.35867718, 0.36897406, 0.73463392, @@ -57,7 +58,49 @@ static float svdf_input[] = { -0.6230064, 0.29819036, 1.06939757, }; -static float svdf_golden_output_rank_1[] = { +static float svdf_input_2x2x10[] = { + 0.12609188, -0.46347019, 0.35867718, 0.36897406, + + 0.14278367, -1.64410412, -0.57290924, 0.12729003, + + 0.49837467, 0.19278903, 0.17660543, 0.52949083, + + -0.11186574, 0.13164264, -0.72674477, -0.5683046, + + -0.68892461, 0.37783599, -0.63690937, 0.44483393, + + -0.81299269, -0.86831826, -0.95760226, 1.82078898, + + -1.45006323, -0.82251364, -1.65087092, -1.89238167, + + 0.03966608, -0.24936394, 2.06740379, -1.51439476, + + 0.11771342, -0.23761693, 0.31088525, -1.55601168, + + -0.89477462, 1.67204106, -0.6230064, 0.29819036, +}; + +static float svdf_golden_output_2x2x30_rank_1[] = { + -0.044205, -0.013757, 0.050369, -0.018447, 0.073010, 0.025142, -0.021154, + 0.013551, -0.209613, -0.062421, 0.150209, -0.108334, 0.028256, -0.006950, +
-0.030885, 0.009603, -0.076800, -0.037075, -0.087198, -0.155183, 0.091069, + 0.098446, -0.016083, 0.106475, -0.082123, -0.162238, -0.084434, -0.141074, + -0.029340, -0.090685, 0.053302, -0.030604, -0.201440, 0.088424, 0.139877, + 0.012416, -0.113212, 0.103893, -0.100842, 0.122780, -0.166632, -0.116705, + 0.175298, -0.047163, 0.313077, -0.166485, -0.285860, 0.129069, -0.625911, + 0.046134, 0.138081, -0.129581, -0.521455, -0.061579, 0.230289, 0.114963, + -0.216693, -0.161643, -0.179177, -0.052599, -0.213239, 0.029502, 0.260858, + 0.275045, -0.213689, -0.323608, -0.285635, -0.317687, -0.324092, -0.317972, + -0.208450, -0.462504, -0.255126, -0.218576, -0.041528, 0.179421, -0.440583, + 0.072127, -0.284136, 0.241570, -0.582490, 0.253004, 0.156972, 0.132266, + -0.175340, -0.269495, -0.005782, -0.125683, -0.461215, 0.257511, 0.340125, + 0.140569, -0.866940, -0.075565, 0.484422, 0.018665, 0.059312, -0.006378, + -0.465532, 0.291374, -0.182749, 0.232608, 0.479811, 0.541274, 0.286369, + -0.188810, -0.011561, 0.022947, 0.451862, 0.214710, -0.367849, -0.722380, + -0.072298, -0.270524, -0.083401, -0.038342, -0.035884, -0.565247, -0.427794, + 0.015071}; + +static float svdf_golden_output_3x2x10_rank_1[] = { 0.014899, -0.0517661, -0.143725, -0.00271883, -0.03004015, 0.09565311, 0.1587342, 0.00784263, @@ -89,7 +132,7 @@ static float svdf_golden_output_rank_1[] = { 0.17012937, -0.34447709, 0.38505614, -0.28158101, }; -static float svdf_golden_output_rank_2[] = { +static float svdf_golden_output_3x2x10_rank_2[] = { -0.09623547, -0.10193135, 0.11083051, -0.0347917, 0.1141196, 0.12965347, -0.12652366, 0.01007236, @@ -160,7 +203,12 @@ void ValidateSVDFGoldens(const int batch_size, const int num_units, node.custom_initial_data_size = 0; node.delegate = nullptr; if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); + TfLiteStatus prepare_status = registration->prepare(&context, &node); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, 
prepare_status); + // Abort early to make it clear prepare failed. + if (prepare_status != kTfLiteOk) { + return; + } } TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); @@ -171,14 +219,18 @@ void ValidateSVDFGoldens(const int batch_size, const int num_units, float* input_batch_end = input_batch_start + input_size * batch_size; PopulateFloatTensor(&tensors[0], input_batch_start, input_batch_end); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + TfLiteStatus status = registration->invoke(&context, &node); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, status); - int output_idx = 0; - int golden_idx = i * batch_size * num_units; - for (int j = golden_idx; j < golden_idx + batch_size * num_units; ++j) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output[j], output_data[output_idx], - tolerance); - output_idx++; + // Only validate outputs when invoke has succeeded. + if (status == kTfLiteOk) { + int output_idx = 0; + int golden_idx = i * batch_size * num_units; + for (int j = golden_idx; j < golden_idx + batch_size * num_units; ++j) { + TF_LITE_MICRO_EXPECT_NEAR(expected_output[j], output_data[output_idx], + tolerance); + output_idx++; + } } } @@ -226,7 +278,12 @@ void ValidateIntegerSVDFGoldens(const int batch_size, const int num_units, node.delegate = nullptr; if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); + TfLiteStatus prepare_status = registration->prepare(&context, &node); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, prepare_status); + // Abort early to make it clear prepare failed. 
+ if (prepare_status != kTfLiteOk) { + return; + } } TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); @@ -398,7 +455,7 @@ inline void TestIntegerSVDF( TF_LITE_MICRO_TESTS_BEGIN -TF_LITE_MICRO_TEST(BlackBoxTestRank1) { +TF_LITE_MICRO_TEST(SvdfFloatInputSize3Rank1ShouldMatchGolden) { constexpr int batch_size = 2; constexpr int num_units = 4; constexpr int input_size = 3; @@ -440,12 +497,12 @@ TF_LITE_MICRO_TEST(BlackBoxTestRank1) { tflite::testing::TestSVDF( batch_size, num_units, input_size, memory_size, rank, input_data, weights_feature_data, weights_time_data, activation_state_data, - scratch_data, output_data, tflite::testing::svdf_input, - sizeof(tflite::testing::svdf_input), - tflite::testing::svdf_golden_output_rank_1); + scratch_data, output_data, tflite::testing::svdf_input_3x2x10, + sizeof(tflite::testing::svdf_input_3x2x10), + tflite::testing::svdf_golden_output_3x2x10_rank_1); } -TF_LITE_MICRO_TEST(BlackBoxTestRank2) { +TF_LITE_MICRO_TEST(SvdfFloatInputSize3Rank2ShouldMatchGolden) { constexpr int batch_size = 2; constexpr int num_units = 4; constexpr int input_size = 3; @@ -500,15 +557,75 @@ TF_LITE_MICRO_TEST(BlackBoxTestRank2) { tflite::testing::TestSVDF( batch_size, num_units, input_size, memory_size, rank, input_data, weights_feature_data, weights_time_data, activation_state_data, - scratch_data, output_data, tflite::testing::svdf_input, - sizeof(tflite::testing::svdf_input), - tflite::testing::svdf_golden_output_rank_2); + scratch_data, output_data, tflite::testing::svdf_input_3x2x10, + sizeof(tflite::testing::svdf_input_3x2x10), + tflite::testing::svdf_golden_output_3x2x10_rank_2); } -TF_LITE_MICRO_TEST(BlackBoxTestIntegerRank1) { +TF_LITE_MICRO_TEST(SvdfFloatInputSize2Rank1ShouldMatchGolden) { constexpr int batch_size = 2; constexpr int num_units = 4; - constexpr int input_size = 3; + constexpr int input_size = 2; + constexpr int memory_size = 10; + constexpr int rank = 2; + constexpr int num_filters = num_units * rank; + + float 
weights_feature_data[] = { + -0.31930989, 0.0079667, 0.39296314, 0.37613347, 0.12416199, + 0.15785322, 0.27901134, 0.3905206, 0.21931258, -0.36137494, + -0.10640851, 0.31053296, -0.36118156, -0.0976817, -0.36916667, + 0.22197971, 0.15294972, 0.38031587, 0.27557442, 0.39635518, + -0.21580373, -0.06634006, -0.02702999, 0.27072677}; + + float weights_time_data[] = { + -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156, + 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199, + + 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518, + -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296, + + -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236, + 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846, + + -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166, + -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657, + + -0.14884081, 0.19931212, -0.36002168, 0.34663299, -0.11405486, + 0.12672701, 0.39463779, -0.07886535, -0.06384811, 0.08249187, + + -0.26816407, -0.19905911, 0.29211238, 0.31264046, -0.28664589, + 0.05698794, 0.11613581, 0.14078894, 0.02187902, -0.21781836, + + -0.15567942, 0.08693647, -0.38256618, 0.36580828, -0.22922277, + -0.0226903, 0.12878349, -0.28122205, -0.10850525, -0.11955214, + + 0.27179423, -0.04710215, 0.31069002, 0.22672787, 0.09580326, + 0.08682203, 0.1258215, 0.1851041, 0.29228821, 0.12366763}; + + const int input_size_dims_count = batch_size * input_size; + float input_data[input_size_dims_count]; + + const int activation_state_dims_count = + batch_size * memory_size * num_filters; + float activation_state_data[activation_state_dims_count]; + const int scratch_dims_count = batch_size * num_filters; + float scratch_data[scratch_dims_count]; + + const int output_dims_count = batch_size * num_units; + float output_data[output_dims_count]; + + tflite::testing::TestSVDF( + batch_size, num_units, input_size, memory_size, rank, input_data, + weights_feature_data, weights_time_data, 
activation_state_data, + scratch_data, output_data, tflite::testing::svdf_input_2x2x10, + sizeof(tflite::testing::svdf_input_2x2x10), + tflite::testing::svdf_golden_output_2x2x30_rank_1); +} + +TF_LITE_MICRO_TEST(SvdfIntegerInputSize2Rank1ShouldMatchGolden) { + constexpr int batch_size = 2; + constexpr int num_units = 4; + constexpr int input_size = 2; constexpr int memory_size = 10; constexpr int rank = 1; constexpr int num_filters = num_units * rank; @@ -537,13 +654,17 @@ TF_LITE_MICRO_TEST(BlackBoxTestIntegerRank1) { }; int8_t expected_output[] = { - -9, 24, 31, 1, -10, 10, -3, 0, 2, 4, -44, -7, -10, 32, - 52, 1, 12, -17, 9, -8, 7, 16, -11, -8, -26, 29, 28, 16, - -23, 26, 30, -6, -8, -25, -86, -5, -44, 59, 81, 15, 62, -16, - -37, 3, 27, 14, 34, -10, 1, 24, -25, 23, 31, 61, 67, 11, - -64, -65, -128, -25, -53, 59, 127, 20, 20, -29, -20, -15, -28, 0, - 8, -27, 54, 61, -67, 38, 38, 64, 115, 0, -44, -75, -128, -20, - -19, 93, 101, 35, -5, -56, 30, -18, -40, -9, -8, -31, + -9, 9, 18, -2, -6, 8, 13, -2, 2, -16, 2, 5, 2, -7, + 0, 3, 7, 0, 5, 7, -11, 18, 30, 0, -9, -24, 14, -12, + -1, 1, -20, 2, -19, -20, 20, -13, -1, -10, 50, 4, 26, 32, + 2, -12, -12, 11, -10, -29, 50, -61, 4, 15, 19, -39, 13, 19, + -56, 49, 12, 13, 29, -3, -4, -22, -76, -29, -14, 38, -30, -30, + 27, 0, 39, 16, 49, -14, -18, 28, -35, 11, 45, 0, -13, -61, + 34, -80, 37, 26, 15, -23, 12, 15, 18, 83, -28, -21, -27, -48, + 17, 2, -113, -52, 9, 48, -4, -1, 15, -7, 39, 16, 49, -14, + -18, 28, -35, 11, 45, 0, -13, -61, 34, -80, 37, 26, 15, -23, + 12, 15, 18, 83, -28, -21, -27, -48, 17, 2, -113, -52, 9, 48, + -4, -1, 15, -7, }; const int input_size_dims_count = batch_size * input_size; diff --git a/tensorflow/lite/micro/kernels/xtensa_hifi/svdf.cc b/tensorflow/lite/micro/kernels/xtensa_hifi/svdf.cc index d79f53ccd56..f21e6d67921 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifi/svdf.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifi/svdf.cc @@ -385,6 +385,12 @@ TfLiteStatus Prepare(TfLiteContext* context, 
TfLiteNode* node) { const int rank = params->rank; const int input_size = input->dims->data[1]; const int batch_size = input->dims->data[0]; + // Ensure the input size is a multiple of two. This is necessary since + // optimized kernels access the memory in chunks of two, and all accesses + // must be aligned to 16 bits. + // TODO(b/153202598): Remove when padding is allowed in TFLite tensors. + TF_LITE_ENSURE_EQ(context, input_size % 2, 0); + const int num_filters = weights_feature->dims->data[0]; TF_LITE_ENSURE_EQ(context, num_filters % rank, 0); const int num_units = num_filters / rank; diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc index 98abf0ad476..53ee9d70b64 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc @@ -282,6 +282,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const int rank = params->rank; const int input_size = input->dims->data[1]; const int batch_size = input->dims->data[0]; + // Ensure the input size is a multiple of two. This is necessary since + // optimized kernels access the memory in chunks of two, and all accesses + // must be aligned to 16 bits. + // TODO(b/153202598): Remove when padding is allowed in TFLite tensors. + TF_LITE_ENSURE_EQ(context, input_size % 2, 0); + const int num_filters = weights_feature->dims->data[0]; TF_LITE_ENSURE_EQ(context, num_filters % rank, 0); const int num_units = num_filters / rank;