diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index 1ba500fd61b..a1003a84201 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -69,7 +69,6 @@ cc_library( "xtensa_hifimini/quantize.cc", "xtensa_hifimini/softmax.cc", "xtensa_hifimini/svdf.cc", - "xtensa_hifimini/utils.h", ], }), hdrs = ["micro_ops.h"], diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc index 7a31eb77491..03eba5082af 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc @@ -25,7 +25,6 @@ limitations under the License. #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" #include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h" -#include "tensorflow/lite/micro/kernels/xtensa_hifimini/utils.h" namespace tflite { namespace ops { @@ -66,7 +65,7 @@ void ConvPerChannel(const ConvParams& params, const int32* output_multiplier, const int output_width = output_shape.Dims(2); const int output_depth = output_shape.Dims(3); - ae_p24x2s input_offset_24x2 = AE_CONVERT_INT32_24x2(input_offset); + ae_p24x2s input_offset_24x2 = AE_MOVPA24(input_offset); ae_q56s output_offset_56 = AE_CVTQ48A32S(output_offset); ae_q56s output_activation_min_56 = AE_CVTQ48A32S(output_activation_min); ae_q56s output_activation_max_56 = AE_CVTQ48A32S(output_activation_max); @@ -150,9 +149,6 @@ void ConvPerChannel(const ConvParams& params, const int32* output_multiplier, acc_24x2, output_multiplier[out_channel], output_shift[out_channel]); - // Shift from 48bit aligned to 32bit: - acc_56 = AE_Q56S_SLAI(acc_56, 16); - // Add output offset, cap activation, and assign to the output: acc_56 = AE_ADDQ56(acc_56, output_offset_56); acc_56 = AE_MINQ56S(acc_56, output_activation_max_56); @@ -178,7 +174,7 @@ inline void Conv1x32Input32x32Filter( const RuntimeShape& 
filter_shape, const int8* filter_data, const RuntimeShape& bias_shape, const int32* bias_data, const RuntimeShape& output_shape, int8* output_data) { - ae_p24x2s input_offset_24x2 = AE_CONVERT_INT32_24x2(input_offset); + ae_p24x2s input_offset_24x2 = AE_MOVPA24(input_offset); ae_q56s output_offset_56 = AE_CVTQ48A32S(output_offset); ae_q56s output_activation_max_56 = AE_CVTQ48A32S(quantized_activation_max); ae_q56s output_activation_min_56 = AE_CVTQ48A32S(quantized_activation_min); @@ -227,13 +223,10 @@ inline void Conv1x32Input32x32Filter( acc_56 = AE_Q56S_SLAI(acc_56, 8); ae_p24x2s acc_24x2 = AE_TRUNCP24Q48(acc_56); - // Apply quantized multiplier and accumulate result at 48bit - // alignment: + // Apply quantized multiplier and accumulate result at 48bit alignment. + // Convert the (unsigned) 32-bit multiplier down to a 24-bit multiplier. acc_56 = micro::xtensa::hifimini::MultiplyByQuantizedMultiplier( - acc_24x2, output_multiplier[ch], output_shift[ch]); - - // Shift from 48bit aligned to 32bit: - acc_56 = AE_Q56S_SLAI(acc_56, 16); + acc_24x2, output_multiplier[ch] >> 8, output_shift[ch]); // Add output offset, cap activation, and assign to the output: acc_56 = AE_ADDQ56(acc_56, output_offset_56); diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc index 4781f70b1fa..75bc29efdfc 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc @@ -25,7 +25,6 @@ limitations under the License. 
#include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" #include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h" -#include "tensorflow/lite/micro/kernels/xtensa_hifimini/utils.h" namespace tflite { namespace ops { @@ -69,7 +68,7 @@ inline void DepthwiseConvPerChannel( const int output_width = output_shape.Dims(2); const int output_depth = output_shape.Dims(3); - ae_p24x2s input_offset_24x2 = AE_CONVERT_INT32_24x2(input_offset); + ae_p24x2s input_offset_24x2 = AE_MOVPA24(input_offset); ae_q56s output_offset_56 = AE_CVTQ48A32S(output_offset); ae_q56s output_activation_min_56 = AE_CVTQ48A32S(output_activation_min); ae_q56s output_activation_max_56 = AE_CVTQ48A32S(output_activation_max); @@ -114,14 +113,14 @@ inline void DepthwiseConvPerChannel( // shift into 24bit space. Note: value is duplicated in the HH // and LL register - but all calculations are done on the HH // side. - ae_p24x2s input_val_24x2 = AE_CONVERT_INT32_24x2(input_val); + ae_p24x2s input_val_24x2 = AE_MOVPA24(input_val); // Add input offset (24bit aligned): input_val_24x2 = AE_P24S_ADDS_P24X2S(input_val_24x2, input_offset_24x2); // Load filter 8bit value into 24bit alignment: - ae_p24x2s filter_val_24x2 = AE_CONVERT_INT32_24x2(filter_val); + ae_p24x2s filter_val_24x2 = AE_MOVPA24(filter_val); // Multiply and accumulate the HH side of each 24x24 PR // register: @@ -150,9 +149,6 @@ inline void DepthwiseConvPerChannel( acc_24x2, output_multiplier[output_channel], output_shift[output_channel]); - // Shift from 48bit aligned to 32bit: - acc_56 = AE_Q56S_SLAI(acc_56, 16); - // Add output offset, cap activation, and assign to the output: acc_56 = AE_ADDQ56(acc_56, output_offset_56); acc_56 = AE_MINQ56S(acc_56, output_activation_max_56); @@ -181,9 +177,10 @@ inline void DepthwiseConv4x32MatchingInputAndFilter( const RuntimeShape& filter_shape, const int8* filter_data, const RuntimeShape& bias_shape, const int32* bias_data, const RuntimeShape& 
output_shape, int8* output_data) { - const int32_t mult = output_multiplier[0]; + // Convert the (unsigned) 32-bit multiplier down to a 24-bit multiplier. + const int32_t mult = output_multiplier[0] >> 8; const int32_t shift = output_shift[0]; - ae_p24x2s input_offset_24x2 = AE_CONVERT_INT32_24x2(input_offset); + ae_p24x2s input_offset_24x2 = AE_MOVPA24(input_offset); ae_q56s output_offset_56 = AE_CVTQ48A32S(output_offset); ae_q56s output_activation_min_56 = AE_CVTQ48A32S(quantized_activation_min); ae_q56s output_activation_max_56 = AE_CVTQ48A32S(quantized_activation_max); @@ -270,10 +267,6 @@ inline void DepthwiseConv4x32MatchingInputAndFilter( block_1_acc = micro::xtensa::hifimini::MultiplyByQuantizedMultiplier( acc_24x2_1, mult, shift); - // Shift from 48bit aligned to 32bit: - block_0_acc = AE_Q56S_SLAI(block_0_acc, 16); - block_1_acc = AE_Q56S_SLAI(block_1_acc, 16); - // Add output offset, cap activation, and assign to the output: block_0_acc = AE_ADDQ56(block_0_acc, output_offset_56); block_1_acc = AE_ADDQ56(block_1_acc, output_offset_56); diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h b/tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h index 4ffb3653f50..918192c4d8f 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h @@ -23,7 +23,6 @@ limitations under the License. #include #include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/micro/kernels/xtensa_hifimini/utils.h" namespace tflite { namespace ops { @@ -31,80 +30,9 @@ namespace micro { namespace xtensa { namespace hifimini { -// -// Multiply 32bit value by a quantized multiplier (w/ shift) and returns a 48bit -// aligned value in the QR register. 
-// -inline ae_q56s MultiplyByQuantizedMultiplier(int32_t x, - int32_t quantized_multiplier, - int shift) { - // These boolean factors will carry an additional 2^8 (e.g 256) factor - // throughout the equation to cover the missing 8 bits of precision when a - // 32bit integer is outside the bounds of INT24. The additional scaling factor - // will be adjusted after the final multiplication in this method. - // - // The Q-notation comments in this method describe the calculations that take - // place when both |x| and the shifted value of |1| overflow the INT24 limits. - bool x_exceeds_24bits = (x <= INT24_MIN || x >= INT24_MAX); - bool shift_exceeds_24bits = false; - - // Q31.0 -> Q23.0 / 2^8 - ae_p24x2s x_24x2 = AE_CONVERT_INT32_24x2(x); - - if (shift > 0) { - int shifted = 1 << shift; - if (shifted <= INT24_MIN || shifted >= INT24_MAX) { - shift_exceeds_24bits = true; - } - - // Load the shifted value into the PR register: - // Q31.0 -> Q23.0 / 2^8 - ae_p24x2s shifted_24x2 = AE_CONVERT_INT32_24x2(shifted); - - // (Q23.0 / 2^8) * (Q23.0 / 2^8) = Q47.0 / 2^16 - ae_q56s sum_56 = AE_MULP24S_HH(x_24x2, shifted_24x2); - - // Shift left into 24bit space: - // ((Q47.0 / 2^16) << 24) = Q23.24 / 2^16 - sum_56 = AE_Q56S_SLAI(sum_56, 24); - - // Truncate and place on the PR register: - // (Q23.24 / 2^16) -> Q23.0 / 2^16 - x_24x2 = AE_TRUNCP24Q48(sum_56); - } - - // Load the quantized multiplier into the PR register. - // NOTE: This method assumes that this param has been calculated for 24bit - // space - not 32bits. - // Q0.31 -> Q0.23 - ae_p24x2s quantized_multiplier_24x2 = - AE_CONVERT_INT32_24x2(quantized_multiplier); - - // Adjust for the additional 8 bits of lost precision throughout this - // function: - int shift_amount = 23; - if (x_exceeds_24bits) { - shift_amount = shift_amount - 8; - } - if (shift_exceeds_24bits) { - shift_amount = shift_amount - 8; - } - - // Find the product of x and the quantized_multiplier and right shift - // to 48bit aligned. 
- // (Q23.0 / 2^16) * Q23.0 = Q47.0 / 2^16 - // (Q47.0 / 2^16) >> 7 = Q47.0 - ae_q56s result_56 = AE_MULP24S_HH(x_24x2, quantized_multiplier_24x2); - if (shift_amount > 0) { - result_56 = AE_Q56S_SRA(result_56, shift_amount); - } - - if (shift < 0) { - // Handle any negative shift directly on the 48 bit value. - result_56 = AE_Q56S_SRA(result_56, -shift); - } - return result_56; -} +// INT24 MIN/MAX +#define INT24_MIN -8388608 +#define INT24_MAX 8388607 // // Multiply 24bit value by a quantized multiplier (w/ shift) and returns a 48bit @@ -113,62 +41,62 @@ inline ae_q56s MultiplyByQuantizedMultiplier(int32_t x, inline ae_q56s MultiplyByQuantizedMultiplier(ae_p24x2s x_24x2, int32_t quantized_multiplier, int shift) { - // NOTE: x_24x2 = Q23.0 - - // This is an optimized version of a 32 bit MultiplyByQuantizedMultiplier - // operation of TFLite. Sometimes, the shifted value of |x_24x2| can exceed - // the limits of INT24, which requires |AE_CONVERT_INT32_24x2()| to load the - // left-most 24 bits of a 32bit integer. When this occurs, all Q values here - // carry an additional division of 2^8 to account for this loss in precision. - // This division will be applied to the final shift after multiplication. + // A value with 1 sign bit, N integer bits and M fractional bits is + // represented as QN+1.M since the sign bit is included in the integer bits. + // + // The Q notation in this method explains the values represented in each + // variable, along with an implicit division since the quantized_multiplier + // represents a value between 0.5 and 1.0 (Q1.X-1 where X is the bit precision + // of the type). // - // The Q-notation comments in this method describe the calculations that take - // place when both |x| and the shifted value of |1| overflow the INT24 limits. 
- bool shift_exceeds_24bits = false; - - ae_p24x2s x_shifted_24x2 = x_24x2; - if (shift > 0) { - int shifted = 1 << shift; - if (shifted <= INT24_MIN || shifted >= INT24_MAX) { - shift_exceeds_24bits = true; - } - // Load the shifted value into the PR register: - // Q31.0 -> Q23.0 / 2^8 - ae_p24x2s shifted_24x2 = AE_CONVERT_INT32_24x2(shifted); - - // Q23.0 * (Q23.0 / 2^8) = Q47.0 / 2^8 - ae_q56s sum_56 = AE_MULP24S_HH(x_24x2, shifted_24x2); - - // Shift left into 24bit space: - // ((Q47.0 / 2^8) << 24) = Q23.24 / 2^8 - sum_56 = AE_Q56S_SLAI(sum_56, 24); - - // Truncate and place on the PR register: - // (Q23.24 / 2^8) -> Q23.0 / 2^8 - x_shifted_24x2 = AE_ROUNDSP24Q48SYM(sum_56); - } - // Load the quantized multiplier into the PR register. // NOTE: This method assumes that this param has been calculated for 24bit // space - not 32bits. - // Q0.31 -> Q0.23 - ae_p24x2s quantized_multiplier_24x2 = - AE_CONVERT_INT32_24x2(quantized_multiplier); + // Q32.0 / 2^23 -> Q24.0 / 2^23 representing a Q1.23 multiplier. + ae_p24x2s quantized_multiplier_24x2 = AE_MOVPA24(quantized_multiplier); + // Shift right by 23 - 16 bits minus the specified shift. This is because we + // keep 16 fractional bits until the end to perform rounding. Subtract shift + // since shift is a left shift, and the 23-16 is a right shift. + int shift_amount = 7 - shift; - // Find the product of x and the quantized_multiplier and right shift - // to 48bit aligned. - // NOTE: Adjust for the additional 8 bits of lost precision throughout this - // function: - // (Q23.0 / 2^8) * Q23.0 = Q47.0 / 2^8 - // (Q47.0 / 2^8) >> 7 = Q47.0 - ae_q56s result = AE_MULP24S_HH(x_shifted_24x2, quantized_multiplier_24x2); - result = AE_Q56S_SRA(result, shift_exceeds_24bits ? 15 : 23); + // Find the product of x and the quantized_multiplier. 
+  // Q24.0 / 2^23 * Q24.0 = Q48.0 / 2^23 +  // Q48.0 / 2^23 >> 7 = Q48.0 / 2^16 +  ae_q56s result_56 = AE_MULP24S_HH(x_24x2, quantized_multiplier_24x2); -  if (shift < 0) { -    // Handle any negative shift directly on the 48 bit value. -    result = AE_Q56S_SRA(result, -shift); +  // Shift right if shift amount is positive, left if shift amount is negative. +  if (shift_amount >= 0) { +    result_56 = AE_Q56S_SRA(result_56, shift_amount); +  } else { +    result_56 = AE_Q56S_SLA(result_56, -shift_amount); } -  return result; + +  // Round off the bottom 16 bits. +  // Q48.0 / 2^16 -> Q32.0 aligned to 48 bits. +  result_56 = AE_ROUNDSQ32SYM(result_56); +  return result_56; +} + +// +// Multiply 32bit value by a quantized multiplier (w/ shift) and returns a 48bit +// aligned value in the QR register. +// +inline ae_q56s MultiplyByQuantizedMultiplier(int32_t x, +                                             int32_t quantized_multiplier, +                                             int shift) { +  // Convert x into a 2x24bit PR register file. If x is outside the numerical +  // limits of a 24bit integer, the "fractional" or lower 8bits are discarded. +  // If x is within the range of a 24 bit integer, the "signed" or upper 8bits +  // are discarded. +  ae_p24x2s x_24x2; +  if (x > INT24_MIN && x < INT24_MAX) { +    x_24x2 = AE_MOVPA24(x); +  } else { +    x_24x2 = static_cast<ae_p24x2s>(*reinterpret_cast<ae_p24f*>(&x)); +    shift += 8; +  } + +  return MultiplyByQuantizedMultiplier(x_24x2, quantized_multiplier, shift); } // @@ -193,6 +121,8 @@ inline void QuantizeMultiplier(float multiplier, int32_t* quantized_multiplier, } TFLITE_CHECK_LE(q_fixed, INT24_MAX); + // Ensure shift does not exceed 24-bit range.
+ TFLITE_CHECK_LE(*shift, 23); if (*shift < -23) { *shift = 0; q_fixed = 0; diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/fully_connected.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/fully_connected.cc index 7a535120216..c2c2c86fe81 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/fully_connected.cc @@ -25,7 +25,6 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h" -#include "tensorflow/lite/micro/kernels/xtensa_hifimini/utils.h" namespace tflite { namespace ops { @@ -108,9 +107,6 @@ inline void FullyConnected( sum_56 = MultiplyByQuantizedMultiplier(sum_24x2, output_multiplier, output_shift); - // Align from 48bit to 32bit on the QR register: - sum_56 = AE_Q56S_SLAI(sum_56, 16); - // Add output_offset and cap min/max values: sum_56 = AE_ADDQ56(sum_56, output_offset_56); sum_56 = AE_MINQ56S(sum_56, output_activation_max_56); diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/quantize.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/quantize.cc index 0708ee7f973..2177bf1c363 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/quantize.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/quantize.cc @@ -22,7 +22,6 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h" -#include "tensorflow/lite/micro/kernels/xtensa_hifimini/utils.h" namespace tflite { namespace ops { @@ -43,7 +42,7 @@ void AffineQuantize(int scale_multiplier, const ae_p16x2s* input_data_ptr = (const ae_p16x2s*)(input_data - 2); - ae_p24x2s scale_multiplier_24x2 = AE_CONVERT_INT32_24x2(scale_multiplier); + ae_p24x2s scale_multiplier_24x2 = AE_MOVPA24(scale_multiplier); int iters = flat_size / 2; for (int i = 0; i < iters; i++) { diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc index 1847a4e88e8..2b14bedc01f 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc @@ -25,8 +25,6 @@ limitations under the License. #include "tensorflow/lite/kernels/op_macros.h" #include "tensorflow/lite/micro/kernels/activation_utils.h" #include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h" -#include "tensorflow/lite/micro/kernels/xtensa_hifimini/utils.h" -#include "tensorflow/lite/micro/micro_utils.h" namespace tflite { namespace ops { @@ -99,7 +97,7 @@ void EvalIntegerSVDF( ae_q56s output_int16_max_56 = AE_CVTQ48A32S(INT16_MAX); ae_q56s output_int16_min_56 = AE_CVTQ48A32S(INT16_MIN); - ae_p24x2s input_zp_24x2 = AE_CONVERT_INT32_24x2(input_zp); + ae_p24x2s input_zp_24x2 = AE_MOVPA24(input_zp); for (int b = 0; b < n_batch; b++) { const int8_t* weight_feature_ptr = weight_feature - 2; @@ -140,8 +138,6 @@ void EvalIntegerSVDF( tflite::ops::micro::xtensa::hifimini::MultiplyByQuantizedMultiplier( dot_prod_24x2, scale_1_a, scale_1_b); - // Align from 48bit to 32bit on the QR register - dot_prod_56 = AE_Q56S_SLAI(dot_prod_56, 16); // Cap min/max and convert to int32: dot_prod_56 = AE_MAXQ56S(dot_prod_56, output_int16_min_56); dot_prod_56 = 
AE_MINQ56S(dot_prod_56, output_int16_max_56); @@ -232,8 +228,6 @@ void EvalIntegerSVDF( ae_q56s x_56 = tflite::ops::micro::xtensa::hifimini::MultiplyByQuantizedMultiplier( scratch_output_tensor[i], scale_2_a, scale_2_b); - // Align from 48bit to 32bit on the QR register: - x_56 = AE_Q56S_SLAI(x_56, 16); // Add output adjustment: x_56 = AE_ADDQ56(x_56, output_zp_56); // Cap min/max and convert to int32 (already aligned to 32bit): diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/utils.h b/tensorflow/lite/micro/kernels/xtensa_hifimini/utils.h deleted file mode 100644 index 59caf4bbf2f..00000000000 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/utils.h +++ /dev/null @@ -1,42 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_HIFIMINI_UTILS_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_HIFIMINI_UTILS_H_ - -#include - -#include - -// INT24 MIN/MAX -#define INT24_MIN -8388608 -#define INT24_MAX 8388607 - -// Converts an int32 value into a 2x24bit PR register file. If the int32 value -// is outside the numerical limits of a 24bit integer, the "fractional" or lower -// 8bits are discarded. If the value is within the range of a 24 bit integer, -// the "signed" or upper 8bits are discarded. 
-inline ae_p24x2s AE_CONVERT_INT32_24x2(int32_t v) { - if (v > INT24_MIN && v < INT24_MAX) { - return *reinterpret_cast<ae_p24x2s*>(&v); - } else { - return static_cast<ae_p24x2s>(*reinterpret_cast<ae_p24f*>(&v)); - } -} - -// Shifts a 48bit accumulator value into 32bit space and returns the value. -#define AE_CONVERT_Q56_INT32(v) AE_TRUNCA32Q48(AE_Q56S_SLAI(v, 16)) - -#endif  // TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_HIFIMINI_UTILS_H_