Clang format changes

This commit is contained in:
Siju Samuel 2019-03-20 21:06:15 +05:30
parent 203c4f750e
commit 96c8ad77f2
20 changed files with 75 additions and 86 deletions

View File

@ -37,7 +37,7 @@ using namespace tflite;
} }
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
if(argc != 2) { if (argc != 2) {
fprintf(stderr, "minimal <tflite model>\n"); fprintf(stderr, "minimal <tflite model>\n");
return 1; return 1;
} }

View File

@ -22,6 +22,6 @@ extern "C" void DebugLog(const char* s) {
"mov r1, %[str]\n" "mov r1, %[str]\n"
"bkpt #0xAB\n" "bkpt #0xAB\n"
: :
: [ str ] "r"(s) : [str] "r"(s)
: "r0", "r1"); : "r0", "r1");
} }

View File

@ -23,7 +23,7 @@ void FftWriteMemmapPreamble(FILE* fp, const struct FftState* state) {
} }
void FftWriteMemmap(FILE* fp, const struct FftState* state, void FftWriteMemmap(FILE* fp, const struct FftState* state,
const char* variable) { const char* variable) {
fprintf(fp, "%s->input = fft_input;\n", variable); fprintf(fp, "%s->input = fft_input;\n", variable);
fprintf(fp, "%s->output = fft_output;\n", variable); fprintf(fp, "%s->output = fft_output;\n", variable);
fprintf(fp, "%s->fft_size = %zu;\n", variable, state->fft_size); fprintf(fp, "%s->fft_size = %zu;\n", variable, state->fft_size);

View File

@ -68,4 +68,3 @@ void FftFreeStateContents(struct FftState* state) {
free(state->output); free(state->output);
free(state->scratch); free(state->scratch);
} }

View File

@ -53,8 +53,8 @@ void FilterbankAccumulateChannels(struct FilterbankState* state,
const int width = *channel_widths++; const int width = *channel_widths++;
int j; int j;
for (j = 0; j < width; ++j) { for (j = 0; j < width; ++j) {
weight_accumulator += *weights++ * ((uint64_t) *magnitudes); weight_accumulator += *weights++ * ((uint64_t)*magnitudes);
unweight_accumulator += *unweights++ * ((uint64_t) *magnitudes); unweight_accumulator += *unweights++ * ((uint64_t)*magnitudes);
++magnitudes; ++magnitudes;
} }
*work++ = weight_accumulator; *work++ = weight_accumulator;
@ -93,7 +93,7 @@ static uint32_t Sqrt64(uint64_t num) {
// clear. This will cause a slight off by one issue for numbers close to 2^32, // clear. This will cause a slight off by one issue for numbers close to 2^32,
// but it probably isn't going to matter (and gives us a big performance win). // but it probably isn't going to matter (and gives us a big performance win).
if ((num >> 32) == 0) { if ((num >> 32) == 0) {
return Sqrt32((uint32_t) num); return Sqrt32((uint32_t)num);
} }
uint64_t res = 0; uint64_t res = 0;
int max_bit_number = 64 - MostSignificantBit64(num); int max_bit_number = 64 - MostSignificantBit64(num);
@ -121,12 +121,12 @@ uint32_t* FilterbankSqrt(struct FilterbankState* state, int scale_down_shift) {
const int64_t* work = state->work + 1; const int64_t* work = state->work + 1;
// Reuse the work buffer since we're fine clobbering it at this point to hold // Reuse the work buffer since we're fine clobbering it at this point to hold
// the output. // the output.
uint32_t* output = (uint32_t*) state->work; uint32_t* output = (uint32_t*)state->work;
int i; int i;
for (i = 0; i < num_channels; ++i) { for (i = 0; i < num_channels; ++i) {
*output++ = Sqrt64(*work++) >> scale_down_shift; *output++ = Sqrt64(*work++) >> scale_down_shift;
} }
return (uint32_t*) state->work; return (uint32_t*)state->work;
} }
void FilterbankReset(struct FilterbankState* state) { void FilterbankReset(struct FilterbankState* state) {

View File

@ -40,22 +40,22 @@ static void CalculateCenterFrequencies(const int num_channels,
const float mel_low = FreqToMel(lower_frequency_limit); const float mel_low = FreqToMel(lower_frequency_limit);
const float mel_hi = FreqToMel(upper_frequency_limit); const float mel_hi = FreqToMel(upper_frequency_limit);
const float mel_span = mel_hi - mel_low; const float mel_span = mel_hi - mel_low;
const float mel_spacing = mel_span / ((float) num_channels); const float mel_spacing = mel_span / ((float)num_channels);
int i; int i;
for (i = 0; i < num_channels; ++i) { for (i = 0; i < num_channels; ++i) {
center_frequencies[i] = mel_low + (mel_spacing * (i + 1)); center_frequencies[i] = mel_low + (mel_spacing * (i + 1));
} }
} }
static void QuantizeFilterbankWeights(const float float_weight, static void QuantizeFilterbankWeights(const float float_weight, int16_t* weight,
int16_t* weight, int16_t* unweight) { int16_t* unweight) {
*weight = floor(float_weight * (1 << kFilterbankBits) + 0.5); *weight = floor(float_weight * (1 << kFilterbankBits) + 0.5);
*unweight = floor((1.0 - float_weight) * (1 << kFilterbankBits) + 0.5); *unweight = floor((1.0 - float_weight) * (1 << kFilterbankBits) + 0.5);
} }
int FilterbankPopulateState(const struct FilterbankConfig* config, int FilterbankPopulateState(const struct FilterbankConfig* config,
struct FilterbankState* state, struct FilterbankState* state, int sample_rate,
int sample_rate, int spectrum_size) { int spectrum_size) {
state->num_channels = config->num_channels; state->num_channels = config->num_channels;
const int num_channels_plus_1 = config->num_channels + 1; const int num_channels_plus_1 = config->num_channels + 1;
@ -81,10 +81,8 @@ int FilterbankPopulateState(const struct FilterbankConfig* config,
malloc(num_channels_plus_1 * sizeof(*actual_channel_widths)); malloc(num_channels_plus_1 * sizeof(*actual_channel_widths));
if (state->channel_frequency_starts == NULL || if (state->channel_frequency_starts == NULL ||
state->channel_weight_starts == NULL || state->channel_weight_starts == NULL || state->channel_widths == NULL ||
state->channel_widths == NULL || center_mel_freqs == NULL || actual_channel_starts == NULL ||
center_mel_freqs == NULL ||
actual_channel_starts == NULL ||
actual_channel_widths == NULL) { actual_channel_widths == NULL) {
free(center_mel_freqs); free(center_mel_freqs);
free(actual_channel_starts); free(actual_channel_starts);
@ -97,7 +95,7 @@ int FilterbankPopulateState(const struct FilterbankConfig* config,
config->upper_band_limit, center_mel_freqs); config->upper_band_limit, center_mel_freqs);
// Always exclude DC. // Always exclude DC.
const float hz_per_sbin = 0.5 * sample_rate / ((float) spectrum_size - 1); const float hz_per_sbin = 0.5 * sample_rate / ((float)spectrum_size - 1);
state->start_index = 1.5 + config->lower_band_limit / hz_per_sbin; state->start_index = 1.5 + config->lower_band_limit / hz_per_sbin;
state->end_index = 0; // Initialized to zero here, but actually set below. state->end_index = 0; // Initialized to zero here, but actually set below.
@ -115,7 +113,7 @@ int FilterbankPopulateState(const struct FilterbankConfig* config,
for (chan = 0; chan < num_channels_plus_1; ++chan) { for (chan = 0; chan < num_channels_plus_1; ++chan) {
// Keep jumping frequencies until we overshoot the bound on this channel. // Keep jumping frequencies until we overshoot the bound on this channel.
int freq_index = chan_freq_index_start; int freq_index = chan_freq_index_start;
while (FreqToMel((freq_index) * hz_per_sbin) <= center_mel_freqs[chan]) { while (FreqToMel((freq_index)*hz_per_sbin) <= center_mel_freqs[chan]) {
++freq_index; ++freq_index;
} }
@ -146,8 +144,7 @@ int FilterbankPopulateState(const struct FilterbankConfig* config,
// alignment? // alignment?
const int aligned_start = const int aligned_start =
(chan_freq_index_start / index_alignment) * index_alignment; (chan_freq_index_start / index_alignment) * index_alignment;
const int aligned_width = const int aligned_width = (chan_freq_index_start - aligned_start + width);
(chan_freq_index_start - aligned_start + width);
const int padded_width = const int padded_width =
(((aligned_width - 1) / kFilterbankChannelBlockSize) + 1) * (((aligned_width - 1) / kFilterbankChannelBlockSize) + 1) *
kFilterbankChannelBlockSize; kFilterbankChannelBlockSize;

View File

@ -37,7 +37,7 @@ struct FrontendOutput FrontendProcessSamples(struct FrontendState* state,
FftCompute(&state->fft, state->window.output, input_shift); FftCompute(&state->fft, state->window.output, input_shift);
// We can re-use the fft's output buffer to hold the energy. // We can re-use the fft's output buffer to hold the energy.
int32_t* energy = (int32_t*) state->fft.output; int32_t* energy = (int32_t*)state->fft.output;
FilterbankConvertFftComplexToEnergy(&state->filterbank, state->fft.output, FilterbankConvertFftComplexToEnergy(&state->filterbank, state->fft.output,
energy); energy);

View File

@ -31,7 +31,6 @@ int main(int argc, char** argv) {
return 1; return 1;
} }
FILE* fp = fopen(filename, "r"); FILE* fp = fopen(filename, "r");
if (fp == NULL) { if (fp == NULL) {
fprintf(stderr, "Failed to open %s for read\n", filename); fprintf(stderr, "Failed to open %s for read\n", filename);

View File

@ -14,8 +14,8 @@ limitations under the License.
==============================================================================*/ ==============================================================================*/
#include <stdio.h> #include <stdio.h>
#include "tensorflow/lite/experimental/microfrontend/lib/frontend.h"
#include "memmap.h" #include "memmap.h"
#include "tensorflow/lite/experimental/microfrontend/lib/frontend.h"
int main(int argc, char** argv) { int main(int argc, char** argv) {
struct FrontendState* frontend_state = GetFrontendStateMemmap(); struct FrontendState* frontend_state = GetFrontendStateMemmap();

View File

@ -57,12 +57,10 @@ int FrontendPopulateState(const struct FrontendConfig* config,
int input_correction_bits = int input_correction_bits =
MostSignificantBit32(state->fft.fft_size) - 1 - (kFilterbankBits / 2); MostSignificantBit32(state->fft.fft_size) - 1 - (kFilterbankBits / 2);
if (!PcanGainControlPopulateState(&config->pcan_gain_control, if (!PcanGainControlPopulateState(
&state->pcan_gain_control, &config->pcan_gain_control, &state->pcan_gain_control,
state->noise_reduction.estimate, state->noise_reduction.estimate, state->filterbank.num_channels,
state->filterbank.num_channels, state->noise_reduction.smoothing_bits, input_correction_bits)) {
state->noise_reduction.smoothing_bits,
input_correction_bits)) {
fprintf(stderr, "Failed to populate pcan gain control state\n"); fprintf(stderr, "Failed to populate pcan gain control state\n");
return 0; return 0;
} }

View File

@ -37,7 +37,7 @@ static uint32_t Log2FractionPart(const uint32_t x, const uint32_t log2x) {
// Part 2 // Part 2
const uint32_t base_seg = frac >> (kLogScaleLog2 - kLogSegmentsLog2); const uint32_t base_seg = frac >> (kLogScaleLog2 - kLogSegmentsLog2);
const uint32_t seg_unit = const uint32_t seg_unit =
(((uint32_t) 1) << kLogScaleLog2) >> kLogSegmentsLog2; (((uint32_t)1) << kLogScaleLog2) >> kLogSegmentsLog2;
const int32_t c0 = kLogLut[base_seg]; const int32_t c0 = kLogLut[base_seg];
const int32_t c1 = kLogLut[base_seg + 1]; const int32_t c1 = kLogLut[base_seg + 1];
@ -51,8 +51,7 @@ static uint32_t Log(const uint32_t x, const uint32_t scale_shift) {
const uint32_t fraction = Log2FractionPart(x, integer); const uint32_t fraction = Log2FractionPart(x, integer);
const uint32_t log2 = (integer << kLogScaleLog2) + fraction; const uint32_t log2 = (integer << kLogScaleLog2) + fraction;
const uint32_t round = kLogScale / 2; const uint32_t round = kLogScale / 2;
const uint32_t loge = const uint32_t loge = (((uint64_t)kLogCoeff) * log2 + round) >> kLogScaleLog2;
(((uint64_t) kLogCoeff) * log2 + round) >> kLogScaleLog2;
// Finally scale to our output scale // Finally scale to our output scale
const uint32_t loge_scaled = ((loge << scale_shift) + round) >> kLogScaleLog2; const uint32_t loge_scaled = ((loge << scale_shift) + round) >> kLogScaleLog2;
return loge_scaled; return loge_scaled;
@ -61,7 +60,7 @@ static uint32_t Log(const uint32_t x, const uint32_t scale_shift) {
uint16_t* LogScaleApply(struct LogScaleState* state, uint32_t* signal, uint16_t* LogScaleApply(struct LogScaleState* state, uint32_t* signal,
int signal_size, int correction_bits) { int signal_size, int correction_bits) {
const int scale_shift = state->scale_shift; const int scale_shift = state->scale_shift;
uint16_t* output = (uint16_t*) signal; uint16_t* output = (uint16_t*)signal;
uint16_t* ret = output; uint16_t* ret = output;
int i; int i;
for (i = 0; i < signal_size; ++i) { for (i = 0; i < signal_size; ++i) {

View File

@ -26,8 +26,8 @@ void NoiseReductionApply(struct NoiseReductionState* state, uint32_t* signal) {
// Update the estimate of the noise. // Update the estimate of the noise.
const uint32_t signal_scaled_up = signal[i] << state->smoothing_bits; const uint32_t signal_scaled_up = signal[i] << state->smoothing_bits;
uint32_t estimate = uint32_t estimate =
(((uint64_t) signal_scaled_up * smoothing) + (((uint64_t)signal_scaled_up * smoothing) +
((uint64_t) state->estimate[i] * one_minus_smoothing)) >> ((uint64_t)state->estimate[i] * one_minus_smoothing)) >>
kNoiseReductionBits; kNoiseReductionBits;
state->estimate[i] = estimate; state->estimate[i] = estimate;
@ -37,10 +37,10 @@ void NoiseReductionApply(struct NoiseReductionState* state, uint32_t* signal) {
} }
const uint32_t floor = const uint32_t floor =
((uint64_t) signal[i] * state->min_signal_remaining) >> ((uint64_t)signal[i] * state->min_signal_remaining) >>
kNoiseReductionBits; kNoiseReductionBits;
const uint32_t subtracted = (signal_scaled_up - estimate) >> const uint32_t subtracted =
state->smoothing_bits; (signal_scaled_up - estimate) >> state->smoothing_bits;
const uint32_t output = subtracted > floor ? subtracted : floor; const uint32_t output = subtracted > floor ? subtracted : floor;
signal[i] = output; signal[i] = output;
} }

View File

@ -24,17 +24,16 @@ int16_t WideDynamicFunction(const uint32_t x, const int16_t* lut) {
const int16_t interval = MostSignificantBit32(x); const int16_t interval = MostSignificantBit32(x);
lut += 4 * interval - 6; lut += 4 * interval - 6;
const int16_t frac = ((interval < 11) const int16_t frac =
? (x << (11 - interval)) ((interval < 11) ? (x << (11 - interval)) : (x >> (interval - 11))) &
: (x >> (interval - 11)) 0x3FF;
) & 0x3FF;
int32_t result = ((int32_t) lut[2] * frac) >> 5; int32_t result = ((int32_t)lut[2] * frac) >> 5;
result += ((int32_t) lut[1]) << 5; result += ((int32_t)lut[1]) << 5;
result *= frac; result *= frac;
result = (result + (1 << 14)) >> 15; result = (result + (1 << 14)) >> 15;
result += lut[0]; result += lut[0];
return (int16_t) result; return (int16_t)result;
} }
uint32_t PcanShrink(const uint32_t x) { uint32_t PcanShrink(const uint32_t x) {
@ -49,9 +48,9 @@ void PcanGainControlApply(struct PcanGainControlState* state,
uint32_t* signal) { uint32_t* signal) {
int i; int i;
for (i = 0; i < state->num_channels; ++i) { for (i = 0; i < state->num_channels; ++i) {
const uint32_t gain = WideDynamicFunction(state->noise_estimate[i], const uint32_t gain =
state->gain_lut); WideDynamicFunction(state->noise_estimate[i], state->gain_lut);
const uint32_t snr = ((uint64_t) signal[i] * gain) >> state->snr_shift; const uint32_t snr = ((uint64_t)signal[i] * gain) >> state->snr_shift;
signal[i] = PcanShrink(snr); signal[i] = PcanShrink(snr);
} }
} }

View File

@ -29,14 +29,15 @@ void PcanGainControlFillConfigWithDefaults(
int16_t PcanGainLookupFunction(const struct PcanGainControlConfig* config, int16_t PcanGainLookupFunction(const struct PcanGainControlConfig* config,
int32_t input_bits, uint32_t x) { int32_t input_bits, uint32_t x) {
const float x_as_float = ((float) x) / ((uint32_t) 1 << input_bits); const float x_as_float = ((float)x) / ((uint32_t)1 << input_bits);
const float gain_as_float = ((uint32_t) 1 << config->gain_bits) * const float gain_as_float =
((uint32_t)1 << config->gain_bits) *
powf(x_as_float + config->offset, -config->strength); powf(x_as_float + config->offset, -config->strength);
if (gain_as_float > kint16max) { if (gain_as_float > kint16max) {
return kint16max; return kint16max;
} }
return (int16_t) (gain_as_float + 0.5f); return (int16_t)(gain_as_float + 0.5f);
} }
int PcanGainControlPopulateState(const struct PcanGainControlConfig* config, int PcanGainControlPopulateState(const struct PcanGainControlConfig* config,
@ -64,23 +65,23 @@ int PcanGainControlPopulateState(const struct PcanGainControlConfig* config,
state->gain_lut -= 6; state->gain_lut -= 6;
int interval; int interval;
for (interval = 2; interval <= kWideDynamicFunctionBits; ++interval) { for (interval = 2; interval <= kWideDynamicFunctionBits; ++interval) {
const uint32_t x0 = (uint32_t) 1 << (interval - 1); const uint32_t x0 = (uint32_t)1 << (interval - 1);
const uint32_t x1 = x0 + (x0 >> 1); const uint32_t x1 = x0 + (x0 >> 1);
const uint32_t x2 = (interval == kWideDynamicFunctionBits) const uint32_t x2 =
? x0 + (x0 - 1) : 2 * x0; (interval == kWideDynamicFunctionBits) ? x0 + (x0 - 1) : 2 * x0;
const int16_t y0 = PcanGainLookupFunction(config, input_bits, x0); const int16_t y0 = PcanGainLookupFunction(config, input_bits, x0);
const int16_t y1 = PcanGainLookupFunction(config, input_bits, x1); const int16_t y1 = PcanGainLookupFunction(config, input_bits, x1);
const int16_t y2 = PcanGainLookupFunction(config, input_bits, x2); const int16_t y2 = PcanGainLookupFunction(config, input_bits, x2);
const int32_t diff1 = (int32_t) y1 - y0; const int32_t diff1 = (int32_t)y1 - y0;
const int32_t diff2 = (int32_t) y2 - y0; const int32_t diff2 = (int32_t)y2 - y0;
const int32_t a1 = 4 * diff1 - diff2; const int32_t a1 = 4 * diff1 - diff2;
const int32_t a2 = diff2 - a1; const int32_t a2 = diff2 - a1;
state->gain_lut[4 * interval] = y0; state->gain_lut[4 * interval] = y0;
state->gain_lut[4 * interval + 1] = (int16_t) a1; state->gain_lut[4 * interval + 1] = (int16_t)a1;
state->gain_lut[4 * interval + 2] = (int16_t) a2; state->gain_lut[4 * interval + 2] = (int16_t)a2;
} }
state->gain_lut += 6; state->gain_lut += 6;
return 1; return 1;

View File

@ -43,7 +43,7 @@ int WindowProcessSamples(struct WindowState* state, const int16_t* samples,
int16_t max_abs_output_value = 0; int16_t max_abs_output_value = 0;
for (i = 0; i < size; ++i) { for (i = 0; i < size; ++i) {
int16_t new_value = int16_t new_value =
(((int32_t) *input++) * *coefficients++) >> kFrontendWindowBits; (((int32_t)*input++) * *coefficients++) >> kFrontendWindowBits;
*output++ = new_value; *output++ = new_value;
if (new_value < 0) { if (new_value < 0) {
new_value = -new_value; new_value = -new_value;

View File

@ -29,15 +29,14 @@ int WindowPopulateState(const struct WindowConfig* config,
state->size = config->size_ms * sample_rate / 1000; state->size = config->size_ms * sample_rate / 1000;
state->step = config->step_size_ms * sample_rate / 1000; state->step = config->step_size_ms * sample_rate / 1000;
state->coefficients = malloc( state->coefficients = malloc(state->size * sizeof(*state->coefficients));
state->size * sizeof(*state->coefficients));
if (state->coefficients == NULL) { if (state->coefficients == NULL) {
fprintf(stderr, "Failed to allocate window coefficients\n"); fprintf(stderr, "Failed to allocate window coefficients\n");
return 0; return 0;
} }
// Populate the window values. // Populate the window values.
const float arg = M_PI * 2.0 / ((float) state->size); const float arg = M_PI * 2.0 / ((float)state->size);
int i; int i;
for (i = 0; i < state->size; ++i) { for (i = 0; i < state->size; ++i) {
float float_value = 0.5 - (0.5 * cos(arg * (i + 0.5))); float float_value = 0.5 - (0.5 * cos(arg * (i + 0.5)));
@ -47,15 +46,13 @@ int WindowPopulateState(const struct WindowConfig* config,
} }
state->input_used = 0; state->input_used = 0;
state->input = malloc( state->input = malloc(state->size * sizeof(*state->input));
state->size * sizeof(*state->input));
if (state->input == NULL) { if (state->input == NULL) {
fprintf(stderr, "Failed to allocate window input\n"); fprintf(stderr, "Failed to allocate window input\n");
return 0; return 0;
} }
state->output = malloc( state->output = malloc(state->size * sizeof(*state->output));
state->size * sizeof(*state->output));
if (state->output == NULL) { if (state->output == NULL) {
fprintf(stderr, "Failed to allocate window output\n"); fprintf(stderr, "Failed to allocate window output\n");
return 0; return 0;

View File

@ -180,11 +180,11 @@ const int8_t* ShuffleVectors(const int8_t* vectors, const int n_batch,
"st4 {v0.s, v1.s, v2.s, v3.s}[2], [%[shuffled_vectors_ptr]], #16\n" "st4 {v0.s, v1.s, v2.s, v3.s}[2], [%[shuffled_vectors_ptr]], #16\n"
"st4 {v0.s, v1.s, v2.s, v3.s}[3], [%[shuffled_vectors_ptr]], #16\n" "st4 {v0.s, v1.s, v2.s, v3.s}[3], [%[shuffled_vectors_ptr]], #16\n"
: [ unshuffled_vec0_ptr ] "+r"(unshuffled_vec0_ptr), : [unshuffled_vec0_ptr] "+r"(unshuffled_vec0_ptr),
[ unshuffled_vec1_ptr ] "+r"(unshuffled_vec1_ptr), [unshuffled_vec1_ptr] "+r"(unshuffled_vec1_ptr),
[ unshuffled_vec2_ptr ] "+r"(unshuffled_vec2_ptr), [unshuffled_vec2_ptr] "+r"(unshuffled_vec2_ptr),
[ unshuffled_vec3_ptr ] "+r"(unshuffled_vec3_ptr), [unshuffled_vec3_ptr] "+r"(unshuffled_vec3_ptr),
[ shuffled_vectors_ptr ] "+r"(shuffled_vectors_ptr) [shuffled_vectors_ptr] "+r"(shuffled_vectors_ptr)
: :
: "v0", "v1", "v2", "v3", "cc", "memory"); : "v0", "v1", "v2", "v3", "cc", "memory");
} }
@ -297,11 +297,11 @@ static void DotprodMatrixBatchFourVectorMultiplyAccumulate(
"st2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n" "st2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n"
"st2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n" "st2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n"
"st2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n" "st2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n"
: [ mat_ptr0 ] "+r"(mat_ptr0), [ mat_ptr1 ] "+r"(mat_ptr1), : [mat_ptr0] "+r"(mat_ptr0), [mat_ptr1] "+r"(mat_ptr1),
[ vec_ptr ] "+r"(vec_ptr), [ result_ptr ] "+r"(result_ptr) [vec_ptr] "+r"(vec_ptr), [result_ptr] "+r"(result_ptr)
: [ mat_ptr0_end ] "r"(mat_ptr0_end), : [mat_ptr0_end] "r"(mat_ptr0_end),
[ scaling_factors_ptr ] "r"(scaling_factors_ptr), [scaling_factors_ptr] "r"(scaling_factors_ptr),
[ wide_rows ] "r"(wide_rows) [wide_rows] "r"(wide_rows)
: "x0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", : "x0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
"v10", "v11", "v12", "v13", "cc", "memory"); "v10", "v11", "v12", "v13", "cc", "memory");
} }
@ -363,9 +363,9 @@ static void DotprodSparseMatrixBatchVectorMultiplyAccumulate(
// We have to be careful to cast this value to 32 bits in order // We have to be careful to cast this value to 32 bits in order
// to interpret the sign bit properly. // to interpret the sign bit properly.
"mov %[row_sum], v1.d[0]\n" "mov %[row_sum], v1.d[0]\n"
: [ row_sum ] "=r"(row_sum), [ ledger_ptr ] "+r"(ledger_ptr), : [row_sum] "=r"(row_sum), [ledger_ptr] "+r"(ledger_ptr),
[ mat_ptr ] "+r"(mat_ptr), [ vec_ptr ] "+r"(vec_ptr) [mat_ptr] "+r"(mat_ptr), [vec_ptr] "+r"(vec_ptr)
: [ ledger_end ] "r"(ledger_end) : [ledger_end] "r"(ledger_end)
: "x0", "x1", "x7", "x8", "v0", "v1", "v8", "v9", "cc", "memory"); : "x0", "x1", "x7", "x8", "v0", "v1", "v8", "v9", "cc", "memory");
} }
result[(batch * m_rows + row) * result_stride] += result[(batch * m_rows + row) * result_stride] +=

View File

@ -59,11 +59,11 @@ limitations under the License.
#include <limits> #include <limits>
#include <memory> #include <memory>
#include <farmhash.h>
#include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/c_api_internal.h" #include "tensorflow/lite/c/c_api_internal.h"
#include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h" #include "tensorflow/lite/kernels/op_macros.h"
#include <farmhash.h>
namespace tflite { namespace tflite {
namespace ops { namespace ops {

View File

@ -85,7 +85,7 @@ struct MinimumOp {
template <typename data_type, typename op_type> template <typename data_type, typename op_type>
void TFLiteOperation(TfLiteContext* context, TfLiteNode* node, void TFLiteOperation(TfLiteContext* context, TfLiteNode* node,
const OpContext& op_context) { const OpContext& op_context) {
reference_ops::MaximumMinimumBroadcast4DSlow( reference_ops::MaximumMinimumBroadcast4DSlow(
GetTensorShape(op_context.input1), GetTensorShape(op_context.input1),
GetTensorData<data_type>(op_context.input1), GetTensorData<data_type>(op_context.input1),
@ -112,7 +112,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLiteOperation<int8_t, OpType>(context, node, op_context); TFLiteOperation<int8_t, OpType>(context, node, op_context);
break; break;
case kTfLiteInt32: case kTfLiteInt32:
TFLiteOperation<int32_t, OpType>(context, node, op_context); TFLiteOperation<int32_t, OpType>(context, node, op_context);
break; break;
case kTfLiteInt64: case kTfLiteInt64:
TFLiteOperation<int64_t, OpType>(context, node, op_context); TFLiteOperation<int64_t, OpType>(context, node, op_context);

View File

@ -296,8 +296,8 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
GetTensorData<data_type>(input1), GetTensorShape(input2), \ GetTensorData<data_type>(input1), GetTensorShape(input2), \
GetTensorData<data_type>(input2), GetTensorShape(output), \ GetTensorData<data_type>(input2), GetTensorShape(output), \
GetTensorData<data_type>(output)) GetTensorData<data_type>(output))
// NOTE: We are using the add kernels. This is possible as the second values // NOTE: We are using the add kernels. This is possible as the second values
// multiplier is negated before being passed down. // multiplier is negated before being passed down.
if (output->type == kTfLiteInt8) { if (output->type == kTfLiteInt8) {
if (need_broadcast) { if (need_broadcast) {
TF_LITE_SUB(reference_integer_ops, BroadcastAdd4DSlow, int8_t); TF_LITE_SUB(reference_integer_ops, BroadcastAdd4DSlow, int8_t);