From e2ea0b8ef4f1b8874c38d682237059aeadd74a14 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Thu, 1 Aug 2019 11:28:05 -0700
Subject: [PATCH] Added support for microspeech in NXP FRDM-K66F MCU

PiperOrigin-RevId: 261162225
---
 tensorflow/lite/experimental/micro/README.md |  64 ++-
 .../micro_speech/nxp_k66f/audio_provider.cc  | 380 ++++++++++++++++++
 2 files changed, 443 insertions(+), 1 deletion(-)
 create mode 100644 tensorflow/lite/experimental/micro/examples/micro_speech/nxp_k66f/audio_provider.cc

diff --git a/tensorflow/lite/experimental/micro/README.md b/tensorflow/lite/experimental/micro/README.md
index b70aeb60515..c5e69c9aa66 100644
--- a/tensorflow/lite/experimental/micro/README.md
+++ b/tensorflow/lite/experimental/micro/README.md
@@ -18,6 +18,7 @@ detection model, takes up a total of 22KB.
   * [Building for Ambiq Micro Apollo3Blue EVB using Make](#building-for-ambiq-micro-apollo3blue-evb-using-make)
     * [Additional Apollo3 Instructions](#additional-apollo3-instructions)
   * [Building for the Eta Compute ECM3531 EVB using Make](#Building-for-the-Eta-Compute-ECM3531-EVB-using-Make)
+  * [Building for NXP FRDM K66F EVB using mbed](#Building-for-NXP-FRDM-K66F-using-mbed)
 
 - [Goals](#goals)
@@ -341,7 +342,7 @@ To flash a part with JFlash Lite, do the following:
    to down load the Tensorflow source code and the support libraries \(but do
    not run the make command shown there.\)
 2. Download the Eta Compute SDK, version 0.0.17. Contact info@etacompute.com
-3. You will need the the Arm compiler arm-none-eabi-gcc, version 7.3.1
+3. You will need the Arm compiler arm-none-eabi-gcc, version 7.3.1
    20180622, release ARM/embedded-7-branch revision 261907, 7-2018-q2-update.
    This compiler is downloaded through make.
 4. Edit the file
@@ -379,6 +380,67 @@ To flash a part with JFlash Lite, do the following:
    tensorflow/lite/experimental/micro/tools/make/targets/ecm3531 \
    ./flash_program executable_name to load into flash.
 
+## Building for NXP FRDM K66F using mbed
+
+1. Follow the instructions at
+   [Tensorflow Micro Speech](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/experimental/micro/examples/micro_speech#getting-started)
+   to download the Tensorflow source code and the support libraries.
+2. Follow the instructions on the [mbed website](https://os.mbed.com/docs/mbed-os/v5.13/tools/installation-and-setup.html)
+   to set up and install the mbed CLI.
+3. Compile TensorFlow with the following command to generate the mbed project:
+
+   ```
+   make -f tensorflow/lite/experimental/micro/tools/make/Makefile TARGET=mbed TAGS="nxp_k66f" generate_micro_speech_mbed_project
+   ```
+4. Go to the location of the generated project. The generated project is usually
+   in tensorflow/lite/experimental/micro/tools/make/gen/mbed_cortex-m4/prj/micro_speech/mbed
+5. Create an mbed project using the generated files: ```mbed new .```
+6. Change the project settings to use C++11 rather than C++14:
+
+   ```
+   python -c 'import fileinput, glob;
+   for filename in glob.glob("mbed-os/tools/profiles/*.json"):
+     for line in fileinput.input(filename, inplace=True):
+       print(line.replace("\"-std=gnu++14\"","\"-std=c++11\", \"-fpermissive\""))'
+   ```
+7. To compile the project, use the following command:
+
+   ```
+   mbed compile --target K66F --toolchain GCC_ARM --profile release
+   ```
+8. With some mbed compilers, you may get a compile error in mbed_rtc_time.cpp.
+   Go to mbed-os/platform/mbed_rtc_time.h and comment out lines 32 and 37 so
+   that the section reads:
+
+   ```
+   //#if !defined(__GNUC__) || defined(__CC_ARM) || defined(__clang__)
+   struct timeval {
+     time_t tv_sec;
+     int32_t tv_usec;
+   };
+   //#endif
+   ```
+9. See the helpful resources on the NXP website, such as the
+   [NXP FRDM-K66F User Guide](https://www.nxp.com/docs/en/user-guide/FRDMK66FUG.pdf) and
+   [NXP FRDM-K66F Getting Started](https://www.nxp.com/document/guide/get-started-with-the-frdm-k66f:NGS-FRDM-K66F),
+   for more information about the board.
+10. Connect a USB cable to the micro USB port. With the Ethernet port facing
+    towards you, the micro USB port is to the left of the Ethernet port.
+11. To compile and flash in a single step, add the --flash option:
+
+    ```
+    mbed compile --target K66F --toolchain GCC_ARM --profile release --flash
+    ```
+12. Disconnect the USB cable to power down the device, then reconnect it to
+    power the device back up and start running the model.
+13. Connect to the serial port at a baud rate of 9600, using the correct serial
+    device, to view the output from the MCU. On Linux, you can run the
+    following screen command if the serial device is /dev/ttyACM0:
+
+    ```
+    sudo screen /dev/ttyACM0 9600
+    ```
+14. Saying "Yes" will print "Yes" and saying "No" will print "No" on the
+    serial port.
+15. A loopback path from the microphone to the headset jack is enabled. The
+    headset jack is the black connector. If there is no output on the serial
+    port, you can connect headphones to the headset jack to check whether the
+    audio loopback path is working.
+
 ## Implement target optimized kernels
 
 The reference kernels in tensorflow/lite/experimental/micro/kernels are
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/nxp_k66f/audio_provider.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/nxp_k66f/audio_provider.cc
new file mode 100644
index 00000000000..55267e5ad50
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/nxp_k66f/audio_provider.cc
@@ -0,0 +1,380 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/ + +// TensorFlow Headers +#include "tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.h" + +#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h" + +// mbed and NXP FRDM-K66F Headers +#include "fsl_clock_config.h" // NOLINT +#include "fsl_common.h" // NOLINT +#include "fsl_dmamux.h" // NOLINT +#include "fsl_edma.h" // NOLINT +#include "fsl_gpio.h" // NOLINT +#include "fsl_i2c.h" // NOLINT +#include "fsl_lmem_cache.h" // NOLINT +#include "fsl_port.h" // NOLINT +#include "fsl_sai.h" // NOLINT +#include "fsl_sai_edma.h" // NOLINT +#include "mbed.h" // NOLINT + +// Compiler pragma for alignment of data to make efficient use of DMA +#if (defined(__ICCARM__)) +#if ((!(defined(FSL_FEATURE_HAS_NO_NONCACHEABLE_SECTION) && \ + FSL_FEATURE_HAS_NO_NONCACHEABLE_SECTION)) && \ + defined(FSL_FEATURE_L1ICACHE_LINESIZE_BYTE)) +#define AT_NONCACHEABLE_SECTION_ALIGN(var, alignbytes) \ + SDK_PRAGMA(data_alignment = alignbytes) var @"NonCacheable" +#else +#define AT_NONCACHEABLE_SECTION_ALIGN(var, alignbytes) \ + SDK_PRAGMA(data_alignment = alignbytes) var +#endif +#elif (defined(__CC_ARM) || defined(__ARMCC_VERSION)) +#if ((!(defined(FSL_FEATURE_HAS_NO_NONCACHEABLE_SECTION) && \ + FSL_FEATURE_HAS_NO_NONCACHEABLE_SECTION)) && \ + defined(FSL_FEATURE_L1ICACHE_LINESIZE_BYTE)) +#define AT_NONCACHEABLE_SECTION_ALIGN(var, alignbytes) \ + __attribute__((section("NonCacheable"), zero_init)) \ + __attribute__((aligned(alignbytes))) var +#else +#define AT_NONCACHEABLE_SECTION_ALIGN(var, alignbytes) \ + __attribute__((aligned(alignbytes))) var +#endif +#elif (defined(__GNUC__)) +#if ((!(defined(FSL_FEATURE_HAS_NO_NONCACHEABLE_SECTION) && \ + FSL_FEATURE_HAS_NO_NONCACHEABLE_SECTION)) && \ + defined(FSL_FEATURE_L1ICACHE_LINESIZE_BYTE)) +#define AT_NONCACHEABLE_SECTION_ALIGN(var, alignbytes) \ + __attribute__((section("NonCacheable,\"aw\",%nobits @"))) var \ + __attribute__((aligned(alignbytes))) +#else +#define AT_NONCACHEABLE_SECTION_ALIGN(var, alignbytes) \ + var __attribute__((aligned(alignbytes))) +#endif +#else +#error Toolchain not supported. 
+#define AT_NONCACHEABLE_SECTION_ALIGN(var, alignbytes) var +#endif + +namespace { + +// Buffer configuration for receiving audio data +constexpr int kNoOfSamples = 512; +constexpr int kBufferSize = kNoOfSamples * 2; +constexpr int kNoOfBuffers = 4; +constexpr int kOverSampleRate = 384; + +// Buffer management +AT_NONCACHEABLE_SECTION_ALIGN( + static int16_t g_rx_buffer[kNoOfBuffers * kNoOfSamples], 4); +sai_edma_handle_t g_tx_sai_handle; +sai_edma_handle_t g_rx_sai_handle; +static volatile uint32_t g_tx_index = 0; +static volatile uint32_t g_rx_index = 0; +edma_handle_t g_tx_dma_handle = {0}; +edma_handle_t g_rx_dma_handle = {0}; +sai_transfer_t g_sai_transfer; + +bool g_is_audio_initialized = false; +constexpr int kAudioCaptureBufferSize = kAudioSampleFrequency * 0.5; +int16_t g_audio_capture_buffer[kAudioCaptureBufferSize]; +int16_t g_audio_output_buffer[kMaxAudioSampleSize]; +int32_t g_latest_audio_timestamp = 0; + +// DA7212 configuration +constexpr int da7212ConfigurationSize = 48; +constexpr int da7212I2cAddress = 0x1A; +volatile uint8_t g_da7212_register_config[da7212ConfigurationSize][2] = { + {0x21, 0x10}, // Set DIG_ROUTING_DAI to ADC right and ADC left + {0x22, 0x05}, // Set Sampling rate to 16 KHz + {0x23, 0x08}, // Enable master bias + {0x24, 0x00}, // Clear PLL Fractional division top + {0x25, 0x00}, // Clear PLL Fractional division bottom + {0x26, 0x20}, // Set PLL Integer division to 32 + {0x27, 0x80}, // Set PLL input range to 2-10 MHz,system clock is PLL output + {0x28, 0x01}, // 64 BCLK per WCLK and S + {0x29, 0xC0}, // I2S 16-bit per channel, output is driven, DAI enable + {0x2A, 0x32}, // One stream for left and another for right + {0x45, 0x67}, // Set DAC Gain to 6 dB + {0x46, 0x67}, // Set DAC Gain to 6 dB + {0x47, 0xF1}, // Enable charge pump + {0x4B, 0x08}, // DAC_L selected + {0x4C, 0x08}, // DAC_R selected + {0x69, 0xA0}, // Enable DAC_L + {0x6A, 0xA0}, // Enable DAC_R + {0x6B, 0xB8}, // Enable HP_L + {0x6C, 0xB8}, // Enable HP_R + {0x6E, 0x98}, // Enable MIXOUT_L + {0x6F, 0x98}, // Enable MIXOUT_R + {0x95, 0x32}, {0xE0, 0x00}, {0x32, 0x80}, // Enable MIC + {0x33, 0x80}, // Enable MIC + {0x34, 0x03}, // Add MXIN Gain + {0x35, 0x03}, // Add MXIN Gain + {0x36, 0x78}, // Add ADC Gain + {0x37, 0x78}, // Add ADC Gain + {0x60, 0xB0}, {0x61, 0xB0}, {0x65, 0x88}, {0x66, 0x88}, {0x67, 0xA0}, + {0x68, 0xA0}, {0x62, 0xA9}, {0x50, 0xFE}, {0x51, 0xF7}, {0x93, 0x07}, + {0x3A, 0x04}, {0x64, 0x84}, {0x39, 0x01}, {0x63, 0x80}, {0x38, 0x88}, + {0x24, 0x00}, {0x25, 0x00}, {0x26, 0x20}, {0x20, 0x80}}; + +// Save audio samples into intermediate buffer +void CaptureSamples(const int16_t *sample_data) { + const int sample_size = kNoOfSamples; + const int32_t time_in_ms = + g_latest_audio_timestamp + (sample_size / (kAudioSampleFrequency / 1000)); + + const int32_t start_sample_offset = + g_latest_audio_timestamp * (kAudioSampleFrequency / 1000); + for (int i = 0; i < sample_size; ++i) { + const int capture_index = + (start_sample_offset + i) % kAudioCaptureBufferSize; + g_audio_capture_buffer[capture_index] = sample_data[i]; + } + // This is how we let the outside world know that new audio data has arrived. 
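+  // The timestamp is advanced only after the samples above have been copied
+  // into g_audio_capture_buffer, so a reader that observes the new timestamp
+  // can safely consume the newly written region of the ring buffer.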
+  g_latest_audio_timestamp = time_in_ms;
+}
+
+// Callback function for SAI RX EDMA transfer complete
+static void SaiRxCallback(I2S_Type *base, sai_edma_handle_t *handle,
+                          status_t status, void *userData) {
+  if (kStatus_SAI_RxError == status) {
+    // Handle the error
+  } else {
+    // Save audio data into intermediate buffer
+    CaptureSamples(
+        reinterpret_cast<const int16_t *>(g_rx_buffer +
+                                          g_tx_index * kNoOfSamples));
+
+    // Submit received audio buffer to SAI TX for audio loopback debug
+    g_sai_transfer.data = (uint8_t *)(g_rx_buffer + g_tx_index * kNoOfSamples);
+    g_sai_transfer.dataSize = kBufferSize;
+    if (kStatus_Success ==
+        SAI_TransferSendEDMA(I2S0, &g_tx_sai_handle, &g_sai_transfer)) {
+      g_tx_index++;
+    }
+    if (g_tx_index == kNoOfBuffers) {
+      g_tx_index = 0U;
+    }
+
+    // Submit buffer to SAI RX to receive audio data
+    g_sai_transfer.data = (uint8_t *)(g_rx_buffer + g_rx_index * kNoOfSamples);
+    g_sai_transfer.dataSize = kBufferSize;
+    if (kStatus_Success ==
+        SAI_TransferReceiveEDMA(I2S0, &g_rx_sai_handle, &g_sai_transfer)) {
+      g_rx_index++;
+    }
+    if (g_rx_index == kNoOfBuffers) {
+      g_rx_index = 0U;
+    }
+  }
+}
+
+// Callback function for TX Buffer transfer
+static void SaiTxCallback(I2S_Type *base, sai_edma_handle_t *handle,
+                          status_t status, void *userData) {
+  if (kStatus_SAI_TxError == status) {
+    // Handle the error
+  }
+  // Do nothing
+}
+
+// Initialize MCU pins
+void McuInitializePins(void) {
+  // Port B Clock Gate Control: Clock enabled
+  CLOCK_EnableClock(kCLOCK_PortB);
+  // Port C Clock Gate Control: Clock enabled
+  CLOCK_EnableClock(kCLOCK_PortC);
+  // Port E Clock Gate Control: Clock enabled
+  CLOCK_EnableClock(kCLOCK_PortE);
+
+  // PORTB16 (pin E10) is configured as UART0_RX
+  PORT_SetPinMux(PORTB, 16U, kPORT_MuxAlt3);
+  // PORTB17 (pin E9) is configured as UART0_TX
+  PORT_SetPinMux(PORTB, 17U, kPORT_MuxAlt3);
+  // PORTC1 (pin B11) is configured as I2S0_TXD0
+  PORT_SetPinMux(PORTC, 1U, kPORT_MuxAlt6);
+
+  // PORTC10 (pin C7) is configured as I2C1_SCL
+  const port_pin_config_t portc10_pinC7_config = {
+      kPORT_PullUp, kPORT_FastSlewRate, kPORT_PassiveFilterDisable,
+      kPORT_OpenDrainEnable, kPORT_LowDriveStrength, kPORT_MuxAlt2,
+      kPORT_UnlockRegister};
+  PORT_SetPinConfig(PORTC, 10U, &portc10_pinC7_config);
+
+  // PORTC11 (pin B7) is configured as I2C1_SDA
+  const port_pin_config_t portc11_pinB7_config = {
+      kPORT_PullUp, kPORT_FastSlewRate, kPORT_PassiveFilterDisable,
+      kPORT_OpenDrainEnable, kPORT_LowDriveStrength, kPORT_MuxAlt2,
+      kPORT_UnlockRegister};
+  PORT_SetPinConfig(PORTC, 11U, &portc11_pinB7_config);
+
+  // PORTC6 (pin C8) is configured as I2S0_MCLK
+  PORT_SetPinMux(PORTC, 6U, kPORT_MuxAlt6);
+  // PORTE11 (pin G4) is configured as I2S0_TX_FS
+  PORT_SetPinMux(PORTE, 11U, kPORT_MuxAlt4);
+  // PORTE12 (pin G3) is configured as I2S0_TX_BCLK
+  PORT_SetPinMux(PORTE, 12U, kPORT_MuxAlt4);
+  SIM->SOPT5 =
+      ((SIM->SOPT5 & (~(SIM_SOPT5_UART0TXSRC_MASK))) | SIM_SOPT5_UART0TXSRC(0));
+  // PORTE7 (pin F4) is configured as I2S0_RXD0
+  PORT_SetPinMux(PORTE, 7U, kPORT_MuxAlt4);
+  SIM->SOPT5 =
+      ((SIM->SOPT5 & (~(SIM_SOPT5_UART0TXSRC_MASK))) | SIM_SOPT5_UART0TXSRC(0));
+}
+
+// Write DA7212 registers using I2C
+status_t Da7212WriteRegister(uint8_t register_address, uint8_t register_data) {
+  uint8_t data[1];
+  data[0] = (uint8_t)register_data;
+  i2c_master_transfer_t i2c_data;
+  i2c_data.slaveAddress = da7212I2cAddress;
+  i2c_data.direction = kI2C_Write;
+  i2c_data.subaddress = register_address;
+  i2c_data.subaddressSize = 1;
+  i2c_data.data = (uint8_t * volatile) data;
+  i2c_data.dataSize = 1;
+  i2c_data.flags = kI2C_TransferDefaultFlag;
+  return I2C_MasterTransferBlocking(I2C1, &i2c_data);
+}
+
+// Initialize DA7212
+void Da7212Initialize(void) {
+  for (uint32_t i = 0; i < da7212ConfigurationSize; i++) {
+    Da7212WriteRegister(g_da7212_register_config[i][0],
+                        g_da7212_register_config[i][1]);
+  }
+}
+
+// Initialization for receiving audio data
+TfLiteStatus InitAudioRecording(tflite::ErrorReporter *error_reporter) {
+  edma_config_t dma_config = {0};
+  sai_config_t sai_config;
+  sai_transfer_format_t sai_format;
+  volatile uint32_t delay_cycle = 500000;
+  i2c_master_config_t i2c_config = {0};
+
+  // Initialize FRDM-K66F pins
+  McuInitializePins();
+
+  // Set Clock to 180 MHz
+  // BOARD_BootClockRUN();
+  BOARD_BootClockHSRUN();
+
+  // Enable Code Caching to improve performance
+  LMEM_EnableCodeCache(LMEM, true);
+
+  // Initialize I2C
+  I2C_MasterGetDefaultConfig(&i2c_config);
+  I2C_MasterInit(I2C1, &i2c_config, CLOCK_GetFreq(kCLOCK_BusClk));
+
+  // Initialize SAI
+  memset(&sai_format, 0U, sizeof(sai_transfer_format_t));
+  SAI_TxGetDefaultConfig(&sai_config);
+  SAI_TxInit(I2S0, &sai_config);
+  SAI_RxGetDefaultConfig(&sai_config);
+  SAI_RxInit(I2S0, &sai_config);
+  sai_format.bitWidth = kSAI_WordWidth16bits;
+  sai_format.channel = 0U;
+  sai_format.sampleRate_Hz = kSAI_SampleRate16KHz;
+  sai_format.masterClockHz = kOverSampleRate * sai_format.sampleRate_Hz;
+  sai_format.protocol = sai_config.protocol;
+  sai_format.stereo = kSAI_MonoRight;
+  sai_format.watermark = FSL_FEATURE_SAI_FIFO_COUNT / 2U;
+
+  // Initialize DA7212
+  Da7212Initialize();
+
+  // Initialize SAI EDMA
+  EDMA_GetDefaultConfig(&dma_config);
+  EDMA_Init(DMA0, &dma_config);
+  EDMA_CreateHandle(&g_tx_dma_handle, DMA0, 0);
+  EDMA_CreateHandle(&g_rx_dma_handle, DMA0, 1);
+
+  // Initialize DMA MUX
+  DMAMUX_Init(DMAMUX);
+  DMAMUX_SetSource(DMAMUX, 0, (uint8_t)kDmaRequestMux0I2S0Tx);
+  DMAMUX_EnableChannel(DMAMUX, 0);
+  DMAMUX_SetSource(DMAMUX, 1, (uint8_t)kDmaRequestMux0I2S0Rx);
+  DMAMUX_EnableChannel(DMAMUX, 1);
+
+  // Wait a few cycles for the DA7212
+  while (delay_cycle) {
+    __ASM("nop");
+    delay_cycle--;
+  }
+
+  // Setup SAI EDMA Callbacks
+  SAI_TransferTxCreateHandleEDMA(I2S0, &g_tx_sai_handle, SaiTxCallback, NULL,
+                                 &g_tx_dma_handle);
+  SAI_TransferRxCreateHandleEDMA(I2S0, &g_rx_sai_handle, SaiRxCallback, NULL,
+                                 &g_rx_dma_handle);
+  SAI_TransferTxSetFormatEDMA(I2S0, &g_tx_sai_handle, &sai_format,
+                              CLOCK_GetFreq(kCLOCK_CoreSysClk),
+                              sai_format.masterClockHz);
+  SAI_TransferRxSetFormatEDMA(I2S0, &g_rx_sai_handle, &sai_format,
+                              CLOCK_GetFreq(kCLOCK_CoreSysClk),
+                              sai_format.masterClockHz);
+
+  // Submit buffers to SAI RX to start receiving audio
+  g_sai_transfer.data = (uint8_t *)(g_rx_buffer + g_rx_index * kNoOfSamples);
+  g_sai_transfer.dataSize = kBufferSize;
+  if (kStatus_Success ==
+      SAI_TransferReceiveEDMA(I2S0, &g_rx_sai_handle, &g_sai_transfer)) {
+    g_rx_index++;
+  }
+  if (g_rx_index == kNoOfBuffers) {
+    g_rx_index = 0U;
+  }
+  g_sai_transfer.data = (uint8_t *)(g_rx_buffer + g_rx_index * kNoOfSamples);
+  g_sai_transfer.dataSize = kBufferSize;
+  if (kStatus_Success ==
+      SAI_TransferReceiveEDMA(I2S0, &g_rx_sai_handle, &g_sai_transfer)) {
+    g_rx_index++;
+  }
+  if (g_rx_index == kNoOfBuffers) {
+    g_rx_index = 0U;
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace
+
+// Main entry point for getting audio data.
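+// Copies the requested window of samples out of the capture ring buffer that
+// is filled by the SAI RX DMA callback above.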
+TfLiteStatus GetAudioSamples(tflite::ErrorReporter *error_reporter, + int start_ms, int duration_ms, + int *audio_samples_size, int16_t **audio_samples) { + if (!g_is_audio_initialized) { + TfLiteStatus init_status = InitAudioRecording(error_reporter); + if (init_status != kTfLiteOk) { + return init_status; + } + g_is_audio_initialized = true; + } + // This should only be called when the main thread notices that the latest + // audio sample data timestamp has changed, so that there's new data in the + // capture ring buffer. The ring buffer will eventually wrap around and + // overwrite the data, but the assumption is that the main thread is checking + // often enough and the buffer is large enough that this call will be made + // before that happens. + const int start_offset = start_ms * (kAudioSampleFrequency / 1000); + const int duration_sample_count = + duration_ms * (kAudioSampleFrequency / 1000); + for (int i = 0; i < duration_sample_count; ++i) { + const int capture_index = (start_offset + i) % kAudioCaptureBufferSize; + g_audio_output_buffer[i] = g_audio_capture_buffer[capture_index]; + } + *audio_samples_size = kMaxAudioSampleSize; + *audio_samples = g_audio_output_buffer; + return kTfLiteOk; +} + +int32_t LatestAudioTimestamp() { return g_latest_audio_timestamp; }
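
Editor's note (not part of the patch): the sketch below shows roughly how a caller such as the micro_speech example's main loop might poll the audio provider added above. The `AudioLoop` function, its control flow, and the final "feed into feature generation" step are illustrative assumptions; only `LatestAudioTimestamp()` and `GetAudioSamples()` come from the file introduced by this patch.

```cpp
#include "tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.h"

// Hypothetical polling loop: waits for the SAI RX DMA callback to publish a
// new timestamp, then copies the newly captured samples out of the ring
// buffer via GetAudioSamples().
void AudioLoop(tflite::ErrorReporter* error_reporter) {
  int32_t previous_timestamp = 0;
  while (true) {
    const int32_t current_timestamp = LatestAudioTimestamp();
    if (current_timestamp == previous_timestamp) {
      continue;  // No new audio block has been captured yet.
    }
    int audio_samples_size = 0;
    int16_t* audio_samples = nullptr;
    // Fetch the samples captured since the last fetch. Note that the
    // provider's output buffer holds kMaxAudioSampleSize samples, so the
    // requested duration must stay within that bound.
    GetAudioSamples(error_reporter, previous_timestamp,
                    current_timestamp - previous_timestamp,
                    &audio_samples_size, &audio_samples);
    previous_timestamp = current_timestamp;
    // ... feed audio_samples / audio_samples_size into feature generation ...
  }
}
```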