Skip to content

Commit

Permalink
Merge branch 'feat/async_memcpy_any_alignment' into 'master'
Browse files Browse the repository at this point in the history
async memcpy destination address doesn't have to be cache aligned

Closes IDFCI-2359 and IDF-11785

See merge request espressif/esp-idf!35849
  • Loading branch information
suda-morris committed Jan 24, 2025
2 parents 3a30e43 + 0c7fef8 commit c586527
Show file tree
Hide file tree
Showing 10 changed files with 470 additions and 519 deletions.
327 changes: 142 additions & 185 deletions components/esp_hw_support/dma/async_memcpy_gdma.c

Large diffs are not rendered by default.

2 changes: 0 additions & 2 deletions components/esp_hw_support/dma/esp_async_memcpy_priv.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
#include "esp_async_memcpy.h"
#include "soc/soc_caps.h"

// Round val down to a multiple of align by clearing its low bits.
// The mask ~((align) - 1) only produces a multiple of align when align is a power of two.
#define ALIGN_DOWN(val, align) ((val) & ~((align) - 1))

#define DEFAULT_TRANSACTION_QUEUE_LENGTH 4

#ifdef __cplusplus
Expand Down
127 changes: 80 additions & 47 deletions components/esp_hw_support/dma/esp_dma_utils.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: 2023-2024 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2023-2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
Expand All @@ -24,68 +24,101 @@
static const char *TAG = "dma_utils";

// Round num up to the next multiple of align (num is returned unchanged when it is already a multiple).
// The bit mask is only meaningful when align is a power of two; note that align is evaluated twice.
#define ALIGN_UP_BY(num, align) (((num) + ((align) - 1)) & ~((align) - 1))
// Round num down to a multiple of align by clearing its low bits (align must be a power of two).
#define ALIGN_DOWN_BY(num, align) ((num) & (~((align) - 1)))

// NOTE(review): the two signatures and the two bodies below appear to be the pre-change
// (esp_dma_split_buffer_to_aligned) and post-change (esp_dma_split_rx_buffer_to_cache_aligned)
// versions of one routine, shown back to back by the diff view -- confirm against the real file.
//
// Both versions describe a buffer as up to three parts in the output array:
//  - head: the unaligned bytes at the start, redirected to the first slot of a stash buffer
//  - body: the aligned middle, which keeps pointing into the original buffer
//  - tail: the unaligned bytes at the end, redirected to the second slot of the stash buffer
// The older version takes the stash buffer and the alignment from the caller; the newer version
// derives the alignment from the cache line size, allocates the stash buffer itself and hands it
// back through ret_stash_buffer.
esp_err_t esp_dma_split_buffer_to_aligned(void *input_buffer, size_t input_buffer_len, void *stash_buffer, size_t stash_buffer_len, size_t split_alignment, dma_buffer_split_array_t *align_array)
esp_err_t esp_dma_split_rx_buffer_to_cache_aligned(void *rx_buffer, size_t buffer_len, dma_buffer_split_array_t *align_buf_array, uint8_t** ret_stash_buffer)
{
esp_err_t ret = ESP_OK;
// NOTE(review): the stash_buffer_len comparison sits inside the negated "!(...)" group, so the
// group reads !((split_alignment & (split_alignment - 1)) && (stash_buffer_len >= 2 * split_alignment)).
// For a power-of-two split_alignment the first operand is 0 and the group is always true, i.e. the
// stash buffer length is effectively never checked here.
ESP_RETURN_ON_FALSE(align_array && input_buffer && input_buffer_len && stash_buffer && split_alignment && !(split_alignment & (split_alignment - 1)
&& (stash_buffer_len >= 2 * split_alignment)), ESP_ERR_INVALID_ARG, TAG, "invalid argument");
ESP_RETURN_ON_FALSE(!((uintptr_t)stash_buffer % split_alignment), ESP_ERR_INVALID_ARG, TAG, "extra buffer is not aligned");

// calculate head_overflow_len
// (number of bytes from the buffer start up to the next alignment boundary, 0 if already aligned)
size_t head_overflow_len = (uintptr_t)input_buffer % split_alignment;
head_overflow_len = head_overflow_len ? split_alignment - head_overflow_len : 0;
ESP_LOGD(TAG, "head_addr:%p split_alignment:%zu head_overflow_len:%zu", input_buffer, split_alignment, head_overflow_len);
// calculate tail_overflow_len
// (number of bytes between the last alignment boundary and the buffer end)
size_t tail_overflow_len = ((uintptr_t)input_buffer + input_buffer_len) % split_alignment;
// NOTE(review): input_buffer is a "void *" here, so the pointer arithmetic in the log argument relies on a compiler extension
ESP_LOGD(TAG, "tail_addr:%p split_alignment:%zu tail_overflow_len:%zu", input_buffer + input_buffer_len - tail_overflow_len, split_alignment, tail_overflow_len);

uint32_t extra_buf_count = 0;
// NOTE(review): these casts are assigned back to "void *" variables, so they do not change the
// type used by the pointer arithmetic that follows
input_buffer = (uint8_t*)input_buffer;
stash_buffer = (uint8_t*)stash_buffer;
// head goes to stash slot 0, tail to stash slot 1; the body stays in the original buffer
align_array->buf.head.recovery_address = input_buffer;
align_array->buf.head.aligned_buffer = stash_buffer + split_alignment * extra_buf_count++;
align_array->buf.head.length = head_overflow_len;
align_array->buf.body.recovery_address = input_buffer + head_overflow_len;
align_array->buf.body.aligned_buffer = input_buffer + head_overflow_len;
align_array->buf.body.length = input_buffer_len - head_overflow_len - tail_overflow_len;
align_array->buf.tail.recovery_address = input_buffer + input_buffer_len - tail_overflow_len;
align_array->buf.tail.aligned_buffer = stash_buffer + split_alignment * extra_buf_count++;
align_array->buf.tail.length = tail_overflow_len;

// special handling when input_buffer length is no more than buffer alignment
// (the whole buffer is then carried by the head part alone)
if(head_overflow_len >= input_buffer_len || tail_overflow_len >= input_buffer_len)
{
align_array->buf.head.length = input_buffer_len ;
align_array->buf.body.length = 0 ;
align_array->buf.tail.length = 0 ;
// NOTE(review): ret_stash_buffer is not part of this check although it is written
// unconditionally before the function returns
ESP_RETURN_ON_FALSE(rx_buffer && buffer_len && align_buf_array, ESP_ERR_INVALID_ARG, TAG, "invalid argument");

// read the cache line size of internal and external memory, we also use this information to check if a given memory is behind the cache
size_t int_mem_cache_line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
size_t ext_mem_cache_line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_EXT_MEM, CACHE_TYPE_DATA);

// pick the cache line size that matches where rx_buffer lives; it stays 0 when neither predicate matches
size_t split_line_size = 0;
if (esp_ptr_external_ram(rx_buffer)) {
split_line_size = ext_mem_cache_line_size;
} else if (esp_ptr_internal(rx_buffer)) {
split_line_size = int_mem_cache_line_size;
}
ESP_LOGV(TAG, "split_line_size:%zu", split_line_size);

// allocate the stash buffer from internal RAM
// Note, the split_line_size can be 0, in this case, the stash_buffer is also NULL, which is fine
// two slots of split_line_size bytes each: one for the head part, one for the tail part
uint8_t* stash_buffer = heap_caps_calloc(2, split_line_size, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
// a NULL stash buffer is only treated as an error when a non-zero size was requested
ESP_RETURN_ON_FALSE(!(split_line_size && !stash_buffer), ESP_ERR_NO_MEM, TAG, "no mem for stash buffer");

// clear align_array to avoid garbage data
memset(align_buf_array, 0, sizeof(dma_buffer_split_array_t));
// one flag per entry of aligned_buffer[]; presumably index 0/1/2 alias buf.head/body/tail -- confirm against the typedef
bool need_cache_sync[3] = {false};

// if split_line_size is non-zero, split the buffer into head, body and tail
if (split_line_size > 0) {
// calculate head_overflow_len
// (number of bytes from the buffer start up to the next cache line boundary, 0 if already aligned)
size_t head_overflow_len = (uintptr_t)rx_buffer % split_line_size;
head_overflow_len = head_overflow_len ? split_line_size - head_overflow_len : 0;
ESP_LOGV(TAG, "head_addr:%p head_overflow_len:%zu", rx_buffer, head_overflow_len);
// calculate tail_overflow_len
// (number of bytes between the last cache line boundary and the buffer end)
size_t tail_overflow_len = ((uintptr_t)rx_buffer + buffer_len) % split_line_size;
// NOTE(review): rx_buffer is a "void *", so the pointer arithmetic in the log argument relies on a compiler extension
ESP_LOGV(TAG, "tail_addr:%p tail_overflow_len:%zu", rx_buffer + buffer_len - tail_overflow_len, tail_overflow_len);

uint8_t extra_buf_count = 0;
uint8_t* input_buffer = (uint8_t*)rx_buffer;
// head goes to stash slot 0; the stash lives in internal RAM, so it is only synced when internal memory has a cache line size
align_buf_array->buf.head.recovery_address = input_buffer;
align_buf_array->buf.head.aligned_buffer = stash_buffer + split_line_size * extra_buf_count++;
align_buf_array->buf.head.length = head_overflow_len;
need_cache_sync[0] = int_mem_cache_line_size > 0;
// body stays in place inside the original buffer and is always marked for sync in this branch
align_buf_array->buf.body.recovery_address = input_buffer + head_overflow_len;
align_buf_array->buf.body.aligned_buffer = input_buffer + head_overflow_len;
align_buf_array->buf.body.length = buffer_len - head_overflow_len - tail_overflow_len;
need_cache_sync[1] = true;
// tail goes to stash slot 1
align_buf_array->buf.tail.recovery_address = input_buffer + buffer_len - tail_overflow_len;
align_buf_array->buf.tail.aligned_buffer = stash_buffer + split_line_size * extra_buf_count++;
align_buf_array->buf.tail.length = tail_overflow_len;
need_cache_sync[2] = int_mem_cache_line_size > 0;

// special handling when input_buffer length is no more than buffer alignment
// (the whole buffer is then carried by the head part alone)
if (head_overflow_len >= buffer_len || tail_overflow_len >= buffer_len) {
align_buf_array->buf.head.length = buffer_len ;
align_buf_array->buf.body.length = 0 ;
align_buf_array->buf.tail.length = 0 ;
}
} else {
// no cache line size applies: hand the whole buffer over as the body, without stash or cache sync
align_buf_array->buf.body.aligned_buffer = rx_buffer;
align_buf_array->buf.body.recovery_address = rx_buffer;
align_buf_array->buf.body.length = buffer_len;
need_cache_sync[1] = false;
}

// parts with zero length get their pointers cleared (and, in the newer version, their sync flag dropped)
for(int i = 0; i < 3; i++) {
if(!align_array->aligned_buffer[i].length) {
align_array->aligned_buffer[i].aligned_buffer = NULL;
align_array->aligned_buffer[i].recovery_address = NULL;
for (int i = 0; i < 3; i++) {
if (align_buf_array->aligned_buffer[i].length == 0) {
align_buf_array->aligned_buffer[i].aligned_buffer = NULL;
align_buf_array->aligned_buffer[i].recovery_address = NULL;
need_cache_sync[i] = false;
}
}

return ret;
// invalidate the aligned buffer if necessary
// NOTE(review): the return value of esp_cache_msync is discarded, so a failed sync is not reported to the caller
for (int i = 0; i < 3; i++) {
if (need_cache_sync[i]) {
esp_cache_msync(align_buf_array->aligned_buffer[i].aligned_buffer, align_buf_array->aligned_buffer[i].length, ESP_CACHE_MSYNC_FLAG_DIR_M2C);
}
}

// NOTE(review): dereferenced without a NULL check; stash_buffer may itself be NULL when split_line_size is 0
*ret_stash_buffer = stash_buffer;
return ESP_OK;
}

// NOTE(review): the paired signatures, argument checks, "if" lines and return statements below
// appear to be the pre-change and post-change versions of one routine shown back to back by the
// diff view -- confirm against the real file.
//
// Copies the head and tail parts from their aligned_buffer back to their recovery_address.
// Returns ESP_ERR_INVALID_ARG when align_array is NULL, ESP_OK otherwise.
esp_err_t esp_dma_merge_aligned_buffers(dma_buffer_split_array_t *align_array)
esp_err_t esp_dma_merge_aligned_rx_buffers(dma_buffer_split_array_t *align_array)
{
esp_err_t ret = ESP_OK;
// the newer line uses the _ISR variant of the check macro
ESP_RETURN_ON_FALSE(align_array, ESP_ERR_INVALID_ARG, TAG, "invalid argument");
ESP_RETURN_ON_FALSE_ISR(align_array, ESP_ERR_INVALID_ARG, TAG, "invalid argument");

// only need to copy the head and tail buffer
// (the body part is not copied by this function)
if(align_array->buf.head.length) {
if (align_array->buf.head.length) {
memcpy(align_array->buf.head.recovery_address, align_array->buf.head.aligned_buffer, align_array->buf.head.length);
}
if(align_array->buf.tail.length) {
if (align_array->buf.tail.length) {
memcpy(align_array->buf.tail.recovery_address, align_array->buf.tail.aligned_buffer, align_array->buf.tail.length);
}

return ret;
return ESP_OK;
}

esp_err_t esp_dma_capable_malloc(size_t size, const esp_dma_mem_info_t *dma_mem_info, void **out_ptr, size_t *actual_size)
Expand Down
6 changes: 0 additions & 6 deletions components/esp_hw_support/dma/gdma_link.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,8 @@

#include <stdlib.h>
#include <string.h>
#include <stdatomic.h>
#include <sys/cdefs.h>
#include <sys/lock.h>
#include "sdkconfig.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "soc/soc_caps.h"
#include "soc/ext_mem_defs.h"
#include "esp_log.h"
#include "esp_check.h"
#include "esp_memory_utils.h"
Expand Down
31 changes: 17 additions & 14 deletions components/esp_hw_support/dma/include/esp_private/esp_dma_utils.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: 2023-2024 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2023-2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
Expand All @@ -24,6 +24,8 @@ typedef struct {

/**
* @brief DMA buffer aligned array
* The array contains three parts: head, body and tail.
* The length of each part is >= 0; a length of 0 means that the part is absent.
*/
typedef struct {
union {
Expand All @@ -37,22 +39,21 @@ typedef struct {
} dma_buffer_split_array_t;

/**
* @brief Split unaligned DMA buffer to aligned DMA buffer or aligned DMA buffer array
* @brief Split DMA RX buffer to cache aligned buffers
*
* @note Returned align array contains three parts: head, body and tail. Length of each buffer will be >=0, length 0 means that there is no such part
* @note After the original RX buffer is split into an array, caller should mount the buffer array to the DMA controller in scatter-gather mode.
* Don't read/write the aligned buffers before the DMA finished using them.
*
* @param[in] buffer Origin DMA buffer address
* @param[in] buffer_len Origin DMA buffer length
* @param[in] stash_buffer Needed extra buffer to stash aligned buffer, should be allocated with DMA capable memory and aligned to split_alignment
* @param[in] stash_buffer_len stash_buffer length
* @param[in] split_alignment Alignment of each buffer required by the DMA
* @param[out] align_array Aligned DMA buffer array
* @param[in] rx_buffer The origin DMA buffer used for receiving data
* @param[in] buffer_len rx_buffer length
* @param[out] align_buf_array Aligned DMA buffer array
* @param[out] ret_stash_buffer Allocated stash buffer (caller should free it after use)
* @return
* - ESP_OK: Split to aligned buffer successfully
* - ESP_ERR_INVALID_ARG: Split to aligned buffer failed because of invalid argument
* - ESP_ERR_NO_MEM: Split to aligned buffer failed because the stash buffer could not be allocated
*
* brief sketch:
* buffer alignment delimiter buffer alignment delimiter
* cache alignment delimiter cache alignment delimiter
* │ │
* Origin Buffer │ Origin Buffer │
* │ │ │ │
Expand All @@ -68,17 +69,19 @@ typedef struct {
* ▼ ▼
* |xxxxx......| |xxxxx......|
*/
esp_err_t esp_dma_split_buffer_to_aligned(void *buffer, size_t buffer_len, void *stash_buffer, size_t stash_buffer_len, size_t split_alignment, dma_buffer_split_array_t *align_array);
esp_err_t esp_dma_split_rx_buffer_to_cache_aligned(void *rx_buffer, size_t buffer_len, dma_buffer_split_array_t *align_buf_array, uint8_t** ret_stash_buffer);

/**
* @brief Merge aligned buffer array to origin buffer
* @brief Merge aligned RX buffer array to origin buffer
*
* @param[in] align_array Aligned DMA buffer array
* @note This function can be used in the ISR context.
*
* @param[in] align_buf_array Aligned DMA buffer array
* @return
* - ESP_OK: Merge aligned buffer to origin buffer successfully
* - ESP_ERR_INVALID_ARG: Merge aligned buffer to origin buffer failed because of invalid argument
*/
esp_err_t esp_dma_merge_aligned_buffers(dma_buffer_split_array_t *align_array);
esp_err_t esp_dma_merge_aligned_rx_buffers(dma_buffer_split_array_t *align_buf_array);

#ifdef __cplusplus
}
Expand Down
Loading

0 comments on commit c586527

Please sign in to comment.