From 1853cc5143f82842aa7b1f9f80bde64ce65845fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Jastrz=C4=99bski?= Date: Mon, 23 Jan 2023 08:37:34 +0100 Subject: [PATCH] perf(dam2d): rework stm32 dma2d (#3904) --- src/draw/lv_draw_img.c | 9 +- src/draw/stm32_dma2d/lv_gpu_stm32_dma2d.c | 798 +++++++++++++++++----- src/draw/stm32_dma2d/lv_gpu_stm32_dma2d.h | 77 ++- 3 files changed, 685 insertions(+), 199 deletions(-) diff --git a/src/draw/lv_draw_img.c b/src/draw/lv_draw_img.c index 41dc0f039..1deb39e0c 100644 --- a/src/draw/lv_draw_img.c +++ b/src/draw/lv_draw_img.c @@ -69,18 +69,19 @@ void lv_draw_img(lv_draw_ctx_t * draw_ctx, const lv_draw_img_dsc_t * dsc, const if(dsc->opa <= LV_OPA_MIN) return; - lv_res_t res; + lv_res_t res = LV_RES_INV; + if(draw_ctx->draw_img) { res = draw_ctx->draw_img(draw_ctx, dsc, coords, src); } - else { + + if(res != LV_RES_OK) { res = decode_and_draw(draw_ctx, dsc, coords, src); } - if(res == LV_RES_INV) { + if(res != LV_RES_OK) { LV_LOG_WARN("Image draw error"); show_error(draw_ctx, coords, "No\ndata"); - return; } } diff --git a/src/draw/stm32_dma2d/lv_gpu_stm32_dma2d.c b/src/draw/stm32_dma2d/lv_gpu_stm32_dma2d.c index 4eb1940ef..b1ae6552a 100644 --- a/src/draw/stm32_dma2d/lv_gpu_stm32_dma2d.c +++ b/src/draw/stm32_dma2d/lv_gpu_stm32_dma2d.c @@ -11,100 +11,71 @@ #if LV_USE_GPU_STM32_DMA2D -#include LV_GPU_DMA2D_CMSIS_INCLUDE - /********************* * DEFINES *********************/ - #if LV_COLOR_16_SWAP - // TODO: F7 has red blue swap bit in control register for all layers and output - #error "Can't use DMA2D with LV_COLOR_16_SWAP 1" + // Note: DMA2D red/blue swap (RBS) works for all color modes + #define RBS_BIT 1U +#else + #define RBS_BIT 0U #endif -#if LV_COLOR_DEPTH == 8 - #error "Can't use DMA2D with LV_COLOR_DEPTH == 8" -#endif +#define CACHE_ROW_SIZE 32U // cache row size in Bytes + +// For code/implementation discussion refer to https://github.com/lvgl/lvgl/issues/3714#issuecomment-1365187036 +// astyle --options=lvgl/scripts/code-format.cfg --ignore-exclude-errors lvgl/src/draw/stm32_dma2d/*.c lvgl/src/draw/stm32_dma2d/*.h #if LV_COLOR_DEPTH == 16 - #define LV_DMA2D_COLOR_FORMAT LV_DMA2D_RGB565 + const dma2d_color_format_t LvglColorFormat = RGB565; #elif LV_COLOR_DEPTH == 32 - #define LV_DMA2D_COLOR_FORMAT LV_DMA2D_ARGB8888 + const dma2d_color_format_t LvglColorFormat = ARGB8888; #else - /*Can't use GPU with other formats*/ + #error "Cannot use DMA2D with LV_COLOR_DEPTH other than 16 or 32" #endif -/********************** - * TYPEDEFS - **********************/ - -/********************** - * STATIC PROTOTYPES - **********************/ - -static void lv_draw_stm32_dma2d_blend_fill(lv_color_t * dest_buf, lv_coord_t dest_stride, const lv_area_t * fill_area, - lv_color_t color); - - -static void lv_draw_stm32_dma2d_blend_map(lv_color_t * dest_buf, const lv_area_t * dest_area, lv_coord_t dest_stride, - const lv_color_t * src_buf, lv_coord_t src_stride, lv_opa_t opa); - -static void lv_draw_stm32_dma2d_img_decoded(lv_draw_ctx_t * draw, const lv_draw_img_dsc_t * dsc, - const lv_area_t * coords, const uint8_t * map_p, lv_img_cf_t color_format); - - -static void invalidate_cache(void); - -/********************** - * STATIC VARIABLES - **********************/ - -/********************** - * MACROS - **********************/ - -/********************** - * GLOBAL FUNCTIONS - **********************/ +static bool isDma2dInProgess = false; // indicates whether DMA2D transfer *initiated here* is in progress /** * Turn on the peripheral and set output color mode, this only needs to be done once */ void lv_draw_stm32_dma2d_init(void) { - /*Enable DMA2D clock*/ + // Enable DMA2D clock #if defined(STM32F4) || defined(STM32F7) - RCC->AHB1ENR |= RCC_AHB1ENR_DMA2DEN; + RCC->AHB1ENR |= RCC_AHB1ENR_DMA2DEN; // enable DMA2D #elif defined(STM32H7) RCC->AHB3ENR |= RCC_AHB3ENR_DMA2DEN; #else # warning "LVGL can't enable the clock of DMA2D" #endif - /*Wait for hardware access to complete*/ + // Wait for hardware access to complete __asm volatile("DSB\n"); - /*Delay after setting peripheral clock*/ + // Delay after setting peripheral clock volatile uint32_t temp = RCC->AHB1ENR; LV_UNUSED(temp); - /*set output colour mode*/ - DMA2D->OPFCCR = LV_DMA2D_COLOR_FORMAT; + // AHB master timer configuration + DMA2D->AMTCR = 0; // AHB bus guaranteed dead time disabled +#if defined(LV_STM32_DMA2D_TEST) + _lv_gpu_stm32_dwt_init(); // init µs timer +#endif } - void lv_draw_stm32_dma2d_ctx_init(lv_disp_drv_t * drv, lv_draw_ctx_t * draw_ctx) { - lv_draw_sw_init_ctx(drv, draw_ctx); lv_draw_stm32_dma2d_ctx_t * dma2d_draw_ctx = (lv_draw_sw_ctx_t *)draw_ctx; dma2d_draw_ctx->blend = lv_draw_stm32_dma2d_blend; - // dma2d_draw_ctx->base_draw.draw_img_decoded = lv_draw_stm32_dma2d_img_decoded; - dma2d_draw_ctx->base_draw.wait_for_finish = lv_gpu_stm32_dma2d_wait_cb; + dma2d_draw_ctx->base_draw.draw_img_decoded = lv_draw_stm32_dma2d_img_decoded; + //dma2d_draw_ctx->base_draw.draw_img = lv_draw_stm32_dma2d_img; + // Note: currently it does not make sense use lv_gpu_stm32_dma2d_wait_cb() since waiting starts right after the dma2d transfer + //dma2d_draw_ctx->base_draw.wait_for_finish = lv_gpu_stm32_dma2d_wait_cb; dma2d_draw_ctx->base_draw.buffer_copy = lv_draw_stm32_dma2d_buffer_copy; - } void lv_draw_stm32_dma2d_ctx_deinit(lv_disp_drv_t * drv, lv_draw_ctx_t * draw_ctx) @@ -113,153 +84,636 @@ void lv_draw_stm32_dma2d_ctx_deinit(lv_disp_drv_t * drv, lv_draw_ctx_t * draw_ct LV_UNUSED(draw_ctx); } - -void lv_draw_stm32_dma2d_blend(lv_draw_ctx_t * draw_ctx, const lv_draw_sw_blend_dsc_t * dsc) +static void lv_draw_stm32_dma2d_blend(lv_draw_ctx_t * draw_ctx, const lv_draw_sw_blend_dsc_t * dsc) { - lv_area_t blend_area; - if(!_lv_area_intersect(&blend_area, dsc->blend_area, draw_ctx->clip_area)) return; + if(dsc->blend_mode != LV_BLEND_MODE_NORMAL) { + lv_draw_sw_blend_basic(draw_ctx, dsc); + return; + } + // Note: x1 must be zero. Otherwise, there is no way to correctly calculate dest_stride. + //LV_ASSERT_MSG(draw_ctx->buf_area->x1 == 0); // critical? + // Both draw buffer start address and buffer size *must* be 32-byte aligned since draw buffer cache is being invalidated. + //uint32_t drawBufferLength = lv_area_get_size(draw_ctx->buf_area) * sizeof(lv_color_t); + //LV_ASSERT_MSG(drawBufferLength % CACHE_ROW_SIZE == 0); // critical, but this is not the way to test it + //LV_ASSERT_MSG((uint32_t)draw_ctx->buf % CACHE_ROW_SIZE == 0, "draw_ctx.buf is not 32B aligned"); // critical? - bool done = false; - - if(dsc->mask_buf == NULL && dsc->blend_mode == LV_BLEND_MODE_NORMAL && lv_area_get_size(&blend_area) > 100) { - lv_coord_t dest_stride = lv_area_get_width(draw_ctx->buf_area); - - lv_color_t * dest_buf = draw_ctx->buf; - dest_buf += dest_stride * (blend_area.y1 - draw_ctx->buf_area->y1) + (blend_area.x1 - draw_ctx->buf_area->x1); - - const lv_color_t * src_buf = dsc->src_buf; - if(src_buf) { - lv_draw_sw_blend_basic(draw_ctx, dsc); - lv_coord_t src_stride; - src_stride = lv_area_get_width(dsc->blend_area); - src_buf += src_stride * (blend_area.y1 - dsc->blend_area->y1) + (blend_area.x1 - dsc->blend_area->x1); - lv_area_move(&blend_area, -draw_ctx->buf_area->x1, -draw_ctx->buf_area->y1); - lv_draw_stm32_dma2d_blend_map(dest_buf, &blend_area, dest_stride, src_buf, src_stride, dsc->opa); - done = true; - } - else if(dsc->opa >= LV_OPA_MAX) { - lv_area_move(&blend_area, -draw_ctx->buf_area->x1, -draw_ctx->buf_area->y1); - lv_draw_stm32_dma2d_blend_fill(dest_buf, dest_stride, &blend_area, dsc->color); - done = true; - } + if(dsc->src_buf) { + // For performance reasons, both source buffer start address and buffer size *should* be 32-byte aligned since source buffer cache is being cleaned. + //uint32_t srcBufferLength = lv_area_get_size(dsc->blend_area) * sizeof(lv_color_t); + //LV_ASSERT_MSG(srcBufferLength % CACHE_ROW_SIZE == 0); // FIXME: assert fails (performance, non-critical) + //LV_ASSERT_MSG((uint32_t)dsc->src_buf % CACHE_ROW_SIZE == 0); // FIXME: assert fails (performance, non-critical) } - if(!done) lv_draw_sw_blend_basic(draw_ctx, dsc); -} + lv_area_t draw_area; + if(!_lv_area_intersect(&draw_area, dsc->blend_area, draw_ctx->clip_area)) return; + // + draw_ctx->buf_area has the entire draw buffer location + // + draw_ctx->clip_area has the current draw buffer location + // + dsc->blend_area has the location of the area intended to be painted - image etc. + // + draw_area has the area actually being painted + // All coordinates are relative to the screen. -void lv_draw_stm32_dma2d_buffer_copy(lv_draw_ctx_t * draw_ctx, - void * dest_buf, lv_coord_t dest_stride, const lv_area_t * dest_area, - void * src_buf, lv_coord_t src_stride, const lv_area_t * src_area) -{ - LV_UNUSED(draw_ctx); - lv_draw_stm32_dma2d_blend_map(dest_buf, dest_area, dest_stride, src_buf, src_stride, LV_OPA_MAX); -} + const lv_opa_t * mask = dsc->mask_buf; + if(dsc->mask_buf && dsc->mask_res == LV_DRAW_MASK_RES_TRANSP) return; + else if(dsc->mask_res == LV_DRAW_MASK_RES_FULL_COVER) mask = NULL; -static void lv_draw_stm32_dma2d_img_decoded(lv_draw_ctx_t * draw_ctx, const lv_draw_img_dsc_t * dsc, - const lv_area_t * coords, const uint8_t * map_p, lv_img_cf_t color_format) -{ - /*TODO basic ARGB8888 image can be handles here*/ + lv_coord_t dest_stride = lv_area_get_width(draw_ctx->buf_area); + if(mask != NULL) { + // For performance reasons, both mask buffer start address and buffer size *should* be 32-byte aligned since mask buffer cache is being cleaned. + //uint32_t srcBufferLength = lv_area_get_size(dsc->mask_area) * sizeof(lv_opa_t); + //LV_ASSERT_MSG(srcBufferLength % CACHE_ROW_SIZE == 0); // FIXME: assert fails (performance, non-critical) + //LV_ASSERT_MSG((uint32_t)mask % CACHE_ROW_SIZE == 0); // FIXME: assert fails (performance, non-critical) - lv_draw_sw_img_decoded(draw_ctx, dsc, coords, map_p, color_format); -} + lv_coord_t mask_stride = lv_area_get_width(dsc->mask_area); + lv_point_t mask_offset = lv_area_get_offset(dsc->mask_area, &draw_area); // mask offset in relation to draw_area -static void lv_draw_stm32_dma2d_blend_fill(lv_color_t * dest_buf, lv_coord_t dest_stride, const lv_area_t * fill_area, - lv_color_t color) -{ - /*Simply fill an area*/ - int32_t area_w = lv_area_get_width(fill_area); - int32_t area_h = lv_area_get_height(fill_area); - invalidate_cache(); + if(dsc->src_buf == NULL) { // 93.5% + lv_area_move(&draw_area, -draw_ctx->buf_area->x1, -draw_ctx->buf_area->y1); + _lv_draw_stm32_dma2d_blend_paint(draw_ctx->buf, dest_stride, &draw_area, mask, mask_stride, &mask_offset, dsc->color, + dsc->opa); + } + else { // 0.2% + // note: (x)RGB dsc->src_buf does not carry alpha channel bytes, + // alpha channel bytes are carried in dsc->mask_buf +#if LV_COLOR_DEPTH == 32 + lv_coord_t src_stride = lv_area_get_width(dsc->blend_area); + lv_point_t src_offset = lv_area_get_offset(dsc->blend_area, &draw_area); // source image offset in relation to draw_area + lv_coord_t draw_width = lv_area_get_width(&draw_area); + lv_coord_t draw_height = lv_area_get_height(&draw_area); - DMA2D->CR = 0x30000; - DMA2D->OMAR = (uint32_t)dest_buf; - /*as input color mode is same as output we don't need to convert here do we?*/ - DMA2D->OCOLR = color.full; - DMA2D->OOR = dest_stride - area_w; - DMA2D->NLR = (area_w << DMA2D_NLR_PL_Pos) | (area_h << DMA2D_NLR_NL_Pos); + // merge mask alpha bytes with src RGB bytes + // TODO: optimize by reading 4 or 8 mask bytes at a time + mask += (mask_stride * mask_offset.y) + mask_offset.x; + lv_color_t * src_buf = (lv_color_t *)dsc->src_buf; + src_buf += (src_stride * src_offset.y) + src_offset.x; + uint16_t mask_buffer_offset = mask_stride - draw_width; + uint16_t src_buffer_offset = src_stride - draw_width; + while(draw_height > 0) { + draw_height--; + for(uint16_t x = 0; x < draw_width; x++) { + (*src_buf).ch.alpha = *mask; + src_buf++; + mask++; + } + mask += mask_buffer_offset; + src_buf += src_buffer_offset; + } - /*start transfer*/ - DMA2D->CR |= DMA2D_CR_START_Msk; + lv_area_move(&draw_area, -draw_ctx->buf_area->x1, + -draw_ctx->buf_area->y1); // translate the screen draw area to the origin of the buffer area + _lv_draw_stm32_dma2d_blend_map(draw_ctx->buf, dest_stride, &draw_area, dsc->src_buf, src_stride, &src_offset, dsc->opa, + ARGB8888, false); +#else + // Note: 16-bit bitmap hardware blending with mask and background is possible, but requires a temp 24 or 32-bit buffer to combine bitmap with mask first. -} - - -static void lv_draw_stm32_dma2d_blend_map(lv_color_t * dest_buf, const lv_area_t * dest_area, lv_coord_t dest_stride, - const lv_color_t * src_buf, lv_coord_t src_stride, lv_opa_t opa) -{ - - /*Simple copy*/ - int32_t dest_w = lv_area_get_width(dest_area); - int32_t dest_h = lv_area_get_height(dest_area); - - invalidate_cache(); - if(opa >= LV_OPA_MAX) { - DMA2D->CR = 0; - /*copy output colour mode, this register controls both input and output colour format*/ - DMA2D->FGPFCCR = LV_DMA2D_COLOR_FORMAT; - DMA2D->FGMAR = (uint32_t)src_buf; - DMA2D->FGOR = src_stride - dest_w; - DMA2D->OMAR = (uint32_t)dest_buf; - DMA2D->OOR = dest_stride - dest_w; - DMA2D->NLR = (dest_w << DMA2D_NLR_PL_Pos) | (dest_h << DMA2D_NLR_NL_Pos); - - /*start transfer*/ - DMA2D->CR |= DMA2D_CR_START_Msk; + lv_draw_sw_blend_basic(draw_ctx, dsc); // (e.g. Shop Items) + // clean cache after software drawing - this does not help since this is not the only place where buffer is written without dma2d + // lv_coord_t draw_width = lv_area_get_width(&draw_area); + // lv_coord_t draw_height = lv_area_get_height(&draw_area); + // uint32_t dest_address = (uint32_t)(draw_ctx->buf + (dest_stride * draw_area.y1) + draw_area.x1); + // _lv_gpu_stm32_dma2d_clean_cache(dest_address, dest_stride - draw_width, draw_width, draw_height, sizeof(lv_color_t)); +#endif + } } else { - DMA2D->CR = 0x20000; - - DMA2D->BGPFCCR = LV_DMA2D_COLOR_FORMAT; - DMA2D->BGMAR = (uint32_t)dest_buf; - DMA2D->BGOR = dest_stride - dest_w; - - DMA2D->FGPFCCR = (uint32_t)LV_DMA2D_COLOR_FORMAT - /*alpha mode 2, replace with foreground * alpha value*/ - | (2 << DMA2D_FGPFCCR_AM_Pos) - /*alpha value*/ - | (opa << DMA2D_FGPFCCR_ALPHA_Pos); - DMA2D->FGMAR = (uint32_t)src_buf; - DMA2D->FGOR = src_stride - dest_w; - - DMA2D->OMAR = (uint32_t)dest_buf; - DMA2D->OOR = dest_stride - dest_w; - DMA2D->NLR = (dest_w << DMA2D_NLR_PL_Pos) | (dest_h << DMA2D_NLR_NL_Pos); - - /*start transfer*/ - DMA2D->CR |= DMA2D_CR_START_Msk; + if(dsc->src_buf == NULL) { // 6.1% + lv_area_move(&draw_area, -draw_ctx->buf_area->x1, + -draw_ctx->buf_area->y1); // translate the screen draw area to the origin of the buffer area + _lv_draw_stm32_dma2d_blend_fill(draw_ctx->buf, dest_stride, &draw_area, dsc->color, dsc->opa); + } + else { // 0.2% + lv_coord_t src_stride = lv_area_get_width(dsc->blend_area); + lv_point_t src_offset = lv_area_get_offset(dsc->blend_area, &draw_area); // source image offset in relation to draw_area + lv_area_move(&draw_area, -draw_ctx->buf_area->x1, + -draw_ctx->buf_area->y1); // translate the screen draw area to the origin of the buffer area + _lv_draw_stm32_dma2d_blend_map(draw_ctx->buf, dest_stride, &draw_area, dsc->src_buf, src_stride, &src_offset, dsc->opa, + LvglColorFormat, true); + } } } -void lv_gpu_stm32_dma2d_wait_cb(lv_draw_ctx_t * draw_ctx) +// Does dest_area = intersect(draw_ctx->clip_area, src_area) ? +// See: https://github.com/lvgl/lvgl/issues/3714#issuecomment-1331710788 +static void lv_draw_stm32_dma2d_buffer_copy(lv_draw_ctx_t * draw_ctx, void * dest_buf, lv_coord_t dest_stride, + const lv_area_t * dest_area, void * src_buf, lv_coord_t src_stride, const lv_area_t * src_area) +{ + // Both draw buffer start address and buffer size *must* be 32-byte aligned since draw buffer cache is being invalidated. + //uint32_t drawBufferLength = lv_area_get_size(draw_ctx->buf_area) * sizeof(lv_color_t); + //LV_ASSERT_MSG(drawBufferLength % CACHE_ROW_SIZE == 0); // critical, but this is not the way to test it + //LV_ASSERT_MSG((uint32_t)draw_ctx->buf % CACHE_ROW_SIZE == 0, "draw_ctx.buf is not 32B aligned"); // critical? + // FIXME: + // 1. Both src_buf and dest_buf pixel size *must* be known to use DMA2D. + // 2. Verify both buffers start addresses and lengths are 32-byte (cache row size) aligned. + LV_UNUSED(draw_ctx); + lv_point_t src_offset = lv_area_get_offset(src_area, dest_area); + // FIXME: use lv_area_move(dest_area, -dest_area->x1, -dest_area->y1) here ? + // TODO: It is assumed that dest_buf and src_buf buffers are of lv_color_t type. Verify it, this assumption may be incorrect. + _lv_draw_stm32_dma2d_blend_map((const lv_color_t *)dest_buf, dest_stride, dest_area, (const lv_color_t *)src_buf, + src_stride, &src_offset, 0xff, LvglColorFormat, true); + // TODO: Investigate if output buffer cache needs to be invalidated. It depends on what the destination buffer is and how it is used next - by dma2d or not. + _lv_gpu_stm32_dma2d_await_dma_transfer_finish(NULL); // TODO: is this line needed here? +} + +static void lv_draw_stm32_dma2d_img_decoded(lv_draw_ctx_t * draw_ctx, const lv_draw_img_dsc_t * img_dsc, + const lv_area_t * coords, const uint8_t * src_buf, lv_img_cf_t color_format) +{ + if(draw_ctx->draw_img_decoded == NULL) return; + lv_area_t draw_area; + lv_area_copy(&draw_area, draw_ctx->clip_area); + + bool mask_any = lv_draw_mask_is_any(&draw_area); + bool transform = img_dsc->angle != 0 || img_dsc->zoom != LV_IMG_ZOOM_NONE; + const dma2d_color_format_t bitmapColorFormat = lv_color_format_to_dma2d_color_format(color_format); + const bool ignoreBitmapAlpha = (color_format == LV_IMG_CF_RGBX8888); + + if(!mask_any && !transform && bitmapColorFormat != UNSUPPORTED && img_dsc->recolor_opa == LV_OPA_TRANSP) { + // simple bitmap blending, optionally with supported color format conversion - handle directly by dma2d + lv_coord_t dest_stride = lv_area_get_width(draw_ctx->buf_area); + lv_coord_t src_stride = lv_area_get_width(coords); + lv_point_t src_offset = lv_area_get_offset(coords, &draw_area); // source image offset in relation to draw_area + lv_area_move(&draw_area, -draw_ctx->buf_area->x1, -draw_ctx->buf_area->y1); + _lv_draw_stm32_dma2d_blend_map(draw_ctx->buf, dest_stride, &draw_area, src_buf, src_stride, &src_offset, + img_dsc->opa, bitmapColorFormat, ignoreBitmapAlpha); + } + else { + // all more complex cases which require additional image transformations + lv_draw_sw_img_decoded(draw_ctx, img_dsc, coords, src_buf, color_format); + + } +} + +static lv_point_t lv_area_get_offset(const lv_area_t * area1, const lv_area_t * area2) +{ + lv_point_t offset = {x: area2->x1 - area1->x1, y: area2->y1 - area1->y1}; + return offset; +} + +static dma2d_color_format_t lv_color_format_to_dma2d_color_format(lv_img_cf_t color_format) +{ + switch(color_format) { + case LV_IMG_CF_RGBA8888: + // note: LV_IMG_CF_RGBA8888 is actually ARGB8888 + return ARGB8888; + case LV_IMG_CF_RGBX8888: + // note: LV_IMG_CF_RGBX8888 is actually XRGB8888 + return ARGB8888; + case LV_IMG_CF_RGB565: + return RGB565; + case LV_IMG_CF_TRUE_COLOR: + return LvglColorFormat; + case LV_IMG_CF_TRUE_COLOR_ALPHA: +#if LV_COLOR_DEPTH == 16 + // bitmap color format is 24b ARGB8565 - dma2d unsupported + return UNSUPPORTED; +#elif LV_COLOR_DEPTH == 32 + return ARGB8888; +#else + // unknown bitmap color format + return UNSUPPORTED; +#endif + default: + return UNSUPPORTED; + } +} + +static lv_res_t lv_draw_stm32_dma2d_img(lv_draw_ctx_t * draw_ctx, const lv_draw_img_dsc_t * img_dsc, + const lv_area_t * src_area, + const void * src) +{ + //if(lv_img_src_get_type(src) != LV_IMG_SRC_VARIABLE) return LV_RES_INV; + return LV_RES_INV; + if(img_dsc->opa <= LV_OPA_MIN) return LV_RES_OK; + const lv_img_dsc_t * img = src; + const dma2d_color_format_t bitmapColorFormat = lv_color_format_to_dma2d_color_format(img->header.cf); + const bool ignoreBitmapAlpha = (img->header.cf == LV_IMG_CF_RGBX8888); + + if(bitmapColorFormat == UNSUPPORTED || img_dsc->angle != 0 || img_dsc->zoom != LV_IMG_ZOOM_NONE) { + return LV_RES_INV; // sorry, dma2d can handle this + } + + // FIXME: handle dsc.pivot, dsc.recolor, dsc.blend_mode + // FIXME: src pixel size *must* be known to use DMA2D + // FIXME: If image is drawn by SW, then output cache needs to be cleaned next. Currently it is not possible. + // Both draw buffer start address and buffer size *must* be 32-byte aligned since draw buffer cache is being invalidated. + //uint32_t drawBufferLength = lv_area_get_size(draw_ctx->buf_area) * sizeof(lv_color_t); + //LV_ASSERT_MSG(drawBufferLength % CACHE_ROW_SIZE == 0); // critical, but this is not the way to test it + //LV_ASSERT_MSG((uint32_t)draw_ctx->buf % CACHE_ROW_SIZE == 0, "draw_ctx.buf is not 32B aligned"); // critical? + + // For performance reasons, both source buffer start address and buffer size *should* be 32-byte aligned since source buffer cache is being cleaned. + //uint32_t srcBufferLength = lv_area_get_size(src_area) * sizeof(lv_color_t); // TODO: verify src pixel size = sizeof(lv_color_t) + //LV_ASSERT_MSG(srcBufferLength % CACHE_ROW_SIZE == 0); // FIXME: assert fails (performance, non-critical) + //LV_ASSERT_MSG((uint32_t)src % CACHE_ROW_SIZE == 0); // FIXME: assert fails (performance, non-critical) + + lv_area_t draw_area; + if(!_lv_area_intersect(&draw_area, src_area, draw_ctx->clip_area)) return LV_RES_OK; + + lv_coord_t dest_stride = lv_area_get_width(draw_ctx->buf_area); + lv_point_t src_offset = lv_area_get_offset(src_area, &draw_area); // source image offset in relation to draw_area + lv_area_move(&draw_area, -draw_ctx->buf_area->x1, -draw_ctx->buf_area->y1); + _lv_draw_stm32_dma2d_blend_map(draw_ctx->buf, dest_stride, &draw_area, img->data, img->header.w, + &src_offset, img_dsc->opa, bitmapColorFormat, ignoreBitmapAlpha); + return LV_RES_OK; +} + +static void lv_gpu_stm32_dma2d_wait_cb(lv_draw_ctx_t * draw_ctx) { lv_disp_t * disp = _lv_refr_get_disp_refreshing(); - if(disp->driver && disp->driver->wait_cb) { - while(DMA2D->CR & DMA2D_CR_START_Msk) { - disp->driver->wait_cb(disp->driver); - } - } - else { - while(DMA2D->CR & DMA2D_CR_START_Msk); - } + _lv_gpu_stm32_dma2d_await_dma_transfer_finish(disp->driver); lv_draw_sw_wait_for_finish(draw_ctx); - } /********************** * STATIC FUNCTIONS **********************/ -static void invalidate_cache(void) +/** + * @brief Fills draw_area with specified color. + * @param color color to be painted, note: alpha is ignored + */ +LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_blend_fill(const lv_color_t * dest_buf, lv_coord_t dest_stride, + const lv_area_t * draw_area, lv_color_t color, lv_opa_t opa) { - lv_disp_t * disp = _lv_refr_get_disp_refreshing(); - if(disp->driver->clean_dcache_cb) disp->driver->clean_dcache_cb(disp->driver); - else { -#if __CORTEX_M >= 0x07 - if((SCB->CCR) & (uint32_t)SCB_CCR_DC_Msk) - SCB_CleanInvalidateDCache(); + LV_ASSERT_MSG(!isDma2dInProgess, "dma2d transfer has not finished"); // critical + lv_coord_t draw_width = lv_area_get_width(draw_area); + lv_coord_t draw_height = lv_area_get_height(draw_area); + + _lv_gpu_stm32_dma2d_await_dma_transfer_finish(NULL); + + if(opa >= LV_OPA_MAX) { + DMA2D->CR = 0x3UL << DMA2D_CR_MODE_Pos; // Register-to-memory (no FG nor BG, only output stage active) + + DMA2D->OPFCCR = LvglColorFormat; + DMA2D->OPFCCR |= (RBS_BIT << DMA2D_OPFCCR_RBS_Pos); + DMA2D->OMAR = (uint32_t)(dest_buf + (dest_stride * draw_area->y1) + draw_area->x1); + DMA2D->OOR = dest_stride - draw_width; // out buffer offset + // Note: unlike FGCOLR and BGCOLR, OCOLR bits must match DMA2D_OUTPUT_COLOR, alpha can be specified +#if RBS_BIT + // swap red/blue bits + DMA2D->OCOLR = (color.ch.blue << 11) | (color.ch.green_l << 5 | color.ch.green_h << 8) | (color.ch.red); +#else + DMA2D->OCOLR = color.full; #endif } + else { + DMA2D->CR = 0x2UL << DMA2D_CR_MODE_Pos; // Memory-to-memory with blending (FG and BG fetch with PFC and blending) + + DMA2D->FGPFCCR = A8; + DMA2D->FGPFCCR |= (opa << DMA2D_FGPFCCR_ALPHA_Pos); + // Alpha Mode 1: Replace original foreground image alpha channel value by FGPFCCR.ALPHA + DMA2D->FGPFCCR |= (0x1UL << DMA2D_FGPFCCR_AM_Pos); + //DMA2D->FGPFCCR |= (RBS_BIT << DMA2D_FGPFCCR_RBS_Pos); + + // Note: in Alpha Mode 1 FGMAR and FGOR are not used to supply foreground A8 bytes, + // those bytes are replaced by constant ALPHA defined in FGPFCCR + DMA2D->FGMAR = (uint32_t)dest_buf; + DMA2D->FGOR = dest_stride; + DMA2D->FGCOLR = lv_color_to32(color) & 0x00ffffff; // swap FGCOLR R/B bits if FGPFCCR.RBS (RBS_BIT) bit is set + + DMA2D->BGPFCCR = LvglColorFormat; + DMA2D->BGPFCCR |= (RBS_BIT << DMA2D_BGPFCCR_RBS_Pos); + DMA2D->BGMAR = (uint32_t)(dest_buf + (dest_stride * draw_area->y1) + draw_area->x1); + DMA2D->BGOR = dest_stride - draw_width; + DMA2D->BGCOLR = 0; // used in A4 and A8 modes only + _lv_gpu_stm32_dma2d_clean_cache(DMA2D->BGMAR, DMA2D->BGOR, draw_width, draw_height, sizeof(lv_color_t)); + + DMA2D->OPFCCR = LvglColorFormat; + DMA2D->OPFCCR |= (RBS_BIT << DMA2D_OPFCCR_RBS_Pos); + DMA2D->OMAR = DMA2D->BGMAR; + DMA2D->OOR = DMA2D->BGOR; + DMA2D->OCOLR = 0; + } + // PL - pixel per lines (14 bit), NL - number of lines (16 bit) + DMA2D->NLR = (draw_width << DMA2D_NLR_PL_Pos) | (draw_height << DMA2D_NLR_NL_Pos); + + _lv_gpu_stm32_dma2d_start_dma_transfer(); +} + +/** + * @brief Draws src (foreground) map on dst (background) map. + * @param src_offset src offset in relation to dst, useful when src is larger than draw_area + * @param opa constant opacity to be applied + * @param bitmapColorCode bitmap color type + * @param ignoreAlpha if TRUE, bitmap src alpha channel is ignored + */ +LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_blend_map(const lv_color_t * dest_buf, lv_coord_t dest_stride, + const lv_area_t * draw_area, const void * src_buf, lv_coord_t src_stride, const lv_point_t * src_offset, lv_opa_t opa, + dma2d_color_format_t src_color_format, bool ignore_src_alpha) +{ + LV_ASSERT_MSG(!isDma2dInProgess, "dma2d transfer has not finished"); // critical + if(opa <= LV_OPA_MIN || src_color_format == UNSUPPORTED) return; + lv_coord_t draw_width = lv_area_get_width(draw_area); + lv_coord_t draw_height = lv_area_get_height(draw_area); + bool bitmapHasOpacity = !ignore_src_alpha && (src_color_format == ARGB8888 || src_color_format == ARGB1555 || + src_color_format == ARGB4444); + + if(opa >= LV_OPA_MAX) opa = 0xff; + + uint8_t srcBpp; // source bytes per pixel + switch(src_color_format) { + case ARGB8888: + srcBpp = 4; + break; + case RGB888: + srcBpp = 3; + break; + case RGB565: + case ARGB1555: + case ARGB4444: + srcBpp = 2; + break; + default: + LV_LOG_ERROR("unsupported color format"); + return; + } + + _lv_gpu_stm32_dma2d_await_dma_transfer_finish(NULL); + + DMA2D->FGPFCCR = src_color_format; + + if(opa == 0xff && !bitmapHasOpacity) { + // no need to blend + if(src_color_format == LvglColorFormat) { + // no need to convert pixel format (PFC) either + DMA2D->CR = 0x0UL; + } + else { + DMA2D->CR = 0x1UL << DMA2D_CR_MODE_Pos; // Memory-to-memory with PFC (FG fetch only with FG PFC active) + } + // Alpha Mode 0: No modification of the foreground image alpha channel value + } + else { + // blend + DMA2D->CR = 0x2UL << DMA2D_CR_MODE_Pos; // Memory-to-memory with blending (FG and BG fetch with PFC and blending) + DMA2D->FGPFCCR |= (opa << DMA2D_FGPFCCR_ALPHA_Pos); + if(bitmapHasOpacity) { + // Alpha Mode 2: Replace original foreground image alpha channel value by FGPFCCR.ALPHA multiplied with original alpha channel value + DMA2D->FGPFCCR |= (0x2UL << DMA2D_FGPFCCR_AM_Pos); + } + else { + // Alpha Mode 1: Replace original foreground image alpha channel value by FGPFCCR.ALPHA + DMA2D->FGPFCCR |= (0x1UL << DMA2D_FGPFCCR_AM_Pos); + } + } + + DMA2D->FGPFCCR |= (RBS_BIT << DMA2D_FGPFCCR_RBS_Pos); + DMA2D->FGMAR = ((uint32_t)src_buf) + srcBpp * ((src_stride * src_offset->y) + src_offset->x); + DMA2D->FGOR = src_stride - draw_width; + DMA2D->FGCOLR = 0; // used in A4 and A8 modes only + _lv_gpu_stm32_dma2d_clean_cache(DMA2D->FGMAR, DMA2D->FGOR, draw_width, draw_height, srcBpp); + + DMA2D->OPFCCR = LvglColorFormat; + DMA2D->OPFCCR |= (RBS_BIT << DMA2D_OPFCCR_RBS_Pos); + DMA2D->OMAR = (uint32_t)(dest_buf + (dest_stride * draw_area->y1) + draw_area->x1); + DMA2D->OOR = dest_stride - draw_width; + DMA2D->OCOLR = 0; + + if(opa != 0xff || bitmapHasOpacity) { + // use background (BG*) registers + DMA2D->BGPFCCR = LvglColorFormat; + DMA2D->BGPFCCR |= (RBS_BIT << DMA2D_BGPFCCR_RBS_Pos); + DMA2D->BGMAR = DMA2D->OMAR; + DMA2D->BGOR = DMA2D->OOR; + DMA2D->BGCOLR = 0; // used in A4 and A8 modes only + _lv_gpu_stm32_dma2d_clean_cache(DMA2D->BGMAR, DMA2D->BGOR, draw_width, draw_height, sizeof(lv_color_t)); + } + + // PL - pixel per lines (14 bit), NL - number of lines (16 bit) + DMA2D->NLR = (draw_width << DMA2D_NLR_PL_Pos) | (draw_height << DMA2D_NLR_NL_Pos); + + _lv_gpu_stm32_dma2d_start_dma_transfer(); +} + +/** + * @brief Paints solid color with alpha mask with additional constant opacity. Useful e.g. for painting anti-aliased fonts. + * @param src_offset src offset in relation to dst, useful when src (alpha mask) is larger than draw_area + * @param color color to paint, note: alpha is ignored + * @param opa constant opacity to be applied + */ +LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_blend_paint(const lv_color_t * dest_buf, lv_coord_t dest_stride, + const lv_area_t * draw_area, const lv_opa_t * mask_buf, lv_coord_t mask_stride, const lv_point_t * mask_offset, + lv_color_t color, lv_opa_t opa) +{ + LV_ASSERT_MSG(!isDma2dInProgess, "dma2d transfer has not finished"); // critical + lv_coord_t draw_width = lv_area_get_width(draw_area); + lv_coord_t draw_height = lv_area_get_height(draw_area); + + _lv_gpu_stm32_dma2d_await_dma_transfer_finish(NULL); + + DMA2D->CR = 0x2UL << DMA2D_CR_MODE_Pos; // Memory-to-memory with blending (FG and BG fetch with PFC and blending) + + DMA2D->FGPFCCR = A8; + if(opa < LV_OPA_MAX) { + DMA2D->FGPFCCR |= (opa << DMA2D_FGPFCCR_ALPHA_Pos); + DMA2D->FGPFCCR |= (0x2UL << + DMA2D_FGPFCCR_AM_Pos); // Alpha Mode: Replace original foreground image alpha channel value by FGPFCCR.ALPHA multiplied with original alpha channel value + } + //DMA2D->FGPFCCR |= (RBS_BIT << DMA2D_FGPFCCR_RBS_Pos); + DMA2D->FGMAR = (uint32_t)(mask_buf + (mask_stride * mask_offset->y) + mask_offset->x); + DMA2D->FGOR = mask_stride - draw_width; + DMA2D->FGCOLR = lv_color_to32(color) & 0x00ffffff; // swap FGCOLR R/B bits if FGPFCCR.RBS (RBS_BIT) bit is set + _lv_gpu_stm32_dma2d_clean_cache(DMA2D->FGMAR, DMA2D->FGOR, draw_width, draw_height, sizeof(lv_opa_t)); + + DMA2D->BGPFCCR = LvglColorFormat; + DMA2D->BGPFCCR |= (RBS_BIT << DMA2D_BGPFCCR_RBS_Pos); + DMA2D->BGMAR = (uint32_t)(dest_buf + (dest_stride * draw_area->y1) + draw_area->x1); + DMA2D->BGOR = dest_stride - draw_width; + DMA2D->BGCOLR = 0; // used in A4 and A8 modes only + _lv_gpu_stm32_dma2d_clean_cache(DMA2D->BGMAR, DMA2D->BGOR, draw_width, draw_height, sizeof(lv_color_t)); + + DMA2D->OPFCCR = LvglColorFormat; + DMA2D->OPFCCR |= (RBS_BIT << DMA2D_OPFCCR_RBS_Pos); + DMA2D->OMAR = DMA2D->BGMAR; + DMA2D->OOR = DMA2D->BGOR; + DMA2D->OCOLR = 0; + // PL - pixel per lines (14 bit), NL - number of lines (16 bit) + DMA2D->NLR = (draw_width << DMA2D_NLR_PL_Pos) | (draw_height << DMA2D_NLR_NL_Pos); + + _lv_gpu_stm32_dma2d_start_dma_transfer(); +} + +/** + * @brief Copies src (foreground) map to the dst (background) map. + * @param src_offset src offset in relation to dst, useful when src is larger than draw_area + */ +LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_copy_buffer(const lv_color_t * dest_buf, lv_coord_t dest_stride, + const lv_area_t * draw_area, const lv_color_t * src_buf, lv_coord_t src_stride, const lv_point_t * src_offset) +{ + LV_ASSERT_MSG(!isDma2dInProgess, "dma2d transfer has not finished"); // critical + lv_coord_t draw_width = lv_area_get_width(draw_area); + lv_coord_t draw_height = lv_area_get_height(draw_area); + + _lv_gpu_stm32_dma2d_await_dma_transfer_finish(NULL); + + DMA2D->CR = 0x0UL; // Memory-to-memory (FG fetch only) + + DMA2D->FGPFCCR = LvglColorFormat; + DMA2D->FGPFCCR |= (RBS_BIT << DMA2D_FGPFCCR_RBS_Pos); + DMA2D->FGMAR = (uint32_t)(src_buf + (src_stride * src_offset->y) + src_offset->x); + DMA2D->FGOR = src_stride - draw_width; + DMA2D->FGCOLR = 0; // used in A4 and A8 modes only + _lv_gpu_stm32_dma2d_clean_cache(DMA2D->FGMAR, DMA2D->FGOR, draw_width, draw_height, sizeof(lv_color_t)); + + // Note BG* registers do not need to be set up since BG is not used + + DMA2D->OPFCCR = LvglColorFormat; + DMA2D->OPFCCR |= (RBS_BIT << DMA2D_OPFCCR_RBS_Pos); + DMA2D->OMAR = (uint32_t)(dest_buf + (dest_stride * draw_area->y1) + draw_area->x1); + DMA2D->OOR = dest_stride - draw_width; + DMA2D->OCOLR = 0; + + // PL - pixel per lines (14 bit), NL - number of lines (16 bit) + DMA2D->NLR = (draw_width << DMA2D_NLR_PL_Pos) | (draw_height << DMA2D_NLR_NL_Pos); + + _lv_gpu_stm32_dma2d_start_dma_transfer(); +} + +LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dma2d_start_dma_transfer(void) +{ + LV_ASSERT_MSG(!isDma2dInProgess, "dma2d transfer has not finished"); + isDma2dInProgess = true; + DMA2D->IFCR = 0x3FU; // trigger ISR flags reset + // Note: cleaning output buffer cache is needed only when buffer may be misaligned or adjacent area may have been drawn in sw-fashion, e.g. using lv_draw_sw_blend_basic() +#if LV_COLOR_DEPTH == 16 + _lv_gpu_stm32_dma2d_clean_cache(DMA2D->OMAR, DMA2D->OOR, (DMA2D->NLR & DMA2D_NLR_PL_Msk) >> DMA2D_NLR_PL_Pos, + (DMA2D->NLR & DMA2D_NLR_NL_Msk) >> DMA2D_NLR_NL_Pos, sizeof(lv_color_t)); +#endif + DMA2D->CR |= DMA2D_CR_START; + // Note: for some reason mask buffer gets damaged during transfer if waiting is postponed + _lv_gpu_stm32_dma2d_await_dma_transfer_finish(NULL); // FIXME: this line should not be needed here, but it is +} + +LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dma2d_await_dma_transfer_finish(lv_disp_drv_t * disp_drv) +{ + if(disp_drv && disp_drv->wait_cb) { + while((DMA2D->CR & DMA2D_CR_START) != 0U) { + disp_drv->wait_cb(disp_drv); + } + } + else { + while((DMA2D->CR & DMA2D_CR_START) != 0U); + } + + __IO uint32_t isrFlags = DMA2D->ISR; + + if(isrFlags & DMA2D_ISR_CEIF) { + LV_LOG_ERROR("DMA2D config error"); + } + + if(isrFlags & DMA2D_ISR_TEIF) { + LV_LOG_ERROR("DMA2D transfer error"); + } + + DMA2D->IFCR = 0x3FU; // trigger ISR flags reset + + if(isDma2dInProgess) { + // invalidate output buffer cached memory ONLY after DMA2D transfer + //_lv_gpu_stm32_dma2d_invalidate_cache(DMA2D->OMAR, DMA2D->OOR, (DMA2D->NLR & DMA2D_NLR_PL_Msk) >> DMA2D_NLR_PL_Pos, (DMA2D->NLR & DMA2D_NLR_NL_Msk) >> DMA2D_NLR_NL_Pos, sizeof(lv_color_t)); + isDma2dInProgess = false; + } +} + +LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dma2d_invalidate_cache(uint32_t address, lv_coord_t offset, lv_coord_t width, + lv_coord_t height, uint8_t pixel_size) +{ + if(((SCB->CCR) & SCB_CCR_DC_Msk) == 0) return; // L1 data cache is disabled + uint16_t stride = pixel_size * (width + offset); // in bytes + uint16_t ll = pixel_size * width; // line length in bytes + uint32_t n = 0; // address of the next cache row after the last invalidated row + lv_coord_t h = 0; + + __DSB(); + + while(h < height) { + uint32_t a = address + (h * stride); + uint32_t e = a + ll; // end address, address of the first byte after the current line + a &= ~(CACHE_ROW_SIZE - 1U); + if(a < n) a = n; // prevent the previous last cache row from being invalidated again + + while(a < e) { + SCB->DCIMVAC = a; + a += CACHE_ROW_SIZE; + } + + n = a; + h++; + }; + + __DSB(); + __ISB(); +} + +LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dma2d_clean_cache(uint32_t address, lv_coord_t offset, lv_coord_t width, + lv_coord_t height, uint8_t pixel_size) +{ + if(((SCB->CCR) & SCB_CCR_DC_Msk) == 0) return; // L1 data cache is disabled + uint16_t stride = pixel_size * (width + offset); // in bytes + uint16_t ll = pixel_size * width; // line length in bytes + uint32_t n = 0; // address of the next cache row after the last cleaned row + lv_coord_t h = 0; + __DSB(); + + while(h < height) { + uint32_t a = address + (h * stride); + uint32_t e = a + ll; // end address, address of the first byte after the current line + a &= ~(CACHE_ROW_SIZE - 1U); + if(a < n) a = n; // prevent the previous last cache row from being cleaned again + + while(a < e) { + SCB->DCCMVAC = a; + a += CACHE_ROW_SIZE; + } + + n = a; + h++; + }; + + __DSB(); + __ISB(); +} + +// initialize µs timer +LV_STM32_DMA2D_STATIC bool _lv_gpu_stm32_dwt_init(void) +{ + // disable TRC + CoreDebug->DEMCR &= ~CoreDebug_DEMCR_TRCENA_Msk; + // enable TRC + CoreDebug->DEMCR |= CoreDebug_DEMCR_TRCENA_Msk; + + DWT->LAR = 0xC5ACCE55; + + // disable clock cycle counter + DWT->CTRL &= ~DWT_CTRL_CYCCNTENA_Msk; + // enable clock cycle counter + DWT->CTRL |= DWT_CTRL_CYCCNTENA_Msk; + + // reset the clock cycle counter value + DWT->CYCCNT = 0; + + // 3 NO OPERATION instructions + __ASM volatile("NOP"); + __ASM volatile("NOP"); + __ASM volatile("NOP"); + + // check if clock cycle counter has started + if(DWT->CYCCNT) { + return true; // clock cycle counter started + } + else { + return false; // clock cycle counter not started + } +} + +// get elapsed µs since reset +LV_STM32_DMA2D_STATIC uint32_t _lv_gpu_stm32_dwt_get_us(void) +{ + uint32_t us = (DWT->CYCCNT * 1000000) / HAL_RCC_GetHCLKFreq(); + return us; +} + +// reset µs timer +LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dwt_reset(void) +{ + DWT->CYCCNT = 0; } #endif diff --git a/src/draw/stm32_dma2d/lv_gpu_stm32_dma2d.h b/src/draw/stm32_dma2d/lv_gpu_stm32_dma2d.h index fa7070e2a..5ecce6deb 100644 --- a/src/draw/stm32_dma2d/lv_gpu_stm32_dma2d.h +++ b/src/draw/stm32_dma2d/lv_gpu_stm32_dma2d.h @@ -10,52 +10,83 @@ extern "C" { #endif -/********************* - * INCLUDES - *********************/ #include "../../misc/lv_color.h" #include "../../hal/lv_hal_disp.h" #include "../sw/lv_draw_sw.h" #if LV_USE_GPU_STM32_DMA2D +/********************* + * INCLUDES + *********************/ +#include LV_GPU_DMA2D_CMSIS_INCLUDE + /********************* * DEFINES *********************/ - -#define LV_DMA2D_ARGB8888 0 -#define LV_DMA2D_RGB888 1 -#define LV_DMA2D_RGB565 2 -#define LV_DMA2D_ARGB1555 3 -#define LV_DMA2D_ARGB4444 4 +#if defined(LV_STM32_DMA2D_TEST) +// removes "static" modifier for some internal methods in order to test them +#define LV_STM32_DMA2D_STATIC +#else +#define LV_STM32_DMA2D_STATIC static +#endif /********************** * TYPEDEFS **********************/ +enum dma2d_color_format { + ARGB8888 = 0x0, + RGB888 = 0x01, + RGB565 = 0x02, + ARGB1555 = 0x03, + ARGB4444 = 0x04, + A8 = 0x09, + UNSUPPORTED = 0xff, +}; +typedef enum dma2d_color_format dma2d_color_format_t; typedef lv_draw_sw_ctx_t lv_draw_stm32_dma2d_ctx_t; - struct _lv_disp_drv_t; /********************** * GLOBAL PROTOTYPES **********************/ - -/** - * Turn on the peripheral and set output color mode, this only needs to be done once - */ void lv_draw_stm32_dma2d_init(void); - void lv_draw_stm32_dma2d_ctx_init(struct _lv_disp_drv_t * drv, lv_draw_ctx_t * draw_ctx); - void lv_draw_stm32_dma2d_ctx_deinit(struct _lv_disp_drv_t * drv, lv_draw_ctx_t * draw_ctx); +static void lv_draw_stm32_dma2d_blend(lv_draw_ctx_t * draw_ctx, const lv_draw_sw_blend_dsc_t * dsc); +static void lv_draw_stm32_dma2d_buffer_copy(lv_draw_ctx_t * draw_ctx, + void * dest_buf, lv_coord_t dest_stride, const lv_area_t * dest_area, + void * src_buf, lv_coord_t src_stride, const lv_area_t * src_area); +static lv_res_t lv_draw_stm32_dma2d_img(lv_draw_ctx_t * draw_ctx, const lv_draw_img_dsc_t * img_dsc, + const lv_area_t * src_area, const void * src); +static void lv_gpu_stm32_dma2d_wait_cb(lv_draw_ctx_t * draw_ctx); +static void lv_draw_stm32_dma2d_img_decoded(lv_draw_ctx_t * draw_ctx, const lv_draw_img_dsc_t * img_dsc, + const lv_area_t * coords, const uint8_t * src_buf, lv_img_cf_t color_format); +static dma2d_color_format_t lv_color_format_to_dma2d_color_format(lv_img_cf_t color_format); +static lv_point_t lv_area_get_offset(const lv_area_t * area1, const lv_area_t * area2); -void lv_draw_stm32_dma2d_blend(lv_draw_ctx_t * draw_ctx, const lv_draw_sw_blend_dsc_t * dsc); - -void lv_draw_stm32_dma2d_buffer_copy(lv_draw_ctx_t * draw_ctx, - void * dest_buf, lv_coord_t dest_stride, const lv_area_t * dest_area, - void * src_buf, lv_coord_t src_stride, const lv_area_t * src_area); - -void lv_gpu_stm32_dma2d_wait_cb(lv_draw_ctx_t * draw_ctx); +/********************** + * STATIC PROTOTYPES + **********************/ +LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_blend_fill(const lv_color_t * dst_buf, lv_coord_t dst_stride, + const lv_area_t * draw_area, lv_color_t color, lv_opa_t opa); +LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_blend_map(const lv_color_t * dest_buf, lv_coord_t dest_stride, + const lv_area_t * draw_area, const void * src_buf, lv_coord_t src_stride, const lv_point_t * src_offset, lv_opa_t opa, + dma2d_color_format_t src_color_format, bool ignore_src_alpha); +LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_blend_paint(const lv_color_t * dst_buf, lv_coord_t dst_stride, + const lv_area_t * draw_area, const lv_opa_t * mask_buf, lv_coord_t mask_stride, const lv_point_t * mask_offset, + lv_color_t color, lv_opa_t opa); +LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_copy_buffer(const lv_color_t * dest_buf, lv_coord_t dest_stride, + const lv_area_t * draw_area, const lv_color_t * src_buf, lv_coord_t src_stride, const lv_point_t * src_offset); +LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dma2d_await_dma_transfer_finish(lv_disp_drv_t * disp_drv); +LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dma2d_start_dma_transfer(void); +LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dma2d_invalidate_cache(uint32_t address, lv_coord_t offset, + lv_coord_t width, lv_coord_t height, uint8_t pixel_size); +LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dma2d_clean_cache(uint32_t address, lv_coord_t offset, lv_coord_t width, + lv_coord_t height, uint8_t pixel_size); +LV_STM32_DMA2D_STATIC bool _lv_gpu_stm32_dwt_init(void); +LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dwt_reset(void); +LV_STM32_DMA2D_STATIC uint32_t _lv_gpu_stm32_dwt_get_us(void); /********************** * MACROS