feat(draw): add unified NEON acceleration (#4860)

Signed-off-by: Peter Bee <bijunda1@xiaomi.com>
Co-authored-by: Gabor Kiss-Vamosi <kisvegabor@gmail.com>
This commit is contained in:
Peter Bee
2023-11-27 05:32:55 -06:00
committed by GitHub
parent 767a44bdaa
commit a51beb9051
11 changed files with 2046 additions and 1660 deletions

View File

@@ -17,7 +17,9 @@
#ifndef LV_CONF_H
#define LV_CONF_H
#ifndef __ASSEMBLY__
#include <stdint.h>
#endif
/*====================
COLOR SETTINGS

View File

@@ -1,8 +1,12 @@
LVGL_PATH ?= ${shell pwd}/lvgl
ASRCS += $(shell find $(LVGL_PATH)/src -type f -name '*.S')
CSRCS += $(shell find $(LVGL_PATH)/src -type f -name '*.c')
CSRCS += $(shell find $(LVGL_PATH)/demos -type f -name '*.c')
CSRCS += $(shell find $(LVGL_PATH)/examples -type f -name '*.c')
CXXSRCS += $(shell find $(LVGL_PATH)/src/libs/thorvg -type f -name '*.cpp')
CFLAGS += "-I$(LVGL_PATH)"
CXXEXT := .cpp
CXXSRCS += $(shell find $(LVGL_PATH)/src/libs/thorvg -type f -name '*${CXXEXT}')
AFLAGS += "-I$(LVGL_PATH)"
CFLAGS += "-I$(LVGL_PATH)"
CXXFLAGS += "-I$(LVGL_PATH)"

View File

@@ -150,15 +150,19 @@ fout.write(
* End of parsing lv_conf_template.h
-----------------------------------*/
#ifndef __ASSEMBLY__
LV_EXPORT_CONST_INT(LV_DPI_DEF);
#endif
#undef _LV_KCONFIG_PRESENT
#ifndef __ASSEMBLY__
#if LV_USE_FLOAT
typedef float lv_value_precise_t;
#else
typedef int32_t lv_value_precise_t;
#endif
#endif
/*Set some defines if a dependency is disabled*/
#if LV_USE_LOG == 0

View File

@@ -16,6 +16,12 @@
#include "../../../misc/lv_color.h"
#include "../../../stdlib/lv_string.h"
#if LV_USE_DRAW_SW_ASM == LV_DRAW_SW_ASM_NEON
#include "neon/lv_blend_neon.h"
#elif LV_USE_DRAW_SW_ASM == LV_DRAW_SW_ASM_CUSTOM
#include LV_DRAW_SW_ASM_CUSTOM_INCLUDE
#endif
/*********************
* DEFINES
*********************/
@@ -78,8 +84,20 @@ LV_ATTRIBUTE_FAST_MEM void lv_draw_sw_blend_color_to_argb8888(_lv_draw_sw_blend_
int32_t x;
int32_t y;
LV_UNUSED(w);
LV_UNUSED(h);
LV_UNUSED(x);
LV_UNUSED(y);
LV_UNUSED(opa);
LV_UNUSED(mask);
LV_UNUSED(mask_stride);
LV_UNUSED(dest_stride);
/*Simple fill*/
if(mask == NULL && opa >= LV_OPA_MAX) {
#ifdef LV_DRAW_SW_COLOR_BLEND_TO_ARGB8888
LV_DRAW_SW_COLOR_BLEND_TO_ARGB8888(dsc);
#else
uint32_t color32 = lv_color_to_u32(dsc->color);
uint32_t * dest_buf = dsc->dest_buf;
for(y = 0; y < h; y++) {
@@ -110,9 +128,13 @@ LV_ATTRIBUTE_FAST_MEM void lv_draw_sw_blend_color_to_argb8888(_lv_draw_sw_blend_
dest_buf = drawbuf_next_row(dest_buf, dest_stride);
}
#endif
}
/*Opacity only*/
else if(mask == NULL && opa < LV_OPA_MAX) {
#ifdef LV_DRAW_SW_COLOR_BLEND_TO_ARGB8888_WITH_OPA
LV_DRAW_SW_COLOR_BLEND_TO_ARGB8888_WITH_OPA(dsc);
#else
lv_color32_t color_argb = lv_color_to_32(dsc->color, opa);
lv_color32_t * dest_buf = dsc->dest_buf;
@@ -122,9 +144,13 @@ LV_ATTRIBUTE_FAST_MEM void lv_draw_sw_blend_color_to_argb8888(_lv_draw_sw_blend_
}
dest_buf = drawbuf_next_row(dest_buf, dest_stride);
}
#endif
}
/*Masked with full opacity*/
else if(mask && opa >= LV_OPA_MAX) {
#ifdef LV_DRAW_SW_COLOR_BLEND_TO_ARGB8888_WITH_MASK
LV_DRAW_SW_COLOR_BLEND_TO_ARGB8888_WITH_MASK(dsc);
#else
lv_color32_t color_argb = lv_color_to_32(dsc->color, 0xff);
lv_color32_t * dest_buf = dsc->dest_buf;
for(y = 0; y < h; y++) {
@@ -136,9 +162,13 @@ LV_ATTRIBUTE_FAST_MEM void lv_draw_sw_blend_color_to_argb8888(_lv_draw_sw_blend_
dest_buf = drawbuf_next_row(dest_buf, dest_stride);
mask += mask_stride;
}
#endif
}
/*Masked with opacity*/
else {
#ifdef LV_DRAW_SW_COLOR_BLEND_TO_ARGB8888_MIX_MASK_OPA
LV_DRAW_SW_COLOR_BLEND_TO_ARGB8888_MIX_MASK_OPA(dsc);
#else
lv_color32_t color_argb = lv_color_to_32(dsc->color, opa);
lv_color32_t * dest_buf = dsc->dest_buf;
for(y = 0; y < h; y++) {
@@ -149,6 +179,7 @@ LV_ATTRIBUTE_FAST_MEM void lv_draw_sw_blend_color_to_argb8888(_lv_draw_sw_blend_
dest_buf = drawbuf_next_row(dest_buf, dest_stride);
mask += mask_stride;
}
#endif
}
}
@@ -196,8 +227,18 @@ LV_ATTRIBUTE_FAST_MEM static void rgb565_image_blend(_lv_draw_sw_blend_image_dsc
int32_t x;
int32_t y;
LV_UNUSED(color_argb);
if(dsc->blend_mode == LV_BLEND_MODE_NORMAL) {
if(mask_buf == NULL) {
#ifdef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_ARGB8888
if(opa >= LV_OPA_MAX) {
LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_ARGB8888(dsc);
}
else {
LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_ARGB8888_WITH_OPA(dsc);
}
#else
color_argb.alpha = opa;
for(y = 0; y < h; y++) {
for(x = 0; x < w; x++) {
@@ -209,8 +250,12 @@ LV_ATTRIBUTE_FAST_MEM static void rgb565_image_blend(_lv_draw_sw_blend_image_dsc
dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride);
src_buf_c16 = drawbuf_next_row(src_buf_c16, src_stride);
}
#endif
}
else if(mask_buf && opa >= LV_OPA_MAX) {
#ifdef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_ARGB8888_WITH_MASK
LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_ARGB8888_WITH_MASK(dsc);
#else
for(y = 0; y < h; y++) {
for(x = 0; x < w; x++) {
color_argb.alpha = mask_buf[x];
@@ -223,8 +268,12 @@ LV_ATTRIBUTE_FAST_MEM static void rgb565_image_blend(_lv_draw_sw_blend_image_dsc
src_buf_c16 = drawbuf_next_row(src_buf_c16, src_stride);
mask_buf += mask_stride;
}
#endif
}
else {
#ifdef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_ARGB8888_MIX_MASK_OPA
LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_ARGB8888_MIX_MASK_OPA(dsc);
#else
for(y = 0; y < h; y++) {
for(x = 0; x < w; x++) {
color_argb.alpha = LV_OPA_MIX2(mask_buf[x], opa);
@@ -237,6 +286,7 @@ LV_ATTRIBUTE_FAST_MEM static void rgb565_image_blend(_lv_draw_sw_blend_image_dsc
src_buf_c16 = drawbuf_next_row(src_buf_c16, src_stride);
mask_buf += mask_stride;
}
#endif
}
}
else {
@@ -278,9 +328,14 @@ LV_ATTRIBUTE_FAST_MEM static void rgb888_image_blend(_lv_draw_sw_blend_image_dsc
int32_t src_x;
int32_t y;
LV_UNUSED(color_argb);
if(dsc->blend_mode == LV_BLEND_MODE_NORMAL) {
/*Special case*/
if(mask_buf == NULL && opa >= LV_OPA_MAX) {
#ifdef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_ARGB8888
LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_ARGB8888(dsc, src_px_size);
#else
if(src_px_size == 4) {
uint32_t line_in_bytes = w * 4;
for(y = 0; y < h; y++) {
@@ -301,8 +356,12 @@ LV_ATTRIBUTE_FAST_MEM static void rgb888_image_blend(_lv_draw_sw_blend_image_dsc
src_buf = drawbuf_next_row(src_buf, src_stride);
}
}
#endif
}
if(mask_buf == NULL && opa < LV_OPA_MAX) {
#ifdef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_ARGB8888_WITH_OPA
LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_ARGB8888_WITH_OPA(dsc, src_px_size);
#else
color_argb.alpha = opa;
for(y = 0; y < h; y++) {
for(dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += src_px_size) {
@@ -314,8 +373,12 @@ LV_ATTRIBUTE_FAST_MEM static void rgb888_image_blend(_lv_draw_sw_blend_image_dsc
dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride);
src_buf = drawbuf_next_row(src_buf, src_stride);
}
#endif
}
if(mask_buf && opa >= LV_OPA_MAX) {
#ifdef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_ARGB8888_WITH_MASK
LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_ARGB8888_WITH_MASK(dsc, src_px_size);
#else
for(y = 0; y < h; y++) {
for(dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += src_px_size) {
color_argb.alpha = mask_buf[dest_x];
@@ -328,8 +391,12 @@ LV_ATTRIBUTE_FAST_MEM static void rgb888_image_blend(_lv_draw_sw_blend_image_dsc
src_buf = drawbuf_next_row(src_buf, src_stride);
mask_buf += mask_stride;
}
#endif
}
if(mask_buf && opa < LV_OPA_MAX) {
#ifdef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_ARGB8888_MIX_MASK_OPA
LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_ARGB8888_MIX_MASK_OPA(dsc, src_px_size);
#else
for(y = 0; y < h; y++) {
for(dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += src_px_size) {
color_argb.alpha = (opa * mask_buf[dest_x]) >> 8;
@@ -342,6 +409,7 @@ LV_ATTRIBUTE_FAST_MEM static void rgb888_image_blend(_lv_draw_sw_blend_image_dsc
src_buf = drawbuf_next_row(src_buf, src_stride);
mask_buf += mask_stride;
}
#endif
}
}
else {
@@ -384,6 +452,9 @@ LV_ATTRIBUTE_FAST_MEM static void argb8888_image_blend(_lv_draw_sw_blend_image_d
if(dsc->blend_mode == LV_BLEND_MODE_NORMAL) {
if(mask_buf == NULL && opa >= LV_OPA_MAX) {
#ifdef LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_ARGB8888
LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_ARGB8888(dsc);
#else
for(y = 0; y < h; y++) {
for(x = 0; x < w; x++) {
dest_buf_c32[x] = lv_color_32_32_mix(src_buf_c32[x], dest_buf_c32[x], &cache);
@@ -391,8 +462,12 @@ LV_ATTRIBUTE_FAST_MEM static void argb8888_image_blend(_lv_draw_sw_blend_image_d
dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride);
src_buf_c32 = drawbuf_next_row(src_buf_c32, src_stride);
}
#endif
}
else if(mask_buf == NULL && opa < LV_OPA_MAX) {
#ifdef LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_ARGB8888_WITH_OPA
LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_ARGB8888_WITH_OPA(dsc);
#else
for(y = 0; y < h; y++) {
for(x = 0; x < w; x++) {
color_argb = src_buf_c32[x];
@@ -402,8 +477,12 @@ LV_ATTRIBUTE_FAST_MEM static void argb8888_image_blend(_lv_draw_sw_blend_image_d
dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride);
src_buf_c32 = drawbuf_next_row(src_buf_c32, src_stride);
}
#endif
}
else if(mask_buf && opa >= LV_OPA_MAX) {
#ifdef LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_ARGB8888_WITH_MASK
LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_ARGB8888_WITH_MASK(dsc);
#else
for(y = 0; y < h; y++) {
for(x = 0; x < w; x++) {
color_argb = src_buf_c32[x];
@@ -414,8 +493,12 @@ LV_ATTRIBUTE_FAST_MEM static void argb8888_image_blend(_lv_draw_sw_blend_image_d
src_buf_c32 = drawbuf_next_row(src_buf_c32, src_stride);
mask_buf += mask_stride;
}
#endif
}
else if(mask_buf && opa < LV_OPA_MAX) {
#ifdef LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_ARGB8888_MIX_MASK_OPA
LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_ARGB8888_MIX_MASK_OPA(dsc);
#else
for(y = 0; y < h; y++) {
for(x = 0; x < w; x++) {
color_argb = src_buf_c32[x];
@@ -426,6 +509,7 @@ LV_ATTRIBUTE_FAST_MEM static void argb8888_image_blend(_lv_draw_sw_blend_image_d
src_buf_c32 = drawbuf_next_row(src_buf_c32, src_stride);
mask_buf += mask_stride;
}
#endif
}
}
else {

View File

@@ -1,5 +1,5 @@
/**
* @file lv_draw_sw_blend.c
* @file lv_draw_sw_blend_to_rgb565.c
*
*/
@@ -16,6 +16,12 @@
#include "../../../misc/lv_color.h"
#include "../../../stdlib/lv_string.h"
#if LV_USE_DRAW_SW_ASM == LV_DRAW_SW_ASM_NEON
#include "neon/lv_blend_neon.h"
#elif LV_USE_DRAW_SW_ASM == LV_DRAW_SW_ASM_CUSTOM
#include LV_DRAW_SW_ASM_CUSTOM_INCLUDE
#endif
/*********************
* DEFINES
*********************/
@@ -77,8 +83,22 @@ LV_ATTRIBUTE_FAST_MEM void lv_draw_sw_blend_color_to_rgb565(_lv_draw_sw_blend_fi
int32_t x;
int32_t y;
LV_UNUSED(w);
LV_UNUSED(h);
LV_UNUSED(x);
LV_UNUSED(y);
LV_UNUSED(opa);
LV_UNUSED(mask);
LV_UNUSED(color16);
LV_UNUSED(mask_stride);
LV_UNUSED(dest_stride);
LV_UNUSED(dest_buf_u16);
/*Simple fill*/
if(mask == NULL && opa >= LV_OPA_MAX) {
#ifdef LV_DRAW_SW_COLOR_BLEND_TO_RGB565
LV_DRAW_SW_COLOR_BLEND_TO_RGB565(dsc);
#else
for(y = 0; y < h; y++) {
uint16_t * dest_end_final = dest_buf_u16 + w;
uint32_t * dest_end_mid = (uint32_t *)((uint16_t *) dest_buf_u16 + ((w - 1) & ~(0xF)));
@@ -111,9 +131,13 @@ LV_ATTRIBUTE_FAST_MEM void lv_draw_sw_blend_color_to_rgb565(_lv_draw_sw_blend_fi
dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride);
dest_buf_u16 -= w;
}
#endif
}
/*Opacity only*/
else if(mask == NULL && opa < LV_OPA_MAX) {
#ifdef LV_DRAW_SW_COLOR_BLEND_TO_RGB565_WITH_OPA
LV_DRAW_SW_COLOR_BLEND_TO_RGB565_WITH_OPA(dsc);
#else
uint32_t last_dest32_color = dest_buf_u16[0] + 1; /*Set to value which is not equal to the first pixel*/
uint32_t last_res32_color = 0;
@@ -150,10 +174,14 @@ LV_ATTRIBUTE_FAST_MEM void lv_draw_sw_blend_color_to_rgb565(_lv_draw_sw_blend_fi
}
dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride);
}
#endif
}
/*Masked with full opacity*/
else if(mask && opa >= LV_OPA_MAX) {
#ifdef LV_DRAW_SW_COLOR_BLEND_TO_RGB565_WITH_MASK
LV_DRAW_SW_COLOR_BLEND_TO_RGB565_WITH_MASK(dsc);
#else
uint32_t c32 = color16 + ((uint32_t)color16 << 16);
for(y = 0; y < h; y++) {
for(x = 0; x < w && ((lv_uintptr_t)(mask) & 0x3); x++) {
@@ -189,9 +217,13 @@ LV_ATTRIBUTE_FAST_MEM void lv_draw_sw_blend_color_to_rgb565(_lv_draw_sw_blend_fi
dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride);
mask += mask_stride;
}
#endif
}
/*Masked with opacity*/
else if(mask && opa < LV_OPA_MAX) {
#ifdef LV_DRAW_SW_COLOR_BLEND_TO_RGB565_MIX_MASK_OPA
LV_DRAW_SW_COLOR_BLEND_TO_RGB565_MIX_MASK_OPA(dsc);
#else
for(y = 0; y < h; y++) {
for(x = 0; x < w; x++) {
dest_buf_u16[x] = lv_color_16_16_mix(color16, dest_buf_u16[x], LV_OPA_MIX2(mask[x], opa));
@@ -199,6 +231,7 @@ LV_ATTRIBUTE_FAST_MEM void lv_draw_sw_blend_color_to_rgb565(_lv_draw_sw_blend_fi
dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride);
mask += mask_stride;
}
#endif
}
}
@@ -244,14 +277,21 @@ LV_ATTRIBUTE_FAST_MEM static void rgb565_image_blend(_lv_draw_sw_blend_image_dsc
if(dsc->blend_mode == LV_BLEND_MODE_NORMAL) {
if(mask_buf == NULL && opa >= LV_OPA_MAX) {
#ifdef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB565
LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB565(dsc);
#else
uint32_t line_in_bytes = w * 2;
for(y = 0; y < h; y++) {
lv_memcpy(dest_buf_u16, src_buf_u16, line_in_bytes);
dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride);
src_buf_u16 = drawbuf_next_row(src_buf_u16, src_stride);
}
#endif
}
else if(mask_buf == NULL && opa < LV_OPA_MAX) {
#ifdef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB565_WITH_OPA
LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB565_WITH_OPA(dsc);
#else
for(y = 0; y < h; y++) {
for(x = 0; x < w; x++) {
dest_buf_u16[x] = lv_color_16_16_mix(src_buf_u16[x], dest_buf_u16[x], opa);
@@ -259,8 +299,12 @@ LV_ATTRIBUTE_FAST_MEM static void rgb565_image_blend(_lv_draw_sw_blend_image_dsc
dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride);
src_buf_u16 = drawbuf_next_row(src_buf_u16, src_stride);
}
#endif
}
else if(mask_buf && opa >= LV_OPA_MAX) {
#ifdef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB565_WITH_MASK
LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB565_WITH_MASK(dsc);
#else
for(y = 0; y < h; y++) {
for(x = 0; x < w; x++) {
dest_buf_u16[x] = lv_color_16_16_mix(src_buf_u16[x], dest_buf_u16[x], mask_buf[x]);
@@ -269,8 +313,12 @@ LV_ATTRIBUTE_FAST_MEM static void rgb565_image_blend(_lv_draw_sw_blend_image_dsc
src_buf_u16 = drawbuf_next_row(src_buf_u16, src_stride);
mask_buf += mask_stride;
}
#endif
}
else {
#ifdef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB565_MIX_MASK_OPA
LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB565_MIX_MASK_OPA(dsc);
#else
for(y = 0; y < h; y++) {
for(x = 0; x < w; x++) {
dest_buf_u16[x] = lv_color_16_16_mix(src_buf_u16[x], dest_buf_u16[x], LV_OPA_MIX2(mask_buf[x], opa));
@@ -279,6 +327,7 @@ LV_ATTRIBUTE_FAST_MEM static void rgb565_image_blend(_lv_draw_sw_blend_image_dsc
src_buf_u16 = drawbuf_next_row(src_buf_u16, src_stride);
mask_buf += mask_stride;
}
#endif
}
}
else {
@@ -345,6 +394,9 @@ LV_ATTRIBUTE_FAST_MEM static void rgb888_image_blend(_lv_draw_sw_blend_image_dsc
if(dsc->blend_mode == LV_BLEND_MODE_NORMAL) {
if(mask_buf == NULL && opa >= LV_OPA_MAX) {
#ifdef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB565
LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB565(dsc, src_px_size);
#else
for(y = 0; y < h; y++) {
for(dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += src_px_size) {
dest_buf_u16[dest_x] = ((src_buf_u8[src_x + 2] & 0xF8) << 8) +
@@ -354,8 +406,12 @@ LV_ATTRIBUTE_FAST_MEM static void rgb888_image_blend(_lv_draw_sw_blend_image_dsc
dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride);
src_buf_u8 += src_stride;
}
#endif
}
else if(mask_buf == NULL && opa < LV_OPA_MAX) {
#ifdef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB565_WITH_OPA
LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB565_WITH_OPA(dsc, src_px_size);
#else
for(y = 0; y < h; y++) {
for(dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += src_px_size) {
dest_buf_u16[dest_x] = lv_color_24_16_mix(&src_buf_u8[src_x], dest_buf_u16[dest_x], opa);
@@ -363,8 +419,12 @@ LV_ATTRIBUTE_FAST_MEM static void rgb888_image_blend(_lv_draw_sw_blend_image_dsc
dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride);
src_buf_u8 += src_stride;
}
#endif
}
if(mask_buf && opa >= LV_OPA_MAX) {
#ifdef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB565_WITH_MASK
LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB565_WITH_MASK(dsc, src_px_size);
#else
for(y = 0; y < h; y++) {
for(dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += src_px_size) {
dest_buf_u16[dest_x] = lv_color_24_16_mix(&src_buf_u8[src_x], dest_buf_u16[dest_x], mask_buf[dest_x]);
@@ -373,8 +433,12 @@ LV_ATTRIBUTE_FAST_MEM static void rgb888_image_blend(_lv_draw_sw_blend_image_dsc
src_buf_u8 += src_stride;
mask_buf += mask_stride;
}
#endif
}
if(mask_buf && opa < LV_OPA_MAX) {
#ifdef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB565_MIX_MASK_OPA
LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB565_MIX_MASK_OPA(dsc, src_px_size);
#else
for(y = 0; y < h; y++) {
for(dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += src_px_size) {
dest_buf_u16[dest_x] = lv_color_24_16_mix(&src_buf_u8[src_x], dest_buf_u16[dest_x], LV_OPA_MIX2(mask_buf[dest_x], opa));
@@ -383,6 +447,7 @@ LV_ATTRIBUTE_FAST_MEM static void rgb888_image_blend(_lv_draw_sw_blend_image_dsc
src_buf_u8 += src_stride;
mask_buf += mask_stride;
}
#endif
}
}
else {
@@ -445,6 +510,9 @@ LV_ATTRIBUTE_FAST_MEM static void argb8888_image_blend(_lv_draw_sw_blend_image_d
if(dsc->blend_mode == LV_BLEND_MODE_NORMAL) {
if(mask_buf == NULL && opa >= LV_OPA_MAX) {
#ifdef LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB565
LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB565(dsc);
#else
for(y = 0; y < h; y++) {
for(dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += 4) {
dest_buf_u16[dest_x] = lv_color_24_16_mix(&src_buf_u8[src_x], dest_buf_u16[dest_x], src_buf_u8[src_x + 3]);
@@ -452,8 +520,12 @@ LV_ATTRIBUTE_FAST_MEM static void argb8888_image_blend(_lv_draw_sw_blend_image_d
dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride);
src_buf_u8 += src_stride;
}
#endif
}
else if(mask_buf == NULL && opa < LV_OPA_MAX) {
#ifdef LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB565_WITH_OPA
LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB565_WITH_OPA(dsc);
#else
for(y = 0; y < h; y++) {
for(dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += 4) {
dest_buf_u16[dest_x] = lv_color_24_16_mix(&src_buf_u8[src_x], dest_buf_u16[dest_x], LV_OPA_MIX2(src_buf_u8[src_x + 3],
@@ -462,8 +534,12 @@ LV_ATTRIBUTE_FAST_MEM static void argb8888_image_blend(_lv_draw_sw_blend_image_d
dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride);
src_buf_u8 += src_stride;
}
#endif
}
else if(mask_buf && opa >= LV_OPA_MAX) {
#ifdef LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB565_WITH_MASK
LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB565_WITH_MASK(dsc);
#else
for(y = 0; y < h; y++) {
for(dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += 4) {
dest_buf_u16[dest_x] = lv_color_24_16_mix(&src_buf_u8[src_x], dest_buf_u16[dest_x],
@@ -473,8 +549,12 @@ LV_ATTRIBUTE_FAST_MEM static void argb8888_image_blend(_lv_draw_sw_blend_image_d
src_buf_u8 += src_stride;
mask_buf += mask_stride;
}
#endif
}
else if(mask_buf && opa < LV_OPA_MAX) {
#ifdef LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB565_MIX_MASK_OPA
LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB565_MIX_MASK_OPA(dsc);
#else
for(y = 0; y < h; y++) {
for(dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += 4) {
dest_buf_u16[dest_x] = lv_color_24_16_mix(&src_buf_u8[src_x], dest_buf_u16[dest_x],
@@ -484,6 +564,7 @@ LV_ATTRIBUTE_FAST_MEM static void argb8888_image_blend(_lv_draw_sw_blend_image_d
src_buf_u8 += src_stride;
mask_buf += mask_stride;
}
#endif
}
}
else {

View File

@@ -1,5 +1,5 @@
/**
* @file lv_draw_sw_blend_rgb888.c
* @file lv_draw_sw_blend_to_rgb888.c
*
*/
@@ -17,7 +17,7 @@
#include "../../../stdlib/lv_string.h"
#if LV_USE_DRAW_SW_ASM == LV_DRAW_SW_ASM_NEON
#include "neon/lv_blend_to_rgb888_neon.h"
#include "neon/lv_blend_neon.h"
#elif LV_USE_DRAW_SW_ASM == LV_DRAW_SW_ASM_CUSTOM
#include LV_DRAW_SW_ASM_CUSTOM_INCLUDE
#endif
@@ -237,6 +237,9 @@ LV_ATTRIBUTE_FAST_MEM static void rgb565_image_blend(_lv_draw_sw_blend_image_dsc
if(dsc->blend_mode == LV_BLEND_MODE_NORMAL) {
if(mask_buf == NULL && opa >= LV_OPA_MAX) {
#ifdef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888
LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888(dsc, dest_px_size);
#else
for(y = 0; y < h; y++) {
for(src_x = 0, dest_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
dest_buf_u8[dest_x + 2] = (src_buf_c16[src_x].red * 2106) >> 8; /*To make it rounded*/
@@ -246,8 +249,12 @@ LV_ATTRIBUTE_FAST_MEM static void rgb565_image_blend(_lv_draw_sw_blend_image_dsc
dest_buf_u8 += dest_stride;
src_buf_c16 = drawbuf_next_row(src_buf_c16, src_stride);
}
#endif
}
else if(mask_buf == NULL && opa < LV_OPA_MAX) {
#ifdef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888_WITH_OPA
LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888_WITH_OPA(dsc, dest_px_size);
#else
uint8_t res[3];
for(y = 0; y < h; y++) {
for(src_x = 0, dest_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
@@ -259,8 +266,12 @@ LV_ATTRIBUTE_FAST_MEM static void rgb565_image_blend(_lv_draw_sw_blend_image_dsc
dest_buf_u8 += dest_stride;
src_buf_c16 = drawbuf_next_row(src_buf_c16, src_stride);
}
#endif
}
else if(mask_buf && opa >= LV_OPA_MAX) {
#ifdef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888_WITH_MASK
LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888_WITH_MASK(dsc, dest_px_size);
#else
uint8_t res[3];
for(y = 0; y < h; y++) {
for(src_x = 0, dest_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
@@ -273,8 +284,12 @@ LV_ATTRIBUTE_FAST_MEM static void rgb565_image_blend(_lv_draw_sw_blend_image_dsc
src_buf_c16 = drawbuf_next_row(src_buf_c16, src_stride);
mask_buf += mask_stride;
}
#endif
}
else {
#ifdef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888_MIX_MASK_OPA
LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888_MIX_MASK_OPA(dsc, dest_px_size);
#else
uint8_t res[3];
for(y = 0; y < h; y++) {
for(src_x = 0, dest_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
@@ -287,6 +302,7 @@ LV_ATTRIBUTE_FAST_MEM static void rgb565_image_blend(_lv_draw_sw_blend_image_dsc
src_buf_c16 = drawbuf_next_row(src_buf_c16, src_stride);
mask_buf += mask_stride;
}
#endif
}
}
else {
@@ -327,6 +343,9 @@ LV_ATTRIBUTE_FAST_MEM static void rgb888_image_blend(_lv_draw_sw_blend_image_dsc
if(dsc->blend_mode == LV_BLEND_MODE_NORMAL) {
/*Special case*/
if(mask_buf == NULL && opa >= LV_OPA_MAX) {
#ifdef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888
LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888(dsc, dest_px_size, src_px_size);
#else
if(src_px_size == dest_px_size) {
for(y = 0; y < h; y++) {
lv_memcpy(dest_buf, src_buf, w);
@@ -345,8 +364,12 @@ LV_ATTRIBUTE_FAST_MEM static void rgb888_image_blend(_lv_draw_sw_blend_image_dsc
src_buf += src_stride;
}
}
#endif
}
if(mask_buf == NULL && opa < LV_OPA_MAX) {
#ifdef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888_WITH_OPA
LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888_WITH_OPA(dsc, dest_px_size, src_px_size);
#else
for(y = 0; y < h; y++) {
for(dest_x = 0, src_x = 0; dest_x < w; dest_x += dest_px_size, src_x += src_px_size) {
lv_color_24_24_mix(&src_buf[src_x], &dest_buf[dest_x], opa);
@@ -354,8 +377,12 @@ LV_ATTRIBUTE_FAST_MEM static void rgb888_image_blend(_lv_draw_sw_blend_image_dsc
dest_buf += dest_stride;
src_buf += src_stride;
}
#endif
}
if(mask_buf && opa >= LV_OPA_MAX) {
#ifdef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888_WITH_MASK
LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888_WITH_MASK(dsc, dest_px_size, src_px_size);
#else
uint32_t mask_x;
for(y = 0; y < h; y++) {
for(mask_x = 0, dest_x = 0, src_x = 0; dest_x < w; mask_x++, dest_x += dest_px_size, src_x += src_px_size) {
@@ -365,8 +392,12 @@ LV_ATTRIBUTE_FAST_MEM static void rgb888_image_blend(_lv_draw_sw_blend_image_dsc
src_buf += src_stride;
mask_buf += mask_stride;
}
#endif
}
if(mask_buf && opa < LV_OPA_MAX) {
#ifdef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888_MIX_MASK_OPA
LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888_MIX_MASK_OPA(dsc, dest_px_size, src_px_size);
#else
uint32_t mask_x;
for(y = 0; y < h; y++) {
for(mask_x = 0, dest_x = 0, src_x = 0; dest_x < w; mask_x++, dest_x += dest_px_size, src_x += src_px_size) {
@@ -376,6 +407,7 @@ LV_ATTRIBUTE_FAST_MEM static void rgb888_image_blend(_lv_draw_sw_blend_image_dsc
src_buf += src_stride;
mask_buf += mask_stride;
}
#endif
}
}
else {

View File

@@ -0,0 +1,680 @@
/**
 * @file lv_blend_to_neon.S
 *
 * ARMv7-A NEON assembly fast paths for the LVGL software blender.
 * Compiled only when LV_USE_DRAW_SW_ASM == LV_DRAW_SW_ASM_NEON
 * (see the guard below); the C side pulls it in via lv_blend_neon.h.
 */
#include "lv_blend_neon.h"
#if LV_USE_DRAW_SW_ASM == LV_DRAW_SW_ASM_NEON
.text
.fpu neon
.arch armv7a
.syntax unified
.altmacro              @ enables the reg&_B style name-pasting used by the macros below
.p2align 2
@ Fixed NEON register plan shared by every macro in this file:
@ d0 ~ d3 : src B,G,R,A   (one byte lane per pixel, planar)
@ d4 ~ d7 : dst B,G,R,A
@ q8 : src RGB565 raw
@ q9 : dst RGB565 raw
@ q10 ~ q12: pre-multiplied src
@ d26~29 : temp
@ d30 : mask
@ d31 : opa
@ Core-register aliases. NOTE(review): FG_MASK shares r0 with the incoming
@ descriptor pointer read by `init`; init only writes FG_MASK after all
@ [r0, #off] loads are done.
FG_MASK .req r0
BG_MASK .req r1
DST_ADDR .req r2
DST_W .req r3
DST_H .req r4
DST_STRIDE .req r5
SRC_ADDR .req r6
SRC_STRIDE .req r7
MASK_ADDR .req r8
MASK_STRIDE .req r9
W .req r10
H .req r11
@ Q/D views of the source and destination pixel registers. The *_8888_*
@ aliases overlay the same d-registers as the planar *_B/_G/_R/_A names.
S_8888_L .qn q0
S_8888_H .qn q1
D_8888_L .qn q2
D_8888_H .qn q3
S_B .dn d0
S_G .dn d1
S_R .dn d2
S_A .dn d3
D_B .dn d4
D_G .dn d5
D_R .dn d6
D_A .dn d7
S_565 .qn q8
D_565 .qn q9
S_565_L .dn d16
S_565_H .dn d17
D_565_L .dn d18
D_565_H .dn d19
PREMULT_B .qn q10
PREMULT_G .qn q11
PREMULT_R .qn q12
TMP_Q0 .qn q13
TMP_D0 .dn d26
TMP_D1 .dn d27
TMP_Q1 .qn q14
TMP_D2 .dn d28
TMP_D3 .dn d29
M_A .dn d30
OPA .dn d31
@ convert: repack eight pixels between the planar per-channel view
@ (reg&_B/_G/_R/_A, one byte lane per pixel) and the packed wire layout.
@   reg   : register-name prefix, S (source) or D (destination)
@   bpp   : >= 31 -> 4 bytes/px (BGRA byte order, per the lane diagrams),
@           24 -> packed BGR, 16 -> RGB565 assembled in reg&_565
@   intlv : 1 = planar -> packed (before a store),
@           0 = packed -> planar (after a load)
@ The 24bpp intlv==1 path is only valid when each of B,G,R holds one value
@ replicated across all lanes (solid color from a dup-load) — see the
@ "for init only" note below.
.macro convert reg, bpp, intlv
.if bpp >= 31
.if intlv
vzip.8 reg&_B, reg&_R @ BRBRBRBR GGGGGGGG BRBRBRBR AAAAAAAA
vzip.8 reg&_G, reg&_A @ BRBRBRBR GAGAGAGA BRBRBRBR GAGAGAGA
vzip.8 reg&_R, reg&_A @ BRBRBRBR GAGAGAGA BGRABGRA BGRABGRA
vzip.8 reg&_B, reg&_G @ BGRABGRA BGRABGRA BGRABGRA BGRABGRA
.else
@ exact inverse of the zip sequence above
vuzp.8 reg&_B, reg&_G @ BRBRBRBR GAGAGAGA BGRABGRA BGRABGRA
vuzp.8 reg&_R, reg&_A @ BRBRBRBR GAGAGAGA BRBRBRBR GAGAGAGA
vuzp.8 reg&_G, reg&_A @ BRBRBRBR GGGGGGGG BRBRBRBR AAAAAAAA
vuzp.8 reg&_B, reg&_R @ BBBBBBBB GGGGGGGG RRRRRRRR AAAAAAAA
.endif
.elseif bpp == 24
.if intlv @ for init only (same B,G,R value in all lanes of each channel)
vzip.8 reg&_B, reg&_G @ BGBGBGBG BGBGBGBG RRRRRRRR
vzip.16 reg&_B, reg&_R @ BGRRBGRR BGBGBGBG BGRRBGRR
vsli.64 reg&_8888_L, reg&_8888_L, #24 @ BGRBGRRB BGBBGBGB
vsli.64 reg&_B, reg&_G, #48 @ BGRBGRBG
vsri.64 reg&_R, reg&_B, #8 @ GRBGRBGR
vsri.64 reg&_G, reg&_R, #8 @ RBGRBGRB
.endif
.elseif bpp == 16
.if intlv
@ planar 8:8:8 -> RGB565: widen each channel then shift-insert into place
vshll.u8 reg&_565, reg&_R, #8 @ RRRrrRRR 00000000
vshll.u8 TMP_Q0, reg&_G, #8 @ GGGgggGG 00000000
vshll.u8 TMP_Q1, reg&_B, #8 @ BBBbbBBB 00000000
vsri.16 reg&_565, TMP_Q0, #5 @ RRRrrGGG gggGG000
vsri.16 reg&_565, TMP_Q1, #11 @ RRRrrGGG gggBBBbb
.else
@ RGB565 -> planar, replicating the top bits into the low bits so that
@ full-scale 565 values expand to full-scale 888 values
vshr.u8 TMP_Q0, reg&_565, #3 @ 000RRRrr 000gggBB
vshrn.i16 reg&_G, reg&_565, #5 @ rrGGGggg
vshrn.i16 reg&_R, TMP_Q0, #5 @ RRRrr000
vshl.i8 reg&_G, reg&_G, #2 @ GGGggg00
vshl.i16 TMP_Q1, reg&_565, #3 @ rrGGGggg BBBbb000
vsri.8 reg&_R, reg&_R, #5 @ RRRrrRRR
vmovn.i16 reg&_B, TMP_Q1 @ BBBbb000
vsri.8 reg&_G, reg&_G, #6 @ GGGgggGG
vsri.8 reg&_B, reg&_B, #5 @ BBBbbBBB
.endif
.endif
.endm
@ ldst: load (op=ld) or store (op=st) `len` pixels (1~8) of `bpp`-bit data
@ at mem&_ADDR, optionally converting between the packed memory layout and
@ the planar channel view.
@   op  : ld or st, pasted into the v<op><n> mnemonics
@   bpp : >=31 -> 4 bytes/px, 24 -> BGR, 16 -> RGB565, 8 -> A8 (mask/alpha),
@         0 -> dup-load a solid color into all lanes of reg&_B/_G/_R (len must be 8)
@   mem : DST / SRC / MASK, pasted into <mem>_ADDR
@   reg : S or D register prefix
@   cvt : nonzero -> use structured vld3/vld4 (st3/st4) transfers and/or the
@         `convert` macro so the registers end up planar (ld) / packed (st)
@   wb  : pass `!` for post-increment writeback, or nothing to leave the
@         address unchanged. Partial lengths are transferred piecewise with
@         forced post-increment and then rewound by `sub mem&_ADDR, #...`.
@         NOTE(review): the `.if wb&1` guards paste the wb argument before the
@         literal 1; with the arguments used in this file they evaluate true
@         exactly when NO writeback was requested (hence the rewind) — confirm
@         against gas .altmacro expansion rules.
.macro ldst op, bpp, len, mem, reg, cvt, wb
@ ---- 32bpp (BGRA) ----
.if bpp >= 31
.if len == 8
.if cvt
v&op&4.8 {reg&_B, reg&_G, reg&_R, reg&_A}, [mem&_ADDR]&wb
.else
v&op&1.32 {reg&_8888_L, reg&_8888_H}, [mem&_ADDR]&wb
.endif
.else
@ partial length: stores must pack the planar registers first
.if (op == st) && cvt
convert reg, bpp, 1
.endif
.if len == 7
v&op&1.32 {reg&_8888_L}, [mem&_ADDR]!
v&op&1.32 {reg&_R}, [mem&_ADDR]!
v&op&1.32 {reg&_A[0]}, [mem&_ADDR]!
.elseif len == 6
v&op&1.32 {reg&_8888_L}, [mem&_ADDR]!
v&op&1.32 {reg&_R}, [mem&_ADDR]!
.elseif len == 5
v&op&1.32 {reg&_8888_L}, [mem&_ADDR]!
v&op&1.32 {reg&_R[0]}, [mem&_ADDR]!
.elseif len == 4
v&op&1.32 {reg&_8888_L}, [mem&_ADDR]&wb
.elseif len == 3
v&op&1.32 {reg&_B}, [mem&_ADDR]!
v&op&1.32 {reg&_G[0]}, [mem&_ADDR]!
.elseif len == 2
v&op&1.32 {reg&_B}, [mem&_ADDR]&wb
.elseif len == 1
v&op&1.32 {reg&_B[0]}, [mem&_ADDR]&wb
.else
.error "[32bpp]len should be 1~8"
.endif
.if (op == ld) && cvt
convert reg, bpp, 0
.endif
@ rewind the piecewise-advanced address when no writeback was requested
.if (wb&1) && (len != 4) && (len != 2) && (len != 1)
sub mem&_ADDR, #4*len
.endif
.endif
@ ---- 24bpp (BGR) ----
.elseif bpp == 24
.if len == 8
.if cvt
v&op&3.8 {reg&_B, reg&_G, reg&_R}, [mem&_ADDR]&wb
.else
v&op&1.8 {reg&_B, reg&_G, reg&_R}, [mem&_ADDR]&wb
.endif
.elseif (len < 8) && (len > 0)
.if cvt
@ structured per-lane transfer: one BGR triple per vld3/vst3
v&op&3.8 {reg&_B[0], reg&_G[0], reg&_R[0]}, [mem&_ADDR]!
.if len > 1
v&op&3.8 {reg&_B[1], reg&_G[1], reg&_R[1]}, [mem&_ADDR]!
.endif
.if len > 2
v&op&3.8 {reg&_B[2], reg&_G[2], reg&_R[2]}, [mem&_ADDR]!
.endif
.if len > 3
v&op&3.8 {reg&_B[3], reg&_G[3], reg&_R[3]}, [mem&_ADDR]!
.endif
.if len > 4
v&op&3.8 {reg&_B[4], reg&_G[4], reg&_R[4]}, [mem&_ADDR]!
.endif
.if len > 5
v&op&3.8 {reg&_B[5], reg&_G[5], reg&_R[5]}, [mem&_ADDR]!
.endif
.if len > 6
v&op&3.8 {reg&_B[6], reg&_G[6], reg&_R[6]}, [mem&_ADDR]!
.endif
.if wb&1
sub mem&_ADDR, #3*len
.endif
.else
@ raw (already packed) partial transfer, 3*len bytes in the widest chunks
.if len == 7
v&op&1.32 {reg&_8888_L}, [mem&_ADDR]!
v&op&1.32 {reg&_R[0]}, [mem&_ADDR]!
v&op&1.8 {reg&_R[4]}, [mem&_ADDR]!
.elseif len == 6
v&op&1.32 {reg&_8888_L}, [mem&_ADDR]!
v&op&1.16 {reg&_R[0]}, [mem&_ADDR]!
.elseif len == 5
v&op&1.32 {reg&_B}, [mem&_ADDR]!
v&op&1.32 {reg&_G[0]}, [mem&_ADDR]!
v&op&1.16 {reg&_G[2]}, [mem&_ADDR]!
v&op&1.8 {reg&_G[6]}, [mem&_ADDR]!
.elseif len == 4
v&op&1.32 {reg&_B}, [mem&_ADDR]!
v&op&1.32 {reg&_G[0]}, [mem&_ADDR]!
.elseif len == 3
v&op&1.32 {reg&_B}, [mem&_ADDR]!
v&op&1.8 {reg&_G[0]}, [mem&_ADDR]!
.elseif len == 2
v&op&1.32 {reg&_B[0]}, [mem&_ADDR]!
v&op&1.16 {reg&_B[2]}, [mem&_ADDR]!
.elseif len == 1
v&op&1.16 {reg&_B[0]}, [mem&_ADDR]!
v&op&1.8 {reg&_B[2]}, [mem&_ADDR]!
.endif
.if wb&1
sub mem&_ADDR, #3*len
.endif
.endif
.else
.error "[24bpp]len should be 1~8"
.endif
@ ---- 16bpp (RGB565) ----
.elseif bpp == 16
.if (op == st) && cvt
convert reg, bpp, 1
.endif
.if len == 8
v&op&1.16 {reg&_565}, [mem&_ADDR]&wb
.elseif len == 7
v&op&1.16 {reg&_565_L}, [mem&_ADDR]!
v&op&1.32 {reg&_565_H[0]}, [mem&_ADDR]!
v&op&1.16 {reg&_565_H[2]}, [mem&_ADDR]!
.if wb&1
sub mem&_ADDR, #14
.endif
.elseif len == 6
v&op&1.16 {reg&_565_L}, [mem&_ADDR]!
v&op&1.32 {reg&_565_H[0]}, [mem&_ADDR]!
.if wb&1
sub mem&_ADDR, #12
.endif
.elseif len == 5
v&op&1.16 {reg&_565_L}, [mem&_ADDR]!
v&op&1.16 {reg&_565_H[0]}, [mem&_ADDR]!
.if wb&1
sub mem&_ADDR, #10
.endif
.elseif len == 4
v&op&1.16 {reg&_565_L}, [mem&_ADDR]&wb
.elseif len == 3
v&op&1.32 {reg&_565_L[0]}, [mem&_ADDR]!
v&op&1.16 {reg&_565_L[2]}, [mem&_ADDR]!
.if wb&1
sub mem&_ADDR, #6
.endif
.elseif len == 2
v&op&1.32 {reg&_565_L[0]}, [mem&_ADDR]&wb
.elseif len == 1
v&op&1.16 {reg&_565_L[0]}, [mem&_ADDR]&wb
.else
.error "[16bpp]len should be 1~8"
.endif
.if (op == ld) && cvt
convert reg, bpp, 0
.endif
@ ---- 8bpp (A8, used for the mask buffer via reg&_A) ----
.elseif bpp == 8
.if len == 8
v&op&1.8 {reg&_A}, [mem&_ADDR]&wb
.elseif len == 7
v&op&1.32 {reg&_A[0]}, [mem&_ADDR]!
v&op&1.16 {reg&_A[2]}, [mem&_ADDR]!
v&op&1.8 {reg&_A[6]}, [mem&_ADDR]!
.if wb&1
sub mem&_ADDR, #7
.endif
.elseif len == 6
v&op&1.32 {reg&_A[0]}, [mem&_ADDR]!
v&op&1.16 {reg&_A[2]}, [mem&_ADDR]!
.if wb&1
sub mem&_ADDR, #6
.endif
.elseif len == 5
v&op&1.32 {reg&_A[0]}, [mem&_ADDR]!
v&op&1.8 {reg&_A[4]}, [mem&_ADDR]!
.if wb&1
sub mem&_ADDR, #5
.endif
.elseif len == 4
v&op&1.32 {reg&_A[0]}, [mem&_ADDR]&wb
.elseif len == 3
v&op&1.16 {reg&_A[0]}, [mem&_ADDR]!
v&op&1.8 {reg&_A[2]}, [mem&_ADDR]!
.if wb&1
sub mem&_ADDR, #3
.endif
.elseif len == 2
v&op&1.16 {reg&_A[0]}, [mem&_ADDR]&wb
.elseif len == 1
v&op&1.8 {reg&_A[0]}, [mem&_ADDR]&wb
.else
.error "[8bpp]len should be 1~8"
.endif
@ ---- bpp == 0: duplicate one solid BGR color into every lane ----
.elseif (bpp == 0) && wb&1
.if len == 8
v&op&3.8 {reg&_B[], reg&_G[], reg&_R[]}, [mem&_ADDR]
.else
.error "[color]len should be 8"
.endif
.endif
@ opaque formats (16/24bpp) have no alpha in memory: synthesize A = 0xFF
.if (op == ld) && cvt && (bpp > 8) && (bpp < 32)
vmov.u8 reg&_A, #0xFF
.endif
.endm
@ premult: per-channel widening multiply of the planar source color by
@ `alpha` (u8 x u8 -> u16), leaving fg*alpha in PREMULT_B/G/R.
@ `blend` later accumulates dst*(255-alpha) on top with vmlal and narrows.
.macro premult alpha
vmull.u8 PREMULT_B, S_B, alpha
vmull.u8 PREMULT_G, S_G, alpha
vmull.u8 PREMULT_R, S_R, alpha
.endm
@ init: common prologue for the blend entry points.
@ Unpacks the blend descriptor passed in r0 — the field offsets below are
@ presumed from the alias names (dest buf/w/h/stride, src buf/stride,
@ mask buf/stride, opa byte at offset 0); confirm against lv_blend_neon.h.
@ Converts every stride from "pixels beyond the drawn width" into bytes,
@ pre-loads and pre-multiplies the fill color when src_bpp == 0, and primes
@ FG_MASK/BG_MASK to all-ones. Note FG_MASK aliases r0, so it is written last.
@   src_bpp : source bpp, or 0 for a solid-color fill
@   dst_bpp : destination bpp (16 / 24 / >=31 meaning 4 bytes per pixel)
@   mask    : nonzero when an A8 mask buffer is present
@   opa     : nonzero when a global opacity byte is applied
.macro init src_bpp, dst_bpp, mask, opa
ldr DST_ADDR, [r0, #4]
ldr DST_W, [r0, #8]
ldr DST_H, [r0, #12]
ldr DST_STRIDE, [r0, #16]
ldr SRC_ADDR, [r0, #20]
.if src_bpp > 0
ldr SRC_STRIDE, [r0, #24]
.endif
.if mask
ldr MASK_ADDR, [r0, #28]
ldr MASK_STRIDE, [r0, #32]
sub MASK_STRIDE, MASK_STRIDE, DST_W
.endif
.if opa
vld1.8 {OPA[]}, [r0]          @ duplicate the opa byte into every lane
.else
vmov.u8 OPA, #0xFF            @ no global opacity: treat as fully opaque
.endif
sub DST_STRIDE, DST_STRIDE, DST_W
vmvn D_A, OPA                 @ D_A = 255 - opa (bitwise NOT)
.if dst_bpp == 16
lsl DST_STRIDE, DST_STRIDE, #1
.elseif dst_bpp == 24
add DST_STRIDE, DST_STRIDE, DST_STRIDE, lsl #1   @ pixels * 3 bytes
.elseif dst_bpp >= 31
lsl DST_STRIDE, DST_STRIDE, #2
.endif
.if src_bpp == 0
.if mask || opa
@ solid color blended per pixel: dup-load B,G,R once, fg is opaque,
@ pre-multiply by the global opacity up front
ldst ld, src_bpp, 8, SRC, S, 1
vmov.u8 S_A, #0xFF
premult OPA
.else
@ plain fill: pack the color once in the destination format (D regs)
ldst ld, src_bpp, 8, SRC, D, 1
vmov.u8 D_A, #0xFF
convert D, dst_bpp, 1
.endif
.else
sub SRC_STRIDE, SRC_STRIDE, DST_W
.if src_bpp == 16
lsl SRC_STRIDE, SRC_STRIDE, #1
.elseif src_bpp == 24
add SRC_STRIDE, SRC_STRIDE, SRC_STRIDE, lsl #1   @ pixels * 3 bytes
.elseif src_bpp >= 31
lsl SRC_STRIDE, SRC_STRIDE, #2
.endif
.endif
mvn FG_MASK, #0               @ FG_MASK = 0xFFFFFFFF (also ends r0's life as dsc ptr)
mvn BG_MASK, #0               @ BG_MASK = 0xFFFFFFFF
.endm
@ input: M_A = 255 - fg.alpha
@ Destination has an alpha channel (ARGB8888): compute the result alpha and
@ the per-lane mixing ratio.  Outputs: D_A = result alpha, PREMULT_* =
@ src * ratio, M_A = 255 - ratio, and FG_MASK/BG_MASK = packed per-lane
@ masks consumed by the blend macro (FG_MASK == 0 means every lane takes
@ fg as-is; BG_MASK == 0 means every lane keeps bg).  The 0xFD constant
@ implements the LV_OPA_MAX / LV_OPA_MIN thresholds: x >= 253, and
@ (255 - x) >= 253, i.e. x <= 2.
.macro calc_alpha len
vmov.u8 TMP_D0, #0xFD
vmvn D_A, D_A @ D_A = 255 - bg.alpha
vcge.u8 TMP_D1, S_A, TMP_D0 @ if (fg.alpha >= LV_OPA_MAX
vcge.u8 TMP_D2, D_A, TMP_D0 @ || bg.alpha <= LV_OPA_MIN)
vorr TMP_D2, TMP_D1
vcge.u8 TMP_D3, M_A, TMP_D0 @ elseif (fg.alpha <= LV_OPA_MIN)
vmvn TMP_Q1, TMP_Q1 @ invert: a set lane now means "blending needed"
vshrn.i16 TMP_D0, TMP_Q1, #4 @ pack both byte masks into nibbles...
vmov FG_MASK, BG_MASK, TMP_D0 @ ...and move them into core registers
cbz FG_MASK, 99f @ return fg;
vmull.u8 TMP_Q0, M_A, D_A @ D_A = 255 - LV_OPA_MIX2(255 - fg.alpha, 255 - bg.alpha)
vqrshrn.u16 M_A, TMP_Q0, #8
vbif M_A, D_A, TMP_D3 @ insert original D_A when fg.alpha <= LV_OPA_MIN
vmvn D_A, M_A
cbz BG_MASK, 99f @ return bg;
@ ratio = fg.alpha * 255 / res.alpha via vrecpe reciprocal estimate.
@ Lanes are widened to f32; for len > 4 the high half runs in the
@ S_565/D_565 scratch registers.  NOTE(review): vrecpe is only an estimate
@ (~8 fractional bits) — presumably close enough to the scalar C result;
@ confirm against the reference implementation.
vmov.u8 TMP_D2, #0xFF
vmovl.u8 TMP_Q0, D_A @ widen res.alpha: u8 -> u16
.if len > 4
vmovl.u16 S_565, TMP_D1 @ high half: u16 -> u32
.endif
vmovl.u16 TMP_Q0, TMP_D0 @ low half: u16 -> u32
vmull.u8 TMP_Q1, S_A, TMP_D2 @ fg.alpha * 255
vcvt.f32.u32 TMP_Q0, TMP_Q0
.if len > 4
vmovl.u16 D_565, TMP_D3
vcvt.f32.u32 S_565, S_565
.endif
vmovl.u16 TMP_Q1, TMP_D2
vrecpe.f32 TMP_Q0, TMP_Q0 @ ~ 1 / res.alpha
vcvt.f32.u32 TMP_Q1, TMP_Q1
.if len > 4
vcvt.f32.u32 D_565, D_565
vrecpe.f32 S_565, S_565
.endif
vmul.f32 TMP_Q0, TMP_Q0, TMP_Q1 @ fg.alpha * 255 / res.alpha
.if len > 4
vmul.f32 S_565, S_565, D_565
.endif
vcvt.u32.f32 TMP_Q0, TMP_Q0
.if len > 4
vcvt.u32.f32 S_565, S_565
.endif
vmovn.u32 TMP_D0, TMP_Q0 @ narrow back: u32 -> u16
.if len > 4
vmovn.u32 TMP_D1, S_565
.endif
vmovn.u16 TMP_D0, TMP_Q0 @ u16 -> u8 per-lane ratio
premult TMP_D0 @ PREMULT_* = src * ratio
vmvn M_A, TMP_D0 @ destination weight = 255 - ratio
99:
.endm
@ Mix the premultiplied source into the destination:
@   D_* = (PREMULT_* + D_* * M_A) >> 8   (rounding, saturating narrow)
@ For dst_bpp == 32 the packed FG_MASK/BG_MASK lane masks produced by
@ calc_alpha select, per lane, between the blended value, the untouched
@ source (fg passthrough) and the untouched destination (bg kept).
@ Only mode == normal is implemented.
.macro blend mode, dst_bpp
.if dst_bpp == 32
@ Re-expand the packed nibble masks into full byte masks in TMP_Q0;
@ vsli #4 duplicates each nibble so every mask byte is 0x00 or 0xFF.
@ vbif below inserts the alternative value where a mask bit is CLEAR.
vmov TMP_D0, FG_MASK, BG_MASK
vmovl.s8 TMP_Q0, TMP_D0
vsli.8 TMP_Q0, TMP_Q0, #4
cbz FG_MASK, 98f @ every lane takes fg: skip the arithmetic entirely
.endif
.if mode == normal
.if dst_bpp == 32
cbz BG_MASK, 97f @ every lane keeps bg: skip the arithmetic entirely
mvns BG_MASK, BG_MASK
beq 96f @ all lanes blend: no need to save bg first
@ Some lanes must keep bg: stash it in the 565 scratch registers.
vmov S_565_L, D_B
vmov S_565_H, D_G
vmov D_565_L, D_R
.endif
96:
vmlal.u8 PREMULT_B, D_B, M_A
vmlal.u8 PREMULT_G, D_G, M_A
vmlal.u8 PREMULT_R, D_R, M_A
vqrshrn.u16 D_B, PREMULT_B, #8
vqrshrn.u16 D_G, PREMULT_G, #8
vqrshrn.u16 D_R, PREMULT_R, #8
.if dst_bpp == 32
beq 97f @ flags survive the NEON ops (set by mvns): all lanes blended
vbif D_B, S_565_L, TMP_D1 @ restore bg where the keep-mask bit is clear
vbif D_G, S_565_H, TMP_D1
vbif D_R, D_565_L, TMP_D1
97:
mvns FG_MASK, FG_MASK
beq 99f @ no fg-passthrough lanes: done
.endif
.else
.error "blend mode is unsupported"
.endif
.if dst_bpp == 32
98:
vbif D_B, S_B, TMP_D0 @ insert fg where the fg-mask bit is clear
vbif D_G, S_G, TMP_D0
vbif D_R, S_R, TMP_D0
vbif D_A, S_A, TMP_D0
99:
.endif
.endm
@ Process one group of `len` pixels (len = 1..8).  Three paths:
@  1. src_bpp < 32, no mask, no opa: plain copy (with a format conversion
@     when src and dst formats differ) — no blending at all.
@  2. src_bpp < 32 with mask and/or opa: the source has no alpha channel;
@     the effective alpha is built from mask/opa, then blended in.
@  3. src_bpp == 32: per-pixel source alpha, optionally further scaled by
@     mask and opa.
@ A trailing `!` asks ldst to post-increment the address register; the dst
@ load before blending deliberately omits it so the store writes back to
@ the same pixels.
.macro process len, src_bpp, dst_bpp, mask, opa, mode
.if (src_bpp < 32) && (mask == 0) && (opa == 0)
@ no blend
.if src_bpp == 0 || src_bpp == dst_bpp
ldst ld, src_bpp, len, SRC, D, 0, !
ldst st, dst_bpp, len, DST, D, 0, !
.else
ldst ld, src_bpp, len, SRC, D, 1, !
ldst st, dst_bpp, len, DST, D, 1, !
.endif
.elseif src_bpp < 32
@ no src_a
.if src_bpp > 0
ldst ld, src_bpp, len, SRC, S, 1, !
.endif
ldst ld, dst_bpp, len, DST, D, 1 @ no `!`: stored back in place below
.if mask
ldst ld, 8, len, MASK, S, 1, !
.if opa
vmull.u8 TMP_Q0, S_A, OPA @ alpha = mask * opa (rounded >> 8)
vqrshrn.u16 S_A, TMP_Q0, #8
.endif
vmvn M_A, S_A @ M_A = 255 - alpha
.if dst_bpp < 32
premult S_A
.else
calc_alpha len @ dst has alpha: full ARGB math
.endif
.else
vmvn M_A, OPA
.if dst_bpp < 32
premult OPA
.else
vmov S_A, OPA
calc_alpha len
.endif
.endif
blend mode, dst_bpp
ldst st, dst_bpp, len, DST, D, 1, !
.else
@ src_a (+mask) (+opa)
ldst ld, src_bpp, len, SRC, S, 1, !
ldst ld, dst_bpp, len, DST, D, 1 @ no `!`: stored back in place below
.if mask == 0
.if opa
vmull.u8 TMP_Q0, S_A, OPA @ alpha = src.alpha * opa (rounded >> 8)
vqrshrn.u16 S_A, TMP_Q0, #8
.endif
.else
ldst ld, 8, len, MASK, M, 1, !
vmull.u8 TMP_Q0, S_A, M_A @ alpha = src.alpha * mask (rounded >> 8)
vqrshrn.u16 S_A, TMP_Q0, #8
.if opa
vmull.u8 TMP_Q0, S_A, OPA @ ...then scale by opa as well
vqrshrn.u16 S_A, TMP_Q0, #8
.endif
.endif
vmvn M_A, S_A @ M_A = 255 - effective alpha
.if dst_bpp < 32
premult S_A
.else
calc_alpha len
.endif
blend mode, dst_bpp
ldst st, dst_bpp, len, DST, D, 1, !
.endif
.endm
@ Handle the trailing DST_W % 8 pixels after the unrolled 8-pixel loop.
@ The low three bits of the width are decoded with tst so that exactly one
@ `process` invocation of the precise remaining length (1..7) runs.
.macro tail src_bpp, dst_bpp, mask, opa, mode
tst DST_W, #4
beq 3f
tst DST_W, #2
beq 5f
tst DST_W, #1
beq 6f
process 7, src_bpp, dst_bpp, mask, opa, mode
b 0f
6:
process 6, src_bpp, dst_bpp, mask, opa, mode
b 0f
5:
tst DST_W, #1
beq 4f
process 5, src_bpp, dst_bpp, mask, opa, mode
b 0f
4:
process 4, src_bpp, dst_bpp, mask, opa, mode
b 0f
3:
tst DST_W, #2
beq 1f
tst DST_W, #1
beq 2f
process 3, src_bpp, dst_bpp, mask, opa, mode
b 0f
2:
process 2, src_bpp, dst_bpp, mask, opa, mode
b 0f
1:
process 1, src_bpp, dst_bpp, mask, opa, mode
0:
.endm
@ Advance all active row pointers by their strides.  The strides were
@ rewritten by `init` as byte deltas from the end of one row to the start
@ of the next, so a simple add suffices here.
.macro next src_bpp, mask
add DST_ADDR, DST_ADDR, DST_STRIDE
.if src_bpp
add SRC_ADDR, SRC_ADDR, SRC_STRIDE
.endif
.if mask
add MASK_ADDR, MASK_ADDR, MASK_STRIDE
.endif
.endm
@ Function prologue: save the callee-saved core registers used as aliases
@ by the blender, plus lr for the matching pop-to-pc in `exit`.
@ NOTE(review): d8-d15 are not saved — confirm none of the register aliases
@ defined above map onto them (AAPCS requires preserving d8-d15).
.macro enter
push {r4-r11, lr}
.endm
@ Function epilogue: restore callee-saved registers and return by popping
@ the saved lr straight into pc.
.macro exit
pop {r4-r11, pc}
.endm
@ Prefetch the next row of `mem` (SRC or DST): issue a pld at the current
@ address plus one full row of DST_W pixels, scaled to bytes by bpp.
@ Clobbers W in the 24 bpp case, because width*3 cannot be expressed as a
@ pld register-shift operand.
.macro preload mem, bpp
.if bpp >= 31
pld [mem&_ADDR, DST_W, lsl #2]
.elseif bpp == 24
add W, DST_W, DST_W, lsl #1
pld [mem&_ADDR, W]
.elseif bpp == 16
pld [mem&_ADDR, DST_W, lsl #1]
.elseif bpp == 8
pld [mem&_ADDR, DST_W]
.endif
.endm
@ Top-level blend routine body: initialize state, then walk the surface
@ row by row.  Rows of >= 8 pixels run the unrolled 8-pixel loop (label 9)
@ followed by `tail` for the remainder; narrower rows go straight to the
@ tail-only loop (label 7).  The next row is prefetched while the current
@ one is processed; DST is only prefetched when it will actually be read
@ (mask, opa, or per-pixel source alpha).
.macro blender src_bpp, dst_bpp, mask, opa, mode
enter
init src_bpp, dst_bpp, mask, opa
movs H, DST_H
beq 0f @ zero-height: nothing to draw.  NOTE(review): 0f resolves to the
@ `0:` label inside the first `tail` expansion below, which then runs one
@ spurious `next` before exiting — harmless but confirm it is intended.
preload SRC, src_bpp
.if mask || opa || (src_bpp == 32)
preload DST, dst_bpp
.endif
subs W, DST_W, #8
blt 7f @ row narrower than 8 px: tail-only loop
9: @ wide-row loop: 8 pixels per iteration
process 8, src_bpp, dst_bpp, mask, opa, mode
subs W, W, #8
bge 9b
tst DST_W, #7
beq 8f @ width is a multiple of 8: no remainder
tail src_bpp, dst_bpp, mask, opa, mode
8: @ advance to the next row and re-prime the prefetch
next src_bpp, mask
preload SRC, src_bpp
.if mask || opa || (src_bpp == 32)
preload DST, dst_bpp
.endif
sub W, DST_W, #8
subs H, H, #1
bgt 9b
exit
7: @ narrow-row loop (DST_W < 8)
tail src_bpp, dst_bpp, mask, opa, mode
next src_bpp, mask
subs H, H, #1
bgt 7b
exit
.endm
@ Define and export one public Thumb-mode blend function; the body is one
@ full expansion of `blender` with the given compile-time configuration.
.macro export name, src_bpp, dst_bpp, mask, opa, mode
.thumb_func
.func name
.global name
name&:
blender src_bpp, dst_bpp, mask, opa, mode
.endfunc
.endm
@ Emit the four public variants for one src -> dst format pair:
@ plain, with opa, with mask, and with both mask and opa.  A flat-colour
@ source omits the blend mode from its symbol name; image sources
@ include it.
.macro export_set src, dst, src_bpp, dst_bpp, mode
.if src == color
export lv_&src&_blend_to_&dst&_neon, src_bpp, dst_bpp, 0, 0, mode
export lv_&src&_blend_to_&dst&_with_opa_neon, src_bpp, dst_bpp, 0, 1, mode
export lv_&src&_blend_to_&dst&_with_mask_neon, src_bpp, dst_bpp, 1, 0, mode
export lv_&src&_blend_to_&dst&_mix_mask_opa_neon, src_bpp, dst_bpp, 1, 1, mode
.else
export lv_&src&_blend_&mode&_to_&dst&_neon, src_bpp, dst_bpp, 0, 0, mode
export lv_&src&_blend_&mode&_to_&dst&_with_opa_neon, src_bpp, dst_bpp, 0, 1, mode
export lv_&src&_blend_&mode&_to_&dst&_with_mask_neon, src_bpp, dst_bpp, 1, 0, mode
export lv_&src&_blend_&mode&_to_&dst&_mix_mask_opa_neon, src_bpp, dst_bpp, 1, 1, mode
.endif
.endm
@ Instantiate every supported src -> dst combination (4 symbols each).
@ bpp codes: 0 = flat colour, 16 = RGB565, 24 = RGB888,
@ 31 = XRGB8888 (32-bit storage, alpha ignored), 32 = ARGB8888.
export_set color, rgb565, 0, 16, normal
export_set rgb565, rgb565, 16, 16, normal
export_set rgb888, rgb565, 24, 16, normal
export_set xrgb8888, rgb565, 31, 16, normal
export_set argb8888, rgb565, 32, 16, normal
export_set color, rgb888, 0, 24, normal
export_set rgb565, rgb888, 16, 24, normal
export_set rgb888, rgb888, 24, 24, normal
export_set xrgb8888, rgb888, 31, 24, normal
export_set argb8888, rgb888, 32, 24, normal
export_set color, xrgb8888, 0, 31, normal
export_set rgb565, xrgb8888, 16, 31, normal
export_set rgb888, xrgb8888, 24, 31, normal
export_set xrgb8888, xrgb8888, 31, 31, normal
export_set argb8888, xrgb8888, 32, 31, normal
export_set color, argb8888, 0, 32, normal
export_set rgb565, argb8888, 16, 32, normal
export_set rgb888, argb8888, 24, 32, normal
export_set xrgb8888, argb8888, 31, 32, normal
export_set argb8888, argb8888, 32, 32, normal
#endif /*LV_USE_DRAW_SW_ASM == LV_DRAW_SW_ASM_NEON*/

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -55,7 +55,9 @@
* Start parsing lv_conf_template.h
-----------------------------------*/
#ifndef __ASSEMBLY__
#include <stdint.h>
#endif
/*====================
COLOR SETTINGS
@@ -2890,15 +2892,19 @@
* End of parsing lv_conf_template.h
-----------------------------------*/
#ifndef __ASSEMBLY__
LV_EXPORT_CONST_INT(LV_DPI_DEF);
#endif
#undef _LV_KCONFIG_PRESENT
#ifndef __ASSEMBLY__
#if LV_USE_FLOAT
typedef float lv_value_precise_t;
#else
typedef int32_t lv_value_precise_t;
#endif
#endif
/*Set some defines if a dependency is disabled*/
#if LV_USE_LOG == 0

View File

@@ -13,7 +13,10 @@ extern "C" {
/*********************
* INCLUDES
*********************/
#ifndef __ASSEMBLY__
#include <stdint.h>
#endif
/*********************
* DEFINES
@@ -55,6 +58,9 @@ extern "C" {
* TYPEDEFS
**********************/
/* Exclude C enum and struct definitions when included by assembly code */
#ifndef __ASSEMBLY__
/**
* LVGL error codes.
*/
@@ -88,6 +94,8 @@ typedef int32_t lv_intptr_t;
#endif
#endif /*__ASSEMBLY__*/
/**********************
* GLOBAL PROTOTYPES
**********************/