mirror of
https://github.com/RaySollium99/picodrive.git
synced 2025-09-04 23:07:46 -04:00
some drawing code C optimisations
This commit is contained in:
parent
f6b4a9ca53
commit
f740428b81
5 changed files with 54 additions and 40 deletions
4
Makefile
4
Makefile
|
@ -5,6 +5,10 @@ CFLAGS += -I.
|
|||
ifeq "$(DEBUG)" "0"
|
||||
CFLAGS += -O3 -DNDEBUG
|
||||
endif
|
||||
ifeq ("$(PLATFORM)",$(filter "$(PLATFORM)","gp2x" "opendingux" "rpi1"))
|
||||
# very small caches, avoid optimization options making the binary much bigger
|
||||
CFLAGS += -finline-limit=42 -fno-unroll-loops -fno-ipa-cp-clone # -fno-ipa-cp
|
||||
endif
|
||||
|
||||
# This is actually needed, believe me.
|
||||
# If you really have to disable this, set NO_ALIGN_FUNCTIONS elsewhere.
|
||||
|
|
|
@ -29,8 +29,8 @@ assuming $TC points to the appropriate cross compile toolchain directory:
|
|||
|
||||
platform|toolchain|configure command
|
||||
--------|---------|-----------------
|
||||
gp2x,wiz,caanoo|open2x|CROSS_COMPILE=arm-open2x-linux- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -finline-limit=42 -fno-unroll-loops -fno-stack-protector -fno-common" LDFLAGS="--sysroot $TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x
|
||||
gp2x,wiz,caanoo|open2x with ubuntu arm gcc 4.7|CROSS_COMPILE=arm-linux-gnueabi- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -finline-limit=42 -fno-unroll-loops -fno-stack-protector -fno-common" LDFLAGS="-B$TC/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x
|
||||
gp2x,wiz,caanoo|open2x|CROSS_COMPILE=arm-open2x-linux- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -fno-stack-protector -fno-common" LDFLAGS="--sysroot $TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x
|
||||
gp2x,wiz,caanoo|open2x with ubuntu arm gcc 4.7|CROSS_COMPILE=arm-linux-gnueabi- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -fno-stack-protector -fno-common" LDFLAGS="-B$TC/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x
|
||||
opendingux|opendingux|CROSS_COMPILE=mipsel-linux- CFLAGS="-I$TC/usr/include -I$TC/usr/include/SDL" LDFLAGS="--sysroot $TC -L$TC/lib" ./configure --platform=opendingux
|
||||
opendingux|opendingux with ubuntu mips gcc 5.4|CROSS_COMPILE=mipsel-linux-gnu- CFLAGS="-I$TC/usr/include -I$TC/usr/include/SDL" LDFLAGS="-B$TC/usr/lib -B$TC/lib -Wl,-rpath-link=$TC/usr/lib -Wl,-rpath-link=$TC/lib" ./configure --platform=opendingux
|
||||
gcw0|gcw0|CROSS_COMPILE=mipsel-gcw0-linux-uclibc- CFLAGS="-I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include -I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/SDL" LDFLAGS="--sysroot $TC/usr/mipsel-gcw0-linux-uclibc/sysroot" ./configure --platform=gcw0
|
||||
|
|
|
@ -42,31 +42,42 @@ static void convert_pal555(int invert_prio)
|
|||
const unsigned int m1 = 0x001f; \
|
||||
const unsigned int m2 = 0x03e0; \
|
||||
const unsigned int m3 = 0x7c00; \
|
||||
int i; \
|
||||
\
|
||||
for (i = 320; i > 0; i--, pd++, p32x++, pmd++) { \
|
||||
unsigned short t = *p32x; \
|
||||
if ((*pmd & 0x3f) != mdbg && !((t ^ inv) & 0x8000)) { \
|
||||
pmd_draw_code; \
|
||||
continue; \
|
||||
} \
|
||||
unsigned short t; \
|
||||
int i = 320; \
|
||||
\
|
||||
while (i > 0) { \
|
||||
for (; i > 0 && (*pmd & 0x3f) == mdbg; pd++, pmd++, i--) { \
|
||||
t = *p32x++; \
|
||||
*pd = ((t&m1) << 11) | ((t&m2) << 1) | ((t&m3) >> 10); \
|
||||
} \
|
||||
for (; i > 0 && (*pmd & 0x3f) != mdbg; pd++, pmd++, i--) { \
|
||||
t = *p32x++; \
|
||||
if ((t ^ inv) & 0x8000) \
|
||||
*pd = ((t&m1) << 11) | ((t&m2) << 1) | ((t&m3) >> 10); \
|
||||
else \
|
||||
pmd_draw_code; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
// packed pixel mode
|
||||
#define do_line_pp(pd, p32x, pmd, pmd_draw_code) \
|
||||
{ \
|
||||
unsigned short t; \
|
||||
int i; \
|
||||
for (i = 320; i > 0; i--, pd++, p32x++, pmd++) { \
|
||||
t = pal[*(unsigned char *)((uintptr_t)p32x ^ 1)]; \
|
||||
if ((t & 0x20) || (*pmd & 0x3f) == mdbg) \
|
||||
int i = 320; \
|
||||
while (i > 0) { \
|
||||
for (; i > 0 && (*pmd & 0x3f) == mdbg; pd++, pmd++, i--) { \
|
||||
t = pal[*(unsigned char *)((uintptr_t)(p32x++) ^ 1)]; \
|
||||
*pd = t; \
|
||||
} \
|
||||
for (; i > 0 && (*pmd & 0x3f) != mdbg; pd++, pmd++, i--) { \
|
||||
t = pal[*(unsigned char *)((uintptr_t)(p32x++) ^ 1)]; \
|
||||
if (t & 0x20) \
|
||||
*pd = t; \
|
||||
else \
|
||||
pmd_draw_code; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
// run length mode
|
||||
|
|
10
pico/draw.c
10
pico/draw.c
|
@ -1341,8 +1341,14 @@ void FinalizeLine555(int sh, int line, struct PicoEState *est)
|
|||
#if 1
|
||||
int i;
|
||||
|
||||
for (i = 0; i < len; i++)
|
||||
pd[i] = pal[ps[i]];
|
||||
for (i = len; i > 0; i-=4) {
|
||||
*pd++ = pal[*ps++];
|
||||
*pd++ = pal[*ps++];
|
||||
*pd++ = pal[*ps++];
|
||||
*pd++ = pal[*ps++];
|
||||
}
|
||||
// for (i = 0; i < len; i++)
|
||||
// pd[i] = pal[ps[i]];
|
||||
#else
|
||||
extern void amips_clut(unsigned short *dst, unsigned char *src, unsigned short *pal, int count);
|
||||
extern void amips_clut_6bit(unsigned short *dst, unsigned char *src, unsigned short *pal, int count);
|
||||
|
|
|
@ -89,7 +89,8 @@ static const struct in_pdata in_sdl_platform_data = {
|
|||
/* YUV stuff */
|
||||
static int yuv_ry[32], yuv_gy[32], yuv_by[32];
|
||||
static unsigned char yuv_u[32 * 2], yuv_v[32 * 2];
|
||||
static int yuv_y[256];
|
||||
static unsigned char yuv_y[256];
|
||||
static struct uyvy { unsigned int y:8; unsigned int vyu:24; } yuv_uyvy[65536];
|
||||
|
||||
void bgr_to_uyvy_init(void)
|
||||
{
|
||||
|
@ -124,34 +125,26 @@ void bgr_to_uyvy_init(void)
|
|||
for (i = 0; i < 256; i++) {
|
||||
yuv_y[i] = 16 + 219 * i / 32;
|
||||
}
|
||||
// everything combined into one large array for speed
|
||||
for (i = 0; i < 65536; i++) {
|
||||
int r = (i >> 11) & 0x1f, g = (i >> 6) & 0x1f, b = (i >> 0) & 0x1f;
|
||||
int y = (yuv_ry[r] + yuv_gy[g] + yuv_by[b]) >> 16;
|
||||
yuv_uyvy[i].y = yuv_y[y];
|
||||
yuv_uyvy[i].vyu = (yuv_v[r-y + 32] << 16) | (yuv_y[y] << 8) | yuv_u[b-y + 32];
|
||||
}
|
||||
}
|
||||
|
||||
void rgb565_to_uyvy(void *d, const void *s, int pixels)
|
||||
{
|
||||
unsigned int *dst = d;
|
||||
const unsigned short *src = s;
|
||||
const unsigned char *yu = yuv_u + 32;
|
||||
const unsigned char *yv = yuv_v + 32;
|
||||
int r0, g0, b0, r1, g1, b1;
|
||||
int y0, y1, u, v;
|
||||
|
||||
for (; pixels > 0; src += 2, dst++, pixels -= 2)
|
||||
for (; pixels > 0; src += 4, dst += 2, pixels -= 4)
|
||||
{
|
||||
r0 = (src[0] >> 11) & 0x1f;
|
||||
g0 = (src[0] >> 6) & 0x1f;
|
||||
b0 = src[0] & 0x1f;
|
||||
r1 = (src[1] >> 11) & 0x1f;
|
||||
g1 = (src[1] >> 6) & 0x1f;
|
||||
b1 = src[1] & 0x1f;
|
||||
y0 = (yuv_ry[r0] + yuv_gy[g0] + yuv_by[b0]) >> 16;
|
||||
y1 = (yuv_ry[r1] + yuv_gy[g1] + yuv_by[b1]) >> 16;
|
||||
u = yu[b0 - y0];
|
||||
v = yv[r0 - y0];
|
||||
// valid Y range seems to be 16..235
|
||||
y0 = yuv_y[y0];
|
||||
y1 = yuv_y[y1];
|
||||
|
||||
*dst = (y1 << 24) | (v << 16) | (y0 << 8) | u;
|
||||
struct uyvy *uyvy0 = yuv_uyvy + src[0], *uyvy1 = yuv_uyvy + src[1];
|
||||
struct uyvy *uyvy2 = yuv_uyvy + src[2], *uyvy3 = yuv_uyvy + src[3];
|
||||
dst[0] = (uyvy1->y << 24) | uyvy0->vyu;
|
||||
dst[1] = (uyvy3->y << 24) | uyvy2->vyu;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue