mirror of
https://github.com/RaySollium99/picodrive.git
synced 2025-09-04 23:07:46 -04:00
minor changes
This commit is contained in:
parent
b1a047c926
commit
32feba7458
9 changed files with 151 additions and 78 deletions
|
@ -194,11 +194,11 @@ void PicoPower32x(void)
|
||||||
|
|
||||||
void PicoUnload32x(void)
|
void PicoUnload32x(void)
|
||||||
{
|
{
|
||||||
|
sh2_finish(&msh2);
|
||||||
|
sh2_finish(&ssh2);
|
||||||
if (Pico32xMem != NULL)
|
if (Pico32xMem != NULL)
|
||||||
plat_munmap(Pico32xMem, sizeof(*Pico32xMem));
|
plat_munmap(Pico32xMem, sizeof(*Pico32xMem));
|
||||||
Pico32xMem = NULL;
|
Pico32xMem = NULL;
|
||||||
sh2_finish(&msh2);
|
|
||||||
sh2_finish(&ssh2);
|
|
||||||
|
|
||||||
PicoIn.AHW &= ~PAHW_32X;
|
PicoIn.AHW &= ~PAHW_32X;
|
||||||
}
|
}
|
||||||
|
|
|
@ -87,19 +87,19 @@ cyclone_fetch32:
|
||||||
orrcc r0, r1, r0, lsl #16
|
orrcc r0, r1, r0, lsl #16
|
||||||
bxcc lr
|
bxcc lr
|
||||||
|
|
||||||
stmfd sp!,{r0,r1,lr}
|
stmfd sp!,{r0,r1,r2,lr}
|
||||||
mov lr, pc
|
mov lr, pc
|
||||||
bx r1
|
bx r1
|
||||||
mov r2, r0, lsl #16
|
mov r2, r0, lsl #16
|
||||||
ldmia sp, {r0,r1}
|
ldmfd sp!, {r0,r1}
|
||||||
str r2, [sp]
|
str r2, [sp]
|
||||||
add r0, r0, #2
|
add r0, r0, #2
|
||||||
mov lr, pc
|
mov lr, pc
|
||||||
bx r1
|
bx r1
|
||||||
ldr r1, [sp]
|
ldmfd sp!, {r1,lr}
|
||||||
mov r0, r0, lsl #16
|
mov r0, r0, lsl #16
|
||||||
orr r0, r1, r0, lsr #16
|
orr r0, r1, r0, lsr #16
|
||||||
ldmfd sp!,{r1,r2,pc}
|
bx lr
|
||||||
|
|
||||||
|
|
||||||
cyclone_write8: @ u32 a, u8 d
|
cyclone_write8: @ u32 a, u8 d
|
||||||
|
|
|
@ -241,7 +241,7 @@ extern SH2 sh2s[2];
|
||||||
# define sh2_pc(sh2) (sh2)->pc
|
# define sh2_pc(sh2) (sh2)->pc
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define sh2_cycles_done(sh2) ((unsigned)(sh2)->cycles_timeslice - sh2_cycles_left(sh2))
|
#define sh2_cycles_done(sh2) (unsigned)((int)(sh2)->cycles_timeslice - sh2_cycles_left(sh2))
|
||||||
#define sh2_cycles_done_t(sh2) \
|
#define sh2_cycles_done_t(sh2) \
|
||||||
(unsigned)(C_M68K_TO_SH2(sh2, (sh2)->m68krcycles_done) + sh2_cycles_done(sh2))
|
(unsigned)(C_M68K_TO_SH2(sh2, (sh2)->m68krcycles_done) + sh2_cycles_done(sh2))
|
||||||
#define sh2_cycles_done_m68k(sh2) \
|
#define sh2_cycles_done_m68k(sh2) \
|
||||||
|
@ -650,6 +650,7 @@ PICO_INTERNAL void PicoFrameStart(void);
|
||||||
void PicoDrawSync(int to, int blank_last_line);
|
void PicoDrawSync(int to, int blank_last_line);
|
||||||
void BackFill(int reg7, int sh, struct PicoEState *est);
|
void BackFill(int reg7, int sh, struct PicoEState *est);
|
||||||
void FinalizeLine555(int sh, int line, struct PicoEState *est);
|
void FinalizeLine555(int sh, int line, struct PicoEState *est);
|
||||||
|
void PicoDrawSetOutBufMD(void *dest, int increment);
|
||||||
extern int (*PicoScanBegin)(unsigned int num);
|
extern int (*PicoScanBegin)(unsigned int num);
|
||||||
extern int (*PicoScanEnd)(unsigned int num);
|
extern int (*PicoScanEnd)(unsigned int num);
|
||||||
#define MAX_LINE_SPRITES 29
|
#define MAX_LINE_SPRITES 29
|
||||||
|
|
|
@ -46,8 +46,8 @@ static void vdp_data_write(unsigned char d)
|
||||||
struct PicoVideo *pv = &Pico.video;
|
struct PicoVideo *pv = &Pico.video;
|
||||||
|
|
||||||
if (pv->type == 3) {
|
if (pv->type == 3) {
|
||||||
|
if (PicoMem.cram[pv->addr & 0x1f] != d) Pico.m.dirtyPal = 1;
|
||||||
PicoMem.cram[pv->addr & 0x1f] = d;
|
PicoMem.cram[pv->addr & 0x1f] = d;
|
||||||
Pico.m.dirtyPal = 1;
|
|
||||||
} else {
|
} else {
|
||||||
PicoMem.vramb[pv->addr] = d;
|
PicoMem.vramb[pv->addr] = d;
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,70 +53,5 @@ void *memmove (void *dest, const void *src, size_t n)
|
||||||
return dest;
|
return dest;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
/* memcpy/memmove in C with some simple optimizations.
|
#include "../memcpy.c"
|
||||||
* ATTN does dirty aliasing tricks with undefined behaviour by standard.
|
|
||||||
* (this works fine with gcc, though...)
|
|
||||||
*/
|
|
||||||
void *memcpy(void *dest, const void *src, size_t n)
|
|
||||||
{
|
|
||||||
struct _16 { uint32_t a[4]; };
|
|
||||||
union { const void *v; char *c; uint64_t *l; struct _16 *s; }
|
|
||||||
ss = { src }, ds = { dest };
|
|
||||||
const int lm = sizeof(uint32_t)-1;
|
|
||||||
|
|
||||||
if ((((unsigned)ss.c ^ (unsigned)ds.c) & lm) == 0) {
|
|
||||||
/* fast copy if pointers have the same aligment */
|
|
||||||
while (((unsigned)ss.c & lm) && n > 0) /* align to word */
|
|
||||||
*ds.c++ = *ss.c++, n--;
|
|
||||||
while (n >= sizeof(struct _16)) /* copy 16 bytes blocks */
|
|
||||||
*ds.s++ = *ss.s++, n -= sizeof(struct _16);
|
|
||||||
if (n >= sizeof(uint64_t)) /* copy leftover 8 byte block */
|
|
||||||
*ds.l++ = *ss.l++, n -= sizeof(uint64_t);
|
|
||||||
} else {
|
|
||||||
/* byte copy if pointers are unaligned */
|
|
||||||
while (n >= 8) { /* copy 8 byte blocks */
|
|
||||||
*ds.c++ = *ss.c++, n--; *ds.c++ = *ss.c++, n--;
|
|
||||||
*ds.c++ = *ss.c++, n--; *ds.c++ = *ss.c++, n--;
|
|
||||||
*ds.c++ = *ss.c++, n--; *ds.c++ = *ss.c++, n--;
|
|
||||||
*ds.c++ = *ss.c++, n--; *ds.c++ = *ss.c++, n--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/* copy max. 8 leftover bytes */
|
|
||||||
while (n > 0)
|
|
||||||
*ds.c++ = *ss.c++, n--;
|
|
||||||
return dest;
|
|
||||||
}
|
|
||||||
|
|
||||||
void *memmove (void *dest, const void *src, size_t n)
|
|
||||||
{
|
|
||||||
struct _16 { uint32_t a[4]; };
|
|
||||||
union { const void *v; char *c; uint64_t *l; struct _16 *s; }
|
|
||||||
ss = { src+n }, ds = { dest+n };
|
|
||||||
const int lm = sizeof(uint32_t)-1;
|
|
||||||
|
|
||||||
if (dest <= src || dest >= src+n)
|
|
||||||
return memcpy(dest, src, n);
|
|
||||||
|
|
||||||
if ((((unsigned)ss.c ^ (unsigned)ds.c) & lm) == 0) {
|
|
||||||
/* fast copy if pointers have the same aligment */
|
|
||||||
while (((unsigned)ss.c & lm) && n > 0)
|
|
||||||
*--ds.c = *--ss.c, n--;
|
|
||||||
while (n >= sizeof(struct _16))
|
|
||||||
*--ds.s = *--ss.s, n -= sizeof(struct _16);
|
|
||||||
if (n >= sizeof(uint64_t))
|
|
||||||
*--ds.l = *--ss.l, n -= sizeof(uint64_t);
|
|
||||||
} else {
|
|
||||||
/* byte copy if pointers are unaligned */
|
|
||||||
while (n >= 8) {
|
|
||||||
*--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--;
|
|
||||||
*--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--;
|
|
||||||
*--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--;
|
|
||||||
*--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/* copy max. 8 leftover bytes */
|
|
||||||
while (n > 0)
|
|
||||||
*--ds.c = *--ss.c, n--;
|
|
||||||
return dest;
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
|
125
platform/common/memcpy.c
Normal file
125
platform/common/memcpy.c
Normal file
|
@ -0,0 +1,125 @@
|
||||||
|
/*
|
||||||
|
* (C) 2018 Kai-Uwe Bloem <derkub@gmail.com>
|
||||||
|
*
|
||||||
|
* 32bit ARM/MIPS optimized C implementation of memcpy and memmove, designed for
|
||||||
|
* good performance with gcc.
|
||||||
|
* - if src and dest have the same alignment, 4-word copy is used.
|
||||||
|
* - if src and dest are unaligned to each other, still loads word data and
|
||||||
|
* stores correctly shifted word data (for all but the first and last bytes
|
||||||
|
* to avoid under/overstepping the src region).
|
||||||
|
*
|
||||||
|
* ATTN does dirty aliasing tricks with undefined behaviour by standard.
|
||||||
|
* (however, this was needed to improve the generated code).
|
||||||
|
* ATTN uses struct assignment, which only works if the compiler is inlining
|
||||||
|
* this (else it would probably call memcpy :-)).
|
||||||
|
*/
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include <endian.h>
|
||||||
|
#if __BYTE_ORDER == __LITTLE_ENDIAN
|
||||||
|
#define _L_ >>
|
||||||
|
#define _U_ <<
|
||||||
|
#else
|
||||||
|
#define _L_ <<
|
||||||
|
#define _U_ >>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * memcpy - copy n bytes from src to dest, strictly front to back.
 *
 * Returns dest. memmove() in this file forwards its dest <= src case here,
 * so the copy direction must stay ascending.
 *
 * Strategy:
 *  1. copy single bytes until src is word aligned;
 *  2. if dest is then word aligned as well, copy 16 byte blocks by struct
 *     assignment plus at most one 8 byte block;
 *  3. otherwise, if at least two words remain, copy single bytes until dest
 *     is word aligned, then load aligned words from src and store words
 *     merged from two neighbouring loads. _L_ shifts data towards the lower
 *     addressed byte of a word and _U_ towards the upper one (they are
 *     selected by byte order at the top of this file);
 *  4. copy whatever is left byte by byte.
 *
 * Pointer alignment is tested through uintptr_t so no address bits are lost
 * on targets where pointers are wider than unsigned int.
 *
 * ATTN the word/struct accesses through the union alias the caller's data,
 * which is undefined behaviour by the standard (see the file header).
 */
void *memcpy(void *dest, const void *src, size_t n)
{
    struct _16 { uint32_t a[4]; };
    union { const void *v; uint8_t *c; uint32_t *i; uint64_t *l; struct _16 *s; }
        ss = { src }, ds = { dest };
    const uintptr_t lm = sizeof(uint32_t)-1;

    /* align src to word */
    while (((uintptr_t)ss.c & lm) && n > 0) {
        *ds.c++ = *ss.c++;
        n--;
    }
    if (((uintptr_t)ds.c & lm) == 0) {
        /* fast copy if pointers have the same alignment */
        while (n >= sizeof(struct _16)) {   /* copy 16 byte blocks */
            *ds.s++ = *ss.s++;
            n -= sizeof(struct _16);
        }
        if (n >= sizeof(uint64_t)) {        /* copy leftover 8 byte block */
            *ds.l++ = *ss.l++;
            n -= sizeof(uint64_t);
        }
    } else if (n >= 2*sizeof(uint32_t)) {
        /* unaligned data big enough to avoid overstepping src */
        uint32_t v1, v2, b, s;
        size_t ahead;

        /* align dest to word; this leaves src misaligned by b = 1..3 bytes */
        while (((uintptr_t)ds.c & lm) && n > 0) {
            *ds.c++ = *ss.c++;
            n--;
        }
        /* round the src cursor down to its word and preload two words */
        b = (uint32_t)((uintptr_t)ss.c & lm);
        s = b*8;
        ss.c -= b;
        v1 = *ss.i++;
        v2 = *ss.i++;
        /* distance the load cursor is ahead of the next byte to copy */
        ahead = 2*sizeof(uint32_t) - b;
        /* copy loop: load aligned words and store shifted words */
        while (n >= 3*sizeof(uint32_t)) {
            *ds.i++ = (v1 _L_ s) | (v2 _U_ (32-s));
            v1 = *ss.i++;
            *ds.i++ = (v2 _L_ s) | (v1 _U_ (32-s));
            v2 = *ss.i++;
            n -= 2*sizeof(uint32_t);
        }
        /* data for one more store is already loaded */
        if (n >= sizeof(uint32_t)) {
            *ds.i++ = (v1 _L_ s) | (v2 _U_ (32-s));
            n -= sizeof(uint32_t);
            ahead -= sizeof(uint32_t);
        }
        /* step the src cursor back onto the first byte not yet copied */
        ss.c -= ahead;
    }
    /* copy 0-7 leftover bytes */
    while (n >= 4) {
        *ds.c++ = *ss.c++; *ds.c++ = *ss.c++;
        *ds.c++ = *ss.c++; *ds.c++ = *ss.c++;
        n -= 4;
    }
    while (n > 0) {
        *ds.c++ = *ss.c++;
        n--;
    }
    return dest;
}
|
||||||
|
|
||||||
|
/*
 * memmove - copy n bytes from src to dest, regions may overlap.
 *
 * Returns dest. If dest lies at or below src, or the regions do not overlap,
 * the work is handed to memcpy() (ascending copy). Otherwise the data is
 * copied back to front, starting one past the end of both regions:
 *  1. copy single bytes until the src cursor is word aligned;
 *  2. if the dest cursor is then word aligned as well, copy 16 byte blocks
 *     by struct assignment plus at most one 8 byte block;
 *  3. otherwise, if at least two words remain, copy single bytes until the
 *     dest cursor is word aligned, then load aligned words from src and
 *     store words merged from two neighbouring loads. _L_ shifts data
 *     towards the lower addressed byte of a word and _U_ towards the upper
 *     one (they are selected by byte order at the top of this file);
 *  4. copy whatever is left byte by byte.
 *
 * In step 3 the src cursor must be rounded UP to the next word boundary
 * before the first `*--ss.i`, i.e. by b = 4 - (cursor & 3) bytes. Advancing
 * it by (cursor & 3) only lands on a boundary when that value is 2, so
 * relative misalignments of 1 and 3 would load misaligned words and merge
 * them with the wrong shift counts.
 *
 * ATTN the word/struct accesses through the union alias the caller's data,
 * which is undefined behaviour by the standard (see the file header).
 * NOTE(review): the block copies in step 2 assign between objects that may
 * partially overlap and so depend on the compiler loading a whole block
 * before storing it - confirm for each target compiler.
 */
void *memmove (void *dest, const void *src, size_t n)
{
    struct _16 { uint32_t a[4]; };
    union { const void *v; uint8_t *c; uint32_t *i; uint64_t *l; struct _16 *s; }
        ss = { (const uint8_t *)src + n }, ds = { (uint8_t *)dest + n };
    const uintptr_t lm = sizeof(uint32_t)-1;

    /* an ascending copy is safe unless dest starts inside the src region */
    if ((const uint8_t *)dest <= (const uint8_t *)src ||
        (const uint8_t *)dest >= (const uint8_t *)src + n)
        return memcpy(dest, src, n);

    /* align src to word */
    while (((uintptr_t)ss.c & lm) && n > 0) {
        *--ds.c = *--ss.c;
        n--;
    }
    if (((uintptr_t)ds.c & lm) == 0) {
        /* fast copy if pointers have the same alignment */
        while (n >= sizeof(struct _16)) {   /* copy 16 byte blocks */
            *--ds.s = *--ss.s;
            n -= sizeof(struct _16);
        }
        if (n >= sizeof(uint64_t)) {        /* copy leftover 8 byte block */
            *--ds.l = *--ss.l;
            n -= sizeof(uint64_t);
        }
    } else if (n >= 2*sizeof(uint32_t)) {
        /* unaligned data big enough to avoid understepping src */
        uint32_t v1, v2, b, s;
        size_t behind;

        /* align dest to word; this leaves src misaligned by 1..3 bytes */
        while (((uintptr_t)ds.c & lm) && n > 0) {
            *--ds.c = *--ss.c;
            n--;
        }
        /* round the src cursor up to the next word and preload two words */
        b = (uint32_t)(sizeof(uint32_t) - ((uintptr_t)ss.c & lm));
        s = b*8;
        ss.c += b;
        v1 = *--ss.i;
        v2 = *--ss.i;
        /* distance the load cursor is below the end of the uncopied data */
        behind = 2*sizeof(uint32_t) - b;
        /* copy loop: load aligned words and store shifted words */
        while (n >= 3*sizeof(uint32_t)) {
            *--ds.i = (v1 _U_ s) | (v2 _L_ (32-s));
            v1 = *--ss.i;
            *--ds.i = (v2 _U_ s) | (v1 _L_ (32-s));
            v2 = *--ss.i;
            n -= 2*sizeof(uint32_t);
        }
        /* data for one more store is already loaded */
        if (n >= sizeof(uint32_t)) {
            *--ds.i = (v1 _U_ s) | (v2 _L_ (32-s));
            n -= sizeof(uint32_t);
            behind -= sizeof(uint32_t);
        }
        /* step the src cursor forward to one past the last uncopied byte */
        ss.c += behind;
    }
    /* copy 0-7 leftover bytes */
    while (n >= 4) {
        *--ds.c = *--ss.c; *--ds.c = *--ss.c;
        *--ds.c = *--ss.c; *--ds.c = *--ss.c;
        n -= 4;
    }
    while (n > 0) {
        *--ds.c = *--ss.c;
        n--;
    }
    return dest;
}
|
|
@ -89,6 +89,7 @@ static const struct in_pdata in_sdl_platform_data = {
|
||||||
/* YUV stuff */
|
/* YUV stuff */
|
||||||
static int yuv_ry[32], yuv_gy[32], yuv_by[32];
|
static int yuv_ry[32], yuv_gy[32], yuv_by[32];
|
||||||
static unsigned char yuv_u[32 * 2], yuv_v[32 * 2];
|
static unsigned char yuv_u[32 * 2], yuv_v[32 * 2];
|
||||||
|
static int yuv_y[256];
|
||||||
|
|
||||||
void bgr_to_uyvy_init(void)
|
void bgr_to_uyvy_init(void)
|
||||||
{
|
{
|
||||||
|
@ -119,6 +120,10 @@ void bgr_to_uyvy_init(void)
|
||||||
v = 255;
|
v = 255;
|
||||||
yuv_v[i + 32] = v;
|
yuv_v[i + 32] = v;
|
||||||
}
|
}
|
||||||
|
// valid Y range seems to be 16..235
|
||||||
|
for (i = 0; i < 256; i++) {
|
||||||
|
yuv_y[i] = 16 + 219 * i / 32;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void rgb565_to_uyvy(void *d, const void *s, int pixels)
|
void rgb565_to_uyvy(void *d, const void *s, int pixels)
|
||||||
|
@ -143,8 +148,8 @@ void rgb565_to_uyvy(void *d, const void *s, int pixels)
|
||||||
u = yu[b0 - y0];
|
u = yu[b0 - y0];
|
||||||
v = yv[r0 - y0];
|
v = yv[r0 - y0];
|
||||||
// valid Y range seems to be 16..235
|
// valid Y range seems to be 16..235
|
||||||
y0 = 16 + 219 * y0 / 31;
|
y0 = yuv_y[y0];
|
||||||
y1 = 16 + 219 * y1 / 31;
|
y1 = yuv_y[y1];
|
||||||
|
|
||||||
*dst = (y1 << 24) | (v << 16) | (y0 << 8) | u;
|
*dst = (y1 << 24) | (v << 16) | (y0 << 8) | u;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
#define VERSION "1.93"
|
#define VERSION "1.93+"
|
||||||
|
|
|
@ -1,16 +1,21 @@
|
||||||
# usage: mkoffsets <output dir>
|
|
||||||
# automatically compute structure offsets for gcc targets in ELF format
|
# automatically compute structure offsets for gcc targets in ELF format
|
||||||
|
# (C) 2018 Kai-Uwe Bloem. This work is placed in the public domain.
|
||||||
|
#
|
||||||
|
# usage: mkoffsets <output dir>
|
||||||
|
|
||||||
CC=${CC:-gcc}
|
CC=${CC:-gcc}
|
||||||
|
|
||||||
# endianess of target (automagically determined below)
|
# endianess of target (automagically determined below)
|
||||||
ENDIAN=
|
ENDIAN=
|
||||||
|
|
||||||
|
# compile with target C compiler and extract value from .rodata section
|
||||||
compile_rodata ()
|
compile_rodata ()
|
||||||
{
|
{
|
||||||
$CC $CFLAGS -I .. -c /tmp/getoffs.c -o /tmp/getoffs.o || exit 1
|
$CC $CFLAGS -I .. -c /tmp/getoffs.c -o /tmp/getoffs.o || exit 1
|
||||||
|
# find the name of the .rodata section (in case -fdata-sections is used)
|
||||||
rosect=$(readelf -S /tmp/getoffs.o | grep '\.rodata' |
|
rosect=$(readelf -S /tmp/getoffs.o | grep '\.rodata' |
|
||||||
sed 's/^[^.]*././;s/ .*//')
|
sed 's/^[^.]*././;s/ .*//')
|
||||||
|
# read out .rodata section as hex string (should be only 4 or 8 bytes)
|
||||||
objcopy --dump-section $rosect=/tmp/getoffs.ro /tmp/getoffs.o || exit 1
|
objcopy --dump-section $rosect=/tmp/getoffs.ro /tmp/getoffs.o || exit 1
|
||||||
ro=$(xxd -ps /tmp/getoffs.ro)
|
ro=$(xxd -ps /tmp/getoffs.ro)
|
||||||
if [ "$ENDIAN" = "le" ]; then
|
if [ "$ENDIAN" = "le" ]; then
|
||||||
|
@ -22,9 +27,11 @@ compile_rodata ()
|
||||||
else
|
else
|
||||||
hex=$ro
|
hex=$ro
|
||||||
fi
|
fi
|
||||||
|
# extract decimal value from hex string
|
||||||
rodata=$(printf "%d" 0x$hex)
|
rodata=$(printf "%d" 0x$hex)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# determine member offset and create #define
|
||||||
get_define () # prefix struct member member...
|
get_define () # prefix struct member member...
|
||||||
{
|
{
|
||||||
prefix=$1; shift
|
prefix=$1; shift
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue