bug fix in comm poll fifo, and back to -O3

This commit is contained in:
kub 2019-08-22 22:57:42 +02:00
parent e5274cc92d
commit f6b4a9ca53
3 changed files with 29 additions and 15 deletions

View file

@ -3,7 +3,7 @@ DEBUG ?= 0
CFLAGS += -Wall -ggdb -ffunction-sections -fdata-sections CFLAGS += -Wall -ggdb -ffunction-sections -fdata-sections
CFLAGS += -I. CFLAGS += -I.
ifeq "$(DEBUG)" "0" ifeq "$(DEBUG)" "0"
CFLAGS += -O2 -finline-functions -DNDEBUG CFLAGS += -O3 -DNDEBUG
endif endif
# This is actually needed, believe me. # This is actually needed, believe me.

View file

@ -29,11 +29,11 @@ assuming $TC points to the appropriate cross compile toolchain directory:
platform|toolchain|configure command platform|toolchain|configure command
--------|---------|----------------- --------|---------|-----------------
gp2x,wiz,caanoo|open2x|CROSS_COMPILE=arm-open2x-linux- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -fno-stack-protector -fno-common -finline-limit=42" LDFLAGS="--sysroot $TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x gp2x,wiz,caanoo|open2x|CROSS_COMPILE=arm-open2x-linux- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -finline-limit=42 -fno-unroll-loops -fno-stack-protector -fno-common" LDFLAGS="--sysroot $TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x
gp2x,wiz,caanoo|open2x with ubuntu arm gcc 4.7|CROSS_COMPILE=arm-linux-gnueabi- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -fno-stack-protector -fno-common -finline-limit=42 -fipa-pta" LDFLAGS="-B$TC/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x gp2x,wiz,caanoo|open2x with ubuntu arm gcc 4.7|CROSS_COMPILE=arm-linux-gnueabi- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -finline-limit=42 -fno-unroll-loops -fno-stack-protector -fno-common" LDFLAGS="-B$TC/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x
opendingux|opendingux|CROSS_COMPILE=mipsel-linux- CFLAGS="-I$TC/usr/include -I$TC/usr/include/SDL -fno-stack-protector -fno-common -finline-limit=42 -fipa-pta" LDFLAGS="--sysroot $TC -L$TC/lib" ./configure --platform=opendingux opendingux|opendingux|CROSS_COMPILE=mipsel-linux- CFLAGS="-I$TC/usr/include -I$TC/usr/include/SDL" LDFLAGS="--sysroot $TC -L$TC/lib" ./configure --platform=opendingux
opendingux|opendingux with ubuntu mips gcc 5.4|CROSS_COMPILE=mipsel-linux-gnu- CFLAGS="-I$TC/usr/include -I$TC/usr/include/SDL -fno-stack-protector -fno-common -finline-limit=42 -fipa-pta" LDFLAGS="-B$TC/usr/lib -B$TC/lib -Wl,-rpath-link=$TC/usr/lib -Wl,-rpath-link=$TC/lib" ./configure --platform=opendingux opendingux|opendingux with ubuntu mips gcc 5.4|CROSS_COMPILE=mipsel-linux-gnu- CFLAGS="-I$TC/usr/include -I$TC/usr/include/SDL" LDFLAGS="-B$TC/usr/lib -B$TC/lib -Wl,-rpath-link=$TC/usr/lib -Wl,-rpath-link=$TC/lib" ./configure --platform=opendingux
gcw0|gcw0|CROSS_COMPILE=mipsel-gcw0-linux-uclibc- CFLAGS="-I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include -I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/SDL -fno-stack-protector -fno-common -finline-limit=42 -fipa-pta" LDFLAGS="--sysroot $TC/usr/mipsel-gcw0-linux-uclibc/sysroot" ./configure --platform=gcw0 gcw0|gcw0|CROSS_COMPILE=mipsel-gcw0-linux-uclibc- CFLAGS="-I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include -I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/SDL" LDFLAGS="--sysroot $TC/usr/mipsel-gcw0-linux-uclibc/sysroot" ./configure --platform=gcw0
For gp2x, wiz, and caanoo you may need to compile libpng first, and additionally For gp2x, wiz, and caanoo you may need to compile libpng first, and additionally
cyclone_gp2x.patch may need to be applied to the cpu/cyclone submodule: cyclone_gp2x.patch may need to be applied to the cpu/cyclone submodule:
@ -67,7 +67,7 @@ You need to install the resulting binary onto your device manually.
For opendingux and gcw0, copy the opk to your SD card. For opendingux and gcw0, copy the opk to your SD card.
For gp2x, wiz and caanoo, the easiest way is to unpack For gp2x, wiz and caanoo, the easiest way is to unpack
[PicoDrive_191.zip](http://notaz.gp2x.de/releases/PicoDrive/PicoDrive_191.zip) [PicoDrive_191.zip](http://notaz.gp2x.de/releases/PicoDrive/PicoDrive_191.zip)
on you SD card and replace the PicoDrive binary. on your SD card and replace the PicoDrive binary.
Send bug reports, fixes etc to <derkub@gmail.com> Send bug reports, fixes etc to <derkub@gmail.com>
Kai-Uwe Bloem Kai-Uwe Bloem

View file

@ -191,7 +191,7 @@ static NOINLINE u32 sh2_poll_read(u32 a, u32 d, unsigned int cycles, SH2* sh2)
int hix = (a >> 1) % PFIFO_CNT; int hix = (a >> 1) % PFIFO_CNT;
struct sh2_poll_fifo *fifo = sh2_poll_fifo[hix]; struct sh2_poll_fifo *fifo = sh2_poll_fifo[hix];
struct sh2_poll_fifo *p; struct sh2_poll_fifo *p;
int cpu = sh2 ? sh2->is_slave+1 : 0; int cpu = sh2 ? sh2->is_slave : -1;
unsigned idx; unsigned idx;
a &= ~0x20000000; // ignore writethrough bit a &= ~0x20000000; // ignore writethrough bit
@ -204,7 +204,7 @@ static NOINLINE u32 sh2_poll_read(u32 a, u32 d, unsigned int cycles, SH2* sh2)
if (cpu != p->cpu) { if (cpu != p->cpu) {
if (CYCLES_GT(cycles, p->cycles+80)) { if (CYCLES_GT(cycles, p->cycles+80)) {
// drop older fifo stores that may cause synchronisation problems. // drop older fifo stores that may cause synchronisation problems.
sh2_poll_rd[hix] = idx; p->a = -1;
} else if (p->a == a) { } else if (p->a == a) {
// replace current data with fifo value and discard fifo entry // replace current data with fifo value and discard fifo entry
d = p->d; d = p->d;
@ -221,24 +221,37 @@ static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2)
int hix = (a >> 1) % PFIFO_CNT; int hix = (a >> 1) % PFIFO_CNT;
struct sh2_poll_fifo *fifo = sh2_poll_fifo[hix]; struct sh2_poll_fifo *fifo = sh2_poll_fifo[hix];
struct sh2_poll_fifo *q = &fifo[(sh2_poll_wr[hix]-1) % PFIFO_SZ]; struct sh2_poll_fifo *q = &fifo[(sh2_poll_wr[hix]-1) % PFIFO_SZ];
int cpu = sh2 ? sh2->is_slave+1 : 0; int cpu = sh2 ? sh2->is_slave : -1;
unsigned rd = sh2_poll_rd[hix], wr = sh2_poll_wr[hix];
unsigned idx, nrd;
a &= ~0x20000000; // ignore writethrough bit a &= ~0x20000000; // ignore writethrough bit
// throw out any values other cpus wrote to this address, plus cancelled entries at the head of the fifo
for (idx = nrd = wr; idx != rd; ) {
idx = (idx-1) % PFIFO_SZ;
if (fifo[idx].a == a && fifo[idx].cpu != cpu) { fifo[idx].a = -1; }
if (fifo[idx].a != -1) { nrd = idx; }
}
rd = nrd;
// fold 2 consecutive writes to the same address to avoid reading of // fold 2 consecutive writes to the same address to avoid reading of
// intermediate values that may cause synchronisation problems. // intermediate values that may cause synchronisation problems.
// NB this can take an eternity on m68k: mov.b <addr1.l>,<addr2.l> needs // NB this can take an eternity on m68k: mov.b <addr1.l>,<addr2.l> needs
// 28 m68k-cycles (~80 sh2-cycles) to complete (observed in Metal Head) // 28 m68k-cycles (~80 sh2-cycles) to complete (observed in Metal Head)
if (q->a == a && sh2_poll_wr[hix] != sh2_poll_rd[hix] && !CYCLES_GT(cycles,q->cycles+30)) { if (q->a == a && rd != wr && !CYCLES_GT(cycles,q->cycles+30)) {
q->d = d; q->d = d;
} else { } else {
// store write to poll address in fifo // store write to poll address in fifo
fifo[sh2_poll_wr[hix]] = fifo[wr] =
(struct sh2_poll_fifo){ .cycles = cycles, .a = a, .d = d, .cpu = cpu }; (struct sh2_poll_fifo){ .cycles = cycles, .a = a, .d = d, .cpu = cpu };
sh2_poll_wr[hix] = (sh2_poll_wr[hix]+1) % PFIFO_SZ; wr = (wr+1) % PFIFO_SZ;
if (sh2_poll_wr[hix] == sh2_poll_rd[hix]) if (wr == rd)
// fifo overflow, discard oldest value // fifo overflow, discard oldest value
sh2_poll_rd[hix] = (sh2_poll_rd[hix]+1) % PFIFO_SZ; rd = (rd+1) % PFIFO_SZ;
} }
sh2_poll_rd[hix] = rd; sh2_poll_wr[hix] = wr;
} }
u32 REGPARM(3) p32x_sh2_poll_memory8(unsigned int a, u32 d, SH2 *sh2) u32 REGPARM(3) p32x_sh2_poll_memory8(unsigned int a, u32 d, SH2 *sh2)
@ -2367,6 +2380,7 @@ void PicoMemSetup32x(void)
sh2_drc_mem_setup(&ssh2); sh2_drc_mem_setup(&ssh2);
memset(sh2_poll_rd, 0, sizeof(sh2_poll_rd)); memset(sh2_poll_rd, 0, sizeof(sh2_poll_rd));
memset(sh2_poll_wr, 0, sizeof(sh2_poll_wr)); memset(sh2_poll_wr, 0, sizeof(sh2_poll_wr));
memset(sh2_poll_fifo, -1, sizeof(sh2_poll_fifo));
// z80 hack // z80 hack
z80_map_set(z80_write_map, 0x8000, 0xffff, z80_md_bank_write_32x, 1); z80_map_set(z80_write_map, 0x8000, 0xffff, z80_md_bank_write_32x, 1);