mirror of
https://github.com/RaySollium99/picodrive.git
synced 2025-09-05 07:17:45 -04:00
various small fixes and optimisations
This commit is contained in:
parent
b90e104fc9
commit
8284ab7107
9 changed files with 39 additions and 27 deletions
8
Makefile
8
Makefile
|
@ -236,6 +236,14 @@ pico/cd/cd_file.o: CFLAGS += -fno-strict-aliasing
|
|||
pico/cd/pcm.o: CFLAGS += -fno-strict-aliasing
|
||||
pico/cd/LC89510.o: CFLAGS += -fno-strict-aliasing
|
||||
pico/cd/gfx_cd.o: CFLAGS += -fno-strict-aliasing
|
||||
ifeq (1,$(use_sh2drc))
|
||||
ifneq (,$(findstring -flto,$(CFLAGS)))
|
||||
# if using the DRC, memory and sh2soc use a global register variable to avoid
|
||||
# saving and reloading the SH2 SR. However, this collides with the use of LTO.
|
||||
pico/32x/memory.o: CFLAGS += -fno-lto
|
||||
pico/32x/sh2soc.o: CFLAGS += -fno-lto
|
||||
endif
|
||||
endif
|
||||
|
||||
# fame needs ~2GB of RAM to compile on gcc 4.8
|
||||
# on x86, this is reduced by ~300MB when debug info is off (but not on ARM)
|
||||
|
|
|
@ -26,7 +26,7 @@ CFLAGS ?=
|
|||
STATIC_LINKING:= 0
|
||||
TARGET_NAME := picodrive
|
||||
LIBM := -lm
|
||||
GIT_VERSION ?= " $(shell git rev-parse --short HEAD || echo unknown)"
|
||||
GIT_VERSION ?= $(shell git rev-parse --short HEAD || echo unknown)
|
||||
ifneq ($(GIT_VERSION)," unknown")
|
||||
CFLAGS += -DGIT_VERSION=\"$(GIT_VERSION)\"
|
||||
endif
|
||||
|
@ -427,6 +427,7 @@ else ifeq ($(platform), gcw0)
|
|||
use_fame = 1
|
||||
use_drz80 = 0
|
||||
use_cz80 = 1
|
||||
use_sh2drc = 1
|
||||
|
||||
# Windows
|
||||
else
|
||||
|
|
|
@ -1174,6 +1174,8 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
|
|||
#define host_arg2reg(rd, arg) \
|
||||
rd = arg
|
||||
|
||||
#define emith_rw_offs_max() 0xff
|
||||
|
||||
/* SH2 drc specific */
|
||||
/* pushes r12 for eabi alignment */
|
||||
#define emith_sh2_drc_entry() \
|
||||
|
|
|
@ -1117,6 +1117,7 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode)
|
|||
#define emith_flush() /**/
|
||||
#define host_instructions_updated(base, end) __builtin___clear_cache(base, end)
|
||||
#define emith_jump_patch_size() 8
|
||||
#define emith_rw_offs_max() 0xff
|
||||
|
||||
|
||||
// SH2 drc specific
|
||||
|
|
|
@ -394,7 +394,7 @@ int emith_flg_noV; // V flag known not to be set
|
|||
// NB: for adcf and sbcf, carry-in must be dealt with separately (see there)
|
||||
static void emith_set_arith_flags(int rd, int rt, int rs, s32 imm, int sub)
|
||||
{
|
||||
if (sub && rd == FNZ && rt && rs) // is this cmp_r_r?
|
||||
if (sub && rd == FNZ && rt > AT && rs > AT) // is this cmp_r_r?
|
||||
emith_flg_rs = rs, emith_flg_rt = rt;
|
||||
else emith_flg_rs = emith_flg_rt = 0;
|
||||
|
||||
|
@ -858,7 +858,7 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm)
|
|||
// NB: mips32r2 has EXT and INS
|
||||
#define emith_clear_msb(d, s, count) /* bits to clear */ do { \
|
||||
u32 t; \
|
||||
if ((count) > 16) { \
|
||||
if ((count) >= 16) { \
|
||||
t = (count) - 16; \
|
||||
t = 0xffff >> t; \
|
||||
emith_and_r_r_imm(d, s, t); \
|
||||
|
@ -1262,6 +1262,7 @@ static int emith_cond_check(int cond, int *r)
|
|||
// NB: mips32r2 has SYNCI
|
||||
#define host_instructions_updated(base, end) __builtin___clear_cache(base, end)
|
||||
#define emith_jump_patch_size() 4
|
||||
#define emith_rw_offs_max() 0x7fff
|
||||
|
||||
// SH2 drc specific
|
||||
#define emith_sh2_drc_entry() do { \
|
||||
|
|
|
@ -986,6 +986,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
|
|||
|
||||
#define host_instructions_updated(base, end)
|
||||
|
||||
#define emith_rw_offs_max() 0xffffffff
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
||||
#define HOST_REGS 16
|
||||
|
|
|
@ -419,8 +419,8 @@ typedef struct {
|
|||
static int rcache_get_tmp(void);
|
||||
static void rcache_free_tmp(int hr);
|
||||
|
||||
// Note: cache_regs[] must have at least the amount of REG and TEMP registers
|
||||
// used by handlers in worst case (currently 4).
|
||||
// Note: cache_regs[] must have at least the amount of HRF_REG registers used
|
||||
// by handlers in worst case (currently 4).
|
||||
// Register assignment goes by ABI convention. Caller save registers are TEMP,
|
||||
// the others are either static or REG. SR must be static, R0 very recommended.
|
||||
// VBR, PC, PR must not be static (read from context in utils).
|
||||
|
@ -2418,7 +2418,7 @@ static void rcache_init(void)
|
|||
// NB may return either REG or TEMP
|
||||
static int emit_get_rbase_and_offs(SH2 *sh2, sh2_reg_e r, int rmode, u32 *offs)
|
||||
{
|
||||
uptr omask = 0xff; // offset mask, XXX: ARM oriented..
|
||||
uptr omask = emith_rw_offs_max(); // offset mask
|
||||
u32 mask = 0;
|
||||
u32 a;
|
||||
int poffs;
|
||||
|
@ -4447,7 +4447,7 @@ end_op:
|
|||
|
||||
static void sh2_generate_utils(void)
|
||||
{
|
||||
int arg0, arg1, arg2, arg3, sr, tmp;
|
||||
int arg0, arg1, arg2, arg3, sr, tmp, tmp2;
|
||||
|
||||
host_arg2reg(arg0, 0);
|
||||
host_arg2reg(arg1, 1);
|
||||
|
@ -4689,18 +4689,18 @@ static void sh2_generate_utils(void)
|
|||
emith_sub_r_imm(tmp, 4*2);
|
||||
rcache_clean();
|
||||
// push SR
|
||||
tmp = rcache_get_reg_arg(0, SHR_SP, NULL);
|
||||
emith_add_r_imm(tmp, 4);
|
||||
tmp = rcache_get_reg_arg(0, SHR_SP,&tmp2);
|
||||
emith_add_r_r_imm(tmp, tmp2, 4);
|
||||
tmp = rcache_get_reg_arg(1, SHR_SR, NULL);
|
||||
emith_clear_msb(tmp, tmp, 22);
|
||||
emith_move_r_r_ptr(arg2, CONTEXT_REG);
|
||||
rcache_invalidate();
|
||||
rcache_invalidate_tmp();
|
||||
emith_call(p32x_sh2_write32); // XXX: use sh2_drc_write32?
|
||||
// push PC
|
||||
rcache_get_reg_arg(0, SHR_SP, NULL);
|
||||
emith_ctx_read(arg1, SHR_PC * 4);
|
||||
emith_move_r_r_ptr(arg2, CONTEXT_REG);
|
||||
rcache_invalidate();
|
||||
rcache_invalidate_tmp();
|
||||
emith_call(p32x_sh2_write32);
|
||||
// update I, cycles, do callback
|
||||
emith_ctx_read(arg1, offsetof(SH2, pending_level));
|
||||
|
|
|
@ -197,24 +197,19 @@ static NOINLINE u32 sh2_poll_read(u32 a, u32 d, unsigned int cycles, SH2* sh2)
|
|||
// fetch oldest write to address from fifo, but stop when reaching the present
|
||||
idx = sh2_poll_rd[hix];
|
||||
while (idx != sh2_poll_wr[hix] && CYCLES_GE(cycles, fifo[idx].cycles)) {
|
||||
// int oidx = idx;
|
||||
p = &fifo[idx];
|
||||
idx = (idx+1) % PFIFO_SZ;
|
||||
|
||||
if (CYCLES_GT(cycles, p->cycles+80)) {
|
||||
// drop older fifo stores that may cause synchronisation problems.
|
||||
// NB unfortunately this cycle diff is quite sensitive:
|
||||
// observed in Brutal Unleashed: min 80, observed in Afterburner: max 110
|
||||
sh2_poll_rd[hix] = idx;
|
||||
} else if (p->a == a) {
|
||||
// replace current data with fifo value and discard fifo entry
|
||||
if (cpu != p->cpu) {
|
||||
if (cpu != p->cpu) {
|
||||
if (CYCLES_GT(cycles, p->cycles+80)) {
|
||||
// drop older fifo stores that may cause synchronisation problems.
|
||||
sh2_poll_rd[hix] = idx;
|
||||
} else if (p->a == a) {
|
||||
// replace current data with fifo value and discard fifo entry
|
||||
d = p->d;
|
||||
p->a = -1;
|
||||
// if (oidx == sh2_poll_rd[hix])
|
||||
// sh2_poll_rd[hix] = idx;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return d;
|
||||
|
@ -224,7 +219,6 @@ static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2)
|
|||
{
|
||||
int hix = (a >> 1) % PFIFO_CNT;
|
||||
struct sh2_poll_fifo *fifo = sh2_poll_fifo[hix];
|
||||
struct sh2_poll_fifo *p = &fifo[sh2_poll_wr[hix]];
|
||||
struct sh2_poll_fifo *q = &fifo[(sh2_poll_wr[hix]-1) % PFIFO_SZ];
|
||||
int cpu = sh2 ? sh2->is_slave+1 : 0;
|
||||
|
||||
|
@ -233,15 +227,16 @@ static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2)
|
|||
// intermediate values that may cause synchronisation problems.
|
||||
// NB this can take an eternity on m68k: mov.b <addr1.l>,<addr2.l> needs
|
||||
// 28 m68k-cycles (~80 sh2-cycles) to complete (observed in Metal Head)
|
||||
if (q->a == a && !CYCLES_GT(cycles,q->cycles+30)) {
|
||||
if (q->a == a && sh2_poll_wr[hix] != sh2_poll_rd[hix] && !CYCLES_GT(cycles,q->cycles+30)) {
|
||||
q->d = d;
|
||||
} else {
|
||||
// store write to poll address in fifo
|
||||
fifo[sh2_poll_wr[hix]] =
|
||||
(struct sh2_poll_fifo){ .cycles = cycles, .a = a, .d = d, .cpu = cpu };
|
||||
sh2_poll_wr[hix] = (sh2_poll_wr[hix]+1) % PFIFO_SZ;
|
||||
if (sh2_poll_wr[hix] == sh2_poll_rd[hix])
|
||||
// fifo overflow, discard oldest value
|
||||
sh2_poll_rd[hix] = (sh2_poll_rd[hix]+1) % PFIFO_SZ;
|
||||
*p = (struct sh2_poll_fifo){ .cycles = cycles, .a = a, .d = d, .cpu = cpu };
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2369,6 +2364,8 @@ void PicoMemSetup32x(void)
|
|||
|
||||
sh2_drc_mem_setup(&msh2);
|
||||
sh2_drc_mem_setup(&ssh2);
|
||||
memset(sh2_poll_rd, 0, sizeof(sh2_poll_rd));
|
||||
memset(sh2_poll_wr, 0, sizeof(sh2_poll_wr));
|
||||
|
||||
// z80 hack
|
||||
z80_map_set(z80_write_map, 0x8000, 0xffff, z80_md_bank_write_32x, 1);
|
||||
|
|
|
@ -11,7 +11,7 @@ ENDIAN=
|
|||
# compile with target C compiler and extract value from .rodata section
|
||||
compile_rodata ()
|
||||
{
|
||||
$CC $CFLAGS -I .. -c /tmp/getoffs.c -o /tmp/getoffs.o || exit 1
|
||||
$CC $CFLAGS -I .. -shared /tmp/getoffs.c -o /tmp/getoffs.o || exit 1
|
||||
# find the name of the .rodata section (in case -fdata-sections is used)
|
||||
rosect=$(readelf -S /tmp/getoffs.o | grep '\.rodata' |
|
||||
sed 's/^[^.]*././;s/ .*//')
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue