mirror of
https://github.com/RaySollium99/picodrive.git
synced 2025-09-05 07:17:45 -04:00
sh2 drc: fix speed regression
This commit is contained in:
parent
90b1c9db91
commit
a5e51c16e6
8 changed files with 35 additions and 18 deletions
|
@ -20,7 +20,7 @@
|
||||||
#define TEMPORARY_REGS { 12, 14 }
|
#define TEMPORARY_REGS { 12, 14 }
|
||||||
|
|
||||||
#define CONTEXT_REG 11
|
#define CONTEXT_REG 11
|
||||||
#define STATIC_SH2_REGS { SHR_SR,10 , SHR_R0,8 , SHR_R0+1,9 }
|
#define STATIC_SH2_REGS { SHR_SR,10 , SHR_R(0),8 , SHR_R(1),9 }
|
||||||
|
|
||||||
// XXX: tcache_ptr type for SVP and SH2 compilers differs..
|
// XXX: tcache_ptr type for SVP and SH2 compilers differs..
|
||||||
#define EMIT_PTR(ptr, x) \
|
#define EMIT_PTR(ptr, x) \
|
||||||
|
|
|
@ -15,7 +15,7 @@
|
||||||
#define TEMPORARY_REGS { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 }
|
#define TEMPORARY_REGS { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 }
|
||||||
|
|
||||||
#define CONTEXT_REG 29
|
#define CONTEXT_REG 29
|
||||||
#define STATIC_SH2_REGS { SHR_SR,28 , SHR_R0,27 , SHR_R0+1,26 }
|
#define STATIC_SH2_REGS { SHR_SR,28 , SHR_R(0),27 , SHR_R(1),26 }
|
||||||
|
|
||||||
// R31 doesn't exist, it aliases either with zero or SP
|
// R31 doesn't exist, it aliases either with zero or SP
|
||||||
#define SP 31 // stack pointer
|
#define SP 31 // stack pointer
|
||||||
|
|
|
@ -17,7 +17,7 @@
|
||||||
#define TEMPORARY_REGS { 2, 3, 8, 9, 10, 11, 12, 13, 14 } // v0-v1,t0-t6
|
#define TEMPORARY_REGS { 2, 3, 8, 9, 10, 11, 12, 13, 14 } // v0-v1,t0-t6
|
||||||
|
|
||||||
#define CONTEXT_REG 23 // s7
|
#define CONTEXT_REG 23 // s7
|
||||||
#define STATIC_SH2_REGS { SHR_SR,22 , SHR_R0,21 , SHR_R0+1,20 }
|
#define STATIC_SH2_REGS { SHR_SR,22 , SHR_R(0),21 , SHR_R(1),20 }
|
||||||
|
|
||||||
// NB: the ubiquitous JZ74[46]0 uses MIPS32 Release 1, a slight MIPS II superset
|
// NB: the ubiquitous JZ74[46]0 uses MIPS32 Release 1, a slight MIPS II superset
|
||||||
#ifndef __mips_isa_rev
|
#ifndef __mips_isa_rev
|
||||||
|
|
|
@ -16,7 +16,7 @@
|
||||||
#define TEMPORARY_REGS { 5, 6, 7 } // t0-t2
|
#define TEMPORARY_REGS { 5, 6, 7 } // t0-t2
|
||||||
|
|
||||||
#define CONTEXT_REG 9 // s1
|
#define CONTEXT_REG 9 // s1
|
||||||
#define STATIC_SH2_REGS { SHR_SR,27 , SHR_R0,26 , SHR_R0+1,25 }
|
#define STATIC_SH2_REGS { SHR_SR,27 , SHR_R(0),26 , SHR_R(1),25 }
|
||||||
|
|
||||||
// registers usable for user code: r1-r25, others reserved or special
|
// registers usable for user code: r1-r25, others reserved or special
|
||||||
#define Z0 0 // zero register
|
#define Z0 0 // zero register
|
||||||
|
|
|
@ -1072,7 +1072,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
|
||||||
#define PARAM_REGS { xCX, xDX, xR8, xR9 }
|
#define PARAM_REGS { xCX, xDX, xR8, xR9 }
|
||||||
#define PRESERVED_REGS { xSI, xDI, xR12, xR13, xR14, xR15, xBX, xBP }
|
#define PRESERVED_REGS { xSI, xDI, xR12, xR13, xR14, xR15, xBX, xBP }
|
||||||
#define TEMPORARY_REGS { xAX, xR10, xR11 }
|
#define TEMPORARY_REGS { xAX, xR10, xR11 }
|
||||||
#define STATIC_SH2_REGS { SHR_SR,xBX , SHR_R0,xR15 , SH2_R0+1,xR14 }
|
// (SH2 register, host register) pairs; R1 must use SHR_R(), there is no SH2_R()
#define STATIC_SH2_REGS { SHR_SR,xBX , SHR_R(0),xR15 , SHR_R(1),xR14 }
|
||||||
|
|
||||||
#define host_arg2reg(rd, arg) \
|
#define host_arg2reg(rd, arg) \
|
||||||
switch (arg) { \
|
switch (arg) { \
|
||||||
|
|
|
@ -33,34 +33,50 @@ unsigned short scan_block(uint32_t base_pc, int is_slave,
|
||||||
uint32_t *base_literals, uint32_t *end_literals);
|
uint32_t *base_literals, uint32_t *end_literals);
|
||||||
|
|
||||||
#if defined(DRC_SH2) && defined(__GNUC__)
|
#if defined(DRC_SH2) && defined(__GNUC__)
|
||||||
// direct access to some host CPU registers used by the DRC
|
// direct access to some host CPU registers used by the DRC if gcc is used.
|
||||||
// XXX MUST match definitions for SHR_SR in cpu/drc/emit_*.c
|
// XXX MUST match SHR_SR definitions in cpu/drc/emit_*.c; should be moved there
|
||||||
|
// XXX yuck, there's no portable way to determine register size. Use long long
|
||||||
|
// if target is 64 bit and data model is ILP32 or LLP64(windows), else long
|
||||||
#if defined(__arm__)
|
#if defined(__arm__)
|
||||||
#define DRC_SR_REG "r10"
|
#define DRC_SR_REG "r10"
|
||||||
|
#define DRC_REG_LL 0 // 32 bit
|
||||||
#elif defined(__aarch64__)
|
#elif defined(__aarch64__)
|
||||||
#define DRC_SR_REG "r28"
|
#define DRC_SR_REG "r28"
|
||||||
|
#define DRC_REG_LL (__ILP32__ || _WIN32)
|
||||||
#elif defined(__mips__)
|
#elif defined(__mips__)
|
||||||
#define DRC_SR_REG "s6"
|
#define DRC_SR_REG "s6"
|
||||||
|
#define DRC_REG_LL (_MIPS_SIM == _ABIN32)
|
||||||
#elif defined(__riscv__) || defined(__riscv)
|
#elif defined(__riscv__) || defined(__riscv)
|
||||||
#define DRC_SR_REG "s11"
|
#define DRC_SR_REG "s11"
|
||||||
|
#define DRC_REG_LL 0 // no ABI for (__ILP32__ && __riscv_xlen != 32)
|
||||||
#elif defined(__i386__)
|
#elif defined(__i386__)
|
||||||
#define DRC_SR_REG "edi"
|
#define DRC_SR_REG "edi"
|
||||||
|
#define DRC_REG_LL 0 // 32 bit
|
||||||
#elif defined(__x86_64__)
|
#elif defined(__x86_64__)
|
||||||
#define DRC_SR_REG "ebx"
|
#define DRC_SR_REG "rbx"
|
||||||
|
#define DRC_REG_LL (__ILP32__ || _WIN32)
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef DRC_SR_REG
|
#ifdef DRC_SR_REG
|
||||||
|
// XXX calling through these function pointers is clearer, but produces too much overhead on slow platforms
|
||||||
extern void REGPARM(1) (*sh2_drc_save_sr)(SH2 *sh2);
|
extern void REGPARM(1) (*sh2_drc_save_sr)(SH2 *sh2);
|
||||||
extern void REGPARM(1) (*sh2_drc_restore_sr)(SH2 *sh2);
|
extern void REGPARM(1) (*sh2_drc_restore_sr)(SH2 *sh2);
|
||||||
|
|
||||||
#define DRC_DECLARE_SR register int32_t sh2_sr asm(DRC_SR_REG)
|
// NB: _sh2_sr MUST have register size if optimizing with -O3 (-fif-conversion)
|
||||||
|
#if DRC_REG_LL
|
||||||
|
#define DRC_DECLARE_SR register long long _sh2_sr asm(DRC_SR_REG)
|
||||||
|
#else
|
||||||
|
#define DRC_DECLARE_SR register long _sh2_sr asm(DRC_SR_REG)
|
||||||
|
#endif
|
||||||
#define DRC_SAVE_SR(sh2) \
|
#define DRC_SAVE_SR(sh2) \
|
||||||
if (likely((sh2->state & (SH2_STATE_RUN|SH2_STATE_SLEEP)) == SH2_STATE_RUN)) \
|
if (likely((sh2->state&(SH2_STATE_RUN|SH2_STATE_SLEEP)) == SH2_STATE_RUN)) \
|
||||||
sh2_drc_save_sr(sh2)
|
sh2->sr = (s32)_sh2_sr
|
||||||
|
// sh2_drc_save_sr(sh2)
|
||||||
#define DRC_RESTORE_SR(sh2) \
|
#define DRC_RESTORE_SR(sh2) \
|
||||||
if (likely((sh2->state & (SH2_STATE_RUN|SH2_STATE_SLEEP)) == SH2_STATE_RUN)) \
|
if (likely((sh2->state&(SH2_STATE_RUN|SH2_STATE_SLEEP)) == SH2_STATE_RUN)) \
|
||||||
sh2_drc_restore_sr(sh2)
|
_sh2_sr = (s32)sh2->sr
|
||||||
|
// sh2_drc_restore_sr(sh2)
|
||||||
#else
|
#else
|
||||||
#define DRC_DECLARE_SR
|
#define DRC_DECLARE_SR
|
||||||
#define DRC_SAVE_SR(sh2)
|
#define DRC_SAVE_SR(sh2)
|
||||||
|
|
|
@ -10,6 +10,7 @@ typedef enum {
|
||||||
SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL,
|
SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL,
|
||||||
SH2_REGS // register set size
|
SH2_REGS // register set size
|
||||||
} sh2_reg_e;
|
} sh2_reg_e;
|
||||||
|
#define SHR_R(n) (SHR_R0+(n))
|
||||||
|
|
||||||
typedef struct SH2_
|
typedef struct SH2_
|
||||||
{
|
{
|
||||||
|
|
|
@ -254,14 +254,14 @@ static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2)
|
||||||
sh2_poll_rd[hix] = rd; sh2_poll_wr[hix] = wr;
|
sh2_poll_rd[hix] = rd; sh2_poll_wr[hix] = wr;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 REGPARM(3) p32x_sh2_poll_memory8(unsigned int a, u32 d, SH2 *sh2)
|
u32 REGPARM(3) p32x_sh2_poll_memory8(u32 a, u32 d, SH2 *sh2)
|
||||||
{
|
{
|
||||||
int shift = (a & 1 ? 0 : 8);
|
int shift = (a & 1 ? 0 : 8);
|
||||||
d = (s8)(p32x_sh2_poll_memory16(a & ~1, d << shift, sh2) >> shift);
|
d = (s8)(p32x_sh2_poll_memory16(a & ~1, d << shift, sh2) >> shift);
|
||||||
return d;
|
return d;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 REGPARM(3) p32x_sh2_poll_memory16(unsigned int a, u32 d, SH2 *sh2)
|
u32 REGPARM(3) p32x_sh2_poll_memory16(u32 a, u32 d, SH2 *sh2)
|
||||||
{
|
{
|
||||||
unsigned char *p = sh2->p_drcblk_ram;
|
unsigned char *p = sh2->p_drcblk_ram;
|
||||||
unsigned int cycles;
|
unsigned int cycles;
|
||||||
|
@ -281,7 +281,7 @@ u32 REGPARM(3) p32x_sh2_poll_memory16(unsigned int a, u32 d, SH2 *sh2)
|
||||||
return d;
|
return d;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 REGPARM(3) p32x_sh2_poll_memory32(unsigned int a, u32 d, SH2 *sh2)
|
u32 REGPARM(3) p32x_sh2_poll_memory32(u32 a, u32 d, SH2 *sh2)
|
||||||
{
|
{
|
||||||
unsigned char *p = sh2->p_drcblk_ram;
|
unsigned char *p = sh2->p_drcblk_ram;
|
||||||
unsigned int cycles;
|
unsigned int cycles;
|
||||||
|
@ -2017,9 +2017,9 @@ int p32x_sh2_memcpy(u32 dst, u32 src, int count, int size, SH2 *sh2)
|
||||||
|
|
||||||
// -----------------------------------------------------------------
|
// -----------------------------------------------------------------
|
||||||
|
|
||||||
static void z80_md_bank_write_32x(unsigned int a, unsigned char d)
|
static void z80_md_bank_write_32x(u32 a, unsigned char d)
|
||||||
{
|
{
|
||||||
unsigned int addr68k;
|
u32 addr68k;
|
||||||
|
|
||||||
addr68k = Pico.m.z80_bank68k << 15;
|
addr68k = Pico.m.z80_bank68k << 15;
|
||||||
addr68k += a & 0x7fff;
|
addr68k += a & 0x7fff;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue