sh2 drc: fix speed regression

This commit is contained in:
kub 2019-12-13 18:23:03 +01:00
parent 90b1c9db91
commit a5e51c16e6
8 changed files with 35 additions and 18 deletions

View file

@ -20,7 +20,7 @@
#define TEMPORARY_REGS { 12, 14 } #define TEMPORARY_REGS { 12, 14 }
#define CONTEXT_REG 11 #define CONTEXT_REG 11
#define STATIC_SH2_REGS { SHR_SR,10 , SHR_R0,8 , SHR_R0+1,9 } #define STATIC_SH2_REGS { SHR_SR,10 , SHR_R(0),8 , SHR_R(1),9 }
// XXX: tcache_ptr type for SVP and SH2 compilers differs.. // XXX: tcache_ptr type for SVP and SH2 compilers differs..
#define EMIT_PTR(ptr, x) \ #define EMIT_PTR(ptr, x) \

View file

@ -15,7 +15,7 @@
#define TEMPORARY_REGS { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 } #define TEMPORARY_REGS { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 }
#define CONTEXT_REG 29 #define CONTEXT_REG 29
#define STATIC_SH2_REGS { SHR_SR,28 , SHR_R0,27 , SHR_R0+1,26 } #define STATIC_SH2_REGS { SHR_SR,28 , SHR_R(0),27 , SHR_R(1),26 }
// R31 doesn't exist, it aliases either with zero or SP // R31 doesn't exist, it aliases either with zero or SP
#define SP 31 // stack pointer #define SP 31 // stack pointer

View file

@ -17,7 +17,7 @@
#define TEMPORARY_REGS { 2, 3, 8, 9, 10, 11, 12, 13, 14 } // v0-v1,t0-t6 #define TEMPORARY_REGS { 2, 3, 8, 9, 10, 11, 12, 13, 14 } // v0-v1,t0-t6
#define CONTEXT_REG 23 // s7 #define CONTEXT_REG 23 // s7
#define STATIC_SH2_REGS { SHR_SR,22 , SHR_R0,21 , SHR_R0+1,20 } #define STATIC_SH2_REGS { SHR_SR,22 , SHR_R(0),21 , SHR_R(1),20 }
// NB: the ubiquitous JZ74[46]0 uses MIPS32 Release 1, a slight MIPS II superset // NB: the ubiquitous JZ74[46]0 uses MIPS32 Release 1, a slight MIPS II superset
#ifndef __mips_isa_rev #ifndef __mips_isa_rev

View file

@ -16,7 +16,7 @@
#define TEMPORARY_REGS { 5, 6, 7 } // t0-t2 #define TEMPORARY_REGS { 5, 6, 7 } // t0-t2
#define CONTEXT_REG 9 // s1 #define CONTEXT_REG 9 // s1
#define STATIC_SH2_REGS { SHR_SR,27 , SHR_R0,26 , SHR_R0+1,25 } #define STATIC_SH2_REGS { SHR_SR,27 , SHR_R(0),26 , SHR_R(1),25 }
// registers usable for user code: r1-r25, others reserved or special // registers usable for user code: r1-r25, others reserved or special
#define Z0 0 // zero register #define Z0 0 // zero register

View file

@ -1072,7 +1072,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
#define PARAM_REGS { xCX, xDX, xR8, xR9 } #define PARAM_REGS { xCX, xDX, xR8, xR9 }
#define PRESERVED_REGS { xSI, xDI, xR12, xR13, xR14, xR15, xBX, xBP } #define PRESERVED_REGS { xSI, xDI, xR12, xR13, xR14, xR15, xBX, xBP }
#define TEMPORARY_REGS { xAX, xR10, xR11 } #define TEMPORARY_REGS { xAX, xR10, xR11 }
#define STATIC_SH2_REGS { SHR_SR,xBX , SHR_R0,xR15 , SH2_R0+1,xR14 } #define STATIC_SH2_REGS { SHR_SR,xBX , SHR_R(0),xR15 , SH2_R(1),xR14 }
#define host_arg2reg(rd, arg) \ #define host_arg2reg(rd, arg) \
switch (arg) { \ switch (arg) { \

View file

@ -33,34 +33,50 @@ unsigned short scan_block(uint32_t base_pc, int is_slave,
uint32_t *base_literals, uint32_t *end_literals); uint32_t *base_literals, uint32_t *end_literals);
#if defined(DRC_SH2) && defined(__GNUC__) #if defined(DRC_SH2) && defined(__GNUC__)
// direct access to some host CPU registers used by the DRC // direct access to some host CPU registers used by the DRC if gcc is used.
// XXX MUST match definitions for SHR_SR in cpu/drc/emit_*.c // XXX MUST match SHR_SR definitions in cpu/drc/emit_*.c; should be moved there
// XXX yuck, there's no portable way to determine register size. Use long long
// if target is 64-bit and data model is ILP32 or LLP64 (Windows), else long
#if defined(__arm__) #if defined(__arm__)
#define DRC_SR_REG "r10" #define DRC_SR_REG "r10"
#define DRC_REG_LL 0 // 32 bit
#elif defined(__aarch64__) #elif defined(__aarch64__)
#define DRC_SR_REG "r28" #define DRC_SR_REG "r28"
#define DRC_REG_LL (__ILP32__ || _WIN32)
#elif defined(__mips__) #elif defined(__mips__)
#define DRC_SR_REG "s6" #define DRC_SR_REG "s6"
#define DRC_REG_LL (_MIPS_SIM == _ABIN32)
#elif defined(__riscv__) || defined(__riscv) #elif defined(__riscv__) || defined(__riscv)
#define DRC_SR_REG "s11" #define DRC_SR_REG "s11"
#define DRC_REG_LL 0 // no ABI for (__ILP32__ && __riscv_xlen != 32)
#elif defined(__i386__) #elif defined(__i386__)
#define DRC_SR_REG "edi" #define DRC_SR_REG "edi"
#define DRC_REG_LL 0 // 32 bit
#elif defined(__x86_64__) #elif defined(__x86_64__)
#define DRC_SR_REG "ebx" #define DRC_SR_REG "rbx"
#define DRC_REG_LL (__ILP32__ || _WIN32)
#endif #endif
#endif #endif
#ifdef DRC_SR_REG #ifdef DRC_SR_REG
// XXX this is clearer but produces too much overhead on slow platforms
extern void REGPARM(1) (*sh2_drc_save_sr)(SH2 *sh2); extern void REGPARM(1) (*sh2_drc_save_sr)(SH2 *sh2);
extern void REGPARM(1) (*sh2_drc_restore_sr)(SH2 *sh2); extern void REGPARM(1) (*sh2_drc_restore_sr)(SH2 *sh2);
#define DRC_DECLARE_SR register int32_t sh2_sr asm(DRC_SR_REG) // NB: _sh2_sr MUST have register size if optimizing with -O3 (-fif-conversion)
#if DRC_REG_LL
#define DRC_DECLARE_SR register long long _sh2_sr asm(DRC_SR_REG)
#else
#define DRC_DECLARE_SR register long _sh2_sr asm(DRC_SR_REG)
#endif
#define DRC_SAVE_SR(sh2) \ #define DRC_SAVE_SR(sh2) \
if (likely((sh2->state&(SH2_STATE_RUN|SH2_STATE_SLEEP)) == SH2_STATE_RUN)) \ if (likely((sh2->state&(SH2_STATE_RUN|SH2_STATE_SLEEP)) == SH2_STATE_RUN)) \
sh2_drc_save_sr(sh2) sh2->sr = (s32)_sh2_sr
// sh2_drc_save_sr(sh2)
#define DRC_RESTORE_SR(sh2) \ #define DRC_RESTORE_SR(sh2) \
if (likely((sh2->state&(SH2_STATE_RUN|SH2_STATE_SLEEP)) == SH2_STATE_RUN)) \ if (likely((sh2->state&(SH2_STATE_RUN|SH2_STATE_SLEEP)) == SH2_STATE_RUN)) \
sh2_drc_restore_sr(sh2) _sh2_sr = (s32)sh2->sr
// sh2_drc_restore_sr(sh2)
#else #else
#define DRC_DECLARE_SR #define DRC_DECLARE_SR
#define DRC_SAVE_SR(sh2) #define DRC_SAVE_SR(sh2)

View file

@ -10,6 +10,7 @@ typedef enum {
SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL,
SH2_REGS // register set size SH2_REGS // register set size
} sh2_reg_e; } sh2_reg_e;
#define SHR_R(n) (SHR_R0+(n))
typedef struct SH2_ typedef struct SH2_
{ {

View file

@ -254,14 +254,14 @@ static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2)
sh2_poll_rd[hix] = rd; sh2_poll_wr[hix] = wr; sh2_poll_rd[hix] = rd; sh2_poll_wr[hix] = wr;
} }
u32 REGPARM(3) p32x_sh2_poll_memory8(unsigned int a, u32 d, SH2 *sh2) u32 REGPARM(3) p32x_sh2_poll_memory8(u32 a, u32 d, SH2 *sh2)
{ {
int shift = (a & 1 ? 0 : 8); int shift = (a & 1 ? 0 : 8);
d = (s8)(p32x_sh2_poll_memory16(a & ~1, d << shift, sh2) >> shift); d = (s8)(p32x_sh2_poll_memory16(a & ~1, d << shift, sh2) >> shift);
return d; return d;
} }
u32 REGPARM(3) p32x_sh2_poll_memory16(unsigned int a, u32 d, SH2 *sh2) u32 REGPARM(3) p32x_sh2_poll_memory16(u32 a, u32 d, SH2 *sh2)
{ {
unsigned char *p = sh2->p_drcblk_ram; unsigned char *p = sh2->p_drcblk_ram;
unsigned int cycles; unsigned int cycles;
@ -281,7 +281,7 @@ u32 REGPARM(3) p32x_sh2_poll_memory16(unsigned int a, u32 d, SH2 *sh2)
return d; return d;
} }
u32 REGPARM(3) p32x_sh2_poll_memory32(unsigned int a, u32 d, SH2 *sh2) u32 REGPARM(3) p32x_sh2_poll_memory32(u32 a, u32 d, SH2 *sh2)
{ {
unsigned char *p = sh2->p_drcblk_ram; unsigned char *p = sh2->p_drcblk_ram;
unsigned int cycles; unsigned int cycles;
@ -2017,9 +2017,9 @@ int p32x_sh2_memcpy(u32 dst, u32 src, int count, int size, SH2 *sh2)
// ----------------------------------------------------------------- // -----------------------------------------------------------------
static void z80_md_bank_write_32x(unsigned int a, unsigned char d) static void z80_md_bank_write_32x(u32 a, unsigned char d)
{ {
unsigned int addr68k; u32 addr68k;
addr68k = Pico.m.z80_bank68k << 15; addr68k = Pico.m.z80_bank68k << 15;
addr68k += a & 0x7fff; addr68k += a & 0x7fff;