mirror of
https://github.com/RaySollium99/picodrive.git
synced 2025-09-05 07:17:45 -04:00
added branch cache to sh2 drc to improve cross-tcache jump speed
This commit is contained in:
parent
6822ba9d64
commit
d760c90f3a
4 changed files with 143 additions and 16 deletions
|
@ -179,6 +179,7 @@
|
|||
/* ldr and str */
|
||||
#define EOP_LDR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,1,0,1,rn,rd,offset_12)
|
||||
#define EOP_LDRB_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,1,1,1,rn,rd,offset_12)
|
||||
#define EOP_STR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,0,0,rn,rd,abs(offset_12))
|
||||
|
||||
#define EOP_LDR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,1,0,1,rn,rd,offset_12)
|
||||
#define EOP_LDR_NEGIMM(rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,0,0,1,rn,rd,offset_12)
|
||||
|
@ -478,6 +479,9 @@ static int emith_xbranch(int cond, void *target, int is_call)
|
|||
#define emith_add_r_r(d, s) \
|
||||
emith_add_r_r_r(d, d, s)
|
||||
|
||||
#define emith_add_r_r_ptr(d, s) \
|
||||
emith_add_r_r_r(d, d, s)
|
||||
|
||||
#define emith_sub_r_r(d, s) \
|
||||
EOP_SUB_REG(A_COND_AL,0,d,d,s,A_AM1_LSL,0)
|
||||
|
||||
|
@ -684,6 +688,8 @@ static int emith_xbranch(int cond, void *target, int is_call)
|
|||
// misc
|
||||
#define emith_read_r_r_offs_c(cond, r, rs, offs) \
|
||||
EOP_LDR_IMM2(cond, r, rs, offs)
|
||||
#define emith_read_r_r_offs_ptr_c(cond, r, rs, offs) \
|
||||
emith_read_r_r_offs_c(cond, r, rs, offs)
|
||||
#define emith_read_r_r_r_c(cond, r, rs, rm) \
|
||||
EOP_LDR_REG_LSL(cond, r, rs, rm, 0)
|
||||
#define emith_read_r_r_r(r, rs, rm) \
|
||||
|
@ -716,8 +722,15 @@ static int emith_xbranch(int cond, void *target, int is_call)
|
|||
#define emith_read16_r_r_offs(r, rs, offs) \
|
||||
emith_read16_r_r_offs_c(A_COND_AL, r, rs, offs)
|
||||
|
||||
#define emith_write_r_r_offs_c(cond, r, rs, offs) \
|
||||
EOP_STR_IMM2(cond, r, rs, offs)
|
||||
#define emith_write_r_r_offs_ptr_c(cond, r, rs, offs) \
|
||||
emith_write_r_r_offs_c(cond, r, rs, offs)
|
||||
|
||||
#define emith_ctx_read_c(cond, r, offs) \
|
||||
emith_read_r_r_offs_c(cond, r, CONTEXT_REG, offs)
|
||||
#define emith_ctx_read(r, offs) \
|
||||
emith_read_r_r_offs(r, CONTEXT_REG, offs)
|
||||
emith_ctx_read_c(A_COND_AL, r, offs)
|
||||
|
||||
#define emith_ctx_read_ptr(r, offs) \
|
||||
emith_ctx_read(r, offs)
|
||||
|
|
|
@ -122,7 +122,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI };
|
|||
EMIT_OP_MODRM(0x01, 3, s, d)
|
||||
|
||||
#define emith_add_r_r_ptr(d, s) do { \
|
||||
EMIT_REX_IF(1, dst, src); \
|
||||
EMIT_REX_IF(1, s, d); \
|
||||
EMIT_OP_MODRM64(0x01, 3, s, d); \
|
||||
} while (0)
|
||||
|
||||
|
@ -260,6 +260,21 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI };
|
|||
} \
|
||||
} while (0)
|
||||
|
||||
// _r_r_r_shift
|
||||
#define emith_add_r_r_r_lsl(d, s1, s2, lslimm) do { \
|
||||
int tmp_ = rcache_get_tmp(); \
|
||||
emith_lsl(tmp_, s2, lslimm); \
|
||||
emith_add_r_r_r(d, s1, tmp_); \
|
||||
rcache_free_tmp(tmp_); \
|
||||
} while (0)
|
||||
|
||||
#define emith_add_r_r_r_lsr(d, s1, s2, lslimm) do { \
|
||||
int tmp_ = rcache_get_tmp(); \
|
||||
emith_lsr(tmp_, s2, lslimm); \
|
||||
emith_add_r_r_r(d, s1, tmp_); \
|
||||
rcache_free_tmp(tmp_); \
|
||||
} while (0)
|
||||
|
||||
// _r_r_shift
|
||||
#define emith_or_r_r_lsl(d, s, lslimm) do { \
|
||||
int tmp_ = rcache_get_tmp(); \
|
||||
|
@ -361,8 +376,12 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI };
|
|||
|
||||
#define emith_read_r_r_offs_c(cond, r, rs, offs) \
|
||||
emith_read_r_r_offs(r, rs, offs)
|
||||
#define emith_read_r_r_offs_ptr_c(cond, r, rs, offs) \
|
||||
emith_read_r_r_offs_ptr(r, rs, offs)
|
||||
#define emith_write_r_r_offs_c(cond, r, rs, offs) \
|
||||
emith_write_r_r_offs(r, rs, offs)
|
||||
#define emith_write_r_r_offs_ptr_c(cond, r, rs, offs) \
|
||||
emith_write_r_r_offs_ptr(r, rs, offs)
|
||||
#define emith_read8_r_r_offs_c(cond, r, rs, offs) \
|
||||
emith_read8_r_r_offs(r, rs, offs)
|
||||
#define emith_write8_r_r_offs_c(cond, r, rs, offs) \
|
||||
|
@ -583,9 +602,15 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI };
|
|||
|
||||
#define emith_read_r_r_offs(r, rs, offs) \
|
||||
emith_deref_op(0x8b, r, rs, offs)
|
||||
#define emith_read_r_r_offs_ptr(r, rs, offs) \
|
||||
EMIT_REX_IF(1, r, rs); \
|
||||
emith_deref_op(0x8b, r, rs, offs)
|
||||
|
||||
#define emith_write_r_r_offs(r, rs, offs) \
|
||||
emith_deref_op(0x89, r, rs, offs)
|
||||
#define emith_write_r_r_offs_ptr(r, rs, offs) \
|
||||
EMIT_REX_IF(1, r, rs); \
|
||||
emith_deref_op(0x89, r, rs, offs)
|
||||
|
||||
// note: don't use prefixes on this
|
||||
#define emith_read8_r_r_offs(r, rs, offs) do { \
|
||||
|
@ -664,6 +689,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI };
|
|||
|
||||
#define emith_ctx_read(r, offs) \
|
||||
emith_read_r_r_offs(r, CONTEXT_REG, offs)
|
||||
#define emith_ctx_read_c(cond, r, offs) \
|
||||
emith_ctx_read(r, offs)
|
||||
|
||||
#define emith_ctx_read_ptr(r, offs) do { \
|
||||
EMIT_REX_IF(1, r, CONTEXT_REG); \
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
// features
|
||||
#define PROPAGATE_CONSTANTS 1
|
||||
#define LINK_BRANCHES 1
|
||||
#define BRANCH_CACHE 1
|
||||
#define ALIAS_REGISTERS 1
|
||||
#define REMAP_REGISTER 1
|
||||
|
||||
|
@ -57,10 +58,11 @@
|
|||
// 10 - smc self-check
|
||||
// 100 - write trace
|
||||
// 200 - compare trace
|
||||
// 400 - print block entry backtrace
|
||||
// 400 - block entry backtraceA on exit
|
||||
// 800 - state dump on exit
|
||||
// {
|
||||
#ifndef DRC_DEBUG
|
||||
#define DRC_DEBUG 0
|
||||
#define DRC_DEBUG 0x800
|
||||
#endif
|
||||
|
||||
#if DRC_DEBUG
|
||||
|
@ -159,8 +161,6 @@ static char sh2dasm_buff[64];
|
|||
#define do_host_disasm(x)
|
||||
#endif
|
||||
|
||||
#if (DRC_DEBUG & (8|256|512|1024)) || defined(PDB)
|
||||
|
||||
#define SH2_DUMP(sh2, reason) { \
|
||||
char ms = (sh2)->is_slave ? 's' : 'm'; \
|
||||
printf("%csh2 %s %08x\n", ms, reason, (sh2)->pc); \
|
||||
|
@ -178,6 +178,8 @@ static char sh2dasm_buff[64];
|
|||
(sh2)->pdb_io_csum[0], (sh2)->pdb_io_csum[1], (sh2)->state, \
|
||||
(sh2)->poll_addr, (sh2)->poll_cycles, (sh2)->poll_cnt); \
|
||||
}
|
||||
|
||||
#if (DRC_DEBUG & (8|256|512|1024)) || defined(PDB)
|
||||
static SH2 csh2[2][4];
|
||||
static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr)
|
||||
{
|
||||
|
@ -631,6 +633,14 @@ static void REGPARM(1) flush_tcache(int tcid)
|
|||
memset(Pico32xMem->drcblk_da[tcid - 1], 0,
|
||||
sizeof(Pico32xMem->drcblk_da[0]));
|
||||
}
|
||||
#if BRANCH_CACHE
|
||||
if (tcid)
|
||||
memset32(sh2s[tcid-1].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4);
|
||||
else {
|
||||
memset32(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4);
|
||||
memset32(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)/4);
|
||||
}
|
||||
#endif
|
||||
#if (DRC_DEBUG & 4)
|
||||
tcache_dsm_ptrs[tcid] = tcache_bases[tcid];
|
||||
#endif
|
||||
|
@ -3727,14 +3737,35 @@ static void sh2_generate_utils(void)
|
|||
|
||||
// sh2_drc_dispatcher(void)
|
||||
sh2_drc_dispatcher = (void *)tcache_ptr;
|
||||
sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
|
||||
emith_cmp_r_imm(sr, 0);
|
||||
emith_jump_cond(DCOND_LT, sh2_drc_exit);
|
||||
rcache_invalidate();
|
||||
emith_ctx_read(arg0, SHR_PC * 4);
|
||||
#if BRANCH_CACHE
|
||||
// check if PC is in branch target cache
|
||||
emith_and_r_r_imm(arg1, arg0, (ARRAY_SIZE(sh2s->branch_cache)-1)*4);
|
||||
// TODO implement emith_add_r_r_r_lsl_ptr, saves one insn on 32bit ARM
|
||||
emith_lsl(arg1, arg1, sizeof(void *) == 8 ? 2 : 1);
|
||||
emith_add_r_r_ptr(arg1, CONTEXT_REG);
|
||||
emith_read_r_r_offs(arg2, arg1, offsetof(SH2, branch_cache));
|
||||
emith_cmp_r_r(arg2, arg0);
|
||||
EMITH_SJMP_START(DCOND_NE);
|
||||
emith_read_r_r_offs_ptr_c(DCOND_EQ, RET_REG, arg1, offsetof(SH2, branch_cache) + sizeof(void *));
|
||||
emith_jump_reg_c(DCOND_EQ, RET_REG);
|
||||
EMITH_SJMP_END(DCOND_NE);
|
||||
#endif
|
||||
emith_ctx_read(arg1, offsetof(SH2, is_slave));
|
||||
emith_add_r_r_ptr_imm(arg2, CONTEXT_REG, offsetof(SH2, drc_tmp));
|
||||
emith_call(dr_lookup_block);
|
||||
#if BRANCH_CACHE
|
||||
// store PC and block entry ptr (in arg0) in branch target cache
|
||||
emith_tst_r_r_ptr(RET_REG, RET_REG);
|
||||
EMITH_SJMP_START(DCOND_EQ);
|
||||
emith_ctx_read_c(DCOND_NE, arg2, SHR_PC * 4);
|
||||
emith_and_r_r_imm(arg1, arg2, (ARRAY_SIZE(sh2s->branch_cache)-1)*4);
|
||||
emith_lsl(arg1, arg1, sizeof(void *) == 8 ? 2 : 1);
|
||||
emith_add_r_r_ptr(arg1, CONTEXT_REG);
|
||||
emith_write_r_r_offs_c(DCOND_NE, arg2, arg1, offsetof(SH2, branch_cache));
|
||||
emith_write_r_r_offs_ptr_c(DCOND_NE, RET_REG, arg1, offsetof(SH2, branch_cache) + sizeof(void *));
|
||||
EMITH_SJMP_END(DCOND_EQ);
|
||||
#endif
|
||||
emit_block_entry();
|
||||
// lookup failed, call sh2_translate()
|
||||
emith_move_r_r_ptr(arg0, CONTEXT_REG);
|
||||
|
@ -3904,6 +3935,15 @@ static void sh2_smc_rm_block(struct block_desc *bd, int tcache_id, u32 ram_mask)
|
|||
|
||||
bd->addr = bd->size = bd->size_nolit = 0;
|
||||
bd->entry_count = 0;
|
||||
|
||||
#if BRANCH_CACHE
|
||||
if (tcache_id)
|
||||
memset32(sh2s[tcache_id-1].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4);
|
||||
else {
|
||||
memset32(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4);
|
||||
memset32(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)/4);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -4015,9 +4055,9 @@ int sh2_execute_drc(SH2 *sh2c, int cycles)
|
|||
return ret_cycles;
|
||||
}
|
||||
|
||||
#if (DRC_DEBUG & 2)
|
||||
void block_stats(void)
|
||||
static void block_stats(void)
|
||||
{
|
||||
#if (DRC_DEBUG & 2)
|
||||
int c, b, i, total = 0;
|
||||
|
||||
printf("block stats:\n");
|
||||
|
@ -4048,12 +4088,10 @@ void block_stats(void)
|
|||
for (b = 0; b < ARRAY_SIZE(block_tables); b++)
|
||||
for (i = 0; i < block_counts[b]; i++)
|
||||
block_tables[b][i].refcount = 0;
|
||||
}
|
||||
#else
|
||||
#define block_stats()
|
||||
#endif
|
||||
}
|
||||
|
||||
void sh2_drc_flush_all(void)
|
||||
static void backtrace(void)
|
||||
{
|
||||
#if (DRC_DEBUG & 1024)
|
||||
int i;
|
||||
|
@ -4064,6 +4102,52 @@ void sh2_drc_flush_all(void)
|
|||
for (i = 0; i < ARRAY_SIZE(csh2[1]); i++)
|
||||
SH2_DUMP(&csh2[1][i], "bt ssh2");
|
||||
#endif
|
||||
}
|
||||
|
||||
static void state_dump(void)
|
||||
{
|
||||
#if (DRC_DEBUG & 2048)
|
||||
int i;
|
||||
|
||||
SH2_DUMP(&sh2s[0], "master");
|
||||
printf("VBR msh2: %x\n", sh2s[0].vbr);
|
||||
for (i = 0; i < 0x60; i++) {
|
||||
printf("%08x ",p32x_sh2_read32(sh2s[0].vbr + i*4, &sh2s[0]));
|
||||
if ((i+1) % 8 == 0) printf("\n");
|
||||
}
|
||||
printf("stack msh2: %x\n", sh2s[0].r[15]);
|
||||
for (i = -0x30; i < 0x30; i++) {
|
||||
printf("%08x ",p32x_sh2_read32(sh2s[0].r[15] + i*4, &sh2s[0]));
|
||||
if ((i+1) % 8 == 0) printf("\n");
|
||||
}
|
||||
printf("branch cache master:\n");
|
||||
for (i = 0; i < ARRAY_SIZE(sh2s[0].branch_cache); i++) {
|
||||
printf("%08x ",sh2s[0].branch_cache[i].pc);
|
||||
if ((i+1) % 8 == 0) printf("\n");
|
||||
}
|
||||
SH2_DUMP(&sh2s[1], "slave");
|
||||
printf("VBR ssh2: %x\n", sh2s[1].vbr);
|
||||
for (i = 0; i < 0x60; i++) {
|
||||
printf("%08x ",p32x_sh2_read32(sh2s[1].vbr + i*4, &sh2s[1]));
|
||||
if ((i+1) % 8 == 0) printf("\n");
|
||||
}
|
||||
printf("stack ssh2: %x\n", sh2s[1].r[15]);
|
||||
for (i = -0x30; i < 0x30; i++) {
|
||||
printf("%08x ",p32x_sh2_read32(sh2s[1].r[15] + i*4, &sh2s[1]));
|
||||
if ((i+1) % 8 == 0) printf("\n");
|
||||
}
|
||||
printf("branch cache slave:\n");
|
||||
for (i = 0; i < ARRAY_SIZE(sh2s[1].branch_cache); i++) {
|
||||
printf("%08x ",sh2s[1].branch_cache[i].pc);
|
||||
if ((i+1) % 8 == 0) printf("\n");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void sh2_drc_flush_all(void)
|
||||
{
|
||||
backtrace();
|
||||
state_dump();
|
||||
block_stats();
|
||||
flush_tcache(0);
|
||||
flush_tcache(1);
|
||||
|
|
|
@ -50,6 +50,9 @@ typedef struct SH2_
|
|||
int poll_cycles;
|
||||
int poll_cnt;
|
||||
|
||||
// DRC branch cache. size must be 2^n and <=128
|
||||
struct { unsigned int pc; void *code; } branch_cache[128];
|
||||
|
||||
// interpreter stuff
|
||||
int icount; // cycles left in current timeslice
|
||||
unsigned int ea;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue