mirror of
https://github.com/RaySollium99/picodrive.git
synced 2025-09-05 07:17:45 -04:00
sh2 drc: speed optimization and bugfixing
This commit is contained in:
parent
c3ebe082d3
commit
7869213d35
5 changed files with 61 additions and 55 deletions
|
@ -160,7 +160,7 @@ enum { XT_UXTW=0x4, XT_UXTX=0x6, XT_LSL=0x7, XT_SXTW=0xc, XT_SXTX=0xe };
|
|||
#define A64_ROR_REG(rd, rn, rm) \
|
||||
A64_INSN(0xd,0x0,0x3,_,rm,_,0xb,rn,rd)
|
||||
|
||||
// rd = REVERSE(n) rn
|
||||
// rd = REVERSE(rn)
|
||||
#define A64_RBIT_REG(rd, rn) \
|
||||
A64_INSN(0xd,0x2,0x3,_,_,_,_,rn,rd)
|
||||
|
||||
|
@ -327,9 +327,10 @@ enum { AM_IDX, AM_IDXPOST, AM_IDXREG, AM_IDXPRE };
|
|||
|
||||
|
||||
// if-then-else conditional execution helpers
|
||||
#define JMP_POS(ptr) \
|
||||
#define JMP_POS(ptr) { \
|
||||
ptr = tcache_ptr; \
|
||||
EMIT(A64_B(0));
|
||||
EMIT(A64_B(0)); \
|
||||
}
|
||||
|
||||
#define JMP_EMIT(cond, ptr) { \
|
||||
u32 val_ = (u8 *)tcache_ptr - (u8 *)(ptr); \
|
||||
|
@ -1225,9 +1226,9 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode)
|
|||
emith_tst_r_imm(sr, S); \
|
||||
EMITH_SJMP_START(DCOND_EQ); \
|
||||
/* overflow if top 17 bits of MACH aren't all 1 or 0 */ \
|
||||
/* to check: add MACH[15] to MACH[31:16]. this is 0 if no overflow */ \
|
||||
emith_asrf(rn, mh, 16); /* sum = (MACH>>16) + ((MACH>>15)&1) */ \
|
||||
emith_adcf_r_imm(rn, 0); /* (MACH>>15) is in carry after shift */ \
|
||||
/* to check: add MACH >> 31 to MACH >> 15. this is 0 if no overflow */ \
|
||||
emith_asr(rn, mh, 15); \
|
||||
emith_addf_r_r_r_lsr(rn, rn, mh, 31); \
|
||||
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \
|
||||
emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \
|
||||
emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \
|
||||
|
@ -1280,11 +1281,12 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode)
|
|||
#define emith_tpop_carry(sr, is_sub) do { \
|
||||
if (is_sub) \
|
||||
emith_eor_r_imm(sr, 1); \
|
||||
emith_lsrf(sr, sr, 1); \
|
||||
emith_ror(sr, sr, 1); \
|
||||
emith_addf_r_r(sr, sr); \
|
||||
} while (0)
|
||||
|
||||
#define emith_tpush_carry(sr, is_sub) do { \
|
||||
emith_adc_r_r(sr, sr); \
|
||||
emith_adc_r_r(sr, Z0); \
|
||||
if (is_sub) \
|
||||
emith_eor_r_imm(sr, 1); \
|
||||
} while (0)
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
#define AT 1 // used to hold intermediate results
|
||||
#define FNZ 15 // emulated processor flags: N (bit 31) ,Z (all bits)
|
||||
#define FC 24 // emulated processor flags: C (bit 0), others 0
|
||||
#define FV 25 // emulated processor flags: Nt^Ns (bit 31). others ?
|
||||
#define FV 25 // emulated processor flags: Nt^Ns (bit 31). others x
|
||||
|
||||
|
||||
// unified conditions; virtual, not corresponding to anything real on MIPS
|
||||
|
@ -208,8 +208,8 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 };
|
|||
} while (0)
|
||||
|
||||
// FIFO for 2 instructions, for delay slot handling
|
||||
u32 emith_last_insns[2] = { -1,-1 };
|
||||
int emith_last_idx, emith_last_cnt;
|
||||
static u32 emith_last_insns[2] = { -1,-1 };
|
||||
static int emith_last_idx, emith_last_cnt;
|
||||
|
||||
#define EMIT_PUSHOP() \
|
||||
do { \
|
||||
|
@ -248,7 +248,7 @@ static int emith_is_b(u32 op) // B
|
|||
((op>>26) == OP__RT && ((op>>16) & 036) == RT_BLTZ); }
|
||||
// register usage for dependency evaluation XXX better do this as in emit_arm?
|
||||
static uint64_t emith_has_rs[3] = // OP__FN, OP__RT, others
|
||||
{ 0x00fffffffffa0ff0ULL, 0x000fff0fUL, 0xffffffff0f007f30ULL };
|
||||
{ 0x00fffffffffa0ff0ULL, 0x000fff0fUL, 0xffffffff0f007ff0ULL };
|
||||
static uint64_t emith_has_rt[3] = // OP__FN, OP__RT, others
|
||||
{ 0xff00fffffff00cffULL, 0x00000000UL, 0x8000ff0000000030ULL };
|
||||
static uint64_t emith_has_rd[3] = // OP__FN, OP__RT, others (rt instead of rd)
|
||||
|
@ -308,21 +308,23 @@ static void *emith_branch(u32 op)
|
|||
bop = emith_b_isswap(op, op2);
|
||||
}
|
||||
|
||||
// flush FIFO and branch
|
||||
tcache_ptr = (void *)((u32 *)tcache_ptr - emith_last_cnt);
|
||||
if (emith_last_insns[idx^1] != -1)
|
||||
EMIT_PTR(tcache_ptr, emith_last_insns[idx^1]);
|
||||
if (bop) { // can swap
|
||||
tcache_ptr = (void *)((u32 *)tcache_ptr - emith_last_cnt);
|
||||
if (emith_last_insns[idx^1] != -1)
|
||||
EMIT_PTR(tcache_ptr, emith_last_insns[idx^1]);
|
||||
bp = tcache_ptr;
|
||||
EMIT_PTR(tcache_ptr, bop); COUNT_OP;
|
||||
EMIT_PTR(tcache_ptr, emith_last_insns[idx]);
|
||||
emith_last_insns[0] = emith_last_insns[1] = -1;
|
||||
emith_last_cnt = 0;
|
||||
} else { // can't swap
|
||||
emith_flush();
|
||||
if (emith_last_insns[idx] != -1)
|
||||
EMIT_PTR(tcache_ptr, emith_last_insns[idx]);
|
||||
bp = tcache_ptr;
|
||||
EMIT_PTR(tcache_ptr, op); COUNT_OP;
|
||||
EMIT_PTR(tcache_ptr, MIPS_NOP); COUNT_OP;
|
||||
}
|
||||
emith_last_insns[0] = emith_last_insns[1] = -1;
|
||||
emith_last_cnt = 0;
|
||||
return bp;
|
||||
}
|
||||
|
||||
|
@ -392,8 +394,8 @@ static void *emith_branch(u32 op)
|
|||
|
||||
// flag emulation creates 2 (ie cmp #0/beq) up to 9 (ie adcf/ble) extra insns.
|
||||
// flag handling shortcuts may reduce this by 1-4 insns, see emith_cond_check()
|
||||
int emith_flg_rs, emith_flg_rt; // registers used in FNZ=rs-rt (aka cmp_r_r)
|
||||
int emith_flg_noV; // V flag known not to be set
|
||||
static int emith_flg_rs, emith_flg_rt; // registers used in FNZ=rs-rt (cmp_r_r)
|
||||
static int emith_flg_noV; // V flag known not to be set
|
||||
|
||||
// store minimal cc information: rd, rt^rs, carry
|
||||
// NB: the result *must* first go to FNZ, in case rd == rs or rd == rt.
|
||||
|
@ -625,7 +627,11 @@ static void emith_set_arith_flags(int rd, int rt, int rs, s32 imm, int sub)
|
|||
// move immediate
|
||||
static void emith_move_imm(int r, uintptr_t imm)
|
||||
{
|
||||
if ((s16)imm != imm) {
|
||||
if ((s16)imm == imm) {
|
||||
EMIT(MIPS_ADD_IMM(r, Z0, imm));
|
||||
} else if (!(imm >> 16)) {
|
||||
EMIT(MIPS_OR_IMM(r, Z0, imm));
|
||||
} else {
|
||||
int s = Z0;
|
||||
if (imm >> 16) {
|
||||
EMIT(MIPS_MOVT_IMM(r, imm >> 16));
|
||||
|
@ -633,8 +639,7 @@ static void emith_move_imm(int r, uintptr_t imm)
|
|||
}
|
||||
if ((u16)imm)
|
||||
EMIT(MIPS_OR_IMM(r, s, (u16)imm));
|
||||
} else
|
||||
EMIT(MIPS_ADD_IMM(r, Z0, imm));
|
||||
}
|
||||
}
|
||||
|
||||
#define emith_move_r_ptr_imm(r, imm) \
|
||||
|
@ -1372,16 +1377,17 @@ static int emith_cond_check(int cond, int *r)
|
|||
emith_tst_r_imm(sr, S); \
|
||||
EMITH_SJMP_START(DCOND_EQ); \
|
||||
/* overflow if top 17 bits of MACH aren't all 1 or 0 */ \
|
||||
/* to check: add MACH[15] to MACH[31:16]. this is 0 if no overflow */ \
|
||||
emith_asrf(rn, mh, 16); /* sum = (MACH>>16) + ((MACH>>15)&1) */ \
|
||||
emith_adcf_r_imm(rn, 0); /* (MACH>>15) is in carry after shift */ \
|
||||
/* to check: add MACH >> 31 to MACH >> 15. this is 0 if no overflow */ \
|
||||
emith_asr(rn, mh, 15); \
|
||||
emith_add_r_r_r_lsr(rn, rn, mh, 31); /* sum = (MACH>>31)+(MACH>>15) */ \
|
||||
emith_teq_r_r(rn, Z0); /* (need only N and Z flags) */ \
|
||||
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \
|
||||
emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \
|
||||
emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \
|
||||
EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> +ovl */ \
|
||||
emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0xffffffff */ \
|
||||
emith_sub_r_imm_c(DCOND_GT, mh, 1); /* 0x00007fff */ \
|
||||
EMITH_SJMP_END(DCOND_LE); \
|
||||
EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> +ovl */ \
|
||||
emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0xffffffff */ \
|
||||
emith_sub_r_imm_c(DCOND_MI, mh, 1); /* 0x00007fff */ \
|
||||
EMITH_SJMP_END(DCOND_PL); \
|
||||
EMITH_SJMP_END(DCOND_EQ); \
|
||||
EMITH_SJMP_END(DCOND_EQ); \
|
||||
} while (0)
|
||||
|
@ -1399,14 +1405,15 @@ static int emith_cond_check(int cond, int *r)
|
|||
/* overflow if top 33 bits of MACH:MACL aren't all 1 or 0 */ \
|
||||
/* to check: add MACL[31] to MACH. this is 0 if no overflow */ \
|
||||
emith_lsr(rn, ml, 31); \
|
||||
emith_addf_r_r(rn, mh); /* sum = MACH + ((MACL>>31)&1) */ \
|
||||
emith_add_r_r(rn, mh); /* sum = MACH + ((MACL>>31)&1) */ \
|
||||
emith_teq_r_r(rn, Z0); /* (need only N and Z flags) */ \
|
||||
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \
|
||||
/* XXX: LSB signalling only in SH1, or in SH2 too? */ \
|
||||
emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \
|
||||
emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \
|
||||
EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> positive ovrfl */ \
|
||||
emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0x7fffffff */ \
|
||||
EMITH_SJMP_END(DCOND_LE); \
|
||||
EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> positive ovrfl */ \
|
||||
emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0x7fffffff */ \
|
||||
EMITH_SJMP_END(DCOND_PL); \
|
||||
EMITH_SJMP_END(DCOND_EQ); \
|
||||
EMITH_SJMP_END(DCOND_EQ); \
|
||||
} while (0)
|
||||
|
|
|
@ -1225,6 +1225,11 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
|
|||
rcache_free_tmp(tmp_); \
|
||||
} while (0)
|
||||
|
||||
#define emith_carry_to_t(sr, is_sub) do { \
|
||||
emith_rorc(sr); \
|
||||
emith_rol(sr, sr, 1); \
|
||||
} while (0)
|
||||
|
||||
#define emith_tpop_carry(sr, is_sub) \
|
||||
emith_lsr(sr, sr, 1)
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue