SH2 drc: bug fixing and small speed improvements

parent 1891e649e5
commit 748b8187db

8 changed files with 254 additions and 203 deletions
@@ -6,3 +6,39 @@ extern u8 *tcache;
 void drc_cmn_init(void);
 void drc_cmn_cleanup(void);
 
+#define BITMASK1(v0) (1 << (v0))
+#define BITMASK2(v0,v1) ((1 << (v0)) | (1 << (v1)))
+#define BITMASK3(v0,v1,v2) (BITMASK2(v0,v1) | (1 << (v2)))
+#define BITMASK4(v0,v1,v2,v3) (BITMASK3(v0,v1,v2) | (1 << (v3)))
+#define BITMASK5(v0,v1,v2,v3,v4) (BITMASK4(v0,v1,v2,v3) | (1 << (v4)))
+#define BITMASK6(v0,v1,v2,v3,v4,v5) (BITMASK5(v0,v1,v2,v3,v4) | (1 << (v5)))
+#define BITRANGE(v0,v1) (BITMASK1(v1+1)-BITMASK1(v0)) // set with v0..v1
+
+// binary search approach, since we don't have CLZ on ARM920T
+#define FOR_ALL_BITS_SET_DO(mask, bit, code) { \
+	u32 __mask = mask; \
+	for (bit = 31; bit >= 0 && mask; bit--, __mask <<= 1) { \
+		if (!(__mask & (0xffff << 16))) \
+			bit -= 16, __mask <<= 16; \
+		if (!(__mask & (0xff << 24))) \
+			bit -= 8, __mask <<= 8; \
+		if (!(__mask & (0xf << 28))) \
+			bit -= 4, __mask <<= 4; \
+		if (!(__mask & (0x3 << 30))) \
+			bit -= 2, __mask <<= 2; \
+		if (!(__mask & (0x1 << 31))) \
+			bit -= 1, __mask <<= 1; \
+		if (__mask & (0x1 << 31)) { \
+			code; \
+		} \
+	} \
+}
+
+// inspired by https://graphics.stanford.edu/~seander/bithacks.html
+static inline int count_bits(unsigned val)
+{
+	val = val - ((val >> 1) & 0x55555555);
+	val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
+	return (((val + (val >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
+}
+
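As a quick standalone illustration (not part of the commit) of what the shared header now provides: count_bits() is a classic SWAR popcount, and FOR_ALL_BITS_SET_DO() visits the set bits of a mask from bit 31 down to bit 0 using the branchy binary search described in the comment, since older ARM cores lack a CLZ instruction.

    #include <stdio.h>

    typedef unsigned int u32;

    /* same SWAR popcount as in the hunk above */
    static inline int count_bits(unsigned val)
    {
        val = val - ((val >> 1) & 0x55555555);
        val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
        return (((val + (val >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
    }

    int main(void)
    {
        u32 mask = 0x8021;    /* bits 15, 5 and 0 set */
        int bit;

        printf("count_bits = %d\n", count_bits(mask));    /* prints 3 */

        /* the bits that FOR_ALL_BITS_SET_DO(mask, bit, ...) would visit: */
        for (bit = 31; bit >= 0; bit--)
            if (mask & (1u << bit))
                printf("bit %d is set\n", bit);           /* 15, 5, 0 */
        return 0;
    }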
@@ -382,13 +382,6 @@ static void emith_flush(void)
 #define EOP_MOVT(rd,imm) \
 	EMIT(0xe3400000 | ((rd)<<12) | (((imm)>>16)&0xfff) | (((imm)>>12)&0xf0000), M1(rd), NO)
 
-static inline int count_bits(unsigned val)
-{
-	val = val - ((val >> 1) & 0x55555555);
-	val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
-	return (((val + (val >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
-}
-
 // host literal pool; must be significantly smaller than 1024 (max LDR offset = 4096)
 #define MAX_HOST_LITERALS 128
 static u32 literal_pool[MAX_HOST_LITERALS];
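A side note on the constraint in that comment: each pool entry is 4 bytes and a PC-relative LDR on ARM can reach at most 4095 bytes, so the pool has to stay well below 1024 entries to leave reach for the code emitted in front of it. A compile-time restatement of that bound (sketch only, mirroring the definitions above):

    typedef unsigned int u32;

    #define MAX_HOST_LITERALS 128    /* same value as in the hunk above */

    /* 128 entries use 512 of the 4095 bytes an LDR literal can reach,
     * leaving the rest for the generated code in front of the pool */
    _Static_assert(MAX_HOST_LITERALS * sizeof(u32) <= 4096,
        "literal pool must fit within LDR's immediate offset range");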
@@ -429,18 +422,26 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int
 	// count insns needed for mov/orr #imm
 	for (v = imm, ror2 = 0; (v >> 24) && ror2 < 32/2; ror2++)
 		v = (v << 2) | (v >> 30);
+#ifdef HAVE_ARMV7
 	for (i = 2; i > 0; i--, v >>= 8)
 		while (v > 0xff && !(v & 3))
 			v >>= 2;
 	if (v) { // 3+ insns needed...
 		if (op == A_OP_MVN)
 			imm = ~imm;
-#ifdef HAVE_ARMV7
 		// ...prefer movw/movt
 		EOP_MOVW(rd, imm);
 		if (imm & 0xffff0000)
 			EOP_MOVT(rd, imm);
+		return;
+	}
 #else
+	for (i = 3; i > 0; i--, v >>= 8)
+		while (v > 0xff && !(v & 3))
+			v >>= 2;
+	if (v) { // 4 insns needed...
+		if (op == A_OP_MVN)
+			imm = ~imm;
 		// ...emit literal load
 		int idx, o;
 		if (literal_iindex >= MAX_HOST_LITERALS) {
@@ -455,9 +456,9 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int
 			EOP_C_DOP_IMM(cond, A_OP_ADD, 0, rd, rd, 0, o);
 		else if (o < 0)
 			EOP_C_DOP_IMM(cond, A_OP_SUB, 0, rd, rd, 0, -o);
-#endif
 		return;
 	}
+#endif
 	break;
 
 	case A_OP_AND:
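The restructured ARMv7 path above prefers a MOVW/MOVT pair once a constant cannot be built from one or two rotated 8-bit immediates. A standalone sketch (illustrative names, not the emitter's code) of the two ideas involved: the rotated-immediate test that the ror2/shift loops perform, and the 16-bit halves that MOVW/MOVT load.

    #include <stdio.h>
    #include <stdint.h>

    /* Can val be encoded as an ARM data-processing immediate, i.e. an
     * 8-bit value rotated right by an even amount?  (Illustrative helper,
     * not the emitter's code.) */
    static int arm_imm_encodable(uint32_t val)
    {
        int ror2;
        for (ror2 = 0; ror2 < 16; ror2++) {
            if (val <= 0xff)
                return 1;
            val = (val << 2) | (val >> 30);   /* rotate left by 2 */
        }
        return 0;
    }

    int main(void)
    {
        uint32_t imm = 0xdeadbeef;
        if (!arm_imm_encodable(imm)) {
            /* the halves that EOP_MOVW/EOP_MOVT end up loading */
            printf("movw rd, #0x%04x\n", imm & 0xffff);      /* low 16 bits */
            if (imm & 0xffff0000)
                printf("movt rd, #0x%04x\n", imm >> 16);     /* high 16 bits */
        }
        return 0;
    }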
@@ -544,7 +545,7 @@ static int emith_xbranch(int cond, void *target, int is_call)
 		EMIT((u32)target,M1(PC),0);
 #else
 		// should never happen
-		elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, "indirect jmp %08x->%08x", target, tcache_ptr);
+		elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, "indirect jmp %8p->%8p", target, tcache_ptr);
 		exit(1);
 #endif
 	}
@@ -633,9 +634,6 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
 #define EMITH_NOTHING1(cond) \
 	(void)(cond)
 
-#define EMITH_SJMP_DECL_()
-#define EMITH_SJMP_START_(cond)	EMITH_NOTHING1(cond)
-#define EMITH_SJMP_END_(cond)	EMITH_NOTHING1(cond)
 #define EMITH_SJMP_START(cond)	EMITH_NOTHING1(cond)
 #define EMITH_SJMP_END(cond)	EMITH_NOTHING1(cond)
 #define EMITH_SJMP2_START(cond)	EMITH_NOTHING1(cond)
@@ -806,6 +804,9 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
 #define emith_eor_r_imm(r, imm) \
 	emith_op_imm(A_COND_AL, 0, A_OP_EOR, r, imm)
 
+#define emith_eor_r_imm_ptr(r, imm) \
+	emith_eor_r_imm(r, imm)
+
 // note: only use 8bit imm for these
 #define emith_tst_r_imm(r, imm) \
 	emith_top_imm(A_COND_AL, A_OP_TST, r, imm)
@@ -837,6 +838,9 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
 #define emith_eor_r_imm_c(cond, r, imm) \
 	emith_op_imm(cond, 0, A_OP_EOR, r, imm)
 
+#define emith_eor_r_imm_ptr_c(cond, r, imm) \
+	emith_eor_r_imm_c(cond, r, imm)
+
 #define emith_bic_r_imm_c(cond, r, imm) \
 	emith_op_imm(cond, 0, A_OP_BIC, r, imm)
 
@@ -1139,6 +1143,8 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
 	emith_jump(target); \
 } while (0)
 
+#define emith_call_cleanup()	/**/
+
 #define emith_ret_c(cond) \
 	emith_jump_reg_c(cond, LR)
 
@@ -1228,10 +1234,10 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
 	/* if (reg <= turns) turns = reg-1 */ \
 	t3 = rcache_get_reg(reg, RC_GR_RMW, NULL); \
 	emith_cmp_r_r(t3, t2); \
-	emith_sub_r_r_imm_c(DCOND_LE, t2, t3, 1); \
+	emith_sub_r_r_imm_c(DCOND_LS, t2, t3, 1); \
 	/* if (reg <= 1) turns = 0 */ \
 	emith_cmp_r_imm(t3, 1); \
-	emith_move_r_imm_c(DCOND_LE, t2, 0); \
+	emith_move_r_imm_c(DCOND_LS, t2, 0); \
 	/* reg -= turns */ \
 	emith_sub_r_r(t3, t2); \
 } \
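This is one of the bug fixes in this commit: the compared values are evidently unsigned counters, so the clamp has to key off the unsigned lower-or-same condition (LS) rather than signed less-or-equal (LE), which misfires once the counter has its top bit set. The x86 version of the same macro gets the matching change further down. A tiny standalone illustration of the difference (not emitter code):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t reg = 0x80000005;   /* large unsigned count, sign bit set */
        uint32_t turns = 16;

        /* signed LE: 0x80000005 reads as negative, so the clamp would fire */
        printf("signed   reg <= turns: %d\n", (int32_t)reg <= (int32_t)turns);
        /* unsigned LS: 0x80000005 > 16, so it correctly does not */
        printf("unsigned reg <= turns: %d\n", reg <= turns);
        return 0;
    }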
@@ -1361,7 +1367,7 @@ static int tcond = -1;
 #define emith_set_t(sr, val) \
 	tcond = ((val) ? A_COND_AL: A_COND_NV)
 
-static void emith_sync_t(sr)
+static void emith_sync_t(int sr)
 {
 	if (tcond == A_COND_AL)
 		emith_or_r_imm(sr, T);
@@ -396,6 +396,12 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
 #define emith_cmp_r_imm(r, imm) \
 	emith_arith_r_imm(7, r, imm)
 
+#define emith_eor_r_imm_ptr(r, imm) do { \
+	EMIT_REX_IF(1, 0, r); \
+	EMIT_OP_MODRM64(0x81, 3, 6, r); \
+	EMIT(imm, u32); \
+} while (0)
+
 #define emith_tst_r_imm(r, imm) do { \
 	EMIT_REX_IF(0, 0, r); \
 	EMIT_OP_MODRM64(0xf7, 3, 0, r); \
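The _ptr variant differs from the plain emith_eor_r_imm by what appears to be a forced REX.W prefix (the first argument of EMIT_REX_IF), so the XOR covers the full pointer-sized register; opcode 0x81 with ModRM reg field 6 is the "XOR r/m, imm32" group encoding. A hand-assembled example of what this presumably emits for rsi (illustrative only):

    /* xor rsi, 0x12345678 on x86-64:
     *   48           REX.W
     *   81 f6        opcode 0x81, ModRM mod=3, reg=6 (/6 = XOR), rm=6 (rsi)
     *   78 56 34 12  imm32, little-endian */
    static const unsigned char xor_rsi_imm[] = {
        0x48, 0x81, 0xf6, 0x78, 0x56, 0x34, 0x12
    };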
@@ -417,6 +423,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
 	emith_or_r_imm(r, imm)
 #define emith_eor_r_imm_c(cond, r, imm) \
 	emith_eor_r_imm(r, imm)
+#define emith_eor_r_imm_ptr_c(cond, r, imm) \
+	emith_eor_r_imm_ptr(r, imm)
 #define emith_bic_r_imm_c(cond, r, imm) \
 	emith_bic_r_imm(r, imm)
 #define emith_tst_r_imm_c(cond, r, imm) \
@@ -589,9 +597,9 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
 // XXX: stupid mess
 #define emith_mul_(op, dlo, dhi, s1, s2) do { \
 	int rmr; \
-	if (dlo != xAX && dhi != xAX) \
+	if (dlo != xAX && dhi != xAX && rcache_is_hreg_used(xAX)) \
 		emith_push(xAX); \
-	if (dlo != xDX && dhi != xDX) \
+	if (dlo != xDX && dhi != xDX && rcache_is_hreg_used(xDX)) \
 		emith_push(xDX); \
 	if ((s1) == xAX) \
 		rmr = s2; \
@@ -609,9 +617,9 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
 	} \
 	if (dhi != xDX && dhi != -1 && !(dhi == xAX && dlo == xDX)) \
 		emith_move_r_r(dhi, (dlo == xDX ? xAX : xDX)); \
-	if (dlo != xDX && dhi != xDX) \
+	if (dlo != xDX && dhi != xDX && rcache_is_hreg_used(xDX)) \
 		emith_pop(xDX); \
-	if (dlo != xAX && dhi != xAX) \
+	if (dlo != xAX && dhi != xAX && rcache_is_hreg_used(xAX)) \
 		emith_pop(xAX); \
 } while (0)
 
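Context for the two hunks above: the x86 widening MUL always produces its result in EDX:EAX, so the old code unconditionally saved and restored xAX and xDX around it. With this change they are only pushed when rcache_is_hreg_used() reports the register cache actually holds a live value there, which is presumably part of the "small speed improvements" in the commit title.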
@@ -898,6 +906,9 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
 	emith_jump(target); \
 } while (0)
 
+#define emith_call_cleanup() \
+	emith_add_r_r_ptr_imm(xSP, xSP, sizeof(void *)); // remove return addr
+
 #define emith_ret() \
 	EMIT_OP(0xc3)
 
@@ -912,10 +923,12 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
 #define emith_push_ret(r) do { \
 	int r_ = (r >= 0 ? r : xSI); \
 	emith_push(r_); /* always push to align */ \
+	emith_add_r_r_ptr_imm(xSP, xSP, -8*4); /* args shadow space */ \
 } while (0)
 
 #define emith_pop_and_ret(r) do { \
 	int r_ = (r >= 0 ? r : xSI); \
+	emith_add_r_r_ptr_imm(xSP, xSP, 8*4); /* args shadow space */ \
 	emith_pop(r_); \
 	emith_ret(); \
 } while (0)
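The 8*4 = 32 bytes reserved and later released here appear to match the register-argument shadow space that the Win64 calling convention expects a caller to provide for the functions it calls, which is what the /* args shadow space */ comments seem to refer to; the SysV x86-64 ABI has no such requirement.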
@@ -942,15 +955,6 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
 
 // "simple" jump (no more then a few insns)
 // ARM will use conditional instructions here
-#define EMITH_SJMP_DECL_() \
-	u8 *cond_ptr
-
-#define EMITH_SJMP_START_(cond) \
-	JMP8_POS(cond_ptr)
-
-#define EMITH_SJMP_END_(cond) \
-	JMP8_EMIT(cond, cond_ptr)
-
 #define EMITH_SJMP_START EMITH_JMP_START
 #define EMITH_SJMP_END EMITH_JMP_END
 
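The JMP8_POS/JMP8_EMIT pair used by the removed macros (and presumably by EMITH_JMP_START/EMITH_JMP_END, which SJMP now maps to) works by emitting a conditional jump with a placeholder 8-bit displacement and backpatching it once the guarded body has been emitted, whereas the ARM backend simply predicates the body. A standalone sketch of the backpatch idea (hypothetical buffer and opcodes, not the emitter's code):

    #include <stdio.h>
    #include <stdint.h>

    static uint8_t code[64];
    static uint8_t *ptr = code;

    int main(void)
    {
        uint8_t *fixup;

        *ptr++ = 0x77;          /* ja rel8, displacement not known yet */
        fixup = ptr++;          /* remember where the rel8 byte lives */

        *ptr++ = 0x48; *ptr++ = 0xff; *ptr++ = 0xc8;   /* dec rax: the guarded body */

        *fixup = (uint8_t)(ptr - (fixup + 1));         /* backpatch: jump past the body */

        printf("emitted %d bytes, rel8=%d\n", (int)(ptr - code), *fixup);
        return 0;
    }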
@@ -1046,7 +1050,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
 	emith_push(xR15); \
 	emith_push(xSI); \
 	emith_push(xDI); \
-	emith_add_r_r_ptr_imm(xSP, xSP, -8*5); /* align + ABI param area */ \
+	emith_add_r_r_ptr_imm(xSP, xSP, -8*5); /* align + args shadow space */ \
 } while (0)
 
 #define emith_sh2_drc_exit() do { \
@@ -1106,19 +1110,17 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
 #endif
 
 #define emith_save_caller_regs(mask) do { \
-	if ((mask) & (1 << xAX)) emith_push(xAX); \
-	if ((mask) & (1 << xCX)) emith_push(xCX); \
-	if ((mask) & (1 << xDX)) emith_push(xDX); \
-	if ((mask) & (1 << xSI)) emith_push(xSI); \
-	if ((mask) & (1 << xDI)) emith_push(xDI); \
+	int _c; u32 _m = mask & 0xfc7; /* AX, CX, DX, SI, DI, 8, 9, 10, 11 */ \
+	if (__builtin_parity(_m) == 1) _m |= 0x8; /* BX for ABI align */ \
+	for (_c = HOST_REGS; _m && _c >= 0; _m &= ~(1 << _c), _c--) \
+		if (_m & (1 << _c)) emith_push(_c); \
 } while (0)
 
 #define emith_restore_caller_regs(mask) do { \
-	if ((mask) & (1 << xDI)) emith_pop(xDI); \
-	if ((mask) & (1 << xSI)) emith_pop(xSI); \
-	if ((mask) & (1 << xDX)) emith_pop(xDX); \
-	if ((mask) & (1 << xCX)) emith_pop(xCX); \
-	if ((mask) & (1 << xAX)) emith_pop(xAX); \
+	int _c; u32 _m = mask & 0xfc7; \
+	if (__builtin_parity(_m) == 1) _m |= 0x8; /* BX for ABI align */ \
+	for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \
+		if (_m & (1 << _c)) emith_pop(_c); \
 } while (0)
 
 #define emith_sh2_rcall(a, tab, func, mask) do { \
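The rewritten save/restore pair walks the mask in a loop instead of hard-coding five registers, and the __builtin_parity() test keeps the number of 8-byte pushes even so the stack stays 16-byte aligned across the helper call (xBX is bit 3, hence the `_m |= 0x8`). A small standalone check of that trick (hypothetical mask, not emitter code):

    #include <stdio.h>

    int main(void)
    {
        unsigned m = (1 << 0) | (1 << 1) | (1 << 2);  /* say xAX, xCX, xDX are live */
        int pushes = __builtin_popcount(m);

        if (__builtin_parity(m) == 1) {               /* odd number of pushes... */
            m |= 1 << 3;                              /* ...push xBX as filler */
            pushes++;
        }
        printf("%d pushes = %d bytes, 16-byte aligned: %s\n",
               pushes, pushes * 8, pushes * 8 % 16 == 0 ? "yes" : "no");
        return 0;
    }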
@@ -1192,14 +1194,14 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
 	/* if (reg <= turns) turns = reg-1 */ \
 	t3 = rcache_get_reg(reg, RC_GR_RMW, NULL); \
 	emith_cmp_r_r(t3, t2); \
-	EMITH_SJMP_START(DCOND_GT); \
-	emith_sub_r_r_imm_c(DCOND_LE, t2, t3, 1); \
-	EMITH_SJMP_END(DCOND_GT); \
+	EMITH_SJMP_START(DCOND_HI); \
+	emith_sub_r_r_imm_c(DCOND_LS, t2, t3, 1); \
+	EMITH_SJMP_END(DCOND_HI); \
 	/* if (reg <= 1) turns = 0 */ \
 	emith_cmp_r_imm(t3, 1); \
-	EMITH_SJMP_START(DCOND_GT); \
-	emith_move_r_imm_c(DCOND_LE, t2, 0); \
-	EMITH_SJMP_END(DCOND_GT); \
+	EMITH_SJMP_START(DCOND_HI); \
+	emith_move_r_imm_c(DCOND_LS, t2, 0); \
+	EMITH_SJMP_END(DCOND_HI); \
 	/* reg -= turns */ \
 	emith_sub_r_r(t3, t2); \
 } \