sh2 drc: improved RTS call stack cache

This commit is contained in:
kub 2019-09-19 22:14:28 +02:00
parent 58a444a295
commit 36614252d9
6 changed files with 130 additions and 189 deletions

View file

@ -1000,10 +1000,6 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
emith_read_r_r_offs_c(A_COND_AL, r, rs, offs) emith_read_r_r_offs_c(A_COND_AL, r, rs, offs)
#define emith_read_r_r_r(r, rs, rm) \ #define emith_read_r_r_r(r, rs, rm) \
EOP_LDR_REG_LSL(A_COND_AL, r, rs, rm, 0) EOP_LDR_REG_LSL(A_COND_AL, r, rs, rm, 0)
#define emith_read_r_r_r_wb(r, rs, rm) \
EOP_LDR_REG_LSL_WB(A_COND_AL, r, rs, rm, 0)
#define emith_read_r_r_r_ptr_wb(r, rs, rm) \
emith_read_r_r_r_wb(r, rs, rm)
#define emith_read8_r_r_offs_c(cond, r, rs, offs) \ #define emith_read8_r_r_offs_c(cond, r, rs, offs) \
EOP_LDRB_IMM2(cond, r, rs, offs) EOP_LDRB_IMM2(cond, r, rs, offs)
@ -1049,10 +1045,6 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
emith_write_r_r_offs_c(A_COND_AL, r, rs, offs) emith_write_r_r_offs_c(A_COND_AL, r, rs, offs)
#define emith_write_r_r_offs_ptr(r, rs, offs) \ #define emith_write_r_r_offs_ptr(r, rs, offs) \
emith_write_r_r_offs_c(A_COND_AL, r, rs, offs) emith_write_r_r_offs_c(A_COND_AL, r, rs, offs)
#define emith_write_r_r_r_wb(r, rs, rm) \
EOP_STR_REG_LSL_WB(A_COND_AL, r, rs, rm, 0)
#define emith_write_r_r_r_ptr_wb(r, rs, rm) \
emith_write_r_r_r_wb(r, rs, rm)
#define emith_ctx_read_c(cond, r, offs) \ #define emith_ctx_read_c(cond, r, offs) \
emith_read_r_r_offs_c(cond, r, CONTEXT_REG, offs) emith_read_r_r_offs_c(cond, r, CONTEXT_REG, offs)
@ -1133,21 +1125,21 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
#define emith_jump_patchable(target) \ #define emith_jump_patchable(target) \
emith_jump(target) emith_jump(target)
/* size in bytes of the code emitted by emith_jump_patchable(); the call
 * stack code adds this to a return address to step over such a jump */
#define emith_jump_patchable_size() 4
#define emith_jump_cond(cond, target) \ #define emith_jump_cond(cond, target) \
emith_xbranch(cond, target, 0) emith_xbranch(cond, target, 0)
#define emith_jump_cond_inrange(target) !0
#define emith_jump_cond_patchable(cond, target) \ #define emith_jump_cond_patchable(cond, target) \
emith_jump_cond(cond, target) emith_jump_cond(cond, target)
#define emith_jump_patch(ptr, target) ({ \ #define emith_jump_patch(ptr, target, pos) do { \
u32 *ptr_ = ptr; \ u32 *ptr_ = ptr; \
u32 val_ = (u32 *)(target) - ptr_ - 2; \ u32 val_ = (u32 *)(target) - ptr_ - 2; \
*ptr_ = (*ptr_ & 0xff000000) | (val_ & 0x00ffffff); \ *ptr_ = (*ptr_ & 0xff000000) | (val_ & 0x00ffffff); \
(u8 *)ptr; \ if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr; \
}) } while (0)
#define emith_jump_cond_inrange(target) !0
#define emith_jump_patch_size() 4 #define emith_jump_patch_size() 4
#define emith_jump_at(ptr, target) do { \ #define emith_jump_at(ptr, target) do { \
@ -1184,11 +1176,6 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
emith_jump_ctx(offs); \ emith_jump_ctx(offs); \
} while (0) } while (0)
#define emith_call_link(r, target) do { \
emith_move_r_r(r, PC); \
emith_jump(target); \
} while (0)
#define emith_call_cleanup() /**/ #define emith_call_cleanup() /**/
#define emith_ret_c(cond) \ #define emith_ret_c(cond) \
@ -1200,6 +1187,9 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
#define emith_ret_to_ctx(offs) \ #define emith_ret_to_ctx(offs) \
emith_ctx_write(LR, offs) emith_ctx_write(LR, offs)
/* r = LR + imm (pointer-sized add), i.e. an address relative to the return
 * address of the call currently being executed */
#define emith_add_r_ret_imm(r, imm) \
emith_add_r_r_ptr_imm(r, LR, imm)
/* pushes r12 for eabi alignment */ /* pushes r12 for eabi alignment */
#define emith_push_ret(r) do { \ #define emith_push_ret(r) do { \
int r_ = (r >= 0 ? r : 12); \ int r_ = (r >= 0 ? r : 12); \

View file

@ -865,15 +865,6 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode)
#define emith_read_r_r_r_c(cond, r, rs, rm) \ #define emith_read_r_r_r_c(cond, r, rs, rm) \
emith_read_r_r_r(r, rs, rm) emith_read_r_r_r(r, rs, rm)
#define emith_read_r_r_r_ptr_wb(r, rs, rm) do { \
emith_read_r_r_r_ptr(r, rs, rm); \
emith_add_r_r_ptr(rs, rm); \
} while (0)
#define emith_read_r_r_r_wb(r, rs, rm) do { \
emith_read_r_r_r(r, rs, rm); \
emith_add_r_r_ptr(rs, rm); \
} while (0)
#define emith_read8_r_r_offs(r, rs, offs) \ #define emith_read8_r_r_offs(r, rs, offs) \
emith_ldst_offs(AM_B, r, rs, offs, LT_LD, AM_IDX) emith_ldst_offs(AM_B, r, rs, offs, LT_LD, AM_IDX)
#define emith_read8_r_r_offs_c(cond, r, rs, offs) \ #define emith_read8_r_r_offs_c(cond, r, rs, offs) \
@ -935,15 +926,6 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode)
#define emith_write_r_r_r_c(cond, r, rs, rm) \ #define emith_write_r_r_r_c(cond, r, rs, rm) \
emith_write_r_r_r(r, rs, rm) emith_write_r_r_r(r, rs, rm)
#define emith_write_r_r_r_ptr_wb(r, rs, rm) do { \
emith_write_r_r_r_ptr(r, rs, rm); \
emith_add_r_r_ptr(rs, rm); \
} while (0)
#define emith_write_r_r_r_wb(r, rs, rm) do { \
emith_write_r_r_r(r, rs, rm); \
emith_add_r_r_ptr(rs, rm); \
} while (0)
#define emith_ctx_read_ptr(r, offs) \ #define emith_ctx_read_ptr(r, offs) \
emith_read_r_r_offs_ptr(r, CONTEXT_REG, offs) emith_read_r_r_offs_ptr(r, CONTEXT_REG, offs)
@ -1031,6 +1013,7 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode)
#define emith_jump_patchable(target) \ #define emith_jump_patchable(target) \
emith_jump(target) emith_jump(target)
// size in bytes of the code emitted by emith_jump_patchable(); the call
// stack code adds this to a return address to step over such a jump
#define emith_jump_patchable_size() 4
#define emith_jump_cond(cond, target) \ #define emith_jump_cond(cond, target) \
emith_bcond(tcache_ptr, 0, cond, target) emith_bcond(tcache_ptr, 0, cond, target)
@ -1039,9 +1022,9 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode)
emith_bcond(tcache_ptr, 1, cond, target) emith_bcond(tcache_ptr, 1, cond, target)
#define emith_jump_cond_inrange(target) \ #define emith_jump_cond_inrange(target) \
!(((u8 *)target - (u8 *)tcache_ptr + 0x100000) >> 22) !(((u8 *)target - (u8 *)tcache_ptr + 0x100000) >> 21)
#define emith_jump_patch(ptr, target) ({ \ #define emith_jump_patch(ptr, target, pos) do { \
u32 *ptr_ = (u32 *)ptr; \ u32 *ptr_ = (u32 *)ptr; \
u32 disp_ = (u8 *)(target) - (u8 *)(ptr_); \ u32 disp_ = (u8 *)(target) - (u8 *)(ptr_); \
int cond_ = ptr_[0] & 0xf; \ int cond_ = ptr_[0] & 0xf; \
@ -1051,8 +1034,9 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode)
} else if (ptr_[0] & 0x80000000) \ } else if (ptr_[0] & 0x80000000) \
EMIT_PTR(ptr_, A64_BL((disp_) & 0x0fffffff)); \ EMIT_PTR(ptr_, A64_BL((disp_) & 0x0fffffff)); \
else EMIT_PTR(ptr_, A64_B((disp_) & 0x0fffffff)); \ else EMIT_PTR(ptr_, A64_B((disp_) & 0x0fffffff)); \
(u8 *)ptr; \ if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr; \
}) } while (0)
// size in bytes of the code region handled by emith_jump_patch(); callers
// use it to size the host instruction cache sync after patching
#define emith_jump_patch_size() 8
#define emith_jump_reg(r) \ #define emith_jump_reg(r) \
EMIT(A64_BR(r)) EMIT(A64_BR(r))
@ -1085,11 +1069,6 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode)
rcache_free_tmp(_t); \ rcache_free_tmp(_t); \
} while (0) } while (0)
#define emith_call_link(r, target) do { \
EMIT(A64_ADRXLIT_IMM(r, 8)); \
emith_jump(target); \
} while (0)
#define emith_call_cleanup() /**/ #define emith_call_cleanup() /**/
#define emith_ret() \ #define emith_ret() \
@ -1100,6 +1079,9 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode)
#define emith_ret_to_ctx(offs) \ #define emith_ret_to_ctx(offs) \
emith_ctx_write_ptr(LR, offs) emith_ctx_write_ptr(LR, offs)
// r = LR + imm (pointer-sized add), i.e. an address relative to the return
// address of the call currently being executed
#define emith_add_r_ret_imm(r, imm) \
emith_add_r_r_ptr_imm(r, LR, imm)
// NB: pushes r or r18 for SP hardware alignment // NB: pushes r or r18 for SP hardware alignment
#define emith_push_ret(r) do { \ #define emith_push_ret(r) do { \
int r_ = (r >= 0 ? r : 18); \ int r_ = (r >= 0 ? r : 18); \
@ -1120,7 +1102,6 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode)
#define emith_flush() /**/ #define emith_flush() /**/
#define host_instructions_updated(base, end) __builtin___clear_cache(base, end) #define host_instructions_updated(base, end) __builtin___clear_cache(base, end)
#define emith_update_cache() /**/ #define emith_update_cache() /**/
#define emith_jump_patch_size() 8
#define emith_rw_offs_max() 0xff #define emith_rw_offs_max() 0xff

View file

@ -950,13 +950,6 @@ static void emith_lohi_nops(void)
#define emith_read_r_r_r_c(cond, r, rs, rm) \ #define emith_read_r_r_r_c(cond, r, rs, rm) \
emith_read_r_r_r(r, rs, rm) emith_read_r_r_r(r, rs, rm)
#define emith_read_r_r_r_ptr_wb(r, rs, rm) do { \
emith_add_r_r_r(rs, rs, rm); \
EMIT(MIPS_LW(r, rs, 0)); \
} while (0)
#define emith_read_r_r_r_wb(r, rs, rm) \
emith_read_r_r_r_ptr_wb(r, rs, rm)
#define emith_read8_r_r_offs(r, rs, offs) \ #define emith_read8_r_r_offs(r, rs, offs) \
EMIT(MIPS_LBU(r, rs, offs)) EMIT(MIPS_LBU(r, rs, offs))
#define emith_read8_r_r_offs_c(cond, r, rs, offs) \ #define emith_read8_r_r_offs_c(cond, r, rs, offs) \
@ -1028,13 +1021,6 @@ static void emith_lohi_nops(void)
#define emith_write_r_r_r_c(cond, r, rs, rm) \ #define emith_write_r_r_r_c(cond, r, rs, rm) \
emith_write_r_r_r(r, rs, rm) emith_write_r_r_r(r, rs, rm)
#define emith_write_r_r_r_ptr_wb(r, rs, rm) do { \
emith_add_r_r_r(rs, rs, rm); \
EMIT(MIPS_SW(r, rs, 0)); \
} while (0)
#define emith_write_r_r_r_wb(r, rs, rm) \
emith_write_r_r_r_ptr_wb(r, rs, rm)
#define emith_ctx_read_ptr(r, offs) \ #define emith_ctx_read_ptr(r, offs) \
emith_read_r_r_offs_ptr(r, CONTEXT_REG, offs) emith_read_r_r_offs_ptr(r, CONTEXT_REG, offs)
@ -1176,6 +1162,7 @@ static int emith_cond_check(int cond, int *r)
emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)) emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff))
#define emith_jump_patchable(target) \ #define emith_jump_patchable(target) \
emith_jump(target) emith_jump(target)
#define emith_jump_patchable_size() 8 /* J+delayslot */
// NB: MIPS conditional branches have only +/- 128KB range // NB: MIPS conditional branches have only +/- 128KB range
#define emith_jump_cond(cond, target) do { \ #define emith_jump_cond(cond, target) do { \
@ -1190,6 +1177,8 @@ static int emith_cond_check(int cond, int *r)
EMIT_PTR(bp, MIPS_BCONDZ(mcond_, r_, (u8 *)tcache_ptr-bp-4)); \ EMIT_PTR(bp, MIPS_BCONDZ(mcond_, r_, (u8 *)tcache_ptr-bp-4)); \
} \ } \
} while (0) } while (0)
// nonzero if target is within the +/-128KB reach of a conditional branch
// emitted at tcache_ptr. The byte displacement is biased by 0x20000 so that
// in-range values land in [0, 0x40000) and leave no bits set after >> 18.
// target is parenthesized so that expression arguments (e.g. ptr + n) are
// evaluated in their own type before being converted to a byte pointer.
#define emith_jump_cond_inrange(target) \
!(((u8 *)(target) - (u8 *)tcache_ptr + 0x20000) >> 18)
#define emith_jump_cond_patchable(cond, target) do { \ #define emith_jump_cond_patchable(cond, target) do { \
int r_, mcond_ = emith_cond_check(cond, &r_); \ int r_, mcond_ = emith_cond_check(cond, &r_); \
@ -1199,16 +1188,14 @@ static int emith_cond_check(int cond, int *r)
EMIT_PTR(bp, MIPS_BCONDZ(mcond_, r_, (u8 *)tcache_ptr-bp-4)); \ EMIT_PTR(bp, MIPS_BCONDZ(mcond_, r_, (u8 *)tcache_ptr-bp-4)); \
} while (0) } while (0)
#define emith_jump_cond_inrange(target) \
!(((u8 *)target - (u8 *)tcache_ptr + 0x10000) >> 18)
// NB: returns position of patch for cache maintenance // NB: returns position of patch for cache maintenance
#define emith_jump_patch(ptr, target) ({ \ #define emith_jump_patch(ptr, target, pos) do { \
u32 *ptr_ = (u32 *)ptr-1; /* must skip condition check code */ \ u32 *ptr_ = (u32 *)ptr-1; /* must skip condition check code */ \
while ((ptr_[0] & 0xf8000000) != OP_J << 26) ptr_ ++; \ while ((ptr_[0] & 0xf8000000) != OP_J << 26) ptr_ ++; \
EMIT_PTR(ptr_, MIPS_J((uintptr_t)target & 0x0fffffff)); \ EMIT_PTR(ptr_, MIPS_J((uintptr_t)target & 0x0fffffff)); \
(u8 *)(ptr_-1); \ if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)(ptr_-1); \
}) } while (0)
// size in bytes of the code region handled by emith_jump_patch(); callers
// use it to size the host instruction cache sync after patching
#define emith_jump_patch_size() 4
#define emith_jump_reg(r) \ #define emith_jump_reg(r) \
emith_branch(MIPS_JR(r)) emith_branch(MIPS_JR(r))
@ -1235,11 +1222,6 @@ static int emith_cond_check(int cond, int *r)
emith_call_reg(AT); \ emith_call_reg(AT); \
} while (0) } while (0)
#define emith_call_link(r, target) do { \
EMIT(MIPS_BL(4)); EMIT(MIPS_ADD_IMM(r, LR, 8)); emith_flush(); \
emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)); \
} while (0)
#define emith_call_cleanup() /**/ #define emith_call_cleanup() /**/
#define emith_ret() \ #define emith_ret() \
@ -1250,6 +1232,9 @@ static int emith_cond_check(int cond, int *r)
#define emith_ret_to_ctx(offs) \ #define emith_ret_to_ctx(offs) \
emith_ctx_write_ptr(LR, offs) emith_ctx_write_ptr(LR, offs)
// r = LR + imm (pointer-sized add), i.e. an address relative to the return
// address of the call currently being executed
#define emith_add_r_ret_imm(r, imm) \
emith_add_r_r_ptr_imm(r, LR, imm)
// NB: ABI SP alignment is 8 for compatibility with MIPS IV // NB: ABI SP alignment is 8 for compatibility with MIPS IV
#define emith_push_ret(r) do { \ #define emith_push_ret(r) do { \
emith_sub_r_imm(SP, 8+16); /* reserve new arg save area (16) */ \ emith_sub_r_imm(SP, 8+16); /* reserve new arg save area (16) */ \
@ -1271,7 +1256,6 @@ static int emith_cond_check(int cond, int *r)
// NB: mips32r2 has SYNCI // NB: mips32r2 has SYNCI
#define host_instructions_updated(base, end) __builtin___clear_cache(base, end) #define host_instructions_updated(base, end) __builtin___clear_cache(base, end)
#define emith_update_cache() /**/ #define emith_update_cache() /**/
#define emith_jump_patch_size() 4
#define emith_rw_offs_max() 0x7fff #define emith_rw_offs_max() 0x7fff
// SH2 drc specific // SH2 drc specific

View file

@ -297,54 +297,61 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
// _r_r_r_shift // _r_r_r_shift
#define emith_add_r_r_r_lsl(d, s1, s2, lslimm) do { \ #define emith_add_r_r_r_lsl(d, s1, s2, lslimm) do { \
if (lslimm) { \
int tmp_ = rcache_get_tmp(); \ int tmp_ = rcache_get_tmp(); \
emith_lsl(tmp_, s2, lslimm); \ emith_lsl(tmp_, s2, lslimm); \
emith_add_r_r_r(d, s1, tmp_); \ emith_add_r_r_r(d, s1, tmp_); \
rcache_free_tmp(tmp_); \ rcache_free_tmp(tmp_); \
} else emith_add_r_r_r(d, s1, s2); \
} while (0) } while (0)
#define emith_add_r_r_r_lsl_ptr(d, s1, s2, lslimm) do { \ #define emith_add_r_r_r_lsl_ptr(d, s1, s2, lslimm) do { \
if (lslimm) { \
int tmp_ = rcache_get_tmp(); \ int tmp_ = rcache_get_tmp(); \
emith_lsl(tmp_, s2, lslimm); \ emith_lsl(tmp_, s2, lslimm); \
emith_add_r_r_r_ptr(d, s1, tmp_); \ emith_add_r_r_r_ptr(d, s1, tmp_); \
rcache_free_tmp(tmp_); \ rcache_free_tmp(tmp_); \
} else emith_add_r_r_r_ptr(d, s1, s2); \
} while (0) } while (0)
#define emith_add_r_r_r_lsr(d, s1, s2, lsrimm) do { \ #define emith_add_r_r_r_lsr(d, s1, s2, lsrimm) do { \
if (lsrimm) { \
int tmp_ = rcache_get_tmp(); \ int tmp_ = rcache_get_tmp(); \
emith_lsr(tmp_, s2, lsrimm); \ emith_lsr(tmp_, s2, lsrimm); \
emith_add_r_r_r(d, s1, tmp_); \ emith_add_r_r_r(d, s1, tmp_); \
rcache_free_tmp(tmp_); \ rcache_free_tmp(tmp_); \
} else emith_add_r_r_r(d, s1, s2); \
} while (0) } while (0)
#define emith_sub_r_r_r_lsl(d, s1, s2, lslimm) do { \ #define emith_sub_r_r_r_lsl(d, s1, s2, lslimm) do { \
if (lslimm) { \
int tmp_ = rcache_get_tmp(); \ int tmp_ = rcache_get_tmp(); \
emith_lsl(tmp_, s2, lslimm); \ emith_lsl(tmp_, s2, lslimm); \
emith_sub_r_r_r(d, s1, tmp_); \ emith_sub_r_r_r(d, s1, tmp_); \
rcache_free_tmp(tmp_); \ rcache_free_tmp(tmp_); \
} else emith_sub_r_r_r(d, s1, s2); \
} while (0) } while (0)
#define emith_or_r_r_r_lsl(d, s1, s2, lslimm) do { \ #define emith_or_r_r_r_lsl(d, s1, s2, lslimm) do { \
if (lslimm) { \
int tmp_ = rcache_get_tmp(); \ int tmp_ = rcache_get_tmp(); \
emith_lsl(tmp_, s2, lslimm); \ emith_lsl(tmp_, s2, lslimm); \
emith_or_r_r_r(d, s1, tmp_); \ emith_or_r_r_r(d, s1, tmp_); \
rcache_free_tmp(tmp_); \ rcache_free_tmp(tmp_); \
} else emith_or_r_r_r(d, s1, s2); \
} while (0) } while (0)
// _r_r_shift // _r_r_shift
#define emith_or_r_r_lsl(d, s, lslimm) do { \ #define emith_or_r_r_lsl(d, s, lslimm) \
int tmp_ = rcache_get_tmp(); \ emith_or_r_r_r_lsl(d, d, s, lslimm)
emith_lsl(tmp_, s, lslimm); \
emith_or_r_r(d, tmp_); \
rcache_free_tmp(tmp_); \
} while (0)
// d != s
#define emith_eor_r_r_lsr(d, s, lsrimm) do { \ #define emith_eor_r_r_lsr(d, s, lsrimm) do { \
emith_push(s); \ if (lsrimm) { \
emith_lsr(s, s, lsrimm); \ int tmp_ = rcache_get_tmp(); \
emith_eor_r_r(d, s); \ emith_lsr(tmp_, s, lsrimm); \
emith_pop(s); \ emith_eor_r_r(d, tmp_); \
rcache_free_tmp(tmp_); \
} else emith_eor_r_r(d, s); \
} while (0) } while (0)
// _r_imm // _r_imm
@ -792,14 +799,6 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
EMIT_OP_MODRM64(0x8b, 0, r, 4); \ EMIT_OP_MODRM64(0x8b, 0, r, 4); \
EMIT_SIB64(0, rs, rm); /* mov r, [rm + rs * 1] */ \ EMIT_SIB64(0, rs, rm); /* mov r, [rm + rs * 1] */ \
} while (0) } while (0)
#define emith_read_r_r_r_wb(r, rs, rm) do { \
emith_read_r_r_r(r, rs, rm); \
emith_add_r_r_ptr(rs, rm); \
} while (0)
#define emith_read_r_r_r_ptr_wb(r, rs, rm) do { \
emith_read_r_r_r_ptr(r, rs, rm); \
emith_add_r_r_ptr(rs, rm); \
} while (0)
#define emith_write_r_r_r(r, rs, rm) do { \ #define emith_write_r_r_r(r, rs, rm) do { \
EMIT_XREX_IF(0, r, rm, rs); \ EMIT_XREX_IF(0, r, rm, rs); \
@ -811,15 +810,6 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
EMIT_OP_MODRM64(0x89, 0, r, 4); \ EMIT_OP_MODRM64(0x89, 0, r, 4); \
EMIT_SIB64(0, rs, rm); /* mov [rm + rs * 1], r */ \ EMIT_SIB64(0, rs, rm); /* mov [rm + rs * 1], r */ \
} while (0) } while (0)
#define emith_write_r_r_r_wb(r, rs, rm) do { \
emith_write_r_r_r(r, rs, rm); \
emith_add_r_r_ptr(rs, rm); \
} while (0)
#define emith_write_r_r_r_ptr_wb(r, rs, rm) do { \
emith_write_r_r_r_ptr(r, rs, rm); \
emith_add_r_r_ptr(rs, rm); \
} while (0)
#define emith_ctx_read(r, offs) \ #define emith_ctx_read(r, offs) \
emith_read_r_r_offs(r, CONTEXT_REG, offs) emith_read_r_r_offs(r, CONTEXT_REG, offs)
@ -846,10 +836,11 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
emith_ctx_write(r_, offs_); \ emith_ctx_write(r_, offs_); \
} while (0) } while (0)
// assumes EBX is free
#define emith_ret_to_ctx(offs) do { \ #define emith_ret_to_ctx(offs) do { \
emith_pop(xBX); \ int tmp_ = rcache_get_tmp(); \
emith_ctx_write(xBX, offs); \ emith_pop(tmp_); \
emith_ctx_write(tmp_, offs); \
rcache_free_tmp(tmp_); \
} while (0) } while (0)
#define emith_jump(ptr) do { \ #define emith_jump(ptr) do { \
@ -860,24 +851,24 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
#define emith_jump_patchable(target) \ #define emith_jump_patchable(target) \
emith_jump(target) emith_jump(target)
#define emith_jump_patchable_size() 5 /* JMP rel32 */
#define emith_jump_cond(cond, ptr) do { \ #define emith_jump_cond(cond, ptr) do { \
u32 disp = (u8 *)(ptr) - ((u8 *)tcache_ptr + 6); \ u32 disp = (u8 *)(ptr) - ((u8 *)tcache_ptr + 6); \
EMIT_OP(0x0f80 | (cond)); \ EMIT_OP(0x0f80 | (cond)); \
EMIT(disp, u32); \ EMIT(disp, u32); \
} while (0) } while (0)
#define emith_jump_cond_inrange(ptr) !0
#define emith_jump_cond_patchable(cond, target) \ #define emith_jump_cond_patchable(cond, target) \
emith_jump_cond(cond, target) emith_jump_cond(cond, target)
#define emith_jump_patch(ptr, target) ({ \ #define emith_jump_patch(ptr, target, pos) do { \
u32 disp_ = (u8 *)(target) - ((u8 *)(ptr) + 4); \ u32 disp_ = (u8 *)(target) - ((u8 *)(ptr) + 4); \
u32 offs_ = (*(u8 *)(ptr) == 0x0f) ? 2 : 1; \ u32 offs_ = (*(u8 *)(ptr) == 0x0f) ? 2 : 1; \
EMIT_PTR((u8 *)(ptr) + offs_, disp_ - offs_, u32); \ EMIT_PTR((u8 *)(ptr) + offs_, disp_ - offs_, u32); \
ptr; \ if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr; \
}) } while (0)
#define emith_jump_cond_inrange(ptr) !0
#define emith_jump_patch_size() 6 #define emith_jump_patch_size() 6
#define emith_jump_at(ptr, target) do { \ #define emith_jump_at(ptr, target) do { \
@ -903,20 +894,17 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
EMIT(offs, u32); \ EMIT(offs, u32); \
} while (0) } while (0)
#define emith_call_link(r, target) do { \
EMIT_OP(0xe8); \
EMIT(0, u32); /* call pc+0 */ \
emith_pop(r); \
emith_add_r_r_ptr_imm(r, r, 13); \
emith_jump(target); \
} while (0)
#define emith_call_cleanup() \ #define emith_call_cleanup() \
emith_add_r_r_ptr_imm(xSP, xSP, sizeof(void *)); // remove return addr emith_add_r_r_ptr_imm(xSP, xSP, sizeof(void *)); // remove return addr
#define emith_ret() \ #define emith_ret() \
EMIT_OP(0xc3) EMIT_OP(0xc3)
// r = (pointer read from the top of the host stack, [xSP+0]) + imm.
// Inside a called routine that slot holds the return address, so this yields
// an address relative to the caller's return point; the stack is not popped.
#define emith_add_r_ret_imm(r, imm) do { \
emith_read_r_r_offs_ptr(r, xSP, 0); \
emith_add_r_r_ptr_imm(r, r, imm); \
} while (0)
#define emith_jump_reg(r) \ #define emith_jump_reg(r) \
EMIT_OP_MODRM(0xff, 3, 4, r) EMIT_OP_MODRM(0xff, 3, 4, r)

View file

@ -40,7 +40,7 @@
#define PROPAGATE_CONSTANTS 1 #define PROPAGATE_CONSTANTS 1
#define LINK_BRANCHES 1 #define LINK_BRANCHES 1
#define BRANCH_CACHE 1 #define BRANCH_CACHE 1
#define CALL_STACK 0 #define CALL_STACK 1
#define ALIAS_REGISTERS 1 #define ALIAS_REGISTERS 1
#define REMAP_REGISTER 1 #define REMAP_REGISTER 1
#define LOOP_DETECTION 1 #define LOOP_DETECTION 1
@ -635,7 +635,7 @@ static signed char reg_map_host[HOST_REGS];
static void REGPARM(1) (*sh2_drc_entry)(SH2 *sh2); static void REGPARM(1) (*sh2_drc_entry)(SH2 *sh2);
static void REGPARM(1) (*sh2_drc_dispatcher)(u32 pc); static void REGPARM(1) (*sh2_drc_dispatcher)(u32 pc);
#if CALL_STACK #if CALL_STACK
static void REGPARM(2) (*sh2_drc_dispatcher_call)(u32 pc, uptr host_pr); static u32 REGPARM(2) (*sh2_drc_dispatcher_call)(u32 pc);
static void REGPARM(1) (*sh2_drc_dispatcher_return)(u32 pc); static void REGPARM(1) (*sh2_drc_dispatcher_return)(u32 pc);
#endif #endif
static void REGPARM(1) (*sh2_drc_exit)(u32 pc); static void REGPARM(1) (*sh2_drc_exit)(u32 pc);
@ -1150,7 +1150,8 @@ static void dr_block_link(struct block_entry *be, struct block_link *bl, int emi
bl->jump, bl->target_pc, be->tcache_ptr); bl->jump, bl->target_pc, be->tcache_ptr);
if (emit_jump) { if (emit_jump) {
u8 *jump = emith_jump_patch(bl->jump, be->tcache_ptr); u8 *jump;
emith_jump_patch(bl->jump, be->tcache_ptr, &jump);
// only needs sync if patch is possibly crossing cacheline (assume 16 byte) // only needs sync if patch is possibly crossing cacheline (assume 16 byte)
if ((uintptr_t)jump >>4 != ((uintptr_t)jump+emith_jump_patch_size()-1) >>4) if ((uintptr_t)jump >>4 != ((uintptr_t)jump+emith_jump_patch_size()-1) >>4)
host_instructions_updated(jump, jump+emith_jump_patch_size()); host_instructions_updated(jump, jump+emith_jump_patch_size());
@ -1171,7 +1172,8 @@ static void dr_block_unlink(struct block_link *bl, int emit_jump)
if (bl->target) { if (bl->target) {
if (emit_jump) { if (emit_jump) {
u8 *jump = emith_jump_patch(bl->jump, sh2_drc_dispatcher); u8 *jump;
emith_jump_patch(bl->jump, sh2_drc_dispatcher, &jump);
// update cpu caches since the previous jump target doesn't exist anymore // update cpu caches since the previous jump target doesn't exist anymore
host_instructions_updated(jump, jump+emith_jump_patch_size()); host_instructions_updated(jump, jump+emith_jump_patch_size());
} }
@ -1381,7 +1383,7 @@ static void rcache_remap_vreg(int x);
{ d = 1; printf("cache check r=%d c=%d not connected?\n", i, gp->cnst); }\ { d = 1; printf("cache check r=%d c=%d not connected?\n", i, gp->cnst); }\
if ((gp->flags & GRF_CDIRTY) && (gp->vreg != -1 || !(gp->flags & GRF_CONST)))\ if ((gp->flags & GRF_CDIRTY) && (gp->vreg != -1 || !(gp->flags & GRF_CONST)))\
{ d = 1; printf("cache check r=%d CDIRTY?\n", i); } \ { d = 1; printf("cache check r=%d CDIRTY?\n", i); } \
if (gp->flags & GRF_PINNED) { \ if (gp->flags & (GRF_STATIC|GRF_PINNED)) { \
if (gp->sreg == -1 || !(cache_regs[gp->sreg].flags & HRF_PINNED))\ if (gp->sreg == -1 || !(cache_regs[gp->sreg].flags & HRF_PINNED))\
{ d = 1; printf("cache check r=%d v=%d not pinned?\n", i, gp->vreg); } \ { d = 1; printf("cache check r=%d v=%d not pinned?\n", i, gp->vreg); } \
else m &= ~(1 << gp->sreg); \ else m &= ~(1 << gp->sreg); \
@ -4407,7 +4409,7 @@ end_op:
} }
#endif #endif
rcache_unlock_all(); // may lock delay_reg rcache_unlock_all();
#if LOOP_OPTIMIZER #if LOOP_OPTIMIZER
if (target && pinned_loop_pc[pinned_loop_count] == target_pc) { if (target && pinned_loop_pc[pinned_loop_count] == target_pc) {
rcache_unpin_all(); rcache_unpin_all();
@ -4427,31 +4429,27 @@ end_op:
#if CALL_STACK #if CALL_STACK
if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) { if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) {
// BSR // BSR
tmp = rcache_get_tmp_arg(1); emith_call(sh2_drc_dispatcher_call);
emith_call_link(tmp, sh2_drc_dispatcher_call); }
rcache_free_tmp(tmp);
} else
#endif #endif
target = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); target = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id);
patchable = 1; patchable = 1;
} }
// create branch // create branch
if (patchable) { if (cond != -1) {
if (cond != -1) if (patchable)
emith_jump_cond_patchable(cond, target); emith_jump_cond_patchable(cond, target);
else if (target != NULL) { else
rcache_invalidate();
emith_jump_patchable(target);
}
} else {
if (cond != -1)
emith_jump_cond(cond, target); emith_jump_cond(cond, target);
else if (target != NULL) { } else {
rcache_invalidate(); rcache_invalidate();
if (patchable)
emith_jump_patchable(target);
else
emith_jump(target); emith_jump(target);
} }
}
// branch not taken, correct cycle count // branch not taken, correct cycle count
if (ctaken) if (ctaken)
@ -4476,14 +4474,14 @@ end_op:
rcache_invalidate(); rcache_invalidate();
#if CALL_STACK #if CALL_STACK
struct op_data *opd_b = (op_flags[i] & OF_DELAY_OP) ? opd-1 : opd; struct op_data *opd_b = (op_flags[i] & OF_DELAY_OP) ? opd-1 : opd;
if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) {
// JSR/BSRF
emith_call(sh2_drc_dispatcher_call);
}
if (opd_b->rm == SHR_PR) { if (opd_b->rm == SHR_PR) {
// RTS // RTS
emith_jump(sh2_drc_dispatcher_return); emith_jump(sh2_drc_dispatcher_return);
} else if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) {
// JSR/BSRF
tmp = rcache_get_tmp_arg(1);
emith_call_link(tmp, sh2_drc_dispatcher_call);
rcache_free(tmp);
} else } else
#endif #endif
if (gconst_get(SHR_PC, &target_pc)) { if (gconst_get(SHR_PC, &target_pc)) {
@ -4544,7 +4542,7 @@ end_op:
rcache_flush(); rcache_flush();
emith_jump(sh2_drc_dispatcher); emith_jump(sh2_drc_dispatcher);
} }
emith_jump_patch(branch_patch_ptr[i], target); emith_jump_patch(branch_patch_ptr[i], target, NULL);
} }
emith_pool_commit(0); emith_pool_commit(0);
@ -4713,20 +4711,6 @@ static void sh2_generate_utils(void)
emith_sh2_drc_exit(); emith_sh2_drc_exit();
emith_flush(); emith_flush();
#if CALL_STACK
// sh2_drc_dispatcher_call(u32 pc, uptr host_pr)
sh2_drc_dispatcher_call = (void *)tcache_ptr;
emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx));
emith_add_r_imm(arg2, 2*sizeof(void *));
emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *));
emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx));
emith_add_r_r_ptr_imm(arg3, CONTEXT_REG, offsetof(SH2, rts_cache) + sizeof(void *));
emith_write_r_r_r_ptr_wb(arg1, arg2, arg3);
emith_ctx_read(arg3, SHR_PR * 4);
emith_write_r_r_offs(arg3, arg2, (s8)-sizeof(void *));
emith_flush();
// FALLTHROUGH
#endif
// sh2_drc_dispatcher(u32 pc) // sh2_drc_dispatcher(u32 pc)
sh2_drc_dispatcher = (void *)tcache_ptr; sh2_drc_dispatcher = (void *)tcache_ptr;
emith_ctx_write(arg0, SHR_PC * 4); emith_ctx_write(arg0, SHR_PC * 4);
@ -4782,30 +4766,44 @@ static void sh2_generate_utils(void)
emith_flush(); emith_flush();
#if CALL_STACK #if CALL_STACK
// pc = sh2_drc_dispatcher_call(u32 pc)
sh2_drc_dispatcher_call = (void *)tcache_ptr;
emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx));
emith_ctx_read(arg1, SHR_PR * 4);
emith_add_r_imm(arg2, 2*sizeof(void *));
emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *));
emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx));
emith_add_r_r_r_lsl_ptr(arg2, CONTEXT_REG, arg2, 0);
emith_write_r_r_offs(arg1, arg2, offsetof(SH2, rts_cache));
emith_add_r_ret_imm(arg1, emith_jump_patchable_size()); // skip jump_patchable for rts host address
emith_write_r_r_offs_ptr(arg1, arg2, offsetof(SH2, rts_cache) + sizeof(void *));
emith_ret();
emith_flush();
// sh2_drc_dispatcher_return(u32 pc) // sh2_drc_dispatcher_return(u32 pc)
sh2_drc_dispatcher_return = (void *)tcache_ptr; sh2_drc_dispatcher_return = (void *)tcache_ptr;
emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx)); emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx));
emith_add_r_r_ptr_imm(arg1, CONTEXT_REG, offsetof(SH2, rts_cache)); emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg2, 0);
emith_read_r_r_r_wb(arg3, arg1, arg2); emith_read_r_r_offs(arg3, arg1, offsetof(SH2, rts_cache));
emith_cmp_r_r(arg0, arg3); emith_cmp_r_r(arg0, arg3);
#if (DRC_DEBUG & 128) #if (DRC_DEBUG & 128)
EMITH_SJMP_START(DCOND_EQ); EMITH_SJMP_START(DCOND_EQ);
emith_move_r_ptr_imm(arg2, (uptr)&rcmiss); emith_move_r_ptr_imm(arg3, (uptr)&rcmiss);
emith_read_r_r_offs_c(DCOND_NE, arg1, arg2, 0); emith_read_r_r_offs_c(DCOND_NE, arg1, arg3, 0);
emith_add_r_imm_c(DCOND_NE, arg1, 1); emith_add_r_imm_c(DCOND_NE, arg1, 1);
emith_write_r_r_offs_c(DCOND_NE, arg1, arg2, 0); emith_write_r_r_offs_c(DCOND_NE, arg1, arg3, 0);
EMITH_SJMP_END(DCOND_EQ); EMITH_SJMP_END(DCOND_EQ);
#endif #endif
emith_jump_cond(DCOND_NE, sh2_drc_dispatcher); emith_jump_cond(DCOND_NE, sh2_drc_dispatcher);
emith_read_r_r_offs_ptr(arg0, arg1, sizeof(void *)); emith_read_r_r_offs_ptr(arg0, arg1, offsetof(SH2, rts_cache) + sizeof(void *));
emith_sub_r_imm(arg2, 2*sizeof(void *)); emith_sub_r_imm(arg2, 2*sizeof(void *));
emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *)); emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *));
emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx)); emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx));
#if (DRC_DEBUG & 128) #if (DRC_DEBUG & 128)
emith_move_r_ptr_imm(arg2, (uptr)&rchit); emith_move_r_ptr_imm(arg3, (uptr)&rchit);
emith_read_r_r_offs(arg1, arg2, 0); emith_read_r_r_offs(arg1, arg3, 0);
emith_add_r_imm(arg1, 1); emith_add_r_imm(arg1, 1);
emith_write_r_r_offs(arg1, arg2, 0); emith_write_r_r_offs(arg1, arg3, 0);
#endif #endif
emith_jump_reg(arg0); emith_jump_reg(arg0);
emith_flush(); emith_flush();