sh2 drc: drc exit, block linking and branch handling revised

kub 2019-09-28 16:39:26 +02:00
parent 36614252d9
commit 06bc3c0693
5 changed files with 462 additions and 246 deletions

=== changed file 1/5: ARM code emitter ===

@@ -631,8 +631,8 @@ static void emith_pool_commit(int jumpover)
 static inline void emith_pool_check(void)
 {
     // check if pool must be committed
-    if (literal_iindex > MAX_HOST_LITERALS-4 ||
-        (u8 *)tcache_ptr - (u8 *)literal_insn[0] > 0xe00)
+    if (literal_iindex > MAX_HOST_LITERALS-4 || (literal_pindex &&
+        (u8 *)tcache_ptr - (u8 *)literal_insn[0] > 0xe00))
         // pool full, or displacement is approaching the limit
         emith_pool_commit(1);
 }
@@ -889,11 +889,19 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
 #define emith_tst_r_imm_c(cond, r, imm) \
     emith_top_imm(cond, A_OP_TST, r, imm)

-#define emith_move_r_imm_s8(r, imm) do { \
+#define emith_move_r_imm_s8_patchable(r, imm) do { \
+    emith_flush(); \
     if ((s8)(imm) < 0) \
-        EOP_MVN_IMM(r, 0, ((u8)(imm) ^ 0xff)); \
+        EOP_MVN_IMM(r, 0, (u8)~(imm)); \
     else \
-        EOP_MOV_IMM(r, 0, (u8)imm); \
+        EOP_MOV_IMM(r, 0, (u8)(imm)); \
+} while (0)
+#define emith_move_r_imm_s8_patch(ptr, imm) do { \
+    u32 *ptr_ = (u32 *)ptr; u32 op_ = *ptr_ & 0xfe1ff000; \
+    if ((s8)(imm) < 0) \
+        EMIT_PTR(ptr_, op_ | (A_OP_MVN<<21) | (u8)~(imm)); \
+    else \
+        EMIT_PTR(ptr_, op_ | (A_OP_MOV<<21) | (u8)(imm)); \
 } while (0)

 #define emith_and_r_r_imm(d, s, imm) \
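Note: the old emith_move_r_imm_s8 was an ordinary immediate move; the new _patchable/_patch pair splits it into "emit a placeholder now, rewrite the value later", which the SH2 translator (last file below) uses for the BSR/JSR return offset it only knows at the end of branch handling. A minimal usage sketch; patch_site and ret_addr are illustrative names, not from this commit:

    // emit a move with a dummy value and remember where it landed;
    // the emith_flush() inside the macro guarantees the insn really
    // sits at tcache_ptr and not in the emitter's insn queue
    u8 *patch_site = tcache_ptr;
    emith_move_r_imm_s8_patchable(arg1, 0);   // placeholder value
    /* ... more code is emitted here ... */
    // once the real (small, s8-range) offset is known, rewrite in place
    emith_move_r_imm_s8_patch(patch_site, tcache_ptr - ret_addr);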
@@ -1125,7 +1133,6 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
 #define emith_jump_patchable(target) \
     emith_jump(target)
-#define emith_jump_patchable_size() 4

 #define emith_jump_cond(cond, target) \
     emith_xbranch(cond, target, 0)
@@ -1135,18 +1142,19 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
     emith_jump_cond(cond, target)

 #define emith_jump_patch(ptr, target, pos) do { \
-    u32 *ptr_ = ptr; \
+    u32 *ptr_ = (u32 *)ptr; \
     u32 val_ = (u32 *)(target) - ptr_ - 2; \
     *ptr_ = (*ptr_ & 0xff000000) | (val_ & 0x00ffffff); \
     if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr; \
 } while (0)
+#define emith_jump_patch_inrange(ptr, target) !0
 #define emith_jump_patch_size() 4

 #define emith_jump_at(ptr, target) do { \
     u32 val_ = (u32 *)(target) - (u32 *)(ptr) - 2; \
-    emith_flush(); \
     EOP_C_B_PTR(ptr, A_COND_AL, 0, val_ & 0xffffff); \
 } while (0)
+#define emith_jump_at_size() 4

 #define emith_jump_reg_c(cond, r) \
     EOP_C_BX(cond, r)
@@ -1187,8 +1195,8 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
 #define emith_ret_to_ctx(offs) \
     emith_ctx_write(LR, offs)

-#define emith_add_r_ret_imm(r, imm) \
-    emith_add_r_r_ptr_imm(r, LR, imm)
+#define emith_add_r_ret(r) \
+    emith_add_r_r_ptr(r, LR)

 /* pushes r12 for eabi alignment */
 #define emith_push_ret(r) do { \

=== changed file 2/5: ARM64 code emitter ===

@@ -447,6 +447,8 @@ enum { AM_IDX, AM_IDXPOST, AM_IDXREG, AM_IDXPRE };
 #define emith_eor_r_r_r(d, s1, s2) \
     emith_eor_r_r_r_lsl(d, s1, s2, 0)

+#define emith_add_r_r_r_ptr(d, s1, s2) \
+    emith_add_r_r_r_lsl_ptr(d, s1, s2, 0)
 #define emith_and_r_r_r(d, s1, s2) \
     emith_and_r_r_r_lsl(d, s1, s2, 0)
@@ -546,6 +548,20 @@ static void emith_move_imm64(int r, int wx, int64_t imm)
 #define emith_move_r_imm_c(cond, r, imm) \
     emith_move_r_imm(r, imm)

+#define emith_move_r_imm_s8_patchable(r, imm) do { \
+    if ((s8)(imm) < 0) \
+        EMIT(A64_MOVN_IMM(r, ~(s8)(imm), 0)); \
+    else \
+        EMIT(A64_MOVZ_IMM(r, (s8)(imm), 0)); \
+} while (0)
+#define emith_move_r_imm_s8_patch(ptr, imm) do { \
+    u32 *ptr_ = (u32 *)ptr; \
+    int r_ = *ptr_ & 0x1f; \
+    if ((s8)(imm) < 0) \
+        EMIT_PTR(ptr_, A64_MOVN_IMM(r_, ~(s8)(imm), 0)); \
+    else \
+        EMIT_PTR(ptr_, A64_MOVZ_IMM(r_, (s8)(imm), 0)); \
+} while (0)

 // arithmetic, immediate
 static void emith_arith_imm(int op, int wx, int rd, int rn, s32 imm)
@@ -995,16 +1011,6 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode)
     emith_move_r_imm(arg, imm)

 // branching; NB: A64 B.cond has only +/- 1MB range
-#define emith_bcond(ptr, patch, cond, target) do { \
-    u32 disp_ = (u8 *)target - (u8 *)ptr; \
-    if (disp_ >= 0xfff00000 || disp_ <= 0x000fffff) { /* can use near B.c */ \
-        EMIT_PTR(ptr, A64_BCOND(cond, disp_ & 0x001fffff)); \
-        if (patch) EMIT_PTR(ptr, A64_NOP); /* reserve space for far B */ \
-    } else { /* far branch if near branch isn't possible */ \
-        EMIT_PTR(ptr, A64_BCOND(emith_invert_cond(cond), 8)); \
-        EMIT_PTR(ptr, A64_B((disp_ - 4) & 0x0fffffff)); \
-    } \
-} while (0)

 #define emith_jump(target) do {\
     u32 disp_ = (u8 *)target - (u8 *)tcache_ptr; \
@@ -1013,30 +1019,37 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode)
 #define emith_jump_patchable(target) \
     emith_jump(target)
-#define emith_jump_patchable_size() 4

-#define emith_jump_cond(cond, target) \
-    emith_bcond(tcache_ptr, 0, cond, target)
+#define emith_jump_cond(cond, target) do { \
+    u32 disp_ = (u8 *)target - (u8 *)tcache_ptr; \
+    EMIT(A64_BCOND(cond, disp_ & 0x001fffff)); \
+} while (0)

 #define emith_jump_cond_patchable(cond, target) \
-    emith_bcond(tcache_ptr, 1, cond, target)
+    emith_jump_cond(cond, target)

 #define emith_jump_cond_inrange(target) \
     !(((u8 *)target - (u8 *)tcache_ptr + 0x100000) >> 21)

 #define emith_jump_patch(ptr, target, pos) do { \
     u32 *ptr_ = (u32 *)ptr; \
-    u32 disp_ = (u8 *)(target) - (u8 *)(ptr_); \
-    int cond_ = ptr_[0] & 0xf; \
-    if ((ptr_[0] & 0xff000000) == 0x54000000) { /* B.cond */ \
-        if (ptr_[1] != A64_NOP) cond_ = emith_invert_cond(cond_); \
-        emith_bcond(ptr_, 1, cond_, target); \
-    } else if (ptr_[0] & 0x80000000) \
-        EMIT_PTR(ptr_, A64_BL((disp_) & 0x0fffffff)); \
-    else EMIT_PTR(ptr_, A64_B((disp_) & 0x0fffffff)); \
-    if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr; \
+    u32 disp_ = (u8 *)target - (u8 *)ptr, mask_; \
+    if ((*ptr_ & 0xff000000) == 0x54000000) \
+        mask_ = 0xff00001f, disp_ <<= 5; /* B.cond, range 21 bit */ \
+    else mask_ = 0xfc000000; /* B[L], range 28 bit */ \
+    EMIT_PTR(ptr_, (*ptr_ & mask_) | ((disp_ >> 2) & ~mask_)); \
+    if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)(ptr_-1); \
 } while (0)
-#define emith_jump_patch_size() 8
+#define emith_jump_patch_inrange(ptr, target) \
+    !(((u8 *)target - (u8 *)ptr + 0x100000) >> 21)
+#define emith_jump_patch_size() 4

+#define emith_jump_at(ptr, target) do { \
+    u32 disp_ = (u8 *)target - (u8 *)ptr; \
+    EMIT_PTR(ptr, A64_B(disp_ & 0x0fffffff)); \
+} while (0)
+#define emith_jump_at_size() 4

 #define emith_jump_reg(r) \
     EMIT(A64_BR(r))
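Note: the rewritten emith_jump_patch no longer needs the near/far logic of the removed emith_bcond: it keeps the fixed fields of the existing insn (selected by mask_) and splices the new word displacement into the rest. A self-contained sketch of the same bit manipulation, with the encoding facts spelled out (function name is illustrative):

    /* A64: B.cond keeps opcode (bits 31-24) and cond (bits 4-0), with
     * imm19 at bits 23-5; B/BL keep bits 31-26, with imm26 at bits 25-0.
     * disp is a byte offset; insns are words, hence the >> 2. */
    static void a64_patch_branch(u32 *insn, void *target)
    {
        u32 disp = (u8 *)target - (u8 *)insn, mask;
        if ((*insn & 0xff000000) == 0x54000000) {
            mask = 0xff00001f;  /* B.cond */
            disp <<= 5;         /* so (disp >> 2) lands in imm19 at bit 5 */
        } else
            mask = 0xfc000000;  /* B, BL */
        *insn = (*insn & mask) | ((disp >> 2) & ~mask);
    }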
@@ -1079,8 +1092,8 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode)
 #define emith_ret_to_ctx(offs) \
     emith_ctx_write_ptr(LR, offs)

-#define emith_add_r_ret_imm(r, imm) \
-    emith_add_r_r_ptr_imm(r, LR, imm)
+#define emith_add_r_ret(r) \
+    emith_add_r_r_r_ptr(r, LR, r)

 // NB: pushes r or r18 for SP hardware alignment
 #define emith_push_ret(r) do { \

=== changed file 3/5: MIPS code emitter ===

@@ -285,7 +285,7 @@ static int emith_b_isswap(u32 bop, u32 lop)
         return bop;
     else if (emith_is_b(bop) && emith_rd(lop) != emith_rs(bop))
         if ((bop & 0xffff) != 0x7fff) // displacement overflow?
-            return (bop & 0xffff0000) | ((bop & 0xffff)+1);
+            return (bop & 0xffff0000) | ((bop+1) & 0x0000ffff);
     return 0;
 }
@@ -332,14 +332,14 @@ static void *emith_branch(u32 op)
 #define JMP_EMIT(cond, ptr) { \
     u32 val_ = (u8 *)tcache_ptr - (u8 *)(ptr) - 4; \
+    EMIT_PTR(ptr, MIPS_BCONDZ(cond_m, cond_r, val_ & 0x0003ffff)); \
     emith_flush(); /* NO delay slot handling across jump targets */ \
-    EMIT_PTR(ptr, MIPS_BCONDZ(cond_m, cond_r, val_ & 0x0003ffff)); \
 }

 #define JMP_EMIT_NC(ptr) { \
     u32 val_ = (u8 *)tcache_ptr - (u8 *)(ptr) - 4; \
+    EMIT_PTR(ptr, MIPS_B(val_ & 0x0003ffff)); \
     emith_flush(); \
-    EMIT_PTR(ptr, MIPS_B(val_ & 0x0003ffff)); \
 }

 #define EMITH_JMP_START(cond) { \
@@ -645,6 +645,13 @@ static void emith_move_imm(int r, uintptr_t imm)
 #define emith_move_r_imm_c(cond, r, imm) \
     emith_move_r_imm(r, imm)

+#define emith_move_r_imm_s8_patchable(r, imm) \
+    EMIT(MIPS_ADD_IMM(r, Z0, (s8)(imm)))
+#define emith_move_r_imm_s8_patch(ptr, imm) do { \
+    u32 *ptr_ = (u32 *)ptr; \
+    while (*ptr_ >> 26 != OP_ADDIU) ptr_++; \
+    EMIT_PTR(ptr_, (*ptr_ & 0xffff0000) | (u16)(s8)(imm)); \
+} while (0)

 // arithmetic, immediate
 static void emith_arith_imm(int op, int rd, int rs, u32 imm)
@@ -1162,41 +1169,44 @@ static int emith_cond_check(int cond, int *r)
     emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff))
 #define emith_jump_patchable(target) \
     emith_jump(target)
-#define emith_jump_patchable_size() 8 /* J+delayslot */

 // NB: MIPS conditional branches have only +/- 128KB range
 #define emith_jump_cond(cond, target) do { \
     int r_, mcond_ = emith_cond_check(cond, &r_); \
     u32 disp_ = (u8 *)target - (u8 *)tcache_ptr - 4; \
-    if (disp_ >= 0xfffe0000 || disp_ <= 0x0001ffff) { /* can use near B */ \
-        emith_branch(MIPS_BCONDZ(mcond_,r_,disp_ & 0x0003ffff)); \
-    } else { /* far branch if near branch isn't possible */ \
-        mcond_ = emith_invert_branch(mcond_); \
-        u8 *bp = emith_branch(MIPS_BCONDZ(mcond_, r_, 0)); \
-        emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)); \
-        EMIT_PTR(bp, MIPS_BCONDZ(mcond_, r_, (u8 *)tcache_ptr-bp-4)); \
-    } \
+    emith_branch(MIPS_BCONDZ(mcond_,r_,disp_ & 0x0003ffff)); \
 } while (0)
-#define emith_jump_cond_inrange(target) \
-    !(((u8 *)target - (u8 *)tcache_ptr + 0x20000) >> 18)

-#define emith_jump_cond_patchable(cond, target) do { \
-    int r_, mcond_ = emith_cond_check(cond, &r_); \
-    mcond_ = emith_invert_branch(mcond_); \
-    u8 *bp = emith_branch(MIPS_BCONDZ(mcond_, r_, 0));\
-    emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)); \
-    EMIT_PTR(bp, MIPS_BCONDZ(mcond_, r_, (u8 *)tcache_ptr-bp-4)); \
-} while (0)
+#define emith_jump_cond_patchable(cond, target) \
+    emith_jump_cond(cond, target)
+#define emith_jump_cond_inrange(target) \
+    ((u8 *)target - (u8 *)tcache_ptr - 4 < 0x00020000U || \
+     (u8 *)target - (u8 *)tcache_ptr - 4 >= 0xfffe0010U) // mind cond_check

 // NB: returns position of patch for cache maintenance
 #define emith_jump_patch(ptr, target, pos) do { \
     u32 *ptr_ = (u32 *)ptr-1; /* must skip condition check code */ \
-    while ((ptr_[0] & 0xf8000000) != OP_J << 26) ptr_ ++; \
-    EMIT_PTR(ptr_, MIPS_J((uintptr_t)target & 0x0fffffff)); \
+    u32 disp_, mask_; \
+    while (!emith_is_j(*ptr_) && !emith_is_b(*ptr_)) ptr_ ++; \
+    if (emith_is_b(*ptr_)) \
+        mask_ = 0xffff0000, disp_ = (u8 *)target - (u8 *)ptr_ - 4; \
+    else mask_ = 0xfc000000, disp_ = (uintptr_t)target; \
+    EMIT_PTR(ptr_, (*ptr_ & mask_) | ((disp_ >> 2) & ~mask_)); \
     if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)(ptr_-1); \
 } while (0)
+#define emith_jump_patch_inrange(ptr, target) \
+    ((u8 *)target - (u8 *)ptr - 4 < 0x00020000U || \
+     (u8 *)target - (u8 *)ptr - 4 >= 0xfffe0010U) // mind cond_check
 #define emith_jump_patch_size() 4

+#define emith_jump_at(ptr, target) do { \
+    u32 *ptr_ = (u32 *)ptr; \
+    EMIT_PTR(ptr_, MIPS_J((uintptr_t)target & 0x0fffffff)); \
+    EMIT_PTR(ptr_, MIPS_NOP); \
+} while (0)
+#define emith_jump_at_size() 8

 #define emith_jump_reg(r) \
     emith_branch(MIPS_JR(r))
 #define emith_jump_reg_c(cond, r) \
@@ -1232,8 +1242,8 @@ static int emith_cond_check(int cond, int *r)
 #define emith_ret_to_ctx(offs) \
     emith_ctx_write_ptr(LR, offs)

-#define emith_add_r_ret_imm(r, imm) \
-    emith_add_r_r_ptr_imm(r, LR, imm)
+#define emith_add_r_ret(r) \
+    emith_add_r_r_ptr(r, LR)

 // NB: ABI SP alignment is 8 for compatibility with MIPS IV
 #define emith_push_ret(r) do { \

=== changed file 4/5: x86 code emitter ===

@@ -371,8 +371,16 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
     } \
 } while (0)

-#define emith_move_r_imm_s8(r, imm) \
-    emith_move_r_imm(r, (u32)(signed int)(signed char)(imm))
+#define emith_move_r_imm_s8_patchable(r, imm) do { \
+    EMIT_REX_IF(0, 0, r); \
+    EMIT_OP(0xb8 + ((r)&7)); \
+    EMIT((s8)(imm), u32); \
+} while (0)
+#define emith_move_r_imm_s8_patch(ptr, imm) do { \
+    u8 *ptr_ = ptr; \
+    while ((*ptr_ & 0xf8) != 0xb8) ptr_++; \
+    EMIT_PTR(ptr_ + 1, (s8)(imm), u32); \
+} while (0)

 #define emith_arith_r_imm(op, r, imm) do { \
     EMIT_REX_IF(0, 0, r); \
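Note: on x86 the patch site is found by scanning rather than by a fixed offset, because the mov may be preceded by a REX prefix on x86-64. Any opcode byte 0xb8..0xbf is "mov r32, imm32" with the register in the low 3 bits, and the imm32 follows directly. A standalone sketch of the same idea (function name is illustrative):

    #include <string.h>
    /* rewrite the imm32 of a "mov r32, imm32" located at/after p */
    static void patch_mov_imm(unsigned char *p, int imm)
    {
        while ((*p & 0xf8) != 0xb8)        /* skip REX prefix, if any */
            p++;
        memcpy(p + 1, &imm, sizeof(imm));  /* unaligned LE store */
    }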
@@ -851,7 +859,6 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
 #define emith_jump_patchable(target) \
     emith_jump(target)
-#define emith_jump_patchable_size() 5 /* JMP rel32 */

 #define emith_jump_cond(cond, ptr) do { \
     u32 disp = (u8 *)(ptr) - ((u8 *)tcache_ptr + 6); \
@@ -867,15 +874,17 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
     u32 disp_ = (u8 *)(target) - ((u8 *)(ptr) + 4); \
     u32 offs_ = (*(u8 *)(ptr) == 0x0f) ? 2 : 1; \
     EMIT_PTR((u8 *)(ptr) + offs_, disp_ - offs_, u32); \
-    if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr; \
+    if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr + offs_; \
 } while (0)
-#define emith_jump_patch_size() 6
+#define emith_jump_patch_size() 4
+#define emith_jump_patch_inrange(ptr, target) !0

 #define emith_jump_at(ptr, target) do { \
     u32 disp_ = (u8 *)(target) - ((u8 *)(ptr) + 5); \
     EMIT_PTR(ptr, 0xe9, u8); \
     EMIT_PTR((u8 *)(ptr) + 1, disp_, u32); \
 } while (0)
+#define emith_jump_at_size() 5

 #define emith_call(ptr) do { \
     u32 disp = (u8 *)(ptr) - ((u8 *)tcache_ptr + 5); \
@@ -900,9 +909,9 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
 #define emith_ret() \
     EMIT_OP(0xc3)

-#define emith_add_r_ret_imm(r, imm) do { \
-    emith_read_r_r_offs_ptr(r, xSP, 0); \
-    emith_add_r_r_ptr_imm(r, r, imm); \
+#define emith_add_r_ret(r) do { \
+    EMIT_REX_IF(1, r, xSP); \
+    emith_deref_modrm(0x03, 0, r, xSP); /* add r, [xsp] */ \
 } while (0)

 #define emith_jump_reg(r) \
@@ -974,7 +983,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
     emith_move_r_imm(rd, imm); \
 } while (0)

-#define host_instructions_updated(base, end)
+#define host_instructions_updated(base, end) (void)(base),(void)(end)
 #define emith_update_cache() /**/

 #define emith_rw_offs_max() 0xffffffff

=== changed file 5/5: SH2 DRC core (sh2_translate) ===

@@ -69,7 +69,7 @@
 // 800 - state dump on exit
 // {
 #ifndef DRC_DEBUG
-#define DRC_DEBUG 0//x8e7
+#define DRC_DEBUG 0//x8c7
 #endif

 #if DRC_DEBUG
@@ -288,15 +288,19 @@ static u8 *tcache_ptr;
 #define MAX_BLOCK_ENTRIES (BLOCK_INSN_LIMIT / 6)

+enum { BL_JMP=1, BL_LDJMP, BL_JCCBLX };
 struct block_link {
+    short tcache_id;
+    short type;             // BL_JMP et al
     u32 target_pc;
     void *jump;             // insn address
+    void *blx;              // block link/exit area if any
+    u8 jdisp[8];            // jump backup buffer
     struct block_link *next;    // either in block_entry->links or unresolved
     struct block_link *o_next;  // ...in block_entry->o_links
     struct block_link *prev;
     struct block_link *o_prev;
     struct block_entry *target; // target block this is linked in (be->links)
-    int tcache_id;
 };

 struct block_entry {
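Note: the new fields record what kind of code sits at the link site, so linking/unlinking (dr_block_link/dr_block_unlink below) knows how to rewrite it, and jdisp keeps a copy of the original bytes so unlinking can restore them. In summary, derived from those two functions:

    enum { BL_JMP=1,    /* site is a lone far jump: retargeted in place */
           BL_LDJMP,    /* site is "load #pc, far jump @dispatcher":
                         * overwritten with a direct jump when linked,
                         * restored from jdisp[] when unlinked */
           BL_JCCBLX }; /* site is a short conditional jump: aimed at the
                         * target if in range, else at a far jump placed
                         * in the block's blx (link/exit) area */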
@@ -686,18 +690,24 @@ static int dr_ctx_get_mem_ptr(SH2 *sh2, u32 a, u32 *mask)
     return poffs;
 }

-static struct block_entry *dr_get_entry(u32 pc, int is_slave, int *tcache_id)
+static int dr_get_tcache_id(u32 pc, int is_slave)
 {
-    struct block_entry *be;
     u32 tcid = 0;

     if ((pc & 0xe0000000) == 0xc0000000)
         tcid = 1 + is_slave; // data array
     if ((pc & ~0xfff) == 0)
         tcid = 1 + is_slave; // BIOS
-    *tcache_id = tcid;
+    return tcid;
+}

-    be = HASH_FUNC(hash_tables[tcid], pc, HASH_TABLE_SIZE(tcid) - 1);
+static struct block_entry *dr_get_entry(u32 pc, int is_slave, int *tcache_id)
+{
+    struct block_entry *be;
+
+    *tcache_id = dr_get_tcache_id(pc, is_slave);
+    be = HASH_FUNC(hash_tables[*tcache_id], pc, HASH_TABLE_SIZE(*tcache_id) - 1);
     if (be != NULL) // don't ask... gcc code generation hint
         for (; be != NULL; be = be->next)
             if (be->pc == pc)
@@ -1101,17 +1111,11 @@ static struct block_desc *dr_add_block(u32 addr, int size,
     bd->size_lit = size_lit;
     bd->tcache_ptr = tcache_ptr;
     bd->crc = crc;
-    bd->active = 1;
-    bd->entry_count = 1;
-    bd->entryp[0].pc = addr;
-    bd->entryp[0].tcache_ptr = tcache_ptr;
-    bd->entryp[0].links = bd->entryp[0].o_links = NULL;
+    bd->active = 0;
+    bd->entry_count = 0;
 #if (DRC_DEBUG & 2)
-    bd->entryp[0].block = bd;
     bd->refcount = 0;
 #endif
-    add_to_hashlist(&bd->entryp[0], tcache_id);

     *blk_id = *bcount;
     (*bcount)++;
@@ -1150,11 +1154,33 @@ static void dr_block_link(struct block_entry *be, struct block_link *bl, int emit_jump)
         bl->jump, bl->target_pc, be->tcache_ptr);

     if (emit_jump) {
-        u8 *jump;
-        emith_jump_patch(bl->jump, be->tcache_ptr, &jump);
+        u8 *jump = bl->jump;
+        int jsz = emith_jump_patch_size();
+        if (bl->type == BL_JMP) { // patch: jump @entry
+            // inlined: @jump far jump to target
+            emith_jump_patch(jump, be->tcache_ptr, &jump);
+        } else if (bl->type == BL_LDJMP) { // write: jump @entry
+            // inlined: @jump far jump to target
+            emith_jump_at(jump, be->tcache_ptr);
+            jsz = emith_jump_at_size();
+        } else if (bl->type == BL_JCCBLX) { // patch: jump cond -> jump @entry
+            if (emith_jump_patch_inrange(bl->jump, be->tcache_ptr)) {
+                // inlined: @jump near jumpcc to target
+                emith_jump_patch(jump, be->tcache_ptr, &jump);
+            } else { // dispatcher cond immediate
+                // via blx: @jump near jumpcc to blx; @blx far jump
+                emith_jump_patch(jump, bl->blx, &jump);
+                emith_jump_at(bl->blx, be->tcache_ptr);
+                if ((((uintptr_t)bl->blx & 0xf) + emith_jump_at_size()-1) > 0xf)
+                    host_instructions_updated(bl->blx, bl->blx + emith_jump_at_size()-1);
+            }
+        } else {
+            printf("unknown BL type %d\n", bl->type);
+            exit(1);
+        }
         // only needs sync if patch is possibly crossing cacheline (assume 16 byte)
-        if ((uintptr_t)jump >>4 != ((uintptr_t)jump+emith_jump_patch_size()-1) >>4)
-            host_instructions_updated(jump, jump+emith_jump_patch_size());
+        if ((((uintptr_t)jump & 0xf) + jsz-1) > 0xf)
+            host_instructions_updated(jump, jump + jsz-1);
     }

     // move bl to block_entry
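Note: BL_LDJMP linking overwrites generated code outright, not just a displacement, so the translator must save the original sequence first. The backup/restore pattern, condensed from this function, its unlink counterpart, and the memcpy calls at the end of sh2_translate (fragment, uses the emitter API above):

    /* at translate time, right after emitting "load #pc, jump @dispatcher": */
    memcpy(bl->jdisp, bl->jump, emith_jump_at_size());  /* backup */
    /* linking: replace the whole sequence with a direct jump to the block */
    emith_jump_at(bl->jump, be->tcache_ptr);
    /* unlinking: put the saved dispatcher sequence back verbatim */
    memcpy(bl->jump, bl->jdisp, emith_jump_at_size());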
@@ -1172,10 +1198,26 @@ static void dr_block_unlink(struct block_link *bl, int emit_jump)
     if (bl->target) {
         if (emit_jump) {
-            u8 *jump;
-            emith_jump_patch(bl->jump, sh2_drc_dispatcher, &jump);
+            u8 *jump = bl->jump;
+            int jsz = emith_jump_patch_size();
+            if (bl->type == BL_JMP) { // jump_patch @dispatcher
+                // inlined: @jump far jump to dispatcher
+                emith_jump_patch(jump, sh2_drc_dispatcher, &jump);
+            } else if (bl->type == BL_LDJMP) { // restore: load pc, jump @dispatcher
+                // inlined: @jump load target_pc, far jump to dispatcher
+                memcpy(jump, bl->jdisp, emith_jump_at_size());
+                jsz = emith_jump_at_size();
+            } else if (bl->type == BL_JCCBLX) { // jump cond @blx; @blx: load pc, jump
+                // via blx: @jump near jumpcc to blx; @blx load target_pc, far jump
+                emith_jump_patch(bl->jump, bl->blx, &jump);
+                memcpy(bl->blx, bl->jdisp, emith_jump_at_size());
+                host_instructions_updated(bl->blx, bl->blx + emith_jump_at_size()-1);
+            } else {
+                printf("unknown BL type %d\n", bl->type);
+                exit(1);
+            }
             // update cpu caches since the previous jump target doesn't exist anymore
-            host_instructions_updated(jump, jump+emith_jump_patch_size());
+            host_instructions_updated(jump, jump + jsz-1);
         }

         if (bl->prev)
@@ -1189,18 +1231,17 @@ static void dr_block_unlink(struct block_link *bl, int emit_jump)
 }
 #endif

-static void *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_slave, int tcache_id)
+static struct block_link *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_slave, int tcache_id)
 {
 #if LINK_BRANCHES
     struct block_link *bl = block_link_pool[tcache_id];
     int cnt = block_link_pool_counts[tcache_id];
-    struct block_entry *be = NULL;
     int target_tcache_id;

     // get the target block entry
-    be = dr_get_entry(pc, is_slave, &target_tcache_id);
+    target_tcache_id = dr_get_tcache_id(pc, is_slave);
     if (target_tcache_id && target_tcache_id != tcache_id)
-        return sh2_drc_dispatcher;
+        return NULL;

     // get a block link
     if (blink_free[tcache_id] != NULL) {
@@ -1208,29 +1249,24 @@ static void *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_slave, int tcache_id)
         blink_free[tcache_id] = bl->next;
     } else if (cnt >= BLOCK_LINK_MAX_COUNT(tcache_id)) {
         dbg(1, "bl overflow for tcache %d", tcache_id);
-        return sh2_drc_dispatcher;
+        return NULL;
     } else {
         bl += cnt;
         block_link_pool_counts[tcache_id] = cnt+1;
     }

-    // prepare link and add to ougoing list of owner
+    // prepare link and add to outgoing list of owner
     bl->tcache_id = tcache_id;
     bl->target_pc = pc;
     bl->jump = tcache_ptr;
+    bl->blx = NULL;
     bl->o_next = owner->o_links;
     owner->o_links = bl;

-    if (be != NULL) {
-        dr_block_link(be, bl, 0); // jump not yet emitted by translate()
-        return be->tcache_ptr;
-    }
-    else {
-        add_to_hashlist_unresolved(bl, tcache_id);
-        return sh2_drc_dispatcher;
-    }
+    add_to_hashlist_unresolved(bl, tcache_id);
+    return bl;
 #else
-    return sh2_drc_dispatcher;
+    return NULL;
 #endif
 }
@@ -1272,6 +1308,27 @@ static void dr_link_outgoing(struct block_entry *be, int tcache_id, int is_slave)
 #endif
 }

+static void dr_activate_block(struct block_desc *bd, int tcache_id, int is_slave)
+{
+    int i;
+
+    // connect branches
+    for (i = 0; i < bd->entry_count; i++) {
+        struct block_entry *entry = &bd->entryp[i];
+        add_to_hashlist(entry, tcache_id);
+        // incoming branches
+        dr_link_blocks(entry, tcache_id);
+        if (!tcache_id)
+            dr_link_blocks(entry, is_slave?2:1);
+        // outgoing branches
+        dr_link_outgoing(entry, tcache_id, is_slave);
+    }
+
+    // mark memory for overwrite detection
+    dr_mark_memory(1, bd, tcache_id, 0);
+    bd->active = 1;
+}

 #define ADD_TO_ARRAY(array, count, item, failcode) { \
     if (count >= ARRAY_SIZE(array)) { \
         dbg(1, "warning: " #array " overflow"); \
@@ -2422,6 +2479,7 @@ static void rcache_invalidate(void)
 {
     int i;
     gconst_invalidate();
+    rcache_unlock_all();

     for (i = 0; i < ARRAY_SIZE(cache_regs); i++)
         rcache_free_vreg(i);
@@ -2446,7 +2504,6 @@ static void rcache_invalidate(void)
 static void rcache_flush(void)
 {
-    rcache_unlock_all();
     rcache_clean();
     rcache_invalidate();
 }
@@ -2916,13 +2973,22 @@ static void *dr_get_pc_base(u32 pc, SH2 *sh2);
 static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
 {
+    // branch targets in current block
     u32 branch_target_pc[MAX_LOCAL_BRANCHES];
     void *branch_target_ptr[MAX_LOCAL_BRANCHES];
     int branch_target_count = 0;
-    void *branch_patch_ptr[MAX_LOCAL_BRANCHES];
+    // unresolved local forward branches, for fixup at block end
     u32 branch_patch_pc[MAX_LOCAL_BRANCHES];
+    void *branch_patch_ptr[MAX_LOCAL_BRANCHES];
     int branch_patch_count = 0;
+    // external branch targets with a block link/exit area
+    u32 blx_target_pc[MAX_LOCAL_BRANCHES];
+    void *blx_target_ptr[MAX_LOCAL_BRANCHES];
+    struct block_link *blx_target_bl[MAX_LOCAL_BRANCHES];
+    int blx_target_count = 0;

     u8 op_flags[BLOCK_INSN_LIMIT];

     struct drcf {
         int delay_reg:8;
         u32 loop_type:8;
@@ -2931,9 +2997,12 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
         u32 pending_branch_direct:1;
         u32 pending_branch_indirect:1;
     } drcf = { 0, };

 #if LOOP_OPTIMIZER
-    void *pinned_loop_ptr[MAX_LOCAL_BRANCHES/16]; // loops with pinned registers for optimzation
+    // pinned regs are like statics and don't need saving/restoring inside a loop
     u32 pinned_loop_pc[MAX_LOCAL_BRANCHES/16];
+    void *pinned_loop_ptr[MAX_LOCAL_BRANCHES/16];
     u32 pinned_loop_mask[MAX_LOCAL_BRANCHES/16];
     int pinned_loop_count = 0;
 #endif
@@ -2976,24 +3045,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
             base_literals, end_literals - base_literals);
     if (block) {
-        // connect branches
         dbg(2, "== %csh2 reuse block %08x-%08x,%08x-%08x -> %p", sh2->is_slave ? 's' : 'm',
             base_pc, end_pc, base_literals, end_literals, block->entryp->tcache_ptr);
-        for (i = 0; i < block->entry_count; i++) {
-            entry = &block->entryp[i];
-            add_to_hashlist(entry, tcache_id);
-#if LINK_BRANCHES
-            // incoming branches
-            dr_link_blocks(entry, tcache_id);
-            if (!tcache_id)
-                dr_link_blocks(entry, sh2->is_slave?2:1);
-            // outgoing branches
-            dr_link_outgoing(entry, tcache_id, sh2->is_slave);
-#endif
-        }
-        // mark memory for overwrite detection
-        dr_mark_memory(1, block, tcache_id, 0);
-        block->active = 1;
+        dr_activate_block(block, tcache_id, sh2->is_slave);
         emith_update_cache();
         return block->entryp[0].tcache_ptr;
     }
@@ -3069,7 +3123,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
             if (op_flags[v] & OF_BASIC_LOOP) {
                 m3 &= ~rcache_regs_static & ~BITMASK4(SHR_PC, SHR_PR, SHR_SR, SHR_MEM);
                 if (m3 && count_bits(m3) < count_bits(rcache_hregs_reg) &&
-                    pinned_loop_count < ARRAY_SIZE(pinned_loop_pc)) {
+                    pinned_loop_count < ARRAY_SIZE(pinned_loop_pc)-1) {
                     pinned_loop_mask[pinned_loop_count] = m3;
                     pinned_loop_pc[pinned_loop_count++] = base_pc + 2*v;
                 } else
@@ -3080,6 +3134,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
             }
 #endif
     }
+    pinned_loop_pc[pinned_loop_count] = -1;

     if (branch_target_count > 0) {
         memset(branch_target_ptr, 0, sizeof(branch_target_ptr[0]) * branch_target_count);
@@ -3101,7 +3156,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
     // clear stale state after compile errors
-    rcache_unlock_all();
     rcache_invalidate();
     emith_invalidate_t();
     drcf = (struct drcf) { 0 };
@@ -3146,39 +3200,31 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
                 emith_sync_t(sr);
                 rcache_flush();
                 emith_flush();
-
-                // make block entry
-                v = block->entry_count;
-                entry = &block->entryp[v];
-                if (v < ARRAY_SIZE(block->entryp))
-                {
-                    entry = &block->entryp[v];
-                    entry->pc = pc;
-                    entry->tcache_ptr = tcache_ptr;
-                    entry->links = entry->o_links = NULL;
-#if (DRC_DEBUG & 2)
-                    entry->block = block;
-#endif
-                    add_to_hashlist(entry, tcache_id);
-                    block->entry_count++;
-                    dbg(2, "-- %csh2 block #%d,%d entry %08x -> %p",
-                        sh2->is_slave ? 's' : 'm', tcache_id, blkid_main,
-                        pc, tcache_ptr);
-                }
-                else {
-                    dbg(1, "too many entryp for block #%d,%d pc=%08x",
-                        tcache_id, blkid_main, pc);
-                    break;
-                }
-            } else {
-                entry = block->entryp;
             }

-            // since we made a block entry, link any other blocks that jump to it
-            dr_link_blocks(entry, tcache_id);
-            if (!tcache_id) // can safely link from cpu-local to global memory
-                dr_link_blocks(entry, sh2->is_slave?2:1);
+            // make block entry
+            v = block->entry_count;
+            entry = &block->entryp[v];
+            if (v < ARRAY_SIZE(block->entryp))
+            {
+                entry = &block->entryp[v];
+                entry->pc = pc;
+                entry->tcache_ptr = tcache_ptr;
+                entry->links = entry->o_links = NULL;
+#if (DRC_DEBUG & 2)
+                entry->block = block;
+#endif
+                block->entry_count++;
+                dbg(2, "-- %csh2 block #%d,%d entry %08x -> %p",
+                    sh2->is_slave ? 's' : 'm', tcache_id, blkid_main,
+                    pc, tcache_ptr);
+            }
+            else {
+                dbg(1, "too many entryp for block #%d,%d pc=%08x",
+                    tcache_id, blkid_main, pc);
+                break;
+            }

             v = find_in_sorted_array(branch_target_pc, branch_target_count, pc);
             if (v >= 0)
@@ -3220,29 +3266,35 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
 #endif

         // check cycles
-        tmp = rcache_get_tmp_arg(0);
         sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
         emith_cmp_r_imm(sr, 0);

 #if LOOP_OPTIMIZER
-        // on drc exit pinned registers must be saved
+        u8 *jp = NULL;
         if (op_flags[i] & OF_BASIC_LOOP) {
-            EMITH_JMP_START(DCOND_GT);
+            // if exiting a pinned loop pinned regs must be written back to ctx
+            // since they are reloaded in the loop entry code
+            jp = tcache_ptr;
+            emith_jump_cond_patchable(DCOND_GT, jp); // XXX need API for JMP_POS
             rcache_save_pinned();
-            emith_move_r_imm(tmp, pc);
-            emith_jump(sh2_drc_exit);
-            EMITH_JMP_END(DCOND_GT);
-        } else
-#endif
-        if (emith_jump_cond_inrange(sh2_drc_exit)) {
-            emith_move_r_imm_c(DCOND_LE, tmp, pc);
-            emith_jump_cond(DCOND_LE, sh2_drc_exit);
-        } else {
-            EMITH_JMP_START(DCOND_GT);
-            emith_move_r_imm(tmp, pc);
-            emith_jump(sh2_drc_exit);
-            EMITH_JMP_END(DCOND_GT);
         }
-        rcache_free_tmp(tmp);
+#endif
+        if (blx_target_count < ARRAY_SIZE(blx_target_pc)) {
+            // exit via stub in blx table (saves some 1-3 insns in the main flow)
+            blx_target_pc[blx_target_count] = pc|1;
+            blx_target_bl[blx_target_count] = NULL;
+            blx_target_ptr[blx_target_count++] = tcache_ptr;
+        } else {
+            // blx table full, must inline exit code
+            tmp = rcache_get_tmp_arg(0);
+            emith_move_r_imm_c(DCOND_LE, tmp, pc);
+            rcache_free_tmp(tmp);
+        }
+        emith_jump_cond_patchable(DCOND_LE, tcache_ptr);
+#if LOOP_OPTIMIZER
+        if (op_flags[i] & OF_BASIC_LOOP)
+            emith_jump_patch(jp, tcache_ptr, NULL);
+#endif

 #if (DRC_DEBUG & 32)
         // block hit counter
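Note: the cycle-check exit is no longer emitted inline. The conditional branch on the hot path now targets a stub collected in the blx table and emitted after the block body (pc|1 marks a drc-exit stub as opposed to a dispatcher stub; the low bit is masked off again when the stub is emitted). Schematically, mirroring the code above and the blx-area loop at the end of sh2_translate (fragment, emitter API as above):

    /* main flow: just one short conditional branch */
    blx_target_pc[blx_target_count] = pc | 1;      /* |1 = drc exit */
    blx_target_bl[blx_target_count] = NULL;
    blx_target_ptr[blx_target_count++] = tcache_ptr;
    emith_jump_cond_patchable(DCOND_LE, tcache_ptr); /* patched later */

    /* blx area, emitted once after the block body:
     *   stub_n:  mov  arg0, #pc
     *            jmp  sh2_drc_exit   (or sh2_drc_dispatcher)  */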
@@ -3880,7 +3932,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
         case 2: // SHAL Rn    0100nnnn00100000
             tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2);
             sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
-            emith_sync_t(sr);
+            emith_invalidate_t();
             emith_tpop_carry(sr, 0); // dummy
             emith_lslf(tmp, tmp2, 1);
             emith_tpush_carry(sr, 0);
@@ -3909,7 +3961,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
         case 2: // SHAR Rn    0100nnnn00100001
             tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2);
             sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
-            emith_sync_t(sr);
+            emith_invalidate_t();
             emith_tpop_carry(sr, 0); // dummy
             if (op & 0x20) {
                 emith_asrf(tmp, tmp2, 1);
@@ -3967,7 +4019,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
         case 0x05: // ROTR Rn    0100nnnn00000101
             tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2);
             sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
-            emith_sync_t(sr);
+            emith_invalidate_t();
             emith_tpop_carry(sr, 0); // dummy
             if (op & 1) {
                 emith_rorf(tmp, tmp2, 1);
@@ -4351,11 +4403,12 @@ end_op:
             int cond = -1;
             int ctaken = 0;
             void *target = NULL;
-            int patchable = 0;
+            struct block_link *bl = NULL;

             if (OP_ISBRACND(opd_b->op))
                 ctaken = (op_flags[i] & OF_DELAY_OP) ? 1 : 2;
             cycles += ctaken; // assume branch taken

 #if LOOP_OPTIMIZER
             if ((drcf.loop_type == OF_IDLE_LOOP ||
                 (drcf.loop_type == OF_DELAY_LOOP && drcf.delay_reg >= 0)))
@@ -4365,14 +4418,35 @@ end_op:
                 emith_sh2_delay_loop(cycles, drcf.delay_reg);
                 drcf.polling = drcf.loop_type = 0;
             }
+
+            if (target_pc < pc && pinned_loop_pc[pinned_loop_count] == target_pc) {
+                // backward jump at end of optimized loop
+                rcache_unpin_all();
+                target = pinned_loop_ptr[pinned_loop_count];
+                pinned_loop_count ++;
+            }
 #endif

             sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
             FLUSH_CYCLES(sr);
+            rcache_unlock_all();
             rcache_clean();

-            // emit condition test for conditional branch
+#if CALL_STACK
+            void *rtsadd = NULL, *rtsret = NULL;
+            if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) {
+                // BSR - save rts data
+                tmp = rcache_get_tmp_arg(1);
+                rtsadd = tcache_ptr;
+                emith_move_r_imm_s8_patchable(tmp, 0);
+                rcache_invalidate_tmp();
+                emith_call(sh2_drc_dispatcher_call);
+                rtsret = tcache_ptr;
+            }
+#endif
+
             if (OP_ISBRACND(opd_b->op)) {
+                // BT[S], BF[S] - emit condition test
                 cond = (opd_b->op == OP_BRANCH_CF) ? DCOND_EQ : DCOND_NE;
                 if (delay_dep_fw & BITMASK1(SHR_T)) {
                     emith_sync_t(sr);
@@ -4396,61 +4470,118 @@ end_op:
             {
                 // local branch
                 if (branch_target_ptr[v]) {
-                    // jumps back can be linked here since host PC is already known
+                    // local backward jump, link here now since host PC is already known
                     target = branch_target_ptr[v];
+                    if (cond != -1)
+                        emith_jump_cond(cond, target);
+                    else {
+                        emith_jump(target);
+                        rcache_invalidate();
+                    }
                 } else if (branch_patch_count < MAX_LOCAL_BRANCHES) {
+                    // local forward jump
                     target = tcache_ptr;
                     branch_patch_pc[branch_patch_count] = target_pc;
                     branch_patch_ptr[branch_patch_count] = target;
                     branch_patch_count++;
-                    patchable = 1;
+                    if (cond != -1)
+                        emith_jump_cond_patchable(cond, target);
+                    else {
+                        emith_jump_patchable(target);
+                        rcache_invalidate();
+                    }
                 } else
                     dbg(1, "warning: too many local branches");
             }
 #endif
-            rcache_unlock_all();
-#if LOOP_OPTIMIZER
-            if (target && pinned_loop_pc[pinned_loop_count] == target_pc) {
-                rcache_unpin_all();
-                target = pinned_loop_ptr[pinned_loop_count];
-                pinned_loop_count ++;
-            }
-#endif
+
             if (target == NULL)
             {
                 // can't resolve branch locally, make a block exit
-                rcache_clean();
-                tmp = rcache_get_tmp_arg(0);
-                emith_move_r_imm(tmp, target_pc);
-                rcache_free_tmp(tmp);
-                target = sh2_drc_dispatcher;
-
-#if CALL_STACK
-                if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) {
-                    // BSR
-                    emith_call(sh2_drc_dispatcher_call);
-                }
-#endif
-
-                target = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id);
-                patchable = 1;
-            }
-
-            // create branch
-            if (cond != -1) {
-                if (patchable)
-                    emith_jump_cond_patchable(cond, target);
-                else
-                    emith_jump_cond(cond, target);
-            } else {
-                rcache_invalidate();
-                if (patchable)
-                    emith_jump_patchable(target);
-                else
-                    emith_jump(target);
+                bl = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id);
+                if (cond != -1) {
+#if 1
+                    if (bl) {
+                        if (blx_target_count < ARRAY_SIZE(blx_target_pc)) {
+                            // conditional jumps get a blx stub for the far jump
+                            blx_target_pc[blx_target_count] = target_pc;
+                            blx_target_bl[blx_target_count] = bl;
+                            blx_target_ptr[blx_target_count++] = tcache_ptr;
+                            bl->type = BL_JCCBLX;
+                            target = tcache_ptr;
+                        } else {
+                            // blx table full, patch jump only
+                            tmp = rcache_get_tmp_arg(0);
+                            emith_move_r_imm(tmp, target_pc);
+                            rcache_free_tmp(tmp);
+                            bl->jump = tcache_ptr;
+                            bl->type = BL_JMP;
+                            target = sh2_drc_dispatcher;
+                        }
+                        emith_jump_cond_patchable(cond, target);
+                    } else {
+                        // cannot link, inline jump @dispatcher
+                        EMITH_JMP_START(emith_invert_cond(cond));
+                        tmp = rcache_get_tmp_arg(0);
+                        emith_move_r_imm(tmp, target_pc);
+                        rcache_free_tmp(tmp);
+                        target = sh2_drc_dispatcher;
+                        emith_jump(target);
+                        EMITH_JMP_END(emith_invert_cond(cond));
+                    }
+#elif 1
+                    // jump @dispatcher - ARM 32bit version with conditional execution
+                    EMITH_SJMP_START(emith_invert_cond(cond));
+                    tmp = rcache_get_tmp_arg(0);
+                    emith_move_r_imm_c(cond, tmp, target_pc);
+                    rcache_free_tmp(tmp);
+                    target = sh2_drc_dispatcher;
+                    if (bl) {
+                        bl->jump = tcache_ptr;
+                        bl->type = BL_JMP;
+                    }
+                    emith_jump_cond_patchable(cond, target);
+                    EMITH_SJMP_END(emith_invert_cond(cond));
+#else
+                    // jump @dispatcher - generic version (jump !cond @over, jump @trgt)
+                    EMITH_JMP_START(emith_invert_cond(cond));
+                    if (bl) {
+                        bl->jump = tcache_ptr;
+                        bl->type = BL_LDJMP;
+                    }
+                    tmp = rcache_get_tmp_arg(0);
+                    emith_move_r_imm(tmp, target_pc);
+                    rcache_free_tmp(tmp);
+                    target = sh2_drc_dispatcher;
+                    emith_jump_patchable(target);
+                    EMITH_JMP_END(emith_invert_cond(cond));
+#endif
+                } else {
+                    // unconditional, has the far jump inlined
+                    if (bl)
+                        bl->type = BL_LDJMP;
+                    tmp = rcache_get_tmp_arg(0);
+                    emith_move_r_imm(tmp, target_pc);
+                    rcache_free_tmp(tmp);
+                    target = sh2_drc_dispatcher;
+                    emith_jump_patchable(target);
+                    rcache_invalidate();
+                }
             }
+            emith_flush();
+            if (bl)
+                memcpy(bl->jdisp, bl->jump, emith_jump_at_size());
+#if CALL_STACK
+            if (rtsadd)
+                emith_move_r_imm_s8_patch(rtsadd, tcache_ptr - (u8 *)rtsret);
+#endif

             // branch not taken, correct cycle count
             if (ctaken)
                 emith_add_r_imm(sr, ctaken << 12);
@@ -4463,35 +4594,57 @@ end_op:
             drcf.polling = drcf.loop_type = 0;
         }
         else if (drcf.pending_branch_indirect) {
-            void *target;
             u32 target_pc;
+            struct block_link *bl = NULL;

             sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
             FLUSH_CYCLES(sr);
             emith_sync_t(sr);
             rcache_clean();
             tmp = rcache_get_reg_arg(0, SHR_PC, NULL);
-            rcache_invalidate();

 #if CALL_STACK
             struct op_data *opd_b = (op_flags[i] & OF_DELAY_OP) ? opd-1 : opd;
+            void *rtsadd = NULL, *rtsret = NULL;

-            if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) {
-                // JSR/BSRF
-                emith_call(sh2_drc_dispatcher_call);
-            }
+            if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) {
+                // JSR, BSRF - save rts data
+                tmp = rcache_get_tmp_arg(1);
+                rtsadd = tcache_ptr;
+                emith_move_r_imm_s8_patchable(tmp, 0);
+                rcache_invalidate_tmp();
+                emith_call(sh2_drc_dispatcher_call);
+                rtsret = tcache_ptr;
+            }
+#endif

+#if CALL_STACK
             if (opd_b->rm == SHR_PR) {
-                // RTS
+                // RTS - restore rts data, else jump to dispatcher
                 emith_jump(sh2_drc_dispatcher_return);
             } else
 #endif
             if (gconst_get(SHR_PC, &target_pc)) {
-                // JMP const, treat like unconditional direct branch
-                target = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id);
-                emith_jump_patchable(target);
+                // JMP, JSR, BRAF, BSRF const - treat like unconditional direct branch
+                bl = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id);
+                if (bl) { // pc already loaded somewhere else, can patch jump only
+                    bl->type = BL_JMP;
+                    bl->jump = tcache_ptr;
+                }
+                emith_jump_patchable(sh2_drc_dispatcher);
             } else {
-                // JMP
+                // JMP, JSR, BRAF, BSRF not const
                 emith_jump(sh2_drc_dispatcher);
             }
+            rcache_invalidate();
+            emith_flush();
+
+#if CALL_STACK
+            if (rtsadd)
+                emith_move_r_imm_s8_patch(rtsadd, tcache_ptr - (u8 *)rtsret);
+#endif

             drcf.pending_branch_indirect = 0;
             drcf.polling = drcf.loop_type = 0;
         }
@@ -4508,24 +4661,48 @@ end_op:
     if (! OP_ISBRAUC(opd->op))
     {
-        void *target;
+        struct block_link *bl;
+
         tmp = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
         FLUSH_CYCLES(tmp);
         emith_sync_t(tmp);
         rcache_clean();

+        bl = dr_prepare_ext_branch(block->entryp, pc, sh2->is_slave, tcache_id);
+        if (bl)
+            bl->type = BL_LDJMP;
         tmp = rcache_get_tmp_arg(0);
         emith_move_r_imm(tmp, pc);
-
-        target = dr_prepare_ext_branch(block->entryp, pc, sh2->is_slave, tcache_id);
-        if (target == NULL)
-            return NULL;
+        emith_jump_patchable(sh2_drc_dispatcher);
         rcache_invalidate();
-        emith_jump_patchable(target);
+        emith_flush();
+        if (bl)
+            memcpy(bl->jdisp, bl->jump, emith_jump_at_size());
     } else
         rcache_flush();

+    // emit blx area
+    for (i = 0; i < blx_target_count; i++) {
+        void *target = (blx_target_pc[i] & 1 ? sh2_drc_exit : sh2_drc_dispatcher);
+        struct block_link *bl = blx_target_bl[i];
+
+        emith_pool_check();
+        if (bl)
+            bl->blx = tcache_ptr;
+        emith_jump_patch(blx_target_ptr[i], tcache_ptr, NULL);
+        tmp = rcache_get_tmp_arg(0);
+        emith_move_r_imm(tmp, blx_target_pc[i] & ~1);
+        emith_jump(target);
+        rcache_invalidate();
+        emith_flush();
+        if (bl)
+            memcpy(bl->jdisp, bl->blx, emith_jump_at_size());
+    }

     emith_flush();
+    do_host_disasm(tcache_id);
+    emith_pool_commit(0);

     // link local branches
     for (i = 0; i < branch_patch_count; i++) {
@@ -4539,20 +4716,18 @@ end_op:
             target = tcache_ptr;
             tmp = rcache_get_tmp_arg(0);
             emith_move_r_imm(tmp, branch_patch_pc[i]);
+            rcache_flush();
             emith_jump(sh2_drc_dispatcher);
-            rcache_flush();
         }
         emith_jump_patch(branch_patch_ptr[i], target, NULL);
     }

-    emith_pool_commit(0);
-
-    dr_mark_memory(1, block, tcache_id, 0);
-
     tcache_ptrs[tcache_id] = tcache_ptr;
-
     host_instructions_updated(block_entry_ptr, tcache_ptr);
+    dr_activate_block(block, tcache_id, sh2->is_slave);
+    emith_update_cache();

     do_host_disasm(tcache_id);

     dbg(2, " block #%d,%d -> %p tcache %d/%d, insns %d -> %d %.3f",
@@ -4574,7 +4749,6 @@ end_op:
     fflush(stdout);
 #endif

-    emith_update_cache();
     return block_entry_ptr;
 }
@@ -4769,14 +4943,14 @@ static void sh2_generate_utils(void)
     // pc = sh2_drc_dispatcher_call(u32 pc)
     sh2_drc_dispatcher_call = (void *)tcache_ptr;
     emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx));
-    emith_ctx_read(arg1, SHR_PR * 4);
     emith_add_r_imm(arg2, 2*sizeof(void *));
     emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *));
     emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx));
     emith_add_r_r_r_lsl_ptr(arg2, CONTEXT_REG, arg2, 0);
-    emith_write_r_r_offs(arg1, arg2, offsetof(SH2, rts_cache));
-    emith_add_r_ret_imm(arg1, emith_jump_patchable_size()); // skip jump_patchable for rts host address
-    emith_write_r_r_offs_ptr(arg1, arg2, offsetof(SH2, rts_cache) + sizeof(void *));
+    emith_ctx_read(arg3, SHR_PR * 4);
+    emith_add_r_ret(arg1);
+    emith_write_r_r_offs_ptr(arg1, arg2, offsetof(SH2, rts_cache)+sizeof(void *));
+    emith_write_r_r_offs(arg3, arg2, offsetof(SH2, rts_cache));
     emith_ret();
     emith_flush();
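Note: sh2_drc_dispatcher_call pushes a (pc, host return address) pair into the per-CPU rts cache so that a later RTS can bypass the dispatcher; emith_add_r_ret now adds the call's return address (from LR or the stack, per host) plus a caller-supplied offset held in arg1, which is what the patchable s8 move above sets up. A plain C model of what the emitted code does; the real thing works on the raw context and steps the index in byte units, slot indexing is used here only for clarity:

    /* model only - the emitted host code does the equivalent */
    struct { u32 pc; void *host; } rts_cache[RTS_CACHE_SIZE]; /* power of 2 */
    int rts_cache_idx;

    void rts_cache_push(u32 pr, void *ret_host)
    {
        rts_cache_idx = (rts_cache_idx + 1) & (RTS_CACHE_SIZE - 1);
        rts_cache[rts_cache_idx].pc = pr;         /* from SHR_PR */
        rts_cache[rts_cache_idx].host = ret_host; /* caller's resume addr */
    }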
@@ -5378,10 +5552,8 @@ void sh2_drc_finish(SH2 *sh2)
     if (block_tables[0] == NULL)
         return;

-    sh2_drc_flush_all();
-
-    for (i = 0; i < TCACHE_BUFFERS; i++) {
 #if (DRC_DEBUG & 4)
+    for (i = 0; i < TCACHE_BUFFERS; i++) {
         printf("~~~ tcache %d\n", i);
 #if 0
         tcache_dsm_ptrs[i] = tcache_bases[i];
@@ -5394,8 +5566,12 @@ void sh2_drc_finish(SH2 *sh2)
         }
 #endif
         printf("max links: %d\n", block_link_pool_counts[i]);
+    }
 #endif

+    sh2_drc_flush_all();
+
+    for (i = 0; i < TCACHE_BUFFERS; i++) {
         if (block_tables[i] != NULL)
             free(block_tables[i]);
         block_tables[i] = NULL;