mirror of
				https://github.com/RaySollium99/picodrive.git
				synced 2025-10-26 16:29:37 -04:00 
			
		
		
		
	sh2 drc: drc exit, block linking and branch handling revised
This commit is contained in:
		
							parent
							
								
									36614252d9
								
							
						
					
					
						commit
						06bc3c0693
					
				
					 5 changed files with 462 additions and 246 deletions
				
			
		|  | @ -631,8 +631,8 @@ static void emith_pool_commit(int jumpover) | ||||||
| static inline void emith_pool_check(void) | static inline void emith_pool_check(void) | ||||||
| { | { | ||||||
| 	// check if pool must be committed
 | 	// check if pool must be committed
 | ||||||
| 	if (literal_iindex > MAX_HOST_LITERALS-4 || | 	if (literal_iindex > MAX_HOST_LITERALS-4 || (literal_pindex && | ||||||
| 		    (u8 *)tcache_ptr - (u8 *)literal_insn[0] > 0xe00) | 		    (u8 *)tcache_ptr - (u8 *)literal_insn[0] > 0xe00)) | ||||||
| 		// pool full, or displacement is approaching the limit
 | 		// pool full, or displacement is approaching the limit
 | ||||||
| 		emith_pool_commit(1); | 		emith_pool_commit(1); | ||||||
| } | } | ||||||
|  | @ -889,11 +889,19 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) | ||||||
| #define emith_tst_r_imm_c(cond, r, imm) \ | #define emith_tst_r_imm_c(cond, r, imm) \ | ||||||
| 	emith_top_imm(cond, A_OP_TST, r, imm) | 	emith_top_imm(cond, A_OP_TST, r, imm) | ||||||
| 
 | 
 | ||||||
| #define emith_move_r_imm_s8(r, imm) do { \ | #define emith_move_r_imm_s8_patchable(r, imm) do { \ | ||||||
|  | 	emith_flush(); \ | ||||||
| 	if ((s8)(imm) < 0) \ | 	if ((s8)(imm) < 0) \ | ||||||
| 		EOP_MVN_IMM(r, 0, ((u8)(imm) ^ 0xff)); \ | 		EOP_MVN_IMM(r, 0, (u8)~(imm)); \ | ||||||
| 	else \ | 	else \ | ||||||
| 		EOP_MOV_IMM(r, 0, (u8)imm); \ | 		EOP_MOV_IMM(r, 0, (u8)(imm)); \ | ||||||
|  | } while (0) | ||||||
|  | #define emith_move_r_imm_s8_patch(ptr, imm) do { \ | ||||||
|  | 	u32 *ptr_ = (u32 *)ptr; u32 op_ = *ptr_ & 0xfe1ff000; \ | ||||||
|  | 	if ((s8)(imm) < 0) \ | ||||||
|  | 		EMIT_PTR(ptr_, op_ | (A_OP_MVN<<21) | (u8)~(imm));\ | ||||||
|  | 	else \ | ||||||
|  | 		EMIT_PTR(ptr_, op_ | (A_OP_MOV<<21) | (u8)(imm));\ | ||||||
| } while (0) | } while (0) | ||||||
| 
 | 
 | ||||||
| #define emith_and_r_r_imm(d, s, imm) \ | #define emith_and_r_r_imm(d, s, imm) \ | ||||||
|  | @ -1125,7 +1133,6 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) | ||||||
| 
 | 
 | ||||||
| #define emith_jump_patchable(target) \ | #define emith_jump_patchable(target) \ | ||||||
| 	emith_jump(target) | 	emith_jump(target) | ||||||
| #define emith_jump_patchable_size() 4 |  | ||||||
| 
 | 
 | ||||||
| #define emith_jump_cond(cond, target) \ | #define emith_jump_cond(cond, target) \ | ||||||
| 	emith_xbranch(cond, target, 0) | 	emith_xbranch(cond, target, 0) | ||||||
|  | @ -1135,18 +1142,19 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) | ||||||
| 	emith_jump_cond(cond, target) | 	emith_jump_cond(cond, target) | ||||||
| 
 | 
 | ||||||
| #define emith_jump_patch(ptr, target, pos) do { \ | #define emith_jump_patch(ptr, target, pos) do { \ | ||||||
| 	u32 *ptr_ = ptr; \ | 	u32 *ptr_ = (u32 *)ptr; \ | ||||||
| 	u32 val_ = (u32 *)(target) - ptr_ - 2; \ | 	u32 val_ = (u32 *)(target) - ptr_ - 2; \ | ||||||
| 	*ptr_ = (*ptr_ & 0xff000000) | (val_ & 0x00ffffff); \ | 	*ptr_ = (*ptr_ & 0xff000000) | (val_ & 0x00ffffff); \ | ||||||
| 	if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr; \ | 	if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr; \ | ||||||
| } while (0) | } while (0) | ||||||
|  | #define emith_jump_patch_inrange(ptr, target) !0 | ||||||
| #define emith_jump_patch_size() 4 | #define emith_jump_patch_size() 4 | ||||||
| 
 | 
 | ||||||
| #define emith_jump_at(ptr, target) do { \ | #define emith_jump_at(ptr, target) do { \ | ||||||
| 	u32 val_ = (u32 *)(target) - (u32 *)(ptr) - 2; \ | 	u32 val_ = (u32 *)(target) - (u32 *)(ptr) - 2; \ | ||||||
| 	emith_flush(); \ |  | ||||||
| 	EOP_C_B_PTR(ptr, A_COND_AL, 0, val_ & 0xffffff); \ | 	EOP_C_B_PTR(ptr, A_COND_AL, 0, val_ & 0xffffff); \ | ||||||
| } while (0) | } while (0) | ||||||
|  | #define emith_jump_at_size() 4 | ||||||
| 
 | 
 | ||||||
| #define emith_jump_reg_c(cond, r) \ | #define emith_jump_reg_c(cond, r) \ | ||||||
| 	EOP_C_BX(cond, r) | 	EOP_C_BX(cond, r) | ||||||
|  | @ -1187,8 +1195,8 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) | ||||||
| #define emith_ret_to_ctx(offs) \ | #define emith_ret_to_ctx(offs) \ | ||||||
| 	emith_ctx_write(LR, offs) | 	emith_ctx_write(LR, offs) | ||||||
| 
 | 
 | ||||||
| #define emith_add_r_ret_imm(r, imm) \ | #define emith_add_r_ret(r) \ | ||||||
| 	emith_add_r_r_ptr_imm(r, LR, imm) | 	emith_add_r_r_ptr(r, LR) | ||||||
| 
 | 
 | ||||||
| /* pushes r12 for eabi alignment */ | /* pushes r12 for eabi alignment */ | ||||||
| #define emith_push_ret(r) do { \ | #define emith_push_ret(r) do { \ | ||||||
|  |  | ||||||
|  | @ -447,6 +447,8 @@ enum { AM_IDX, AM_IDXPOST, AM_IDXREG, AM_IDXPRE }; | ||||||
| #define emith_eor_r_r_r(d, s1, s2) \ | #define emith_eor_r_r_r(d, s1, s2) \ | ||||||
| 	emith_eor_r_r_r_lsl(d, s1, s2, 0) | 	emith_eor_r_r_r_lsl(d, s1, s2, 0) | ||||||
| 
 | 
 | ||||||
|  | #define emith_add_r_r_r_ptr(d, s1, s2) \ | ||||||
|  | 	emith_add_r_r_r_lsl_ptr(d, s1, s2, 0) | ||||||
| #define emith_and_r_r_r(d, s1, s2) \ | #define emith_and_r_r_r(d, s1, s2) \ | ||||||
| 	emith_and_r_r_r_lsl(d, s1, s2, 0) | 	emith_and_r_r_r_lsl(d, s1, s2, 0) | ||||||
| 
 | 
 | ||||||
|  | @ -546,6 +548,20 @@ static void emith_move_imm64(int r, int wx, int64_t imm) | ||||||
| #define emith_move_r_imm_c(cond, r, imm) \ | #define emith_move_r_imm_c(cond, r, imm) \ | ||||||
| 	emith_move_r_imm(r, imm) | 	emith_move_r_imm(r, imm) | ||||||
| 
 | 
 | ||||||
|  | #define emith_move_r_imm_s8_patchable(r, imm) do { \ | ||||||
|  | 	if ((s8)(imm) < 0) \ | ||||||
|  | 		EMIT(A64_MOVN_IMM(r, ~(s8)(imm), 0)); \ | ||||||
|  | 	else \ | ||||||
|  | 		EMIT(A64_MOVZ_IMM(r, (s8)(imm), 0)); \ | ||||||
|  | } while (0) | ||||||
|  | #define emith_move_r_imm_s8_patch(ptr, imm) do { \ | ||||||
|  | 	u32 *ptr_ = (u32 *)ptr; \ | ||||||
|  | 	int r_ = *ptr_ & 0x1f; \ | ||||||
|  | 	if ((s8)(imm) < 0) \ | ||||||
|  | 		EMIT_PTR(ptr_, A64_MOVN_IMM(r_, ~(s8)(imm), 0)); \ | ||||||
|  | 	else \ | ||||||
|  | 		EMIT_PTR(ptr_, A64_MOVZ_IMM(r_, (s8)(imm), 0)); \ | ||||||
|  | } while (0) | ||||||
| 
 | 
 | ||||||
| // arithmetic, immediate
 | // arithmetic, immediate
 | ||||||
| static void emith_arith_imm(int op, int wx, int rd, int rn, s32 imm) | static void emith_arith_imm(int op, int wx, int rd, int rn, s32 imm) | ||||||
|  | @ -995,16 +1011,6 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) | ||||||
| 	emith_move_r_imm(arg, imm) | 	emith_move_r_imm(arg, imm) | ||||||
| 
 | 
 | ||||||
| // branching; NB: A64 B.cond has only +/- 1MB range
 | // branching; NB: A64 B.cond has only +/- 1MB range
 | ||||||
| #define emith_bcond(ptr, patch, cond, target) do { \ |  | ||||||
| 	u32 disp_ = (u8 *)target - (u8 *)ptr; \ |  | ||||||
| 	if (disp_ >= 0xfff00000 || disp_ <= 0x000fffff) { /* can use near B.c */ \ |  | ||||||
| 		EMIT_PTR(ptr, A64_BCOND(cond, disp_ & 0x001fffff)); \ |  | ||||||
| 		if (patch) EMIT_PTR(ptr, A64_NOP); /* reserve space for far B */ \ |  | ||||||
| 	} else { /* far branch if near branch isn't possible */ \ |  | ||||||
| 		EMIT_PTR(ptr, A64_BCOND(emith_invert_cond(cond), 8)); \ |  | ||||||
| 		EMIT_PTR(ptr, A64_B((disp_ - 4) & 0x0fffffff)); \ |  | ||||||
| 	} \ |  | ||||||
| } while (0) |  | ||||||
| 
 | 
 | ||||||
| #define emith_jump(target) do {\ | #define emith_jump(target) do {\ | ||||||
| 	u32 disp_ = (u8 *)target - (u8 *)tcache_ptr; \ | 	u32 disp_ = (u8 *)target - (u8 *)tcache_ptr; \ | ||||||
|  | @ -1013,30 +1019,37 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) | ||||||
| 
 | 
 | ||||||
| #define emith_jump_patchable(target) \ | #define emith_jump_patchable(target) \ | ||||||
| 	emith_jump(target) | 	emith_jump(target) | ||||||
| #define emith_jump_patchable_size() 4 |  | ||||||
| 
 | 
 | ||||||
| #define emith_jump_cond(cond, target) \ | #define emith_jump_cond(cond, target) do { \ | ||||||
| 	emith_bcond(tcache_ptr, 0, cond, target) | 	u32 disp_ = (u8 *)target - (u8 *)tcache_ptr; \ | ||||||
|  | 	EMIT(A64_BCOND(cond, disp_ & 0x001fffff)); \ | ||||||
|  | } while (0) | ||||||
| 
 | 
 | ||||||
| #define emith_jump_cond_patchable(cond, target) \ | #define emith_jump_cond_patchable(cond, target) \ | ||||||
| 	emith_bcond(tcache_ptr, 1, cond, target) | 	emith_jump_cond(cond, target) | ||||||
| 
 | 
 | ||||||
| #define emith_jump_cond_inrange(target) \ | #define emith_jump_cond_inrange(target) \ | ||||||
| 	!(((u8 *)target - (u8 *)tcache_ptr + 0x100000) >> 21) | 	!(((u8 *)target - (u8 *)tcache_ptr + 0x100000) >> 21) | ||||||
| 
 | 
 | ||||||
| #define emith_jump_patch(ptr, target, pos) do { \ | #define emith_jump_patch(ptr, target, pos) do { \ | ||||||
| 	u32 *ptr_ = (u32 *)ptr; \ | 	u32 *ptr_ = (u32 *)ptr; \ | ||||||
| 	u32 disp_ = (u8 *)(target) - (u8 *)(ptr_); \ | 	u32 disp_ = (u8 *)target - (u8 *)ptr, mask_; \ | ||||||
| 	int cond_ = ptr_[0] & 0xf; \ | 	if ((*ptr_ & 0xff000000) == 0x54000000) \ | ||||||
| 	if ((ptr_[0] & 0xff000000) == 0x54000000) { /* B.cond */ \ | 		mask_ = 0xff00001f, disp_ <<= 5; /* B.cond, range 21 bit */ \ | ||||||
| 		if (ptr_[1] != A64_NOP)	cond_ = emith_invert_cond(cond_); \ | 	else	mask_ = 0xfc000000;		 /* B[L], range 28 bit */ \ | ||||||
| 		emith_bcond(ptr_, 1, cond_, target); \ | 	EMIT_PTR(ptr_, (*ptr_ & mask_) | ((disp_ >> 2) & ~mask_)); \ | ||||||
| 	} else if (ptr_[0] & 0x80000000) \ | 	if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)(ptr_-1); \ | ||||||
| 		EMIT_PTR(ptr_, A64_BL((disp_) & 0x0fffffff)); \ |  | ||||||
| 	else	EMIT_PTR(ptr_, A64_B((disp_) & 0x0fffffff)); \ |  | ||||||
| 	if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr; \ |  | ||||||
| } while (0) | } while (0) | ||||||
| #define emith_jump_patch_size()	8 | 
 | ||||||
|  | #define emith_jump_patch_inrange(ptr, target) \ | ||||||
|  | 	!(((u8 *)target - (u8 *)ptr + 0x100000) >> 21) | ||||||
|  | #define emith_jump_patch_size()	4 | ||||||
|  | 
 | ||||||
|  | #define emith_jump_at(ptr, target) do { \ | ||||||
|  | 	u32 disp_ = (u8 *)target - (u8 *)ptr; \ | ||||||
|  | 	EMIT_PTR(ptr, A64_B(disp_ & 0x0fffffff)); \ | ||||||
|  | } while (0) | ||||||
|  | #define emith_jump_at_size() 4 | ||||||
| 
 | 
 | ||||||
| #define emith_jump_reg(r) \ | #define emith_jump_reg(r) \ | ||||||
| 	EMIT(A64_BR(r)) | 	EMIT(A64_BR(r)) | ||||||
|  | @ -1079,8 +1092,8 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) | ||||||
| #define emith_ret_to_ctx(offs) \ | #define emith_ret_to_ctx(offs) \ | ||||||
| 	emith_ctx_write_ptr(LR, offs) | 	emith_ctx_write_ptr(LR, offs) | ||||||
| 
 | 
 | ||||||
| #define emith_add_r_ret_imm(r, imm) \ | #define emith_add_r_ret(r) \ | ||||||
| 	emith_add_r_r_ptr_imm(r, LR, imm) | 	emith_add_r_r_r_ptr(r, LR, r) | ||||||
| 
 | 
 | ||||||
| // NB: pushes r or r18 for SP hardware alignment
 | // NB: pushes r or r18 for SP hardware alignment
 | ||||||
| #define emith_push_ret(r) do { \ | #define emith_push_ret(r) do { \ | ||||||
|  |  | ||||||
|  | @ -285,7 +285,7 @@ static int emith_b_isswap(u32 bop, u32 lop) | ||||||
| 		return bop; | 		return bop; | ||||||
| 	else if (emith_is_b(bop) &&  emith_rd(lop) != emith_rs(bop)) | 	else if (emith_is_b(bop) &&  emith_rd(lop) != emith_rs(bop)) | ||||||
| 		if ((bop & 0xffff) != 0x7fff)	// displacement overflow?
 | 		if ((bop & 0xffff) != 0x7fff)	// displacement overflow?
 | ||||||
| 			return (bop & 0xffff0000) | ((bop & 0xffff)+1); | 			return (bop & 0xffff0000) | ((bop+1) & 0x0000ffff); | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -332,14 +332,14 @@ static void *emith_branch(u32 op) | ||||||
| 
 | 
 | ||||||
| #define JMP_EMIT(cond, ptr) { \ | #define JMP_EMIT(cond, ptr) { \ | ||||||
| 	u32 val_ = (u8 *)tcache_ptr - (u8 *)(ptr) - 4; \ | 	u32 val_ = (u8 *)tcache_ptr - (u8 *)(ptr) - 4; \ | ||||||
| 	EMIT_PTR(ptr, MIPS_BCONDZ(cond_m, cond_r, val_ & 0x0003ffff)); \ |  | ||||||
| 	emith_flush(); /* NO delay slot handling across jump targets */ \ | 	emith_flush(); /* NO delay slot handling across jump targets */ \ | ||||||
|  | 	EMIT_PTR(ptr, MIPS_BCONDZ(cond_m, cond_r, val_ & 0x0003ffff)); \ | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #define JMP_EMIT_NC(ptr) { \ | #define JMP_EMIT_NC(ptr) { \ | ||||||
| 	u32 val_ = (u8 *)tcache_ptr - (u8 *)(ptr) - 4; \ | 	u32 val_ = (u8 *)tcache_ptr - (u8 *)(ptr) - 4; \ | ||||||
| 	EMIT_PTR(ptr, MIPS_B(val_ & 0x0003ffff)); \ |  | ||||||
| 	emith_flush(); \ | 	emith_flush(); \ | ||||||
|  | 	EMIT_PTR(ptr, MIPS_B(val_ & 0x0003ffff)); \ | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #define EMITH_JMP_START(cond) { \ | #define EMITH_JMP_START(cond) { \ | ||||||
|  | @ -645,6 +645,13 @@ static void emith_move_imm(int r, uintptr_t imm) | ||||||
| #define emith_move_r_imm_c(cond, r, imm) \ | #define emith_move_r_imm_c(cond, r, imm) \ | ||||||
| 	emith_move_r_imm(r, imm) | 	emith_move_r_imm(r, imm) | ||||||
| 
 | 
 | ||||||
|  | #define emith_move_r_imm_s8_patchable(r, imm) \ | ||||||
|  | 	EMIT(MIPS_ADD_IMM(r, Z0, (s8)(imm))) | ||||||
|  | #define emith_move_r_imm_s8_patch(ptr, imm) do { \ | ||||||
|  | 	u32 *ptr_ = (u32 *)ptr; \ | ||||||
|  | 	while (*ptr_ >> 26 != OP_ADDIU) ptr_++; \ | ||||||
|  | 	EMIT_PTR(ptr_, (*ptr_ & 0xffff0000) | (u16)(s8)(imm)); \ | ||||||
|  | } while (0) | ||||||
| 
 | 
 | ||||||
| // arithmetic, immediate
 | // arithmetic, immediate
 | ||||||
| static void emith_arith_imm(int op, int rd, int rs, u32 imm) | static void emith_arith_imm(int op, int rd, int rs, u32 imm) | ||||||
|  | @ -1162,41 +1169,44 @@ static int emith_cond_check(int cond, int *r) | ||||||
| 	emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)) | 	emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)) | ||||||
| #define emith_jump_patchable(target) \ | #define emith_jump_patchable(target) \ | ||||||
| 	emith_jump(target) | 	emith_jump(target) | ||||||
| #define emith_jump_patchable_size() 8 /* J+delayslot */ |  | ||||||
| 
 | 
 | ||||||
| // NB: MIPS conditional branches have only +/- 128KB range
 | // NB: MIPS conditional branches have only +/- 128KB range
 | ||||||
| #define emith_jump_cond(cond, target) do { \ | #define emith_jump_cond(cond, target) do { \ | ||||||
| 	int r_, mcond_ = emith_cond_check(cond, &r_); \ | 	int r_, mcond_ = emith_cond_check(cond, &r_); \ | ||||||
| 	u32 disp_ = (u8 *)target - (u8 *)tcache_ptr - 4; \ | 	u32 disp_ = (u8 *)target - (u8 *)tcache_ptr - 4; \ | ||||||
| 	if (disp_ >= 0xfffe0000 || disp_ <= 0x0001ffff) { /* can use near B */ \ | 	emith_branch(MIPS_BCONDZ(mcond_,r_,disp_ & 0x0003ffff)); \ | ||||||
| 		emith_branch(MIPS_BCONDZ(mcond_,r_,disp_ & 0x0003ffff)); \ |  | ||||||
| 	} else { /* far branch if near branch isn't possible */ \ |  | ||||||
| 		mcond_ = emith_invert_branch(mcond_); \ |  | ||||||
| 		u8 *bp = emith_branch(MIPS_BCONDZ(mcond_, r_, 0)); \ |  | ||||||
| 		emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)); \ |  | ||||||
| 		EMIT_PTR(bp, MIPS_BCONDZ(mcond_, r_, (u8 *)tcache_ptr-bp-4)); \ |  | ||||||
| 	} \ |  | ||||||
| } while (0) | } while (0) | ||||||
| #define emith_jump_cond_inrange(target) \ | #define emith_jump_cond_patchable(cond, target) \ | ||||||
| 	!(((u8 *)target - (u8 *)tcache_ptr + 0x20000) >> 18) | 	emith_jump_cond(cond, target) | ||||||
| 
 | 
 | ||||||
| #define emith_jump_cond_patchable(cond, target) do { \ | #define emith_jump_cond_inrange(target) \ | ||||||
| 	int r_, mcond_ = emith_cond_check(cond, &r_); \ | 	((u8 *)target - (u8 *)tcache_ptr - 4 <  0x00020000U || \ | ||||||
| 	mcond_ = emith_invert_branch(mcond_); \ | 	 (u8 *)target - (u8 *)tcache_ptr - 4 >= 0xfffe0010U) // mind cond_check
 | ||||||
| 	u8 *bp = emith_branch(MIPS_BCONDZ(mcond_, r_, 0));\ |  | ||||||
| 	emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)); \ |  | ||||||
| 	EMIT_PTR(bp, MIPS_BCONDZ(mcond_, r_, (u8 *)tcache_ptr-bp-4)); \ |  | ||||||
| } while (0) |  | ||||||
| 
 | 
 | ||||||
| // NB: returns position of patch for cache maintenance
 | // NB: returns position of patch for cache maintenance
 | ||||||
| #define emith_jump_patch(ptr, target, pos) do { \ | #define emith_jump_patch(ptr, target, pos) do { \ | ||||||
| 	u32 *ptr_ = (u32 *)ptr-1; /* must skip condition check code */ \ | 	u32 *ptr_ = (u32 *)ptr-1; /* must skip condition check code */ \ | ||||||
| 	while ((ptr_[0] & 0xf8000000) != OP_J << 26) ptr_ ++; \ | 	u32 disp_, mask_; \ | ||||||
| 	EMIT_PTR(ptr_, MIPS_J((uintptr_t)target & 0x0fffffff)); \ | 	while (!emith_is_j(*ptr_) && !emith_is_b(*ptr_)) ptr_ ++; \ | ||||||
|  | 	if (emith_is_b(*ptr_)) \ | ||||||
|  | 		mask_ = 0xffff0000, disp_ = (u8 *)target - (u8 *)ptr_ - 4; \ | ||||||
|  | 	else	mask_ = 0xfc000000, disp_ = (uintptr_t)target; \ | ||||||
|  | 	EMIT_PTR(ptr_, (*ptr_ & mask_) | ((disp_ >> 2) & ~mask_)); \ | ||||||
| 	if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)(ptr_-1); \ | 	if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)(ptr_-1); \ | ||||||
| } while (0) | } while (0) | ||||||
|  | 
 | ||||||
|  | #define emith_jump_patch_inrange(ptr, target) \ | ||||||
|  | 	((u8 *)target - (u8 *)ptr - 4 <  0x00020000U || \ | ||||||
|  | 	 (u8 *)target - (u8 *)ptr - 4 >= 0xfffe0010U) // mind cond_check
 | ||||||
| #define emith_jump_patch_size() 4 | #define emith_jump_patch_size() 4 | ||||||
| 
 | 
 | ||||||
|  | #define emith_jump_at(ptr, target) do { \ | ||||||
|  | 	u32 *ptr_ = (u32 *)ptr; \ | ||||||
|  | 	EMIT_PTR(ptr_, MIPS_J((uintptr_t)target & 0x0fffffff)); \ | ||||||
|  | 	EMIT_PTR(ptr_, MIPS_NOP); \ | ||||||
|  | } while (0) | ||||||
|  | #define emith_jump_at_size() 8 | ||||||
|  | 
 | ||||||
| #define emith_jump_reg(r) \ | #define emith_jump_reg(r) \ | ||||||
| 	emith_branch(MIPS_JR(r)) | 	emith_branch(MIPS_JR(r)) | ||||||
| #define emith_jump_reg_c(cond, r) \ | #define emith_jump_reg_c(cond, r) \ | ||||||
|  | @ -1232,8 +1242,8 @@ static int emith_cond_check(int cond, int *r) | ||||||
| #define emith_ret_to_ctx(offs) \ | #define emith_ret_to_ctx(offs) \ | ||||||
| 	emith_ctx_write_ptr(LR, offs) | 	emith_ctx_write_ptr(LR, offs) | ||||||
| 
 | 
 | ||||||
| #define emith_add_r_ret_imm(r, imm) \ | #define emith_add_r_ret(r) \ | ||||||
| 	emith_add_r_r_ptr_imm(r, LR, imm) | 	emith_add_r_r_ptr(r, LR) | ||||||
| 
 | 
 | ||||||
| // NB: ABI SP alignment is 8 for compatibility with MIPS IV
 | // NB: ABI SP alignment is 8 for compatibility with MIPS IV
 | ||||||
| #define emith_push_ret(r) do { \ | #define emith_push_ret(r) do { \ | ||||||
|  |  | ||||||
|  | @ -371,8 +371,16 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI,	// x86-64,i386 common | ||||||
| 	} \ | 	} \ | ||||||
| } while (0) | } while (0) | ||||||
| 
 | 
 | ||||||
| #define emith_move_r_imm_s8(r, imm) \ | #define emith_move_r_imm_s8_patchable(r, imm) do { \ | ||||||
| 	emith_move_r_imm(r, (u32)(signed int)(signed char)(imm)) | 	EMIT_REX_IF(0, 0, r); \ | ||||||
|  | 	EMIT_OP(0xb8 + ((r)&7)); \ | ||||||
|  | 	EMIT((s8)(imm), u32); \ | ||||||
|  | } while (0) | ||||||
|  | #define emith_move_r_imm_s8_patch(ptr, imm) do { \ | ||||||
|  | 	u8 *ptr_ = ptr; \ | ||||||
|  | 	while ((*ptr_ & 0xf8) != 0xb8) ptr_++; \ | ||||||
|  | 	EMIT_PTR(ptr_ + 1, (s8)(imm), u32); \ | ||||||
|  | } while (0) | ||||||
| 
 | 
 | ||||||
| #define emith_arith_r_imm(op, r, imm) do { \ | #define emith_arith_r_imm(op, r, imm) do { \ | ||||||
| 	EMIT_REX_IF(0, 0, r); \ | 	EMIT_REX_IF(0, 0, r); \ | ||||||
|  | @ -851,7 +859,6 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI,	// x86-64,i386 common | ||||||
| 
 | 
 | ||||||
| #define emith_jump_patchable(target) \ | #define emith_jump_patchable(target) \ | ||||||
| 	emith_jump(target) | 	emith_jump(target) | ||||||
| #define emith_jump_patchable_size() 5 /* JMP rel32 */ |  | ||||||
| 
 | 
 | ||||||
| #define emith_jump_cond(cond, ptr) do { \ | #define emith_jump_cond(cond, ptr) do { \ | ||||||
| 	u32 disp = (u8 *)(ptr) - ((u8 *)tcache_ptr + 6); \ | 	u32 disp = (u8 *)(ptr) - ((u8 *)tcache_ptr + 6); \ | ||||||
|  | @ -867,15 +874,17 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI,	// x86-64,i386 common | ||||||
| 	u32 disp_ = (u8 *)(target) - ((u8 *)(ptr) + 4); \ | 	u32 disp_ = (u8 *)(target) - ((u8 *)(ptr) + 4); \ | ||||||
| 	u32 offs_ = (*(u8 *)(ptr) == 0x0f) ? 2 : 1; \ | 	u32 offs_ = (*(u8 *)(ptr) == 0x0f) ? 2 : 1; \ | ||||||
| 	EMIT_PTR((u8 *)(ptr) + offs_, disp_ - offs_, u32); \ | 	EMIT_PTR((u8 *)(ptr) + offs_, disp_ - offs_, u32); \ | ||||||
| 	if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr; \ | 	if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr + offs_; \ | ||||||
| } while (0) | } while (0) | ||||||
| #define emith_jump_patch_size() 6 | #define emith_jump_patch_size() 4 | ||||||
|  | #define emith_jump_patch_inrange(ptr, target) !0 | ||||||
| 
 | 
 | ||||||
| #define emith_jump_at(ptr, target) do { \ | #define emith_jump_at(ptr, target) do { \ | ||||||
| 	u32 disp_ = (u8 *)(target) - ((u8 *)(ptr) + 5); \ | 	u32 disp_ = (u8 *)(target) - ((u8 *)(ptr) + 5); \ | ||||||
| 	EMIT_PTR(ptr, 0xe9, u8); \ | 	EMIT_PTR(ptr, 0xe9, u8); \ | ||||||
| 	EMIT_PTR((u8 *)(ptr) + 1, disp_, u32); \ | 	EMIT_PTR((u8 *)(ptr) + 1, disp_, u32); \ | ||||||
| } while (0) | } while (0) | ||||||
|  | #define emith_jump_at_size() 5 | ||||||
| 
 | 
 | ||||||
| #define emith_call(ptr) do { \ | #define emith_call(ptr) do { \ | ||||||
| 	u32 disp = (u8 *)(ptr) - ((u8 *)tcache_ptr + 5); \ | 	u32 disp = (u8 *)(ptr) - ((u8 *)tcache_ptr + 5); \ | ||||||
|  | @ -900,9 +909,9 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI,	// x86-64,i386 common | ||||||
| #define emith_ret() \ | #define emith_ret() \ | ||||||
| 	EMIT_OP(0xc3) | 	EMIT_OP(0xc3) | ||||||
| 
 | 
 | ||||||
| #define emith_add_r_ret_imm(r, imm) do { \ | #define emith_add_r_ret(r) do { \ | ||||||
| 	emith_read_r_r_offs_ptr(r, xSP, 0); \ | 	EMIT_REX_IF(1, r, xSP); \ | ||||||
| 	emith_add_r_r_ptr_imm(r, r, imm); \ | 	emith_deref_modrm(0x03, 0, r, xSP); /* add r, [xsp] */ \ | ||||||
| } while (0) | } while (0) | ||||||
| 
 | 
 | ||||||
| #define emith_jump_reg(r) \ | #define emith_jump_reg(r) \ | ||||||
|  | @ -974,7 +983,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI,	// x86-64,i386 common | ||||||
| 	emith_move_r_imm(rd, imm); \ | 	emith_move_r_imm(rd, imm); \ | ||||||
| } while (0) | } while (0) | ||||||
| 
 | 
 | ||||||
| #define host_instructions_updated(base, end) | #define host_instructions_updated(base, end)	(void)(base),(void)(end) | ||||||
| #define	emith_update_cache()	/**/ | #define	emith_update_cache()	/**/ | ||||||
| 
 | 
 | ||||||
| #define emith_rw_offs_max()	0xffffffff | #define emith_rw_offs_max()	0xffffffff | ||||||
|  |  | ||||||
|  | @ -69,7 +69,7 @@ | ||||||
| // 800 - state dump on exit
 | // 800 - state dump on exit
 | ||||||
| // {
 | // {
 | ||||||
| #ifndef DRC_DEBUG | #ifndef DRC_DEBUG | ||||||
| #define DRC_DEBUG 0//x8e7
 | #define DRC_DEBUG 0//x8c7
 | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| #if DRC_DEBUG | #if DRC_DEBUG | ||||||
|  | @ -288,15 +288,19 @@ static u8 *tcache_ptr; | ||||||
| 
 | 
 | ||||||
| #define MAX_BLOCK_ENTRIES (BLOCK_INSN_LIMIT / 6) | #define MAX_BLOCK_ENTRIES (BLOCK_INSN_LIMIT / 6) | ||||||
| 
 | 
 | ||||||
|  | enum { BL_JMP=1, BL_LDJMP, BL_JCCBLX }; | ||||||
| struct block_link { | struct block_link { | ||||||
|  |   short tcache_id; | ||||||
|  |   short type;                // BL_JMP et al
 | ||||||
|   u32 target_pc; |   u32 target_pc; | ||||||
|   void *jump;                // insn address
 |   void *jump;                // insn address
 | ||||||
|  |   void *blx;                 // block link/exit  area if any
 | ||||||
|  |   u8 jdisp[8];               // jump backup buffer
 | ||||||
|   struct block_link *next;   // either in block_entry->links or unresolved
 |   struct block_link *next;   // either in block_entry->links or unresolved
 | ||||||
|   struct block_link *o_next; //     ...in block_entry->o_links
 |   struct block_link *o_next; //     ...in block_entry->o_links
 | ||||||
|   struct block_link *prev; |   struct block_link *prev; | ||||||
|   struct block_link *o_prev; |   struct block_link *o_prev; | ||||||
|   struct block_entry *target;// target block this is linked in (be->links)
 |   struct block_entry *target;// target block this is linked in (be->links)
 | ||||||
|   int tcache_id; |  | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct block_entry { | struct block_entry { | ||||||
|  | @ -686,18 +690,24 @@ static int dr_ctx_get_mem_ptr(SH2 *sh2, u32 a, u32 *mask) | ||||||
|   return poffs; |   return poffs; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static struct block_entry *dr_get_entry(u32 pc, int is_slave, int *tcache_id) | static int dr_get_tcache_id(u32 pc, int is_slave) | ||||||
| { | { | ||||||
|   struct block_entry *be; |  | ||||||
|   u32 tcid = 0; |   u32 tcid = 0; | ||||||
|   |   | ||||||
|   if ((pc & 0xe0000000) == 0xc0000000) |   if ((pc & 0xe0000000) == 0xc0000000) | ||||||
|     tcid = 1 + is_slave; // data array
 |     tcid = 1 + is_slave; // data array
 | ||||||
|   if ((pc & ~0xfff) == 0) |   if ((pc & ~0xfff) == 0) | ||||||
|     tcid = 1 + is_slave; // BIOS
 |     tcid = 1 + is_slave; // BIOS
 | ||||||
|   *tcache_id = tcid; |   return tcid; | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
|   be = HASH_FUNC(hash_tables[tcid], pc, HASH_TABLE_SIZE(tcid) - 1); | static struct block_entry *dr_get_entry(u32 pc, int is_slave, int *tcache_id) | ||||||
|  | { | ||||||
|  |   struct block_entry *be; | ||||||
|  |   | ||||||
|  |   *tcache_id = dr_get_tcache_id(pc, is_slave); | ||||||
|  | 
 | ||||||
|  |   be = HASH_FUNC(hash_tables[*tcache_id], pc, HASH_TABLE_SIZE(*tcache_id) - 1); | ||||||
|   if (be != NULL) // don't ask... gcc code generation hint
 |   if (be != NULL) // don't ask... gcc code generation hint
 | ||||||
|   for (; be != NULL; be = be->next) |   for (; be != NULL; be = be->next) | ||||||
|     if (be->pc == pc) |     if (be->pc == pc) | ||||||
|  | @ -1101,17 +1111,11 @@ static struct block_desc *dr_add_block(u32 addr, int size, | ||||||
|   bd->size_lit = size_lit; |   bd->size_lit = size_lit; | ||||||
|   bd->tcache_ptr = tcache_ptr; |   bd->tcache_ptr = tcache_ptr; | ||||||
|   bd->crc = crc; |   bd->crc = crc; | ||||||
|   bd->active = 1; |   bd->active = 0; | ||||||
| 
 |   bd->entry_count = 0; | ||||||
|   bd->entry_count = 1; |  | ||||||
|   bd->entryp[0].pc = addr; |  | ||||||
|   bd->entryp[0].tcache_ptr = tcache_ptr; |  | ||||||
|   bd->entryp[0].links = bd->entryp[0].o_links = NULL; |  | ||||||
| #if (DRC_DEBUG & 2) | #if (DRC_DEBUG & 2) | ||||||
|   bd->entryp[0].block = bd; |  | ||||||
|   bd->refcount = 0; |   bd->refcount = 0; | ||||||
| #endif | #endif | ||||||
|   add_to_hashlist(&bd->entryp[0], tcache_id); |  | ||||||
| 
 | 
 | ||||||
|   *blk_id = *bcount; |   *blk_id = *bcount; | ||||||
|   (*bcount)++; |   (*bcount)++; | ||||||
|  | @ -1150,11 +1154,33 @@ static void dr_block_link(struct block_entry *be, struct block_link *bl, int emi | ||||||
|     bl->jump, bl->target_pc, be->tcache_ptr); |     bl->jump, bl->target_pc, be->tcache_ptr); | ||||||
| 
 | 
 | ||||||
|   if (emit_jump) { |   if (emit_jump) { | ||||||
|     u8 *jump; |     u8 *jump = bl->jump; | ||||||
|     emith_jump_patch(bl->jump, be->tcache_ptr, &jump); |     int jsz = emith_jump_patch_size(); | ||||||
|  |     if (bl->type == BL_JMP) { // patch: jump @entry
 | ||||||
|  |       // inlined: @jump far jump to target
 | ||||||
|  |       emith_jump_patch(jump, be->tcache_ptr, &jump); | ||||||
|  |     } else if (bl->type == BL_LDJMP) { // write: jump @entry
 | ||||||
|  |       // inlined: @jump far jump to target
 | ||||||
|  |       emith_jump_at(jump, be->tcache_ptr); | ||||||
|  |       jsz = emith_jump_at_size(); | ||||||
|  |     } else if (bl->type == BL_JCCBLX) { // patch: jump cond -> jump @entry
 | ||||||
|  |       if (emith_jump_patch_inrange(bl->jump, be->tcache_ptr)) { | ||||||
|  |         // inlined: @jump near jumpcc to target
 | ||||||
|  |         emith_jump_patch(jump, be->tcache_ptr, &jump); | ||||||
|  |       } else { // dispatcher cond immediate
 | ||||||
|  |         // via blx: @jump near jumpcc to blx; @blx far jump
 | ||||||
|  |         emith_jump_patch(jump, bl->blx, &jump); | ||||||
|  |         emith_jump_at(bl->blx, be->tcache_ptr); | ||||||
|  |         if ((((uintptr_t)bl->blx & 0xf) + emith_jump_at_size()-1) > 0xf) | ||||||
|  |           host_instructions_updated(bl->blx, bl->blx + emith_jump_at_size()-1); | ||||||
|  |       } | ||||||
|  |     } else { | ||||||
|  |       printf("unknown BL type %d\n", bl->type); | ||||||
|  |       exit(1); | ||||||
|  |     } | ||||||
|     // only needs sync if patch is possibly crossing cacheline (assume 16 byte)
 |     // only needs sync if patch is possibly crossing cacheline (assume 16 byte)
 | ||||||
|     if ((uintptr_t)jump >>4 != ((uintptr_t)jump+emith_jump_patch_size()-1) >>4) |     if ((((uintptr_t)jump & 0xf) + jsz-1) > 0xf) | ||||||
|       host_instructions_updated(jump, jump+emith_jump_patch_size()); |       host_instructions_updated(jump, jump + jsz-1); | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   // move bl to block_entry
 |   // move bl to block_entry
 | ||||||
|  | @ -1172,10 +1198,26 @@ static void dr_block_unlink(struct block_link *bl, int emit_jump) | ||||||
| 
 | 
 | ||||||
|   if (bl->target) { |   if (bl->target) { | ||||||
|     if (emit_jump) { |     if (emit_jump) { | ||||||
|       u8 *jump; |       u8 *jump = bl->jump; | ||||||
|       emith_jump_patch(bl->jump, sh2_drc_dispatcher, &jump); |       int jsz = emith_jump_patch_size(); | ||||||
|  |       if (bl->type == BL_JMP) { // jump_patch @dispatcher
 | ||||||
|  |         // inlined: @jump far jump to dispatcher
 | ||||||
|  |         emith_jump_patch(jump, sh2_drc_dispatcher, &jump); | ||||||
|  |       } else if (bl->type == BL_LDJMP) { // restore: load pc, jump @dispatcher
 | ||||||
|  |         // inlined: @jump load target_pc, far jump to dispatcher
 | ||||||
|  |         memcpy(jump, bl->jdisp, emith_jump_at_size()); | ||||||
|  |         jsz = emith_jump_at_size(); | ||||||
|  |       } else if (bl->type == BL_JCCBLX) { // jump cond @blx; @blx: load pc, jump
 | ||||||
|  |         // via blx: @jump near jumpcc to blx; @blx load target_pc, far jump
 | ||||||
|  |         emith_jump_patch(bl->jump, bl->blx, &jump); | ||||||
|  |         memcpy(bl->blx, bl->jdisp, emith_jump_at_size()); | ||||||
|  |         host_instructions_updated(bl->blx, bl->blx + emith_jump_at_size()-1); | ||||||
|  |       } else { | ||||||
|  |         printf("unknown BL type %d\n", bl->type); | ||||||
|  |         exit(1); | ||||||
|  |       } | ||||||
|       // update cpu caches since the previous jump target doesn't exist anymore
 |       // update cpu caches since the previous jump target doesn't exist anymore
 | ||||||
|       host_instructions_updated(jump, jump+emith_jump_patch_size()); |       host_instructions_updated(jump, jump + jsz-1); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     if (bl->prev) |     if (bl->prev) | ||||||
|  | @ -1189,18 +1231,17 @@ static void dr_block_unlink(struct block_link *bl, int emit_jump) | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| static void *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_slave, int tcache_id) | static struct block_link *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_slave, int tcache_id) | ||||||
| { | { | ||||||
| #if LINK_BRANCHES | #if LINK_BRANCHES | ||||||
|   struct block_link *bl = block_link_pool[tcache_id]; |   struct block_link *bl = block_link_pool[tcache_id]; | ||||||
|   int cnt = block_link_pool_counts[tcache_id]; |   int cnt = block_link_pool_counts[tcache_id]; | ||||||
|   struct block_entry *be = NULL; |  | ||||||
|   int target_tcache_id; |   int target_tcache_id; | ||||||
| 
 | 
 | ||||||
|   // get the target block entry
 |   // get the target block entry
 | ||||||
|   be = dr_get_entry(pc, is_slave, &target_tcache_id); |   target_tcache_id = dr_get_tcache_id(pc, is_slave); | ||||||
|   if (target_tcache_id && target_tcache_id != tcache_id) |   if (target_tcache_id && target_tcache_id != tcache_id) | ||||||
|     return sh2_drc_dispatcher; |     return NULL; | ||||||
| 
 | 
 | ||||||
|   // get a block link
 |   // get a block link
 | ||||||
|   if (blink_free[tcache_id] != NULL) { |   if (blink_free[tcache_id] != NULL) { | ||||||
|  | @ -1208,29 +1249,24 @@ static void *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_sla | ||||||
|     blink_free[tcache_id] = bl->next; |     blink_free[tcache_id] = bl->next; | ||||||
|   } else if (cnt >= BLOCK_LINK_MAX_COUNT(tcache_id)) { |   } else if (cnt >= BLOCK_LINK_MAX_COUNT(tcache_id)) { | ||||||
|     dbg(1, "bl overflow for tcache %d", tcache_id); |     dbg(1, "bl overflow for tcache %d", tcache_id); | ||||||
|     return sh2_drc_dispatcher; |     return NULL; | ||||||
|   } else { |   } else { | ||||||
|     bl += cnt; |     bl += cnt; | ||||||
|     block_link_pool_counts[tcache_id] = cnt+1; |     block_link_pool_counts[tcache_id] = cnt+1; | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   // prepare link and add to ougoing list of owner
 |   // prepare link and add to outgoing list of owner
 | ||||||
|   bl->tcache_id = tcache_id; |   bl->tcache_id = tcache_id; | ||||||
|   bl->target_pc = pc; |   bl->target_pc = pc; | ||||||
|   bl->jump = tcache_ptr; |   bl->jump = tcache_ptr; | ||||||
|  |   bl->blx = NULL; | ||||||
|   bl->o_next = owner->o_links; |   bl->o_next = owner->o_links; | ||||||
|   owner->o_links = bl; |   owner->o_links = bl; | ||||||
| 
 | 
 | ||||||
|   if (be != NULL) { |   add_to_hashlist_unresolved(bl, tcache_id); | ||||||
|     dr_block_link(be, bl, 0); // jump not yet emitted by translate()
 |   return bl; | ||||||
|     return be->tcache_ptr; |  | ||||||
|   } |  | ||||||
|   else { |  | ||||||
|     add_to_hashlist_unresolved(bl, tcache_id); |  | ||||||
|     return sh2_drc_dispatcher; |  | ||||||
|   } |  | ||||||
| #else | #else | ||||||
|   return sh2_drc_dispatcher; |   return NULL; | ||||||
| #endif | #endif | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -1272,6 +1308,27 @@ static void dr_link_outgoing(struct block_entry *be, int tcache_id, int is_slave | ||||||
| #endif | #endif | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static void dr_activate_block(struct block_desc *bd, int tcache_id, int is_slave) | ||||||
|  | { | ||||||
|  |   int i; | ||||||
|  | 
 | ||||||
|  |   // connect branches
 | ||||||
|  |   for (i = 0; i < bd->entry_count; i++) { | ||||||
|  |     struct block_entry *entry = &bd->entryp[i]; | ||||||
|  |     add_to_hashlist(entry, tcache_id); | ||||||
|  |     // incoming branches
 | ||||||
|  |     dr_link_blocks(entry, tcache_id); | ||||||
|  |     if (!tcache_id) | ||||||
|  |       dr_link_blocks(entry, is_slave?2:1); | ||||||
|  |     // outgoing branches
 | ||||||
|  |     dr_link_outgoing(entry, tcache_id, is_slave); | ||||||
|  |   } | ||||||
|  | 
 | ||||||
|  |   // mark memory for overwrite detection
 | ||||||
|  |   dr_mark_memory(1, bd, tcache_id, 0); | ||||||
|  |   bd->active = 1; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| #define ADD_TO_ARRAY(array, count, item, failcode) { \ | #define ADD_TO_ARRAY(array, count, item, failcode) { \ | ||||||
|   if (count >= ARRAY_SIZE(array)) { \ |   if (count >= ARRAY_SIZE(array)) { \ | ||||||
|     dbg(1, "warning: " #array " overflow"); \ |     dbg(1, "warning: " #array " overflow"); \ | ||||||
|  | @ -2422,6 +2479,7 @@ static void rcache_invalidate(void) | ||||||
| { | { | ||||||
|   int i; |   int i; | ||||||
|   gconst_invalidate(); |   gconst_invalidate(); | ||||||
|  |   rcache_unlock_all(); | ||||||
| 
 | 
 | ||||||
|   for (i = 0; i < ARRAY_SIZE(cache_regs); i++) |   for (i = 0; i < ARRAY_SIZE(cache_regs); i++) | ||||||
|     rcache_free_vreg(i); |     rcache_free_vreg(i); | ||||||
|  | @ -2446,7 +2504,6 @@ static void rcache_invalidate(void) | ||||||
| 
 | 
 | ||||||
| static void rcache_flush(void) | static void rcache_flush(void) | ||||||
| { | { | ||||||
|   rcache_unlock_all(); |  | ||||||
|   rcache_clean(); |   rcache_clean(); | ||||||
|   rcache_invalidate(); |   rcache_invalidate(); | ||||||
| } | } | ||||||
|  | @ -2916,13 +2973,22 @@ static void *dr_get_pc_base(u32 pc, SH2 *sh2); | ||||||
| 
 | 
 | ||||||
| static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) | static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) | ||||||
| { | { | ||||||
|  |   // branch targets in current block
 | ||||||
|   u32 branch_target_pc[MAX_LOCAL_BRANCHES]; |   u32 branch_target_pc[MAX_LOCAL_BRANCHES]; | ||||||
|   void *branch_target_ptr[MAX_LOCAL_BRANCHES]; |   void *branch_target_ptr[MAX_LOCAL_BRANCHES]; | ||||||
|   int branch_target_count = 0; |   int branch_target_count = 0; | ||||||
|   void *branch_patch_ptr[MAX_LOCAL_BRANCHES]; |   // unresolved local forward branches, for fixup at block end
 | ||||||
|   u32 branch_patch_pc[MAX_LOCAL_BRANCHES]; |   u32 branch_patch_pc[MAX_LOCAL_BRANCHES]; | ||||||
|  |   void *branch_patch_ptr[MAX_LOCAL_BRANCHES]; | ||||||
|   int branch_patch_count = 0; |   int branch_patch_count = 0; | ||||||
|  |   // external branch targets with a block link/exit area
 | ||||||
|  |   u32 blx_target_pc[MAX_LOCAL_BRANCHES]; | ||||||
|  |   void *blx_target_ptr[MAX_LOCAL_BRANCHES]; | ||||||
|  |   struct block_link *blx_target_bl[MAX_LOCAL_BRANCHES]; | ||||||
|  |   int blx_target_count = 0; | ||||||
|  | 
 | ||||||
|   u8 op_flags[BLOCK_INSN_LIMIT]; |   u8 op_flags[BLOCK_INSN_LIMIT]; | ||||||
|  | 
 | ||||||
|   struct drcf { |   struct drcf { | ||||||
|     int delay_reg:8; |     int delay_reg:8; | ||||||
|     u32 loop_type:8; |     u32 loop_type:8; | ||||||
|  | @ -2931,9 +2997,12 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) | ||||||
|     u32 pending_branch_direct:1; |     u32 pending_branch_direct:1; | ||||||
|     u32 pending_branch_indirect:1; |     u32 pending_branch_indirect:1; | ||||||
|   } drcf = { 0, }; |   } drcf = { 0, }; | ||||||
|  | 
 | ||||||
| #if LOOP_OPTIMIZER | #if LOOP_OPTIMIZER | ||||||
|   void *pinned_loop_ptr[MAX_LOCAL_BRANCHES/16]; |   // loops with pinned registers for optimization
 | ||||||
|  |   // pinned regs are like statics and don't need saving/restoring inside a loop
 | ||||||
|   u32 pinned_loop_pc[MAX_LOCAL_BRANCHES/16]; |   u32 pinned_loop_pc[MAX_LOCAL_BRANCHES/16]; | ||||||
|  |   void *pinned_loop_ptr[MAX_LOCAL_BRANCHES/16]; | ||||||
|   u32 pinned_loop_mask[MAX_LOCAL_BRANCHES/16]; |   u32 pinned_loop_mask[MAX_LOCAL_BRANCHES/16]; | ||||||
|   int pinned_loop_count = 0; |   int pinned_loop_count = 0; | ||||||
| #endif | #endif | ||||||
|  | @ -2976,24 +3045,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) | ||||||
|     base_literals, end_literals - base_literals); |     base_literals, end_literals - base_literals); | ||||||
| 
 | 
 | ||||||
|   if (block) { |   if (block) { | ||||||
|     // connect branches
 |  | ||||||
|     dbg(2, "== %csh2 reuse block %08x-%08x,%08x-%08x -> %p", sh2->is_slave ? 's' : 'm', |     dbg(2, "== %csh2 reuse block %08x-%08x,%08x-%08x -> %p", sh2->is_slave ? 's' : 'm', | ||||||
|       base_pc, end_pc, base_literals, end_literals, block->entryp->tcache_ptr); |       base_pc, end_pc, base_literals, end_literals, block->entryp->tcache_ptr); | ||||||
|     for (i = 0; i < block->entry_count; i++) { |     dr_activate_block(block, tcache_id, sh2->is_slave); | ||||||
|       entry = &block->entryp[i]; |  | ||||||
|       add_to_hashlist(entry, tcache_id); |  | ||||||
| #if LINK_BRANCHES |  | ||||||
|       // incoming branches
 |  | ||||||
|       dr_link_blocks(entry, tcache_id); |  | ||||||
|       if (!tcache_id) |  | ||||||
|         dr_link_blocks(entry, sh2->is_slave?2:1); |  | ||||||
|       // outgoing branches
 |  | ||||||
|       dr_link_outgoing(entry, tcache_id, sh2->is_slave); |  | ||||||
| #endif |  | ||||||
|     } |  | ||||||
|     // mark memory for overwrite detection
 |  | ||||||
|     dr_mark_memory(1, block, tcache_id, 0); |  | ||||||
|     block->active = 1; |  | ||||||
|     emith_update_cache(); |     emith_update_cache(); | ||||||
|     return block->entryp[0].tcache_ptr; |     return block->entryp[0].tcache_ptr; | ||||||
|   } |   } | ||||||
|  | @ -3069,7 +3123,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) | ||||||
|         if (op_flags[v] & OF_BASIC_LOOP) { |         if (op_flags[v] & OF_BASIC_LOOP) { | ||||||
|           m3 &= ~rcache_regs_static & ~BITMASK4(SHR_PC, SHR_PR, SHR_SR, SHR_MEM); |           m3 &= ~rcache_regs_static & ~BITMASK4(SHR_PC, SHR_PR, SHR_SR, SHR_MEM); | ||||||
|           if (m3 && count_bits(m3) < count_bits(rcache_hregs_reg) && |           if (m3 && count_bits(m3) < count_bits(rcache_hregs_reg) && | ||||||
|               pinned_loop_count < ARRAY_SIZE(pinned_loop_pc)) { |               pinned_loop_count < ARRAY_SIZE(pinned_loop_pc)-1) { | ||||||
|             pinned_loop_mask[pinned_loop_count] = m3; |             pinned_loop_mask[pinned_loop_count] = m3; | ||||||
|             pinned_loop_pc[pinned_loop_count++] = base_pc + 2*v; |             pinned_loop_pc[pinned_loop_count++] = base_pc + 2*v; | ||||||
|           } else |           } else | ||||||
|  | @ -3080,6 +3134,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) | ||||||
|     } |     } | ||||||
| #endif | #endif | ||||||
|   } |   } | ||||||
|  |   pinned_loop_pc[pinned_loop_count] = -1; | ||||||
| 
 | 
 | ||||||
|   if (branch_target_count > 0) { |   if (branch_target_count > 0) { | ||||||
|     memset(branch_target_ptr, 0, sizeof(branch_target_ptr[0]) * branch_target_count); |     memset(branch_target_ptr, 0, sizeof(branch_target_ptr[0]) * branch_target_count); | ||||||
|  | @ -3101,7 +3156,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|   // clear stale state after compile errors
 |   // clear stale state after compile errors
 | ||||||
|   rcache_unlock_all(); |  | ||||||
|   rcache_invalidate(); |   rcache_invalidate(); | ||||||
|   emith_invalidate_t(); |   emith_invalidate_t(); | ||||||
|   drcf = (struct drcf) { 0 }; |   drcf = (struct drcf) { 0 }; | ||||||
|  | @ -3146,39 +3200,31 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) | ||||||
|         emith_sync_t(sr); |         emith_sync_t(sr); | ||||||
|         rcache_flush(); |         rcache_flush(); | ||||||
|         emith_flush(); |         emith_flush(); | ||||||
| 
 |  | ||||||
|         // make block entry
 |  | ||||||
|         v = block->entry_count; |  | ||||||
|         entry = &block->entryp[v]; |  | ||||||
|         if (v < ARRAY_SIZE(block->entryp)) |  | ||||||
|         { |  | ||||||
|           entry = &block->entryp[v]; |  | ||||||
|           entry->pc = pc; |  | ||||||
|           entry->tcache_ptr = tcache_ptr; |  | ||||||
|           entry->links = entry->o_links = NULL; |  | ||||||
| #if (DRC_DEBUG & 2) |  | ||||||
|           entry->block = block; |  | ||||||
| #endif |  | ||||||
|           add_to_hashlist(entry, tcache_id); |  | ||||||
|           block->entry_count++; |  | ||||||
| 
 |  | ||||||
|           dbg(2, "-- %csh2 block #%d,%d entry %08x -> %p", |  | ||||||
|             sh2->is_slave ? 's' : 'm', tcache_id, blkid_main, |  | ||||||
|             pc, tcache_ptr); |  | ||||||
|         } |  | ||||||
|         else { |  | ||||||
|           dbg(1, "too many entryp for block #%d,%d pc=%08x", |  | ||||||
|             tcache_id, blkid_main, pc); |  | ||||||
|           break; |  | ||||||
|         } |  | ||||||
|       } else { |  | ||||||
|         entry = block->entryp; |  | ||||||
|       } |       } | ||||||
| 
 | 
 | ||||||
|       // since we made a block entry, link any other blocks that jump to it
 |       // make block entry
 | ||||||
|       dr_link_blocks(entry, tcache_id); |       v = block->entry_count; | ||||||
|       if (!tcache_id) // can safely link from cpu-local to global memory
 |       entry = &block->entryp[v]; | ||||||
|         dr_link_blocks(entry, sh2->is_slave?2:1); |       if (v < ARRAY_SIZE(block->entryp)) | ||||||
|  |       { | ||||||
|  |         entry = &block->entryp[v]; | ||||||
|  |         entry->pc = pc; | ||||||
|  |         entry->tcache_ptr = tcache_ptr; | ||||||
|  |         entry->links = entry->o_links = NULL; | ||||||
|  | #if (DRC_DEBUG & 2) | ||||||
|  |         entry->block = block; | ||||||
|  | #endif | ||||||
|  |         block->entry_count++; | ||||||
|  | 
 | ||||||
|  |         dbg(2, "-- %csh2 block #%d,%d entry %08x -> %p", | ||||||
|  |           sh2->is_slave ? 's' : 'm', tcache_id, blkid_main, | ||||||
|  |           pc, tcache_ptr); | ||||||
|  |       } | ||||||
|  |       else { | ||||||
|  |         dbg(1, "too many entryp for block #%d,%d pc=%08x", | ||||||
|  |           tcache_id, blkid_main, pc); | ||||||
|  |         break; | ||||||
|  |       } | ||||||
| 
 | 
 | ||||||
|       v = find_in_sorted_array(branch_target_pc, branch_target_count, pc); |       v = find_in_sorted_array(branch_target_pc, branch_target_count, pc); | ||||||
|       if (v >= 0) |       if (v >= 0) | ||||||
|  | @ -3220,29 +3266,35 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|       // check cycles
 |       // check cycles
 | ||||||
|       tmp = rcache_get_tmp_arg(0); |  | ||||||
|       sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); |       sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); | ||||||
|       emith_cmp_r_imm(sr, 0); |       emith_cmp_r_imm(sr, 0); | ||||||
|  | 
 | ||||||
| #if LOOP_OPTIMIZER | #if LOOP_OPTIMIZER | ||||||
|       // on drc exit pinned registers must be saved
 |       u8 *jp = NULL; | ||||||
|       if (op_flags[i] & OF_BASIC_LOOP) { |       if (op_flags[i] & OF_BASIC_LOOP) { | ||||||
|         EMITH_JMP_START(DCOND_GT); |         // if exiting a pinned loop pinned regs must be written back to ctx
 | ||||||
|  |         // since they are reloaded in the loop entry code
 | ||||||
|  |         jp = tcache_ptr; | ||||||
|  |         emith_jump_cond_patchable(DCOND_GT, jp); // XXX need API for JMP_POS
 | ||||||
|         rcache_save_pinned(); |         rcache_save_pinned(); | ||||||
|         emith_move_r_imm(tmp, pc); |  | ||||||
|         emith_jump(sh2_drc_exit); |  | ||||||
|         EMITH_JMP_END(DCOND_GT); |  | ||||||
|       } else |  | ||||||
| #endif |  | ||||||
|       if (emith_jump_cond_inrange(sh2_drc_exit)) { |  | ||||||
|         emith_move_r_imm_c(DCOND_LE, tmp, pc); |  | ||||||
|         emith_jump_cond(DCOND_LE, sh2_drc_exit); |  | ||||||
|       } else { |  | ||||||
|         EMITH_JMP_START(DCOND_GT); |  | ||||||
|         emith_move_r_imm(tmp, pc); |  | ||||||
|         emith_jump(sh2_drc_exit); |  | ||||||
|         EMITH_JMP_END(DCOND_GT); |  | ||||||
|       } |       } | ||||||
|       rcache_free_tmp(tmp); | #endif | ||||||
|  |       if (blx_target_count < ARRAY_SIZE(blx_target_pc)) { | ||||||
|  |         // exit via stub in blx table (saves some 1-3 insns in the main flow)
 | ||||||
|  |         blx_target_pc[blx_target_count] = pc|1; | ||||||
|  |         blx_target_bl[blx_target_count] = NULL; | ||||||
|  |         blx_target_ptr[blx_target_count++] = tcache_ptr; | ||||||
|  |       } else { | ||||||
|  |         // blx table full, must inline exit code
 | ||||||
|  |         tmp = rcache_get_tmp_arg(0); | ||||||
|  |         emith_move_r_imm_c(DCOND_LE, tmp, pc); | ||||||
|  |         rcache_free_tmp(tmp); | ||||||
|  |       } | ||||||
|  |       emith_jump_cond_patchable(DCOND_LE, tcache_ptr); | ||||||
|  | #if LOOP_OPTIMIZER | ||||||
|  |       if (op_flags[i] & OF_BASIC_LOOP) | ||||||
|  |         emith_jump_patch(jp, tcache_ptr, NULL); | ||||||
|  | #endif | ||||||
| 
 | 
 | ||||||
| #if (DRC_DEBUG & 32) | #if (DRC_DEBUG & 32) | ||||||
|       // block hit counter
 |       // block hit counter
 | ||||||
|  | @ -3880,7 +3932,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) | ||||||
|         case 2: // SHAL Rn    0100nnnn00100000
 |         case 2: // SHAL Rn    0100nnnn00100000
 | ||||||
|           tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); |           tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); | ||||||
|           sr  = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); |           sr  = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); | ||||||
|           emith_sync_t(sr); |           emith_invalidate_t(); | ||||||
|           emith_tpop_carry(sr, 0); // dummy
 |           emith_tpop_carry(sr, 0); // dummy
 | ||||||
|           emith_lslf(tmp, tmp2, 1); |           emith_lslf(tmp, tmp2, 1); | ||||||
|           emith_tpush_carry(sr, 0); |           emith_tpush_carry(sr, 0); | ||||||
|  | @ -3909,7 +3961,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) | ||||||
|         case 2: // SHAR Rn    0100nnnn00100001
 |         case 2: // SHAR Rn    0100nnnn00100001
 | ||||||
|           tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); |           tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); | ||||||
|           sr  = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); |           sr  = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); | ||||||
|           emith_sync_t(sr); |           emith_invalidate_t(); | ||||||
|           emith_tpop_carry(sr, 0); // dummy
 |           emith_tpop_carry(sr, 0); // dummy
 | ||||||
|           if (op & 0x20) { |           if (op & 0x20) { | ||||||
|             emith_asrf(tmp, tmp2, 1); |             emith_asrf(tmp, tmp2, 1); | ||||||
|  | @ -3967,7 +4019,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) | ||||||
|         case 0x05: // ROTR   Rn          0100nnnn00000101
 |         case 0x05: // ROTR   Rn          0100nnnn00000101
 | ||||||
|           tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); |           tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); | ||||||
|           sr  = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); |           sr  = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); | ||||||
|           emith_sync_t(sr); |           emith_invalidate_t(); | ||||||
|           emith_tpop_carry(sr, 0); // dummy
 |           emith_tpop_carry(sr, 0); // dummy
 | ||||||
|           if (op & 1) { |           if (op & 1) { | ||||||
|             emith_rorf(tmp, tmp2, 1); |             emith_rorf(tmp, tmp2, 1); | ||||||
|  | @ -4351,11 +4403,12 @@ end_op: | ||||||
|       int cond = -1; |       int cond = -1; | ||||||
|       int ctaken = 0; |       int ctaken = 0; | ||||||
|       void *target = NULL; |       void *target = NULL; | ||||||
|       int patchable = 0; |       struct block_link *bl = NULL; | ||||||
| 
 | 
 | ||||||
|       if (OP_ISBRACND(opd_b->op)) |       if (OP_ISBRACND(opd_b->op)) | ||||||
|         ctaken = (op_flags[i] & OF_DELAY_OP) ? 1 : 2; |         ctaken = (op_flags[i] & OF_DELAY_OP) ? 1 : 2; | ||||||
|       cycles += ctaken; // assume branch taken
 |       cycles += ctaken; // assume branch taken
 | ||||||
|  | 
 | ||||||
| #if LOOP_OPTIMIZER | #if LOOP_OPTIMIZER | ||||||
|       if ((drcf.loop_type == OF_IDLE_LOOP || |       if ((drcf.loop_type == OF_IDLE_LOOP || | ||||||
|           (drcf.loop_type == OF_DELAY_LOOP && drcf.delay_reg >= 0))) |           (drcf.loop_type == OF_DELAY_LOOP && drcf.delay_reg >= 0))) | ||||||
|  | @ -4365,14 +4418,35 @@ end_op: | ||||||
|         emith_sh2_delay_loop(cycles, drcf.delay_reg); |         emith_sh2_delay_loop(cycles, drcf.delay_reg); | ||||||
|         drcf.polling = drcf.loop_type = 0; |         drcf.polling = drcf.loop_type = 0; | ||||||
|       } |       } | ||||||
|  | 
 | ||||||
|  |       if (target_pc < pc && pinned_loop_pc[pinned_loop_count] == target_pc) { | ||||||
|  |         // backward jump at end of optimized loop
 | ||||||
|  |         rcache_unpin_all(); | ||||||
|  |         target = pinned_loop_ptr[pinned_loop_count]; | ||||||
|  |         pinned_loop_count ++; | ||||||
|  |       } | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|       sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); |       sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); | ||||||
|       FLUSH_CYCLES(sr); |       FLUSH_CYCLES(sr); | ||||||
|  |       rcache_unlock_all(); | ||||||
|       rcache_clean(); |       rcache_clean(); | ||||||
| 
 | 
 | ||||||
|       // emit condition test for conditional branch
 | #if CALL_STACK | ||||||
|  |       void *rtsadd = NULL, *rtsret = NULL; | ||||||
|  |       if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) { | ||||||
|  |         // BSR - save rts data
 | ||||||
|  |         tmp = rcache_get_tmp_arg(1); | ||||||
|  |         rtsadd = tcache_ptr; | ||||||
|  |         emith_move_r_imm_s8_patchable(tmp, 0); | ||||||
|  |         rcache_invalidate_tmp(); | ||||||
|  |         emith_call(sh2_drc_dispatcher_call); | ||||||
|  |         rtsret = tcache_ptr; | ||||||
|  |       } | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|       if (OP_ISBRACND(opd_b->op)) { |       if (OP_ISBRACND(opd_b->op)) { | ||||||
|  |         // BT[S], BF[S] - emit condition test
 | ||||||
|         cond = (opd_b->op == OP_BRANCH_CF) ? DCOND_EQ : DCOND_NE; |         cond = (opd_b->op == OP_BRANCH_CF) ? DCOND_EQ : DCOND_NE; | ||||||
|         if (delay_dep_fw & BITMASK1(SHR_T)) { |         if (delay_dep_fw & BITMASK1(SHR_T)) { | ||||||
|           emith_sync_t(sr); |           emith_sync_t(sr); | ||||||
|  | @ -4396,61 +4470,118 @@ end_op: | ||||||
|       { |       { | ||||||
|         // local branch
 |         // local branch
 | ||||||
|         if (branch_target_ptr[v]) { |         if (branch_target_ptr[v]) { | ||||||
|           // jumps back can be linked here since host PC is already known
 |           // local backward jump, link here now since host PC is already known
 | ||||||
|           target = branch_target_ptr[v]; |           target = branch_target_ptr[v]; | ||||||
|  |           if (cond != -1) | ||||||
|  |             emith_jump_cond(cond, target); | ||||||
|  |           else { | ||||||
|  |             emith_jump(target); | ||||||
|  |             rcache_invalidate(); | ||||||
|  |           } | ||||||
|         } else if (branch_patch_count < MAX_LOCAL_BRANCHES) { |         } else if (branch_patch_count < MAX_LOCAL_BRANCHES) { | ||||||
|  |           // local forward jump
 | ||||||
|           target = tcache_ptr; |           target = tcache_ptr; | ||||||
|           branch_patch_pc[branch_patch_count] = target_pc; |           branch_patch_pc[branch_patch_count] = target_pc; | ||||||
|           branch_patch_ptr[branch_patch_count] = target; |           branch_patch_ptr[branch_patch_count] = target; | ||||||
|           branch_patch_count++; |           branch_patch_count++; | ||||||
|           patchable = 1; |           if (cond != -1) | ||||||
|  |             emith_jump_cond_patchable(cond, target); | ||||||
|  |           else { | ||||||
|  |             emith_jump_patchable(target); | ||||||
|  |             rcache_invalidate(); | ||||||
|  |           } | ||||||
|         } else |         } else | ||||||
|           dbg(1, "warning: too many local branches"); |           dbg(1, "warning: too many local branches"); | ||||||
|       } |       } | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|       rcache_unlock_all(); |  | ||||||
| #if LOOP_OPTIMIZER |  | ||||||
|       if (target && pinned_loop_pc[pinned_loop_count] == target_pc) { |  | ||||||
|         rcache_unpin_all(); |  | ||||||
|         target = pinned_loop_ptr[pinned_loop_count]; |  | ||||||
|         pinned_loop_count ++; |  | ||||||
|       } |  | ||||||
| #endif |  | ||||||
| 
 |  | ||||||
|       if (target == NULL) |       if (target == NULL) | ||||||
|       { |       { | ||||||
|         // can't resolve branch locally, make a block exit
 |         // can't resolve branch locally, make a block exit
 | ||||||
|         rcache_clean(); |         bl = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); | ||||||
|         tmp = rcache_get_tmp_arg(0); |         if (cond != -1) { | ||||||
|         emith_move_r_imm(tmp, target_pc); | #if 1 | ||||||
|         rcache_free_tmp(tmp); |           if (bl) { | ||||||
|  |             if (blx_target_count < ARRAY_SIZE(blx_target_pc)) { | ||||||
|  |               // conditional jumps get a blx stub for the far jump
 | ||||||
|  |               blx_target_pc[blx_target_count] = target_pc; | ||||||
|  |               blx_target_bl[blx_target_count] = bl; | ||||||
|  |               blx_target_ptr[blx_target_count++] = tcache_ptr; | ||||||
|  |               bl->type = BL_JCCBLX; | ||||||
|  |               target = tcache_ptr; | ||||||
|  |             } else { | ||||||
|  |               // blx table full, patch jump only
 | ||||||
|  |               tmp = rcache_get_tmp_arg(0); | ||||||
|  |               emith_move_r_imm(tmp, target_pc); | ||||||
|  |               rcache_free_tmp(tmp); | ||||||
|  |               bl->jump = tcache_ptr; | ||||||
|  |               bl->type = BL_JMP; | ||||||
|  |               target = sh2_drc_dispatcher; | ||||||
|  |             } | ||||||
|  |             emith_jump_cond_patchable(cond, target); | ||||||
|  |           } else { | ||||||
|  |             // cannot link, inline jump @dispatcher
 | ||||||
|  |             EMITH_JMP_START(emith_invert_cond(cond)); | ||||||
|  |             tmp = rcache_get_tmp_arg(0); | ||||||
|  |             emith_move_r_imm(tmp, target_pc); | ||||||
|  |             rcache_free_tmp(tmp); | ||||||
|  |             target = sh2_drc_dispatcher; | ||||||
| 
 | 
 | ||||||
| #if CALL_STACK |             emith_jump(target); | ||||||
|         if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) { |             EMITH_JMP_END(emith_invert_cond(cond)); | ||||||
|           // BSR
 |           } | ||||||
|           emith_call(sh2_drc_dispatcher_call); | #elif 1 | ||||||
|         } |           // jump @dispatcher - ARM 32bit version with conditional execution
 | ||||||
| #endif |           EMITH_SJMP_START(emith_invert_cond(cond)); | ||||||
|  |           tmp = rcache_get_tmp_arg(0); | ||||||
|  |           emith_move_r_imm_c(cond, tmp, target_pc); | ||||||
|  |           rcache_free_tmp(tmp); | ||||||
|  |           target = sh2_drc_dispatcher; | ||||||
| 
 | 
 | ||||||
|         target = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); |           if (bl) { | ||||||
|         patchable = 1; |             bl->jump = tcache_ptr; | ||||||
|       } |             bl->type = BL_JMP; | ||||||
| 
 |           } | ||||||
|       // create branch
 |  | ||||||
|       if (cond != -1) { |  | ||||||
|         if (patchable) |  | ||||||
|           emith_jump_cond_patchable(cond, target); |           emith_jump_cond_patchable(cond, target); | ||||||
|         else |           EMITH_SJMP_END(emith_invert_cond(cond)); | ||||||
|           emith_jump_cond(cond, target); | #else | ||||||
|       } else { |           // jump @dispatcher - generic version (jump !cond @over, jump @trgt)
 | ||||||
|         rcache_invalidate(); |           EMITH_JMP_START(emith_invert_cond(cond)); | ||||||
|         if (patchable) |           if (bl) { | ||||||
|  |             bl->jump = tcache_ptr; | ||||||
|  |             bl->type = BL_LDJMP; | ||||||
|  |           } | ||||||
|  |           tmp = rcache_get_tmp_arg(0); | ||||||
|  |           emith_move_r_imm(tmp, target_pc); | ||||||
|  |           rcache_free_tmp(tmp); | ||||||
|  |           target = sh2_drc_dispatcher; | ||||||
|  | 
 | ||||||
|           emith_jump_patchable(target); |           emith_jump_patchable(target); | ||||||
|         else |           EMITH_JMP_END(emith_invert_cond(cond)); | ||||||
|           emith_jump(target); | #endif | ||||||
|  |         } else { | ||||||
|  |           // unconditional, has the far jump inlined
 | ||||||
|  |           if (bl) | ||||||
|  |             bl->type = BL_LDJMP; | ||||||
|  | 
 | ||||||
|  |           tmp = rcache_get_tmp_arg(0); | ||||||
|  |           emith_move_r_imm(tmp, target_pc); | ||||||
|  |           rcache_free_tmp(tmp); | ||||||
|  |           target = sh2_drc_dispatcher; | ||||||
|  | 
 | ||||||
|  |           emith_jump_patchable(target); | ||||||
|  |           rcache_invalidate(); | ||||||
|  |         } | ||||||
|       } |       } | ||||||
| 
 | 
 | ||||||
|  |       emith_flush(); | ||||||
|  |       if (bl) | ||||||
|  |         memcpy(bl->jdisp, bl->jump, emith_jump_at_size()); | ||||||
|  | #if CALL_STACK | ||||||
|  |       if (rtsadd) | ||||||
|  |         emith_move_r_imm_s8_patch(rtsadd, tcache_ptr - (u8 *)rtsret); | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|       // branch not taken, correct cycle count
 |       // branch not taken, correct cycle count
 | ||||||
|       if (ctaken) |       if (ctaken) | ||||||
|         emith_add_r_imm(sr, ctaken << 12); |         emith_add_r_imm(sr, ctaken << 12); | ||||||
|  | @ -4463,35 +4594,57 @@ end_op: | ||||||
|         drcf.polling = drcf.loop_type = 0; |         drcf.polling = drcf.loop_type = 0; | ||||||
|     } |     } | ||||||
|     else if (drcf.pending_branch_indirect) { |     else if (drcf.pending_branch_indirect) { | ||||||
|       void *target; |  | ||||||
|       u32 target_pc; |       u32 target_pc; | ||||||
|  |       struct block_link *bl = NULL; | ||||||
| 
 | 
 | ||||||
|       sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); |       sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); | ||||||
|       FLUSH_CYCLES(sr); |       FLUSH_CYCLES(sr); | ||||||
|       emith_sync_t(sr); |       emith_sync_t(sr); | ||||||
|       rcache_clean(); |       rcache_clean(); | ||||||
|  | 
 | ||||||
|       tmp = rcache_get_reg_arg(0, SHR_PC, NULL); |       tmp = rcache_get_reg_arg(0, SHR_PC, NULL); | ||||||
|       rcache_invalidate(); | 
 | ||||||
| #if CALL_STACK | #if CALL_STACK | ||||||
|       struct op_data *opd_b = (op_flags[i] & OF_DELAY_OP) ? opd-1 : opd; |       struct op_data *opd_b = (op_flags[i] & OF_DELAY_OP) ? opd-1 : opd; | ||||||
|       if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) { |       void *rtsadd = NULL, *rtsret = NULL; | ||||||
|         // JSR/BSRF
 |  | ||||||
|         emith_call(sh2_drc_dispatcher_call); |  | ||||||
|       } |  | ||||||
| 
 | 
 | ||||||
|  |       if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) { | ||||||
|  |         // JSR, BSRF - save rts data
 | ||||||
|  |         tmp = rcache_get_tmp_arg(1); | ||||||
|  |         rtsadd = tcache_ptr; | ||||||
|  |         emith_move_r_imm_s8_patchable(tmp, 0); | ||||||
|  |         rcache_invalidate_tmp(); | ||||||
|  |         emith_call(sh2_drc_dispatcher_call); | ||||||
|  |         rtsret = tcache_ptr; | ||||||
|  |       } | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #if CALL_STACK | ||||||
|       if (opd_b->rm == SHR_PR) { |       if (opd_b->rm == SHR_PR) { | ||||||
|         // RTS
 |         // RTS - restore rts data, else jump to dispatcher
 | ||||||
|         emith_jump(sh2_drc_dispatcher_return); |         emith_jump(sh2_drc_dispatcher_return); | ||||||
|       } else |       } else | ||||||
| #endif | #endif | ||||||
|       if (gconst_get(SHR_PC, &target_pc)) { |       if (gconst_get(SHR_PC, &target_pc)) { | ||||||
|         // JMP const, treat like unconditional direct branch
 |         // JMP, JSR, BRAF, BSRF const - treat like unconditional direct branch
 | ||||||
|         target = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); |         bl = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); | ||||||
|         emith_jump_patchable(target); |         if (bl) { // pc already loaded somewhere else, can patch jump only
 | ||||||
|  |           bl->type = BL_JMP; | ||||||
|  |           bl->jump = tcache_ptr; | ||||||
|  |         } | ||||||
|  |         emith_jump_patchable(sh2_drc_dispatcher); | ||||||
|       } else { |       } else { | ||||||
|         // JMP
 |         // JMP, JSR, BRAF, BSRF not const
 | ||||||
|         emith_jump(sh2_drc_dispatcher); |         emith_jump(sh2_drc_dispatcher); | ||||||
|       } |       } | ||||||
|  |       rcache_invalidate(); | ||||||
|  | 
 | ||||||
|  |       emith_flush(); | ||||||
|  | #if CALL_STACK | ||||||
|  |       if (rtsadd) | ||||||
|  |         emith_move_r_imm_s8_patch(rtsadd, tcache_ptr - (u8 *)rtsret); | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|       drcf.pending_branch_indirect = 0; |       drcf.pending_branch_indirect = 0; | ||||||
|       drcf.polling = drcf.loop_type = 0; |       drcf.polling = drcf.loop_type = 0; | ||||||
|     } |     } | ||||||
|  | @ -4508,24 +4661,48 @@ end_op: | ||||||
| 
 | 
 | ||||||
|   if (! OP_ISBRAUC(opd->op)) |   if (! OP_ISBRAUC(opd->op)) | ||||||
|   { |   { | ||||||
|     void *target; |     struct block_link *bl; | ||||||
| 
 | 
 | ||||||
|     tmp = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); |     tmp = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); | ||||||
|     FLUSH_CYCLES(tmp); |     FLUSH_CYCLES(tmp); | ||||||
|     emith_sync_t(tmp); |     emith_sync_t(tmp); | ||||||
| 
 | 
 | ||||||
|     rcache_clean(); |     rcache_clean(); | ||||||
|  |     bl = dr_prepare_ext_branch(block->entryp, pc, sh2->is_slave, tcache_id); | ||||||
|  |     if (bl) | ||||||
|  |       bl->type = BL_LDJMP; | ||||||
|     tmp = rcache_get_tmp_arg(0); |     tmp = rcache_get_tmp_arg(0); | ||||||
|     emith_move_r_imm(tmp, pc); |     emith_move_r_imm(tmp, pc); | ||||||
| 
 |     emith_jump_patchable(sh2_drc_dispatcher); | ||||||
|     target = dr_prepare_ext_branch(block->entryp, pc, sh2->is_slave, tcache_id); |  | ||||||
|     if (target == NULL) |  | ||||||
|       return NULL; |  | ||||||
|     rcache_invalidate(); |     rcache_invalidate(); | ||||||
|     emith_jump_patchable(target); |     emith_flush(); | ||||||
|  |     if (bl) | ||||||
|  |       memcpy(bl->jdisp, bl->jump, emith_jump_at_size()); | ||||||
|   } else |   } else | ||||||
|     rcache_flush(); |     rcache_flush(); | ||||||
|  | 
 | ||||||
|  |   // emit blx area
 | ||||||
|  |   for (i = 0; i < blx_target_count; i++) { | ||||||
|  |     void *target = (blx_target_pc[i] & 1 ? sh2_drc_exit : sh2_drc_dispatcher); | ||||||
|  |     struct block_link *bl = blx_target_bl[i]; | ||||||
|  | 
 | ||||||
|  |     emith_pool_check(); | ||||||
|  |     if (bl) | ||||||
|  |       bl->blx = tcache_ptr; | ||||||
|  |     emith_jump_patch(blx_target_ptr[i], tcache_ptr, NULL); | ||||||
|  |     tmp = rcache_get_tmp_arg(0); | ||||||
|  |     emith_move_r_imm(tmp, blx_target_pc[i] & ~1); | ||||||
|  |     emith_jump(target); | ||||||
|  |     rcache_invalidate(); | ||||||
|  |     emith_flush(); | ||||||
|  |     if (bl) | ||||||
|  |       memcpy(bl->jdisp, bl->blx, emith_jump_at_size()); | ||||||
|  |   } | ||||||
|  | 
 | ||||||
|   emith_flush(); |   emith_flush(); | ||||||
|  |   do_host_disasm(tcache_id); | ||||||
|  | 
 | ||||||
|  |   emith_pool_commit(0); | ||||||
| 
 | 
 | ||||||
|   // link local branches
 |   // link local branches
 | ||||||
|   for (i = 0; i < branch_patch_count; i++) { |   for (i = 0; i < branch_patch_count; i++) { | ||||||
|  | @ -4539,20 +4716,18 @@ end_op: | ||||||
|       target = tcache_ptr; |       target = tcache_ptr; | ||||||
|       tmp = rcache_get_tmp_arg(0); |       tmp = rcache_get_tmp_arg(0); | ||||||
|       emith_move_r_imm(tmp, branch_patch_pc[i]); |       emith_move_r_imm(tmp, branch_patch_pc[i]); | ||||||
|       rcache_flush(); |  | ||||||
|       emith_jump(sh2_drc_dispatcher); |       emith_jump(sh2_drc_dispatcher); | ||||||
|  |       rcache_flush(); | ||||||
|     } |     } | ||||||
|     emith_jump_patch(branch_patch_ptr[i], target, NULL); |     emith_jump_patch(branch_patch_ptr[i], target, NULL); | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   emith_pool_commit(0); |  | ||||||
| 
 |  | ||||||
|   dr_mark_memory(1, block, tcache_id, 0); |  | ||||||
| 
 |  | ||||||
|   tcache_ptrs[tcache_id] = tcache_ptr; |   tcache_ptrs[tcache_id] = tcache_ptr; | ||||||
| 
 |  | ||||||
|   host_instructions_updated(block_entry_ptr, tcache_ptr); |   host_instructions_updated(block_entry_ptr, tcache_ptr); | ||||||
| 
 | 
 | ||||||
|  |   dr_activate_block(block, tcache_id, sh2->is_slave); | ||||||
|  |   emith_update_cache(); | ||||||
|  | 
 | ||||||
|   do_host_disasm(tcache_id); |   do_host_disasm(tcache_id); | ||||||
| 
 | 
 | ||||||
|   dbg(2, " block #%d,%d -> %p tcache %d/%d, insns %d -> %d %.3f", |   dbg(2, " block #%d,%d -> %p tcache %d/%d, insns %d -> %d %.3f", | ||||||
|  | @ -4574,7 +4749,6 @@ end_op: | ||||||
|   fflush(stdout); |   fflush(stdout); | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|   emith_update_cache(); |  | ||||||
|   return block_entry_ptr; |   return block_entry_ptr; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -4769,14 +4943,14 @@ static void sh2_generate_utils(void) | ||||||
|   // pc = sh2_drc_dispatcher_call(u32 pc)
 |   // pc = sh2_drc_dispatcher_call(u32 pc)
 | ||||||
|   sh2_drc_dispatcher_call = (void *)tcache_ptr; |   sh2_drc_dispatcher_call = (void *)tcache_ptr; | ||||||
|   emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx)); |   emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx)); | ||||||
|   emith_ctx_read(arg1, SHR_PR * 4); |  | ||||||
|   emith_add_r_imm(arg2, 2*sizeof(void *)); |   emith_add_r_imm(arg2, 2*sizeof(void *)); | ||||||
|   emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *)); |   emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *)); | ||||||
|   emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx)); |   emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx)); | ||||||
|   emith_add_r_r_r_lsl_ptr(arg2, CONTEXT_REG, arg2, 0); |   emith_add_r_r_r_lsl_ptr(arg2, CONTEXT_REG, arg2, 0); | ||||||
|   emith_write_r_r_offs(arg1, arg2, offsetof(SH2, rts_cache)); |   emith_ctx_read(arg3, SHR_PR * 4); | ||||||
|   emith_add_r_ret_imm(arg1, emith_jump_patchable_size()); // skip jump_patchable for rts host address
 |   emith_add_r_ret(arg1); | ||||||
|   emith_write_r_r_offs_ptr(arg1, arg2, offsetof(SH2, rts_cache) + sizeof(void *)); |   emith_write_r_r_offs_ptr(arg1, arg2, offsetof(SH2, rts_cache)+sizeof(void *)); | ||||||
|  |   emith_write_r_r_offs(arg3, arg2, offsetof(SH2, rts_cache)); | ||||||
|   emith_ret(); |   emith_ret(); | ||||||
|   emith_flush(); |   emith_flush(); | ||||||
| 
 | 
 | ||||||
|  | @ -5378,10 +5552,8 @@ void sh2_drc_finish(SH2 *sh2) | ||||||
|   if (block_tables[0] == NULL) |   if (block_tables[0] == NULL) | ||||||
|     return; |     return; | ||||||
| 
 | 
 | ||||||
|   sh2_drc_flush_all(); |  | ||||||
| 
 |  | ||||||
|   for (i = 0; i < TCACHE_BUFFERS; i++) { |  | ||||||
| #if (DRC_DEBUG & 4) | #if (DRC_DEBUG & 4) | ||||||
|  |   for (i = 0; i < TCACHE_BUFFERS; i++) { | ||||||
|     printf("~~~ tcache %d\n", i); |     printf("~~~ tcache %d\n", i); | ||||||
| #if 0 | #if 0 | ||||||
|     tcache_dsm_ptrs[i] = tcache_bases[i]; |     tcache_dsm_ptrs[i] = tcache_bases[i]; | ||||||
|  | @ -5394,8 +5566,12 @@ void sh2_drc_finish(SH2 *sh2) | ||||||
|     } |     } | ||||||
| #endif | #endif | ||||||
|     printf("max links: %d\n", block_link_pool_counts[i]); |     printf("max links: %d\n", block_link_pool_counts[i]); | ||||||
|  |   } | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|  |   sh2_drc_flush_all(); | ||||||
|  | 
 | ||||||
|  |   for (i = 0; i < TCACHE_BUFFERS; i++) { | ||||||
|     if (block_tables[i] != NULL) |     if (block_tables[i] != NULL) | ||||||
|       free(block_tables[i]); |       free(block_tables[i]); | ||||||
|     block_tables[i] = NULL; |     block_tables[i] = NULL; | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 kub
						kub