mirror of
				https://github.com/RaySollium99/picodrive.git
				synced 2025-10-26 16:29:37 -04:00 
			
		
		
		
	sh2 drc, add loop detector, handle delay/idle loops
This commit is contained in:
		
							parent
							
								
									e01deede1b
								
							
						
					
					
						commit
						213b7f42c1
					
				
					 4 changed files with 247 additions and 30 deletions
				
			
		|  | @ -846,6 +846,9 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) | ||||||
| #define emith_add_r_r_ptr_imm(d, s, imm) \ | #define emith_add_r_r_ptr_imm(d, s, imm) \ | ||||||
| 	emith_add_r_r_imm(d, s, imm) | 	emith_add_r_r_imm(d, s, imm) | ||||||
| 
 | 
 | ||||||
|  | #define emith_sub_r_r_imm_c(cond, d, s, imm) \ | ||||||
|  | 	emith_op_imm2(cond, 0, A_OP_SUB, d, s, (imm)) | ||||||
|  | 
 | ||||||
| #define emith_sub_r_r_imm(d, s, imm) \ | #define emith_sub_r_r_imm(d, s, imm) \ | ||||||
| 	emith_op_imm2(A_COND_AL, 0, A_OP_SUB, d, s, imm) | 	emith_op_imm2(A_COND_AL, 0, A_OP_SUB, d, s, imm) | ||||||
| 
 | 
 | ||||||
|  | @ -1172,6 +1175,38 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) | ||||||
| 	rcache_free_tmp(tmp_);                                               \ | 	rcache_free_tmp(tmp_);                                               \ | ||||||
| } while (0) | } while (0) | ||||||
| 
 | 
 | ||||||
|  | #define emith_sh2_delay_loop(cycles, reg) do {			\ | ||||||
|  | 	int sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);	\ | ||||||
|  | 	int t1 = rcache_get_tmp();				\ | ||||||
|  | 	int t2 = rcache_get_tmp();				\ | ||||||
|  | 	int t3 = rcache_get_tmp();				\ | ||||||
|  | 	/* if (sr < 0) return */				\ | ||||||
|  | 	emith_asrf(t2, sr, 12);					\ | ||||||
|  | 	EMITH_JMP_START(DCOND_LE);				\ | ||||||
|  | 	/* turns = sr.cycles / cycles */			\ | ||||||
|  | 	emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1);	\ | ||||||
|  | 	emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */	\ | ||||||
|  | 	rcache_free_tmp(t3);					\ | ||||||
|  | 	if (reg >= 0) {						\ | ||||||
|  | 		/* if (reg <= turns) turns = reg-1 */		\ | ||||||
|  | 		t3 = rcache_get_reg(reg, RC_GR_RMW, NULL);	\ | ||||||
|  | 		emith_cmp_r_r(t3, t2);				\ | ||||||
|  | 		emith_sub_r_r_imm_c(DCOND_LE, t2, t3, 1);	\ | ||||||
|  | 		/* if (reg <= 1) turns = 0 */			\ | ||||||
|  | 		emith_cmp_r_imm(t3, 1);				\ | ||||||
|  | 		emith_move_r_imm_c(DCOND_LE, t2, 0);		\ | ||||||
|  | 		/* reg -= turns */				\ | ||||||
|  | 		emith_sub_r_r(t3, t2);				\ | ||||||
|  | 	}							\ | ||||||
|  | 	/* sr.cycles -= turns * cycles; */			\ | ||||||
|  | 	emith_move_r_imm(t1, cycles);				\ | ||||||
|  | 	emith_mul(t1, t2, t1);					\ | ||||||
|  | 	emith_sub_r_r_r_lsl(sr, sr, t1, 12);			\ | ||||||
|  | 	EMITH_JMP_END(DCOND_LE);				\ | ||||||
|  | 	rcache_free_tmp(t1);					\ | ||||||
|  | 	rcache_free_tmp(t2);					\ | ||||||
|  | } while (0) | ||||||
|  | 
 | ||||||
| #define emith_write_sr(sr, srcr) do { \ | #define emith_write_sr(sr, srcr) do { \ | ||||||
| 	emith_lsr(sr, sr, 10); \ | 	emith_lsr(sr, sr, 10); \ | ||||||
| 	emith_or_r_r_r_lsl(sr, sr, srcr, 22); \ | 	emith_or_r_r_r_lsl(sr, sr, srcr, 22); \ | ||||||
|  |  | ||||||
|  | @ -293,6 +293,20 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; | ||||||
| 	rcache_free_tmp(tmp_); \ | 	rcache_free_tmp(tmp_); \ | ||||||
| } while (0) | } while (0) | ||||||
| 
 | 
 | ||||||
|  | #define emith_sub_r_r_r_lsl(d, s1, s2, lslimm) do { \ | ||||||
|  | 	int tmp_ = rcache_get_tmp(); \ | ||||||
|  | 	emith_lsl(tmp_, s2, lslimm); \ | ||||||
|  | 	emith_sub_r_r_r(d, s1, tmp_); \ | ||||||
|  | 	rcache_free_tmp(tmp_); \ | ||||||
|  | } while (0) | ||||||
|  | 
 | ||||||
|  | #define emith_or_r_r_r_lsl(d, s1, s2, lslimm) do { \ | ||||||
|  | 	int tmp_ = rcache_get_tmp(); \ | ||||||
|  | 	emith_lsl(tmp_, s2, lslimm); \ | ||||||
|  | 	emith_or_r_r_r(d, s1, tmp_); \ | ||||||
|  | 	rcache_free_tmp(tmp_); \ | ||||||
|  | } while (0) | ||||||
|  | 
 | ||||||
| // _r_r_shift
 | // _r_r_shift
 | ||||||
| #define emith_or_r_r_lsl(d, s, lslimm) do { \ | #define emith_or_r_r_lsl(d, s, lslimm) do { \ | ||||||
| 	int tmp_ = rcache_get_tmp(); \ | 	int tmp_ = rcache_get_tmp(); \ | ||||||
|  | @ -394,6 +408,10 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; | ||||||
| 	emith_ror(d, s, cnt) | 	emith_ror(d, s, cnt) | ||||||
| #define emith_and_r_r_c(cond, d, s) \ | #define emith_and_r_r_c(cond, d, s) \ | ||||||
| 	emith_and_r_r(d, s); | 	emith_and_r_r(d, s); | ||||||
|  | #define emith_add_r_r_imm_c(cond, d, s, imm) \ | ||||||
|  | 	emith_add_r_r_imm(d, s, imm); | ||||||
|  | #define emith_sub_r_r_imm_c(cond, d, s, imm) \ | ||||||
|  | 	emith_sub_r_r_imm(d, s, imm); | ||||||
| 
 | 
 | ||||||
| #define emith_read8_r_r_r_c(cond, r, rs, rm) \ | #define emith_read8_r_r_r_c(cond, r, rs, rm) \ | ||||||
| 	emith_read8_r_r_r(r, rs, rm) | 	emith_read8_r_r_r(r, rs, rm) | ||||||
|  | @ -1034,6 +1052,44 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; | ||||||
| 	rcache_free_tmp(tmp_);                           \ | 	rcache_free_tmp(tmp_);                           \ | ||||||
| } while (0) | } while (0) | ||||||
| 
 | 
 | ||||||
|  | #define emith_sh2_delay_loop(cycles, reg) do {			\ | ||||||
|  | 	int sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);	\ | ||||||
|  | 	int t1 = rcache_get_tmp();				\ | ||||||
|  | 	int t2 = rcache_get_tmp();				\ | ||||||
|  | 	int t3 = rcache_get_tmp();				\ | ||||||
|  | 	if (t3 == xAX) { t3 = t1; t1 = xAX; } /* for MUL */	\ | ||||||
|  | 	if (t3 == xDX) { t3 = t2; t2 = xDX; }			\ | ||||||
|  | 	/* if (sr < 0) return */				\ | ||||||
|  | 	emith_asrf(t2, sr, 12);					\ | ||||||
|  | 	EMITH_JMP_START(DCOND_LE);				\ | ||||||
|  | 	/* turns = sr.cycles / cycles */			\ | ||||||
|  | 	emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1);	\ | ||||||
|  | 	emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */	\ | ||||||
|  | 	rcache_free_tmp(t3);					\ | ||||||
|  | 	if (reg >= 0) {						\ | ||||||
|  | 		/* if (reg <= turns) turns = reg-1 */		\ | ||||||
|  | 		t3 = rcache_get_reg(reg, RC_GR_RMW, NULL);	\ | ||||||
|  | 		emith_cmp_r_r(t3, t2);				\ | ||||||
|  | 		EMITH_SJMP_START(DCOND_GT);			\ | ||||||
|  | 		emith_sub_r_r_imm_c(DCOND_LE, t2, t3, 1);	\ | ||||||
|  | 		EMITH_SJMP_END(DCOND_GT);			\ | ||||||
|  | 		/* if (reg <= 1) turns = 0 */			\ | ||||||
|  | 		emith_cmp_r_imm(t3, 1);				\ | ||||||
|  | 		EMITH_SJMP_START(DCOND_GT);			\ | ||||||
|  | 		emith_move_r_imm_c(DCOND_LE, t2, 0);		\ | ||||||
|  | 		EMITH_SJMP_END(DCOND_GT);			\ | ||||||
|  | 		/* reg -= turns */				\ | ||||||
|  | 		emith_sub_r_r(t3, t2);				\ | ||||||
|  | 	}							\ | ||||||
|  | 	/* sr.cycles -= turns * cycles; */			\ | ||||||
|  | 	emith_move_r_imm(t1, cycles);				\ | ||||||
|  | 	emith_mul_u64(t1, t2, t1, t2);				\ | ||||||
|  | 	emith_sub_r_r_r_lsl(sr, sr, t1, 12);			\ | ||||||
|  | 	EMITH_JMP_END(DCOND_LE);				\ | ||||||
|  | 	rcache_free_tmp(t1);					\ | ||||||
|  | 	rcache_free_tmp(t2);					\ | ||||||
|  | } while (0) | ||||||
|  | 
 | ||||||
| #define emith_write_sr(sr, srcr) do { \ | #define emith_write_sr(sr, srcr) do { \ | ||||||
| 	int tmp_ = rcache_get_tmp(); \ | 	int tmp_ = rcache_get_tmp(); \ | ||||||
| 	emith_clear_msb(tmp_, srcr, 22); \ | 	emith_clear_msb(tmp_, srcr, 22); \ | ||||||
|  |  | ||||||
|  | @ -41,6 +41,7 @@ | ||||||
| #define BRANCH_CACHE            1 | #define BRANCH_CACHE            1 | ||||||
| #define ALIAS_REGISTERS         1 | #define ALIAS_REGISTERS         1 | ||||||
| #define REMAP_REGISTER          1 | #define REMAP_REGISTER          1 | ||||||
|  | #define LOOP_DETECTION          1 | ||||||
| 
 | 
 | ||||||
| // limits (per block)
 | // limits (per block)
 | ||||||
| #define MAX_BLOCK_SIZE          (BLOCK_INSN_LIMIT * 6 * 6) | #define MAX_BLOCK_SIZE          (BLOCK_INSN_LIMIT * 6 * 6) | ||||||
|  | @ -135,6 +136,7 @@ enum op_types { | ||||||
|   OP_BRANCH_RF, // indirect far (PC + Rm)
 |   OP_BRANCH_RF, // indirect far (PC + Rm)
 | ||||||
|   OP_SETCLRT,   // T flag set/clear
 |   OP_SETCLRT,   // T flag set/clear
 | ||||||
|   OP_MOVE,      // register move
 |   OP_MOVE,      // register move
 | ||||||
|  |   OP_LOAD_CONST,// load const to register
 | ||||||
|   OP_LOAD_POOL, // literal pool load, imm is address
 |   OP_LOAD_POOL, // literal pool load, imm is address
 | ||||||
|   OP_MOVA, |   OP_MOVA, | ||||||
|   OP_SLEEP, |   OP_SLEEP, | ||||||
|  | @ -147,6 +149,9 @@ enum op_types { | ||||||
| #define OP_ISBRAUC(op) (BITMASK4(OP_BRANCH, OP_BRANCH_R, OP_BRANCH_RF, OP_RTE) \ | #define OP_ISBRAUC(op) (BITMASK4(OP_BRANCH, OP_BRANCH_R, OP_BRANCH_RF, OP_RTE) \ | ||||||
|                                 & BITMASK1(op)) |                                 & BITMASK1(op)) | ||||||
| #define OP_ISBRACND(op) (BITMASK2(OP_BRANCH_CT, OP_BRANCH_CF) & BITMASK1(op)) | #define OP_ISBRACND(op) (BITMASK2(OP_BRANCH_CT, OP_BRANCH_CF) & BITMASK1(op)) | ||||||
|  | #define OP_ISBRAIMM(op) (BITMASK3(OP_BRANCH, OP_BRANCH_CT, OP_BRANCH_CF) \ | ||||||
|  | 				& BITMASK1(op)) | ||||||
|  | #define OP_ISBRAIND(op) (BITMASK2(OP_BRANCH_R, OP_BRANCH_RF) & BITMASK1(op)) | ||||||
| 
 | 
 | ||||||
| #ifdef DRC_SH2 | #ifdef DRC_SH2 | ||||||
| 
 | 
 | ||||||
|  | @ -2537,7 +2542,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) | ||||||
|   u32 branch_patch_pc[MAX_LOCAL_BRANCHES]; |   u32 branch_patch_pc[MAX_LOCAL_BRANCHES]; | ||||||
|   int branch_patch_count = 0; |   int branch_patch_count = 0; | ||||||
|   u8 op_flags[BLOCK_INSN_LIMIT]; |   u8 op_flags[BLOCK_INSN_LIMIT]; | ||||||
|   struct { |   struct drcf { | ||||||
|  |     int delay_reg:8; | ||||||
|  |     u32 loop_type:8; | ||||||
|     u32 test_irq:1; |     u32 test_irq:1; | ||||||
|     u32 pending_branch_direct:1; |     u32 pending_branch_direct:1; | ||||||
|     u32 pending_branch_indirect:1; |     u32 pending_branch_indirect:1; | ||||||
|  | @ -2556,7 +2563,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) | ||||||
|   int tmp, tmp2; |   int tmp, tmp2; | ||||||
|   int cycles; |   int cycles; | ||||||
|   int i, v; |   int i, v; | ||||||
|   u32 u; |   u32 u, m1, m2; | ||||||
|   int op; |   int op; | ||||||
|   u16 crc; |   u16 crc; | ||||||
| 
 | 
 | ||||||
|  | @ -2603,14 +2610,64 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   // collect branch_targets that don't land on delay slots
 |   // collect branch_targets that don't land on delay slots
 | ||||||
|  |   m1 = m2 = v = op = 0; | ||||||
|   for (pc = base_pc, i = 0; pc < end_pc; i++, pc += 2) { |   for (pc = base_pc, i = 0; pc < end_pc; i++, pc += 2) { | ||||||
|     if (!(op_flags[i] & OF_BTARGET)) |     if (op_flags[i] & OF_DELAY_OP) | ||||||
|       continue; |  | ||||||
|     if (op_flags[i] & OF_DELAY_OP) { |  | ||||||
|       op_flags[i] &= ~OF_BTARGET; |       op_flags[i] &= ~OF_BTARGET; | ||||||
|       continue; |     if (op_flags[i] & OF_BTARGET) | ||||||
|  |       ADD_TO_ARRAY(branch_target_pc, branch_target_count, pc, ); | ||||||
|  | #if LOOP_DETECTION | ||||||
|  |     // loop types detected:
 | ||||||
|  |     // 1. target: ... BRA target -> idle loop
 | ||||||
|  |     // 2. target: ... delay insn ... BF target -> delay loop
 | ||||||
|  |     // 3. target: ... poll  insn ... BF/BT target -> poll loop
 | ||||||
|  |     // 4. target: ... poll  insn ... BF/BT exit ... BRA target, exit: -> poll
 | ||||||
|  |     // conditions:
 | ||||||
|  |     // a. no further branch targets between target and back jump.
 | ||||||
|  |     // b. no unconditional branch insn inside the loop.
 | ||||||
|  |     // c. exactly one poll or delay insn is allowed inside a delay/poll loop
 | ||||||
|  |     // (scan_block marks loops only if they meet conditions a through c)
 | ||||||
|  |     // d. idle loops do not modify anything but PC,SR and contain no branches
 | ||||||
|  |     // e. delay/poll loops do not modify anything but the concerned reg,PC,SR
 | ||||||
|  |     // f. loading constants into registers inside the loop is allowed
 | ||||||
|  |     // g. a delay/poll loop must have a conditional branch somewhere
 | ||||||
|  |     // h. an idle loop must not have a conditional branch
 | ||||||
|  |     if (op_flags[i] & OF_BTARGET) { | ||||||
|  |       // possible loop entry point
 | ||||||
|  |       drcf.loop_type = op_flags[i] & OF_LOOP; | ||||||
|  |       drcf.pending_branch_direct = drcf.pending_branch_indirect = 0; | ||||||
|  |       op = OF_IDLE_LOOP; // loop type
 | ||||||
|  |       v = i; | ||||||
|  |       m1 = m2 = 0; | ||||||
|     } |     } | ||||||
|     ADD_TO_ARRAY(branch_target_pc, branch_target_count, pc, break); |     if (drcf.loop_type) { | ||||||
|  |       // detect loop type, and store poll/delay register
 | ||||||
|  |       if (op_flags[i] & OF_POLL_INSN) { | ||||||
|  |         op = OF_POLL_LOOP; | ||||||
|  |         m1 |= ops[i].dest;   // loop poll/delay regs
 | ||||||
|  |       } else if (op_flags[i] & OF_DELAY_INSN) { | ||||||
|  |         op = OF_DELAY_LOOP; | ||||||
|  |         m1 |= ops[i].dest; | ||||||
|  |       } else if (ops[i].op != OP_LOAD_POOL && ops[i].op != OP_LOAD_CONST | ||||||
|  |               && (ops[i].op != OP_MOVE || op != OF_POLL_LOOP)) { | ||||||
|  |         // not (MOV @(PC) or MOV # or (MOV reg and poll)),   condition f
 | ||||||
|  |         m2 |= ops[i].dest;   // regs modified by other insns
 | ||||||
|  |       } | ||||||
|  |       // branch detector
 | ||||||
|  |       if (OP_ISBRAIMM(ops[i].op) && ops[i].imm == base_pc + 2*v) | ||||||
|  |         drcf.pending_branch_direct = 1;         // backward branch detected
 | ||||||
|  |       if (OP_ISBRACND(ops[i].op)) | ||||||
|  |         drcf.pending_branch_indirect = 1;       // conditions g,h - cond.branch
 | ||||||
|  |       // poll/idle loops terminate with their backwards branch to the loop start
 | ||||||
|  |       if (drcf.pending_branch_direct && !(op_flags[i+1] & OF_DELAY_OP)) { | ||||||
|  |         m2 &= ~(m1 | BITMASK2(SHR_PC, SHR_SR)); // conditions d,e + g,h
 | ||||||
|  |         if (m2 || ((op == OF_IDLE_LOOP) == (drcf.pending_branch_indirect))) | ||||||
|  |           op = 0;                               // conditions not met
 | ||||||
|  |         op_flags[v] = (op_flags[v] & ~OF_LOOP) | op; // set loop type
 | ||||||
|  |         drcf.loop_type = 0; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  | #endif | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   if (branch_target_count > 0) { |   if (branch_target_count > 0) { | ||||||
|  | @ -2634,6 +2691,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) | ||||||
| 
 | 
 | ||||||
|   // clear stale state after compile errors
 |   // clear stale state after compile errors
 | ||||||
|   rcache_invalidate(); |   rcache_invalidate(); | ||||||
|  |   drcf = (struct drcf) { 0 }; | ||||||
| 
 | 
 | ||||||
|   // -------------------------------------------------
 |   // -------------------------------------------------
 | ||||||
|   // 3rd pass: actual compilation
 |   // 3rd pass: actual compilation
 | ||||||
|  | @ -2653,8 +2711,14 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) | ||||||
| #endif | #endif | ||||||
| #if (DRC_DEBUG & 4) | #if (DRC_DEBUG & 4) | ||||||
|     DasmSH2(sh2dasm_buff, pc, op); |     DasmSH2(sh2dasm_buff, pc, op); | ||||||
|     printf("%c%08x %04x %s\n", (op_flags[i] & OF_BTARGET) ? '*' : ' ', |     if (op_flags[i] & OF_BTARGET) { | ||||||
|       pc, op, sh2dasm_buff); |       if ((op_flags[i] & OF_LOOP) == OF_DELAY_LOOP)     tmp3 = '+'; | ||||||
|  |       else if ((op_flags[i] & OF_LOOP) == OF_POLL_LOOP) tmp3 = '='; | ||||||
|  |       else if ((op_flags[i] & OF_LOOP) == OF_IDLE_LOOP) tmp3 = '~'; | ||||||
|  |       else                                              tmp3 = '*'; | ||||||
|  |     } else if (drcf.loop_type)                          tmp3 = '.'; | ||||||
|  |     else                                                tmp3 = ' '; | ||||||
|  |     printf("%c%08x %04x %s\n", tmp3, pc, op, sh2dasm_buff); | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|     if (op_flags[i] & OF_BTARGET) |     if (op_flags[i] & OF_BTARGET) | ||||||
|  | @ -2702,6 +2766,10 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) | ||||||
|       v = find_in_array(branch_target_pc, branch_target_count, pc); |       v = find_in_array(branch_target_pc, branch_target_count, pc); | ||||||
|       if (v >= 0) |       if (v >= 0) | ||||||
|         branch_target_ptr[v] = tcache_ptr; |         branch_target_ptr[v] = tcache_ptr; | ||||||
|  | #if LOOP_DETECTION | ||||||
|  |       drcf.loop_type = op_flags[i] & OF_LOOP; | ||||||
|  |       drcf.delay_reg = -1; | ||||||
|  | #endif | ||||||
| 
 | 
 | ||||||
|       // must update PC
 |       // must update PC
 | ||||||
|       emit_move_r_imm32(SHR_PC, pc); |       emit_move_r_imm32(SHR_PC, pc); | ||||||
|  | @ -3388,6 +3456,14 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) | ||||||
|           goto end_op; |           goto end_op; | ||||||
|         case 1: // DT Rn      0100nnnn00010000
 |         case 1: // DT Rn      0100nnnn00010000
 | ||||||
|           sr  = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); |           sr  = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); | ||||||
|  | #if LOOP_DETECTION | ||||||
|  |           if (drcf.loop_type == OF_DELAY_LOOP) { | ||||||
|  |             if (drcf.delay_reg == -1) | ||||||
|  |               drcf.delay_reg = GET_Rn(); | ||||||
|  |             else | ||||||
|  |               drcf.loop_type = 0; | ||||||
|  |           } | ||||||
|  | #endif | ||||||
|           emith_bic_r_imm(sr, T); |           emith_bic_r_imm(sr, T); | ||||||
|           tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); |           tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); | ||||||
|           emith_subf_r_r_imm(tmp, tmp2, 1); |           emith_subf_r_r_imm(tmp, tmp2, 1); | ||||||
|  | @ -3832,7 +3908,7 @@ end_op: | ||||||
|       drcf.test_irq = 0; |       drcf.test_irq = 0; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     // branch handling (with/without delay)
 |     // branch handling
 | ||||||
|     if (drcf.pending_branch_direct) |     if (drcf.pending_branch_direct) | ||||||
|     { |     { | ||||||
|       struct op_data *opd_b = |       struct op_data *opd_b = | ||||||
|  | @ -3846,6 +3922,16 @@ end_op: | ||||||
|         ctaken = (op_flags[i] & OF_DELAY_OP) ? 1 : 2; |         ctaken = (op_flags[i] & OF_DELAY_OP) ? 1 : 2; | ||||||
|       } |       } | ||||||
|       cycles += ctaken; // assume branch taken
 |       cycles += ctaken; // assume branch taken
 | ||||||
|  | #if LOOP_DETECTION | ||||||
|  |       if ((drcf.loop_type == OF_IDLE_LOOP || | ||||||
|  |           (drcf.loop_type == OF_DELAY_LOOP && drcf.delay_reg >= 0))) | ||||||
|  |       { | ||||||
|  |         // idle or delay loop
 | ||||||
|  |         emith_sh2_delay_loop(cycles, drcf.delay_reg); | ||||||
|  |         drcf.loop_type = 0; | ||||||
|  |       } | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|       sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); |       sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); | ||||||
|       FLUSH_CYCLES(sr); |       FLUSH_CYCLES(sr); | ||||||
|       rcache_clean(); |       rcache_clean(); | ||||||
|  | @ -3902,6 +3988,8 @@ end_op: | ||||||
|         emith_add_r_imm(sr, ctaken << 12); |         emith_add_r_imm(sr, ctaken << 12); | ||||||
| 
 | 
 | ||||||
|       drcf.pending_branch_direct = 0; |       drcf.pending_branch_direct = 0; | ||||||
|  |       if (target_pc >= base_pc && target_pc < pc) | ||||||
|  |         drcf.loop_type = 0; | ||||||
|     } |     } | ||||||
|     else if (drcf.pending_branch_indirect) { |     else if (drcf.pending_branch_indirect) { | ||||||
|       sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); |       sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); | ||||||
|  | @ -3909,6 +3997,7 @@ end_op: | ||||||
|       rcache_flush(); |       rcache_flush(); | ||||||
|       emith_jump(sh2_drc_dispatcher); |       emith_jump(sh2_drc_dispatcher); | ||||||
|       drcf.pending_branch_indirect = 0; |       drcf.pending_branch_indirect = 0; | ||||||
|  |       drcf.loop_type = 0; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     do_host_disasm(tcache_id); |     do_host_disasm(tcache_id); | ||||||
|  | @ -4729,6 +4818,9 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, | ||||||
|   int end_block = 0; |   int end_block = 0; | ||||||
|   int i, i_end; |   int i, i_end; | ||||||
|   u32 crc = 0; |   u32 crc = 0; | ||||||
|  |   // 2nd pass stuff
 | ||||||
|  |   int last_btarget; // loop detector 
 | ||||||
|  |   enum { T_UNKNOWN, T_CLEAR, T_SET } t; // T propagation state
 | ||||||
| 
 | 
 | ||||||
|   memset(op_flags, 0, sizeof(*op_flags) * BLOCK_INSN_LIMIT); |   memset(op_flags, 0, sizeof(*op_flags) * BLOCK_INSN_LIMIT); | ||||||
|   op_flags[0] |= OF_BTARGET; // block start is always a target
 |   op_flags[0] |= OF_BTARGET; // block start is always a target
 | ||||||
|  | @ -4903,6 +4995,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, | ||||||
|       case 0x0e: // MOV.L    @(R0,Rm),Rn      0000nnnnmmmm1110
 |       case 0x0e: // MOV.L    @(R0,Rm),Rn      0000nnnnmmmm1110
 | ||||||
|         opd->source = BITMASK3(GET_Rm(), SHR_R0, SHR_MEM); |         opd->source = BITMASK3(GET_Rm(), SHR_R0, SHR_MEM); | ||||||
|         opd->dest = BITMASK1(GET_Rn()); |         opd->dest = BITMASK1(GET_Rn()); | ||||||
|  |         op_flags[i] |= OF_POLL_INSN; | ||||||
|         break; |         break; | ||||||
|       case 0x0f: // MAC.L   @Rm+,@Rn+  0000nnnnmmmm1111
 |       case 0x0f: // MAC.L   @Rm+,@Rn+  0000nnnnmmmm1111
 | ||||||
|         opd->source = BITMASK6(GET_Rm(), GET_Rn(), SHR_SR, SHR_MACL, SHR_MACH, SHR_MEM); |         opd->source = BITMASK6(GET_Rm(), GET_Rn(), SHR_SR, SHR_MACL, SHR_MACH, SHR_MEM); | ||||||
|  | @ -5027,6 +5120,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, | ||||||
|         case 1: // DT Rn      0100nnnn00010000
 |         case 1: // DT Rn      0100nnnn00010000
 | ||||||
|           opd->source = BITMASK1(GET_Rn()); |           opd->source = BITMASK1(GET_Rn()); | ||||||
|           opd->dest = BITMASK2(GET_Rn(), SHR_T); |           opd->dest = BITMASK2(GET_Rn(), SHR_T); | ||||||
|  |           op_flags[i] |= OF_DELAY_INSN; | ||||||
|           break; |           break; | ||||||
|         default: |         default: | ||||||
|           goto undefined; |           goto undefined; | ||||||
|  | @ -5235,6 +5329,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, | ||||||
|       opd->source = BITMASK2(GET_Rm(), SHR_MEM); |       opd->source = BITMASK2(GET_Rm(), SHR_MEM); | ||||||
|       opd->dest = BITMASK1(GET_Rn()); |       opd->dest = BITMASK1(GET_Rn()); | ||||||
|       opd->imm = (op & 0x0f) * 4; |       opd->imm = (op & 0x0f) * 4; | ||||||
|  |       op_flags[i] |= OF_POLL_INSN; | ||||||
|       break; |       break; | ||||||
| 
 | 
 | ||||||
|     /////////////////////////////////////////////
 |     /////////////////////////////////////////////
 | ||||||
|  | @ -5252,6 +5347,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, | ||||||
|       case 0x02: // MOV.L @Rm,Rn        0110nnnnmmmm0010
 |       case 0x02: // MOV.L @Rm,Rn        0110nnnnmmmm0010
 | ||||||
|         opd->dest = BITMASK1(GET_Rn()); |         opd->dest = BITMASK1(GET_Rn()); | ||||||
|         opd->source = BITMASK2(GET_Rm(), SHR_MEM); |         opd->source = BITMASK2(GET_Rm(), SHR_MEM); | ||||||
|  |         op_flags[i] |= OF_POLL_INSN; | ||||||
|         break; |         break; | ||||||
|       case 0x0a: // NEGC   Rm,Rn        0110nnnnmmmm1010
 |       case 0x0a: // NEGC   Rm,Rn        0110nnnnmmmm1010
 | ||||||
|         opd->source = BITMASK2(GET_Rm(), SHR_T); |         opd->source = BITMASK2(GET_Rm(), SHR_T); | ||||||
|  | @ -5394,6 +5490,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, | ||||||
|         opd->dest = BITMASK1(SHR_R0); |         opd->dest = BITMASK1(SHR_R0); | ||||||
|         opd->size = (op & 0x300) >> 8; |         opd->size = (op & 0x300) >> 8; | ||||||
|         opd->imm = (op & 0xff) << opd->size; |         opd->imm = (op & 0xff) << opd->size; | ||||||
|  |         op_flags[i] |= OF_POLL_INSN; | ||||||
|         break; |         break; | ||||||
|       case 0x0300: // TRAPA #imm      11000011iiiiiiii
 |       case 0x0300: // TRAPA #imm      11000011iiiiiiii
 | ||||||
|         opd->op = OP_TRAPA; |         opd->op = OP_TRAPA; | ||||||
|  | @ -5481,6 +5578,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, | ||||||
|     /////////////////////////////////////////////
 |     /////////////////////////////////////////////
 | ||||||
|     case 0x0e: |     case 0x0e: | ||||||
|       // MOV #imm,Rn   1110nnnniiiiiiii
 |       // MOV #imm,Rn   1110nnnniiiiiiii
 | ||||||
|  |       opd->op = OP_LOAD_CONST; | ||||||
|       opd->dest = BITMASK1(GET_Rn()); |       opd->dest = BITMASK1(GET_Rn()); | ||||||
|       opd->imm = (s8)op; |       opd->imm = (s8)op; | ||||||
|       break; |       break; | ||||||
|  | @ -5517,32 +5615,29 @@ end: | ||||||
| 
 | 
 | ||||||
|   // 2nd pass: some analysis
 |   // 2nd pass: some analysis
 | ||||||
|   lowest_literal = end_literals = lowest_mova = 0; |   lowest_literal = end_literals = lowest_mova = 0; | ||||||
|  |   t = T_UNKNOWN; | ||||||
|  |   last_btarget = 0; | ||||||
|  |   op = 0; // delay/poll insns counter
 | ||||||
|   for (i = 0, pc = base_pc; i < i_end; i++, pc += 2) { |   for (i = 0, pc = base_pc; i < i_end; i++, pc += 2) { | ||||||
|     opd = &ops[i]; |     opd = &ops[i]; | ||||||
|     crc += FETCH_OP(pc); |     crc += FETCH_OP(pc); | ||||||
| 
 | 
 | ||||||
|     // propagate T (TODO: DIV0U)
 |     // propagate T (TODO: DIV0U)
 | ||||||
|     if ((opd->op == OP_SETCLRT && !opd->imm) || opd->op == OP_BRANCH_CT) |  | ||||||
|       op_flags[i + 1] |= OF_T_CLEAR; |  | ||||||
|     else if ((opd->op == OP_SETCLRT && opd->imm) || opd->op == OP_BRANCH_CF) |  | ||||||
|       op_flags[i + 1] |= OF_T_SET; |  | ||||||
| 
 |  | ||||||
|     if ((op_flags[i] & OF_BTARGET) || (opd->dest & BITMASK1(SHR_T))) |     if ((op_flags[i] & OF_BTARGET) || (opd->dest & BITMASK1(SHR_T))) | ||||||
|       op_flags[i] &= ~(OF_T_SET | OF_T_CLEAR); |       t = T_UNKNOWN; | ||||||
|     else |  | ||||||
|       op_flags[i + 1] |= op_flags[i] & (OF_T_SET | OF_T_CLEAR); |  | ||||||
| 
 | 
 | ||||||
|     if ((opd->op == OP_BRANCH_CT && (op_flags[i] & OF_T_CLEAR)) || |     if ((opd->op == OP_BRANCH_CT && t == T_SET) || | ||||||
|         (opd->op == OP_BRANCH_CF && (op_flags[i] & OF_T_SET))) |         (opd->op == OP_BRANCH_CF && t == T_CLEAR)) { | ||||||
|       opd->op = OP_BRANCH_N; |  | ||||||
|     else if ((opd->op == OP_BRANCH_CT && (op_flags[i] & OF_T_SET)) || |  | ||||||
|              (opd->op == OP_BRANCH_CF && (op_flags[i] & OF_T_CLEAR))) { |  | ||||||
|       opd->op = OP_BRANCH; |       opd->op = OP_BRANCH; | ||||||
|       if (op_flags[i + 1] & OF_DELAY_OP) |       opd->cycles = (op_flags[i + 1] & OF_DELAY_OP) ? 2 : 3; | ||||||
|         opd->cycles = 2; |     } else if ((opd->op == OP_BRANCH_CT && t == T_CLEAR) || | ||||||
|       else |                (opd->op == OP_BRANCH_CF && t == T_SET)) | ||||||
|         opd->cycles = 3; |       opd->op = OP_BRANCH_N; | ||||||
|     } |     else if ((opd->op == OP_SETCLRT && !opd->imm) || opd->op == OP_BRANCH_CT) | ||||||
|  |       t = T_CLEAR; | ||||||
|  |     else if ((opd->op == OP_SETCLRT && opd->imm) || opd->op == OP_BRANCH_CF) | ||||||
|  |       t = T_SET; | ||||||
|  | 
 | ||||||
|     // "overscan" detection: unreachable code after unconditional branch
 |     // "overscan" detection: unreachable code after unconditional branch
 | ||||||
|     // this can happen if the insn after a forward branch isn't a local target
 |     // this can happen if the insn after a forward branch isn't a local target
 | ||||||
|     if (OP_ISBRAUC(opd->op)) { |     if (OP_ISBRAUC(opd->op)) { | ||||||
|  | @ -5575,6 +5670,32 @@ end: | ||||||
|         } |         } | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|  | #if LOOP_DETECTION | ||||||
|  |     // inner loop detection
 | ||||||
|  |     // 1. a loop always starts with a branch target (for the backwards jump)
 | ||||||
|  |     // 2. it doesn't contain more than one polling and/or delaying insn
 | ||||||
|  |     // 3. it doesn't contain unconditional jumps
 | ||||||
|  |     // 4. no overlapping of loops
 | ||||||
|  |     if (op_flags[i] & OF_BTARGET) { | ||||||
|  |       last_btarget = i;         // possible loop starting point
 | ||||||
|  |       op = 0; | ||||||
|  |     } | ||||||
|  |     // XXX let's hope nobody is putting a delay or poll insn in a delay slot :-/
 | ||||||
|  |     if (OP_ISBRAIMM(opd->op)) { | ||||||
|  |       // BSR, BRA, BT, BF with immediate target
 | ||||||
|  |       int i_tmp = (opd->imm - base_pc) / 2; // branch target, index in ops
 | ||||||
|  |       if (i_tmp == last_btarget && op <= 1) { | ||||||
|  |         op_flags[i_tmp] |= OF_LOOP; // conditions met -> mark loop
 | ||||||
|  |         last_btarget = i+1;     // condition 4
 | ||||||
|  |       } else if (opd->op == OP_BRANCH) | ||||||
|  |         last_btarget = i+1;     // condition 3
 | ||||||
|  |     } | ||||||
|  |     else if (OP_ISBRAIND(opd->op)) | ||||||
|  |       // BRAF, BSRF, JMP, JSR, register indirect. treat it as off-limits jump
 | ||||||
|  |       last_btarget = i+1;       // condition 3
 | ||||||
|  |     else if (op_flags[i] & (OF_POLL_INSN|OF_DELAY_INSN)) | ||||||
|  |       op ++;                    // condition 2 
 | ||||||
|  | #endif | ||||||
|   } |   } | ||||||
|   end_pc = base_pc + i_end * 2; |   end_pc = base_pc + i_end * 2; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -18,9 +18,14 @@ void sh2_drc_frame(void); | ||||||
| /* op_flags */ | /* op_flags */ | ||||||
| #define OF_DELAY_OP   (1 << 0) | #define OF_DELAY_OP   (1 << 0) | ||||||
| #define OF_BTARGET    (1 << 1) | #define OF_BTARGET    (1 << 1) | ||||||
| #define OF_T_SET      (1 << 2) // T is known to be set
 | #define OF_LOOP       (3 << 2) // NONE, IDLE, DELAY, POLL loop
 | ||||||
| #define OF_T_CLEAR    (1 << 3) // ... clear
 |  | ||||||
| #define OF_B_IN_DS    (1 << 4) | #define OF_B_IN_DS    (1 << 4) | ||||||
|  | #define OF_DELAY_INSN (1 << 5) // DT, (TODO ADD+CMP?)
 | ||||||
|  | #define OF_POLL_INSN  (1 << 6) // MOV @(...),Rn (no post increment), TST @(...)
 | ||||||
|  | 
 | ||||||
|  | #define OF_IDLE_LOOP  (1 << 2) | ||||||
|  | #define OF_DELAY_LOOP (2 << 2) | ||||||
|  | #define OF_POLL_LOOP  (3 << 2) | ||||||
| 
 | 
 | ||||||
| unsigned short scan_block(unsigned int base_pc, int is_slave, | unsigned short scan_block(unsigned int base_pc, int is_slave, | ||||||
| 		unsigned char *op_flags, unsigned int *end_pc, | 		unsigned char *op_flags, unsigned int *end_pc, | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 kub
						kub