mirror of
				https://github.com/RaySollium99/picodrive.git
				synced 2025-10-26 08:19:38 -04:00 
			
		
		
		
	sh2: optimisations in drc
This commit is contained in:
		
							parent
							
								
									39c5ec3f4c
								
							
						
					
					
						commit
						904fb98e6c
					
				
					 2 changed files with 72 additions and 51 deletions
				
			
		|  | @ -17,18 +17,18 @@ void drc_cmn_cleanup(void); | |||
| // binary search approach, since we don't have CLZ on ARM920T
 | ||||
| #define FOR_ALL_BITS_SET_DO(mask, bit, code) { \ | ||||
|   u32 __mask = mask; \ | ||||
|   for (bit = 31; bit >= 0 && mask; bit--, __mask <<= 1) { \ | ||||
|     if (!(__mask & (0xffff << 16))) \ | ||||
|       bit -= 16, __mask <<= 16; \ | ||||
|     if (!(__mask & (0xff << 24))) \ | ||||
|       bit -= 8, __mask <<= 8; \ | ||||
|     if (!(__mask & (0xf << 28))) \ | ||||
|       bit -= 4, __mask <<= 4; \ | ||||
|     if (!(__mask & (0x3 << 30))) \ | ||||
|       bit -= 2, __mask <<= 2; \ | ||||
|     if (!(__mask & (0x1 << 31))) \ | ||||
|       bit -= 1, __mask <<= 1; \ | ||||
|     if (__mask & (0x1 << 31)) { \ | ||||
|   for (bit = 0; bit < 32 && mask; bit++, __mask >>= 1) { \ | ||||
|     if (!(__mask & 0xffff)) \ | ||||
|       bit += 16,__mask >>= 16; \ | ||||
|     if (!(__mask & 0xff)) \ | ||||
|       bit += 8, __mask >>= 8; \ | ||||
|     if (!(__mask & 0xf)) \ | ||||
|       bit += 4, __mask >>= 4; \ | ||||
|     if (!(__mask & 0x3)) \ | ||||
|       bit += 2, __mask >>= 2; \ | ||||
|     if (!(__mask & 0x1)) \ | ||||
|       bit += 1, __mask >>= 1; \ | ||||
|     if (__mask & 0x1) { \ | ||||
|       code; \ | ||||
|     } \ | ||||
|   } \ | ||||
|  |  | |||
|  | @ -1549,21 +1549,30 @@ static u32 rcache_regs_clean;    // regs needing cleaning | |||
| static void rcache_lock_vreg(int x) | ||||
| { | ||||
|   if (x >= 0) { | ||||
|     cache_regs[x].locked ++; | ||||
| #if DRC_DEBUG & 64 | ||||
|     if (cache_regs[x].type == HR_FREE) { | ||||
|       printf("locking free vreg %x, aborting\n", x); | ||||
|       exit(1); | ||||
|     } | ||||
|     cache_regs[x].locked ++; | ||||
|     if (!cache_regs[x].locked) { | ||||
|       printf("locking overflow vreg %x, aborting\n", x); | ||||
|       exit(1); | ||||
|     } | ||||
| #endif | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| static void rcache_unlock_vreg(int x) | ||||
| { | ||||
|   if (x >= 0) { | ||||
| #if DRC_DEBUG & 64 | ||||
|     if (cache_regs[x].type == HR_FREE) { | ||||
|       printf("unlocking free vreg %x, aborting\n", x); | ||||
|       exit(1); | ||||
|     } | ||||
| #endif | ||||
|     if (cache_regs[x].locked) | ||||
|       cache_regs[x].locked --; | ||||
|   } | ||||
| } | ||||
|  | @ -1582,7 +1591,7 @@ static void rcache_unmap_vreg(int x) | |||
|   FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, i, | ||||
|       if (guest_regs[i].flags & GRF_DIRTY) { | ||||
|         // if a dirty reg is unmapped save its value to context
 | ||||
|         if (~rcache_regs_discard & (1 << i)) | ||||
|         if ((~rcache_regs_discard | rcache_regs_now) & (1 << i)) | ||||
|           emith_ctx_write(cache_regs[x].hreg, i * 4); | ||||
|         guest_regs[i].flags &= ~GRF_DIRTY; | ||||
|       } | ||||
|  | @ -1700,26 +1709,28 @@ static int rcache_allocate(int what, int minprio) | |||
|       continue; | ||||
|     if (cache_regs[i].type == HR_FREE || cache_regs[i].type == HR_TEMP) { | ||||
|       // REG is free
 | ||||
|       prio = 6; | ||||
|       prio = 10; | ||||
|       oldest = i; | ||||
|       break; | ||||
|     } | ||||
|     if (cache_regs[i].type == HR_CACHED) { | ||||
|       if (rcache_regs_now & cache_regs[i].gregs) | ||||
|         // REGs needed for the current insn
 | ||||
|         i_prio = 1; | ||||
|         i_prio = 0; | ||||
|       else if (rcache_regs_soon & cache_regs[i].gregs) | ||||
|         // REGs needed in the next insns
 | ||||
|         i_prio = 2; | ||||
|       else if (rcache_regs_late & cache_regs[i].gregs) | ||||
|         // REGs needed in some future insn
 | ||||
|         i_prio = 3; | ||||
|       else if (!(~rcache_regs_discard & cache_regs[i].gregs)) | ||||
|         // REGs not needed in the foreseeable future
 | ||||
|         i_prio = 4; | ||||
|       else if (~rcache_regs_discard & cache_regs[i].gregs) | ||||
|         // REGs not needed in the foreseeable future
 | ||||
|         i_prio = 6; | ||||
|       else | ||||
|         // REGs soon overwritten anyway
 | ||||
|         i_prio = 5; | ||||
|         i_prio = 8; | ||||
|       if (!(cache_regs[i].flags & HRF_DIRTY)) i_prio ++; | ||||
| 
 | ||||
|       if (prio < i_prio || (prio == i_prio && cache_regs[i].stamp < min_stamp)) { | ||||
|         min_stamp = cache_regs[i].stamp; | ||||
|         oldest = i; | ||||
|  | @ -1744,21 +1755,21 @@ static int rcache_allocate_vreg(int needed) | |||
| { | ||||
|   int x; | ||||
|    | ||||
|   x = rcache_allocate(1, needed ? 0 : 3); | ||||
|   x = rcache_allocate(1, needed ? 0 : 4); | ||||
|   if (x < 0) | ||||
|     x = rcache_allocate(-1, 1); | ||||
|     x = rcache_allocate(-1, 0); | ||||
|   return x; | ||||
| } | ||||
| 
 | ||||
| static int rcache_allocate_nontemp(void) | ||||
| { | ||||
|   int x = rcache_allocate(0, 3); | ||||
|   int x = rcache_allocate(0, 4); | ||||
|   return x; | ||||
| } | ||||
| 
 | ||||
| static int rcache_allocate_temp(void) | ||||
| { | ||||
|   int x = rcache_allocate(-1, 1); | ||||
|   int x = rcache_allocate(-1, 0); | ||||
|   if (x < 0) | ||||
|     x = rcache_allocate(0, 0); | ||||
|   return x; | ||||
|  | @ -1821,20 +1832,25 @@ static void rcache_remap_vreg(int x) | |||
|   int d; | ||||
| 
 | ||||
|   // x must be a cached vreg
 | ||||
|   if (cache_regs[x].type != HR_CACHED) | ||||
|   if (cache_regs[x].type != HR_CACHED || cache_regs[x].locked) | ||||
|     return; | ||||
|   // don't do it if x is already a REG or isn't used or to be cleaned anyway
 | ||||
|   if ((cache_regs[x].htype & HRT_REG) || | ||||
|       !(rsl_d & cache_regs[x].gregs)) { | ||||
|   // don't do it if x isn't used
 | ||||
|   if (!(rsl_d & cache_regs[x].gregs)) { | ||||
|     // clean here to avoid data loss on invalidation
 | ||||
|     rcache_clean_vreg(x); | ||||
|     return; | ||||
|   } | ||||
| 
 | ||||
|   if (cache_regs[x].locked) { | ||||
|     printf("remap vreg %d is locked\n", x); | ||||
|     exit(1); | ||||
|   FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, d, | ||||
|     if ((guest_regs[d].flags & (GRF_STATIC|GRF_PINNED)) && | ||||
|         !cache_regs[guest_regs[d].sreg].locked && | ||||
|         !((rsl_d|rcache_regs_now) & cache_regs[guest_regs[d].sreg].gregs)) { | ||||
|       // STATIC not in its sreg and sreg is available
 | ||||
|       rcache_evict_vreg(guest_regs[d].sreg); | ||||
|       rcache_move_vreg(guest_regs[d].sreg, x); | ||||
|       return; | ||||
|     } | ||||
|   ) | ||||
| 
 | ||||
|   // allocate a non-TEMP vreg
 | ||||
|   rcache_lock_vreg(x); // lock to avoid evicting x
 | ||||
|  | @ -1891,8 +1907,8 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr | |||
| { | ||||
|   int src, dst, ali; | ||||
|   cache_reg_t *tr; | ||||
|   u32 rsp_d = (rcache_regs_now | rcache_regs_soon | | ||||
|                rcache_regs_static | rcache_regs_pinned) & ~rcache_regs_discard; | ||||
|   u32 rsp_d = (rcache_regs_soon | rcache_regs_static | rcache_regs_pinned) & | ||||
|                ~rcache_regs_discard; | ||||
| 
 | ||||
|   dst = src = guest_regs[r].vreg; | ||||
| 
 | ||||
|  | @ -1901,7 +1917,7 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr | |||
|   if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) && | ||||
|       src != guest_regs[r].sreg && (src < 0 || mode != RC_GR_READ) && | ||||
|       !cache_regs[guest_regs[r].sreg].locked && | ||||
|       !(rsp_d & cache_regs[guest_regs[r].sreg].gregs)) { | ||||
|       !((rsp_d|rcache_regs_now) & cache_regs[guest_regs[r].sreg].gregs)) { | ||||
|     dst = guest_regs[r].sreg; | ||||
|     rcache_evict_vreg(dst); | ||||
|   } else if (dst < 0) { | ||||
|  | @ -1926,7 +1942,7 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr | |||
|   ali = tr->gregs & ~(1 << r); | ||||
|   if (mode != RC_GR_READ && src == dst && ali) { | ||||
|     int x = -1; | ||||
|     if (rsp_d & ali) { | ||||
|     if ((rsp_d|rcache_regs_now) & ali) { | ||||
|       if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) && | ||||
|           guest_regs[r].sreg == dst && !tr->locked) { | ||||
|         // split aliases if r is STATIC in sreg and dst isn't already locked
 | ||||
|  | @ -1935,7 +1951,7 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr | |||
|           if ((guest_regs[t].flags & (GRF_STATIC|GRF_PINNED)) && | ||||
|               !(ali & ~(1 << t)) && | ||||
|               !cache_regs[guest_regs[t].sreg].locked && | ||||
|               !(rsp_d & cache_regs[guest_regs[t].sreg].gregs)) { | ||||
|               !((rsp_d|rcache_regs_now) & cache_regs[guest_regs[t].sreg].gregs)) { | ||||
|             // alias is a single STATIC and its sreg is available
 | ||||
|             x = guest_regs[t].sreg; | ||||
|             rcache_evict_vreg(x); | ||||
|  | @ -1947,8 +1963,9 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr | |||
|           break; | ||||
|         ) | ||||
|         if (x >= 0) { | ||||
|           src = x; | ||||
|           rcache_move_vreg(src, dst); | ||||
|           rcache_remove_vreg_alias(src, r); | ||||
|           src = dst; | ||||
|           rcache_move_vreg(x, dst); | ||||
|         } | ||||
|       } else { | ||||
|         // split r
 | ||||
|  | @ -1956,6 +1973,7 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr | |||
|         x = rcache_allocate_vreg(rsp_d & (1 << r)); | ||||
|         rcache_unlock_vreg(src); | ||||
|         if (x >= 0) { | ||||
|           rcache_remove_vreg_alias(src, r); | ||||
|           dst = x; | ||||
|           tr = &cache_regs[dst]; | ||||
|           tr->stamp = rcache_counter; | ||||
|  | @ -1965,8 +1983,6 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr | |||
|     if (x < 0) | ||||
|       // aliases not needed or no vreg available, remove them
 | ||||
|       rcache_evict_vreg_aliases(dst, r); | ||||
|     else if (src != dst) | ||||
|       rcache_remove_vreg_alias(src, r); | ||||
|   } | ||||
| 
 | ||||
|   // assign r to dst
 | ||||
|  | @ -2342,13 +2358,16 @@ static void rcache_clean_tmp(void) | |||
| static void rcache_clean_masked(u32 mask) | ||||
| { | ||||
|   int i, r, hr; | ||||
|   u32 m; | ||||
| 
 | ||||
|   rcache_regs_clean |= mask; | ||||
|   mask = rcache_regs_clean; | ||||
| 
 | ||||
|   // clean constants where all aliases are covered by the mask
 | ||||
|   // clean constants where all aliases are covered by the mask, exempt statics
 | ||||
|   // to avoid flushing them to context if sreg isn't available
 | ||||
|   m = mask & ~(rcache_regs_static | rcache_regs_pinned); | ||||
|   for (i = 0; i < ARRAY_SIZE(gconsts); i++) | ||||
|     if ((gconsts[i].gregs & mask) && !(gconsts[i].gregs & ~mask)) { | ||||
|     if ((gconsts[i].gregs & m) && !(gconsts[i].gregs & ~mask)) { | ||||
|       FOR_ALL_BITS_SET_DO(gconsts[i].gregs, r, | ||||
|           if (guest_regs[r].flags & GRF_CDIRTY) { | ||||
|             hr = rcache_get_reg_(r, RC_GR_READ, 0, NULL); | ||||
|  | @ -2479,6 +2498,9 @@ static void rcache_create(void) | |||
|   } | ||||
| 
 | ||||
|   // create static host register mapping for SH2 regs
 | ||||
|   for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { | ||||
|     guest_regs[i] = (guest_reg_t){.sreg = -1}; | ||||
|   } | ||||
|   for (i = 0; i < ARRAY_SIZE(regs_static); i += 2) { | ||||
|     for (x = ARRAY_SIZE(cache_regs)-1; x >= 0; x--) | ||||
|       if (cache_regs[x].hreg == regs_static[i+1])	break; | ||||
|  | @ -2486,8 +2508,7 @@ static void rcache_create(void) | |||
|       guest_regs[regs_static[i]] = (guest_reg_t){.flags = GRF_STATIC,.sreg = x}; | ||||
|       rcache_regs_static |= (1 << regs_static[i]); | ||||
|       rcache_vregs_reg &= ~(1 << x); | ||||
|     } else | ||||
|       guest_regs[regs_static[i]] = (guest_reg_t){.sreg = -1}; | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   printf("DRC registers created, %ld host regs (%d REG, %d STATIC, 1 CTX)\n", | ||||
|  | @ -3501,7 +3522,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) | |||
|     rcache_set_usage_now(opd[0].source);   // current insn
 | ||||
|     rcache_set_usage_soon(soon);           // insns 1-4
 | ||||
|     rcache_set_usage_late(late & ~soon);   // insns 5-9
 | ||||
|     rcache_set_usage_discard(write & ~(late|soon|opd[0].source)); | ||||
|     rcache_set_usage_discard(write & ~(late|soon)); | ||||
|     if (v <= 9) | ||||
|       // upcoming rcache_flush, start writing back unused dirty stuff
 | ||||
|       rcache_clean_masked(rcache_dirty_mask() & ~(write|opd[0].dest)); | ||||
|  | @ -4717,7 +4738,7 @@ end_op: | |||
| 
 | ||||
|       // branch not taken, correct cycle count
 | ||||
|       if (ctaken) | ||||
|         emith_add_r_imm(sr, ctaken << 12); | ||||
|         cycles -= ctaken; | ||||
|       // set T bit to reflect branch not taken for OP_BRANCH_CT/CF
 | ||||
|       if (emith_get_t_cond() >= 0) // T is synced for all other cases
 | ||||
|         emith_set_t(sr, opd_b->op == OP_BRANCH_CF); | ||||
|  | @ -5263,11 +5284,11 @@ static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, u32 shift) | |||
|     start_lit = block->addr_lit & wtmask; | ||||
|     end_lit = start_lit + block->size_lit; | ||||
|     // disable/delete block if it covers the modified address
 | ||||
|     if ((start_addr <= a+len && a < end_addr) || | ||||
|         (start_lit <= a+len && a < end_lit)) | ||||
|     if ((start_addr < a+len && a < end_addr) || | ||||
|         (start_lit < a+len && a < end_lit)) | ||||
|     { | ||||
|       dbg(2, "smc remove @%08x", a); | ||||
|       end_addr = (start_lit <= a+len && block->size_lit ? a : 0); | ||||
|       end_addr = (start_lit < a+len && block->size_lit ? a : 0); | ||||
|       dr_rm_block_entry(block, tcache_id, end_addr, 0); | ||||
| #if (DRC_DEBUG & 2) | ||||
|       removed = 1; | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 kub
						kub