sh2: optimisations in drc

This commit is contained in:
kub 2020-05-06 23:06:10 +02:00
parent 39c5ec3f4c
commit 904fb98e6c
2 changed files with 72 additions and 51 deletions

View file

@ -1549,22 +1549,31 @@ static u32 rcache_regs_clean; // regs needing cleaning
static void rcache_lock_vreg(int x)
{
if (x >= 0) {
cache_regs[x].locked ++;
#if DRC_DEBUG & 64
if (cache_regs[x].type == HR_FREE) {
printf("locking free vreg %x, aborting\n", x);
exit(1);
}
cache_regs[x].locked ++;
if (!cache_regs[x].locked) {
printf("locking overflow vreg %x, aborting\n", x);
exit(1);
}
#endif
}
}
static void rcache_unlock_vreg(int x)
{
if (x >= 0) {
#if DRC_DEBUG & 64
if (cache_regs[x].type == HR_FREE) {
printf("unlocking free vreg %x, aborting\n", x);
exit(1);
}
cache_regs[x].locked --;
#endif
if (cache_regs[x].locked)
cache_regs[x].locked --;
}
}
@ -1582,7 +1591,7 @@ static void rcache_unmap_vreg(int x)
FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, i,
if (guest_regs[i].flags & GRF_DIRTY) {
// if a dirty reg is unmapped save its value to context
if (~rcache_regs_discard & (1 << i))
if ((~rcache_regs_discard | rcache_regs_now) & (1 << i))
emith_ctx_write(cache_regs[x].hreg, i * 4);
guest_regs[i].flags &= ~GRF_DIRTY;
}
@ -1700,26 +1709,28 @@ static int rcache_allocate(int what, int minprio)
continue;
if (cache_regs[i].type == HR_FREE || cache_regs[i].type == HR_TEMP) {
// REG is free
prio = 6;
prio = 10;
oldest = i;
break;
}
if (cache_regs[i].type == HR_CACHED) {
if (rcache_regs_now & cache_regs[i].gregs)
// REGs needed for the current insn
i_prio = 1;
i_prio = 0;
else if (rcache_regs_soon & cache_regs[i].gregs)
// REGs needed in the next insns
i_prio = 2;
else if (rcache_regs_late & cache_regs[i].gregs)
// REGs needed in some future insn
i_prio = 3;
else if (!(~rcache_regs_discard & cache_regs[i].gregs))
// REGs not needed in the foreseeable future
i_prio = 4;
else if (~rcache_regs_discard & cache_regs[i].gregs)
// REGs not needed in the foreseeable future
i_prio = 6;
else
// REGs soon overwritten anyway
i_prio = 5;
i_prio = 8;
if (!(cache_regs[i].flags & HRF_DIRTY)) i_prio ++;
if (prio < i_prio || (prio == i_prio && cache_regs[i].stamp < min_stamp)) {
min_stamp = cache_regs[i].stamp;
oldest = i;
@ -1744,21 +1755,21 @@ static int rcache_allocate_vreg(int needed)
{
int x;
x = rcache_allocate(1, needed ? 0 : 3);
x = rcache_allocate(1, needed ? 0 : 4);
if (x < 0)
x = rcache_allocate(-1, 1);
x = rcache_allocate(-1, 0);
return x;
}
static int rcache_allocate_nontemp(void)
{
int x = rcache_allocate(0, 3);
int x = rcache_allocate(0, 4);
return x;
}
static int rcache_allocate_temp(void)
{
int x = rcache_allocate(-1, 1);
int x = rcache_allocate(-1, 0);
if (x < 0)
x = rcache_allocate(0, 0);
return x;
@ -1821,20 +1832,25 @@ static void rcache_remap_vreg(int x)
int d;
// x must be a cached vreg
if (cache_regs[x].type != HR_CACHED)
if (cache_regs[x].type != HR_CACHED || cache_regs[x].locked)
return;
// don't do it if x is already a REG or isn't used or to be cleaned anyway
if ((cache_regs[x].htype & HRT_REG) ||
!(rsl_d & cache_regs[x].gregs)) {
// don't do it if x isn't used
if (!(rsl_d & cache_regs[x].gregs)) {
// clean here to avoid data loss on invalidation
rcache_clean_vreg(x);
return;
}
if (cache_regs[x].locked) {
printf("remap vreg %d is locked\n", x);
exit(1);
}
FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, d,
if ((guest_regs[d].flags & (GRF_STATIC|GRF_PINNED)) &&
!cache_regs[guest_regs[d].sreg].locked &&
!((rsl_d|rcache_regs_now) & cache_regs[guest_regs[d].sreg].gregs)) {
// STATIC not in its sreg and sreg is available
rcache_evict_vreg(guest_regs[d].sreg);
rcache_move_vreg(guest_regs[d].sreg, x);
return;
}
)
// allocate a non-TEMP vreg
rcache_lock_vreg(x); // lock to avoid evicting x
@ -1891,8 +1907,8 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr
{
int src, dst, ali;
cache_reg_t *tr;
u32 rsp_d = (rcache_regs_now | rcache_regs_soon |
rcache_regs_static | rcache_regs_pinned) & ~rcache_regs_discard;
u32 rsp_d = (rcache_regs_soon | rcache_regs_static | rcache_regs_pinned) &
~rcache_regs_discard;
dst = src = guest_regs[r].vreg;
@ -1901,7 +1917,7 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr
if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) &&
src != guest_regs[r].sreg && (src < 0 || mode != RC_GR_READ) &&
!cache_regs[guest_regs[r].sreg].locked &&
!(rsp_d & cache_regs[guest_regs[r].sreg].gregs)) {
!((rsp_d|rcache_regs_now) & cache_regs[guest_regs[r].sreg].gregs)) {
dst = guest_regs[r].sreg;
rcache_evict_vreg(dst);
} else if (dst < 0) {
@ -1926,7 +1942,7 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr
ali = tr->gregs & ~(1 << r);
if (mode != RC_GR_READ && src == dst && ali) {
int x = -1;
if (rsp_d & ali) {
if ((rsp_d|rcache_regs_now) & ali) {
if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) &&
guest_regs[r].sreg == dst && !tr->locked) {
// split aliases if r is STATIC in sreg and dst isn't already locked
@ -1935,7 +1951,7 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr
if ((guest_regs[t].flags & (GRF_STATIC|GRF_PINNED)) &&
!(ali & ~(1 << t)) &&
!cache_regs[guest_regs[t].sreg].locked &&
!(rsp_d & cache_regs[guest_regs[t].sreg].gregs)) {
!((rsp_d|rcache_regs_now) & cache_regs[guest_regs[t].sreg].gregs)) {
// alias is a single STATIC and its sreg is available
x = guest_regs[t].sreg;
rcache_evict_vreg(x);
@ -1947,8 +1963,9 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr
break;
)
if (x >= 0) {
src = x;
rcache_move_vreg(src, dst);
rcache_remove_vreg_alias(src, r);
src = dst;
rcache_move_vreg(x, dst);
}
} else {
// split r
@ -1956,6 +1973,7 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr
x = rcache_allocate_vreg(rsp_d & (1 << r));
rcache_unlock_vreg(src);
if (x >= 0) {
rcache_remove_vreg_alias(src, r);
dst = x;
tr = &cache_regs[dst];
tr->stamp = rcache_counter;
@ -1965,8 +1983,6 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr
if (x < 0)
// aliases not needed or no vreg available, remove them
rcache_evict_vreg_aliases(dst, r);
else if (src != dst)
rcache_remove_vreg_alias(src, r);
}
// assign r to dst
@ -2342,13 +2358,16 @@ static void rcache_clean_tmp(void)
static void rcache_clean_masked(u32 mask)
{
int i, r, hr;
u32 m;
rcache_regs_clean |= mask;
mask = rcache_regs_clean;
// clean constants where all aliases are covered by the mask
// clean constants where all aliases are covered by the mask, exempt statics
// to avoid flushing them to context if sreg isn't available
m = mask & ~(rcache_regs_static | rcache_regs_pinned);
for (i = 0; i < ARRAY_SIZE(gconsts); i++)
if ((gconsts[i].gregs & mask) && !(gconsts[i].gregs & ~mask)) {
if ((gconsts[i].gregs & m) && !(gconsts[i].gregs & ~mask)) {
FOR_ALL_BITS_SET_DO(gconsts[i].gregs, r,
if (guest_regs[r].flags & GRF_CDIRTY) {
hr = rcache_get_reg_(r, RC_GR_READ, 0, NULL);
@ -2479,6 +2498,9 @@ static void rcache_create(void)
}
// create static host register mapping for SH2 regs
for (i = 0; i < ARRAY_SIZE(guest_regs); i++) {
guest_regs[i] = (guest_reg_t){.sreg = -1};
}
for (i = 0; i < ARRAY_SIZE(regs_static); i += 2) {
for (x = ARRAY_SIZE(cache_regs)-1; x >= 0; x--)
if (cache_regs[x].hreg == regs_static[i+1]) break;
@ -2486,8 +2508,7 @@ static void rcache_create(void)
guest_regs[regs_static[i]] = (guest_reg_t){.flags = GRF_STATIC,.sreg = x};
rcache_regs_static |= (1 << regs_static[i]);
rcache_vregs_reg &= ~(1 << x);
} else
guest_regs[regs_static[i]] = (guest_reg_t){.sreg = -1};
}
}
printf("DRC registers created, %ld host regs (%d REG, %d STATIC, 1 CTX)\n",
@ -3501,7 +3522,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
rcache_set_usage_now(opd[0].source); // current insn
rcache_set_usage_soon(soon); // insns 1-4
rcache_set_usage_late(late & ~soon); // insns 5-9
rcache_set_usage_discard(write & ~(late|soon|opd[0].source));
rcache_set_usage_discard(write & ~(late|soon));
if (v <= 9)
// upcoming rcache_flush, start writing back unused dirty stuff
rcache_clean_masked(rcache_dirty_mask() & ~(write|opd[0].dest));
@ -4717,7 +4738,7 @@ end_op:
// branch not taken, correct cycle count
if (ctaken)
emith_add_r_imm(sr, ctaken << 12);
cycles -= ctaken;
// set T bit to reflect branch not taken for OP_BRANCH_CT/CF
if (emith_get_t_cond() >= 0) // T is synced for all other cases
emith_set_t(sr, opd_b->op == OP_BRANCH_CF);
@ -5263,11 +5284,11 @@ static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, u32 shift)
start_lit = block->addr_lit & wtmask;
end_lit = start_lit + block->size_lit;
// disable/delete block if it covers the modified address
if ((start_addr <= a+len && a < end_addr) ||
(start_lit <= a+len && a < end_lit))
if ((start_addr < a+len && a < end_addr) ||
(start_lit < a+len && a < end_lit))
{
dbg(2, "smc remove @%08x", a);
end_addr = (start_lit <= a+len && block->size_lit ? a : 0);
end_addr = (start_lit < a+len && block->size_lit ? a : 0);
dr_rm_block_entry(block, tcache_id, end_addr, 0);
#if (DRC_DEBUG & 2)
removed = 1;