mirror of
https://github.com/RaySollium99/picodrive.git
synced 2025-09-05 15:27:46 -04:00
sh2: optimisations in drc
This commit is contained in:
parent
39c5ec3f4c
commit
904fb98e6c
2 changed files with 72 additions and 51 deletions
|
@ -17,18 +17,18 @@ void drc_cmn_cleanup(void);
|
||||||
// binary search approach, since we don't have CLZ on ARM920T
|
// binary search approach, since we don't have CLZ on ARM920T
|
||||||
#define FOR_ALL_BITS_SET_DO(mask, bit, code) { \
|
#define FOR_ALL_BITS_SET_DO(mask, bit, code) { \
|
||||||
u32 __mask = mask; \
|
u32 __mask = mask; \
|
||||||
for (bit = 31; bit >= 0 && mask; bit--, __mask <<= 1) { \
|
for (bit = 0; bit < 32 && mask; bit++, __mask >>= 1) { \
|
||||||
if (!(__mask & (0xffff << 16))) \
|
if (!(__mask & 0xffff)) \
|
||||||
bit -= 16, __mask <<= 16; \
|
bit += 16,__mask >>= 16; \
|
||||||
if (!(__mask & (0xff << 24))) \
|
if (!(__mask & 0xff)) \
|
||||||
bit -= 8, __mask <<= 8; \
|
bit += 8, __mask >>= 8; \
|
||||||
if (!(__mask & (0xf << 28))) \
|
if (!(__mask & 0xf)) \
|
||||||
bit -= 4, __mask <<= 4; \
|
bit += 4, __mask >>= 4; \
|
||||||
if (!(__mask & (0x3 << 30))) \
|
if (!(__mask & 0x3)) \
|
||||||
bit -= 2, __mask <<= 2; \
|
bit += 2, __mask >>= 2; \
|
||||||
if (!(__mask & (0x1 << 31))) \
|
if (!(__mask & 0x1)) \
|
||||||
bit -= 1, __mask <<= 1; \
|
bit += 1, __mask >>= 1; \
|
||||||
if (__mask & (0x1 << 31)) { \
|
if (__mask & 0x1) { \
|
||||||
code; \
|
code; \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
|
|
|
@ -1549,21 +1549,30 @@ static u32 rcache_regs_clean; // regs needing cleaning
|
||||||
static void rcache_lock_vreg(int x)
|
static void rcache_lock_vreg(int x)
|
||||||
{
|
{
|
||||||
if (x >= 0) {
|
if (x >= 0) {
|
||||||
|
cache_regs[x].locked ++;
|
||||||
|
#if DRC_DEBUG & 64
|
||||||
if (cache_regs[x].type == HR_FREE) {
|
if (cache_regs[x].type == HR_FREE) {
|
||||||
printf("locking free vreg %x, aborting\n", x);
|
printf("locking free vreg %x, aborting\n", x);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
cache_regs[x].locked ++;
|
if (!cache_regs[x].locked) {
|
||||||
|
printf("locking overflow vreg %x, aborting\n", x);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void rcache_unlock_vreg(int x)
|
static void rcache_unlock_vreg(int x)
|
||||||
{
|
{
|
||||||
if (x >= 0) {
|
if (x >= 0) {
|
||||||
|
#if DRC_DEBUG & 64
|
||||||
if (cache_regs[x].type == HR_FREE) {
|
if (cache_regs[x].type == HR_FREE) {
|
||||||
printf("unlocking free vreg %x, aborting\n", x);
|
printf("unlocking free vreg %x, aborting\n", x);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
if (cache_regs[x].locked)
|
||||||
cache_regs[x].locked --;
|
cache_regs[x].locked --;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1582,7 +1591,7 @@ static void rcache_unmap_vreg(int x)
|
||||||
FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, i,
|
FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, i,
|
||||||
if (guest_regs[i].flags & GRF_DIRTY) {
|
if (guest_regs[i].flags & GRF_DIRTY) {
|
||||||
// if a dirty reg is unmapped save its value to context
|
// if a dirty reg is unmapped save its value to context
|
||||||
if (~rcache_regs_discard & (1 << i))
|
if ((~rcache_regs_discard | rcache_regs_now) & (1 << i))
|
||||||
emith_ctx_write(cache_regs[x].hreg, i * 4);
|
emith_ctx_write(cache_regs[x].hreg, i * 4);
|
||||||
guest_regs[i].flags &= ~GRF_DIRTY;
|
guest_regs[i].flags &= ~GRF_DIRTY;
|
||||||
}
|
}
|
||||||
|
@ -1700,26 +1709,28 @@ static int rcache_allocate(int what, int minprio)
|
||||||
continue;
|
continue;
|
||||||
if (cache_regs[i].type == HR_FREE || cache_regs[i].type == HR_TEMP) {
|
if (cache_regs[i].type == HR_FREE || cache_regs[i].type == HR_TEMP) {
|
||||||
// REG is free
|
// REG is free
|
||||||
prio = 6;
|
prio = 10;
|
||||||
oldest = i;
|
oldest = i;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (cache_regs[i].type == HR_CACHED) {
|
if (cache_regs[i].type == HR_CACHED) {
|
||||||
if (rcache_regs_now & cache_regs[i].gregs)
|
if (rcache_regs_now & cache_regs[i].gregs)
|
||||||
// REGs needed for the current insn
|
// REGs needed for the current insn
|
||||||
i_prio = 1;
|
i_prio = 0;
|
||||||
else if (rcache_regs_soon & cache_regs[i].gregs)
|
else if (rcache_regs_soon & cache_regs[i].gregs)
|
||||||
// REGs needed in the next insns
|
// REGs needed in the next insns
|
||||||
i_prio = 2;
|
i_prio = 2;
|
||||||
else if (rcache_regs_late & cache_regs[i].gregs)
|
else if (rcache_regs_late & cache_regs[i].gregs)
|
||||||
// REGs needed in some future insn
|
// REGs needed in some future insn
|
||||||
i_prio = 3;
|
|
||||||
else if (!(~rcache_regs_discard & cache_regs[i].gregs))
|
|
||||||
// REGs not needed in the foreseeable future
|
|
||||||
i_prio = 4;
|
i_prio = 4;
|
||||||
|
else if (~rcache_regs_discard & cache_regs[i].gregs)
|
||||||
|
// REGs not needed in the foreseeable future
|
||||||
|
i_prio = 6;
|
||||||
else
|
else
|
||||||
// REGs soon overwritten anyway
|
// REGs soon overwritten anyway
|
||||||
i_prio = 5;
|
i_prio = 8;
|
||||||
|
if (!(cache_regs[i].flags & HRF_DIRTY)) i_prio ++;
|
||||||
|
|
||||||
if (prio < i_prio || (prio == i_prio && cache_regs[i].stamp < min_stamp)) {
|
if (prio < i_prio || (prio == i_prio && cache_regs[i].stamp < min_stamp)) {
|
||||||
min_stamp = cache_regs[i].stamp;
|
min_stamp = cache_regs[i].stamp;
|
||||||
oldest = i;
|
oldest = i;
|
||||||
|
@ -1744,21 +1755,21 @@ static int rcache_allocate_vreg(int needed)
|
||||||
{
|
{
|
||||||
int x;
|
int x;
|
||||||
|
|
||||||
x = rcache_allocate(1, needed ? 0 : 3);
|
x = rcache_allocate(1, needed ? 0 : 4);
|
||||||
if (x < 0)
|
if (x < 0)
|
||||||
x = rcache_allocate(-1, 1);
|
x = rcache_allocate(-1, 0);
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int rcache_allocate_nontemp(void)
|
static int rcache_allocate_nontemp(void)
|
||||||
{
|
{
|
||||||
int x = rcache_allocate(0, 3);
|
int x = rcache_allocate(0, 4);
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int rcache_allocate_temp(void)
|
static int rcache_allocate_temp(void)
|
||||||
{
|
{
|
||||||
int x = rcache_allocate(-1, 1);
|
int x = rcache_allocate(-1, 0);
|
||||||
if (x < 0)
|
if (x < 0)
|
||||||
x = rcache_allocate(0, 0);
|
x = rcache_allocate(0, 0);
|
||||||
return x;
|
return x;
|
||||||
|
@ -1821,20 +1832,25 @@ static void rcache_remap_vreg(int x)
|
||||||
int d;
|
int d;
|
||||||
|
|
||||||
// x must be a cached vreg
|
// x must be a cached vreg
|
||||||
if (cache_regs[x].type != HR_CACHED)
|
if (cache_regs[x].type != HR_CACHED || cache_regs[x].locked)
|
||||||
return;
|
return;
|
||||||
// don't do it if x is already a REG or isn't used or to be cleaned anyway
|
// don't do it if x isn't used
|
||||||
if ((cache_regs[x].htype & HRT_REG) ||
|
if (!(rsl_d & cache_regs[x].gregs)) {
|
||||||
!(rsl_d & cache_regs[x].gregs)) {
|
|
||||||
// clean here to avoid data loss on invalidation
|
// clean here to avoid data loss on invalidation
|
||||||
rcache_clean_vreg(x);
|
rcache_clean_vreg(x);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cache_regs[x].locked) {
|
FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, d,
|
||||||
printf("remap vreg %d is locked\n", x);
|
if ((guest_regs[d].flags & (GRF_STATIC|GRF_PINNED)) &&
|
||||||
exit(1);
|
!cache_regs[guest_regs[d].sreg].locked &&
|
||||||
|
!((rsl_d|rcache_regs_now) & cache_regs[guest_regs[d].sreg].gregs)) {
|
||||||
|
// STATIC not in its sreg and sreg is available
|
||||||
|
rcache_evict_vreg(guest_regs[d].sreg);
|
||||||
|
rcache_move_vreg(guest_regs[d].sreg, x);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
)
|
||||||
|
|
||||||
// allocate a non-TEMP vreg
|
// allocate a non-TEMP vreg
|
||||||
rcache_lock_vreg(x); // lock to avoid evicting x
|
rcache_lock_vreg(x); // lock to avoid evicting x
|
||||||
|
@ -1891,8 +1907,8 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr
|
||||||
{
|
{
|
||||||
int src, dst, ali;
|
int src, dst, ali;
|
||||||
cache_reg_t *tr;
|
cache_reg_t *tr;
|
||||||
u32 rsp_d = (rcache_regs_now | rcache_regs_soon |
|
u32 rsp_d = (rcache_regs_soon | rcache_regs_static | rcache_regs_pinned) &
|
||||||
rcache_regs_static | rcache_regs_pinned) & ~rcache_regs_discard;
|
~rcache_regs_discard;
|
||||||
|
|
||||||
dst = src = guest_regs[r].vreg;
|
dst = src = guest_regs[r].vreg;
|
||||||
|
|
||||||
|
@ -1901,7 +1917,7 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr
|
||||||
if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) &&
|
if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) &&
|
||||||
src != guest_regs[r].sreg && (src < 0 || mode != RC_GR_READ) &&
|
src != guest_regs[r].sreg && (src < 0 || mode != RC_GR_READ) &&
|
||||||
!cache_regs[guest_regs[r].sreg].locked &&
|
!cache_regs[guest_regs[r].sreg].locked &&
|
||||||
!(rsp_d & cache_regs[guest_regs[r].sreg].gregs)) {
|
!((rsp_d|rcache_regs_now) & cache_regs[guest_regs[r].sreg].gregs)) {
|
||||||
dst = guest_regs[r].sreg;
|
dst = guest_regs[r].sreg;
|
||||||
rcache_evict_vreg(dst);
|
rcache_evict_vreg(dst);
|
||||||
} else if (dst < 0) {
|
} else if (dst < 0) {
|
||||||
|
@ -1926,7 +1942,7 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr
|
||||||
ali = tr->gregs & ~(1 << r);
|
ali = tr->gregs & ~(1 << r);
|
||||||
if (mode != RC_GR_READ && src == dst && ali) {
|
if (mode != RC_GR_READ && src == dst && ali) {
|
||||||
int x = -1;
|
int x = -1;
|
||||||
if (rsp_d & ali) {
|
if ((rsp_d|rcache_regs_now) & ali) {
|
||||||
if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) &&
|
if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) &&
|
||||||
guest_regs[r].sreg == dst && !tr->locked) {
|
guest_regs[r].sreg == dst && !tr->locked) {
|
||||||
// split aliases if r is STATIC in sreg and dst isn't already locked
|
// split aliases if r is STATIC in sreg and dst isn't already locked
|
||||||
|
@ -1935,7 +1951,7 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr
|
||||||
if ((guest_regs[t].flags & (GRF_STATIC|GRF_PINNED)) &&
|
if ((guest_regs[t].flags & (GRF_STATIC|GRF_PINNED)) &&
|
||||||
!(ali & ~(1 << t)) &&
|
!(ali & ~(1 << t)) &&
|
||||||
!cache_regs[guest_regs[t].sreg].locked &&
|
!cache_regs[guest_regs[t].sreg].locked &&
|
||||||
!(rsp_d & cache_regs[guest_regs[t].sreg].gregs)) {
|
!((rsp_d|rcache_regs_now) & cache_regs[guest_regs[t].sreg].gregs)) {
|
||||||
// alias is a single STATIC and its sreg is available
|
// alias is a single STATIC and its sreg is available
|
||||||
x = guest_regs[t].sreg;
|
x = guest_regs[t].sreg;
|
||||||
rcache_evict_vreg(x);
|
rcache_evict_vreg(x);
|
||||||
|
@ -1947,8 +1963,9 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr
|
||||||
break;
|
break;
|
||||||
)
|
)
|
||||||
if (x >= 0) {
|
if (x >= 0) {
|
||||||
src = x;
|
rcache_remove_vreg_alias(src, r);
|
||||||
rcache_move_vreg(src, dst);
|
src = dst;
|
||||||
|
rcache_move_vreg(x, dst);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// split r
|
// split r
|
||||||
|
@ -1956,6 +1973,7 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr
|
||||||
x = rcache_allocate_vreg(rsp_d & (1 << r));
|
x = rcache_allocate_vreg(rsp_d & (1 << r));
|
||||||
rcache_unlock_vreg(src);
|
rcache_unlock_vreg(src);
|
||||||
if (x >= 0) {
|
if (x >= 0) {
|
||||||
|
rcache_remove_vreg_alias(src, r);
|
||||||
dst = x;
|
dst = x;
|
||||||
tr = &cache_regs[dst];
|
tr = &cache_regs[dst];
|
||||||
tr->stamp = rcache_counter;
|
tr->stamp = rcache_counter;
|
||||||
|
@ -1965,8 +1983,6 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr
|
||||||
if (x < 0)
|
if (x < 0)
|
||||||
// aliases not needed or no vreg available, remove them
|
// aliases not needed or no vreg available, remove them
|
||||||
rcache_evict_vreg_aliases(dst, r);
|
rcache_evict_vreg_aliases(dst, r);
|
||||||
else if (src != dst)
|
|
||||||
rcache_remove_vreg_alias(src, r);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// assign r to dst
|
// assign r to dst
|
||||||
|
@ -2342,13 +2358,16 @@ static void rcache_clean_tmp(void)
|
||||||
static void rcache_clean_masked(u32 mask)
|
static void rcache_clean_masked(u32 mask)
|
||||||
{
|
{
|
||||||
int i, r, hr;
|
int i, r, hr;
|
||||||
|
u32 m;
|
||||||
|
|
||||||
rcache_regs_clean |= mask;
|
rcache_regs_clean |= mask;
|
||||||
mask = rcache_regs_clean;
|
mask = rcache_regs_clean;
|
||||||
|
|
||||||
// clean constants where all aliases are covered by the mask
|
// clean constants where all aliases are covered by the mask, exempt statics
|
||||||
|
// to avoid flushing them to context if sreg isn't available
|
||||||
|
m = mask & ~(rcache_regs_static | rcache_regs_pinned);
|
||||||
for (i = 0; i < ARRAY_SIZE(gconsts); i++)
|
for (i = 0; i < ARRAY_SIZE(gconsts); i++)
|
||||||
if ((gconsts[i].gregs & mask) && !(gconsts[i].gregs & ~mask)) {
|
if ((gconsts[i].gregs & m) && !(gconsts[i].gregs & ~mask)) {
|
||||||
FOR_ALL_BITS_SET_DO(gconsts[i].gregs, r,
|
FOR_ALL_BITS_SET_DO(gconsts[i].gregs, r,
|
||||||
if (guest_regs[r].flags & GRF_CDIRTY) {
|
if (guest_regs[r].flags & GRF_CDIRTY) {
|
||||||
hr = rcache_get_reg_(r, RC_GR_READ, 0, NULL);
|
hr = rcache_get_reg_(r, RC_GR_READ, 0, NULL);
|
||||||
|
@ -2479,6 +2498,9 @@ static void rcache_create(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
// create static host register mapping for SH2 regs
|
// create static host register mapping for SH2 regs
|
||||||
|
for (i = 0; i < ARRAY_SIZE(guest_regs); i++) {
|
||||||
|
guest_regs[i] = (guest_reg_t){.sreg = -1};
|
||||||
|
}
|
||||||
for (i = 0; i < ARRAY_SIZE(regs_static); i += 2) {
|
for (i = 0; i < ARRAY_SIZE(regs_static); i += 2) {
|
||||||
for (x = ARRAY_SIZE(cache_regs)-1; x >= 0; x--)
|
for (x = ARRAY_SIZE(cache_regs)-1; x >= 0; x--)
|
||||||
if (cache_regs[x].hreg == regs_static[i+1]) break;
|
if (cache_regs[x].hreg == regs_static[i+1]) break;
|
||||||
|
@ -2486,8 +2508,7 @@ static void rcache_create(void)
|
||||||
guest_regs[regs_static[i]] = (guest_reg_t){.flags = GRF_STATIC,.sreg = x};
|
guest_regs[regs_static[i]] = (guest_reg_t){.flags = GRF_STATIC,.sreg = x};
|
||||||
rcache_regs_static |= (1 << regs_static[i]);
|
rcache_regs_static |= (1 << regs_static[i]);
|
||||||
rcache_vregs_reg &= ~(1 << x);
|
rcache_vregs_reg &= ~(1 << x);
|
||||||
} else
|
}
|
||||||
guest_regs[regs_static[i]] = (guest_reg_t){.sreg = -1};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("DRC registers created, %ld host regs (%d REG, %d STATIC, 1 CTX)\n",
|
printf("DRC registers created, %ld host regs (%d REG, %d STATIC, 1 CTX)\n",
|
||||||
|
@ -3501,7 +3522,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
|
||||||
rcache_set_usage_now(opd[0].source); // current insn
|
rcache_set_usage_now(opd[0].source); // current insn
|
||||||
rcache_set_usage_soon(soon); // insns 1-4
|
rcache_set_usage_soon(soon); // insns 1-4
|
||||||
rcache_set_usage_late(late & ~soon); // insns 5-9
|
rcache_set_usage_late(late & ~soon); // insns 5-9
|
||||||
rcache_set_usage_discard(write & ~(late|soon|opd[0].source));
|
rcache_set_usage_discard(write & ~(late|soon));
|
||||||
if (v <= 9)
|
if (v <= 9)
|
||||||
// upcoming rcache_flush, start writing back unused dirty stuff
|
// upcoming rcache_flush, start writing back unused dirty stuff
|
||||||
rcache_clean_masked(rcache_dirty_mask() & ~(write|opd[0].dest));
|
rcache_clean_masked(rcache_dirty_mask() & ~(write|opd[0].dest));
|
||||||
|
@ -4717,7 +4738,7 @@ end_op:
|
||||||
|
|
||||||
// branch not taken, correct cycle count
|
// branch not taken, correct cycle count
|
||||||
if (ctaken)
|
if (ctaken)
|
||||||
emith_add_r_imm(sr, ctaken << 12);
|
cycles -= ctaken;
|
||||||
// set T bit to reflect branch not taken for OP_BRANCH_CT/CF
|
// set T bit to reflect branch not taken for OP_BRANCH_CT/CF
|
||||||
if (emith_get_t_cond() >= 0) // T is synced for all other cases
|
if (emith_get_t_cond() >= 0) // T is synced for all other cases
|
||||||
emith_set_t(sr, opd_b->op == OP_BRANCH_CF);
|
emith_set_t(sr, opd_b->op == OP_BRANCH_CF);
|
||||||
|
@ -5263,11 +5284,11 @@ static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, u32 shift)
|
||||||
start_lit = block->addr_lit & wtmask;
|
start_lit = block->addr_lit & wtmask;
|
||||||
end_lit = start_lit + block->size_lit;
|
end_lit = start_lit + block->size_lit;
|
||||||
// disable/delete block if it covers the modified address
|
// disable/delete block if it covers the modified address
|
||||||
if ((start_addr <= a+len && a < end_addr) ||
|
if ((start_addr < a+len && a < end_addr) ||
|
||||||
(start_lit <= a+len && a < end_lit))
|
(start_lit < a+len && a < end_lit))
|
||||||
{
|
{
|
||||||
dbg(2, "smc remove @%08x", a);
|
dbg(2, "smc remove @%08x", a);
|
||||||
end_addr = (start_lit <= a+len && block->size_lit ? a : 0);
|
end_addr = (start_lit < a+len && block->size_lit ? a : 0);
|
||||||
dr_rm_block_entry(block, tcache_id, end_addr, 0);
|
dr_rm_block_entry(block, tcache_id, end_addr, 0);
|
||||||
#if (DRC_DEBUG & 2)
|
#if (DRC_DEBUG & 2)
|
||||||
removed = 1;
|
removed = 1;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue