sh2 drc, improved constant handling and register allocator

This commit is contained in:
kub 2019-04-25 19:03:58 +02:00
parent 08626dab12
commit ed7e915078

View file

@ -395,10 +395,10 @@ enum {
} guest_reg_flags; } guest_reg_flags;
typedef struct { typedef struct {
u16 flags; // guest flags: is constant, is dirty? u8 flags; // guest flags: is constant, is dirty?
s8 sreg; // cache reg for static mapping s8 sreg; // cache reg for static mapping
s8 vreg; // cache_reg this is currently mapped to, -1 if not mapped s8 vreg; // cache_reg this is currently mapped to, -1 if not mapped
u32 val; // value if this is constant s8 cnst; // const index if this is constant
} guest_reg_t; } guest_reg_t;
@ -1153,7 +1153,7 @@ static int find_in_array(u32 *array, size_t size, u32 what)
// NB rcache allocation dependencies: // NB rcache allocation dependencies:
// - get_reg_arg/get_tmp_arg first (might evict other regs just allocated) // - get_reg_arg/get_tmp_arg first (might evict other regs just allocated)
// - get_reg(..., NULL) before get_reg(..., &x) if it might get the same reg // - get_reg(..., NULL) before get_reg(..., &hr) if it might get the same reg
// - get_reg(..., RC_GR_READ/RMW, ...) before WRITE (might evict needed reg) // - get_reg(..., RC_GR_READ/RMW, ...) before WRITE (might evict needed reg)
// register cache / constant propagation stuff // register cache / constant propagation stuff
@ -1163,7 +1163,15 @@ typedef enum {
RC_GR_RMW, RC_GR_RMW,
} rc_gr_mode; } rc_gr_mode;
// one entry of the shared constant pool: a value plus the guest regs using it
typedef struct {
u32 gregs; // bitmask of guest regs (bit r == 1 << r) currently bound to this constant
u32 val; // the constant value itself
} gconst_t;
// constant pool; sized like guest_regs so every guest reg can own a distinct slot
gconst_t gconsts[ARRAY_SIZE(guest_regs)];
static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr); static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr);
static void rcache_add_vreg_alias(int x, sh2_reg_e r);
static void rcache_remove_vreg_alias(int x, sh2_reg_e r); static void rcache_remove_vreg_alias(int x, sh2_reg_e r);
#define RCACHE_DUMP(msg) { \ #define RCACHE_DUMP(msg) { \
@ -1185,101 +1193,6 @@ static void rcache_remove_vreg_alias(int x, sh2_reg_e r);
} \ } \
} }
#if PROPAGATE_CONSTANTS
// record that guest reg r holds the known constant val
static void gconst_set(sh2_reg_e r, u32 val)
{
  guest_regs[r].val = val;
  guest_regs[r].flags |= GRF_CONST;
}
// assign a fresh constant to guest reg r; the value is only recorded here
// and flagged GRF_CDIRTY, i.e. not yet loaded into any host register
static void gconst_new(sh2_reg_e r, u32 val)
{
  int cached;

  gconst_set(r, val);
  guest_regs[r].flags |= GRF_CDIRTY;

  // a cached copy of the previous value of r is stale now, drop it
  cached = guest_regs[r].vreg;
  if (cached >= 0)
    rcache_remove_vreg_alias(cached, r);
}
// make rd carry the same constant state as rs: rd loses any constant it had,
// and becomes constant (but not GRF_CDIRTY) if rs is constant
static void gconst_copy(sh2_reg_e rd, sh2_reg_e rs)
{
  guest_regs[rd].flags &= ~(GRF_CONST|GRF_CDIRTY);

  if (!(guest_regs[rs].flags & GRF_CONST))
    return;
  gconst_set(rd, guest_regs[rs].val);
}
#endif
// fetch the constant held by guest reg r into *val
// returns 1 if r is constant (*val written), 0 otherwise (*val untouched)
static int gconst_get(sh2_reg_e r, u32 *val)
{
  if (!(guest_regs[r].flags & GRF_CONST))
    return 0;

  *val = guest_regs[r].val;
  return 1;
}
// returns 1 if guest reg r has GRF_CONST or GRF_CDIRTY set, else 0
static int gconst_check(sh2_reg_e r)
{
  return (guest_regs[r].flags & (GRF_CONST|GRF_CDIRTY)) != 0;
}
// if guest reg r has a pending (GRF_CDIRTY) constant, emit a load of it
// into host reg hr and clear the pending flag; otherwise emit nothing
// returns 1 if the load was emitted, 0 if not
static int gconst_try_read(int hr, sh2_reg_e r)
{
  if (!(guest_regs[r].flags & GRF_CDIRTY))
    return 0;

  emith_move_r_imm(hr, guest_regs[r].val);
  guest_regs[r].flags &= ~GRF_CDIRTY;
  return 1;
}
// build a bitmask (bit r == 1 << r) of all guest regs flagged GRF_CDIRTY
static u32 gconst_dirty_mask(void)
{
  u32 dirty = 0;
  int r = 0;

  while (r < ARRAY_SIZE(guest_regs)) {
    if (guest_regs[r].flags & GRF_CDIRTY)
      dirty |= (1 << r);
    r++;
  }
  return dirty;
}
// forget any constant state (GRF_CONST and GRF_CDIRTY) of guest reg r
static void gconst_kill(sh2_reg_e r)
{
  guest_regs[r].flags = guest_regs[r].flags & ~(GRF_CONST|GRF_CDIRTY);
}
// bring every pending (GRF_CDIRTY) constant into the register cache
static void gconst_clean(void)
{
  int r;

  for (r = 0; r < ARRAY_SIZE(guest_regs); r++) {
    if (!(guest_regs[r].flags & GRF_CDIRTY))
      continue;
    // an RC_GR_READ access calls gconst_try_read, which caches
    // the reg and marks it dirty
    rcache_get_reg_(r, RC_GR_READ, 0, NULL);
  }
}
// drop the constant state (GRF_CONST and GRF_CDIRTY) of all guest regs
static void gconst_invalidate(void)
{
  int r = 0;

  while (r < ARRAY_SIZE(guest_regs)) {
    guest_regs[r].flags &= ~(GRF_CONST|GRF_CDIRTY);
    r++;
  }
}
// NOTE(review): presumably the allocation stamp counter; the only visible use is its reset to 0 in rcache_invalidate
static u16 rcache_counter;
// bitmask of guest regs (bit r == 1 << r) that are excluded from the hint masks;
// presumably the regs with a static host reg mapping - confirm against its initialisation
static u32 rcache_static;
// NOTE(review): presumably the guest regs set via rcache_set_locked ("try not to evict src regs for this op") - confirm
static u32 rcache_locked;
// guest regs hinted as needed soon (sh2_translate comments this as "insns 1-3")
static u32 rcache_hint_soon;
// guest regs hinted as needed later (sh2_translate comments this as "insns 4-9")
static u32 rcache_hint_late;
// union of both hint masks
#define rcache_hint (rcache_hint_soon|rcache_hint_late)
// binary search approach, since we don't have CLZ on ARM920T // binary search approach, since we don't have CLZ on ARM920T
#define FOR_ALL_BITS_SET_DO(mask, bit, code) { \ #define FOR_ALL_BITS_SET_DO(mask, bit, code) { \
u32 __mask = mask; \ u32 __mask = mask; \
@ -1300,6 +1213,142 @@ static u32 rcache_hint_late;
} \ } \
} }
#if PROPAGATE_CONSTANTS
// reserve a constant pool slot for guest reg r
// r is first detached from every slot it may still be bound to, then the
// lowest-numbered slot without any users is handed to r alone
// returns the slot index; terminates the process if no slot is free
static inline int gconst_alloc(sh2_reg_e r)
{
  const u32 bit = 1 << r;
  int slot = -1;
  int i;

  for (i = 0; i < ARRAY_SIZE(gconsts); i++) {
    // clearing unconditionally is the same as clearing only when set
    gconsts[i].gregs &= ~bit;
    if (slot < 0 && gconsts[i].gregs == 0)
      slot = i;
  }

  if (slot < 0)
    exit(1); // cannot happen - more constants than guest regs?

  gconsts[slot].gregs = bit;
  return slot;
}
// bind guest reg r to a freshly allocated pool slot holding val
static void gconst_set(sh2_reg_e r, u32 val)
{
  const int slot = gconst_alloc(r);

  gconsts[slot].val = val;
  guest_regs[r].cnst = slot;
  guest_regs[r].flags |= GRF_CONST;
}
// assign a fresh constant to guest reg r; the value is only recorded in the
// pool and flagged GRF_CDIRTY, i.e. not yet loaded into any host register
static void gconst_new(sh2_reg_e r, u32 val)
{
  int cached;

  gconst_set(r, val);
  guest_regs[r].flags |= GRF_CDIRTY;

  // a cached copy of the previous value of r is stale now, drop it
  cached = guest_regs[r].vreg;
  if (cached >= 0)
    rcache_remove_vreg_alias(cached, r);
}
// make rd share the constant of rs
// rd is first unbound from its own pool slot (if it is constant); if rs is
// constant, rd is then attached to the slot of rs, so both use one entry.
// GRF_CDIRTY is not set on rd by this function.
static void gconst_copy(sh2_reg_e rd, sh2_reg_e rs)
{
  const u32 rd_bit = 1 << rd;

  if (guest_regs[rd].flags & GRF_CONST) {
    guest_regs[rd].flags &= ~(GRF_CONST|GRF_CDIRTY);
    gconsts[guest_regs[rd].cnst].gregs &= ~rd_bit;
  }

  if (!(guest_regs[rs].flags & GRF_CONST))
    return;

  guest_regs[rd].cnst = guest_regs[rs].cnst;
  gconsts[guest_regs[rd].cnst].gregs |= rd_bit;
  guest_regs[rd].flags |= GRF_CONST;
}
#endif
// fetch the constant held by guest reg r into *val
// returns 1 if r is constant (*val written), 0 otherwise (*val untouched)
static int gconst_get(sh2_reg_e r, u32 *val)
{
  if (!(guest_regs[r].flags & GRF_CONST))
    return 0;

  *val = gconsts[guest_regs[r].cnst].val;
  return 1;
}
// returns 1 if guest reg r has GRF_CONST or GRF_CDIRTY set, else 0
static int gconst_check(sh2_reg_e r)
{
  return (guest_regs[r].flags & (GRF_CONST|GRF_CDIRTY)) != 0;
}
// if guest reg r has a pending (GRF_CDIRTY) constant, emit a load of it into
// the host reg of cache reg vreg; otherwise emit nothing
// After the load, every guest reg sharing that constant is aliased to vreg,
// loses GRF_CDIRTY and gets GRF_DIRTY.
// returns 1 if the load was emitted, 0 if not
static int gconst_try_read(int vreg, sh2_reg_e r)
{
  int i, x;

  if (!(guest_regs[r].flags & GRF_CDIRTY))
    return 0;

  x = guest_regs[r].cnst;
  emith_move_r_imm(cache_regs[vreg].hreg, gconsts[x].val);
  FOR_ALL_BITS_SET_DO(gconsts[x].gregs, i,
    {
      // other sharers may still be cached elsewhere; detach those first
      if (guest_regs[i].vreg >= 0 && i != r)
        rcache_remove_vreg_alias(guest_regs[i].vreg, i);
      rcache_add_vreg_alias(vreg, i);
      guest_regs[i].flags &= ~GRF_CDIRTY;
      guest_regs[i].flags |= GRF_DIRTY;
    });
  return 1;
}
// build a bitmask (bit r == 1 << r) of all guest regs flagged GRF_CDIRTY
static u32 gconst_dirty_mask(void)
{
  u32 dirty = 0;
  int r = 0;

  while (r < ARRAY_SIZE(guest_regs)) {
    if (guest_regs[r].flags & GRF_CDIRTY)
      dirty |= (1 << r);
    r++;
  }
  return dirty;
}
// forget any constant state of guest reg r
// If r is bound to a constant pool slot, its bit is removed from that slot's
// user mask, so the slot becomes reusable once it has no users left; then
// GRF_CONST and GRF_CDIRTY are cleared.
static void gconst_kill(sh2_reg_e r)
{
  // this must be a test, not an assignment: with "&= ~(...)" the flags were
  // cleared before being checked, and the pool bit was only released if some
  // unrelated flag happened to be set
  if (guest_regs[r].flags & (GRF_CONST|GRF_CDIRTY))
    gconsts[guest_regs[r].cnst].gregs &= ~(1 << r);
  guest_regs[r].flags &= ~(GRF_CONST|GRF_CDIRTY);
}
// bring every pending (GRF_CDIRTY) constant into the register cache
static void gconst_clean(void)
{
  int r;

  for (r = 0; r < ARRAY_SIZE(guest_regs); r++) {
    if (!(guest_regs[r].flags & GRF_CDIRTY))
      continue;
    // an RC_GR_READ access calls gconst_try_read, which caches
    // the reg and marks it dirty
    rcache_get_reg_(r, RC_GR_READ, 0, NULL);
  }
}
// drop the constant state of all guest regs, and unbind each constant reg
// from its pool slot
static void gconst_invalidate(void)
{
  int r = 0;

  while (r < ARRAY_SIZE(guest_regs)) {
    const int is_const = (guest_regs[r].flags & (GRF_CONST|GRF_CDIRTY)) != 0;

    if (is_const)
      gconsts[guest_regs[r].cnst].gregs &= ~(1 << r);
    guest_regs[r].flags &= ~(GRF_CONST|GRF_CDIRTY);
    r++;
  }
}
// NOTE(review): presumably the allocation stamp counter; the only visible use is its reset to 0 in rcache_invalidate
static u16 rcache_counter;
// bitmask of guest regs (bit r == 1 << r) that are excluded from the hint masks;
// presumably the regs with a static host reg mapping - confirm against its initialisation
static u32 rcache_static;
// NOTE(review): presumably the guest regs set via rcache_set_locked ("try not to evict src regs for this op") - confirm
static u32 rcache_locked;
// guest regs hinted as needed soon (sh2_translate comments this as "insns 1-3")
static u32 rcache_hint_soon;
// guest regs hinted as needed later (sh2_translate comments this as "insns 4-9")
static u32 rcache_hint_late;
// guest regs whose next access is a write; rcache_evict prefers cache regs
// whose guest regs are all in this mask ("soon overwritten anyway")
static u32 rcache_hint_write;
// union of the two read hint masks (rcache_hint_write is not included)
#define rcache_hint (rcache_hint_soon|rcache_hint_late)
static void rcache_unmap_vreg(int x) static void rcache_unmap_vreg(int x)
{ {
int i; int i;
@ -1328,8 +1377,7 @@ static void rcache_clean_vreg(int x)
rcache_unmap_vreg(guest_regs[r].sreg); rcache_unmap_vreg(guest_regs[r].sreg);
emith_move_r_r(cache_regs[guest_regs[r].sreg].hreg, cache_regs[guest_regs[r].vreg].hreg); emith_move_r_r(cache_regs[guest_regs[r].sreg].hreg, cache_regs[guest_regs[r].vreg].hreg);
rcache_remove_vreg_alias(x, r); rcache_remove_vreg_alias(x, r);
cache_regs[guest_regs[r].sreg].gregs = (1 << r); rcache_add_vreg_alias(guest_regs[r].sreg, r);
guest_regs[r].vreg = guest_regs[r].sreg;
} else { } else {
// must evict since sreg is locked // must evict since sreg is locked
emith_ctx_write(cache_regs[x].hreg, r * 4); emith_ctx_write(cache_regs[x].hreg, r * 4);
@ -1343,6 +1391,12 @@ static void rcache_clean_vreg(int x)
} }
} }
// map guest reg r onto cache reg x, in addition to any guest regs x already holds
// NB: does not detach r from a cache reg it was mapped to before
static void rcache_add_vreg_alias(int x, sh2_reg_e r)
{
  guest_regs[r].vreg = x;
  cache_regs[x].gregs |= (1 << r);
}
static void rcache_remove_vreg_alias(int x, sh2_reg_e r) static void rcache_remove_vreg_alias(int x, sh2_reg_e r)
{ {
cache_regs[x].gregs &= ~(1 << r); cache_regs[x].gregs &= ~(1 << r);
@ -1396,9 +1450,12 @@ static cache_reg_t *rcache_evict(void)
else if (rcache_hint_late & cache_regs[i].gregs) else if (rcache_hint_late & cache_regs[i].gregs)
// REGs needed in some future insn // REGs needed in some future insn
i_prio = 3; i_prio = 3;
else else if ((rcache_hint_write & cache_regs[i].gregs) != cache_regs[i].gregs)
// REGs not needed soon // REGs not needed soon
i_prio = 4; i_prio = 4;
else
// REGs soon overwritten anyway
i_prio = 5;
if (prio < i_prio || (prio == i_prio && cache_regs[i].stamp < min_stamp)) { if (prio < i_prio || (prio == i_prio && cache_regs[i].stamp < min_stamp)) {
min_stamp = cache_regs[i].stamp; min_stamp = cache_regs[i].stamp;
@ -1549,6 +1606,7 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr
h = guest_regs[r].sreg; h = guest_regs[r].sreg;
rcache_evict_vreg(h); rcache_evict_vreg(h);
tr = &cache_regs[h]; tr = &cache_regs[h];
tr->gregs = 1 << r;
if (i >= 0) { if (i >= 0) {
if (mode != RC_GR_WRITE) { if (mode != RC_GR_WRITE) {
if (hr) if (hr)
@ -1559,14 +1617,13 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr
} }
rcache_remove_vreg_alias(guest_regs[r].vreg, r); rcache_remove_vreg_alias(guest_regs[r].vreg, r);
} else if (mode != RC_GR_WRITE) { } else if (mode != RC_GR_WRITE) {
if (gconst_try_read(tr->hreg, r)) { if (gconst_try_read(h, r)) {
tr->flags |= HRF_DIRTY; tr->flags |= HRF_DIRTY;
guest_regs[r].flags |= GRF_DIRTY; guest_regs[r].flags |= GRF_DIRTY;
} else } else
emith_ctx_read(tr->hreg, r * 4); emith_ctx_read(tr->hreg, r * 4);
} }
guest_regs[r].vreg = guest_regs[r].sreg; guest_regs[r].vreg = guest_regs[r].sreg;
tr->gregs = 1 << r;
goto end; goto end;
} else if (i >= 0) { } else if (i >= 0) {
if (mode == RC_GR_READ || !(cache_regs[i].gregs & ~(1 << r))) { if (mode == RC_GR_READ || !(cache_regs[i].gregs & ~(1 << r))) {
@ -1608,7 +1665,7 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr
guest_regs[r].vreg = tr - cache_regs; guest_regs[r].vreg = tr - cache_regs;
if (mode != RC_GR_WRITE) { if (mode != RC_GR_WRITE) {
if (gconst_try_read(tr->hreg, r)) { if (gconst_try_read(guest_regs[r].vreg, r)) {
tr->flags |= HRF_DIRTY; tr->flags |= HRF_DIRTY;
guest_regs[r].flags |= GRF_DIRTY; guest_regs[r].flags |= GRF_DIRTY;
} else if (split >= 0) { } else if (split >= 0) {
@ -1747,7 +1804,7 @@ static int rcache_get_reg_arg(int arg, sh2_reg_e r, int *hr)
srcr = dstr; srcr = dstr;
if (rcache_static & (1 << r)) if (rcache_static & (1 << r))
srcr = rcache_get_reg_(r, RC_GR_READ, 0, NULL); srcr = rcache_get_reg_(r, RC_GR_READ, 0, NULL);
else if (gconst_try_read(srcr, r)) else if (gconst_try_read(guest_regs[r].vreg, r))
dirty = 1; dirty = 1;
else else
emith_ctx_read(srcr, r * 4); emith_ctx_read(srcr, r * 4);
@ -1780,8 +1837,10 @@ static int rcache_get_reg_arg(int arg, sh2_reg_e r, int *hr)
emith_move_r_r(dstr, srcr); emith_move_r_r(dstr, srcr);
} else if (hr != NULL) { } else if (hr != NULL) {
// caller will modify arg, so it will soon be out of sync with r // caller will modify arg, so it will soon be out of sync with r
if (dirty || src_dirty) if (dirty || src_dirty) {
emith_ctx_write(dstr, r * 4); // must clean since arg will be modified emith_ctx_write(dstr, r * 4); // must clean since arg will be modified
guest_regs[r].flags &= ~GRF_DIRTY;
}
} else if (guest_regs[r].vreg < 0) { } else if (guest_regs[r].vreg < 0) {
// keep arg as vreg for r // keep arg as vreg for r
cache_regs[dstid].type = HR_CACHED; cache_regs[dstid].type = HR_CACHED;
@ -1909,6 +1968,11 @@ static inline void rcache_set_hint_late(u32 mask)
rcache_hint_late = mask & ~rcache_static; rcache_hint_late = mask & ~rcache_static;
} }
// set the write hint to the guest regs in mask, leaving out all regs in rcache_static
static inline void rcache_set_hint_write(u32 mask)
{
  const u32 not_static = ~rcache_static;

  rcache_hint_write = mask & not_static;
}
static inline int rcache_is_hinted(sh2_reg_e r) static inline int rcache_is_hinted(sh2_reg_e r)
{ {
// consider static REGs as always hinted, since they are always there // consider static REGs as always hinted, since they are always there
@ -2038,7 +2102,7 @@ static void rcache_invalidate(void)
} }
rcache_counter = 0; rcache_counter = 0;
rcache_hint_soon = rcache_hint_late = 0; rcache_hint_soon = rcache_hint_late = rcache_hint_write = 0;
gconst_invalidate(); gconst_invalidate();
} }
@ -2155,10 +2219,9 @@ static void emit_move_r_r(sh2_reg_e dst, sh2_reg_e src)
if (guest_regs[dst].vreg >= 0) if (guest_regs[dst].vreg >= 0)
rcache_remove_vreg_alias(guest_regs[dst].vreg, dst); rcache_remove_vreg_alias(guest_regs[dst].vreg, dst);
// make dst an alias of src // make dst an alias of src
cache_regs[i].gregs |= (1 << dst); rcache_add_vreg_alias(i, dst);
cache_regs[i].flags |= HRF_DIRTY; cache_regs[i].flags |= HRF_DIRTY;
guest_regs[dst].flags |= GRF_DIRTY; guest_regs[dst].flags |= GRF_DIRTY;
guest_regs[dst].vreg = i;
gconst_kill(dst); gconst_kill(dst);
#if PROPAGATE_CONSTANTS #if PROPAGATE_CONSTANTS
gconst_copy(dst, src); gconst_copy(dst, src);
@ -2772,6 +2835,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
dbg(1, "unhandled delay_dep_bk: %x", delay_dep_bk); dbg(1, "unhandled delay_dep_bk: %x", delay_dep_bk);
rcache_set_hint_soon(0); rcache_set_hint_soon(0);
rcache_set_hint_late(0); rcache_set_hint_late(0);
rcache_set_hint_write(0);
} }
else else
{ {
@ -2802,6 +2866,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
} }
rcache_set_hint_soon(late); // insns 1-3 rcache_set_hint_soon(late); // insns 1-3
rcache_set_hint_late(late & ~soon); // insns 4-9 rcache_set_hint_late(late & ~soon); // insns 4-9
rcache_set_hint_write(write & ~(late|soon)); // next access is write
} }
rcache_set_locked(opd[0].source); // try not to evict src regs for this op rcache_set_locked(opd[0].source); // try not to evict src regs for this op