improved sh2 clock handling, bug fixing + small improvement to drc emitters

This commit is contained in:
kub 2019-03-20 23:39:45 +01:00
parent f5939109c4
commit 2fa02d5a63
8 changed files with 77 additions and 62 deletions

View file

@ -86,7 +86,7 @@
#define A_OP_TST 0x8 #define A_OP_TST 0x8
#define A_OP_TEQ 0x9 #define A_OP_TEQ 0x9
#define A_OP_CMP 0xa #define A_OP_CMP 0xa
#define A_OP_CMN 0xa #define A_OP_CMN 0xb
#define A_OP_ORR 0xc #define A_OP_ORR 0xc
#define A_OP_MOV 0xd #define A_OP_MOV 0xd
#define A_OP_BIC 0xe #define A_OP_BIC 0xe
@ -250,7 +250,16 @@
#define EOP_MOVT(rd,imm) \ #define EOP_MOVT(rd,imm) \
EMIT(0xe3400000 | ((rd)<<12) | (((imm)>>16)&0xfff) | (((imm)>>12)&0xf0000)) EMIT(0xe3400000 | ((rd)<<12) | (((imm)>>16)&0xfff) | (((imm)>>12)&0xf0000))
/* Population count: returns the number of set bits in val.
 * Classic parallel bit-summing (pairs -> nibbles -> bytes -> halfwords);
 * no loops, so it is cheap enough for the emitter hot path.
 * Used by emith_op_imm2 to pick MOV vs MVN: if more than half the bits
 * are 1, encoding ~imm with MVN needs fewer instructions. */
static int count_bits(unsigned val)
{
	val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
	val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
	val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
	val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
	return (val & 0xffff) + (val >> 16);
}
// XXX: RSB, *S will break if 1 insn is not enough
static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int imm) static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int imm)
{ {
int ror2; int ror2;
@ -259,23 +268,11 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int
switch (op) { switch (op) {
case A_OP_MOV: case A_OP_MOV:
rn = 0; rn = 0;
if (~imm < 0x10000) { // count bits in imm and use MVN if more bits 1 than 0
if (count_bits(imm) > 16) {
imm = ~imm; imm = ~imm;
op = A_OP_MVN; op = A_OP_MVN;
} }
#ifdef HAVE_ARMV7
for (v = imm, ror2 = 0; v && !(v & 3); v >>= 2)
ror2--;
if (v >> 8) {
/* 2+ insns needed - prefer movw/movt */
if (op == A_OP_MVN)
imm = ~imm;
EOP_MOVW(rd, imm);
if (imm & 0xffff0000)
EOP_MOVT(rd, imm);
return;
}
#endif
break; break;
case A_OP_EOR: case A_OP_EOR:
@ -283,27 +280,37 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int
case A_OP_ADD: case A_OP_ADD:
case A_OP_ORR: case A_OP_ORR:
case A_OP_BIC: case A_OP_BIC:
if (s == 0 && imm == 0) if (s == 0 && imm == 0 && rd == rn)
return; return;
break; break;
} }
for (v = imm, ror2 = 0; ; ror2 -= 8/2) { again:
/* shift down to get 'best' rot2 */ v = imm, ror2 = 32/2; // arm imm shift is ROR, so rotate for best fit
for (; v && !(v & 3); v >>= 2) while ((v >> 24) && !(v & 0xc0))
ror2--; v = (v << 2) | (v >> 30), ror2++;
do {
EOP_C_DOP_IMM(cond, op, s, rn, rd, ror2 & 0x0f, v & 0xff); // shift down to get 'best' rot2
while (v > 0xff && !(v & 3))
v >>= 8; v >>= 2, ror2--;
if (v == 0) // AND must fit into 1 insn. if not, use BIC
break; if (op == A_OP_AND && v != (v & 0xff)) {
if (op == A_OP_MOV) imm = ~imm;
op = A_OP_ORR;
if (op == A_OP_MVN)
op = A_OP_BIC; op = A_OP_BIC;
rn = rd; goto again;
} }
EOP_C_DOP_IMM(cond, op, s, rn, rd, ror2 & 0xf, v & 0xff);
switch (op) {
case A_OP_MOV: op = A_OP_ORR; break;
case A_OP_MVN: op = A_OP_BIC; break;
case A_OP_ADC: op = A_OP_ADD; break;
case A_OP_SBC: op = A_OP_SUB; break;
}
rn = rd;
v >>= 8, ror2 -= 8/2;
} while (v);
} }
#define emith_op_imm(cond, s, op, r, imm) \ #define emith_op_imm(cond, s, op, r, imm) \
@ -491,7 +498,7 @@ static int emith_xbranch(int cond, void *target, int is_call)
#define emith_cmp_r_imm(r, imm) { \ #define emith_cmp_r_imm(r, imm) { \
u32 op = A_OP_CMP, imm_ = imm; \ u32 op = A_OP_CMP, imm_ = imm; \
if (~imm_ < 0x100) { \ if (~imm_ < 0x100) { \
imm_ = ~imm_; \ imm_ = -imm_; \
op = A_OP_CMN; \ op = A_OP_CMN; \
} \ } \
emith_top_imm(A_COND_AL, op, r, imm); \ emith_top_imm(A_COND_AL, op, r, imm); \
@ -652,12 +659,10 @@ static int emith_xbranch(int cond, void *target, int is_call)
if ((count) <= 8) { \ if ((count) <= 8) { \
t = (count) - 8; \ t = (count) - 8; \
t = (0xff << t) & 0xff; \ t = (0xff << t) & 0xff; \
EOP_BIC_IMM(d,s,8/2,t); \
EOP_C_DOP_IMM(cond,A_OP_BIC,0,s,d,8/2,t); \ EOP_C_DOP_IMM(cond,A_OP_BIC,0,s,d,8/2,t); \
} else if ((count) >= 24) { \ } else if ((count) >= 24) { \
t = (count) - 24; \ t = (count) - 24; \
t = 0xff >> t; \ t = 0xff >> t; \
EOP_AND_IMM(d,s,0,t); \
EOP_C_DOP_IMM(cond,A_OP_AND,0,s,d,0,t); \ EOP_C_DOP_IMM(cond,A_OP_AND,0,s,d,0,t); \
} else { \ } else { \
EOP_MOV_REG(cond,0,d,s,A_AM1_LSL,count); \ EOP_MOV_REG(cond,0,d,s,A_AM1_LSL,count); \

View file

@ -421,13 +421,10 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI };
rmr = s2; \ rmr = s2; \
} \ } \
EMIT_OP_MODRM(0xf7, 3, op, rmr); /* xMUL rmr */ \ EMIT_OP_MODRM(0xf7, 3, op, rmr); /* xMUL rmr */ \
/* XXX: using push/pop for the case of edx->eax; eax->edx */ \
if (dhi != xDX && dhi != -1) \
emith_push(xDX); \
if (dlo != xAX) \ if (dlo != xAX) \
emith_move_r_r(dlo, xAX); \ EMIT_OP(0x90 + (dlo)); /* XCHG eax, dlo */ \
if (dhi != xDX && dhi != -1) \ if (dhi != xDX && dhi != -1 && !(dhi == xAX && dlo == xDX)) \
emith_pop(dhi); \ emith_move_r_r(dhi, (dlo == xDX ? xAX : xDX)); \
if (dlo != xDX && dhi != xDX) \ if (dlo != xDX && dhi != xDX) \
emith_pop(xDX); \ emith_pop(xDX); \
if (dlo != xAX && dhi != xAX) \ if (dlo != xAX && dhi != xAX) \
@ -474,12 +471,12 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI };
#define emith_deref_op(op, r, rs, offs) do { \ #define emith_deref_op(op, r, rs, offs) do { \
/* mov r <-> [ebp+#offs] */ \ /* mov r <-> [ebp+#offs] */ \
if ((offs) >= 0x80) { \ if (abs(offs) >= 0x80) { \
EMIT_OP_MODRM64(op, 2, r, rs); \ EMIT_OP_MODRM64(op, 2, r, rs); \
EMIT(offs, u32); \ EMIT(offs, u32); \
} else { \ } else { \
EMIT_OP_MODRM64(op, 1, r, rs); \ EMIT_OP_MODRM64(op, 1, r, rs); \
EMIT(offs, u8); \ EMIT((u8)offs, u8); \
} \ } \
} while (0) } while (0)
@ -496,7 +493,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI };
int r_ = r; \ int r_ = r; \
if (!is_abcdx(r)) \ if (!is_abcdx(r)) \
r_ = rcache_get_tmp(); \ r_ = rcache_get_tmp(); \
emith_deref_op(0x8a, r_, rs, offs); \ EMIT(0x0f, u8); \
emith_deref_op(0xb6, r_, rs, offs); \
if ((r) != r_) { \ if ((r) != r_) { \
emith_move_r_r(r, r_); \ emith_move_r_r(r, r_); \
rcache_free_tmp(r_); \ rcache_free_tmp(r_); \
@ -515,8 +513,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI };
} while (0) } while (0)
#define emith_read16_r_r_offs(r, rs, offs) do { \ #define emith_read16_r_r_offs(r, rs, offs) do { \
EMIT(0x66, u8); /* operand override */ \ EMIT(0x0f, u8); \
emith_read_r_r_offs(r, rs, offs); \ emith_deref_op(0xb7, r, rs, offs); \
} while (0) } while (0)
#define emith_write16_r_r_offs(r, rs, offs) do { \ #define emith_write16_r_r_offs(r, rs, offs) do { \
@ -688,6 +686,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI };
case 0: rd = xDI; break; \ case 0: rd = xDI; break; \
case 1: rd = xSI; break; \ case 1: rd = xSI; break; \
case 2: rd = xDX; break; \ case 2: rd = xDX; break; \
case 3: rd = xBX; break; \
} }
#define emith_sh2_drc_entry() { \ #define emith_sh2_drc_entry() { \

View file

@ -84,7 +84,7 @@ int sh2_irl_irq(SH2 *sh2, int level, int nested_call)
// do this to avoid missing irqs that other SH2 might clear // do this to avoid missing irqs that other SH2 might clear
int vector = sh2->irq_callback(sh2, level); int vector = sh2->irq_callback(sh2, level);
sh2_do_irq(sh2, level, vector); sh2_do_irq(sh2, level, vector);
sh2->m68krcycles_done += C_SH2_TO_M68K(*sh2, 13); sh2->m68krcycles_done += C_SH2_TO_M68K(sh2, 13);
} }
else else
sh2->test_irq = 1; sh2->test_irq = 1;

View file

@ -72,9 +72,9 @@ typedef struct SH2_
#define CYCLE_MULT_SHIFT 10 #define CYCLE_MULT_SHIFT 10
#define C_M68K_TO_SH2(xsh2, c) \ #define C_M68K_TO_SH2(xsh2, c) \
((int)((c) * (xsh2).mult_m68k_to_sh2) >> CYCLE_MULT_SHIFT) ((int)((long long)(c) * (xsh2)->mult_m68k_to_sh2) >> CYCLE_MULT_SHIFT)
#define C_SH2_TO_M68K(xsh2, c) \ #define C_SH2_TO_M68K(xsh2, c) \
((int)((c + 3) * (xsh2).mult_sh2_to_m68k) >> CYCLE_MULT_SHIFT) ((int)((long long)(c+3) * (xsh2)->mult_sh2_to_m68k) >> CYCLE_MULT_SHIFT)
int sh2_init(SH2 *sh2, int is_slave, SH2 *other_sh2); int sh2_init(SH2 *sh2, int is_slave, SH2 *other_sh2);
void sh2_finish(SH2 *sh2); void sh2_finish(SH2 *sh2);

View file

@ -254,8 +254,8 @@ static void p32x_start_blank(void)
} }
p32x_trigger_irq(NULL, SekCyclesDone(), P32XI_VINT); p32x_trigger_irq(NULL, SekCyclesDone(), P32XI_VINT);
p32x_sh2_poll_event(&msh2, SH2_STATE_VPOLL, 0); p32x_sh2_poll_event(&msh2, SH2_STATE_VPOLL, SekCyclesDone());
p32x_sh2_poll_event(&ssh2, SH2_STATE_VPOLL, 0); p32x_sh2_poll_event(&ssh2, SH2_STATE_VPOLL, SekCyclesDone());
} }
void p32x_schedule_hint(SH2 *sh2, int m68k_cycles) void p32x_schedule_hint(SH2 *sh2, int m68k_cycles)
@ -323,8 +323,12 @@ void p32x_event_schedule_sh2(SH2 *sh2, enum p32x_event event, int after)
p32x_event_schedule(now, event, after); p32x_event_schedule(now, event, after);
left_to_next = (event_time_next - now) * 3; left_to_next = C_M68K_TO_SH2(sh2, (int)(event_time_next - now));
if (sh2_cycles_left(sh2) > left_to_next) {
if (left_to_next < 1)
left_to_next = 1;
sh2_end_run(sh2, left_to_next); sh2_end_run(sh2, left_to_next);
}
} }
static void p32x_run_events(unsigned int until) static void p32x_run_events(unsigned int until)
@ -372,13 +376,13 @@ static void run_sh2(SH2 *sh2, int m68k_cycles)
pevt_log_sh2_o(sh2, EVT_RUN_START); pevt_log_sh2_o(sh2, EVT_RUN_START);
sh2->state |= SH2_STATE_RUN; sh2->state |= SH2_STATE_RUN;
cycles = C_M68K_TO_SH2(*sh2, m68k_cycles); cycles = C_M68K_TO_SH2(sh2, m68k_cycles);
elprintf_sh2(sh2, EL_32X, "+run %u %d @%08x", elprintf_sh2(sh2, EL_32X, "+run %u %d @%08x",
sh2->m68krcycles_done, cycles, sh2->pc); sh2->m68krcycles_done, cycles, sh2->pc);
done = sh2_execute(sh2, cycles, PicoIn.opt & POPT_EN_DRC); done = sh2_execute(sh2, cycles, PicoIn.opt & POPT_EN_DRC);
sh2->m68krcycles_done += C_SH2_TO_M68K(*sh2, done); sh2->m68krcycles_done += C_SH2_TO_M68K(sh2, done);
sh2->state &= ~SH2_STATE_RUN; sh2->state &= ~SH2_STATE_RUN;
pevt_log_sh2_o(sh2, EVT_RUN_END); pevt_log_sh2_o(sh2, EVT_RUN_END);
elprintf_sh2(sh2, EL_32X, "-run %u %d", elprintf_sh2(sh2, EL_32X, "-run %u %d",
@ -412,8 +416,7 @@ void p32x_sync_other_sh2(SH2 *sh2, unsigned int m68k_target)
// there might be new event to schedule current sh2 to // there might be new event to schedule current sh2 to
if (event_time_next) { if (event_time_next) {
left_to_event = event_time_next - m68k_target; left_to_event = C_M68K_TO_SH2(sh2, (int)(event_time_next - m68k_target));
left_to_event *= 3;
if (sh2_cycles_left(sh2) > left_to_event) { if (sh2_cycles_left(sh2) > left_to_event) {
if (left_to_event < 1) if (left_to_event < 1)
left_to_event = 1; left_to_event = 1;
@ -446,6 +449,7 @@ void sync_sh2s_normal(unsigned int m68k_target)
now = ssh2.m68krcycles_done; now = ssh2.m68krcycles_done;
timer_cycles = now; timer_cycles = now;
pprof_start(m68k);
while (CYCLES_GT(m68k_target, now)) while (CYCLES_GT(m68k_target, now))
{ {
if (event_time_next && CYCLES_GE(now, event_time_next)) if (event_time_next && CYCLES_GE(now, event_time_next))
@ -463,6 +467,7 @@ void sync_sh2s_normal(unsigned int m68k_target)
target - msh2.m68krcycles_done, target - ssh2.m68krcycles_done, target - msh2.m68krcycles_done, target - ssh2.m68krcycles_done,
m68k_target - now, Pico32x.emu_flags); m68k_target - now, Pico32x.emu_flags);
pprof_start(ssh2);
if (!(ssh2.state & SH2_IDLE_STATES)) { if (!(ssh2.state & SH2_IDLE_STATES)) {
cycles = target - ssh2.m68krcycles_done; cycles = target - ssh2.m68krcycles_done;
if (cycles > 0) { if (cycles > 0) {
@ -472,7 +477,9 @@ void sync_sh2s_normal(unsigned int m68k_target)
target = event_time_next; target = event_time_next;
} }
} }
pprof_end(ssh2);
pprof_start(msh2);
if (!(msh2.state & SH2_IDLE_STATES)) { if (!(msh2.state & SH2_IDLE_STATES)) {
cycles = target - msh2.m68krcycles_done; cycles = target - msh2.m68krcycles_done;
if (cycles > 0) { if (cycles > 0) {
@ -482,6 +489,7 @@ void sync_sh2s_normal(unsigned int m68k_target)
target = event_time_next; target = event_time_next;
} }
} }
pprof_end(msh2);
now = target; now = target;
if (!(msh2.state & SH2_IDLE_STATES)) { if (!(msh2.state & SH2_IDLE_STATES)) {
@ -497,6 +505,7 @@ void sync_sh2s_normal(unsigned int m68k_target)
p32x_timers_do(now - timer_cycles); p32x_timers_do(now - timer_cycles);
timer_cycles = now; timer_cycles = now;
} }
pprof_end_sub(m68k);
// advance idle CPUs // advance idle CPUs
if (msh2.state & SH2_IDLE_STATES) { if (msh2.state & SH2_IDLE_STATES) {
@ -553,8 +562,8 @@ void PicoFrame32x(void)
if (!(Pico32x.sh2_regs[0] & 0x80)) if (!(Pico32x.sh2_regs[0] & 0x80))
p32x_schedule_hint(NULL, SekCyclesDone()); p32x_schedule_hint(NULL, SekCyclesDone());
p32x_sh2_poll_event(&msh2, SH2_STATE_VPOLL, 0); p32x_sh2_poll_event(&msh2, SH2_STATE_VPOLL, SekCyclesDone());
p32x_sh2_poll_event(&ssh2, SH2_STATE_VPOLL, 0); p32x_sh2_poll_event(&ssh2, SH2_STATE_VPOLL, SekCyclesDone());
if (PicoIn.AHW & PAHW_MCD) if (PicoIn.AHW & PAHW_MCD)
pcd_prepare_frame(); pcd_prepare_frame();

View file

@ -146,7 +146,7 @@ static void sh2s_sync_on_read(SH2 *sh2)
cycles = sh2_cycles_done(sh2); cycles = sh2_cycles_done(sh2);
if (cycles > 600) if (cycles > 600)
p32x_sync_other_sh2(sh2, sh2->m68krcycles_done + cycles / 3); p32x_sync_other_sh2(sh2, sh2->m68krcycles_done + C_SH2_TO_M68K(sh2, cycles));
} }
// SH2 faking // SH2 faking

View file

@ -125,6 +125,7 @@ static void SekRunS68k(unsigned int to)
if (SekShouldInterrupt()) if (SekShouldInterrupt())
Pico_mcd->m.s68k_poll_a = 0; Pico_mcd->m.s68k_poll_a = 0;
pprof_start(s68k);
SekCycleCntS68k += cyc_do; SekCycleCntS68k += cyc_do;
#if defined(EMU_C68K) #if defined(EMU_C68K)
PicoCpuCS68k.cycles = cyc_do; PicoCpuCS68k.cycles = cyc_do;
@ -137,6 +138,7 @@ static void SekRunS68k(unsigned int to)
#elif defined(EMU_F68K) #elif defined(EMU_F68K)
SekCycleCntS68k += fm68k_emulate(&PicoCpuFS68k, cyc_do, 0) - cyc_do; SekCycleCntS68k += fm68k_emulate(&PicoCpuFS68k, cyc_do, 0) - cyc_do;
#endif #endif
pprof_end(s68k);
} }
static void pcd_set_cycle_mult(void) static void pcd_set_cycle_mult(void)

View file

@ -241,11 +241,11 @@ extern SH2 sh2s[2];
# define sh2_pc(sh2) (sh2)->pc # define sh2_pc(sh2) (sh2)->pc
#endif #endif
#define sh2_cycles_done(sh2) ((int)(sh2)->cycles_timeslice - sh2_cycles_left(sh2)) #define sh2_cycles_done(sh2) ((unsigned)(sh2)->cycles_timeslice - sh2_cycles_left(sh2))
#define sh2_cycles_done_t(sh2) \ #define sh2_cycles_done_t(sh2) \
((sh2)->m68krcycles_done * 3 + sh2_cycles_done(sh2)) (unsigned)(C_M68K_TO_SH2(sh2, (sh2)->m68krcycles_done) + sh2_cycles_done(sh2))
#define sh2_cycles_done_m68k(sh2) \ #define sh2_cycles_done_m68k(sh2) \
((sh2)->m68krcycles_done + (sh2_cycles_done(sh2) / 3)) (unsigned)((sh2)->m68krcycles_done + C_SH2_TO_M68K(sh2, sh2_cycles_done(sh2)))
#define sh2_reg(c, x) (c) ? ssh2.r[x] : msh2.r[x] #define sh2_reg(c, x) (c) ? ssh2.r[x] : msh2.r[x]
#define sh2_gbr(c) (c) ? ssh2.gbr : msh2.gbr #define sh2_gbr(c) (c) ? ssh2.gbr : msh2.gbr