sh2 timer optimization

This commit is contained in:
kub 2020-04-13 22:20:13 +02:00
parent 7c1c9c7742
commit 74cc7aebf6
5 changed files with 40 additions and 32 deletions

View file

@@ -48,6 +48,7 @@ typedef struct SH2_
 #define SH2_STATE_CPOLL (1 << 2) // polling comm regs
 #define SH2_STATE_VPOLL (1 << 3) // polling VDP
 #define SH2_STATE_RPOLL (1 << 4) // polling address in SDRAM
+#define SH2_TIMER_RUN (1 << 8) // SOC WDT timer is running
 unsigned int state;
 uint32_t poll_addr;
 int poll_cycles;

View file

@@ -508,12 +508,18 @@ void sync_sh2s_normal(unsigned int m68k_target)
 now = ssh2.m68krcycles_done;
 }
 if (CYCLES_GT(now, timer_cycles+STEP_N)) {
-p32x_timers_do(now - timer_cycles);
+if (msh2.state & SH2_TIMER_RUN)
+p32x_timer_do(&msh2, now - timer_cycles);
+if (ssh2.state & SH2_TIMER_RUN)
+p32x_timer_do(&ssh2, now - timer_cycles);
 timer_cycles = now;
 }
 }
-p32x_timers_do(now - timer_cycles);
+if (msh2.state & SH2_TIMER_RUN)
+p32x_timer_do(&msh2, now - timer_cycles);
+if (ssh2.state & SH2_TIMER_RUN)
+p32x_timer_do(&ssh2, now - timer_cycles);
 timer_cycles = now;
 }
 pprof_end_sub(m68k);

View file

@@ -111,7 +111,7 @@ void p32x_m68k_poll_event(u32 flags)
 m68k_poll.addr1 = m68k_poll.addr2 = m68k_poll.cnt = 0;
 }
-static void NOINLINE sh2_poll_detect(u32 a, SH2 *sh2, u32 flags, int maxcnt)
+void NOINLINE p32x_sh2_poll_detect(u32 a, SH2 *sh2, u32 flags, int maxcnt)
 {
 u32 cycles_done = sh2_cycles_done_t(sh2);
@@ -275,7 +275,7 @@ u32 REGPARM(3) p32x_sh2_poll_memory16(u32 a, u32 d, SH2 *sh2)
 d = (s16)sh2_poll_read(a, d, cycles, sh2);
 }
-sh2_poll_detect(a, sh2, SH2_STATE_RPOLL, 5);
+p32x_sh2_poll_detect(a, sh2, SH2_STATE_RPOLL, 5);
 DRC_RESTORE_SR(sh2);
 return d;
@@ -296,7 +296,7 @@ u32 REGPARM(3) p32x_sh2_poll_memory32(u32 a, u32 d, SH2 *sh2)
 ((u16)sh2_poll_read(a+2, d, cycles, sh2));
 }
-sh2_poll_detect(a, sh2, SH2_STATE_RPOLL, 5);
+p32x_sh2_poll_detect(a, sh2, SH2_STATE_RPOLL, 5);
 DRC_RESTORE_SR(sh2);
 return d;
@@ -735,7 +735,7 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2)
 return (r[0] & P32XS_FM) | Pico32x.sh2_regs[0]
 | Pico32x.sh2irq_mask[sh2->is_slave];
 case 0x04/2: // H count (often as comm too)
-sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9);
+p32x_sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9);
 cycles = sh2_cycles_done_m68k(sh2);
 sh2s_sync_on_read(sh2, cycles);
 return sh2_poll_read(a, Pico32x.sh2_regs[4 / 2], cycles, sh2);
@@ -769,7 +769,7 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2)
 case 0x2a/2:
 case 0x2c/2:
 case 0x2e/2:
-sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9);
+p32x_sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9);
 cycles = sh2_cycles_done_m68k(sh2);
 sh2s_sync_on_read(sh2, cycles);
 return sh2_poll_read(a, r[a / 2], cycles, sh2);
@@ -1456,7 +1456,7 @@ static u32 REGPARM(2) sh2_read8_cs0(u32 a, SH2 *sh2)
 if ((a & 0x3fff0) == 0x4100) {
 d = p32x_vdp_read16(a);
-sh2_poll_detect(a, sh2, SH2_STATE_VPOLL, 9);
+p32x_sh2_poll_detect(a, sh2, SH2_STATE_VPOLL, 9);
 goto out_16to8;
 }
@@ -1519,7 +1519,7 @@ static u32 REGPARM(2) sh2_read16_cs0(u32 a, SH2 *sh2)
 if ((a & 0x3fff0) == 0x4100) {
 d = p32x_vdp_read16(a);
-sh2_poll_detect(a, sh2, SH2_STATE_VPOLL, 9);
+p32x_sh2_poll_detect(a, sh2, SH2_STATE_VPOLL, 9);
 goto out;
 }

View file

@@ -209,6 +209,9 @@ void p32x_timers_recalc(void)
 // SH2 timer step
 for (i = 0; i < 2; i++) {
+sh2s[i].state &= ~SH2_TIMER_RUN;
+if (PREG8(sh2s[i].peri_regs, 0x80) & 0x20) // TME
+sh2s[i].state |= SH2_TIMER_RUN;
 tmp = PREG8(sh2s[i].peri_regs, 0x80) & 7;
 // Sclk cycles per timer tick
 if (tmp)
@@ -222,33 +225,30 @@ void p32x_timers_recalc(void)
 }
 }
-void p32x_timers_do(unsigned int m68k_slice)
+NOINLINE void p32x_timer_do(SH2 *sh2, unsigned int m68k_slice)
 {
 unsigned int cycles = m68k_slice * 3;
-int cnt, i;
+void *pregs = sh2->peri_regs;
+int cnt; int i = sh2->is_slave;
-// WDT timers
-for (i = 0; i < 2; i++) {
-void *pregs = sh2s[i].peri_regs;
-if (PREG8(pregs, 0x80) & 0x20) { // TME
+// WDT timer
 timer_cycles[i] += cycles;
+if (timer_cycles[i] > timer_tick_cycles[i]) {
 // cnt = timer_cycles[i] / timer_tick_cycles[i];
 cnt = (1ULL * timer_cycles[i] * timer_tick_factor[i]) >> 32;
 timer_cycles[i] -= timer_tick_cycles[i] * cnt;
-if (timer_cycles[i] > timer_tick_cycles[i])
-timer_cycles[i] -= timer_tick_cycles[i], cnt++;
 cnt += PREG8(pregs, 0x81);
 if (cnt >= 0x100) {
 int level = PREG8(pregs, 0xe3) >> 4;
 int vector = PREG8(pregs, 0xe4) & 0x7f;
 elprintf(EL_32XP, "%csh2 WDT irq (%d, %d)",
 i ? 's' : 'm', level, vector);
-sh2_internal_irq(&sh2s[i], level, vector);
+sh2_internal_irq(sh2, level, vector);
 cnt &= 0xff;
 }
 PREG8(pregs, 0x81) = cnt;
 }
-}
 }
 void sh2_peripheral_reset(SH2 *sh2)

View file

@@ -977,6 +977,7 @@ unsigned int REGPARM(3) p32x_sh2_poll_memory8(unsigned int a, unsigned int d, SH
 unsigned int REGPARM(3) p32x_sh2_poll_memory16(unsigned int a, unsigned int d, SH2 *sh2);
 unsigned int REGPARM(3) p32x_sh2_poll_memory32(unsigned int a, unsigned int d, SH2 *sh2);
 void *p32x_sh2_get_mem_ptr(unsigned int a, unsigned int *mask, SH2 *sh2);
+void p32x_sh2_poll_detect(unsigned int a, SH2 *sh2, unsigned int flags, int maxcnt);
 void p32x_sh2_poll_event(SH2 *sh2, unsigned int flags, unsigned int m68k_cycles);
 int p32x_sh2_memcpy(unsigned int dst, unsigned int src, int count, int size, SH2 *sh2);
@@ -1012,7 +1013,7 @@ void p32x_pwm_state_loaded(void);
 void p32x_dreq0_trigger(void);
 void p32x_dreq1_trigger(void);
 void p32x_timers_recalc(void);
-void p32x_timers_do(unsigned int m68k_slice);
+void p32x_timer_do(SH2 *sh2, unsigned int m68k_slice);
 void sh2_peripheral_reset(SH2 *sh2);
 unsigned int REGPARM(2) sh2_peripheral_read8(unsigned int a, SH2 *sh2);
 unsigned int REGPARM(2) sh2_peripheral_read16(unsigned int a, SH2 *sh2);