32x, finetuning

This commit is contained in:
kub 2019-10-19 08:53:28 +02:00
parent 1fd8f98696
commit 7e940f142e
4 changed files with 41 additions and 43 deletions

View file

@ -272,9 +272,9 @@ static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr)
// and can be discarded early // and can be discarded early
// XXX: need to tune sizes // XXX: need to tune sizes
static const int tcache_sizes[TCACHE_BUFFERS] = { static const int tcache_sizes[TCACHE_BUFFERS] = {
DRC_TCACHE_SIZE * 14 / 16, // ROM (rarely used), DRAM DRC_TCACHE_SIZE * 30 / 32, // ROM (rarely used), DRAM
DRC_TCACHE_SIZE / 16, // BIOS, data array in master sh2 DRC_TCACHE_SIZE / 32, // BIOS, data array in master sh2
DRC_TCACHE_SIZE / 16, // ... slave DRC_TCACHE_SIZE / 32, // ... slave
}; };
static u8 *tcache_bases[TCACHE_BUFFERS]; static u8 *tcache_bases[TCACHE_BUFFERS];
@ -332,13 +332,13 @@ struct block_desc {
struct block_entry entryp[MAX_BLOCK_ENTRIES]; struct block_entry entryp[MAX_BLOCK_ENTRIES];
}; };
#define BLOCK_MAX_COUNT(tcid) ((tcid) ? 256 : 16*256) #define BLOCK_MAX_COUNT(tcid) ((tcid) ? 256 : 32*256)
static struct block_desc *block_tables[TCACHE_BUFFERS]; static struct block_desc *block_tables[TCACHE_BUFFERS];
static int block_counts[TCACHE_BUFFERS]; static int block_counts[TCACHE_BUFFERS];
static int block_limit[TCACHE_BUFFERS]; static int block_limit[TCACHE_BUFFERS];
// we have block_link_pool to avoid using mallocs // we have block_link_pool to avoid using mallocs
#define BLOCK_LINK_MAX_COUNT(tcid) ((tcid) ? 1024 : 16*1024) #define BLOCK_LINK_MAX_COUNT(tcid) ((tcid) ? 512 : 32*512)
static struct block_link *block_link_pool[TCACHE_BUFFERS]; static struct block_link *block_link_pool[TCACHE_BUFFERS];
static int block_link_pool_counts[TCACHE_BUFFERS]; static int block_link_pool_counts[TCACHE_BUFFERS];
static struct block_link **unresolved_links[TCACHE_BUFFERS]; static struct block_link **unresolved_links[TCACHE_BUFFERS];
@ -363,7 +363,7 @@ static struct block_list *inactive_blocks[TCACHE_BUFFERS];
// each array has len: sizeof(mem) / INVAL_PAGE_SIZE // each array has len: sizeof(mem) / INVAL_PAGE_SIZE
static struct block_list **inval_lookup[TCACHE_BUFFERS]; static struct block_list **inval_lookup[TCACHE_BUFFERS];
#define HASH_TABLE_SIZE(tcid) ((tcid) ? 256 : 64*256) #define HASH_TABLE_SIZE(tcid) ((tcid) ? 512 : 64*512)
static struct block_entry **hash_tables[TCACHE_BUFFERS]; static struct block_entry **hash_tables[TCACHE_BUFFERS];
#define HASH_FUNC(hash_tab, addr, mask) \ #define HASH_FUNC(hash_tab, addr, mask) \
@ -5188,20 +5188,14 @@ static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, u32 shift)
#endif #endif
} }
void sh2_drc_wcheck_ram(unsigned int a, unsigned t, SH2 *sh2) void sh2_drc_wcheck_ram(unsigned int a, unsigned len, SH2 *sh2)
{ {
int off = ((u16) t ? 0 : 2); sh2_smc_rm_blocks(a, len, 0, SH2_DRCBLK_RAM_SHIFT);
int len = ((u16) t ? 2 : 0) + (t >> 16 ? 2 : 0);
sh2_smc_rm_blocks(a + off, len, 0, SH2_DRCBLK_RAM_SHIFT);
} }
void sh2_drc_wcheck_da(unsigned int a, unsigned t, SH2 *sh2) void sh2_drc_wcheck_da(unsigned int a, unsigned len, SH2 *sh2)
{ {
int off = ((u16) t ? 0 : 2); sh2_smc_rm_blocks(a, len, 1 + sh2->is_slave, SH2_DRCBLK_DA_SHIFT);
int len = ((u16) t ? 2 : 0) + (t >> 16 ? 2 : 0);
sh2_smc_rm_blocks(a + off, len, 1 + sh2->is_slave, SH2_DRCBLK_DA_SHIFT);
} }
int sh2_execute_drc(SH2 *sh2c, int cycles) int sh2_execute_drc(SH2 *sh2c, int cycles)
@ -6403,6 +6397,9 @@ end:
last_btarget = 0; last_btarget = 0;
op = 0; // delay/poll insns counter op = 0; // delay/poll insns counter
for (i = 0, pc = base_pc; i < i_end; i++, pc += 2) { for (i = 0, pc = base_pc; i < i_end; i++, pc += 2) {
int null;
if ((op_flags[i] & OF_BTARGET) && dr_get_entry(pc, is_slave, &null))
break; // branch target already compiled
opd = &ops[i]; opd = &ops[i];
crc += FETCH_OP(pc); crc += FETCH_OP(pc);
@ -6483,7 +6480,7 @@ end:
op ++; // condition 2 op ++; // condition 2
#endif #endif
} }
end_pc = base_pc + i_end * 2; end_pc = pc;
// end_literals is used to decide to inline a literal or not // end_literals is used to decide to inline a literal or not
// XXX: need better detection if this actually is used in write // XXX: need better detection if this actually is used in write

View file

@ -1,7 +1,7 @@
int sh2_drc_init(SH2 *sh2); int sh2_drc_init(SH2 *sh2);
void sh2_drc_finish(SH2 *sh2); void sh2_drc_finish(SH2 *sh2);
void sh2_drc_wcheck_ram(unsigned int a, unsigned val, SH2 *sh2); void sh2_drc_wcheck_ram(unsigned int a, unsigned len, SH2 *sh2);
void sh2_drc_wcheck_da(unsigned int a, unsigned val, SH2 *sh2); void sh2_drc_wcheck_da(unsigned int a, unsigned len, SH2 *sh2);
#ifdef DRC_SH2 #ifdef DRC_SH2
void sh2_drc_mem_setup(SH2 *sh2); void sh2_drc_mem_setup(SH2 *sh2);

View file

@ -162,15 +162,13 @@ void NOINLINE p32x_sh2_poll_event(SH2 *sh2, u32 flags, u32 m68k_cycles)
sh2->poll_addr = sh2->poll_cycles = sh2->poll_cnt = 0; sh2->poll_addr = sh2->poll_cycles = sh2->poll_cnt = 0;
} }
static void sh2s_sync_on_read(SH2 *sh2) static void sh2s_sync_on_read(SH2 *sh2, unsigned cycles)
{ {
int cycles;
if (sh2->poll_cnt != 0) if (sh2->poll_cnt != 0)
return; return;
cycles = sh2_cycles_done(sh2); if (p32x_sh2_ready(sh2->other_sh2, cycles-250))
if (cycles > 600) p32x_sync_other_sh2(sh2, cycles);
p32x_sync_other_sh2(sh2, sh2->m68krcycles_done + C_SH2_TO_M68K(sh2, cycles));
} }
// poll fifo, stores writes to potential addresses used for polling. // poll fifo, stores writes to potential addresses used for polling.
@ -271,8 +269,8 @@ u32 REGPARM(3) p32x_sh2_poll_memory16(unsigned int a, u32 d, SH2 *sh2)
DRC_SAVE_SR(sh2); DRC_SAVE_SR(sh2);
// is this a synchronisation address? // is this a synchronisation address?
if(p[(a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] & 0x80) { if(p[(a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] & 0x80) {
sh2s_sync_on_read(sh2);
cycles = sh2_cycles_done_m68k(sh2); cycles = sh2_cycles_done_m68k(sh2);
sh2s_sync_on_read(sh2, cycles);
// check poll fifo and sign-extend the result correctly // check poll fifo and sign-extend the result correctly
d = (s16)sh2_poll_read(a, d, cycles, sh2); d = (s16)sh2_poll_read(a, d, cycles, sh2);
} }
@ -291,8 +289,8 @@ u32 REGPARM(3) p32x_sh2_poll_memory32(unsigned int a, u32 d, SH2 *sh2)
DRC_SAVE_SR(sh2); DRC_SAVE_SR(sh2);
// is this a synchronisation address? // is this a synchronisation address?
if(p[(a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] & 0x80) { if(p[(a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] & 0x80) {
sh2s_sync_on_read(sh2);
cycles = sh2_cycles_done_m68k(sh2); cycles = sh2_cycles_done_m68k(sh2);
sh2s_sync_on_read(sh2, cycles);
// check poll fifo and sign-extend the result correctly // check poll fifo and sign-extend the result correctly
d = (sh2_poll_read(a, d >> 16, cycles, sh2) << 16) | d = (sh2_poll_read(a, d >> 16, cycles, sh2) << 16) |
((u16)sh2_poll_read(a+2, d, cycles, sh2)); ((u16)sh2_poll_read(a+2, d, cycles, sh2));
@ -729,6 +727,7 @@ static void p32x_vdp_write16(u32 a, u32 d, SH2 *sh2)
static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2) static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2)
{ {
u16 *r = Pico32x.regs; u16 *r = Pico32x.regs;
unsigned cycles;
a &= 0x3e; a &= 0x3e;
switch (a/2) { switch (a/2) {
@ -737,8 +736,9 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2)
| Pico32x.sh2irq_mask[sh2->is_slave]; | Pico32x.sh2irq_mask[sh2->is_slave];
case 0x04/2: // H count (often as comm too) case 0x04/2: // H count (often as comm too)
sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9); sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9);
sh2s_sync_on_read(sh2); cycles = sh2_cycles_done_m68k(sh2);
return sh2_poll_read(a, Pico32x.sh2_regs[4 / 2], sh2_cycles_done_m68k(sh2), sh2); sh2s_sync_on_read(sh2, cycles);
return sh2_poll_read(a, Pico32x.sh2_regs[4 / 2], cycles, sh2);
case 0x06/2: case 0x06/2:
return (r[a / 2] & ~P32XS_FULL) | 0x4000; return (r[a / 2] & ~P32XS_FULL) | 0x4000;
case 0x08/2: // DREQ src case 0x08/2: // DREQ src
@ -770,8 +770,9 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2)
case 0x2c/2: case 0x2c/2:
case 0x2e/2: case 0x2e/2:
sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9); sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9);
sh2s_sync_on_read(sh2); cycles = sh2_cycles_done_m68k(sh2);
return sh2_poll_read(a, r[a / 2], sh2_cycles_done_m68k(sh2), sh2); sh2s_sync_on_read(sh2, cycles);
return sh2_poll_read(a, r[a / 2], cycles, sh2);
case 0x30/2: // PWM case 0x30/2: // PWM
case 0x32/2: case 0x32/2:
case 0x34/2: case 0x34/2:
@ -825,7 +826,7 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2)
unsigned int cycles = sh2_cycles_done_m68k(sh2); unsigned int cycles = sh2_cycles_done_m68k(sh2);
Pico32x.sh2_regs[4 / 2] = d; Pico32x.sh2_regs[4 / 2] = d;
p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles);
if (p32x_sh2_ready(sh2->other_sh2, cycles+16)) if (p32x_sh2_ready(sh2->other_sh2, cycles+8))
sh2_end_run(sh2, 4); sh2_end_run(sh2, 4);
sh2_poll_write(a & ~1, d, cycles, sh2); sh2_poll_write(a & ~1, d, cycles, sh2);
} }
@ -852,7 +853,7 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2)
REG8IN16(r, a) = d; REG8IN16(r, a) = d;
p32x_m68k_poll_event(P32XF_68KCPOLL); p32x_m68k_poll_event(P32XF_68KCPOLL);
p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles);
if (p32x_sh2_ready(sh2->other_sh2, cycles+16)) if (p32x_sh2_ready(sh2->other_sh2, cycles+8))
sh2_end_run(sh2, 1); sh2_end_run(sh2, 1);
sh2_poll_write(a & ~1, r[a / 2], cycles, sh2); sh2_poll_write(a & ~1, r[a / 2], cycles, sh2);
} }
@ -945,7 +946,7 @@ static void p32x_sh2reg_write16(u32 a, u32 d, SH2 *sh2)
Pico32x.regs[a / 2] = d; Pico32x.regs[a / 2] = d;
p32x_m68k_poll_event(P32XF_68KCPOLL); p32x_m68k_poll_event(P32XF_68KCPOLL);
p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles);
if (p32x_sh2_ready(sh2->other_sh2, cycles+16)) if (p32x_sh2_ready(sh2->other_sh2, cycles+8))
sh2_end_run(sh2, 1); sh2_end_run(sh2, 1);
sh2_poll_write(a, d, cycles, sh2); sh2_poll_write(a, d, cycles, sh2);
} }
@ -1580,7 +1581,7 @@ static void sh2_sdram_poll(u32 a, u32 d, SH2 *sh2)
cycles = sh2_cycles_done_m68k(sh2); cycles = sh2_cycles_done_m68k(sh2);
sh2_poll_write(a, d, cycles, sh2); sh2_poll_write(a, d, cycles, sh2);
p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_RPOLL, cycles); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_RPOLL, cycles);
if (p32x_sh2_ready(sh2->other_sh2, cycles+16)) if (p32x_sh2_ready(sh2->other_sh2, cycles+8))
sh2_end_run(sh2, 1); sh2_end_run(sh2, 1);
DRC_RESTORE_SR(sh2); DRC_RESTORE_SR(sh2);
} }
@ -1588,27 +1589,25 @@ static void sh2_sdram_poll(u32 a, u32 d, SH2 *sh2)
void sh2_sdram_checks(u32 a, u32 d, SH2 *sh2, u32 t) void sh2_sdram_checks(u32 a, u32 d, SH2 *sh2, u32 t)
{ {
if (t & 0x80) sh2_sdram_poll(a, d, sh2); if (t & 0x80) sh2_sdram_poll(a, d, sh2);
if (t & 0x7f) sh2_drc_wcheck_ram(a, t & 0x7f, sh2); if (t & 0x7f) sh2_drc_wcheck_ram(a, 2, sh2);
} }
void sh2_sdram_checks_l(u32 a, u32 d, SH2 *sh2, u32 t) void sh2_sdram_checks_l(u32 a, u32 d, SH2 *sh2, u32 t)
{ {
u32 m = 0x80 | 0x800000;
if (t & 0x000080) sh2_sdram_poll(a, d>>16, sh2); if (t & 0x000080) sh2_sdram_poll(a, d>>16, sh2);
if (t & 0x800000) sh2_sdram_poll(a+2, d, sh2); if (t & 0x800000) sh2_sdram_poll(a+2, d, sh2);
if (t & ~m) sh2_drc_wcheck_ram(a, t & ~m, sh2); if (t & ~0x800080) sh2_drc_wcheck_ram(a, 4, sh2);
} }
#ifndef _ASM_32X_MEMORY_C #ifndef _ASM_32X_MEMORY_C
static void sh2_da_checks(u32 a, u32 t, SH2 *sh2) static void sh2_da_checks(u32 a, u32 t, SH2 *sh2)
{ {
sh2_drc_wcheck_da(a, t, sh2); sh2_drc_wcheck_da(a, 2, sh2);
} }
static void sh2_da_checks_l(u32 a, u32 t, SH2 *sh2) static void sh2_da_checks_l(u32 a, u32 t, SH2 *sh2)
{ {
sh2_drc_wcheck_da(a, t, sh2); sh2_drc_wcheck_da(a, 4, sh2);
} }
#endif #endif
#endif #endif

View file

@ -139,12 +139,11 @@ sh2_write8_sdram:
mov r3, r3, lsl #SH2_RAM_SHIFT mov r3, r3, lsl #SH2_RAM_SHIFT
strb r1, [ip, r3, lsr #SH2_RAM_SHIFT] strb r1, [ip, r3, lsr #SH2_RAM_SHIFT]
#ifdef DRC_SH2 #ifdef DRC_SH2
ldr ip, [r2, #OFS_SH2_p_drcblk_ram] ldr r1, [r2, #OFS_SH2_p_drcblk_ram]
ldrb r3, [ip, r3, lsr #SH2_RAM_SHIFT+1] ldrb r3, [r1, r3, lsr #SH2_RAM_SHIFT+1]
cmp r3, #0 cmp r3, #0
bxeq lr bxeq lr
@ need to load aligned 16 bit data for check @ need to load aligned 16 bit data for check
ldr ip, [r2, #OFS_SH2_p_sdram]
bic r0, r0, #1 bic r0, r0, #1
mov r1, r0, lsl #SH2_RAM_SHIFT mov r1, r0, lsl #SH2_RAM_SHIFT
mov r1, r1, lsr #SH2_RAM_SHIFT mov r1, r1, lsr #SH2_RAM_SHIFT
@ -166,6 +165,7 @@ sh2_write8_da:
bic r0, r0, #1 bic r0, r0, #1
cmp r1, #0 cmp r1, #0
bxeq lr bxeq lr
mov r1, #2
b sh2_drc_wcheck_da b sh2_drc_wcheck_da
#else #else
bx lr bx lr
@ -206,6 +206,7 @@ sh2_write16_da:
ldrb r1, [ip, r3, lsr #1] ldrb r1, [ip, r3, lsr #1]
cmp r1, #0 cmp r1, #0
bxeq lr bxeq lr
mov r1, #2
b sh2_drc_wcheck_da b sh2_drc_wcheck_da
#else #else
bx lr bx lr
@ -256,6 +257,7 @@ sh2_write32_da:
ldrb ip, [ip, #1] ldrb ip, [ip, #1]
orrs r1, r1, ip, lsl #16 orrs r1, r1, ip, lsl #16
bxeq lr bxeq lr
mov r1, #4
b sh2_drc_wcheck_da b sh2_drc_wcheck_da
#else #else
bx lr bx lr