32x, finetuning

This commit is contained in:
kub 2019-10-19 08:53:28 +02:00
parent 1fd8f98696
commit 7e940f142e
4 changed files with 41 additions and 43 deletions

View file

@@ -272,9 +272,9 @@ static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr)
// and can be discarded early
// XXX: need to tune sizes
static const int tcache_sizes[TCACHE_BUFFERS] = {
DRC_TCACHE_SIZE * 14 / 16, // ROM (rarely used), DRAM
DRC_TCACHE_SIZE / 16, // BIOS, data array in master sh2
DRC_TCACHE_SIZE / 16, // ... slave
DRC_TCACHE_SIZE * 30 / 32, // ROM (rarely used), DRAM
DRC_TCACHE_SIZE / 32, // BIOS, data array in master sh2
DRC_TCACHE_SIZE / 32, // ... slave
};
static u8 *tcache_bases[TCACHE_BUFFERS];
@@ -332,13 +332,13 @@ struct block_desc {
struct block_entry entryp[MAX_BLOCK_ENTRIES];
};
#define BLOCK_MAX_COUNT(tcid) ((tcid) ? 256 : 16*256)
#define BLOCK_MAX_COUNT(tcid) ((tcid) ? 256 : 32*256)
static struct block_desc *block_tables[TCACHE_BUFFERS];
static int block_counts[TCACHE_BUFFERS];
static int block_limit[TCACHE_BUFFERS];
// we have block_link_pool to avoid using mallocs
#define BLOCK_LINK_MAX_COUNT(tcid) ((tcid) ? 1024 : 16*1024)
#define BLOCK_LINK_MAX_COUNT(tcid) ((tcid) ? 512 : 32*512)
static struct block_link *block_link_pool[TCACHE_BUFFERS];
static int block_link_pool_counts[TCACHE_BUFFERS];
static struct block_link **unresolved_links[TCACHE_BUFFERS];
@@ -363,7 +363,7 @@ static struct block_list *inactive_blocks[TCACHE_BUFFERS];
// each array has len: sizeof(mem) / INVAL_PAGE_SIZE
static struct block_list **inval_lookup[TCACHE_BUFFERS];
#define HASH_TABLE_SIZE(tcid) ((tcid) ? 256 : 64*256)
#define HASH_TABLE_SIZE(tcid) ((tcid) ? 512 : 64*512)
static struct block_entry **hash_tables[TCACHE_BUFFERS];
#define HASH_FUNC(hash_tab, addr, mask) \
@@ -5188,20 +5188,14 @@ static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, u32 shift)
#endif
}
void sh2_drc_wcheck_ram(unsigned int a, unsigned t, SH2 *sh2)
void sh2_drc_wcheck_ram(unsigned int a, unsigned len, SH2 *sh2)
{
int off = ((u16) t ? 0 : 2);
int len = ((u16) t ? 2 : 0) + (t >> 16 ? 2 : 0);
sh2_smc_rm_blocks(a + off, len, 0, SH2_DRCBLK_RAM_SHIFT);
sh2_smc_rm_blocks(a, len, 0, SH2_DRCBLK_RAM_SHIFT);
}
void sh2_drc_wcheck_da(unsigned int a, unsigned t, SH2 *sh2)
void sh2_drc_wcheck_da(unsigned int a, unsigned len, SH2 *sh2)
{
int off = ((u16) t ? 0 : 2);
int len = ((u16) t ? 2 : 0) + (t >> 16 ? 2 : 0);
sh2_smc_rm_blocks(a + off, len, 1 + sh2->is_slave, SH2_DRCBLK_DA_SHIFT);
sh2_smc_rm_blocks(a, len, 1 + sh2->is_slave, SH2_DRCBLK_DA_SHIFT);
}
int sh2_execute_drc(SH2 *sh2c, int cycles)
@@ -6403,6 +6397,9 @@ end:
last_btarget = 0;
op = 0; // delay/poll insns counter
for (i = 0, pc = base_pc; i < i_end; i++, pc += 2) {
int null;
if ((op_flags[i] & OF_BTARGET) && dr_get_entry(pc, is_slave, &null))
break; // branch target already compiled
opd = &ops[i];
crc += FETCH_OP(pc);
@@ -6483,7 +6480,7 @@ end:
op ++; // condition 2
#endif
}
end_pc = base_pc + i_end * 2;
end_pc = pc;
// end_literals is used to decide to inline a literal or not
// XXX: need better detection if this actually is used in write

View file

@@ -1,7 +1,7 @@
int sh2_drc_init(SH2 *sh2);
void sh2_drc_finish(SH2 *sh2);
void sh2_drc_wcheck_ram(unsigned int a, unsigned val, SH2 *sh2);
void sh2_drc_wcheck_da(unsigned int a, unsigned val, SH2 *sh2);
void sh2_drc_wcheck_ram(unsigned int a, unsigned len, SH2 *sh2);
void sh2_drc_wcheck_da(unsigned int a, unsigned len, SH2 *sh2);
#ifdef DRC_SH2
void sh2_drc_mem_setup(SH2 *sh2);

View file

@@ -162,15 +162,13 @@ void NOINLINE p32x_sh2_poll_event(SH2 *sh2, u32 flags, u32 m68k_cycles)
sh2->poll_addr = sh2->poll_cycles = sh2->poll_cnt = 0;
}
static void sh2s_sync_on_read(SH2 *sh2)
static void sh2s_sync_on_read(SH2 *sh2, unsigned cycles)
{
int cycles;
if (sh2->poll_cnt != 0)
return;
cycles = sh2_cycles_done(sh2);
if (cycles > 600)
p32x_sync_other_sh2(sh2, sh2->m68krcycles_done + C_SH2_TO_M68K(sh2, cycles));
if (p32x_sh2_ready(sh2->other_sh2, cycles-250))
p32x_sync_other_sh2(sh2, cycles);
}
// poll fifo, stores writes to potential addresses used for polling.
@@ -271,8 +269,8 @@ u32 REGPARM(3) p32x_sh2_poll_memory16(unsigned int a, u32 d, SH2 *sh2)
DRC_SAVE_SR(sh2);
// is this a synchronisation address?
if(p[(a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] & 0x80) {
sh2s_sync_on_read(sh2);
cycles = sh2_cycles_done_m68k(sh2);
sh2s_sync_on_read(sh2, cycles);
// check poll fifo and sign-extend the result correctly
d = (s16)sh2_poll_read(a, d, cycles, sh2);
}
@@ -291,8 +289,8 @@ u32 REGPARM(3) p32x_sh2_poll_memory32(unsigned int a, u32 d, SH2 *sh2)
DRC_SAVE_SR(sh2);
// is this a synchronisation address?
if(p[(a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] & 0x80) {
sh2s_sync_on_read(sh2);
cycles = sh2_cycles_done_m68k(sh2);
sh2s_sync_on_read(sh2, cycles);
// check poll fifo and sign-extend the result correctly
d = (sh2_poll_read(a, d >> 16, cycles, sh2) << 16) |
((u16)sh2_poll_read(a+2, d, cycles, sh2));
@@ -729,6 +727,7 @@ static void p32x_vdp_write16(u32 a, u32 d, SH2 *sh2)
static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2)
{
u16 *r = Pico32x.regs;
unsigned cycles;
a &= 0x3e;
switch (a/2) {
@@ -737,8 +736,9 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2)
| Pico32x.sh2irq_mask[sh2->is_slave];
case 0x04/2: // H count (often as comm too)
sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9);
sh2s_sync_on_read(sh2);
return sh2_poll_read(a, Pico32x.sh2_regs[4 / 2], sh2_cycles_done_m68k(sh2), sh2);
cycles = sh2_cycles_done_m68k(sh2);
sh2s_sync_on_read(sh2, cycles);
return sh2_poll_read(a, Pico32x.sh2_regs[4 / 2], cycles, sh2);
case 0x06/2:
return (r[a / 2] & ~P32XS_FULL) | 0x4000;
case 0x08/2: // DREQ src
@@ -770,8 +770,9 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2)
case 0x2c/2:
case 0x2e/2:
sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9);
sh2s_sync_on_read(sh2);
return sh2_poll_read(a, r[a / 2], sh2_cycles_done_m68k(sh2), sh2);
cycles = sh2_cycles_done_m68k(sh2);
sh2s_sync_on_read(sh2, cycles);
return sh2_poll_read(a, r[a / 2], cycles, sh2);
case 0x30/2: // PWM
case 0x32/2:
case 0x34/2:
@@ -825,7 +826,7 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2)
unsigned int cycles = sh2_cycles_done_m68k(sh2);
Pico32x.sh2_regs[4 / 2] = d;
p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles);
if (p32x_sh2_ready(sh2->other_sh2, cycles+16))
if (p32x_sh2_ready(sh2->other_sh2, cycles+8))
sh2_end_run(sh2, 4);
sh2_poll_write(a & ~1, d, cycles, sh2);
}
@@ -852,7 +853,7 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2)
REG8IN16(r, a) = d;
p32x_m68k_poll_event(P32XF_68KCPOLL);
p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles);
if (p32x_sh2_ready(sh2->other_sh2, cycles+16))
if (p32x_sh2_ready(sh2->other_sh2, cycles+8))
sh2_end_run(sh2, 1);
sh2_poll_write(a & ~1, r[a / 2], cycles, sh2);
}
@@ -945,7 +946,7 @@ static void p32x_sh2reg_write16(u32 a, u32 d, SH2 *sh2)
Pico32x.regs[a / 2] = d;
p32x_m68k_poll_event(P32XF_68KCPOLL);
p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles);
if (p32x_sh2_ready(sh2->other_sh2, cycles+16))
if (p32x_sh2_ready(sh2->other_sh2, cycles+8))
sh2_end_run(sh2, 1);
sh2_poll_write(a, d, cycles, sh2);
}
@@ -1580,7 +1581,7 @@ static void sh2_sdram_poll(u32 a, u32 d, SH2 *sh2)
cycles = sh2_cycles_done_m68k(sh2);
sh2_poll_write(a, d, cycles, sh2);
p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_RPOLL, cycles);
if (p32x_sh2_ready(sh2->other_sh2, cycles+16))
if (p32x_sh2_ready(sh2->other_sh2, cycles+8))
sh2_end_run(sh2, 1);
DRC_RESTORE_SR(sh2);
}
@@ -1588,27 +1589,25 @@ static void sh2_sdram_poll(u32 a, u32 d, SH2 *sh2)
void sh2_sdram_checks(u32 a, u32 d, SH2 *sh2, u32 t)
{
if (t & 0x80) sh2_sdram_poll(a, d, sh2);
if (t & 0x7f) sh2_drc_wcheck_ram(a, t & 0x7f, sh2);
if (t & 0x7f) sh2_drc_wcheck_ram(a, 2, sh2);
}
void sh2_sdram_checks_l(u32 a, u32 d, SH2 *sh2, u32 t)
{
u32 m = 0x80 | 0x800000;
if (t & 0x000080) sh2_sdram_poll(a, d>>16, sh2);
if (t & 0x800000) sh2_sdram_poll(a+2, d, sh2);
if (t & ~m) sh2_drc_wcheck_ram(a, t & ~m, sh2);
if (t & ~0x800080) sh2_drc_wcheck_ram(a, 4, sh2);
}
#ifndef _ASM_32X_MEMORY_C
static void sh2_da_checks(u32 a, u32 t, SH2 *sh2)
{
sh2_drc_wcheck_da(a, t, sh2);
sh2_drc_wcheck_da(a, 2, sh2);
}
static void sh2_da_checks_l(u32 a, u32 t, SH2 *sh2)
{
sh2_drc_wcheck_da(a, t, sh2);
sh2_drc_wcheck_da(a, 4, sh2);
}
#endif
#endif

View file

@@ -139,12 +139,11 @@ sh2_write8_sdram:
mov r3, r3, lsl #SH2_RAM_SHIFT
strb r1, [ip, r3, lsr #SH2_RAM_SHIFT]
#ifdef DRC_SH2
ldr ip, [r2, #OFS_SH2_p_drcblk_ram]
ldrb r3, [ip, r3, lsr #SH2_RAM_SHIFT+1]
ldr r1, [r2, #OFS_SH2_p_drcblk_ram]
ldrb r3, [r1, r3, lsr #SH2_RAM_SHIFT+1]
cmp r3, #0
bxeq lr
@ need to load aligned 16 bit data for check
ldr ip, [r2, #OFS_SH2_p_sdram]
bic r0, r0, #1
mov r1, r0, lsl #SH2_RAM_SHIFT
mov r1, r1, lsr #SH2_RAM_SHIFT
@@ -166,6 +165,7 @@ sh2_write8_da:
bic r0, r0, #1
cmp r1, #0
bxeq lr
mov r1, #2
b sh2_drc_wcheck_da
#else
bx lr
@@ -206,6 +206,7 @@ sh2_write16_da:
ldrb r1, [ip, r3, lsr #1]
cmp r1, #0
bxeq lr
mov r1, #2
b sh2_drc_wcheck_da
#else
bx lr
@@ -256,6 +257,7 @@ sh2_write32_da:
ldrb ip, [ip, #1]
orrs r1, r1, ip, lsl #16
bxeq lr
mov r1, #4
b sh2_drc_wcheck_da
#else
bx lr