From 52055c13b253cce969a24fa2b95eb9c39ac7ea79 Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 17 Oct 2019 21:54:37 +0200 Subject: [PATCH] sh2 drc: reorganised block mgmt code, plus some small scale optimisations --- cpu/sh2/compiler.c | 721 ++++++++++++++++++------------------ cpu/sh2/compiler.h | 4 +- cpu/sh2/sh2.h | 2 +- pico/32x/memory.c | 56 +-- pico/32x/memory_arm.S | 15 +- pico/pico_int.h | 4 + platform/gp2x/PicoDrive.gpe | 2 + 7 files changed, 410 insertions(+), 394 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 86d4b85a..1acc7215 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -764,58 +764,16 @@ static void rm_from_block_lists(struct block_desc *block) block->list = NULL; } -static void rm_block_list(struct block_list **blist) +static void discard_block_list(struct block_list **blist) { - while (*blist != NULL) - rm_from_block_lists((*blist)->block); -} - -static void REGPARM(1) flush_tcache(int tcid) -{ - int i; -#if (DRC_DEBUG & 1) - int tc_used, bl_used; - - tc_used = tcache_sizes[tcid] - (tcache_limit[tcid] - tcache_ptrs[tcid]); - bl_used = BLOCK_MAX_COUNT(tcid) - (block_limit[tcid] - block_counts[tcid]); - elprintf(EL_STATUS, "tcache #%d flush! (%d/%d, bds %d/%d)", tcid, tc_used, - tcache_sizes[tcid], bl_used, BLOCK_MAX_COUNT(tcid)); -#endif - - block_counts[tcid] = 0; - block_limit[tcid] = BLOCK_MAX_COUNT(tcid) - 1; - block_link_pool_counts[tcid] = 0; - blink_free[tcid] = NULL; - memset(unresolved_links[tcid], 0, sizeof(*unresolved_links[0]) * HASH_TABLE_SIZE(tcid)); - memset(hash_tables[tcid], 0, sizeof(*hash_tables[0]) * HASH_TABLE_SIZE(tcid)); - tcache_ptrs[tcid] = tcache_bases[tcid]; - tcache_limit[tcid] = tcache_bases[tcid] + tcache_sizes[tcid]; - if (Pico32xMem->sdram != NULL) { - if (tcid == 0) { // ROM, RAM - memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram)); - memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram)); - memset(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)); - memset(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)); - memset(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache)); - memset(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache)); - sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0; - } else { - memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram)); - memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram)); - memset(Pico32xMem->drcblk_da[tcid - 1], 0, sizeof(Pico32xMem->drcblk_da[tcid - 1])); - memset(Pico32xMem->drclit_da[tcid - 1], 0, sizeof(Pico32xMem->drclit_da[tcid - 1])); - memset(sh2s[tcid - 1].branch_cache, -1, sizeof(sh2s[0].branch_cache)); - memset(sh2s[tcid - 1].rts_cache, -1, sizeof(sh2s[0].rts_cache)); - sh2s[tcid - 1].rts_cache_idx = 0; - } + struct block_list *next, *current = *blist; + while (current != NULL) { + next = current->next; + current->next = blist_free; + blist_free = current; + current = next; } -#if (DRC_DEBUG & 4) - tcache_dsm_ptrs[tcid] = tcache_bases[tcid]; -#endif - - for (i = 0; i < RAM_SIZE(tcid) / INVAL_PAGE_SIZE; i++) - rm_block_list(&inval_lookup[tcid][i]); - rm_block_list(&inactive_blocks[tcid]); + *blist = NULL; } static void add_to_hashlist(struct block_entry *be, int tcache_id) @@ -902,243 +860,6 @@ static void rm_from_hashlist_unresolved(struct block_link *bl, int tcache_id) bl->next->prev = bl->prev; } -static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nolit, int free); -static void dr_free_oldest_block(int tcache_id) -{ - struct block_desc *bd; - - if (block_limit[tcache_id] >= BLOCK_MAX_COUNT(tcache_id)) { - // block desc wrap around - block_limit[tcache_id] = 0; - } - bd = &block_tables[tcache_id][block_limit[tcache_id]]; - - if (bd->tcache_ptr && bd->tcache_ptr < tcache_ptrs[tcache_id]) { - // cache wrap around - tcache_ptrs[tcache_id] = bd->tcache_ptr; - } - - if (bd->addr && bd->entry_count) - sh2_smc_rm_block_entry(bd, tcache_id, 0, 1); - - block_limit[tcache_id]++; - if (block_limit[tcache_id] >= BLOCK_MAX_COUNT(tcache_id)) - block_limit[tcache_id] = 0; - bd = &block_tables[tcache_id][block_limit[tcache_id]]; - if (bd->tcache_ptr >= tcache_ptrs[tcache_id]) - tcache_limit[tcache_id] = bd->tcache_ptr; - else - tcache_limit[tcache_id] = tcache_bases[tcache_id] + tcache_sizes[tcache_id]; -} - -static u8 *dr_prepare_cache(int tcache_id, int insn_count) -{ - u8 *limit = tcache_limit[tcache_id]; - - // if no block desc available - if (block_counts[tcache_id] == block_limit[tcache_id]) - dr_free_oldest_block(tcache_id); - - // while not enough cache space left (limit - tcache_ptr < max space needed) - while (tcache_limit[tcache_id] - tcache_ptrs[tcache_id] < insn_count * 128) - dr_free_oldest_block(tcache_id); - - if (limit != tcache_limit[tcache_id]) { -#if BRANCH_CACHE - if (tcache_id) - memset32(sh2s[tcache_id-1].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); - else { - memset32(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); - memset32(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)/4); - } -#endif -#if CALL_STACK - if (tcache_id) { - memset32(sh2s[tcache_id-1].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4); - sh2s[tcache_id-1].rts_cache_idx = 0; - } else { - memset32(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4); - memset32(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache)/4); - sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0; - } -#endif - } - return (u8 *)tcache_ptrs[tcache_id]; -} - -static void dr_mark_memory(int mark, struct block_desc *block, int tcache_id, u32 nolit) -{ - u8 *drc_ram_blk = NULL, *lit_ram_blk = NULL; - u32 addr, end, mask = 0, shift = 0, idx; - - // mark memory blocks as containing compiled code - if ((block->addr & 0xc7fc0000) == 0x06000000 - || (block->addr & 0xfffff000) == 0xc0000000) - { - if (tcache_id != 0) { - // data array - drc_ram_blk = Pico32xMem->drcblk_da[tcache_id-1]; - lit_ram_blk = Pico32xMem->drclit_da[tcache_id-1]; - shift = SH2_DRCBLK_DA_SHIFT; - } - else { - // SDRAM - drc_ram_blk = Pico32xMem->drcblk_ram; - lit_ram_blk = Pico32xMem->drclit_ram; - shift = SH2_DRCBLK_RAM_SHIFT; - } - mask = RAM_SIZE(tcache_id) - 1; - - // mark recompiled insns - addr = block->addr & ~((1 << shift) - 1); - end = block->addr + block->size; - for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) - drc_ram_blk[idx++] += mark; - - // mark literal pool - if (addr < (block->addr_lit & ~((1 << shift) - 1))) - addr = block->addr_lit & ~((1 << shift) - 1); - end = block->addr_lit + block->size_lit; - for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) - drc_ram_blk[idx++] += mark; - - // mark for literals disabled - if (nolit) { - addr = nolit & ~((1 << shift) - 1); - end = block->addr_lit + block->size_lit; - for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) - lit_ram_blk[idx++] = 1; - } - - if (mark < 0) - rm_from_block_lists(block); - else { - // add to invalidation lookup lists - addr = block->addr & ~(INVAL_PAGE_SIZE - 1); - end = block->addr + block->size; - for (idx = (addr & mask) / INVAL_PAGE_SIZE; addr < end; addr += INVAL_PAGE_SIZE) - add_to_block_list(&inval_lookup[tcache_id][idx++], block); - - if (addr < (block->addr_lit & ~(INVAL_PAGE_SIZE - 1))) - addr = block->addr_lit & ~(INVAL_PAGE_SIZE - 1); - end = block->addr_lit + block->size_lit; - for (idx = (addr & mask) / INVAL_PAGE_SIZE; addr < end; addr += INVAL_PAGE_SIZE) - add_to_block_list(&inval_lookup[tcache_id][idx++], block); - } - } -} - -static u32 dr_check_nolit(u32 start, u32 end, int tcache_id) -{ - u8 *lit_ram_blk = NULL; - u32 mask = 0, shift = 0, addr, idx; - - if ((start & 0xc7fc0000) == 0x06000000 - || (start & 0xfffff000) == 0xc0000000) - { - if (tcache_id != 0) { - // data array - lit_ram_blk = Pico32xMem->drclit_da[tcache_id-1]; - shift = SH2_DRCBLK_DA_SHIFT; - } - else { - // SDRAM - lit_ram_blk = Pico32xMem->drclit_ram; - shift = SH2_DRCBLK_RAM_SHIFT; - } - mask = RAM_SIZE(tcache_id) - 1; - - addr = start & ~((1 << shift) - 1); - for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) - if (lit_ram_blk[idx++]) - break; - - return (addr < start ? start : addr > end ? end : addr); - } - - return end; -} - -static struct block_desc *dr_find_inactive_block(int tcache_id, u16 crc, - u32 addr, int size, u32 addr_lit, int size_lit) -{ - struct block_list **head = &inactive_blocks[tcache_id]; - struct block_list *current; - - for (current = *head; current != NULL; current = current->next) { - struct block_desc *block = current->block; - if (block->crc == crc && block->addr == addr && block->size == size && - block->addr_lit == addr_lit && block->size_lit == size_lit) - { - rm_from_block_lists(block); - return block; - } - } - return NULL; -} - -static struct block_desc *dr_add_block(u32 addr, int size, - u32 addr_lit, int size_lit, u16 crc, int is_slave, int *blk_id) -{ - struct block_entry *be; - struct block_desc *bd; - int tcache_id; - int *bcount; - - // do a lookup to get tcache_id and override check - be = dr_get_entry(addr, is_slave, &tcache_id); - if (be != NULL) - dbg(1, "block override for %08x", addr); - - bcount = &block_counts[tcache_id]; - if (*bcount == block_limit[tcache_id]) { - dbg(1, "bd overflow for tcache %d", tcache_id); - return NULL; - } - - bd = &block_tables[tcache_id][*bcount]; - bd->addr = addr; - bd->size = size; - bd->addr_lit = addr_lit; - bd->size_lit = size_lit; - bd->tcache_ptr = tcache_ptr; - bd->crc = crc; - bd->active = 0; - bd->entry_count = 0; -#if (DRC_DEBUG & 2) - bd->refcount = 0; -#endif - - *blk_id = *bcount; - (*bcount)++; - if (*bcount >= BLOCK_MAX_COUNT(tcache_id)) - *bcount = 0; - - return bd; -} - -static void REGPARM(3) *dr_lookup_block(u32 pc, SH2 *sh2, int *tcache_id) -{ - struct block_entry *be = NULL; - void *block = NULL; - - be = dr_get_entry(pc, sh2->is_slave, tcache_id); - if (be != NULL) - block = be->tcache_ptr; - -#if (DRC_DEBUG & 2) - if (be != NULL) - be->block->refcount++; -#endif - return block; -} - -static void *dr_failure(void) -{ - lprintf("recompilation failed\n"); - exit(1); -} - #if LINK_BRANCHES static void dr_block_link(struct block_entry *be, struct block_link *bl, int emit_jump) { @@ -1262,6 +983,212 @@ static struct block_link *dr_prepare_ext_branch(struct block_entry *owner, u32 p #endif } +static void dr_mark_memory(int mark, struct block_desc *block, int tcache_id, u32 nolit) +{ + u8 *drc_ram_blk = NULL, *lit_ram_blk = NULL; + u32 addr, end, mask = 0, shift = 0, idx; + + // mark memory blocks as containing compiled code + if ((block->addr & 0xc7fc0000) == 0x06000000 + || (block->addr & 0xfffff000) == 0xc0000000) + { + if (tcache_id != 0) { + // data array + drc_ram_blk = Pico32xMem->drcblk_da[tcache_id-1]; + lit_ram_blk = Pico32xMem->drclit_da[tcache_id-1]; + shift = SH2_DRCBLK_DA_SHIFT; + } + else { + // SDRAM + drc_ram_blk = Pico32xMem->drcblk_ram; + lit_ram_blk = Pico32xMem->drclit_ram; + shift = SH2_DRCBLK_RAM_SHIFT; + } + mask = RAM_SIZE(tcache_id) - 1; + + // mark recompiled insns + addr = block->addr & ~((1 << shift) - 1); + end = block->addr + block->size; + for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) + drc_ram_blk[idx++] += mark; + + // mark literal pool + if (addr < (block->addr_lit & ~((1 << shift) - 1))) + addr = block->addr_lit & ~((1 << shift) - 1); + end = block->addr_lit + block->size_lit; + for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) + drc_ram_blk[idx++] += mark; + + // mark for literals disabled + if (nolit) { + addr = nolit & ~((1 << shift) - 1); + end = block->addr_lit + block->size_lit; + for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) + lit_ram_blk[idx++] = 1; + } + + if (mark < 0) + rm_from_block_lists(block); + else { + // add to invalidation lookup lists + addr = block->addr & ~(INVAL_PAGE_SIZE - 1); + end = block->addr + block->size; + for (idx = (addr & mask) / INVAL_PAGE_SIZE; addr < end; addr += INVAL_PAGE_SIZE) + add_to_block_list(&inval_lookup[tcache_id][idx++], block); + + if (addr < (block->addr_lit & ~(INVAL_PAGE_SIZE - 1))) + addr = block->addr_lit & ~(INVAL_PAGE_SIZE - 1); + end = block->addr_lit + block->size_lit; + for (idx = (addr & mask) / INVAL_PAGE_SIZE; addr < end; addr += INVAL_PAGE_SIZE) + add_to_block_list(&inval_lookup[tcache_id][idx++], block); + } + } +} + +static u32 dr_check_nolit(u32 start, u32 end, int tcache_id) +{ + u8 *lit_ram_blk = NULL; + u32 mask = 0, shift = 0, addr, idx; + + if ((start & 0xc7fc0000) == 0x06000000 + || (start & 0xfffff000) == 0xc0000000) + { + if (tcache_id != 0) { + // data array + lit_ram_blk = Pico32xMem->drclit_da[tcache_id-1]; + shift = SH2_DRCBLK_DA_SHIFT; + } + else { + // SDRAM + lit_ram_blk = Pico32xMem->drclit_ram; + shift = SH2_DRCBLK_RAM_SHIFT; + } + mask = RAM_SIZE(tcache_id) - 1; + + addr = start & ~((1 << shift) - 1); + for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) + if (lit_ram_blk[idx++]) + break; + + return (addr < start ? start : addr > end ? end : addr); + } + + return end; +} + +static void dr_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nolit, int free) +{ + struct block_link *bl; + u32 i; + + free = free || nolit; // block is invalid if literals are overwritten + dbg(2," %sing block %08x-%08x,%08x-%08x, blkid %d,%d", free?"delet":"disabl", + bd->addr, bd->addr + bd->size, bd->addr_lit, bd->addr_lit + bd->size_lit, + tcache_id, bd - block_tables[tcache_id]); + if (bd->addr == 0 || bd->entry_count == 0) { + dbg(1, " killing dead block!? %08x", bd->addr); + return; + } + +#if LINK_BRANCHES + // remove from hash table, make incoming links unresolved + if (bd->active) { + for (i = 0; i < bd->entry_count; i++) { + rm_from_hashlist(&bd->entryp[i], tcache_id); + + while ((bl = bd->entryp[i].links) != NULL) { + dr_block_unlink(bl, 1); + add_to_hashlist_unresolved(bl, tcache_id); + } + } + + dr_mark_memory(-1, bd, tcache_id, nolit); + add_to_block_list(&inactive_blocks[tcache_id], bd); + } + bd->active = 0; +#endif + + if (free) { +#if LINK_BRANCHES + // revoke outgoing links + for (bl = bd->entryp[0].o_links; bl != NULL; bl = bl->o_next) { + if (bl->target) + dr_block_unlink(bl, 0); + else + rm_from_hashlist_unresolved(bl, tcache_id); + bl->jump = NULL; + bl->next = blink_free[bl->tcache_id]; + blink_free[bl->tcache_id] = bl; + } + bd->entryp[0].o_links = NULL; +#endif + // invalidate block + rm_from_block_lists(bd); + bd->addr = bd->size = bd->addr_lit = bd->size_lit = 0; + bd->entry_count = 0; + } + emith_update_cache(); +} + +static struct block_desc *dr_find_inactive_block(int tcache_id, u16 crc, + u32 addr, int size, u32 addr_lit, int size_lit) +{ + struct block_list **head = &inactive_blocks[tcache_id]; + struct block_list *current; + + for (current = *head; current != NULL; current = current->next) { + struct block_desc *block = current->block; + if (block->crc == crc && block->addr == addr && block->size == size && + block->addr_lit == addr_lit && block->size_lit == size_lit) + { + rm_from_block_lists(block); + return block; + } + } + return NULL; +} + +static struct block_desc *dr_add_block(u32 addr, int size, + u32 addr_lit, int size_lit, u16 crc, int is_slave, int *blk_id) +{ + struct block_entry *be; + struct block_desc *bd; + int tcache_id; + int *bcount; + + // do a lookup to get tcache_id and override check + be = dr_get_entry(addr, is_slave, &tcache_id); + if (be != NULL) + dbg(1, "block override for %08x", addr); + + bcount = &block_counts[tcache_id]; + if (*bcount == block_limit[tcache_id]) { + dbg(1, "bd overflow for tcache %d", tcache_id); + return NULL; + } + + bd = &block_tables[tcache_id][*bcount]; + bd->addr = addr; + bd->size = size; + bd->addr_lit = addr_lit; + bd->size_lit = size_lit; + bd->tcache_ptr = tcache_ptr; + bd->crc = crc; + bd->active = 0; + bd->list = NULL; + bd->entry_count = 0; +#if (DRC_DEBUG & 2) + bd->refcount = 0; +#endif + + *blk_id = *bcount; + (*bcount)++; + if (*bcount >= BLOCK_MAX_COUNT(tcache_id)) + *bcount = 0; + + return bd; +} + static void dr_link_blocks(struct block_entry *be, int tcache_id) { #if LINK_BRANCHES @@ -1321,6 +1248,139 @@ static void dr_activate_block(struct block_desc *bd, int tcache_id, int is_slave bd->active = 1; } +static void REGPARM(3) ALIGNED(32) *dr_lookup_block(u32 pc, SH2 *sh2, int *tcache_id) +{ + struct block_entry *be = NULL; + void *block = NULL; + + be = dr_get_entry(pc, sh2->is_slave, tcache_id); + if (be != NULL) + block = be->tcache_ptr; + +#if (DRC_DEBUG & 2) + if (be != NULL) + be->block->refcount++; +#endif + return block; +} + +static void dr_free_oldest_block(int tcache_id) +{ + struct block_desc *bd; + + if (block_limit[tcache_id] >= BLOCK_MAX_COUNT(tcache_id)) { + // block desc wrap around + block_limit[tcache_id] = 0; + } + bd = &block_tables[tcache_id][block_limit[tcache_id]]; + + if (bd->tcache_ptr && bd->tcache_ptr < tcache_ptrs[tcache_id]) { + // cache wrap around + tcache_ptrs[tcache_id] = bd->tcache_ptr; + } + + if (bd->addr && bd->entry_count) + dr_rm_block_entry(bd, tcache_id, 0, 1); + + block_limit[tcache_id]++; + if (block_limit[tcache_id] >= BLOCK_MAX_COUNT(tcache_id)) + block_limit[tcache_id] = 0; + bd = &block_tables[tcache_id][block_limit[tcache_id]]; + if (bd->tcache_ptr >= tcache_ptrs[tcache_id]) + tcache_limit[tcache_id] = bd->tcache_ptr; + else + tcache_limit[tcache_id] = tcache_bases[tcache_id] + tcache_sizes[tcache_id]; +} + +static u8 *dr_prepare_cache(int tcache_id, int insn_count) +{ + u8 *limit = tcache_limit[tcache_id]; + + // if no block desc available + if (block_counts[tcache_id] == block_limit[tcache_id]) + dr_free_oldest_block(tcache_id); + + // while not enough cache space left (limit - tcache_ptr < max space needed) + while (tcache_limit[tcache_id] - tcache_ptrs[tcache_id] < insn_count * 128) + dr_free_oldest_block(tcache_id); + + if (limit != tcache_limit[tcache_id]) { +#if BRANCH_CACHE + if (tcache_id) + memset32(sh2s[tcache_id-1].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); + else { + memset32(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); + memset32(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)/4); + } +#endif +#if CALL_STACK + if (tcache_id) { + memset32(sh2s[tcache_id-1].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4); + sh2s[tcache_id-1].rts_cache_idx = 0; + } else { + memset32(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4); + memset32(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache)/4); + sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0; + } +#endif + } + return (u8 *)tcache_ptrs[tcache_id]; +} + +static void dr_flush_tcache(int tcid) +{ + int i; +#if (DRC_DEBUG & 1) + int tc_used, bl_used; + + tc_used = tcache_sizes[tcid] - (tcache_limit[tcid] - tcache_ptrs[tcid]); + bl_used = BLOCK_MAX_COUNT(tcid) - (block_limit[tcid] - block_counts[tcid]); + elprintf(EL_STATUS, "tcache #%d flush! (%d/%d, bds %d/%d)", tcid, tc_used, + tcache_sizes[tcid], bl_used, BLOCK_MAX_COUNT(tcid)); +#endif + + block_counts[tcid] = 0; + block_limit[tcid] = BLOCK_MAX_COUNT(tcid) - 1; + block_link_pool_counts[tcid] = 0; + blink_free[tcid] = NULL; + memset(unresolved_links[tcid], 0, sizeof(*unresolved_links[0]) * HASH_TABLE_SIZE(tcid)); + memset(hash_tables[tcid], 0, sizeof(*hash_tables[0]) * HASH_TABLE_SIZE(tcid)); + tcache_ptrs[tcid] = tcache_bases[tcid]; + tcache_limit[tcid] = tcache_bases[tcid] + tcache_sizes[tcid]; + if (Pico32xMem->sdram != NULL) { + if (tcid == 0) { // ROM, RAM + memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram)); + memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram)); + memset(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)); + memset(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)); + memset(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache)); + memset(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache)); + sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0; + } else { + memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram)); + memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram)); + memset(Pico32xMem->drcblk_da[tcid - 1], 0, sizeof(Pico32xMem->drcblk_da[tcid - 1])); + memset(Pico32xMem->drclit_da[tcid - 1], 0, sizeof(Pico32xMem->drclit_da[tcid - 1])); + memset(sh2s[tcid - 1].branch_cache, -1, sizeof(sh2s[0].branch_cache)); + memset(sh2s[tcid - 1].rts_cache, -1, sizeof(sh2s[0].rts_cache)); + sh2s[tcid - 1].rts_cache_idx = 0; + } + } +#if (DRC_DEBUG & 4) + tcache_dsm_ptrs[tcid] = tcache_bases[tcid]; +#endif + + for (i = 0; i < RAM_SIZE(tcid) / INVAL_PAGE_SIZE; i++) + discard_block_list(&inval_lookup[tcid][i]); + discard_block_list(&inactive_blocks[tcid]); +} + +static void *dr_failure(void) +{ + lprintf("recompilation failed\n"); + exit(1); +} + #define ADD_TO_ARRAY(array, count, item, failcode) { \ if (count >= ARRAY_SIZE(array)) { \ dbg(1, "warning: " #array " overflow"); \ @@ -5066,61 +5126,7 @@ static void sh2_generate_utils(void) #endif } -static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nolit, int free) -{ - struct block_link *bl; - u32 i; - - free = free || nolit; // block is invalid if literals are overwritten - dbg(2," %sing block %08x-%08x,%08x-%08x, blkid %d,%d", free?"delet":"disabl", - bd->addr, bd->addr + bd->size, bd->addr_lit, bd->addr_lit + bd->size_lit, - tcache_id, bd - block_tables[tcache_id]); - if (bd->addr == 0 || bd->entry_count == 0) { - dbg(1, " killing dead block!? %08x", bd->addr); - return; - } - -#if LINK_BRANCHES - // remove from hash table, make incoming links unresolved - if (bd->active) { - for (i = 0; i < bd->entry_count; i++) { - rm_from_hashlist(&bd->entryp[i], tcache_id); - - while ((bl = bd->entryp[i].links) != NULL) { - dr_block_unlink(bl, 1); - add_to_hashlist_unresolved(bl, tcache_id); - } - } - - dr_mark_memory(-1, bd, tcache_id, nolit); - add_to_block_list(&inactive_blocks[tcache_id], bd); - } - bd->active = 0; -#endif - - if (free) { -#if LINK_BRANCHES - // revoke outgoing links - for (bl = bd->entryp[0].o_links; bl != NULL; bl = bl->o_next) { - if (bl->target) - dr_block_unlink(bl, 0); - else - rm_from_hashlist_unresolved(bl, tcache_id); - bl->jump = NULL; - bl->next = blink_free[bl->tcache_id]; - blink_free[bl->tcache_id] = bl; - } - bd->entryp[0].o_links = NULL; -#endif - // invalidate block - rm_from_block_lists(bd); - bd->addr = bd->size = bd->addr_lit = bd->size_lit = 0; - bd->entry_count = 0; - } - emith_update_cache(); -} - -static void sh2_smc_rm_blocks(u32 a, int tcache_id, u32 shift) +static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, u32 shift) { struct block_list **blist, *entry, *next; u32 mask = RAM_SIZE(tcache_id) - 1; @@ -5146,12 +5152,12 @@ static void sh2_smc_rm_blocks(u32 a, int tcache_id, u32 shift) start_lit = block->addr_lit & wtmask; end_lit = start_lit + block->size_lit; // disable/delete block if it covers the modified address - if ((start_addr <= a && a < end_addr) || - (start_lit <= a && a < end_lit)) + if ((start_addr <= a+len && a < end_addr) || + (start_lit <= a+len && a < end_lit)) { dbg(2, "smc remove @%08x", a); - end_addr = (start_lit <= a && block->size_lit ? a : 0); - sh2_smc_rm_block_entry(block, tcache_id, end_addr, 0); + end_addr = (start_lit <= a+len && block->size_lit ? a : 0); + dr_rm_block_entry(block, tcache_id, end_addr, 0); #if (DRC_DEBUG & 2) removed = 1; #endif @@ -5182,17 +5188,20 @@ static void sh2_smc_rm_blocks(u32 a, int tcache_id, u32 shift) #endif } -void sh2_drc_wcheck_ram(unsigned int a, int val, SH2 *sh2) +void sh2_drc_wcheck_ram(unsigned int a, unsigned t, SH2 *sh2) { - dbg(2, "%csh2 smc check @%08x v=%d", sh2->is_slave ? 's' : 'm', a, val); - sh2_smc_rm_blocks(a, 0, SH2_DRCBLK_RAM_SHIFT); + int off = ((u16) t ? 0 : 2); + int len = ((u16) t ? 2 : 0) + (t >> 16 ? 2 : 0); + + sh2_smc_rm_blocks(a + off, len, 0, SH2_DRCBLK_RAM_SHIFT); } -void sh2_drc_wcheck_da(unsigned int a, int val, SH2 *sh2) +void sh2_drc_wcheck_da(unsigned int a, unsigned t, SH2 *sh2) { - int cpuid = sh2->is_slave; - dbg(2, "%csh2 smc check @%08x v=%d", cpuid ? 's' : 'm', a, val); - sh2_smc_rm_blocks(a, 1 + cpuid, SH2_DRCBLK_DA_SHIFT); + int off = ((u16) t ? 0 : 2); + int len = ((u16) t ? 2 : 0) + (t >> 16 ? 2 : 0); + + sh2_smc_rm_blocks(a + off, len, 1 + sh2->is_slave, SH2_DRCBLK_DA_SHIFT); } int sh2_execute_drc(SH2 *sh2c, int cycles) @@ -5408,9 +5417,9 @@ void sh2_drc_flush_all(void) block_stats(); entry_stats(); bcache_stats(); - flush_tcache(0); - flush_tcache(1); - flush_tcache(2); + dr_flush_tcache(0); + dr_flush_tcache(1); + dr_flush_tcache(2); Pico32x.emu_flags &= ~P32XF_DRC_ROM_C; } diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index 3565940d..94dff8c5 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -1,7 +1,7 @@ int sh2_drc_init(SH2 *sh2); void sh2_drc_finish(SH2 *sh2); -void sh2_drc_wcheck_ram(unsigned int a, int val, SH2 *sh2); -void sh2_drc_wcheck_da(unsigned int a, int val, SH2 *sh2); +void sh2_drc_wcheck_ram(unsigned int a, unsigned val, SH2 *sh2); +void sh2_drc_wcheck_da(unsigned int a, unsigned val, SH2 *sh2); #ifdef DRC_SH2 void sh2_drc_mem_setup(SH2 *sh2); diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index cf830dfc..57693ac1 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -80,7 +80,7 @@ typedef struct SH2_ unsigned char data_array[0x1000]; // cache (can be used as RAM) unsigned int peri_regs[0x200/4]; // periphereal regs -} SH2; +} SH2 ALIGNED(32); #define CYCLE_MULT_SHIFT 10 #define C_M68K_TO_SH2(xsh2, c) \ diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 06215a7c..39504416 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -231,7 +231,7 @@ static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2) for (idx = nrd = wr; idx != rd; ) { idx = (idx-1) % PFIFO_SZ; q = &fifo[idx]; - if (q->cpu != cpu && q->a == a) { q->a = -1; } + if (q->a == a && q->cpu != cpu) { q->a = -1; } if (q->a != -1) { nrd = idx; } } rd = nrd; @@ -825,7 +825,8 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) unsigned int cycles = sh2_cycles_done_m68k(sh2); Pico32x.sh2_regs[4 / 2] = d; p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); - sh2_end_run(sh2, 4); + if (p32x_sh2_ready(sh2->other_sh2, cycles+16)) + sh2_end_run(sh2, 4); sh2_poll_write(a & ~1, d, cycles, sh2); } return; @@ -851,7 +852,8 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) REG8IN16(r, a) = d; p32x_m68k_poll_event(P32XF_68KCPOLL); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); - sh2_end_run(sh2, 1); + if (p32x_sh2_ready(sh2->other_sh2, cycles+16)) + sh2_end_run(sh2, 1); sh2_poll_write(a & ~1, r[a / 2], cycles, sh2); } return; @@ -943,7 +945,8 @@ static void p32x_sh2reg_write16(u32 a, u32 d, SH2 *sh2) Pico32x.regs[a / 2] = d; p32x_m68k_poll_event(P32XF_68KCPOLL); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); - sh2_end_run(sh2, 1); + if (p32x_sh2_ready(sh2->other_sh2, cycles+16)) + sh2_end_run(sh2, 1); sh2_poll_write(a, d, cycles, sh2); } return; @@ -1569,7 +1572,7 @@ static u32 REGPARM(2) sh2_read32_rom(u32 a, SH2 *sh2) // writes #ifdef DRC_SH2 -static void NOINLINE sh2_sdram_poll(u32 a, u32 d, SH2 *sh2) +static void sh2_sdram_poll(u32 a, u32 d, SH2 *sh2) { unsigned cycles; @@ -1577,34 +1580,35 @@ static void NOINLINE sh2_sdram_poll(u32 a, u32 d, SH2 *sh2) cycles = sh2_cycles_done_m68k(sh2); sh2_poll_write(a, d, cycles, sh2); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_RPOLL, cycles); - sh2_end_run(sh2, 1); + if (p32x_sh2_ready(sh2->other_sh2, cycles+16)) + sh2_end_run(sh2, 1); DRC_RESTORE_SR(sh2); } -void NOINLINE sh2_sdram_checks(u32 a, u32 d, SH2 *sh2, int t) +void sh2_sdram_checks(u32 a, u32 d, SH2 *sh2, u32 t) { - if (t & 0x80) - sh2_sdram_poll(a, d, sh2); - if (t & 0x7f) - sh2_drc_wcheck_ram(a, t & 0x7f, sh2); + if (t & 0x80) sh2_sdram_poll(a, d, sh2); + if (t & 0x7f) sh2_drc_wcheck_ram(a, t & 0x7f, sh2); } -void NOINLINE sh2_sdram_checks_l(u32 a, u32 d, SH2 *sh2, int t) +void sh2_sdram_checks_l(u32 a, u32 d, SH2 *sh2, u32 t) { - sh2_sdram_checks(a, d>>16, sh2, t); - sh2_sdram_checks(a+2, d, sh2, t>>16); + u32 m = 0x80 | 0x800000; + + if (t & 0x000080) sh2_sdram_poll(a, d>>16, sh2); + if (t & 0x800000) sh2_sdram_poll(a+2, d, sh2); + if (t & ~m) sh2_drc_wcheck_ram(a, t & ~m, sh2); } #ifndef _ASM_32X_MEMORY_C -static void sh2_da_checks(u32 a, int t, SH2 *sh2) +static void sh2_da_checks(u32 a, u32 t, SH2 *sh2) { sh2_drc_wcheck_da(a, t, sh2); } -static void NOINLINE sh2_da_checks_l(u32 a, int t, SH2 *sh2) +static void sh2_da_checks_l(u32 a, u32 t, SH2 *sh2) { - sh2_da_checks(a, t, sh2); - sh2_da_checks(a+2, t>>16, sh2); + sh2_drc_wcheck_da(a, t, sh2); } #endif #endif @@ -1667,7 +1671,7 @@ static void REGPARM(3) sh2_write8_sdram(u32 a, u32 d, SH2 *sh2) ((u8 *)sh2->p_sdram)[a1] = d; #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_ram; - int t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; + u32 t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; if (t) sh2_sdram_checks(a & ~1, ((u16 *)sh2->p_sdram)[a1 / 2], sh2, t); #endif @@ -1679,7 +1683,7 @@ static void REGPARM(3) sh2_write8_da(u32 a, u32 d, SH2 *sh2) sh2->data_array[a1] = d; #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_da; - int t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; + u32 t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; if (t) sh2_da_checks(a, t, sh2); #endif @@ -1741,7 +1745,7 @@ static void REGPARM(3) sh2_write16_sdram(u32 a, u32 d, SH2 *sh2) ((u16 *)sh2->p_sdram)[a1 / 2] = d; #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_ram; - int t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; + u32 t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; if (t) sh2_sdram_checks(a, d, sh2, t); #endif @@ -1753,7 +1757,7 @@ static void REGPARM(3) sh2_write16_da(u32 a, u32 d, SH2 *sh2) ((u16 *)sh2->data_array)[a1 / 2] = d; #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_da; - int t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; + u32 t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; if (t) sh2_da_checks(a, t, sh2); #endif @@ -1816,8 +1820,8 @@ static void REGPARM(3) sh2_write32_sdram(u32 a, u32 d, SH2 *sh2) *(u32 *)(sh2->p_sdram + a1) = (d << 16) | (d >> 16); #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_ram; - int t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; - int u = p[(a1+2) >> SH2_DRCBLK_RAM_SHIFT]; + u32 t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; + u32 u = p[(a1+2) >> SH2_DRCBLK_RAM_SHIFT]; if (t|(u<<16)) sh2_sdram_checks_l(a, d, sh2, t|(u<<16)); #endif @@ -1829,8 +1833,8 @@ static void REGPARM(3) sh2_write32_da(u32 a, u32 d, SH2 *sh2) *((u32 *)sh2->data_array + a1/4) = (d << 16) | (d >> 16); #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_da; - int t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; - int u = p[(a1+2) >> SH2_DRCBLK_DA_SHIFT]; + u32 t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; + u32 u = p[(a1+2) >> SH2_DRCBLK_DA_SHIFT]; if (t|(u<<16)) sh2_da_checks_l(a, t|(u<<16), sh2); #endif diff --git a/pico/32x/memory_arm.S b/pico/32x/memory_arm.S index ba83a6bf..b3a94b62 100644 --- a/pico/32x/memory_arm.S +++ b/pico/32x/memory_arm.S @@ -17,6 +17,7 @@ .equ SH2_DRAM_OW, 1<<(32-SH2_DRAM_SHIFT) @ DRAM overwrite mode bit .text +.align 5 #if 0 @ u32 a, SH2 *sh2 @@ -142,11 +143,12 @@ sh2_write8_sdram: ldrb r3, [ip, r3, lsr #SH2_RAM_SHIFT+1] cmp r3, #0 bxeq lr + @ need to load aligned 16 bit data for check ldr ip, [r2, #OFS_SH2_p_sdram] bic r0, r0, #1 - mov r3, r0, lsl #SH2_RAM_SHIFT - mov r3, r3, lsr #SH2_RAM_SHIFT - ldrh r1, [ip, r3] + mov r1, r0, lsl #SH2_RAM_SHIFT + mov r1, r1, lsr #SH2_RAM_SHIFT + ldrh r1, [ip, r1] b sh2_sdram_checks #else bx lr @@ -252,13 +254,8 @@ sh2_write32_da: ldr ip, [r2, #OFS_SH2_p_drcblk_da] ldrb r1, [ip, r3, lsr #SH2_DA_SHIFT+1]! ldrb ip, [ip, #1] - orrs r3, r1, ip, lsl #16 + orrs r1, r1, ip, lsl #16 bxeq lr - stmfd sp!, {r0, r2, ip, lr} - bl sh2_drc_wcheck_da - ldmfd sp!, {r0, r2, ip, lr} - add r0, r0, #2 - mov r1, ip b sh2_drc_wcheck_da #else bx lr diff --git a/pico/pico_int.h b/pico/pico_int.h index 89acc4fb..0fc458ef 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -921,6 +921,10 @@ void p32x_event_schedule(unsigned int now, enum p32x_event event, int after); void p32x_event_schedule_sh2(SH2 *sh2, enum p32x_event event, int after); void p32x_schedule_hint(SH2 *sh2, unsigned int m68k_cycles); +#define p32x_sh2_ready(sh2, cycles) \ + (CYCLES_GT(cycles,sh2->m68krcycles_done) && \ + !(sh2->state&(SH2_STATE_CPOLL|SH2_STATE_VPOLL|SH2_STATE_RPOLL))) + // 32x/memory.c extern struct Pico32xMem *Pico32xMem; unsigned int PicoRead8_32x(unsigned int a); diff --git a/platform/gp2x/PicoDrive.gpe b/platform/gp2x/PicoDrive.gpe index 1c065185..59416d93 100644 --- a/platform/gp2x/PicoDrive.gpe +++ b/platform/gp2x/PicoDrive.gpe @@ -7,6 +7,8 @@ if ! [ -e /dev/accel ]; then export POLLUX_RAM_TIMINGS='ram_timings=2,9,4,1,1,1,1' export POLLUX_LCD_TIMINGS_NTSC='lcd_timings=397,1,37,277,341,0,17,337;clkdiv0=9' export POLLUX_LCD_TIMINGS_PAL='lcd_timings=428,1,37,277,341,0,17,337;clkdiv0=10' +else + export POLLUX_RAM_TIMINGS='ram_timings=3,9,4,1,1,1,1' fi ./PicoDrive "$@"