mirror of
https://github.com/RaySollium99/picodrive.git
synced 2025-09-05 07:17:45 -04:00
sh2 drc: bug fixing
This commit is contained in:
parent
f1da0a362f
commit
57d863cb87
4 changed files with 28 additions and 21 deletions
|
@ -25,7 +25,7 @@
|
|||
#define PR 18 // platform register
|
||||
|
||||
// All operations but ptr ops are using the lower 32 bits of the A64 registers.
|
||||
// The upper 32 bits are only used in ptr ops.
|
||||
// The upper 32 bits are only used in ptr ops and are zeroed by A64 32 bit ops.
|
||||
|
||||
|
||||
#define A64_COND_EQ 0x0
|
||||
|
|
|
@ -33,6 +33,8 @@
|
|||
#define FC 24 // emulated processor flags: C (bit 0), others 0
|
||||
#define FV 25 // emulated processor flags: Nt^Ns (bit 31). others x
|
||||
|
||||
// All operations but ptr ops are using the lower 32 bits of the registers.
|
||||
// The upper 32 bits always contain the sign extension from the lower 32 bits.
|
||||
|
||||
// unified conditions; virtual, not corresponding to anything real on MIPS
|
||||
#define DCOND_EQ 0x0
|
||||
|
@ -1095,10 +1097,10 @@ static void emith_lohi_nops(void)
|
|||
emith_lohi_nops(); \
|
||||
EMIT(MIPS_MULT(s1, s2)); \
|
||||
EMIT(MIPS_MFLO(AT)); \
|
||||
emith_add_r_r(dlo, AT); \
|
||||
EMIT(MIPS_SLTU_REG(t_, dlo, AT)); \
|
||||
EMIT(MIPS_MFHI(AT)); \
|
||||
EMIT(MIPS_MFHI(t_)); \
|
||||
last_lohi = (u8 *)tcache_ptr; \
|
||||
emith_add_r_r(dlo, AT); \
|
||||
EMIT(MIPS_SLTU_REG(AT, dlo, AT)); \
|
||||
emith_add_r_r(dhi, AT); \
|
||||
emith_add_r_r(dhi, t_); \
|
||||
rcache_free_tmp(t_); \
|
||||
|
@ -1479,7 +1481,7 @@ static int emith_cond_check(int cond, int *r)
|
|||
|
||||
// NB: ABI SP alignment is 8 for compatibility with MIPS IV
|
||||
#define emith_push_ret(r) do { \
|
||||
emith_add_r_r_ptr_imm(SP, SP, -8-16); /* ABI: 16 byte arg save area */ \
|
||||
emith_add_r_r_ptr_imm(SP, SP, -8-16); /* O32: 16 byte arg save area */ \
|
||||
emith_write_r_r_offs(LR, SP, 4+16); \
|
||||
if ((r) > 0) emith_write_r_r_offs(r, SP, 0+16); \
|
||||
} while (0)
|
||||
|
|
|
@ -30,6 +30,8 @@
|
|||
#define FC 29 // emulated processor flags: C (bit 0), others 0
|
||||
#define FV 28 // emulated processor flags: Nt^Ns (bit 31). others x
|
||||
|
||||
// All operations but ptr ops are using the lower 32 bits of the registers.
|
||||
// The upper 32 bits always contain the sign extension from the lower 32 bits.
|
||||
|
||||
// unified conditions; virtual, not corresponding to anything real on RISC-V
|
||||
#define DCOND_EQ 0x0
|
||||
|
@ -217,12 +219,9 @@ enum { F2_ALT=0x20, F2_MULDIV=0x01 };
|
|||
// NB: must split 64 bit result into 2 32 bit registers
|
||||
// NB: expects 32 bit values in s1+s2, correctly sign extended to 64 bits
|
||||
#define EMIT_R5_MULLU_REG(dlo, dhi, s1, s2) do { \
|
||||
/*EMIT(R5_ADDW_IMM(s1, s1, 0));*/ \
|
||||
/*EMIT(R5_ADDW_IMM(s2, s2, 0));*/ \
|
||||
EMIT(R5_MUL(dlo, s1, s2)); \
|
||||
EMIT(R5_ASR_IMM(dhi, dlo, 32)); \
|
||||
EMIT(R5_LSL_IMM(dlo, dlo, 32)); \
|
||||
EMIT(R5_ASR_IMM(dlo, dlo, 32)); \
|
||||
EMIT(R5_ADDW_IMM(dlo, dlo, 0)); \
|
||||
} while (0)
|
||||
|
||||
#define EMIT_R5_MULLS_REG(dlo, dhi, s1, s2) \
|
||||
|
@ -633,7 +632,7 @@ static int literal_pindex, literal_iindex;
|
|||
static inline int emith_pool_literal(uintptr_t imm)
|
||||
{
|
||||
int idx = literal_pindex - 8; // max look behind in pool
|
||||
// see if one of the last literals was the same (or close enough)
|
||||
// see if one of the last literals was the same
|
||||
for (idx = (idx < 0 ? 0 : idx); idx < literal_pindex; idx++)
|
||||
if (imm == literal_pool[idx])
|
||||
break;
|
||||
|
|
|
@ -7,21 +7,24 @@
|
|||
* See COPYING file in the top-level directory.
|
||||
*
|
||||
* notes:
|
||||
* - tcache, block descriptor, link buffer overflows result in sh2_translate()
|
||||
* failure, followed by full tcache invalidation for that region
|
||||
* - tcache, block descriptor, block entry buffer overflows result in oldest
|
||||
* blocks being deleted until enough space is available
|
||||
* - link and list element buffer overflows result in failure and exit
|
||||
* - jumps between blocks are tracked for SMC handling (in block_entry->links),
|
||||
* except jumps between different tcaches
|
||||
* except jumps from global to CPU-local tcaches
|
||||
*
|
||||
* implemented:
|
||||
* - static register allocation
|
||||
* - remaining register caching and tracking in temporaries
|
||||
* - block-local branch linking
|
||||
* - block linking (except between tcaches)
|
||||
* - block linking
|
||||
* - some constant propagation
|
||||
* - call stack caching for host block entry address
|
||||
* - delay, poll, and idle loop detection and handling
|
||||
* - some T/M flag optimizations where the value is known or isn't used
|
||||
*
|
||||
* TODO:
|
||||
* - better constant propagation
|
||||
* - stack caching?
|
||||
* - bug fixing
|
||||
*/
|
||||
#include <stddef.h>
|
||||
|
@ -1068,7 +1071,7 @@ static struct block_desc *dr_add_block(int entries, u32 addr, int size,
|
|||
if (be != NULL)
|
||||
dbg(1, "block override for %08x", addr);
|
||||
|
||||
if (block_ring[tcache_id].used + 1 > block_ring[tcache_id].size ||
|
||||
if (block_ring[tcache_id].used + 1 > block_ring[tcache_id].size ||
|
||||
entry_ring[tcache_id].used + entries > entry_ring[tcache_id].size) {
|
||||
dbg(1, "bd overflow for tcache %d", tcache_id);
|
||||
return NULL;
|
||||
|
@ -3014,13 +3017,13 @@ static void *dr_get_pc_base(u32 pc, SH2 *sh2);
|
|||
static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
|
||||
{
|
||||
// branch targets in current block
|
||||
struct linkage branch_targets[MAX_LOCAL_TARGETS];
|
||||
static struct linkage branch_targets[MAX_LOCAL_TARGETS];
|
||||
int branch_target_count = 0;
|
||||
// unresolved local or external targets with block link/exit area if needed
|
||||
struct linkage blx_targets[MAX_LOCAL_BRANCHES];
|
||||
static struct linkage blx_targets[MAX_LOCAL_BRANCHES];
|
||||
int blx_target_count = 0;
|
||||
|
||||
u8 op_flags[BLOCK_INSN_LIMIT];
|
||||
static u8 op_flags[BLOCK_INSN_LIMIT];
|
||||
|
||||
enum flg_states { FLG_UNKNOWN, FLG_UNUSED, FLG_0, FLG_1 };
|
||||
struct drcf {
|
||||
|
@ -3037,7 +3040,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
|
|||
#if LOOP_OPTIMIZER
|
||||
// loops with pinned registers for optimization
|
||||
// pinned regs are like statics and don't need saving/restoring inside a loop
|
||||
struct linkage pinned_loops[MAX_LOCAL_TARGETS/16];
|
||||
static struct linkage pinned_loops[MAX_LOCAL_TARGETS/16];
|
||||
int pinned_loop_count = 0;
|
||||
#endif
|
||||
|
||||
|
@ -3479,6 +3482,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
|
|||
// no sense in looking any further than the next rcache flush
|
||||
tmp = ((op_flags[i+v] & OF_BTARGET) || (op_flags[i+v-1] & OF_DELAY_OP) ||
|
||||
(OP_ISBRACND(opd[v-1].op) && !(op_flags[i+v] & OF_DELAY_OP)));
|
||||
// XXX looking behind cond branch to avoid evicting regs used later?
|
||||
if (pc + 2*v <= end_pc && !tmp) { // (pc already incremented above)
|
||||
late |= opd[v].source & ~write;
|
||||
// ignore source regs after they have been written to
|
||||
|
@ -4636,6 +4640,7 @@ end_op:
|
|||
rcache_invalidate();
|
||||
}
|
||||
} else
|
||||
// no space for resolving forward branch, handle it as external
|
||||
dbg(1, "warning: too many unresolved branches");
|
||||
}
|
||||
|
||||
|
@ -4657,6 +4662,7 @@ end_op:
|
|||
EMITH_JMP_START(emith_invert_cond(cond));
|
||||
if (bl) {
|
||||
bl->jump = tcache_ptr;
|
||||
emith_flush(); // flush to inhibit insn swapping
|
||||
bl->type = BL_LDJMP;
|
||||
}
|
||||
tmp = rcache_get_tmp_arg(0);
|
||||
|
@ -5534,7 +5540,7 @@ int sh2_drc_init(SH2 *sh2)
|
|||
i = tcache_ptr - tcache;
|
||||
RING_INIT(&tcache_ring[0], tcache_ptr, tcache_sizes[0] - i);
|
||||
for (i = 1; i < ARRAY_SIZE(tcache_ring); i++) {
|
||||
RING_INIT(&tcache_ring[i], tcache_ring[i-1].base + tcache_sizes[i-1],
|
||||
RING_INIT(&tcache_ring[i], tcache_ring[i-1].base + tcache_ring[i-1].size,
|
||||
tcache_sizes[i]);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue