sh2 drc: bug fixing

This commit is contained in:
kub 2019-11-27 21:02:53 +01:00
parent f1da0a362f
commit 57d863cb87
4 changed files with 28 additions and 21 deletions

View file

@ -25,7 +25,7 @@
#define PR 18 // platform register
// All operations but ptr ops are using the lower 32 bits of the A64 registers.
// The upper 32 bits are only used in ptr ops.
// The upper 32 bits are only used in ptr ops and are zeroed by A64 32 bit ops.
#define A64_COND_EQ 0x0

View file

@ -33,6 +33,8 @@
#define FC 24 // emulated processor flags: C (bit 0), others 0
#define FV 25 // emulated processor flags: Nt^Ns (bit 31). others x
// All operations but ptr ops are using the lower 32 bits of the registers.
// The upper 32 bits always contain the sign extension from the lower 32 bits.
// unified conditions; virtual, not corresponding to anything real on MIPS
#define DCOND_EQ 0x0
@ -1095,10 +1097,10 @@ static void emith_lohi_nops(void)
emith_lohi_nops(); \
EMIT(MIPS_MULT(s1, s2)); \
EMIT(MIPS_MFLO(AT)); \
emith_add_r_r(dlo, AT); \
EMIT(MIPS_SLTU_REG(t_, dlo, AT)); \
EMIT(MIPS_MFHI(AT)); \
EMIT(MIPS_MFHI(t_)); \
last_lohi = (u8 *)tcache_ptr; \
emith_add_r_r(dlo, AT); \
EMIT(MIPS_SLTU_REG(AT, dlo, AT)); \
emith_add_r_r(dhi, AT); \
emith_add_r_r(dhi, t_); \
rcache_free_tmp(t_); \
@ -1479,7 +1481,7 @@ static int emith_cond_check(int cond, int *r)
// NB: ABI SP alignment is 8 for compatibility with MIPS IV
#define emith_push_ret(r) do { \
emith_add_r_r_ptr_imm(SP, SP, -8-16); /* ABI: 16 byte arg save area */ \
emith_add_r_r_ptr_imm(SP, SP, -8-16); /* O32: 16 byte arg save area */ \
emith_write_r_r_offs(LR, SP, 4+16); \
if ((r) > 0) emith_write_r_r_offs(r, SP, 0+16); \
} while (0)

View file

@ -30,6 +30,8 @@
#define FC 29 // emulated processor flags: C (bit 0), others 0
#define FV 28 // emulated processor flags: Nt^Ns (bit 31). others x
// All operations but ptr ops are using the lower 32 bits of the registers.
// The upper 32 bits always contain the sign extension from the lower 32 bits.
// unified conditions; virtual, not corresponding to anything real on RISC-V
#define DCOND_EQ 0x0
@ -217,12 +219,9 @@ enum { F2_ALT=0x20, F2_MULDIV=0x01 };
// NB: must split 64 bit result into 2 32 bit registers
// NB: expects 32 bit values in s1+s2, correctly sign extended to 64 bits
#define EMIT_R5_MULLU_REG(dlo, dhi, s1, s2) do { \
/*EMIT(R5_ADDW_IMM(s1, s1, 0));*/ \
/*EMIT(R5_ADDW_IMM(s2, s2, 0));*/ \
EMIT(R5_MUL(dlo, s1, s2)); \
EMIT(R5_ASR_IMM(dhi, dlo, 32)); \
EMIT(R5_LSL_IMM(dlo, dlo, 32)); \
EMIT(R5_ASR_IMM(dlo, dlo, 32)); \
EMIT(R5_ADDW_IMM(dlo, dlo, 0)); \
} while (0)
#define EMIT_R5_MULLS_REG(dlo, dhi, s1, s2) \
@ -633,7 +632,7 @@ static int literal_pindex, literal_iindex;
static inline int emith_pool_literal(uintptr_t imm)
{
int idx = literal_pindex - 8; // max look behind in pool
// see if one of the last literals was the same (or close enough)
// see if one of the last literals was the same
for (idx = (idx < 0 ? 0 : idx); idx < literal_pindex; idx++)
if (imm == literal_pool[idx])
break;

View file

@ -7,21 +7,24 @@
* See COPYING file in the top-level directory.
*
* notes:
* - tcache, block descriptor, link buffer overflows result in sh2_translate()
* failure, followed by full tcache invalidation for that region
* - tcache, block descriptor, block entry buffer overflows result in oldest
* blocks being deleted until enough space is available
* - link and list element buffer overflows result in failure and exit
* - jumps between blocks are tracked for SMC handling (in block_entry->links),
* except jumps between different tcaches
* except jumps from global to CPU-local tcaches
*
* implemented:
* - static register allocation
* - remaining register caching and tracking in temporaries
* - block-local branch linking
* - block linking (except between tcaches)
* - block linking
* - some constant propagation
* - call stack caching for host block entry address
* - delay, poll, and idle loop detection and handling
* - some T/M flag optimizations where the value is known or isn't used
*
* TODO:
* - better constant propagation
* - stack caching?
* - bug fixing
*/
#include <stddef.h>
@ -1068,7 +1071,7 @@ static struct block_desc *dr_add_block(int entries, u32 addr, int size,
if (be != NULL)
dbg(1, "block override for %08x", addr);
if (block_ring[tcache_id].used + 1 > block_ring[tcache_id].size ||
if (block_ring[tcache_id].used + 1 > block_ring[tcache_id].size ||
entry_ring[tcache_id].used + entries > entry_ring[tcache_id].size) {
dbg(1, "bd overflow for tcache %d", tcache_id);
return NULL;
@ -3014,13 +3017,13 @@ static void *dr_get_pc_base(u32 pc, SH2 *sh2);
static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
{
// branch targets in current block
struct linkage branch_targets[MAX_LOCAL_TARGETS];
static struct linkage branch_targets[MAX_LOCAL_TARGETS];
int branch_target_count = 0;
// unresolved local or external targets with block link/exit area if needed
struct linkage blx_targets[MAX_LOCAL_BRANCHES];
static struct linkage blx_targets[MAX_LOCAL_BRANCHES];
int blx_target_count = 0;
u8 op_flags[BLOCK_INSN_LIMIT];
static u8 op_flags[BLOCK_INSN_LIMIT];
enum flg_states { FLG_UNKNOWN, FLG_UNUSED, FLG_0, FLG_1 };
struct drcf {
@ -3037,7 +3040,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
#if LOOP_OPTIMIZER
// loops with pinned registers for optimization
// pinned regs are like statics and don't need saving/restoring inside a loop
struct linkage pinned_loops[MAX_LOCAL_TARGETS/16];
static struct linkage pinned_loops[MAX_LOCAL_TARGETS/16];
int pinned_loop_count = 0;
#endif
@ -3479,6 +3482,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
// no sense in looking any further than the next rcache flush
tmp = ((op_flags[i+v] & OF_BTARGET) || (op_flags[i+v-1] & OF_DELAY_OP) ||
(OP_ISBRACND(opd[v-1].op) && !(op_flags[i+v] & OF_DELAY_OP)));
// XXX looking behind cond branch to avoid evicting regs used later?
if (pc + 2*v <= end_pc && !tmp) { // (pc already incremented above)
late |= opd[v].source & ~write;
// ignore source regs after they have been written to
@ -4636,6 +4640,7 @@ end_op:
rcache_invalidate();
}
} else
// no space for resolving forward branch, handle it as external
dbg(1, "warning: too many unresolved branches");
}
@ -4657,6 +4662,7 @@ end_op:
EMITH_JMP_START(emith_invert_cond(cond));
if (bl) {
bl->jump = tcache_ptr;
emith_flush(); // flush to inhibit insn swapping
bl->type = BL_LDJMP;
}
tmp = rcache_get_tmp_arg(0);
@ -5534,7 +5540,7 @@ int sh2_drc_init(SH2 *sh2)
i = tcache_ptr - tcache;
RING_INIT(&tcache_ring[0], tcache_ptr, tcache_sizes[0] - i);
for (i = 1; i < ARRAY_SIZE(tcache_ring); i++) {
RING_INIT(&tcache_ring[i], tcache_ring[i-1].base + tcache_sizes[i-1],
RING_INIT(&tcache_ring[i], tcache_ring[i-1].base + tcache_ring[i-1].size,
tcache_sizes[i]);
}